dosage/dosagelib/plugins/k.py

88 lines
3.4 KiB
Python
Raw Normal View History

# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
2014-01-05 15:50:57 +00:00
# Copyright (C) 2012-2014 Bastian Kleineidam
2012-06-20 19:58:13 +00:00
from re import compile, escape, IGNORECASE
2012-10-11 10:03:12 +00:00
from ..scraper import _BasicScraper
2012-12-04 06:02:40 +00:00
from ..util import tagre
2013-04-11 16:27:43 +00:00
from ..helpers import indirectStarter
2012-06-20 19:58:13 +00:00
class KevinAndKell(_BasicScraper):
    """Scraper for Kevin and Kell (http://www.kevinandkell.com/).

    Strip pages live at ``<year>/kk<mm><dd>.html`` with a two-digit month
    and day, as shown by ``firstStripUrl``.
    """
    url = 'http://www.kevinandkell.com/'
    # Placeholders, in order: year, two-digit month, two-digit day.
    stripUrl = url + '%s/kk%s%s.html'
    firstStripUrl = stripUrl % ('1995', '09', '03')
    # The dot before the file extension is escaped so that it only matches a
    # literal '.' and not an arbitrary character.
    imageSearch = compile(r'<img.+?src="(/?(\d+/)?strips/kk\d+\.(gif|jpg))"', IGNORECASE)
    prevSearch = compile(r'<a.+?href="(/?(\.\./)?\d+/kk\d+\.html)"[^>]*><span>Previous Strip', IGNORECASE)
    help = 'Index format: yyyy-mm-dd'

    def getIndexStripUrl(self, index):
        """Return the strip page URL for an index of the form yyyy-mm-dd.

        Each component is parsed as an integer. Month and day are then
        zero-padded to two digits so that e.g. '1995-09-03' (or '1995-9-3')
        maps to '.../1995/kk0903.html' rather than '.../1995/kk93.html'.

        Raises ValueError if a component is not an integer, and TypeError
        if the index does not have exactly three components.
        """
        numbers = [int(part) for part in index.split('-')]
        # The first component is the year and is used as-is; all following
        # components are padded to two digits.
        fields = (str(numbers[0]),) + tuple('%02d' % n for n in numbers[1:])
        return self.stripUrl % fields
2012-06-20 19:58:13 +00:00
2013-03-06 19:21:10 +00:00
class Key(_BasicScraper):
    """Scraper for the comic hosted at http://key.shadilyn.com/."""
    # Common prefix shared by the start page and the strip pages.
    baseUrl = 'http://key.shadilyn.com/'
    # Strip pages take the index as their file name.
    stripUrl = baseUrl + 'pages/%s.html'
    url = baseUrl + 'latestpage.html'
    help = 'Index format: nnn'
    # Captures the target of the link that is followed by the text "prev".
    prevSearch = compile(
        r'</a><a href="(.+?html)".+?prev'
    )
    # Captures a quoted path that starts with images/ or pages/images/.
    imageSearch = compile(
        r'"((?:images/.+?)|(?:pages/images/.+?))"'
    )
2013-03-12 20:16:17 +00:00
class KickInTheHead(_BasicScraper):
    """Scraper for Kick In The Head (http://www.kickinthehead.org/)."""
    url = 'http://www.kickinthehead.org/'
    # Regex-escaped form of the base URL, interpolated into the patterns below.
    rurl = escape(url)
    help = 'Index format: yyyy/mm/dd/stripname'
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '2003/03/20/ipod-envy'
    # tagre comes from ..util; presumably it builds a pattern matching the
    # given tag/attribute with the supplied value pattern -- confirm there.
    prevSearch = compile(
        tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl,
              after="navi-prev"))
    imageSearch = compile(
        tagre("img", "src",
              r'(%skickinthehead3/comics/\d+-\d+-\d+[^"]+)' % rurl))
2012-06-20 19:58:13 +00:00
class KillerKomics(_BasicScraper):
    """Scraper for Killer Komics (http://www.killerkomics.com/web-comics/)."""
    # Common prefix shared by the start page and the strip pages.
    baseUrl = 'http://www.killerkomics.com/web-comics/'
    url = baseUrl + 'index_ang.cfm'
    stripUrl = baseUrl + '%s.cfm'
    # Dots in the host name are escaped so they match only a literal '.',
    # consistent with the other host-anchored patterns in this module.
    imageSearch = compile(r'<img src="(http://www\.killerkomics\.com/FichiersUpload/Comics/.+?)"')
    # Captures the first link inside the <div id="precedent"> element.
    prevSearch = compile(r'<div id="precedent"><a href="(.+?)"')
    help = 'Index format: strip-name'
2012-12-08 20:30:51 +00:00
2012-12-13 20:05:27 +00:00
# XXX disallowed by robots.txt
class _Kofightclub(_BasicScraper):
    """Scraper definition for http://www.kofightclub.com/."""
    url = 'http://www.kofightclub.com/'
    help = 'Index format: yyyymmdd'
    stripUrl = url + 'd/%s.html'
    # Two tag patterns concatenated: the link, directly followed by the
    # image whose alt text is "Previous comic".
    prevSearch = compile(
        tagre("a", "href", r'((?:http://www\.kofightclub\.com)?/d/\d+\.html)') +
        tagre("img", "alt", "Previous comic"))
    # Comic images are referenced relative to the page (../images/<digits>...).
    imageSearch = compile(
        tagre("img", "src", r'(\.\./images/\d+[^"]+)'))
2012-12-13 20:05:27 +00:00
2013-03-06 19:21:10 +00:00
class Krakow(_BasicScraper):
    """Scraper for Krakow (http://www.krakow.krakowstudios.com/)."""
    url = 'http://www.krakow.krakowstudios.com/'
    help = 'Index format: yyyymmdd'
    # Strips are addressed through the archive page's date query parameter.
    stripUrl = url + 'archive.php?date=%s'
    firstStripUrl = stripUrl % '20081111'
    # Captures the archive link wrapping the image named previous_day.
    prevSearch = compile(
        r'<a href="(archive\.php\?date=.+?)"><img border=0 name=previous_day'
    )
    # Captures a relative image path under comics/.
    imageSearch = compile(
        r'<img src="(comics/.+?)"'
    )
class Kukuburi(_BasicScraper):
    """Scraper for Kukuburi (http://www.kukuburi.com/)."""
    # Common prefix shared by the start page and the strip pages.
    baseUrl = 'http://www.kukuburi.com/'
    stripUrl = baseUrl + 'v2/%s/'
    firstStripUrl = stripUrl % '2007/08/09/one'
    url = baseUrl + 'current/'
    help = 'Index format: yyyy/mm/dd/stripname'
    # Captures the first quoted http URL that follows "nav-previous".
    prevSearch = compile(
        r'nav-previous.+?"(http.+?)"'
    )
    # The after pattern asks for alt=" followed by at least one non-quote
    # character; presumably tagre applies it after the src attribute --
    # confirm against ..util.tagre.
    imageSearch = compile(
        tagre("img", "src",
              r'(http://www\.kukuburi\.com/v2/comics/[^"]+)',
              after='alt="[^"]'))
2012-12-13 20:05:27 +00:00
class KuroShouri(_BasicScraper):
    """Scraper for Kuro Shouri (http://kuroshouri.com/)."""
    url = 'http://kuroshouri.com/'
    # Regex-escaped form of the base URL, interpolated into the patterns below.
    rurl = escape(url)
    help = 'Index format: chapter-n-page-m'
    stripUrl = url + '?webcomic_post=%s'
    # Accepts either single or double quotes around the src value.
    imageSearch = compile(
        tagre("img", "src",
              r"(%swp-content/webcomic/kuroshouri/[^'\"]+)" % rurl,
              quote="['\"]"))
    prevSearch = compile(
        tagre("a", "href", r'(%s\?webcomic_post\=[^"]+)' % rurl,
              after="previous"))
    # Must be defined after url and prevSearch, which it takes as arguments.
    # Presumably locates the starting strip via the previous link found on
    # the main page -- confirm against ..helpers.indirectStarter.
    starter = indirectStarter(url, prevSearch)