Fix some comics

This commit is contained in:
Bastian Kleineidam 2012-11-14 20:23:30 +01:00
parent 31e7ddbd7c
commit 7e39b291dc
3 changed files with 20 additions and 20 deletions

View file

@ -134,7 +134,7 @@ def blankLabel(name, baseUrl):
dict( dict(
name='BlankLabel/' + name, name='BlankLabel/' + name,
latestUrl=baseUrl, latestUrl=baseUrl,
stripUrl=baseUrl+'d/%s.shtml', stripUrl=baseUrl+'d/%s.html',
imageSearch=compile(tagre("img", "src", r'(/comic[s|/][^"]+)')), imageSearch=compile(tagre("img", "src", r'(/comic[s|/][^"]+)')),
prevSearch=compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif"), prevSearch=compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif"),
#prevSearch=compile(r'(?:"([^"]*(?:/d/[^"\r\n]*)|(?:/strip/.+?))")(?:(?:.{43}starshift_back.gif)|(?:.+?cxn_previous)|(?:.{43}previous)|(?:[^<>]*>[^<>]*<[^<>]*previous)|(?:.*?back_button)|(?:.*?comicnav-previous))'), #prevSearch=compile(r'(?:"([^"]*(?:/d/[^"\r\n]*)|(?:/strip/.+?))")(?:(?:.{43}starshift_back.gif)|(?:.+?cxn_previous)|(?:.{43}previous)|(?:[^<>]*>[^<>]*<[^<>]*previous)|(?:.*?back_button)|(?:.*?comicnav-previous))'),

View file

@ -50,10 +50,10 @@ class CaribbeanBlue(_BasicScraper):
class Catena(_BasicScraper): class Catena(_BasicScraper):
latestUrl = 'http://catenamanor.com/' latestUrl = 'http://catenamanor.com/'
stripUrl = 'http://catenamanor.com/%s.gif' stripUrl = latestUrl + '%s'
imageSearch = compile(tagre("img", "src", r'(http://catenamanor\.com/comics/[^"]+)')) imageSearch = compile(tagre("img", "src", r'(http://catenamanor\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"')) prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"'))
help = 'Index format: yyyy-mm-dd-<name>' help = 'Index format: yyyy/mm/dd/<name>'
class Catharsis(_BasicScraper): class Catharsis(_BasicScraper):
@ -109,16 +109,15 @@ class CigarroAndCerveja(_BasicScraper):
help = 'Index format: non' help = 'Index format: non'
# XXX move
class CombustibleOrange(_BasicScraper): class TinyKittenTeeth(_BasicScraper):
latestUrl = 'http://www.combustibleorange.com/' latestUrl = 'http://www.tinykittenteeth.com/'
stripUrl = 'http://www.combustibleorange.com/index.php?current=%s' stripUrl = latestUrl + 'index.php?current=%s'
imageSearch = compile(r'<img src="(/images/comics/\d+?\.gif)"') imageSearch = compile(tagre("img", "src", r'(http://www\.tinykittenteeth\.com/comics/[^"]+)'))
prevSearch = compile(r'><a href="(.+?)"><img src="images/button-last.gif" border="0">') prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous"))
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
class Comedity(_BasicScraper): class Comedity(_BasicScraper):
latestUrl = 'http://www.comedity.com/' latestUrl = 'http://www.comedity.com/'
stripUrl = 'http://www.comedity.com/index.php?strip_id=%s' stripUrl = 'http://www.comedity.com/index.php?strip_id=%s'
@ -156,28 +155,26 @@ class CourtingDisaster(_BasicScraper):
class CrapIDrewOnMyLunchBreak(_BasicScraper): class CrapIDrewOnMyLunchBreak(_BasicScraper):
latestUrl = 'http://crap.jinwicked.com/' latestUrl = 'http://crap.jinwicked.com/'
stripUrl = 'http://crap.jinwicked.com/%s' stripUrl = latestUrl + '%s'
imageSearch = compile(r'<img src="(http://crap.jinwicked.com/comics/.+?)"') imageSearch = compile(tagre("img", "src", r'(http://crap\.jinwicked\.com/comics/[^"]+)'))
prevSearch = compile(r'<a href="(.+?)"><img src="http://comics.jinwicked.com/images/navigation_back.png"') prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
help = 'Index format: yyyy/mm/dd/name' help = 'Index format: yyyy/mm/dd/name'
class CtrlAltDel(_BasicScraper): class CtrlAltDel(_BasicScraper):
latestUrl = 'http://www.cad-comic.com/cad/' latestUrl = 'http://www.cad-comic.com/cad/'
stripUrl = latestUrl + '%s'
imageSearch = compile(r'<img src="(/comics/\w+/\d{8}\..+?)"') imageSearch = compile(r'<img src="(/comics/\w+/\d{8}\..+?)"')
prevSearch = compile(r'<a href="(/\w+/\d{8})" class="nav-back') prevSearch = compile(r'<a href="(/\w+/\d{8})" class="nav-back')
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
@property
def stripUrl(self):
return self.latestUrl + '%s'
class CtrlAltDelSillies(CtrlAltDel): class CtrlAltDelSillies(CtrlAltDel):
name = 'CtrlAltDel/Sillies' name = 'CtrlAltDel/Sillies'
latestUrl = 'http://www.cad-comic.com/sillies/' latestUrl = 'http://www.cad-comic.com/sillies/'
stripUrl = latestUrl + '%s'
class Curvy(_BasicScraper): class Curvy(_BasicScraper):
latestUrl = 'http://www.c.urvy.org/' latestUrl = 'http://www.c.urvy.org/'
@ -190,7 +187,7 @@ class Curvy(_BasicScraper):
def cloneManga(name, shortName, lastStrip=None): def cloneManga(name, shortName, lastStrip=None):
url = 'http://manga.clone-army.org' url = 'http://manga.clone-army.org'
baseUrl = '%s/%s.php' % (url, shortName) baseUrl = '%s/%s.php' % (url, shortName)
stripUrl = baseUrl + '?&page=%s' stripUrl = baseUrl + '?page=%s'
if lastStrip is None: if lastStrip is None:
starter = bounceStarter(baseUrl, compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"next\.gif"))) starter = bounceStarter(baseUrl, compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"next\.gif")))
else: else:
@ -391,7 +388,7 @@ class CrimesOfCybeleCity(_BasicScraper):
class CatsAndCameras(_BasicScraper): class CatsAndCameras(_BasicScraper):
latestUrl = 'http://catsncameras.com/cnc/' latestUrl = 'http://catsncameras.com/cnc/'
stripUrl = 'hhttp://catsncameras.com/cnc/?p=%s' stripUrl = latestUrl + '?p=%s'
imageSearch = compile(r'<img src="(http://catsncameras.com/cnc/comics/.+?)"') imageSearch = compile(r'<img src="(http://catsncameras.com/cnc/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(http://catsncameras.com/cnc/.+?)">') prevSearch = compile(r'<div class="nav-previous"><a href="(http://catsncameras.com/cnc/.+?)">')
help = 'Index format: nnn' help = 'Index format: nnn'

View file

@ -166,6 +166,9 @@ def normaliseURL(url):
while segments and segments[0] == '': while segments and segments[0] == '':
del segments[0] del segments[0]
pu[2] = '/' + '/'.join(segments) pu[2] = '/' + '/'.join(segments)
# remove leading '&' from query
if pu[3].startswith('&'):
pu[3] = pu[3][1:]
return urlparse.urlunparse(pu) return urlparse.urlunparse(pu)
def urlopen(url, referrer=None, retries=3, retry_wait_seconds=5): def urlopen(url, referrer=None, retries=3, retry_wait_seconds=5):