Fix some comics

This commit is contained in:
Bastian Kleineidam 2012-10-12 21:47:57 +02:00
parent b3e51ddc93
commit 7bf54255f0
3 changed files with 20 additions and 31 deletions

View file

@ -148,8 +148,8 @@ class AppleGeeksLite(_BasicScraper):
class Achewood(_BasicScraper): class Achewood(_BasicScraper):
latestUrl = 'http://www.achewood.com/' latestUrl = 'http://www.achewood.com/'
imageUrl = 'http://www.achewood.com/index.php?date=%s' imageUrl = 'http://www.achewood.com/index.php?date=%s'
imageSearch = compile(r'<img src="(http://m.assetbar.com/achewood/autaux.+?)"') imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)'))
prevSearch = compile(r'<a href="(index\.php\?date=\d{8})" class="dateNav" title="Previous comic"') prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', after="Previous"))
help = 'Index format: mmddyyyy' help = 'Index format: mmddyyyy'
namer = regexNamer(compile(r'date%3D(\d{8})')) namer = regexNamer(compile(r'date%3D(\d{8})'))

View file

@ -101,14 +101,6 @@ class Brink(_BasicScraper):
class BonoboConspiracy(_BasicScraper):
latestUrl = 'http://ansuz.sooke.bc.ca/bonobo-conspiracy/'
imageUrl = 'http://ansuz.sooke.bc.ca/bonobo-conspiracy/%s'
imageSearch = compile(r'<P.+?<IMG SRC="(.+?)" ALT')
prevSearch = compile(r'ansuz.+?/(\?i=.+?)".+?Previous')
help = 'Index format: nnn'
class BoredAndEvil(_BasicScraper): class BoredAndEvil(_BasicScraper):
latestUrl = 'http://www.boredandevil.com/' latestUrl = 'http://www.boredandevil.com/'
imageUrl = 'http://www.boredandevil.com/archive.php?date=%s' imageUrl = 'http://www.boredandevil.com/archive.php?date=%s'
@ -152,7 +144,7 @@ def blankLabel(name, baseUrl):
latestUrl=baseUrl, latestUrl=baseUrl,
imageUrl='/d/%s.shtml', imageUrl='/d/%s.shtml',
imageSearch=compile(tagre("img", "src", r'(/comic[s|/][^"]+)')), imageSearch=compile(tagre("img", "src", r'(/comic[s|/][^"]+)')),
prevSearch=compile(tagre("a", "href", r'(/d/\d+\.shtml)')+r"[^>]+/images/nav_02\.gif"), prevSearch=compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif"),
#prevSearch=compile(r'(?:"([^"]*(?:/d/[^"\r\n]*)|(?:/strip/.+?))")(?:(?:.{43}starshift_back.gif)|(?:.+?cxn_previous)|(?:.{43}previous)|(?:[^<>]*>[^<>]*<[^<>]*previous)|(?:.*?back_button)|(?:.*?comicnav-previous))'), #prevSearch=compile(r'(?:"([^"]*(?:/d/[^"\r\n]*)|(?:/strip/.+?))")(?:(?:.{43}starshift_back.gif)|(?:.+?cxn_previous)|(?:.{43}previous)|(?:[^<>]*>[^<>]*<[^<>]*previous)|(?:.*?back_button)|(?:.*?comicnav-previous))'),
help='Index format: yyyymmdd') help='Index format: yyyymmdd')
) )
@ -186,15 +178,6 @@ class BeePower(_BasicScraper):
class Bellen(_BasicScraper):
latestUrl = 'http://boxbrown.com/'
imageUrl = 'http://boxbrown.com/?p=%s'
imageSearch = compile(r'<img src="(http://boxbrown.com/comics/[^"]+)"')
prevSearch = compile(r'<a href="(.+?)"><span class="prev">')
help = 'Index format: nnn'
class BlankIt(_BasicScraper): class BlankIt(_BasicScraper):
latestUrl = 'http://blankitcomics.com/' latestUrl = 'http://blankitcomics.com/'
imageUrl = 'http://blankitcomics.com/%s' imageUrl = 'http://blankitcomics.com/%s'

View file

@ -238,26 +238,32 @@ class CatAndGirl(_BasicScraper):
def comicsDotCom(name, section):
    """Create a scraper class for one comic hosted on gocomics.com.

    name: comic identifier as it appears in the gocomics.com URL path.
    section: not used by the gocomics.com layout; kept so that existing
        ``comicsDotCom(name, section)`` calls continue to work.

    Returns a new ``_BasicScraper`` subclass named ``GoComicsDotCom_<name>``.
    """
    latestUrl = 'http://www.gocomics.com/%s' % name

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return ``<name>_<yyyy><mm><dd>.gif`` derived from the page URL."""
        # Page URLs end in .../<name>/<yyyy>/<mm>/<dd>. Split from the right
        # so the slashes of the scheme and host stay in the discarded prefix;
        # a left split would hand back 'http:' and the host name instead of
        # the date parts.
        prefix, year, month, day = pageUrl.rsplit('/', 3)
        return "%s_%s%s%s.gif" % (name, year, month, day)

    return type('GoComicsDotCom_%s' % name,
        (_BasicScraper,),
        dict(
        name='GoComicsDotCom/' + name,
        # Expose the start page on the class like the other scrapers do.
        # NOTE(review): presumably _BasicScraper's default starter reads
        # latestUrl -- confirm against the base class.
        latestUrl=latestUrl,
        imageUrl=latestUrl + '/%s',
        imageSearch=compile(tagre("img", "src", r'(http://assets\.amuniversal\.com/[0-9a-f]+)')),
        # The comic name has to be interpolated into the pattern; a literal
        # "%s" never occurs in the page links, so nothing would match.
        prevSearch=compile(tagre("a", "href", r'(/%s/\d+/\d+/\d+)' % name)+"Previous"),
        help='Index format: yyyy/mm/dd',
        namer=namer)
    )
# http://www.gocomics.com/features
# XXX
# http://www.gocomics.com/explore/editorial_list
# XXX
# http://www.gocomics.com/explore/sherpa_list
# XXX
acaseinpoint = comicsDotCom('acaseinpoint', 'comics') acaseinpoint = comicsDotCom('acaseinpoint', 'comics')
agnes = comicsDotCom('agnes', 'creators') agnes = comicsDotCom('agnes', 'creators')