Fix some comics
This commit is contained in:
parent
b3e51ddc93
commit
7bf54255f0
3 changed files with 20 additions and 31 deletions
|
@ -148,8 +148,8 @@ class AppleGeeksLite(_BasicScraper):
|
||||||
class Achewood(_BasicScraper):
|
class Achewood(_BasicScraper):
|
||||||
latestUrl = 'http://www.achewood.com/'
|
latestUrl = 'http://www.achewood.com/'
|
||||||
imageUrl = 'http://www.achewood.com/index.php?date=%s'
|
imageUrl = 'http://www.achewood.com/index.php?date=%s'
|
||||||
imageSearch = compile(r'<img src="(http://m.assetbar.com/achewood/autaux.+?)"')
|
imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)'))
|
||||||
prevSearch = compile(r'<a href="(index\.php\?date=\d{8})" class="dateNav" title="Previous comic"')
|
prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', after="Previous"))
|
||||||
help = 'Index format: mmddyyyy'
|
help = 'Index format: mmddyyyy'
|
||||||
namer = regexNamer(compile(r'date%3D(\d{8})'))
|
namer = regexNamer(compile(r'date%3D(\d{8})'))
|
||||||
|
|
||||||
|
|
|
@ -101,14 +101,6 @@ class Brink(_BasicScraper):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class BonoboConspiracy(_BasicScraper):
|
|
||||||
latestUrl = 'http://ansuz.sooke.bc.ca/bonobo-conspiracy/'
|
|
||||||
imageUrl = 'http://ansuz.sooke.bc.ca/bonobo-conspiracy/%s'
|
|
||||||
imageSearch = compile(r'<P.+?<IMG SRC="(.+?)" ALT')
|
|
||||||
prevSearch = compile(r'ansuz.+?/(\?i=.+?)".+?Previous')
|
|
||||||
help = 'Index format: nnn'
|
|
||||||
|
|
||||||
|
|
||||||
class BoredAndEvil(_BasicScraper):
|
class BoredAndEvil(_BasicScraper):
|
||||||
latestUrl = 'http://www.boredandevil.com/'
|
latestUrl = 'http://www.boredandevil.com/'
|
||||||
imageUrl = 'http://www.boredandevil.com/archive.php?date=%s'
|
imageUrl = 'http://www.boredandevil.com/archive.php?date=%s'
|
||||||
|
@ -152,7 +144,7 @@ def blankLabel(name, baseUrl):
|
||||||
latestUrl=baseUrl,
|
latestUrl=baseUrl,
|
||||||
imageUrl='/d/%s.shtml',
|
imageUrl='/d/%s.shtml',
|
||||||
imageSearch=compile(tagre("img", "src", r'(/comic[s|/][^"]+)')),
|
imageSearch=compile(tagre("img", "src", r'(/comic[s|/][^"]+)')),
|
||||||
prevSearch=compile(tagre("a", "href", r'(/d/\d+\.shtml)')+r"[^>]+/images/nav_02\.gif"),
|
prevSearch=compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif"),
|
||||||
#prevSearch=compile(r'(?:"([^"]*(?:/d/[^"\r\n]*)|(?:/strip/.+?))")(?:(?:.{43}starshift_back.gif)|(?:.+?cxn_previous)|(?:.{43}previous)|(?:[^<>]*>[^<>]*<[^<>]*previous)|(?:.*?back_button)|(?:.*?comicnav-previous))'),
|
#prevSearch=compile(r'(?:"([^"]*(?:/d/[^"\r\n]*)|(?:/strip/.+?))")(?:(?:.{43}starshift_back.gif)|(?:.+?cxn_previous)|(?:.{43}previous)|(?:[^<>]*>[^<>]*<[^<>]*previous)|(?:.*?back_button)|(?:.*?comicnav-previous))'),
|
||||||
help='Index format: yyyymmdd')
|
help='Index format: yyyymmdd')
|
||||||
)
|
)
|
||||||
|
@ -186,15 +178,6 @@ class BeePower(_BasicScraper):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Bellen(_BasicScraper):
|
|
||||||
latestUrl = 'http://boxbrown.com/'
|
|
||||||
imageUrl = 'http://boxbrown.com/?p=%s'
|
|
||||||
imageSearch = compile(r'<img src="(http://boxbrown.com/comics/[^"]+)"')
|
|
||||||
prevSearch = compile(r'<a href="(.+?)"><span class="prev">')
|
|
||||||
help = 'Index format: nnn'
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class BlankIt(_BasicScraper):
|
class BlankIt(_BasicScraper):
|
||||||
latestUrl = 'http://blankitcomics.com/'
|
latestUrl = 'http://blankitcomics.com/'
|
||||||
imageUrl = 'http://blankitcomics.com/%s'
|
imageUrl = 'http://blankitcomics.com/%s'
|
||||||
|
|
|
@ -238,26 +238,32 @@ class CatAndGirl(_BasicScraper):
|
||||||
|
|
||||||
|
|
||||||
def comicsDotCom(name, section):
|
def comicsDotCom(name, section):
|
||||||
baseUrl = 'http://www.comics.com/%s/%s/archive/' % (section, name)
|
latestUrl = 'http://www.gocomics.com/%s' % name
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
htmlname = pageUrl.split('/')[-1]
|
prefix, year, month, day = pageUrl.split('/', 3)
|
||||||
filename = htmlname.split('.')[0]
|
return "%s_%s%s%s.gif" % (name, year, month, day)
|
||||||
return filename
|
|
||||||
|
|
||||||
return type('ComicsDotCom_%s' % name,
|
return type('GoComicsDotCom_%s' % name,
|
||||||
(_BasicScraper,),
|
(_BasicScraper,),
|
||||||
dict(
|
dict(
|
||||||
name='ComicsDotCom/' + name,
|
name='GoComicsDotCom/' + name,
|
||||||
starter=indirectStarter(baseUrl, compile(r'<A HREF="(/[\w/]+?/archive/\w+?-\d{8}\.html)">(?:<IMG SRC="/[\w/]+?/images/arrow_right.gif|(?:<font[^>]*?>)?Next Day)')),
|
imageUrl=latestUrl + '/%s',
|
||||||
imageUrl=baseUrl + 'name-%s.html',
|
imageSearch=compile(tagre("img", "src", r'(http://assets\.amuniversal\.com/[0-9a-f]+)')),
|
||||||
imageSearch=compile(r'SRC="(/[\w/]+?/archive/images/\w+?\d+\..+?)"'),
|
prevSearch=compile(tagre("a", "href", "(/%s/\d+/\d+/\d+)")+"Previous"),
|
||||||
prevSearch=compile(r'<A HREF="(/[\w/]+?/archive/\w+?-\d{8}\.html)">(?:<IMG SRC="/[\w/]+?/images/arrow_left.gif|(?:<font[^>]*?>)?Previous Day)'),
|
help='Index format: yyyy/mm/dd',
|
||||||
help='Index format: yyyymmdd',
|
|
||||||
namer=namer)
|
namer=namer)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# http://www.gocomics.com/features
|
||||||
|
# XXX
|
||||||
|
|
||||||
|
# http://www.gocomics.com/explore/editorial_list
|
||||||
|
# XXX
|
||||||
|
|
||||||
|
# http://www.gocomics.com/explore/sherpa_list
|
||||||
|
# XXX
|
||||||
|
|
||||||
acaseinpoint = comicsDotCom('acaseinpoint', 'comics')
|
acaseinpoint = comicsDotCom('acaseinpoint', 'comics')
|
||||||
agnes = comicsDotCom('agnes', 'creators')
|
agnes = comicsDotCom('agnes', 'creators')
|
||||||
|
|
Loading…
Reference in a new issue