Some fixes...

AbstruseGoose: fixed prev
Carciphona: fixed latest
Curtailed: fixed image and prev searches (site moved to WordPress)
DorkTower: fixed image search
GrrlPower: fixed site name issue
MadamAndEve: the archive has not been updated in a long time, but the current strip is.
The scraper works, but it needs to be run daily to catch every strip.
PennyArcade: fixed namer
PvPonline: fixed prev
This commit is contained in:
Dirk Reiners 2014-10-24 16:42:32 -05:00
parent 77a5e09c10
commit fda654b5e0
6 changed files with 14 additions and 16 deletions

View file

@ -36,7 +36,8 @@ class AbstruseGoose(_BasicScraper):
stripUrl = url + '%s' stripUrl = url + '%s'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre('img', 'src', r'(http://abstrusegoose\.com/strips/[^<>"]+)')) imageSearch = compile(tagre('img', 'src', r'(http://abstrusegoose\.com/strips/[^<>"]+)'))
prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'&laquo; Previous</a>') prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'&laquo; Previous')
nextSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'Next &raquo;')
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
textSearch = compile(tagre("img", "title", r'([^"]+)')) textSearch = compile(tagre("img", "title", r'([^"]+)'))

View file

@ -46,8 +46,7 @@ class Carciphona(_BasicScraper):
stripUrl = url + 'view.php?page=%s&chapter=%s' stripUrl = url + 'view.php?page=%s&chapter=%s'
imageSearch = compile(tagre("div", "style", r'background-image:url\((_pages[^)]*)\)')) imageSearch = compile(tagre("div", "style", r'background-image:url\((_pages[^)]*)\)'))
prevSearch = compile(tagre("a", "href", r'(view\.php\?[^"]*)', after="prevarea")) prevSearch = compile(tagre("a", "href", r'(view\.php\?[^"]*)', after="prevarea"))
latestSearch = compile(tagre("a", "href", r'(view\.php\?[^"]*)') + latestSearch = compile(tagre("a", "href", r'(view\.php\?page=[0-9]+[^"]*)'))
tagre("span", "class", "linkslast"))
help = 'Index format: None' help = 'Index format: None'
starter = indirectStarter(url, latestSearch) starter = indirectStarter(url, latestSearch)
@ -385,8 +384,8 @@ class Curtailed(_BasicScraper):
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '2012/04/08/sneeze' firstStripUrl = stripUrl % '2012/04/08/sneeze'
rurl = escape(url) rurl = escape(url)
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]*)' % rurl)) imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/[0-9]+/[^"]*)' % rurl))
prevSearch = compile(tagre("a", "href", r'(%s\d{4}/[^"]*)' % rurl, after="navi-prev")) prevSearch = compile('<a href="([^"]*)" class="comic-nav-base comic-nav-previous"')
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'

View file

@ -243,7 +243,7 @@ class DorkTower(_BasicScraper):
rurl = escape(url) rurl = escape(url)
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '1997/01/01/shadis-magazine-strip-1' firstStripUrl = stripUrl % '1997/01/01/shadis-magazine-strip-1'
imageSearch = compile(tagre("img", "src", r'(%sfiles/\d+/\d+/[^"]+\.gif)' % rurl)) imageSearch = compile(tagre("div", "class", "entry-content") + "\s*<p>\s*" + tagre("img", "src", r'(%sfiles/[0-9]+/[0-9]+/[^"]*Dork[^"]+\.(?:gif|jpg))' % rurl, after=' alt'))
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl)+"Previous") prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl)+"Previous")
help = 'Index format: yyyy/mm/dd/stripname-dd-mm-yy' help = 'Index format: yyyy/mm/dd/stripname-dd-mm-yy'

View file

@ -189,12 +189,12 @@ class GoneWithTheBlastwave(_BasicScraper):
class GrrlPower(_BasicScraper): class GrrlPower(_BasicScraper):
description = u'Grrl Power - A webcomic about superheroines.' description = u'Grrl Power - A webcomic about superheroines.'
url = 'http://www.grrlpowercomic.com/' url = 'http://grrlpowercomic.com/'
rurl = escape(url) rurl = escape(url)
stripUrl = url + 'archives/%s' stripUrl = url + 'archives/%s'
firstStripUrl = stripUrl % '48' firstStripUrl = stripUrl % '48'
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) imageSearch = compile(tagre("img", "src", r'(.*/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="navi-prev")) prevSearch = compile(tagre("a", "href", r'(.*/archives/\d+)', after="navi-prev"))
help = 'Index format: number' help = 'Index format: number'

View file

@ -17,12 +17,10 @@ class MacHall(_BasicScraper):
help = 'Index format: yyyy-mm-dd' help = 'Index format: yyyy-mm-dd'
# broken links - disable for now
class MadamAndEve(_BasicScraper): class MadamAndEve(_BasicScraper):
url = 'http://www.madamandeve.co.za/week_of_cartns.php' url = 'http://www.madamandeve.co.za/'
stripUrl = None stripUrl = None
imageSearch = compile(r'<IMG BORDER="0" SRC="(cartoons/me\d{6}\.(gif|jpg))">') imageSearch = compile(tagre('img', 'src', r'(/cartoons/me\d{6}\.(gif|jpg))'))
prevSearch = compile(r'<a href="(weekend_cartoon.php)"')
multipleImagesPerStrip = True multipleImagesPerStrip = True

View file

@ -112,8 +112,8 @@ class PennyArcade(_BasicScraper):
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):
dummy, yyyy, mm, dd = pageUrl.rsplit('/', 3) p = pageUrl.split('/')
return '%04d%02d%02d' % (int(yyyy), int(mm), int(dd)) return '%04d%02d%02d' % (int(p[4]), int(p[5]), int(p[6]))
class PeppermintSaga(_BasicScraper): class PeppermintSaga(_BasicScraper):
@ -266,5 +266,5 @@ class PvPonline(_BasicScraper):
url = 'http://pvponline.com/comic' url = 'http://pvponline.com/comic'
stripUrl = url + '%s' stripUrl = url + '%s'
imageSearch = compile(tagre("img", "src", r'(http://s3[^"]+\.amazonaws\.com/pvponlinenew/img/comic/\d+/\d+/pvp[^"]+\.jpg)')) imageSearch = compile(tagre("img", "src", r'(http://s3[^"]+\.amazonaws\.com/pvponlinenew/img/comic/\d+/\d+/pvp[^"]+\.jpg)'))
prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)', after="Previous")) prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)', after="left divider"))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'