Some fixes...
AbstruseGoose: fixed prev
Carciphona: fixed latest
Curtailed: fixed image and prev (moved to WP)
DorkTower: fixed image search
GrrlPower: fixed site name issue
MadamAndEve: the archive has not been updated in a long time, but the current strip is. Works, but needs to be run daily.
PennyArcade: fixed namer
PvPonline: fixed prev
This commit is contained in:
parent
77a5e09c10
commit
fda654b5e0
6 changed files with 14 additions and 16 deletions
|
@ -36,7 +36,8 @@ class AbstruseGoose(_BasicScraper):
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre('img', 'src', r'(http://abstrusegoose\.com/strips/[^<>"]+)'))
|
imageSearch = compile(tagre('img', 'src', r'(http://abstrusegoose\.com/strips/[^<>"]+)'))
|
||||||
prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'« Previous</a>')
|
prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'« Previous')
|
||||||
|
nextSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'Next »')
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
textSearch = compile(tagre("img", "title", r'([^"]+)'))
|
textSearch = compile(tagre("img", "title", r'([^"]+)'))
|
||||||
|
|
||||||
|
|
|
@ -46,8 +46,7 @@ class Carciphona(_BasicScraper):
|
||||||
stripUrl = url + 'view.php?page=%s&chapter=%s'
|
stripUrl = url + 'view.php?page=%s&chapter=%s'
|
||||||
imageSearch = compile(tagre("div", "style", r'background-image:url\((_pages[^)]*)\)'))
|
imageSearch = compile(tagre("div", "style", r'background-image:url\((_pages[^)]*)\)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(view\.php\?[^"]*)', after="prevarea"))
|
prevSearch = compile(tagre("a", "href", r'(view\.php\?[^"]*)', after="prevarea"))
|
||||||
latestSearch = compile(tagre("a", "href", r'(view\.php\?[^"]*)') +
|
latestSearch = compile(tagre("a", "href", r'(view\.php\?page=[0-9]+[^"]*)'))
|
||||||
tagre("span", "class", "linkslast"))
|
|
||||||
help = 'Index format: None'
|
help = 'Index format: None'
|
||||||
starter = indirectStarter(url, latestSearch)
|
starter = indirectStarter(url, latestSearch)
|
||||||
|
|
||||||
|
@ -385,8 +384,8 @@ class Curtailed(_BasicScraper):
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '2012/04/08/sneeze'
|
firstStripUrl = stripUrl % '2012/04/08/sneeze'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]*)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/[0-9]+/[^"]*)' % rurl))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s\d{4}/[^"]*)' % rurl, after="navi-prev"))
|
prevSearch = compile('<a href="([^"]*)" class="comic-nav-base comic-nav-previous"')
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
help = 'Index format: yyyy/mm/dd/stripname'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -243,7 +243,7 @@ class DorkTower(_BasicScraper):
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '1997/01/01/shadis-magazine-strip-1'
|
firstStripUrl = stripUrl % '1997/01/01/shadis-magazine-strip-1'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%sfiles/\d+/\d+/[^"]+\.gif)' % rurl))
|
imageSearch = compile(tagre("div", "class", "entry-content") + "\s*<p>\s*" + tagre("img", "src", r'(%sfiles/[0-9]+/[0-9]+/[^"]*Dork[^"]+\.(?:gif|jpg))' % rurl, after=' alt'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl)+"Previous")
|
prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl)+"Previous")
|
||||||
help = 'Index format: yyyy/mm/dd/stripname-dd-mm-yy'
|
help = 'Index format: yyyy/mm/dd/stripname-dd-mm-yy'
|
||||||
|
|
||||||
|
|
|
@ -189,12 +189,12 @@ class GoneWithTheBlastwave(_BasicScraper):
|
||||||
|
|
||||||
class GrrlPower(_BasicScraper):
|
class GrrlPower(_BasicScraper):
|
||||||
description = u'Grrl Power - A webcomic about superheroines.'
|
description = u'Grrl Power - A webcomic about superheroines.'
|
||||||
url = 'http://www.grrlpowercomic.com/'
|
url = 'http://grrlpowercomic.com/'
|
||||||
rurl = escape(url)
|
rurl = escape(url)
|
||||||
stripUrl = url + 'archives/%s'
|
stripUrl = url + 'archives/%s'
|
||||||
firstStripUrl = stripUrl % '48'
|
firstStripUrl = stripUrl % '48'
|
||||||
imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl))
|
imageSearch = compile(tagre("img", "src", r'(.*/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="navi-prev"))
|
prevSearch = compile(tagre("a", "href", r'(.*/archives/\d+)', after="navi-prev"))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -17,12 +17,10 @@ class MacHall(_BasicScraper):
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
# broken links - disable for now
|
|
||||||
class MadamAndEve(_BasicScraper):
|
class MadamAndEve(_BasicScraper):
|
||||||
url = 'http://www.madamandeve.co.za/week_of_cartns.php'
|
url = 'http://www.madamandeve.co.za/'
|
||||||
stripUrl = None
|
stripUrl = None
|
||||||
imageSearch = compile(r'<IMG BORDER="0" SRC="(cartoons/me\d{6}\.(gif|jpg))">')
|
imageSearch = compile(tagre('img', 'src', r'(/cartoons/me\d{6}\.(gif|jpg))'))
|
||||||
prevSearch = compile(r'<a href="(weekend_cartoon.php)"')
|
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -112,8 +112,8 @@ class PennyArcade(_BasicScraper):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
dummy, yyyy, mm, dd = pageUrl.rsplit('/', 3)
|
p = pageUrl.split('/')
|
||||||
return '%04d%02d%02d' % (int(yyyy), int(mm), int(dd))
|
return '%04d%02d%02d' % (int(p[4]), int(p[5]), int(p[6]))
|
||||||
|
|
||||||
|
|
||||||
class PeppermintSaga(_BasicScraper):
|
class PeppermintSaga(_BasicScraper):
|
||||||
|
@ -266,5 +266,5 @@ class PvPonline(_BasicScraper):
|
||||||
url = 'http://pvponline.com/comic'
|
url = 'http://pvponline.com/comic'
|
||||||
stripUrl = url + '%s'
|
stripUrl = url + '%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://s3[^"]+\.amazonaws\.com/pvponlinenew/img/comic/\d+/\d+/pvp[^"]+\.jpg)'))
|
imageSearch = compile(tagre("img", "src", r'(http://s3[^"]+\.amazonaws\.com/pvponlinenew/img/comic/\d+/\d+/pvp[^"]+\.jpg)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)', after="Previous"))
|
prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)', after="left divider"))
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
help = 'Index format: yyyy/mm/dd/stripname'
|
||||||
|
|
Loading…
Reference in a new issue