diff --git a/dosagelib/plugins/b.py b/dosagelib/plugins/b.py index fa3b93609..8ef5fd4ac 100644 --- a/dosagelib/plugins/b.py +++ b/dosagelib/plugins/b.py @@ -256,17 +256,6 @@ class BrentalFlossGuest(BrentalFloss): firstStripUrl = stripUrl % '1' -# XXX disallowed by robots.txt -class _BringBackRoomies(_BasicScraper): - url = "http://www.bringbackroomies.com/" - rurl = escape(url) - stripUrl = url + "comic/%s" - imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl)) - prevSearch = compile(tagre("span", "class", "mininav-prev") + - tagre("a", "href", r'(%scomic/[^"]+)' % rurl)) - help = 'Index format: stripname' - - class Brink(_BasicScraper): url = 'http://paperfangs.com/brink/' rurl = escape(url) @@ -287,13 +276,13 @@ class BrightlyWound(_BasicScraper): help = 'Index format: nnn' -# XXX disallowed by robots.txt -class _ButtercupFestival(_BasicScraper): +class ButtercupFestival(_ParserScraper): url = 'http://www.buttercupfestival.com/' - stripUrl = url + '%s.html' - imageSearch = compile(tagre("img", "src", r'(http://www\.buttercupfestival\.com/\d+-\d+[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(\d+-\d+\.html)', quote="") + "previous") - help = 'Index format: number-number' + stripUrl = url + '%s.htm' + firstStripUrl = stripUrl % '2-1' + imageSearch = '//center/img' + prevSearch = '//a[text()="previous"]' + help = 'Index format: 2-number' class ButterSafe(_BasicScraper): diff --git a/dosagelib/plugins/d.py b/dosagelib/plugins/d.py index 9a46c8544..b851c80cf 100644 --- a/dosagelib/plugins/d.py +++ b/dosagelib/plugins/d.py @@ -286,15 +286,14 @@ class Drive(_BasicScraper): help = 'Index format: yymmdd' -# XXX navigation works only with JavaScript -class _DrMcNinja(_BasicScraper): +class DrMcNinja(_ParserScraper): url = 'http://drmcninja.com/' - rurl = escape(url) stripUrl = url + 'archives/comic/%s/' firstStripUrl = stripUrl % '0p1' - imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)) 
- prevSearch = compile(tagre("a", "href", r'(%sarchives/comic/[^"]+)' % rurl, after="prev")) - help = 'Index format: episode number and page' + css = True + imageSearch = 'div#comic img' + prevSearch = 'a.prev' + help = 'Index format: {episode}p{page}' class Drowtales(_BasicScraper): @@ -311,8 +310,7 @@ class Drowtales(_BasicScraper): help = 'Index format: number' -# XXX disallowed by robots.txt -class _DumbingOfAge(_BasicScraper): +class DumbingOfAge(_BasicScraper): url = 'http://www.dumbingofage.com/' rurl = escape(url) stripUrl = url + '%s/' diff --git a/dosagelib/plugins/e.py b/dosagelib/plugins/e.py index ea9497d8c..d0d258e95 100755 --- a/dosagelib/plugins/e.py +++ b/dosagelib/plugins/e.py @@ -152,8 +152,7 @@ class EmergencyExit(_BasicScraper): help = 'Index format: n' -# XXX disallowed by robots.txt -class _ErrantStory(_BasicScraper): +class ErrantStory(_BasicScraper): url = 'http://www.errantstory.com/' stripUrl = url + '%s' imageSearch = compile(r'<img[^>]+?src="([^"]*?comics/.+?)"') diff --git a/dosagelib/plugins/p.py b/dosagelib/plugins/p.py index 1baf995ad..1b50566b3 100755 --- a/dosagelib/plugins/p.py +++ b/dosagelib/plugins/p.py @@ -172,8 +172,7 @@ class Pimpette(_ParserScraper): help = 'Index format: yyyymmdd' -# Broken navigation: prev link at http://planescapecomic.com/201.html points to same URL. 
-class _PlanescapeSurvival(_BasicScraper): +class PlanescapeSurvival(_BasicScraper): url = 'http://planescapecomic.com/' stripUrl = url + '%s.html' imageSearch = compile(r'src="(comics/.+?)"') diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py index 61baa5b29..b55e5430a 100644 --- a/dosagelib/plugins/s.py +++ b/dosagelib/plugins/s.py @@ -159,17 +159,6 @@ class SexyLosers(_BasicScraper): return index + '-' + title -# XXX site has been hacked -class _ShadowGirls(_BasicScraper): - url = 'http://www.shadowgirlscomic.com/' - stripUrl = url + 'comics/%s' - firstStripUrl = stripUrl % 'book-1/chapter-1-broken-dreams/welcome' - imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]*)')) - prevSearch = compile(tagre("a", "href", r'([^"]*)', after='navi-prev')) - help = 'Index format: custom' - starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]*/comics/[^"]+)'))) - - class Sheldon(_BasicScraper): url = 'http://www.sheldoncomics.com/' rurl = escape(url) @@ -209,14 +198,13 @@ class Shivae(_BasicScraper): help = 'Index format: yyyy/mm/dd/stripname' -# XXX disallowed by robots.txt -class _Shortpacked(_BasicScraper): - url = 'http://www.shortpacked.com/' - rurl = escape(url) - stripUrl = url + '%s/' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s\d+/comic/[^"]+)' % rurl, after="prev")) - help = 'Index format: yyyy/comic/book-nn/mm-name1/name2' +class Shortpacked(_ParserScraper): + url = 'http://www.shortpacked.com/index.php' + stripUrl = url + '?id=%s' + css = True + imageSearch = 'img#comic' + prevSearch = 'a.prev' + help = 'Index format: nnn' class ShotgunShuffle(_BasicScraper): @@ -452,14 +440,12 @@ class StandStillStaySilent(_ParserScraper): help = 'Index Format: number' -# XXX disallowed by robots.txt -class _StationV3(_BasicScraper): +class StationV3(_ParserScraper): url = 'http://www.stationv3.com/' - rurl = escape(url) stripUrl = url + 'd/%s.html' - imageSearch = 
compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%sd/\d+\.html)' % rurl) + - tagre("img", "src", r'http://www\.stationv3\.com/images/previous\.gif')) + imageSearch = '//img[contains(@src,"/comics2/")]' + prevSearch = '//a[img[contains(@src,"/previous2")]]' + help = 'Index format: yyyymmdd' diff --git a/dosagelib/plugins/t.py b/dosagelib/plugins/t.py index 4cc3ef559..08af8dd2e 100755 --- a/dosagelib/plugins/t.py +++ b/dosagelib/plugins/t.py @@ -137,8 +137,7 @@ class ToonHole(_BasicScraper): return url in (self.stripUrl % "2013/03/if-game-of-thrones-was-animated",) -# XXX disallowed by robots.txt -class _TwoLumps(_BasicScraper): +class TwoLumps(_BasicScraper): url = 'http://www.twolumps.net/' stripUrl = url + 'd/%s.html' imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))