diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py index 42733b20e..e097eddbc 100644 --- a/dosagelib/plugins/a.py +++ b/dosagelib/plugins/a.py @@ -54,6 +54,44 @@ class AcademyVale(_BasicScraper): help = 'Index format: nnn' +class Achewood(_BasicScraper): + url = 'http://www.achewood.com/' + stripUrl = url + 'index.php?date=%s' + firstStripUrl = stripUrl % '00000000' + imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)')) + prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', after="Previous")) + help = 'Index format: mmddyyyy' + namer = regexNamer(compile(r'date=(\d+)')) + + +class AetheriaEpics(_BasicScraper): + url = 'http://aetheria-epics.schala.net/' + stripUrl = url + '%s.html' + firstStripUrl = stripUrl % '00001' + imageSearch = compile(tagre("img", "src", r'(\d{5}\.jpg)')) + prevSearch = compile(tagre("a", "href", r'(\d{5}\.html)') + "Previous") + help = 'Index format: nnn' + + +class AfterStrife(_BasicScraper): + url = 'http://afterstrife.com/?p=262' + stripUrl = 'http://afterstrife.com/?p=%s' + imageSearch = compile(r'[^>]+Back')) + stripUrl = url + '1.%s.html' + firstStripUrl = stripUrl % '1' + imageSearch = compile(tagre("img", "src", r'(img/strip/[^"]+\.jpg)')) + prevSearch = compile(r'[^>]+Back') + help = 'Index format: nnn' + + class AhoiPolloi(_BasicScraper): url = 'http://ahoipolloi.blogger.de/' stripUrl = url + '?day=%s' @@ -69,6 +107,15 @@ class AhoiPolloi(_BasicScraper): return imageUrl.rsplit('/', 1)[1] +class AirForceBlues(_BasicScraper): + url = 'http://www.afblues.com/' + stripUrl = url + 'wordpress/%s/' + firstStripUrl = stripUrl % '1997/09/07/need-a-clue-do-ya' + imageSearch = compile(tagre("img", "src", r'(http://www\.afblues\.com/wordpress/comics/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'([^"]+)', after='Previous')) + help = 'Index format: yyyy/mm/dd/stripname' + + class ALessonIsLearned(_BasicScraper): url = 'http://www.alessonislearned.com/' prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)", quote="'")+r"[^>]+previous") @@ -97,6 +144,33 @@ class AlienLovesPredator(_BasicScraper): help = 'Index format: yyyy/mm/dd/name' +class AlienShores(_BasicScraper): + url = 'http://alienshores.com/alienshores_band/' + stripUrl = url + '%s' + imageSearch = compile(tagre("img", "src", r'(http://alienshores\.com/alienshores_band/wp-content/uploads/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'(http://alienshores\.com/[^"]+)', after="prev")) + help = 'Index format: yyyy/mm/dd/p/' + + +class ALLCAPS(_BasicScraper): + url = 'http://www.allcapscomix.com/' + stripUrl = url + '%s/' + firstStripUrl = stripUrl % '2008/08/welcome-to-all-caps' + imageSearch = compile(tagre("img", "src", r'(http://www\.allcapscomix\.com/comics/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'([^"]+)')+r"[^<]+Previous") + help = 'Index format: yyyy/mm/strip-name' + + +class AllTheGrowingThings(_BasicScraper): + url = 'http://growingthings.typodmary.com/' + rurl = escape(url) + stripUrl = url + '%s/' + firstStripUrl = stripUrl % '2009/04/21/all-the-growing-things' + imageSearch = compile(tagre("img", "src", r'(%sfiles/comics/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) + help = 'Index format: yyyy/mm/dd/strip-name' + + class AlphaLuna(_BasicScraper): url = 'http://www.alphaluna.net/' stripUrl = url + 'issue-%s/' @@ -114,6 +188,15 @@ class AlphaLunaSpanish(AlphaLuna): firstStripUrl = stripUrl % '1/portada' +class AlsoBagels(_BasicScraper): + url = 'http://alsobagels.com/' + rurl = escape(url) + stripUrl = url + 'index.php/comic/%s/' + imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%sindex\.php/comic/[^"]+)' % rurl, after="Previous")) + help = 'Index format: strip-name' + + class Altermeta(_BasicScraper): url = 'http://altermeta.net/' stripUrl = url + 'archive.php?comic=%s' @@ -140,6 +223,24 @@ class AmazingSuperPowers(_BasicScraper): help = 'Index format: yyyy/mm/name' +class Amya(_BasicScraper): + url = 'http://www.amyachronicles.com/' + rurl = escape(url) + stripUrl = url + 'archives/%s' + imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="Previous")) + help = 'Index format: n' + + +class Angband(_BasicScraper): + url = 'http://angband.calamarain.net/' + stripUrl = url + 'view.php?date=%s' + firstStripUrl = stripUrl % '2005-12-30' + imageSearch = compile(tagre("img", "src", r'(comics/Scroll[^"]+)')) + prevSearch = compile(tagre("a", "href", r'(view\.php\?date\=[^"]+)')+"Previous") + help = 'Index format: yyyy-mm-dd' + + class Angels2200(_BasicScraper): url = 'http://www.janahoffmann.com/angels/' stripUrl = url + '%s' @@ -148,6 +249,14 @@ class Angels2200(_BasicScraper): help = 'Index format: yyyy/mm/dd/part--comic-' +class Annyseed(_BasicScraper): + url = 'http://www.colourofivy.com/annyseed_webcomic_latest.htm' + stripUrl = 'http://www.colourofivy.com/annyseed_webcomic%s.htm' + imageSearch = compile(tagre("img", "src", r'(Annyseed[^"]+)')) + prevSearch = compile(r'") - help = 'Index format: yyyy/mm/strip-name' - - class ASkeweredParadise(_BasicScraper): url = 'http://aspcomics.net/' stripUrl = url + 'comic/%s' @@ -239,88 +321,6 @@ class ASkeweredParadise(_BasicScraper): help = 'Index format: nnn' -class AGirlAndHerFed(_BasicScraper): - url = 'http://www.agirlandherfed.com/' - starter = bounceStarter(url, - compile(r'[^>]+Back')) - stripUrl = url + '1.%s.html' - firstStripUrl = stripUrl % '1' - imageSearch = compile(tagre("img", "src", r'(img/strip/[^"]+\.jpg)')) - prevSearch = compile(r'[^>]+Back') - help = 'Index format: nnn' - - -class AetheriaEpics(_BasicScraper): - url = 'http://aetheria-epics.schala.net/' - stripUrl = url + '%s.html' - firstStripUrl = stripUrl % '00001' - imageSearch = compile(tagre("img", "src", r'(\d{5}\.jpg)')) - prevSearch = compile(tagre("a", "href", r'(\d{5}\.html)') + "Previous") - help = 'Index format: nnn' - - -class AirForceBlues(_BasicScraper): - url = 'http://www.afblues.com/' - stripUrl = url + 'wordpress/%s/' - firstStripUrl = stripUrl % '1997/09/07/need-a-clue-do-ya' - imageSearch = compile(tagre("img", "src", r'(http://www\.afblues\.com/wordpress/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'([^"]+)', after='Previous')) - help = 'Index format: yyyy/mm/dd/stripname' - - -class AlienShores(_BasicScraper): - url = 'http://alienshores.com/alienshores_band/' - stripUrl = url + '%s' - imageSearch = compile(tagre("img", "src", r'(http://alienshores\.com/alienshores_band/wp-content/uploads/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(http://alienshores\.com/[^"]+)', after="prev")) - help = 'Index format: yyyy/mm/dd/p/' - - -class AllTheGrowingThings(_BasicScraper): - url = 'http://growingthings.typodmary.com/' - rurl = escape(url) - stripUrl = url + '%s/' - firstStripUrl = stripUrl % '2009/04/21/all-the-growing-things' - imageSearch = compile(tagre("img", "src", r'(%sfiles/comics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) - help = 'Index format: yyyy/mm/dd/strip-name' - - -class Amya(_BasicScraper): - url = 'http://www.amyachronicles.com/' - rurl = escape(url) - stripUrl = url + 'archives/%s' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="Previous")) - help = 'Index format: n' - - -class Angband(_BasicScraper): - url = 'http://angband.calamarain.net/' - stripUrl = url + 'view.php?date=%s' - firstStripUrl = stripUrl % '2005-12-30' - imageSearch = compile(tagre("img", "src", r'(comics/Scroll[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(view\.php\?date\=[^"]+)')+"Previous") - help = 'Index format: yyyy-mm-dd' - - -class AlsoBagels(_BasicScraper): - url = 'http://alsobagels.com/' - rurl = escape(url) - stripUrl = url + 'index.php/comic/%s/' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%sindex\.php/comic/[^"]+)' % rurl, after="Previous")) - help = 'Index format: strip-name' - - -class Annyseed(_BasicScraper): - url = 'http://www.colourofivy.com/annyseed_webcomic_latest.htm' - stripUrl = 'http://www.colourofivy.com/annyseed_webcomic%s.htm' - imageSearch = compile(tagre("img", "src", r'(Annyseed[^"]+)')) - prevSearch = compile(r']+Previous') + help = 'Index format: n (unpadded)' + + @classmethod + def prevUrlModifier(cls, prevUrl): + if prevUrl: + return prevUrl.replace("www.boxerhockey.com", "boxerhockey.fireball20xl.com") + + +class BoyOnAStickAndSlither(_BasicScraper): + url = 'http://www.boasas.com/' + stripUrl = url + 'page/%s' + firstStripUrl = stripUrl % '2' + imageSearch = compile(tagre("img", "src", r'(http://\d+\.media\.tumblr\.com/[^"]+_1280\.png)')) + prevSearch = compile(tagre("a", "href", r'(/page/\d+)') + "Next page") + help = 'Index format: n (unpadded)' + + @classmethod + def namer(cls, imageUrl, pageUrl): + return pageUrl.rsplit('/')[-1] + + class BratHalla(_BasicScraper): url = 'http://brat-halla.com/' stripUrl = url + 'comic/%s/' @@ -244,54 +292,6 @@ class Brink(_BasicScraper): help = 'Index format: number' -class BobWhite(_BasicScraper): - url = 'http://www.bobwhitecomics.com/' - rurl = escape(url) - stripUrl = url + '?webcomic_post=%s' - firstStripUrl = stripUrl % '20110504' - imageSearch = compile(tagre("img", "src", r"(%swp/wp-content/webcomic/untitled/\d+.jpg)" % rurl)) - prevSearch = compile(tagre("a", "href", "(%s\?webcomic_post=\d+)" % rurl)+r'[^"]+Previous') - help = 'Index format: yyyymmdd' - - -class BoredAndEvil(_BasicScraper): - url = 'http://www.boredandevil.com/' - stripUrl = url + '?date=%s' - firstStripUrl = stripUrl % '2004-06-07' - imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)')) - prevSearch = compile(r'First Comic.+]+Previous') - help = 'Index format: n (unpadded)' - - @classmethod - def prevUrlModifier(cls, prevUrl): - if prevUrl: - return prevUrl.replace("www.boxerhockey.com", "boxerhockey.fireball20xl.com") - - -class BoyOnAStickAndSlither(_BasicScraper): - url = 'http://www.boasas.com/' - stripUrl = url + 'page/%s' - firstStripUrl = stripUrl % '2' - imageSearch = compile(tagre("img", "src", r'(http://\d+\.media\.tumblr\.com/[^"]+_1280\.png)')) - prevSearch = compile(tagre("a", "href", r'(/page/\d+)') + "Next page") - help = 'Index format: n (unpadded)' - - @classmethod - def namer(cls, imageUrl, pageUrl): - return pageUrl.rsplit('/')[-1] - - class BrightlyWound(_BasicScraper): baseUrl = 'http://www.brightlywound.com/' url = baseUrl + '?comic=137' diff --git a/dosagelib/plugins/c.py b/dosagelib/plugins/c.py index 97542837c..3179c4957 100644 --- a/dosagelib/plugins/c.py +++ b/dosagelib/plugins/c.py @@ -39,6 +39,15 @@ class CaseyAndAndy(_BasicScraper): help = 'Index format: number' +class CasuallyKayla(_BasicScraper): + url = 'http://casuallykayla.com/' + stripUrl = url + '?p=%s' + firstStripUrl = stripUrl % '89' + imageSearch = compile(tagre("img", "src", r'(http://casuallykayla\.com/comics/[^"]+)')) + prevSearch = compile(tagre("div", "class", r'nav-previous') + tagre("a", "href", r'([^"]+)')) + help = 'Index format: nnn' + + class Catalyst(_BasicScraper): baseUrl = "http://catalyst.spiderforest.com/" rurl = escape(baseUrl) @@ -50,6 +59,14 @@ class Catalyst(_BasicScraper): help = 'Index format: number' +class CatAndGirl(_BasicScraper): + url = 'http://catandgirl.com/' + stripUrl = url + '?p=%s' + imageSearch = compile(tagre("img", "src", r'(http://catandgirl\.com/archive/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'([^"]+)')+r"[^<]+Previous") + help = 'Index format: n (unpadded)' + + class Catena(_BasicScraper): url = 'http://catenamanor.com/' stripUrl = url + '%s' @@ -58,6 +75,16 @@ class Catena(_BasicScraper): help = 'Index format: yyyy/mm/dd/' +class CatsAndCameras(_BasicScraper): + url = 'http://catsncameras.com/cnc/' + rurl = escape(url) + stripUrl = url + '?p=%s' + imageSearch = compile(tagre("img", "src", r'(%scnc/comics/[^"]+)' % rurl)) + prevSearch = compile(tagre("div", "class", r'nav-previous') + + tagre("a", "href", r'(%scnc/[^"]+)' % rurl)) + help = 'Index format: nnn' + + class ChainsawSuit(_BasicScraper): url = 'http://chainsawsuit.com/' rurl = escape(url) @@ -67,6 +94,17 @@ class ChainsawSuit(_BasicScraper): help = 'Index format: yyyy/mm/dd/stripname' +class Champ2010(_BasicScraper): + baseurl = 'http://jedcollins.com/champ2010/' + rurl = escape(baseurl) + # the latest URL is hard coded since the comic is discontinued + url = baseurl + 'champ-12-30-10.html' + stripUrl = baseurl + '%s.html' + imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="Previous")) + help = 'Index format: yy-dd-mm' + + class ChannelAte(_BasicScraper): url = 'http://www.channelate.com/' rurl = escape(url) @@ -92,6 +130,14 @@ class CheckerboardNightmare(_BasicScraper): help='Index format: yyyymmdd' +class Chester5000XYV(_BasicScraper): + url = 'http://jessfink.com/Chester5000XYV/' + stripUrl = url + '?p=%s' + imageSearch = compile(tagre("img", "src", r'(http://jessfink\.com/Chester5000XYV/comics/[^"]+)')) + prevSearch = compile(r'') + help = 'Index format: nnn' + + class Chisuji(_BasicScraper): url = 'http://www.chisuji.com/' stripUrl = url + '%s' @@ -100,6 +146,18 @@ class Chisuji(_BasicScraper): help = 'Index format: yyyy/mm/dd/strip-name' +class Chucklebrain(_BasicScraper): + url = 'http://www.chucklebrain.com/main.php' + starter = indirectStarter(url, + compile(tagre("a", "href", r'(/main\.php\?img\=\d+)', quote="'") + + tagre("img", "src", r'/images/last\.jpg', quote="'"))) + stripUrl = url + '?img=%s' + imageSearch = compile(tagre("img", "src", r'(/images/strip[^"]+)')) + prevSearch = compile(tagre("a", "href", r'(/main\.php\?img\=\d+)', quote="'") + + tagre("img", "src", r'/images/previous\.jpg', quote="'")) + help = 'Index format: nnn' + + class ChugworthAcademy(_BasicScraper): url = 'http://chugworth.com/' stripUrl = url + '?p=%s' @@ -124,6 +182,25 @@ class CigarroAndCerveja(_BasicScraper): help = 'Index format: non' +class Collar6(_BasicScraper): + url = 'http://collar6.com/' + rurl = escape(url) + stripUrl = url + 'archive/%s' + imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/collar6/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl, after="previous")) + help = 'Index format: ' + + +class CompanyY(_BasicScraper): + url = 'http://company-y.com/' + rurl = escape(url) + stripUrl = url + '%s/' + imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) + prevSearch = compile(tagre("div", "class", r"nav-previous") + + tagre("a", "href", r'(%s[^"]+)' % rurl)) + help = 'Index format: yyyy/mm/dd/strip-name' + + class Comedity(_BasicScraper): url = 'http://www.comedity.com/' stripUrl = url + 'index.php?strip_id=%s' @@ -159,6 +236,20 @@ class CoolCatStudio(_BasicScraper): help = 'Index format: yyyymmdd' +class CorydonCafe(_BasicScraper): + url = 'http://corydoncafe.com/' + starter = indirectStarter(url, + compile(tagre("a", "href", r'(\./\d+/[^"]+)'))) + stripUrl = url + '%s.php' + imageSearch = compile(tagre("img", "src", r"(\./[^']+)", quote="'")) + prevSearch = compile(tagre("a", "href", r"(http://corydoncafe\.com/\d+/[^']+)", after="prev", quote="'")) + help = 'Index format: yyyy/stripname' + + @classmethod + def namer(cls, imageUrl, pageUrl): + return pageUrl.split('/')[-1].split('.')[0] + + class CourtingDisaster(_BasicScraper): url = 'http://www.courting-disaster.com/' stripUrl = url + 'archive/%s.html' @@ -167,6 +258,15 @@ class CourtingDisaster(_BasicScraper): help = 'Index format: yyyymmdd' +class CowboyJedi(_BasicScraper): + url = 'http://www.cowboyjedi.com/' + rurl = escape(url) + stripUrl = url + '%s' + imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="navi-prev")) + help = 'Index format: yyyy/mm/dd/strip-name' + + class CrapIDrewOnMyLunchBreak(_BasicScraper): url = 'http://crap.jinwicked.com/' stripUrl = url + '%s' @@ -189,51 +289,6 @@ class CtrlAltDelSillies(CtrlAltDel): stripUrl = url + '%s' -class Curvy(_BasicScraper): - url = 'http://www.c.urvy.org/' - stripUrl = url + '?date=%s' - imageSearch = compile(tagre("img", "src", r'(/c/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(/\?date=\d+)') + - tagre("img", "src", "/nav/prev\.png")) - help = 'Index format: yyyymmdd' - starter = bounceStarter(url, - compile(tagre("a", "href", r'(/\?date=\d+)') + - tagre("img", "src", "/nav/next\.png"))) - - def shouldSkipUrl(self, url): - """Skip pages without images.""" - return url in ( - self.stripUrl % '20130402', - ) - - -class CatAndGirl(_BasicScraper): - url = 'http://catandgirl.com/' - stripUrl = url + '?p=%s' - imageSearch = compile(tagre("img", "src", r'(http://catandgirl\.com/archive/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'([^"]+)')+r"[^<]+Previous") - help = 'Index format: n (unpadded)' - - -class CyanideAndHappiness(_BasicScraper): - url = 'http://www.explosm.net/comics/' - starter = bounceStarter(url, compile(tagre("a", "href", r"(/comics/\d+/)", before="next"))) - stripUrl = url + '%s/' - imageSearch = compile(tagre("img", "src", r'(http://(?:www\.)?explosm\.net/db/files/[^"]+)', before="a daily webcomic")) - prevSearch = compile(tagre("a", "href", r'(/comics/\d+/)', before="prev")) - help = 'Index format: n (unpadded)' - - def shouldSkipUrl(self, url): - """Skip pages without images.""" - return url in (self.stripUrl % "3082",) - - @classmethod - def namer(cls, imageUrl, pageUrl): - imgname = imageUrl.split('/')[-1] - imgnum = pageUrl.split('/')[-2] - return '%s_%s' % (imgnum, imgname) - - class CrimsonDark(_BasicScraper): url = 'http://www.davidcsimon.com/crimsondark/' stripUrl = url + 'index.php?view=comic&strip_id=%s' @@ -242,98 +297,6 @@ class CrimsonDark(_BasicScraper): help = 'Index format: n (unpadded)' -class CatsAndCameras(_BasicScraper): - url = 'http://catsncameras.com/cnc/' - rurl = escape(url) - stripUrl = url + '?p=%s' - imageSearch = compile(tagre("img", "src", r'(%scnc/comics/[^"]+)' % rurl)) - prevSearch = compile(tagre("div", "class", r'nav-previous') + - tagre("a", "href", r'(%scnc/[^"]+)' % rurl)) - help = 'Index format: nnn' - - -class CowboyJedi(_BasicScraper): - url = 'http://www.cowboyjedi.com/' - rurl = escape(url) - stripUrl = url + '%s' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="navi-prev")) - help = 'Index format: yyyy/mm/dd/strip-name' - - -class CasuallyKayla(_BasicScraper): - url = 'http://casuallykayla.com/' - stripUrl = url + '?p=%s' - firstStripUrl = stripUrl % '89' - imageSearch = compile(tagre("img", "src", r'(http://casuallykayla\.com/comics/[^"]+)')) - prevSearch = compile(tagre("div", "class", r'nav-previous') + tagre("a", "href", r'([^"]+)')) - help = 'Index format: nnn' - - -class Collar6(_BasicScraper): - url = 'http://collar6.com/' - rurl = escape(url) - stripUrl = url + 'archive/%s' - imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/collar6/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl, after="previous")) - help = 'Index format: ' - - -class Chester5000XYV(_BasicScraper): - url = 'http://jessfink.com/Chester5000XYV/' - stripUrl = url + '?p=%s' - imageSearch = compile(tagre("img", "src", r'(http://jessfink\.com/Chester5000XYV/comics/[^"]+)')) - prevSearch = compile(r'') - help = 'Index format: nnn' - - -class Champ2010(_BasicScraper): - baseurl = 'http://jedcollins.com/champ2010/' - rurl = escape(baseurl) - # the latest URL is hard coded since the comic is discontinued - url = baseurl + 'champ-12-30-10.html' - stripUrl = baseurl + '%s.html' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="Previous")) - help = 'Index format: yy-dd-mm' - - -class Chucklebrain(_BasicScraper): - url = 'http://www.chucklebrain.com/main.php' - starter = indirectStarter(url, - compile(tagre("a", "href", r'(/main\.php\?img\=\d+)', quote="'") + - tagre("img", "src", r'/images/last\.jpg', quote="'"))) - stripUrl = url + '?img=%s' - imageSearch = compile(tagre("img", "src", r'(/images/strip[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(/main\.php\?img\=\d+)', quote="'") + - tagre("img", "src", r'/images/previous\.jpg', quote="'")) - help = 'Index format: nnn' - - -class CompanyY(_BasicScraper): - url = 'http://company-y.com/' - rurl = escape(url) - stripUrl = url + '%s/' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("div", "class", r"nav-previous") + - tagre("a", "href", r'(%s[^"]+)' % rurl)) - help = 'Index format: yyyy/mm/dd/strip-name' - - -class CorydonCafe(_BasicScraper): - url = 'http://corydoncafe.com/' - starter = indirectStarter(url, - compile(tagre("a", "href", r'(\./\d+/[^"]+)'))) - stripUrl = url + '%s.php' - imageSearch = compile(tagre("img", "src", r"(\./[^']+)", quote="'")) - prevSearch = compile(tagre("a", "href", r"(http://corydoncafe\.com/\d+/[^']+)", after="prev", quote="'")) - help = 'Index format: yyyy/stripname' - - @classmethod - def namer(cls, imageUrl, pageUrl): - return pageUrl.split('/')[-1].split('.')[0] - - class CraftedFables(_BasicScraper): url = 'http://www.craftedfables.com/' stripUrl = 'http://www.caf-fiends.net/craftedfables/?p=%s' @@ -352,3 +315,40 @@ class CucumberQuest(_BasicScraper): imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/cq/[^"]+)' % rurl)) prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+/)' % rurl, after="previous")) help = 'Index format: stripname' + + +class Curvy(_BasicScraper): + url = 'http://www.c.urvy.org/' + stripUrl = url + '?date=%s' + imageSearch = compile(tagre("img", "src", r'(/c/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'(/\?date=\d+)') + + tagre("img", "src", "/nav/prev\.png")) + help = 'Index format: yyyymmdd' + starter = bounceStarter(url, + compile(tagre("a", "href", r'(/\?date=\d+)') + + tagre("img", "src", "/nav/next\.png"))) + + def shouldSkipUrl(self, url): + """Skip pages without images.""" + return url in ( + self.stripUrl % '20130402', + ) + + +class CyanideAndHappiness(_BasicScraper): + url = 'http://www.explosm.net/comics/' + starter = bounceStarter(url, compile(tagre("a", "href", r"(/comics/\d+/)", before="next"))) + stripUrl = url + '%s/' + imageSearch = compile(tagre("img", "src", r'(http://(?:www\.)?explosm\.net/db/files/[^"]+)', before="a daily webcomic")) + prevSearch = compile(tagre("a", "href", r'(/comics/\d+/)', before="prev")) + help = 'Index format: n (unpadded)' + + def shouldSkipUrl(self, url): + """Skip pages without images.""" + return url in (self.stripUrl % "3082",) + + @classmethod + def namer(cls, imageUrl, pageUrl): + imgname = imageUrl.split('/')[-1] + imgnum = pageUrl.split('/')[-2] + return '%s_%s' % (imgnum, imgname) diff --git a/dosagelib/plugins/p.py b/dosagelib/plugins/p.py index 79688a101..c4b08864b 100644 --- a/dosagelib/plugins/p.py +++ b/dosagelib/plugins/p.py @@ -186,14 +186,6 @@ class Precocious(_BasicScraper): help = 'Index format: yyyy/mm/dd' -class PvPonline(_BasicScraper): - url = 'http://pvponline.com/comic' - stripUrl = url + '%s' - imageSearch = compile(tagre("img", "src", r'(http://s3[^"]+\.amazonaws\.com/pvponlinenew/img/comic/\d+/\d+/pvp[^"]+\.jpg)')) - prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)', after="Previous")) - help = 'Index format: yyyy/mm/dd/stripname' - - class ProperBarn(_BasicScraper): url = 'http://www.nitrocosm.com/go/gag/' stripUrl = url + '%s/' @@ -217,3 +209,11 @@ class PunksAndNerdsOld(_BasicScraper): imageSearch = compile(r' src="(/comics/.+?)"') prevSearch = compile(r'>]+?>]+?src="/previouscomic.gif">') help = 'Index format: yyyymmdd' + + +class PvPonline(_BasicScraper): + url = 'http://pvponline.com/comic' + stripUrl = url + '%s' + imageSearch = compile(tagre("img", "src", r'(http://s3[^"]+\.amazonaws\.com/pvponlinenew/img/comic/\d+/\d+/pvp[^"]+\.jpg)')) + prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)', after="Previous")) + help = 'Index format: yyyy/mm/dd/stripname' diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py index a767e6d3f..8770a5ac6 100644 --- a/dosagelib/plugins/s.py +++ b/dosagelib/plugins/s.py @@ -313,6 +313,16 @@ class SpaceTrawler(_BasicScraper): help = 'Index format: yyyy/mm/dd/stripname' +class Spamusement(_BasicScraper): + url = 'http://spamusement.com/' + rurl = escape(url) + stripUrl = url + 'index.php/comics/view/%s' + imageSearch = compile(r'' % rurl, IGNORECASE) + help = 'Index format: n (unpadded)' + starter = indirectStarter(url, prevSearch) + + class SpareParts(_BasicScraper): baseUrl = 'http://www.sparepartscomics.com/' url = baseUrl + 'comics/?date=20080328' @@ -449,16 +459,6 @@ class StarCrossdDestiny(_BasicScraper): return directory + '-' + filename -class Spamusement(_BasicScraper): - url = 'http://spamusement.com/' - rurl = escape(url) - stripUrl = url + 'index.php/comics/view/%s' - imageSearch = compile(r'' % rurl, IGNORECASE) - help = 'Index format: n (unpadded)' - starter = indirectStarter(url, prevSearch) - - # XXX disallowed by robots.txt class _StrangeCandy(_BasicScraper): url = 'http://www.strangecandy.net/' diff --git a/dosagelib/plugins/w.py b/dosagelib/plugins/w.py index 47b622589..8282bc70f 100644 --- a/dosagelib/plugins/w.py +++ b/dosagelib/plugins/w.py @@ -80,6 +80,15 @@ class WhiteNinja(_BasicScraper): help = 'Index format: s (comic name)' +class WhiteNoise(_BasicScraper): + baseurl = 'http://www.wncomic.com/' + url = baseurl + 'archive.php' + stripUrl = baseurl + 'archive_comments.php?strip_id=%s' + imageSearch = compile(r'(istrip_files/strips/.+?)"') + prevSearch = compile(r'') + help = 'Index format: nnn' + + class WorldOfMrToast(_BasicScraper): baseurl = 'http://www.theimaginaryworld.com/' url = baseurl + 'mrTcomicA.html' @@ -201,20 +218,3 @@ class Wulffmorgenthaler(_BasicScraper): imageSearch = compile(tagre("img", "src", r'(/img/strip/[^/"]+)')) prevSearch = compile(tagre("a", "href", r'([^"]+)') + "Previous") help = 'Index format: yyyy/mm/dd' - - -class WhiteNoise(_BasicScraper): - baseurl = 'http://www.wncomic.com/' - url = baseurl + 'archive.php' - stripUrl = baseurl + 'archive_comments.php?strip_id=%s' - imageSearch = compile(r'(istrip_files/strips/.+?)"') - prevSearch = compile(r'') - help = 'Index format: nnn'