diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py index 3d185797b..bfc6f6e71 100644 --- a/dosagelib/plugins/a.py +++ b/dosagelib/plugins/a.py @@ -1,31 +1,23 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher +from __future__ import absolute_import, division, print_function from re import compile, escape, MULTILINE from ..util import tagre from ..scraper import _BasicScraper, _ParserScraper from ..helpers import regexNamer, bounceStarter, indirectStarter -class AbsurdNotions(_BasicScraper): - baseUrl = 'http://www.absurdnotions.org/' - url = baseUrl + 'page129.html' - stripUrl = baseUrl + 'page%s.html' - firstStripUrl = stripUrl % '1' - imageSearch = compile(tagre('img', 'src', r'(an[^"]+)')) - multipleImagesPerStrip = True - prevSearch = compile(tagre('a', 'href', r'([^"]+)') + tagre('img', 'src', 'nprev\.gif')) - help = 'Index format: n (unpadded)' - - class AbstruseGoose(_BasicScraper): url = 'http://abstrusegoose.com/' rurl = escape(url) starter = bounceStarter(url, compile(tagre('a', 'href', r'(%s\d+)' % rurl)+"Next »")) stripUrl = url + '%s' firstStripUrl = stripUrl % '1' - imageSearch = compile(tagre('img', 'src', r'(http://abstrusegoose\.com/strips/[^<>"]+)')) + imageSearch = compile(tagre('img', 'src', + r'(http://abstrusegoose\.com/strips/[^<>"]+)')) prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'« Previous') nextSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'Next »') help = 'Index format: n (unpadded)' @@ -38,12 +30,25 @@ class AbstruseGoose(_BasicScraper): return 'c%03d-%s' % (index, name) +class AbsurdNotions(_BasicScraper): + baseUrl = 'http://www.absurdnotions.org/' + url = baseUrl + 'page129.html' + stripUrl = baseUrl + 'page%s.html' + firstStripUrl = stripUrl % '1' + imageSearch = compile(tagre('img', 'src', r'(an[^"]+)')) + multipleImagesPerStrip = True + prevSearch = compile(tagre('a', 'href', r'([^"]+)') + + tagre('img', 'src', 'nprev\.gif')) + help = 'Index format: n (unpadded)' + + class AcademyVale(_BasicScraper): url = 'http://www.imagerie.com/vale/' stripUrl = url + 'avarch.cgi?%s' firstStripUrl = stripUrl % '001' imageSearch = compile(tagre('img', 'src', r'(avale\d{4}-\d{2}\.gif)')) - prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") + tagre('img', 'src', 'AVNavBack\.gif')) + prevSearch = compile(tagre('a', 'href', r'(avarch[^">]+)', quote="") + + tagre('img', 'src', 'AVNavBack\.gif')) help = 'Index format: nnn' @@ -52,7 +57,8 @@ class Achewood(_BasicScraper): stripUrl = url + 'index.php?date=%s' firstStripUrl = stripUrl % '00000000' imageSearch = compile(tagre("img", "src", r'(/comic\.php\?date=\d+)')) - prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', after="Previous")) + prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', + after="Previous")) help = 'Index format: mmddyyyy' namer = regexNamer(compile(r'date=(\d+)')) @@ -70,8 +76,7 @@ class AfterStrife(_BasicScraper): class AGirlAndHerFed(_BasicScraper): url = 'http://www.agirlandherfed.com/' - starter = bounceStarter(url, - compile(r'[^>]+Back')) + starter = bounceStarter(url, compile(r'[^>]+Back')) stripUrl = url + '1.%s.html' firstStripUrl = stripUrl % '1' imageSearch = compile(tagre("img", "src", r'(img/strip/[^"]+\.jpg)')) @@ -79,24 +84,16 @@ class AGirlAndHerFed(_BasicScraper): help = 'Index format: nnn' -class AhoyEarth(_ParserScraper): - url = 'http://www.ahoyearth.com/' - rurl = escape(url) - stripUrl = url + '%s/' - css = True - imageSearch = '#comic-1 img' - prevSearch = '.navi-prev' - help = 'Index format: ddmmyyyy' - - class AhoiPolloi(_BasicScraper): url = 'http://ahoipolloi.blogger.de/' stripUrl = url + '?day=%s' firstStripUrl = stripUrl % '20060306' multipleImagesPerStrip = True lang = 'de' - imageSearch = compile(tagre('img', 'src', r'(/static/antville/ahoipolloi/images/[^"]+)')) - prevSearch = compile(tagre('a', 'href', r'(http://ahoipolloi\.blogger\.de/\?day=\d+)')) + imageSearch = compile(tagre('img', 'src', + r'(/static/antville/ahoipolloi/images/[^"]+)')) + prevSearch = compile(tagre('a', 'href', + r'(http://ahoipolloi\.blogger\.de/\?day=\d+)')) help = 'Index format: yyyymmdd' @classmethod @@ -104,6 +101,15 @@ class AhoiPolloi(_BasicScraper): return imageUrl.rsplit('/', 1)[1] +class AhoyEarth(_ParserScraper): + url = 'http://www.ahoyearth.com/' + stripUrl = url + '%s/' + css = True + imageSearch = '#comic-1 img' + prevSearch = '.navi-prev' + help = 'Index format: ddmmyyyy' + + class AirForceBlues(_BasicScraper): url = 'http://www.afblues.com/' stripUrl = url + 'wordpress/%s/' @@ -115,7 +121,8 @@ class AirForceBlues(_BasicScraper): class ALessonIsLearned(_BasicScraper): url = 'http://www.alessonislearned.com/' - prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)", quote="'")+r"[^>]+previous") + prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)", + quote="'")+r"[^>]+previous") starter = indirectStarter(url, prevSearch) stripUrl = url + 'index.php?comic=%s' firstStripUrl = stripUrl % '1' @@ -127,7 +134,8 @@ class AlienLovesPredator(_BasicScraper): url = 'http://alienlovespredator.com/' stripUrl = url + '%s/' firstStripUrl = stripUrl % '2004/10/12/unavoidable-delay' - imageSearch = compile(tagre("img", "src", r'([^"]+)', after='border="1" alt="" width="750"')) + imageSearch = compile(tagre("img", "src", r'([^"]+)', + after='border="1" alt="" width="750"')) prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev")) help = 'Index format: yyyy/mm/dd/name' @@ -244,10 +252,13 @@ class Antics(_BasicScraper): rurl = escape(url) stripUrl = url + '?p=%s' firstStripUrl = stripUrl % '3' - imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after='prev')) + imageSearch = compile(tagre("img", "src", + r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, + after='prev')) help = 'Index format: number' + class AoiHouse(_ParserScraper): url = 'http://www.aoihouse.net/' imageSearch = '//div[@id="comic"]/a[2]/img' @@ -270,10 +281,19 @@ class ARedTailsDream(_BasicScraper): url = baseUrl + 'comic/recent.php' imageSearch = compile(tagre('img', 'src', r'(chapter.+?/eng[^"]*)')) prevSearch = compile(tagre('a', 'href', r'(page\d+\.php)') + - tagre("img", "src", r'.*?aprev.*?')) + tagre("img", "src", r'.*?aprev.*?')) help = 'Index format: nn' +class ASkeweredParadise(_BasicScraper): + url = 'http://aspcomics.net/' + stripUrl = url + 'comic/%s' + firstStripUrl = stripUrl % '001' + imageSearch = compile(tagre("img", "src", r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+')) + prevSearch = compile(tagre("a", "href", "(/comic/\d+)")+r"[^>]+Previous") + help = 'Index format: nnn' + + class ASofterWorld(_ParserScraper): url = 'http://www.asofterworld.com/' stripUrl = url + 'index.php?id=%s' @@ -283,7 +303,6 @@ class ASofterWorld(_ParserScraper): help = 'Index format: n (unpadded)' - class AstronomyPOTD(_BasicScraper): baseUrl = 'http://antwrp.gsfc.nasa.gov/apod/' url = baseUrl + 'astropix.html' @@ -299,22 +318,13 @@ class AstronomyPOTD(_BasicScraper): def shouldSkipUrl(self, url, data): """Skip pages without images.""" return url in ( - self.stripUrl % '130217', # video - self.stripUrl % '130218', # video - self.stripUrl % '130226', # video - self.stripUrl % '130424', # video + self.stripUrl % '130217', # video + self.stripUrl % '130218', # video + self.stripUrl % '130226', # video + self.stripUrl % '130424', # video ) @classmethod def namer(cls, imageUrl, pageUrl): return '%s-%s' % (pageUrl.split('/')[-1].split('.')[0][2:], imageUrl.split('/')[-1].split('.')[0]) - - -class ASkeweredParadise(_BasicScraper): - url = 'http://aspcomics.net/' - stripUrl = url + 'comic/%s' - firstStripUrl = stripUrl % '001' - imageSearch = compile(tagre("img", "src", r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+')) - prevSearch = compile(tagre("a", "href", "(/comic/\d+)")+r"[^>]+Previous") - help = 'Index format: nnn' diff --git a/dosagelib/plugins/b.py b/dosagelib/plugins/b.py index 2498acbe4..dee2532db 100644 --- a/dosagelib/plugins/b.py +++ b/dosagelib/plugins/b.py @@ -1,7 +1,9 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher +from __future__ import absolute_import, division, print_function from re import compile, escape from ..util import tagre, getPageContent @@ -22,7 +24,8 @@ class BadassMuthas(_BasicScraper): stripUrl = url + '?%s' firstStripUrl = stripUrl % '1' imageSearch = compile(tagre("img", "src", r'(/images/comicsissue[^"]+)')) - prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'/images/comicsbuttonBack\.gif')) + prevSearch = compile(tagre("a", "href", r'([^"]+)') + + tagre("img", "src", r'/images/comicsbuttonBack\.gif')) help = 'Index format: nnn' @@ -51,7 +54,8 @@ class Bearmageddon(_BasicScraper): stripUrl = url + '%s/' firstStripUrl = stripUrl % '2011/08/01/page-1' imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after='navi-prev')) + prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, + after='navi-prev')) help = 'Index format: yyyy/mm/dd/stripname' @@ -63,7 +67,9 @@ class Beetlebum(_BasicScraper): starter = indirectStarter(url, compile(tagre('a', 'href', r'(%s\d{4}/\d{2}/\d{2}/[^"]+)' % rurl, after='bookmark'))) multipleImagesPerStrip = True imageSearch = compile(tagre('img', 'src', r'(http://blog\.beetlebum\.de/wp-content/uploads/[^"]+)')) - prevSearch = compile(tagre('a', 'href', r'(%s\d{4}/\d{2}/\d{2}/[^"]*)' % rurl, after='prev')) + prevSearch = compile(tagre('a', 'href', + r'(%s\d{4}/\d{2}/\d{2}/[^"]*)' % rurl, + after='prev')) help = 'Index format: yyyy/mm/dd/striptitle' lang = 'de' @@ -71,7 +77,7 @@ class Beetlebum(_BasicScraper): def namer(cls, imageUrl, pageUrl): indexes = tuple(pageUrl.rstrip('/').split('/')[-4:]) name = '%s-%s-%s-%s' % indexes - name = name + '_' + imageUrl.split( '/' )[-1] + name = name + '_' + imageUrl.split('/')[-1] return name @@ -89,14 +95,16 @@ class BetweenFailures(_BasicScraper): rurl = escape(url) stripUrl = url + 'comics1/%s' imageSearch = compile(tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/\d+-\d+-\d+[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%scomics1/[^"]+)' % rurl, after="previous")) + prevSearch = compile(tagre("a", "href", r'(%scomics1/[^"]+)' % rurl, + after="previous")) help = 'Index format: stripname' class BigFatWhale(_BasicScraper): url = 'http://www.bigfatwhale.com/' stripUrl = url + 'archives/bfw_%s.htm' - imageSearch = compile(tagre("img", "src", r'(archives/bfw_[^"]+|bfw_[^"]+)')) + imageSearch = compile(tagre("img", "src", + r'(archives/bfw_[^"]+|bfw_[^"]+)')) prevSearch = compile(r' HREF="(.+?)" TARGET="_top" TITLE="Previous Cartoon"') help = 'Index format: nnn' @@ -125,7 +133,8 @@ class BizarreUprising(_BasicScraper): stripUrl = url + 'view/%s' firstStripUrl = stripUrl % '1/awakening-splash' imageSearch = compile(tagre("img", "src", r'(comic/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(view/\d+/[^"]+)') + tagre("img", "src", r'images/b_prev\.gif')) + prevSearch = compile(tagre("a", "href", r'(view/\d+/[^"]+)') + + tagre("img", "src", r'images/b_prev\.gif')) help = 'Index format: n/name' @@ -133,7 +142,8 @@ class BlankIt(_BasicScraper): url = 'http://blankitcomics.com/' stripUrl = url + '%s/' firstStripUrl = stripUrl % '0001' - imageSearch = compile(tagre("img", "src", r'(http://blankitcomics\.com/bicomics/[^"]+)')) + imageSearch = compile(tagre("img", "src", + r'(http://blankitcomics\.com/bicomics/[^"]+)')) prevSearch = compile(tagre("a", "href", r'([^"]+)', after='rel="prev"')) help = 'Index format: stripname' @@ -151,26 +161,28 @@ class Blip(_BasicScraper): if prevUrl: return prevUrl.replace("www.blipcomic.com", "blipcomic.com") + class BloomingFaeries(_BasicScraper): adult = True url = 'http://www.bloomingfaeries.com/' - rurl = escape(url) stripUrl = url + 'comic/public/%s/' firstStripUrl = stripUrl % "pit-stop" imageSearch = compile(tagre("img", "src", r'(http://www.bloomingfaeries.com/wp-content/uploads[^"]+)', after='title')) - prevSearch = compile(tagre("a", "href", r'([^"]+)', after='comic-nav-base comic-nav-previous')) + prevSearch = compile(tagre("a", "href", r'([^"]+)', + after='comic-nav-base comic-nav-previous')) help = 'Index format: stripname' - + @classmethod def namer(cls, imageUrl, pageUrl): bf = imageUrl.split('/') name = bf[-1] - re = compile(tagre("div","class",r'comic-id-([^"]+)')) + re = compile(tagre("div", "class", r'comic-id-([^"]+)')) content = getPageContent(pageUrl, cls.session) match = re.search(content) if not match: return None - return "BF%s_%s" % (match.group(1),name) + return "BF%s_%s" % (match.group(1), name) + class BMovieComic(_BasicScraper): url = 'http://www.bmoviecomic.com/' @@ -187,7 +199,9 @@ class BobWhite(_BasicScraper): stripUrl = url + '?webcomic_post=%s' firstStripUrl = stripUrl % '20110504' imageSearch = compile(tagre("img", "src", r"(%swp/wp-content/webcomic/untitled/\d+.jpg)" % rurl)) - prevSearch = compile(tagre("a", "href", "(%s\?webcomic_post=\d+)" % rurl)+r'[^"]+Previous') + prevSearch = compile(tagre("a", "href", + "(%s\?webcomic_post=\d+)" % rurl) + + r'[^"]+Previous') help = 'Index format: yyyymmdd' @@ -214,9 +228,11 @@ class BoxerHockey(_BasicScraper): url = 'http://boxerhockey.fireball20xl.com/' stripUrl = url + '?id=%s' firstStripUrl = stripUrl % '56' - imageSearch = compile(tagre("img", "src", r'(img/comic/[^"]+)', after="comicimg")) - prevSearch = compile(tagre("a", "href", r'(http://www\.boxerhockey\.com/\?id=\d+)') + - r'[^>]+Previous') + imageSearch = compile(tagre("img", "src", r'(img/comic/[^"]+)', + after="comicimg")) + prevSearch = compile(tagre("a", "href", + r'(http://www\.boxerhockey\.com/\?id=\d+)') + + r'[^>]+Previous') help = 'Index format: n (unpadded)' @classmethod @@ -230,7 +246,8 @@ class BoyOnAStickAndSlither(_BasicScraper): stripUrl = url + 'page/%s' firstStripUrl = stripUrl % '2' imageSearch = compile(tagre("img", "src", r'(http://\d+\.media\.tumblr\.com/[^"]+_1280\.png)')) - prevSearch = compile(tagre("a", "href", r'(/page/\d+)') + "Next page") + prevSearch = compile(tagre("a", "href", r'(/page/\d+)') + + "Next page") help = 'Index format: n (unpadded)' @classmethod @@ -276,16 +293,6 @@ class BrentalFlossGuest(BrentalFloss): firstStripUrl = stripUrl % '1' -class Brink(_BasicScraper): - url = 'http://paperfangs.com/brink/' - rurl = escape(url) - stripUrl = url + '?p=%s' - firstStripUrl = stripUrl % '5' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) - help = 'Index format: number' - - class BrightlyWound(_BasicScraper): baseUrl = 'http://www.brightlywound.com/' url = baseUrl + '?comic=137' @@ -296,6 +303,16 @@ class BrightlyWound(_BasicScraper): help = 'Index format: nnn' +class Brink(_BasicScraper): + url = 'http://paperfangs.com/brink/' + rurl = escape(url) + stripUrl = url + '?p=%s' + firstStripUrl = stripUrl % '5' + imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) + help = 'Index format: number' + + class ButtercupFestival(_ParserScraper): url = 'http://www.buttercupfestival.com/' stripUrl = url + '%s.htm' @@ -305,16 +322,6 @@ class ButtercupFestival(_ParserScraper): help = 'Index format: 2-number' -class ButterSafe(_BasicScraper): - url = 'http://buttersafe.com/' - rurl = escape(url) - stripUrl = url + '%s/' - firstStripUrl = stripUrl % '2007/04/03/breakfast-sad-turtle' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s\d+\d+/\d+/\d+/[^"]+)' % rurl, after="prev")) - help = 'Index format: yyyy/mm/dd/stripname' - - class ButternutSquash(_BasicScraper): url = 'http://www.butternutsquash.net/' rurl = escape(url) @@ -323,3 +330,14 @@ class ButternutSquash(_BasicScraper): imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) help = 'Index format: yyyy/mm/dd/strip-name-author-name' + + +class ButterSafe(_BasicScraper): + url = 'http://buttersafe.com/' + rurl = escape(url) + stripUrl = url + '%s/' + firstStripUrl = stripUrl % '2007/04/03/breakfast-sad-turtle' + imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%s\d+\d+/\d+/\d+/[^"]+)' % rurl, + after="prev")) + help = 'Index format: yyyy/mm/dd/stripname' diff --git a/dosagelib/plugins/c.py b/dosagelib/plugins/c.py index fa09e2a3f..ac7cb3e59 100755 --- a/dosagelib/plugins/c.py +++ b/dosagelib/plugins/c.py @@ -1,9 +1,9 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015 Tobias Gruetzmacher +# Copyright (C) 2015-2016 Tobias Gruetzmacher -from __future__ import absolute_import +from __future__ import absolute_import, division, print_function from re import compile, escape @@ -12,6 +12,7 @@ from ..helpers import bounceStarter, indirectStarter from ..util import tagre from .wordpress import _WordpressScraper + class Caggage(_BasicScraper): url = 'http://caggagecomic.com/' rurl = escape(url) @@ -21,6 +22,7 @@ class Caggage(_BasicScraper): prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="prev")) help = 'Index format: number' + class CampComic(_BasicScraper): url = 'http://campcomic.com/comic/' rurl = escape(url) @@ -30,24 +32,28 @@ class CampComic(_BasicScraper): prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btn btnPrev")) help = 'Index Format: number' + class CaptainSNES(_BasicScraper): url = 'http://www.captainsnes.com/' rurl = escape(url) stripUrl = url + '%s/' firstStripUrl = stripUrl % '2001/07/10/the-mistake' - imageSearch = compile(tagre("img", "src", r"(%scomics/[^']+)" % rurl, quote="'")) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) + tagre("span", "class", "prev")) + imageSearch = compile(tagre("img", "src", r"(%scomics/[^']+)" % rurl, + quote="'")) + prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl) + + tagre("span", "class", "prev")) multipleImagesPerStrip = True help = 'Index format: yyyy/mm/dd/nnn-stripname' class Carciphona(_BasicScraper): url = 'http://carciphona.com/' - stripUrl = url + 'view.php?page=%s&chapter=%s' - imageSearch = compile(tagre("div", "style", r'background-image:url\((_pages[^)]*)\)')) - prevSearch = compile(tagre("a", "href", r'(view\.php\?[^"]*)', after="prevarea")) - latestSearch = compile(tagre("a", "href", r'(view\.php\?page=[0-9]+[^"]*)')) - help = 'Index format: None' + imageSearch = compile(tagre("div", "style", + r'background-image:url\((_pages[^)]*)\)')) + prevSearch = compile(tagre("a", "href", r'(view\.php\?[^"]*)', + after="prevarea")) + latestSearch = compile(tagre("a", "href", + r'(view\.php\?page=[0-9]+[^"]*)')) starter = indirectStarter(url, latestSearch) @classmethod @@ -61,8 +67,8 @@ class CaseyAndAndy(_BasicScraper): stripUrl = url + 'view.php?strip=%s' firstStripUrl = stripUrl % '1' imageSearch = compile(tagre("img", "src", r'(Strip\d+\.gif)')) - prevSearch = compile(tagre("a", "href", r'(view\.php\?strip=\d+)') - + tagre("img", "src", r'previous\.gif')) + prevSearch = compile(tagre("a", "href", r'(view\.php\?strip=\d+)') + + tagre("img", "src", r'previous\.gif')) help = 'Index format: number' @@ -70,8 +76,10 @@ class CasuallyKayla(_BasicScraper): url = 'http://casuallykayla.com/' stripUrl = url + '?p=%s' firstStripUrl = stripUrl % '89' - imageSearch = compile(tagre("img", "src", r'(http://casuallykayla\.com/comics/[^"]+)')) - prevSearch = compile(tagre("div", "class", r'nav-previous') + tagre("a", "href", r'([^"]+)')) + imageSearch = compile(tagre("img", "src", + r'(http://casuallykayla\.com/comics/[^"]+)')) + prevSearch = compile(tagre("div", "class", r'nav-previous') + + tagre("a", "href", r'([^"]+)')) help = 'Index format: nnn' @@ -82,7 +90,9 @@ class Catalyst(_BasicScraper): stripUrl = baseUrl + "comic.php?comic_id=%s" firstStripUrl = stripUrl % '1' imageSearch = compile(tagre("img", "src", r'((?:%s)?comics/[^"]+)' % rurl)) - prevSearch = compile("
" + tagre("a", "href", r'(%scomic\.php\?comic_id=\d+)' % rurl)) + prevSearch = compile("
" + + tagre("a", "href", + r'(%scomic\.php\?comic_id=\d+)' % rurl)) help = 'Index format: number' @@ -101,10 +111,12 @@ class CatAndGirl(_BasicScraper): self.stripUrl % '4299', ) + class CatNine(_WordpressScraper): url = 'http://cat-nine.net' firstStripUrl = 'http://cat-nine.net/comic/episode-1/first-day-for-everything/' + class CatVersusHuman(_ParserScraper): url = 'http://www.catversushuman.com' multipleImagesPerStrip = True @@ -130,7 +142,8 @@ class Champ2010(_BasicScraper): stripUrl = baseUrl + '%s.html' firstStripUrl = stripUrl % 'champ1-1-10-fuck' imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="Previous")) + prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, + after="Previous")) help = 'Index format: yy-dd-mm' @@ -138,8 +151,10 @@ class ChannelAte(_BasicScraper): url = 'http://www.channelate.com/' rurl = escape(url) stripUrl = url + '%s/' - imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev")) + imageSearch = compile(tagre("img", "src", + r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", + r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev")) help = 'Index format: yyyy/mm/dd/name' @@ -186,13 +201,15 @@ class CigarroAndCerveja(_ParserScraper): imageSearch = '//div[@id="comic"]//img', prevSearch = '//a[contains(text()," Prev")]', + class Collar6(_BasicScraper): url = 'http://collar6.com/' rurl = escape(url) stripUrl = url + 'archive/%s' firstStripUrl = stripUrl % 'collar-6-187' imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/collar6/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl, after="previous")) + prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl, + after="previous")) help = 'Index format: ' @@ -211,7 +228,8 @@ class Commissioned(_BasicScraper): stripUrl = url + '?p=%s' firstStripUrl = stripUrl % '139' imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev")) + prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, + after="prev")) help = 'Index format: n' @@ -222,7 +240,7 @@ class CompanyY(_BasicScraper): firstStripUrl = stripUrl % '2009/08/14/coming-soon' imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) prevSearch = compile(tagre("div", "class", r"nav-previous") + - tagre("a", "href", r'(%s[^"]+)' % rurl)) + tagre("a", "href", r'(%s[^"]+)' % rurl)) help = 'Index format: yyyy/mm/dd/strip-name' @@ -268,6 +286,15 @@ class CrapIDrewOnMyLunchBreak(_BasicScraper): help = 'Index format: yyyy/mm/dd/name' +class CrimsonDark(_BasicScraper): + url = 'http://www.davidcsimon.com/crimsondark/' + stripUrl = url + 'index.php?view=comic&strip_id=%s' + firstStripUrl = stripUrl % '1' + imageSearch = compile(r'src="(.+?strips/.+?)"') + prevSearch = compile(r'\s*Previous') - help = 'Index format: nnnn/nnnnn' - class FredoAndPidjin(_BasicScraper): url = 'http://www.pidjin.net/' @@ -157,10 +154,19 @@ class FredoAndPidjin(_BasicScraper): ) multipleImagesPerStrip = True prevSearch = compile(tagre('a', 'href', '([^"]+)')+"Prev") - starter = indirectStarter(url, + starter = indirectStarter( + url, compile(tagre('a', 'href', "("+url+r'\d\d\d\d/\d\d/\d\d/[^"]+/)'))) +class Freefall(_BasicScraper): + url = 'http://freefall.purrsia.com/default.htm' + stripUrl = 'http://freefall.purrsia.com/ff%s/fc%s.htm' + imageSearch = compile(r'Previous') + help = 'Index format: nnnn/nnnnn' + + class FullFrontalNerdity(_BasicScraper): url = 'http://ffn.nodwick.com/' rurl = escape(url) diff --git a/dosagelib/plugins/footloosecomic.py b/dosagelib/plugins/footloosecomic.py index 13c5a0e79..1d3ab76b6 100644 --- a/dosagelib/plugins/footloosecomic.py +++ b/dosagelib/plugins/footloosecomic.py @@ -1,15 +1,21 @@ # -*- coding: utf-8 -*- -from __future__ import unicode_literals +# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs +# Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher + +from __future__ import absolute_import, division, print_function from ..scraper import _ParserScraper + class Footloose(_ParserScraper): url = 'http://footloosecomic.com/footloose.php' - imageSearch='//body/p[1]//img' - prevSearch='//body/a[2]' + imageSearch = '//body/p[1]//img' + prevSearch = '//body/a[2]' + class Cherry(Footloose): url = 'http://footloosecomic.com/cherry/index.php' + class Desigaspring(Footloose): url = 'http://footloosecomic.com/dspring/index.php' - diff --git a/dosagelib/plugins/g.py b/dosagelib/plugins/g.py index c60c7ed05..8b2060685 100644 --- a/dosagelib/plugins/g.py +++ b/dosagelib/plugins/g.py @@ -3,6 +3,7 @@ # Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2015-2016 Tobias Gruetzmacher +from __future__ import absolute_import, division, print_function from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper @@ -155,6 +156,16 @@ class GrrlPower(_BasicScraper): help = 'Index format: number' +class GUComics(_BasicScraper): + url = 'http://www.gucomics.com/' + stripUrl = url + '%s' + firstStripUrl = stripUrl % '20000710' + imageSearch = compile(tagre("img", "src", r'(/comics/\d{4}/gu_[^"]+)')) + prevSearch = compile(tagre("a", "href", r'(/\d+)') + + tagre("img", "src", r'/images/nav/prev\.png')) + help = 'Index format: yyyymmdd' + + class GunnerkriggCourt(_BasicScraper): url = 'http://www.gunnerkrigg.com/' stripUrl = url + '?p=%s' @@ -176,13 +187,3 @@ class Gunshow(_BasicScraper): tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'[^"]*menu/small/previous\.gif')) help = 'Index format: n' - - -class GUComics(_BasicScraper): - url = 'http://www.gucomics.com/' - stripUrl = url + '%s' - firstStripUrl = stripUrl % '20000710' - imageSearch = compile(tagre("img", "src", r'(/comics/\d{4}/gu_[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(/\d+)') + - tagre("img", "src", r'/images/nav/prev\.png')) - help = 'Index format: yyyymmdd' diff --git a/dosagelib/plugins/m.py b/dosagelib/plugins/m.py index 5987fe188..bb4d89f4f 100755 --- a/dosagelib/plugins/m.py +++ b/dosagelib/plugins/m.py @@ -1,7 +1,9 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher +from __future__ import absolute_import, division, print_function from re import compile, escape, IGNORECASE from ..scraper import _BasicScraper, _ParserScraper @@ -25,21 +27,19 @@ class MadamAndEve(_BasicScraper): class Magellan(_ParserScraper): - description = u'A comic strip about Superheroes and Not-Superheroes' url = 'http://magellanverse.com/' - stripUrl = url + '%s/' css = True imageSearch = '#comic-1 > a:first-child img' prevSearch = '.nav-previous > a' - help = 'Index format: stripname' - + class MagickChicks(_BasicScraper): url = 'http://www.magickchicks.com/' stripUrl = url + 'strips-mc/%s' firstStripUrl = stripUrl % 'tis_but_a_trifle' imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'([^"]*/strips-mc/[^"]+)', before="cn[id]prevt")) + prevSearch = compile(tagre("a", "href", r'([^"]*/strips-mc/[^"]+)', + before="cn[id]prevt")) help = 'Index format: name' @@ -54,7 +54,6 @@ class ManlyGuysDoingManlyThings(_ParserScraper): class MareInternum(_ParserScraper): - description = u'Mare Internum is an online science fiction graphic novel about the isolated inhabitants of the planet Mars. ' url = 'http://marecomic.com/' stripUrl = url + 'comics/ch%s' imageSearch = '//div[@id="comic"]//img' @@ -76,7 +75,8 @@ class MarriedToTheSea(_BasicScraper): rurl = escape(url) stripUrl = url + '%s' firstStripUrl = stripUrl % '022806' - imageSearch = compile(tagre("img", "src", r'(%s\d+/[^"]+)' % rurl, before="overflow")) + imageSearch = compile(tagre("img", "src", r'(%s\d+/[^"]+)' % rurl, + before="overflow")) prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<< Yesterday") help = 'Index format: mmddyy' @@ -85,6 +85,7 @@ class MarriedToTheSea(_BasicScraper): unused, date, filename = imageUrl.rsplit('/', 2) return '%s-%s' % (date, filename) + class MaxOveracts(_ParserScraper): url = 'http://occasionalcomics.com/' stripUrl = url + '%s/' @@ -108,7 +109,8 @@ class MenageA3(_BasicScraper): url = 'http://www.ma3comic.com/' stripUrl = url + 'strips-ma3/%s' imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'([^"]*/strips-ma3/[^"]+)', before="cn[id]prev")) + prevSearch = compile(tagre("a", "href", r'([^"]*/strips-ma3/[^"]+)', + before="cn[id]prev")) help = 'Index format: name' @@ -117,40 +119,43 @@ class Misfile(_BasicScraper): stripUrl = url + '?date=%s' firstStripUrl = stripUrl % '2004-02-22' imageSearch = compile(tagre("img", "src", r"(comics/[^']+)", quote="'")) - prevSearch = compile(tagre("link", "href", r"([^']+)", quote="'", before="Previous")) + prevSearch = compile(tagre("link", "href", r"([^']+)", quote="'", + before="Previous")) help = 'Index format: yyyy-mm-dd' -class Moonsticks(_ParserScraper): - url = "http://moonsticks.org/" - stripUrl = url - imageSearch = "//div[@class='entry']//img" - prevSearch = u"//a[text()='« Previous']" - help = 'Index format: stripname' - - class MonsieurLeChien(_BasicScraper): url = 'http://www.monsieur-le-chien.fr/' stripUrl = url + 'index.php?planche=%s' firstStripUrl = stripUrl % '2' lang = 'fr' imageSearch = compile(tagre("img", "src", r'(i/planches/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", "i/precedent.gif")) + prevSearch = compile(tagre("a", "href", r'([^"]+)') + + tagre("img", "src", "i/precedent.gif")) help = 'Index format: n' +class Moonsticks(_ParserScraper): + url = "http://moonsticks.org/" + imageSearch = "//div[@class='entry']//img" + prevSearch = u"//a[text()='« Previous']" + + class MrLovenstein(_BasicScraper): url = 'http://www.mrlovenstein.com/' - rurl = escape(url) stripUrl = url + 'comic/%s#comic' firstStripUrl = stripUrl % '1' - imageSearch = ( - #captures rollover comic - compile(tagre("div", "class", r'comic_image') + "\s*.*\s*" + tagre("div", "style", r'display: none;') + "\s*.*\s*" + tagre("img", "src", r'(/images/comics/[^"]+)')), - #captures standard comic - compile(tagre("img", "src", r'(/images/comics/[^"]+)', before="comic_main_image")), + imageSearch = ( + # captures rollover comic + compile(tagre("div", "class", r'comic_image') + "\s*.*\s*" + + tagre("div", "style", r'display: none;') + "\s*.*\s*" + + tagre("img", "src", r'(/images/comics/[^"]+)')), + # captures standard comic + compile(tagre("img", "src", r'(/images/comics/[^"]+)', + before="comic_main_image")), ) - prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", "/images/nav_left.png")) + prevSearch = compile(tagre("a", "href", r'([^"]+)') + + tagre("img", "src", "/images/nav_left.png")) textSearch = compile(r'') help = 'Index Format: n' @@ -163,7 +168,8 @@ class MyCartoons(_BasicScraper): compile(tagre("img", "src", r'(%swp-content/cartoons/(?:[^"]+/)?\d+-\d+-\d+[^"]+)' % rurl)), compile(tagre("img", "src", r'(%scartoons/[^"]+/\d+-\d+-\d+[^"]+)' % rurl)), ) - prevSearch = compile(tagre("a", "href", r'(%spage/[^"]+)' % rurl) + "«") + prevSearch = compile(tagre("a", "href", r'(%spage/[^"]+)' % rurl) + + "«") help = 'Index format: number' lang = 'de' @@ -172,4 +178,3 @@ class MysteriesOfTheArcana(_ParserScraper): url = 'http://mysteriesofthearcana.com/' imageSearch = '//div[@id="comic"]//img' prevSearch = '//a[@class="navprevious"]' - help = 'Index format: n (unpadded)' diff --git a/dosagelib/plugins/n.py b/dosagelib/plugins/n.py index 84da8b03a..885b8c4f9 100644 --- a/dosagelib/plugins/n.py +++ b/dosagelib/plugins/n.py @@ -1,7 +1,9 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher +from __future__ import absolute_import, division, print_function from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter @@ -37,7 +39,8 @@ class NatalieDee(_BasicScraper): rurl = escape(url) stripUrl = url + '%s' firstStripUrl = stripUrl % '022806' - imageSearch = compile(tagre("img", "src", r'(%s\d+/[^"]+)' % rurl, before="overflow")) + imageSearch = compile(tagre("img", "src", r'(%s\d+/[^"]+)' % rurl, + before="overflow")) prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<< Yesterday") help = 'Index format: mmddyy' @@ -47,6 +50,23 @@ class NatalieDee(_BasicScraper): return '%s-%s' % (date, filename) +class NekkoAndJoruba(_BasicScraper): + url = 'http://www.nekkoandjoruba.com/' + stripUrl = url + '?p=%s' + firstStripUrl = stripUrl % '7' + imageSearch = compile(r'‹') + help = 'Index format: nnn' + + +class NekoTheKitty(_ParserScraper): + url = 'http://www.nekothekitty.net/' + stripUrl = url + 'comics/%s' + firstStripUrl = stripUrl % '936393/001-video-games' + imageSearch = '//a[@id="comic_image"]/img' + prevSearch = '//a[text()="<-"]' + + class NeoEarth(_BasicScraper): url = 'http://www.neo-earth.com/NE/' stripUrl = url + 'index.php?date=%s' @@ -72,23 +92,6 @@ class NewWorld(_BasicScraper): help = 'Index format: yyyy/mm/dd/stripn' -class NekkoAndJoruba(_BasicScraper): - url = 'http://www.nekkoandjoruba.com/' - stripUrl = url + '?p=%s' - firstStripUrl = stripUrl % '7' - imageSearch = compile(r'‹') - help = 'Index format: nnn' - - -class NekoTheKitty(_ParserScraper): - url = 'http://www.nekothekitty.net/' - stripUrl = url + 'comics/%s' - firstStripUrl = stripUrl % '936393/001-video-games' - imageSearch = '//a[@id="comic_image"]/img' - prevSearch = '//a[text()="<-"]' - - class NichtLustig(_BasicScraper): url = 'http://www.nichtlustig.de/main.html' stripUrl = 'http://static.nichtlustig.de/toondb/%s.html' @@ -96,13 +99,12 @@ class NichtLustig(_BasicScraper): imageSearch = compile('background-image:url\((http://static\.nichtlustig\.de/comics/full/\d+\.jpg)') prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)')) help = 'Index format: yymmdd' - starter = indirectStarter(url, - compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)'))) + starter = indirectStarter( + url, compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)'))) class Nimona(_BasicScraper): url = 'http://gingerhaze.com/nimona/' - rurl = escape(url) stripUrl = url + '%s/' firstStripUrl = stripUrl % "comic/page-1" imageSearch = compile(tagre("img", "src", r'(http://gingerhaze\.com/sites/default/files/nimona-pages/.+?)')) @@ -111,20 +113,6 @@ class Nimona(_BasicScraper): endOfLife = True -class Nnewts(_BasicScraper): - url = 'http://nnewts.com/' - rurl = escape(url) - stripUrl = url + '%s/' - firstStripUrl = stripUrl % 'nnewts-page-1' - imageSearch = compile(tagre("img", "src", r'(%snewty/comics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s(?:nnewts-)?page-\d+/)' % rurl, after="navi-prev")) - help = 'Index format: page-number' - - @classmethod - def getDisabledReasons(cls): - return {'cannotReadOnline': 'Comic is not available for reading online.'} - - class NobodyScores(_BasicScraper): url = 'http://nobodyscores.loosenutstudio.com/' rurl = escape(url) @@ -143,11 +131,14 @@ class NoNeedForBushido(_BasicScraper): imageSearch = compile( tagre("a", "rel", "next") + tagre("img", "src", r'(%swp-content/uploads/\d+/\d+/[^"]+)' % rurl, - after="attachment-full")) - prevSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl, after="previous-webcomic")) + after="attachment-full")) + prevSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl, + after="previous-webcomic")) help = 'Index format: nnn' - starter = indirectStarter(url, - compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl, after="last-webcomic"))) + starter = indirectStarter( + url, compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl, + after="last-webcomic"))) + class NotInventedHere(_BasicScraper): url = 'http://notinventedhe.re/' @@ -158,6 +149,7 @@ class NotInventedHere(_BasicScraper): prevSearch = compile(tagre("a", "href", r'(/on/\d+-\d+-\d+)')+'\s*Previous') help = 'Index format: yyyy-mm-dd' + class Nukees(_BasicScraper): url = 'http://www.nukees.com/' stripUrl = url + 'd/%s' diff --git a/dosagelib/plugins/o.py b/dosagelib/plugins/o.py index 936fd97dd..743055aa5 100644 --- a/dosagelib/plugins/o.py +++ b/dosagelib/plugins/o.py @@ -1,7 +1,9 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher +from __future__ import absolute_import, division, print_function from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter @@ -47,9 +49,12 @@ class OhJoySexToy(_BasicScraper): rurl = escape(url) stripUrl = url + '%s/' firstStripUrl = stripUrl % 'introduction' - imageSearch = compile(tagre("div", "class", r'comicpane') + "\s*.*\s*" + tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after='navi navi-prev')) - textSearch = compile(tagre("div", "class", r'comicpane') + "\s*.*\s*" + tagre("img", "alt", r'([^"]+)')) + imageSearch = compile(tagre("div", "class", r'comicpane') + "\s*.*\s*" + + tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, + after='navi navi-prev')) + textSearch = compile(tagre("div", "class", r'comicpane') + "\s*.*\s*" + + tagre("img", "alt", r'([^"]+)')) help = 'Index Format: name' adult = True @@ -75,6 +80,15 @@ class OmakeTheater(_ParserScraper): help = 'Index format: number (unpadded)' +class OneQuestion(_BasicScraper): + url = 'http://onequestioncomic.com/' + stripUrl = url + 'comic.php?strip_id=%s' + firstStripUrl = stripUrl % '1' + imageSearch = compile(tagre("img", "src", r'((?:\.\./)?istrip_files/strips/\d+\.\w{3,4})')) + prevSearch = compile(tagre("a", "href", r'(comic\.php\?strip_id=\d+)') + tagre("img", "src", r'img/arrow_prev\.jpg')) + help = 'Index format: n (unpadded)' + + class OnTheFastrack(_BasicScraper): url = 'http://onthefastrack.com/' stripUrl = url + 'comics/%s' @@ -82,7 +96,7 @@ class OnTheFastrack(_BasicScraper): imageSearch = compile(r'(http://safr\.kingfeatures\.com/idn/cnfeed/zone/js/content\.php\?file=.+)"') prevSearch = compile(r'id="previouscomic" class="button white">.*]*next_button\.gif')) @@ -172,6 +173,17 @@ class Pimpette(_ParserScraper): help = 'Index format: yyyymmdd' +class Pixel(_BasicScraper): + url = 'http://pixelcomic.net/' + rurl = escape(url) + stripUrl = url + '%s' + firstStripUrl = stripUrl % '000.shtml' + imageSearch = compile(tagre("img", "src", r'(\d+\.png)')) + prevSearch = compile(tagre("a", "href", r'(%s\d+\.(?:php|shtml))' % rurl, + before="prev")) + help = 'Index format: nnn' + + class PlanescapeSurvival(_BasicScraper): url = 'http://planescapecomic.com/' stripUrl = url + '%s.html' @@ -204,14 +216,16 @@ class PoorlyDrawnLines(_BasicScraper): stripUrl = url + '%s' firstStripUrl = stripUrl % 'campus-characters/' imageSearch = compile(tagre("img", "src", r'(http://poorlydrawnlines\.com/wp-content/uploads/\d+/\d+/[^"]+)')) - prevSearch = compile(tagre("li", "class", r'previous') + tagre("a", "href", r'(%s[^"]+)' % rurl)) + prevSearch = compile(tagre("li", "class", r'previous') + + tagre("a", "href", r'(%s[^"]+)' % rurl)) help = 'Index Format: name' class Precocious(_BasicScraper): url = 'http://www.precociouscomic.com/' - starter = indirectStarter(url, - compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png")) + starter = indirectStarter( + url, compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + + tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png")) ) stripUrl = url + 'archive/comic/%s' imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))')) @@ -234,7 +248,8 @@ class PunksAndNerds(_BasicScraper): stripUrl = url + '?p=%s' firstStripUrl = stripUrl % '15' imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="navi-prev")) + prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, + after="navi-prev")) help = 'Index format: nnn' @@ -250,5 +265,6 @@ class PvPonline(_BasicScraper): url = 'http://pvponline.com/comic' stripUrl = url + '%s' imageSearch = compile(tagre("img", "src", r'(http://s3[^"]+\.amazonaws\.com/pvponlinenew/img/comic/\d+/\d+/pvp[^"]+\.jpg)')) - prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)', after="left divider")) + prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)', + after="left divider")) help = 'Index format: yyyy/mm/dd/stripname' diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py index 04dc9d746..7d0635004 100644 --- a/dosagelib/plugins/s.py +++ b/dosagelib/plugins/s.py @@ -1,9 +1,11 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher +from __future__ import absolute_import, division, print_function from re import compile, escape, IGNORECASE, sub -from os.path import splitext, basename +from os.path import splitext from datetime import datetime from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter, bounceStarter @@ -14,7 +16,7 @@ class SabrinaOnline(_BasicScraper): url = 'http://sabrina-online.com/' imageSearch = compile(tagre("a", "href", r'(strips/[^"]*)')) prevSearch = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)") + - tagre("img", "src", "b_back.gif")) + tagre("img", "src", "b_back.gif")) help = 'Index format: n (unpadded)' adult = True multipleImagesPerStrip = True @@ -32,9 +34,10 @@ class SabrinaOnline(_BasicScraper): class SafelyEndangered(_BasicScraper): url = 'http://www.safelyendangered.com/' stripUrl = url + 'comic/%s' - firstStripUrl = stripUrl % 'ignored' + firstStripUrl = stripUrl % 'ignored' imageSearch = compile(tagre("img", "src", r'(http://www\.safelyendangered\.com/wp-content/uploads/\d+/\d+/[^"]+\.[a-z]+).*')) - prevSearch = compile(tagre("a", "href", r'([^"]+)', after="navi navi-prev")) + prevSearch = compile(tagre("a", "href", r'([^"]+)', + after="navi navi-prev")) textSearch = compile(tagre("img", "title", r'([^"]+)', before=r'http://www\.safelyendangered\.com/wp-content/uploads')) help = 'Index format: yyyy/mm/stripname' @@ -84,9 +87,12 @@ class ScenesFromAMultiverse(_BasicScraper): firstStripUrl = stripUrl % '2010/06/14/parenthood' imageSearch = ( compile(tagre("div", "id", "comic") + r"\s*" + - tagre("img", "src", r'(.*amultiverse.com/wp-content/uploads/\d+/\d+/[^"]+)')), - compile(tagre("div", "id", "comic") + r"\s*" + tagre("a", "href", r'[^"]*') + - tagre("img", "src", r'(.*amultiverse.com/wp-content/uploads/\d+/\d+/[^"]+)')), + tagre("img", "src", + r'(.*amultiverse.com/wp-content/uploads/\d+/\d+/[^"]+)')), + compile(tagre("div", "id", "comic") + r"\s*" + + tagre("a", "href", r'[^"]*') + + tagre("img", "src", + r'(.*amultiverse.com/wp-content/uploads/\d+/\d+/[^"]+)')), ) prevSearch = compile(tagre("a", "href", r'(%scomic/\d+\d+/\d+/\d+/[^"]+)' % rurl, after="prev")) help = 'Index format: yyyy/mm/dd/stripname' @@ -98,7 +104,8 @@ class SchlockMercenary(_BasicScraper): firstStripUrl = stripUrl % '2000-06-12' imageSearch = compile(tagre("img", "src", r'(http://static\.schlockmercenary\.com/comics/[^"]+)')) multipleImagesPerStrip = True - prevSearch = compile(tagre("a", "href", r'(/\d+-\d+-\d+)', quote="'", after="nav-previous")) + prevSearch = compile(tagre("a", "href", r'(/\d+-\d+-\d+)', quote="'", + after="nav-previous")) help = 'Index format: yyyy-mm-dd' @@ -137,8 +144,8 @@ class SequentialArt(_BasicScraper): stripUrl = url + '?s=%s' firstStripUrl = stripUrl % '1' imageSearch = compile(tagre("img", "src", r'([^"]+)', before="strip")) - prevSearch = compile(tagre("a", "href", r'(/sequentialart\.php\?s=\d+)') - + tagre("img", "src", "Nav_BackOne\.gif")) + prevSearch = compile(tagre("a", "href", r'(/sequentialart\.php\?s=\d+)') + + tagre("img", "src", "Nav_BackOne\.gif")) help = 'Index format: name' @@ -165,7 +172,8 @@ class Sheldon(_BasicScraper): stripUrl = url + 'archive/%s.html' firstStripUrl = stripUrl % '011130' imageSearch = compile(tagre("img", "src", r'(http://cdn\.sheldoncomics\.com/strips/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl, after="sidenav-prev")) + prevSearch = compile(tagre("a", "href", r'(%sarchive/\d+\.html)' % rurl, + after="sidenav-prev")) help = 'Index format: yymmdd' @@ -194,7 +202,8 @@ class Shivae(_BasicScraper): stripUrl = url + 'blog/%s/' firstStripUrl = stripUrl % '2007/09/21/09212007' imageSearch = compile(tagre("img", "src", r'(%swp-content/blogs\.dir/\d+/files/\d+/\d+/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%sblog/[^"]+)' % rurl, after="navi-prev")) + prevSearch = compile(tagre("a", "href", r'(%sblog/[^"]+)' % rurl, + after="navi-prev")) help = 'Index format: yyyy/mm/dd/stripname' @@ -210,9 +219,10 @@ class Shortpacked(_ParserScraper): class ShotgunShuffle(_BasicScraper): url = 'http://shotgunshuffle.com/' stripUrl = url + 'comic/%s' - firstStripUrl = stripUrl % 'pilot/' + firstStripUrl = stripUrl % 'pilot/' imageSearch = compile(tagre("img", "src", r'(http://shotgunshuffle.com/wp-content/uploads/\d+/\d+/\d+-[^"]+)')) - prevSearch = compile(tagre("a", "href", r'([^"]+)', after="navi navi-prev")) + prevSearch = compile(tagre("a", "href", r'([^"]+)', + after="navi navi-prev")) help = 'Index format: stripname' @@ -220,28 +230,19 @@ class SinFest(_BasicScraper): name = 'KeenSpot/SinFest' url = 'http://www.sinfest.net/' stripUrl = url + 'view.php?date=%s' - imageSearch = compile(tagre("img","src", r'(btphp/comics/.+)', after="alt")) - prevSearch = compile(tagre("a", "href", r'(view\.php\?date=.+)') + '\\s*' + tagre("img", "src", r'\.\./images/prev\.gif')) + imageSearch = compile(tagre("img", "src", r'(btphp/comics/.+)', + after="alt")) + prevSearch = compile(tagre("a", "href", r'(view\.php\?date=.+)') + '\\s*' + + tagre("img", "src", r'\.\./images/prev\.gif')) help = 'Index format: yyyy-mm-dd' -# XXX disallowed by robots.txt -class _Sketchesnatched(_BasicScraper): - url = 'http://sketchesnatched.blogspot.com/' - stripUrl = url + 'search?updated-max=%s%%2B01:00&max-results=1' - firstStripUrl = stripUrl % '2011-01-27T08:32:00' - imageSearch = compile(tagre("meta", "content", r"(http://\d+\.bp\.blogspot\.com/[^']+)", - after=r'image_url', quote="'")) - prevSearch = compile(tagre("a", "href", r"(http://sketchesnatched\.blogspot\.[a-z]+/search[^']+)", - before=r"blog-pager-older-link", quote="'")) - help = 'Index format: yyyy-mm-ddThh:mm:ss' - - class SkinDeep(_BasicScraper): url = 'http://www.skindeepcomic.com/' stripUrl = url + 'archive/%s/' imageSearch = compile(r'' % rurl, IGNORECASE) + prevSearch = compile(r'' % rurl, + IGNORECASE) help = 'Index format: n (unpadded)' starter = indirectStarter(url, prevSearch) @@ -419,7 +440,8 @@ class SpareParts(_BasicScraper): stripUrl = baseUrl + 'comics/index.php?date=%s' firstStripUrl = stripUrl % '20031022' imageSearch = compile(tagre("img", "src", r'(http://www\.sparepartscomics\.com/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', quote="'") + "Previous Comic") + prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', + quote="'") + "Previous Comic") help = 'Index format: yyyymmdd' @@ -433,6 +455,29 @@ class StandStillStaySilent(_ParserScraper): help = 'Index Format: number' +class StarCrossdDestiny(_BasicScraper): + baseUrl = 'http://www.starcrossd.net/' + rurl = escape(baseUrl) + url = baseUrl + 'comic.html' + stripUrl = baseUrl + 'archives/%s.html' + firstStripUrl = stripUrl % '00000001' + imageSearch = compile(tagre("img", "src", r'(http://(?:www\.)?starcrossd\.net/(?:ch1|strips|book2)/[^"]+)')) + prevSearch = compile(r']*"[^"]*"[^>]*>prev' % rurl, IGNORECASE) + help = 'Index format: nnnnnnnn' + + @classmethod + def namer(cls, imageUrl, pageUrl): + if imageUrl.find('ch1') == -1: + # At first all images were stored in a strips/ directory but + # that was changed with the introduction of book2 + imageUrl = sub('(?:strips)|(?:images)', 'book1', imageUrl) + elif not imageUrl.find('strips') == -1: + imageUrl = imageUrl.replace('strips/', '') + directory, filename = imageUrl.split('/')[-2:] + filename, extension = splitext(filename) + return directory + '-' + filename + + class StationV3(_ParserScraper): url = 'http://www.stationv3.com/' stripUrl = url + 'd/%s.html' @@ -447,62 +492,18 @@ class StickyDillyBuns(_BasicScraper): stripUrl = url + 'strips-sdb/%s' firstStripUrl = stripUrl % 'awesome_leading_man' imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'([^"]*/strips-sdb/[^"]+)', before="cn[id]prev")) + prevSearch = compile(tagre("a", "href", r'([^"]*/strips-sdb/[^"]+)', + before="cn[id]prev")) help = 'Index format: name' -class Stubble(_BasicScraper): - url = 'http://stubblecomics.com/' - rurl = escape(url) - stripUrl = url + '?p=%s' - firstStripUrl = stripUrl % '4' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="navi-prev")) - help = 'Index format: number' - - -class StuffNoOneToldMe(_BasicScraper): - url = 'http://www.snotm.com/' - stripUrl = url + '%s.html' - firstStripUrl = stripUrl % '2010/05/01' - olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)" - starter = indirectStarter(url, - compile(tagre("a", "href", olderHref, quote="'"))) - imageSearch = ( - compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') + r"(?:|
)"), - compile(tagre("img", "src", r'(http://\d+\.bp\.blogspot\.com/[^"]+)') + r"(?:(?: )?|)"), - compile(tagre("img", "src", r'(https://lh\d+\.googleusercontent\.com/[^"]+)') + r""), - ) - prevSearch = compile(tagre("a", "href", olderHref, quote="'", before="older-link")) - multipleImagesPerStrip = True - help = 'Index format: yyyy/mm/stripname' - - @classmethod - def namer(cls, imageUrl, pageUrl): - """Use page URL to construct meaningful image name.""" - parts, year, month, stripname = pageUrl.rsplit('/', 3) - stripname = stripname.rsplit('.', 1)[0] - parts, imagename = imageUrl.rsplit('/', 1) - return '%s-%s-%s-%s' % (year, month, stripname, imagename) - - def shouldSkipUrl(self, url, data): - """Skip pages without images.""" - return url in ( - self.stripUrl % '2012/08/self-rant', # no comic - self.stripUrl % '2012/06/if-you-wonder-where-ive-been', # video - self.stripUrl % '2011/10/i-didnt-make-this-nor-have-anything-to', # video - self.stripUrl % '2010/12/first-snotm-fans-in-sao-paulo', # no comic - self.stripUrl % '2010/11/ear-infection', # no comic - ) - - class StrawberryDeathCake(_BasicScraper): url = 'http://strawberrydeathcake.com/' rurl = escape(url) - stripUrl = url + 'archive/%s/' - imageSearch = compile(tagre("img", "src", r'(%swp-content/webcomic/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl, after="previous")) - help = 'Index format: stripname' + imageSearch = compile(tagre("img", "src", + r'(%swp-content/webcomic/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%sarchive/[^"]+)' % rurl, + after="previous")) class StrongFemaleProtagonist(_ParserScraper): @@ -524,63 +525,72 @@ class StrongFemaleProtagonist(_ParserScraper): self.stripUrl % 'issue-5/hiatus-2', ) + +class Stubble(_BasicScraper): + url = 'http://stubblecomics.com/' + rurl = escape(url) + stripUrl = url + '?p=%s' + firstStripUrl = stripUrl % '4' + imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, + after="navi-prev")) + help = 'Index format: number' + + +class StuffNoOneToldMe(_BasicScraper): + url = 'http://www.snotm.com/' + stripUrl = url + '%s.html' + firstStripUrl = stripUrl % '2010/05/01' + olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)" + starter = indirectStarter( + url, compile(tagre("a", "href", olderHref, quote="'"))) + imageSearch = ( + compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') + + r"(?:|
)"), + compile(tagre("img", "src", r'(http://\d+\.bp\.blogspot\.com/[^"]+)') + + r"(?:(?: )?|)"), + compile(tagre("img", "src", r'(https://lh\d+\.googleusercontent\.com/[^"]+)') + r""), + ) + prevSearch = compile(tagre("a", "href", olderHref, quote="'", + before="older-link")) + multipleImagesPerStrip = True + help = 'Index format: yyyy/mm/stripname' + + @classmethod + def namer(cls, imageUrl, pageUrl): + """Use page URL to construct meaningful image name.""" + parts, year, month, stripname = pageUrl.rsplit('/', 3) + stripname = stripname.rsplit('.', 1)[0] + parts, imagename = imageUrl.rsplit('/', 1) + return '%s-%s-%s-%s' % (year, month, stripname, imagename) + + def shouldSkipUrl(self, url, data): + """Skip pages without images.""" + return url in ( + self.stripUrl % '2012/08/self-rant', # no comic + self.stripUrl % '2012/06/if-you-wonder-where-ive-been', # video + self.stripUrl % '2011/10/i-didnt-make-this-nor-have-anything-to', # video + self.stripUrl % '2010/12/first-snotm-fans-in-sao-paulo', # no comic + self.stripUrl % '2010/11/ear-infection', # no comic + ) + + class SuburbanTribe(_BasicScraper): url = 'http://www.pixelwhip.com/' rurl = escape(url) stripUrl = url + '?p=%s' imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev")) + prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, + after="prev")) help = 'Index format: nnnn' -class SomethingPositive(_BasicScraper): - url = 'http://www.somethingpositive.net/' - stripUrl = url + 'sp%s.shtml' - imageSearch = ( - compile(tagre("img", "src", r'(sp\d+\.png)')), - compile(tagre("img", "src", r'(twither\.gif)')), - ) - prevSearch = compile(tagre("a", "href", r'(sp\d+\.shtml)') + - "(?:" + tagre("img", "src", r'images/previous\.gif') + "|Previous)") - help = 'Index format: mmddyyyy' - - -class StarCrossdDestiny(_BasicScraper): - baseUrl = 'http://www.starcrossd.net/' - rurl = escape(baseUrl) - url = baseUrl + 'comic.html' - stripUrl = baseUrl + 'archives/%s.html' - firstStripUrl = stripUrl % '00000001' - imageSearch = compile(tagre("img", "src", r'(http://(?:www\.)?starcrossd\.net/(?:ch1|strips|book2)/[^"]+)')) - prevSearch = compile(r']*"[^"]*"[^>]*>prev' % rurl, IGNORECASE) - help = 'Index format: nnnnnnnn' - - @classmethod - def namer(cls, imageUrl, pageUrl): - if imageUrl.find('ch1') == -1: - # At first all images were stored in a strips/ directory but that was changed with the introduction of book2 - imageUrl = sub('(?:strips)|(?:images)','book1',imageUrl) - elif not imageUrl.find('strips') == -1: - imageUrl = imageUrl.replace('strips/','') - directory, filename = imageUrl.split('/')[-2:] - filename, extension = splitext(filename) - return directory + '-' + filename - - -# XXX disallowed by robots.txt -class _StrangeCandy(_BasicScraper): - url = 'http://www.strangecandy.net/' - stripUrl = url + 'd/%s.html' - imageSearch = compile(tagre("img", "src", r'(/comics/\d+\.jpg)')) - prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)') + tagre("img", "alt", "Previous comic")) - help = 'Index format: yyyyddmm' - - class SupernormalStep(_BasicScraper): url = 'http://supernormalstep.com/' rurl = escape(url) stripUrl = url + '?p=%s' firstStripUrl = stripUrl % '8' imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, after="prev")) + prevSearch = compile(tagre("a", "href", r'(%s\?p=\d+)' % rurl, + after="prev")) help = 'Index format: number' diff --git a/dosagelib/plugins/t.py b/dosagelib/plugins/t.py index 08af8dd2e..7d7c1c27c 100755 --- a/dosagelib/plugins/t.py +++ b/dosagelib/plugins/t.py @@ -1,7 +1,9 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher +from __future__ import absolute_import, division, print_function from re import compile, escape, IGNORECASE from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter @@ -23,7 +25,8 @@ class TheDevilsPanties(_BasicScraper): stripUrl = url + 'archives/%s' firstStripUrl = stripUrl % '300' imageSearch = compile(tagre("img", "src", r'(http://cdn\.thedevilspanties\.com/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(/archives/\d+)', after="Previous")) + prevSearch = compile(tagre("a", "href", r'(/archives/\d+)', + after="Previous")) help = 'Index format: number' @@ -42,16 +45,20 @@ class TheLandscaper(_BasicScraper): rurl = escape(url) stripUrl = url + 'comic/%s' firstStripUrl = stripUrl % '1' - imageSearch = compile(tagre("img", "src", r'(/comics/comic/comic_page/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)')+'‹ Previous') + imageSearch = compile(tagre("img", "src", + r'(/comics/comic/comic_page/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)') + + '‹ Previous') help = 'Index format: name' + class TheNoob(_BasicScraper): url = 'http://www.thenoobcomic.com/index.php' stripUrl = url + '?pos=%s' firstStripUrl = stripUrl % '1' imageSearch = compile(tagre("img", "src", r'(/headquarters/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(\?pos=\d+)', before="comic_nav_previous_button")) + prevSearch = compile(tagre("a", "href", r'(\?pos=\d+)', + before="comic_nav_previous_button")) help = 'Index format: nnnn' @@ -70,6 +77,16 @@ class TheOrderOfTheStick(_BasicScraper): return pageUrl.rsplit('/', 1)[-1][:-5] +class TheOuterQuarter(_BasicScraper): + url = 'http://theouterquarter.com/' + rurl = escape(url) + stripUrl = url + 'comic/%s' + firstStripUrl = stripUrl % 'oq-the-first-take/4' + imageSearch = compile(r'') + starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)', after='class="timestamp"'))) + adult = True + + indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl)) + + def getComicStrip(self, url, data): + """The comic strip image is in a separate page.""" + pageUrl = self.fetchUrl(url, data, self.indirectImageSearch) + pageData = self.getPage(pageUrl) + return super(TheThinHLine, self).getComicStrip(pageUrl, pageData) + + @classmethod + def namer(cls, imageUrl, pageUrl): + """Use page URL sequence which is apparently increasing.""" + num = pageUrl.split('/')[-1] + ext = imageUrl.rsplit('.', 1)[1] + return "thethinhline-%s.%s" % (num, ext) + + +class TheWhiteboard(_BasicScraper): + url = 'http://www.the-whiteboard.com/' + stripUrl = url + 'auto%s.html' + imageSearch = compile(r'', IGNORECASE) + prevSearch = compile(r' previous', IGNORECASE) + help = 'Index format: twb or wb + n wg. twb1000' + + class TheWotch(_BasicScraper): url = 'http://www.thewotch.com/' stripUrl = url + '?date=%s' @@ -101,6 +152,16 @@ class ThisIsIndexed(_BasicScraper): help = 'Index format: number' +class ThreePanelSoul(_BasicScraper): + url = 'http://threepanelsoul.com/' + rurl = escape(url) + stripUrl = url + '%s/' + firstStripUrl = stripUrl % '2006/05/11/a-test-comic' + imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev")) + help = 'Index format: yyyy/mm/dd/stripname' + + class ThunderAndLightning(_BasicScraper): url = 'http://www.talcomic.com/wp/' rurl = escape(url) @@ -137,68 +198,6 @@ class ToonHole(_BasicScraper): return url in (self.stripUrl % "2013/03/if-game-of-thrones-was-animated",) -class TwoLumps(_BasicScraper): - url = 'http://www.twolumps.net/' - stripUrl = url + 'd/%s.html' - imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)', after="prev")) - help = 'Index format: yyyymmdd' - - -class TheWhiteboard(_BasicScraper): - url = 'http://www.the-whiteboard.com/' - stripUrl = url + 'auto%s.html' - imageSearch = compile(r'', IGNORECASE) - prevSearch = compile(r' previous', IGNORECASE) - help = 'Index format: twb or wb + n wg. twb1000' - - -class TheOuterQuarter(_BasicScraper): - url = 'http://theouterquarter.com/' - rurl = escape(url) - stripUrl = url + 'comic/%s' - firstStripUrl = stripUrl % 'oq-the-first-take/4' - imageSearch = compile(r'') - starter = indirectStarter(url, compile(tagre("a", "href", r'([^"]+)', after='class="timestamp"'))) - adult = True - - indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl)) - - def getComicStrip(self, url, data): - """The comic strip image is in a separate page.""" - pageUrl = self.fetchUrl(url, data, self.indirectImageSearch) - pageData = self.getPage(pageUrl) - return super(TheThinHLine, self).getComicStrip(pageUrl, pageData) - - @classmethod - def namer(cls, imageUrl, pageUrl): - """Use page URL sequence which is apparently increasing.""" - num = pageUrl.split('/')[-1] - ext = imageUrl.rsplit('.', 1)[1] - return "thethinhline-%s.%s" % (num, ext) - - -class ThreePanelSoul(_BasicScraper): - url = 'http://threepanelsoul.com/' - rurl = escape(url) - stripUrl = url + '%s/' - firstStripUrl = stripUrl % '2006/05/11/a-test-comic' - imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="prev")) - help = 'Index format: yyyy/mm/dd/stripname' - - class TracyAndTristan(_BasicScraper): url = 'http://tandt.thecomicseries.com/' rurl = escape(url) @@ -214,6 +213,15 @@ class TwoGuysAndGuy(_BasicScraper): stripUrl = url + 'archives/%s' firstStripUrl = stripUrl % '4' imageSearch = compile(tagre('img', 'src', r'(%scomics/\d{4}-\d{2}-\d{2}[^"]*)' % rurl)) - prevSearch = compile(tagre('a', 'href', r'(%sarchives/\d+)' % rurl, after='title="Previous"')) + prevSearch = compile(tagre('a', 'href', r'(%sarchives/\d+)' % rurl, + after='title="Previous"')) help = 'Index format: number' adult = True + + +class TwoLumps(_BasicScraper): + url = 'http://www.twolumps.net/' + stripUrl = url + 'd/%s.html' + imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)', after="prev")) + help = 'Index format: yyyymmdd' diff --git a/dosagelib/plugins/u.py b/dosagelib/plugins/u.py index 4e2419f22..d0da70b5b 100644 --- a/dosagelib/plugins/u.py +++ b/dosagelib/plugins/u.py @@ -1,12 +1,15 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher +from __future__ import absolute_import, division, print_function from re import compile, escape from ..scraper import _BasicScraper -from ..helpers import bounceStarter, indirectStarter -from ..util import getQueryParams, tagre +from ..helpers import indirectStarter +from ..util import tagre + class Underling(_BasicScraper): url = 'http://underlingcomic.com/' @@ -14,7 +17,8 @@ class Underling(_BasicScraper): rurl = escape(url) firstStripUrl = stripUrl + 'page-one/' imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]*)' % rurl)) - prevSearch = compile(tagre("a", "href", r'([^"]+)', after = r'class="[^"]*navi-prev')) + prevSearch = compile(tagre("a", "href", r'([^"]+)', + after=r'class="[^"]*navi-prev')) help = 'Index format: nnn' @@ -45,26 +49,12 @@ class Unsounded(_BasicScraper): rurl = escape(url) imageSearch = compile(tagre("img", "src", r'(pageart/[^"]*)')) prevSearch = compile(tagre("a", "href", r'([^"]*)', after='class="back')) - starter = indirectStarter(url, - compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) + - tagre("img", "src", r"%simages/newpages\.png" % rurl))) + starter = indirectStarter( + url, compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) + + tagre("img", "src", r"%simages/newpages\.png" % rurl))) help = 'Index format: chapter-number' def getIndexStripUrl(self, index): """Get comic strip URL from index.""" chapter, num = index.split('-') return self.stripUrl % (chapter, chapter, num) - - -# XXX disallowed by robots.txt -class _UserFriendly(_BasicScraper): - url = 'http://ars.userfriendly.org/cartoons/?mode=classic' - stripUrl = url + '&id=%s' - starter = bounceStarter(url, compile(r'')) - imageSearch = compile(r'Previous Cartoon') - help = 'Index format: yyyymmdd' - - @classmethod - def namer(cls, imageUrl, pageUrl): - return 'uf%s' % (getQueryParams(pageUrl)['id'][0][2:],) diff --git a/dosagelib/plugins/v.py b/dosagelib/plugins/v.py index fcfb49586..f8fffc4f6 100644 --- a/dosagelib/plugins/v.py +++ b/dosagelib/plugins/v.py @@ -1,7 +1,9 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher +from __future__ import absolute_import, division, print_function from re import compile from ..scraper import _BasicScraper @@ -23,28 +25,29 @@ class VGCats(_BasicScraper): firstStripUrl = stripUrl % '0' imageSearch = compile(tagre("img", "src", r'(images/\d{6}\.[^"]+)')) prevSearch = compile(tagre("a", "href", r'(\?strip_id=\d+)') + - tagre("img", "src", r"back\.gif")) + tagre("img", "src", r"back\.gif")) help = 'Index format: n (unpadded)' -class VGCatsSuper(VGCats): - name = 'VGCats/Super' - url = 'http://www.vgcats.com/super/' - stripUrl = url + '?strip_id=%s' - - class VGCatsAdventure(VGCats): name = 'VGCats/Adventure' url = 'http://www.vgcats.com/ffxi/' stripUrl = url + '?strip_id=%s' +class VGCatsSuper(VGCats): + name = 'VGCats/Super' + url = 'http://www.vgcats.com/super/' + stripUrl = url + '?strip_id=%s' + + class VictimsOfTheSystem(_BasicScraper): url = 'http://www.votscomic.com/' stripUrl = url + '?id=%s.jpg' firstStripUrl = stripUrl % '070103-002452' imageSearch = compile(tagre("img", "src", r'(comicpro/strips/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(\?id=\d+-\d+\.jpg)') + "Previous") + prevSearch = compile(tagre("a", "href", r'(\?id=\d+-\d+\.jpg)') + + "Previous") help = 'Index format: nnn-nnn' @@ -52,7 +55,8 @@ class ViiviJaWagner(_BasicScraper): url = 'http://www.hs.fi/viivijawagner/' stripUrl = None imageSearch = compile(tagre("img", "src", r'(http://hs\d+\.snstatic\.fi/webkuva/sarjis/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(/viivijawagner/[^"]+)', before="prev-cm")) + prevSearch = compile(tagre("a", "href", r'(/viivijawagner/[^"]+)', + before="prev-cm")) help = 'Index format: none' lang = 'fi' diff --git a/dosagelib/plugins/w.py b/dosagelib/plugins/w.py index b39771764..f164bc7d5 100644 --- a/dosagelib/plugins/w.py +++ b/dosagelib/plugins/w.py @@ -1,7 +1,9 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher +from __future__ import absolute_import, division, print_function from re import compile, escape, IGNORECASE from ..scraper import _BasicScraper @@ -24,7 +26,8 @@ class WastedTalent(_BasicScraper): stripUrl = url + 'comic/%s' firstStripUrl = stripUrl % 'anime-crack' imageSearch = compile(tagre("img", "src", r'(http://www\.wastedtalent\.ca/sites/default/files/imagecache/comic_full/comics/\d+/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)', after="comic_prev")) + prevSearch = compile(tagre("a", "href", r'(/comic/[^"]+)', + after="comic_prev")) help = 'Index format: stripname' @@ -50,7 +53,8 @@ class WebDesignerCOTW(_BasicScraper): compile(tagre("img", "src", r'(http://netdna\.webdesignerdepot\.com/uploads/comics/\d+\.[^"]+)')), ) multipleImagesPerStrip = True - prevSearch = compile(tagre("link", "href", r"(%s\d+/\d+/[^']+)" % rurl, before='prev', quote="'")) + prevSearch = compile(tagre("link", "href", r"(%s\d+/\d+/[^']+)" % rurl, + before='prev', quote="'")) help = 'Index format: yyyy/mm/stripname' def shouldSkipUrl(self, url, data): @@ -78,8 +82,10 @@ class Weregeek(_BasicScraper): rurl = escape(url) stripUrl = url + '%s/' firstStripUrl = stripUrl % '2006/11/27/' - imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'((%s)?(/)?\d+/\d+/\d+/)'% rurl)+'\s*'+ tagre('img', 'src', '[^"]*previous_day.gif')) + imageSearch = compile(tagre("img", "src", + r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)) + prevSearch = compile(tagre("a", "href", r'((%s)?/?\d+/\d+/\d+/)' % rurl) + + '\s*' + tagre('img', 'src', '[^"]*previous_day.gif')) help = 'Index format: yyyy/mm/dd' @@ -108,7 +114,8 @@ class Whomp(_BasicScraper): stripUrl = url + '%s/' firstStripUrl = stripUrl % '2010/06/14/06142010' imageSearch = compile(tagre("img", "src", r'(%scomics/\d+-\d+-\d+[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, after="navi-prev")) + prevSearch = compile(tagre("a", "href", r'(%s\d+/\d+/\d+/[^"]+)' % rurl, + after="navi-prev")) help = 'Index format: yyyy/mm/dd/stripname' @@ -118,7 +125,8 @@ class WhyTheLongFace(_BasicScraper): url = baseUrl + 'wtlf200709.html' stripUrl = baseUrl + 'wtlf%s.html' firstStripUrl = stripUrl % '200306' - imageSearch = compile(r'