From bb1f20d867d93e04c457cbcd5541fe1d643cfbe0 Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Sat, 2 Apr 2016 00:14:31 +0200 Subject: [PATCH] Remove make_scraper for most WordPress comics. - Drop KatzenfutterGeleespritzer because of its robots.txt. - Move all WordPress/ComicPress scrapers into alphabetical files. - Move _WordPressScraper & _ComicPressScraper into common.py. - Some smaller PEP8 fixes. --- dosagelib/plugins/a.py | 38 +++++++++++++----- dosagelib/plugins/b.py | 30 +++++++++++++++ dosagelib/plugins/c.py | 30 +++++++++++++-- dosagelib/plugins/common.py | 22 +++++++++++ dosagelib/plugins/f.py | 11 ++++-- dosagelib/plugins/h.py | 32 ++++++++++++---- dosagelib/plugins/i.py | 13 ++++++- dosagelib/plugins/k.py | 1 + dosagelib/plugins/m.py | 20 ++++++++++ dosagelib/plugins/n.py | 21 +++++++++- dosagelib/plugins/num.py | 13 +++++-- dosagelib/plugins/o.py | 17 +++++++-- dosagelib/plugins/p.py | 6 +++ dosagelib/plugins/s.py | 22 +++++++++++ dosagelib/plugins/t.py | 13 +++++++ dosagelib/plugins/wordpress.py | 70 +++------------------------------- dosagelib/plugins/y.py | 12 ++++-- 17 files changed, 268 insertions(+), 103 deletions(-) create mode 100644 dosagelib/plugins/common.py diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py index bfc6f6e71..f74886204 100644 --- a/dosagelib/plugins/a.py +++ b/dosagelib/plugins/a.py @@ -8,18 +8,22 @@ from re import compile, escape, MULTILINE from ..util import tagre from ..scraper import _BasicScraper, _ParserScraper from ..helpers import regexNamer, bounceStarter, indirectStarter +from .common import _WordPressScraper, _ComicPressScraper class AbstruseGoose(_BasicScraper): url = 'http://abstrusegoose.com/' rurl = escape(url) - starter = bounceStarter(url, compile(tagre('a', 'href', r'(%s\d+)' % rurl)+"Next »")) + starter = bounceStarter( + url, compile(tagre('a', 'href', r'(%s\d+)' % rurl) + "Next »")) stripUrl = url + '%s' firstStripUrl = stripUrl % '1' imageSearch = compile(tagre('img', 'src', 
r'(http://abstrusegoose\.com/strips/[^<>"]+)')) - prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'« Previous') - nextSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + r'Next »') + prevSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + + r'« Previous') + nextSearch = compile(tagre('a', 'href', r'(%s\d+)' % rurl) + + r'Next »') help = 'Index format: n (unpadded)' textSearch = compile(tagre("img", "title", r'([^"]+)')) @@ -122,7 +126,7 @@ class AirForceBlues(_BasicScraper): class ALessonIsLearned(_BasicScraper): url = 'http://www.alessonislearned.com/' prevSearch = compile(tagre("a", "href", r"(index\.php\?comic=\d+)", - quote="'")+r"[^>]+previous") + quote="'") + r"[^>]+previous") starter = indirectStarter(url, prevSearch) stripUrl = url + 'index.php?comic=%s' firstStripUrl = stripUrl % '1' @@ -130,6 +134,12 @@ class ALessonIsLearned(_BasicScraper): help = 'Index format: nnn' +class Alice(_ComicPressScraper): + url = 'http://www.alicecomics.com/' + starter = indirectStarter('http://www.alicecomics.com/', + '//a[text()="Latest Alice!"]') + + class AlienLovesPredator(_BasicScraper): url = 'http://alienlovespredator.com/' stripUrl = url + '%s/' @@ -220,12 +230,17 @@ class AmazingSuperPowers(_BasicScraper): ) +class Amya(_WordPressScraper): + url = 'http://www.amyachronicles.com/' + + class Angband(_BasicScraper): url = 'http://angband.calamarain.net/' stripUrl = url + 'view.php?date=%s' firstStripUrl = stripUrl % '2005-12-30' imageSearch = compile(tagre("img", "src", r'(comics/Scroll[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(view\.php\?date\=[^"]+)')+"Previous") + prevSearch = compile(tagre("a", "href", r'(view\.php\?date\=[^"]+)') + + "Previous") help = 'Index format: yyyy-mm-dd' @@ -233,7 +248,7 @@ class Angels2200(_BasicScraper): url = 'http://www.janahoffmann.com/angels/' stripUrl = url + '%s' imageSearch = compile(tagre("img", "src", r"(http://www\.janahoffmann\.com/angels/comics/[^']+)", quote="'")) - prevSearch = 
compile(tagre("a", "href", r'([^"]+)')+"« Previous") + prevSearch = compile(tagre("a", "href", r'([^"]+)') + "« Previous") help = 'Index format: yyyy/mm/dd/part--comic-' @@ -290,7 +305,8 @@ class ASkeweredParadise(_BasicScraper): stripUrl = url + 'comic/%s' firstStripUrl = stripUrl % '001' imageSearch = compile(tagre("img", "src", r'(http://aspcomics\.net/sites/default/files[^"]*/asp\d+\.jpg)[^"]+')) - prevSearch = compile(tagre("a", "href", "(/comic/\d+)")+r"[^>]+Previous") + prevSearch = compile(tagre("a", "href", "(/comic/\d+)") + + r"[^>]+Previous") help = 'Index format: nnn' @@ -306,8 +322,8 @@ class ASofterWorld(_ParserScraper): class AstronomyPOTD(_BasicScraper): baseUrl = 'http://antwrp.gsfc.nasa.gov/apod/' url = baseUrl + 'astropix.html' - starter = bounceStarter(url, - compile(tagre("a", "href", r'(ap\d{6}\.html)') + ">")) + starter = bounceStarter( + url, compile(tagre("a", "href", r'(ap\d{6}\.html)') + ">")) stripUrl = baseUrl + 'ap%s.html' firstStripUrl = stripUrl % '061012' imageSearch = compile(tagre("a", "href", r'(image/\d{4}/[^"]+)')) @@ -328,3 +344,7 @@ class AstronomyPOTD(_BasicScraper): def namer(cls, imageUrl, pageUrl): return '%s-%s' % (pageUrl.split('/')[-1].split('.')[0][2:], imageUrl.split('/')[-1].split('.')[0]) + + +class AxeCop(_WordPressScraper): + url = 'http://axecop.com/comic/season-two/' diff --git a/dosagelib/plugins/b.py b/dosagelib/plugins/b.py index dee2532db..b45c36db5 100644 --- a/dosagelib/plugins/b.py +++ b/dosagelib/plugins/b.py @@ -4,11 +4,13 @@ # Copyright (C) 2015-2016 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function + from re import compile, escape from ..util import tagre, getPageContent from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter +from .common import _WordPressScraper, _ComicPressScraper class BackwaterPlanet(_BasicScraper): @@ -38,6 +40,12 @@ class BadMachinery(_BasicScraper): help = 'Index format: yyyymmdd' +class 
Bardsworth(_WordPressScraper): + url = 'http://www.bardsworth.com/' + starter = indirectStarter('http://www.bardsworth.com/', + '//a[@rel="bookmark"]') + + class Baroquen(_BasicScraper): url = 'http://www.baroquencomics.com/' rurl = escape(url) @@ -162,6 +170,11 @@ class Blip(_BasicScraper): return prevUrl.replace("www.blipcomic.com", "blipcomic.com") +class BloodBound(_WordPressScraper): + url = 'http://bloodboundcomic.com/' + firstStripUrl = 'http://bloodboundcomic.com/comic/06112006/' + + class BloomingFaeries(_BasicScraper): adult = True url = 'http://www.bloomingfaeries.com/' @@ -255,6 +268,10 @@ class BoyOnAStickAndSlither(_BasicScraper): return pageUrl.rsplit('/')[-1] +class BratHalla(_WordPressScraper): + url = 'http://brat-halla.com/' + + class BrentalFloss(_BasicScraper): url = 'http://brentalflossthecomic.com/' stripUrl = url + '?id=%s' @@ -313,6 +330,19 @@ class Brink(_BasicScraper): help = 'Index format: number' +class BroodHollow(_WordPressScraper): + url = 'http://broodhollow.chainsawsuit.com/' + firstStripUrl = 'http://broodhollow.chainsawsuit.com/page/2012/10/06/book-1-curious-little-thing' + + +class Buni(_WordPressScraper): + url = 'http://www.bunicomic.com/' + + +class BusinessCat(_ComicPressScraper): + url = 'http://www.businesscat.happyjar.com/' + + class ButtercupFestival(_ParserScraper): url = 'http://www.buttercupfestival.com/' stripUrl = url + '%s.htm' diff --git a/dosagelib/plugins/c.py b/dosagelib/plugins/c.py index ac7cb3e59..79a3ec3ba 100755 --- a/dosagelib/plugins/c.py +++ b/dosagelib/plugins/c.py @@ -10,7 +10,7 @@ from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper from ..helpers import bounceStarter, indirectStarter from ..util import tagre -from .wordpress import _WordpressScraper +from .common import _WordPressScraper, _ComicPressScraper class Caggage(_BasicScraper): @@ -19,7 +19,8 @@ class Caggage(_BasicScraper): stripUrl = url + 'archives/%s' firstStripUrl = stripUrl % '77' imageSearch = 
compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, after="prev")) + prevSearch = compile(tagre("a", "href", r'(%sarchives/\d+)' % rurl, + after="prev")) help = 'Index format: number' @@ -102,7 +103,7 @@ class CatAndGirl(_BasicScraper): stripUrl = url + '?p=%s' firstStripUrl = stripUrl % '1602' imageSearch = compile(tagre("img", "src", r'(%sarchive/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'([^"]+)')+r"[^<]+Previous") + prevSearch = compile(tagre("a", "href", r'([^"]+)') + r"[^<]+Previous") help = 'Index format: n (unpadded)' def shouldSkipUrl(self, url, data): @@ -112,11 +113,19 @@ class CatAndGirl(_BasicScraper): ) -class CatNine(_WordpressScraper): +class Catena(_WordPressScraper): + url = 'http://catenamanor.com/' + + +class CatNine(_WordPressScraper): url = 'http://cat-nine.net' firstStripUrl = 'http://cat-nine.net/comic/episode-1/first-day-for-everything/' +class CatsAndCameras(_WordPressScraper): + url = 'http://catsncameras.com/' + + class CatVersusHuman(_ParserScraper): url = 'http://www.catversushuman.com' multipleImagesPerStrip = True @@ -277,6 +286,19 @@ class CorydonCafe(_ParserScraper): return pageUrl.split('/')[-1].split('.')[0] +class CourtingDisaster(_WordPressScraper): + url = 'http://www.courting-disaster.com/' + firstStripUrl = 'http://www.courting-disaster.com/comic/courting-disaster-17/' + + +class CowboyJedi(_WordPressScraper): + url = 'http://www.cowboyjedi.com/' + + +class CraftedFables(_ComicPressScraper): + url = 'http://www.caf-fiends.net/comicpress/' + + class CrapIDrewOnMyLunchBreak(_BasicScraper): url = 'http://crap.jinwicked.com/' stripUrl = url + '%s/' diff --git a/dosagelib/plugins/common.py b/dosagelib/plugins/common.py new file mode 100644 index 000000000..bcbda63c3 --- /dev/null +++ b/dosagelib/plugins/common.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs +# Copyright (C) 
2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher + +from __future__ import absolute_import, division, print_function + +from ..scraper import _ParserScraper + +# Common base classes for comics with the same structure (same hosting +# software, for example) go here. Since those are shared by many modules, +# please don't use lists of expression, as that makes it hard to track which +# expression is for which comics. + + +class _WordPressScraper(_ParserScraper): + imageSearch = '//div[@id="comic"]//img' + prevSearch = "//a[contains(concat(' ', @class, ' '), ' comic-nav-previous ')]" + + +class _ComicPressScraper(_WordPressScraper): + prevSearch = "//a[contains(concat(' ', @class, ' '), ' navi-prev-in ')]" diff --git a/dosagelib/plugins/f.py b/dosagelib/plugins/f.py index abcdcba7d..a16e62ec7 100755 --- a/dosagelib/plugins/f.py +++ b/dosagelib/plugins/f.py @@ -9,6 +9,7 @@ from re import compile, escape, IGNORECASE from ..util import tagre from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter +from .common import _WordPressScraper class FalconTwin(_BasicScraper): @@ -116,7 +117,7 @@ class FonFlatter(_BasicScraper): self.stripUrl % "2006/09/21/danke", self.stripUrl % "2006/08/23/zgf-zuweilen-gestellte-fragen", self.stripUrl % "2005/10/19/naq-never-asked-questions", - ) + ) class ForLackOfABetterComic(_BasicScraper): @@ -129,6 +130,10 @@ class ForLackOfABetterComic(_BasicScraper): help = 'Index format: number' +class FowlLanguage(_WordPressScraper): + url = 'http://www.fowllanguagecomics.com/' + + class Fragile(_ParserScraper): url = 'http://www.fragilestory.com/' imageSearch = '//div[@id="comic_strip"]/a[@class="nobg"]/img' @@ -153,10 +158,10 @@ class FredoAndPidjin(_BasicScraper): compile(tagre('img', 'src', '(http://cdn\.pidjin\.net/wp-content/uploads/old/[^"]+\.[a-z]+)')), ) multipleImagesPerStrip = True - prevSearch = compile(tagre('a', 'href', '([^"]+)')+"Prev") + prevSearch = compile(tagre('a', 'href', 
'([^"]+)') + "Prev") starter = indirectStarter( url, - compile(tagre('a', 'href', "("+url+r'\d\d\d\d/\d\d/\d\d/[^"]+/)'))) + compile(tagre('a', 'href', "(" + url + r'\d\d\d\d/\d\d/\d\d/[^"]+/)'))) class Freefall(_BasicScraper): diff --git a/dosagelib/plugins/h.py b/dosagelib/plugins/h.py index 07bbe8682..3c4951876 100644 --- a/dosagelib/plugins/h.py +++ b/dosagelib/plugins/h.py @@ -1,10 +1,15 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- +# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher + +from __future__ import absolute_import, division, print_function from re import compile, escape -from ..scraper import _BasicScraper, _ParserScraper +from ..scraper import _BasicScraper from ..util import tagre from ..helpers import bounceStarter +from .common import _WordPressScraper class HagarTheHorrible(_BasicScraper): @@ -28,17 +33,22 @@ class HagarTheHorrible(_BasicScraper): return starturl +class HappyJar(_WordPressScraper): + url = 'http://www.happyjar.com/' + + class HarkAVagrant(_BasicScraper): url = 'http://www.harkavagrant.com/' rurl = escape(url) - starter = bounceStarter(url, - compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) + - tagre("img", "src", "buttonnext.png"))) + starter = bounceStarter( + url, compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) + + tagre("img", "src", "buttonnext.png"))) stripUrl = url + 'index.php?id=%s' firstStripUrl = stripUrl % '1' - imageSearch = compile(tagre("img", "src", r'(%s[^"]+)' % rurl, after='BORDER')) + imageSearch = compile(tagre("img", "src", r'(%s[^"]+)' % rurl, + after='BORDER')) prevSearch = compile(tagre("a", "href", r'(%sindex\.php\?id=\d+)' % rurl) + - tagre("img", "src", "buttonprevious.png")) + tagre("img", "src", "buttonprevious.png")) help = 'Index format: number' @classmethod @@ -48,10 +58,16 @@ class HarkAVagrant(_BasicScraper): return '%s-%s' % (num, filename) +class 
Hipsters(_WordPressScraper): + url = 'http://www.hipsters-comic.com/' + firstStripUrl = 'http://www.hipsters-comic.com/comic/hip01/' + + class HorribleVille(_BasicScraper): url = 'http://horribleville.com/' stripUrl = url + 'd/%s.html' firstStripUrl = stripUrl % '20051220' imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)')) - prevSearch = compile(tagre("a", "href", r'(/d/[^"]+)') + tagre("img", "src", r'/images/previous\.png')) + prevSearch = compile(tagre("a", "href", r'(/d/[^"]+)') + + tagre("img", "src", r'/images/previous\.png')) help = 'Index format: yyyymmdd' diff --git a/dosagelib/plugins/i.py b/dosagelib/plugins/i.py index 88636ea70..99e7171ec 100644 --- a/dosagelib/plugins/i.py +++ b/dosagelib/plugins/i.py @@ -1,10 +1,13 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher +from __future__ import absolute_import, division, print_function from re import compile, escape from ..scraper import _BasicScraper from ..util import tagre +from .common import _WordPressScraper class IAmArg(_BasicScraper): @@ -27,6 +30,10 @@ class ICanBarelyDraw(_BasicScraper): help = 'Index format: number' +class IDreamOfAJeanieBottle(_WordPressScraper): + url = 'http://jeaniebottle.com/' + + class InternetWebcomic(_BasicScraper): url = 'http://www.internet-webcomic.com/' rurl = escape(url) @@ -44,3 +51,7 @@ class IrregularWebcomic(_BasicScraper): imageSearch = compile(r'') prevSearch = compile(r'Previous ') help = 'Index format: nnn' + + +class ItsWalky(_WordPressScraper): + url = 'http://www.itswalky.com/' diff --git a/dosagelib/plugins/k.py b/dosagelib/plugins/k.py index 26c3a95cf..c86d1dcc3 100644 --- a/dosagelib/plugins/k.py +++ b/dosagelib/plugins/k.py @@ -3,6 +3,7 @@ # Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2015-2016 Tobias Gruetzmacher +from __future__ import absolute_import, division, 
print_function from re import compile, escape, IGNORECASE from ..scraper import _BasicScraper from ..util import tagre diff --git a/dosagelib/plugins/m.py b/dosagelib/plugins/m.py index bb4d89f4f..019091087 100755 --- a/dosagelib/plugins/m.py +++ b/dosagelib/plugins/m.py @@ -4,10 +4,12 @@ # Copyright (C) 2015-2016 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function + from re import compile, escape, IGNORECASE from ..scraper import _BasicScraper, _ParserScraper from ..util import tagre +from .common import _WordPressScraper class MacHall(_BasicScraper): @@ -95,6 +97,10 @@ class MaxOveracts(_ParserScraper): help = 'Index format: nnn' +class Meek(_WordPressScraper): + url = 'http://www.meekcomic.com/' + + class MegaTokyo(_BasicScraper): url = 'http://megatokyo.com/' stripUrl = url + 'strip/%s' @@ -104,6 +110,14 @@ class MegaTokyo(_BasicScraper): help = 'Index format: nnnn' +class Meiosis(_WordPressScraper): + url = 'http://meiosiswebcomic.com/' + + +class Melonpool(_WordPressScraper): + url = 'http://www.melonpool.com/' + + class MenageA3(_BasicScraper): adult = True url = 'http://www.ma3comic.com/' @@ -124,6 +138,12 @@ class Misfile(_BasicScraper): help = 'Index format: yyyy-mm-dd' +class MistyTheMouse(_WordPressScraper): + url = 'http://www.mistythemouse.com/' + prevSearch = '//a[@rel="prev"]' + firstStripUrl = 'http://www.mistythemouse.com/?p=12' + + class MonsieurLeChien(_BasicScraper): url = 'http://www.monsieur-le-chien.fr/' stripUrl = url + 'index.php?planche=%s' diff --git a/dosagelib/plugins/n.py b/dosagelib/plugins/n.py index 885b8c4f9..124fce1b2 100644 --- a/dosagelib/plugins/n.py +++ b/dosagelib/plugins/n.py @@ -8,6 +8,7 @@ from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter from ..util import tagre +from .common import _WordPressScraper class Namesake(_BasicScraper): @@ -30,7 +31,7 @@ class NamirDeiter(_BasicScraper): stripUrl = url + 
'comics/index.php?date=%s' firstStripUrl = stripUrl % '19991128' imageSearch = compile(tagre("img", "src", r"'?(%scomics/\d+\.jpg)'?" % rurl, quote="")) - prevSearch = compile(tagre("a", "href", r'(%scomics/index\.php\?date=\d+)' % rurl, quote="'")+"Previous") + prevSearch = compile(tagre("a", "href", r'(%scomics/index\.php\?date=\d+)' % rurl, quote="'") + "Previous") help = 'Index format: yyyymmdd' @@ -50,6 +51,11 @@ class NatalieDee(_BasicScraper): return '%s-%s' % (date, filename) +class Nedroid(_WordPressScraper): + url = 'http://nedroid.com/' + prevSearch = '//a[@rel="prev"]' + + class NekkoAndJoruba(_BasicScraper): url = 'http://www.nekkoandjoruba.com/' stripUrl = url + '?p=%s' @@ -76,6 +82,11 @@ class NeoEarth(_BasicScraper): help = 'Index format: yyyy-mm-dd' +class NerfNow(_WordPressScraper): + url = 'https://www.nerfnow.com/' + prevSearch = '//li[@id="nav_previous"]/a' + + class NewAdventuresOfBobbin(_BasicScraper): url = 'http://www.bobbin-comic.com/bobbin_strips/' imageSearch = compile(tagre("a", "href", r'(\d+\.gif)')) @@ -103,6 +114,11 @@ class NichtLustig(_BasicScraper): url, compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)'))) +class Nicky510(_WordPressScraper): + url = 'http://www.nickyitis.com/' + prevSearch = "//a[contains(concat(' ', @class, ' '), ' navi-prev ')]" + + class Nimona(_BasicScraper): url = 'http://gingerhaze.com/nimona/' stripUrl = url + '%s/' @@ -146,7 +162,8 @@ class NotInventedHere(_BasicScraper): stripUrl = url + '%s/' firstStripUrl = stripUrl % 'on/2009-9-21' imageSearch = compile(tagre("img", "src", r'(http://thiswas.notinventedhe.re/on/\d+-\d+-\d+)')) - prevSearch = compile(tagre("a", "href", r'(/on/\d+-\d+-\d+)')+'\s*Previous') + prevSearch = compile(tagre("a", "href", r'(/on/\d+-\d+-\d+)') + + '\s*Previous') help = 'Index format: yyyy-mm-dd' diff --git a/dosagelib/plugins/num.py b/dosagelib/plugins/num.py index e16600982..b381408be 100644 --- a/dosagelib/plugins/num.py +++ b/dosagelib/plugins/num.py @@ -1,8 +1,13 @@ -# 
-*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher -from re import compile, escape +from __future__ import absolute_import, division, print_function -from ..util import tagre -from ..scraper import _BasicScraper +from .common import _WordPressScraper + + +class NineteenSeventySeven(_WordPressScraper): + name = '1977' + url = 'http://1977thecomic.com/' diff --git a/dosagelib/plugins/o.py b/dosagelib/plugins/o.py index 743055aa5..4f76ad230 100644 --- a/dosagelib/plugins/o.py +++ b/dosagelib/plugins/o.py @@ -4,10 +4,13 @@ # Copyright (C) 2015-2016 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function + from re import compile, escape + from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter from ..util import tagre +from .common import _WordPressScraper class OctopusPie(_ParserScraper): @@ -26,7 +29,8 @@ class OddFish(_BasicScraper): stripUrl = url + '%s/' firstStripUrl = stripUrl % 'tv-tentacles' imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) - prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="navi-prev")) + prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, + after="navi-prev")) help = 'Index format: stripname' @@ -65,7 +69,8 @@ class OkCancel(_BasicScraper): stripUrl = url + 'comic/%s.html' firstStripUrl = stripUrl % '1' imageSearch = compile(tagre("img", "src", r'(%sstrips/okcancel\d{8}\.gif)' % rurl)) - prevSearch = compile(tagre("div", "class", "previous") + tagre("a", "href", r'(%scomic/\d{1,4}\.html)' % rurl)) + prevSearch = compile(tagre("div", "class", "previous") + + tagre("a", "href", r'(%scomic/\d{1,4}\.html)' % rurl)) starter = indirectStarter(url, prevSearch) help = 'Index format: yyyymmdd' @@ -85,10 +90,16 @@ class OneQuestion(_BasicScraper): stripUrl = url + 'comic.php?strip_id=%s' 
firstStripUrl = stripUrl % '1' imageSearch = compile(tagre("img", "src", r'((?:\.\./)?istrip_files/strips/\d+\.\w{3,4})')) - prevSearch = compile(tagre("a", "href", r'(comic\.php\?strip_id=\d+)') + tagre("img", "src", r'img/arrow_prev\.jpg')) + prevSearch = compile(tagre("a", "href", r'(comic\.php\?strip_id=\d+)') + + tagre("img", "src", r'img/arrow_prev\.jpg')) help = 'Index format: n (unpadded)' +class OnTheEdge(_WordPressScraper): + url = 'http://ontheedgecomics.com/' + firstStripUrl = 'http://ontheedgecomics.com/comic/ote0001/' + + class OnTheFastrack(_BasicScraper): url = 'http://onthefastrack.com/' stripUrl = url + 'comics/%s' diff --git a/dosagelib/plugins/p.py b/dosagelib/plugins/p.py index eccddea41..f617b3025 100755 --- a/dosagelib/plugins/p.py +++ b/dosagelib/plugins/p.py @@ -8,6 +8,12 @@ from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper from ..helpers import bounceStarter, queryNamer, indirectStarter from ..util import tagre +from .common import _WordPressScraper + + +class PandyLand(_WordPressScraper): + url = 'http://pandyland.net/' + firstStripUrl = 'http://pandyland.net/1/' class ParadigmShift(_BasicScraper): diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py index 7d0635004..973d2065f 100644 --- a/dosagelib/plugins/s.py +++ b/dosagelib/plugins/s.py @@ -10,6 +10,7 @@ from datetime import datetime from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter, bounceStarter from ..util import tagre, getPageContent +from .common import _WordPressScraper class SabrinaOnline(_BasicScraper): @@ -42,6 +43,10 @@ class SafelyEndangered(_BasicScraper): help = 'Index format: yyyy/mm/stripname' +class SailorsunOrg(_WordPressScraper): + url = 'http://sailorsun.org/' + + class SamAndFuzzy(_BasicScraper): url = 'http://www.samandfuzzy.com/' stripUrl = 'http://samandfuzzy.com/%s' @@ -166,6 +171,11 @@ class SexyLosers(_BasicScraper): return index + '-' + title +class 
Sharksplode(_WordPressScraper): + url = 'http://sharksplode.com/' + textSearch = '//div[@id="comic"]//img/@alt' + + class Sheldon(_BasicScraper): url = 'http://www.sheldoncomics.com/' rurl = escape(url) @@ -237,6 +247,10 @@ class SinFest(_BasicScraper): help = 'Index format: yyyy-mm-dd' +class Sithrah(_WordPressScraper): + url = 'http://sithrah.com/' + + class SkinDeep(_BasicScraper): url = 'http://www.skindeepcomic.com/' stripUrl = url + 'archive/%s/' @@ -271,6 +285,10 @@ class SleeplessDomain(_ParserScraper): return start + "-" + pageUrl.rsplit('/', 1)[-1] +class SlightlyDamned(_WordPressScraper): + url = 'http://www.sdamned.com/' + + class SluggyFreelance(_BasicScraper): url = 'http://www.sluggy.com/' stripUrl = url + 'comics/archives/daily/%s' @@ -445,6 +463,10 @@ class SpareParts(_BasicScraper): help = 'Index format: yyyymmdd' +class SPQRBlues(_WordPressScraper): + url = 'http://spqrblues.com/IV/' + + class StandStillStaySilent(_ParserScraper): url = 'http://www.sssscomic.com/comic.php' rurl = escape(url) diff --git a/dosagelib/plugins/t.py b/dosagelib/plugins/t.py index 7d7c1c27c..c0c064ce0 100755 --- a/dosagelib/plugins/t.py +++ b/dosagelib/plugins/t.py @@ -8,6 +8,7 @@ from re import compile, escape, IGNORECASE from ..scraper import _BasicScraper, _ParserScraper from ..helpers import indirectStarter from ..util import tagre +from .common import _WordPressScraper class TheBrads(_BasicScraper): @@ -30,6 +31,10 @@ class TheDevilsPanties(_BasicScraper): help = 'Index format: number' +class TheDreamlandChronicles(_WordPressScraper): + url = 'http://www.thedreamlandchronicles.com/' + + class TheGamerCat(_ParserScraper): url = "http://www.thegamercat.com/" stripUrl = url + "comic/%s/" @@ -40,6 +45,10 @@ class TheGamerCat(_ParserScraper): help = 'Index format: stripname' +class TheGentlemansArmchair(_WordPressScraper): + url = 'http://thegentlemansarmchair.com/' + + class TheLandscaper(_BasicScraper): url = 'http://landscaper.visual-assault.net/comic/latest' rurl = 
escape(url) @@ -52,6 +61,10 @@ class TheLandscaper(_BasicScraper): help = 'Index format: name' +class TheMelvinChronicles(_WordPressScraper): + url = 'http://melvin.jeaniebottle.com/' + + class TheNoob(_BasicScraper): url = 'http://www.thenoobcomic.com/index.php' stripUrl = url + '?pos=%s' diff --git a/dosagelib/plugins/wordpress.py b/dosagelib/plugins/wordpress.py index 6fb431bf1..2374c9d4e 100644 --- a/dosagelib/plugins/wordpress.py +++ b/dosagelib/plugins/wordpress.py @@ -1,79 +1,19 @@ # -*- coding: utf-8 -*- -from dosagelib.helpers import indirectStarter -from ..scraper import make_scraper, _ParserScraper +from ..helpers import indirectStarter +from ..scraper import make_scraper +from .common import _WordPressScraper -class _WordpressScraper(_ParserScraper): - imageSearch = ('//div[@id="comic"]//img', - '//div[@class="webcomic-image"]//img') - prevSearch = ("//a[contains(concat(' ', text(), ' '), ' Prev ')]", - "//a[contains(concat(' ', text(), ' '), ' Previous ')]", - "//a[contains(concat(' ', @class, ' '), ' navi-prev ')]", - "//a[contains(concat(' ', @class, ' '), ' navi-prev-in ')]", - "//a[contains(concat(' ', @class, ' '), ' navi-previous ')]", - "//a[contains(concat(' ', @class, ' '), ' previous-webcomic-link ')]") - - -def add(name, url, firstUrl=None, starter=None, textSearch=None, lang=None): +def add(name, url, starter=None): attrs = dict( name=name, url=url ) - if lang: - attrs['lang'] = lang - if firstUrl: - attrs['firstUrl'] = url + firstUrl if starter: attrs['starter'] = starter - if textSearch: - attrs['textSearch'] = textSearch - globals()[name] = make_scraper(name, _WordpressScraper, **attrs) + globals()[name] = make_scraper(name, _WordPressScraper, **attrs) -class Amya(_WordpressScraper): - url = 'http://www.amyachronicles.com/' - - -add('1997', 'http://1977thecomic.com/') -add('Alice', 'http://www.alicecomics.com/', - starter=indirectStarter('http://www.alicecomics.com/', '//a[text()="Latest Alice!"]')) -add('AxeCop', 
'http://axecop.com/comic/season-two/') -add('Bardsworth', 'http://www.bardsworth.com/') -add('BloodBound', 'http://bloodboundcomic.com/', 'comic/06112006/') -add('BratHalla', 'http://brat-halla.com/') -add('BroodHollow', 'http://broodhollow.chainsawsuit.com/', 'page/2012/10/06/book-1-curious-little-thing') -add('Buni', 'http://www.bunicomic.com/') -add('BusinessCat', 'http://www.businesscat.happyjar.com/') -add('Catena', 'http://catenamanor.com/') -add('CatsAndCameras', 'http://catsncameras.com/') -add('CraftedFables', 'http://www.caf-fiends.net/comicpress/') -add('CourtingDisaster', 'http://www.courting-disaster.com/', 'comic/courting-disaster-17/') -add('CowboyJedi', 'http://www.cowboyjedi.com/') -add('FowlLanguage', 'http://www.fowllanguagecomics.com/') -add('HappyJar', 'http://www.happyjar.com/') -add('Hipsters', 'http://www.hipsters-comic.com/', 'comic/hip01/') -add('IDreamOfAJeanieBottle', 'http://jeaniebottle.com/') -add('ItsWalky', 'http://www.itswalky.com/') -add('KatzenfutterGeleespritzer', 'http://www.katzenfuttergeleespritzer.de/', 'comics/gert-grendil/', lang='de') -add('Meek', 'http://www.meekcomic.com/') -add('Meiosis', 'http://meiosiswebcomic.com/') -add('Melonpool', 'http://www.melonpool.com/') -add('MistyTheMouse', 'http://www.mistythemouse.com/') -add('Nedroid', 'http://nedroid.com/') -add('NerfNow', 'https://www.nerfnow.com/') -add('Nicky510', 'http://www.nickyitis.com/') -add('OnTheEdge', 'http://ontheedgecomics.com/', 'comic/ote0001/') -add('PandyLand', 'http://pandyland.net/', '1/') -add('SailorsunOrg', 'http://sailorsun.org/') -add('Sharksplode', 'http://sharksplode.com/', textSearch='//div[@id="comic"]//img/@alt') -add('Sithrah', 'http://sithrah.com/') -add('SlightlyDamned', 'http://www.sdamned.com/') -add('SPQRBlues', 'http://spqrblues.com/IV/') -add('TheDreamlandChronicles', 'http://www.thedreamlandchronicles.com/') -add('TheGentlemansArmchair', 'http://thegentlemansarmchair.com/') -add('TheMelvinChronicles', 
'http://melvin.jeaniebottle.com/') -add('YAFGC', 'http://yafgc.net/') - # all comics on HijiNKS ENSUE for (name, starterXPath) in [ ('HijinksEnsue', '//h4[text()="Read The Latest HijiNKS ENSUE"]/..//a'), diff --git a/dosagelib/plugins/y.py b/dosagelib/plugins/y.py index 9a7e08e3a..edb199741 100644 --- a/dosagelib/plugins/y.py +++ b/dosagelib/plugins/y.py @@ -1,7 +1,11 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher -from re import compile -from ..scraper import _BasicScraper -from ..util import tagre +from __future__ import absolute_import, division, print_function +from .common import _WordPressScraper + + +class YAFGC(_WordPressScraper): + url = 'http://yafgc.net/'