From 0468f2f31aad429350cc90de2f1c39523910070e Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Wed, 13 Apr 2016 20:01:51 +0200 Subject: [PATCH] Refactor: Convert starter to simple method. --- dosagelib/helpers.py | 26 ++++++++-------------- dosagelib/plugins/a.py | 8 +++---- dosagelib/plugins/b.py | 6 ++--- dosagelib/plugins/c.py | 8 +++---- dosagelib/plugins/clonemanga.py | 23 +++++++++++-------- dosagelib/plugins/comicfury.py | 2 +- dosagelib/plugins/creators.py | 9 ++++---- dosagelib/plugins/d.py | 8 +++---- dosagelib/plugins/drunkduck.py | 23 +++++++++++-------- dosagelib/plugins/e.py | 8 +++---- dosagelib/plugins/f.py | 6 ++--- dosagelib/plugins/g.py | 4 ++-- dosagelib/plugins/gocomics.py | 13 +++++------ dosagelib/plugins/h.py | 11 +++++----- dosagelib/plugins/j.py | 2 +- dosagelib/plugins/l.py | 8 +++---- dosagelib/plugins/n.py | 6 ++--- dosagelib/plugins/nuklearpower.py | 5 ++--- dosagelib/plugins/p.py | 20 +++++------------ dosagelib/plugins/r.py | 2 +- dosagelib/plugins/s.py | 28 +++++++++++------------ dosagelib/plugins/smackjeeves.py | 33 +++++++++++++++++----------- dosagelib/plugins/t.py | 15 +++++-------- dosagelib/plugins/u.py | 4 ++-- dosagelib/plugins/w.py | 7 +++--- dosagelib/plugins/webcomicfactory.py | 10 +++------ dosagelib/plugins/wlpcomics.py | 2 +- dosagelib/plugins/wordpress.py | 2 +- dosagelib/plugins/x.py | 2 +- dosagelib/plugins/z.py | 2 +- 30 files changed, 145 insertions(+), 158 deletions(-) mode change 100755 => 100644 dosagelib/plugins/e.py mode change 100755 => 100644 dosagelib/plugins/f.py mode change 100755 => 100644 dosagelib/plugins/p.py mode change 100755 => 100644 dosagelib/plugins/t.py diff --git a/dosagelib/helpers.py b/dosagelib/helpers.py index 2777891c2..67e13f58a 100644 --- a/dosagelib/helpers.py +++ b/dosagelib/helpers.py @@ -30,32 +30,24 @@ def regexNamer(regex, usePageUrl=False): return _namer -def bounceStarter(): +def bounceStarter(self): """Get start URL by "bouncing" back and forth one time. This needs the url and nextSearch properties be defined on the class. """ - @classmethod - def _starter(cls): - """Get bounced start URL.""" - data = cls.getPage(cls.url) - url1 = cls.fetchUrl(cls.url, data, cls.prevSearch) - data = cls.getPage(url1) - return cls.fetchUrl(url1, data, cls.nextSearch) - return _starter + data = self.getPage(self.url) + url1 = self.fetchUrl(self.url, data, self.prevSearch) + data = self.getPage(url1) + return self.fetchUrl(url1, data, self.nextSearch) -def indirectStarter(): +def indirectStarter(self): """Get start URL by indirection. This is useful for comics where the latest comic can't be reached at a stable URL. If the class has an attribute 'startUrl', this page is fetched first, otherwise the page at 'url' is fetched. After that, the attribute 'latestSearch' is used on the page content to find the latest strip.""" - @classmethod - def _starter(cls): - """Get indirect start URL.""" - url = cls.startUrl if hasattr(cls, "startUrl") else cls.url - data = cls.getPage(url) - return cls.fetchUrl(url, data, cls.latestSearch) - return _starter + url = self.startUrl if hasattr(self, "startUrl") else self.url + data = self.getPage(url) + return self.fetchUrl(url, data, self.latestSearch) diff --git a/dosagelib/plugins/a.py b/dosagelib/plugins/a.py index 141cd982b..21021b414 100644 --- a/dosagelib/plugins/a.py +++ b/dosagelib/plugins/a.py @@ -16,7 +16,7 @@ from .common import _WordPressScraper, xpath_class, WP_LATEST_SEARCH class AbstruseGoose(_BasicScraper): url = 'http://abstrusegoose.com/' rurl = escape(url) - starter = bounceStarter() + starter = bounceStarter stripUrl = url + '%s' firstStripUrl = stripUrl % '1' imageSearch = compile(tagre('img', 'src', @@ -122,7 +122,7 @@ class Alice(_WordPressScraper): url = 'http://www.alicecomics.com/' prevSearch = '//a[%s]' % xpath_class('navi-prev-in') latestSearch = '//a[text()="Latest Alice!"]' - starter = indirectStarter() + starter = indirectStarter class AlienLovesPredator(_BasicScraper): @@ -262,7 +262,7 @@ class Ashes(_WordPressScraper): url = 'http://www.flowerlarkstudios.com/comic/prologue/10232009/' firstStripUrl = url latestSearch = WP_LATEST_SEARCH - starter = indirectStarter() + starter = indirectStarter class ASkeweredParadise(_BasicScraper): @@ -287,7 +287,7 @@ class ASofterWorld(_ParserScraper): class AstronomyPOTD(_ParserScraper): baseUrl = 'http://apod.nasa.gov/apod/' url = baseUrl + 'astropix.html' - starter = bounceStarter() + starter = bounceStarter stripUrl = baseUrl + 'ap%s.html' firstStripUrl = stripUrl % '061012' imageSearch = '//a/img' diff --git a/dosagelib/plugins/b.py b/dosagelib/plugins/b.py index ceb88f9ed..6f6eb331d 100644 --- a/dosagelib/plugins/b.py +++ b/dosagelib/plugins/b.py @@ -48,7 +48,7 @@ class BalderDash(_ComicControlScraper): class Bardsworth(_WordPressScraper): url = 'http://www.bardsworth.com/' latestSearch = '//a[@rel="bookmark"]' - starter = indirectStarter() + starter = indirectStarter class Baroquen(_BasicScraper): @@ -72,7 +72,7 @@ class Beetlebum(_BasicScraper): rurl = escape(url) stripUrl = url + '%s' firstStripUrl = stripUrl % '2006/03/10/quiz-fur-ruskiphile' - starter = indirectStarter() + starter = indirectStarter multipleImagesPerStrip = True imageSearch = compile(tagre('img', 'src', r'(http://blog\.beetlebum\.de/wp-content/uploads/[^"]+)')) prevSearch = compile(tagre('a', 'href', @@ -227,7 +227,7 @@ class BoredAndEvil(_BasicScraper): imageSearch = compile(tagre("img", "src", r'(strips/[^"]+)')) prevSearch = compile(r'First Comic.+") latestSearch = compile(tagre('a', 'href', "(" + url + r'\d\d\d\d/\d\d/\d\d/[^"]+/)')) - starter = indirectStarter() + starter = indirectStarter class Freefall(_BasicScraper): diff --git a/dosagelib/plugins/g.py b/dosagelib/plugins/g.py index 7f42d37b9..f7d26c892 100644 --- a/dosagelib/plugins/g.py +++ b/dosagelib/plugins/g.py @@ -27,7 +27,7 @@ class Garanos(_BasicScraper): baseUrl = 'http://garanos.alexheberling.com/' rurl = escape(baseUrl) url = baseUrl + 'pages/page-1/' - starter = indirectStarter() + starter = indirectStarter stripUrl = baseUrl + 'pages/page-%s' imageSearch = compile( tagre("img", "src", @@ -136,7 +136,7 @@ class GoGetARoomie(_ComicControlScraper): class GoneWithTheBlastwave(_BasicScraper): url = 'http://www.blastwave-comic.com/index.php?p=comic&nro=1' - starter = indirectStarter() + starter = indirectStarter stripUrl = url[:-1] + '%s' firstStripUrl = stripUrl % '1' imageSearch = compile(r']+\.html)', diff --git a/dosagelib/plugins/l.py b/dosagelib/plugins/l.py index 2d0f731ae..71ce04a05 100644 --- a/dosagelib/plugins/l.py +++ b/dosagelib/plugins/l.py @@ -24,7 +24,7 @@ class Lackadaisy(_BasicScraper): nextSearch = compile(tagre("a", "href", r"(/comic.php\?comicid=[0-9]+)") + "Next") help = 'Index format: n' - starter = bounceStarter() + starter = bounceStarter @classmethod def namer(cls, imageUrl, pageUrl): @@ -38,7 +38,7 @@ class Laiyu(_WordPressScraper): url = 'http://www.flowerlarkstudios.com/comic/preliminary-concepts/welcome/' firstStripUrl = url latestSearch = WP_LATEST_SEARCH - starter = indirectStarter() + starter = indirectStarter class LasLindas(_BasicScraper): @@ -67,7 +67,7 @@ class LeastICouldDo(_BasicScraper): after="Previous")) latestSearch = compile(tagre("a", "href", r'(%scomic/\d+/)' % rurl, after="feature-comic")) - starter = indirectStarter() + starter = indirectStarter help = 'Index format: yyyymmdd' @@ -117,5 +117,5 @@ class LookingForGroup(_ParserScraper): imageSearch = '#comic img' prevSearch = '#comic-left > a' latestSearch = '#header-dropdown-comic-lfg > a:nth-of-type(2)' - starter = indirectStarter() + starter = indirectStarter help = 'Index format: nnn' diff --git a/dosagelib/plugins/n.py b/dosagelib/plugins/n.py index d8b157ca4..91a5715ed 100644 --- a/dosagelib/plugins/n.py +++ b/dosagelib/plugins/n.py @@ -104,7 +104,7 @@ class NichtLustig(_BasicScraper): prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)')) latestSearch = compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)')) help = 'Index format: yymmdd' - starter = indirectStarter() + starter = indirectStarter class Nicky510(_WordPressScraper): @@ -137,7 +137,7 @@ class NoMoreSavePoints(_WordPressScraper): url = 'http://www.flowerlarkstudios.com/comic/no-more-save-points/mushroom-hopping/' firstStripUrl = url latestSearch = WP_LATEST_SEARCH - starter = indirectStarter() + starter = indirectStarter class NoNeedForBushido(_BasicScraper): @@ -153,7 +153,7 @@ class NoNeedForBushido(_BasicScraper): latestSearch = compile(tagre("a", "href", r'(%s\?webcomic1=[^"]+)' % rurl, after="last-webcomic")) help = 'Index format: nnn' - starter = indirectStarter() + starter = indirectStarter class NotInventedHere(_BasicScraper): diff --git a/dosagelib/plugins/nuklearpower.py b/dosagelib/plugins/nuklearpower.py index 8767ec21a..b6d59aec5 100644 --- a/dosagelib/plugins/nuklearpower.py +++ b/dosagelib/plugins/nuklearpower.py @@ -11,9 +11,8 @@ class _NuklearPower(_ParserScraper): prevSearch = '//a[@rel="prev"]' imageSearch = '//div[@id="comic"]/img' - @classmethod - def starter(cls): - return cls.url + cls.path + '/' + def starter(self): + return self.url + self.path + '/' @classmethod def getName(cls): diff --git a/dosagelib/plugins/p.py b/dosagelib/plugins/p.py old mode 100755 new mode 100644 index bc407aafe..67fc29799 --- a/dosagelib/plugins/p.py +++ b/dosagelib/plugins/p.py @@ -20,7 +20,7 @@ class PandyLand(_WordPressScraper): class ParadigmShift(_BasicScraper): url = 'http://www.paradigmshiftmanga.com/' - starter = indirectStarter() + starter = indirectStarter stripUrl = url + 'ps/%s.html' imageSearch = compile(tagre("img", "src", r'([^"]*comics/ps/[^"]*)')) prevSearch = compile(tagre("a", "href", r'([^"]+)', @@ -86,6 +86,7 @@ class PennyArcade(_BasicScraper): before="btnPrev")) nextSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, before="btnNext")) + starter = bounceStarter help = 'Index format: yyyy/mm/dd/' @classmethod @@ -99,15 +100,6 @@ class PennyArcade(_BasicScraper): prevUrl = "%s/%s/%s" % (dummy, yyyy, mm) return prevUrl - @classmethod - def starter(cls): - """Get bounced start URL.""" - data = cls.getPage(cls.url) - url1 = cls.fetchUrl(cls.url, data, cls.prevSearch) - data = cls.getPage(url1) - url2 = cls.fetchUrl(url1, data, cls.nextSearch) - return cls.prevUrlModifier(url2) - @classmethod def namer(cls, imageUrl, pageUrl): p = pageUrl.split('/') @@ -162,7 +154,7 @@ class PicPakDog(_BasicScraper): class PiledHigherAndDeeper(_BasicScraper): url = 'http://www.phdcomics.com/comics.php' - starter = bounceStarter() + starter = bounceStarter stripUrl = url + '?comicid=%s' firstStripUrl = stripUrl % '1' imageSearch = compile(tagre("img", "src", r'(http://www\.phdcomics\.com/comics/archive/phd\d+s\d?\.\w{3,4})', quote="")) @@ -207,7 +199,7 @@ class PokeyThePenguin(_ParserScraper): imageSearch = '//p/img' latestSearch = '(//a)[last()]' multipleImagesPerStrip = True - starter = indirectStarter() + starter = indirectStarter help = 'Index format: number' def getPrevUrl(self, url, data): @@ -231,7 +223,7 @@ class PoorlyDrawnLines(_BasicScraper): class Precocious(_BasicScraper): url = 'http://www.precociouscomic.com/' - starter = indirectStarter() + starter = indirectStarter stripUrl = url + 'archive/comic/%s' imageSearch = compile(tagre("img", "src", r'(/comics/\d+[^"]*\.(?:jpg|gif))')) prevSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/back_arrow\.png")) @@ -243,7 +235,7 @@ class Precocious(_BasicScraper): class PS238(_ParserScraper): url = 'http://ps238.nodwick.com/' stripUrl = url + '/comic/%s/' - starter = bounceStarter() + starter = bounceStarter imageSearch = '//div[@id="comic"]//img' prevSearch = '//a[@class="comic-nav-base comic-nav-previous"]' nextSearch = '//a[@class="comic-nav-base comic-nav-next"]' diff --git a/dosagelib/plugins/r.py b/dosagelib/plugins/r.py index 83b13446b..300952c5d 100644 --- a/dosagelib/plugins/r.py +++ b/dosagelib/plugins/r.py @@ -51,7 +51,7 @@ class RealmOfAtland(_BasicScraper): class RedMeat(_BasicScraper): baseUrl = 'http://www.redmeat.com/redmeat/' url = baseUrl + 'current/index.html' - starter = bounceStarter() + starter = bounceStarter stripUrl = baseUrl + '%s/index.html' firstStripUrl = stripUrl % '1996-06-10' imageSearch = compile(tagre("img", "src", r'(http://www\.redmeat\.com/imager/b/redmeat/[^"]*\.png)')) diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py index ab55e43f4..db084e3a8 100644 --- a/dosagelib/plugins/s.py +++ b/dosagelib/plugins/s.py @@ -27,13 +27,12 @@ class SabrinaOnline(_BasicScraper): adult = True multipleImagesPerStrip = True - @classmethod - def starter(cls): + def starter(self): """Pick last one in a list of archive pages.""" - archive = cls.url + 'archive.html' - data = cls.getPage(archive) + archive = self.url + 'archive.html' + data = self.getPage(archive) search = compile(tagre("a", "href", r"(\d\d\d\d-\d\d.html)")) - archivepages = cls.fetchUrls(archive, data, search) + archivepages = self.fetchUrls(archive, data, search) return archivepages[-1] @@ -69,7 +68,7 @@ class ScandinaviaAndTheWorld(_ParserScraper): url = 'http://satwcomic.com/' stripUrl = url + '%s' firstStripUrl = stripUrl % 'sweden-denmark-and-norway' - starter = indirectStarter() + starter = indirectStarter imageSearch = '//img[@itemprop="image"]' prevSearch = '//a[@accesskey="p"]' latestSearch = '//a[text()="View latest comic"]' @@ -166,14 +165,13 @@ class ScurryAndCover(_ParserScraper): image = images[0] return [cls.url + '/images/pages/' + image + '-xsmall.png'] - @classmethod - def starter(cls): + def starter(self): """Go forward as far as possibe, then start.""" - url = cls.url + url = self.url while True: - data = cls.getPage(url) + data = self.getPage(url) try: - url = cls.fetchUrl(url, data, cls.nextSearch) + url = self.fetchUrl(url, data, self.nextSearch) except ValueError: break return url @@ -197,7 +195,7 @@ class SexyLosers(_BasicScraper): prevSearch = compile(r'<<', IGNORECASE) latestSearch = compile(r'SEXY LOSERS Latest SL Comic \(#\d+\)', IGNORECASE) help = 'Index format: nnn' - starter = indirectStarter() + starter = indirectStarter @classmethod def namer(cls, imageUrl, pageUrl): @@ -334,7 +332,7 @@ class SnowFlame(_WordPressScraper): url = 'http://www.snowflamecomic.com/' stripUrl = url + '?comic=snowflame-%s-%s' firstStripUrl = stripUrl % ('01', '01') - starter = bounceStarter() + starter = bounceStarter nextSearch = WP_LATEST_SEARCH help = 'Index format: chapter-page' @@ -396,7 +394,7 @@ class Spamusement(_BasicScraper): IGNORECASE) latestSearch = prevSearch help = 'Index format: n (unpadded)' - starter = indirectStarter() + starter = indirectStarter class SpareParts(_BasicScraper): @@ -507,7 +505,7 @@ class StuffNoOneToldMe(_BasicScraper): stripUrl = url + '%s.html' firstStripUrl = stripUrl % '2010/05/01' olderHref = r"(http://www\.snotm\.com/\d+/\d+/[^']+\.html)" - starter = indirectStarter() + starter = indirectStarter imageSearch = ( compile(tagre("img", "src", r'(http://i\.imgur\.com/[^"]+)') + r"(?:|
)"), diff --git a/dosagelib/plugins/smackjeeves.py b/dosagelib/plugins/smackjeeves.py index fcc318770..2b3b89443 100644 --- a/dosagelib/plugins/smackjeeves.py +++ b/dosagelib/plugins/smackjeeves.py @@ -1,10 +1,16 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher + +from __future__ import absolute_import, division, print_function + from re import compile + from ..scraper import make_scraper from ..util import tagre, quote, case_insensitive_re + # SmackJeeves is a crawlers nightmare - users are allowed to edit HTML directly. # That's why there are so much different search patterns. @@ -31,6 +37,7 @@ _nextSearch = ( compile(_linkSearch + tagre("img", "src", r"[^']+/(?:forthnav)\.png[^']*", quote="'")), ) + def add(name, url, adult, bounce): classname = 'SmackJeeves_' + name @@ -41,15 +48,14 @@ def add(name, url, adult, bounce): return 'http://www.smackjeeves.com/mature.php?ref=' + quote(pageUrl) return pageUrl - @classmethod - def _starter(cls): + def _starter(self): """Get start URL.""" url1 = modifier(url) - data = cls.getPage(url1) - url2 = cls.fetchUrl(url1, data, cls.prevSearch) + data = self.getPage(url1) + url2 = self.fetchUrl(url1, data, self.prevSearch) if bounce: - data = cls.getPage(url2) - url3 = cls.fetchUrl(url2, data, _nextSearch) + data = self.getPage(url2) + url3 = self.fetchUrl(url2, data, _nextSearch) return modifier(url3) return modifier(url2) @@ -76,7 +82,8 @@ def add(name, url, adult, bounce): ) -# do not edit anything below since these entries are generated from scripts/update.sh +# do not edit anything below since these entries are generated from +# scripts/update_plugins.sh # DO NOT REMOVE add('20TimesKirby', 'http://20xkirby.smackjeeves.com/comics/', False, True) add('2Kingdoms', 'http://2kingdoms.smackjeeves.com/comics/', False, False) @@ -110,7 +117,7 @@ add('AlwaysRainingHere', 'http://alwaysraininghere.smackjeeves.com/comics/', Fal add('Amaravati', 'http://amaravati.smackjeeves.com/comics/', False, True) add('AmorVincitOmnia', 'http://avo.smackjeeves.com/comics/', True, True) add('AmsdenEstate', 'http://monsterous.smackjeeves.com/comics/', False, True) -#add('Amya', 'http://amya.smackjeeves.com/comics/', False, True) +# add('Amya', 'http://amya.smackjeeves.com/comics/', False, True) add('Anathemacomics', 'http://anathema-comics.smackjeeves.com/comics/', False, True) add('AngelBeast', 'http://angel-beast.smackjeeves.com/comics/', False, True) add('AngelGuardian', 'http://angel-guardian.smackjeeves.com/comics/', False, True) @@ -176,7 +183,7 @@ add('Cambion', 'http://cambion.smackjeeves.com/comics/', True, True) add('CaptiveSoul', 'http://captive-soul.smackjeeves.com/comics/', False, True) add('Captor', 'http://captor.smackjeeves.com/comics/', False, True) add('CaravanaTaleofGodsandMen', 'http://www.caravantale.com/comics/', False, True) -#add('Carciphona', 'http://carciphona.smackjeeves.com/comics/', False, True) +# add('Carciphona', 'http://carciphona.smackjeeves.com/comics/', False, True) add('Cataclysm', 'http://cataclysm.smackjeeves.com/comics/', False, True) add('Catnip', 'http://catnipmanga.smackjeeves.com/comics/', True, True) add('Cerintha', 'http://cerintha.smackjeeves.com/comics/', False, True) @@ -281,7 +288,7 @@ add('FinalArcanum', 'http://finalarcanum.smackjeeves.com/comics/', False, True) add('FireWire', 'http://firewire.smackjeeves.com/comics/', False, True) add('FireredLisasReise', 'http://lisasreise.smackjeeves.com/comics/', False, True) add('FlyorFail', 'http://flyorfail.smackjeeves.com/comics/', False, False) -#add('FootLoose', 'http://footloose.smackjeeves.com/comics/', False, True) +# add('FootLoose', 'http://footloose.smackjeeves.com/comics/', False, True) add('ForcedSeduction', 'http://forced-seduction.smackjeeves.com/comics/', False, True) add('ForestHill', 'http://www.foresthillcomic.org/comics/', False, False) add('ForgettheDistance', 'http://forgetthedistance.smackjeeves.com/comics/', True, True) @@ -474,7 +481,7 @@ add('MythsofUnovaAWhiteNuzlockeRunHardMode', 'http://mythsofunova.smackjeeves.co add('NIK', 'http://nik.smackjeeves.com/comics/', False, True) add('Nah', 'http://thecomicformerlyknownasgenlab.smackjeeves.com/comics/', False, True) add('Negligence', 'http://negligence.smackjeeves.com/comics/', False, True) -#add('NekotheKitty', 'http://www.nekothekitty.net/comics/', False, True) +# add('NekotheKitty', 'http://www.nekothekitty.net/comics/', False, True) add('NeoCrystalAdventures', 'http://neocrystaladventures.smackjeeves.com/comics/', False, True) add('NeonGlow', 'http://neonglow.smackjeeves.com/comics/', False, True) add('NevertheHero', 'http://neverthehero.smackjeeves.com/comics/', False, True) @@ -766,7 +773,7 @@ add('WhenSheWasBad', 'http://whenshewasbad.smackjeeves.com/comics/', False, True add('Whenweweresilent', 'http://silence.smackjeeves.com/comics/', False, False) add('WhereaboutsOfTime', 'http://wot.smackjeeves.com/comics/', False, True) add('WhiteHeart', 'http://whiteheart.smackjeeves.com/comics/', True, False) -#add('WhiteNoise', 'http://white-noise.smackjeeves.com/comics/', False, True) +# add('WhiteNoise', 'http://white-noise.smackjeeves.com/comics/', False, True) add('WildWingBoys', 'http://wwb.smackjeeves.com/comics/', False, True) add('WildWingBoysKoathArc', 'http://wwbka.smackjeeves.com/comics/', False, True) add('Wildflowers', 'http://wildflowers.smackjeeves.com/comics/', False, True) diff --git a/dosagelib/plugins/t.py b/dosagelib/plugins/t.py old mode 100755 new mode 100644 index c4cdf58cb..90f39c1ee --- a/dosagelib/plugins/t.py +++ b/dosagelib/plugins/t.py @@ -83,7 +83,7 @@ class TheOrderOfTheStick(_BasicScraper): prevSearch = compile(r'') latestSearch = compile(tagre("a", "href", r'([^"]+)', after='class="timestamp"')) - starter = indirectStarter() + starter = indirectStarter adult = True indirectImageSearch = compile(tagre('a', 'href', r'(%simage/\d+)' % rurl)) @@ -180,17 +180,14 @@ class ThreePanelSoul(_BasicScraper): class ThunderAndLightning(_BasicScraper): - url = 'http://www.talcomic.com/wp/' - rurl = escape(url) - stripUrl = url + '%s/' + baseUrl = 'http://www.talcomic.com/wp/' + url = baseUrl + '?latestcomic' + rurl = escape(baseUrl) + stripUrl = baseUrl + '%s/' prevSearch = compile(tagre("a", "href", r'(%s[^"]+)' % rurl, after="prev")) imageSearch = compile(tagre("img", "src", r'(%scomics/[^"]+)' % rurl)) help = 'Index format: yyyy/mm/dd/page-nn' - @classmethod - def starter(cls): - return cls.url + '?latestcomic' - class TinyKittenTeeth(_BasicScraper): url = 'http://www.tinykittenteeth.com/' diff --git a/dosagelib/plugins/u.py b/dosagelib/plugins/u.py index c1c4e87d5..a77fb049d 100644 --- a/dosagelib/plugins/u.py +++ b/dosagelib/plugins/u.py @@ -24,7 +24,7 @@ class Undertow(_BasicScraper): imageSearch = compile(tagre("img", "src", r'([^"]+\.jpg)')) prevSearch = compile(r'href="(.+?)".+?teynpoint') latestSearch = compile(r'href="(.+?)".+?Most recent page') - starter = indirectStarter() + starter = indirectStarter class UnicornJelly(_BasicScraper): @@ -47,7 +47,7 @@ class Unsounded(_BasicScraper): latestSearch = compile(tagre("a", "href", r'(%scomic/[^"]*)' % rurl) + tagre("img", "src", r"%simages/newpages\.png" % rurl)) - starter = indirectStarter() + starter = indirectStarter help = 'Index format: chapter-number' def getIndexStripUrl(self, index): diff --git a/dosagelib/plugins/w.py b/dosagelib/plugins/w.py index 157e853f8..98e77a874 100644 --- a/dosagelib/plugins/w.py +++ b/dosagelib/plugins/w.py @@ -45,7 +45,7 @@ class WayfarersMoon(_BasicScraper): class WebDesignerCOTW(_BasicScraper): url = 'http://www.webdesignerdepot.com/' rurl = escape(url) - starter = indirectStarter() + starter = indirectStarter stripUrl = url + '%s/' firstStripUrl = stripUrl % '2009/11/comics-of-the-week-1' imageSearch = ( @@ -211,10 +211,9 @@ class WormWorldSaga(_BasicScraper): latestChapter = 5 multipleImagesPerStrip = True - @classmethod - def starter(cls): + def starter(self): return '%schapters/chapter%02d/%s/index.php' % ( - cls.url, cls.latestChapter, cls.lang.upper()) + self.url, self.latestChapter, self.lang.upper()) def getPrevUrl(self, url, data): """Find previous URL.""" diff --git a/dosagelib/plugins/webcomicfactory.py b/dosagelib/plugins/webcomicfactory.py index 5fb2c6fa5..3f5aa85ff 100644 --- a/dosagelib/plugins/webcomicfactory.py +++ b/dosagelib/plugins/webcomicfactory.py @@ -6,15 +6,11 @@ from __future__ import absolute_import, division, print_function from .common import _WordPressScraper, WP_LATEST_SEARCH - +from ..helpers import indirectStarter class _WebcomicFactory(_WordPressScraper): - @classmethod - def starter(cls): - """this is basically helpers.indirectStarter, but dynamically selecting - the right parameters.""" - data = cls.getPage(cls.firstStripUrl) - return cls.fetchUrl(cls.firstStripUrl, data, WP_LATEST_SEARCH) + starter = indirectStarter + latestSearch = WP_LATEST_SEARCH # do not edit anything below since these entries are generated from diff --git a/dosagelib/plugins/wlpcomics.py b/dosagelib/plugins/wlpcomics.py index fe35d093d..c854b22f5 100644 --- a/dosagelib/plugins/wlpcomics.py +++ b/dosagelib/plugins/wlpcomics.py @@ -13,7 +13,7 @@ class _WLPComics(_ParserScraper): imageSearch = '//center/*/img[contains(@alt, " Comic")]' prevSearch = '//a[contains(text(), "Previous ")]' nextSearch = '//a[contains(text(), "Next ")]' - starter = bounceStarter() + starter = bounceStarter help = 'Index format: nnn' @classmethod diff --git a/dosagelib/plugins/wordpress.py b/dosagelib/plugins/wordpress.py index 7683cb73b..8aac5d360 100644 --- a/dosagelib/plugins/wordpress.py +++ b/dosagelib/plugins/wordpress.py @@ -9,7 +9,7 @@ def add(name, start): name=name, url='http://hijinksensue.com/', latestSearch=start, - starter=indirectStarter() + starter=indirectStarter ) globals()[name] = make_scraper(name, _WordPressScraper, **attrs) diff --git a/dosagelib/plugins/x.py b/dosagelib/plugins/x.py index 7fa17322c..e169c3aa4 100644 --- a/dosagelib/plugins/x.py +++ b/dosagelib/plugins/x.py @@ -15,7 +15,7 @@ from ..util import tagre class Xkcd(_BasicScraper): name = 'xkcd' url = 'http://xkcd.com/' - starter = bounceStarter() + starter = bounceStarter stripUrl = url + '%s/' firstStripUrl = stripUrl % '1' imageSearch = compile(tagre("img", "src", diff --git a/dosagelib/plugins/z.py b/dosagelib/plugins/z.py index 07aa54dc6..a2a635308 100644 --- a/dosagelib/plugins/z.py +++ b/dosagelib/plugins/z.py @@ -22,7 +22,7 @@ class ZapComic(_ParserScraper): class Zapiro(_BasicScraper): url = 'http://www.mg.co.za/zapiro/' - starter = bounceStarter() + starter = bounceStarter stripUrl = 'http://mg.co.za/cartoon/%s' firstStripUrl = stripUrl % 'zapiro_681' imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)'))