diff --git a/dosagelib/plugins/arcamax.py b/dosagelib/plugins/arcamax.py index 896fbaff1..3f5cbf761 100644 --- a/dosagelib/plugins/arcamax.py +++ b/dosagelib/plugins/arcamax.py @@ -12,14 +12,13 @@ class _Arcamax(_ParserScraper): imageSearch = '//img[@id="comic-zoom"]' prevSearch = '//a[@class="prev"]' + def __init__(self, name): + super(_Arcamax, self).__init__('Arcamax/' + name) + @property def url(self): return 'http://www.arcamax.com/thefunnies/' + self.path + '/' - @property - def name(self): - return 'Arcamax/' + super(_Arcamax, self).name - # do not edit anything below since these entries are generated from # scripts/update_plugins.sh diff --git a/dosagelib/plugins/clonemanga.py b/dosagelib/plugins/clonemanga.py index ffaa75f90..5129f7df4 100644 --- a/dosagelib/plugins/clonemanga.py +++ b/dosagelib/plugins/clonemanga.py @@ -7,58 +7,55 @@ from __future__ import absolute_import, division, print_function from re import compile -from ..scraper import make_scraper +from ..scraper import _BasicScraper from ..util import tagre, getQueryParams -_linkTag = tagre("a", "href", r'([^"]+)') -_prevSearch = compile(_linkTag + tagre("img", "src", r"previous\.gif")) -_nextSearch = compile(_linkTag + tagre("img", "src", r"next\.gif")) -_lastSearch = compile(_linkTag + tagre("img", "src", r"last\.gif")) +class CloneManga(_BasicScraper): + _linkTag = tagre("a", "href", r'([^"]+)') + prevSearch = compile(_linkTag + tagre("img", "src", r"previous\.gif")) + nextSearch = compile(_linkTag + tagre("img", "src", r"next\.gif")) + latestSearch = compile(_linkTag + tagre("img", "src", r"last\.gif")) + help = 'Index format: n' + def __init__(self, name, shortName, imageFolder=None, lastStrip=None): + super(CloneManga, self).__init__('CloneManga/' + name) -def add(name, shortName, imageFolder=None, lastStrip=None): - classname = 'CloneManga_%s' % name - _url = 'http://manga.clone-army.org' - baseUrl = '%s/%s.php' % (_url, shortName) - if imageFolder is None: - imageFolder = shortName + _url = 'http://manga.clone-army.org' + self.url = '%s/%s.php' % (_url, shortName) + if imageFolder is None: + imageFolder = shortName + self.stripUrl = self.url + '?page=%s' + self.imageSearch = compile(tagre("img", "src", r'((?:%s/)?%s/[^"]+)' % (_url, imageFolder), after="center")) + + if lastStrip is None: + self.starter = self._starter + else: + self.url = self.stripUrl % lastStrip def namer(self, image_url, page_url): return '%03d' % int(getQueryParams(page_url)['page'][0]) def _starter(self): # first, try hopping to previous and next comic - data = self.getPage(baseUrl) + data = self.getPage(self.url) try: - url = self.fetchUrl(baseUrl, data, _prevSearch) + url = self.fetchUrl(self.url, data, self.prevSearch) except ValueError: # no previous link found, try hopping to last comic - return self.fetchUrl(baseUrl, data, _lastSearch) + return self.fetchUrl(self.url, data, self.latestSearch) else: data = self.getPage(url) - return self.fetchUrl(url, data, _nextSearch) + return self.fetchUrl(url, data, self.nextSearch) - attrs = dict( - name='CloneManga/' + name, - stripUrl=baseUrl + '?page=%s', - imageSearch=compile(tagre("img", "src", r'((?:%s/)?%s/[^"]+)' % (_url, imageFolder), after="center")), - prevSearch=_prevSearch, - help='Index format: n', - namer=namer, - url=baseUrl, - ) - if lastStrip is None: - attrs['starter'] = _starter - else: - attrs['url'] = attrs['stripUrl'] % lastStrip - globals()[classname] = make_scraper(classname, **attrs) - - -add('AprilAndMay', 'anm', imageFolder='AAM') -add('Kanami', 'kanami') -add('MomokaCorner', 'momoka') -add('NanasEverydayLife', 'nana', lastStrip='78') -add('PaperEleven', 'pxi', imageFolder='papereleven', lastStrip='311') -add('Tomoyo42sRoom', 't42r') -add('PennyTribute', 'penny') + @classmethod + def getmodules(cls): + return [ + cls('AprilAndMay', 'anm', imageFolder='AAM'), + cls('Kanami', 'kanami'), + cls('MomokaCorner', 'momoka'), + cls('NanasEverydayLife', 'nana', lastStrip='78'), + cls('PaperEleven', 'pxi', imageFolder='papereleven', lastStrip='311'), + cls('Tomoyo42sRoom', 't42r'), + cls('PennyTribute', 'penny'), + ] diff --git a/dosagelib/plugins/comicfury.py b/dosagelib/plugins/comicfury.py index 9b5364f5e..2dd63b3f8 100644 --- a/dosagelib/plugins/comicfury.py +++ b/dosagelib/plugins/comicfury.py @@ -22,6 +22,9 @@ class _ComicFury(_ParserScraper): help = 'Index format: n' starter = bounceStarter + def __init__(self, name): + super(_ComicFury, self).__init__('ComicFury/' + name[2:]) + def namer(self, image_url, page_url): parts = page_url.split('/') path, ext = os.path.splitext(image_url) @@ -32,10 +35,6 @@ class _ComicFury(_ParserScraper): def url(self): return 'http://%s.webcomic.ws/comics/' % self.sub - @property - def name(self): - return 'ComicFury/' + super(_ComicFury, self).name[2:] - def getIndexStripUrl(self, index): return self.url + 'comics/%s' % index diff --git a/dosagelib/plugins/comicgenesis.py b/dosagelib/plugins/comicgenesis.py index 145d68491..9637c971f 100644 --- a/dosagelib/plugins/comicgenesis.py +++ b/dosagelib/plugins/comicgenesis.py @@ -6,26 +6,25 @@ from __future__ import absolute_import, division, print_function from re import compile -from ..scraper import make_scraper + +from ..scraper import _BasicScraper from ..util import tagre - -_imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)')) -_prevSearch = compile(tagre("a", "href", r'([^"]*/d/\d{8}\.html)') + - '(?:Previous comic' + '|' + - tagre("img", "alt", "Previous comic") + '|' + - tagre("img", "src", "images/back\.gif") + - ')') +# Comicgenesis has a lot of comics, but most of them are disallowed by +# robots.txt -def add(name, url): - classname = 'ComicGenesis_%s' % name - if '/d/' in url: - stripUrl = url.split('/d/')[0] + '/d/%s.html' - else: - stripUrl = url + 'd/%s.html' +class ComicGenesis(_BasicScraper): + imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'([^"]*/d/\d{8}\.html)') + + '(?:Previous comic' + '|' + + tagre("img", "alt", "Previous comic") + '|' + + tagre("img", "src", "images/back\.gif") + + ')') + multipleImagesPerStrip = True + help = 'Index format: yyyymmdd' - def _prevUrlModifier(self, prev_url): + def prevUrlModifier(self, prev_url): if prev_url: return prev_url.replace( "keenspace.com", "comicgenesis.com").replace( @@ -33,95 +32,100 @@ def add(name, url): "toonspace.com", "comicgenesis.com").replace( "comicgen.com", "comicgenesis.com") - globals()[classname] = make_scraper( - classname, - name='ComicGenesis/' + name, - url=url, - stripUrl=stripUrl, - imageSearch=_imageSearch, - prevSearch=_prevSearch, - prevUrlModifier=_prevUrlModifier, - multipleImagesPerStrip=True, - help='Index format: yyyymmdd', - ) + def __init__(self, name, sub=None, last=None, baseUrl=None): + super(ComicGenesis, self).__init__('ComicGenesis/' + name) -# Comicgenesis has a lot of comics, but most of them are disallowed by robots.txt -# do not edit anything below since these entries are generated from scripts/update.sh -# DO NOT REMOVE -add('AAAAA', 'http://aaaaa.comicgenesis.com/') -add('AdventuresofKiltman', 'http://kiltman.comicgenesis.com/') -add('AmorModerno', 'http://amormoderno.comicgenesis.com/') -add('AnythingButRealLife', 'http://anythingbutreallife.comicgenesis.com/') -add('Ardra', 'http://ardra.comicgenesis.com/') -add('Artwork', 'http://artwork.comicgenesis.com/') -add('BabeintheWoods', 'http://babeinthewoods.comicgenesis.com/') -add('BackwaterPlanet', 'http://bobthespirit.comicgenesis.com/') -add('BendyStrawVampires', 'http://bsvampires.comicgenesis.com/') -add('BlindSight', 'http://blindsight.comicgenesis.com/') -add('BreakingtheDoldrum', 'http://breakingthedoldrum.comicgenesis.com/') -add('Candi', 'http://candicomics.com/') -add('CorporateLife', 'http://corporatelife.comicgenesis.com/') -add('DarkWelkin', 'http://darkwelkin.comicgenesis.com/') -add('DemonEater', 'http://demoneater.comicgenesis.com/') -add('DoodleDiaries', 'http://doodlediaries.comicgenesis.com/') -add('DormSweetDorm', 'http://dormsweetdorm.comicgenesis.com/') -add('DoubleyouTeeEff', 'http://doubleyouteeeff.comicgenesis.com/') -add('DragonsBane', 'http://jasonwhitewaterz.comicgenesis.com/') -add('Dreamaniac', 'http://dreamaniaccomic.comicgenesis.com/') -add('ElnifiChronicles', 'http://elnifichronicles.comicgenesis.com/') -add('EvesApple', 'http://evesapple.comicgenesis.com/') -add('FancyThat', 'http://fancythat.comicgenesis.com/') -add('FantasyQwest', 'http://creatorauthorman.comicgenesis.com/') -add('Fantazine', 'http://fantazin.comicgenesis.com/') -add('Flounderville', 'http://flounderville.comicgenesis.com/') -add('GEM', 'http://keltzy.comicgenesis.com/') -add('Gonefor300days', 'http://g4300d.comicgenesis.com/') -add('IBlameDanny', 'http://vileterror.comicgenesis.com/') -add('ImpendingDoom', 'http://impending.comicgenesis.com/') -add('InANutshell', 'http://nutshellcomics.comicgenesis.com/') -add('KernyMantisComics', 'http://kernymantis.comicgenesis.com/') -add('KitsuneJewel', 'http://kitsunejewel.comicgenesis.com/') -add('KittyCattyGames', 'http://kittycattygames.comicgenesis.com/') -add('KiwiDayN', 'http://kiwidayn.comicgenesis.com/') -add('KungFounded', 'http://kungfounded.comicgenesis.com/') -add('LabBratz', 'http://labbratz.comicgenesis.com/') -add('Laserwing', 'http://laserwing.comicgenesis.com/') -add('LumiasKingdom', 'http://lumia.comicgenesis.com/') -add('Majestic7', 'http://majestic7.comicgenesis.com/') -add('MaximumWhimsy', 'http://maximumwhimsy.comicgenesis.com/') -add('MenschunsererZeitGerman', 'http://muz.comicgenesis.com/') -add('MoonCrest24', 'http://mooncrest.comicgenesis.com/d/20121117.html') -add('Mushian', 'http://tentoumushi.comicgenesis.com/') -add('NightwolfCentral', 'http://nightwolfcentral.comicgenesis.com/') -add('NoTimeForLife', 'http://randyraven.comicgenesis.com/') -add('NoneMoreComic', 'http://nonemore.comicgenesis.com/') -add('ODCKS', 'http://odcks.comicgenesis.com/') -add('OfDoom', 'http://ofdoom.comicgenesis.com/') -add('OpportunityofaLifetime', 'http://carpathia.comicgenesis.com/') -add('Orbz', 'http://orbz.comicgenesis.com/') -add('OwMySanity', 'http://owmysanity.comicgenesis.com/') -add('PhantomThesis', 'http://phantomthesis.comicgenesis.com/') -add('ProfessorSaltinesAstrodynamicDirigible', 'http://drsaltine.comicgenesis.com/') -add('PsychicDyslexiaInstitute', 'http://pdi.comicgenesis.com/') -add('PublicidadeEnganosa', 'http://publicidadeenganosa.comicgenesis.com/') -add('RandomAxeOfKindness', 'http://randomaxe.comicgenesis.com/') -add('SalemUncommons', 'http://salemuncommons.comicgenesis.com/') -add('SamandElisAdventures', 'http://sameliadv.comicgenesis.com/') -add('SarahZero', 'http://plughead.comicgenesis.com/') -add('SixByNineCollege', 'http://sixbyninecollege.comicgenesis.com/') -add('SpoononHighandFireontheMountian', 'http://spoon.comicgenesis.com/') -add('SynapticMisfires', 'http://synapticmisfires.comicgenesis.com/') -add('TakingStock', 'http://mapaghimagsik.comicgenesis.com/') -add('TemplarArizona', 'http://templaraz.comicgenesis.com/') -add('TheAdventuresofKaniraBaxter', 'http://kanirabaxter.comicgenesis.com/') -add('TheAdventuresofVindibuddSuperheroInTraining', 'http://vindibudd.comicgenesis.com/d/20070720.html') -add('TheEasyBreather', 'http://easybreather.comicgenesis.com/') -add('TheLounge', 'http://thelounge.comicgenesis.com/') -add('TheMisadventuresofOkk', 'http://okk.comicgenesis.com/') -add('ThePath', 'http://thepath.comicgenesis.com/') -add('TheTalesofKalduras', 'http://kalduras.comicgenesis.com/') -add('Unconventional', 'http://unconventional.comicgenesis.com/') -add('WarMageNC17', 'http://warmage.comicgenesis.com/') -add('WebcomicTheWebcomicWebcomicWebcomicWebcomic', 'http://dannormnsanidey.comicgenesis.com/') -add('WhatYouDontSee', 'http://phantomlady4.comicgenesis.com/') -add('Wierdman', 'http://asa.comicgenesis.com/') + if sub: + baseUrl = 'http://%s.comicgenesis.com/' % sub + + self.stripUrl = baseUrl + 'd/%s.html' + if last: + self.url = self.stripUrl % last + self.endOfLife = True + else: + self.url = baseUrl + + @classmethod + def getmodules(cls): + return [ + # do not edit anything below since these entries are generated from + # scripts/update_plugins.sh + # DO NOT REMOVE + cls('AAAAA', 'aaaaa'), + cls('AdventuresofKiltman', 'kiltman'), + cls('AmorModerno', 'amormoderno'), + cls('AnythingButRealLife', 'anythingbutreallife'), + cls('Ardra', 'ardra'), + cls('Artwork', 'artwork'), + cls('BabeintheWoods', 'babeinthewoods'), + cls('BackwaterPlanet', 'bobthespirit'), + cls('BendyStrawVampires', 'bsvampires'), + cls('BlindSight', 'blindsight'), + cls('BreakingtheDoldrum', 'breakingthedoldrum'), + cls('Candi', baseUrl='http://candicomics.com/'), + cls('CorporateLife', 'corporatelife'), + cls('DarkWelkin', 'darkwelkin'), + cls('DemonEater', 'demoneater'), + cls('DoodleDiaries', 'doodlediaries'), + cls('DormSweetDorm', 'dormsweetdorm'), + cls('DoubleyouTeeEff', 'doubleyouteeeff'), + cls('DragonsBane', 'jasonwhitewaterz'), + cls('Dreamaniac', 'dreamaniaccomic'), + cls('ElnifiChronicles', 'elnifichronicles'), + cls('EvesApple', 'evesapple'), + cls('FancyThat', 'fancythat'), + cls('FantasyQwest', 'creatorauthorman'), + cls('Fantazine', 'fantazin'), + cls('Flounderville', 'flounderville'), + cls('GEM', 'keltzy'), + cls('Gonefor300days', 'g4300d'), + cls('IBlameDanny', 'vileterror'), + cls('ImpendingDoom', 'impending'), + cls('InANutshell', 'nutshellcomics'), + cls('KernyMantisComics', 'kernymantis'), + cls('KitsuneJewel', 'kitsunejewel'), + cls('KittyCattyGames', 'kittycattygames'), + cls('KiwiDayN', 'kiwidayn'), + cls('KungFounded', 'kungfounded'), + cls('LabBratz', 'labbratz'), + cls('Laserwing', 'laserwing'), + cls('LumiasKingdom', 'lumia'), + cls('Majestic7', 'majestic7'), + cls('MaximumWhimsy', 'maximumwhimsy'), + cls('MenschunsererZeitGerman', 'muz'), + cls('MoonCrest24', 'mooncrest', last='20121117'), + cls('Mushian', 'tentoumushi'), + cls('NightwolfCentral', 'nightwolfcentral'), + cls('NoTimeForLife', 'randyraven'), + cls('NoneMoreComic', 'nonemore'), + cls('ODCKS', 'odcks'), + cls('OfDoom', 'ofdoom'), + cls('OpportunityofaLifetime', 'carpathia'), + cls('Orbz', 'orbz'), + cls('OwMySanity', 'owmysanity'), + cls('PhantomThesis', 'phantomthesis'), + cls('ProfessorSaltinesAstrodynamicDirigible', 'drsaltine'), + cls('PsychicDyslexiaInstitute', 'pdi'), + cls('PublicidadeEnganosa', 'publicidadeenganosa'), + cls('RandomAxeOfKindness', 'randomaxe'), + cls('SalemUncommons', 'salemuncommons'), + cls('SamandElisAdventures', 'sameliadv'), + cls('SarahZero', 'plughead'), + cls('SixByNineCollege', 'sixbyninecollege'), + cls('SpoononHighandFireontheMountian', 'spoon'), + cls('SynapticMisfires', 'synapticmisfires'), + cls('TakingStock', 'mapaghimagsik'), + cls('TemplarArizona', 'templaraz'), + cls('TheAdventuresofKaniraBaxter', 'kanirabaxter'), + cls('TheAdventuresofVindibuddSuperheroInTraining', 'vindibudd', last='20070720'), + cls('TheEasyBreather', 'easybreather'), + cls('TheLounge', 'thelounge'), + cls('TheMisadventuresofOkk', 'okk'), + cls('ThePath', 'thepath'), + cls('TheTalesofKalduras', 'kalduras'), + cls('Unconventional', 'unconventional'), + cls('WarMageNC17', 'warmage'), + cls('WebcomicTheWebcomicWebcomicWebcomicWebcomic', 'dannormnsanidey'), + cls('WhatYouDontSee', 'phantomlady4'), + cls('Wierdman', 'asa'), + ] diff --git a/dosagelib/plugins/creators.py b/dosagelib/plugins/creators.py index 213ada458..b1602da35 100644 --- a/dosagelib/plugins/creators.py +++ b/dosagelib/plugins/creators.py @@ -15,9 +15,8 @@ class _Creators(_ParserScraper): latestSearch = '//div[contains(@class,"caption")]/a' starter = indirectStarter - @property - def name(self): - return 'Creators/' + super(_Creators, self).name + def __init__(self, name): + super(_Creators, self).__init__('Creators/' + name) @property def url(self): diff --git a/dosagelib/plugins/gocomics.py b/dosagelib/plugins/gocomics.py index f833a8081..6a931fc0a 100644 --- a/dosagelib/plugins/gocomics.py +++ b/dosagelib/plugins/gocomics.py @@ -18,9 +18,8 @@ class _GoComics(_ParserScraper): starter = bounceStarter help = 'Index format: yyyy/mm/dd' - @property - def name(self): - return 'GoComics/' + super(_GoComics, self).name[2:] + def __init__(self, name): + super(_GoComics, self).__init__('GoComics/' + name[2:]) @property def url(self): diff --git a/dosagelib/plugins/keenspot.py b/dosagelib/plugins/keenspot.py index c5f2852f9..45c2ecd12 100644 --- a/dosagelib/plugins/keenspot.py +++ b/dosagelib/plugins/keenspot.py @@ -1,79 +1,79 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher + +from __future__ import absolute_import, division, print_function from re import compile -from ..scraper import make_scraper + +from ..scraper import _BasicScraper from ..util import tagre -_imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)')) -_stripPattern = r'([^"]*/d/\d{8}\.html)' -_prevSearch = ( - compile(tagre("link", "href", _stripPattern, before="prev")), - compile(tagre("a", "href", _stripPattern, after="prev")), - compile(tagre("a", "href", _stripPattern) + tagre("img", "id", r"previous_day1")), - compile(tagre("a", "href", _stripPattern) + tagre("img", "id", r"katc7")), -) - -def add(name, url): - classname = 'KeenSpot_%s' % name - if '/d/' in url: - stripUrl = url.split('/d/')[0] + '/d/%s.html' - else: - stripUrl = url + 'd/%s.html' - - globals()[classname] = make_scraper(classname, - name='KeenSpot/' + name, - url=url, - stripUrl=stripUrl, - imageSearch = _imageSearch, - prevSearch = _prevSearch, - help = 'Index format: yyyymmdd', +class KeenSpot(_BasicScraper): + imageSearch = compile(tagre("img", "src", r'([^"]*/comics/[^"]+)')) + _stripPattern = r'([^"]*/d/\d{8}\.html)' + prevSearch = ( + compile(tagre("link", "href", _stripPattern, before="prev")), + compile(tagre("a", "href", _stripPattern, after="prev")), + compile(tagre("a", "href", _stripPattern) + tagre("img", "id", r"previous_day1")), + compile(tagre("a", "href", _stripPattern) + tagre("img", "id", r"katc7")), ) + help = 'Index format: yyyymmdd' -# do not edit anything below since these entries are generated from scripts/update.sh -# DO NOT REMOVE -add('27TwentySeven', 'http://twenty-seven.keenspot.com/') -add('Adventurers', 'http://adventurers.keenspot.com/') -add('AntiheroForHire', 'http://antihero.keenspot.com/') -add('BanzaiGirl', 'http://banzaigirl.keenspot.com/') -add('Barker', 'http://barkercomic.keenspot.com/') -add('Buzzboy', 'http://buzzboy.keenspot.com/') -add('ChoppingBlock', 'http://choppingblock.keenspot.com/') -add('ClichFlamb', 'http://clicheflambe.keenspot.com/') -add('CountYourSheep', 'http://countyoursheep.keenspot.com/') -add('EverythingJake', 'http://everythingjake.keenspot.com/') -add('FallOutToyWorks', 'http://fallouttoyworks.keenspot.com/') -add('FriarAndBrimstone', 'http://friarandbrimstone.keenspot.com/') -add('GeneCatlow', 'http://genecatlow.keenspot.com/') -add('GodMode', 'http://godmode.keenspot.com/') -add('GreenWake', 'http://greenwake.keenspot.com/') -add('HeadTrip', 'http://headtrip.keenspot.com/') -add('HoaxHunters', 'http://hoaxhunters.keenspot.com/') -add('InHere', 'http://inhere.keenspot.com/') -add('Katrina', 'http://katrina.keenspot.com/') -add('Landis', 'http://landis.keenspot.com/') -add('MakeshiftMiracle', 'http://makeshiftmiracle.keenspot.com/') -add('Marksmen', 'http://marksmen.keenspot.com/') -add('MarryMe', 'http://marryme.keenspot.com/') -add('MedusasDaughter', 'http://medusasdaughter.keenspot.com/') -add('MonsterMassacre', 'http://monstermassacre.keenspot.com/') -add('Newshounds', 'http://newshounds.keenspot.com/') -add('NoPinkPonies', 'http://nopinkponies.keenspot.com/') -add('OutThere', 'http://outthere.keenspot.com/') -add('Porcelain', 'http://porcelain.keenspot.com/') -add('QUILTBAG', 'http://quiltbag.keenspot.com/') -add('RedSpike', 'http://redspike.keenspot.com/') -add('RumbleFall', 'http://rumblefall.keenspot.com/') -add('SamuraisBlood', 'http://samuraisblood.keenspot.com/') -add('Sharky', 'http://sharky.keenspot.com/') -add('SomethingHappens', 'http://somethinghappens.keenspot.com/') -add('SoreThumbs', 'http://sorethumbs.keenspot.com/') -add('Striptease', 'http://striptease.keenspot.com/') -add('Superosity', 'http://superosity.keenspot.com/') -add('TheFirstDaughter', 'http://thefirstdaughter.keenspot.com/') -add('TheGodChild', 'http://godchild.keenspot.com/') -add('TheHuntersofSalamanstra', 'http://salamanstra.keenspot.com/') -add('TheLounge', 'http://thelounge.keenspot.com/') -add('WICKEDPOWERED', 'http://wickedpowered.keenspot.com/') + def __init__(self, name, sub): + super(KeenSpot, self).__init__('KeenSpot/' + name) + self.url = 'http://%s.keenspot.com/' % sub + self.stripUrl = self.url + 'd/%s.html' + + @classmethod + def getmodules(cls): + return [ + # do not edit anything below since these entries are generated from + # scripts/update_plugins.sh + # DO NOT REMOVE + cls('27TwentySeven', 'twenty-seven'), + cls('Adventurers', 'adventurers'), + cls('AntiheroForHire', 'antihero'), + cls('BanzaiGirl', 'banzaigirl'), + cls('Barker', 'barkercomic'), + cls('Buzzboy', 'buzzboy'), + cls('ChoppingBlock', 'choppingblock'), + cls('ClichFlamb', 'clicheflambe'), + cls('CountYourSheep', 'countyoursheep'), + cls('EverythingJake', 'everythingjake'), + cls('FallOutToyWorks', 'fallouttoyworks'), + cls('FriarAndBrimstone', 'friarandbrimstone'), + cls('GeneCatlow', 'genecatlow'), + cls('GodMode', 'godmode'), + cls('GreenWake', 'greenwake'), + cls('HeadTrip', 'headtrip'), + cls('HoaxHunters', 'hoaxhunters'), + cls('InHere', 'inhere'), + cls('Katrina', 'katrina'), + cls('Landis', 'landis'), + cls('MakeshiftMiracle', 'makeshiftmiracle'), + cls('Marksmen', 'marksmen'), + cls('MarryMe', 'marryme'), + cls('MedusasDaughter', 'medusasdaughter'), + cls('MonsterMassacre', 'monstermassacre'), + cls('Newshounds', 'newshounds'), + cls('NoPinkPonies', 'nopinkponies'), + cls('OutThere', 'outthere'), + cls('Porcelain', 'porcelain'), + cls('QUILTBAG', 'quiltbag'), + cls('RedSpike', 'redspike'), + cls('RumbleFall', 'rumblefall'), + cls('SamuraisBlood', 'samuraisblood'), + cls('Sharky', 'sharky'), + cls('SomethingHappens', 'somethinghappens'), + cls('SoreThumbs', 'sorethumbs'), + cls('Striptease', 'striptease'), + cls('Superosity', 'superosity'), + cls('TheFirstDaughter', 'thefirstdaughter'), + cls('TheGodChild', 'godchild'), + cls('TheHuntersofSalamanstra', 'salamanstra'), + cls('TheLounge', 'thelounge'), + cls('WICKEDPOWERED', 'wickedpowered'), + ] diff --git a/dosagelib/plugins/kindofnormal.py b/dosagelib/plugins/kindofnormal.py index 6a6be0473..5bca69019 100644 --- a/dosagelib/plugins/kindofnormal.py +++ b/dosagelib/plugins/kindofnormal.py @@ -1,19 +1,27 @@ # -*- coding: utf-8 -*- -from dosagelib.helpers import indirectStarter -from ..scraper import make_scraper, _ParserScraper +# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs +# Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher + +from __future__ import absolute_import, division, print_function + +from ..scraper import _ParserScraper -def add(name, url): - attrs = dict( - name=name, - url='http://kindofnormal.com/' + url, - imageSearch='//article[1]//div[@class="box-content"]//img', - prevSearch='//a[@class="prev"]' - ) - globals()[name] = make_scraper(name, _ParserScraper, **attrs) +class KindOfNormal(_ParserScraper): + imageSearch = '//article[1]//div[@class="box-content"]//img' + prevSearch = '//a[@class="prev"]' + def __init__(self, name, url): + super(KindOfNormal, self).__init__(name) + self.url = 'http://kindofnormal.com/' + url -add('MeAndDanielle', 'meanddanielle') -add('TruthFacts', 'truthfacts') -add('Wumo', 'wumo') -add('Wulffmorgenthaler', 'wumo') # name in previous versions + @classmethod + def getmodules(cls): + return [ + cls('MeAndDanielle', 'meanddanielle'), + cls('TruthFacts', 'truthfacts'), + cls('Wumo', 'wumo'), + # name in previous versions + cls('Wulffmorgenthaler', 'wumo'), + ] diff --git a/dosagelib/plugins/nuklearpower.py b/dosagelib/plugins/nuklearpower.py index b28ca3712..6bee036ff 100644 --- a/dosagelib/plugins/nuklearpower.py +++ b/dosagelib/plugins/nuklearpower.py @@ -1,7 +1,9 @@ # -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2016 Tobias Gruetzmacher +# Copyright (C) 2015-2016 Tobias Gruetzmacher + +from __future__ import absolute_import, division, print_function from ..scraper import _ParserScraper @@ -10,14 +12,13 @@ class _NuklearPower(_ParserScraper): prevSearch = '//a[@rel="prev"]' imageSearch = '//div[@id="comic"]/img' + def __init__(self, name): + super(_NuklearPower, self).__init__('NuklearPower/' + name[2:]) + @property def url(self): return 'http://www.nuklearpower.com/' + self.path + '/' - @property - def name(self): - return 'NuklearPower/' + super(_NuklearPower, self).name[2:] - class NP8BitTheater(_NuklearPower): path = '8-bit-theater' diff --git a/dosagelib/plugins/petitesymphony.py b/dosagelib/plugins/petitesymphony.py index 88be97090..f4b356522 100644 --- a/dosagelib/plugins/petitesymphony.py +++ b/dosagelib/plugins/petitesymphony.py @@ -1,33 +1,50 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher + +from __future__ import absolute_import, division, print_function from re import compile -from ..scraper import make_scraper + +from ..scraper import _BasicScraper from ..util import tagre - -_imageSearch = compile(tagre("img", "src", r'(http://[a-z0-9]+\.petitesymphony\.com/files/comics/[^"]+)')) -_prevSearch = compile(tagre("a", "href", r'(http://[a-z0-9]+\.petitesymphony\.com/comic/[^"]+)', after="navi-prev")) - -def add(name): - classname = 'PetiteSymphony_%s' % name.capitalize() - url = 'http://%s.petitesymphony.com/' % name - globals()[classname] = make_scraper(classname, - name='PetiteSymphony/' + name.capitalize(), - url = url, - stripUrl = url + 'comic/%s', - imageSearch = _imageSearch, - prevSearch = _prevSearch, - multipleImagesPerStrip = True, - help='Index format: named number' - ) +from .common import _WordPressScraper -add("djandora") -add("generation17") -add("knuckleup") -add("kickinrad") -add("orangegrind") -add("rascals") -add("sangria") -add("seed") +class PetiteSymphony(_BasicScraper): + imageSearch = compile(tagre("img", "src", r'(http://[a-z0-9]+\.petitesymphony\.com/files/comics/[^"]+)')) + prevSearch = compile(tagre("a", "href", r'(http://[a-z0-9]+\.petitesymphony\.com/comic/[^"]+)', after="navi-prev")) + multipleImagesPerStrip = True + help = 'Index format: named number' + + def __init__(self, name): + super(PetiteSymphony, self).__init__('PetiteSymphony/' + + name.capitalize()) + self.url = 'http://%s.petitesymphony.com/' % name + self.stripUrl = self.url + 'comic/%s' + + @classmethod + def getmodules(cls): + return [ + cls("knuckleup"), + cls("kickinrad"), + cls("orangegrind"), + cls("rascals"), + cls("sangria"), + cls("seed"), + ] + + +class ComicsBreak(_WordPressScraper): + + def __init__(self, name): + super(ComicsBreak, self).__init__('ComicsBreak/' + name) + self.url = 'http://%s.comicsbreak.com/' % name.lower() + + @classmethod + def getmodules(cls): + return [ + cls("Djandora"), + cls("Generation17"), + ] diff --git a/dosagelib/plugins/smackjeeves.py b/dosagelib/plugins/smackjeeves.py index fe14ace53..16ec20771 100644 --- a/dosagelib/plugins/smackjeeves.py +++ b/dosagelib/plugins/smackjeeves.py @@ -44,9 +44,8 @@ class _SmackJeeves(_ParserScraper): broken_html_bugfix = True - @property - def name(self): - return 'SmackJeeves/' + super(_SmackJeeves, self).name[2:] + def __init__(self, name): + super(_SmackJeeves, self).__init__('SmackJeeves/' + name[2:]) @property def url(self): diff --git a/dosagelib/plugins/snafu.py b/dosagelib/plugins/snafu.py index 88292f9f8..cad24d30b 100644 --- a/dosagelib/plugins/snafu.py +++ b/dosagelib/plugins/snafu.py @@ -16,9 +16,8 @@ class _Snafu(_ParserScraper): latestSearch = '//div[@id="feed"]/a' starter = indirectStarter - @property - def name(self): - return 'SnafuComics/' + super(_Snafu, self).name + def __init__(self, name): + super(_Snafu, self).__init__('SnafuComics/' + name) def namer(self, image_url, page_url): year, month, name = image_url.rsplit('/', 3)[1:] diff --git a/dosagelib/plugins/webcomiceu.py b/dosagelib/plugins/webcomiceu.py index ec20fa2bc..db0215e5d 100644 --- a/dosagelib/plugins/webcomiceu.py +++ b/dosagelib/plugins/webcomiceu.py @@ -13,9 +13,8 @@ class _WebcomicEu(_ParserScraper): prevSearch = '//a[img[contains(@src, "navi-zurueck")]]' help = 'Index format: number' - @property - def name(self): - return 'WebcomicEu/' + super(_WebcomicEu, self).name + def __init__(self, name): + super(_WebcomicEu, self).__init__('WebcomicEu/' + name) @property def url(self): diff --git a/dosagelib/plugins/wlpcomics.py b/dosagelib/plugins/wlpcomics.py index 28ed496cc..00bb586cc 100644 --- a/dosagelib/plugins/wlpcomics.py +++ b/dosagelib/plugins/wlpcomics.py @@ -16,9 +16,8 @@ class _WLPComics(_ParserScraper): starter = bounceStarter help = 'Index format: nnn' - @property - def name(self): - return 'WLP/' + super(_WLPComics, self).name + def __init__(self, name): + super(_WLPComics, self).__init__('WLP/' + name) def namer(self, image_url, page_url): return (page_url.rsplit('/', 1)[-1].split('.')[0] + '_' + diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py index b4b3ee2e2..cde2bc1b4 100644 --- a/dosagelib/scraper.py +++ b/dosagelib/scraper.py @@ -82,6 +82,10 @@ class Scraper(object): # HTTP session for configuration & cookies session = requests_session() + @classmethod + def getmodules(cls): + return [cls(cls.__name__)] + @property def indexes(self): return self._indexes @@ -91,8 +95,9 @@ class Scraper(object): if val: self._indexes = tuple(sorted(val)) - def __init__(self): + def __init__(self, name): """Initialize internal variables.""" + self.name = name self.urls = set() self._indexes = tuple() self.skippedUrls = set() @@ -222,11 +227,6 @@ class Scraper(object): """Get comic strip URL from index.""" return self.stripUrl % index - @property - def name(self): - """Get scraper name.""" - return self.__class__.__name__ - def starter(self): """Get starter URL from where to scrape comic strips.""" return self.url @@ -563,10 +563,12 @@ def get_scrapers(): if _scrapers is None: out.debug(u"Loading comic modules...") modules = loader.get_modules('plugins') - plugins = loader.get_plugins(modules, Scraper) - _scrapers = sorted([x() for x in plugins], key=lambda p: p.name) + plugins = list(loader.get_plugins(modules, Scraper)) + _scrapers = sorted([m for x in plugins for m in x.getmodules()], + key=lambda p: p.name) check_scrapers() - out.debug(u"... %d modules loaded." % len(_scrapers)) + out.debug(u"... %d modules loaded from %d classes." % ( + len(_scrapers), len(plugins))) return _scrapers diff --git a/tests/test_vote.py b/tests/test_vote.py index 8b8f8da37..e38bbf12a 100644 --- a/tests/test_vote.py +++ b/tests/test_vote.py @@ -1,16 +1,19 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2013-2014 Bastian Kleineidam -# Copyright (C) 2016 Tobias Gruetzmacher +# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs +# Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher + +from __future__ import absolute_import, division, print_function from dosagelib import scraper class ATestScraper(scraper._BasicScraper): - name = 'Test_Test' + pass class TestVote(object): def test_vote(self): - answer = ATestScraper().vote() + answer = ATestScraper('Test_Test').vote() assert answer in ('counted', 'no'), 'invalid answer %r' % answer