From 061efaac6e06a60564afcfeca50fa5efc3db8333 Mon Sep 17 00:00:00 2001
From: Tobias Gruetzmacher
Date: Wed, 11 Jan 2017 01:34:52 +0100
Subject: [PATCH] New module for ComicSherpa (removed from GoComics)

---
Review notes (commentary only; not part of the commit message or the diff):

* This copy of the patch had lost its line breaks and indentation. Both were
  restored; the added lines are otherwise unchanged. The reconstructed files
  have 228 and 43 lines, matching the hunk headers and the diffstat below.
  Indentation was rebuilt as 4-space levels -- TODO confirm against the
  original commit.
* The "From:" header carries no e-mail address in this copy (presumably
  stripped); restore it before feeding the patch to "git am".
* dosagelib/plugins/comicsherpa.py: ComicSherpa.__init__(name, path) names the
  scraper 'ComicSherpa/' + name and builds its URL from the 'uc_comic' id in
  path; getIndexStripUrl() appends '&uc_full_date=<index>' (yyyymmdd).
* NOTE(review): scripts/comicsherpa.py collect_results() takes
  href.split('=')[1], which assumes the comic id is the first query value of
  each link and that every matched href contains '=' -- confirm against the
  site markup.
* NOTE(review): comiclink.text is passed to add_comic() unchecked; presumably
  it can be None for anchors without leading text -- verify.

 dosagelib/plugins/comicsherpa.py | 228 +++++++++++++++++++++++++++++++
 scripts/comicsherpa.py           |  43 ++++++
 2 files changed, 271 insertions(+)
 create mode 100644 dosagelib/plugins/comicsherpa.py
 create mode 100755 scripts/comicsherpa.py

diff --git a/dosagelib/plugins/comicsherpa.py b/dosagelib/plugins/comicsherpa.py
new file mode 100644
index 000000000..5358a0a3f
--- /dev/null
+++ b/dosagelib/plugins/comicsherpa.py
@@ -0,0 +1,228 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
+# Copyright (C) 2012-2014 Bastian Kleineidam
+# Copyright (C) 2015-2017 Tobias Gruetzmacher
+
+from __future__ import absolute_import, division, print_function
+
+from ..scraper import _ParserScraper
+
+
+class ComicSherpa(_ParserScraper):
+    url = 'http://www.comicssherpa.com/site/'
+    imageSearch = '//img[contains(@src, "/comics/")]'
+    prevSearch = '//a[text()="previous day"]'
+    help = 'Index format: yyyymmdd'
+
+    def __init__(self, name, path):
+        super(ComicSherpa, self).__init__('ComicSherpa/' + name)
+        self.url = 'http://www.comicssherpa.com/site/feature?uc_comic=' + path
+
+    def getIndexStripUrl(self, index):
+        return self.url + '&uc_full_date=%s' % index
+
+    @classmethod
+    def getmodules(cls):
+        return (
+            # do not edit anything below since these entries are generated from
+            # scripts/comicsherpa.py
+            # START AUTOUPDATE
+            cls('060', 'csadl'),
+            cls('AaronGuile', 'csdsf'),
+            cls('ABCStreet', 'csmbx'),
+            cls('ABitSketch', 'csxmy'),
+            cls('ABomb', 'csvur'),
+            cls('ACMEINKD', 'csmwt'),
+            cls('AcornPark', 'csdfe'),
+            cls('Adulting', 'cskky'),
+            cls('AJAndMagnus', 'csrxy'),
+            cls('AlisonWard', 'cspgh'),
+            cls('AllInGoodTime', 'csjhr'),
+            cls('AmandaTheGreat', 'cssyr'),
+            cls('AndNow', 'csnxr'),
+            cls('Anecdote', 'cspmf'),
+            cls('AnimalMitchell', 'csdnm'),
+            cls('AnneAndPythagoras', 'csokq'),
+            cls('AppleCreekComics', 'cstgq'),
+            cls('ATasteOfTimes', 'csprn'),
+            cls('BatchRejection', 'csgny'),
+            cls('Bazoobee', 'csfos'),
+            cls('BeMisery', 'csiiq'),
+            cls('BeneathTheFerns', 'csgzn'),
+            cls('BigJim', 'csiao'),
+            cls('Bluebonnets', 'cston'),
+            cls('BlueSkiesToons', 'csfoy'),
+            cls('BobsYourUncle', 'csmxz'),
+            cls('BoltsAndNuts', 'csnab'),
+            cls('Bork', 'csczn'),
+            cls('BottAuto', 'csmwz'),
+            cls('BUNS', 'csbft'),
+            cls('Bushscrubs', 'csmzx'),
+            cls('CAFFEINATED', 'csbmv'),
+            cls('CandacenCompany', 'csvpd'),
+            cls('CarteBlanche', 'csnwk'),
+            cls('CharmysArmy', 'cswrl'),
+            cls('CleoAndCompany', 'cscwy'),
+            cls('Complex', 'csusy'),
+            cls('CourageousManAdventures', 'csgkn'),
+            cls('DadsDay', 'cswly'),
+            cls('DBCartoons', 'csnvt'),
+            cls('DevinCraneComicStripGhostwriter', 'csadf'),
+            cls('DoghouseInYourSoul', 'cstwx'),
+            cls('DontPickTheFlowers', 'cswfs'),
+            cls('Dragin', 'cswgz'),
+            cls('DrWhiskers', 'cswvl'),
+            cls('DumbQuestionBadAnswer', 'cskro'),
+            cls('DungeonHordes', 'csnlo'),
+            cls('DustSpecks', 'csqgq'),
+            cls('DutchnPals', 'cskqc'),
+            cls('Dysconnected', 'csxbc'),
+            cls('Econogirl', 'csxoj'),
+            cls('EightballEyeball', 'csnfh'),
+            cls('Elmo', 'csvff'),
+            cls('Endangered', 'cshii'),
+            cls('Experiment42', 'csbjr'),
+            cls('FamousAndNotSoFamousQuotes', 'csdgz'),
+            cls('FarOut', 'csaem'),
+            cls('FatherOfTheBrood', 'csuul'),
+            cls('FloydAndTony', 'cszgj'),
+            cls('FoolsParadise', 'csvnw'),
+            cls('FrankAndSteinway', 'cseui'),
+            cls('FriedCritter', 'cshtp'),
+            cls('GarciaCartoonCo', 'csyuw'),
+            cls('GIRTH', 'csbjw'),
+            cls('GrandmaSnoops', 'csscq'),
+            cls('GrannyAnny', 'cskpg'),
+            cls('Gravy', 'csgvd'),
+            cls('GreenPieces', 'csnwy'),
+            cls('GunstonStreet', 'csgru'),
+            cls('HallEditorialCartoons', 'csgzx'),
+            cls('HaloAndHorns', 'csgub'),
+            cls('HaphazardHumor', 'cspsa'),
+            cls('Headcheese', 'cspku'),
+            cls('Hogwashed', 'csbnf'),
+            cls('HomeLife', 'csrbv'),
+            cls('Hubbel', 'cszrr'),
+            cls('HugoComics', 'csdwl'),
+            cls('HurrieTheMisManager', 'cssri'),
+            cls('HuskyTales', 'cslnp'),
+            cls('InkwellForest', 'csmuk'),
+            cls('IronyOr', 'csddz'),
+            cls('ItsJustJim', 'cszos'),
+            cls('JolleyStuffBrowser', 'csjpq'),
+            cls('KALEECHIKORNERS', 'cshdw'),
+            cls('KartoonsByKline', 'csoei'),
+            cls('LaffToons', 'cssvj'),
+            cls('LiliAndDerek', 'csvsy'),
+            cls('LilleysSillies', 'cstka'),
+            cls('LimboRoad', 'csfpp'),
+            cls('Loose', 'csmyn'),
+            cls('LumAndAbner', 'cscji'),
+            cls('MadDogGhettoCop', 'cskwp'),
+            cls('MarysNature', 'csogt'),
+            cls('Millennialville', 'csxrl'),
+            cls('Milton50', 'csmof'),
+            cls('Mindframe', 'csqnp'),
+            cls('Minihahas', 'csoat'),
+            cls('MiscSoup', 'csguq'),
+            cls('MisterAndMe', 'csvhr'),
+            cls('MockAll', 'csrds'),
+            cls('Moments', 'csnso'),
+            cls('Mongrels', 'csbjo'),
+            cls('MortsIsland', 'csfyq'),
+            cls('MySonIsADog', 'csfec'),
+            cls('NavyBean', 'csfiq'),
+            cls('NoAmbiguity', 'csryw'),
+            cls('NoBusinessIKnow', 'csmfg'),
+            cls('NoOrdinaryLife', 'csicr'),
+            cls('Npchumorcom', 'csbuv'),
+            cls('OneFunnyGoldenRetriever', 'csnrf'),
+            cls('ONIONAndPEA', 'cstsr'),
+            cls('OscarAndAnnie', 'csczw'),
+            cls('OverQuirked', 'cspes'),
+            cls('PaddedCell', 'csxqk'),
+            cls('Painterly', 'csuya'),
+            cls('PalAndBuddy', 'csjut'),
+            cls('PawsForThoughtComics', 'csced'),
+            cls('Peeples', 'csnkd'),
+            cls('PeopleOfEarth', 'csjqa'),
+            cls('PicpakDog', 'cstmm'),
+            cls('PirateMike', 'csxcb'),
+            cls('PoliceLimit', 'cspcc'),
+            cls('PoliticularJokesAndRuffus', 'csmvz'),
+            cls('Prideland', 'csaoa'),
+            cls('PrimusTheBadPhilosopher', 'csofd'),
+            cls('ProfessorHerbertAndGEO', 'cscje'),
+            cls('QueenBlackbeard', 'csecq'),
+            cls('QuickDraw', 'csydp'),
+            cls('RandysRationale', 'cshsw'),
+            cls('Ringers', 'csxhx'),
+            cls('RonWarren', 'csuwd'),
+            cls('SandSharkBeach', 'cssqk'),
+            cls('SharpCurveComics', 'csyek'),
+            cls('SherpaAid', 'csvku'),
+            cls('SignGarden', 'csbxu'),
+            cls('SignsOfAFrustratedGolfer', 'csxdy'),
+            cls('Skull', 'csdxo'),
+            cls('Skylarking', 'csyac'),
+            cls('SleepytownBeagles', 'cssbk'),
+            cls('SmallNerdyCreatures', 'cshqb'),
+            cls('Smith', 'csmdx'),
+            cls('Snootle', 'cseic'),
+            cls('SoccerDude', 'csnnb'),
+            cls('SoccerEarth', 'csdma'),
+            cls('SOD', 'cszdh'),
+            cls('SomethingAboutCeleste', 'csgtv'),
+            cls('SookyRottweiler', 'csegu'),
+            cls('Spaceport51', 'csbyh'),
+            cls('SportsByVoort', 'cskin'),
+            cls('StaleCrackers', 'csngu'),
+            cls('StankoAndTibor', 'csurl'),
+            cls('Strangeville', 'cskps'),
+            cls('SubSub', 'csvcv'),
+            cls('SuburbanFairyTales', 'cscek'),
+            cls('SUITSANDGUARDERS', 'cssag'),
+            cls('SuperSiblings', 'csdxj'),
+            cls('TheBeauforts', 'csfxu'),
+            cls('TheBellies', 'csubt'),
+            cls('TheBoobiehatch', 'csoev'),
+            cls('TheCardinal', 'csfjg'),
+            cls('TheDinkledorfs', 'cszhp'),
+            cls('TheEntrepiranha', 'cslml'),
+            cls('TheFabulousBushPigs', 'cscqi'),
+            cls('TheGrayZone', 'csmue'),
+            cls('TheGreenMonkeys', 'cscue'),
+            cls('TheMagicForest', 'csjts'),
+            cls('TheMothManAndLarvaeBoy', 'csycu'),
+            cls('TheMountainMen', 'cskqw'),
+            cls('TheNeighborhood', 'csrtu'),
+            cls('TheNevilleYouKnow', 'csnov'),
+            cls('TheNonsenseNewz', 'csghu'),
+            cls('TheOldManAndHisDog', 'csudu'),
+            cls('TheQuinnAndFinnShow', 'csynn'),
+            cls('TheRocks', 'cswky'),
+            cls('TheUnemployed', 'csanx'),
+            cls('TheWagesOfSindy', 'cszff'),
+            cls('Thingsesque', 'cstsq'),
+            cls('TodaysTrump', 'csbrj'),
+            cls('TopicToons', 'csgly'),
+            cls('ToughTown', 'csnjp'),
+            cls('ToxicValues', 'csyig'),
+            cls('TruthBeKnown', 'csfwi'),
+            cls('TuesdaysWithCory', 'csiea'),
+            cls('Underdone', 'csayl'),
+            cls('UnMannerlyWays', 'csjmh'),
+            cls('ViewFromTheCouch', 'csipm'),
+            cls('VoicesInTheDark', 'csyou'),
+            cls('WarpedAndDemented', 'csbgw'),
+            cls('Waskataskahiskewaskewan', 'cssfg'),
+            cls('WayOutComics', 'cstrs'),
+            cls('WeaselInk', 'csfsn'),
+            cls('WhiskeyFalls', 'csitw'),
+            cls('Windsock', 'csywy'),
+            cls('WrobbertCartoons', 'csupg'),
+            cls('YinYangster', 'csteo'),
+            cls('ZombieHeights', 'cswjq'),
+            cls('Zootopia', 'csquz'),
+            # END AUTOUPDATE
+        )
diff --git a/scripts/comicsherpa.py b/scripts/comicsherpa.py
new file mode 100755
index 000000000..63a80da33
--- /dev/null
+++ b/scripts/comicsherpa.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
+# Copyright (C) 2012-2014 Bastian Kleineidam
+# Copyright (C) 2015-2017 Tobias Gruetzmacher
+"""
+Script to get a list of ComicSherpa and save the info in a JSON file for
+further processing.
+"""
+from __future__ import absolute_import, division, print_function
+
+from scriptutil import ComicListUpdater
+
+
+class ComicSherpaUpdater(ComicListUpdater):
+    # names of comics to exclude
+    excluded_comics = (
+        # missing images
+        'Pi',
+        'Rufus',
+
+        # too short
+        'BillyAndCo',
+        'BuffaloChips',
+        'Crawdiddy',
+        'NewFeature',
+    )
+
+    def collect_results(self):
+        """Parse all listing pages."""
+        data = self.get_url('http://www.comicssherpa.com/site/home.html', expand=False)
+
+        for comiclink in data.xpath('//a[contains(@href, "site/feature")]'):
+            link = comiclink.attrib['href'].split('=')[1]
+            name = comiclink.text
+            self.add_comic(name, link)
+
+    def get_entry(self, name, url):
+        return u"cls('%s', '%s')," % (name, url)
+
+
+if __name__ == '__main__':
+    ComicSherpaUpdater(__file__).run()