New module for ComicSherpa (removed from GoComics)
This commit is contained in:
parent
8a89246d88
commit
061efaac6e
2 changed files with 271 additions and 0 deletions
228
dosagelib/plugins/comicsherpa.py
Normal file
228
dosagelib/plugins/comicsherpa.py
Normal file
|
@ -0,0 +1,228 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from ..scraper import _ParserScraper
|
||||
|
||||
|
||||
class ComicSherpa(_ParserScraper):
    """Scraper for the comics served from www.comicssherpa.com.

    Every comic is identified by a short site id that is passed as the
    ``uc_comic`` query parameter of the feature page.
    """

    url = 'http://www.comicssherpa.com/site/'
    # Strip image: any <img> whose source path contains "/comics/".
    imageSearch = '//img[contains(@src, "/comics/")]'
    # Link to the previous strip: the anchor labelled "previous day".
    prevSearch = '//a[text()="previous day"]'
    help = 'Index format: yyyymmdd'

    def __init__(self, name, path):
        """Create the scraper for one comic.

        name -- comic name, registered as 'ComicSherpa/<name>'
        path -- site id of the comic, appended to the feature URL
        """
        scraper_name = 'ComicSherpa/' + name
        super(ComicSherpa, self).__init__(scraper_name)
        # Set after the base initialiser, replacing the class-level URL
        # with the feature page of this particular comic.
        self.url = 'http://www.comicssherpa.com/site/feature?uc_comic=' + path

    def getIndexStripUrl(self, index):
        """Return the feature URL with ``index`` as the uc_full_date value."""
        date_param = '&uc_full_date=%s' % index
        return self.url + date_param

    @classmethod
    def getmodules(cls):
        """Return a tuple with one scraper instance per listed comic."""
        return (
            # do not edit anything below since these entries are generated from
            # scripts/comicsherpa.py
            # START AUTOUPDATE
            cls('060', 'csadl'),
            cls('AaronGuile', 'csdsf'),
            cls('ABCStreet', 'csmbx'),
            cls('ABitSketch', 'csxmy'),
            cls('ABomb', 'csvur'),
            cls('ACMEINKD', 'csmwt'),
            cls('AcornPark', 'csdfe'),
            cls('Adulting', 'cskky'),
            cls('AJAndMagnus', 'csrxy'),
            cls('AlisonWard', 'cspgh'),
            cls('AllInGoodTime', 'csjhr'),
            cls('AmandaTheGreat', 'cssyr'),
            cls('AndNow', 'csnxr'),
            cls('Anecdote', 'cspmf'),
            cls('AnimalMitchell', 'csdnm'),
            cls('AnneAndPythagoras', 'csokq'),
            cls('AppleCreekComics', 'cstgq'),
            cls('ATasteOfTimes', 'csprn'),
            cls('BatchRejection', 'csgny'),
            cls('Bazoobee', 'csfos'),
            cls('BeMisery', 'csiiq'),
            cls('BeneathTheFerns', 'csgzn'),
            cls('BigJim', 'csiao'),
            cls('Bluebonnets', 'cston'),
            cls('BlueSkiesToons', 'csfoy'),
            cls('BobsYourUncle', 'csmxz'),
            cls('BoltsAndNuts', 'csnab'),
            cls('Bork', 'csczn'),
            cls('BottAuto', 'csmwz'),
            cls('BUNS', 'csbft'),
            cls('Bushscrubs', 'csmzx'),
            cls('CAFFEINATED', 'csbmv'),
            cls('CandacenCompany', 'csvpd'),
            cls('CarteBlanche', 'csnwk'),
            cls('CharmysArmy', 'cswrl'),
            cls('CleoAndCompany', 'cscwy'),
            cls('Complex', 'csusy'),
            cls('CourageousManAdventures', 'csgkn'),
            cls('DadsDay', 'cswly'),
            cls('DBCartoons', 'csnvt'),
            cls('DevinCraneComicStripGhostwriter', 'csadf'),
            cls('DoghouseInYourSoul', 'cstwx'),
            cls('DontPickTheFlowers', 'cswfs'),
            cls('Dragin', 'cswgz'),
            cls('DrWhiskers', 'cswvl'),
            cls('DumbQuestionBadAnswer', 'cskro'),
            cls('DungeonHordes', 'csnlo'),
            cls('DustSpecks', 'csqgq'),
            cls('DutchnPals', 'cskqc'),
            cls('Dysconnected', 'csxbc'),
            cls('Econogirl', 'csxoj'),
            cls('EightballEyeball', 'csnfh'),
            cls('Elmo', 'csvff'),
            cls('Endangered', 'cshii'),
            cls('Experiment42', 'csbjr'),
            cls('FamousAndNotSoFamousQuotes', 'csdgz'),
            cls('FarOut', 'csaem'),
            cls('FatherOfTheBrood', 'csuul'),
            cls('FloydAndTony', 'cszgj'),
            cls('FoolsParadise', 'csvnw'),
            cls('FrankAndSteinway', 'cseui'),
            cls('FriedCritter', 'cshtp'),
            cls('GarciaCartoonCo', 'csyuw'),
            cls('GIRTH', 'csbjw'),
            cls('GrandmaSnoops', 'csscq'),
            cls('GrannyAnny', 'cskpg'),
            cls('Gravy', 'csgvd'),
            cls('GreenPieces', 'csnwy'),
            cls('GunstonStreet', 'csgru'),
            cls('HallEditorialCartoons', 'csgzx'),
            cls('HaloAndHorns', 'csgub'),
            cls('HaphazardHumor', 'cspsa'),
            cls('Headcheese', 'cspku'),
            cls('Hogwashed', 'csbnf'),
            cls('HomeLife', 'csrbv'),
            cls('Hubbel', 'cszrr'),
            cls('HugoComics', 'csdwl'),
            cls('HurrieTheMisManager', 'cssri'),
            cls('HuskyTales', 'cslnp'),
            cls('InkwellForest', 'csmuk'),
            cls('IronyOr', 'csddz'),
            cls('ItsJustJim', 'cszos'),
            cls('JolleyStuffBrowser', 'csjpq'),
            cls('KALEECHIKORNERS', 'cshdw'),
            cls('KartoonsByKline', 'csoei'),
            cls('LaffToons', 'cssvj'),
            cls('LiliAndDerek', 'csvsy'),
            cls('LilleysSillies', 'cstka'),
            cls('LimboRoad', 'csfpp'),
            cls('Loose', 'csmyn'),
            cls('LumAndAbner', 'cscji'),
            cls('MadDogGhettoCop', 'cskwp'),
            cls('MarysNature', 'csogt'),
            cls('Millennialville', 'csxrl'),
            cls('Milton50', 'csmof'),
            cls('Mindframe', 'csqnp'),
            cls('Minihahas', 'csoat'),
            cls('MiscSoup', 'csguq'),
            cls('MisterAndMe', 'csvhr'),
            cls('MockAll', 'csrds'),
            cls('Moments', 'csnso'),
            cls('Mongrels', 'csbjo'),
            cls('MortsIsland', 'csfyq'),
            cls('MySonIsADog', 'csfec'),
            cls('NavyBean', 'csfiq'),
            cls('NoAmbiguity', 'csryw'),
            cls('NoBusinessIKnow', 'csmfg'),
            cls('NoOrdinaryLife', 'csicr'),
            cls('Npchumorcom', 'csbuv'),
            cls('OneFunnyGoldenRetriever', 'csnrf'),
            cls('ONIONAndPEA', 'cstsr'),
            cls('OscarAndAnnie', 'csczw'),
            cls('OverQuirked', 'cspes'),
            cls('PaddedCell', 'csxqk'),
            cls('Painterly', 'csuya'),
            cls('PalAndBuddy', 'csjut'),
            cls('PawsForThoughtComics', 'csced'),
            cls('Peeples', 'csnkd'),
            cls('PeopleOfEarth', 'csjqa'),
            cls('PicpakDog', 'cstmm'),
            cls('PirateMike', 'csxcb'),
            cls('PoliceLimit', 'cspcc'),
            cls('PoliticularJokesAndRuffus', 'csmvz'),
            cls('Prideland', 'csaoa'),
            cls('PrimusTheBadPhilosopher', 'csofd'),
            cls('ProfessorHerbertAndGEO', 'cscje'),
            cls('QueenBlackbeard', 'csecq'),
            cls('QuickDraw', 'csydp'),
            cls('RandysRationale', 'cshsw'),
            cls('Ringers', 'csxhx'),
            cls('RonWarren', 'csuwd'),
            cls('SandSharkBeach', 'cssqk'),
            cls('SharpCurveComics', 'csyek'),
            cls('SherpaAid', 'csvku'),
            cls('SignGarden', 'csbxu'),
            cls('SignsOfAFrustratedGolfer', 'csxdy'),
            cls('Skull', 'csdxo'),
            cls('Skylarking', 'csyac'),
            cls('SleepytownBeagles', 'cssbk'),
            cls('SmallNerdyCreatures', 'cshqb'),
            cls('Smith', 'csmdx'),
            cls('Snootle', 'cseic'),
            cls('SoccerDude', 'csnnb'),
            cls('SoccerEarth', 'csdma'),
            cls('SOD', 'cszdh'),
            cls('SomethingAboutCeleste', 'csgtv'),
            cls('SookyRottweiler', 'csegu'),
            cls('Spaceport51', 'csbyh'),
            cls('SportsByVoort', 'cskin'),
            cls('StaleCrackers', 'csngu'),
            cls('StankoAndTibor', 'csurl'),
            cls('Strangeville', 'cskps'),
            cls('SubSub', 'csvcv'),
            cls('SuburbanFairyTales', 'cscek'),
            cls('SUITSANDGUARDERS', 'cssag'),
            cls('SuperSiblings', 'csdxj'),
            cls('TheBeauforts', 'csfxu'),
            cls('TheBellies', 'csubt'),
            cls('TheBoobiehatch', 'csoev'),
            cls('TheCardinal', 'csfjg'),
            cls('TheDinkledorfs', 'cszhp'),
            cls('TheEntrepiranha', 'cslml'),
            cls('TheFabulousBushPigs', 'cscqi'),
            cls('TheGrayZone', 'csmue'),
            cls('TheGreenMonkeys', 'cscue'),
            cls('TheMagicForest', 'csjts'),
            cls('TheMothManAndLarvaeBoy', 'csycu'),
            cls('TheMountainMen', 'cskqw'),
            cls('TheNeighborhood', 'csrtu'),
            cls('TheNevilleYouKnow', 'csnov'),
            cls('TheNonsenseNewz', 'csghu'),
            cls('TheOldManAndHisDog', 'csudu'),
            cls('TheQuinnAndFinnShow', 'csynn'),
            cls('TheRocks', 'cswky'),
            cls('TheUnemployed', 'csanx'),
            cls('TheWagesOfSindy', 'cszff'),
            cls('Thingsesque', 'cstsq'),
            cls('TodaysTrump', 'csbrj'),
            cls('TopicToons', 'csgly'),
            cls('ToughTown', 'csnjp'),
            cls('ToxicValues', 'csyig'),
            cls('TruthBeKnown', 'csfwi'),
            cls('TuesdaysWithCory', 'csiea'),
            cls('Underdone', 'csayl'),
            cls('UnMannerlyWays', 'csjmh'),
            cls('ViewFromTheCouch', 'csipm'),
            cls('VoicesInTheDark', 'csyou'),
            cls('WarpedAndDemented', 'csbgw'),
            cls('Waskataskahiskewaskewan', 'cssfg'),
            cls('WayOutComics', 'cstrs'),
            cls('WeaselInk', 'csfsn'),
            cls('WhiskeyFalls', 'csitw'),
            cls('Windsock', 'csywy'),
            cls('WrobbertCartoons', 'csupg'),
            cls('YinYangster', 'csteo'),
            cls('ZombieHeights', 'cswjq'),
            cls('Zootopia', 'csquz'),
            # END AUTOUPDATE
        )
|
43
scripts/comicsherpa.py
Executable file
43
scripts/comicsherpa.py
Executable file
|
@ -0,0 +1,43 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
||||
"""
|
||||
Script to get a list of ComicSherpa comics and save the info in a JSON file for
|
||||
further processing.
|
||||
"""
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from scriptutil import ComicListUpdater
|
||||
|
||||
|
||||
class ComicSherpaUpdater(ComicListUpdater):
    """Collect the comics linked from the Comics Sherpa home page.

    collect_results() registers each comic via add_comic(); get_entry()
    renders one comic as a ``cls('<name>', '<id>'),`` source line.
    """

    # names of comics to exclude
    excluded_comics = (
        # missing images
        'Pi',
        'Rufus',

        # too short
        'BillyAndCo',
        'BuffaloChips',
        'Crawdiddy',
        'NewFeature',
    )

    def collect_results(self):
        """Parse the home page and register every usable comic link.

        A link is usable when its href carries a query value (the comic id)
        and the anchor has a text label (the comic name). Anything else is
        skipped instead of aborting the whole run.
        """
        data = self.get_url('http://www.comicssherpa.com/site/home.html', expand=False)

        for comiclink in data.xpath('//a[contains(@href, "site/feature")]'):
            href = comiclink.attrib['href']
            # The comic id is the value after the first '='. partition()
            # cannot raise on a link without a query (split('=')[1] would),
            # and cutting at '&' keeps a following parameter out of the id.
            _, has_value, value = href.partition('=')
            link = value.split('&', 1)[0]
            name = comiclink.text
            if not has_value or not link or not name:
                # No id or no label to build an entry from.
                # NOTE(review): .text is presumably None for anchors that
                # wrap only an image -- confirm against the live page.
                continue
            self.add_comic(name, link)

    def get_entry(self, name, url):
        """Return the source line for one comic.

        name -- comic name, used as first argument of cls()
        url  -- comic id as collected by collect_results()
        """
        return u"cls('%s', '%s')," % (name, url)
|
||||
|
||||
|
||||
# Script entry point: build the updater for this script file and run it.
if __name__ == '__main__':
    updater = ComicSherpaUpdater(__file__)
    updater.run()
|
Loading…
Reference in a new issue