Update GoComics module

This commit is contained in:
Tobias Gruetzmacher 2022-06-05 20:23:56 +02:00
parent 02bcd6b741
commit 0d8871b253
5 changed files with 55 additions and 42 deletions

View file

@ -120,7 +120,6 @@ class ComicsKingdom(_ParserScraper):
cls('SallyForth', 'sally-forth'),
cls('SamAndSilo', 'sam-and-silo'),
cls('SecretAgentX9', 'secret-agent-x-9'),
cls('ShermansLagoon', 'sherman-s-lagoon'),
# Shoe has a duplicate in GoComics/Shoe
cls('SixChix', 'six-chix'),
cls('SlylockFoxAndComicsForKids', 'slylock-fox-and-comics-for-kids'),

View file

@ -1,7 +1,7 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2021 Tobias Gruetzmacher
# Copyright (C) 2015-2022 Tobias Gruetzmacher
from ..scraper import _ParserScraper
from ..helpers import indirectStarter
@ -55,7 +55,6 @@ class GoComics(_ParserScraper):
cls('AlisHouse', 'alis-house'),
cls('AlleyOop', 'alley-oop'),
cls('AmandaTheGreat', 'amanda-the-great'),
cls('AmericanChopSuey', 'american-chop-suey'),
cls('Andertoons', 'andertoons'),
cls('AndyCapp', 'andycapp'),
cls('AngryLittleGirls', 'angry-little-girls'),
@ -63,17 +62,15 @@ class GoComics(_ParserScraper):
cls('Annie', 'annie'),
cls('AProblemLikeJamal', 'a-problem-like-jamal'),
cls('ArloAndJanis', 'arloandjanis'),
cls('AskACat', 'ask-a-cat'),
cls('AskShagg', 'askshagg'),
cls('AtTavicat', 'tavicat'),
cls('AuntyAcid', 'aunty-acid'),
cls('BabyTrump', 'baby-trump'),
cls('BabyBlues', 'babyblues'),
cls('BackInTheDay', 'backintheday'),
cls('BackToBC', 'back-to-bc'),
cls('Bacon', 'bacon'),
cls('Badlands', 'badlands'),
cls('BadMachinery', 'bad-machinery'),
cls('BadReporter', 'badreporter'),
cls('Baldo', 'baldo'),
cls('BaldoEnEspanol', 'baldoespanol', 'es'),
cls('BallardStreet', 'ballardstreet'),
@ -107,15 +104,16 @@ class GoComics(_ParserScraper):
cls('Boomerangs', 'boomerangs'),
cls('Bottomliners', 'bottomliners'),
cls('BoundAndGagged', 'boundandgagged'),
cls('Bozo', 'bozo'),
cls('BreakingCatNews', 'breaking-cat-news'),
cls('BreakOfDay', 'break-of-day'),
cls('Brevity', 'brevity'),
cls('BrewsterRockit', 'brewsterrockit'),
cls('BrianMcFadden', 'brian-mcfadden'),
cls('BroomHilda', 'broomhilda'),
cls('Buckles', 'buckles'),
cls('Bully', 'bully'),
cls('Buni', 'buni'),
cls('BushyTales', 'bushy-tales'),
cls('CalvinAndHobbes', 'calvinandhobbes'),
cls('CalvinAndHobbesEnEspanol', 'calvinandhobbesespanol', 'es'),
cls('Candorville', 'candorville'),
@ -125,7 +123,6 @@ class GoComics(_ParserScraper):
cls('CatsCafe', 'cats-cafe'),
cls('CattitudeDoggonit', 'cattitude-doggonit'),
cls('CestLaVie', 'cestlavie'),
cls('CheapThrillsCuisine', 'cheap-thrills-cuisine'),
cls('CheerUpEmoKid', 'cheer-up-emo-kid'),
cls('ChipBok', 'chipbok'),
cls('ChrisBritt', 'chrisbritt'),
@ -146,7 +143,6 @@ class GoComics(_ParserScraper):
cls('CulDeSac', 'culdesac'),
cls('DaddysHome', 'daddyshome'),
cls('DanaSummers', 'danasummers'),
cls('DanWasserman', 'danwasserman'),
cls('DarkSideOfTheHorse', 'darksideofthehorse'),
cls('DeepDarkFears', 'deep-dark-fears'),
cls('DeFlocked', 'deflocked'),
@ -210,8 +206,8 @@ class GoComics(_ParserScraper):
cls('GingerMeggs', 'gingermeggs'),
cls('GingerMeggsEnEspanol', 'gingermeggs-espanol', 'es'),
cls('GlasbergenCartoons', 'glasbergen-cartoons'),
cls('Globetrotter', 'globetrotter'),
cls('GManWebcomics', 'g-man-webcomics'),
cls('GnomeSyndicate', 'gnome-syndicate'),
cls('Goats', 'goats'),
cls('GrandAvenue', 'grand-avenue'),
cls('GrayMatters', 'gray-matters'),
@ -227,13 +223,11 @@ class GoComics(_ParserScraper):
cls('Herman', 'herman'),
cls('HomeAndAway', 'homeandaway'),
cls('HotComicsForCoolPeople', 'hot-comics-for-cool-people'),
cls('HUBRIS', 'hubris'),
cls('HutchOwen', 'hutch-owen'),
cls('ImagineThis', 'imaginethis'),
cls('ImogenQuest', 'imogen-quest'),
cls('InkPen', 'inkpen'),
cls('InSecurity', 'in-security'),
cls('InspectorDangersCrimeQuiz', 'inspector-dangers-crime-quiz'),
cls('InTheBleachers', 'inthebleachers'),
cls('InTheSticks', 'inthesticks'),
cls('InvisibleBread', 'invisible-bread'),
@ -246,16 +240,12 @@ class GoComics(_ParserScraper):
cls('JenSorensen', 'jen-sorensen'),
cls('JimBentonCartoons', 'jim-benton-cartoons'),
cls('JimMorin', 'jimmorin'),
cls('JimsJournal', 'jimsjournal'),
cls('JoeHeller', 'joe-heller'),
cls('JoelPett', 'joelpett'),
cls('JoeVanilla', 'joevanilla'),
cls('JoeyAlisonSayersComics', 'joey-alison-sayers-comics'),
cls('JohnDeering', 'johndeering'),
cls('JumpStart', 'jumpstart'),
cls('JunkDrawer', 'junk-drawer'),
cls('JustoYFranco', 'justo-y-franco', 'es'),
cls('KenCatalino', 'kencatalino'),
cls('KevinKallaugher', 'kal'),
cls('KevinNecessaryEditorialCartoons', 'kevin-necessary-editorial-cartoons'),
cls('KidBeowulf', 'kid-beowulf'),
@ -296,9 +286,9 @@ class GoComics(_ParserScraper):
cls('LugNuts', 'lug-nuts'),
cls('Lunarbaboon', 'lunarbaboon'),
cls('M2Bulls', 'm2bulls'),
cls('Magnificatz', 'magnificatz'),
cls('Maintaining', 'maintaining'),
cls('MakingIt', 'making-it'),
cls('MannequinOnTheMoon', 'mannequin-on-the-moon'),
cls('MariasDay', 'marias-day'),
cls('Marmaduke', 'marmaduke'),
cls('MarshallRamsey', 'marshallramsey'),
@ -313,15 +303,13 @@ class GoComics(_ParserScraper):
cls('MikeLester', 'mike-lester'),
cls('MikeLuckovich', 'mikeluckovich'),
cls('MissPeach', 'miss-peach'),
cls('Mo', 'mo'),
cls('ModeratelyConfused', 'moderately-confused'),
cls('Momma', 'momma'),
cls('MomsCancer', 'moms-cancer'),
cls('Monty', 'monty'),
cls('MontyDiaros', 'monty-diaros', 'es'),
cls('MotleyClassics', 'motley-classics'),
cls('MrLowe', 'mr-lowe'),
cls('MustardAndBoloney', 'mustard-and-boloney'),
cls('MtPleasant', 'mtpleasant'),
cls('MuttAndJeff', 'muttandjeff'),
cls('MyDadIsDracula', 'my-dad-is-dracula'),
cls('MythTickle', 'mythtickle'),
@ -360,12 +348,12 @@ class GoComics(_ParserScraper):
cls('Periquita', 'periquita', 'es'),
cls('PerlasParaLosCerdos', 'perlas-para-los-cerdos', 'es'),
cls('PerryBibleFellowship', 'perry-bible-fellowship'),
cls('PetuniaAndDre', 'petunia-and-dre'),
cls('PhilHands', 'phil-hands'),
cls('PhoebeAndHerUnicorn', 'phoebe-and-her-unicorn'),
cls('Pibgorn', 'pibgorn'),
cls('PibgornSketches', 'pibgornsketches'),
cls('Pickles', 'pickles'),
cls('PirateMike', 'pirate-mike'),
cls('PleaseListenToMe', 'please-listen-to-me'),
cls('Pluggers', 'pluggers'),
cls('PoochCafe', 'poochcafe'),
@ -374,7 +362,6 @@ class GoComics(_ParserScraper):
cls('PotShots', 'pot-shots'),
cls('PreTeena', 'preteena'),
cls('PricklyCity', 'pricklycity'),
cls('PromisesPromises', 'promises-promises'),
cls('QuestionableQuotebook', 'questionable-quotebook'),
cls('RabbitsAgainstMagic', 'rabbitsagainstmagic'),
cls('RaisingDuncan', 'raising-duncan'),
@ -392,9 +379,11 @@ class GoComics(_ParserScraper):
cls('RobertAriail', 'robert-ariail'),
cls('RobRogers', 'robrogers'),
cls('Rosebuds', 'rosebuds'),
cls('RosebudsEnEspanol', 'rosebuds-en-espanol'),
cls('RoseIsRose', 'roseisrose'),
cls('Rubes', 'rubes'),
cls('RudyPark', 'rudypark'),
cls('SaltNPepper', 'salt-n-pepper'),
cls('SarahsScribbles', 'sarahs-scribbles'),
cls('SaturdayMorningBreakfastCereal', 'saturday-morning-breakfast-cereal'),
cls('SavageChickens', 'savage-chickens'),
@ -402,6 +391,7 @@ class GoComics(_ParserScraper):
cls('ScenesFromAMultiverse', 'scenes-from-a-multiverse'),
cls('ScottStantis', 'scottstantis'),
cls('ShenComix', 'shen-comix'),
cls('ShermansLagoon', 'shermanslagoon'),
cls('ShirleyAndSonClassics', 'shirley-and-son-classics'),
cls('Shoe', 'shoe'),
cls('SigneWilkinson', 'signewilkinson'),
@ -415,7 +405,6 @@ class GoComics(_ParserScraper):
cls('SpeedBump', 'speedbump'),
cls('SpiritOfTheStaircase', 'spirit-of-the-staircase'),
cls('SpotTheFrog', 'spot-the-frog'),
cls('Starling', 'starling'),
cls('SteveBenson', 'stevebenson'),
cls('SteveBreen', 'stevebreen'),
cls('SteveKelley', 'stevekelley'),
@ -462,15 +451,12 @@ class GoComics(_ParserScraper):
cls('TheHumbleStumble', 'humble-stumble'),
cls('TheKChronicles', 'thekchronicles'),
cls('TheKnightLife', 'theknightlife'),
cls('TheLastMechanicalMonster', 'the-last-mechanical-monster'),
cls('TheLeftyBoscoPictureShow', 'leftyboscopictureshow'),
cls('TheMartianConfederacy', 'the-martian-confederacy'),
cls('TheMeaningOfLila', 'meaningoflila'),
cls('TheMiddleAge', 'the-middle-age'),
cls('TheMiddletons', 'themiddletons'),
cls('TheNormClassics', 'thenorm'),
cls('TheOtherCoast', 'theothercoast'),
cls('TheOtherEnd', 'the-other-end'),
cls('TheUpsideDownWorldOfGustaveVerbeek', 'upside-down-world-of-gustave-verbeek'),
cls('TheWanderingMelon', 'the-wandering-melon'),
cls('TheWizardOfIdSpanish', 'wizardofidespanol', 'es'),
@ -483,8 +469,6 @@ class GoComics(_ParserScraper):
cls('TomTheDancingBug', 'tomthedancingbug'),
cls('TomToles', 'tomtoles'),
cls('TooMuchCoffeeMan', 'toomuchcoffeeman'),
cls('ToughTown', 'tough-town'),
cls('Trivquiz', 'trivquiz'),
cls('Trucutu', 'trucutu', 'es'),
cls('TruthFacts', 'truth-facts'),
cls('Tutelandia', 'tutelandia', 'es'),
@ -511,12 +495,12 @@ class GoComics(_ParserScraper):
cls('Widdershins', 'widdershins'),
cls('WideOpen', 'wide-open'),
cls('WinLoseDrew', 'drewlitton'),
cls('Winston', 'winston'),
cls('WizardOfId', 'wizardofid'),
cls('WizardOfIdClassics', 'wizard-of-id-classics'),
cls('Wondermark', 'wondermark'),
cls('WorkingDaze', 'working-daze'),
cls('WorkingItOut', 'workingitout'),
cls('WorryLines', 'worry-lines'),
cls('WrongHands', 'wrong-hands'),
cls('WTDuck', 'wtduck'),
cls('WuMo', 'wumo'),

View file

@ -838,10 +838,14 @@ class Removed(Scraper):
cls('GoComics/060'),
cls('GoComics/2CowsAndAChicken'),
cls('GoComics/ABitSketch'),
cls('GoComics/AmericanChopSuey'),
cls('GoComics/Andnow'),
cls('GoComics/Anecdote'),
cls('GoComics/AppleCreekComics'),
cls('GoComics/AskACat'),
cls('GoComics/AskAPortlySyndicatePerson'),
cls('GoComics/BabyTrump'),
cls('GoComics/BadReporter'),
cls('GoComics/BarkingCrayon'),
cls('GoComics/Bazoobee'),
cls('GoComics/Bewley'),
@ -851,13 +855,16 @@ class Removed(Scraper):
cls('GoComics/BottAuto'),
cls('GoComics/BrainSquirts'),
cls('GoComics/BUNS'),
cls('GoComics/BushyTales'),
cls('GoComics/CAFFEINATED'),
cls('GoComics/CapsulasMedicas'),
cls('GoComics/CharmysArmy'),
cls('GoComics/CheapThrillsCuisine'),
cls('GoComics/ClearBlueWater'),
cls('GoComics/Committed'),
cls('GoComics/ConnieToTheWonnie'),
cls('GoComics/CourageousManAdventures'),
cls('GoComics/DanWasserman'),
cls('GoComics/DontPicktheFlowers'),
cls('GoComics/DorrisMcComics'),
cls('GoComics/Dragin'),
@ -878,6 +885,7 @@ class Removed(Scraper):
cls('GoComics/GarciaCartoonCo'),
cls('GoComics/GarfieldMinusGarfield'),
cls('GoComics/GIRTH'),
cls('GoComics/GnomeSyndicate'),
cls('GoComics/GoComicsFanArt'),
cls('GoComics/Graffiti'),
cls('GoComics/GrannyAnny'),
@ -887,18 +895,25 @@ class Removed(Scraper):
cls('GoComics/Headcheese'),
cls('GoComics/HealthCapsules'),
cls('GoComics/HowToCat'),
cls('GoComics/HUBRIS'),
cls('GoComics/HumanCull'),
cls('GoComics/InspectorDangersCrimeQuiz'),
cls('GoComics/ItsjustJim'),
cls('GoComics/JerryHolbert'),
cls('GoComics/JillpokeBohemia'),
cls('GoComics/JimsJournal'),
cls('GoComics/JoeVanilla'),
cls('GoComics/JoeyAlisonSayersComics'),
cls('GoComics/JustSayUncle'),
cls('GoComics/KartoonsByKline'),
cls('GoComics/KenCatalino'),
cls('GoComics/KidSpot'),
cls('GoComics/KidTown'),
cls('GoComics/KitNCarlyle'),
cls('GoComics/LostSideOfSuburbia'),
cls('GoComics/LumandAbner'),
cls('GoComics/MagicInAMinute'),
cls('GoComics/Magnificatz'),
cls('GoComics/MazeToonsPuzzle'),
cls('GoComics/MegClassics'),
cls('GoComics/MichaelAndrew'),
@ -908,7 +923,10 @@ class Removed(Scraper):
cls('GoComics/Mindframe'),
cls('GoComics/MiscSoup'),
cls('GoComics/MisterAndMe'),
cls('GoComics/Mo'),
cls('GoComics/MomsCancer'),
cls('GoComics/MortsIsland'),
cls('GoComics/MustardAndBoloney'),
cls('GoComics/MyCage'),
cls('GoComics/MyCageNewAndOld'),
cls('GoComics/NoOrdinaryLife'),
@ -919,8 +937,10 @@ class Removed(Scraper):
cls('GoComics/PicturesInBoxes'),
cls('GoComics/PieComic'),
cls('GoComics/Pinkerton'),
cls('GoComics/PirateMike'),
cls('GoComics/PoliceLimit'),
cls('GoComics/PopCultureShockTherapy'),
cls('GoComics/PromisesPromises'),
cls('GoComics/ReplyAll'),
cls('GoComics/ReplyAllLite'),
cls('GoComics/RonWarren'),
@ -937,6 +957,7 @@ class Removed(Scraper):
cls('GoComics/Speechless'),
cls('GoComics/SportsbyVoort'),
cls('GoComics/StankoAndTibor'),
cls('GoComics/Starling'),
cls('GoComics/SubSub'),
cls('GoComics/SuburbanFairyTales'),
cls('GoComics/SuperSiblings'),
@ -950,20 +971,26 @@ class Removed(Scraper):
cls('GoComics/TheCreeps'),
cls('GoComics/TheGentlemansArmchair'),
cls('GoComics/TheGreenMonkeys'),
cls('GoComics/TheLastMechanicalMonster'),
cls('GoComics/TheLeftyBoscoPictureShow'),
cls('GoComics/TheLostBear'),
cls('GoComics/TheNorm40'),
cls('GoComics/TheOldManAndHisDog'),
cls('GoComics/TheOtherEnd'),
cls('GoComics/TheQuinnAndFinnShow'),
cls('GoComics/TheQuixoteSyndrome'),
cls('GoComics/TheSunshineClub'),
cls('GoComics/Thingsesque'),
cls('GoComics/TimEagan'),
cls('GoComics/TOBY'),
cls('GoComics/ToughTown'),
cls('GoComics/Trivquiz'),
cls('GoComics/UncleArtsFunland'),
cls('GoComics/USAcres'),
cls('GoComics/WayOutComics'),
cls('GoComics/WhiskeyFalls'),
cls('GoComics/WhyattCartoons'),
cls('GoComics/Winston'),
cls('GoComics/WorldOfWonder'),
cls('GoComics/Wrobbertcartoons'),
cls('GoComics/Zootopia'),
@ -1607,7 +1634,7 @@ class Renamed(Scraper):
cls('PetiteSymphony/Generation17', 'ComicsBreak/Generation17'),
cls('PetiteSymphony/Rascals', 'KemonoCafe/Rascals'),
cls('QuentynQuinnSpaceRanger', 'RHJunior/QuentynQuinnSpaceRanger'),
cls('ShermansLagoon', 'ComicsKingdom/ShermansLagoon'),
cls('ShermansLagoon', 'GoComics/ShermansLagoon'),
cls('SmackJeeves/AddictiveScience', 'KemonoCafe/AddictiveScience'),
cls('SmackJeeves/CityFolk', 'ComicFury/CityFolk'),
cls('SmackJeeves/DoomsdayMyDear', 'DoomsdayMyDear'),
@ -1628,6 +1655,7 @@ class Renamed(Scraper):
# Renamed in 3.0
cls('AHClub', 'RickGriffinStudios/AHClub'),
cls('ComicFury/MuddlemarchMudCompany', 'ComicFury/MudCompany'),
cls('ComicsKingdom/ShermansLagoon', 'GoComics/ShermansLagoon'),
cls('CrapIDrewOnMyLunchBreak', 'WebToons/CrapIDrewOnMyLunchBreak'),
cls('GoComics/BloomCounty2017', 'GoComics/BloomCounty2019'),
cls('GoComics/Cathy', 'GoComics/CathyClassics'),

View file

@ -2,7 +2,7 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2020 Tobias Gruetzmacher
# Copyright (C) 2015-2022 Tobias Gruetzmacher
"""
Script to get a list of gocomics and save the info in a JSON file for further
processing.
@ -29,11 +29,13 @@ class GoComicsUpdater(ComicListUpdater):
def collect_results(self):
"""Parse all listing pages."""
self.handle_gocomics('http://www.gocomics.com/comics/a-to-z')
self.handle_gocomics('http://www.gocomics.com/comics/espanol', lang='es')
self.handle_gocomics('http://www.gocomics.com/comics/espanol?page=2', lang='es')
# We add the Spanish comics first since they are now also listed on the list of all
# comics... (Expect duplicate warnings for all Spanish comics)
self.handle_gocomics('https://www.gocomics.com/comics/espanol', lang='es')
self.handle_gocomics('https://www.gocomics.com/comics/espanol?page=2', lang='es')
self.handle_gocomics('https://www.gocomics.com/comics/a-to-z')
def get_entry(self, name, data):
def get_entry(self, name: str, data: tuple[str, str]):
url, lang = data
langopt = ", '%s'" % lang if lang else ''
return u"cls('%s', '%s'%s)," % (name, url, langopt)

View file

@ -1,7 +1,7 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2020 Tobias Gruetzmacher
# Copyright (C) 2015-2022 Tobias Gruetzmacher
import codecs
import html
import json
@ -22,18 +22,18 @@ def first_lower(x):
class ComicListUpdater(object):
dup_templates = ()
excluded_comics = ()
dup_templates: tuple[str, ...] = ()
excluded_comics: tuple[str, ...] = ()
START = "# START AUTOUPDATE"
END = "# END AUTOUPDATE"
def __init__(self, name):
def __init__(self, name: str):
self.json = name.replace(".py", ".json")
self.session = http.default_session
self.sleep = 0
def get_url(self, url, expand=True):
def get_url(self, url: str, expand=True):
"""Get an HTML page and parse it with LXML."""
print("Parsing", url, file=sys.stderr)
try:
@ -48,7 +48,7 @@ class ComicListUpdater(object):
print("ERROR:", msg, file=sys.stderr)
raise
def should_skip(self, name):
def should_skip(self, name: str):
if contains_case_insensitive(self.res, name):
# we cannot handle two comics that only differ in case
print("INFO: skipping possible duplicate", repr(name),
@ -69,7 +69,7 @@ class ComicListUpdater(object):
json.dump(self.res, f, sort_keys=True, indent=2,
separators=(',', ': '))
def add_comic(self, name, data, count=None):
def add_comic(self, name: str, data: tuple[str, ...], count=None):
"""Add a collected comic with a specific number of comics."""
name = format_name(name)
if not self.should_skip(name):