diff --git a/dosagelib/plugins/comicskingdom.py b/dosagelib/plugins/comicskingdom.py index 60e5864ee..0e7813e19 100644 --- a/dosagelib/plugins/comicskingdom.py +++ b/dosagelib/plugins/comicskingdom.py @@ -120,7 +120,6 @@ class ComicsKingdom(_ParserScraper): cls('SallyForth', 'sally-forth'), cls('SamAndSilo', 'sam-and-silo'), cls('SecretAgentX9', 'secret-agent-x-9'), - cls('ShermansLagoon', 'sherman-s-lagoon'), # Shoe has a duplicate in GoComics/Shoe cls('SixChix', 'six-chix'), cls('SlylockFoxAndComicsForKids', 'slylock-fox-and-comics-for-kids'), diff --git a/dosagelib/plugins/gocomics.py b/dosagelib/plugins/gocomics.py index cd206a5d5..081bc80bf 100644 --- a/dosagelib/plugins/gocomics.py +++ b/dosagelib/plugins/gocomics.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: MIT # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2021 Tobias Gruetzmacher +# Copyright (C) 2015-2022 Tobias Gruetzmacher from ..scraper import _ParserScraper from ..helpers import indirectStarter @@ -55,7 +55,6 @@ class GoComics(_ParserScraper): cls('AlisHouse', 'alis-house'), cls('AlleyOop', 'alley-oop'), cls('AmandaTheGreat', 'amanda-the-great'), - cls('AmericanChopSuey', 'american-chop-suey'), cls('Andertoons', 'andertoons'), cls('AndyCapp', 'andycapp'), cls('AngryLittleGirls', 'angry-little-girls'), @@ -63,17 +62,15 @@ class GoComics(_ParserScraper): cls('Annie', 'annie'), cls('AProblemLikeJamal', 'a-problem-like-jamal'), cls('ArloAndJanis', 'arloandjanis'), - cls('AskACat', 'ask-a-cat'), cls('AskShagg', 'askshagg'), cls('AtTavicat', 'tavicat'), cls('AuntyAcid', 'aunty-acid'), - cls('BabyTrump', 'baby-trump'), + cls('BabyBlues', 'babyblues'), cls('BackInTheDay', 'backintheday'), cls('BackToBC', 'back-to-bc'), cls('Bacon', 'bacon'), cls('Badlands', 'badlands'), cls('BadMachinery', 'bad-machinery'), - cls('BadReporter', 'badreporter'), cls('Baldo', 'baldo'), cls('BaldoEnEspanol', 'baldoespanol', 'es'), cls('BallardStreet', 'ballardstreet'), @@ -107,15 +104,16 @@ class GoComics(_ParserScraper): cls('Boomerangs', 'boomerangs'), cls('Bottomliners', 'bottomliners'), cls('BoundAndGagged', 'boundandgagged'), + cls('Bozo', 'bozo'), cls('BreakingCatNews', 'breaking-cat-news'), cls('BreakOfDay', 'break-of-day'), cls('Brevity', 'brevity'), cls('BrewsterRockit', 'brewsterrockit'), cls('BrianMcFadden', 'brian-mcfadden'), cls('BroomHilda', 'broomhilda'), + cls('Buckles', 'buckles'), cls('Bully', 'bully'), cls('Buni', 'buni'), - cls('BushyTales', 'bushy-tales'), cls('CalvinAndHobbes', 'calvinandhobbes'), cls('CalvinAndHobbesEnEspanol', 'calvinandhobbesespanol', 'es'), cls('Candorville', 'candorville'), @@ -125,7 +123,6 @@ class GoComics(_ParserScraper): cls('CatsCafe', 'cats-cafe'), cls('CattitudeDoggonit', 'cattitude-doggonit'), cls('CestLaVie', 'cestlavie'), - cls('CheapThrillsCuisine', 'cheap-thrills-cuisine'), cls('CheerUpEmoKid', 'cheer-up-emo-kid'), cls('ChipBok', 'chipbok'), cls('ChrisBritt', 'chrisbritt'), @@ -146,7 +143,6 @@ class GoComics(_ParserScraper): cls('CulDeSac', 'culdesac'), cls('DaddysHome', 'daddyshome'), cls('DanaSummers', 'danasummers'), - cls('DanWasserman', 'danwasserman'), cls('DarkSideOfTheHorse', 'darksideofthehorse'), cls('DeepDarkFears', 'deep-dark-fears'), cls('DeFlocked', 'deflocked'), @@ -210,8 +206,8 @@ class GoComics(_ParserScraper): cls('GingerMeggs', 'gingermeggs'), cls('GingerMeggsEnEspanol', 'gingermeggs-espanol', 'es'), cls('GlasbergenCartoons', 'glasbergen-cartoons'), + cls('Globetrotter', 'globetrotter'), cls('GManWebcomics', 'g-man-webcomics'), - cls('GnomeSyndicate', 'gnome-syndicate'), cls('Goats', 'goats'), cls('GrandAvenue', 'grand-avenue'), cls('GrayMatters', 'gray-matters'), @@ -227,13 +223,11 @@ class GoComics(_ParserScraper): cls('Herman', 'herman'), cls('HomeAndAway', 'homeandaway'), cls('HotComicsForCoolPeople', 'hot-comics-for-cool-people'), - cls('HUBRIS', 'hubris'), cls('HutchOwen', 'hutch-owen'), cls('ImagineThis', 'imaginethis'), cls('ImogenQuest', 'imogen-quest'), cls('InkPen', 'inkpen'), cls('InSecurity', 'in-security'), - cls('InspectorDangersCrimeQuiz', 'inspector-dangers-crime-quiz'), cls('InTheBleachers', 'inthebleachers'), cls('InTheSticks', 'inthesticks'), cls('InvisibleBread', 'invisible-bread'), @@ -246,16 +240,12 @@ class GoComics(_ParserScraper): cls('JenSorensen', 'jen-sorensen'), cls('JimBentonCartoons', 'jim-benton-cartoons'), cls('JimMorin', 'jimmorin'), - cls('JimsJournal', 'jimsjournal'), cls('JoeHeller', 'joe-heller'), cls('JoelPett', 'joelpett'), - cls('JoeVanilla', 'joevanilla'), - cls('JoeyAlisonSayersComics', 'joey-alison-sayers-comics'), cls('JohnDeering', 'johndeering'), cls('JumpStart', 'jumpstart'), cls('JunkDrawer', 'junk-drawer'), cls('JustoYFranco', 'justo-y-franco', 'es'), - cls('KenCatalino', 'kencatalino'), cls('KevinKallaugher', 'kal'), cls('KevinNecessaryEditorialCartoons', 'kevin-necessary-editorial-cartoons'), cls('KidBeowulf', 'kid-beowulf'), @@ -296,9 +286,9 @@ class GoComics(_ParserScraper): cls('LugNuts', 'lug-nuts'), cls('Lunarbaboon', 'lunarbaboon'), cls('M2Bulls', 'm2bulls'), - cls('Magnificatz', 'magnificatz'), cls('Maintaining', 'maintaining'), cls('MakingIt', 'making-it'), + cls('MannequinOnTheMoon', 'mannequin-on-the-moon'), cls('MariasDay', 'marias-day'), cls('Marmaduke', 'marmaduke'), cls('MarshallRamsey', 'marshallramsey'), @@ -313,15 +303,13 @@ class GoComics(_ParserScraper): cls('MikeLester', 'mike-lester'), cls('MikeLuckovich', 'mikeluckovich'), cls('MissPeach', 'miss-peach'), - cls('Mo', 'mo'), cls('ModeratelyConfused', 'moderately-confused'), cls('Momma', 'momma'), - cls('MomsCancer', 'moms-cancer'), cls('Monty', 'monty'), cls('MontyDiaros', 'monty-diaros', 'es'), cls('MotleyClassics', 'motley-classics'), cls('MrLowe', 'mr-lowe'), - cls('MustardAndBoloney', 'mustard-and-boloney'), + cls('MtPleasant', 'mtpleasant'), cls('MuttAndJeff', 'muttandjeff'), cls('MyDadIsDracula', 'my-dad-is-dracula'), cls('MythTickle', 'mythtickle'), @@ -360,12 +348,12 @@ class GoComics(_ParserScraper): cls('Periquita', 'periquita', 'es'), cls('PerlasParaLosCerdos', 'perlas-para-los-cerdos', 'es'), cls('PerryBibleFellowship', 'perry-bible-fellowship'), + cls('PetuniaAndDre', 'petunia-and-dre'), cls('PhilHands', 'phil-hands'), cls('PhoebeAndHerUnicorn', 'phoebe-and-her-unicorn'), cls('Pibgorn', 'pibgorn'), cls('PibgornSketches', 'pibgornsketches'), cls('Pickles', 'pickles'), - cls('PirateMike', 'pirate-mike'), cls('PleaseListenToMe', 'please-listen-to-me'), cls('Pluggers', 'pluggers'), cls('PoochCafe', 'poochcafe'), @@ -374,7 +362,6 @@ class GoComics(_ParserScraper): cls('PotShots', 'pot-shots'), cls('PreTeena', 'preteena'), cls('PricklyCity', 'pricklycity'), - cls('PromisesPromises', 'promises-promises'), cls('QuestionableQuotebook', 'questionable-quotebook'), cls('RabbitsAgainstMagic', 'rabbitsagainstmagic'), cls('RaisingDuncan', 'raising-duncan'), @@ -392,9 +379,11 @@ class GoComics(_ParserScraper): cls('RobertAriail', 'robert-ariail'), cls('RobRogers', 'robrogers'), cls('Rosebuds', 'rosebuds'), + cls('RosebudsEnEspanol', 'rosebuds-en-espanol'), cls('RoseIsRose', 'roseisrose'), cls('Rubes', 'rubes'), cls('RudyPark', 'rudypark'), + cls('SaltNPepper', 'salt-n-pepper'), cls('SarahsScribbles', 'sarahs-scribbles'), cls('SaturdayMorningBreakfastCereal', 'saturday-morning-breakfast-cereal'), cls('SavageChickens', 'savage-chickens'), @@ -402,6 +391,7 @@ class GoComics(_ParserScraper): cls('ScenesFromAMultiverse', 'scenes-from-a-multiverse'), cls('ScottStantis', 'scottstantis'), cls('ShenComix', 'shen-comix'), + cls('ShermansLagoon', 'shermanslagoon'), cls('ShirleyAndSonClassics', 'shirley-and-son-classics'), cls('Shoe', 'shoe'), cls('SigneWilkinson', 'signewilkinson'), @@ -415,7 +405,6 @@ class GoComics(_ParserScraper): cls('SpeedBump', 'speedbump'), cls('SpiritOfTheStaircase', 'spirit-of-the-staircase'), cls('SpotTheFrog', 'spot-the-frog'), - cls('Starling', 'starling'), cls('SteveBenson', 'stevebenson'), cls('SteveBreen', 'stevebreen'), cls('SteveKelley', 'stevekelley'), @@ -462,15 +451,12 @@ class GoComics(_ParserScraper): cls('TheHumbleStumble', 'humble-stumble'), cls('TheKChronicles', 'thekchronicles'), cls('TheKnightLife', 'theknightlife'), - cls('TheLastMechanicalMonster', 'the-last-mechanical-monster'), - cls('TheLeftyBoscoPictureShow', 'leftyboscopictureshow'), cls('TheMartianConfederacy', 'the-martian-confederacy'), cls('TheMeaningOfLila', 'meaningoflila'), cls('TheMiddleAge', 'the-middle-age'), cls('TheMiddletons', 'themiddletons'), cls('TheNormClassics', 'thenorm'), cls('TheOtherCoast', 'theothercoast'), - cls('TheOtherEnd', 'the-other-end'), cls('TheUpsideDownWorldOfGustaveVerbeek', 'upside-down-world-of-gustave-verbeek'), cls('TheWanderingMelon', 'the-wandering-melon'), cls('TheWizardOfIdSpanish', 'wizardofidespanol', 'es'), @@ -483,8 +469,6 @@ class GoComics(_ParserScraper): cls('TomTheDancingBug', 'tomthedancingbug'), cls('TomToles', 'tomtoles'), cls('TooMuchCoffeeMan', 'toomuchcoffeeman'), - cls('ToughTown', 'tough-town'), - cls('Trivquiz', 'trivquiz'), cls('Trucutu', 'trucutu', 'es'), cls('TruthFacts', 'truth-facts'), cls('Tutelandia', 'tutelandia', 'es'), @@ -511,12 +495,12 @@ class GoComics(_ParserScraper): cls('Widdershins', 'widdershins'), cls('WideOpen', 'wide-open'), cls('WinLoseDrew', 'drewlitton'), - cls('Winston', 'winston'), cls('WizardOfId', 'wizardofid'), cls('WizardOfIdClassics', 'wizard-of-id-classics'), cls('Wondermark', 'wondermark'), cls('WorkingDaze', 'working-daze'), cls('WorkingItOut', 'workingitout'), + cls('WorryLines', 'worry-lines'), cls('WrongHands', 'wrong-hands'), cls('WTDuck', 'wtduck'), cls('WuMo', 'wumo'), diff --git a/dosagelib/plugins/old.py b/dosagelib/plugins/old.py index 71bd098ff..7469c856a 100644 --- a/dosagelib/plugins/old.py +++ b/dosagelib/plugins/old.py @@ -838,10 +838,14 @@ class Removed(Scraper): cls('GoComics/060'), cls('GoComics/2CowsAndAChicken'), cls('GoComics/ABitSketch'), + cls('GoComics/AmericanChopSuey'), cls('GoComics/Andnow'), cls('GoComics/Anecdote'), cls('GoComics/AppleCreekComics'), + cls('GoComics/AskACat'), cls('GoComics/AskAPortlySyndicatePerson'), + cls('GoComics/BabyTrump'), + cls('GoComics/BadReporter'), cls('GoComics/BarkingCrayon'), cls('GoComics/Bazoobee'), cls('GoComics/Bewley'), @@ -851,13 +855,16 @@ class Removed(Scraper): cls('GoComics/BottAuto'), cls('GoComics/BrainSquirts'), cls('GoComics/BUNS'), + cls('GoComics/BushyTales'), cls('GoComics/CAFFEINATED'), cls('GoComics/CapsulasMedicas'), cls('GoComics/CharmysArmy'), + cls('GoComics/CheapThrillsCuisine'), cls('GoComics/ClearBlueWater'), cls('GoComics/Committed'), cls('GoComics/ConnieToTheWonnie'), cls('GoComics/CourageousManAdventures'), + cls('GoComics/DanWasserman'), cls('GoComics/DontPicktheFlowers'), cls('GoComics/DorrisMcComics'), cls('GoComics/Dragin'), @@ -878,6 +885,7 @@ class Removed(Scraper): cls('GoComics/GarciaCartoonCo'), cls('GoComics/GarfieldMinusGarfield'), cls('GoComics/GIRTH'), + cls('GoComics/GnomeSyndicate'), cls('GoComics/GoComicsFanArt'), cls('GoComics/Graffiti'), cls('GoComics/GrannyAnny'), @@ -887,18 +895,25 @@ class Removed(Scraper): cls('GoComics/Headcheese'), cls('GoComics/HealthCapsules'), cls('GoComics/HowToCat'), + cls('GoComics/HUBRIS'), cls('GoComics/HumanCull'), + cls('GoComics/InspectorDangersCrimeQuiz'), cls('GoComics/ItsjustJim'), cls('GoComics/JerryHolbert'), cls('GoComics/JillpokeBohemia'), + cls('GoComics/JimsJournal'), + cls('GoComics/JoeVanilla'), + cls('GoComics/JoeyAlisonSayersComics'), cls('GoComics/JustSayUncle'), cls('GoComics/KartoonsByKline'), + cls('GoComics/KenCatalino'), cls('GoComics/KidSpot'), cls('GoComics/KidTown'), cls('GoComics/KitNCarlyle'), cls('GoComics/LostSideOfSuburbia'), cls('GoComics/LumandAbner'), cls('GoComics/MagicInAMinute'), + cls('GoComics/Magnificatz'), cls('GoComics/MazeToonsPuzzle'), cls('GoComics/MegClassics'), cls('GoComics/MichaelAndrew'), @@ -908,7 +923,10 @@ class Removed(Scraper): cls('GoComics/Mindframe'), cls('GoComics/MiscSoup'), cls('GoComics/MisterAndMe'), + cls('GoComics/Mo'), + cls('GoComics/MomsCancer'), cls('GoComics/MortsIsland'), + cls('GoComics/MustardAndBoloney'), cls('GoComics/MyCage'), cls('GoComics/MyCageNewAndOld'), cls('GoComics/NoOrdinaryLife'), @@ -919,8 +937,10 @@ class Removed(Scraper): cls('GoComics/PicturesInBoxes'), cls('GoComics/PieComic'), cls('GoComics/Pinkerton'), + cls('GoComics/PirateMike'), cls('GoComics/PoliceLimit'), cls('GoComics/PopCultureShockTherapy'), + cls('GoComics/PromisesPromises'), cls('GoComics/ReplyAll'), cls('GoComics/ReplyAllLite'), cls('GoComics/RonWarren'), @@ -937,6 +957,7 @@ class Removed(Scraper): cls('GoComics/Speechless'), cls('GoComics/SportsbyVoort'), cls('GoComics/StankoAndTibor'), + cls('GoComics/Starling'), cls('GoComics/SubSub'), cls('GoComics/SuburbanFairyTales'), cls('GoComics/SuperSiblings'), @@ -950,20 +971,26 @@ class Removed(Scraper): cls('GoComics/TheCreeps'), cls('GoComics/TheGentlemansArmchair'), cls('GoComics/TheGreenMonkeys'), + cls('GoComics/TheLastMechanicalMonster'), + cls('GoComics/TheLeftyBoscoPictureShow'), cls('GoComics/TheLostBear'), cls('GoComics/TheNorm40'), cls('GoComics/TheOldManAndHisDog'), + cls('GoComics/TheOtherEnd'), cls('GoComics/TheQuinnAndFinnShow'), cls('GoComics/TheQuixoteSyndrome'), cls('GoComics/TheSunshineClub'), cls('GoComics/Thingsesque'), cls('GoComics/TimEagan'), cls('GoComics/TOBY'), + cls('GoComics/ToughTown'), + cls('GoComics/Trivquiz'), cls('GoComics/UncleArtsFunland'), cls('GoComics/USAcres'), cls('GoComics/WayOutComics'), cls('GoComics/WhiskeyFalls'), cls('GoComics/WhyattCartoons'), + cls('GoComics/Winston'), cls('GoComics/WorldOfWonder'), cls('GoComics/Wrobbertcartoons'), cls('GoComics/Zootopia'), @@ -1607,7 +1634,7 @@ class Renamed(Scraper): cls('PetiteSymphony/Generation17', 'ComicsBreak/Generation17'), cls('PetiteSymphony/Rascals', 'KemonoCafe/Rascals'), cls('QuentynQuinnSpaceRanger', 'RHJunior/QuentynQuinnSpaceRanger'), - cls('ShermansLagoon', 'ComicsKingdom/ShermansLagoon'), + cls('ShermansLagoon', 'GoComics/ShermansLagoon'), cls('SmackJeeves/AddictiveScience', 'KemonoCafe/AddictiveScience'), cls('SmackJeeves/CityFolk', 'ComicFury/CityFolk'), cls('SmackJeeves/DoomsdayMyDear', 'DoomsdayMyDear'), @@ -1628,6 +1655,7 @@ class Renamed(Scraper): # Renamed in 3.0 cls('AHClub', 'RickGriffinStudios/AHClub'), cls('ComicFury/MuddlemarchMudCompany', 'ComicFury/MudCompany'), + cls('ComicsKingdom/ShermansLagoon', 'GoComics/ShermansLagoon'), cls('CrapIDrewOnMyLunchBreak', 'WebToons/CrapIDrewOnMyLunchBreak'), cls('GoComics/BloomCounty2017', 'GoComics/BloomCounty2019'), cls('GoComics/Cathy', 'GoComics/CathyClassics'), diff --git a/scripts/gocomics.py b/scripts/gocomics.py index 3bbfbb22c..d9b600755 100755 --- a/scripts/gocomics.py +++ b/scripts/gocomics.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: MIT # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2020 Tobias Gruetzmacher +# Copyright (C) 2015-2022 Tobias Gruetzmacher """ Script to get a list of gocomics and save the info in a JSON file for further processing. @@ -29,11 +29,13 @@ class GoComicsUpdater(ComicListUpdater): def collect_results(self): """Parse all listing pages.""" - self.handle_gocomics('http://www.gocomics.com/comics/a-to-z') - self.handle_gocomics('http://www.gocomics.com/comics/espanol', lang='es') - self.handle_gocomics('http://www.gocomics.com/comics/espanol?page=2', lang='es') + # We add the spanish comics first since they are now also listed on the list of all + # comics... (Expect duplicate warnings for all spanish comics) + self.handle_gocomics('https://www.gocomics.com/comics/espanol', lang='es') + self.handle_gocomics('https://www.gocomics.com/comics/espanol?page=2', lang='es') + self.handle_gocomics('https://www.gocomics.com/comics/a-to-z') - def get_entry(self, name, data): + def get_entry(self, name: str, data: tuple[str, str]): url, lang = data langopt = ", '%s'" % lang if lang else '' return u"cls('%s', '%s'%s)," % (name, url, langopt) diff --git a/scripts/scriptutil.py b/scripts/scriptutil.py index 0df9e7b65..449e275fc 100644 --- a/scripts/scriptutil.py +++ b/scripts/scriptutil.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: MIT # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2020 Tobias Gruetzmacher +# Copyright (C) 2015-2022 Tobias Gruetzmacher import codecs import html import json @@ -22,18 +22,18 @@ def first_lower(x): class ComicListUpdater(object): - dup_templates = () - excluded_comics = () + dup_templates: tuple[str, ...] = () + excluded_comics: tuple[str, ...] = () START = "# START AUTOUPDATE" END = "# END AUTOUPDATE" - def __init__(self, name): + def __init__(self, name: str): self.json = name.replace(".py", ".json") self.session = http.default_session self.sleep = 0 - def get_url(self, url, expand=True): + def get_url(self, url: str, expand=True): """Get an HTML page and parse it with LXML.""" print("Parsing", url, file=sys.stderr) try: @@ -48,7 +48,7 @@ class ComicListUpdater(object): print("ERROR:", msg, file=sys.stderr) raise - def should_skip(self, name): + def should_skip(self, name: str): if contains_case_insensitive(self.res, name): # we cannot handle two comics that only differ in case print("INFO: skipping possible duplicate", repr(name), @@ -69,7 +69,7 @@ class ComicListUpdater(object): json.dump(self.res, f, sort_keys=True, indent=2, separators=(',', ': ')) - def add_comic(self, name, data, count=None): + def add_comic(self, name: str, data: tuple[str, ...], count=None): """Add a collected comic with a specific number of comics.""" name = format_name(name) if not self.should_skip(name):