From 4d369376c0703eabbf539aab6f3bedcf7b296d6c Mon Sep 17 00:00:00 2001
From: littauer
Date: Mon, 16 Dec 2019 17:18:04 -0500
Subject: [PATCH] Add ComicsKingdom.com (aka King Features) (#134)

Note that going back more than about seven days is a paid feature and is untested.
---
 dosagelib/plugins/comicskingdom.py | 172 +++++++++++++++++++++++++++++
 scripts/comicskingdom.py           |  48 ++++++++
 scripts/generate_json.sh           |   2 +-
 scripts/update_plugins.sh          |   2 +-
 4 files changed, 222 insertions(+), 2 deletions(-)
 create mode 100644 dosagelib/plugins/comicskingdom.py
 create mode 100755 scripts/comicskingdom.py

diff --git a/dosagelib/plugins/comicskingdom.py b/dosagelib/plugins/comicskingdom.py
new file mode 100644
index 000000000..0f85d8da2
--- /dev/null
+++ b/dosagelib/plugins/comicskingdom.py
@@ -0,0 +1,172 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
+# Copyright (C) 2012-2014 Bastian Kleineidam
+# Copyright (C) 2015-2016 Tobias Gruetzmacher
+# Copyright (C) 2019 Thomas W. Littauer
+
+from __future__ import absolute_import, division, print_function
+
+import datetime
+import re
+
+from ..scraper import _BasicScraper
+from ..helpers import indirectStarter
+
+
+class ComicsKingdom(_BasicScraper):
+    """Scraper for comics hosted on comicskingdom.com (King Features).
+
+    Going back more than about seven days is a paid feature of the site
+    and is untested.
+    """
+
+    # The site changed in mid-June 2019: before that the image URL was found
+    # in an ' image-url="..."' attribute instead of the og:image property.
+    imageSearch = re.compile(r'property="og:image" content="(https://safr\.kingfeatures\.com/api/img\.php\?e=...&s=.&file=[^"]+)"')
+    prevSearch = re.compile(r' :is-left-arrow="true" .*date-slug="(\d\d\d\d-\d\d-\d\d)"')
+    help = 'Index format: yyyy-mm-dd'
+
+    def __init__(self, name, path, lang=None):
+        """Set up the scraper 'ComicsKingdom/<name>' for the URL slug *path*."""
+        super(ComicsKingdom, self).__init__('ComicsKingdom/' + name)
+        self.url = 'https://comicskingdom.com/' + path
+        if lang:
+            self.lang = lang
+
+    def namer(self, image_url, page_url):
+        """Return the file name "<Slug>_<yyyy-mm-dd>.png" for a strip.
+
+        Dated pages end in "<slug>/<yyyy-mm-dd>", so both parts come from the
+        URL.  The start page (self.url) has no date in its URL, so today's
+        date is used for it.
+        """
+        if page_url != self.url:
+            name, date = page_url.rsplit('/', 2)[1:]
+        else:
+            name = page_url.rsplit('/', 1)[1]
+            date = datetime.date.today().strftime("%Y-%m-%d")
+        return "%s_%s.png" % (name.title(), date)
+
+    def link_modifier(self, url, tourl):
+        """Rewrite links that leave this comic's base URL.
+
+        Any target that does not start with self.url is replaced by self.url
+        plus the last eleven characters of the target ("/yyyy-mm-dd").
+        """
+        if not tourl.startswith(self.url):
+            tourl = self.url + tourl[-11:]  # "/yyyy-mm-dd"
+        return tourl
+
+    @classmethod
+    def getmodules(cls):
+        """Return one scraper instance per known ComicsKingdom comic."""
+        return (
+            # Some comics are not listed on the "all" page (too old?)
+
+            # do not edit anything below since these entries are generated from
+            # scripts/comicskingdom.py
+            # START AUTOUPDATE
+            cls('AmazingSpiderMan', 'amazing-spider-man'),
+            cls('Apartment3G', 'apartment-3-g_1'),
+            cls('ArcticCircle', 'arctic-circle'),
+            cls('BabyBlues', 'baby-blues'),
+            cls('BarneyGoogleAndSnuffySmith', 'barney-google-and-snuffy-smith'),
+            cls('BeetleBailey', 'beetle-bailey-1'),
+            cls('BettyBoopSundays', 'betty-boop-sundays'),
+            cls('BetweenFriends', 'between-friends'),
+            cls('BigBenBolt', 'big-ben-bolt'),
+            cls('BigBenBoltSundays', 'big-ben-bolt-sundays'),
+            cls('Bizarro', 'bizarro'),
+            cls('Blondie', 'blondie'),
+            cls('BonersArk', 'boners-ark'),
+            cls('BonersArkSundays', 'boners-ark-sundays'),
+            cls('BrianDuffy', 'brian-duffy'),
+            cls('BrickBradford', 'brick-bradford'),
+            cls('BrilliantMindOfEdisonLee', 'brilliant-mind-of-edison-lee'),
+            cls('BringingUpFather', 'bringing-up-father'),
+            cls('Buckles', 'buckles'),
+            cls('BuzSawyer', 'buz-sawyer'),
+            cls('CarpeDiem', 'carpe-diem'),
+            cls('Crankshaft', 'crankshaft'),
+            cls('Crock', 'crock'),
+            cls('Curtis', 'curtis'),
+            cls('DaddyDaze', 'daddy-daze'),
+            # DarrinBell has a duplicate in GoComics/DarrinBell
+            cls('DavidMHitch', 'david-m-hitch'),
+            cls('DennisTheMenace', 'dennis-the-menace'),
+            cls('Dustin', 'dustin'),
+            cls('EdGamble', 'ed-gamble'),
+            cls('FamilyCircus', 'family-circus'),
+            cls('FlashGordon', 'flash-gordon'),
+            cls('FlashGordonSundays', 'flash-gordon-sundays'),
+            cls('FunkyWinkerbean', 'funky-winkerbean'),
+            cls('FunkyWinkerbeanSundays', 'funky-winkerbean-sundays'),
+            cls('HagarTheHorrible', 'hagar-the-horrible'),
+            cls('HeartOfJulietJones', 'heart-of-juliet-jones'),
+            cls('HeartOfJulietJonesSundays', 'heart-of-juliet-jones-sundays'),
+            cls('HiAndLois', 'hi-and-lois'),
+            cls('IntelligentLife', 'Intelligent'),
+            cls('JimmyMargulies', 'jimmy-margulies'),
+            cls('JohnBranch', 'john-branch'),
+            cls('JohnnyHazard', 'johnny-hazard'),
+            cls('JohnnyHazardSundays', 'johnny-hazard-sundays'),
+            cls('JudgeParker', 'judge-parker'),
+            cls('JungleJimSundays', 'jungle-jim-sundays'),
+            cls('KatzenjammerKids', 'katzenjammer-kids'),
+            cls('KatzenjammerKidsSundays', 'katzenjammer-kids-sundays'),
+            cls('KevinAndKell', 'kevin-and-kell'),
+            cls('KingOfTheRoyalMounted', 'king-of-the-royal-mounted'),
+            cls('KirkWalters', 'kirk-walters'),
+            cls('KrazyKat', 'krazy-kat'),
+            cls('LeeJudge', 'lee-judge'),
+            cls('LittleIodineSundays', 'little-iodine-sundays'),
+            cls('Lockhorns', 'lockhorns'),
+            cls('Macanudo', 'Macanudo'),
+            cls('MallardFillmore', 'mallard-fillmore'),
+            cls('MandrakeTheMagician', 'mandrake-the-magician-1'),
+            cls('MandrakeTheMagicianSundays', 'mandrake-the-magician-sundays'),
+            cls('MarkTrail', 'mark-trail'),
+            cls('Marvin', 'marvin'),
+            cls('MaryWorth', 'mary-worth'),
+            cls('MikePeters', 'mike-peters'),
+            cls('MikeShelton', 'mike-shelton'),
+            cls('MikeSmith', 'mike-smith'),
+            cls('MooseAndMolly', 'moose-and-molly'),
+            cls('MotherGooseAndGrimm', 'mother-goose-grimm'),
+            cls('Mutts', 'mutts'),
+            cls('OfficeHours', 'office-hours'),
+            cls('OnTheFastrack', 'on-the-fastrack'),
+            cls('PajamaDiaries', 'pajama-diaries'),
+            cls('PardonMyPlanet', 'pardon-my-planet'),
+            cls('Phantom', 'phantom'),
+            cls('PhantomSundays', 'phantom-sundays'),
+            cls('Popeye', 'popeye'),
+            cls('PopeyesCartoonClub', 'popeyes-cartoon-club'),
+            cls('PrinceValiant', 'prince-valiant'),
+            cls('ProsAndCons', 'pros-cons'),
+            cls('Quincy', 'quincy'),
+            cls('RadioPatrol', 'radio-patrol'),
+            cls('Redeye', 'redeye-2'),
+            cls('RedeyeSundays', 'redeye-sundays'),
+            cls('Retail', 'retail'),
+            cls('RexMorganMD', 'rex-morgan-m-d'),
+            cls('RhymesWithOrange', 'rhymes-with-orange'),
+            cls('RipKirby', 'rip-kirby'),
+            cls('SafeHavens', 'safe-havens'),
+            cls('SallyForth', 'sally-forth'),
+            cls('SamAndSilo', 'sam-and-silo'),
+            cls('SecretAgentX9', 'secret-agent-x-9'),
+            cls('ShermansLagoon', 'sherman-s-lagoon'),
+            # Shoe has a duplicate in GoComics/Shoe
+            cls('SixChix', 'six-chix'),
+            cls('SlylockFoxAndComicsForKids', 'slylock-fox-and-comics-for-kids'),
+            cls('TakeItFromTheTinkersons', 'take-it-from-the-tinkersons'),
+            cls('TheLittleKing', 'the-little-king'),
+            cls('ThimbleTheater', 'thimble-theater'),
+            cls('Tiger', 'tiger'),
+            cls('TigerSundays', 'tiger-sundays'),
+            cls('ToddTheDinosaur', 'todd-the-dinosaur'),
+            cls('ZippyThePinhead', 'zippy-the-pinhead'),
+            cls('Zits', 'zits'),
+            # END AUTOUPDATE
+        )
diff --git a/scripts/comicskingdom.py b/scripts/comicskingdom.py
new file mode 100755
index 000000000..755cfe2b5
--- /dev/null
+++ b/scripts/comicskingdom.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (C) 2012-2014 Bastian Kleineidam
+# Copyright (C) 2015-2016 Tobias Gruetzmacher
+# Copyright (C) 2019 Thomas W. Littauer
+"""
+Script to get a list of comicskingdom.com comics and save the info in a JSON file
+for further processing.
+"""
+from __future__ import absolute_import, division, print_function
+
+from scriptutil import ComicListUpdater
+
+
+class ComicsKingdomUpdater(ComicListUpdater):
+    """Collect the comics linked from the comicskingdom.com front page."""
+
+    dup_templates = ("Creators/%s", "DrunkDuck/%s", "GoComics/%s",
+                     "KeenSpot/%s", "ComicGenesis/%s", "SmackJeeves/%s")
+
+    # names of comics to exclude
+    excluded_comics = (
+        # no images
+        'Doodles',
+    )
+
+    def handle_url(self, url):
+        """Parse one listing page and register every comic linked on it."""
+        data = self.get_url(url)
+
+        for comicdiv in data.cssselect('ul.comic-link-group li'):
+            comiclink = comicdiv.cssselect('a')[0]
+            # the last path segment of the link is the comic's URL slug
+            path = comiclink.attrib['href'].rsplit('/', 1)[1]
+            self.add_comic(comiclink.text, path)
+
+    def collect_results(self):
+        """Parse the front page of the site."""
+        self.handle_url('https://www.comicskingdom.com/')
+
+    def get_entry(self, name, path):
+        """Return the ``cls(...)`` source line for one comic."""
+        langopt = ", 'es'" if name.lower().endswith('spanish') else ''
+        return u"cls('%s', '%s'%s)," % (name, path, langopt)
+
+
+if __name__ == '__main__':
+    ComicsKingdomUpdater(__file__).run()
diff --git a/scripts/generate_json.sh b/scripts/generate_json.sh
index c01833b26..8ca07236e 100755
--- a/scripts/generate_json.sh
+++ b/scripts/generate_json.sh
@@ -6,7 +6,7 @@ d=$(dirname $0)
 if [ $# -ge 1 ]; then
   list="$*"
 else
-  list="arcamax comicfury comicgenesis creators gocomics keenspot smackjeeves webcomicfactory"
+  list="arcamax comicfury comicgenesis creators gocomics keenspot smackjeeves webcomicfactory comicskingdom"
 fi
 for script in $list; do
   echo "Executing ${script}.py"
diff --git a/scripts/update_plugins.sh b/scripts/update_plugins.sh
index 22b7fbf3d..761c5b2c8 100755
--- a/scripts/update_plugins.sh
+++ b/scripts/update_plugins.sh
@@ -9,7 +9,7 @@ d=$(dirname $0)
 if [ $# -ge 1 ]; then
   list="$*"
 else
-  list="arcamax comicfury comicgenesis creators gocomics keenspot smackjeeves webcomicfactory"
+  list="arcamax comicfury comicgenesis creators gocomics keenspot smackjeeves webcomicfactory comicskingdom"
 fi
 for script in $list; do
   target="${d}/../dosagelib/plugins/${script}.py"