Add ComicsKingdom.com (aka King Features) (#134)

Note that fetching strips older than about seven days is a paid feature of the site and has not been tested.
This commit is contained in:
littauer 2019-12-16 17:18:04 -05:00 committed by Tobias Gruetzmacher
parent 1e3d4e58b4
commit 4d369376c0
4 changed files with 215 additions and 2 deletions

View file

@ -0,0 +1,166 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
# Copyright (C) 2019 Thomas W. Littauer
from __future__ import absolute_import, division, print_function
from ..scraper import _BasicScraper
from ..helpers import indirectStarter
import re
class ComicsKingdom(_BasicScraper):
    """Scraper for the comic strips published on comicskingdom.com.

    Every strip is one instance of this class; the full list of instances
    is produced by getmodules().
    """

    # The page markup changed in mid-June 2019. The pattern used before was:
    # imageSearch = re.compile(r' image-url="(https://safr\.kingfeatures\.com/api/img\.php\?e=...&s=.&file=[^"]+)"')
    imageSearch = re.compile(r'property="og:image" content="(https://safr\.kingfeatures\.com/api/img\.php\?e=...&s=.&file=[^"]+)"')
    # Captures the date slug (YYYY-MM-DD) carried by the left-arrow element.
    prevSearch = re.compile(r' :is-left-arrow="true" .*date-slug="(\d\d\d\d-\d\d-\d\d)"')
    help = 'Index format: yyyy-mm-dd'

    def __init__(self, name, path, lang=None):
        """Set up the scraper for a single strip.

        name -- scraper name, registered as 'ComicsKingdom/<name>'
        path -- URL path component of the strip on comicskingdom.com
        lang -- optional language code; only stored when truthy
        """
        super(ComicsKingdom, self).__init__('ComicsKingdom/' + name)
        self.url = 'https://comicskingdom.com/' + path
        if lang:
            self.lang = lang

    def namer(self, image_url, page_url):
        """Build the image file name '<Name>_<YYYY-MM-DD>.png'.

        Dated pages (<base>/<name>/<date>) supply both parts from the URL.
        The start page carries no date, so today's date is used instead.
        """
        if page_url == self.url:
            import datetime
            date = datetime.date.today().strftime("%Y-%m-%d")
            name = page_url.rsplit('/', 2)[2]
        else:
            pieces = page_url.rsplit('/', 3)
            date, name = pieces[3], pieces[2]
        return "%s_%s.png" % (name.title(), date)

    def link_modifier(self, url, tourl):
        """Rewrite links that do not start with this strip's URL.

        Such a link is replaced by the strip URL followed by the last
        eleven characters of the link, i.e. '/YYYY-MM-DD'.
        """
        if tourl.startswith(self.url):
            return tourl
        date_suffix = tourl[-11:]  # /YYYY-MM-DD
        return self.url + date_suffix

    @classmethod
    def getmodules(cls):
        """Return a tuple with one scraper instance per known strip."""
        return (
            # Some comics are not listed on the "all" page (too old?)

            # do not edit anything below since these entries are generated from
            # scripts/comicskingdom.py
            # START AUTOUPDATE
            cls('AmazingSpiderMan', 'amazing-spider-man'),
            cls('Apartment3G', 'apartment-3-g_1'),
            cls('ArcticCircle', 'arctic-circle'),
            cls('BabyBlues', 'baby-blues'),
            cls('BarneyGoogleAndSnuffySmith', 'barney-google-and-snuffy-smith'),
            cls('BeetleBailey', 'beetle-bailey-1'),
            cls('BettyBoopSundays', 'betty-boop-sundays'),
            cls('BetweenFriends', 'between-friends'),
            cls('BigBenBolt', 'big-ben-bolt'),
            cls('BigBenBoltSundays', 'big-ben-bolt-sundays'),
            cls('Bizarro', 'bizarro'),
            cls('Blondie', 'blondie'),
            cls('BonersArk', 'boners-ark'),
            cls('BonersArkSundays', 'boners-ark-sundays'),
            cls('BrianDuffy', 'brian-duffy'),
            cls('BrickBradford', 'brick-bradford'),
            cls('BrilliantMindOfEdisonLee', 'brilliant-mind-of-edison-lee'),
            cls('BringingUpFather', 'bringing-up-father'),
            cls('Buckles', 'buckles'),
            cls('BuzSawyer', 'buz-sawyer'),
            cls('CarpeDiem', 'carpe-diem'),
            cls('Crankshaft', 'crankshaft'),
            cls('Crock', 'crock'),
            cls('Curtis', 'curtis'),
            cls('DaddyDaze', 'daddy-daze'),
            # DarrinBell has a duplicate in GoComics/DarrinBell
            cls('DavidMHitch', 'david-m-hitch'),
            cls('DennisTheMenace', 'dennis-the-menace'),
            cls('Dustin', 'dustin'),
            cls('EdGamble', 'ed-gamble'),
            cls('FamilyCircus', 'family-circus'),
            cls('FlashGordon', 'flash-gordon'),
            cls('FlashGordonSundays', 'flash-gordon-sundays'),
            cls('FunkyWinkerbean', 'funky-winkerbean'),
            cls('FunkyWinkerbeanSundays', 'funky-winkerbean-sundays'),
            cls('HagarTheHorrible', 'hagar-the-horrible'),
            cls('HeartOfJulietJones', 'heart-of-juliet-jones'),
            cls('HeartOfJulietJonesSundays', 'heart-of-juliet-jones-sundays'),
            cls('HiAndLois', 'hi-and-lois'),
            cls('IntelligentLife', 'Intelligent'),
            cls('JimmyMargulies', 'jimmy-margulies'),
            cls('JohnBranch', 'john-branch'),
            cls('JohnnyHazard', 'johnny-hazard'),
            cls('JohnnyHazardSundays', 'johnny-hazard-sundays'),
            cls('JudgeParker', 'judge-parker'),
            cls('JungleJimSundays', 'jungle-jim-sundays'),
            cls('KatzenjammerKids', 'katzenjammer-kids'),
            cls('KatzenjammerKidsSundays', 'katzenjammer-kids-sundays'),
            cls('KevinAndKell', 'kevin-and-kell'),
            cls('KingOfTheRoyalMounted', 'king-of-the-royal-mounted'),
            cls('KirkWalters', 'kirk-walters'),
            cls('KrazyKat', 'krazy-kat'),
            cls('LeeJudge', 'lee-judge'),
            cls('LittleIodineSundays', 'little-iodine-sundays'),
            cls('Lockhorns', 'lockhorns'),
            cls('Macanudo', 'Macanudo'),
            cls('MallardFillmore', 'mallard-fillmore'),
            cls('MandrakeTheMagician', 'mandrake-the-magician-1'),
            cls('MandrakeTheMagicianSundays', 'mandrake-the-magician-sundays'),
            cls('MarkTrail', 'mark-trail'),
            cls('Marvin', 'marvin'),
            cls('MaryWorth', 'mary-worth'),
            cls('MikePeters', 'mike-peters'),
            cls('MikeShelton', 'mike-shelton'),
            cls('MikeSmith', 'mike-smith'),
            cls('MooseAndMolly', 'moose-and-molly'),
            cls('MotherGooseAndGrimm', 'mother-goose-grimm'),
            cls('Mutts', 'mutts'),
            cls('OfficeHours', 'office-hours'),
            cls('OnTheFastrack', 'on-the-fastrack'),
            cls('PajamaDiaries', 'pajama-diaries'),
            cls('PardonMyPlanet', 'pardon-my-planet'),
            cls('Phantom', 'phantom'),
            cls('PhantomSundays', 'phantom-sundays'),
            cls('Popeye', 'popeye'),
            cls('PopeyesCartoonClub', 'popeyes-cartoon-club'),
            cls('PrinceValiant', 'prince-valiant'),
            cls('ProsAndCons', 'pros-cons'),
            cls('Quincy', 'quincy'),
            cls('RadioPatrol', 'radio-patrol'),
            cls('Redeye', 'redeye-2'),
            cls('RedeyeSundays', 'redeye-sundays'),
            cls('Retail', 'retail'),
            cls('RexMorganMD', 'rex-morgan-m-d'),
            cls('RhymesWithOrange', 'rhymes-with-orange'),
            cls('RipKirby', 'rip-kirby'),
            cls('SafeHavens', 'safe-havens'),
            cls('SallyForth', 'sally-forth'),
            cls('SamAndSilo', 'sam-and-silo'),
            cls('SecretAgentX9', 'secret-agent-x-9'),
            cls('ShermansLagoon', 'sherman-s-lagoon'),
            # Shoe has a duplicate in GoComics/Shoe
            cls('SixChix', 'six-chix'),
            cls('SlylockFoxAndComicsForKids', 'slylock-fox-and-comics-for-kids'),
            cls('TakeItFromTheTinkersons', 'take-it-from-the-tinkersons'),
            cls('TheLittleKing', 'the-little-king'),
            cls('ThimbleTheater', 'thimble-theater'),
            cls('Tiger', 'tiger'),
            cls('TigerSundays', 'tiger-sundays'),
            cls('ToddTheDinosaur', 'todd-the-dinosaur'),
            cls('ZippyThePinhead', 'zippy-the-pinhead'),
            cls('Zits', 'zits'),
            # END AUTOUPDATE
        )

47
scripts/comicskingdom.py Executable file
View file

@ -0,0 +1,47 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
# Copyright (C) 2019 Thomas W. Littauer
"""
Script to get a list of comicskingdom.com comics and save the info in a JSON file
for further processing.
"""
from __future__ import absolute_import, division, print_function
from scriptutil import ComicListUpdater
class ComicsKingdomUpdater(ComicListUpdater):
    """Collect the comics linked from the comicskingdom.com front page.

    Each comic found is rendered as a "cls('Name', 'path')," line.
    """

    # NOTE(review): presumably consumed by ComicListUpdater to detect comics
    # that already exist in other plugin modules -- confirm in scriptutil.
    dup_templates = ("Creators/%s", "DrunkDuck/%s", "GoComics/%s",
                     "KeenSpot/%s", "ComicGenesis/%s", "SmackJeeves/%s")

    # names of comics to exclude
    excluded_comics = (
        # no images
        'Doodles',
    )

    def handle_url(self, url):
        """Parse one listing page and register every comic linked on it."""
        page = self.get_url(url)
        for entry in page.cssselect('ul.comic-link-group li'):
            # The first anchor of the list item supplies both link and title.
            anchor = entry.cssselect('a')[0]
            href = anchor.attrib['href']
            title = anchor.text
            # The last path component of the link is the comic's path.
            self.add_comic(title, href.rsplit('/', 1)[1])

    def collect_results(self):
        """Parse all search result pages (currently only the front page)."""
        self.handle_url('https://www.comicskingdom.com/')

    def get_entry(self, name, path):
        """Return the source line for one comic.

        Comics whose name ends in 'spanish' (case-insensitive) get an
        additional 'es' language argument.
        """
        if name.lower().endswith('spanish'):
            langopt = ", 'es'"
        else:
            langopt = ''
        return u"cls('%s', '%s'%s)," % (name, path, langopt)
# Run the updater only when this file is executed as a script.
if __name__ == '__main__':
    updater = ComicsKingdomUpdater(__file__)
    updater.run()

View file

@ -6,7 +6,7 @@ d=$(dirname $0)
if [ $# -ge 1 ]; then
list="$*"
else
list="arcamax comicfury comicgenesis creators gocomics keenspot smackjeeves webcomicfactory"
list="arcamax comicfury comicgenesis creators gocomics keenspot smackjeeves webcomicfactory comicskingdom"
fi
for script in $list; do
echo "Executing ${script}.py"

View file

@ -9,7 +9,7 @@ d=$(dirname $0)
if [ $# -ge 1 ]; then
list="$*"
else
list="arcamax comicfury comicgenesis creators gocomics keenspot smackjeeves webcomicfactory"
list="arcamax comicfury comicgenesis creators gocomics keenspot smackjeeves webcomicfactory comicskingdom"
fi
for script in $list; do
target="${d}/../dosagelib/plugins/${script}.py"