Add comic strips from Arcamax.

This commit is contained in:
Bastian Kleineidam 2013-01-23 19:34:11 +01:00
parent d54d787af1
commit 0e438b864e
9 changed files with 210 additions and 10 deletions

View file

@ -1,15 +1,17 @@
Dosage 1.9 (released xx.xx.xxxx) Dosage 1.9 (released xx.xx.xxxx)
Features: Features:
- comics: Added AmazingSuperPowers strip. - comics: Added AmazingSuperPowers comic strip.
- comics: Added PandyLand strip. - comics: Added PandyLand comic strip.
- comics: Added all comic strips from Arcamax (including, e.g.,
Hagar the Horrible).
Changes: Changes:
- comics: CyanideAndHappiness image filename now has the strip number prefixed. - comics: CyanideAndHappiness image filename now has the strip number prefixed.
Fixes: Fixes:
- scripts: Ensure the generated comic names do not exceed 100 characters so they do - scripts: Ensure the generated comic names do not exceed 100 characters so
not cause problems with path length restrictions. they do not cause problems with path length restrictions.
Dosage 1.8 (released 20.12.2012) Dosage 1.8 (released 20.12.2012)

2
dosage
View file

@ -2,7 +2,7 @@
# -*- coding: iso-8859-1 -*- # -*- coding: iso-8859-1 -*-
# Dosage, the webcomic downloader # Dosage, the webcomic downloader
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from __future__ import print_function from __future__ import print_function
import sys import sys
import os import os

View file

@ -1,5 +1,5 @@
# -*- coding: iso-8859-1 -*- # -*- coding: iso-8859-1 -*-
# Copyright (C) 2012 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
""" """
Functions to load plugin modules. Functions to load plugin modules.
""" """

View file

@ -0,0 +1,109 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2013 Bastian Kleineidam
"""
Arcamax comic strips
"""
from re import compile
from ..scraper import make_scraper
from ..util import tagre
# Pattern for the strip image link: built by tagre for an "a" tag's "href"
# attribute; the capture group is the path starting with /newspics/.
# NOTE(review): presumably after='zoom' requires the text "zoom" later in
# the same tag -- confirm against dosagelib.util.tagre.
_imageSearch = compile(tagre("a", "href", r'(/newspics/[^"]+)', after='zoom'))
# Pattern for the link to the previous strip: the capture group is a
# site-relative path (starting with "/").
# NOTE(review): presumably before='prev' requires the text "prev" earlier in
# the same tag -- confirm against dosagelib.util.tagre.
_prevSearch = compile(tagre("a", "href", r'(/[^"]+)', before='prev'))
def add(name, shortname):
    """Create the scraper class for one Arcamax comic and register it here.

    The class is built with make_scraper and stored in this module's
    globals under the key 'Arcamax_<name>'.

    name -- comic name; used for the class name and for the scraper
        name 'Arcamax/<name>'.
    shortname -- URL path of the comic, appended to http://www.arcamax.com
    """
    comicUrl = 'http://www.arcamax.com%s' % shortname
    classname = 'Arcamax_%s' % name
    # Collect the scraper attributes first so the make_scraper call stays short.
    attributes = dict(
        name='Arcamax/' + name,
        latestUrl=comicUrl,
        stripUrl=comicUrl + '%s/',
        imageSearch=_imageSearch,
        prevSearch=_prevSearch,
        help='Index format: none',
    )
    globals()[classname] = make_scraper(classname, **attributes)
# do not edit anything below since these entries are generated from scripts/update.sh
# DO NOT REMOVE
#add('9ChickweedLane', '/thefunnies/ninechickweedlane/')
#add('Agnes', '/thefunnies/agnes/')
#add('AndyCapp', '/thefunnies/andycapp/')
#add('Archie', '/thefunnies/archie/')
add('ArcticCircle', '/thefunnies/arcticcircle/')
#add('AskShagg', '/thefunnies/askshagg/')
#add('BC', '/thefunnies/bc/')
add('BabyBlues', '/thefunnies/babyblues/')
#add('BallardStreet', '/thefunnies/ballardstreet/')
#add('BarneyAndClyde', '/thefunnies/barneyandclyde/')
add('BarneyGoogleAndSnuffySmith', '/thefunnies/barneygoogle/')
add('BeetleBailey', '/thefunnies/beetlebailey/')
add('Bizarro', '/thefunnies/bizarro/')
add('BleekerTheRechargeableDog', '/thefunnies/bleekertherechargeabledog/')
add('Blondie', '/thefunnies/blondie/')
add('Boondocks', '/thefunnies/boondocks/')
add('BrilliantMindofEdisonLee', '/thefunnies/brilliantmindofedisonlee/')
add('CafC3A9ConLeche', '/thefunnies/cafeconleche/')
#add('Candorville', '/thefunnies/candorville/')
#add('Cathy', '/thefunnies/cathy/')
#add('ChuckleBros', '/thefunnies/chucklebros/')
add('Crankshaft', '/thefunnies/crankshaft/')
#add('CuldeSac', '/thefunnies/culdesac/')
add('Curtis', '/thefunnies/curtis/')
#add('DaddysHome', '/thefunnies/daddyshome/')
add('DeFlocked', '/thefunnies/deflocked/')
add('DennistheMenace', '/thefunnies/dennisthemenace/')
#add('DiamondLil', '/thefunnies/diamondlil/')
add('Dilbert', '/thefunnies/dilbert/')
add('DinetteSet', '/thefunnies/thedinetteset/')
#add('DogEatDoug', '/thefunnies/dogeatdoug/')
#add('DogsofCKennel', '/thefunnies/dogsofckennel/')
#add('Doonesbury', '/thefunnies/doonesbury/')
add('Dustin', '/thefunnies/dustin/')
add('FamilyCircus', '/thefunnies/familycircus/')
#add('FloAndFriends', '/thefunnies/floandfriends/')
#add('ForHeavensSake', '/thefunnies/forheavenssake/')
#add('FortKnox', '/thefunnies/fortknox/')
#add('FreeRange', '/thefunnies/freerange/')
#add('Garfield', '/thefunnies/garfield/')
#add('GetFuzzy', '/thefunnies/getfuzzy/')
add('HagartheHorrible', '/thefunnies/hagarthehorrible/')
#add('Heathcliff', '/thefunnies/heathcliff/')
#add('HerbandJamaal', '/thefunnies/herbandjamaal/')
add('HiandLois', '/thefunnies/hiandlois/')
#add('HomeAndAway', '/thefunnies/homeandaway/')
add('JerryKingCartoons', '/thefunnies/humorcartoon/')
#add('LittleDogLost', '/thefunnies/littledoglost/')
#add('Luann', '/thefunnies/luann/')
add('MallardFillmore', '/thefunnies/mallardfillmore/')
add('Marvin', '/thefunnies/marvin/')
add('MeaningofLila', '/thefunnies/meaningoflila/')
#add('Momma', '/thefunnies/momma/')
add('MotherGooseAndGrimm', '/thefunnies/mothergooseandgrimm/')
add('Mutts', '/thefunnies/mutts/')
#add('NestHeads', '/thefunnies/nestheads/')
#add('NonSequitur', '/thefunnies/nonsequitur/')
#add('OnaClaireDay', '/thefunnies/onaclaireday/')
#add('OneBigHappy', '/thefunnies/onebighappy/')
#add('Peanuts', '/thefunnies/peanuts/')
#add('PearlsBeforeSwine', '/thefunnies/pearlsbeforeswine/')
#add('Pickles', '/thefunnies/pickles/')
#add('RedandRover', '/thefunnies/redandrover/')
#add('ReplyAll', '/thefunnies/replyall/')
add('RhymeswithOrange', '/thefunnies/rhymeswithorange/')
#add('Rubes', '/thefunnies/rubes/')
#add('Rugrats', '/thefunnies/rugrats/')
#add('ScaryGary', '/thefunnies/scarygary/')
#add('SpeedBump', '/thefunnies/speedbump/')
#add('StrangeBrew', '/thefunnies/strangebrew/')
#add('TheBarn', '/thefunnies/thebarn/')
add('TheLockhorns', '/thefunnies/thelockhorns/')
#add('TheOtherCoast', '/thefunnies/theothercoast/')
#add('ThinLines', '/thefunnies/thinlines/')
add('TinasGroove', '/thefunnies/tinasgroove/')
#add('WatchYourHead', '/thefunnies/watchyourhead/')
#add('WeePals', '/thefunnies/weepals/')
#add('WizardofId', '/thefunnies/wizardofid/')
#add('WorkingitOut', '/thefunnies/workingitout/')
#add('ZackHill', '/thefunnies/zackhill/')
add('Zits', '/thefunnies/zits/')

1
scripts/arcamax.json Normal file
View file

@ -0,0 +1 @@
{"9ChickweedLane": "/thefunnies/ninechickweedlane/", "Agnes": "/thefunnies/agnes/", "AndyCapp": "/thefunnies/andycapp/", "Archie": "/thefunnies/archie/", "ArcticCircle": "/thefunnies/arcticcircle/", "AskShagg": "/thefunnies/askshagg/", "BC": "/thefunnies/bc/", "BabyBlues": "/thefunnies/babyblues/", "BallardStreet": "/thefunnies/ballardstreet/", "BarneyAndClyde": "/thefunnies/barneyandclyde/", "BarneyGoogleAndSnuffySmith": "/thefunnies/barneygoogle/", "BeetleBailey": "/thefunnies/beetlebailey/", "Bizarro": "/thefunnies/bizarro/", "BleekerTheRechargeableDog": "/thefunnies/bleekertherechargeabledog/", "Blondie": "/thefunnies/blondie/", "Boondocks": "/thefunnies/boondocks/", "BrilliantMindofEdisonLee": "/thefunnies/brilliantmindofedisonlee/", "CafC3A9ConLeche": "/thefunnies/cafeconleche/", "Candorville": "/thefunnies/candorville/", "Cathy": "/thefunnies/cathy/", "ChuckleBros": "/thefunnies/chucklebros/", "Crankshaft": "/thefunnies/crankshaft/", "CuldeSac": "/thefunnies/culdesac/", "Curtis": "/thefunnies/curtis/", "DaddysHome": "/thefunnies/daddyshome/", "DeFlocked": "/thefunnies/deflocked/", "DennistheMenace": "/thefunnies/dennisthemenace/", "DiamondLil": "/thefunnies/diamondlil/", "Dilbert": "/thefunnies/dilbert/", "DinetteSet": "/thefunnies/thedinetteset/", "DogEatDoug": "/thefunnies/dogeatdoug/", "DogsofCKennel": "/thefunnies/dogsofckennel/", "Doonesbury": "/thefunnies/doonesbury/", "Dustin": "/thefunnies/dustin/", "FamilyCircus": "/thefunnies/familycircus/", "FloAndFriends": "/thefunnies/floandfriends/", "ForHeavensSake": "/thefunnies/forheavenssake/", "FortKnox": "/thefunnies/fortknox/", "FreeRange": "/thefunnies/freerange/", "Garfield": "/thefunnies/garfield/", "GetFuzzy": "/thefunnies/getfuzzy/", "HagartheHorrible": "/thefunnies/hagarthehorrible/", "Heathcliff": "/thefunnies/heathcliff/", "HerbandJamaal": "/thefunnies/herbandjamaal/", "HiandLois": "/thefunnies/hiandlois/", "HomeAndAway": "/thefunnies/homeandaway/", "JerryKingCartoons": 
"/thefunnies/humorcartoon/", "LittleDogLost": "/thefunnies/littledoglost/", "Luann": "/thefunnies/luann/", "MallardFillmore": "/thefunnies/mallardfillmore/", "Marvin": "/thefunnies/marvin/", "MeaningofLila": "/thefunnies/meaningoflila/", "Momma": "/thefunnies/momma/", "MotherGooseAndGrimm": "/thefunnies/mothergooseandgrimm/", "Mutts": "/thefunnies/mutts/", "NestHeads": "/thefunnies/nestheads/", "NonSequitur": "/thefunnies/nonsequitur/", "OnaClaireDay": "/thefunnies/onaclaireday/", "OneBigHappy": "/thefunnies/onebighappy/", "Peanuts": "/thefunnies/peanuts/", "PearlsBeforeSwine": "/thefunnies/pearlsbeforeswine/", "Pickles": "/thefunnies/pickles/", "RedandRover": "/thefunnies/redandrover/", "ReplyAll": "/thefunnies/replyall/", "RhymeswithOrange": "/thefunnies/rhymeswithorange/", "Rubes": "/thefunnies/rubes/", "Rugrats": "/thefunnies/rugrats/", "ScaryGary": "/thefunnies/scarygary/", "SpeedBump": "/thefunnies/speedbump/", "StrangeBrew": "/thefunnies/strangebrew/", "TheBarn": "/thefunnies/thebarn/", "TheLockhorns": "/thefunnies/thelockhorns/", "TheOtherCoast": "/thefunnies/theothercoast/", "ThinLines": "/thefunnies/thinlines/", "TinasGroove": "/thefunnies/tinasgroove/", "WatchYourHead": "/thefunnies/watchyourhead/", "WeePals": "/thefunnies/weepals/", "WizardofId": "/thefunnies/wizardofid/", "WorkingitOut": "/thefunnies/workingitout/", "ZackHill": "/thefunnies/zackhill/", "Zits": "/thefunnies/zits/"}

88
scripts/arcamax.py Executable file
View file

@ -0,0 +1,88 @@
#!/usr/bin/env python
# Copyright (C) 2013 Bastian Kleineidam
"""
Script to get arcamax comics and save the info in a JSON file for further processing.
"""
from __future__ import print_function
import re
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from dosagelib.util import getPageContent, asciify, unescape
from dosagelib.scraper import get_scrapers
from scriptutil import contains_case_insensitive, capfirst, save_result, load_result, truncate_name
# JSON file holding the parsed comics; it sits next to this script and has
# the same base name. Only the file extension is swapped: a plain
# str.replace(".py", ".json") would also rewrite ".py" inside directory
# names and turn a ".pyc" path into ".jsonc".
json_file = os.path.splitext(__file__)[0] + ".json"
# Matches one comic entry of the overview page; group 1 is the comic's URL
# path below /thefunnies/, group 2 is the link text (the comic title).
url_matcher = re.compile(r'<li><b><a href="(/thefunnies/[^"]+)">([^<]+)</a>')
# names of comics to exclude
exclude_comics = [
]
def handle_url(url, res):
    """Fetch one overview page and record the comics found on it.

    url -- address of the page to download.
    res -- dictionary {name -> shortname}; updated in place.

    Download errors are reported on stderr and leave res untouched.
    Excluded comics are skipped, and so are names that clash with an
    existing entry when compared case-insensitively.
    """
    print("Parsing", url, file=sys.stderr)
    try:
        data, _ = getPageContent(url)
    except IOError as msg:
        print("ERROR:", msg, file=sys.stderr)
        return
    for found in url_matcher.finditer(data):
        shortname, title = found.group(1, 2)
        # Turn the link text into a plain ASCII, capitalized identifier.
        title = unescape(title).replace('&', 'And').replace('@', 'At')
        name = capfirst(asciify(title))
        if name in exclude_comics:
            pass
        elif contains_case_insensitive(res, name):
            # we cannot handle two comics that only differ in case
            print("INFO: skipping possible duplicate", name, file=sys.stderr)
        else:
            res[name] = shortname
    if not res:
        print("ERROR:", "did not match any comics", file=sys.stderr)
def get_results():
    """Parse the Arcamax comics overview page and save the result as JSON."""
    # maps comic name -> shortname (URL path of the comic)
    comics = {}
    handle_url('http://www.arcamax.com/comics', comics)
    save_result(comics, json_file)
def has_comic(name):
    """Tell whether another comic source already provides this comic.

    The name is checked, ignoring case, against the scraper names of the
    Creators, DrunkDuck, GoComics, KeenSpot and SmackJeeves sources.
    Returns True on the first matching scraper, else False.
    """
    templates = (
        "Creators/%s",
        "DrunkDuck/%s",
        "GoComics/%s",
        "KeenSpot/%s",
        "SmackJeeves/%s",
    )
    candidates = set((template % name).lower() for template in templates)
    return any(
        scraperclass.get_name().lower() in candidates
        for scraperclass in get_scrapers()
    )
def print_results(args):
    """Print one add() line per comic stored in the JSON file.

    Comics are printed sorted by name. Excluded comics are skipped, and
    a line is commented out with '#' when has_comic() reports the comic
    as already available elsewhere.

    args -- command line arguments; not used by this function.
    """
    entries = load_result(json_file).items()
    for name, shortname in sorted(entries):
        if name in exclude_comics:
            continue
        prefix = '#' if has_comic(name) else ''
        line = "%sadd(%r, %r)" % (prefix, str(truncate_name(name)), str(shortname))
        print(line)
if __name__ == '__main__':
    # Any command line argument selects printing of the stored results;
    # without arguments the comic list is fetched and saved.
    cli_args = sys.argv[1:]
    if cli_args:
        print_results(cli_args)
    else:
        get_results()

View file

@ -2,7 +2,7 @@
set -u set -u
d=$(dirname $0) d=$(dirname $0)
for script in creators gocomics drunkduck universal keenspot smackjeeves; do for script in creators gocomics drunkduck universal keenspot smackjeeves arcamax; do
echo "Executing ${script}.py" echo "Executing ${script}.py"
"${d}/${script}.py" "${d}/${script}.py"
done done

View file

@ -1,5 +1,5 @@
#!/usr/bin/env python #!/usr/bin/env python
# Copyright (C) 2012 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
"""Remove all lines after a given marker line. """Remove all lines after a given marker line.
""" """
from __future__ import print_function from __future__ import print_function

View file

@ -1,11 +1,11 @@
#!/bin/sh -e #!/bin/sh -e
# Copyright (C) 2012 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
set -u set -u
mincomics=100 mincomics=100
d=$(dirname $0) d=$(dirname $0)
for script in creators gocomics drunkduck universal keenspot smackjeeves; do for script in creators gocomics drunkduck universal keenspot smackjeeves arcamax; do
target="${d}/../dosagelib/plugins/${script}.py" target="${d}/../dosagelib/plugins/${script}.py"
echo "Upating $target" echo "Upating $target"
"${d}/removeafter.py" "$target" "# DO NOT REMOVE" "${d}/removeafter.py" "$target" "# DO NOT REMOVE"