Add comic strips from Arcamax.
This commit is contained in:
parent
d54d787af1
commit
0e438b864e
9 changed files with 210 additions and 10 deletions
|
@ -1,15 +1,17 @@
|
|||
Dosage 1.9 (released xx.xx.xxxx)
|
||||
|
||||
Features:
|
||||
- comics: Added AmazingSuperPowers strip.
|
||||
- comics: Added PandyLand strip.
|
||||
- comics: Added AmazingSuperPowers comic strip.
|
||||
- comics: Added PandyLand comic strip.
|
||||
- comics: Added all comic strips from Arcamax (e.g. including
|
||||
Hagar the Horrible).
|
||||
|
||||
Changes:
|
||||
- comics: CyanideAndHappiness image filename now has the strip number prefixed.
|
||||
|
||||
Fixes:
|
||||
- scripts: Ensure the generated comic names do not exceed 100 characters so they do
|
||||
not cause problems with path length restrictions.
|
||||
- scripts: Ensure the generated comic names do not exceed 100 characters so
|
||||
they do not cause problems with path length restrictions.
|
||||
|
||||
|
||||
Dosage 1.8 (released 20.12.2012)
|
||||
|
|
2
dosage
2
dosage
|
@ -2,7 +2,7 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Dosage, the webcomic downloader
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
# Copyright (C) 2012-2013 Bastian Kleineidam
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
import os
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
# Copyright (C) 2012-2013 Bastian Kleineidam
|
||||
"""
|
||||
Functions to load plugin modules.
|
||||
"""
|
||||
|
|
109
dosagelib/plugins/arcamax.py
Normal file
109
dosagelib/plugins/arcamax.py
Normal file
|
@ -0,0 +1,109 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2013 Bastian Kleineidam
|
||||
"""
|
||||
Arcamax comic strips
|
||||
"""
|
||||
from re import compile
|
||||
from ..scraper import make_scraper
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
# Compiled pattern handed to every Arcamax scraper as imageSearch; it captures
# an href below /newspics/ (the pattern itself is built by the tagre helper).
_imageSearch = compile(tagre("a", "href", r'(/newspics/[^"]+)', after='zoom'))
# Compiled pattern handed to every Arcamax scraper as prevSearch; it captures
# a site-relative href (presumably the "previous strip" link -- see tagre).
_prevSearch = compile(tagre("a", "href", r'(/[^"]+)', before='prev'))


def add(name, shortname):
    """Create the scraper class for one Arcamax comic and publish it.

    The class is built with make_scraper() and stored in this module's
    globals under the name 'Arcamax_<name>'.

    @param name: comic name, used for the class name and for the scraper
        name 'Arcamax/<name>'
    @param shortname: path of the comic on www.arcamax.com, appended
        verbatim to the host name
    """
    base_url = 'http://www.arcamax.com%s' % shortname
    scraper_id = 'Arcamax_%s' % name
    settings = dict(
        name='Arcamax/' + name,
        latestUrl=base_url,
        stripUrl=base_url + '%s/',
        imageSearch=_imageSearch,
        prevSearch=_prevSearch,
        help='Index format: none',
    )
    globals()[scraper_id] = make_scraper(scraper_id, **settings)
|
||||
|
||||
# do not edit anything below since these entries are generated from scripts/update.sh
|
||||
# DO NOT REMOVE
|
||||
#add('9ChickweedLane', '/thefunnies/ninechickweedlane/')
|
||||
#add('Agnes', '/thefunnies/agnes/')
|
||||
#add('AndyCapp', '/thefunnies/andycapp/')
|
||||
#add('Archie', '/thefunnies/archie/')
|
||||
add('ArcticCircle', '/thefunnies/arcticcircle/')
|
||||
#add('AskShagg', '/thefunnies/askshagg/')
|
||||
#add('BC', '/thefunnies/bc/')
|
||||
add('BabyBlues', '/thefunnies/babyblues/')
|
||||
#add('BallardStreet', '/thefunnies/ballardstreet/')
|
||||
#add('BarneyAndClyde', '/thefunnies/barneyandclyde/')
|
||||
add('BarneyGoogleAndSnuffySmith', '/thefunnies/barneygoogle/')
|
||||
add('BeetleBailey', '/thefunnies/beetlebailey/')
|
||||
add('Bizarro', '/thefunnies/bizarro/')
|
||||
add('BleekerTheRechargeableDog', '/thefunnies/bleekertherechargeabledog/')
|
||||
add('Blondie', '/thefunnies/blondie/')
|
||||
add('Boondocks', '/thefunnies/boondocks/')
|
||||
add('BrilliantMindofEdisonLee', '/thefunnies/brilliantmindofedisonlee/')
|
||||
add('CafC3A9ConLeche', '/thefunnies/cafeconleche/')
|
||||
#add('Candorville', '/thefunnies/candorville/')
|
||||
#add('Cathy', '/thefunnies/cathy/')
|
||||
#add('ChuckleBros', '/thefunnies/chucklebros/')
|
||||
add('Crankshaft', '/thefunnies/crankshaft/')
|
||||
#add('CuldeSac', '/thefunnies/culdesac/')
|
||||
add('Curtis', '/thefunnies/curtis/')
|
||||
#add('DaddysHome', '/thefunnies/daddyshome/')
|
||||
add('DeFlocked', '/thefunnies/deflocked/')
|
||||
add('DennistheMenace', '/thefunnies/dennisthemenace/')
|
||||
#add('DiamondLil', '/thefunnies/diamondlil/')
|
||||
add('Dilbert', '/thefunnies/dilbert/')
|
||||
add('DinetteSet', '/thefunnies/thedinetteset/')
|
||||
#add('DogEatDoug', '/thefunnies/dogeatdoug/')
|
||||
#add('DogsofCKennel', '/thefunnies/dogsofckennel/')
|
||||
#add('Doonesbury', '/thefunnies/doonesbury/')
|
||||
add('Dustin', '/thefunnies/dustin/')
|
||||
add('FamilyCircus', '/thefunnies/familycircus/')
|
||||
#add('FloAndFriends', '/thefunnies/floandfriends/')
|
||||
#add('ForHeavensSake', '/thefunnies/forheavenssake/')
|
||||
#add('FortKnox', '/thefunnies/fortknox/')
|
||||
#add('FreeRange', '/thefunnies/freerange/')
|
||||
#add('Garfield', '/thefunnies/garfield/')
|
||||
#add('GetFuzzy', '/thefunnies/getfuzzy/')
|
||||
add('HagartheHorrible', '/thefunnies/hagarthehorrible/')
|
||||
#add('Heathcliff', '/thefunnies/heathcliff/')
|
||||
#add('HerbandJamaal', '/thefunnies/herbandjamaal/')
|
||||
add('HiandLois', '/thefunnies/hiandlois/')
|
||||
#add('HomeAndAway', '/thefunnies/homeandaway/')
|
||||
add('JerryKingCartoons', '/thefunnies/humorcartoon/')
|
||||
#add('LittleDogLost', '/thefunnies/littledoglost/')
|
||||
#add('Luann', '/thefunnies/luann/')
|
||||
add('MallardFillmore', '/thefunnies/mallardfillmore/')
|
||||
add('Marvin', '/thefunnies/marvin/')
|
||||
add('MeaningofLila', '/thefunnies/meaningoflila/')
|
||||
#add('Momma', '/thefunnies/momma/')
|
||||
add('MotherGooseAndGrimm', '/thefunnies/mothergooseandgrimm/')
|
||||
add('Mutts', '/thefunnies/mutts/')
|
||||
#add('NestHeads', '/thefunnies/nestheads/')
|
||||
#add('NonSequitur', '/thefunnies/nonsequitur/')
|
||||
#add('OnaClaireDay', '/thefunnies/onaclaireday/')
|
||||
#add('OneBigHappy', '/thefunnies/onebighappy/')
|
||||
#add('Peanuts', '/thefunnies/peanuts/')
|
||||
#add('PearlsBeforeSwine', '/thefunnies/pearlsbeforeswine/')
|
||||
#add('Pickles', '/thefunnies/pickles/')
|
||||
#add('RedandRover', '/thefunnies/redandrover/')
|
||||
#add('ReplyAll', '/thefunnies/replyall/')
|
||||
add('RhymeswithOrange', '/thefunnies/rhymeswithorange/')
|
||||
#add('Rubes', '/thefunnies/rubes/')
|
||||
#add('Rugrats', '/thefunnies/rugrats/')
|
||||
#add('ScaryGary', '/thefunnies/scarygary/')
|
||||
#add('SpeedBump', '/thefunnies/speedbump/')
|
||||
#add('StrangeBrew', '/thefunnies/strangebrew/')
|
||||
#add('TheBarn', '/thefunnies/thebarn/')
|
||||
add('TheLockhorns', '/thefunnies/thelockhorns/')
|
||||
#add('TheOtherCoast', '/thefunnies/theothercoast/')
|
||||
#add('ThinLines', '/thefunnies/thinlines/')
|
||||
add('TinasGroove', '/thefunnies/tinasgroove/')
|
||||
#add('WatchYourHead', '/thefunnies/watchyourhead/')
|
||||
#add('WeePals', '/thefunnies/weepals/')
|
||||
#add('WizardofId', '/thefunnies/wizardofid/')
|
||||
#add('WorkingitOut', '/thefunnies/workingitout/')
|
||||
#add('ZackHill', '/thefunnies/zackhill/')
|
||||
add('Zits', '/thefunnies/zits/')
|
1
scripts/arcamax.json
Normal file
1
scripts/arcamax.json
Normal file
|
@ -0,0 +1 @@
|
|||
{"9ChickweedLane": "/thefunnies/ninechickweedlane/", "Agnes": "/thefunnies/agnes/", "AndyCapp": "/thefunnies/andycapp/", "Archie": "/thefunnies/archie/", "ArcticCircle": "/thefunnies/arcticcircle/", "AskShagg": "/thefunnies/askshagg/", "BC": "/thefunnies/bc/", "BabyBlues": "/thefunnies/babyblues/", "BallardStreet": "/thefunnies/ballardstreet/", "BarneyAndClyde": "/thefunnies/barneyandclyde/", "BarneyGoogleAndSnuffySmith": "/thefunnies/barneygoogle/", "BeetleBailey": "/thefunnies/beetlebailey/", "Bizarro": "/thefunnies/bizarro/", "BleekerTheRechargeableDog": "/thefunnies/bleekertherechargeabledog/", "Blondie": "/thefunnies/blondie/", "Boondocks": "/thefunnies/boondocks/", "BrilliantMindofEdisonLee": "/thefunnies/brilliantmindofedisonlee/", "CafC3A9ConLeche": "/thefunnies/cafeconleche/", "Candorville": "/thefunnies/candorville/", "Cathy": "/thefunnies/cathy/", "ChuckleBros": "/thefunnies/chucklebros/", "Crankshaft": "/thefunnies/crankshaft/", "CuldeSac": "/thefunnies/culdesac/", "Curtis": "/thefunnies/curtis/", "DaddysHome": "/thefunnies/daddyshome/", "DeFlocked": "/thefunnies/deflocked/", "DennistheMenace": "/thefunnies/dennisthemenace/", "DiamondLil": "/thefunnies/diamondlil/", "Dilbert": "/thefunnies/dilbert/", "DinetteSet": "/thefunnies/thedinetteset/", "DogEatDoug": "/thefunnies/dogeatdoug/", "DogsofCKennel": "/thefunnies/dogsofckennel/", "Doonesbury": "/thefunnies/doonesbury/", "Dustin": "/thefunnies/dustin/", "FamilyCircus": "/thefunnies/familycircus/", "FloAndFriends": "/thefunnies/floandfriends/", "ForHeavensSake": "/thefunnies/forheavenssake/", "FortKnox": "/thefunnies/fortknox/", "FreeRange": "/thefunnies/freerange/", "Garfield": "/thefunnies/garfield/", "GetFuzzy": "/thefunnies/getfuzzy/", "HagartheHorrible": "/thefunnies/hagarthehorrible/", "Heathcliff": "/thefunnies/heathcliff/", "HerbandJamaal": "/thefunnies/herbandjamaal/", "HiandLois": "/thefunnies/hiandlois/", "HomeAndAway": "/thefunnies/homeandaway/", "JerryKingCartoons": 
"/thefunnies/humorcartoon/", "LittleDogLost": "/thefunnies/littledoglost/", "Luann": "/thefunnies/luann/", "MallardFillmore": "/thefunnies/mallardfillmore/", "Marvin": "/thefunnies/marvin/", "MeaningofLila": "/thefunnies/meaningoflila/", "Momma": "/thefunnies/momma/", "MotherGooseAndGrimm": "/thefunnies/mothergooseandgrimm/", "Mutts": "/thefunnies/mutts/", "NestHeads": "/thefunnies/nestheads/", "NonSequitur": "/thefunnies/nonsequitur/", "OnaClaireDay": "/thefunnies/onaclaireday/", "OneBigHappy": "/thefunnies/onebighappy/", "Peanuts": "/thefunnies/peanuts/", "PearlsBeforeSwine": "/thefunnies/pearlsbeforeswine/", "Pickles": "/thefunnies/pickles/", "RedandRover": "/thefunnies/redandrover/", "ReplyAll": "/thefunnies/replyall/", "RhymeswithOrange": "/thefunnies/rhymeswithorange/", "Rubes": "/thefunnies/rubes/", "Rugrats": "/thefunnies/rugrats/", "ScaryGary": "/thefunnies/scarygary/", "SpeedBump": "/thefunnies/speedbump/", "StrangeBrew": "/thefunnies/strangebrew/", "TheBarn": "/thefunnies/thebarn/", "TheLockhorns": "/thefunnies/thelockhorns/", "TheOtherCoast": "/thefunnies/theothercoast/", "ThinLines": "/thefunnies/thinlines/", "TinasGroove": "/thefunnies/tinasgroove/", "WatchYourHead": "/thefunnies/watchyourhead/", "WeePals": "/thefunnies/weepals/", "WizardofId": "/thefunnies/wizardofid/", "WorkingitOut": "/thefunnies/workingitout/", "ZackHill": "/thefunnies/zackhill/", "Zits": "/thefunnies/zits/"}
|
88
scripts/arcamax.py
Executable file
88
scripts/arcamax.py
Executable file
|
@ -0,0 +1,88 @@
|
|||
#!/usr/bin/env python
|
||||
# Copyright (C) 2013 Bastian Kleineidam
|
||||
"""
|
||||
Script to get arcamax comics and save the info in a JSON file for further processing.
|
||||
"""
|
||||
from __future__ import print_function
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
|
||||
from dosagelib.util import getPageContent, asciify, unescape
|
||||
from dosagelib.scraper import get_scrapers
|
||||
from scriptutil import contains_case_insensitive, capfirst, save_result, load_result, truncate_name
|
||||
|
||||
# The result file sits next to this script and shares its base name. Only the
# final extension is swapped, so a ".py" elsewhere in the path (for example in
# a directory name) is left alone.
json_file = os.path.splitext(__file__)[0] + ".json"

# Matches one comic entry of the listing page: group 1 is the /thefunnies/...
# path, group 2 is the link text (the comic title).
url_matcher = re.compile(r'<li><b><a href="(/thefunnies/[^"]+)">([^<]+)</a>')

# names of comics to exclude
exclude_comics = [
]
|
||||
|
||||
|
||||
def handle_url(url, res):
    """Fetch one comic listing page and record the comics it links to.

    Each link found by url_matcher adds an entry {name -> shortname} to the
    res dictionary, unless the name is listed in exclude_comics or differs
    from an existing key only in case. Problems are reported on stderr.
    """
    print("Parsing", url, file=sys.stderr)
    try:
        data, _base_url = getPageContent(url)
    except IOError as msg:
        print("ERROR:", msg, file=sys.stderr)
        return
    for found in url_matcher.finditer(data):
        path, title = found.group(1, 2)
        # build an ASCII-only name with a capitalized first letter
        title = unescape(title).replace('&', 'And').replace('@', 'At')
        name = capfirst(asciify(title))
        if name in exclude_comics:
            continue
        if not contains_case_insensitive(res, name):
            res[name] = path
            continue
        # we cannot handle two comics that only differ in case
        print("INFO: skipping possible duplicate", name, file=sys.stderr)
    if not res:
        print("ERROR:", "did not match any comics", file=sys.stderr)
|
||||
|
||||
|
||||
def get_results():
    """Collect the comics of the Arcamax listing page and save them to json_file."""
    # maps comic name -> shortname (the comic's URL path)
    comics = dict()
    handle_url('http://www.arcamax.com/comics', comics)
    save_result(comics, json_file)
|
||||
|
||||
|
||||
def has_comic(name):
    """Tell whether a scraper for this comic exists under another site prefix.

    The comparison is case-insensitive and covers the Creators, DrunkDuck,
    GoComics, KeenSpot and SmackJeeves prefixes.
    """
    templates = (
        "Creators/%s",
        "DrunkDuck/%s",
        "GoComics/%s",
        "KeenSpot/%s",
        "SmackJeeves/%s",
    )
    taken = [(template % name).lower() for template in templates]
    return any(cls.get_name().lower() in taken for cls in get_scrapers())
|
||||
|
||||
|
||||
def print_results(args):
    """Print one add() line for every comic stored in json_file, sorted by name.

    Comics listed in exclude_comics are left out. Comics for which
    has_comic() is true are printed commented out with a leading '#'.
    The args parameter is accepted but not used.
    """
    stored = load_result(json_file)
    for name, shortname in sorted(stored.items()):
        if name in exclude_comics:
            continue
        marker = '#' if has_comic(name) else ''
        call = "add(%r, %r)" % (str(truncate_name(name)), str(shortname))
        print(marker + call)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # With any command line argument print the stored results, otherwise
    # fetch the comic list and store it.
    cli_args = sys.argv[1:]
    if cli_args:
        print_results(cli_args)
    else:
        get_results()
|
|
@ -2,7 +2,7 @@
|
|||
set -u
|
||||
|
||||
d=$(dirname $0)
|
||||
for script in creators gocomics drunkduck universal keenspot smackjeeves; do
|
||||
for script in creators gocomics drunkduck universal keenspot smackjeeves arcamax; do
|
||||
echo "Executing ${script}.py"
|
||||
"${d}/${script}.py"
|
||||
done
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#!/usr/bin/env python
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
# Copyright (C) 2012-2013 Bastian Kleineidam
|
||||
"""Remove all lines after a given marker line.
|
||||
"""
|
||||
from __future__ import print_function
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
#!/bin/sh -e
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
# Copyright (C) 2012-2013 Bastian Kleineidam
|
||||
set -u
|
||||
|
||||
mincomics=100
|
||||
d=$(dirname $0)
|
||||
|
||||
for script in creators gocomics drunkduck universal keenspot smackjeeves; do
|
||||
for script in creators gocomics drunkduck universal keenspot smackjeeves arcamax; do
|
||||
target="${d}/../dosagelib/plugins/${script}.py"
|
||||
echo "Upating $target"
|
||||
"${d}/removeafter.py" "$target" "# DO NOT REMOVE"
|
||||
|
|
Loading…
Reference in a new issue