diff --git a/doc/changelog.txt b/doc/changelog.txt index 5c0aca19b..f0344ef80 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -1,15 +1,17 @@ Dosage 1.9 (released xx.xx.xxxx) Features: -- comics: Added AmazingSuperPowers strip. -- comics: Added PandyLand strip. +- comics: Added AmazingSuperPowers comic strip. +- comics: Added PandyLand comic strip. +- comics: Added all comic strips from Arcamax (eg. including + Hagar the horrible). Changes: - comics: CyanideAndHappiness image filename now has the strip number prefixed. Fixes: -- scripts: Ensure the generated comic names do not exceed 100 characters so they do - not cause problems with path length restrictions. +- scripts: Ensure the generated comic names do not exceed 100 characters so + they do not cause problems with path length restrictions. Dosage 1.8 (released 20.12.2012) diff --git a/dosage b/dosage index a774df60f..6cb148c53 100755 --- a/dosage +++ b/dosage @@ -2,7 +2,7 @@ # -*- coding: iso-8859-1 -*- # Dosage, the webcomic downloader # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs -# Copyright (C) 2012 Bastian Kleineidam +# Copyright (C) 2012-2013 Bastian Kleineidam from __future__ import print_function import sys import os diff --git a/dosagelib/loader.py b/dosagelib/loader.py index e27d54957..37513c364 100644 --- a/dosagelib/loader.py +++ b/dosagelib/loader.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2012 Bastian Kleineidam +# Copyright (C) 2012-2013 Bastian Kleineidam """ Functions to load plugin modules. 
def add(name, shortname):
    """Create a scraper for one Arcamax comic and publish it in this module.

    name is appended to 'Arcamax_' to form the class name and to 'Arcamax/'
    to form the scraper name. shortname is the site-relative path that is
    appended to the Arcamax host to build the latest-strip URL.

    The object returned by make_scraper() is stored in the module globals
    under the generated class name; nothing is returned.
    """
    base = 'http://www.arcamax.com%s' % shortname
    scraper_id = 'Arcamax_%s' % name

    options = dict(
        name='Arcamax/' + name,
        latestUrl=base,
        # the strip URL is the latest URL plus one more path component
        stripUrl=base + '%s/',
        imageSearch=_imageSearch,
        prevSearch=_prevSearch,
        help='Index format: none',
    )
    # Register the generated class under its own name in the module namespace.
    globals()[scraper_id] = make_scraper(scraper_id, **options)
'/thefunnies/chucklebros/') +add('Crankshaft', '/thefunnies/crankshaft/') +#add('CuldeSac', '/thefunnies/culdesac/') +add('Curtis', '/thefunnies/curtis/') +#add('DaddysHome', '/thefunnies/daddyshome/') +add('DeFlocked', '/thefunnies/deflocked/') +add('DennistheMenace', '/thefunnies/dennisthemenace/') +#add('DiamondLil', '/thefunnies/diamondlil/') +add('Dilbert', '/thefunnies/dilbert/') +add('DinetteSet', '/thefunnies/thedinetteset/') +#add('DogEatDoug', '/thefunnies/dogeatdoug/') +#add('DogsofCKennel', '/thefunnies/dogsofckennel/') +#add('Doonesbury', '/thefunnies/doonesbury/') +add('Dustin', '/thefunnies/dustin/') +add('FamilyCircus', '/thefunnies/familycircus/') +#add('FloAndFriends', '/thefunnies/floandfriends/') +#add('ForHeavensSake', '/thefunnies/forheavenssake/') +#add('FortKnox', '/thefunnies/fortknox/') +#add('FreeRange', '/thefunnies/freerange/') +#add('Garfield', '/thefunnies/garfield/') +#add('GetFuzzy', '/thefunnies/getfuzzy/') +add('HagartheHorrible', '/thefunnies/hagarthehorrible/') +#add('Heathcliff', '/thefunnies/heathcliff/') +#add('HerbandJamaal', '/thefunnies/herbandjamaal/') +add('HiandLois', '/thefunnies/hiandlois/') +#add('HomeAndAway', '/thefunnies/homeandaway/') +add('JerryKingCartoons', '/thefunnies/humorcartoon/') +#add('LittleDogLost', '/thefunnies/littledoglost/') +#add('Luann', '/thefunnies/luann/') +add('MallardFillmore', '/thefunnies/mallardfillmore/') +add('Marvin', '/thefunnies/marvin/') +add('MeaningofLila', '/thefunnies/meaningoflila/') +#add('Momma', '/thefunnies/momma/') +add('MotherGooseAndGrimm', '/thefunnies/mothergooseandgrimm/') +add('Mutts', '/thefunnies/mutts/') +#add('NestHeads', '/thefunnies/nestheads/') +#add('NonSequitur', '/thefunnies/nonsequitur/') +#add('OnaClaireDay', '/thefunnies/onaclaireday/') +#add('OneBigHappy', '/thefunnies/onebighappy/') +#add('Peanuts', '/thefunnies/peanuts/') +#add('PearlsBeforeSwine', '/thefunnies/pearlsbeforeswine/') +#add('Pickles', '/thefunnies/pickles/') +#add('RedandRover', 
'/thefunnies/redandrover/') +#add('ReplyAll', '/thefunnies/replyall/') +add('RhymeswithOrange', '/thefunnies/rhymeswithorange/') +#add('Rubes', '/thefunnies/rubes/') +#add('Rugrats', '/thefunnies/rugrats/') +#add('ScaryGary', '/thefunnies/scarygary/') +#add('SpeedBump', '/thefunnies/speedbump/') +#add('StrangeBrew', '/thefunnies/strangebrew/') +#add('TheBarn', '/thefunnies/thebarn/') +add('TheLockhorns', '/thefunnies/thelockhorns/') +#add('TheOtherCoast', '/thefunnies/theothercoast/') +#add('ThinLines', '/thefunnies/thinlines/') +add('TinasGroove', '/thefunnies/tinasgroove/') +#add('WatchYourHead', '/thefunnies/watchyourhead/') +#add('WeePals', '/thefunnies/weepals/') +#add('WizardofId', '/thefunnies/wizardofid/') +#add('WorkingitOut', '/thefunnies/workingitout/') +#add('ZackHill', '/thefunnies/zackhill/') +add('Zits', '/thefunnies/zits/') diff --git a/scripts/arcamax.json b/scripts/arcamax.json new file mode 100644 index 000000000..1469dd1c9 --- /dev/null +++ b/scripts/arcamax.json @@ -0,0 +1 @@ +{"9ChickweedLane": "/thefunnies/ninechickweedlane/", "Agnes": "/thefunnies/agnes/", "AndyCapp": "/thefunnies/andycapp/", "Archie": "/thefunnies/archie/", "ArcticCircle": "/thefunnies/arcticcircle/", "AskShagg": "/thefunnies/askshagg/", "BC": "/thefunnies/bc/", "BabyBlues": "/thefunnies/babyblues/", "BallardStreet": "/thefunnies/ballardstreet/", "BarneyAndClyde": "/thefunnies/barneyandclyde/", "BarneyGoogleAndSnuffySmith": "/thefunnies/barneygoogle/", "BeetleBailey": "/thefunnies/beetlebailey/", "Bizarro": "/thefunnies/bizarro/", "BleekerTheRechargeableDog": "/thefunnies/bleekertherechargeabledog/", "Blondie": "/thefunnies/blondie/", "Boondocks": "/thefunnies/boondocks/", "BrilliantMindofEdisonLee": "/thefunnies/brilliantmindofedisonlee/", "CafC3A9ConLeche": "/thefunnies/cafeconleche/", "Candorville": "/thefunnies/candorville/", "Cathy": "/thefunnies/cathy/", "ChuckleBros": "/thefunnies/chucklebros/", "Crankshaft": "/thefunnies/crankshaft/", "CuldeSac": 
"/thefunnies/culdesac/", "Curtis": "/thefunnies/curtis/", "DaddysHome": "/thefunnies/daddyshome/", "DeFlocked": "/thefunnies/deflocked/", "DennistheMenace": "/thefunnies/dennisthemenace/", "DiamondLil": "/thefunnies/diamondlil/", "Dilbert": "/thefunnies/dilbert/", "DinetteSet": "/thefunnies/thedinetteset/", "DogEatDoug": "/thefunnies/dogeatdoug/", "DogsofCKennel": "/thefunnies/dogsofckennel/", "Doonesbury": "/thefunnies/doonesbury/", "Dustin": "/thefunnies/dustin/", "FamilyCircus": "/thefunnies/familycircus/", "FloAndFriends": "/thefunnies/floandfriends/", "ForHeavensSake": "/thefunnies/forheavenssake/", "FortKnox": "/thefunnies/fortknox/", "FreeRange": "/thefunnies/freerange/", "Garfield": "/thefunnies/garfield/", "GetFuzzy": "/thefunnies/getfuzzy/", "HagartheHorrible": "/thefunnies/hagarthehorrible/", "Heathcliff": "/thefunnies/heathcliff/", "HerbandJamaal": "/thefunnies/herbandjamaal/", "HiandLois": "/thefunnies/hiandlois/", "HomeAndAway": "/thefunnies/homeandaway/", "JerryKingCartoons": "/thefunnies/humorcartoon/", "LittleDogLost": "/thefunnies/littledoglost/", "Luann": "/thefunnies/luann/", "MallardFillmore": "/thefunnies/mallardfillmore/", "Marvin": "/thefunnies/marvin/", "MeaningofLila": "/thefunnies/meaningoflila/", "Momma": "/thefunnies/momma/", "MotherGooseAndGrimm": "/thefunnies/mothergooseandgrimm/", "Mutts": "/thefunnies/mutts/", "NestHeads": "/thefunnies/nestheads/", "NonSequitur": "/thefunnies/nonsequitur/", "OnaClaireDay": "/thefunnies/onaclaireday/", "OneBigHappy": "/thefunnies/onebighappy/", "Peanuts": "/thefunnies/peanuts/", "PearlsBeforeSwine": "/thefunnies/pearlsbeforeswine/", "Pickles": "/thefunnies/pickles/", "RedandRover": "/thefunnies/redandrover/", "ReplyAll": "/thefunnies/replyall/", "RhymeswithOrange": "/thefunnies/rhymeswithorange/", "Rubes": "/thefunnies/rubes/", "Rugrats": "/thefunnies/rugrats/", "ScaryGary": "/thefunnies/scarygary/", "SpeedBump": "/thefunnies/speedbump/", "StrangeBrew": "/thefunnies/strangebrew/", "TheBarn": 
"/thefunnies/thebarn/", "TheLockhorns": "/thefunnies/thelockhorns/", "TheOtherCoast": "/thefunnies/theothercoast/", "ThinLines": "/thefunnies/thinlines/", "TinasGroove": "/thefunnies/tinasgroove/", "WatchYourHead": "/thefunnies/watchyourhead/", "WeePals": "/thefunnies/weepals/", "WizardofId": "/thefunnies/wizardofid/", "WorkingitOut": "/thefunnies/workingitout/", "ZackHill": "/thefunnies/zackhill/", "Zits": "/thefunnies/zits/"} \ No newline at end of file diff --git a/scripts/arcamax.py b/scripts/arcamax.py new file mode 100755 index 000000000..a80c6391e --- /dev/null +++ b/scripts/arcamax.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python +# Copyright (C) 2013 Bastian Kleineidam +""" +Script to get arcamax comics and save the info in a JSON file for further processing. +""" +from __future__ import print_function +import re +import sys +import os +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) +from dosagelib.util import getPageContent, asciify, unescape +from dosagelib.scraper import get_scrapers +from scriptutil import contains_case_insensitive, capfirst, save_result, load_result, truncate_name + +json_file = __file__.replace(".py", ".json") + +url_matcher = re.compile(r'
  • ([^<]+)') + +# names of comics to exclude +exclude_comics = [ +] + + +def handle_url(url, res): + """Parse one search result page.""" + print("Parsing", url, file=sys.stderr) + try: + data, baseUrl = getPageContent(url) + except IOError as msg: + print("ERROR:", msg, file=sys.stderr) + return + for match in url_matcher.finditer(data): + shortname = match.group(1) + name = unescape(match.group(2)) + name = asciify(name.replace('&', 'And').replace('@', 'At')) + name = capfirst(name) + if name in exclude_comics: + continue + if contains_case_insensitive(res, name): + # we cannot handle two comics that only differ in case + print("INFO: skipping possible duplicate", name, file=sys.stderr) + continue + res[name] = shortname + if not res: + print("ERROR:", "did not match any comics", file=sys.stderr) + + +def get_results(): + """Parse all search result pages.""" + # store info in a dictionary {name -> shortname} + res = {} + handle_url('http://www.arcamax.com/comics', res) + save_result(res, json_file) + + +def has_comic(name): + """Check if comic name already exists.""" + names = [ + ("Creators/%s" % name).lower(), + ("DrunkDuck/%s" % name).lower(), + ("GoComics/%s" % name).lower(), + ("KeenSpot/%s" % name).lower(), + ("SmackJeeves/%s" % name).lower(), + ] + for scraperclass in get_scrapers(): + lname = scraperclass.get_name().lower() + if lname in names: + return True + return False + + +def print_results(args): + """Print all comics that have at least the given number of minimum comic strips.""" + for name, shortname in sorted(load_result(json_file).items()): + if name in exclude_comics: + continue + if has_comic(name): + prefix = '#' + else: + prefix = '' + print("%sadd(%r, %r)" % (prefix, str(truncate_name(name)), str(shortname))) + + +if __name__ == '__main__': + if len(sys.argv) > 1: + print_results(sys.argv[1:]) + else: + get_results() diff --git a/scripts/generate_json.sh b/scripts/generate_json.sh index c15c70453..587a9319e 100755 --- 
a/scripts/generate_json.sh +++ b/scripts/generate_json.sh @@ -2,7 +2,7 @@ set -u d=$(dirname $0) -for script in creators gocomics drunkduck universal keenspot smackjeeves; do +for script in creators gocomics drunkduck universal keenspot smackjeeves arcamax; do echo "Executing ${script}.py" "${d}/${script}.py" done diff --git a/scripts/removeafter.py b/scripts/removeafter.py index d75560ff1..190124696 100755 --- a/scripts/removeafter.py +++ b/scripts/removeafter.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (C) 2012 Bastian Kleineidam +# Copyright (C) 2012-2013 Bastian Kleineidam """Remove all lines after a given marker line. """ from __future__ import print_function diff --git a/scripts/update_plugins.sh b/scripts/update_plugins.sh index 37789ab72..fdf015e81 100755 --- a/scripts/update_plugins.sh +++ b/scripts/update_plugins.sh @@ -1,11 +1,11 @@ #!/bin/sh -e -# Copyright (C) 2012 Bastian Kleineidam +# Copyright (C) 2012-2013 Bastian Kleineidam set -u mincomics=100 d=$(dirname $0) -for script in creators gocomics drunkduck universal keenspot smackjeeves; do +for script in creators gocomics drunkduck universal keenspot smackjeeves arcamax; do target="${d}/../dosagelib/plugins/${script}.py" echo "Upating $target" "${d}/removeafter.py" "$target" "# DO NOT REMOVE"