Fix and update ArcaMax (fixes #8).

This commit is contained in:
Tobias Gruetzmacher 2015-04-17 21:53:13 +02:00
parent d89d4e4d6a
commit f0831a1f0f
3 changed files with 7 additions and 7 deletions

View file

@ -8,7 +8,7 @@ from ..scraper import make_scraper
from ..util import tagre from ..util import tagre
_imageSearch = compile(tagre("a", "href", r'(/newspics/[^"]+)', after='zoom')) _imageSearch = compile(tagre("img", "data-zoom-image", r'(/newspics/[^"]+)'))
_prevSearch = compile(tagre("a", "href", r'(/[^"]+)', before='prev')) _prevSearch = compile(tagre("a", "href", r'(/[^"]+)', before='prev'))
def add(name, shortname): def add(name, shortname):
@ -43,7 +43,6 @@ add('BleekerTheRechargeableDog', '/thefunnies/bleekertherechargeabledog/')
add('Blondie', '/thefunnies/blondie/') add('Blondie', '/thefunnies/blondie/')
add('Boondocks', '/thefunnies/boondocks/') add('Boondocks', '/thefunnies/boondocks/')
add('BrilliantMindofEdisonLee', '/thefunnies/brilliantmindofedisonlee/') add('BrilliantMindofEdisonLee', '/thefunnies/brilliantmindofedisonlee/')
#add('CafConLeche', '/thefunnies/cafeconleche/')
#add('Candorville', '/thefunnies/candorville/') #add('Candorville', '/thefunnies/candorville/')
#add('Cathy', '/thefunnies/cathy/') #add('Cathy', '/thefunnies/cathy/')
#add('ChuckleBros', '/thefunnies/chucklebros/') #add('ChuckleBros', '/thefunnies/chucklebros/')
@ -71,8 +70,11 @@ add('FamilyCircus', '/thefunnies/familycircus/')
#add('HerbandJamaal', '/thefunnies/herbandjamaal/') #add('HerbandJamaal', '/thefunnies/herbandjamaal/')
add('HiandLois', '/thefunnies/hiandlois/') add('HiandLois', '/thefunnies/hiandlois/')
#add('HomeAndAway', '/thefunnies/homeandaway/') #add('HomeAndAway', '/thefunnies/homeandaway/')
add('IntelligentLife', '/thefunnies/intelligentlife/')
add('JerryKingCartoons', '/thefunnies/humorcartoon/') add('JerryKingCartoons', '/thefunnies/humorcartoon/')
#add('LittleDogLost', '/thefunnies/littledoglost/') #add('LittleDogLost', '/thefunnies/littledoglost/')
#add('LongStoryShort', '/thefunnies/longstoryshort/')
#add('LooseParts', '/thefunnies/looseparts/')
#add('Luann', '/thefunnies/luann/') #add('Luann', '/thefunnies/luann/')
add('MallardFillmore', '/thefunnies/mallardfillmore/') add('MallardFillmore', '/thefunnies/mallardfillmore/')
add('Marvin', '/thefunnies/marvin/') add('Marvin', '/thefunnies/marvin/')
@ -83,7 +85,6 @@ add('MotherGooseAndGrimm', '/thefunnies/mothergooseandgrimm/')
add('Mutts', '/thefunnies/mutts/') add('Mutts', '/thefunnies/mutts/')
#add('NestHeads', '/thefunnies/nestheads/') #add('NestHeads', '/thefunnies/nestheads/')
#add('NonSequitur', '/thefunnies/nonsequitur/') #add('NonSequitur', '/thefunnies/nonsequitur/')
#add('OnaClaireDay', '/thefunnies/onaclaireday/')
#add('OneBigHappy', '/thefunnies/onebighappy/') #add('OneBigHappy', '/thefunnies/onebighappy/')
#add('Peanuts', '/thefunnies/peanuts/') #add('Peanuts', '/thefunnies/peanuts/')
#add('PearlsBeforeSwine', '/thefunnies/pearlsbeforeswine/') #add('PearlsBeforeSwine', '/thefunnies/pearlsbeforeswine/')
@ -102,7 +103,6 @@ add('TakeItFromTheTinkersons', '/thefunnies/takeitfromthetinkersons/')
add('TheLockhorns', '/thefunnies/thelockhorns/') add('TheLockhorns', '/thefunnies/thelockhorns/')
#add('TheOtherCoast', '/thefunnies/theothercoast/') #add('TheOtherCoast', '/thefunnies/theothercoast/')
add('TinasGroove', '/thefunnies/tinasgroove/') add('TinasGroove', '/thefunnies/tinasgroove/')
#add('WatchYourHead', '/thefunnies/watchyourhead/')
#add('WeePals', '/thefunnies/weepals/') #add('WeePals', '/thefunnies/weepals/')
#add('WizardofId', '/thefunnies/wizardofid/') #add('WizardofId', '/thefunnies/wizardofid/')
#add('WorkingitOut', '/thefunnies/workingitout/') #add('WorkingitOut', '/thefunnies/workingitout/')

View file

@ -1 +1 @@
{"9ChickweedLane": "/thefunnies/ninechickweedlane/", "Agnes": "/thefunnies/agnes/", "AndyCapp": "/thefunnies/andycapp/", "Archie": "/thefunnies/archie/", "ArcticCircle": "/thefunnies/arcticcircle/", "AskShagg": "/thefunnies/askshagg/", "BC": "/thefunnies/bc/", "BabyBlues": "/thefunnies/babyblues/", "BallardStreet": "/thefunnies/ballardstreet/", "BarneyAndClyde": "/thefunnies/barneyandclyde/", "BarneyGoogleAndSnuffySmith": "/thefunnies/barneygoogle/", "BeetleBailey": "/thefunnies/beetlebailey/", "Bizarro": "/thefunnies/bizarro/", "BleekerTheRechargeableDog": "/thefunnies/bleekertherechargeabledog/", "Blondie": "/thefunnies/blondie/", "Boondocks": "/thefunnies/boondocks/", "BrilliantMindofEdisonLee": "/thefunnies/brilliantmindofedisonlee/", "CafConLeche": "/thefunnies/cafeconleche/", "Candorville": "/thefunnies/candorville/", "Cathy": "/thefunnies/cathy/", "ChuckleBros": "/thefunnies/chucklebros/", "Crankshaft": "/thefunnies/crankshaft/", "CuldeSac": "/thefunnies/culdesac/", "Curtis": "/thefunnies/curtis/", "DaddysHome": "/thefunnies/daddyshome/", "DeFlocked": "/thefunnies/deflocked/", "DennistheMenace": "/thefunnies/dennisthemenace/", "DiamondLil": "/thefunnies/diamondlil/", "Dilbert": "/thefunnies/dilbert/", "DinetteSet": "/thefunnies/thedinetteset/", "DogEatDoug": "/thefunnies/dogeatdoug/", "DogsofCKennel": "/thefunnies/dogsofckennel/", "Doonesbury": "/thefunnies/doonesbury/", "Dustin": "/thefunnies/dustin/", "FamilyCircus": "/thefunnies/familycircus/", "FloAndFriends": "/thefunnies/floandfriends/", "ForHeavensSake": "/thefunnies/forheavenssake/", "FortKnox": "/thefunnies/fortknox/", "FreeRange": "/thefunnies/freerange/", "Garfield": "/thefunnies/garfield/", "GetFuzzy": "/thefunnies/getfuzzy/", "Heathcliff": "/thefunnies/heathcliff/", "HerbandJamaal": "/thefunnies/herbandjamaal/", "HiandLois": "/thefunnies/hiandlois/", "HomeAndAway": "/thefunnies/homeandaway/", "JerryKingCartoons": "/thefunnies/humorcartoon/", "LittleDogLost": "/thefunnies/littledoglost/", "Luann": "/thefunnies/luann/", "MallardFillmore": "/thefunnies/mallardfillmore/", "Marvin": "/thefunnies/marvin/", "MeaningofLila": "/thefunnies/meaningoflila/", "MikeDuJour": "/thefunnies/mikedujour/", "Momma": "/thefunnies/momma/", "MotherGooseAndGrimm": "/thefunnies/mothergooseandgrimm/", "Mutts": "/thefunnies/mutts/", "NestHeads": "/thefunnies/nestheads/", "NonSequitur": "/thefunnies/nonsequitur/", "OnaClaireDay": "/thefunnies/onaclaireday/", "OneBigHappy": "/thefunnies/onebighappy/", "Peanuts": "/thefunnies/peanuts/", "PearlsBeforeSwine": "/thefunnies/pearlsbeforeswine/", "Pickles": "/thefunnies/pickles/", "RedandRover": "/thefunnies/redandrover/", "ReplyAll": "/thefunnies/replyall/", "RhymeswithOrange": "/thefunnies/rhymeswithorange/", "Rubes": "/thefunnies/rubes/", "RudyPark": "/thefunnies/rudypark/", "Rugrats": "/thefunnies/rugrats/", "ScaryGary": "/thefunnies/scarygary/", "SpeedBump": "/thefunnies/speedbump/", "StrangeBrew": "/thefunnies/strangebrew/", "TakeItFromTheTinkersons": "/thefunnies/takeitfromthetinkersons/", "TheBarn": "/thefunnies/thebarn/", "TheLockhorns": "/thefunnies/thelockhorns/", "TheOtherCoast": "/thefunnies/theothercoast/", "TinasGroove": "/thefunnies/tinasgroove/", "WatchYourHead": "/thefunnies/watchyourhead/", "WeePals": "/thefunnies/weepals/", "WizardofId": "/thefunnies/wizardofid/", "WorkingitOut": "/thefunnies/workingitout/", "Wumo": "/thefunnies/wumo/", "ZackHill": "/thefunnies/zackhill/", "Zits": "/thefunnies/zits/"} {"9ChickweedLane": "/thefunnies/ninechickweedlane/", "Agnes": "/thefunnies/agnes/", "AndyCapp": "/thefunnies/andycapp/", "Archie": "/thefunnies/archie/", "ArcticCircle": "/thefunnies/arcticcircle/", "AskShagg": "/thefunnies/askshagg/", "BC": "/thefunnies/bc/", "BabyBlues": "/thefunnies/babyblues/", "BallardStreet": "/thefunnies/ballardstreet/", "BarneyAndClyde": "/thefunnies/barneyandclyde/", "BarneyGoogleAndSnuffySmith": "/thefunnies/barneygoogle/", "BeetleBailey": "/thefunnies/beetlebailey/", "Bizarro": "/thefunnies/bizarro/", "BleekerTheRechargeableDog": "/thefunnies/bleekertherechargeabledog/", "Blondie": "/thefunnies/blondie/", "Boondocks": "/thefunnies/boondocks/", "BrilliantMindofEdisonLee": "/thefunnies/brilliantmindofedisonlee/", "Candorville": "/thefunnies/candorville/", "Cathy": "/thefunnies/cathy/", "ChuckleBros": "/thefunnies/chucklebros/", "Crankshaft": "/thefunnies/crankshaft/", "CuldeSac": "/thefunnies/culdesac/", "Curtis": "/thefunnies/curtis/", "DaddysHome": "/thefunnies/daddyshome/", "DeFlocked": "/thefunnies/deflocked/", "DennistheMenace": "/thefunnies/dennisthemenace/", "DiamondLil": "/thefunnies/diamondlil/", "Dilbert": "/thefunnies/dilbert/", "DinetteSet": "/thefunnies/thedinetteset/", "DogEatDoug": "/thefunnies/dogeatdoug/", "DogsofCKennel": "/thefunnies/dogsofckennel/", "Doonesbury": "/thefunnies/doonesbury/", "Dustin": "/thefunnies/dustin/", "FamilyCircus": "/thefunnies/familycircus/", "FloAndFriends": "/thefunnies/floandfriends/", "ForHeavensSake": "/thefunnies/forheavenssake/", "FortKnox": "/thefunnies/fortknox/", "FreeRange": "/thefunnies/freerange/", "Garfield": "/thefunnies/garfield/", "GetFuzzy": "/thefunnies/getfuzzy/", "Heathcliff": "/thefunnies/heathcliff/", "HerbandJamaal": "/thefunnies/herbandjamaal/", "HiandLois": "/thefunnies/hiandlois/", "HomeAndAway": "/thefunnies/homeandaway/", "IntelligentLife": "/thefunnies/intelligentlife/", "JerryKingCartoons": "/thefunnies/humorcartoon/", "LittleDogLost": "/thefunnies/littledoglost/", "LongStoryShort": "/thefunnies/longstoryshort/", "LooseParts": "/thefunnies/looseparts/", "Luann": "/thefunnies/luann/", "MallardFillmore": "/thefunnies/mallardfillmore/", "Marvin": "/thefunnies/marvin/", "MeaningofLila": "/thefunnies/meaningoflila/", "MikeDuJour": "/thefunnies/mikedujour/", "Momma": "/thefunnies/momma/", "MotherGooseAndGrimm": "/thefunnies/mothergooseandgrimm/", "Mutts": "/thefunnies/mutts/", "NestHeads": "/thefunnies/nestheads/", "NonSequitur": "/thefunnies/nonsequitur/", "OneBigHappy": "/thefunnies/onebighappy/", "Peanuts": "/thefunnies/peanuts/", "PearlsBeforeSwine": "/thefunnies/pearlsbeforeswine/", "Pickles": "/thefunnies/pickles/", "RedandRover": "/thefunnies/redandrover/", "ReplyAll": "/thefunnies/replyall/", "RhymeswithOrange": "/thefunnies/rhymeswithorange/", "Rubes": "/thefunnies/rubes/", "RudyPark": "/thefunnies/rudypark/", "Rugrats": "/thefunnies/rugrats/", "ScaryGary": "/thefunnies/scarygary/", "SpeedBump": "/thefunnies/speedbump/", "StrangeBrew": "/thefunnies/strangebrew/", "TakeItFromTheTinkersons": "/thefunnies/takeitfromthetinkersons/", "TheBarn": "/thefunnies/thebarn/", "TheLockhorns": "/thefunnies/thelockhorns/", "TheOtherCoast": "/thefunnies/theothercoast/", "TinasGroove": "/thefunnies/tinasgroove/", "WeePals": "/thefunnies/weepals/", "WizardofId": "/thefunnies/wizardofid/", "WorkingitOut": "/thefunnies/workingitout/", "Wumo": "/thefunnies/wumo/", "ZackHill": "/thefunnies/zackhill/", "Zits": "/thefunnies/zits/"}

View file

@ -16,7 +16,7 @@ from scriptutil import contains_case_insensitive, capfirst, save_result, load_re
json_file = __file__.replace(".py", ".json") json_file = __file__.replace(".py", ".json")
url_matcher = re.compile(r'<li><b><a href="(/thefunnies/[^"]+)">([^<]+)</a>') url_matcher = re.compile(r'<li><a href="(/thefunnies/[^"]+)">([^<]+)</a>')
# names of comics to exclude # names of comics to exclude
exclude_comics = [ exclude_comics = [
@ -28,7 +28,7 @@ def handle_url(url, session, res):
"""Parse one search result page.""" """Parse one search result page."""
print("Parsing", url, file=sys.stderr) print("Parsing", url, file=sys.stderr)
try: try:
data, baseUrl = getPageContent(url, session) data = getPageContent(url, session)
except IOError as msg: except IOError as msg:
print("ERROR:", msg, file=sys.stderr) print("ERROR:", msg, file=sys.stderr)
return return