From f0831a1f0f9c2b7ae254a88ab2501c0d183561dd Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Fri, 17 Apr 2015 21:53:13 +0200 Subject: [PATCH] Fix and update ArcaMax (fixes #8). --- dosagelib/plugins/arcamax.py | 8 ++++---- scripts/arcamax.json | 2 +- scripts/arcamax.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dosagelib/plugins/arcamax.py b/dosagelib/plugins/arcamax.py index 2d4ea43db..d6c0ff330 100644 --- a/dosagelib/plugins/arcamax.py +++ b/dosagelib/plugins/arcamax.py @@ -8,7 +8,7 @@ from ..scraper import make_scraper from ..util import tagre -_imageSearch = compile(tagre("a", "href", r'(/newspics/[^"]+)', after='zoom')) +_imageSearch = compile(tagre("img", "data-zoom-image", r'(/newspics/[^"]+)')) _prevSearch = compile(tagre("a", "href", r'(/[^"]+)', before='prev')) def add(name, shortname): @@ -43,7 +43,6 @@ add('BleekerTheRechargeableDog', '/thefunnies/bleekertherechargeabledog/') add('Blondie', '/thefunnies/blondie/') add('Boondocks', '/thefunnies/boondocks/') add('BrilliantMindofEdisonLee', '/thefunnies/brilliantmindofedisonlee/') -#add('CafConLeche', '/thefunnies/cafeconleche/') #add('Candorville', '/thefunnies/candorville/') #add('Cathy', '/thefunnies/cathy/') #add('ChuckleBros', '/thefunnies/chucklebros/') @@ -71,8 +70,11 @@ add('FamilyCircus', '/thefunnies/familycircus/') #add('HerbandJamaal', '/thefunnies/herbandjamaal/') add('HiandLois', '/thefunnies/hiandlois/') #add('HomeAndAway', '/thefunnies/homeandaway/') +add('IntelligentLife', '/thefunnies/intelligentlife/') add('JerryKingCartoons', '/thefunnies/humorcartoon/') #add('LittleDogLost', '/thefunnies/littledoglost/') +#add('LongStoryShort', '/thefunnies/longstoryshort/') +#add('LooseParts', '/thefunnies/looseparts/') #add('Luann', '/thefunnies/luann/') add('MallardFillmore', '/thefunnies/mallardfillmore/') add('Marvin', '/thefunnies/marvin/') @@ -83,7 +85,6 @@ add('MotherGooseAndGrimm', '/thefunnies/mothergooseandgrimm/') add('Mutts', '/thefunnies/mutts/') #add('NestHeads', '/thefunnies/nestheads/') #add('NonSequitur', '/thefunnies/nonsequitur/') -#add('OnaClaireDay', '/thefunnies/onaclaireday/') #add('OneBigHappy', '/thefunnies/onebighappy/') #add('Peanuts', '/thefunnies/peanuts/') #add('PearlsBeforeSwine', '/thefunnies/pearlsbeforeswine/') @@ -102,7 +103,6 @@ add('TakeItFromTheTinkersons', '/thefunnies/takeitfromthetinkersons/') add('TheLockhorns', '/thefunnies/thelockhorns/') #add('TheOtherCoast', '/thefunnies/theothercoast/') add('TinasGroove', '/thefunnies/tinasgroove/') -#add('WatchYourHead', '/thefunnies/watchyourhead/') #add('WeePals', '/thefunnies/weepals/') #add('WizardofId', '/thefunnies/wizardofid/') #add('WorkingitOut', '/thefunnies/workingitout/') diff --git a/scripts/arcamax.json b/scripts/arcamax.json index 7fa890a31..198f96449 100644 --- a/scripts/arcamax.json +++ b/scripts/arcamax.json @@ -1 +1 @@ -{"9ChickweedLane": "/thefunnies/ninechickweedlane/", "Agnes": "/thefunnies/agnes/", "AndyCapp": "/thefunnies/andycapp/", "Archie": "/thefunnies/archie/", "ArcticCircle": "/thefunnies/arcticcircle/", "AskShagg": "/thefunnies/askshagg/", "BC": "/thefunnies/bc/", "BabyBlues": "/thefunnies/babyblues/", "BallardStreet": "/thefunnies/ballardstreet/", "BarneyAndClyde": "/thefunnies/barneyandclyde/", "BarneyGoogleAndSnuffySmith": "/thefunnies/barneygoogle/", "BeetleBailey": "/thefunnies/beetlebailey/", "Bizarro": "/thefunnies/bizarro/", "BleekerTheRechargeableDog": "/thefunnies/bleekertherechargeabledog/", "Blondie": "/thefunnies/blondie/", "Boondocks": "/thefunnies/boondocks/", "BrilliantMindofEdisonLee": "/thefunnies/brilliantmindofedisonlee/", "CafConLeche": "/thefunnies/cafeconleche/", "Candorville": "/thefunnies/candorville/", "Cathy": "/thefunnies/cathy/", "ChuckleBros": "/thefunnies/chucklebros/", "Crankshaft": "/thefunnies/crankshaft/", "CuldeSac": "/thefunnies/culdesac/", "Curtis": "/thefunnies/curtis/", "DaddysHome": "/thefunnies/daddyshome/", "DeFlocked": "/thefunnies/deflocked/", "DennistheMenace": "/thefunnies/dennisthemenace/", "DiamondLil": "/thefunnies/diamondlil/", "Dilbert": "/thefunnies/dilbert/", "DinetteSet": "/thefunnies/thedinetteset/", "DogEatDoug": "/thefunnies/dogeatdoug/", "DogsofCKennel": "/thefunnies/dogsofckennel/", "Doonesbury": "/thefunnies/doonesbury/", "Dustin": "/thefunnies/dustin/", "FamilyCircus": "/thefunnies/familycircus/", "FloAndFriends": "/thefunnies/floandfriends/", "ForHeavensSake": "/thefunnies/forheavenssake/", "FortKnox": "/thefunnies/fortknox/", "FreeRange": "/thefunnies/freerange/", "Garfield": "/thefunnies/garfield/", "GetFuzzy": "/thefunnies/getfuzzy/", "Heathcliff": "/thefunnies/heathcliff/", "HerbandJamaal": "/thefunnies/herbandjamaal/", "HiandLois": "/thefunnies/hiandlois/", "HomeAndAway": "/thefunnies/homeandaway/", "JerryKingCartoons": "/thefunnies/humorcartoon/", "LittleDogLost": "/thefunnies/littledoglost/", "Luann": "/thefunnies/luann/", "MallardFillmore": "/thefunnies/mallardfillmore/", "Marvin": "/thefunnies/marvin/", "MeaningofLila": "/thefunnies/meaningoflila/", "MikeDuJour": "/thefunnies/mikedujour/", "Momma": "/thefunnies/momma/", "MotherGooseAndGrimm": "/thefunnies/mothergooseandgrimm/", "Mutts": "/thefunnies/mutts/", "NestHeads": "/thefunnies/nestheads/", "NonSequitur": "/thefunnies/nonsequitur/", "OnaClaireDay": "/thefunnies/onaclaireday/", "OneBigHappy": "/thefunnies/onebighappy/", "Peanuts": "/thefunnies/peanuts/", "PearlsBeforeSwine": "/thefunnies/pearlsbeforeswine/", "Pickles": "/thefunnies/pickles/", "RedandRover": "/thefunnies/redandrover/", "ReplyAll": "/thefunnies/replyall/", "RhymeswithOrange": "/thefunnies/rhymeswithorange/", "Rubes": "/thefunnies/rubes/", "RudyPark": "/thefunnies/rudypark/", "Rugrats": "/thefunnies/rugrats/", "ScaryGary": "/thefunnies/scarygary/", "SpeedBump": "/thefunnies/speedbump/", "StrangeBrew": "/thefunnies/strangebrew/", "TakeItFromTheTinkersons": "/thefunnies/takeitfromthetinkersons/", "TheBarn": "/thefunnies/thebarn/", "TheLockhorns": "/thefunnies/thelockhorns/", "TheOtherCoast": "/thefunnies/theothercoast/", "TinasGroove": "/thefunnies/tinasgroove/", "WatchYourHead": "/thefunnies/watchyourhead/", "WeePals": "/thefunnies/weepals/", "WizardofId": "/thefunnies/wizardofid/", "WorkingitOut": "/thefunnies/workingitout/", "Wumo": "/thefunnies/wumo/", "ZackHill": "/thefunnies/zackhill/", "Zits": "/thefunnies/zits/"} \ No newline at end of file +{"9ChickweedLane": "/thefunnies/ninechickweedlane/", "Agnes": "/thefunnies/agnes/", "AndyCapp": "/thefunnies/andycapp/", "Archie": "/thefunnies/archie/", "ArcticCircle": "/thefunnies/arcticcircle/", "AskShagg": "/thefunnies/askshagg/", "BC": "/thefunnies/bc/", "BabyBlues": "/thefunnies/babyblues/", "BallardStreet": "/thefunnies/ballardstreet/", "BarneyAndClyde": "/thefunnies/barneyandclyde/", "BarneyGoogleAndSnuffySmith": "/thefunnies/barneygoogle/", "BeetleBailey": "/thefunnies/beetlebailey/", "Bizarro": "/thefunnies/bizarro/", "BleekerTheRechargeableDog": "/thefunnies/bleekertherechargeabledog/", "Blondie": "/thefunnies/blondie/", "Boondocks": "/thefunnies/boondocks/", "BrilliantMindofEdisonLee": "/thefunnies/brilliantmindofedisonlee/", "Candorville": "/thefunnies/candorville/", "Cathy": "/thefunnies/cathy/", "ChuckleBros": "/thefunnies/chucklebros/", "Crankshaft": "/thefunnies/crankshaft/", "CuldeSac": "/thefunnies/culdesac/", "Curtis": "/thefunnies/curtis/", "DaddysHome": "/thefunnies/daddyshome/", "DeFlocked": "/thefunnies/deflocked/", "DennistheMenace": "/thefunnies/dennisthemenace/", "DiamondLil": "/thefunnies/diamondlil/", "Dilbert": "/thefunnies/dilbert/", "DinetteSet": "/thefunnies/thedinetteset/", "DogEatDoug": "/thefunnies/dogeatdoug/", "DogsofCKennel": "/thefunnies/dogsofckennel/", "Doonesbury": "/thefunnies/doonesbury/", "Dustin": "/thefunnies/dustin/", "FamilyCircus": "/thefunnies/familycircus/", "FloAndFriends": "/thefunnies/floandfriends/", "ForHeavensSake": "/thefunnies/forheavenssake/", "FortKnox": "/thefunnies/fortknox/", "FreeRange": "/thefunnies/freerange/", "Garfield": "/thefunnies/garfield/", "GetFuzzy": "/thefunnies/getfuzzy/", "Heathcliff": "/thefunnies/heathcliff/", "HerbandJamaal": "/thefunnies/herbandjamaal/", "HiandLois": "/thefunnies/hiandlois/", "HomeAndAway": "/thefunnies/homeandaway/", "IntelligentLife": "/thefunnies/intelligentlife/", "JerryKingCartoons": "/thefunnies/humorcartoon/", "LittleDogLost": "/thefunnies/littledoglost/", "LongStoryShort": "/thefunnies/longstoryshort/", "LooseParts": "/thefunnies/looseparts/", "Luann": "/thefunnies/luann/", "MallardFillmore": "/thefunnies/mallardfillmore/", "Marvin": "/thefunnies/marvin/", "MeaningofLila": "/thefunnies/meaningoflila/", "MikeDuJour": "/thefunnies/mikedujour/", "Momma": "/thefunnies/momma/", "MotherGooseAndGrimm": "/thefunnies/mothergooseandgrimm/", "Mutts": "/thefunnies/mutts/", "NestHeads": "/thefunnies/nestheads/", "NonSequitur": "/thefunnies/nonsequitur/", "OneBigHappy": "/thefunnies/onebighappy/", "Peanuts": "/thefunnies/peanuts/", "PearlsBeforeSwine": "/thefunnies/pearlsbeforeswine/", "Pickles": "/thefunnies/pickles/", "RedandRover": "/thefunnies/redandrover/", "ReplyAll": "/thefunnies/replyall/", "RhymeswithOrange": "/thefunnies/rhymeswithorange/", "Rubes": "/thefunnies/rubes/", "RudyPark": "/thefunnies/rudypark/", "Rugrats": "/thefunnies/rugrats/", "ScaryGary": "/thefunnies/scarygary/", "SpeedBump": "/thefunnies/speedbump/", "StrangeBrew": "/thefunnies/strangebrew/", "TakeItFromTheTinkersons": "/thefunnies/takeitfromthetinkersons/", "TheBarn": "/thefunnies/thebarn/", "TheLockhorns": "/thefunnies/thelockhorns/", "TheOtherCoast": "/thefunnies/theothercoast/", "TinasGroove": "/thefunnies/tinasgroove/", "WeePals": "/thefunnies/weepals/", "WizardofId": "/thefunnies/wizardofid/", "WorkingitOut": "/thefunnies/workingitout/", "Wumo": "/thefunnies/wumo/", "ZackHill": "/thefunnies/zackhill/", "Zits": "/thefunnies/zits/"} \ No newline at end of file diff --git a/scripts/arcamax.py b/scripts/arcamax.py index 88271ee8e..e60935c60 100755 --- a/scripts/arcamax.py +++ b/scripts/arcamax.py @@ -16,7 +16,7 @@ from scriptutil import contains_case_insensitive, capfirst, save_result, load_re json_file = __file__.replace(".py", ".json") -url_matcher = re.compile(r'
  • ([^<]+)') +url_matcher = re.compile(r'
  • ([^<]+)') # names of comics to exclude exclude_comics = [ @@ -28,7 +28,7 @@ def handle_url(url, session, res): """Parse one search result page.""" print("Parsing", url, file=sys.stderr) try: - data, baseUrl = getPageContent(url, session) + data = getPageContent(url, session) except IOError as msg: print("ERROR:", msg, file=sys.stderr) return