Fix and update ArcaMax (fixes #8).
This commit is contained in:
parent
d89d4e4d6a
commit
f0831a1f0f
3 changed files with 7 additions and 7 deletions
|
@@ -8,7 +8,7 @@ from ..scraper import make_scraper
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
|
|
||||||
|
|
||||||
_imageSearch = compile(tagre("a", "href", r'(/newspics/[^"]+)', after='zoom'))
|
_imageSearch = compile(tagre("img", "data-zoom-image", r'(/newspics/[^"]+)'))
|
||||||
_prevSearch = compile(tagre("a", "href", r'(/[^"]+)', before='prev'))
|
_prevSearch = compile(tagre("a", "href", r'(/[^"]+)', before='prev'))
|
||||||
|
|
||||||
def add(name, shortname):
|
def add(name, shortname):
|
||||||
|
@@ -43,7 +43,6 @@ add('BleekerTheRechargeableDog', '/thefunnies/bleekertherechargeabledog/')
|
||||||
add('Blondie', '/thefunnies/blondie/')
|
add('Blondie', '/thefunnies/blondie/')
|
||||||
add('Boondocks', '/thefunnies/boondocks/')
|
add('Boondocks', '/thefunnies/boondocks/')
|
||||||
add('BrilliantMindofEdisonLee', '/thefunnies/brilliantmindofedisonlee/')
|
add('BrilliantMindofEdisonLee', '/thefunnies/brilliantmindofedisonlee/')
|
||||||
#add('CafConLeche', '/thefunnies/cafeconleche/')
|
|
||||||
#add('Candorville', '/thefunnies/candorville/')
|
#add('Candorville', '/thefunnies/candorville/')
|
||||||
#add('Cathy', '/thefunnies/cathy/')
|
#add('Cathy', '/thefunnies/cathy/')
|
||||||
#add('ChuckleBros', '/thefunnies/chucklebros/')
|
#add('ChuckleBros', '/thefunnies/chucklebros/')
|
||||||
|
@@ -71,8 +70,11 @@ add('FamilyCircus', '/thefunnies/familycircus/')
|
||||||
#add('HerbandJamaal', '/thefunnies/herbandjamaal/')
|
#add('HerbandJamaal', '/thefunnies/herbandjamaal/')
|
||||||
add('HiandLois', '/thefunnies/hiandlois/')
|
add('HiandLois', '/thefunnies/hiandlois/')
|
||||||
#add('HomeAndAway', '/thefunnies/homeandaway/')
|
#add('HomeAndAway', '/thefunnies/homeandaway/')
|
||||||
|
add('IntelligentLife', '/thefunnies/intelligentlife/')
|
||||||
add('JerryKingCartoons', '/thefunnies/humorcartoon/')
|
add('JerryKingCartoons', '/thefunnies/humorcartoon/')
|
||||||
#add('LittleDogLost', '/thefunnies/littledoglost/')
|
#add('LittleDogLost', '/thefunnies/littledoglost/')
|
||||||
|
#add('LongStoryShort', '/thefunnies/longstoryshort/')
|
||||||
|
#add('LooseParts', '/thefunnies/looseparts/')
|
||||||
#add('Luann', '/thefunnies/luann/')
|
#add('Luann', '/thefunnies/luann/')
|
||||||
add('MallardFillmore', '/thefunnies/mallardfillmore/')
|
add('MallardFillmore', '/thefunnies/mallardfillmore/')
|
||||||
add('Marvin', '/thefunnies/marvin/')
|
add('Marvin', '/thefunnies/marvin/')
|
||||||
|
@@ -83,7 +85,6 @@ add('MotherGooseAndGrimm', '/thefunnies/mothergooseandgrimm/')
|
||||||
add('Mutts', '/thefunnies/mutts/')
|
add('Mutts', '/thefunnies/mutts/')
|
||||||
#add('NestHeads', '/thefunnies/nestheads/')
|
#add('NestHeads', '/thefunnies/nestheads/')
|
||||||
#add('NonSequitur', '/thefunnies/nonsequitur/')
|
#add('NonSequitur', '/thefunnies/nonsequitur/')
|
||||||
#add('OnaClaireDay', '/thefunnies/onaclaireday/')
|
|
||||||
#add('OneBigHappy', '/thefunnies/onebighappy/')
|
#add('OneBigHappy', '/thefunnies/onebighappy/')
|
||||||
#add('Peanuts', '/thefunnies/peanuts/')
|
#add('Peanuts', '/thefunnies/peanuts/')
|
||||||
#add('PearlsBeforeSwine', '/thefunnies/pearlsbeforeswine/')
|
#add('PearlsBeforeSwine', '/thefunnies/pearlsbeforeswine/')
|
||||||
|
@@ -102,7 +103,6 @@ add('TakeItFromTheTinkersons', '/thefunnies/takeitfromthetinkersons/')
|
||||||
add('TheLockhorns', '/thefunnies/thelockhorns/')
|
add('TheLockhorns', '/thefunnies/thelockhorns/')
|
||||||
#add('TheOtherCoast', '/thefunnies/theothercoast/')
|
#add('TheOtherCoast', '/thefunnies/theothercoast/')
|
||||||
add('TinasGroove', '/thefunnies/tinasgroove/')
|
add('TinasGroove', '/thefunnies/tinasgroove/')
|
||||||
#add('WatchYourHead', '/thefunnies/watchyourhead/')
|
|
||||||
#add('WeePals', '/thefunnies/weepals/')
|
#add('WeePals', '/thefunnies/weepals/')
|
||||||
#add('WizardofId', '/thefunnies/wizardofid/')
|
#add('WizardofId', '/thefunnies/wizardofid/')
|
||||||
#add('WorkingitOut', '/thefunnies/workingitout/')
|
#add('WorkingitOut', '/thefunnies/workingitout/')
|
||||||
|
|
|
@@ -1 +1 @@
|
||||||
{"9ChickweedLane": "/thefunnies/ninechickweedlane/", "Agnes": "/thefunnies/agnes/", "AndyCapp": "/thefunnies/andycapp/", "Archie": "/thefunnies/archie/", "ArcticCircle": "/thefunnies/arcticcircle/", "AskShagg": "/thefunnies/askshagg/", "BC": "/thefunnies/bc/", "BabyBlues": "/thefunnies/babyblues/", "BallardStreet": "/thefunnies/ballardstreet/", "BarneyAndClyde": "/thefunnies/barneyandclyde/", "BarneyGoogleAndSnuffySmith": "/thefunnies/barneygoogle/", "BeetleBailey": "/thefunnies/beetlebailey/", "Bizarro": "/thefunnies/bizarro/", "BleekerTheRechargeableDog": "/thefunnies/bleekertherechargeabledog/", "Blondie": "/thefunnies/blondie/", "Boondocks": "/thefunnies/boondocks/", "BrilliantMindofEdisonLee": "/thefunnies/brilliantmindofedisonlee/", "CafConLeche": "/thefunnies/cafeconleche/", "Candorville": "/thefunnies/candorville/", "Cathy": "/thefunnies/cathy/", "ChuckleBros": "/thefunnies/chucklebros/", "Crankshaft": "/thefunnies/crankshaft/", "CuldeSac": "/thefunnies/culdesac/", "Curtis": "/thefunnies/curtis/", "DaddysHome": "/thefunnies/daddyshome/", "DeFlocked": "/thefunnies/deflocked/", "DennistheMenace": "/thefunnies/dennisthemenace/", "DiamondLil": "/thefunnies/diamondlil/", "Dilbert": "/thefunnies/dilbert/", "DinetteSet": "/thefunnies/thedinetteset/", "DogEatDoug": "/thefunnies/dogeatdoug/", "DogsofCKennel": "/thefunnies/dogsofckennel/", "Doonesbury": "/thefunnies/doonesbury/", "Dustin": "/thefunnies/dustin/", "FamilyCircus": "/thefunnies/familycircus/", "FloAndFriends": "/thefunnies/floandfriends/", "ForHeavensSake": "/thefunnies/forheavenssake/", "FortKnox": "/thefunnies/fortknox/", "FreeRange": "/thefunnies/freerange/", "Garfield": "/thefunnies/garfield/", "GetFuzzy": "/thefunnies/getfuzzy/", "Heathcliff": "/thefunnies/heathcliff/", "HerbandJamaal": "/thefunnies/herbandjamaal/", "HiandLois": "/thefunnies/hiandlois/", "HomeAndAway": "/thefunnies/homeandaway/", "JerryKingCartoons": "/thefunnies/humorcartoon/", "LittleDogLost": "/thefunnies/littledoglost/", 
"Luann": "/thefunnies/luann/", "MallardFillmore": "/thefunnies/mallardfillmore/", "Marvin": "/thefunnies/marvin/", "MeaningofLila": "/thefunnies/meaningoflila/", "MikeDuJour": "/thefunnies/mikedujour/", "Momma": "/thefunnies/momma/", "MotherGooseAndGrimm": "/thefunnies/mothergooseandgrimm/", "Mutts": "/thefunnies/mutts/", "NestHeads": "/thefunnies/nestheads/", "NonSequitur": "/thefunnies/nonsequitur/", "OnaClaireDay": "/thefunnies/onaclaireday/", "OneBigHappy": "/thefunnies/onebighappy/", "Peanuts": "/thefunnies/peanuts/", "PearlsBeforeSwine": "/thefunnies/pearlsbeforeswine/", "Pickles": "/thefunnies/pickles/", "RedandRover": "/thefunnies/redandrover/", "ReplyAll": "/thefunnies/replyall/", "RhymeswithOrange": "/thefunnies/rhymeswithorange/", "Rubes": "/thefunnies/rubes/", "RudyPark": "/thefunnies/rudypark/", "Rugrats": "/thefunnies/rugrats/", "ScaryGary": "/thefunnies/scarygary/", "SpeedBump": "/thefunnies/speedbump/", "StrangeBrew": "/thefunnies/strangebrew/", "TakeItFromTheTinkersons": "/thefunnies/takeitfromthetinkersons/", "TheBarn": "/thefunnies/thebarn/", "TheLockhorns": "/thefunnies/thelockhorns/", "TheOtherCoast": "/thefunnies/theothercoast/", "TinasGroove": "/thefunnies/tinasgroove/", "WatchYourHead": "/thefunnies/watchyourhead/", "WeePals": "/thefunnies/weepals/", "WizardofId": "/thefunnies/wizardofid/", "WorkingitOut": "/thefunnies/workingitout/", "Wumo": "/thefunnies/wumo/", "ZackHill": "/thefunnies/zackhill/", "Zits": "/thefunnies/zits/"}
|
{"9ChickweedLane": "/thefunnies/ninechickweedlane/", "Agnes": "/thefunnies/agnes/", "AndyCapp": "/thefunnies/andycapp/", "Archie": "/thefunnies/archie/", "ArcticCircle": "/thefunnies/arcticcircle/", "AskShagg": "/thefunnies/askshagg/", "BC": "/thefunnies/bc/", "BabyBlues": "/thefunnies/babyblues/", "BallardStreet": "/thefunnies/ballardstreet/", "BarneyAndClyde": "/thefunnies/barneyandclyde/", "BarneyGoogleAndSnuffySmith": "/thefunnies/barneygoogle/", "BeetleBailey": "/thefunnies/beetlebailey/", "Bizarro": "/thefunnies/bizarro/", "BleekerTheRechargeableDog": "/thefunnies/bleekertherechargeabledog/", "Blondie": "/thefunnies/blondie/", "Boondocks": "/thefunnies/boondocks/", "BrilliantMindofEdisonLee": "/thefunnies/brilliantmindofedisonlee/", "Candorville": "/thefunnies/candorville/", "Cathy": "/thefunnies/cathy/", "ChuckleBros": "/thefunnies/chucklebros/", "Crankshaft": "/thefunnies/crankshaft/", "CuldeSac": "/thefunnies/culdesac/", "Curtis": "/thefunnies/curtis/", "DaddysHome": "/thefunnies/daddyshome/", "DeFlocked": "/thefunnies/deflocked/", "DennistheMenace": "/thefunnies/dennisthemenace/", "DiamondLil": "/thefunnies/diamondlil/", "Dilbert": "/thefunnies/dilbert/", "DinetteSet": "/thefunnies/thedinetteset/", "DogEatDoug": "/thefunnies/dogeatdoug/", "DogsofCKennel": "/thefunnies/dogsofckennel/", "Doonesbury": "/thefunnies/doonesbury/", "Dustin": "/thefunnies/dustin/", "FamilyCircus": "/thefunnies/familycircus/", "FloAndFriends": "/thefunnies/floandfriends/", "ForHeavensSake": "/thefunnies/forheavenssake/", "FortKnox": "/thefunnies/fortknox/", "FreeRange": "/thefunnies/freerange/", "Garfield": "/thefunnies/garfield/", "GetFuzzy": "/thefunnies/getfuzzy/", "Heathcliff": "/thefunnies/heathcliff/", "HerbandJamaal": "/thefunnies/herbandjamaal/", "HiandLois": "/thefunnies/hiandlois/", "HomeAndAway": "/thefunnies/homeandaway/", "IntelligentLife": "/thefunnies/intelligentlife/", "JerryKingCartoons": "/thefunnies/humorcartoon/", "LittleDogLost": "/thefunnies/littledoglost/", 
"LongStoryShort": "/thefunnies/longstoryshort/", "LooseParts": "/thefunnies/looseparts/", "Luann": "/thefunnies/luann/", "MallardFillmore": "/thefunnies/mallardfillmore/", "Marvin": "/thefunnies/marvin/", "MeaningofLila": "/thefunnies/meaningoflila/", "MikeDuJour": "/thefunnies/mikedujour/", "Momma": "/thefunnies/momma/", "MotherGooseAndGrimm": "/thefunnies/mothergooseandgrimm/", "Mutts": "/thefunnies/mutts/", "NestHeads": "/thefunnies/nestheads/", "NonSequitur": "/thefunnies/nonsequitur/", "OneBigHappy": "/thefunnies/onebighappy/", "Peanuts": "/thefunnies/peanuts/", "PearlsBeforeSwine": "/thefunnies/pearlsbeforeswine/", "Pickles": "/thefunnies/pickles/", "RedandRover": "/thefunnies/redandrover/", "ReplyAll": "/thefunnies/replyall/", "RhymeswithOrange": "/thefunnies/rhymeswithorange/", "Rubes": "/thefunnies/rubes/", "RudyPark": "/thefunnies/rudypark/", "Rugrats": "/thefunnies/rugrats/", "ScaryGary": "/thefunnies/scarygary/", "SpeedBump": "/thefunnies/speedbump/", "StrangeBrew": "/thefunnies/strangebrew/", "TakeItFromTheTinkersons": "/thefunnies/takeitfromthetinkersons/", "TheBarn": "/thefunnies/thebarn/", "TheLockhorns": "/thefunnies/thelockhorns/", "TheOtherCoast": "/thefunnies/theothercoast/", "TinasGroove": "/thefunnies/tinasgroove/", "WeePals": "/thefunnies/weepals/", "WizardofId": "/thefunnies/wizardofid/", "WorkingitOut": "/thefunnies/workingitout/", "Wumo": "/thefunnies/wumo/", "ZackHill": "/thefunnies/zackhill/", "Zits": "/thefunnies/zits/"}
|
|
@@ -16,7 +16,7 @@ from scriptutil import contains_case_insensitive, capfirst, save_result, load_re
|
||||||
|
|
||||||
json_file = __file__.replace(".py", ".json")
|
json_file = __file__.replace(".py", ".json")
|
||||||
|
|
||||||
url_matcher = re.compile(r'<li><b><a href="(/thefunnies/[^"]+)">([^<]+)</a>')
|
url_matcher = re.compile(r'<li><a href="(/thefunnies/[^"]+)">([^<]+)</a>')
|
||||||
|
|
||||||
# names of comics to exclude
|
# names of comics to exclude
|
||||||
exclude_comics = [
|
exclude_comics = [
|
||||||
|
@@ -28,7 +28,7 @@ def handle_url(url, session, res):
|
||||||
"""Parse one search result page."""
|
"""Parse one search result page."""
|
||||||
print("Parsing", url, file=sys.stderr)
|
print("Parsing", url, file=sys.stderr)
|
||||||
try:
|
try:
|
||||||
data, baseUrl = getPageContent(url, session)
|
data = getPageContent(url, session)
|
||||||
except IOError as msg:
|
except IOError as msg:
|
||||||
print("ERROR:", msg, file=sys.stderr)
|
print("ERROR:", msg, file=sys.stderr)
|
||||||
return
|
return
|
||||||
|
|
Loading…
Reference in a new issue