Fix some comics.
This commit is contained in:
parent
7e91c83753
commit
4528894c05
19 changed files with 583 additions and 381 deletions
|
@ -2,21 +2,21 @@
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012 Bastian Kleineidam
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from re import compile
|
||||||
from ..util import tagre
|
from ..scraper import make_scraper
|
||||||
|
from ..util import tagre, asciify
|
||||||
|
|
||||||
def add(name, shortname):
    """Create the scraper class for one creators.com comic and register it.

    The class is built with make_scraper() and stored in this module's
    globals under ``Creators_<asciified name>``.

    name -- comic name; used for the class name and for ``Creators/<name>``
    shortname -- path component of the comic below /comics/ on creators.com
    """
    comicsUrl = 'http://www.creators.com/comics/'
    comicUrl = comicsUrl + shortname
    classname = 'Creators_%s' % asciify(name)
    # The "previous" link is an anchor to a numbered strip page that is
    # immediately followed by the left-arrow image.
    prevLink = tagre("a", "href", r'(/comics/%s/\d+\.html)' % shortname)
    prevArrow = tagre("img", "src", r'/img_comics/arrow_l\.gif')
    scraper = make_scraper(classname,
        name='Creators/' + name,
        latestUrl=comicUrl + '.html',
        stripUrl=comicUrl + '/%s.html',
        imageSearch=compile(tagre("img", "src", r'(/comics/\d+/[^"]+)')),
        prevSearch=compile(prevLink + prevArrow),
        help='Index format: n',
    )
    globals()[classname] = scraper
|
||||||
|
|
||||||
|
|
||||||
|
@ -52,9 +52,9 @@ comics = {
|
||||||
'Momma': 'momma',
|
'Momma': 'momma',
|
||||||
'NestHeads': 'nest-heads',
|
'NestHeads': 'nest-heads',
|
||||||
'OneBigHappy': 'one-big-happy',
|
'OneBigHappy': 'one-big-happy',
|
||||||
'OnAClaireDay': 'on-a-clair-day',
|
'OnAClaireDay': 'on-a-claire-day',
|
||||||
'TheOtherCoast': 'other-coast',
|
'TheOtherCoast': 'the-other-coast',
|
||||||
'TheQuigmans': 'quigmans',
|
'TheQuigmans': 'the-quigmans',
|
||||||
'Rubes': 'rubes',
|
'Rubes': 'rubes',
|
||||||
'Rugrats': 'rugrats',
|
'Rugrats': 'rugrats',
|
||||||
'ScaryGary': 'scary-gary',
|
'ScaryGary': 'scary-gary',
|
||||||
|
@ -78,4 +78,4 @@ comics = {
|
||||||
}
|
}
|
||||||
|
|
||||||
for name, shortname in comics.items():
|
for name, shortname in comics.items():
|
||||||
globals()[name] = creators(name, shortname)
|
add(name, shortname)
|
||||||
|
|
|
@ -142,3 +142,5 @@ class DresdenCodak(_BasicScraper):
|
||||||
prevSearch = compile(r'<a href="http://dresdencodak.com(/.*?)"><img src=http://dresdencodak.com/m_prev.png>')
|
prevSearch = compile(r'<a href="http://dresdencodak.com(/.*?)"><img src=http://dresdencodak.com/m_prev.png>')
|
||||||
starter = indirectStarter('http://dresdencodak.com/', compile(r'<div id="preview"><a href="http://dresdencodak.com/(\d+/\d+/\d+/.*?)">'))
|
starter = indirectStarter('http://dresdencodak.com/', compile(r'<div id="preview"><a href="http://dresdencodak.com/(\d+/\d+/\d+/.*?)">'))
|
||||||
|
|
||||||
|
|
||||||
|
# XXX dilbert.com
|
||||||
|
|
|
@ -2,28 +2,27 @@
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012 Bastian Kleineidam
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
|
||||||
from re import compile, IGNORECASE
|
from re import compile
|
||||||
|
from ..scraper import make_scraper
|
||||||
from ..scraper import _BasicScraper
|
|
||||||
from ..helpers import bounceStarter, queryNamer
|
from ..helpers import bounceStarter, queryNamer
|
||||||
|
from ..util import tagre
|
||||||
|
|
||||||
|
|
||||||
def add(name):
    """Create the scraper class for one drunkduck.com comic and register it.

    The class is built with make_scraper() and stored in this module's
    globals under ``DrunkDuck_<name>``.

    name -- comic name as used in the drunkduck.com URL path
    """
    classname = 'DrunkDuck_%s' % name
    url = 'http://www.drunkduck.com/%s/' % name
    # Template for the navigation links; the remaining %s is filled with
    # 'next' or 'previous' below.
    linkSearch = tagre("a", "href", r"(/[^/]*/index\.php\?p=\d+)", quote="'", after="The %s page")
    globals()[classname] = make_scraper(classname,
        name='DrunkDuck/' + name,
        starter=bounceStarter(url, compile(linkSearch % 'next')),
        # Keep the %s placeholder for the strip index.  The former
        # "url + 'index.php?p=%s' % name" substituted the comic name into
        # it, because % binds tighter than +.
        stripUrl=url + 'index.php?p=%s',
        imageSearch=compile(tagre("img", "src", r"(http://[a-z0-9]*\.drunkduck\.com/[^/]*/pages/[^'/]+)", quote="'")),
        prevSearch=compile(linkSearch % 'previous'),
        help='Index format: n (unpadded)',
        namer=queryNamer('p', usePageUrl=True),
    )
|
||||||
|
|
||||||
duckComics = [
|
comics = (
|
||||||
'0_Opposites_attract_0',
|
'0_Opposites_attract_0',
|
||||||
'0_eight',
|
'0_eight',
|
||||||
'101_Ways_to_Drive_a_Maren_Insane',
|
'101_Ways_to_Drive_a_Maren_Insane',
|
||||||
|
@ -2275,7 +2274,7 @@ duckComics = [
|
||||||
'yay_ponys',
|
'yay_ponys',
|
||||||
'yoshi_freaks_real_life',
|
'yoshi_freaks_real_life',
|
||||||
'zuchini',
|
'zuchini',
|
||||||
]
|
)
|
||||||
|
|
||||||
for shortName in duckComics:
|
for name in comics:
|
||||||
globals()[shortName] = drunkDuck(shortName)
|
add(name)
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: iso-8859-1 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012 Bastian Kleineidam
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
from ..scraper import _BasicScraper
|
||||||
|
|
||||||
|
|
||||||
def fallenangel(name, shortname):
|
def fallenangel(name, shortname):
|
||||||
pass # XXX
|
pass # XXX
|
||||||
|
|
309
dosagelib/plugins/gocomics.py
Normal file
309
dosagelib/plugins/gocomics.py
Normal file
|
@ -0,0 +1,309 @@
|
||||||
|
# -*- coding: iso-8859-1 -*-
|
||||||
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
|
||||||
|
from re import compile
|
||||||
|
from ..scraper import make_scraper
|
||||||
|
from ..util import tagre, asciify
|
||||||
|
|
||||||
|
def add(name, repl=''):
    """Create the scraper class for one gocomics.com comic and register it.

    The class is built with make_scraper() and stored in this module's
    globals under ``GoComics_<asciified name>``.

    name -- display name of the comic
    repl -- string that replaces each space of the lowercased name when
            building the URL path component (default: spaces are dropped)
    """
    baseUrl = 'http://www.gocomics.com/'
    comicname = asciify(name)
    shortname = name.lower().replace(' ', repl)
    classname = 'GoComics_%s' % comicname

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Build the image file name from the date at the end of pageUrl."""
        # Strip pages look like <baseUrl><shortname>/yyyy/mm/dd, so the date
        # parts must be split off from the right.  Splitting from the left
        # returned the URL scheme and host instead of the date.
        # NOTE(review): the undated latestUrl has no trailing date parts -
        # confirm the namer is only called with dated page URLs.
        _prefix, year, month, day = pageUrl.rsplit('/', 3)
        return "%s_%s%s%s.gif" % (shortname, year, month, day)

    globals()[classname] = make_scraper(classname,
        latestUrl=baseUrl + shortname,
        name='GoComics/' + comicname,
        stripUrl=baseUrl + shortname + '/%s',
        imageSearch=compile(tagre("img", "src", r'(http://assets\.amuniversal\.com/[0-9a-f]+)')),
        prevSearch=compile(tagre("a", "href", r'(/[^"]+/\d+/\d+/\d+)', after="prev")),
        help='Index format: yyyy/mm/dd',
        namer=namer,
    )
|
||||||
|
|
||||||
|
|
||||||
|
# http://www.gocomics.com/features
|
||||||
|
# note that comics from creators.com are not repeated here
|
||||||
|
add('2 Cows and a Chicken')
|
||||||
|
add('9 Chickweed Lane')
|
||||||
|
add('9 to 5')
|
||||||
|
add('The Academia Waltz')
|
||||||
|
add('Adam at Home')
|
||||||
|
add('Agnes')
|
||||||
|
add('Alley Oop', repl='-')
|
||||||
|
add('Andertoons')
|
||||||
|
add('Andy Capp')
|
||||||
|
add('Angry Little Girls', repl='-')
|
||||||
|
add('Animal Crackers')
|
||||||
|
add('Annie')
|
||||||
|
add('The Argyle Sweater')
|
||||||
|
add('Arlo and Janis')
|
||||||
|
add('Ask Shagg')
|
||||||
|
add('BC')
|
||||||
|
add('Back in the Day')
|
||||||
|
add('Bad Reporter')
|
||||||
|
add('Baldo')
|
||||||
|
add('Ballard Street')
|
||||||
|
add('Banana Triangle', repl='-')
|
||||||
|
add('Barkeater Lake')
|
||||||
|
add('The Barn')
|
||||||
|
add('Barney and Clyde')
|
||||||
|
add('Basic Instructions')
|
||||||
|
add('Beardo')
|
||||||
|
add('Ben')
|
||||||
|
add('Berger and Wyse', repl='-')
|
||||||
|
add('Betty')
|
||||||
|
add('Bewley')
|
||||||
|
add('Biff and Riley', repl='-')
|
||||||
|
add('Big Nate')
|
||||||
|
add('The Big Picture')
|
||||||
|
add('Big Top')
|
||||||
|
add('Biographic')
|
||||||
|
add('Birdbrains')
|
||||||
|
add('Bliss')
|
||||||
|
add('Bloom County')
|
||||||
|
add('Bo Nanas')
|
||||||
|
add('Bob the Squirrel')
|
||||||
|
add('Boomerangs')
|
||||||
|
add('The Boondocks')
|
||||||
|
add('The Born Loser')
|
||||||
|
add('Bottomliners')
|
||||||
|
add('Bound and Gagged')
|
||||||
|
add('Break of Day')
|
||||||
|
add('Brevity')
|
||||||
|
add('Brewster Rockit')
|
||||||
|
add('Broom Hilda')
|
||||||
|
add('The Buckets')
|
||||||
|
add('Buni')
|
||||||
|
add('Cafe con Leche')
|
||||||
|
add('Calvin and Hobbes')
|
||||||
|
add('Candorville')
|
||||||
|
add('Cathy')
|
||||||
|
add('Cest la Vie')
|
||||||
|
add('Cheap Thrills Cuisine', repl='-')
|
||||||
|
add('Chuckle Bros')
|
||||||
|
add('Citizen Dog')
|
||||||
|
add('The City')
|
||||||
|
add('Cleats')
|
||||||
|
add('Close to Home')
|
||||||
|
add('Committed')
|
||||||
|
add('Compu-toon')
|
||||||
|
add('Cornered')
|
||||||
|
add('Cow and Boy')
|
||||||
|
add('CowTown')
|
||||||
|
add('Crumb')
|
||||||
|
add('Cul de Sac')
|
||||||
|
add('Daddys Home')
|
||||||
|
add('Dark Side of the Horse')
|
||||||
|
add('Deep Cover')
|
||||||
|
add('Diamond Lil')
|
||||||
|
add('Dick Tracy')
|
||||||
|
add('The Dinette Set')
|
||||||
|
add('Dixie Drive', repl='-')
|
||||||
|
add('Dog Eat Doug')
|
||||||
|
add('Dogs of C Kennel')
|
||||||
|
add('Domestic Abuse')
|
||||||
|
add('Doonesbury')
|
||||||
|
add('The Doozies')
|
||||||
|
add('Drabble')
|
||||||
|
add('DudeDude')
|
||||||
|
add('The Duplex')
|
||||||
|
add('Eek')
|
||||||
|
add('The Elderberries')
|
||||||
|
add('Endtown')
|
||||||
|
add('Eric the Circle', repl='-')
|
||||||
|
add('F Minus')
|
||||||
|
add('Family Tree')
|
||||||
|
add('Farcus')
|
||||||
|
add('Fat Cats', repl='-')
|
||||||
|
add('Flo and Friends')
|
||||||
|
add('The Flying McCoys')
|
||||||
|
add('Foolish Mortals', repl='-')
|
||||||
|
add('For Better or For Worse')
|
||||||
|
add('For Heavens Sake')
|
||||||
|
add('Fort Knox')
|
||||||
|
add('FoxTrot')
|
||||||
|
add('FoxTrot Classics')
|
||||||
|
add('Frank and Ernest')
|
||||||
|
add('Frazz')
|
||||||
|
add('Fred Basset')
|
||||||
|
add('Free Range')
|
||||||
|
add('Freshly Squeezed')
|
||||||
|
add('Frog Applause')
|
||||||
|
add('The Fusco Brothers')
|
||||||
|
add('Garfield')
|
||||||
|
add('Garfield Minus Garfield')
|
||||||
|
add('Gasoline Alley')
|
||||||
|
add('Geech')
|
||||||
|
add('Get a Life')
|
||||||
|
add('Get Fuzzy')
|
||||||
|
add('Gil Thorp')
|
||||||
|
add('Ginger Meggs')
|
||||||
|
add('Gor Dominical')
|
||||||
|
add('Graffiti')
|
||||||
|
add('Grand Avenue')
|
||||||
|
add('Gray Matters')
|
||||||
|
add('The Grizzwells')
|
||||||
|
add('Haiku Ewe')
|
||||||
|
add('Ham Shears')
|
||||||
|
add('Health Capsules')
|
||||||
|
add('Heart of the City')
|
||||||
|
add('Heathcliff')
|
||||||
|
add('Heavenly Nostrils')
|
||||||
|
add('Herb and Jamaal')
|
||||||
|
add('Herman')
|
||||||
|
add('Home and Away')
|
||||||
|
add('HUBRIS!')
|
||||||
|
add('The Humble Stumble')
|
||||||
|
add('Imagine This')
|
||||||
|
add('In the Bleachers')
|
||||||
|
add('In the Sticks')
|
||||||
|
add('Incidental Comics')
|
||||||
|
add('Ink Pen')
|
||||||
|
add('Inspector Dangers Crime Quiz')
|
||||||
|
add('Its All About You')
|
||||||
|
add('Janes World')
|
||||||
|
add('Jims Journal')
|
||||||
|
add('Joe Vanilla')
|
||||||
|
add('Jump Start')
|
||||||
|
add('The K Chronicles')
|
||||||
|
add('KidCity')
|
||||||
|
add('KidSpot')
|
||||||
|
add('Kit N Carlyle')
|
||||||
|
add('Kitchen Capers')
|
||||||
|
add('Kliban')
|
||||||
|
add('Klibans Cats')
|
||||||
|
add('The Knight Life')
|
||||||
|
add('La Cucaracha')
|
||||||
|
add('Last Kiss')
|
||||||
|
add('The LeftyBosco Picture Show')
|
||||||
|
add('Legend of Bill')
|
||||||
|
add('Liberty Meadows')
|
||||||
|
add('Lil Abner')
|
||||||
|
add('Lio')
|
||||||
|
add('Little Dog Lost')
|
||||||
|
add('Lola')
|
||||||
|
add('Loose Parts')
|
||||||
|
add('The Lost Bear')
|
||||||
|
add('Lost Side of Suburbia')
|
||||||
|
add('Love Is...')
|
||||||
|
add('Luann')
|
||||||
|
add('Lucky Cow')
|
||||||
|
add('Mac')
|
||||||
|
add('Magic in a Minute')
|
||||||
|
add('Maintaining')
|
||||||
|
add('Marias Day')
|
||||||
|
add('Marmaduke')
|
||||||
|
add('McArroni')
|
||||||
|
add('The Meaning of Lila')
|
||||||
|
add('Medium Large')
|
||||||
|
add('Meg Classics')
|
||||||
|
add('The Middletons')
|
||||||
|
add('Mike du Jour')
|
||||||
|
add('Minimum Security')
|
||||||
|
add('Moderately Confused')
|
||||||
|
add('Molly and the Bear')
|
||||||
|
add('Momma')
|
||||||
|
add('Monty')
|
||||||
|
add('Motley Classics')
|
||||||
|
add('Mr. Gigi and the Squid')
|
||||||
|
add('Mutt and Jeff')
|
||||||
|
add('My Cage')
|
||||||
|
add('MythTickle')
|
||||||
|
add('Nancy')
|
||||||
|
add('Nest Heads')
|
||||||
|
add('NEUROTICA')
|
||||||
|
add('New Adventures of Queen Victoria')
|
||||||
|
add('Non Sequitur')
|
||||||
|
add('The Norm Classics')
|
||||||
|
add('Nothing is Not Something')
|
||||||
|
add('Off the Mark')
|
||||||
|
add('Ollie and Quentin')
|
||||||
|
add('On A Claire Day')
|
||||||
|
add('One Big Happy')
|
||||||
|
add('Ordinary Bill')
|
||||||
|
add('The Other Coast')
|
||||||
|
add('Out of the Gene Pool Re-Runs')
|
||||||
|
add('Over the Hedge')
|
||||||
|
add('Overboard')
|
||||||
|
add('Oyster War')
|
||||||
|
add('PC and Pixel')
|
||||||
|
add('Peanuts')
|
||||||
|
add('Pearls Before Swine')
|
||||||
|
add('Pibgorn')
|
||||||
|
add('Pibgorn Sketches')
|
||||||
|
add('Pickles')
|
||||||
|
add('Pinkerton')
|
||||||
|
add('Pluggers')
|
||||||
|
add('Pooch Cafe')
|
||||||
|
add('PreTeena')
|
||||||
|
add('Prickly City')
|
||||||
|
add('Rabbits Against Magic')
|
||||||
|
add('Raising Duncan')
|
||||||
|
add('Real Life Adventures')
|
||||||
|
add('Reality Check')
|
||||||
|
add('Red and Rover')
|
||||||
|
add('Red Meat')
|
||||||
|
add('Reply All')
|
||||||
|
add('Rip Haywire')
|
||||||
|
add('Ripleys Believe It or Not')
|
||||||
|
add('Rose is Rose')
|
||||||
|
add('Rubes')
|
||||||
|
add('Rudy Park')
|
||||||
|
add('Savage Chickens')
|
||||||
|
add('Scary Gary')
|
||||||
|
add('Shirley and Son Classics')
|
||||||
|
add('Shoe')
|
||||||
|
add('Shoecabbage')
|
||||||
|
add('Shortcuts')
|
||||||
|
add('Skin Horse')
|
||||||
|
add('Skippy')
|
||||||
|
add('Slowpoke')
|
||||||
|
add('Soup to Nutz')
|
||||||
|
add('Speed Bump')
|
||||||
|
add('Spot the Frog')
|
||||||
|
add('Starslip')
|
||||||
|
add('Stone Soup')
|
||||||
|
add('Strange Brew')
|
||||||
|
add('The Sunshine Club')
|
||||||
|
add('Sylvia')
|
||||||
|
add('Tank McNamara')
|
||||||
|
add('Tarzan')
|
||||||
|
add('Ten Cats')
|
||||||
|
add('Tales of TerraTopia')
|
||||||
|
add('That is Priceless')
|
||||||
|
add('Thats Life')
|
||||||
|
add('Thatababy')
|
||||||
|
add('Thin Lines')
|
||||||
|
add('Tiny Sepuku')
|
||||||
|
add('TOBY')
|
||||||
|
add('Todays Dogg')
|
||||||
|
add('Tom the Dancing Bug')
|
||||||
|
add('Too Much Coffee Man')
|
||||||
|
add('Trivquiz')
|
||||||
|
add('Twaggies')
|
||||||
|
add('Uncle Arts Funland')
|
||||||
|
add('Unstrange Phenomena')
|
||||||
|
add('U.S. Acres')
|
||||||
|
add('Viivi and Wagner')
|
||||||
|
add('Watch Your Head')
|
||||||
|
add('Wee Pals')
|
||||||
|
add('Wizard of Id')
|
||||||
|
add('Working Daze')
|
||||||
|
add('Working It Out')
|
||||||
|
add('W.T. Duck')
|
||||||
|
add('Zack Hill')
|
||||||
|
add('Ziggy')
|
||||||
|
|
||||||
|
# http://www.gocomics.com/explore/editorial_list
|
||||||
|
# XXX
|
||||||
|
|
||||||
|
# http://www.gocomics.com/explore/sherpa_list
|
||||||
|
# XXX
|
||||||
|
|
|
@ -3,31 +3,29 @@
|
||||||
# Copyright (C) 2012 Bastian Kleineidam
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
|
||||||
from re import compile
|
from re import compile
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import make_scraper
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
|
|
||||||
|
|
||||||
def add(name, urls):
    """Create the scraper class for one KeenSpot comic and register it.

    The class is built with make_scraper() and stored in this module's
    globals under ``KeenSpot_<name>``.

    name -- comic name; used for the class name and for ``KeenSpot/<name>``
    urls -- either a single URL used both as base URL and as latest URL,
            or a tuple ``(baseUrl, latestUrl)``
    """
    classname = 'KeenSpot_%s' % name
    if not isinstance(urls, tuple):
        baseUrl = latestUrl = urls
    else:
        baseUrl, latestUrl = urls

    globals()[classname] = make_scraper(classname,
        name='KeenSpot/' + name,
        latestUrl=latestUrl,
        stripUrl=baseUrl + 'd/%s.html',
        imageSearch=compile(tagre("img", "src", r'([^"]*comics/[^"]+)')),
        # The href value must not start with a quote character of its own.
        # NOTE(review): this assumes tagre() adds the attribute quotes, as
        # its quote= argument elsewhere suggests - confirm against tagre().
        prevSearch=compile(tagre("a", "href", r'([^"]*d/\d{8}\.html)') +
            '(?:<img[^>]+?(?:name="previous_day"|alt="Previous"|src="[^"]*back[^"]*")|Previous comic)'),
        help='Index format: yyyymmdd',
    )
|
||||||
|
|
||||||
|
|
||||||
keenspotComics = {
|
comics = {
|
||||||
'13thLabour': 'http://the13labour.comicgenesis.com/',
|
'13thLabour': 'http://the13labour.comicgenesis.com/',
|
||||||
'1StComing': 'http://toon.comicgenesis.com/',
|
'1StComing': 'http://toon.comicgenesis.com/',
|
||||||
'1StGradeArt': 'http://art.comicgenesis.com/',
|
'1StGradeArt': 'http://art.comicgenesis.com/',
|
||||||
|
@ -1520,7 +1518,15 @@ keenspotComics = {
|
||||||
'Zortic': 'http://www.zortic.com/',
|
'Zortic': 'http://www.zortic.com/',
|
||||||
'ZosKias': 'http://kojika.comicgenesis.com/',
|
'ZosKias': 'http://kojika.comicgenesis.com/',
|
||||||
'ZuraZura': 'http://zurazura.comicgenesis.com/',
|
'ZuraZura': 'http://zurazura.comicgenesis.com/',
|
||||||
}
|
}
|
||||||
|
|
||||||
for name, urls in keenspotComics.items():
|
for name, urls in comics.items():
|
||||||
globals()[name] = keenSpot(name, urls)
|
add(name, urls)
|
||||||
|
|
||||||
|
|
||||||
|
#class Yirmumah(_BasicScraper):
|
||||||
|
# #http://yirmumah.keenspot.com/
|
||||||
|
# stripUrl = latestUrl + '?date=%s'
|
||||||
|
# imageSearch = compile(r'<img src="(strips/\d{8}\..*?)"')
|
||||||
|
# prevSearch = compile(r'<a href="(\?date=\d{8})">.*Previous')
|
||||||
|
# help = 'Index format: yyyymmdd'
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012 Bastian Kleineidam
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
|
||||||
from re import compile, sub
|
from re import compile
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
from ..helpers import indirectStarter, _PHPScraper
|
from ..helpers import indirectStarter, _PHPScraper
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
|
@ -64,35 +64,6 @@ class Nukees(_BasicScraper):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def nuklearpower(name, shortname):
|
|
||||||
baseUrl = 'http://www.nuklearpower.com/'
|
|
||||||
latestUrl = "%s%s/" % (baseUrl, shortname)
|
|
||||||
classname = sub("[^0-9a-zA-Z_]", "", name)
|
|
||||||
|
|
||||||
globals()[classname] = type('NuklearPower_%s' % classname,
|
|
||||||
(_BasicScraper,),
|
|
||||||
dict(
|
|
||||||
name='NuklearPower/' + classname,
|
|
||||||
latestUrl = latestUrl,
|
|
||||||
stripUrl = latestUrl + '%s',
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.nuklearpower\.com/comics/[^"]+)')),
|
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "Previous"),
|
|
||||||
help = 'Index format: yyyy/mm/dd/name',
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
npstrips = {
|
|
||||||
'8BitTheater': '8-bit-theater',
|
|
||||||
'Warbot': 'warbot',
|
|
||||||
'HowIKilledYourMaster': 'hikym',
|
|
||||||
'AtomicRobo': 'atomic-robo',
|
|
||||||
}
|
|
||||||
|
|
||||||
for name, shortname in npstrips.items():
|
|
||||||
nuklearpower(name, shortname)
|
|
||||||
|
|
||||||
|
|
||||||
class NekoTheKitty(_PHPScraper):
|
class NekoTheKitty(_PHPScraper):
|
||||||
basePath = 'http://www.nekothekitty.net/cusp/'
|
basePath = 'http://www.nekothekitty.net/cusp/'
|
||||||
latestUrl = basePath
|
latestUrl = basePath
|
||||||
|
|
26
dosagelib/plugins/nuklearpower.py
Normal file
26
dosagelib/plugins/nuklearpower.py
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
# -*- coding: iso-8859-1 -*-
|
||||||
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
|
||||||
|
from re import compile
|
||||||
|
from ..scraper import make_scraper
|
||||||
|
from ..util import tagre
|
||||||
|
|
||||||
|
def add(name, shortname):
    """Create the scraper class for one nuklearpower.com comic and register it.

    The class is built with make_scraper() and stored in this module's
    globals under ``NuklearPower_<name>``.

    name -- comic name; used for the class name and ``NuklearPower/<name>``
    shortname -- path component of the comic on www.nuklearpower.com
    """
    classname = 'NuklearPower_%s' % name
    comicUrl = 'http://www.nuklearpower.com/%s/' % shortname
    imagePattern = tagre("img", "src", r'(http://www\.nuklearpower\.com/comics/[^"]+)')
    # Any anchor whose tag is directly followed by the text "Previous".
    prevPattern = tagre("a", "href", r'([^"]+)') + "Previous"

    scraper = make_scraper(classname,
        name='NuklearPower/' + name,
        latestUrl=comicUrl,
        stripUrl=comicUrl + '%s',
        imageSearch=compile(imagePattern),
        prevSearch=compile(prevPattern),
        help='Index format: yyyy/mm/dd/name',
    )
    globals()[classname] = scraper
|
||||||
|
|
||||||
|
|
||||||
|
add('8BitTheater', '8-bit-theater')
|
||||||
|
add('Warbot', 'warbot')
|
||||||
|
add('HowIKilledYourMaster', 'hikym')
|
||||||
|
add('AtomicRobo', 'atomic-robo')
|
|
@ -2,48 +2,39 @@
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012 Bastian Kleineidam
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
from re import compile
|
from re import compile
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import make_scraper
|
||||||
from ..helpers import bounceStarter
|
from ..helpers import bounceStarter
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
|
|
||||||
|
|
||||||
def add(name):
    """Create the scraper class for one smackjeeves.com comic and register it.

    The class is built with make_scraper() and stored in this module's
    globals under ``SmackJeeves_<name>``.

    name -- subdomain of the comic on smackjeeves.com
    """
    # Class names use the '<Module>_<name>' form; the slash form is only
    # for the scraper's display name passed as name= below.
    classname = 'SmackJeeves_%s' % name
    # XXX mature content can be viewed directly with:
    # http://www.smackjeeves.com/mature.php?ref=<percent-encoded-url>
    baseUrl = 'http://%s.smackjeeves.com/comics/' % name

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Name the image after the second-to-last path part of pageUrl."""
        return pageUrl.split('/')[-2]

    globals()[classname] = make_scraper(classname,
        name='SmackJeeves/' + name,
        starter=bounceStarter(baseUrl, compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + '<img[^>]*alt="Next >"')),
        # NOTE(review): stripUrl carries no %s placeholder although the help
        # text describes an index - confirm the intended strip URL format.
        stripUrl=baseUrl,
        imageSearch=compile(tagre("img", "src", r'(http://www\.smackjeeves\.com/images/uploaded/comics/[^"]*)')),
        prevSearch=compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + '<img[^>]*alt="< Previous"'),
        help='Index format: nnnn (some increasing number)',
        namer=namer,
    )
|
||||||
|
|
||||||
|
|
||||||
def makeScraper(shortName):
|
add('20galaxies')
|
||||||
baseUrl = 'http://%s.smackjeeves.com/comics/' % shortName
|
add('axe13')
|
||||||
return type('SmackJeeves_%s' % shortName,
|
add('beartholomew')
|
||||||
(_SJScraper,),
|
add('bliss')
|
||||||
dict(
|
add('durian')
|
||||||
name='SmackJeeves/' + shortName,
|
add('heard')
|
||||||
baseUrl=baseUrl,
|
add('mpmcomic')
|
||||||
starter=bounceStarter(baseUrl, compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + '<img[^>]*alt="Next >"'))
|
add('nlmo-project')
|
||||||
)
|
add('paranoidloyd')
|
||||||
)
|
add('thatdreamagain')
|
||||||
return dict((name, makeScraper(name)) for name in names)
|
add('wowcomics')
|
||||||
|
|
||||||
|
|
||||||
globals().update(smackJeeves([
|
|
||||||
'20galaxies',
|
|
||||||
'axe13',
|
|
||||||
'beartholomew',
|
|
||||||
'bliss',
|
|
||||||
'durian',
|
|
||||||
'heard',
|
|
||||||
'mpmcomic',
|
|
||||||
'nlmo-project',
|
|
||||||
'paranoidloyd',
|
|
||||||
'thatdreamagain',
|
|
||||||
'wowcomics',
|
|
||||||
]))
|
|
||||||
|
|
|
@ -2,41 +2,34 @@
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012 Bastian Kleineidam
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from re import compile
|
||||||
|
from ..scraper import make_scraper
|
||||||
|
|
||||||
def add(name, host):
    """Create the scraper class for one snafu-comics.com comic and register it.

    The class is built with make_scraper() and stored in this module's
    globals under ``SnafuComics_<name>``.

    name -- comic name; used for the class name and ``SnafuComics/<name>``
    host -- subdomain of snafu-comics.com that serves the comic
    """
    baseUrl = 'http://%s.snafu-comics.com/' % host
    classname = 'SnafuComics_%s' % name

    globals()[classname] = make_scraper(classname,
        # Display name, restored from the pre-refactoring scraper classes.
        name='SnafuComics/' + name,
        latestUrl=baseUrl,
        # NOTE(review): stripUrl uses strip_id while prevSearch matches
        # comic_id - confirm which query parameter the site uses.
        stripUrl=baseUrl + 'index.php?strip_id=%s',
        imageSearch=compile(r'<img src=http://\w+\.snafu-comics\.com/(comics/\d{6}_\w*\.\w{3,4})'),
        prevSearch=compile(r'<a href="(\?comic_id=\d+)">Previous</a>'),
        help='Index format: n (unpadded)',
    )
|
||||||
|
|
||||||
comics = {
|
|
||||||
'Grim': 'grim',
|
|
||||||
'KOF': 'kof',
|
|
||||||
'PowerPuffGirls': 'ppg',
|
|
||||||
'Snafu': 'www',
|
|
||||||
'Tin': 'tin',
|
|
||||||
'TW': 'tw',
|
|
||||||
'Sugar': 'sugar',
|
|
||||||
'SF': 'sf',
|
|
||||||
'Titan': 'titan',
|
|
||||||
'EA': 'ea',
|
|
||||||
'Zim': 'zim',
|
|
||||||
'Soul': 'soul',
|
|
||||||
'FT': 'ft',
|
|
||||||
'Bunnywith': 'bunnywith',
|
|
||||||
'Braindead': 'braindead',
|
|
||||||
}
|
|
||||||
|
|
||||||
url = 'http://%s.snafu-comics.com/'
|
add('Grim', 'grim')
|
||||||
return dict((name, type('SnafuComics_%s' % name,
|
add('KOF', 'kof')
|
||||||
(_SnafuComics,),
|
add('PowerPuffGirls', 'ppg')
|
||||||
dict(name='SnafuComics/' + name,
|
add('Snafu', 'www')
|
||||||
latestUrl=url % host)))
|
add('Tin', 'tin')
|
||||||
for name, host in comics.items())
|
add('TW', 'tw')
|
||||||
|
add('Sugar', 'sugar')
|
||||||
globals().update(snafuComics())
|
add('SF', 'sf')
|
||||||
|
add('Titan', 'titan')
|
||||||
|
add('EA', 'ea')
|
||||||
|
add('Zim', 'zim')
|
||||||
|
add('Soul', 'soul')
|
||||||
|
add('FT', 'ft')
|
||||||
|
add('Bunnywith', 'bunnywith')
|
||||||
|
add('Braindead', 'braindead')
|
||||||
|
|
|
@ -2,29 +2,13 @@
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012 Bastian Kleineidam
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
|
||||||
from re import compile, IGNORECASE
|
from re import compile
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
from ..helpers import bounceStarter, indirectStarter
|
from ..helpers import bounceStarter, indirectStarter
|
||||||
from ..util import getQueryParams, tagre
|
from ..util import getQueryParams, tagre
|
||||||
|
|
||||||
|
|
||||||
class UglyHill(_BasicScraper):
|
|
||||||
latestUrl = 'http://www.uglyhill.com/'
|
|
||||||
stripUrl = latestUrl + 'd/%s.html'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(/comic[s|/][^"]+)'))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif")
|
|
||||||
help = 'Index format: yyyymmdd'
|
|
||||||
|
|
||||||
|
|
||||||
class UnderPower(_BasicScraper):
|
|
||||||
latestUrl = 'http://underpower.non-essential.com/'
|
|
||||||
stripUrl = latestUrl + 'index.php?comic=%s'
|
|
||||||
imageSearch = compile(r'<img src="(comics/\d{8}\..+?)"')
|
|
||||||
prevSearch = compile(r'<a href="(/index.php\?comic=\d{8})"><img src="images/previous-comic\.gif"')
|
|
||||||
help = 'Index format: yyyymmdd'
|
|
||||||
|
|
||||||
|
|
||||||
class Undertow(_BasicScraper):
|
class Undertow(_BasicScraper):
|
||||||
stripUrl = 'http://undertow.dreamshards.org/%s'
|
stripUrl = 'http://undertow.dreamshards.org/%s'
|
||||||
imageSearch = compile(r'<img src="(.+?)"')
|
imageSearch = compile(r'<img src="(.+?)"')
|
||||||
|
@ -52,24 +36,3 @@ class UserFriendly(_BasicScraper):
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
return 'uf%s' % (getQueryParams(pageUrl)['id'][0][2:],)
|
return 'uf%s' % (getQueryParams(pageUrl)['id'][0][2:],)
|
||||||
|
|
||||||
|
|
||||||
class UndeadFriend(_BasicScraper):
|
|
||||||
latestUrl = 'http://www.undeadfriend.com/'
|
|
||||||
stripUrl = latestUrl + 'd/%s.html'
|
|
||||||
imageSearch = compile(r'src="(http://www\.undeadfriend\.com/comics/.+?)"', IGNORECASE)
|
|
||||||
prevSearch = compile(r'<a.+?href="(http://www\.undeadfriend\.com/d/\d+?\.html)"><img border="0" name="previous_day" alt="Previous comic" src="http://www\.undeadfriend\.com/images/previous_day\.jpg', IGNORECASE)
|
|
||||||
help = 'Index format: yyyymmdd'
|
|
||||||
|
|
||||||
|
|
||||||
class UnspeakableVault(_BasicScraper):
|
|
||||||
stripUrl = 'http://www.macguff.fr/goomi/unspeakable/WEBIMAGES/CARTOON/vault%s.html'
|
|
||||||
imageSearch = compile(r'(WEBIMAGES/CARTOON/.+?)"')
|
|
||||||
prevSearch = compile(r'PREVIOUS.+?" href="(.+?)"')
|
|
||||||
help = 'Index format: nn or nnn'
|
|
||||||
starter = indirectStarter('http://www.macguff.fr/goomi/unspeakable/home.html',
|
|
||||||
compile(r'http://www.macguff.fr/goomi/unspeakable/(.+?)"'))
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def namer(cls, imageUrl, imageSearch):
|
|
||||||
return '%s-%s' % (imageSearch.split('/')[-1].split('.')[0],imageUrl.split('/')[-1].split('.')[0])
|
|
||||||
|
|
|
@ -3,27 +3,19 @@
|
||||||
# Copyright (C) 2012 Bastian Kleineidam
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
|
||||||
from re import compile, sub
|
from re import compile, sub
|
||||||
|
from ..scraper import make_scraper
|
||||||
from ..scraper import _BasicScraper
|
|
||||||
from ..util import fetchUrl, tagre
|
from ..util import fetchUrl, tagre
|
||||||
|
|
||||||
|
|
||||||
class _UClickScraper(_BasicScraper):
|
def add(name, shortName):
|
||||||
homepage = 'http://content.uclick.com/a2z.html'
|
homepage = 'http://content.uclick.com/a2z.html'
|
||||||
baseUrl = 'http://www.uclick.com/client/zzz/%s/'
|
baseUrl = 'http://www.uclick.com/client/zzz/%s/'
|
||||||
stripUrl = property(lambda self: self.latestUrl + '%s/')
|
latestUrl = baseUrl % shortName
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://synd\.imgsrv\.uclick\.com/comics/\w+/\d{4}/[^"]+\.gif)'))
|
classname = 'UClick_%s' % name
|
||||||
prevSearch = compile(tagre("a", "href", r'(/client/zzz/\w+/\d{4}/\d{2}/\d{2}/)') + 'Previous date')
|
|
||||||
help = 'Index format: yyyy/mm/dd'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def starter(cls):
|
|
||||||
return cls.baseUrl % (cls.shortName,)
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def fetchSubmodules(cls):
|
def fetchSubmodules(cls):
|
||||||
exclusions = ('index',)
|
exclusions = ('index',)
|
||||||
|
|
||||||
# XXX refactor this mess
|
# XXX refactor this mess
|
||||||
submoduleSearch = compile(tagre("a", "href", r'(http://content\.uclick\.com/content/\w+\.html)'))
|
submoduleSearch = compile(tagre("a", "href", r'(http://content\.uclick\.com/content/\w+\.html)'))
|
||||||
partsMatch = compile(tagre("a", "href", r'http://content\.uclick\.com/content/(\w+?)\.html'))
|
partsMatch = compile(tagre("a", "href", r'http://content\.uclick\.com/content/(\w+?)\.html'))
|
||||||
|
@ -43,11 +35,15 @@ class _UClickScraper(_BasicScraper):
|
||||||
|
|
||||||
return [normalizeName(name) for part, name in possibles if part not in exclusions and fetchSubmodule(part)]
|
return [normalizeName(name) for part, name in possibles if part not in exclusions and fetchSubmodule(part)]
|
||||||
|
|
||||||
|
globals()[classname] = make_scraper(classname,
|
||||||
|
name='UClick/' + name,
|
||||||
|
latestUrl = latestUrl,
|
||||||
|
stripUrl = latestUrl + '%s/',
|
||||||
|
imageSearch = compile(tagre("img", "src", r'(http://synd\.imgsrv\.uclick\.com/comics/\w+/\d{4}/[^"]+\.gif)')),
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(/client/zzz/\w+/\d{4}/\d{2}/\d{2}/)') + 'Previous date'),
|
||||||
|
help = 'Index format: yyyy/mm/dd',
|
||||||
|
)
|
||||||
|
|
||||||
def uclick(name, shortName):
|
|
||||||
return type('UClick_%s' % name,
|
|
||||||
(_UClickScraper,),
|
|
||||||
dict(name='UClick/' + name, shortName=shortName))
|
|
||||||
|
|
||||||
comics = {
|
comics = {
|
||||||
'5thWave': 'fw',
|
'5thWave': 'fw',
|
||||||
|
@ -278,6 +274,7 @@ comics = {
|
||||||
'ZackHill': 'crzhi',
|
'ZackHill': 'crzhi',
|
||||||
'ZiggySpanish': 'spzi',
|
'ZiggySpanish': 'spzi',
|
||||||
'Ziggy': 'zi',
|
'Ziggy': 'zi',
|
||||||
}
|
}
|
||||||
|
|
||||||
globals().update(dict((item[0], uclick(*item)) for item in comics.items()))
|
for name, shortname in comics.items():
|
||||||
|
add(name, shortname)
|
||||||
|
|
|
@ -2,11 +2,13 @@
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012 Bastian Kleineidam
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
|
||||||
from re import compile, IGNORECASE, MULTILINE
|
from re import compile
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
|
from ..util import tagre
|
||||||
|
|
||||||
|
|
||||||
|
# XXX make dynamic
|
||||||
class _VGCats(_BasicScraper):
|
class _VGCats(_BasicScraper):
|
||||||
latestUrl = 'http://www.vgcats.com/comics/'
|
latestUrl = 'http://www.vgcats.com/comics/'
|
||||||
imageSearch = compile(r'<img src="(images/\d{6}\..+?)"')
|
imageSearch = compile(r'<img src="(images/\d{6}\..+?)"')
|
||||||
|
@ -31,7 +33,6 @@ class Adventure(_VGCats):
|
||||||
|
|
||||||
class ViiviJaWagner(_BasicScraper):
|
class ViiviJaWagner(_BasicScraper):
|
||||||
latestUrl = 'http://www.hs.fi/viivijawagner/'
|
latestUrl = 'http://www.hs.fi/viivijawagner/'
|
||||||
imageSearch = compile(r'<img id="strip\d+"\s+src="([^"]+)"', IGNORECASE)
|
imageSearch = compile(tagre("link", "href", r'(http://hs12\.snstatic\.fi/webkuva/oletus/[^"]+)', before="image_src"))
|
||||||
prevSearch = compile(r'<a href="(.+?)"[^>]+?>\nEdellinen \n<img src="http://www.hs.fi/static/hs/img/viivitaakse.gif"', MULTILINE | IGNORECASE)
|
prevSearch = compile(tagre("a", "href", r'(/viivijawagner/\d+)', before="prev-cm"))
|
||||||
# XXX ?
|
help = 'Index format: none'
|
||||||
help = 'Index format: shrugs!'
|
|
||||||
|
|
|
@ -2,15 +2,15 @@
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012 Bastian Kleineidam
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
|
||||||
from re import compile, IGNORECASE, DOTALL
|
from re import compile, IGNORECASE
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
from ..helpers import queryNamer, bounceStarter
|
from ..util import tagre
|
||||||
|
|
||||||
|
|
||||||
class WayfarersMoon(_BasicScraper):
|
class WayfarersMoon(_BasicScraper):
|
||||||
latestUrl = 'http://www.wayfarersmoon.com/'
|
latestUrl = 'http://www.wayfarersmoon.com/'
|
||||||
stripUrl = latestUrl + 'index.php\?page=%s'
|
stripUrl = latestUrl + 'index.php?page=%s'
|
||||||
imageSearch = compile(r'<img src="(/admin.+?)"')
|
imageSearch = compile(r'<img src="(/admin.+?)"')
|
||||||
prevSearch = compile(r'<a href="(.+?)".+?btn_back.gif')
|
prevSearch = compile(r'<a href="(.+?)".+?btn_back.gif')
|
||||||
help = 'Index format: nn'
|
help = 'Index format: nn'
|
||||||
|
@ -32,7 +32,6 @@ class WhiteNoise(_BasicScraper):
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class WhyTheLongFace(_BasicScraper):
|
class WhyTheLongFace(_BasicScraper):
|
||||||
latestUrl = 'http://www.absurdnotions.org/wtlf200709.html'
|
latestUrl = 'http://www.absurdnotions.org/wtlf200709.html'
|
||||||
stripUrl = 'http://www.absurdnotions.org/wtlf%s.html'
|
stripUrl = 'http://www.absurdnotions.org/wtlf%s.html'
|
||||||
|
@ -41,23 +40,12 @@ class WhyTheLongFace(_BasicScraper):
|
||||||
help = 'Index format: yyyymm'
|
help = 'Index format: yyyymm'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Wigu(_BasicScraper):
|
class Wigu(_BasicScraper):
|
||||||
latestUrl = 'http://www.wigu.com/wigu/'
|
latestUrl = 'http://wigucomics.com/'
|
||||||
stripUrl = latestUrl + '?date=%s'
|
stripUrl = latestUrl + 'adventures/index.php?comic=%s'
|
||||||
imageSearch = compile(r'<img src="(strips/\d{8}\..+?)" alt=""')
|
imageSearch = compile(tagre("img", "src", r'(/adventures/comics/[^"]+)'))
|
||||||
prevSearch = compile(r'<a href="(.+?)"[^>]+?>< PREV COMIC</a> ')
|
prevSearch = compile(tagre("a", "href", r'(/adventures/index\.php\?comic=\d+)', after="go back"))
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class WiguTV(_BasicScraper):
|
|
||||||
latestUrl = 'http://jjrowland.com/'
|
|
||||||
stripUrl = latestUrl + 'archive/%s.html'
|
|
||||||
imageSearch = compile(r'"(/comics/.+?)"')
|
|
||||||
prevSearch = compile(r'<a href="(/archive/.+?)"[^>]+?> ')
|
|
||||||
help = 'Index format: yyyymmdd'
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class WotNow(_BasicScraper):
|
class WotNow(_BasicScraper):
|
||||||
|
@ -68,7 +56,6 @@ class WotNow(_BasicScraper):
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class WorldOfWarcraftEh(_BasicScraper):
|
class WorldOfWarcraftEh(_BasicScraper):
|
||||||
latestUrl = 'http://woweh.com/'
|
latestUrl = 'http://woweh.com/'
|
||||||
stripUrl = None
|
stripUrl = None
|
||||||
|
@ -77,46 +64,11 @@ class WorldOfWarcraftEh(_BasicScraper):
|
||||||
|
|
||||||
|
|
||||||
class Wulffmorgenthaler(_BasicScraper):
|
class Wulffmorgenthaler(_BasicScraper):
|
||||||
latestUrl = 'http://www.wulffmorgenthaler.com/'
|
latestUrl = 'http://wumocomicstrip.com/'
|
||||||
stripUrl = latestUrl + 'Default.aspx?id=%s'
|
stripUrl = latestUrl + '%s/'
|
||||||
imageSearch = compile(r'img id="ctl00_content_Strip1_imgStrip".+?class="strip" src="(striphandler\.ashx\?stripid=[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"')
|
imageSearch = compile(tagre("img", "src", r'(/img/strip/thumb/[^"]+)'))
|
||||||
prevSearch = compile(r'<a href="(/default\.aspx\?id=[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})" id="ctl00_content_Strip1_aPrev">')
|
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<span>Previous")
|
||||||
help = 'Index format: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx (GUID)'
|
help = 'Index format: yyyy/mm/dd'
|
||||||
namer = queryNamer('stripid')
|
|
||||||
|
|
||||||
|
|
||||||
def webcomicsNation():
|
|
||||||
class _WebcomicsNation(_BasicScraper):
|
|
||||||
imageSearch = compile(r'<a name="strip\d*?">.*?<img[^>]+?src="([^"]*?memberimages/.+?)"', IGNORECASE + DOTALL)
|
|
||||||
prevSearch = compile(r'href="([^"]*?whichbutton=prev[^"]*?)"', IGNORECASE)
|
|
||||||
help = 'Index format: nnnn (non-contiguous)'
|
|
||||||
|
|
||||||
@property
|
|
||||||
def stripUrl(self):
|
|
||||||
return self.baseUrl + '?view=archive&chapter=%s'
|
|
||||||
|
|
||||||
comics = {
|
|
||||||
'AgnesQuill': 'daveroman/agnes/',
|
|
||||||
'Elvenbaath': 'tdotodot2k/elvenbaath/',
|
|
||||||
'IrrationalFears': 'uvernon/irrationalfears/',
|
|
||||||
'KismetHuntersMoon': 'laylalawlor/huntersmoon/',
|
|
||||||
'SaikoAndLavender': 'gc/saiko/',
|
|
||||||
'MyMuse': 'gc/muse/',
|
|
||||||
'NekkoAndJoruba': 'nekkoandjoruba/nekkoandjoruba/',
|
|
||||||
'JaxEpoch': 'johngreen/quicken/',
|
|
||||||
'QuantumRockOfAges': 'DreamchildNYC/quantum/',
|
|
||||||
'ClownSamurai' : 'qsamurai/clownsamurai/',
|
|
||||||
}
|
|
||||||
|
|
||||||
return dict((name, type('WebcomicsNation_%s' % name,
|
|
||||||
(_WebcomicsNation,),
|
|
||||||
dict(name='WebcomicsNation/' + name,
|
|
||||||
latestUrl='http://www.webcomicsnation.com/' + subpath)))
|
|
||||||
for name, subpath in comics.items())
|
|
||||||
|
|
||||||
|
|
||||||
globals().update(webcomicsNation())
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class WhiteNoise(_BasicScraper):
|
class WhiteNoise(_BasicScraper):
|
||||||
|
@ -127,7 +79,6 @@ class WhiteNoise(_BasicScraper):
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class WapsiSquare(_BasicScraper):
|
class WapsiSquare(_BasicScraper):
|
||||||
latestUrl = 'http://wapsisquare.com/'
|
latestUrl = 'http://wapsisquare.com/'
|
||||||
stripUrl = latestUrl + 'comic/%s'
|
stripUrl = latestUrl + 'comic/%s'
|
||||||
|
@ -136,77 +87,14 @@ class WapsiSquare(_BasicScraper):
|
||||||
help = 'Index format: strip-name'
|
help = 'Index format: strip-name'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class WrongWay(_BasicScraper):
|
|
||||||
latestUrl = 'http://www.wrongwaycomics.com/'
|
|
||||||
stripUrl = latestUrl + '%s.html'
|
|
||||||
imageSearch = compile(r'<img src="(comics/.+?)"')
|
|
||||||
prevSearch = compile(r' <a class="comicNav" href="(.+?)" onmouseover="previousLinkIn\(\)"')
|
|
||||||
help = 'Index format: nnn'
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class WeCanSleepTomorrow(_BasicScraper):
|
class WeCanSleepTomorrow(_BasicScraper):
|
||||||
latestUrl = 'http://wecansleeptomorrow.com/'
|
latestUrl = 'http://wecansleeptomorrow.com/'
|
||||||
imageSearch = compile(r'<img src="(http://wecansleeptomorrow.com/comics/.+?)"')
|
stripUrl = latestUrl + '%s/'
|
||||||
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
|
imageSearch = compile(tagre("img", "src", r'(http://wecansleeptomorrow\.com/comics/[^"]+)'))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(http://wecansleeptomorrow\.com/[^"]+)', after="prev"))
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
help = 'Index format: yyyy/mm/dd/stripname'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class _WLP(_BasicScraper):
|
|
||||||
imageSearch=compile(r'SRC="(http://www.wlpcomics.com/adult/.+?|http://www.wlpcomics.com/general/.+?)"', IGNORECASE)
|
|
||||||
prevSearch=compile(r'</a> <A HREF="(\w+.html)">Previous Page</a>', IGNORECASE)
|
|
||||||
help='Index format: nnn'
|
|
||||||
|
|
||||||
@property
|
|
||||||
def baseUrl(self):
|
|
||||||
return 'http://www.wlpcomics.com/%s' % (self.path,)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def stripUrl(self):
|
|
||||||
return self.baseUrl + '%s.html'
|
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
|
||||||
return pageUrl.split('/')[-1].split('.')[0]
|
|
||||||
|
|
||||||
def starter(self):
|
|
||||||
# XXX: ergh
|
|
||||||
meth = bounceStarter(self.baseUrl, compile(r'</a> <A HREF="(\w+.html)">Next Page</a>', IGNORECASE))
|
|
||||||
return meth.__get__(self, type(self))()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ChichiChan(_WLP):
|
|
||||||
name = 'WLP/ChichiChan'
|
|
||||||
path = 'adult/chichi/'
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ChocolateMilkMaid(_WLP):
|
|
||||||
name = 'WLP/ChocolateMilkMaid'
|
|
||||||
path = 'adult/cm/'
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class MaidAttack(_WLP):
|
|
||||||
name = 'WLP/MaidAttack'
|
|
||||||
path = 'general/maidattack/'
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ShadowChasers(_WLP):
|
|
||||||
name = 'WLP/ShadowChasers'
|
|
||||||
path = 'general/shadowchasers/'
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Stellar(_WLP):
|
|
||||||
name = 'WLP/Stellar'
|
|
||||||
path = 'adult/stellar/'
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Wondermark(_BasicScraper):
|
class Wondermark(_BasicScraper):
|
||||||
latestUrl = 'http://wondermark.com/'
|
latestUrl = 'http://wondermark.com/'
|
||||||
stripUrl = latestUrl + '%s/'
|
stripUrl = latestUrl + '%s/'
|
||||||
|
|
31
dosagelib/plugins/webcomicnation.py
Normal file
31
dosagelib/plugins/webcomicnation.py
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
# -*- coding: iso-8859-1 -*-
|
||||||
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
|
||||||
|
from re import compile, IGNORECASE, DOTALL
|
||||||
|
from ..scraper import make_scraper
|
||||||
|
|
||||||
|
|
||||||
|
def add(name, subpath):
    """Build the scraper class for one WebcomicsNation comic and publish it
    in this module's namespace under the key 'WebcomicsNation_<name>'.

    name    -- comic name, used in the class name and in the scraper name
               'WebcomicsNation/<name>'
    subpath -- path appended to the site URL to form the latest-strip URL
    """
    siteUrl = 'http://www.webcomicsnation.com/'
    classname = 'WebcomicsNation_%s' % name
    attributes = dict(
        name = 'WebcomicsNation/' + name,
        latestUrl = siteUrl + subpath,
        # NOTE(review): built from the site root only, without subpath --
        # confirm this is the intended archive URL for every comic.
        stripUrl = siteUrl + '?view=archive&chapter=%s',
        imageSearch = compile(r'<a name="strip\d*?">.*?<img[^>]+?src="([^"]*?memberimages/.+?)"', IGNORECASE | DOTALL),
        prevSearch = compile(r'href="([^"]*?whichbutton=prev[^"]*?)"', IGNORECASE),
        help = 'Index format: nnnn (non-contiguous)',
    )
    globals()[classname] = make_scraper(classname, **attributes)
|
||||||
|
|
||||||
|
|
||||||
|
add('AgnesQuill', 'daveroman/agnes/')
|
||||||
|
add('Elvenbaath', 'tdotodot2k/elvenbaath/')
|
||||||
|
add('IrrationalFears', 'uvernon/irrationalfears/')
|
||||||
|
add('KismetHuntersMoon', 'laylalawlor/huntersmoon/')
|
||||||
|
add('SaikoAndLavender', 'gc/saiko/')
|
||||||
|
add('MyMuse', 'gc/muse/')
|
||||||
|
add('NekkoAndJoruba', 'nekkoandjoruba/nekkoandjoruba/')
|
||||||
|
add('JaxEpoch', 'johngreen/quicken/')
|
||||||
|
add('QuantumRockOfAges', 'DreamchildNYC/quantum/')
|
||||||
|
add('ClownSamurai', 'qsamurai/clownsamurai/')
|
32
dosagelib/plugins/wlpcomics.py
Normal file
32
dosagelib/plugins/wlpcomics.py
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
# -*- coding: iso-8859-1 -*-
|
||||||
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
|
||||||
|
from re import compile, IGNORECASE
|
||||||
|
from ..scraper import make_scraper
|
||||||
|
from ..helpers import bounceStarter
|
||||||
|
|
||||||
|
|
||||||
|
def add(name, path):
    """Create the scraper class for one WLP comic and register it in this
    module's namespace.

    name -- comic name without the 'WLP/' prefix, e.g. 'Stellar'
    path -- comic directory below http://www.wlpcomics.com/, e.g.
            'adult/stellar/'; strip pages are addressed as
            <site>/<path><index>.html
    """
    baseUrl = 'http://www.wlpcomics.com/' + path
    # Use an identifier-style class name like the other generated scraper
    # families (Creators_*, UClick_*, WebcomicsNation_*); the slash-separated
    # display name is passed separately as the 'name' attribute below.
    classname = 'WLP_%s' % name

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Name the image after the page's file name without extension."""
        return pageUrl.split('/')[-1].split('.')[0]

    globals()[classname] = make_scraper(classname,
        name = 'WLP/' + name,
        starter = bounceStarter(baseUrl, compile(r'</a> <A HREF="(\w+.html)">Next Page</a>', IGNORECASE)),
        stripUrl = baseUrl + '%s.html',
        imageSearch = compile(r'SRC="(http://www.wlpcomics.com/adult/.+?|http://www.wlpcomics.com/general/.+?)"', IGNORECASE),
        prevSearch = compile(r'</a> <A HREF="(\w+.html)">Previous Page</a>', IGNORECASE),
        namer = namer,
        help = 'Index format: nnn',
    )
|
||||||
|
|
||||||
|
|
||||||
|
add('ChichiChan', 'adult/chichi/')
|
||||||
|
add('ChocolateMilkMaid', 'adult/cm/')
|
||||||
|
add('MaidAttack', 'general/maidattack/')
|
||||||
|
add('ShadowChasers', 'general/shadowchasers/')
|
||||||
|
add('Stellar', 'adult/stellar/')
|
|
@ -6,26 +6,19 @@ from re import compile
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
from ..helpers import bounceStarter
|
from ..helpers import bounceStarter
|
||||||
|
from ..util import tagre
|
||||||
|
|
||||||
|
|
||||||
class xkcd(_BasicScraper):
|
class xkcd(_BasicScraper):
|
||||||
starter = bounceStarter('http://xkcd.com/', compile(r'<a rel="next" href="(/?\d+/?)"[^>]*>Next'))
|
baseUrl = 'http://xkcd.com/'
|
||||||
stripUrl = 'http://xkcd.com/c%s.html'
|
starter = bounceStarter(baseUrl, compile(tagre("a", "href", r'(/\d+/)', before="next")))
|
||||||
imageSearch = compile(r'<img[^<]+src="(http://imgs.xkcd.com/comics/[^<>"]+)"')
|
stripUrl = baseUrl + '%s/'
|
||||||
prevSearch = compile(r'<a rel="prev" href="(/?\d+/?)"[^>]*>< Prev')
|
imageSearch = compile(tagre("img", "src", r'(http://imgs\.xkcd\.com/comics/[^"]+)'))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(/\d+/)', before="prev"))
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
index = int(pageUrl.rstrip('/').split('/')[-1])
|
index = int(pageUrl.rstrip('/').rsplit('/', 1)[-1])
|
||||||
name = imageUrl.split('/')[-1].split('.')[0]
|
name = imageUrl.rsplit('/', 1)[-1].split('.')[0]
|
||||||
return 'c%03d-%s' % (index, name)
|
return '%03d-%s' % (index, name)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class xkcdSpanish(_BasicScraper):
|
|
||||||
latestUrl = 'http://es.xkcd.com/xkcd-es/'
|
|
||||||
stripUrl = latestUrl + 'strips/%s/'
|
|
||||||
imageSearch = compile(r'src="(/site_media/strips/.+?)"')
|
|
||||||
prevSearch = compile(r'<a rel="prev" href="(http://es.xkcd.com/xkcd-es/strips/.+?)">Anterior</a>')
|
|
||||||
help = 'Index format: stripname'
|
|
||||||
|
|
|
@ -2,30 +2,24 @@
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012 Bastian Kleineidam
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
|
||||||
from re import compile, MULTILINE
|
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
|
from ..util import tagre
|
||||||
|
|
||||||
|
|
||||||
class YAFGC(_BasicScraper):
|
class YAFGC(_BasicScraper):
|
||||||
latestUrl = 'http://yafgc.shipsinker.com/'
|
latestUrl = 'http://yafgc.net/'
|
||||||
stripUrl = latestUrl + 'index.php?strip_id=%s'
|
stripUrl = latestUrl + '?id=%s'
|
||||||
imageSearch = compile(r'(istrip_.+?)"')
|
imageSearch = compile(tagre("img", "src", r'(http://yafgc\.net/img/comic/\d+\.jpg)'))
|
||||||
prevSearch = compile(r'(/.+?)">\r\n.+?prev.gif', MULTILINE)
|
prevSearch = compile(tagre("a", "href", r'(http://yafgc\.net/\?id=\d+)') +
|
||||||
|
tagre("img", "src", r'/img/navbar/go_to_previous\.gif'))
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
class YouSayItFirst(_BasicScraper):
|
class YouSayItFirst(_BasicScraper):
|
||||||
latestUrl = 'http://www.yousayitfirst.com/'
|
latestUrl = 'http://www.yousayitfirst.com/'
|
||||||
stripUrl = 'http://www.soapylemon.com/comics/index.php?date=%s'
|
stripUrl = latestUrl + 'comics/index.php?date=%s'
|
||||||
imageSearch = compile(r'(http://.+?comics/.+?.jpg)[^<]')
|
imageSearch = compile(tagre("img", "src", r'(http://www\.yousayitfirst\.com/comics/[^"]+)'))
|
||||||
prevSearch = compile(r'(/comics/index.php\?date=.+?)".+?P')
|
prevSearch = compile(tagre("a", "href", r'(http://www\.yousayitfirst\.com/comics/index\.php\?date=\d+)') + "Previous")
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
class Yirmumah(_BasicScraper):
|
|
||||||
latestUrl = 'http://yirmumah.net/archives.php'
|
|
||||||
stripUrl = latestUrl + '?date=%s'
|
|
||||||
imageSearch = compile(r'<img src="(strips/\d{8}\..*?)"')
|
|
||||||
prevSearch = compile(r'<a href="(\?date=\d{8})">.*Previous')
|
|
||||||
help = 'Index format: yyyymmdd'
|
|
||||||
|
|
|
@ -3,20 +3,24 @@
|
||||||
# Copyright (C) 2012 Bastian Kleineidam
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
|
||||||
from re import compile
|
from re import compile
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
|
from ..util import tagre
|
||||||
|
from ..helpers import bounceStarter
|
||||||
|
|
||||||
|
|
||||||
class Zapiro(_BasicScraper):
|
class Zapiro(_BasicScraper):
|
||||||
latestUrl = 'http://www.mg.co.za/zapiro/all'
|
baseUrl = 'http://www.mg.co.za/zapiro/'
|
||||||
imageSearch = compile(r'<img src="(cartoons/[^"]+)"')
|
starter = bounceStarter(baseUrl,
|
||||||
prevSearch = compile(r'<a href="([^"]+)">>')
|
compile(tagre("a", "href", r'(http://mg\.co\.za/cartoon/[^"]+)')+"Newer"))
|
||||||
|
stripUrl = 'http://mg.co.za/cartoon/%s'
|
||||||
|
imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)'))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(http://mg\.co\.za/cartoon/[^"]+)')+"Older")
|
||||||
|
help = 'Index format: yyyy-mm-dd-stripname'
|
||||||
|
|
||||||
|
|
||||||
class ZombieHunters(_BasicScraper):
|
class ZombieHunters(_BasicScraper):
|
||||||
latestUrl = 'http://www.thezombiehunters.com/'
|
latestUrl = 'http://www.thezombiehunters.com/'
|
||||||
stripUrl = latestUrl + 'index.php?strip_id=%s'
|
stripUrl = latestUrl + '?strip_id=%s'
|
||||||
imageSearch = compile(r'"(.+?strips/.+?)"')
|
imageSearch = compile(tagre("img", "src", r'(/istrip_files/strips/[^"]+)'))
|
||||||
prevSearch = compile(r'</a><a href="(.+?)"><img id="prevcomic" ')
|
prevSearch = compile(tagre("a", "href", r'(\?strip_id=\d+)') + tagre("img", "id", "prevcomic"))
|
||||||
help = 'Index format: n(unpadded)'
|
help = 'Index format: n(unpadded)'
|
||||||
|
|
Loading…
Reference in a new issue