Fix some comics.
This commit is contained in:
parent
7e91c83753
commit
4528894c05
19 changed files with 583 additions and 381 deletions
|
@ -2,21 +2,21 @@
|
|||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
from re import compile
|
||||
from ..scraper import make_scraper
|
||||
from ..util import tagre, asciify
|
||||
|
||||
def creators(name, shortname):
|
||||
def add(name, shortname):
    """Create the Creators scraper class for one comic and register it.

    The class is stored in this module's globals under
    ``Creators_<asciified name>``.

    name -- comic name, used in the scraper name 'Creators/<name>'
    shortname -- path component of the comic below www.creators.com/comics/
    """
    baseUrl = 'http://www.creators.com/comics/'
    classname = 'Creators_%s' % asciify(name)
    comicUrl = baseUrl + shortname
    imagePattern = tagre("img", "src", r'(/comics/\d+/[^"]+)')
    # Previous-strip link: an anchor to /comics/<shortname>/<n>.html whose
    # tag is directly followed by the left-arrow image tag.
    prevPattern = (
        tagre("a", "href", r'(/comics/%s/\d+\.html)' % shortname) +
        tagre("img", "src", r'/img_comics/arrow_l\.gif')
    )
    attributes = dict(
        name='Creators/' + name,
        latestUrl=comicUrl + '.html',
        stripUrl=comicUrl + '/%s.html',
        imageSearch=compile(imagePattern),
        prevSearch=compile(prevPattern),
        help='Index format: n',
    )
    globals()[classname] = make_scraper(classname, **attributes)
|
||||
|
||||
|
||||
|
@ -52,9 +52,9 @@ comics = {
|
|||
'Momma': 'momma',
|
||||
'NestHeads': 'nest-heads',
|
||||
'OneBigHappy': 'one-big-happy',
|
||||
'OnAClaireDay': 'on-a-clair-day',
|
||||
'TheOtherCoast': 'other-coast',
|
||||
'TheQuigmans': 'quigmans',
|
||||
'OnAClaireDay': 'on-a-claire-day',
|
||||
'TheOtherCoast': 'the-other-coast',
|
||||
'TheQuigmans': 'the-quigmans',
|
||||
'Rubes': 'rubes',
|
||||
'Rugrats': 'rugrats',
|
||||
'ScaryGary': 'scary-gary',
|
||||
|
@ -78,4 +78,4 @@ comics = {
|
|||
}
|
||||
|
||||
for name, shortname in comics.items():
|
||||
globals()[name] = creators(name, shortname)
|
||||
add(name, shortname)
|
||||
|
|
|
@ -142,3 +142,5 @@ class DresdenCodak(_BasicScraper):
|
|||
prevSearch = compile(r'<a href="http://dresdencodak.com(/.*?)"><img src=http://dresdencodak.com/m_prev.png>')
|
||||
starter = indirectStarter('http://dresdencodak.com/', compile(r'<div id="preview"><a href="http://dresdencodak.com/(\d+/\d+/\d+/.*?)">'))
|
||||
|
||||
|
||||
# XXX dilbert.com
|
||||
|
|
|
@ -2,28 +2,27 @@
|
|||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, IGNORECASE
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from re import compile
|
||||
from ..scraper import make_scraper
|
||||
from ..helpers import bounceStarter, queryNamer
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
def drunkDuck(shortName):
|
||||
linkSearch = r"<a href='(/[^/]*/index\.php\?p=\d+)' title='The %s page!'>"
|
||||
return type('DrunkDuck_%s' % shortName,
|
||||
(_BasicScraper,),
|
||||
dict(
|
||||
name='DrunkDuck/' + shortName,
|
||||
stripUrl='index.php?p=%s' % (shortName,),
|
||||
imageSearch=compile(r"<img src='(http://[a-z0-9]*.drunkduck.com/[^/]*/pages/[^'/]+)'>", IGNORECASE),
|
||||
prevSearch=compile(linkSearch % ('previous',), IGNORECASE),
|
||||
help='Index format: n (unpadded)',
|
||||
namer=queryNamer('p', usePageUrl=True),
|
||||
starter=bounceStarter('http://www.drunkduck.com/%s/' % (shortName,), compile(linkSearch % ('next',), IGNORECASE))
|
||||
)
|
||||
def add(name):
    """Create the DrunkDuck scraper class for one comic and register it.

    The class is stored in this module's globals as ``DrunkDuck_<name>``.

    name -- comic directory name below http://www.drunkduck.com/
    """
    classname = 'DrunkDuck_%s' % name
    url = 'http://www.drunkduck.com/%s/' % name
    # Template for the navigation links; the remaining %s is filled in with
    # 'next' or 'previous' below.
    linkSearch = tagre("a", "href", r"(/[^/]*/index\.php\?p=\d+)", quote="'", after="The %s page")
    globals()[classname] = make_scraper(classname,
        name='DrunkDuck/' + name,
        starter=bounceStarter(url, compile(linkSearch % 'next')),
        # Keep the %s placeholder for the strip index.  Writing
        # "url + 'index.php?p=%s' % name" formats the comic name into it,
        # because % binds tighter than +, and leaves no placeholder behind.
        stripUrl=url + 'index.php?p=%s',
        imageSearch=compile(tagre("img", "src", r"(http://[a-z0-9]*\.drunkduck\.com/[^/]*/pages/[^'/]+)", quote="'")),
        prevSearch=compile(linkSearch % 'previous'),
        help='Index format: n (unpadded)',
        namer=queryNamer('p', usePageUrl=True),
    )
|
||||
|
||||
duckComics = [
|
||||
comics = (
|
||||
'0_Opposites_attract_0',
|
||||
'0_eight',
|
||||
'101_Ways_to_Drive_a_Maren_Insane',
|
||||
|
@ -2275,7 +2274,7 @@ duckComics = [
|
|||
'yay_ponys',
|
||||
'yoshi_freaks_real_life',
|
||||
'zuchini',
|
||||
]
|
||||
)
|
||||
|
||||
for shortName in duckComics:
|
||||
globals()[shortName] = drunkDuck(shortName)
|
||||
for name in comics:
|
||||
add(name)
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
from ..scraper import _BasicScraper
|
||||
|
||||
|
||||
def fallenangel(name, shortname):
    """Unimplemented stub: accepts a name and a shortname and does nothing.

    Always returns None.
    """
    # XXX not implemented yet
    return None
|
||||
|
|
309
dosagelib/plugins/gocomics.py
Normal file
309
dosagelib/plugins/gocomics.py
Normal file
|
@ -0,0 +1,309 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile
|
||||
from ..scraper import make_scraper
|
||||
from ..util import tagre, asciify
|
||||
|
||||
def add(name, repl=''):
    """Create the GoComics scraper class for one comic and register it.

    The class is stored in this module's globals under
    ``GoComics_<asciified name>``.

    name -- display name of the comic, e.g. 'Alley Oop'
    repl -- replacement for spaces when deriving the URL short name from
            the lowercased display name ('' by default)
    """
    baseUrl = 'http://www.gocomics.com/'
    comicname = asciify(name)
    shortname = name.lower().replace(' ', repl)
    classname = 'GoComics_%s' % comicname

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Build the image file name from the date at the end of pageUrl."""
        # Strip pages end in .../yyyy/mm/dd, so the date must be taken from
        # the right.  A left split cuts up the 'http://host' prefix instead
        # and produces names containing the host and slashes.
        prefix, year, month, day = pageUrl.rsplit('/', 3)
        return "%s_%s%s%s.gif" % (shortname, year, month, day)

    globals()[classname] = make_scraper(classname,
        latestUrl=baseUrl + shortname,
        name='GoComics/' + comicname,
        stripUrl=baseUrl + shortname + '/%s',
        imageSearch=compile(tagre("img", "src", r'(http://assets\.amuniversal\.com/[0-9a-f]+)')),
        prevSearch=compile(tagre("a", "href", r'(/[^"]+/\d+/\d+/\d+)', after="prev")),
        help='Index format: yyyy/mm/dd',
        namer=namer,
    )
|
||||
|
||||
|
||||
# http://www.gocomics.com/features
|
||||
# note that comics from creators.com are not repeated here
|
||||
add('2 Cows and a Chicken')
|
||||
add('9 Chickweed Lane')
|
||||
add('9 to 5')
|
||||
add('The Academia Waltz')
|
||||
add('Adam at Home')
|
||||
add('Agnes')
|
||||
add('Alley Oop', repl='-')
|
||||
add('Andertoons')
|
||||
add('Andy Capp')
|
||||
add('Angry Little Girls', repl='-')
|
||||
add('Animal Crackers')
|
||||
add('Annie')
|
||||
add('The Argyle Sweater')
|
||||
add('Arlo and Janis')
|
||||
add('Ask Shagg')
|
||||
add('BC')
|
||||
add('Back in the Day')
|
||||
add('Bad Reporter')
|
||||
add('Baldo')
|
||||
add('Ballard Street')
|
||||
add('Banana Triangle', repl='-')
|
||||
add('Barkeater Lake')
|
||||
add('The Barn')
|
||||
add('Barney and Clyde')
|
||||
add('Basic Instructions')
|
||||
add('Beardo')
|
||||
add('Ben')
|
||||
add('Berger and Wyse', repl='-')
|
||||
add('Betty')
|
||||
add('Bewley')
|
||||
add('Biff and Riley', repl='-')
|
||||
add('Big Nate')
|
||||
add('The Big Picture')
|
||||
add('Big Top')
|
||||
add('Biographic')
|
||||
add('Birdbrains')
|
||||
add('Bliss')
|
||||
add('Bloom County')
|
||||
add('Bo Nanas')
|
||||
add('Bob the Squirrel')
|
||||
add('Boomerangs')
|
||||
add('The Boondocks')
|
||||
add('The Born Loser')
|
||||
add('Bottomliners')
|
||||
add('Bound and Gagged')
|
||||
add('Break of Day')
|
||||
add('Brevity')
|
||||
add('Brewster Rockit')
|
||||
add('Broom Hilda')
|
||||
add('The Buckets')
|
||||
add('Buni')
|
||||
add('Cafe con Leche')
|
||||
add('Calvin and Hobbes')
|
||||
add('Candorville')
|
||||
add('Cathy')
|
||||
add('Cest la Vie')
|
||||
add('Cheap Thrills Cuisine', repl='-')
|
||||
add('Chuckle Bros')
|
||||
add('Citizen Dog')
|
||||
add('The City')
|
||||
add('Cleats')
|
||||
add('Close to Home')
|
||||
add('Committed')
|
||||
add('Compu-toon')
|
||||
add('Cornered')
|
||||
add('Cow and Boy')
|
||||
add('CowTown')
|
||||
add('Crumb')
|
||||
add('Cul de Sac')
|
||||
add('Daddys Home')
|
||||
add('Dark Side of the Horse')
|
||||
add('Deep Cover')
|
||||
add('Diamond Lil')
|
||||
add('Dick Tracy')
|
||||
add('The Dinette Set')
|
||||
add('Dixie Drive', repl='-')
|
||||
add('Dog Eat Doug')
|
||||
add('Dogs of C Kennel')
|
||||
add('Domestic Abuse')
|
||||
add('Doonesbury')
|
||||
add('The Doozies')
|
||||
add('Drabble')
|
||||
add('DudeDude')
|
||||
add('The Duplex')
|
||||
add('Eek')
|
||||
add('The Elderberries')
|
||||
add('Endtown')
|
||||
add('Eric the Circle', repl='-')
|
||||
add('F Minus')
|
||||
add('Family Tree')
|
||||
add('Farcus')
|
||||
add('Fat Cats', repl='-')
|
||||
add('Flo and Friends')
|
||||
add('The Flying McCoys')
|
||||
add('Foolish Mortals', repl='-')
|
||||
add('For Better or For Worse')
|
||||
add('For Heavens Sake')
|
||||
add('Fort Knox')
|
||||
add('FoxTrot')
|
||||
add('FoxTrot Classics')
|
||||
add('Frank and Ernest')
|
||||
add('Frazz')
|
||||
add('Fred Basset')
|
||||
add('Free Range')
|
||||
add('Freshly Squeezed')
|
||||
add('Frog Applause')
|
||||
add('The Fusco Brothers')
|
||||
add('Garfield')
|
||||
add('Garfield Minus Garfield')
|
||||
add('Gasoline Alley')
|
||||
add('Geech')
|
||||
add('Get a Life')
|
||||
add('Get Fuzzy')
|
||||
add('Gil Thorp')
|
||||
add('Ginger Meggs')
|
||||
add('Gor Dominical')
|
||||
add('Graffiti')
|
||||
add('Grand Avenue')
|
||||
add('Gray Matters')
|
||||
add('The Grizzwells')
|
||||
add('Haiku Ewe')
|
||||
add('Ham Shears')
|
||||
add('Health Capsules')
|
||||
add('Heart of the City')
|
||||
add('Heathcliff')
|
||||
add('Heavenly Nostrils')
|
||||
add('Herb and Jamaal')
|
||||
add('Herman')
|
||||
add('Home and Away')
|
||||
add('HUBRIS!')
|
||||
add('The Humble Stumble')
|
||||
add('Imagine This')
|
||||
add('In the Bleachers')
|
||||
add('In the Sticks')
|
||||
add('Incidental Comics')
|
||||
add('Ink Pen')
|
||||
add('Inspector Dangers Crime Quiz')
|
||||
add('Its All About You')
|
||||
add('Janes World')
|
||||
add('Jims Journal')
|
||||
add('Joe Vanilla')
|
||||
add('Jump Start')
|
||||
add('The K Chronicles')
|
||||
add('KidCity')
|
||||
add('KidSpot')
|
||||
add('Kit N Carlyle')
|
||||
add('Kitchen Capers')
|
||||
add('Kliban')
|
||||
add('Klibans Cats')
|
||||
add('The Knight Life')
|
||||
add('La Cucaracha')
|
||||
add('Last Kiss')
|
||||
add('The LeftyBosco Picture Show')
|
||||
add('Legend of Bill')
|
||||
add('Liberty Meadows')
|
||||
add('Lil Abner')
|
||||
add('Lio')
|
||||
add('Little Dog Lost')
|
||||
add('Lola')
|
||||
add('Loose Parts')
|
||||
add('The Lost Bear')
|
||||
add('Lost Side of Suburbia')
|
||||
add('Love Is...')
|
||||
add('Luann')
|
||||
add('Lucky Cow')
|
||||
add('Mac')
|
||||
add('Magic in a Minute')
|
||||
add('Maintaining')
|
||||
add('Marias Day')
|
||||
add('Marmaduke')
|
||||
add('McArroni')
|
||||
add('The Meaning of Lila')
|
||||
add('Medium Large')
|
||||
add('Meg Classics')
|
||||
add('The Middletons')
|
||||
add('Mike du Jour')
|
||||
add('Minimum Security')
|
||||
add('Moderately Confused')
|
||||
add('Molly and the Bear')
|
||||
add('Momma')
|
||||
add('Monty')
|
||||
add('Motley Classics')
|
||||
add('Mr. Gigi and the Squid')
|
||||
add('Mutt and Jeff')
|
||||
add('My Cage')
|
||||
add('MythTickle')
|
||||
add('Nancy')
|
||||
add('Nest Heads')
|
||||
add('NEUROTICA')
|
||||
add('New Adventures of Queen Victoria')
|
||||
add('Non Sequitur')
|
||||
add('The Norm Classics')
|
||||
add('Nothing is Not Something')
|
||||
add('Off the Mark')
|
||||
add('Ollie and Quentin')
|
||||
add('On A Claire Day')
|
||||
add('One Big Happy')
|
||||
add('Ordinary Bill')
|
||||
add('The Other Coast')
|
||||
add('Out of the Gene Pool Re-Runs')
|
||||
add('Over the Hedge')
|
||||
add('Overboard')
|
||||
add('Oyster War')
|
||||
add('PC and Pixel')
|
||||
add('Peanuts')
|
||||
add('Pearls Before Swine')
|
||||
add('Pibgorn')
|
||||
add('Pibgorn Sketches')
|
||||
add('Pickles')
|
||||
add('Pinkerton')
|
||||
add('Pluggers')
|
||||
add('Pooch Cafe')
|
||||
add('PreTeena')
|
||||
add('Prickly City')
|
||||
add('Rabbits Against Magic')
|
||||
add('Raising Duncan')
|
||||
add('Real Life Adventures')
|
||||
add('Reality Check')
|
||||
add('Red and Rover')
|
||||
add('Red Meat')
|
||||
add('Reply All')
|
||||
add('Rip Haywire')
|
||||
add('Ripleys Believe It or Not')
|
||||
add('Rose is Rose')
|
||||
add('Rubes')
|
||||
add('Rudy Park')
|
||||
add('Savage Chickens')
|
||||
add('Scary Gary')
|
||||
add('Shirley and Son Classics')
|
||||
add('Shoe')
|
||||
add('Shoecabbage')
|
||||
add('Shortcuts')
|
||||
add('Skin Horse')
|
||||
add('Skippy')
|
||||
add('Slowpoke')
|
||||
add('Soup to Nutz')
|
||||
add('Speed Bump')
|
||||
add('Spot the Frog')
|
||||
add('Starslip')
|
||||
add('Stone Soup')
|
||||
add('Strange Brew')
|
||||
add('The Sunshine Club')
|
||||
add('Sylvia')
|
||||
add('Tank McNamara')
|
||||
add('Tarzan')
|
||||
add('Ten Cats')
|
||||
add('Tales of TerraTopia')
|
||||
add('That is Priceless')
|
||||
add('Thats Life')
|
||||
add('Thatababy')
|
||||
add('Thin Lines')
|
||||
add('Tiny Sepuku')
|
||||
add('TOBY')
|
||||
add('Todays Dogg')
|
||||
add('Tom the Dancing Bug')
|
||||
add('Too Much Coffee Man')
|
||||
add('Trivquiz')
|
||||
add('Twaggies')
|
||||
add('Uncle Arts Funland')
|
||||
add('Unstrange Phenomena')
|
||||
add('U.S. Acres')
|
||||
add('Viivi and Wagner')
|
||||
add('Watch Your Head')
|
||||
add('Wee Pals')
|
||||
add('Wizard of Id')
|
||||
add('Working Daze')
|
||||
add('Working It Out')
|
||||
add('W.T. Duck')
|
||||
add('Zack Hill')
|
||||
add('Ziggy')
|
||||
|
||||
# http://www.gocomics.com/explore/editorial_list
|
||||
# XXX
|
||||
|
||||
# http://www.gocomics.com/explore/sherpa_list
|
||||
# XXX
|
||||
|
|
@ -3,31 +3,29 @@
|
|||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile
|
||||
from ..scraper import _BasicScraper
|
||||
from ..scraper import make_scraper
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
def keenSpot(name, urls):
|
||||
def add(name, urls):
    """Create the KeenSpot scraper class for one comic and register it.

    The class is stored in this module's globals as ``KeenSpot_<name>``.

    name -- comic name, used in the scraper name 'KeenSpot/<name>'
    urls -- either a single URL used as both base and latest URL, or a
            (baseUrl, latestUrl) tuple
    """
    classname = 'KeenSpot_%s' % name
    if isinstance(urls, tuple):
        baseUrl, latestUrl = urls
    else:
        baseUrl = latestUrl = urls

    globals()[classname] = make_scraper(classname,
        name='KeenSpot/' + name,
        latestUrl=latestUrl,
        stripUrl=baseUrl + 'd/%s.html',
        imageSearch=compile(tagre("img", "src", r'([^"]*comics/[^"]+)')),
        # The value pattern must not start with its own quote: tagre() is
        # given the quote character separately, as the other tagre() calls
        # show, so a leading '"' here would demand a doubled quote.
        prevSearch=compile(tagre("a", "href", r'([^"]*d/\d{8}\.html)') +
            '(?:<img[^>]+?(?:name="previous_day"|alt="Previous"|src="[^"]*back[^"]*")|Previous comic)'),
        help='Index format: yyyymmdd',
    )
|
||||
|
||||
|
||||
keenspotComics = {
|
||||
comics = {
|
||||
'13thLabour': 'http://the13labour.comicgenesis.com/',
|
||||
'1StComing': 'http://toon.comicgenesis.com/',
|
||||
'1StGradeArt': 'http://art.comicgenesis.com/',
|
||||
|
@ -1520,7 +1518,15 @@ keenspotComics = {
|
|||
'Zortic': 'http://www.zortic.com/',
|
||||
'ZosKias': 'http://kojika.comicgenesis.com/',
|
||||
'ZuraZura': 'http://zurazura.comicgenesis.com/',
|
||||
}
|
||||
}
|
||||
|
||||
for name, urls in keenspotComics.items():
|
||||
globals()[name] = keenSpot(name, urls)
|
||||
for name, urls in comics.items():
|
||||
add(name, urls)
|
||||
|
||||
|
||||
#class Yirmumah(_BasicScraper):
|
||||
# #http://yirmumah.keenspot.com/
|
||||
# stripUrl = latestUrl + '?date=%s'
|
||||
# imageSearch = compile(r'<img src="(strips/\d{8}\..*?)"')
|
||||
# prevSearch = compile(r'<a href="(\?date=\d{8})">.*Previous')
|
||||
# help = 'Index format: yyyymmdd'
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, sub
|
||||
from re import compile
|
||||
from ..scraper import _BasicScraper
|
||||
from ..helpers import indirectStarter, _PHPScraper
|
||||
from ..util import tagre
|
||||
|
@ -64,35 +64,6 @@ class Nukees(_BasicScraper):
|
|||
|
||||
|
||||
|
||||
def nuklearpower(name, shortname):
|
||||
baseUrl = 'http://www.nuklearpower.com/'
|
||||
latestUrl = "%s%s/" % (baseUrl, shortname)
|
||||
classname = sub("[^0-9a-zA-Z_]", "", name)
|
||||
|
||||
globals()[classname] = type('NuklearPower_%s' % classname,
|
||||
(_BasicScraper,),
|
||||
dict(
|
||||
name='NuklearPower/' + classname,
|
||||
latestUrl = latestUrl,
|
||||
stripUrl = latestUrl + '%s',
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.nuklearpower\.com/comics/[^"]+)')),
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "Previous"),
|
||||
help = 'Index format: yyyy/mm/dd/name',
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
npstrips = {
|
||||
'8BitTheater': '8-bit-theater',
|
||||
'Warbot': 'warbot',
|
||||
'HowIKilledYourMaster': 'hikym',
|
||||
'AtomicRobo': 'atomic-robo',
|
||||
}
|
||||
|
||||
for name, shortname in npstrips.items():
|
||||
nuklearpower(name, shortname)
|
||||
|
||||
|
||||
class NekoTheKitty(_PHPScraper):
|
||||
basePath = 'http://www.nekothekitty.net/cusp/'
|
||||
latestUrl = basePath
|
||||
|
|
26
dosagelib/plugins/nuklearpower.py
Normal file
26
dosagelib/plugins/nuklearpower.py
Normal file
|
@ -0,0 +1,26 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile
|
||||
from ..scraper import make_scraper
|
||||
from ..util import tagre
|
||||
|
||||
def add(name, shortname):
    """Create the NuklearPower scraper class for one comic and register it.

    The class is stored in this module's globals as ``NuklearPower_<name>``.

    name -- comic name, used in the scraper name 'NuklearPower/<name>'
    shortname -- path component of the comic on www.nuklearpower.com
    """
    classname = 'NuklearPower_%s' % name
    baseUrl = 'http://www.nuklearpower.com/' + shortname + '/'
    imagePattern = tagre("img", "src", r'(http://www\.nuklearpower\.com/comics/[^"]+)')
    # Any link whose tag is directly followed by the text "Previous".
    prevPattern = tagre("a", "href", r'([^"]+)') + "Previous"
    attributes = dict(
        name='NuklearPower/' + name,
        latestUrl=baseUrl,
        stripUrl=baseUrl + '%s',
        imageSearch=compile(imagePattern),
        prevSearch=compile(prevPattern),
        help='Index format: yyyy/mm/dd/name',
    )
    globals()[classname] = make_scraper(classname, **attributes)
|
||||
|
||||
|
||||
add('8BitTheater', '8-bit-theater')
|
||||
add('Warbot', 'warbot')
|
||||
add('HowIKilledYourMaster', 'hikym')
|
||||
add('AtomicRobo', 'atomic-robo')
|
|
@ -2,48 +2,39 @@
|
|||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
from re import compile
|
||||
from ..scraper import _BasicScraper
|
||||
from ..scraper import make_scraper
|
||||
from ..helpers import bounceStarter
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
def smackJeeves(names):
|
||||
def add(name):
    """Create the SmackJeeves scraper class for one comic and register it.

    The class is stored in this module's globals as ``SmackJeeves_<name>``
    and carries the scraper name 'SmackJeeves/<name>', following the
    'Site_<comic>' class / 'Site/<comic>' name convention of the other
    scraper factories.

    name -- subdomain of smackjeeves.com hosting the comic
    """
    # The class name uses '_' as separator; the '/' form belongs in the
    # scraper's ``name`` attribute, which is passed explicitly below.
    classname = 'SmackJeeves_' + name
    # XXX mature content can be viewed directly with:
    # http://www.smackjeeves.com/mature.php?ref=<percent-encoded-url>
    baseUrl = 'http://%s.smackjeeves.com/comics/' % name

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Name the image after the second-to-last path part of pageUrl."""
        return pageUrl.split('/')[-2]

    globals()[classname] = make_scraper(classname,
        name='SmackJeeves/' + name,
        starter=bounceStarter(baseUrl, compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + '<img[^>]*alt="Next >"')),
        # NOTE(review): stripUrl has no %s placeholder although the help
        # text describes an index format -- confirm the intended URL.
        stripUrl=baseUrl,
        imageSearch=compile(tagre("img", "src", r'(http://www\.smackjeeves\.com/images/uploaded/comics/[^"]*)')),
        prevSearch=compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + '<img[^>]*alt="< Previous"'),
        help='Index format: nnnn (some increasing number)',
        namer=namer,
    )
|
||||
|
||||
|
||||
def makeScraper(shortName):
|
||||
baseUrl = 'http://%s.smackjeeves.com/comics/' % shortName
|
||||
return type('SmackJeeves_%s' % shortName,
|
||||
(_SJScraper,),
|
||||
dict(
|
||||
name='SmackJeeves/' + shortName,
|
||||
baseUrl=baseUrl,
|
||||
starter=bounceStarter(baseUrl, compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + '<img[^>]*alt="Next >"'))
|
||||
)
|
||||
)
|
||||
return dict((name, makeScraper(name)) for name in names)
|
||||
|
||||
|
||||
globals().update(smackJeeves([
|
||||
'20galaxies',
|
||||
'axe13',
|
||||
'beartholomew',
|
||||
'bliss',
|
||||
'durian',
|
||||
'heard',
|
||||
'mpmcomic',
|
||||
'nlmo-project',
|
||||
'paranoidloyd',
|
||||
'thatdreamagain',
|
||||
'wowcomics',
|
||||
]))
|
||||
add('20galaxies')
|
||||
add('axe13')
|
||||
add('beartholomew')
|
||||
add('bliss')
|
||||
add('durian')
|
||||
add('heard')
|
||||
add('mpmcomic')
|
||||
add('nlmo-project')
|
||||
add('paranoidloyd')
|
||||
add('thatdreamagain')
|
||||
add('wowcomics')
|
||||
|
|
|
@ -2,41 +2,34 @@
|
|||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from re import compile
|
||||
from ..scraper import make_scraper
|
||||
|
||||
def snafuComics():
|
||||
class _SnafuComics(_BasicScraper):
|
||||
imageSearch = compile(r'<img src=http://\w+\.snafu-comics\.com/(comics/\d{6}_\w*\.\w{3,4})')
|
||||
prevSearch = compile(r'<a href="(\?comic_id=\d+)">Previous</a>')
|
||||
help = 'Index format: n (unpadded)'
|
||||
def add(name, host):
    """Create the SnafuComics scraper class for one comic and register it.

    The class is stored in this module's globals as ``SnafuComics_<name>``.

    name -- comic name, used in the scraper name 'SnafuComics/<name>'
    host -- subdomain of snafu-comics.com serving the comic
    """
    baseUrl = 'http://%s.snafu-comics.com/' % host
    classname = 'SnafuComics_%s' % name

    globals()[classname] = make_scraper(classname,
        # Explicit scraper name in the 'Site/Comic' form; without it the
        # class would carry no ``name`` attribute at all.
        name='SnafuComics/' + name,
        latestUrl=baseUrl,
        stripUrl=baseUrl + 'index.php?strip_id=%s',
        imageSearch=compile(r'<img src=http://\w+\.snafu-comics\.com/(comics/\d{6}_\w*\.\w{3,4})'),
        prevSearch=compile(r'<a href="(\?comic_id=\d+)">Previous</a>'),
        help='Index format: n (unpadded)',
    )
|
||||
|
||||
comics = {
|
||||
'Grim': 'grim',
|
||||
'KOF': 'kof',
|
||||
'PowerPuffGirls': 'ppg',
|
||||
'Snafu': 'www',
|
||||
'Tin': 'tin',
|
||||
'TW': 'tw',
|
||||
'Sugar': 'sugar',
|
||||
'SF': 'sf',
|
||||
'Titan': 'titan',
|
||||
'EA': 'ea',
|
||||
'Zim': 'zim',
|
||||
'Soul': 'soul',
|
||||
'FT': 'ft',
|
||||
'Bunnywith': 'bunnywith',
|
||||
'Braindead': 'braindead',
|
||||
}
|
||||
|
||||
url = 'http://%s.snafu-comics.com/'
|
||||
return dict((name, type('SnafuComics_%s' % name,
|
||||
(_SnafuComics,),
|
||||
dict(name='SnafuComics/' + name,
|
||||
latestUrl=url % host)))
|
||||
for name, host in comics.items())
|
||||
|
||||
globals().update(snafuComics())
|
||||
add('Grim', 'grim')
|
||||
add('KOF', 'kof')
|
||||
add('PowerPuffGirls', 'ppg')
|
||||
add('Snafu', 'www')
|
||||
add('Tin', 'tin')
|
||||
add('TW', 'tw')
|
||||
add('Sugar', 'sugar')
|
||||
add('SF', 'sf')
|
||||
add('Titan', 'titan')
|
||||
add('EA', 'ea')
|
||||
add('Zim', 'zim')
|
||||
add('Soul', 'soul')
|
||||
add('FT', 'ft')
|
||||
add('Bunnywith', 'bunnywith')
|
||||
add('Braindead', 'braindead')
|
||||
|
|
|
@ -2,29 +2,13 @@
|
|||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, IGNORECASE
|
||||
from re import compile
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..helpers import bounceStarter, indirectStarter
|
||||
from ..util import getQueryParams, tagre
|
||||
|
||||
|
||||
class UglyHill(_BasicScraper):
|
||||
latestUrl = 'http://www.uglyhill.com/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(/comic[s|/][^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif")
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class UnderPower(_BasicScraper):
|
||||
latestUrl = 'http://underpower.non-essential.com/'
|
||||
stripUrl = latestUrl + 'index.php?comic=%s'
|
||||
imageSearch = compile(r'<img src="(comics/\d{8}\..+?)"')
|
||||
prevSearch = compile(r'<a href="(/index.php\?comic=\d{8})"><img src="images/previous-comic\.gif"')
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class Undertow(_BasicScraper):
|
||||
stripUrl = 'http://undertow.dreamshards.org/%s'
|
||||
imageSearch = compile(r'<img src="(.+?)"')
|
||||
|
@ -52,24 +36,3 @@ class UserFriendly(_BasicScraper):
|
|||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
return 'uf%s' % (getQueryParams(pageUrl)['id'][0][2:],)
|
||||
|
||||
|
||||
class UndeadFriend(_BasicScraper):
|
||||
latestUrl = 'http://www.undeadfriend.com/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch = compile(r'src="(http://www\.undeadfriend\.com/comics/.+?)"', IGNORECASE)
|
||||
prevSearch = compile(r'<a.+?href="(http://www\.undeadfriend\.com/d/\d+?\.html)"><img border="0" name="previous_day" alt="Previous comic" src="http://www\.undeadfriend\.com/images/previous_day\.jpg', IGNORECASE)
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class UnspeakableVault(_BasicScraper):
|
||||
stripUrl = 'http://www.macguff.fr/goomi/unspeakable/WEBIMAGES/CARTOON/vault%s.html'
|
||||
imageSearch = compile(r'(WEBIMAGES/CARTOON/.+?)"')
|
||||
prevSearch = compile(r'PREVIOUS.+?" href="(.+?)"')
|
||||
help = 'Index format: nn or nnn'
|
||||
starter = indirectStarter('http://www.macguff.fr/goomi/unspeakable/home.html',
|
||||
compile(r'http://www.macguff.fr/goomi/unspeakable/(.+?)"'))
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, imageSearch):
|
||||
return '%s-%s' % (imageSearch.split('/')[-1].split('.')[0],imageUrl.split('/')[-1].split('.')[0])
|
||||
|
|
|
@ -3,27 +3,19 @@
|
|||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, sub
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..scraper import make_scraper
|
||||
from ..util import fetchUrl, tagre
|
||||
|
||||
|
||||
class _UClickScraper(_BasicScraper):
|
||||
def add(name, shortName):
|
||||
homepage = 'http://content.uclick.com/a2z.html'
|
||||
baseUrl = 'http://www.uclick.com/client/zzz/%s/'
|
||||
stripUrl = property(lambda self: self.latestUrl + '%s/')
|
||||
imageSearch = compile(tagre("img", "src", r'(http://synd\.imgsrv\.uclick\.com/comics/\w+/\d{4}/[^"]+\.gif)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/client/zzz/\w+/\d{4}/\d{2}/\d{2}/)') + 'Previous date')
|
||||
help = 'Index format: yyyy/mm/dd'
|
||||
|
||||
@classmethod
|
||||
def starter(cls):
|
||||
return cls.baseUrl % (cls.shortName,)
|
||||
latestUrl = baseUrl % shortName
|
||||
classname = 'UClick_%s' % name
|
||||
|
||||
@classmethod
|
||||
def fetchSubmodules(cls):
|
||||
exclusions = ('index',)
|
||||
|
||||
# XXX refactor this mess
|
||||
submoduleSearch = compile(tagre("a", "href", r'(http://content\.uclick\.com/content/\w+\.html)'))
|
||||
partsMatch = compile(tagre("a", "href", r'http://content\.uclick\.com/content/(\w+?)\.html'))
|
||||
|
@ -43,11 +35,15 @@ class _UClickScraper(_BasicScraper):
|
|||
|
||||
return [normalizeName(name) for part, name in possibles if part not in exclusions and fetchSubmodule(part)]
|
||||
|
||||
globals()[classname] = make_scraper(classname,
|
||||
name='UClick/' + name,
|
||||
latestUrl = latestUrl,
|
||||
stripUrl = latestUrl + '%s/',
|
||||
imageSearch = compile(tagre("img", "src", r'(http://synd\.imgsrv\.uclick\.com/comics/\w+/\d{4}/[^"]+\.gif)')),
|
||||
prevSearch = compile(tagre("a", "href", r'(/client/zzz/\w+/\d{4}/\d{2}/\d{2}/)') + 'Previous date'),
|
||||
help = 'Index format: yyyy/mm/dd',
|
||||
)
|
||||
|
||||
def uclick(name, shortName):
|
||||
return type('UClick_%s' % name,
|
||||
(_UClickScraper,),
|
||||
dict(name='UClick/' + name, shortName=shortName))
|
||||
|
||||
comics = {
|
||||
'5thWave': 'fw',
|
||||
|
@ -278,6 +274,7 @@ comics = {
|
|||
'ZackHill': 'crzhi',
|
||||
'ZiggySpanish': 'spzi',
|
||||
'Ziggy': 'zi',
|
||||
}
|
||||
}
|
||||
|
||||
globals().update(dict((item[0], uclick(*item)) for item in comics.items()))
|
||||
for name, shortname in comics.items():
|
||||
add(name, shortname)
|
||||
|
|
|
@ -2,11 +2,13 @@
|
|||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, IGNORECASE, MULTILINE
|
||||
from re import compile
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
# XXX make dynamic
|
||||
class _VGCats(_BasicScraper):
|
||||
latestUrl = 'http://www.vgcats.com/comics/'
|
||||
imageSearch = compile(r'<img src="(images/\d{6}\..+?)"')
|
||||
|
@ -31,7 +33,6 @@ class Adventure(_VGCats):
|
|||
|
||||
class ViiviJaWagner(_BasicScraper):
|
||||
latestUrl = 'http://www.hs.fi/viivijawagner/'
|
||||
imageSearch = compile(r'<img id="strip\d+"\s+src="([^"]+)"', IGNORECASE)
|
||||
prevSearch = compile(r'<a href="(.+?)"[^>]+?>\nEdellinen \n<img src="http://www.hs.fi/static/hs/img/viivitaakse.gif"', MULTILINE | IGNORECASE)
|
||||
# XXX ?
|
||||
help = 'Index format: shrugs!'
|
||||
imageSearch = compile(tagre("link", "href", r'(http://hs12\.snstatic\.fi/webkuva/oletus/[^"]+)', before="image_src"))
|
||||
prevSearch = compile(tagre("a", "href", r'(/viivijawagner/\d+)', before="prev-cm"))
|
||||
help = 'Index format: none'
|
||||
|
|
|
@ -2,15 +2,15 @@
|
|||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, IGNORECASE, DOTALL
|
||||
from re import compile, IGNORECASE
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..helpers import queryNamer, bounceStarter
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
class WayfarersMoon(_BasicScraper):
|
||||
latestUrl = 'http://www.wayfarersmoon.com/'
|
||||
stripUrl = latestUrl + 'index.php\?page=%s'
|
||||
stripUrl = latestUrl + 'index.php?page=%s'
|
||||
imageSearch = compile(r'<img src="(/admin.+?)"')
|
||||
prevSearch = compile(r'<a href="(.+?)".+?btn_back.gif')
|
||||
help = 'Index format: nn'
|
||||
|
@ -32,7 +32,6 @@ class WhiteNoise(_BasicScraper):
|
|||
help = 'Index format: n'
|
||||
|
||||
|
||||
|
||||
class WhyTheLongFace(_BasicScraper):
|
||||
latestUrl = 'http://www.absurdnotions.org/wtlf200709.html'
|
||||
stripUrl = 'http://www.absurdnotions.org/wtlf%s.html'
|
||||
|
@ -41,23 +40,12 @@ class WhyTheLongFace(_BasicScraper):
|
|||
help = 'Index format: yyyymm'
|
||||
|
||||
|
||||
|
||||
class Wigu(_BasicScraper):
|
||||
latestUrl = 'http://www.wigu.com/wigu/'
|
||||
stripUrl = latestUrl + '?date=%s'
|
||||
imageSearch = compile(r'<img src="(strips/\d{8}\..+?)" alt=""')
|
||||
prevSearch = compile(r'<a href="(.+?)"[^>]+?>< PREV COMIC</a> ')
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
|
||||
class WiguTV(_BasicScraper):
|
||||
latestUrl = 'http://jjrowland.com/'
|
||||
stripUrl = latestUrl + 'archive/%s.html'
|
||||
imageSearch = compile(r'"(/comics/.+?)"')
|
||||
prevSearch = compile(r'<a href="(/archive/.+?)"[^>]+?> ')
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
latestUrl = 'http://wigucomics.com/'
|
||||
stripUrl = latestUrl + 'adventures/index.php?comic=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(/adventures/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/adventures/index\.php\?comic=\d+)', after="go back"))
|
||||
help = 'Index format: n'
|
||||
|
||||
|
||||
class WotNow(_BasicScraper):
|
||||
|
@ -68,7 +56,6 @@ class WotNow(_BasicScraper):
|
|||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
|
||||
class WorldOfWarcraftEh(_BasicScraper):
|
||||
latestUrl = 'http://woweh.com/'
|
||||
stripUrl = None
|
||||
|
@ -77,46 +64,11 @@ class WorldOfWarcraftEh(_BasicScraper):
|
|||
|
||||
|
||||
class Wulffmorgenthaler(_BasicScraper):
|
||||
latestUrl = 'http://www.wulffmorgenthaler.com/'
|
||||
stripUrl = latestUrl + 'Default.aspx?id=%s'
|
||||
imageSearch = compile(r'img id="ctl00_content_Strip1_imgStrip".+?class="strip" src="(striphandler\.ashx\?stripid=[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"')
|
||||
prevSearch = compile(r'<a href="(/default\.aspx\?id=[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})" id="ctl00_content_Strip1_aPrev">')
|
||||
help = 'Index format: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx (GUID)'
|
||||
namer = queryNamer('stripid')
|
||||
|
||||
|
||||
def webcomicsNation():
    """Create one scraper class per WebcomicsNation comic.

    Returns a dict mapping the comic name to a new class derived from a
    shared base. Each class carries its own ``name`` and ``latestUrl``;
    the search patterns and help text are inherited from the base.
    """
    class _WebcomicsNation(_BasicScraper):
        # Patterns shared by every comic hosted on the site.
        imageSearch = compile(r'<a name="strip\d*?">.*?<img[^>]+?src="([^"]*?memberimages/.+?)"', IGNORECASE + DOTALL)
        prevSearch = compile(r'href="([^"]*?whichbutton=prev[^"]*?)"', IGNORECASE)
        help = 'Index format: nnnn (non-contiguous)'

        @property
        def stripUrl(self):
            # NOTE(review): baseUrl is not defined in this block; it is
            # presumably provided elsewhere - confirm.
            return self.baseUrl + '?view=archive&chapter=%s'

    # Comic name -> path below http://www.webcomicsnation.com/
    subpaths = {
        'AgnesQuill': 'daveroman/agnes/',
        'Elvenbaath': 'tdotodot2k/elvenbaath/',
        'IrrationalFears': 'uvernon/irrationalfears/',
        'KismetHuntersMoon': 'laylalawlor/huntersmoon/',
        'SaikoAndLavender': 'gc/saiko/',
        'MyMuse': 'gc/muse/',
        'NekkoAndJoruba': 'nekkoandjoruba/nekkoandjoruba/',
        'JaxEpoch': 'johngreen/quicken/',
        'QuantumRockOfAges': 'DreamchildNYC/quantum/',
        'ClownSamurai': 'qsamurai/clownsamurai/',
    }

    scrapers = {}
    for comic, subpath in subpaths.items():
        attributes = {
            'name': 'WebcomicsNation/' + comic,
            'latestUrl': 'http://www.webcomicsnation.com/' + subpath,
        }
        scrapers[comic] = type('WebcomicsNation_%s' % comic,
                               (_WebcomicsNation,), attributes)
    return scrapers
|
||||
|
||||
|
||||
globals().update(webcomicsNation())
|
||||
|
||||
latestUrl = 'http://wumocomicstrip.com/'
|
||||
stripUrl = latestUrl + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(/img/strip/thumb/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<span>Previous")
|
||||
help = 'Index format: yyyy/mm/dd'
|
||||
|
||||
|
||||
class WhiteNoise(_BasicScraper):
|
||||
|
@ -127,7 +79,6 @@ class WhiteNoise(_BasicScraper):
|
|||
help = 'Index format: n'
|
||||
|
||||
|
||||
|
||||
class WapsiSquare(_BasicScraper):
|
||||
latestUrl = 'http://wapsisquare.com/'
|
||||
stripUrl = latestUrl + 'comic/%s'
|
||||
|
@ -136,77 +87,14 @@ class WapsiSquare(_BasicScraper):
|
|||
help = 'Index format: strip-name'
|
||||
|
||||
|
||||
|
||||
class WrongWay(_BasicScraper):
|
||||
latestUrl = 'http://www.wrongwaycomics.com/'
|
||||
stripUrl = latestUrl + '%s.html'
|
||||
imageSearch = compile(r'<img src="(comics/.+?)"')
|
||||
prevSearch = compile(r' <a class="comicNav" href="(.+?)" onmouseover="previousLinkIn\(\)"')
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
||||
class WeCanSleepTomorrow(_BasicScraper):
|
||||
latestUrl = 'http://wecansleeptomorrow.com/'
|
||||
imageSearch = compile(r'<img src="(http://wecansleeptomorrow.com/comics/.+?)"')
|
||||
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
|
||||
stripUrl = latestUrl + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://wecansleeptomorrow\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://wecansleeptomorrow\.com/[^"]+)', after="prev"))
|
||||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
|
||||
|
||||
|
||||
class _WLP(_BasicScraper):
    """Common base for comics hosted on www.wlpcomics.com.

    Subclasses set ``path`` to the comic's directory below the site root
    (for example 'adult/chichi/'); all URLs are derived from it.
    """
    # Image URLs live below either the adult/ or the general/ tree.
    imageSearch=compile(r'SRC="(http://www.wlpcomics.com/adult/.+?|http://www.wlpcomics.com/general/.+?)"', IGNORECASE)
    # Relative link to the preceding page, e.g. "042.html".
    prevSearch=compile(r'</a> <A HREF="(\w+.html)">Previous Page</a>', IGNORECASE)
    help='Index format: nnn'

    @property
    def baseUrl(self):
        """Return the comic's base URL, built from the subclass ``path``."""
        return 'http://www.wlpcomics.com/%s' % (self.path,)

    @property
    def stripUrl(self):
        """Return the URL template for one page; '%s' takes the index."""
        return self.baseUrl + '%s.html'

    def namer(self, imageUrl, pageUrl):
        """Name the image after the page file name without its extension."""
        return pageUrl.split('/')[-1].split('.')[0]

    def starter(self):
        """Return the result of the bounce starter built for this comic."""
        # XXX: ergh
        # baseUrl is a per-instance property, so the starter cannot be
        # created at class level; it is built here instead.
        # NOTE(review): bounceStarter presumably follows the "Next Page"
        # link from baseUrl - confirm in helpers.
        meth = bounceStarter(self.baseUrl, compile(r'</a> <A HREF="(\w+.html)">Next Page</a>', IGNORECASE))
        # Bind the returned object to this instance and call it right away.
        return meth.__get__(self, type(self))()
|
||||
|
||||
|
||||
|
||||
class ChichiChan(_WLP):
|
||||
name = 'WLP/ChichiChan'
|
||||
path = 'adult/chichi/'
|
||||
|
||||
|
||||
|
||||
class ChocolateMilkMaid(_WLP):
|
||||
name = 'WLP/ChocolateMilkMaid'
|
||||
path = 'adult/cm/'
|
||||
|
||||
|
||||
|
||||
class MaidAttack(_WLP):
|
||||
name = 'WLP/MaidAttack'
|
||||
path = 'general/maidattack/'
|
||||
|
||||
|
||||
|
||||
class ShadowChasers(_WLP):
|
||||
name = 'WLP/ShadowChasers'
|
||||
path = 'general/shadowchasers/'
|
||||
|
||||
|
||||
|
||||
class Stellar(_WLP):
|
||||
name = 'WLP/Stellar'
|
||||
path = 'adult/stellar/'
|
||||
|
||||
|
||||
|
||||
class Wondermark(_BasicScraper):
|
||||
latestUrl = 'http://wondermark.com/'
|
||||
stripUrl = latestUrl + '%s/'
|
||||
|
|
31
dosagelib/plugins/webcomicnation.py
Normal file
31
dosagelib/plugins/webcomicnation.py
Normal file
|
@ -0,0 +1,31 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, IGNORECASE, DOTALL
|
||||
from ..scraper import make_scraper
|
||||
|
||||
|
||||
def add(name, subpath):
    """Register a WebcomicsNation scraper class in this module.

    name: comic name; the class is stored as ``WebcomicsNation_<name>``
        and its ``name`` attribute is ``WebcomicsNation/<name>``.
    subpath: path of the comic below the site root, used for latestUrl.
    """
    site = 'http://www.webcomicsnation.com/'
    # Collect the class attributes first, then hand them to make_scraper.
    attributes = dict(
        name='WebcomicsNation/' + name,
        latestUrl=site + subpath,
        stripUrl=site + '?view=archive&chapter=%s',
        imageSearch=compile(r'<a name="strip\d*?">.*?<img[^>]+?src="([^"]*?memberimages/.+?)"', IGNORECASE + DOTALL),
        prevSearch=compile(r'href="([^"]*?whichbutton=prev[^"]*?)"', IGNORECASE),
        help='Index format: nnnn (non-contiguous)',
    )
    scraperName = 'WebcomicsNation_%s' % name
    globals()[scraperName] = make_scraper(scraperName, **attributes)
|
||||
|
||||
|
||||
add('AgnesQuill', 'daveroman/agnes/')
|
||||
add('Elvenbaath', 'tdotodot2k/elvenbaath/')
|
||||
add('IrrationalFears', 'uvernon/irrationalfears/')
|
||||
add('KismetHuntersMoon', 'laylalawlor/huntersmoon/')
|
||||
add('SaikoAndLavender', 'gc/saiko/')
|
||||
add('MyMuse', 'gc/muse/')
|
||||
add('NekkoAndJoruba', 'nekkoandjoruba/nekkoandjoruba/')
|
||||
add('JaxEpoch', 'johngreen/quicken/')
|
||||
add('QuantumRockOfAges', 'DreamchildNYC/quantum/')
|
||||
add('ClownSamurai', 'qsamurai/clownsamurai/')
|
32
dosagelib/plugins/wlpcomics.py
Normal file
32
dosagelib/plugins/wlpcomics.py
Normal file
|
@ -0,0 +1,32 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, IGNORECASE
|
||||
from ..scraper import make_scraper
|
||||
from ..helpers import bounceStarter
|
||||
|
||||
|
||||
def add(name, path):
    """Create a WLP comics scraper class and register it in this module.

    name: comic name; the class is stored as ``WLP_<name>`` and its
        ``name`` attribute is ``WLP/<name>``.
    path: directory of the comic below http://www.wlpcomics.com/,
        including the trailing slash (for example 'adult/chichi/').
    """
    baseUrl = 'http://www.wlpcomics.com/' + path
    # A '/' is not valid in a Python identifier, so the class name uses an
    # underscore and the 'WLP/<name>' form goes into the name attribute.
    # This matches the Group_Comic / 'Group/Comic' convention of the other
    # make_scraper based plugins.
    classname = 'WLP_' + name

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Name the image after the page file name without its extension."""
        return pageUrl.split('/')[-1].split('.')[0]

    globals()[classname] = make_scraper(classname,
        name = 'WLP/' + name,
        # NOTE(review): bounceStarter presumably follows the "Next Page"
        # link from baseUrl - confirm in helpers.
        starter = bounceStarter(baseUrl, compile(r'</a> <A HREF="(\w+.html)">Next Page</a>', IGNORECASE)),
        stripUrl = baseUrl + '%s.html',
        imageSearch = compile(r'SRC="(http://www.wlpcomics.com/adult/.+?|http://www.wlpcomics.com/general/.+?)"', IGNORECASE),
        prevSearch = compile(r'</a> <A HREF="(\w+.html)">Previous Page</a>', IGNORECASE),
        namer = namer,
        help = 'Index format: nnn',
    )
|
||||
|
||||
|
||||
add('ChichiChan', 'adult/chichi/')
|
||||
add('ChocolateMilkMaid', 'adult/cm/')
|
||||
add('MaidAttack', 'general/maidattack/')
|
||||
add('ShadowChasers', 'general/shadowchasers/')
|
||||
add('Stellar', 'adult/stellar/')
|
|
@ -6,26 +6,19 @@ from re import compile
|
|||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..helpers import bounceStarter
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
class xkcd(_BasicScraper):
|
||||
starter = bounceStarter('http://xkcd.com/', compile(r'<a rel="next" href="(/?\d+/?)"[^>]*>Next'))
|
||||
stripUrl = 'http://xkcd.com/c%s.html'
|
||||
imageSearch = compile(r'<img[^<]+src="(http://imgs.xkcd.com/comics/[^<>"]+)"')
|
||||
prevSearch = compile(r'<a rel="prev" href="(/?\d+/?)"[^>]*>< Prev')
|
||||
baseUrl = 'http://xkcd.com/'
|
||||
starter = bounceStarter(baseUrl, compile(tagre("a", "href", r'(/\d+/)', before="next")))
|
||||
stripUrl = baseUrl + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://imgs\.xkcd\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/\d+/)', before="prev"))
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
index = int(pageUrl.rstrip('/').split('/')[-1])
|
||||
name = imageUrl.split('/')[-1].split('.')[0]
|
||||
return 'c%03d-%s' % (index, name)
|
||||
|
||||
|
||||
|
||||
class xkcdSpanish(_BasicScraper):
|
||||
latestUrl = 'http://es.xkcd.com/xkcd-es/'
|
||||
stripUrl = latestUrl + 'strips/%s/'
|
||||
imageSearch = compile(r'src="(/site_media/strips/.+?)"')
|
||||
prevSearch = compile(r'<a rel="prev" href="(http://es.xkcd.com/xkcd-es/strips/.+?)">Anterior</a>')
|
||||
help = 'Index format: stripname'
|
||||
index = int(pageUrl.rstrip('/').rsplit('/', 1)[-1])
|
||||
name = imageUrl.rsplit('/', 1)[-1].split('.')[0]
|
||||
return '%03d-%s' % (index, name)
|
||||
|
|
|
@ -2,30 +2,24 @@
|
|||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, MULTILINE
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
class YAFGC(_BasicScraper):
|
||||
latestUrl = 'http://yafgc.shipsinker.com/'
|
||||
stripUrl = latestUrl + 'index.php?strip_id=%s'
|
||||
imageSearch = compile(r'(istrip_.+?)"')
|
||||
prevSearch = compile(r'(/.+?)">\r\n.+?prev.gif', MULTILINE)
|
||||
latestUrl = 'http://yafgc.net/'
|
||||
stripUrl = latestUrl + '?id=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://yafgc\.net/img/comic/\d+\.jpg)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://yafgc\.net/\?id=\d+)') +
|
||||
tagre("img", "src", r'/img/navbar/go_to_previous\.gif'))
|
||||
help = 'Index format: n'
|
||||
|
||||
|
||||
class YouSayItFirst(_BasicScraper):
|
||||
latestUrl = 'http://www.yousayitfirst.com/'
|
||||
stripUrl = 'http://www.soapylemon.com/comics/index.php?date=%s'
|
||||
imageSearch = compile(r'(http://.+?comics/.+?.jpg)[^<]')
|
||||
prevSearch = compile(r'(/comics/index.php\?date=.+?)".+?P')
|
||||
stripUrl = latestUrl + 'comics/index.php?date=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.yousayitfirst\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.yousayitfirst\.com/comics/index\.php\?date=\d+)') + "Previous")
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class Yirmumah(_BasicScraper):
|
||||
latestUrl = 'http://yirmumah.net/archives.php'
|
||||
stripUrl = latestUrl + '?date=%s'
|
||||
imageSearch = compile(r'<img src="(strips/\d{8}\..*?)"')
|
||||
prevSearch = compile(r'<a href="(\?date=\d{8})">.*Previous')
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
|
|
@ -3,20 +3,24 @@
|
|||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
from ..helpers import bounceStarter
|
||||
|
||||
|
||||
class Zapiro(_BasicScraper):
|
||||
latestUrl = 'http://www.mg.co.za/zapiro/all'
|
||||
imageSearch = compile(r'<img src="(cartoons/[^"]+)"')
|
||||
prevSearch = compile(r'<a href="([^"]+)">>')
|
||||
|
||||
baseUrl = 'http://www.mg.co.za/zapiro/'
|
||||
starter = bounceStarter(baseUrl,
|
||||
compile(tagre("a", "href", r'(http://mg\.co\.za/cartoon/[^"]+)')+"Newer"))
|
||||
stripUrl = 'http://mg.co.za/cartoon/%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://mg\.co\.za/cartoon/[^"]+)')+"Older")
|
||||
help = 'Index format: yyyy-mm-dd-stripname'
|
||||
|
||||
|
||||
class ZombieHunters(_BasicScraper):
|
||||
latestUrl = 'http://www.thezombiehunters.com/'
|
||||
stripUrl = latestUrl + 'index.php?strip_id=%s'
|
||||
imageSearch = compile(r'"(.+?strips/.+?)"')
|
||||
prevSearch = compile(r'</a><a href="(.+?)"><img id="prevcomic" ')
|
||||
stripUrl = latestUrl + '?strip_id=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(/istrip_files/strips/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(\?strip_id=\d+)') + tagre("img", "id", "prevcomic"))
|
||||
help = 'Index format: n(unpadded)'
|
||||
|
|
Loading…
Reference in a new issue