Fix some comics.

This commit is contained in:
Bastian Kleineidam 2012-11-26 07:13:32 +01:00
parent 7e91c83753
commit 4528894c05
19 changed files with 583 additions and 381 deletions

View file

@ -2,21 +2,21 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from ..scraper import _BasicScraper
from ..util import tagre
from re import compile
from ..scraper import make_scraper
from ..util import tagre, asciify
def creators(name, shortname):
def add(name, shortname):
baseUrl = 'http://www.creators.com/comics/'
return type('Creators_%s' % name,
(_BasicScraper,),
dict(
classname = 'Creators_%s' % asciify(name)
globals()[classname] = make_scraper(classname,
name = 'Creators/' + name,
latestUrl='%s%s.html' % (baseUrl, shortname),
stripUrl='%s%s/%%s.html' % (baseUrl, shortname),
latestUrl = baseUrl + shortname + '.html',
stripUrl = baseUrl + shortname + '/%s.html',
imageSearch = compile(tagre("img", "src", r'(/comics/\d+/[^"]+)')),
prevSearch = compile(tagre("a", "href", r'(/comics/%s/\d+\.html)' % shortname) +
tagre("img", "src", r'/img_comics/arrow_l\.gif')),
help='Index format: n')
help = 'Index format: n',
)
@ -52,9 +52,9 @@ comics = {
'Momma': 'momma',
'NestHeads': 'nest-heads',
'OneBigHappy': 'one-big-happy',
'OnAClaireDay': 'on-a-clair-day',
'TheOtherCoast': 'other-coast',
'TheQuigmans': 'quigmans',
'OnAClaireDay': 'on-a-claire-day',
'TheOtherCoast': 'the-other-coast',
'TheQuigmans': 'the-quigmans',
'Rubes': 'rubes',
'Rugrats': 'rugrats',
'ScaryGary': 'scary-gary',
@ -78,4 +78,4 @@ comics = {
}
for name, shortname in comics.items():
globals()[name] = creators(name, shortname)
add(name, shortname)

View file

@ -142,3 +142,5 @@ class DresdenCodak(_BasicScraper):
prevSearch = compile(r'<a href="http://dresdencodak.com(/.*?)"><img src=http://dresdencodak.com/m_prev.png>')
starter = indirectStarter('http://dresdencodak.com/', compile(r'<div id="preview"><a href="http://dresdencodak.com/(\d+/\d+/\d+/.*?)">'))
# XXX dilbert.com

View file

@ -2,28 +2,27 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE
from ..scraper import _BasicScraper
from re import compile
from ..scraper import make_scraper
from ..helpers import bounceStarter, queryNamer
from ..util import tagre
def drunkDuck(shortName):
linkSearch = r"<a href='(/[^/]*/index\.php\?p=\d+)' title='The %s page!'>"
return type('DrunkDuck_%s' % shortName,
(_BasicScraper,),
dict(
name='DrunkDuck/' + shortName,
stripUrl='index.php?p=%s' % (shortName,),
imageSearch=compile(r"<img src='(http://[a-z0-9]*.drunkduck.com/[^/]*/pages/[^'/]+)'>", IGNORECASE),
prevSearch=compile(linkSearch % ('previous',), IGNORECASE),
def add(name):
classname = 'DrunkDuck_%s' % name
url = 'http://www.drunkduck.com/%s/' % name
linkSearch = tagre("a", "href", r"(/[^/]*/index\.php\?p=\d+)", quote="'", after="The %s page")
globals()[classname] = make_scraper(classname,
name = 'DrunkDuck/' + name,
starter = bounceStarter(url, compile(linkSearch % 'next')),
stripUrl = url + 'index.php?p=%s' % name,
imageSearch = compile(tagre("img", "src", r"(http://[a-z0-9]*\.drunkduck\.com/[^/]*/pages/[^'/]+)", quote="'")),
prevSearch= compile(linkSearch % 'previous'),
help = 'Index format: n (unpadded)',
namer = queryNamer('p', usePageUrl=True),
starter=bounceStarter('http://www.drunkduck.com/%s/' % (shortName,), compile(linkSearch % ('next',), IGNORECASE))
)
)
duckComics = [
comics = (
'0_Opposites_attract_0',
'0_eight',
'101_Ways_to_Drive_a_Maren_Insane',
@ -2275,7 +2274,7 @@ duckComics = [
'yay_ponys',
'yoshi_freaks_real_life',
'zuchini',
]
)
for shortName in duckComics:
globals()[shortName] = drunkDuck(shortName)
for name in comics:
add(name)

View file

@ -1,6 +1,8 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from ..scraper import _BasicScraper
def fallenangel(name, shortname):
pass # XXX

View file

@ -0,0 +1,309 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import make_scraper
from ..util import tagre, asciify
def add(name, repl=''):
    """Create the GoComics scraper class for one comic and store it in
    this module's namespace under the key GoComics_<asciify(name)>.

    @param name: comic title; its asciify()'d form is used for the class
        name and the scraper name
    @param repl: string that replaces every space of the lowercased title
        when the URL path component is built (default: spaces are removed)
    """
    baseUrl = 'http://www.gocomics.com/'
    comicname = asciify(name)
    shortname = name.lower().replace(' ', repl)
    classname = 'GoComics_%s' % comicname

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Build the image file name from the date at the end of pageUrl."""
        # Page URLs end in .../yyyy/mm/dd, so the three date parts must be
        # split off from the right. Splitting from the left cuts inside the
        # "http://host" prefix and yields garbage for year, month and day.
        prefix, year, month, day = pageUrl.rsplit('/', 3)
        return "%s_%s%s%s.gif" % (shortname, year, month, day)

    globals()[classname] = make_scraper(classname,
        latestUrl=baseUrl + shortname,
        name='GoComics/' + comicname,
        stripUrl=baseUrl + shortname + '/%s',
        imageSearch=compile(tagre("img", "src", r'(http://assets\.amuniversal\.com/[0-9a-f]+)')),
        prevSearch=compile(tagre("a", "href", r'(/[^"]+/\d+/\d+/\d+)', after="prev")),
        help='Index format: yyyy/mm/dd',
        namer=namer,
    )
# Comic list source: http://www.gocomics.com/features
# Note that comics from creators.com are not repeated here.
# Each call registers one scraper class via add(). Passing repl='-' makes
# add() replace the spaces of the lowercased title with hyphens when it
# builds the URL path component; without it the spaces are simply removed.
add('2 Cows and a Chicken')
add('9 Chickweed Lane')
add('9 to 5')
add('The Academia Waltz')
add('Adam at Home')
add('Agnes')
add('Alley Oop', repl='-')
add('Andertoons')
add('Andy Capp')
add('Angry Little Girls', repl='-')
add('Animal Crackers')
add('Annie')
add('The Argyle Sweater')
add('Arlo and Janis')
add('Ask Shagg')
add('BC')
add('Back in the Day')
add('Bad Reporter')
add('Baldo')
add('Ballard Street')
add('Banana Triangle', repl='-')
add('Barkeater Lake')
add('The Barn')
add('Barney and Clyde')
add('Basic Instructions')
add('Beardo')
add('Ben')
add('Berger and Wyse', repl='-')
add('Betty')
add('Bewley')
add('Biff and Riley', repl='-')
add('Big Nate')
add('The Big Picture')
add('Big Top')
add('Biographic')
add('Birdbrains')
add('Bliss')
add('Bloom County')
add('Bo Nanas')
add('Bob the Squirrel')
add('Boomerangs')
add('The Boondocks')
add('The Born Loser')
add('Bottomliners')
add('Bound and Gagged')
add('Break of Day')
add('Brevity')
add('Brewster Rockit')
add('Broom Hilda')
add('The Buckets')
add('Buni')
add('Cafe con Leche')
add('Calvin and Hobbes')
add('Candorville')
add('Cathy')
add('Cest la Vie')
add('Cheap Thrills Cuisine', repl='-')
add('Chuckle Bros')
add('Citizen Dog')
add('The City')
add('Cleats')
add('Close to Home')
add('Committed')
add('Compu-toon')
add('Cornered')
add('Cow and Boy')
add('CowTown')
add('Crumb')
add('Cul de Sac')
add('Daddys Home')
add('Dark Side of the Horse')
add('Deep Cover')
add('Diamond Lil')
add('Dick Tracy')
add('The Dinette Set')
add('Dixie Drive', repl='-')
add('Dog Eat Doug')
add('Dogs of C Kennel')
add('Domestic Abuse')
add('Doonesbury')
add('The Doozies')
add('Drabble')
add('DudeDude')
add('The Duplex')
add('Eek')
add('The Elderberries')
add('Endtown')
add('Eric the Circle', repl='-')
add('F Minus')
add('Family Tree')
add('Farcus')
add('Fat Cats', repl='-')
add('Flo and Friends')
add('The Flying McCoys')
add('Foolish Mortals', repl='-')
add('For Better or For Worse')
add('For Heavens Sake')
add('Fort Knox')
add('FoxTrot')
add('FoxTrot Classics')
add('Frank and Ernest')
add('Frazz')
add('Fred Basset')
add('Free Range')
add('Freshly Squeezed')
add('Frog Applause')
add('The Fusco Brothers')
add('Garfield')
add('Garfield Minus Garfield')
add('Gasoline Alley')
add('Geech')
add('Get a Life')
add('Get Fuzzy')
add('Gil Thorp')
add('Ginger Meggs')
add('Gor Dominical')
add('Graffiti')
add('Grand Avenue')
add('Gray Matters')
add('The Grizzwells')
add('Haiku Ewe')
add('Ham Shears')
add('Health Capsules')
add('Heart of the City')
add('Heathcliff')
add('Heavenly Nostrils')
add('Herb and Jamaal')
add('Herman')
add('Home and Away')
add('HUBRIS!')
add('The Humble Stumble')
add('Imagine This')
add('In the Bleachers')
add('In the Sticks')
add('Incidental Comics')
add('Ink Pen')
add('Inspector Dangers Crime Quiz')
add('Its All About You')
add('Janes World')
add('Jims Journal')
add('Joe Vanilla')
add('Jump Start')
add('The K Chronicles')
add('KidCity')
add('KidSpot')
add('Kit N Carlyle')
add('Kitchen Capers')
add('Kliban')
add('Klibans Cats')
add('The Knight Life')
add('La Cucaracha')
add('Last Kiss')
add('The LeftyBosco Picture Show')
add('Legend of Bill')
add('Liberty Meadows')
add('Lil Abner')
add('Lio')
add('Little Dog Lost')
add('Lola')
add('Loose Parts')
add('The Lost Bear')
add('Lost Side of Suburbia')
add('Love Is...')
add('Luann')
add('Lucky Cow')
add('Mac')
add('Magic in a Minute')
add('Maintaining')
add('Marias Day')
add('Marmaduke')
add('McArroni')
add('The Meaning of Lila')
add('Medium Large')
add('Meg Classics')
add('The Middletons')
add('Mike du Jour')
add('Minimum Security')
add('Moderately Confused')
add('Molly and the Bear')
add('Momma')
add('Monty')
add('Motley Classics')
add('Mr. Gigi and the Squid')
add('Mutt and Jeff')
add('My Cage')
add('MythTickle')
add('Nancy')
add('Nest Heads')
add('NEUROTICA')
add('New Adventures of Queen Victoria')
add('Non Sequitur')
add('The Norm Classics')
add('Nothing is Not Something')
add('Off the Mark')
add('Ollie and Quentin')
add('On A Claire Day')
add('One Big Happy')
add('Ordinary Bill')
add('The Other Coast')
add('Out of the Gene Pool Re-Runs')
add('Over the Hedge')
add('Overboard')
add('Oyster War')
add('PC and Pixel')
add('Peanuts')
add('Pearls Before Swine')
add('Pibgorn')
add('Pibgorn Sketches')
add('Pickles')
add('Pinkerton')
add('Pluggers')
add('Pooch Cafe')
add('PreTeena')
add('Prickly City')
add('Rabbits Against Magic')
add('Raising Duncan')
add('Real Life Adventures')
add('Reality Check')
add('Red and Rover')
add('Red Meat')
add('Reply All')
add('Rip Haywire')
add('Ripleys Believe It or Not')
add('Rose is Rose')
add('Rubes')
add('Rudy Park')
add('Savage Chickens')
add('Scary Gary')
add('Shirley and Son Classics')
add('Shoe')
add('Shoecabbage')
add('Shortcuts')
add('Skin Horse')
add('Skippy')
add('Slowpoke')
add('Soup to Nutz')
add('Speed Bump')
add('Spot the Frog')
add('Starslip')
add('Stone Soup')
add('Strange Brew')
add('The Sunshine Club')
add('Sylvia')
add('Tank McNamara')
add('Tarzan')
add('Ten Cats')
add('Tales of TerraTopia')
add('That is Priceless')
add('Thats Life')
add('Thatababy')
add('Thin Lines')
add('Tiny Sepuku')
add('TOBY')
add('Todays Dogg')
add('Tom the Dancing Bug')
add('Too Much Coffee Man')
add('Trivquiz')
add('Twaggies')
add('Uncle Arts Funland')
add('Unstrange Phenomena')
add('U.S. Acres')
add('Viivi and Wagner')
add('Watch Your Head')
add('Wee Pals')
add('Wizard of Id')
add('Working Daze')
add('Working It Out')
add('W.T. Duck')
add('Zack Hill')
add('Ziggy')
# http://www.gocomics.com/explore/editorial_list
# XXX presumably the comics of this list still have to be added - confirm
# http://www.gocomics.com/explore/sherpa_list
# XXX presumably the comics of this list still have to be added - confirm

View file

@ -3,19 +3,18 @@
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import _BasicScraper
from ..scraper import make_scraper
from ..util import tagre
def keenSpot(name, urls):
def add(name, urls):
classname = 'KeenSpot_%s' % name
if not isinstance(urls, tuple):
baseUrl = latestUrl = urls
else:
baseUrl, latestUrl = urls
return type('KeenSpot_%s' % name,
(_BasicScraper,),
dict(
globals()[classname] = make_scraper(classname,
name='KeenSpot/' + name,
latestUrl=latestUrl,
stripUrl=baseUrl + 'd/%s.html',
@ -24,10 +23,9 @@ def keenSpot(name, urls):
'(?:<img[^>]+?(?:name="previous_day"|alt="Previous"|src="[^"]*back[^"]*")|Previous comic)'),
help = 'Index format: yyyymmdd',
)
)
keenspotComics = {
comics = {
'13thLabour': 'http://the13labour.comicgenesis.com/',
'1StComing': 'http://toon.comicgenesis.com/',
'1StGradeArt': 'http://art.comicgenesis.com/',
@ -1522,5 +1520,13 @@ keenspotComics = {
'ZuraZura': 'http://zurazura.comicgenesis.com/',
}
for name, urls in keenspotComics.items():
globals()[name] = keenSpot(name, urls)
for name, urls in comics.items():
add(name, urls)
#class Yirmumah(_BasicScraper):
# #http://yirmumah.keenspot.com/
# stripUrl = latestUrl + '?date=%s'
# imageSearch = compile(r'<img src="(strips/\d{8}\..*?)"')
# prevSearch = compile(r'<a href="(\?date=\d{8})">.*Previous')
# help = 'Index format: yyyymmdd'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, sub
from re import compile
from ..scraper import _BasicScraper
from ..helpers import indirectStarter, _PHPScraper
from ..util import tagre
@ -64,35 +64,6 @@ class Nukees(_BasicScraper):
def nuklearpower(name, shortname):
baseUrl = 'http://www.nuklearpower.com/'
latestUrl = "%s%s/" % (baseUrl, shortname)
classname = sub("[^0-9a-zA-Z_]", "", name)
globals()[classname] = type('NuklearPower_%s' % classname,
(_BasicScraper,),
dict(
name='NuklearPower/' + classname,
latestUrl = latestUrl,
stripUrl = latestUrl + '%s',
imageSearch = compile(tagre("img", "src", r'(http://www\.nuklearpower\.com/comics/[^"]+)')),
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "Previous"),
help = 'Index format: yyyy/mm/dd/name',
)
)
npstrips = {
'8BitTheater': '8-bit-theater',
'Warbot': 'warbot',
'HowIKilledYourMaster': 'hikym',
'AtomicRobo': 'atomic-robo',
}
for name, shortname in npstrips.items():
nuklearpower(name, shortname)
class NekoTheKitty(_PHPScraper):
basePath = 'http://www.nekothekitty.net/cusp/'
latestUrl = basePath

View file

@ -0,0 +1,26 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import make_scraper
from ..util import tagre
def add(name, shortname):
    """Create the NuklearPower scraper class for one comic and store it in
    this module's namespace under the key NuklearPower_<name>.

    @param name: comic name, used for the class name and the scraper name
    @param shortname: path component of the comic below
        http://www.nuklearpower.com/
    """
    comicUrl = ''.join(('http://www.nuklearpower.com/', shortname, '/'))
    imagePattern = tagre("img", "src", r'(http://www\.nuklearpower\.com/comics/[^"]+)')
    # The link to the previous strip is an anchor tag that is directly
    # followed by the text "Previous".
    previousPattern = tagre("a", "href", r'([^"]+)') + "Previous"
    attributes = {
        'name': 'NuklearPower/' + name,
        'latestUrl': comicUrl,
        'stripUrl': comicUrl + '%s',
        'imageSearch': compile(imagePattern),
        'prevSearch': compile(previousPattern),
        'help': 'Index format: yyyy/mm/dd/name',
    }
    classname = 'NuklearPower_%s' % name
    globals()[classname] = make_scraper(classname, **attributes)
# One entry per comic: (name, path component below www.nuklearpower.com).
for _entry in (
    ('8BitTheater', '8-bit-theater'),
    ('Warbot', 'warbot'),
    ('HowIKilledYourMaster', 'hikym'),
    ('AtomicRobo', 'atomic-robo'),
):
    add(*_entry)
# Do not leave the loop variable behind in the module namespace.
del _entry

View file

@ -2,48 +2,39 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import _BasicScraper
from ..scraper import make_scraper
from ..helpers import bounceStarter
from ..util import tagre
def smackJeeves(names):
def add(name):
classname = 'SmackJeeves/' + name
# XXX mature content can be viewed directly with:
# http://www.smackjeeves.com/mature.php?ref=<percent-encoded-url>
class _SJScraper(_BasicScraper):
stripUrl = property(lambda self: self.baseUrl + self.shortName)
imageSearch = compile(tagre("img", "src", r'(http://www\.smackjeeves\.com/images/uploaded/comics/[^"]*)'))
prevSearch = compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + '<img[^>]*alt="< Previous"')
help = 'Index format: nnnn (some increasing number)'
baseUrl = 'http://%s.smackjeeves.com/comics/' % name
@classmethod
def namer(cls, imageUrl, pageUrl):
return pageUrl.split('/')[-2]
def makeScraper(shortName):
baseUrl = 'http://%s.smackjeeves.com/comics/' % shortName
return type('SmackJeeves_%s' % shortName,
(_SJScraper,),
dict(
name='SmackJeeves/' + shortName,
baseUrl=baseUrl,
starter=bounceStarter(baseUrl, compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + '<img[^>]*alt="Next >"'))
globals()[classname] = make_scraper(classname,
starter=bounceStarter(baseUrl, compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + '<img[^>]*alt="Next >"')),
stripUrl = baseUrl,
imageSearch = compile(tagre("img", "src", r'(http://www\.smackjeeves\.com/images/uploaded/comics/[^"]*)')),
prevSearch = compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + '<img[^>]*alt="< Previous"'),
help = 'Index format: nnnn (some increasing number)',
namer = namer,
)
)
return dict((name, makeScraper(name)) for name in names)
globals().update(smackJeeves([
'20galaxies',
'axe13',
'beartholomew',
'bliss',
'durian',
'heard',
'mpmcomic',
'nlmo-project',
'paranoidloyd',
'thatdreamagain',
'wowcomics',
]))
add('20galaxies')
add('axe13')
add('beartholomew')
add('bliss')
add('durian')
add('heard')
add('mpmcomic')
add('nlmo-project')
add('paranoidloyd')
add('thatdreamagain')
add('wowcomics')

View file

@ -2,41 +2,34 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from ..scraper import _BasicScraper
from re import compile
from ..scraper import make_scraper
def snafuComics():
class _SnafuComics(_BasicScraper):
imageSearch = compile(r'<img src=http://\w+\.snafu-comics\.com/(comics/\d{6}_\w*\.\w{3,4})')
prevSearch = compile(r'<a href="(\?comic_id=\d+)">Previous</a>')
help = 'Index format: n (unpadded)'
def add(name, host):
baseUrl = 'http://%s.snafu-comics.com/' % host
classname = 'SnafuComics_%s' % name
@property
def stripUrl(self):
return self.latestUrl + 'index.php?strip_id=%s'
globals()[classname] = make_scraper(classname,
latestUrl = baseUrl,
stripUrl = baseUrl + 'index.php?strip_id=%s',
imageSearch = compile(r'<img src=http://\w+\.snafu-comics\.com/(comics/\d{6}_\w*\.\w{3,4})'),
prevSearch = compile(r'<a href="(\?comic_id=\d+)">Previous</a>'),
help = 'Index format: n (unpadded)',
)
comics = {
'Grim': 'grim',
'KOF': 'kof',
'PowerPuffGirls': 'ppg',
'Snafu': 'www',
'Tin': 'tin',
'TW': 'tw',
'Sugar': 'sugar',
'SF': 'sf',
'Titan': 'titan',
'EA': 'ea',
'Zim': 'zim',
'Soul': 'soul',
'FT': 'ft',
'Bunnywith': 'bunnywith',
'Braindead': 'braindead',
}
url = 'http://%s.snafu-comics.com/'
return dict((name, type('SnafuComics_%s' % name,
(_SnafuComics,),
dict(name='SnafuComics/' + name,
latestUrl=url % host)))
for name, host in comics.items())
globals().update(snafuComics())
add('Grim', 'grim')
add('KOF', 'kof')
add('PowerPuffGirls', 'ppg')
add('Snafu', 'www')
add('Tin', 'tin')
add('TW', 'tw')
add('Sugar', 'sugar')
add('SF', 'sf')
add('Titan', 'titan')
add('EA', 'ea')
add('Zim', 'zim')
add('Soul', 'soul')
add('FT', 'ft')
add('Bunnywith', 'bunnywith')
add('Braindead', 'braindead')

View file

@ -2,29 +2,13 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE
from re import compile
from ..scraper import _BasicScraper
from ..helpers import bounceStarter, indirectStarter
from ..util import getQueryParams, tagre
class UglyHill(_BasicScraper):
latestUrl = 'http://www.uglyhill.com/'
stripUrl = latestUrl + 'd/%s.html'
imageSearch = compile(tagre("img", "src", r'(/comic[s|/][^"]+)'))
prevSearch = compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif")
help = 'Index format: yyyymmdd'
class UnderPower(_BasicScraper):
latestUrl = 'http://underpower.non-essential.com/'
stripUrl = latestUrl + 'index.php?comic=%s'
imageSearch = compile(r'<img src="(comics/\d{8}\..+?)"')
prevSearch = compile(r'<a href="(/index.php\?comic=\d{8})"><img src="images/previous-comic\.gif"')
help = 'Index format: yyyymmdd'
class Undertow(_BasicScraper):
stripUrl = 'http://undertow.dreamshards.org/%s'
imageSearch = compile(r'<img src="(.+?)"')
@ -52,24 +36,3 @@ class UserFriendly(_BasicScraper):
@classmethod
def namer(cls, imageUrl, pageUrl):
return 'uf%s' % (getQueryParams(pageUrl)['id'][0][2:],)
class UndeadFriend(_BasicScraper):
latestUrl = 'http://www.undeadfriend.com/'
stripUrl = latestUrl + 'd/%s.html'
imageSearch = compile(r'src="(http://www\.undeadfriend\.com/comics/.+?)"', IGNORECASE)
prevSearch = compile(r'<a.+?href="(http://www\.undeadfriend\.com/d/\d+?\.html)"><img border="0" name="previous_day" alt="Previous comic" src="http://www\.undeadfriend\.com/images/previous_day\.jpg', IGNORECASE)
help = 'Index format: yyyymmdd'
class UnspeakableVault(_BasicScraper):
stripUrl = 'http://www.macguff.fr/goomi/unspeakable/WEBIMAGES/CARTOON/vault%s.html'
imageSearch = compile(r'(WEBIMAGES/CARTOON/.+?)"')
prevSearch = compile(r'PREVIOUS.+?" href="(.+?)"')
help = 'Index format: nn or nnn'
starter = indirectStarter('http://www.macguff.fr/goomi/unspeakable/home.html',
compile(r'http://www.macguff.fr/goomi/unspeakable/(.+?)"'))
@classmethod
def namer(cls, imageUrl, imageSearch):
return '%s-%s' % (imageSearch.split('/')[-1].split('.')[0],imageUrl.split('/')[-1].split('.')[0])

View file

@ -3,27 +3,19 @@
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, sub
from ..scraper import _BasicScraper
from ..scraper import make_scraper
from ..util import fetchUrl, tagre
class _UClickScraper(_BasicScraper):
def add(name, shortName):
homepage = 'http://content.uclick.com/a2z.html'
baseUrl = 'http://www.uclick.com/client/zzz/%s/'
stripUrl = property(lambda self: self.latestUrl + '%s/')
imageSearch = compile(tagre("img", "src", r'(http://synd\.imgsrv\.uclick\.com/comics/\w+/\d{4}/[^"]+\.gif)'))
prevSearch = compile(tagre("a", "href", r'(/client/zzz/\w+/\d{4}/\d{2}/\d{2}/)') + 'Previous date')
help = 'Index format: yyyy/mm/dd'
@classmethod
def starter(cls):
return cls.baseUrl % (cls.shortName,)
latestUrl = baseUrl % shortName
classname = 'UClick_%s' % name
@classmethod
def fetchSubmodules(cls):
exclusions = ('index',)
# XXX refactor this mess
submoduleSearch = compile(tagre("a", "href", r'(http://content\.uclick\.com/content/\w+\.html)'))
partsMatch = compile(tagre("a", "href", r'http://content\.uclick\.com/content/(\w+?)\.html'))
@ -43,11 +35,15 @@ class _UClickScraper(_BasicScraper):
return [normalizeName(name) for part, name in possibles if part not in exclusions and fetchSubmodule(part)]
globals()[classname] = make_scraper(classname,
name='UClick/' + name,
latestUrl = latestUrl,
stripUrl = latestUrl + '%s/',
imageSearch = compile(tagre("img", "src", r'(http://synd\.imgsrv\.uclick\.com/comics/\w+/\d{4}/[^"]+\.gif)')),
prevSearch = compile(tagre("a", "href", r'(/client/zzz/\w+/\d{4}/\d{2}/\d{2}/)') + 'Previous date'),
help = 'Index format: yyyy/mm/dd',
)
def uclick(name, shortName):
return type('UClick_%s' % name,
(_UClickScraper,),
dict(name='UClick/' + name, shortName=shortName))
comics = {
'5thWave': 'fw',
@ -280,4 +276,5 @@ comics = {
'Ziggy': 'zi',
}
globals().update(dict((item[0], uclick(*item)) for item in comics.items()))
for name, shortname in comics.items():
add(name, shortname)

View file

@ -2,11 +2,13 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE, MULTILINE
from re import compile
from ..scraper import _BasicScraper
from ..util import tagre
# XXX make dynamic
class _VGCats(_BasicScraper):
latestUrl = 'http://www.vgcats.com/comics/'
imageSearch = compile(r'<img src="(images/\d{6}\..+?)"')
@ -31,7 +33,6 @@ class Adventure(_VGCats):
class ViiviJaWagner(_BasicScraper):
latestUrl = 'http://www.hs.fi/viivijawagner/'
imageSearch = compile(r'<img id="strip\d+"\s+src="([^"]+)"', IGNORECASE)
prevSearch = compile(r'<a href="(.+?)"[^>]+?>\nEdellinen&nbsp;\n<img src="http://www.hs.fi/static/hs/img/viivitaakse.gif"', MULTILINE | IGNORECASE)
# XXX ?
help = 'Index format: shrugs!'
imageSearch = compile(tagre("link", "href", r'(http://hs12\.snstatic\.fi/webkuva/oletus/[^"]+)', before="image_src"))
prevSearch = compile(tagre("a", "href", r'(/viivijawagner/\d+)', before="prev-cm"))
help = 'Index format: none'

View file

@ -2,15 +2,15 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE, DOTALL
from re import compile, IGNORECASE
from ..scraper import _BasicScraper
from ..helpers import queryNamer, bounceStarter
from ..util import tagre
class WayfarersMoon(_BasicScraper):
latestUrl = 'http://www.wayfarersmoon.com/'
stripUrl = latestUrl + 'index.php\?page=%s'
stripUrl = latestUrl + 'index.php?page=%s'
imageSearch = compile(r'<img src="(/admin.+?)"')
prevSearch = compile(r'<a href="(.+?)".+?btn_back.gif')
help = 'Index format: nn'
@ -32,7 +32,6 @@ class WhiteNoise(_BasicScraper):
help = 'Index format: n'
class WhyTheLongFace(_BasicScraper):
latestUrl = 'http://www.absurdnotions.org/wtlf200709.html'
stripUrl = 'http://www.absurdnotions.org/wtlf%s.html'
@ -41,23 +40,12 @@ class WhyTheLongFace(_BasicScraper):
help = 'Index format: yyyymm'
class Wigu(_BasicScraper):
latestUrl = 'http://www.wigu.com/wigu/'
stripUrl = latestUrl + '?date=%s'
imageSearch = compile(r'<img src="(strips/\d{8}\..+?)" alt=""')
prevSearch = compile(r'<a href="(.+?)"[^>]+?>< PREV COMIC</a> ')
help = 'Index format: yyyymmdd'
class WiguTV(_BasicScraper):
latestUrl = 'http://jjrowland.com/'
stripUrl = latestUrl + 'archive/%s.html'
imageSearch = compile(r'"(/comics/.+?)"')
prevSearch = compile(r'<a href="(/archive/.+?)"[^>]+?>&nbsp;')
help = 'Index format: yyyymmdd'
latestUrl = 'http://wigucomics.com/'
stripUrl = latestUrl + 'adventures/index.php?comic=%s'
imageSearch = compile(tagre("img", "src", r'(/adventures/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/adventures/index\.php\?comic=\d+)', after="go back"))
help = 'Index format: n'
class WotNow(_BasicScraper):
@ -68,7 +56,6 @@ class WotNow(_BasicScraper):
help = 'Index format: n (unpadded)'
class WorldOfWarcraftEh(_BasicScraper):
latestUrl = 'http://woweh.com/'
stripUrl = None
@ -77,46 +64,11 @@ class WorldOfWarcraftEh(_BasicScraper):
class Wulffmorgenthaler(_BasicScraper):
latestUrl = 'http://www.wulffmorgenthaler.com/'
stripUrl = latestUrl + 'Default.aspx?id=%s'
imageSearch = compile(r'img id="ctl00_content_Strip1_imgStrip".+?class="strip" src="(striphandler\.ashx\?stripid=[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"')
prevSearch = compile(r'<a href="(/default\.aspx\?id=[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})" id="ctl00_content_Strip1_aPrev">')
help = 'Index format: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx (GUID)'
namer = queryNamer('stripid')
def webcomicsNation():
class _WebcomicsNation(_BasicScraper):
imageSearch = compile(r'<a name="strip\d*?">.*?<img[^>]+?src="([^"]*?memberimages/.+?)"', IGNORECASE + DOTALL)
prevSearch = compile(r'href="([^"]*?whichbutton=prev[^"]*?)"', IGNORECASE)
help = 'Index format: nnnn (non-contiguous)'
@property
def stripUrl(self):
return self.baseUrl + '?view=archive&amp;chapter=%s'
comics = {
'AgnesQuill': 'daveroman/agnes/',
'Elvenbaath': 'tdotodot2k/elvenbaath/',
'IrrationalFears': 'uvernon/irrationalfears/',
'KismetHuntersMoon': 'laylalawlor/huntersmoon/',
'SaikoAndLavender': 'gc/saiko/',
'MyMuse': 'gc/muse/',
'NekkoAndJoruba': 'nekkoandjoruba/nekkoandjoruba/',
'JaxEpoch': 'johngreen/quicken/',
'QuantumRockOfAges': 'DreamchildNYC/quantum/',
'ClownSamurai' : 'qsamurai/clownsamurai/',
}
return dict((name, type('WebcomicsNation_%s' % name,
(_WebcomicsNation,),
dict(name='WebcomicsNation/' + name,
latestUrl='http://www.webcomicsnation.com/' + subpath)))
for name, subpath in comics.items())
globals().update(webcomicsNation())
latestUrl = 'http://wumocomicstrip.com/'
stripUrl = latestUrl + '%s/'
imageSearch = compile(tagre("img", "src", r'(/img/strip/thumb/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<span>Previous")
help = 'Index format: yyyy/mm/dd'
class WhiteNoise(_BasicScraper):
@ -127,7 +79,6 @@ class WhiteNoise(_BasicScraper):
help = 'Index format: n'
class WapsiSquare(_BasicScraper):
latestUrl = 'http://wapsisquare.com/'
stripUrl = latestUrl + 'comic/%s'
@ -136,77 +87,14 @@ class WapsiSquare(_BasicScraper):
help = 'Index format: strip-name'
class WrongWay(_BasicScraper):
latestUrl = 'http://www.wrongwaycomics.com/'
stripUrl = latestUrl + '%s.html'
imageSearch = compile(r'<img src="(comics/.+?)"')
prevSearch = compile(r' <a class="comicNav" href="(.+?)" onmouseover="previousLinkIn\(\)"')
help = 'Index format: nnn'
class WeCanSleepTomorrow(_BasicScraper):
latestUrl = 'http://wecansleeptomorrow.com/'
imageSearch = compile(r'<img src="(http://wecansleeptomorrow.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
stripUrl = latestUrl + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://wecansleeptomorrow\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://wecansleeptomorrow\.com/[^"]+)', after="prev"))
help = 'Index format: yyyy/mm/dd/stripname'
class _WLP(_BasicScraper):
imageSearch=compile(r'SRC="(http://www.wlpcomics.com/adult/.+?|http://www.wlpcomics.com/general/.+?)"', IGNORECASE)
prevSearch=compile(r'</a> <A HREF="(\w+.html)">Previous Page</a>', IGNORECASE)
help='Index format: nnn'
@property
def baseUrl(self):
return 'http://www.wlpcomics.com/%s' % (self.path,)
@property
def stripUrl(self):
return self.baseUrl + '%s.html'
def namer(self, imageUrl, pageUrl):
return pageUrl.split('/')[-1].split('.')[0]
def starter(self):
# XXX: ergh
meth = bounceStarter(self.baseUrl, compile(r'</a> <A HREF="(\w+.html)">Next Page</a>', IGNORECASE))
return meth.__get__(self, type(self))()
class ChichiChan(_WLP):
name = 'WLP/ChichiChan'
path = 'adult/chichi/'
class ChocolateMilkMaid(_WLP):
name = 'WLP/ChocolateMilkMaid'
path = 'adult/cm/'
class MaidAttack(_WLP):
name = 'WLP/MaidAttack'
path = 'general/maidattack/'
class ShadowChasers(_WLP):
name = 'WLP/ShadowChasers'
path = 'general/shadowchasers/'
class Stellar(_WLP):
name = 'WLP/Stellar'
path = 'adult/stellar/'
class Wondermark(_BasicScraper):
latestUrl = 'http://wondermark.com/'
stripUrl = latestUrl + '%s/'

View file

@ -0,0 +1,31 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE, DOTALL
from ..scraper import make_scraper
def add(name, subpath):
    """Create the WebcomicsNation scraper class for one comic and store it
    in this module's namespace under the key WebcomicsNation_<name>.

    @param name: comic name, used for the class name and the scraper name
    @param subpath: path of the comic below http://www.webcomicsnation.com/
    """
    siteUrl = 'http://www.webcomicsnation.com/'
    searchFlags = IGNORECASE + DOTALL
    attributes = {
        'name': 'WebcomicsNation/' + name,
        'latestUrl': siteUrl + subpath,
        # NOTE(review): the strip URL is built from the site root only and
        # does not contain subpath - confirm that this is intended.
        'stripUrl': siteUrl + '?view=archive&amp;chapter=%s',
        'imageSearch': compile(r'<a name="strip\d*?">.*?<img[^>]+?src="([^"]*?memberimages/.+?)"', searchFlags),
        'prevSearch': compile(r'href="([^"]*?whichbutton=prev[^"]*?)"', IGNORECASE),
        'help': 'Index format: nnnn (non-contiguous)',
    }
    classname = 'WebcomicsNation_%s' % name
    globals()[classname] = make_scraper(classname, **attributes)
# One entry per comic: (name, path below www.webcomicsnation.com).
for _entry in (
    ('AgnesQuill', 'daveroman/agnes/'),
    ('Elvenbaath', 'tdotodot2k/elvenbaath/'),
    ('IrrationalFears', 'uvernon/irrationalfears/'),
    ('KismetHuntersMoon', 'laylalawlor/huntersmoon/'),
    ('SaikoAndLavender', 'gc/saiko/'),
    ('MyMuse', 'gc/muse/'),
    ('NekkoAndJoruba', 'nekkoandjoruba/nekkoandjoruba/'),
    ('JaxEpoch', 'johngreen/quicken/'),
    ('QuantumRockOfAges', 'DreamchildNYC/quantum/'),
    ('ClownSamurai', 'qsamurai/clownsamurai/'),
):
    add(*_entry)
# Do not leave the loop variable behind in the module namespace.
del _entry

View file

@ -0,0 +1,32 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE
from ..scraper import make_scraper
from ..helpers import bounceStarter
def add(name, path):
    """Create the WLP scraper class for one comic and store it in this
    module's namespace under the key WLP_<name>.

    @param name: comic name, used for the class name and the scraper name
    @param path: path of the comic below http://www.wlpcomics.com/
    """
    baseUrl = 'http://www.wlpcomics.com/' + path
    # A class name must not contain a slash; the "WLP/<name>" form is the
    # scraper name and is passed separately as the name attribute below.
    classname = 'WLP_%s' % name

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Name the image after the page file, without its extension."""
        return pageUrl.split('/')[-1].split('.')[0]

    globals()[classname] = make_scraper(classname,
        name = 'WLP/' + name,
        starter = bounceStarter(baseUrl, compile(r'</a> <A HREF="(\w+.html)">Next Page</a>', IGNORECASE)),
        stripUrl = baseUrl + '%s.html',
        imageSearch = compile(r'SRC="(http://www.wlpcomics.com/adult/.+?|http://www.wlpcomics.com/general/.+?)"', IGNORECASE),
        prevSearch = compile(r'</a> <A HREF="(\w+.html)">Previous Page</a>', IGNORECASE),
        namer = namer,
        help = 'Index format: nnn',
    )
# One entry per comic: (name, path below www.wlpcomics.com).
for _entry in (
    ('ChichiChan', 'adult/chichi/'),
    ('ChocolateMilkMaid', 'adult/cm/'),
    ('MaidAttack', 'general/maidattack/'),
    ('ShadowChasers', 'general/shadowchasers/'),
    ('Stellar', 'adult/stellar/'),
):
    add(*_entry)
# Do not leave the loop variable behind in the module namespace.
del _entry

View file

@ -6,26 +6,19 @@ from re import compile
from ..scraper import _BasicScraper
from ..helpers import bounceStarter
from ..util import tagre
class xkcd(_BasicScraper):
starter = bounceStarter('http://xkcd.com/', compile(r'<a rel="next" href="(/?\d+/?)"[^>]*>Next'))
stripUrl = 'http://xkcd.com/c%s.html'
imageSearch = compile(r'<img[^<]+src="(http://imgs.xkcd.com/comics/[^<>"]+)"')
prevSearch = compile(r'<a rel="prev" href="(/?\d+/?)"[^>]*>&lt; Prev')
baseUrl = 'http://xkcd.com/'
starter = bounceStarter(baseUrl, compile(tagre("a", "href", r'(/\d+/)', before="next")))
stripUrl = baseUrl + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://imgs\.xkcd\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/\d+/)', before="prev"))
help = 'Index format: n (unpadded)'
@classmethod
def namer(cls, imageUrl, pageUrl):
index = int(pageUrl.rstrip('/').split('/')[-1])
name = imageUrl.split('/')[-1].split('.')[0]
return 'c%03d-%s' % (index, name)
class xkcdSpanish(_BasicScraper):
latestUrl = 'http://es.xkcd.com/xkcd-es/'
stripUrl = latestUrl + 'strips/%s/'
imageSearch = compile(r'src="(/site_media/strips/.+?)"')
prevSearch = compile(r'<a rel="prev" href="(http://es.xkcd.com/xkcd-es/strips/.+?)">Anterior</a>')
help = 'Index format: stripname'
index = int(pageUrl.rstrip('/').rsplit('/', 1)[-1])
name = imageUrl.rsplit('/', 1)[-1].split('.')[0]
return '%03d-%s' % (index, name)

View file

@ -2,30 +2,24 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, MULTILINE
from ..scraper import _BasicScraper
from ..util import tagre
class YAFGC(_BasicScraper):
latestUrl = 'http://yafgc.shipsinker.com/'
stripUrl = latestUrl + 'index.php?strip_id=%s'
imageSearch = compile(r'(istrip_.+?)"')
prevSearch = compile(r'(/.+?)">\r\n.+?prev.gif', MULTILINE)
latestUrl = 'http://yafgc.net/'
stripUrl = latestUrl + '?id=%s'
imageSearch = compile(tagre("img", "src", r'(http://yafgc\.net/img/comic/\d+\.jpg)'))
prevSearch = compile(tagre("a", "href", r'(http://yafgc\.net/\?id=\d+)') +
tagre("img", "src", r'/img/navbar/go_to_previous\.gif'))
help = 'Index format: n'
class YouSayItFirst(_BasicScraper):
latestUrl = 'http://www.yousayitfirst.com/'
stripUrl = 'http://www.soapylemon.com/comics/index.php?date=%s'
imageSearch = compile(r'(http://.+?comics/.+?.jpg)[^<]')
prevSearch = compile(r'(/comics/index.php\?date=.+?)".+?P')
stripUrl = latestUrl + 'comics/index.php?date=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.yousayitfirst\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.yousayitfirst\.com/comics/index\.php\?date=\d+)') + "Previous")
help = 'Index format: yyyymmdd'
class Yirmumah(_BasicScraper):
latestUrl = 'http://yirmumah.net/archives.php'
stripUrl = latestUrl + '?date=%s'
imageSearch = compile(r'<img src="(strips/\d{8}\..*?)"')
prevSearch = compile(r'<a href="(\?date=\d{8})">.*Previous')
help = 'Index format: yyyymmdd'

View file

@ -3,20 +3,24 @@
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import _BasicScraper
from ..util import tagre
from ..helpers import bounceStarter
class Zapiro(_BasicScraper):
latestUrl = 'http://www.mg.co.za/zapiro/all'
imageSearch = compile(r'<img src="(cartoons/[^"]+)"')
prevSearch = compile(r'<a href="([^"]+)">&gt;')
baseUrl = 'http://www.mg.co.za/zapiro/'
starter = bounceStarter(baseUrl,
compile(tagre("a", "href", r'(http://mg\.co\.za/cartoon/[^"]+)')+"Newer"))
stripUrl = 'http://mg.co.za/cartoon/%s'
imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://mg\.co\.za/cartoon/[^"]+)')+"Older")
help = 'Index format: yyyy-mm-dd-stripname'
class ZombieHunters(_BasicScraper):
latestUrl = 'http://www.thezombiehunters.com/'
stripUrl = latestUrl + 'index.php?strip_id=%s'
imageSearch = compile(r'"(.+?strips/.+?)"')
prevSearch = compile(r'</a><a href="(.+?)"><img id="prevcomic" ')
stripUrl = latestUrl + '?strip_id=%s'
imageSearch = compile(tagre("img", "src", r'(/istrip_files/strips/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(\?strip_id=\d+)') + tagre("img", "id", "prevcomic"))
help = 'Index format: n(unpadded)'