Fix some comics.

This commit is contained in:
Bastian Kleineidam 2012-11-26 07:13:32 +01:00
parent 7e91c83753
commit 4528894c05
19 changed files with 583 additions and 381 deletions

View file

@ -2,21 +2,21 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam # Copyright (C) 2012 Bastian Kleineidam
from ..scraper import _BasicScraper from re import compile
from ..util import tagre from ..scraper import make_scraper
from ..util import tagre, asciify
def creators(name, shortname): def add(name, shortname):
baseUrl = 'http://www.creators.com/comics/' baseUrl = 'http://www.creators.com/comics/'
return type('Creators_%s' % name, classname = 'Creators_%s' % asciify(name)
(_BasicScraper,), globals()[classname] = make_scraper(classname,
dict( name = 'Creators/' + name,
name='Creators/' + name, latestUrl = baseUrl + shortname + '.html',
latestUrl='%s%s.html' % (baseUrl, shortname), stripUrl = baseUrl + shortname + '/%s.html',
stripUrl='%s%s/%%s.html' % (baseUrl, shortname), imageSearch = compile(tagre("img", "src", r'(/comics/\d+/[^"]+)')),
imageSearch=compile(tagre("img", "src", r'(/comics/\d+/[^"]+)')), prevSearch = compile(tagre("a", "href", r'(/comics/%s/\d+\.html)' % shortname) +
prevSearch=compile(tagre("a", "href", r'(/comics/%s/\d+\.html)' % shortname) +
tagre("img", "src", r'/img_comics/arrow_l\.gif')), tagre("img", "src", r'/img_comics/arrow_l\.gif')),
help='Index format: n') help = 'Index format: n',
) )
@ -52,9 +52,9 @@ comics = {
'Momma': 'momma', 'Momma': 'momma',
'NestHeads': 'nest-heads', 'NestHeads': 'nest-heads',
'OneBigHappy': 'one-big-happy', 'OneBigHappy': 'one-big-happy',
'OnAClaireDay': 'on-a-clair-day', 'OnAClaireDay': 'on-a-claire-day',
'TheOtherCoast': 'other-coast', 'TheOtherCoast': 'the-other-coast',
'TheQuigmans': 'quigmans', 'TheQuigmans': 'the-quigmans',
'Rubes': 'rubes', 'Rubes': 'rubes',
'Rugrats': 'rugrats', 'Rugrats': 'rugrats',
'ScaryGary': 'scary-gary', 'ScaryGary': 'scary-gary',
@ -78,4 +78,4 @@ comics = {
} }
for name, shortname in comics.items(): for name, shortname in comics.items():
globals()[name] = creators(name, shortname) add(name, shortname)

View file

@ -142,3 +142,5 @@ class DresdenCodak(_BasicScraper):
prevSearch = compile(r'<a href="http://dresdencodak.com(/.*?)"><img src=http://dresdencodak.com/m_prev.png>') prevSearch = compile(r'<a href="http://dresdencodak.com(/.*?)"><img src=http://dresdencodak.com/m_prev.png>')
starter = indirectStarter('http://dresdencodak.com/', compile(r'<div id="preview"><a href="http://dresdencodak.com/(\d+/\d+/\d+/.*?)">')) starter = indirectStarter('http://dresdencodak.com/', compile(r'<div id="preview"><a href="http://dresdencodak.com/(\d+/\d+/\d+/.*?)">'))
# XXX dilbert.com

View file

@ -2,28 +2,27 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam # Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE from re import compile
from ..scraper import make_scraper
from ..scraper import _BasicScraper
from ..helpers import bounceStarter, queryNamer from ..helpers import bounceStarter, queryNamer
from ..util import tagre
def drunkDuck(shortName): def add(name):
linkSearch = r"<a href='(/[^/]*/index\.php\?p=\d+)' title='The %s page!'>" classname = 'DrunkDuck_%s' % name
return type('DrunkDuck_%s' % shortName, url = 'http://www.drunkduck.com/%s/' % name
(_BasicScraper,), linkSearch = tagre("a", "href", r"(/[^/]*/index\.php\?p=\d+)", quote="'", after="The %s page")
dict( globals()[classname] = make_scraper(classname,
name='DrunkDuck/' + shortName, name = 'DrunkDuck/' + name,
stripUrl='index.php?p=%s' % (shortName,), starter = bounceStarter(url, compile(linkSearch % 'next')),
imageSearch=compile(r"<img src='(http://[a-z0-9]*.drunkduck.com/[^/]*/pages/[^'/]+)'>", IGNORECASE), stripUrl = url + 'index.php?p=%s' % name,
prevSearch=compile(linkSearch % ('previous',), IGNORECASE), imageSearch = compile(tagre("img", "src", r"(http://[a-z0-9]*\.drunkduck\.com/[^/]*/pages/[^'/]+)", quote="'")),
help='Index format: n (unpadded)', prevSearch= compile(linkSearch % 'previous'),
namer=queryNamer('p', usePageUrl=True), help = 'Index format: n (unpadded)',
starter=bounceStarter('http://www.drunkduck.com/%s/' % (shortName,), compile(linkSearch % ('next',), IGNORECASE)) namer = queryNamer('p', usePageUrl=True),
)
) )
duckComics = [ comics = (
'0_Opposites_attract_0', '0_Opposites_attract_0',
'0_eight', '0_eight',
'101_Ways_to_Drive_a_Maren_Insane', '101_Ways_to_Drive_a_Maren_Insane',
@ -2275,7 +2274,7 @@ duckComics = [
'yay_ponys', 'yay_ponys',
'yoshi_freaks_real_life', 'yoshi_freaks_real_life',
'zuchini', 'zuchini',
] )
for shortName in duckComics: for name in comics:
globals()[shortName] = drunkDuck(shortName) add(name)

View file

@ -1,6 +1,8 @@
# -*- coding: iso-8859-1 -*- # -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam # Copyright (C) 2012 Bastian Kleineidam
from ..scraper import _BasicScraper
def fallenangel(name, shortname): def fallenangel(name, shortname):
pass # XXX pass # XXX

View file

@ -0,0 +1,309 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import make_scraper
from ..util import tagre, asciify
def add(name, repl=''):
    """Create the GoComics scraper class for one comic and register it.

    The class is stored in this module's globals under
    ``GoComics_<asciified name>``.

    name -- human-readable comic title, e.g. 'Alley Oop'
    repl -- string that replaces each space of the lowercased title when
            building the URL slug ('' gives 'alleyoop', '-' gives 'alley-oop')
    """
    baseUrl = 'http://www.gocomics.com/'
    comicname = asciify(name)
    # Only spaces are rewritten; any other punctuation in the title stays
    # in the slug as-is.
    shortname = name.lower().replace(' ', repl)
    classname = 'GoComics_%s' % comicname

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Build the image file name '<slug>_yyyymmdd.gif' from the page URL."""
        # Page URLs end in .../yyyy/mm/dd, so the date parts must be taken
        # from the right.  Splitting from the left would cut the URL at the
        # scheme and host ('http:', '', 'www.gocomics.com', ...) instead.
        _prefix, year, month, day = pageUrl.rsplit('/', 3)
        return "%s_%s%s%s.gif" % (shortname, year, month, day)

    globals()[classname] = make_scraper(classname,
        latestUrl=baseUrl + shortname,
        name='GoComics/' + comicname,
        stripUrl=baseUrl + shortname + '/%s',
        imageSearch=compile(tagre("img", "src", r'(http://assets\.amuniversal\.com/[0-9a-f]+)')),
        prevSearch=compile(tagre("a", "href", r'(/[^"]+/\d+/\d+/\d+)', after="prev")),
        help='Index format: yyyy/mm/dd',
        namer=namer,
    )
# Comic titles taken from http://www.gocomics.com/features
# note that comics from creators.com are not repeated here
#
# Each add() call below registers one GoComics_<name> scraper class in this
# module.  The URL slug is the lowercased title with every space replaced by
# `repl` (default: spaces are simply removed; repl='-' yields a hyphenated
# slug).
# NOTE(review): titles containing other punctuation ('HUBRIS!', 'Love Is...',
# 'Mr. Gigi and the Squid', 'U.S. Acres', 'W.T. Duck', 'Compu-toon', ...) keep
# that punctuation in the slug because only spaces are replaced -- confirm
# that these URLs actually resolve.
add('2 Cows and a Chicken')
add('9 Chickweed Lane')
add('9 to 5')
add('The Academia Waltz')
add('Adam at Home')
add('Agnes')
add('Alley Oop', repl='-')
add('Andertoons')
add('Andy Capp')
add('Angry Little Girls', repl='-')
add('Animal Crackers')
add('Annie')
add('The Argyle Sweater')
add('Arlo and Janis')
add('Ask Shagg')
add('BC')
add('Back in the Day')
add('Bad Reporter')
add('Baldo')
add('Ballard Street')
add('Banana Triangle', repl='-')
add('Barkeater Lake')
add('The Barn')
add('Barney and Clyde')
add('Basic Instructions')
add('Beardo')
add('Ben')
add('Berger and Wyse', repl='-')
add('Betty')
add('Bewley')
add('Biff and Riley', repl='-')
add('Big Nate')
add('The Big Picture')
add('Big Top')
add('Biographic')
add('Birdbrains')
add('Bliss')
add('Bloom County')
add('Bo Nanas')
add('Bob the Squirrel')
add('Boomerangs')
add('The Boondocks')
add('The Born Loser')
add('Bottomliners')
add('Bound and Gagged')
add('Break of Day')
add('Brevity')
add('Brewster Rockit')
add('Broom Hilda')
add('The Buckets')
add('Buni')
add('Cafe con Leche')
add('Calvin and Hobbes')
add('Candorville')
add('Cathy')
add('Cest la Vie')
add('Cheap Thrills Cuisine', repl='-')
add('Chuckle Bros')
add('Citizen Dog')
add('The City')
add('Cleats')
add('Close to Home')
add('Committed')
add('Compu-toon')
add('Cornered')
add('Cow and Boy')
add('CowTown')
add('Crumb')
add('Cul de Sac')
add('Daddys Home')
add('Dark Side of the Horse')
add('Deep Cover')
add('Diamond Lil')
add('Dick Tracy')
add('The Dinette Set')
add('Dixie Drive', repl='-')
add('Dog Eat Doug')
add('Dogs of C Kennel')
add('Domestic Abuse')
add('Doonesbury')
add('The Doozies')
add('Drabble')
add('DudeDude')
add('The Duplex')
add('Eek')
add('The Elderberries')
add('Endtown')
add('Eric the Circle', repl='-')
add('F Minus')
add('Family Tree')
add('Farcus')
add('Fat Cats', repl='-')
add('Flo and Friends')
add('The Flying McCoys')
add('Foolish Mortals', repl='-')
add('For Better or For Worse')
add('For Heavens Sake')
add('Fort Knox')
add('FoxTrot')
add('FoxTrot Classics')
add('Frank and Ernest')
add('Frazz')
add('Fred Basset')
add('Free Range')
add('Freshly Squeezed')
add('Frog Applause')
add('The Fusco Brothers')
add('Garfield')
add('Garfield Minus Garfield')
add('Gasoline Alley')
add('Geech')
add('Get a Life')
add('Get Fuzzy')
add('Gil Thorp')
add('Ginger Meggs')
add('Gor Dominical')
add('Graffiti')
add('Grand Avenue')
add('Gray Matters')
add('The Grizzwells')
add('Haiku Ewe')
add('Ham Shears')
add('Health Capsules')
add('Heart of the City')
add('Heathcliff')
add('Heavenly Nostrils')
add('Herb and Jamaal')
add('Herman')
add('Home and Away')
add('HUBRIS!')
add('The Humble Stumble')
add('Imagine This')
add('In the Bleachers')
add('In the Sticks')
add('Incidental Comics')
add('Ink Pen')
add('Inspector Dangers Crime Quiz')
add('Its All About You')
add('Janes World')
add('Jims Journal')
add('Joe Vanilla')
add('Jump Start')
add('The K Chronicles')
add('KidCity')
add('KidSpot')
add('Kit N Carlyle')
add('Kitchen Capers')
add('Kliban')
add('Klibans Cats')
add('The Knight Life')
add('La Cucaracha')
add('Last Kiss')
add('The LeftyBosco Picture Show')
add('Legend of Bill')
add('Liberty Meadows')
add('Lil Abner')
add('Lio')
add('Little Dog Lost')
add('Lola')
add('Loose Parts')
add('The Lost Bear')
add('Lost Side of Suburbia')
add('Love Is...')
add('Luann')
add('Lucky Cow')
add('Mac')
add('Magic in a Minute')
add('Maintaining')
add('Marias Day')
add('Marmaduke')
add('McArroni')
add('The Meaning of Lila')
add('Medium Large')
add('Meg Classics')
add('The Middletons')
add('Mike du Jour')
add('Minimum Security')
add('Moderately Confused')
add('Molly and the Bear')
add('Momma')
add('Monty')
add('Motley Classics')
add('Mr. Gigi and the Squid')
add('Mutt and Jeff')
add('My Cage')
add('MythTickle')
add('Nancy')
add('Nest Heads')
add('NEUROTICA')
add('New Adventures of Queen Victoria')
add('Non Sequitur')
add('The Norm Classics')
add('Nothing is Not Something')
add('Off the Mark')
add('Ollie and Quentin')
add('On A Claire Day')
add('One Big Happy')
add('Ordinary Bill')
add('The Other Coast')
add('Out of the Gene Pool Re-Runs')
add('Over the Hedge')
add('Overboard')
add('Oyster War')
add('PC and Pixel')
add('Peanuts')
add('Pearls Before Swine')
add('Pibgorn')
add('Pibgorn Sketches')
add('Pickles')
add('Pinkerton')
add('Pluggers')
add('Pooch Cafe')
add('PreTeena')
add('Prickly City')
add('Rabbits Against Magic')
add('Raising Duncan')
add('Real Life Adventures')
add('Reality Check')
add('Red and Rover')
add('Red Meat')
add('Reply All')
add('Rip Haywire')
add('Ripleys Believe It or Not')
add('Rose is Rose')
add('Rubes')
add('Rudy Park')
add('Savage Chickens')
add('Scary Gary')
add('Shirley and Son Classics')
add('Shoe')
add('Shoecabbage')
add('Shortcuts')
add('Skin Horse')
add('Skippy')
add('Slowpoke')
add('Soup to Nutz')
add('Speed Bump')
add('Spot the Frog')
add('Starslip')
add('Stone Soup')
add('Strange Brew')
add('The Sunshine Club')
add('Sylvia')
add('Tank McNamara')
add('Tarzan')
add('Ten Cats')
add('Tales of TerraTopia')
add('That is Priceless')
add('Thats Life')
add('Thatababy')
add('Thin Lines')
add('Tiny Sepuku')
add('TOBY')
add('Todays Dogg')
add('Tom the Dancing Bug')
add('Too Much Coffee Man')
add('Trivquiz')
add('Twaggies')
add('Uncle Arts Funland')
add('Unstrange Phenomena')
add('U.S. Acres')
add('Viivi and Wagner')
add('Watch Your Head')
add('Wee Pals')
add('Wizard of Id')
add('Working Daze')
add('Working It Out')
add('W.T. Duck')
add('Zack Hill')
add('Ziggy')
# Further comic lists for which no add() calls exist in this section:
# http://www.gocomics.com/explore/editorial_list
# XXX
# http://www.gocomics.com/explore/sherpa_list
# XXX

View file

@ -3,31 +3,29 @@
# Copyright (C) 2012 Bastian Kleineidam # Copyright (C) 2012 Bastian Kleineidam
from re import compile from re import compile
from ..scraper import _BasicScraper from ..scraper import make_scraper
from ..util import tagre from ..util import tagre
def keenSpot(name, urls): def add(name, urls):
classname = 'KeenSpot_%s' % name
if not isinstance(urls, tuple): if not isinstance(urls, tuple):
baseUrl = latestUrl = urls baseUrl = latestUrl = urls
else: else:
baseUrl, latestUrl = urls baseUrl, latestUrl = urls
return type('KeenSpot_%s' % name, globals()[classname] = make_scraper(classname,
(_BasicScraper,), name='KeenSpot/' + name,
dict( latestUrl=latestUrl,
name='KeenSpot/' + name, stripUrl=baseUrl + 'd/%s.html',
latestUrl=latestUrl, imageSearch = compile(tagre("img", "src", r'([^"]*comics/[^"]+)')),
stripUrl=baseUrl + 'd/%s.html', prevSearch = compile(tagre("a", "href", r'"([^"]*d/\d{8}\.html)') +
imageSearch = compile(tagre("img", "src", r'([^"]*comics/[^"]+)')), '(?:<img[^>]+?(?:name="previous_day"|alt="Previous"|src="[^"]*back[^"]*")|Previous comic)'),
prevSearch = compile(tagre("a", "href", r'"([^"]*d/\d{8}\.html)') + help = 'Index format: yyyymmdd',
'(?:<img[^>]+?(?:name="previous_day"|alt="Previous"|src="[^"]*back[^"]*")|Previous comic)'),
help = 'Index format: yyyymmdd',
)
) )
keenspotComics = { comics = {
'13thLabour': 'http://the13labour.comicgenesis.com/', '13thLabour': 'http://the13labour.comicgenesis.com/',
'1StComing': 'http://toon.comicgenesis.com/', '1StComing': 'http://toon.comicgenesis.com/',
'1StGradeArt': 'http://art.comicgenesis.com/', '1StGradeArt': 'http://art.comicgenesis.com/',
@ -1520,7 +1518,15 @@ keenspotComics = {
'Zortic': 'http://www.zortic.com/', 'Zortic': 'http://www.zortic.com/',
'ZosKias': 'http://kojika.comicgenesis.com/', 'ZosKias': 'http://kojika.comicgenesis.com/',
'ZuraZura': 'http://zurazura.comicgenesis.com/', 'ZuraZura': 'http://zurazura.comicgenesis.com/',
} }
for name, urls in keenspotComics.items(): for name, urls in comics.items():
globals()[name] = keenSpot(name, urls) add(name, urls)
#class Yirmumah(_BasicScraper):
# #http://yirmumah.keenspot.com/
# stripUrl = latestUrl + '?date=%s'
# imageSearch = compile(r'<img src="(strips/\d{8}\..*?)"')
# prevSearch = compile(r'<a href="(\?date=\d{8})">.*Previous')
# help = 'Index format: yyyymmdd'

View file

@ -2,7 +2,7 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam # Copyright (C) 2012 Bastian Kleineidam
from re import compile, sub from re import compile
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..helpers import indirectStarter, _PHPScraper from ..helpers import indirectStarter, _PHPScraper
from ..util import tagre from ..util import tagre
@ -64,35 +64,6 @@ class Nukees(_BasicScraper):
def nuklearpower(name, shortname):
baseUrl = 'http://www.nuklearpower.com/'
latestUrl = "%s%s/" % (baseUrl, shortname)
classname = sub("[^0-9a-zA-Z_]", "", name)
globals()[classname] = type('NuklearPower_%s' % classname,
(_BasicScraper,),
dict(
name='NuklearPower/' + classname,
latestUrl = latestUrl,
stripUrl = latestUrl + '%s',
imageSearch = compile(tagre("img", "src", r'(http://www\.nuklearpower\.com/comics/[^"]+)')),
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "Previous"),
help = 'Index format: yyyy/mm/dd/name',
)
)
npstrips = {
'8BitTheater': '8-bit-theater',
'Warbot': 'warbot',
'HowIKilledYourMaster': 'hikym',
'AtomicRobo': 'atomic-robo',
}
for name, shortname in npstrips.items():
nuklearpower(name, shortname)
class NekoTheKitty(_PHPScraper): class NekoTheKitty(_PHPScraper):
basePath = 'http://www.nekothekitty.net/cusp/' basePath = 'http://www.nekothekitty.net/cusp/'
latestUrl = basePath latestUrl = basePath

View file

@ -0,0 +1,26 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import make_scraper
from ..util import tagre
def add(name, shortname):
    """Register the scraper class for one Nuklear Power comic.

    The class is stored in this module's globals as ``NuklearPower_<name>``.

    name      -- comic name used in the class name and in 'NuklearPower/<name>'
    shortname -- path component of the comic below http://www.nuklearpower.com/
    """
    comicUrl = ''.join(('http://www.nuklearpower.com/', shortname, '/'))
    scraperName = 'NuklearPower_%s' % name
    # Collect the class attributes first; they are handed to make_scraper()
    # as keyword arguments.
    attributes = dict(
        name='NuklearPower/' + name,
        latestUrl=comicUrl,
        stripUrl=comicUrl + '%s',
        imageSearch=compile(tagre("img", "src", r'(http://www\.nuklearpower\.com/comics/[^"]+)')),
        prevSearch=compile(tagre("a", "href", r'([^"]+)') + "Previous"),
        help='Index format: yyyy/mm/dd/name',
    )
    globals()[scraperName] = make_scraper(scraperName, **attributes)
# Register one NuklearPower_<name> scraper per comic; the second argument is
# the comic's path component below http://www.nuklearpower.com/.
add('8BitTheater', '8-bit-theater')
add('Warbot', 'warbot')
add('HowIKilledYourMaster', 'hikym')
add('AtomicRobo', 'atomic-robo')

View file

@ -2,48 +2,39 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam # Copyright (C) 2012 Bastian Kleineidam
from re import compile from re import compile
from ..scraper import _BasicScraper from ..scraper import make_scraper
from ..helpers import bounceStarter from ..helpers import bounceStarter
from ..util import tagre from ..util import tagre
def smackJeeves(names): def add(name):
classname = 'SmackJeeves/' + name
# XXX mature content can be viewed directly with: # XXX mature content can be viewed directly with:
# http://www.smackjeeves.com/mature.php?ref=<percent-encoded-url> # http://www.smackjeeves.com/mature.php?ref=<percent-encoded-url>
class _SJScraper(_BasicScraper): baseUrl = 'http://%s.smackjeeves.com/comics/' % name
stripUrl = property(lambda self: self.baseUrl + self.shortName)
imageSearch = compile(tagre("img", "src", r'(http://www\.smackjeeves\.com/images/uploaded/comics/[^"]*)'))
prevSearch = compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + '<img[^>]*alt="< Previous"')
help = 'Index format: nnnn (some increasing number)'
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):
return pageUrl.split('/')[-2] return pageUrl.split('/')[-2]
globals()[classname] = make_scraper(classname,
starter=bounceStarter(baseUrl, compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + '<img[^>]*alt="Next >"')),
stripUrl = baseUrl,
imageSearch = compile(tagre("img", "src", r'(http://www\.smackjeeves\.com/images/uploaded/comics/[^"]*)')),
prevSearch = compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + '<img[^>]*alt="< Previous"'),
help = 'Index format: nnnn (some increasing number)',
namer = namer,
)
def makeScraper(shortName): add('20galaxies')
baseUrl = 'http://%s.smackjeeves.com/comics/' % shortName add('axe13')
return type('SmackJeeves_%s' % shortName, add('beartholomew')
(_SJScraper,), add('bliss')
dict( add('durian')
name='SmackJeeves/' + shortName, add('heard')
baseUrl=baseUrl, add('mpmcomic')
starter=bounceStarter(baseUrl, compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + '<img[^>]*alt="Next >"')) add('nlmo-project')
) add('paranoidloyd')
) add('thatdreamagain')
return dict((name, makeScraper(name)) for name in names) add('wowcomics')
globals().update(smackJeeves([
'20galaxies',
'axe13',
'beartholomew',
'bliss',
'durian',
'heard',
'mpmcomic',
'nlmo-project',
'paranoidloyd',
'thatdreamagain',
'wowcomics',
]))

View file

@ -2,41 +2,34 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam # Copyright (C) 2012 Bastian Kleineidam
from ..scraper import _BasicScraper from re import compile
from ..scraper import make_scraper
def snafuComics(): def add(name, host):
class _SnafuComics(_BasicScraper): baseUrl = 'http://%s.snafu-comics.com/' % host
imageSearch = compile(r'<img src=http://\w+\.snafu-comics\.com/(comics/\d{6}_\w*\.\w{3,4})') classname = 'SnafuComics_%s' % name
prevSearch = compile(r'<a href="(\?comic_id=\d+)">Previous</a>')
help = 'Index format: n (unpadded)'
@property globals()[classname] = make_scraper(classname,
def stripUrl(self): latestUrl = baseUrl,
return self.latestUrl + 'index.php?strip_id=%s' stripUrl = baseUrl + 'index.php?strip_id=%s',
imageSearch = compile(r'<img src=http://\w+\.snafu-comics\.com/(comics/\d{6}_\w*\.\w{3,4})'),
prevSearch = compile(r'<a href="(\?comic_id=\d+)">Previous</a>'),
help = 'Index format: n (unpadded)',
)
comics = {
'Grim': 'grim',
'KOF': 'kof',
'PowerPuffGirls': 'ppg',
'Snafu': 'www',
'Tin': 'tin',
'TW': 'tw',
'Sugar': 'sugar',
'SF': 'sf',
'Titan': 'titan',
'EA': 'ea',
'Zim': 'zim',
'Soul': 'soul',
'FT': 'ft',
'Bunnywith': 'bunnywith',
'Braindead': 'braindead',
}
url = 'http://%s.snafu-comics.com/' add('Grim', 'grim')
return dict((name, type('SnafuComics_%s' % name, add('KOF', 'kof')
(_SnafuComics,), add('PowerPuffGirls', 'ppg')
dict(name='SnafuComics/' + name, add('Snafu', 'www')
latestUrl=url % host))) add('Tin', 'tin')
for name, host in comics.items()) add('TW', 'tw')
add('Sugar', 'sugar')
globals().update(snafuComics()) add('SF', 'sf')
add('Titan', 'titan')
add('EA', 'ea')
add('Zim', 'zim')
add('Soul', 'soul')
add('FT', 'ft')
add('Bunnywith', 'bunnywith')
add('Braindead', 'braindead')

View file

@ -2,29 +2,13 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam # Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE from re import compile
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..helpers import bounceStarter, indirectStarter from ..helpers import bounceStarter, indirectStarter
from ..util import getQueryParams, tagre from ..util import getQueryParams, tagre
class UglyHill(_BasicScraper):
latestUrl = 'http://www.uglyhill.com/'
stripUrl = latestUrl + 'd/%s.html'
imageSearch = compile(tagre("img", "src", r'(/comic[s|/][^"]+)'))
prevSearch = compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif")
help = 'Index format: yyyymmdd'
class UnderPower(_BasicScraper):
latestUrl = 'http://underpower.non-essential.com/'
stripUrl = latestUrl + 'index.php?comic=%s'
imageSearch = compile(r'<img src="(comics/\d{8}\..+?)"')
prevSearch = compile(r'<a href="(/index.php\?comic=\d{8})"><img src="images/previous-comic\.gif"')
help = 'Index format: yyyymmdd'
class Undertow(_BasicScraper): class Undertow(_BasicScraper):
stripUrl = 'http://undertow.dreamshards.org/%s' stripUrl = 'http://undertow.dreamshards.org/%s'
imageSearch = compile(r'<img src="(.+?)"') imageSearch = compile(r'<img src="(.+?)"')
@ -52,24 +36,3 @@ class UserFriendly(_BasicScraper):
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):
return 'uf%s' % (getQueryParams(pageUrl)['id'][0][2:],) return 'uf%s' % (getQueryParams(pageUrl)['id'][0][2:],)
class UndeadFriend(_BasicScraper):
latestUrl = 'http://www.undeadfriend.com/'
stripUrl = latestUrl + 'd/%s.html'
imageSearch = compile(r'src="(http://www\.undeadfriend\.com/comics/.+?)"', IGNORECASE)
prevSearch = compile(r'<a.+?href="(http://www\.undeadfriend\.com/d/\d+?\.html)"><img border="0" name="previous_day" alt="Previous comic" src="http://www\.undeadfriend\.com/images/previous_day\.jpg', IGNORECASE)
help = 'Index format: yyyymmdd'
class UnspeakableVault(_BasicScraper):
stripUrl = 'http://www.macguff.fr/goomi/unspeakable/WEBIMAGES/CARTOON/vault%s.html'
imageSearch = compile(r'(WEBIMAGES/CARTOON/.+?)"')
prevSearch = compile(r'PREVIOUS.+?" href="(.+?)"')
help = 'Index format: nn or nnn'
starter = indirectStarter('http://www.macguff.fr/goomi/unspeakable/home.html',
compile(r'http://www.macguff.fr/goomi/unspeakable/(.+?)"'))
@classmethod
def namer(cls, imageUrl, imageSearch):
return '%s-%s' % (imageSearch.split('/')[-1].split('.')[0],imageUrl.split('/')[-1].split('.')[0])

View file

@ -3,27 +3,19 @@
# Copyright (C) 2012 Bastian Kleineidam # Copyright (C) 2012 Bastian Kleineidam
from re import compile, sub from re import compile, sub
from ..scraper import make_scraper
from ..scraper import _BasicScraper
from ..util import fetchUrl, tagre from ..util import fetchUrl, tagre
class _UClickScraper(_BasicScraper): def add(name, shortName):
homepage = 'http://content.uclick.com/a2z.html' homepage = 'http://content.uclick.com/a2z.html'
baseUrl = 'http://www.uclick.com/client/zzz/%s/' baseUrl = 'http://www.uclick.com/client/zzz/%s/'
stripUrl = property(lambda self: self.latestUrl + '%s/') latestUrl = baseUrl % shortName
imageSearch = compile(tagre("img", "src", r'(http://synd\.imgsrv\.uclick\.com/comics/\w+/\d{4}/[^"]+\.gif)')) classname = 'UClick_%s' % name
prevSearch = compile(tagre("a", "href", r'(/client/zzz/\w+/\d{4}/\d{2}/\d{2}/)') + 'Previous date')
help = 'Index format: yyyy/mm/dd'
@classmethod
def starter(cls):
return cls.baseUrl % (cls.shortName,)
@classmethod @classmethod
def fetchSubmodules(cls): def fetchSubmodules(cls):
exclusions = ('index',) exclusions = ('index',)
# XXX refactor this mess # XXX refactor this mess
submoduleSearch = compile(tagre("a", "href", r'(http://content\.uclick\.com/content/\w+\.html)')) submoduleSearch = compile(tagre("a", "href", r'(http://content\.uclick\.com/content/\w+\.html)'))
partsMatch = compile(tagre("a", "href", r'http://content\.uclick\.com/content/(\w+?)\.html')) partsMatch = compile(tagre("a", "href", r'http://content\.uclick\.com/content/(\w+?)\.html'))
@ -43,11 +35,15 @@ class _UClickScraper(_BasicScraper):
return [normalizeName(name) for part, name in possibles if part not in exclusions and fetchSubmodule(part)] return [normalizeName(name) for part, name in possibles if part not in exclusions and fetchSubmodule(part)]
globals()[classname] = make_scraper(classname,
name='UClick/' + name,
latestUrl = latestUrl,
stripUrl = latestUrl + '%s/',
imageSearch = compile(tagre("img", "src", r'(http://synd\.imgsrv\.uclick\.com/comics/\w+/\d{4}/[^"]+\.gif)')),
prevSearch = compile(tagre("a", "href", r'(/client/zzz/\w+/\d{4}/\d{2}/\d{2}/)') + 'Previous date'),
help = 'Index format: yyyy/mm/dd',
)
def uclick(name, shortName):
return type('UClick_%s' % name,
(_UClickScraper,),
dict(name='UClick/' + name, shortName=shortName))
comics = { comics = {
'5thWave': 'fw', '5thWave': 'fw',
@ -278,6 +274,7 @@ comics = {
'ZackHill': 'crzhi', 'ZackHill': 'crzhi',
'ZiggySpanish': 'spzi', 'ZiggySpanish': 'spzi',
'Ziggy': 'zi', 'Ziggy': 'zi',
} }
globals().update(dict((item[0], uclick(*item)) for item in comics.items())) for name, shortname in comics.items():
add(name, shortname)

View file

@ -2,11 +2,13 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam # Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE, MULTILINE from re import compile
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..util import tagre
# XXX make dynamic
class _VGCats(_BasicScraper): class _VGCats(_BasicScraper):
latestUrl = 'http://www.vgcats.com/comics/' latestUrl = 'http://www.vgcats.com/comics/'
imageSearch = compile(r'<img src="(images/\d{6}\..+?)"') imageSearch = compile(r'<img src="(images/\d{6}\..+?)"')
@ -31,7 +33,6 @@ class Adventure(_VGCats):
class ViiviJaWagner(_BasicScraper): class ViiviJaWagner(_BasicScraper):
latestUrl = 'http://www.hs.fi/viivijawagner/' latestUrl = 'http://www.hs.fi/viivijawagner/'
imageSearch = compile(r'<img id="strip\d+"\s+src="([^"]+)"', IGNORECASE) imageSearch = compile(tagre("link", "href", r'(http://hs12\.snstatic\.fi/webkuva/oletus/[^"]+)', before="image_src"))
prevSearch = compile(r'<a href="(.+?)"[^>]+?>\nEdellinen&nbsp;\n<img src="http://www.hs.fi/static/hs/img/viivitaakse.gif"', MULTILINE | IGNORECASE) prevSearch = compile(tagre("a", "href", r'(/viivijawagner/\d+)', before="prev-cm"))
# XXX ? help = 'Index format: none'
help = 'Index format: shrugs!'

View file

@ -2,15 +2,15 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam # Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE, DOTALL from re import compile, IGNORECASE
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..helpers import queryNamer, bounceStarter from ..util import tagre
class WayfarersMoon(_BasicScraper): class WayfarersMoon(_BasicScraper):
latestUrl = 'http://www.wayfarersmoon.com/' latestUrl = 'http://www.wayfarersmoon.com/'
stripUrl = latestUrl + 'index.php\?page=%s' stripUrl = latestUrl + 'index.php?page=%s'
imageSearch = compile(r'<img src="(/admin.+?)"') imageSearch = compile(r'<img src="(/admin.+?)"')
prevSearch = compile(r'<a href="(.+?)".+?btn_back.gif') prevSearch = compile(r'<a href="(.+?)".+?btn_back.gif')
help = 'Index format: nn' help = 'Index format: nn'
@ -32,7 +32,6 @@ class WhiteNoise(_BasicScraper):
help = 'Index format: n' help = 'Index format: n'
class WhyTheLongFace(_BasicScraper): class WhyTheLongFace(_BasicScraper):
latestUrl = 'http://www.absurdnotions.org/wtlf200709.html' latestUrl = 'http://www.absurdnotions.org/wtlf200709.html'
stripUrl = 'http://www.absurdnotions.org/wtlf%s.html' stripUrl = 'http://www.absurdnotions.org/wtlf%s.html'
@ -41,23 +40,12 @@ class WhyTheLongFace(_BasicScraper):
help = 'Index format: yyyymm' help = 'Index format: yyyymm'
class Wigu(_BasicScraper): class Wigu(_BasicScraper):
latestUrl = 'http://www.wigu.com/wigu/' latestUrl = 'http://wigucomics.com/'
stripUrl = latestUrl + '?date=%s' stripUrl = latestUrl + 'adventures/index.php?comic=%s'
imageSearch = compile(r'<img src="(strips/\d{8}\..+?)" alt=""') imageSearch = compile(tagre("img", "src", r'(/adventures/comics/[^"]+)'))
prevSearch = compile(r'<a href="(.+?)"[^>]+?>< PREV COMIC</a> ') prevSearch = compile(tagre("a", "href", r'(/adventures/index\.php\?comic=\d+)', after="go back"))
help = 'Index format: yyyymmdd' help = 'Index format: n'
class WiguTV(_BasicScraper):
latestUrl = 'http://jjrowland.com/'
stripUrl = latestUrl + 'archive/%s.html'
imageSearch = compile(r'"(/comics/.+?)"')
prevSearch = compile(r'<a href="(/archive/.+?)"[^>]+?>&nbsp;')
help = 'Index format: yyyymmdd'
class WotNow(_BasicScraper): class WotNow(_BasicScraper):
@ -68,7 +56,6 @@ class WotNow(_BasicScraper):
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
class WorldOfWarcraftEh(_BasicScraper): class WorldOfWarcraftEh(_BasicScraper):
latestUrl = 'http://woweh.com/' latestUrl = 'http://woweh.com/'
stripUrl = None stripUrl = None
@ -77,46 +64,11 @@ class WorldOfWarcraftEh(_BasicScraper):
class Wulffmorgenthaler(_BasicScraper): class Wulffmorgenthaler(_BasicScraper):
latestUrl = 'http://www.wulffmorgenthaler.com/' latestUrl = 'http://wumocomicstrip.com/'
stripUrl = latestUrl + 'Default.aspx?id=%s' stripUrl = latestUrl + '%s/'
imageSearch = compile(r'img id="ctl00_content_Strip1_imgStrip".+?class="strip" src="(striphandler\.ashx\?stripid=[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"') imageSearch = compile(tagre("img", "src", r'(/img/strip/thumb/[^"]+)'))
prevSearch = compile(r'<a href="(/default\.aspx\?id=[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})" id="ctl00_content_Strip1_aPrev">') prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<span>Previous")
help = 'Index format: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx (GUID)' help = 'Index format: yyyy/mm/dd'
namer = queryNamer('stripid')
def webcomicsNation():
class _WebcomicsNation(_BasicScraper):
imageSearch = compile(r'<a name="strip\d*?">.*?<img[^>]+?src="([^"]*?memberimages/.+?)"', IGNORECASE + DOTALL)
prevSearch = compile(r'href="([^"]*?whichbutton=prev[^"]*?)"', IGNORECASE)
help = 'Index format: nnnn (non-contiguous)'
@property
def stripUrl(self):
return self.baseUrl + '?view=archive&amp;chapter=%s'
comics = {
'AgnesQuill': 'daveroman/agnes/',
'Elvenbaath': 'tdotodot2k/elvenbaath/',
'IrrationalFears': 'uvernon/irrationalfears/',
'KismetHuntersMoon': 'laylalawlor/huntersmoon/',
'SaikoAndLavender': 'gc/saiko/',
'MyMuse': 'gc/muse/',
'NekkoAndJoruba': 'nekkoandjoruba/nekkoandjoruba/',
'JaxEpoch': 'johngreen/quicken/',
'QuantumRockOfAges': 'DreamchildNYC/quantum/',
'ClownSamurai' : 'qsamurai/clownsamurai/',
}
return dict((name, type('WebcomicsNation_%s' % name,
(_WebcomicsNation,),
dict(name='WebcomicsNation/' + name,
latestUrl='http://www.webcomicsnation.com/' + subpath)))
for name, subpath in comics.items())
globals().update(webcomicsNation())
class WhiteNoise(_BasicScraper): class WhiteNoise(_BasicScraper):
@ -127,7 +79,6 @@ class WhiteNoise(_BasicScraper):
help = 'Index format: n' help = 'Index format: n'
class WapsiSquare(_BasicScraper): class WapsiSquare(_BasicScraper):
latestUrl = 'http://wapsisquare.com/' latestUrl = 'http://wapsisquare.com/'
stripUrl = latestUrl + 'comic/%s' stripUrl = latestUrl + 'comic/%s'
@ -136,77 +87,14 @@ class WapsiSquare(_BasicScraper):
help = 'Index format: strip-name' help = 'Index format: strip-name'
class WrongWay(_BasicScraper):
latestUrl = 'http://www.wrongwaycomics.com/'
stripUrl = latestUrl + '%s.html'
imageSearch = compile(r'<img src="(comics/.+?)"')
prevSearch = compile(r' <a class="comicNav" href="(.+?)" onmouseover="previousLinkIn\(\)"')
help = 'Index format: nnn'
class WeCanSleepTomorrow(_BasicScraper): class WeCanSleepTomorrow(_BasicScraper):
latestUrl = 'http://wecansleeptomorrow.com/' latestUrl = 'http://wecansleeptomorrow.com/'
imageSearch = compile(r'<img src="(http://wecansleeptomorrow.com/comics/.+?)"') stripUrl = latestUrl + '%s/'
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">') imageSearch = compile(tagre("img", "src", r'(http://wecansleeptomorrow\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://wecansleeptomorrow\.com/[^"]+)', after="prev"))
help = 'Index format: yyyy/mm/dd/stripname' help = 'Index format: yyyy/mm/dd/stripname'
class _WLP(_BasicScraper):
imageSearch=compile(r'SRC="(http://www.wlpcomics.com/adult/.+?|http://www.wlpcomics.com/general/.+?)"', IGNORECASE)
prevSearch=compile(r'</a> <A HREF="(\w+.html)">Previous Page</a>', IGNORECASE)
help='Index format: nnn'
@property
def baseUrl(self):
return 'http://www.wlpcomics.com/%s' % (self.path,)
@property
def stripUrl(self):
return self.baseUrl + '%s.html'
def namer(self, imageUrl, pageUrl):
return pageUrl.split('/')[-1].split('.')[0]
def starter(self):
# XXX: ergh
meth = bounceStarter(self.baseUrl, compile(r'</a> <A HREF="(\w+.html)">Next Page</a>', IGNORECASE))
return meth.__get__(self, type(self))()
class ChichiChan(_WLP):
name = 'WLP/ChichiChan'
path = 'adult/chichi/'
class ChocolateMilkMaid(_WLP):
name = 'WLP/ChocolateMilkMaid'
path = 'adult/cm/'
class MaidAttack(_WLP):
name = 'WLP/MaidAttack'
path = 'general/maidattack/'
class ShadowChasers(_WLP):
name = 'WLP/ShadowChasers'
path = 'general/shadowchasers/'
class Stellar(_WLP):
name = 'WLP/Stellar'
path = 'adult/stellar/'
class Wondermark(_BasicScraper): class Wondermark(_BasicScraper):
latestUrl = 'http://wondermark.com/' latestUrl = 'http://wondermark.com/'
stripUrl = latestUrl + '%s/' stripUrl = latestUrl + '%s/'

View file

@ -0,0 +1,31 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE, DOTALL
from ..scraper import make_scraper
def add(name, subpath):
    """Create a WebcomicsNation scraper class and register it in this module.

    name is the comic name; it becomes part of the generated class name
    ('WebcomicsNation_<name>') and of the scraper name
    ('WebcomicsNation/<name>').
    subpath is the comic's path below http://www.webcomicsnation.com/ and
    is used to build the URL of the latest strip.
    """
    baseUrl = 'http://www.webcomicsnation.com/'
    classname = 'WebcomicsNation_%s' % name

    # The generated class is stored under its class name in the module
    # namespace.
    globals()[classname] = make_scraper(classname,
        name = 'WebcomicsNation/' + name,
        latestUrl = baseUrl + subpath,
        # Use a plain '&' separator: with the HTML entity '&amp;' the query
        # parameter would be named 'amp;chapter' instead of 'chapter'.
        # NOTE(review): this template does not contain subpath - confirm
        # that the site resolves chapters from the root URL.
        stripUrl = baseUrl + '?view=archive&chapter=%s',
        imageSearch = compile(r'<a name="strip\d*?">.*?<img[^>]+?src="([^"]*?memberimages/.+?)"', IGNORECASE + DOTALL),
        prevSearch = compile(r'href="([^"]*?whichbutton=prev[^"]*?)"', IGNORECASE),
        help = 'Index format: nnnn (non-contiguous)',
    )
# Register one scraper per comic: (comic name, path below the site root).
# The order matches the order in which the classes are created.
for _comic, _subpath in (
    ('AgnesQuill', 'daveroman/agnes/'),
    ('Elvenbaath', 'tdotodot2k/elvenbaath/'),
    ('IrrationalFears', 'uvernon/irrationalfears/'),
    ('KismetHuntersMoon', 'laylalawlor/huntersmoon/'),
    ('SaikoAndLavender', 'gc/saiko/'),
    ('MyMuse', 'gc/muse/'),
    ('NekkoAndJoruba', 'nekkoandjoruba/nekkoandjoruba/'),
    ('JaxEpoch', 'johngreen/quicken/'),
    ('QuantumRockOfAges', 'DreamchildNYC/quantum/'),
    ('ClownSamurai', 'qsamurai/clownsamurai/'),
):
    add(_comic, _subpath)
# Do not leave the loop variables behind in the module namespace.
del _comic, _subpath

View file

@ -0,0 +1,32 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE
from ..scraper import make_scraper
from ..helpers import bounceStarter
def add(name, path):
    """Create a WLP scraper class for one comic and register it in this module.

    name is the comic name; the class is registered as 'WLP_<name>' and the
    scraper name is 'WLP/<name>'.
    path is the comic's directory below http://www.wlpcomics.com/, for
    example 'adult/chichi/'.
    """
    baseUrl = 'http://www.wlpcomics.com/' + path
    # Keep the class name a valid identifier; the '/'-separated form is
    # passed separately as the scraper name.
    classname = 'WLP_' + name

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return the last path component of pageUrl up to its first dot."""
        return pageUrl.split('/')[-1].split('.')[0]

    # Dots in host names and file extensions are escaped so they only match
    # a literal '.' and not an arbitrary character.
    globals()[classname] = make_scraper(classname,
        name = 'WLP/' + name,
        starter = bounceStarter(baseUrl, compile(r'</a> <A HREF="(\w+\.html)">Next Page</a>', IGNORECASE)),
        stripUrl = baseUrl + '%s.html',
        imageSearch = compile(r'SRC="(http://www\.wlpcomics\.com/adult/.+?|http://www\.wlpcomics\.com/general/.+?)"', IGNORECASE),
        prevSearch = compile(r'</a> <A HREF="(\w+\.html)">Previous Page</a>', IGNORECASE),
        namer = namer,
        help = 'Index format: nnn',
    )
# Register one scraper per comic: (comic name, directory below the site root).
# The order matches the order in which the classes are created.
for _comic, _path in (
    ('ChichiChan', 'adult/chichi/'),
    ('ChocolateMilkMaid', 'adult/cm/'),
    ('MaidAttack', 'general/maidattack/'),
    ('ShadowChasers', 'general/shadowchasers/'),
    ('Stellar', 'adult/stellar/'),
):
    add(_comic, _path)
# Do not leave the loop variables behind in the module namespace.
del _comic, _path

View file

@ -6,26 +6,19 @@ from re import compile
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..helpers import bounceStarter from ..helpers import bounceStarter
from ..util import tagre
class xkcd(_BasicScraper): class xkcd(_BasicScraper):
starter = bounceStarter('http://xkcd.com/', compile(r'<a rel="next" href="(/?\d+/?)"[^>]*>Next')) baseUrl = 'http://xkcd.com/'
stripUrl = 'http://xkcd.com/c%s.html' starter = bounceStarter(baseUrl, compile(tagre("a", "href", r'(/\d+/)', before="next")))
imageSearch = compile(r'<img[^<]+src="(http://imgs.xkcd.com/comics/[^<>"]+)"') stripUrl = baseUrl + '%s/'
prevSearch = compile(r'<a rel="prev" href="(/?\d+/?)"[^>]*>&lt; Prev') imageSearch = compile(tagre("img", "src", r'(http://imgs\.xkcd\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/\d+/)', before="prev"))
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
@classmethod @classmethod
def namer(cls, imageUrl, pageUrl): def namer(cls, imageUrl, pageUrl):
index = int(pageUrl.rstrip('/').split('/')[-1]) index = int(pageUrl.rstrip('/').rsplit('/', 1)[-1])
name = imageUrl.split('/')[-1].split('.')[0] name = imageUrl.rsplit('/', 1)[-1].split('.')[0]
return 'c%03d-%s' % (index, name) return '%03d-%s' % (index, name)
class xkcdSpanish(_BasicScraper):
latestUrl = 'http://es.xkcd.com/xkcd-es/'
stripUrl = latestUrl + 'strips/%s/'
imageSearch = compile(r'src="(/site_media/strips/.+?)"')
prevSearch = compile(r'<a rel="prev" href="(http://es.xkcd.com/xkcd-es/strips/.+?)">Anterior</a>')
help = 'Index format: stripname'

View file

@ -2,30 +2,24 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam # Copyright (C) 2012 Bastian Kleineidam
from re import compile, MULTILINE
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..util import tagre
class YAFGC(_BasicScraper): class YAFGC(_BasicScraper):
latestUrl = 'http://yafgc.shipsinker.com/' latestUrl = 'http://yafgc.net/'
stripUrl = latestUrl + 'index.php?strip_id=%s' stripUrl = latestUrl + '?id=%s'
imageSearch = compile(r'(istrip_.+?)"') imageSearch = compile(tagre("img", "src", r'(http://yafgc\.net/img/comic/\d+\.jpg)'))
prevSearch = compile(r'(/.+?)">\r\n.+?prev.gif', MULTILINE) prevSearch = compile(tagre("a", "href", r'(http://yafgc\.net/\?id=\d+)') +
tagre("img", "src", r'/img/navbar/go_to_previous\.gif'))
help = 'Index format: n' help = 'Index format: n'
class YouSayItFirst(_BasicScraper): class YouSayItFirst(_BasicScraper):
latestUrl = 'http://www.yousayitfirst.com/' latestUrl = 'http://www.yousayitfirst.com/'
stripUrl = 'http://www.soapylemon.com/comics/index.php?date=%s' stripUrl = latestUrl + 'comics/index.php?date=%s'
imageSearch = compile(r'(http://.+?comics/.+?.jpg)[^<]') imageSearch = compile(tagre("img", "src", r'(http://www\.yousayitfirst\.com/comics/[^"]+)'))
prevSearch = compile(r'(/comics/index.php\?date=.+?)".+?P') prevSearch = compile(tagre("a", "href", r'(http://www\.yousayitfirst\.com/comics/index\.php\?date=\d+)') + "Previous")
help = 'Index format: yyyymmdd' help = 'Index format: yyyymmdd'
class Yirmumah(_BasicScraper):
latestUrl = 'http://yirmumah.net/archives.php'
stripUrl = latestUrl + '?date=%s'
imageSearch = compile(r'<img src="(strips/\d{8}\..*?)"')
prevSearch = compile(r'<a href="(\?date=\d{8})">.*Previous')
help = 'Index format: yyyymmdd'

View file

@ -3,20 +3,24 @@
# Copyright (C) 2012 Bastian Kleineidam # Copyright (C) 2012 Bastian Kleineidam
from re import compile from re import compile
from ..scraper import _BasicScraper from ..scraper import _BasicScraper
from ..util import tagre
from ..helpers import bounceStarter
class Zapiro(_BasicScraper): class Zapiro(_BasicScraper):
latestUrl = 'http://www.mg.co.za/zapiro/all' baseUrl = 'http://www.mg.co.za/zapiro/'
imageSearch = compile(r'<img src="(cartoons/[^"]+)"') starter = bounceStarter(baseUrl,
prevSearch = compile(r'<a href="([^"]+)">&gt;') compile(tagre("a", "href", r'(http://mg\.co\.za/cartoon/[^"]+)')+"Newer"))
stripUrl = 'http://mg.co.za/cartoon/%s'
imageSearch = compile(tagre("img", "src", r'(http://cdn\.mg\.co\.za/crop/content/cartoons/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://mg\.co\.za/cartoon/[^"]+)')+"Older")
help = 'Index format: yyyy-mm-dd-stripname'
class ZombieHunters(_BasicScraper): class ZombieHunters(_BasicScraper):
latestUrl = 'http://www.thezombiehunters.com/' latestUrl = 'http://www.thezombiehunters.com/'
stripUrl = latestUrl + 'index.php?strip_id=%s' stripUrl = latestUrl + '?strip_id=%s'
imageSearch = compile(r'"(.+?strips/.+?)"') imageSearch = compile(tagre("img", "src", r'(/istrip_files/strips/[^"]+)'))
prevSearch = compile(r'</a><a href="(.+?)"><img id="prevcomic" ') prevSearch = compile(tagre("a", "href", r'(\?strip_id=\d+)') + tagre("img", "id", "prevcomic"))
help = 'Index format: n(unpadded)' help = 'Index format: n(unpadded)'