Fix some comics.

This commit is contained in:
Bastian Kleineidam 2012-11-21 21:57:26 +01:00
parent 54eaadf4fc
commit 958a788550
40 changed files with 823 additions and 1245 deletions

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
import urllib2
import os
import locale

View file

@ -1,3 +1,4 @@
# Copyright (C) 2012 Bastian Kleineidam
"""
Define basic configuration data like version or application name.
"""

View file

@ -25,21 +25,16 @@ def regexNamer(regex):
return _namer
def constStarter(latestUrl):
"""Start from constant URL."""
# The inner function takes no arguments and simply hands back the URL
# captured from the enclosing call; no page is fetched.
@staticmethod
def _starter():
return latestUrl
# The staticmethod object itself is returned (presumably to be assigned as
# a scraper class's ``starter`` attribute -- confirm against callers).
return _starter
def bounceStarter(latestUrl, nextSearch):
    """Get start URL by "bouncing" back and forth one time.

    The latest page is searched with the owning class's ``prevSearch``
    pattern, then the page found that way is searched with ``nextSearch``.
    The URL found by the second search is the start URL.

    latestUrl: URL of the page to start bouncing from.
    nextSearch: compiled regular expression locating the "next" link.

    Returns a classmethod suitable for use as a scraper's ``starter``.
    The classmethod raises ValueError when either search finds nothing.
    """
    @classmethod
    def _starter(cls):
        # step back one strip ...
        prevUrl = fetchUrl(latestUrl, cls.prevSearch)
        if not prevUrl:
            raise ValueError("could not find prevSearch pattern %r in %s" % (cls.prevSearch.pattern, latestUrl))
        # ... then forward again
        url = fetchUrl(prevUrl, nextSearch)
        if not url:
            # report the page that was actually searched, not the latest page
            raise ValueError("could not find nextSearch pattern %r in %s" % (nextSearch.pattern, prevUrl))
        return url
    return _starter
@ -48,7 +43,10 @@ def indirectStarter(baseUrl, latestSearch):
"""Get start URL by indirection."""
@staticmethod
def _starter():
return fetchUrl(baseUrl, latestSearch)
url = fetchUrl(baseUrl, latestSearch)
if not url:
raise ValueError("could not find latestSearch pattern %r in %s" % (latestSearch.pattern, baseUrl))
return url
return _starter

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, MULTILINE
from ..util import tagre
from ..scraper import _BasicScraper

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..util import tagre

View file

@ -1,51 +1,28 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import _BasicScraper
from ..helpers import constStarter, bounceStarter
from ..helpers import bounceStarter, indirectStarter
from ..util import tagre, getQueryParams
class CalvinAndHobbes(_BasicScraper):
starter = bounceStarter('http://www.gocomics.com/calvinandhobbes/',
compile(tagre("a", "href", "(/calvinandhobbes/\d+/\d+/\d+)")+"Next feature</a>"))
stripUrl = 'http://www.gocomics.com/calvinandhobbes/%s'
imageSearch = compile(tagre("img", "src", "(http://assets\.amuniversal\.com/[a-f0-9]+)"))
prevSearch = compile(tagre("a", "href", "(/calvinandhobbes/\d+/\d+/\d+)")+"Previous feature</a>")
help = 'Index format: yyyy/mm/dd'
@classmethod
def namer(cls, imageUrl, pageUrl):
prefix, year, month, day = pageUrl.rsplit('/', 3)
return "%s%s%s.gif" % (year, month, day)
class CandyCartoon(_BasicScraper):
latestUrl = 'http://www.candycartoon.com/'
stripUrl = latestUrl + 'archives/%s.html'
imageSearch = compile(r'<img alt="[^"]*" src="(http://www\.candycartoon\.com/archives/[^"]+)"')
prevSearch = compile(r'<a href="(http://www\.candycartoon\.com/archives/\d{6}\.html)">prev')
help = 'Index format: nnnnnn'
class CaptainSNES(_BasicScraper):
latestUrl = 'http://captainsnes.com/'
stripUrl = latestUrl + '?date=%s'
imageSearch = compile(r'<img src=\'(http://www.captainsnes.com/comics/.+?)\'')
prevSearch = compile(r'<a href="http://www.captainsnes.com/(.+?)"><span class="prev">')
help = 'Index format: yyyymmdd'
latestUrl = 'http://www.captainsnes.com/'
stripUrl = latestUrl + '%s/'
imageSearch = compile(r"<img src='(http://www\.captainsnes\.com/comics/[^']+)'")
prevSearch = compile(r'<a href="(http://www\.captainsnes\.com/[^"]+)"><span class="prev">')
help = 'Index format: yyyy/mm/dd/nnn-stripname'
class CaribbeanBlue(_BasicScraper):
latestUrl = 'http://cblue.katbox.net/'
stripUrl = latestUrl + 'index.php?strip_id=%s'
imageSearch = compile(r'="(.+?strips/.+?)"')
prevSearch = compile(r'<a href="(.+?)"><img src="images/navigation_back.png"')
help = 'Index format: n (unpadded)'
stripUrl = latestUrl + 'archive/%s'
imageSearch = compile(tagre("img", "src", r'(http://cblue\.katbox\.net/wp-content/webcomic/cblue/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://cblue\.katbox\.net/archive/[^"]+)', after="previous"))
help = 'Index format: nnn-stripname'
class Catena(_BasicScraper):
@ -56,15 +33,6 @@ class Catena(_BasicScraper):
help = 'Index format: yyyy/mm/dd/<name>'
class Catharsis(_BasicScraper):
latestUrl = 'http://catharsiscomic.com/'
stripUrl = latestUrl + 'archive.php?strip=%s'
imageSearch = compile(r'<img src="(strips/.+?)"')
prevSearch = compile(r'<a href="(.+?)".+"Previous')
help = 'Index format: yymmdd-<your guess>.html'
class ChasingTheSunset(_BasicScraper):
latestUrl = 'http://www.fantasycomic.com/'
stripUrl = latestUrl + 'index.php?p=c%s'
@ -89,7 +57,6 @@ class Chisuji(_BasicScraper):
help = 'Index format: yyyy/mm/dd/strip-name'
class ChugworthAcademy(_BasicScraper):
latestUrl = 'http://chugworth.com/'
stripUrl = latestUrl + '?p=%s'
@ -98,7 +65,6 @@ class ChugworthAcademy(_BasicScraper):
help = 'Index format: n (unpadded)'
class ChugworthAcademyArchive(_BasicScraper):
latestUrl = 'http://chugworth.com/archive/?strip_id=422'
stripUrl = 'http://chugworth.com/archive/?strip_id=%s'
@ -107,7 +73,6 @@ class ChugworthAcademyArchive(_BasicScraper):
help = 'Index format: nnn'
class CigarroAndCerveja(_BasicScraper):
latestUrl = 'http://www.cigarro.ca/'
stripUrl = latestUrl + '?p=%s'
@ -116,15 +81,6 @@ class CigarroAndCerveja(_BasicScraper):
help = 'Index format: non'
# XXX move
class TinyKittenTeeth(_BasicScraper):
latestUrl = 'http://www.tinykittenteeth.com/'
stripUrl = latestUrl + 'index.php?current=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.tinykittenteeth\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous"))
help = 'Index format: n (unpadded)'
class Comedity(_BasicScraper):
latestUrl = 'http://www.comedity.com/'
stripUrl = latestUrl + 'index.php?strip_id=%s'
@ -135,31 +91,28 @@ class Comedity(_BasicScraper):
class Commissioned(_BasicScraper):
latestUrl = 'http://www.commissionedcomic.com/'
stripUrl = latestUrl + 'index.php?strip=%s'
imageSearch = compile(r'<img src="(http://www.commissionedcomic.com/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)">&lsaquo;</a>')
stripUrl = latestUrl + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.commissionedcomic\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.commissionedcomic\.com/\?p=\d+)', after="prev"))
help = 'Index format: n'
class CoolCatStudio(_BasicScraper):
latestUrl = 'http://www.coolcatstudio.com/'
stripUrl = latestUrl + 'strips-cat/ccs%s'
imageSearch = compile(tagre("img", "src", r'(http://www.coolcatstudio.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.coolcatstudio\.com/strips-cat/[^"]+)', before="cniprevt"))
imageSearch = compile(tagre("img", "src", r'(http://www\.coolcatstudio\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.coolcatstudio\.com/strips-cat/[^"]+)', before="prev"))
help = 'Index format: yyyymmdd'
class CourtingDisaster(_BasicScraper):
latestUrl = 'http://www.courting-disaster.com/'
stripUrl = latestUrl + 'archive/%s.html'
imageSearch = compile(r'(/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"><img src="/images/previous.gif"[^>]+?>')
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/archive/\d+\.html)') + tagre("img", "src", r'/images/previous\.gif'))
help = 'Index format: yyyymmdd'
class CrapIDrewOnMyLunchBreak(_BasicScraper):
latestUrl = 'http://crap.jinwicked.com/'
stripUrl = latestUrl + '%s'
@ -168,7 +121,6 @@ class CrapIDrewOnMyLunchBreak(_BasicScraper):
help = 'Index format: yyyy/mm/dd/name'
class CtrlAltDel(_BasicScraper):
latestUrl = 'http://www.cad-comic.com/cad/'
stripUrl = latestUrl + '%s'
@ -186,34 +138,31 @@ class CtrlAltDelSillies(CtrlAltDel):
class Curvy(_BasicScraper):
latestUrl = 'http://www.c.urvy.org/'
stripUrl = latestUrl + '?date=%s'
imageSearch = compile(r'(/c/.+?)"')
prevSearch = compile(r'(/\?date=.+?)">&lt;&lt; Previous page')
imageSearch = compile(tagre("img", "src", r'(/c/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/\?date=\d+)') + tagre("img", "src", "/nav/prev\.png"))
help = 'Index format: yyyymmdd'
def cloneManga(name, shortName, lastStrip=None):
url = 'http://manga.clone-army.org'
baseUrl = '%s/%s.php' % (url, shortName)
stripUrl = baseUrl + '?page=%s'
if lastStrip is None:
starter = bounceStarter(baseUrl, compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"next\.gif")))
else:
starter = constStarter(stripUrl % lastStrip)
def namer(self, imageUrl, pageUrl):
return '%03d' % int(getQueryParams(pageUrl)['page'][0])
return type('CloneManga_%s' % name,
(_BasicScraper,),
dict(
name='CloneManga/' + name,
starter=starter,
stripUrl=stripUrl,
imageSearch=compile(tagre("img", "src", r'((?:%s/)?%s/[^"]+)' % (url, shortName), after="center")),
prevSearch=compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"previous\.gif")),
help='Index format: n',
namer=namer)
attrs = dict(
name='CloneManga/' + name,
stripUrl = baseUrl + '?page=%s',
imageSearch=compile(tagre("img", "src", r'((?:%s/)?%s/[^"]+)' % (url, shortName), after="center")),
prevSearch=compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"previous\.gif")),
help='Index format: n',
namer=namer,
)
if lastStrip is None:
attrs['starter'] = indirectStarter(baseUrl, compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"last\.gif")))
else:
attrs['latestUrl'] = attrs['stripUrl'] % lastStrip
return type('CloneManga_%s' % name, (_BasicScraper,), attrs)
anm = cloneManga('AprilAndMay', 'anm')
@ -233,148 +182,14 @@ class CatAndGirl(_BasicScraper):
help = 'Index format: n (unpadded)'
def comicsDotCom(name, section):
latestUrl = 'http://www.gocomics.com/%s' % name
@classmethod
def namer(cls, imageUrl, pageUrl):
prefix, year, month, day = pageUrl.split('/', 3)
return "%s_%s%s%s.gif" % (name, year, month, day)
return type('GoComicsDotCom_%s' % name,
(_BasicScraper,),
dict(
name='GoComicsDotCom/' + name,
stripUrl=latestUrl + '/%s',
imageSearch=compile(tagre("img", "src", r'(http://assets\.amuniversal\.com/[0-9a-f]+)')),
prevSearch=compile(tagre("a", "href", "(/%s/\d+/\d+/\d+)")+"Previous"),
help='Index format: yyyy/mm/dd',
namer=namer)
)
# http://www.gocomics.com/features
# XXX
# http://www.gocomics.com/explore/editorial_list
# XXX
# http://www.gocomics.com/explore/sherpa_list
# XXX
acaseinpoint = comicsDotCom('acaseinpoint', 'comics')
agnes = comicsDotCom('agnes', 'creators')
alleyoop = comicsDotCom('alleyoop', 'comics')
andycapp = comicsDotCom('andycapp', 'creators')
arlonjanis = comicsDotCom('arlonjanis', 'comics')
ballardst = comicsDotCom('ballardst', 'creators')
barkeaterlake = comicsDotCom('barkeaterlake', 'comics')
bc = comicsDotCom('bc', 'creators')
ben = comicsDotCom('ben', 'comics')
betty = comicsDotCom('betty', 'comics')
bignate = comicsDotCom('bignate', 'comics')
bonanas = comicsDotCom('bonanas', 'wash')
bornloser = comicsDotCom('bornloser', 'comics')
buckets = comicsDotCom('buckets', 'comics')
candorville = comicsDotCom('candorville', 'wash')
cheapthrills = comicsDotCom('cheapthrills', 'wash')
chickweed = comicsDotCom('chickweed', 'comics')
committed = comicsDotCom('committed', 'comics')
dilbert = comicsDotCom('dilbert', 'comics')
drabble = comicsDotCom('drabble', 'comics')
fatcats = comicsDotCom('fatcats', 'comics')
ferdnand = comicsDotCom('ferdnand', 'comics')
flightdeck = comicsDotCom('flightdeck', 'creators')
floandfriends = comicsDotCom('floandfriends', 'creators')
franknernest = comicsDotCom('franknernest', 'comics')
frazz = comicsDotCom('frazz', 'comics')
geech = comicsDotCom('geech', 'comics')
genepool = comicsDotCom('genepool', 'wash')
getfuzzy = comicsDotCom('getfuzzy', 'comics')
gofish = comicsDotCom('gofish', 'comics')
graffiti = comicsDotCom('graffiti', 'comics')
grandave = comicsDotCom('grandave', 'comics')
grizzwells = comicsDotCom('grizzwells', 'comics')
heathcliff = comicsDotCom('heathcliff', 'creators')
hedge = comicsDotCom('hedge', 'comics')
herbnjamaal = comicsDotCom('herbnjamaal', 'creators')
herman = comicsDotCom('herman', 'comics')
humblestumble = comicsDotCom('humblestumble', 'comics')
janesworld = comicsDotCom('janesworld', 'comics')
jumpstart = comicsDotCom('jumpstart', 'comics')
kitncarlyle = comicsDotCom('kitncarlyle', 'comics')
liberty = comicsDotCom('liberty', 'creators')
lilabner = comicsDotCom('lilabner', 'comics')
luann = comicsDotCom('luann', 'comics')
marmaduke = comicsDotCom('marmaduke', 'comics')
meg = comicsDotCom('meg', 'comics')
moderatelyconfused = comicsDotCom('moderatelyconfused', 'comics')
momma = comicsDotCom('momma', 'creators')
monty = comicsDotCom('monty', 'comics')
motley = comicsDotCom('motley', 'comics')
nancy = comicsDotCom('nancy', 'comics')
naturalselection = comicsDotCom('naturalselection', 'creators')
offthemark = comicsDotCom('offthemark', 'comics')
onebighappy = comicsDotCom('onebighappy', 'creators')
othercoast = comicsDotCom('othercoast', 'creators')
pcnpixel = comicsDotCom('pcnpixel', 'wash')
peanuts = comicsDotCom('peanuts', 'comics')
pearls = comicsDotCom('pearls', 'comics')
pibgorn = comicsDotCom('pibgorn', 'comics')
pickles = comicsDotCom('pickles', 'wash')
raisingduncan = comicsDotCom('raisingduncan', 'comics')
reality = comicsDotCom('reality', 'comics')
redandrover = comicsDotCom('redandrover', 'wash')
ripleys = comicsDotCom('ripleys', 'comics')
roseisrose = comicsDotCom('roseisrose', 'comics')
rubes = comicsDotCom('rubes', 'creators')
rudypark = comicsDotCom('rudypark', 'comics')
shirleynson = comicsDotCom('shirleynson', 'comics')
soup2nutz = comicsDotCom('soup2nutz', 'comics')
speedbump = comicsDotCom('speedbump', 'creators')
spotthefrog = comicsDotCom('spotthefrog', 'comics')
strangebrew = comicsDotCom('strangebrew', 'creators')
sunshineclub = comicsDotCom('sunshineclub', 'comics')
tarzan = comicsDotCom('tarzan', 'comics')
thatslife = comicsDotCom('thatslife', 'wash')
wizardofid = comicsDotCom('wizardofid', 'creators')
workingdaze = comicsDotCom('workingdaze', 'comics')
workingitout = comicsDotCom('workingitout', 'creators')
def creators(name, shortname):
return type('Creators_%s' % name,
(_BasicScraper,),
dict(
name='Creators/' + name,
latestUrl='http://www.creators.com/comics_show.cfm?ComicName=%s' % (shortname,),
stripUrl=None,
imageSearch=compile(tagre("img", "src", r'(\d{4}/[^"]+/[^"]+\.[^"]+)')),
prevSearch=compile(tagre("a", "href", r'(comics_show\.cfm\?next=\d+&ComicName=[^"]+)', after='Previous Comic')),
help='Indexing unsupported')
)
arc = creators('Archie', 'arc')
shg = creators('AskShagg', 'shg')
hev = creators('ForHeavensSake', 'hev')
rug = creators('Rugrats', 'rug')
sou = creators('StateOfTheUnion', 'sou')
din = creators('TheDinetteSet', 'din')
lil = creators('TheMeaningOfLila', 'lil')
wee = creators('WeePals', 'wee')
zhi = creators('ZackHill', 'zhi')
class CyanideAndHappiness(_BasicScraper):
latestUrl = 'http://www.explosm.net/comics'
stripUrl = latestUrl + '/%s'
imageSearch = compile(r'<img alt="Cyanide and Happiness, a daily webcomic" src="(http:\/\/www\.explosm\.net/db/files/Comics/\w+/\S+\.\w+)"')
prevSearch = compile(r'<a href="(/comics/\d+/?)">< Previous</a>')
latestUrl = 'http://www.explosm.net/comics/'
stripUrl = latestUrl + '%s/'
imageSearch = compile(tagre("img", "src", r'(http:\/\/www\.explosm\.net/db/files/Comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/comics/\d+/)', before="prev"))
help = 'Index format: n (unpadded)'
class CrimsonDark(_BasicScraper):
latestUrl = 'http://www.davidcsimon.com/crimsondark/'
stripUrl = latestUrl + 'index.php?view=comic&strip_id=%s'
@ -383,16 +198,6 @@ class CrimsonDark(_BasicScraper):
help = 'Index format: n (unpadded)'
class CrimesOfCybeleCity(_BasicScraper):
latestUrl = 'http://www.pulledpunches.com/crimes/'
stripUrl = 'http://www.beaglespace.com/pulledpunches/crimes/?p=%s'
imageSearch = compile(r'<img src="(http://www\.beaglespace\.com/pulledpunches/crimes/comics/[^"]+)"')
prevSearch = compile(r'<a href="(http://www\.beaglespace\.com/pulledpunches/crimes/\?p=\d+)"><img src="back1\.gif"')
help = 'Index format: nn'
class CatsAndCameras(_BasicScraper):
latestUrl = 'http://catsncameras.com/cnc/'
stripUrl = latestUrl + '?p=%s'
@ -401,7 +206,6 @@ class CatsAndCameras(_BasicScraper):
help = 'Index format: nnn'
class CowboyJedi(_BasicScraper):
latestUrl = 'http://www.cowboyjedi.com/'
stripUrl = latestUrl + '%s'
@ -410,7 +214,6 @@ class CowboyJedi(_BasicScraper):
help = 'Index format: yyyy/mm/dd/strip-name'
class CasuallyKayla(_BasicScraper):
latestUrl = 'http://casuallykayla.com/'
stripUrl = latestUrl + '?p=%s'
@ -419,7 +222,6 @@ class CasuallyKayla(_BasicScraper):
help = 'Index format: nnn'
class Collar6(_BasicScraper):
latestUrl = 'http://collar6.com/'
stripUrl = latestUrl + 'archive/%s'
@ -428,7 +230,6 @@ class Collar6(_BasicScraper):
help = 'Index format: <name>'
class Chester5000XYV(_BasicScraper):
latestUrl = 'http://jessfink.com/Chester5000XYV/'
stripUrl = latestUrl + '?p=%s'
@ -437,7 +238,6 @@ class Chester5000XYV(_BasicScraper):
help = 'Index format: nnn'
class CalamitiesOfNature(_BasicScraper):
latestUrl = 'http://www.calamitiesofnature.com/'
stripUrl = latestUrl + 'archive/?c=%s'
@ -446,14 +246,13 @@ class CalamitiesOfNature(_BasicScraper):
help = 'Index format: nnn'
class Champ2010(_BasicScraper):
    # the latest URL is hard coded since the comic is discontinued
    latestUrl = 'http://jedcollins.com/champ2010/champ-12-30-10.html'
    stripUrl = 'http://jedcollins.com/champ2010/champ-%s.html'
    imageSearch = compile(tagre("img", "src", r'(http://jedcollins\.com/champ2010/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(http://jedcollins\.com/champ2010/[^"]+)', after="Previous"))
    # the index is a date in month-day-year order, e.g. 12-30-10 in latestUrl
    help = 'Index format: mm-dd-yy'
class Chucklebrain(_BasicScraper):
@ -464,7 +263,6 @@ class Chucklebrain(_BasicScraper):
help = 'Index format: nnn'
class CompanyY(_BasicScraper):
latestUrl = 'http://company-y.com/'
stripUrl = latestUrl + '%s/'
@ -473,32 +271,21 @@ class CompanyY(_BasicScraper):
help = 'Index format: yyyy/mm/dd/strip-name'
class CorydonCafe(_BasicScraper):
starter = bounceStarter('http://corydoncafe.com/', compile(r' href="(\./comic-\d+.html)">Next&gt;</a>'))
stripUrl = 'http://corydoncafe.com/comic-%s.html'
imageSearch = compile(r'<img src=\'(\./comics/.+?)\' ')
prevSearch = compile(r' href="(\./comic-\d+.html)">&lt;Previous</a>')
help = 'Index format: nnn'
starter = bounceStarter('http://corydoncafe.com/', compile(tagre("a", "href", r"(http://corydoncafe\.com/\d+/[^']+)", after="next", quote="'")))
stripUrl = 'http://corydoncafe.com/%s.php'
imageSearch = compile(tagre("img", "src", r"(\./[^']+)", quote="'"))
prevSearch = compile(tagre("a", "href", r"(http://corydoncafe\.com/\d+/[^']+)", after="prev", quote="'"))
help = 'Index format: yyyy/stripname'
@classmethod
def namer(cls, imageUrl, pageUrl):
return pageUrl.split('/')[-1].split('.')[0]
class CraftedFables(_BasicScraper):
latestUrl = 'http://www.craftedfables.com/'
stripUrl = 'http://www.caf-fiends.net/craftedfables/?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.caf-fiends\.net/craftedfables/comics/[^"]+)'))
prevSearch = compile(r'<a href="(http://www.caf-fiends.net/craftedfables/.+?)"><span class="prev">')
help = 'Index format: nnn'
class Currhue(_BasicScraper):
latestUrl = 'http://www.currhue.com/'
stripUrl = latestUrl + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.currhue\.com/comics/[^"]+)'))
prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.currhue.com/.+?)"')
help = 'Index format: nnn'

View file

@ -0,0 +1,87 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, sub
from ..scraper import _BasicScraper
from ..util import tagre
def comicsDotCom(name, section):
    """Create a scraper class for one comic hosted on www.gocomics.com.

    name: path component of the comic below http://www.gocomics.com/;
        it is also used as prefix of the image file names.
    section: not used by this function; kept so existing calls still work.

    Returns a new _BasicScraper subclass named ``GoComicsDotCom_<name>``,
    where characters of ``name`` that are not ASCII letters, digits or
    underscores are dropped from the class name.
    """
    baseUrl = 'http://www.gocomics.com/'
    # comic names may contain characters such as "-" that cannot be part
    # of a class name
    classname = sub("[^0-9a-zA-Z_]", "", name)

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Build the image file name from the date at the end of pageUrl."""
        # Strip pages end in .../yyyy/mm/dd, so the date parts have to be
        # taken from the right; splitting from the left would cut the
        # scheme and host name of the URL instead.
        prefix, year, month, day = pageUrl.rsplit('/', 3)
        return "%s_%s%s%s.gif" % (name, year, month, day)

    return type('GoComicsDotCom_%s' % classname,
        (_BasicScraper,),
        dict(
            latestUrl=baseUrl + name,
            name='GoComicsDotCom/' + classname,
            stripUrl=baseUrl + name + '/%s',
            imageSearch=compile(tagre("img", "src", r'(http://assets\.amuniversal\.com/[0-9a-f]+)')),
            prevSearch=compile(tagre("a", "href", r'(/[^"]+/\d+/\d+/\d+)', after="prev")),
            help='Index format: yyyy/mm/dd',
            namer=namer)
    )
# http://www.gocomics.com/features
# XXX
# http://www.gocomics.com/explore/editorial_list
# XXX
# http://www.gocomics.com/explore/sherpa_list
# XXX
agnes = comicsDotCom('agnes', 'creators')
andycapp = comicsDotCom('andycapp', 'creators')
barkeaterlake = comicsDotCom('barkeaterlake', 'comics')
bc = comicsDotCom('bc', 'creators')
ben = comicsDotCom('ben', 'comics')
betty = comicsDotCom('betty', 'comics')
bignate = comicsDotCom('bignate', 'comics')
bonanas = comicsDotCom('bonanas', 'wash')
thebornloser = comicsDotCom('the-born-loser', 'comics')
thebuckets = comicsDotCom('thebuckets', 'comics')
candorville = comicsDotCom('candorville', 'wash')
calvinandhobbes = comicsDotCom('calvinandhobbes', 'comics')
chickweed = comicsDotCom('9chickweedlane', 'comics')
committed = comicsDotCom('committed', 'comics')
dilbert = comicsDotCom('dilbert', 'comics')
drabble = comicsDotCom('drabble', 'comics')
floandfriends = comicsDotCom('floandfriends', 'creators')
frazz = comicsDotCom('frazz', 'comics')
geech = comicsDotCom('geech', 'comics')
getfuzzy = comicsDotCom('getfuzzy', 'comics')
graffiti = comicsDotCom('graffiti', 'comics')
grandave = comicsDotCom('grand-avenue', 'comics')
heathcliff = comicsDotCom('heathcliff', 'creators')
herman = comicsDotCom('herman', 'comics')
janesworld = comicsDotCom('janesworld', 'comics')
jumpstart = comicsDotCom('jumpstart', 'comics')
kitandcarlyle = comicsDotCom('kitandcarlyle', 'comics')
luann = comicsDotCom('luann', 'comics')
marmaduke = comicsDotCom('marmaduke', 'comics')
moderatelyconfused = comicsDotCom('moderately-confused', 'comics')
momma = comicsDotCom('momma', 'creators')
monty = comicsDotCom('monty', 'comics')
nancy = comicsDotCom('nancy', 'comics')
offthemark = comicsDotCom('offthemark', 'comics')
onebighappy = comicsDotCom('onebighappy', 'creators')
peanuts = comicsDotCom('peanuts', 'comics')
pearlsbeforeswine = comicsDotCom('pearlsbeforeswine', 'comics')
pibgorn = comicsDotCom('pibgorn', 'comics')
pickles = comicsDotCom('pickles', 'wash')
redandrover = comicsDotCom('redandrover', 'wash')
roseisrose = comicsDotCom('roseisrose', 'comics')
rubes = comicsDotCom('rubes', 'creators')
rudypark = comicsDotCom('rudypark', 'comics')
speedbump = comicsDotCom('speedbump', 'creators')
strangebrew = comicsDotCom('strangebrew', 'creators')
tarzan = comicsDotCom('tarzan', 'comics')
wizardofid = comicsDotCom('wizardofid', 'creators')
workingdaze = comicsDotCom('working-daze', 'comics')
workingitout = comicsDotCom('workingitout', 'creators')

View file

@ -0,0 +1,81 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from ..scraper import _BasicScraper
from ..util import tagre
def creators(name, shortname):
    """Create a scraper class for one comic hosted on www.creators.com.

    name: name of the comic; used for the class name and the scraper name.
    shortname: path component of the comic below
        http://www.creators.com/comics/.

    Returns a new _BasicScraper subclass named ``Creators_<name>``.
    """
    # The module-level imports do not provide re.compile, and the builtin
    # compile() cannot build regular expressions, so import it here.
    from re import compile
    baseUrl = 'http://www.creators.com/comics/'
    return type('Creators_%s' % name,
        (_BasicScraper,),
        dict(
            name='Creators/' + name,
            latestUrl='%s%s.html' % (baseUrl, shortname),
            stripUrl='%s%s/%%s.html' % (baseUrl, shortname),
            imageSearch=compile(tagre("img", "src", r'(/comics/\d+/[^"]+)')),
            # the link to the previous strip is directly followed by the
            # left arrow image
            prevSearch=compile(tagre("a", "href", r'(/comics/%s/\d+\.html)' % shortname) +
                tagre("img", "src", r'/img_comics/arrow_l\.gif')),
            help='Index format: n')
    )
# for a complete list see http://www.creators.com/comics/cat-seeall.html
# Maps the scraper class name suffix to the comic's path component on
# www.creators.com.
comics = {
    'Agnes': 'agnes',
    'AndyCapp': 'andy-capp',
    'Archie': 'archie',
    'AskShagg': 'ask-shagg',
    'BallardStreet': 'ballard-street',
    'BC': 'bc',
    'TheBarn': 'the-barn',
    'CafeConLeche': 'cafe-con-leche',
    'ChuckleBros': 'chuckle-bros',
    'DaddysHome': 'daddys-home',
    'DiamondLil': 'diamond-lil',
    'TheDinetteSet': 'dinette-set',
    'DogEatDoug': 'dog-eat-doug',
    'DogsOfCKennel': 'dogs-of-c-kennel',
    'DonaldDuck': 'donald-duck',
    'FloAndFriends': 'flo-and-friends',
    'Flare': 'flare',
    'FlightDeck': 'flight-deck',
    'ForHeavensSake': 'for-heavens-sake',
    'FreeRange': 'free-range',
    'GirlsAndSports': 'girls-and-sports',
    'Heathcliff': 'heathcliff',
    'HerbAndJamaal': 'herb-and-jamaal',
    'HopeAndDeath': 'hope-and-death',
    'LibertyMeadows': 'liberty-meadows',
    'TheMeaningOfLila': 'meaning-of-lila',
    'MickeyMouse': 'mickey-mouse',
    'Momma': 'momma',
    'NestHeads': 'nest-heads',
    'OneBigHappy': 'one-big-happy',
    # the strip is called "On a Claire Day"
    'OnAClaireDay': 'on-a-claire-day',
    'TheOtherCoast': 'other-coast',
    'TheQuigmans': 'quigmans',
    'Rubes': 'rubes',
    'Rugrats': 'rugrats',
    'ScaryGary': 'scary-gary',
    'SpeedBump': 'speed-bump',
    'StrangeBrew': 'strange-brew',
    'ThinLines': 'thin-lines',
    'WeePals': 'wee-pals',
    'WizardOfId': 'wizard-of-id',
    'WorkingItOut': 'working-it-out',
    'ZackHill': 'zack-hill',
    'BCSpanish': 'bc-spanish',
    'WizardOfIdSpanish': 'wizard-of-id-spanish',
    'ArchieSpanish': 'archie-spanish',
    'HeathcliffSpanish': 'heathcliff-spanish',
    'RugratsSpanish': 'rugrats-spanish',
    'LongStoryShort': 'long-story-short',
    'Recess': 'recess',
    'HomeOffice': 'stay-at-home-dad',
    'OffCenter': 'off-center',
    'GirlsAndSportsSpanish': 'girls-and-sports-spanish',
}

# Create one scraper class per table entry and bind it as a module-level
# name.
for name, shortname in comics.items():
    globals()[name] = creators(name, shortname)

View file

@ -1,9 +1,11 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import _BasicScraper
from ..helpers import bounceStarter, indirectStarter
from ..helpers import indirectStarter
from ..util import tagre, getQueryParams
@ -11,9 +13,9 @@ from ..util import tagre, getQueryParams
class DMFA(_BasicScraper):
    latestUrl = 'http://www.missmab.com/'
    stripUrl = latestUrl + 'Comics/Vol_%s.php'
    # Image paths start with either "Comics/" or "Vol"; the outer group
    # captures the whole path, the inner alternation is non-capturing.
    imageSearch = compile(tagre("img", "src", r'((?:Comics/|Vol)[^"]+)'))
    # The quantifier has to be inside the group, else only the last
    # character of the link would be captured. The previous-strip link is
    # followed by the comicprev image, referenced with or without a
    # leading "../".
    prevSearch = compile(tagre("a", "href", r'([^"]+)')+
        tagre("img", "src", r'(?:\.\./)?Images/comicprev\.gif'))
    help = 'Index format: nnn (normally, some specials)'
@ -27,10 +29,10 @@ class DandyAndCompany(_BasicScraper):
class DarkWings(_BasicScraper):
latestUrl = 'http://www.flowerlarkstudios.com/dark-wings/'
stripUrl = latestUrl + 'archive.php?day=%s'
imageSearch = compile(r'(comics/.+?)" W')
prevSearch = compile(r"first_day.+?/(archive.+?)'.+?previous_day")
help = 'Index format: yyyymmdd'
stripUrl = latestUrl + '%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.flowerlarkstudios\.com/dark-wings/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.flowerlarkstudios\.com/dark-wings/[^"]+)', after="navi-prev"))
help = 'Index format: yyyy/mm/dd/page-nn-mm'
class DeathToTheExtremist(_BasicScraper):
@ -44,12 +46,11 @@ class DeathToTheExtremist(_BasicScraper):
class DeepFried(_BasicScraper):
latestUrl = 'http://www.whatisdeepfried.com/'
stripUrl = latestUrl + '%s'
imageSearch = compile(r'(http://www.whatisdeepfried.com/comics/.+?)"')
prevSearch = compile(r'"(http://www.whatisdeepfried.com/.+?)"><span class="prev">')
imageSearch = compile(tagre("img", "src", r'(http://www\.whatisdeepfried\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.whatisdeepfried\.com/[^"]+)', after="prev"))
help = 'Index format: non'
class DoemainOfOurOwn(_BasicScraper):
latestUrl = 'http://www.doemain.com/'
stripUrl = latestUrl + 'index.cgi/%s'
@ -58,7 +59,6 @@ class DoemainOfOurOwn(_BasicScraper):
help = 'Index format: yyyy-mm-dd'
class DrFun(_BasicScraper):
latestUrl = 'http://www.ibiblio.org/Dave/ar00502.htm'
stripUrl = 'http://www.ibiblio.org/Dave/ar%s.htm'
@ -95,26 +95,17 @@ class DreamKeepersPrelude(_BasicScraper):
class Drowtales(_BasicScraper):
latestUrl = 'http://www.drowtales.com/mainarchive.php'
stripUrl = latestUrl + '?location=%s'
imageSearch = compile(r'src=".(/tmpmanga/.+?)"')
prevSearch = compile(r'<a href="mainarchive.php(\?location=\d+)"><img src="[^"]*previousday\.gif"')
help = 'Index format: yyyymmdd'
class DungeonCrawlInc(_BasicScraper):
latestUrl = 'http://www.dungeoncrawlinc.com/latest.html'
stripUrl = 'http://www.dungeoncrawlinc.com/comic%s'
imageSearch = compile(r'src="(.+?/DCI_.+?)"')
prevSearch = compile(r'<a href="(.+?)">.+?back')
help = 'Index format: nnn.html'
stripUrl = latestUrl + '?sid=%s'
imageSearch = compile(tagre("img", "src", r'("http://www.drowtales.com/mainarchive/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(\?sid=\d+)', before="link_prev_top"))
help = 'Index format: number'
class DieselSweeties(_BasicScraper):
latestUrl = 'http://www.dieselsweeties.com/'
stripUrl = latestUrl + 'archive/%s'
imageSearch = compile(r'src="(/hstrips/.+?)"')
prevSearch = compile(r'href="(/archive/.+?)">(<img src="http://www.dieselsweeties.com/ximages/blackbackarrow160.png|previous webcomic)')
imageSearch = compile(tagre("img", "src", r'(/hstrips/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/archive/\d+)') + tagre("img", "src", r'http://www\.dieselsweeties\.com/ximages/blackbackarrow160.png'))
help = 'Index format: n (unpadded)'
@classmethod
@ -136,14 +127,12 @@ class DominicDeegan(_BasicScraper):
return getQueryParams(imageUrl)['save-as'][0].rsplit('.', 1)[0]
class DorkTower(_BasicScraper):
latestUrl = 'http://www.dorktower.com/'
stripUrl = None
imageSearch = compile(r'<img src="(http://www\.dorktower\.com/images/comics/[^"]+)"')
prevSearch = compile(r'<a href="(/previous\.php\?[^"]+)"')
help = 'Index format: None'
stripUrl = latestUrl + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.dorktower\.com/files/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.dorktower\.com/[^"]+)')+"Previous")
help = 'Index format: yyyy/mm/dd/stripname-dd-mm-yy'
class DresdenCodak(_BasicScraper):
@ -153,33 +142,3 @@ class DresdenCodak(_BasicScraper):
prevSearch = compile(r'<a href="http://dresdencodak.com(/.*?)"><img src=http://dresdencodak.com/m_prev.png>')
starter = indirectStarter('http://dresdencodak.com/', compile(r'<div id="preview"><a href="http://dresdencodak.com/(\d+/\d+/\d+/.*?)">'))
class DonkBirds(_BasicScraper):
latestUrl = 'http://www.donkbirds.com/'
stripUrl = latestUrl + 'index.php?date=%s'
imageSearch = compile(r'<img src="(strips/.+?)"')
prevSearch = compile(r'<a href="(.+?)">Previous</a>')
help = 'Index format: yyyy-mm-dd'
class DrawnByDrunks(_BasicScraper):
starter = bounceStarter('http://www.drawnbydrunks.co.uk/', compile(r'<div class="nav-last"><a href="(.+?)">'))
stripUrl = 'http://www.drawnbydrunks.co.uk/?p=%s'
imageSearch = compile(r'<img src="(http://www.drawnbydrunks.co.uk/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
help = 'Index format: nnn'
@classmethod
def namer(cls, imageUrl, pageUrl):
return pageUrl.split('=')[-1]
class DeathCord(_BasicScraper):
latestUrl = 'http://deathchord.com/index.php'
stripUrl = 'http://deathchord.com/__.php?comicID=%s'
imageSearch = compile(r'<img src="(http://deathchord.com/kill/\d+.+?)"')
prevSearch = compile(r'</a>?.+?<a href="(http://deathchord.com/.+?)"><img[^>]+?alt="Previous" />')
help = 'Index format: nnn'

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE
from ..scraper import _BasicScraper

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE
from ..helpers import indirectStarter
@ -9,28 +11,19 @@ from ..util import tagre
class EerieCuties(_BasicScraper):
latestUrl = 'http://www.eeriecuties.com/'
stripUrl = latestUrl + 'd/%s.html'
imageSearch = compile(r'(/comics/.+?)"')
prevSearch = compile(r'(/d/.+?.html).+?/previous_day.gif')
help = 'Index format: yyyymmdd'
class EdgeTheDevilhunter(_BasicScraper):
name = 'KeenSpot/EdgeTheDevilhunter'
latestUrl = 'http://www.edgethedevilhunter.com/'
stripUrl = latestUrl + 'comics/%s'
imageSearch = compile(r'(http://www.edgethedevilhunter.com/comics/.+?)" alt')
prevSearch = compile(r'(http://www.edgethedevilhunter.com/comics/.+?)"><span class="prev')
help = 'Index format: mmddyyyy or name'
stripUrl = latestUrl + 'strips-ec/%s'
imageSearch = compile(tagre("img", "src", r'(http://ace\.eeriecuties\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', before="prev"))
help = 'Index format: stripname'
class Eriadan(_BasicScraper):
stripUrl = 'http://www.shockdom.com/eriadan/?p=%s'
latestUrl = 'http://www.shockdom.com/webcomics/eriadan/'
stripUrl = latestUrl + '%s'
# XXX fix image search
imageSearch = compile(r'title="[^"]+?" src="http://www\.shockdom\.com/eriadan/(wp-content/uploads/.+?)"')
prevSearch = compile(r"<link rel='prev' title='.+?' href='http://www\.shockdom\.com/eriadan/(\?p=.+?)'")
starter = indirectStarter('http://www.shockdom.com/eriadan/', compile(r'<ul class="latest2">[^<]+?<li class="list-title"><a href="(http://www\.shockdom.com/eriadan/\?p=.+?)"'))
help = 'Index format: nnn (unpadded)'
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
help = 'Index format: yyyy/mm/dd/nnn (unpadded)'
@classmethod
def namer(cls, imageUrl, pageUrl):
@ -57,16 +50,6 @@ class ElGoonishShiveNP(_BasicScraper):
help = 'Index format: yyyy-mm-dd'
class ElsieHooper(_BasicScraper):
latestUrl = 'http://www.elsiehooper.com/todaysserial.htm'
stripUrl = 'http://www.elsiehooper.com/comics/comic%s.htm'
imageSearch = compile(r'<img src="(/comics_/.+?)">')
prevSearch = compile(r'<A href="(.+?)"><IMG (height=27 src="/images/previous.gif"|src="/images/previous.gif")', IGNORECASE)
help = 'Index format: nnn'
class EmergencyExit(_BasicScraper):
latestUrl = 'http://www.eecomics.net/'
stripUrl = None
@ -79,27 +62,17 @@ class EmergencyExit(_BasicScraper):
class ErrantStory(_BasicScraper):
latestUrl = 'http://www.errantstory.com/'
stripUrl = latestUrl + 'archive.php?date=%s'
stripUrl = latestUrl + '%s'
imageSearch = compile(r'<img[^>]+?src="([^"]*?comics/.+?)"')
prevSearch = compile(r'><a href="(.+?)">&lt;Previous</a>')
help = 'Index format: yyyy-mm-dd'
class EternalVenture(_BasicScraper):
latestUrl = 'http://www.pulledpunches.com/venture/'
stripUrl = 'http://www.beaglespace.com/pulledpunches/venture/?p=%s'
imageSearch = compile(r'<img src="(http://www.beaglespace.com/pulledpunches/venture/comics/.+?)"')
prevSearch = compile(r'id="prev"><a href="(http://www.beaglespace.com/pulledpunches/venture/.+?)" ')
help = 'Index format: nn'
help = 'Index format: yyyy-mm-dd/num'
class Evercrest(_BasicScraper):
latestUrl = 'http://www.evercrest.com/archives/20030308'
stripUrl = latestUrl + '%s'
stripUrl = 'http://www.evercrest.com/archives/%s'
imageSearch = compile(r'<img.+?src="([^"]*/(images/oldstrips|archives/i)/[^"]*)"')
prevSearch = compile(r'<a.+?href="(http://www.evercrest.com/archives/\d+)">&lt; Previous')
prevSearch = compile(r'<a.+?href="(http://www\.evercrest\.com/archives/\d+)">&lt; Previous')
help = 'Index format: yyyymmdd'
@ -113,26 +86,25 @@ class EverybodyLovesEricRaymond(_BasicScraper):
class EvilDiva(_BasicScraper):
latestUrl = 'http://www.evildivacomics.com/'
stripUrl = latestUrl + '%s.html'
stripUrl = latestUrl + '?p=%s'
imageSearch = compile(r'(/comics/.+?)"')
prevSearch = compile(r'http.+?com/(.+?)".+?"prev')
help = 'Index format: cpn (unpadded)'
help = 'Index format: n (unpadded)'
class EvilInc(_BasicScraper):
latestUrl = 'http://www.evil-comic.com/'
stripUrl = latestUrl + 'd/%s.html'
imageSearch=compile(tagre("img", "src", r'(/comic[s|/][^"]+)'))
prevSearch=compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif")
help='Index format: yyyymmdd'
stripUrl = latestUrl + 'archive/%s.html'
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'/images/previous\.gif'))
help = 'Index format: yyyymmdd'
class Exiern(_BasicScraper):
    # Scraper settings for http://www.exiern.com/.
    latestUrl = 'http://www.exiern.com/'
    # URL template of a single strip page; %s is the strip index (see help).
    stripUrl = latestUrl + '?p=%s'
    # Captures the absolute image URL below /comics/. The value must end in
    # [^"]+ (one or more characters): a bare [^"] allows only a single
    # character before the closing quote and cannot match a real file name.
    imageSearch = compile(tagre("img", "src", r'(http://www\.exiern\.com/comics/[^"]+)'))
    # Captures the href of the anchor tag that also mentions "prev".
    prevSearch = compile(tagre("a", "href", r'(http://www\.exiern\.com/[^"]+)', after="prev"))
    help = 'Index format: n'
class ExiernDarkReflections(_BasicScraper):
@ -143,31 +115,27 @@ class ExiernDarkReflections(_BasicScraper):
help = 'Index format: n'
class ExtraLife(_BasicScraper):
latestUrl = 'http://www.myextralife.com/'
stripUrl = latestUrl + 'comic/%s/'
imageSearch = compile(r'<img src="(http://www.myextralife.com/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.myextralife.com/comic/.+?)"')
help = 'Index format: mmddyyyy'
imageSearch = compile(tagre("img", "src", r'(http://www\.myextralife\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
help = 'Index format: stripname'
class EyeOfRamalach(_BasicScraper):
latestUrl = 'http://theeye.katbox.net/'
stripUrl = latestUrl + 'index.php?strip_id=%s'
imageSearch = compile(r'="(.+?strips/.+?)"')
prevSearch = compile(r'(index.php\?strip_id=.+?)".+?navigation_back')
stripUrl = latestUrl + 'archive/%s/'
imageSearch = compile(tagre("img", "src", r'(http://theeye\.katbox\.net/wp-content/webcomic/theeye/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://theeye\.katbox\.net/archive/[^"]+)', after="previous"))
help = 'Index format: n (unpadded)'
class EarthsongSaga(_BasicScraper):
latestUrl = 'http://www.earthsongsaga.com/'
starter = indirectStarter('http://www.earthsongsaga.com/', compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'[^"]+current\.jpg')))
stripUrl = None
imageSearch = compile(r'<img src="((?:\.\./)?images/vol\d+/ch\d+/\d+\.\w+)"')
prevSearch = compile(r'<a href="([^"]+\.html)"[^>]*><img src="(?:(?:\.\.)?/)?images/testing/prev')
starter = indirectStarter('http://www.earthsongsaga.com/',
compile(r'a href="(.+?)".+?current-page.jpg'))
imageSearch = compile(tagre("img", "src", r'((?:\.\./)?images/vol\d+/ch\d+/\d+\.\w+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous"))
@classmethod
def namer(cls, imageUrl, pageUrl):
@ -175,19 +143,18 @@ class EarthsongSaga(_BasicScraper):
return 'vol%02d_ch%02d_%02d' % (int(imgmatch.group(1)), int(imgmatch.group(2)), int(imgmatch.group(3)))
class ExploitationNow(_BasicScraper):
latestUrl = 'http://exploitationnow.com/'
stripUrl = latestUrl + 'comic.php?date=%s'
imageSearch = compile(r'src="(comics/.+?)"')
prevSearch = compile(r' <a href="(.+?)" title="\[Back\]">')
help = 'Index format: yyyy-mm-dd'
latestUrl = 'http://www.exploitationnow.com/'
stripUrl = latestUrl + '%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.exploitationnow\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.exploitationnow\.com/[^"]+)', after="navi-prev"))
help = 'Index format: yyyy-mm-dd/num'
class Ellerbisms(_BasicScraper):
latestUrl = 'http://www.ellerbisms.com/'
stripUrl = latestUrl + '?p=%s'
imageSearch = compile(r'<img src="(http://www.ellerbisms.com/comics/.+?)"')
prevSearch = compile(r'<a href="(http://www.ellerbisms.com/.+?)"><span class="prev">')
imageSearch = compile(tagre("img", "src", r'(http://www\.ellerbisms\.com/wp-content/uploads/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.ellerbisms\.com/[^"]+)', after="prev"))
help = 'Index format: nnn'

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE, MULTILINE
from ..util import tagre
@ -32,35 +34,24 @@ class FeyWinds(_BasicScraper):
compile(r'(comic/page.php\?id.+?)"'))
class FightCastOrEvade(_BasicScraper):
    # Scraper settings for http://www.fightcastorevade.net/.
    latestUrl = 'http://www.fightcastorevade.net/'
    # URL template of a single strip page; %s is the strip index (see help).
    stripUrl = latestUrl + 'd/%s'
    # Captures the absolute image URL below /comics/. The value handed to
    # tagre must not start with its own double quote: every other tagre call
    # in this code base passes the bare attribute value.
    # NOTE(review): presumably tagre adds the quotes itself - confirm in util.
    imageSearch = compile(tagre("img", "src", r'(http://www\.fightcastorevade\.net/comics/[^"]+)'))
    # Captures a quoted URL containing /d/ that is followed by "previous".
    prevSearch = compile(r'"(.+?/d/.+?)".+?previous')
    help = 'Index format: yyyymmdd.html'
class FilibusterCartoons(_BasicScraper):
latestUrl = 'http://www.filibustercartoons.com/'
stripUrl = latestUrl + 'index.php/%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.filibustercartoons\.com/comics/[^"]+)'))
prevSearch = compile(r'<a href="(.+?)"><img src=\'(.+?/arrow-left.gif)\'')
prevSearch = compile(tagre("a", "href", r'(http://www\.filibustercartoons\.com/[^"]+)', after="prev"))
help = 'Index format: yyyy/mm/dd/name'
class FlakyPastry(_BasicScraper):
latestUrl = 'http://flakypastry.runningwithpencils.com/index.php'
stripUrl = 'http://flakypastry.runningwithpencils.com/comic.php\?strip_id=%s'
stripUrl = 'http://flakypastry.runningwithpencils.com/comic.php?strip_id=%s'
imageSearch = compile(r'<img src="(comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)".+?btn_back')
help = 'Index format: nnnn'
# XXX move to keenspot
class Flipside(_BasicScraper):
latestUrl = 'http://www.flipsidecomics.com/comic.php'
latestUrl = 'http://flipside.keenspot.com/comic.php'
stripUrl = latestUrl + '?i=%s'
imageSearch = compile(r'<IMG SRC="(comic/.+?)"')
prevSearch = compile(r'<A HREF="(comic.php\?i=\d+?)">&lt')
@ -72,20 +63,9 @@ class Footloose(_BasicScraper):
stripUrl = 'http://footloosecomic.com/footloose/pages.php?page=%s'
imageSearch = compile(r'<img src="/footloose/(.+?)"')
prevSearch = compile(r'(?:first.+?[^>]).+?(/footloose/.+?)".+?(?:prev)')
# prevSearch = compile(r'(?:first.+?[^>]).+?(/footloose/.+?html).+?(?:prev|Prev)')
help = 'Index format: n (unpadded)'
class FragileGravity(_BasicScraper):
latestUrl = 'http://www.fragilegravity.com/'
stripUrl = latestUrl + 'core.php?archive=%s'
imageSearch = compile(r'<IMG SRC="(strips/.+?)"')
prevSearch = compile(r'<A HREF="(.+?)"\nonMouseover="window.status=\'Previous Strip', MULTILINE | IGNORECASE)
help = 'Index format: yyyymmdd'
class Freefall(_BasicScraper):
latestUrl = 'http://freefall.purrsia.com/default.htm'
stripUrl = 'http://freefall.purrsia.com/ff%s/fc%s.htm'
@ -94,7 +74,6 @@ class Freefall(_BasicScraper):
help = 'Index format: nnnn/nnnnn'
class FantasyRealms(_BasicScraper):
stripUrl = 'http://www.fantasyrealmsonline.com/manga/%s.php'
imageSearch = compile(r'<img src="(\d{1,4}.\w{3,4})" width="540"', IGNORECASE)
@ -104,14 +83,6 @@ class FantasyRealms(_BasicScraper):
compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE))
class FullFrontalNerdity(_BasicScraper):
latestUrl = 'http://nodwick.humor.gamespy.com/ffn/index.php'
stripUrl = None
imageSearch = compile(r'<img src="(http://nodwick.humor.gamespy.com/ffn/strips/[^"]*)"', IGNORECASE)
prevSearch = compile(r'<a href="(index.php\?date=[0-9-]*)"><img src="back.jpg"', IGNORECASE)
class FunInJammies(_BasicScraper):
latestUrl = 'http://www.funinjammies.com/'
stripUrl = latestUrl + 'comic.php?issue=%s'
@ -120,7 +91,6 @@ class FunInJammies(_BasicScraper):
help = 'Index format: n (unpadded)'
class Fallen(_BasicScraper):
stripUrl = 'http://www.fallencomic.com/pages/part%s/%s-p%s.htm'
imageSearch = compile(r'<IMG SRC="(page/.+?)"', IGNORECASE)
@ -140,19 +110,12 @@ class Fallen(_BasicScraper):
self.currentUrl = self.stripUrl % (part, index, part)
class FoxTails(_BasicScraper):
latestUrl = 'http://www.magickitsune.com/strips/current.html'
stripUrl = 'http://www.magickitsune.com/strips/%s'
imageSearch = compile(r'<img src=(img/.+?)[ |>]', IGNORECASE)
prevSearch = compile(r'(?<=first.gif)*(?<=</td>)*<a.*href=\'(.+?)\'.+?<img.+?src=\'../img/prev.gif\'>', IGNORECASE)
help = 'Index format: yyyymmdd'
class FredoAndPidjin(_BasicScraper):
    # Scraper settings for http://www.pidjin.net/.
    homepage = 'http://www.pidjin.net/'
    # No per-strip URL template is defined for this comic.
    stripUrl = None
    help = 'Index format: yyyy/mm/dd/name'
    # Captures the PNG image URL on the CDN host. Written as a raw string so
    # that \. and \d reach the regex engine without relying on Python passing
    # unknown string escapes through; the pattern text itself is unchanged.
    imageSearch = compile(tagre('img', 'src', r'(http://cdn\.pidjin\.net/wp-content/uploads/\d\d\d\d/\d\d/\d+[^"]+\.png)'))
    # Captures the href of the anchor whose link text is "Prev".
    prevSearch = compile(tagre('a', 'href', r'([^"]+)')+"Prev</a>")
    # The start URL is looked up on the homepage: the first link of the form
    # <homepage>yyyy/mm/dd/<name>/ is used.
    starter = indirectStarter(homepage,
        compile(tagre('a', 'href', "("+homepage+r'\d\d\d\d/\d\d/\d\d/[^"]+/)')))

View file

@ -0,0 +1,45 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
def fallenangel(name, shortname):
    """Placeholder; does nothing yet and returns None.

    Both arguments are currently ignored.
    """
    # XXX
    return None
class _TheFallenAngel(_BasicScraper):
    # Shared base for comics served from www.thefallenangel.co.uk.
    # Subclasses provide ``name`` and ``shortName``.
    # NOTE(review): no import of ``compile`` or ``_BasicScraper`` is visible
    # in this module - confirm they are in scope.

    # Captures the absolute image URL in a "<shortname>comics" directory.
    # Dots in the host name are escaped so they match literally.
    imageSearch = compile(r'SRC="(http://www\.thefallenangel\.co\.uk/\w+comics/.+?)"')
    # Captures the link that wraps the "previousday.jpg" button.
    prevSearch = compile(r' <a href="(http://www\.thefallenangel\.co\.uk/.+?)"><img[^>]+?src="http://www\.thefallenangel\.co\.uk/images/previousday\.jpg"')
    help = 'Index format: yyyymmdd'

    @property
    def baseUrl(self):
        """Return the CGI URL for this comic, built from ``shortName``."""
        return 'http://www.thefallenangel.co.uk/cgi-bin/%sautokeen/autokeenlite.cgi' % (self.shortName,)

    @property
    def stripUrl(self):
        """Return the strip URL template; its %s takes the date index."""
        return self.baseUrl + '?date=%s'

    # NOTE(review): this is a plain instance method, while the starter
    # helpers in helpers.py produce static/class methods - confirm how the
    # framework invokes ``starter``.
    def starter(self):
        """Return the URL to start from, which is ``baseUrl``."""
        return self.baseUrl
class HighMaintenance(_TheFallenAngel):
    # Name under which this comic is listed.
    name = 'TheFallenAngel/HighMaintenance'
    # Site key that _TheFallenAngel.baseUrl interpolates into the CGI path.
    shortName = 'hm'
class FAWK(_TheFallenAngel):
    # Name under which this comic is listed.
    name = 'TheFallenAngel/FAWK'
    # Site key that _TheFallenAngel.baseUrl interpolates into the CGI path.
    shortName = 'fawk'
class MalloryChan(_TheFallenAngel):
    # Name under which this comic is listed.
    name = 'TheFallenAngel/MalloryChan'
    # Site key that _TheFallenAngel.baseUrl interpolates into the CGI path.
    shortName = 'mallorychan'

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import _BasicScraper
@ -9,17 +11,18 @@ from ..util import tagre
class Galaxion(_BasicScraper):
latestUrl = 'http://galaxioncomics.com/'
stripUrl = latestUrl + '?p=%s'
imageSearch = compile(r'(wordpress/comics/.+?)"')
prevSearch = compile(r'\| <a href="http://galaxioncomics.com/(\?p=.+?)".+?vious.gif')
help = 'Index format: non'
stripUrl = latestUrl + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://galaxioncomics\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://galaxioncomics\.com/[^"]+)', after="prev"))
help = 'Index format: n-comic/book-n/chapter-n/title-nnn'
class Garanos(_BasicScraper):
latestUrl = 'http://www.garanos.com/'
stripUrl = latestUrl + 'pages/page-%s'
imageSearch = compile(r'<img src=.+?(/pages/.+?)"')
prevSearch = compile(r'<a href="(http://www.garanos.com/pages/page-.../)">&#9668; Previous<')
starter = indirectStarter('http://garanos.alexheberling.com/pages/page-1/',
compile(tagre("a", "href", r'(http://garanos\.alexheberling\.com/pages/[^"]+)', after="navi-last")))
stripUrl = 'http://garanos.alexheberling.com/pages/page-%s'
imageSearch = compile(tagre("img", "src", r'(http://garanos\.alexheberling\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://garanos\.alexheberling\.com/pages/[^"]+)', after="prev"))
help = 'Index format: n (unpadded)'
@ -31,41 +34,30 @@ class GUComics(_BasicScraper):
help = 'Index format: yyyymmdd'
class GenrezvousPoint(_BasicScraper):
latestUrl = 'http://genrezvouspoint.com/'
latestUrl = 'http://www.genrezvouspoint.com/'
stripUrl = latestUrl + 'index.php?comicID=%s'
imageSearch = compile(r'<img src=\'(comics/.+?)\'')
prevSearch = compile(r' <a[^>]+?href="(.+?)">PREVIOUS</a>')
help = 'Index format: nnn'
class GirlGenius(_BasicScraper):
latestUrl = 'http://girlgeniusonline.com/comic.php'
stripUrl = 'http://www.girlgeniusonline.com/comic.php\?date=%s'
stripUrl = 'http://www.girlgeniusonline.com/comic.php?date=%s'
imageSearch = compile(r"(/ggmain/strips/.+?)'")
prevSearch = compile(r"</a> <a href=.+?(/comic.php\?date=.+?)'.+?Previous")
help = 'Index format: yyyymmdd'
class GirlsWithSlingshots(_BasicScraper):
latestUrl = 'http://www.daniellecorsetto.com/gws.html'
stripUrl = 'http://www.daniellecorsetto.com/GWS%s.html'
imageSearch = compile(r'<img src="(images/gws/GWS\d{3}.jpg)"')
prevSearch = compile(r'(archive.php\?today=\d{3}&comic=\d{3})"[^>]*><img[^>]+src="images/gwsmenu/back_off.jpg"')
latestUrl = 'http://www.girlswithslingshots.com/'
stripUrl = latestUrl + 'comic/gws-%s/'
imageSearch = compile(tagre("img", "src", r'(http://cdn\.girlswithslingshots\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.girlswithslingshots\.com/comic/[^"]+)', after="prev"))
help = 'Index format: nnn'
class Girly(_BasicScraper):
latestUrl = 'http://girlyyy.com/'
stripUrl = latestUrl + 'go/%s'
imageSearch = compile(r'<img src="(http://girlyyy.com/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"> &nbsp;&lt;&nbsp;prev')
help = 'Index format: nnn'
class GleefulNihilism(_BasicScraper):
latestUrl = 'http://gleefulnihilism.com/'
stripUrl = latestUrl + 'comics/%s/'
@ -82,7 +74,6 @@ class Goats(_BasicScraper):
help = 'Index format: yymmdd'
class GoneWithTheBlastwave(_BasicScraper):
starter = indirectStarter('http://www.blastwave-comic.com/index.php?p=comic&nro=1',
compile(r'href="(index.php\?p=comic&amp;nro=\d+)"><img src="images/page/default/latest'))
@ -96,34 +87,30 @@ class GoneWithTheBlastwave(_BasicScraper):
return '%02d' % int(compile(r'nro=(\d+)').search(pageUrl).group(1))
class GunnerkrigCourt(_BasicScraper):
latestUrl = 'http://www.gunnerkrigg.com/index2.php'
stripUrl = 'http://www.gunnerkrigg.com/archive_page.php\?comicID=%s'
stripUrl = 'http://www.gunnerkrigg.com/archive_page.php?comicID=%s'
imageSearch = compile(r'<img src="(.+?//comics/.+?)"')
prevSearch = compile(r'<.+?(/archive_page.php\?comicID=.+?)".+?prev')
help = 'Index format: n'
class Gunshow(_BasicScraper):
latestUrl = 'http://gunshowcomic.com/'
stripUrl = latestUrl + 'd/%s.html'
imageSearch = compile(r'src="(/comics/.+?)"')
prevSearch = compile(r'(/d/\d+\.html)"><img[^>]+?src="/images/previous_day')
help = 'Index format: yyyy/mm/dd'
stripUrl = latestUrl + '%s'
imageSearch = compile(tagre("img", "src", r'(http://gunshowcomic\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'[^"]+menu/small/previous\.gif'))
help = 'Index format: n'
class GleefulNihilism(_BasicScraper):
latestUrl = 'http://gleefulnihilism.com/'
stripUrl = latestUrl + 'comics/2009/12/01/just-one-of-the-perks/%s'
imageSearch = compile(r'<img src="(http://gleefulnihilism.com/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')
stripUrl = latestUrl + 'comics/%s/'
imageSearch = compile(tagre("img", "src", r'(http://gleefulnihilism\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://gleefulnihilism\.com/comics/[^"]+)', after="Previous"))
help = 'Index format: yyyy/mm/dd/strip-name'
class GastroPhobia(_BasicScraper):
latestUrl = 'http://www.gastrophobia.com/'
stripUrl = latestUrl + 'index.php?date=%s'
@ -132,7 +119,6 @@ class GastroPhobia(_BasicScraper):
help = 'Index format: yyyy-mm-dd'
class Geeks(_BasicScraper):
latestUrl = 'http://sevenfloorsdown.com/geeks/'
stripUrl = latestUrl + 'archives/%s'
@ -141,7 +127,6 @@ class Geeks(_BasicScraper):
help = 'Index format: nnn'
class GlassHalfEmpty(_BasicScraper):
latestUrl = 'http://www.defectivity.com/ghe/index.php'
stripUrl = latestUrl + '?strip_id=%s'

View file

@ -1,67 +1,22 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import _BasicScraper
class HappyMedium(_BasicScraper):
latestUrl = 'http://happymedium.fast-bee.com/'
stripUrl = latestUrl + '%s'
imageSearch = compile(r'(/comics/.+?)"')
prevSearch = compile(r'com(/.+?)".+?"prev">&#9668')
help = 'Index format: yyyy/mm/chapter-n-page-n'
class Heliothaumic(_BasicScraper):
latestUrl = 'http://thaumic.net/'
stripUrl = latestUrl + '%s'
imageSearch = compile(r'<img src="(http://thaumic.net/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(http://thaumic.net/.+?)">')
help = 'Index format: yyyy/mm/dd/n(unpadded)-comicname'
class Housd(_BasicScraper):
latestUrl = 'http://housd.net/archive_page.php?comicID=1284'
stripUrl = 'http://housd.net/archive_page.php?comicID=%s'
imageSearch = compile(r'"(.+?/comics/.+?)"')
prevSearch = compile(r'"(h.+?comicID=.+?)".+?prev')
help = 'Index format: nnnn'
class HateSong(_BasicScraper):
latestUrl = 'http://hatesong.com/'
stripUrl = latestUrl + '%s/'
imageSearch = compile(r'src="(http://www.hatesong.com/strips/.+?)"')
prevSearch = compile(r'<div class="headernav"><a href="(http://hatesong.com/\d{4}/\d{2}/\d{2})')
help = 'Index format: yyyy/mm/dd'
from ..util import tagre
class HorribleVille(_BasicScraper):
latestUrl = 'http://horribleville.com/d/20090517.html'
stripUrl = 'http://horribleville.com/d/%s.html'
imageSearch = compile(r'src="(/comics/.+?)"')
prevSearch = compile(r'(\d+\.html)"><img[^>]+?src="/images/previous_day.png"')
help = 'Index format: yyyy/mm/dd'
latestUrl = 'http://horribleville.com/'
stripUrl = latestUrl + 'd/%s.html'
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/d/[^"]+)') + tagre("img", "src", r'/images/previous\.png'))
help = 'Index format: yyyymmdd'
class HelpDesk(_BasicScraper):
    # Scraper settings for the comics pager on https://www.eviscerati.org/.
    # Scraping starts from a fixed pager page.
    latestUrl = 'https://www.eviscerati.org/comics?page=78'
    # URL template of a single pager page; %s is the page number (see help).
    stripUrl = 'https://www.eviscerati.org/comics?page=%s'
    # Captures the absolute image URL below /files/comics/.
    imageSearch = compile(tagre("img", "src", r'(https://www\.eviscerati\.org/files/comics/[^"]+)'))
    # Captures the link that directly follows the "pager-previous" list item.
    # The page number is matched with \d+; the earlier "%d+" was a
    # format-specifier typo that only matched the literal text "%d".
    prevSearch = compile(tagre("li", "class", r'pager-previous[^"]+') + tagre("a", "href", r'(/comics\?page=\d+)'))
    help = 'Index format: n'
class HardGraft(_BasicScraper):
latestUrl = 'http://hard-graft.net/'
stripUrl = latestUrl + '?p=%s'
imageSearch = compile(r'<img src="(http://hard-graft.net/comics/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)"')
help = 'Index format: nnn'

View file

@ -1,7 +1,8 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
from re import compile, IGNORECASE
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import _BasicScraper
from ..util import tagre
@ -16,7 +17,7 @@ class IDreamOfAJeanieBottle(_BasicScraper):
class IrregularWebcomic(_BasicScraper):
latestUrl = 'http://www.irregularwebcomic.net/'
stripUrl = latestUrl + 'cgi-bin/comic.pl?comic=%s'
stripUrl = latestUrl + '%s.html'
imageSearch = compile(r'<img .*src="(.*comics/.*(png|jpg|gif))".*>')
prevSearch = compile(r'<a href="(/\d+\.html|/cgi-bin/comic\.pl\?comic=\d+)">Previous ')
help = 'Index format: nnn'
@ -30,54 +31,6 @@ class InsideOut(_BasicScraper):
help = 'Index format: n_comic_name'
class InkTank(_BasicScraper):
shortName = 'inktank'
def starter(self):
return self.baseUrl + self.shortName + '/'
def inkTank(name, shortName):
@classmethod
def _namer(cls, imageUrl, pageUrl):
return '20%s-%s' % (imageUrl[-6:-4], imageUrl[-12:-7])
baseUrl = 'http://www.inktank.com/%s/' % (shortName,)
return type('InkTank_%s' % name,
(_BasicScraper,),
dict(
name='InkTank/' + name,
latestUrl=baseUrl,
stripUrl=baseUrl + 'd/%s.html',
imageSearch=compile(r'<IMG SRC="(/images/[^/]+/cartoons/\d{2}-\d{2}-\d{2}.+?)"'),
prevSearch=compile(r'<A HREF="(/[^/]+/index.cfm\?nav=\d+?)"><IMG SRC="/images/nav_last.gif"'),
help='Index format: n (unpadded)')
)
at = inkTank('AngstTechnology', 'AT')
ww = inkTank('WeakEndWarriors', 'WW')
swo = inkTank('SorryWereOpen', 'SWO')
class IlmanNaista(_BasicScraper):
latestUrl = 'http://kvantti.tky.fi/in/archive_end.shtml'
stripUrl = 'http://kvantti.tky.fi/in/%s.shtml'
imageSearch = compile(r'<img src="(kuvat/in_.+?)"', IGNORECASE)
prevSearch = compile(r'<a href="(\d+.shtml)"><img width="90" height="45" src="deco/edellinen.png" alt="Edellinen"/></a>')
class ICantDrawFeet(_BasicScraper):
latestUrl = 'http://icantdrawfeet.com/'
stripUrl = 'http://icantdrawfeet.com/%s'
imageSearch = compile(r'src="(http://icantdrawfeet.com/comics/.+?)"')
prevSearch = compile(r'<a href="(http://icantdrawfeet.com/.+?)"><img src="http://icantdrawfeet.com/pageimages/prev.png"')
help = 'Index format: yyyy/mm/dd/stripname'
class ItsWalky(_BasicScraper):
latestUrl = 'http://www.itswalky.com/'
stripUrl = latestUrl + 'd/%s.html'

View file

@ -1,26 +1,18 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, MULTILINE
from ..scraper import _BasicScraper
class Jack(_BasicScraper):
latestUrl = 'http://www.pholph.com/'
stripUrl = latestUrl + 'strip.php?id=5&sid=%s'
imageSearch = compile(r'<img src="(./artwork/.+?/Jack.+?)"')
prevSearch = compile(r'\|<a href="(.+?)">Previous Strip</a>')
help = 'Index format: n (unpadded)'
from ..util import tagre
class JerkCity(_BasicScraper):
latestUrl = 'http://www.jerkcity.com/'
stripUrl = latestUrl + 'jerkcity%s'
imageSearch = compile(r'"jerkcity.+?">.+?"(/jerkcity.+?)"')
prevSearch = compile(r'"(jerkcity.+?)">.+?"/jerkcity.+?"')
help = 'Index format: unknown'
stripUrl = latestUrl + '_jerkcity%s.html'
imageSearch = compile(tagre("img", "src", r'(/jerkcity[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(/_jerkcity[^"]+)') + r'&lt;&lt;Previous')
help = 'Index format: n'
class JoeAndMonkey(_BasicScraper):
@ -31,10 +23,10 @@ class JoeAndMonkey(_BasicScraper):
help = 'Index format: nnn'
class JoyOfTech(_BasicScraper):
latestUrl = 'http://www.geekculture.com/joyoftech/index.html'
stripUrl = 'http://www.geekculture.com/joyoftech/joyarchives/%s.html'
imageSearch = compile(r'<img src="(joyimages/.+?|../joyimages/.+?)" alt="The Joy')
prevSearch = compile(r'<a href="((?:joyarchives/)?\w+\.\w{3,4})">(?:<font[^>]*>)?<img[^>]*><br>[\s\n]*Previous Joy', MULTILINE)
latestUrl = 'http://www.geekculture.com/joyoftech/'
stripUrl = latestUrl + 'joyarchives/%s.html'
imageSearch = compile(tagre("img", "src", r'(joyimages/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(joyarchives/[^"]+)') + r'.+?Previous', MULTILINE)
help = 'Index format: nnn'

View file

@ -1,24 +1,12 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
from re import compile, IGNORECASE
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE
from ..scraper import _BasicScraper
from ..util import tagre
class KernelPanic(_BasicScraper):
    # Scraper settings for http://www.ubersoft.net/kpanic/.
    latestUrl = 'http://www.ubersoft.net/kpanic/'
    # URL template of a single strip page; %s is the strip index (see help).
    stripUrl = latestUrl + 'd/%s'
    # Captures an image source containing /kp/kp.
    imageSearch = compile(r'src="(.+?/kp/kp.+?)" ')
    # Captures the link inside the "previous" list item.
    prevSearch = compile(r'<li class="previous"><a href="(.+?)">')
    help = 'Index format: yyyymmdd.html'

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Return the image file name up to its first dot.

        Only imageUrl is used: its last path component is taken and
        everything from the first '.' onwards is cut off.
        """
        filename = imageUrl.rsplit('/', 1)[-1]
        stem, _dot, _rest = filename.partition('.')
        return stem
class Key(_BasicScraper):
latestUrl = 'http://key.shadilyn.com/latestpage.html'
stripUrl = 'http://key.shadilyn.com/pages/%s.html'
@ -27,9 +15,8 @@ class Key(_BasicScraper):
help = 'Index format: nnn'
class Krakow(_BasicScraper):
latestUrl = 'http://www.krakowstudios.com/'
latestUrl = 'http://www.krakow.krakowstudios.com/'
stripUrl = latestUrl + 'archive.php?date=%s'
imageSearch = compile(r'<img src="(comics/.+?)"')
prevSearch = compile(r'<a href="(archive\.php\?date=.+?)"><img border=0 name=previous_day')
@ -38,10 +25,10 @@ class Krakow(_BasicScraper):
class Kukuburi(_BasicScraper):
latestUrl = 'http://www.kukuburi.com/current/'
stripUrl = 'http://thaumic.net/%s'
stripUrl = 'http://www.kukuburi.com/v2/%s/'
imageSearch = compile(r'img src="(http://www.kukuburi.com/../comics/.+?)"')
prevSearch = compile(r'nav-previous.+?"(http.+?)"')
help = 'Index format: non'
help = 'Index format: yyyy/mm/dd/stripname'
class KevinAndKell(_BasicScraper):
@ -61,12 +48,3 @@ class KillerKomics(_BasicScraper):
imageSearch = compile(r'<img src="(http://www.killerkomics.com/FichiersUpload/Comics/.+?)"')
prevSearch = compile(r'<div id="precedent"><a href="(.+?)"')
help = 'Index format: strip-name'
class KrazyLarry(_BasicScraper):
    # Scraper settings for http://www.krazylarry.com/.
    latestUrl = 'http://www.krazylarry.com/'
    # URL template of a single strip page; %s is the strip index (see help).
    stripUrl = latestUrl + 'd/%s.html'
    # Captures image paths starting with /comics or /comic/. Inside a
    # character class "|" is a literal, not an alternation, so the class
    # is [s/] rather than [s|/].
    imageSearch = compile(tagre("img", "src", r'(/comic[s/][^"]+)'))
    # Captures the /d/<digits>.html (or .shtml) part of the link that is
    # followed by the nav_02 or previous_day button image.
    prevSearch = compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif")
    help = 'Index format: yyyymmdd'

View file

@ -1,32 +1,30 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
from re import compile, IGNORECASE
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import _BasicScraper
from ..util import tagre
def keenSpot(name, urls):
    """Create the scraper class for one KeenSpot comic.

    name is the comic name; the class is called 'KeenSpot_<name>' and is
    listed as 'KeenSpot/<name>'.
    urls is either a single base URL string or a (baseUrl, latestUrl) tuple.

    Returns a new subclass of _BasicScraper.
    """
    if isinstance(urls, tuple):
        baseUrl, latestUrl = urls
    else:
        baseUrl = latestUrl = urls
    return type('KeenSpot_%s' % name,
        (_BasicScraper,),
        dict(
            name='KeenSpot/' + name,
            # Fall back to the base URL when a tuple carries an empty latest
            # URL, as the previous implementation did.
            # NOTE(review): confirm whether such tuples exist in keenspotComics.
            latestUrl=latestUrl or baseUrl,
            stripUrl=baseUrl + 'd/%s.html',
            imageSearch=compile(tagre("img", "src", r'([^"]*comics/[^"]+)')),
            # The href value must not begin with its own double quote; all
            # other tagre calls pass the bare attribute value.
            prevSearch=compile(tagre("a", "href", r'([^"]*d/\d{8}\.html)') +
                '(?:<img[^>]+?(?:name="previous_day"|alt="Previous"|src="[^"]*back[^"]*")|Previous comic)'),
            help='Index format: yyyymmdd',
        )
    )
keenspotComics = {
@ -1524,4 +1522,5 @@ keenspotComics = {
'ZuraZura': 'http://zurazura.comicgenesis.com/',
}
globals().update(keenSpot(keenspotComics))
for name, urls in keenspotComics.items():
globals()[name] = keenSpot(name, urls)

View file

@ -1,34 +1,27 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
from re import compile
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import _BasicScraper
from ..helpers import indirectStarter
from ..util import tagre
class LasLindas(_BasicScraper):
latestUrl = 'http://www.katbox.net/laslindas/'
stripUrl = latestUrl + 'index.php?strip_id=%s'
imageSearch = compile(r'"(istrip_files/strips/.+?)"')
prevSearch = compile(r'<a href="(.+?)"><[^>]+?alt="Back"')
help = 'Index format: n (unpadded)'
class LastBlood(_BasicScraper):
latestUrl = 'http://www.lastblood.net/main/'
stripUrl = latestUrl + '%s'
imageSearch = compile(r'(/comicfolder/.+?)" alt')
prevSearch = compile(r'Previous Comic:</small><br />&laquo; <a href="(.+?)">')
help = 'Index format: yyyy/mm/dd/(page number and name)'
latestUrl = 'http://laslindas.katbox.net/'
stripUrl = latestUrl + 'archive/%s/'
imageSearch = compile(tagre("img", "src", r'(http://laslindas\.katbox\.net/wp-content/webcomic/las-lindas/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://laslindas\.katbox\.net/archive/[^"]+)', after="previous"))
help = 'Index format: stripname'
class LesbianPiratesFromOuterSpace(_BasicScraper):
    # Scraper settings for http://rosalarian.com/lesbianpirates/.
    latestUrl = 'http://rosalarian.com/lesbianpirates/'
    # URL template of a single strip page; %s is the strip index (see help).
    stripUrl = latestUrl + 'index.php?p=%s'
    # Captures the relative image path below comics/. The value handed to
    # tagre must not contain its own opening double quote; every other tagre
    # call passes the bare attribute value.
    imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)'))
    # Captures the href of the anchor tag that also mentions "prev".
    # NOTE(review): this expects "?id=<n>" while stripUrl uses "?p=<n>" -
    # confirm which query parameter the site really uses.
    prevSearch = compile(tagre("a", "href", r'(/index\.php\?id=\d+)', after="prev"))
    help = 'Index format: n'
@ -55,34 +48,15 @@ class LookingForGroup(_BasicScraper):
return self.nameSearch.search(pageUrl).group(1)
class Loserz(_BasicScraper):
latestUrl = 'http://bukucomics.com/loserz/'
stripUrl = latestUrl + 'go/%s'
imageSearch = compile(r'<img src="(http://bukucomics.com/loserz/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"> &nbsp;&lt;&nbsp;')
help = 'Index format: n (unpadded)'
class LittleGamers(_BasicScraper):
    # Scraper settings for Little Gamers; strips are addressed by a
    # yyyy/mm/dd/name path below the site root.
    latestUrl = 'http://www.little-gamers.com/'
    stripUrl = latestUrl + '%s/'
    imageSearch = compile(tagre("img", "src", r'(http://www\.little-gamers\.com/comics/[^"]+)'))
    # Dots in the host name are escaped so they only match a literal "."
    # (same form as imageSearch above).
    prevSearch = compile(tagre("a", "href", r'(http://www\.little-gamers\.com/[^"]+)', before="comic-nav-prev-link"))
    help = 'Index format: yyyy/mm/dd/name'
class LegoRobot(_BasicScraper):
latestUrl = 'http://www.legorobotcomics.com/'
stripUrl = latestUrl + '?id=%s'
imageSearch = compile(r'id="the_comic" src="(comics/.+?)"')
prevSearch = compile(r'(\?id=\d+)"><img src="images/back.png"')
help = 'Index format: nnnn'
class LeastICouldDo(_BasicScraper):
latestUrl = 'http://www.leasticoulddo.com/'
stripUrl = latestUrl + 'comic/%s'

View file

@ -1,28 +1,21 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE
from ..scraper import _BasicScraper
from ..helpers import queryNamer
from ..util import tagre
class MadamAndEve(_BasicScraper):
# broken links - disable for now
class _MadamAndEve(_BasicScraper):
latestUrl = 'http://www.madamandeve.co.za/week_of_cartns.php'
stripUrl = None
imageSearch = compile(r'<IMG BORDER="0" SRC="(cartoons/me\d{6}\.(gif|jpg))">')
prevSearch = compile(r'<a href="(weekend_cartoon.php)"')
class MagicHigh(_BasicScraper):
latestUrl = 'http://www.doomnstuff.com/magichigh/index.php'
stripUrl = latestUrl + '?strip_id=%s'
imageSearch = compile(r'(istrip_files/strips/.+?)"')
prevSearch = compile(r'First .+?"(/magichigh.+?)".+?top_back')
help = 'Index format: n'
class Marilith(_BasicScraper):
latestUrl = 'http://www.marilith.com/'
stripUrl = latestUrl + 'archive.php?date=%s'
@ -31,13 +24,12 @@ class Marilith(_BasicScraper):
help = 'Index format: yyyymmdd'
class MarryMe(_BasicScraper):
latestUrl = 'http://marrymemovie.com/main/'
stripUrl = latestUrl + '%s'
imageSearch = compile(r'(/comicfolder/.+?)"')
prevSearch = compile(r'Previous Comic:</small><br />&#171; <a href="(.+?)">')
help = 'Index format: good luck !'
latestUrl = 'http://marryme.keenspot.com/'
stripUrl = latestUrl + 'd/%s.html'
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
prevSearch = compile(tagre("link", "href", r'(/d/[^"]+)', before="prev"))
help = 'Index format: yyyymmdd'
class Meek(_BasicScraper):
@ -49,22 +41,13 @@ class Meek(_BasicScraper):
class MegaTokyo(_BasicScraper):
latestUrl = 'http://www.megatokyo.com/'
latestUrl = 'http://megatokyo.com/'
stripUrl = latestUrl + 'strip/%s'
imageSearch = compile(r'"(strips/.+?)"', IGNORECASE)
prevSearch = compile(r'"(./strip/\d+?)">Prev')
help = 'Index format: nnnn'
class MyPrivateLittleHell(_BasicScraper):
latestUrl = 'http://mutt.purrsia.com/mplh/'
stripUrl = latestUrl + '?date=%s'
imageSearch = compile(r'<img.+?src="(comics/.+?)"')
prevSearch = compile(r'<a.+?href="(\?date=\d+/\d+/\d+)">Prev</a>')
help = 'Index format: mm/dd/yyyy'
class MacHall(_BasicScraper):
latestUrl = 'http://www.machall.com/'
stripUrl = latestUrl + 'view.php?date=%s'
@ -75,43 +58,33 @@ class MacHall(_BasicScraper):
class Melonpool(_BasicScraper):
latestUrl = 'http://www.melonpool.com/'
stripUrl = latestUrl + 'd/%s.html'
imageSearch = compile(tagre("img", "src", r'(/comic[s|/][^"]+)'))
prevSearch = compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif")
help = 'Index format: yyyymmdd'
stripUrl = latestUrl + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.melonpool\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.melonpool\.com/\?p=\d+)', after="prev"))
help = 'Index format: n'
class Misfile(_BasicScraper):
latestUrl = 'http://www.misfile.com/'
stripUrl = latestUrl + '?page=%s'
imageSearch = compile(r'<img src="(overlay\.php\?pageCalled=\d+)">')
prevSearch = compile(r'<a href="(\?page=\d+)"><img src="/images/back\.gif"')
help = 'Index format: n (unpadded)'
namer = queryNamer('pageCalled')
stripUrl = latestUrl + '?date=%s'
imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)'))
prevSearch = compile(tagre("link", "href", r'([^"]+)', before="Previous"))
help = 'Index format: yyyy-mm-dd'
class MysteriesOfTheArcana(_BasicScraper):
    # Scraper settings for Mysteries of the Arcana; strips are addressed by
    # the "cid" query parameter of index.php.
    latestUrl = 'http://mysteriesofthearcana.com/'
    stripUrl = latestUrl + 'index.php?action=comics&cid=%s'
    imageSearch = compile(tagre("img", "src", r'(image\.php\?type=com&i=[^"]+)'))
    # An empty capture group "()" can only ever capture the empty string, so
    # no previous-strip URL was ever extracted. Capture the strip link itself,
    # using the same URL shape as stripUrl.
    # NOTE(review): assumes the href is relative and uses a literal "&" -
    # confirm against the live page.
    prevSearch = compile(tagre("a", "href", r'(index\.php\?action=comics&cid=[^"]+)', after="navprevius"))
    help = 'Index format: n (unpadded)'
# XXX move to keenspot?
class MysticRevolution(_BasicScraper):
    # Scraper settings for Mystic Revolution (hosted on keenspot); strips are
    # addressed by the "cid" query parameter.
    latestUrl = 'http://mysticrevolution.keenspot.com/'
    stripUrl = latestUrl + '?cid=%s'
    imageSearch = compile(tagre("img", "src", r'(http://cdn\.mysticrevolution\.keenspot\.com/comics/[^"]+)'))
    # The "?cid=N" target lives in the link's href attribute; "prev" is the
    # marker expected earlier in the tag (rel="prev"). Matching the value
    # against the "rel" attribute itself could never succeed.
    # NOTE(review): assumes the href is relative ("?cid=N") - confirm.
    prevSearch = compile(tagre("link", "href", r'(\?cid=\d+)', before="prev"))
    help = 'Index format: n (unpadded)'
class MontyAndWooly(_BasicScraper):
latestUrl = 'http://www.montyandwoolley.co.uk/'
stripUrl = latestUrl + '%s'
imageSearch = compile(r'<img src="(http://montyandwoolley.co.uk/.+?)"')
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
help = 'Index format: yyyy/mm/dd/strip-name'

View file

@ -1,21 +1,21 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
from re import compile, IGNORECASE
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, sub
from ..scraper import _BasicScraper
from ..helpers import indirectStarter, _PHPScraper
from ..util import tagre
class NamirDeiter(_BasicScraper):
    # Scraper settings for Namir Deiter; strips are addressed by a yyyymmdd
    # date passed to comics/index.php.
    latestUrl = 'http://www.namirdeiter.com/'
    stripUrl = latestUrl + 'comics/index.php?date=%s'
    # Image file names are date based (several digits), so allow one or more
    # digits instead of exactly one. quote="" is kept as given: the src value
    # is matched without surrounding quote characters.
    imageSearch = compile(tagre("img", "src", r'(http://www\.namirdeiter\.com/comics/\d+\.jpg)', quote=""))
    # The link must be directly followed by the text "Previous".
    prevSearch = compile(tagre("a", "href", r'(http://www\.namirdeiter\.com/comics/index\.php\?date=\d+)', quote="'")+"Previous")
    help = 'Index format: yyyymmdd'
class NeoEarth(_BasicScraper):
latestUrl = 'http://www.neo-earth.com/NE/'
stripUrl = latestUrl + 'index.php?date=%s'
@ -24,23 +24,11 @@ class NeoEarth(_BasicScraper):
help = 'Index format: yyyy-mm-dd'
class Nervillsaga(_BasicScraper):
latestUrl = 'http://www.nervillsaga.com/'
stripUrl = latestUrl + 'index.php?s=%s'
imageSearch = compile(r'"(pic/.+?)"')
prevSearch = compile(r'"(.+?)">Previous')
help = 'Index format: nnn'
class NewAdventuresOfBobbin(_BasicScraper):
latestUrl = 'http://www.bobbin-comic.com/'
stripUrl = latestUrl + 'wordpress/?p=%s'
imageSearch = compile(r'<img src="(http://www.bobbin-comic.com/wordpress/comics/.+?)"')
prevSearch = compile(r'<a href="(.+?)"><span class="prev">')
help = 'Index format: n'
latestUrl = 'http://www.bobbin-comic.com/bobbin_strips/'
imageSearch = compile(tagre("a", "href", r'(\d+\.gif)'))
prevSearch = None
help = 'Index format: none'
class NewWorld(_BasicScraper):
@ -51,25 +39,22 @@ class NewWorld(_BasicScraper):
help = 'Index format: yyyy/mm/dd/stripn'
class Nicky510(_BasicScraper):
latestUrl = 'http://www.nicky510.com/'
stripUrl = latestUrl + '%s'
imageSearch = compile(r'(http://www.nicky510.com/comics/.+?)"')
prevSearch = compile(r'<a href="(http://www.nicky510.com/.+?)" class="navi navi-prev"')
help = 'Index format: yyyy/mm/dd/stripname/'
latestUrl = 'http://www.nickyitis.com/'
stripUrl = latestUrl + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.nickyitis\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.nickyitis\.com/comic/[^"]+)', after="Previous"))
help = 'Index format: stripname'
class NoNeedForBushido(_BasicScraper):
    # Scraper settings for No Need For Bushido; strips are addressed by a
    # yyyy/comic/nnn path.
    latestUrl = 'http://noneedforbushido.com/latest/'
    # Strip pages live directly below the site root (that is what prevSearch
    # captures), not below the "latest/" page, so do not build on latestUrl.
    # NOTE(review): confirm the strip URL layout against the live site.
    stripUrl = 'http://noneedforbushido.com/%s/'
    imageSearch = compile(tagre("img", "src", r'(http://noneedforbushido\.com/comics/comic/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(http://noneedforbushido\.com/[^"]+)', after="previous-comic-link"))
    help = 'Index format: yyyy/comic/nnn'
class Nukees(_BasicScraper):
latestUrl = 'http://www.nukees.com/'
stripUrl = latestUrl + 'd/%s'
@ -79,82 +64,58 @@ class Nukees(_BasicScraper):
class _NuklearPower(_BasicScraper):
imageSearch = compile(r'<img src="(http://www.nuklearpower.com/comics/.+?)"')
prevSearch = compile(r'><a href="(.+?)">Previous</a>')
help = 'Index format: yyyy/mm/dd/name'
def nuklearpower(name, shortname):
baseUrl = 'http://www.nuklearpower.com/'
latestUrl = "%s%s/" % (baseUrl, shortname)
classname = sub("[^0-9a-zA-Z_]", "", name)
@property
def baseUrl(self):
return 'http://www.nuklearpower.com/%s/' % (self.shortName,)
def starter(self):
return self.baseUrl
@property
def stripUrl(self):
return self.baseUrl + '%s'
globals()[classname] = type('NuklearPower_%s' % classname,
(_BasicScraper,),
dict(
name='NuklearPower/' + classname,
latestUrl = latestUrl,
stripUrl = latestUrl + '%s',
imageSearch = compile(tagre("img", "src", r'(http://www\.nuklearpower\.com/comics/[^"]+)')),
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "Previous"),
help = 'Index format: yyyy/mm/dd/name',
)
)
npstrips = {
'8BitTheater': '8-bit-theater',
'Warbot': 'warbot',
'HowIKilledYourMaster': 'hikym',
'AtomicRobo': 'atomic-robo',
}
class NP8BitTheater(_NuklearPower):
name = 'NuklearPower/8BitTheater'
shortName = '8-bit-theater'
class NPWarbot(_NuklearPower):
name = 'NuklearPower/Warbot'
shortName = 'warbot'
class NPHIKYM(_NuklearPower):
name = 'NuklearPower/HowIKilledYourMaster'
shortName = 'hikym'
class NPAtomicRobo(_NuklearPower):
name = 'NuklearPower/AtomicRobo'
shortName = 'atomic-robo'
for name, shortname in npstrips.items():
nuklearpower(name, shortname)
class NekoTheKitty(_PHPScraper):
    # Scraper settings for Neko the Kitty, based on the PHP scraper helper.
    basePath = 'http://www.nekothekitty.net/cusp/'
    latestUrl = basePath
    # A link to a comic page immediately followed by the "smallprev.png"
    # navigation image. All dots are escaped so they match a literal "."
    # only.
    prevSearch = compile(tagre("a", "href", r'(http://www\.nekothekitty\.net/comics/[^"]+)') +
        tagre("img", "src", r'http://www\.nekothekitty\.net/files/smallprev\.png'))
class NichtLustig(_BasicScraper):
stripUrl = 'http://www.nichtlustig.de/toondb/%s.html'
imageSearch = compile(r'<img src="([^"]+)" id="cartoon"', IGNORECASE)
prevSearch = compile(r'<a href="(\d+\.html)"[^<>]*><img[^<>]*id="pfeil_links', IGNORECASE)
imageSearch = compile('background-image:url\((http://static\.nichtlustig\.de/comics/full/\d+\.jpg)')
prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
help = 'Index format: yymmdd'
starter = indirectStarter('http://www.nichtlustig.de/main.html',
compile(r'<a href="([^"]*toondb/\d+\.html)"', IGNORECASE))
class NinthElsewhere(_BasicScraper):
latestUrl = 'http://www.9thelsewhere.com/icenter.html'
stripUrl = 'http://www.9thelsewhere.com/%s/9e%s_%s.html'
imageSearch = compile(r'<img src="([^"]*9e\d+_\d+\.jpg)"')
prevSearch = compile(r'<a href="([^"]+\.html)">\s*PREV')
help = 'Index format: year-chapter-page'
def setStrip(self, index):
self.currentUrl = self.stripUrl % tuple(map(int, index.split('-')))
compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)')))
class Nodwick(_BasicScraper):
stripUrl = None
imageSearch = compile(r'<img src="(http://nodwick.humor.gamespy.com/gamespyarchive/strips/[^"]*)"', IGNORECASE)
prevSearch = compile(r'<a href="(index.php\?date=[0-9-]*)"><img src="back.jpg"', IGNORECASE)
starter = indirectStarter('http://nodwick.humor.gamespy.com/gamespyarchive/index.php', prevSearch)
help = 'Index format: None'
latestUrl = 'http://comic.nodwick.com/'
stripUrl = latestUrl + "?p=%s"
imageSearch = compile(tagre("img", "src", r'(http://comic\.nodwick\.com/nodwickstrips/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://comic\.nodwick\.com/\?p=\d+)', after="prev"))
help = 'Index format: stripnumber'
class NekkoAndJoruba(_BasicScraper):

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..util import tagre

View file

@ -1,57 +1,47 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
from re import compile, IGNORECASE
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import _BasicScraper
from ..helpers import indirectStarter
from ..util import tagre
class OctopusPie(_BasicScraper):
starter = indirectStarter('http://www.octopuspie.com/2007-05-14/001-pea-wiggle/',
compile(r'<a href="(http://www.octopuspie.com/.+?)"><b>latest comic</b>', IGNORECASE))
starter = indirectStarter('http://www.octopuspie.com/',
compile(tagre("a", "href", r'(http://www\.octopuspie\.com/[^"]+)') +
tagre("img", "src", r'http://www\.octopuspie\.com/junk/latest\.png')))
stripUrl = 'http://www.octopuspie.com/%s'
imageSearch = compile(r'<img src="(http://www.octopuspie.com/strippy/.+?)"')
prevSearch = compile(r'<link rel=\'prev\'[^>]+?href=\'(http://www.octopuspie.com/.+?)\'')
imageSearch = compile(tagre("img", "src", r'(http://www\.octopuspie\.com/strippy/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.octopuspie\.com/[^"]+)', after="prev"))
help = 'Index format: yyyy-mm-dd/nnn-strip-name'
class OddFish(_BasicScraper):
latestUrl = 'http://www.odd-fish.net/'
stripUrl = latestUrl + 'viewing.php?&comic_id=%s'
imageSearch = compile(r'<img src="(images/\d{1,4}.\w{3,4})" ')
prevSearch = compile(r'<a href="(.+?)"><img src="http://www.odd-fishing.net/i/older.gif" ')
help = 'Index format: n (unpadded)'
class OhMyGods(_BasicScraper):
latestUrl = 'http://ohmygods.co.uk/'
stripUrl = latestUrl + 'strips/%s'
imageSearch = compile(r'<p class="omgs-strip"><img src="(/system/files/.+?)"')
prevSearch = compile(r'<li class="custom_pager_prev"><a href="(/strips/.+?)"')
help = 'Index format: yyyy-mm-dd'
stripUrl = latestUrl + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://www\.odd-fish\.net/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.odd-fish\.net/[^"]+)', after="navi-prev"))
help = 'Index format: stripname'
class OnTheEdge(_BasicScraper):
latestUrl = 'http://www.ontheedgecomics.com/'
latestUrl = 'http://ontheedgecomics.com/'
stripUrl = 'http://ontheedgecomics.com/comic/ote%s'
imageSearch = compile(r'<img src="(http://ontheedgecomics.com/comics/.+?)"')
prevSearch = compile(r'<a href="([^"]+)" rel="prev">')
help = 'Index format: nnn (unpadded)'
class OneQuestion(_BasicScraper):
latestUrl = 'http://onequestioncomic.com/'
stripUrl = latestUrl + 'comics/%s/'
imageSearch = compile(r'(istrip_files.+?)"')
prevSearch = compile(r'First.+?"(comic.php.+?)".+?previous.png')
stripUrl = latestUrl + 'comic.php?strip_id=%s'
imageSearch = compile(tagre("img", "src", r'(istrip_files/strips/\d+\.jpg)'))
prevSearch = compile(tagre("a", "href", r'(comic\.php\?strip_id=\d+)') + tagre("img", "src", r'img/arrow_prev\.jpg'))
help = 'Index format: n (unpadded)'
class OurHomePlanet(_BasicScraper):
latestUrl = 'http://gdk.gd-kun.net/'
stripUrl = latestUrl + '%s.html'
@ -61,22 +51,19 @@ class OurHomePlanet(_BasicScraper):
class OkCancel(_BasicScraper):
stripUrl = 'http://www.ok-cancel.com/comic/%s.html'
imageSearch = compile(r'src="(http://www.ok-cancel.com/strips/okcancel\d{8}.gif)"', IGNORECASE)
prevSearch = compile(r'<div class="previous"><a href="(http://www.ok-cancel.com/comic/\d{1,4}.html)">', IGNORECASE)
starter = indirectStarter('http://www.ok-cancel.com/', prevSearch)
stripUrl = 'http://okcancel.com/comic/%s.html'
imageSearch = compile(tagre("img", "src", r'(http://okcancel\.com/strips/okcancel\d{8}\.gif)'))
prevSearch = compile(tagre("div", "class", "previous") + tagre("a", "href", r'(http://okcancel\.com/comic/\d{1,4}\.html)'))
starter = indirectStarter('http://okcancel.com/', prevSearch)
help = 'Index format: yyyymmdd'
class Oglaf(_BasicScraper):
starter = indirectStarter('http://oglaf.com/',
compile(r'<a href="(.+?)"><img src="over18.gif"', IGNORECASE))
stripUrl = 'http://oglaf.com/%s.html'
imageSearch = compile(r'/><img src="(.+?)"[^>]+?width="760" height="596"', IGNORECASE)
prevSearch = compile(r'<a href="(.+?)"[^>]+?><img src="prev.gif"', IGNORECASE)
help = 'Index format: nn'
latestUrl = 'http://oglaf.com/'
stripUrl = latestUrl + '%s/'
imageSearch = compile(tagre("img", "src", r'(/media/comic/[^"]+)', before="strip"))
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("div", "id", "pvs"))
help = 'Index format: stripname/nn'
class OverCompensating(_BasicScraper):

View file

@ -1,17 +1,19 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
from re import compile, IGNORECASE
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE
from ..scraper import _BasicScraper
from ..helpers import bounceStarter, queryNamer
from ..helpers import bounceStarter, queryNamer, indirectStarter
from ..util import tagre
class PartiallyClips(_BasicScraper):
latestUrl = 'http://www.partiallyclips.com/'
stripUrl = latestUrl + 'index.php?id=%s'
imageSearch = compile(r'"(http://www.partiallyclips.com/storage/.+?)"')
prevSearch = compile(r'"(index.php\?id=.+?)".+?prev')
help = 'Index format: nnnn'
latestUrl = 'http://partiallyclips.com/'
stripUrl = latestUrl + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://partiallyclips\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://partiallyclips\.com/[^"]+)', before="prev"))
help = 'Index format: yyyy/mm/dd/stripname'
@ -33,20 +35,23 @@ class PebbleVersion(_BasicScraper):
class PennyAndAggie(_BasicScraper):
latestUrl = 'http://www.pennyandaggie.com/index.php'
stripUrl = latestUrl + '?p=%s'
imageSearch = compile(r'src=".+?(/comics/.+?)"')
prevSearch = compile(r"</a><a href='(index.php\?p=.+?)'.+?prev")
baseUrl = 'http://www.pennyandaggie.com/'
stripUrl = baseUrl + 'index.php?p=%s'
imageSearch = compile(tagre("a", "href", r'(http://www\.pennyandaggie\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r"(index\.php\?p=\d+)", quote="'") +
tagre("img", "src", r'http://pennyandaggie\.com/images/previous_day\.gif', quote=""))
starter = indirectStarter(baseUrl, prevSearch)
help = 'Index format: n (unpadded)'
class PennyArcade(_BasicScraper):
starter = bounceStarter('http://www.penny-arcade.com/comic/',
compile(r'<a href="(/comic/[^"]+)">Next</a>'))
stripUrl = 'http://www.penny-arcade.com/comic/%s/'
imageSearch = compile(r'(?<!<!--)<img src="(http://art\.penny-arcade\.com/photos/[^"]+)"')
prevSearch = compile(r'<a href="(/comic/[^"]+)">Back</a>')
baseUrl = 'http://penny-arcade.com/comic/'
starter = bounceStarter(baseUrl,
compile(tagre("a", "href", r'(http://penny-arcade\.com/comic/[^"]+)', before="bntNext"))
)
stripUrl = baseUrl + '%s/'
imageSearch = compile(tagre("img", "src", r'(http://art\.penny-arcade\.com/photos/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://penny-arcade\.com/comic/[^"]+)', before="bntPrev"))
help = 'Index format: yyyy/mm/dd'
@classmethod
@ -58,25 +63,17 @@ class PennyArcade(_BasicScraper):
class PeppermintSaga(_BasicScraper):
latestUrl = 'http://www.pepsaga.com/'
stripUrl = latestUrl + 'comics/%s/'
imageSearch = compile(r'src=.+?(http.+?/comics/.+?)"')
prevSearch = compile(r'First</a><a href="(http://www.pepsaga.com/comics/.+?/)"')
help = 'Index format: non'
class PerkiGoth(_BasicScraper):
latestUrl = 'http://mutt.purrsia.com/main.php'
stripUrl = latestUrl + '?date=%s'
imageSearch = compile(r'<img.+?src="(comics/.+?)"')
prevSearch = compile(r'<a.+?href="(\?date=\d+/\d+/\d+)">Prev</a>')
help = 'Index format: mm/dd/yyyy'
stripUrl = latestUrl + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.pepsaga\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.pepsaga\.com/\?p=\d+)', after="prev"))
help = 'Index format: number'
class Pixel(_BasicScraper):
latestUrl = 'http://www.chrisdlugosz.net/pixel/'
stripUrl = latestUrl + '%s.shtml'
imageSearch = compile(r'<IMG SRC="(\d+\.png)" ALT=""><BR><BR>')
prevSearch = compile(r'<A HREF="(\d+\.shtml)"><IMG SRC="_prev.png" BORDER=0 ALT=""></A>')
latestUrl = 'http://pixelcomic.net/'
stripUrl = latestUrl + '%s.php'
imageSearch = compile(tagre("img", "src", r'(\d+\.png)'))
prevSearch = compile(tagre("a", "href", r'(http://pixelcomic\.net/\d+\.php)', before="prev"))
help = 'Index format: nnn'
@ -91,19 +88,22 @@ class PiledHigherAndDeeper(_BasicScraper):
class Precocious(_BasicScraper):
latestUrl = 'http://www.precociouscomic.com/'
stripUrl = latestUrl + 'comic.php?page=%s'
imageSearch = compile(r'(archive/strips/.+?)"')
prevSearch = compile(r'First.+?(comic.php\?page=.+?)">Previous<')
help = 'Index format: n (unpadded)'
baseUrl = 'http://www.precociouscomic.com/'
starter = indirectStarter(baseUrl,
compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png"))
)
stripUrl = baseUrl + 'archive/comic/%s'
imageSearch = compile(tagre("img", "src", r'(/comics/\d+\.jpg)'))
prevSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/back_arrow\.png"))
help = 'Index format: yyyy/mm/dd'
class PvPonline(_BasicScraper):
    # Scraper settings for PvP; strips are addressed by a
    # yyyy/mm/dd/stripname path below /comic/.
    latestUrl = 'http://pvponline.com/comic'
    # latestUrl has no trailing slash, so add the separator here; otherwise
    # the index would be glued onto "comic" (".../comic2012/...").
    stripUrl = latestUrl + '/%s'
    imageSearch = compile(tagre("img", "src", r'(http://newcdn\.pvponline\.com/img/comic/pvp\d+\.jpg)'))
    prevSearch = compile(tagre("a", "href", r'(http://pvponline\.com/comic/[^"]+)', after="Previous"))
    help = 'Index format: yyyy/mm/dd/stripname'
@ -113,7 +113,7 @@ def pensAndTales(name, baseUrl):
dict(
name='PensAndTales/' + name,
latestUrl=baseUrl,
stripUrl=baseUrl + '?date=',
stripUrl=baseUrl + '?date=%s',
imageSearch=compile(r'<img[^>]+?src="([^"]*?comics/.+?)"', IGNORECASE),
prevSearch=compile(r'<a href="([^"]*?\?date=\d+)">(:?<img[^>]+?alt=")?Previous Comic', IGNORECASE),
help='Index format: yyyymmdd')
@ -126,30 +126,26 @@ def pensAndTales(name, baseUrl):
# strangekith = pensAndTales('Strangekith', 'http://strangekith.pensandtales.com/')
# XXX: comic broken
# fireflycross = pensAndTales('FireflyCross', 'http://fireflycross.pensandtales.com/')
thosedestined = pensAndTales('ThoseDestined', 'http://thosedestined.pensandtales.com/')
evilish = pensAndTales('Evilish', 'http://evilish.pensandtales.com/')
redallover = pensAndTales('RedAllOver', 'http://redallover.pensandtales.com/')
stickyevil = pensAndTales('StickyEvil', 'http://stickyevil.pensandtales.com/')
# XXX: moved / layout changed
#ynt = pensAndTales('YamiNoTainai', 'http://ynt.pensandtales.com/')
earthbound = pensAndTales('Earthbound', 'http://earthbound.pensandtales.com/')
class ProperBarn(_BasicScraper):
    # Scraper settings for Proper Barn; strips are addressed by a numeric
    # index below go/gag/.
    latestUrl = 'http://www.nitrocosm.com/go/gag/'
    stripUrl = latestUrl + '%s/'
    # The dot before the file extension is escaped so it matches a literal
    # "." only.
    imageSearch = compile(tagre("img", "src", r'(http://content\.nitrocosm\.com/gag/\d+\.png)'))
    prevSearch = compile(tagre("a", "href", r'(http://www\.nitrocosm\.com/go/gag/\d+/)', after="nav_btn_previous"))
    help = 'Index format: nnn'
class PunksAndNerds(_BasicScraper):
latestUrl = 'http://www.punksandnerds.com/'
stripUrl = latestUrl + '?id=%s/'
imageSearch = compile(r'<img src="(http://www.punksandnerds.com/img/comic/.+?)"')
prevSearch = compile(r'<td><a href="(.+?)"[^>]+?><img src="backcomic.gif"')
stripUrl = latestUrl + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://www\.punksandnerds\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.punksandnerds\.com/\?p=\d+)', after="navi-prev"))
help = 'Index format: nnn'

View file

@ -1,8 +1,10 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
from re import compile
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import _BasicScraper
from ..util import tagre
class QuestionableContent(_BasicScraper):
@ -13,10 +15,9 @@ class QuestionableContent(_BasicScraper):
help = 'Index format: n (unpadded)'
class Qwantz(_BasicScraper):
latestUrl = 'http://www.qwantz.com/index.php'
stripUrl = latestUrl + '?comic=%s'
imageSearch = compile(r'<img src="(http://www.qwantz.com/comics/.+?)" class="comic"')
prevSearch = compile(r'"><a href="(.+?)">&larr; previous</a>')
imageSearch = compile(tagre("img", "src", r'(http://www\.qwantz\.com/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://www\.qwantz\.com/index\.php\?comic=\d+)', before="prev"))
help = 'Index format: n'

View file

@ -1,9 +1,11 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
from re import compile
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import _BasicScraper
from ..helpers import bounceStarter
from ..util import tagre
class RadioactivePanda(_BasicScraper):
@ -14,32 +16,31 @@ class RadioactivePanda(_BasicScraper):
help = 'Index format: n (no padding)'
# XXX add other comics at http://petitesymphony.com/comics/
class Rascals(_BasicScraper):
    # Scraper settings for Rascals; strips are addressed by page number
    # ("rascals-pg-<num>") below comic/.
    latestUrl = 'http://rascals.petitesymphony.com/'
    # latestUrl already ends with "/", so the path must not start with one
    # (the previous form produced "...com//comic/...").
    stripUrl = latestUrl + 'comic/rascals-pg-%s/'
    imageSearch = compile(tagre("img", "src", r'(http://rascals\.petitesymphony\.com/files/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(http://rascals\.petitesymphony\.com/comic/[^"]+)', after="Previous"))
    help = 'Index format: num'
class RealLife(_BasicScraper):
    # Scraper settings for Real Life Comics; strips are addressed by a
    # yymmdd index below archive/.
    latestUrl = 'http://www.reallifecomics.com/'
    stripUrl = latestUrl + 'archive/%s.html'
    imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
    # An archive link immediately followed by the "nav_prev.png" image. The
    # dot before "html" is escaped so it matches a literal "." only.
    prevSearch = compile(tagre("a", "href", r'(/archive/\d+\.html)') + tagre("img", "src", r'/images/nav_prev\.png'))
    # Stray closing parenthesis removed from the help text.
    help = 'Index format: yymmdd'
class RedString(_BasicScraper):
    # Scraper settings for Red String; strips are addressed by the "id"
    # query parameter of index.php.
    latestUrl = 'http://www.redstring.strawberrycomics.com/'
    stripUrl = latestUrl + 'index.php?id=%s'
    # The captured value must not start with a double quote; every other
    # scraper using tagre captures the bare attribute value.
    imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(/index\.php\?id=\d+)', after="prev"))
    help = 'Index format: nnn'
class Roza(_BasicScraper):
latestUrl = 'http://www.junglestudio.com/roza/index.php'
stripUrl = latestUrl + '?date=%s'
@ -58,10 +59,3 @@ class RedMeat(_BasicScraper):
@classmethod
def namer(cls, imageUrl, pageUrl):
return imageUrl.split('/')[-2]
class RunningWild(_BasicScraper):
latestUrl = 'http://runningwild.katbox.net/'
stripUrl = latestUrl + 'index.php?strip_id=%s'
imageSearch = compile(r'="(.+?strips/.+?)"')
prevSearch = compile(r'(index.php\?strip_id=.+?)".+?navigation_back')
help = 'Index format: n (unpadded)'

View file

@ -1,22 +1,22 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, MULTILINE, IGNORECASE, sub
from os.path import splitext
from ..scraper import _BasicScraper
from ..helpers import bounceStarter, indirectStarter
from ..helpers import indirectStarter
from ..util import tagre
class SailorsunOrg(_BasicScraper):
latestUrl = 'http://www.sailorsun.org/'
stripUrl = latestUrl + 'browse.php?comicID=%s'
imageSearch = compile(r'(comics/.+?)"')
prevSearch = compile(r'/(browse.php.+?)".+?/prev.gif')
stripUrl = latestUrl + '?p=%s'
imageSearch = compile(tagre("img", "src", r'(http://sailorsun\.org/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://sailorsun\.org/\?p=\d+)', after="prev"))
help = 'Index format: n (unpadded)'
class SamAndFuzzy(_BasicScraper):
latestUrl = 'http://www.samandfuzzy.com/'
stripUrl = 'http://samandfuzzy.com/%s'
@ -25,53 +25,51 @@ class SamAndFuzzy(_BasicScraper):
help = 'Index format: nnnn'
class SarahZero(_BasicScraper):
latestUrl = 'http://www.sarahzero.com/'
stripUrl = latestUrl + 'sz_%s.html'
imageSearch = compile(r'<img src="(z_(?:(?:spreads)|(?:temp)).+?)" alt=""')
prevSearch = compile(r'onmouseout="changeImages\(\'sz_05_nav\',\'z_site/sz_05_nav.gif\'\);return true" href="(sz_.+?)">')
imageSearch = compile(tagre("img", "src", r'(z_spreads/sz_[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(sz_\d+\.html)') + tagre("img", "src", r'z_site/sz_05_nav\.gif'))
help = 'Index format: nnnn'
class ScaryGoRound(_BasicScraper):
latestUrl = 'http://www.scarygoround.com/'
stripUrl = latestUrl + '?date=%s'
imageSearch = compile(r'<img src="(strips/\d{8}\..{3})"')
prevSearch = compile(r'f><a href="(.+?)"><img src="site-images/previous.png"')
imageSearch = compile(tagre("img", "src", r'(strips/\d+\.png)'))
prevSearch = compile(tagre("a", "href", r'(\?date=\d+)') + "Previous")
help = 'Index format: n (unpadded)'
class SchlockMercenary(_BasicScraper):
    # Scraper settings for Schlock Mercenary; strips are addressed by a
    # yyyy-mm-dd date directly below the site root.
    latestUrl = 'http://www.schlockmercenary.com/'
    stripUrl = latestUrl + '%s'
    imageSearch = compile(tagre("img", "src", r'(http://static\.schlockmercenary\.com/comics/[^"]+)'))
    # "/d+" matched a slash followed by literal "d" characters; the intended
    # digit class is "\d". The target is a date path, matching stripUrl/help.
    # NOTE(review): date-shaped relative href is inferred from stripUrl and
    # the help text - confirm against the live page.
    prevSearch = compile(tagre("a", "href", r'(/\d+-\d+-\d+)', after="nav-previous"))
    help = 'Index format: yyyy-mm-dd'
class SchoolBites(_BasicScraper):
latestUrl = 'http://www.schoolbites.net/'
latestUrl = 'http://schoolbites.net/'
stripUrl = latestUrl + 'd/%s.html'
imageSearch = compile(r'(/comics/.+?)"')
prevSearch = compile(r'first_day.+?(/d/.+?.html).+?/previous_day.gif')
imageSearch = compile(tagre("img", "src", r'(http://cdn\.schoolbites\.net/comics/[^"]+)'))
prevSearch = compile(tagre("a", "href", r'(http://schoolbites\.net/d/\d+\.html)', after="prev"))
help = 'Index format: yyyymmdd'
class Sheldon(_BasicScraper):
    # Scraper settings for Sheldon; strips are addressed by a yymmdd index
    # below archive/.
    latestUrl = 'http://www.sheldoncomics.com/'
    stripUrl = latestUrl + 'archive/%s.html'
    imageSearch = compile(tagre("img", "src", r'(/strips/[^"]+)'))
    # The capture group needs its opening parenthesis: with only the closing
    # ")" the pattern is unbalanced and re.compile() raises an error as soon
    # as the class body is executed.
    prevSearch = compile(tagre("a", "href", r'(/archive/\d+\.html)', after="sidenav-prev"))
    help = 'Index format: yymmdd'
class Shortpacked(_BasicScraper):
    """Scraper for Shortpacked (www.shortpacked.com)."""
    latestUrl = 'http://www.shortpacked.com/'
    # NOTE(review): stripUrl and help describe /d/<date>.html pages while
    # prevSearch matches /<digits>/comic/... links -- confirm they agree.
    stripUrl = latestUrl + 'd/%s.html'
    imageSearch = compile(tagre("img", "src", r'(http://www\.shortpacked\.com/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(http://www\.shortpacked\.com/\d+/comic/[^"]+)', after="prev"))
    help = 'Index format: yyyymmdd'
@ -85,12 +83,11 @@ class SinFest(_BasicScraper):
class SlightlyDamned(_BasicScraper):
    """Scraper for Slightly Damned (www.sdamned.com)."""
    latestUrl = 'http://www.sdamned.com/'
    stripUrl = latestUrl + '%s/'
    imageSearch = compile(tagre("img", "src", r'(http://www\.sdamned\.com/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(http://www\.sdamned\.com/[^"]+)', after="prev"))
    help = 'Index format: yyyy/mm/number'
class SluggyFreelance(_BasicScraper):
@ -103,81 +100,51 @@ class SluggyFreelance(_BasicScraper):
class SodiumEyes(_BasicScraper):
    """Scraper for Sodium Eyes (sodiumeyes.com)."""
    latestUrl = 'http://sodiumeyes.com/'
    stripUrl = latestUrl + '%s/'
    imageSearch = compile(tagre("img", "src", r'(http://sodiumeyes\.com/comic/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(http://sodiumeyes\.com/[^"]+)', after="prev"))
    help = 'Index format: yyyy/mm/dd/stripname'
    # NOTE(review): both latestUrl and an indirect starter are defined here;
    # confirm which of the two the scraper base class is meant to use.
    starter = indirectStarter('http://sodiumeyes.com/',
        compile(r'<a href="http://sodiumeyes.com/(\d\d\d\d.+?/)">'))
class SpareParts(_BasicScraper):
    """Scraper for Spare Parts (www.sparepartscomics.com)."""
    baseUrl = 'http://www.sparepartscomics.com/'
    # Traversal starts from a fixed, dated strip page.
    latestUrl = baseUrl + 'comics/?date=20080328'
    # '%s' (not 's%') is the placeholder for the strip index; the reversed
    # form makes the string formatting fail with "incomplete format".
    stripUrl = baseUrl + 'comics/?date=%s'
    # The URL is wrapped in a capture group like every other imageSearch
    # pattern in this module.
    imageSearch = compile(tagre("img", "src", r'(http://www\.sparepartscomics\.com/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)') + "Previous Comic")
    help = 'Index format: yyyymmdd'


class StarslipCrisis(_BasicScraper):
    """Scraper for Starslip Crisis (www.starslipcrisis.com)."""
    latestUrl = 'http://www.starslipcrisis.com/'
    stripUrl = latestUrl + 'd/%s.html'
    imageSearch = compile(tagre("img", "src", r'(/comic[s|/][^"]+)'))
    prevSearch = compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif")
    help = 'Index format: yyyymmdd'
class Stubble(_BasicScraper):
    """Scraper for Stubble (stubblecomics.com)."""
    latestUrl = 'http://stubblecomics.com/'
    stripUrl = latestUrl + '?p=%s'
    imageSearch = compile(tagre("img", "src", r'(http://stubblecomics\.com/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(http://stubblecomics\.com/\?p=\d+)', after="navi-prev"))
    help = 'Index format: number'
class StrawberryDeathCake(_BasicScraper):
    """Scraper for Strawberry Death Cake (strawberrydeathcake.com)."""
    latestUrl = 'http://strawberrydeathcake.com/'
    stripUrl = latestUrl + 'archive/%s/'
    # The URL is wrapped in a capture group like every other imageSearch
    # pattern in this module.
    imageSearch = compile(tagre("img", "src", r'(http://strawberrydeathcake\.com/wp-content/webcomic/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(http://strawberrydeathcake\.com/archive/[^"]+)', after="previous"))
    help = 'Index format: stripname'
class SuburbanTribe(_BasicScraper):
    """Scraper for Suburban Tribe (www.pixelwhip.com)."""
    latestUrl = 'http://www.pixelwhip.com/'
    # The query parameter needs its '=' so it lines up with the ?p=<n>
    # links that prevSearch matches.
    stripUrl = latestUrl + '?p=%s'
    imageSearch = compile(tagre("img", "src", r'(http://www\.pixelwhip\.com/comics/[^"]+)'))
    prevSearch = compile(tagre("a", "href", r'(http://www\.pixelwhip\.com/\?p=\d+)', after="prev"))
    help = 'Index format: nnnn'
class SuccubusJustice(_BasicScraper):
    """Scraper for Succubus Justice (www.succubus-justice.com)."""
    latestUrl = 'http://www.succubus-justice.com/Com%20main%20frame.htm'
    # '%%' becomes a single literal '%' once the strip index is substituted.
    stripUrl = 'http://www.succubus-justice.com/%s%%20frame.htm'
    imageSearch = compile(r'<p align="center"><img src="(/\d+.\w{3,4})"')
    # Accepts both root-relative and page-relative .htm links.
    prevSearch = compile(r'<a href="(/[\w%]+\.htm|[\w%]+\.htm)"[^>]+?><img src="124.gif"')
    help = 'Index format: nnn'
class Supafine(_BasicScraper):
    """Scraper for the Supafine classic comics (www.supafine.com)."""
    latestUrl = 'http://www.supafine.com/comics/classic.php'
    stripUrl = latestUrl + '?comicID=%s'
    imageSearch = compile(r'<img src="(http://www.supafine.com/comics/.+?)"')
    # The previous link is recognised by the "previous.gif" image it wraps.
    prevSearch = compile(r'<a href="(http://www.supafine.com/comics/classic.php\?.+?)"><img src="http://supafine.com/comikaze/images/previous.gif" ')
    help = 'Index format: nnn'
class SomethingPositive(_BasicScraper):
latestUrl = 'http://www.somethingpositive.net/'
stripUrl = latestUrl + 'sp%s.shtml'
imageSearch = compile(r'<img src="(/arch/sp\d+.\w{3,4}|/sp\d+.\w{3,4})"')
prevSearch = compile(r'<a \n?href="(sp\d{8}\.shtml)">(<font size=1\nface=".+?"\nSTYLE=".+?">Previous|<img src="images2/previous|<img src="images/previous.gif")', MULTILINE | IGNORECASE)
imageSearch = compile(tagre("img", "src", r'(sp\d+\.png)'))
prevSearch = compile(tagre("a", "href", r'(sp\d+\.shtml)') + "Previous")
help = 'Index format: mmddyyyy'
@classmethod
@ -202,48 +169,6 @@ class SexyLosers(_BasicScraper):
def smackJeeves(names):
    """Create one scraper class per Smack Jeeves comic in *names*.

    Returns a dict mapping each short name to its generated class.
    """
    # XXX mature content can be viewed directly with:
    # http://www.smackjeeves.com/mature.php?ref=<percent-encoded-url>

    class _SJScraper(_BasicScraper):
        # NOTE(review): shortName is read here but is not part of the class
        # dict built in makeScraper -- confirm it is provided elsewhere.
        stripUrl = property(lambda self: self.baseUrl + self.shortName)
        imageSearch = compile(r'<img src="(http://www\.smackjeeves\.com/images/uploaded/comics/[^"]*)"', IGNORECASE)
        prevSearch = compile(r'<a href="(/comics/\d+/[^"]*)"><img[^>]*alt="< Previous"', IGNORECASE)
        help = 'Index format: nnnn (some increasing number)'

        @classmethod
        def namer(cls, imageUrl, pageUrl):
            # Name the strip after the second-to-last path segment of its page URL.
            segments = pageUrl.split('/')
            return segments[-2]

    def makeScraper(shortName):
        baseUrl = 'http://%s.smackjeeves.com/comics/' % shortName
        nextSearch = compile(r'<a href="(/comics/\d+/[^"]*)"><img[^>]*alt="Next >"', IGNORECASE)
        attrs = dict(
            name='SmackJeeves/' + shortName,
            baseUrl=baseUrl,
            starter=bounceStarter(baseUrl, nextSearch),
        )
        return type('SmackJeeves_%s' % shortName, (_SJScraper,), attrs)

    scrapers = {}
    for shortName in names:
        scrapers[shortName] = makeScraper(shortName)
    return scrapers


globals().update(smackJeeves([
    '20galaxies',
    'axe13',
    'beartholomew',
    'bliss',
    'durian',
    'heard',
    'mpmcomic',
    'nlmo-project',
    'paranoidloyd',
    'thatdreamagain',
    'wowcomics',
]))
class StarCrossdDestiny(_BasicScraper):
latestUrl = 'http://www.starcrossd.net/comic.html'
stripUrl = 'http://www.starcrossd.net/archives/%s.html'
@ -263,19 +188,6 @@ class StarCrossdDestiny(_BasicScraper):
return directory + '-' + filename
class SGVY(_BasicScraper):
    """Scraper for SGVY (www.sgvy.com), indexed by edda, issue and page."""
    stripUrl = 'http://www.sgvy.com/Edda%s/Issue%s/Page%s.html'
    imageSearch = compile(r'"comic" src="((?:\.\./)+images/sgvy/sgvy-[-\w\d]+\.\w+)"')
    prevSearch = compile(r'<a href="((?:\.\./)+(?:Edda\d+|Holiday)/(?:Issue\d+/Page\d+|Cover)\.html)">Prev</a>')
    help = 'Index format: edda-issue-page'
    starter = indirectStarter('http://www.sgvy.com/', compile(r'<a href="(archives/(?:Edda\d+|Holiday)/(?:Issue\d+/Page\d+|Cover)\.html)">'))

    def setStrip(self, index):
        """Set currentUrl from a dash-separated "edda-issue-page" index."""
        numbers = tuple(int(part) for part in index.split('-'))
        self.currentUrl = self.stripUrl % numbers
class Spamusement(_BasicScraper):
stripUrl = 'http://spamusement.com/index.php/comics/view/%s'
imageSearch = compile(r'<img src="(http://spamusement.com/gfx/\d+\..+?)"', IGNORECASE)
@ -285,63 +197,14 @@ class Spamusement(_BasicScraper):
starter = indirectStarter('http://spamusement.com/', prevSearch)
def snafuComics():
    """Create one scraper class per Snafu Comics series.

    Returns a dict mapping each series name to its generated class.
    """

    class _SnafuComics(_BasicScraper):
        imageSearch = compile(r'<img src=http://\w+\.snafu-comics\.com/(comics/\d{6}_\w*\.\w{3,4})')
        prevSearch = compile(r'<a href="(\?comic_id=\d+)">Previous</a>')
        help = 'Index format: n (unpadded)'

        # NOTE(review): stripUrl uses "strip_id" while prevSearch matches
        # "comic_id" links -- confirm which parameter the site expects.
        @property
        def stripUrl(self):
            return self.latestUrl + 'index.php?strip_id=%s'

    # Series name -> host name prefix under snafu-comics.com.
    comics = {
        'Grim': 'grim',
        'KOF': 'kof',
        'PowerPuffGirls': 'ppg',
        'Snafu': 'www',
        'Tin': 'tin',
        'TW': 'tw',
        'Sugar': 'sugar',
        'SF': 'sf',
        'Titan': 'titan',
        'EA': 'ea',
        'Zim': 'zim',
        'Soul': 'soul',
        'FT': 'ft',
        'Bunnywith': 'bunnywith',
        'Braindead': 'braindead',
    }

    url = 'http://%s.snafu-comics.com/'
    scrapers = {}
    for name, host in comics.items():
        attrs = dict(name='SnafuComics/' + name, latestUrl=url % host)
        scrapers[name] = type('SnafuComics_%s' % name, (_SnafuComics,), attrs)
    return scrapers


globals().update(snafuComics())
class SosiaalisestiRajoittuneet(_BasicScraper):
    """Scraper for Sosiaalisesti Rajoittuneet (sosiaalisestirajoittuneet.fi)."""
    latestUrl = 'http://sosiaalisestirajoittuneet.fi/index_nocomment.php'
    stripUrl = latestUrl + '?date=%s'
    imageSearch = compile(r'<img src="(strips/web/\d+.jpg)" alt=".*?" />')
    # The previous link is recognised by the "active_edellinen.gif" image it wraps.
    prevSearch = compile(r'<a href="(index_nocomment\.php\?date=\d+)"><img\s+src="images/active_edellinen\.gif"', MULTILINE)
class StrangeCandy(_BasicScraper):
    """Scraper for Strange Candy (www.strangecandy.net)."""
    latestUrl = 'http://www.strangecandy.net/'
    stripUrl = latestUrl + 'd/%s.html'
    imageSearch = compile(tagre("img", "src", r'(/comics/\d+\.jpg)'))
    # The previous link is an anchor directly followed by an image whose
    # alt text is "Previous comic".
    prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)') + tagre("img", "alt", "Previous comic"))
    # NOTE(review): "yyyyddmm" may be a typo for "yyyymmdd" -- confirm
    # against the site before changing the user-visible text.
    help = 'Index format: yyyyddmm'
class SMBC(_BasicScraper):
latestUrl = 'http://www.smbc-comics.com/'
stripUrl = latestUrl + 'index.php?db=comics&id=%s'
@ -357,21 +220,3 @@ class SomethingLikeLife(_BasicScraper):
imageSearch = compile(r'<img src="(http://www.pulledpunches.com/comics/[^"]*)"')
prevSearch = compile(r'</a> <a href="(http://www.pulledpunches.com/\?p=[^"]*)"><img src="back1.gif"')
help = 'Index format: nn'
class StickEmUpComics(_BasicScraper):
    """Scraper for Stick 'Em Up Comics (stickemupcomics.com)."""
    latestUrl = 'http://stickemupcomics.com/'
    stripUrl = latestUrl + '%s'
    imageSearch = compile(r'<img src="(http://stickemupcomics.com/comics/.+?)"')
    # The previous link is an anchor that wraps a span with class "prev".
    prevSearch = compile(r'<a href="(.+?)"><span class="prev">')
    help = 'Index format: yyyy/mm/dd/stripname'
class SexDemonBag(_BasicScraper):
    """Scraper for Sex Demon Bag (www.sexdemonbag.com)."""
    latestUrl = 'http://www.sexdemonbag.com/'
    stripUrl = latestUrl + '?p=%s'
    imageSearch = compile(r'<img src="(http://www.sexdemonbag.com/comics/.+?)"')
    # The previous link is the first anchor inside the "nav-previous" div.
    prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
    help = 'Index format: nnn'

View file

@ -0,0 +1,49 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import _BasicScraper
from ..helpers import bounceStarter
from ..util import tagre
def smackJeeves(names):
    """Create one scraper class per Smack Jeeves comic in *names*.

    Returns a dict mapping each short name to its generated class.
    """
    # XXX mature content can be viewed directly with:
    # http://www.smackjeeves.com/mature.php?ref=<percent-encoded-url>

    class _SJScraper(_BasicScraper):
        # NOTE(review): shortName is read here but is not part of the class
        # dict built in makeScraper -- confirm it is provided elsewhere.
        stripUrl = property(lambda self: self.baseUrl + self.shortName)
        imageSearch = compile(tagre("img", "src", r'(http://www\.smackjeeves\.com/images/uploaded/comics/[^"]*)'))
        prevSearch = compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + '<img[^>]*alt="< Previous"')
        help = 'Index format: nnnn (some increasing number)'

        @classmethod
        def namer(cls, imageUrl, pageUrl):
            # Name the strip after the second-to-last path segment of its page URL.
            segments = pageUrl.split('/')
            return segments[-2]

    def makeScraper(shortName):
        baseUrl = 'http://%s.smackjeeves.com/comics/' % shortName
        # The starter bounces off the "Next" link, the mirror image of prevSearch.
        nextSearch = compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + '<img[^>]*alt="Next >"')
        attrs = dict(
            name='SmackJeeves/' + shortName,
            baseUrl=baseUrl,
            starter=bounceStarter(baseUrl, nextSearch),
        )
        return type('SmackJeeves_%s' % shortName, (_SJScraper,), attrs)

    scrapers = {}
    for shortName in names:
        scrapers[shortName] = makeScraper(shortName)
    return scrapers


globals().update(smackJeeves([
    '20galaxies',
    'axe13',
    'beartholomew',
    'bliss',
    'durian',
    'heard',
    'mpmcomic',
    'nlmo-project',
    'paranoidloyd',
    'thatdreamagain',
    'wowcomics',
]))

View file

@ -0,0 +1,42 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from ..scraper import _BasicScraper
def snafuComics():
    """Create one scraper class per Snafu Comics series.

    Returns a dict mapping each series name to its generated class.
    """
    # This module only imports _BasicScraper, so bring the regex compiler
    # into scope here; otherwise the class body below raises NameError.
    from re import compile

    class _SnafuComics(_BasicScraper):
        imageSearch = compile(r'<img src=http://\w+\.snafu-comics\.com/(comics/\d{6}_\w*\.\w{3,4})')
        prevSearch = compile(r'<a href="(\?comic_id=\d+)">Previous</a>')
        help = 'Index format: n (unpadded)'

        # NOTE(review): stripUrl uses "strip_id" while prevSearch matches
        # "comic_id" links -- confirm which parameter the site expects.
        @property
        def stripUrl(self):
            return self.latestUrl + 'index.php?strip_id=%s'

    # Series name -> host name prefix under snafu-comics.com.
    comics = {
        'Grim': 'grim',
        'KOF': 'kof',
        'PowerPuffGirls': 'ppg',
        'Snafu': 'www',
        'Tin': 'tin',
        'TW': 'tw',
        'Sugar': 'sugar',
        'SF': 'sf',
        'Titan': 'titan',
        'EA': 'ea',
        'Zim': 'zim',
        'Soul': 'soul',
        'FT': 'ft',
        'Bunnywith': 'bunnywith',
        'Braindead': 'braindead',
    }

    url = 'http://%s.snafu-comics.com/'
    return dict((name, type('SnafuComics_%s' % name,
                            (_SnafuComics,),
                            dict(name='SnafuComics/' + name,
                                 latestUrl=url % host)))
                for name, host in comics.items())


globals().update(snafuComics())

View file

@ -1,9 +1,11 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
from re import compile, IGNORECASE
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE
from ..scraper import _BasicScraper
from ..helpers import indirectStarter
from ..util import tagre
class TalesOfPylea(_BasicScraper):
@ -59,6 +61,13 @@ class Thorn(_BasicScraper):
help = 'Index format: nnn'
class TinyKittenTeeth(_BasicScraper):
    """Scraper for Tiny Kitten Teeth (www.tinykittenteeth.com)."""
    latestUrl = 'http://www.tinykittenteeth.com/'
    stripUrl = latestUrl + 'index.php?current=%s'
    imageSearch = compile(tagre("img", "src", r'(http://www\.tinykittenteeth\.com/comics/[^"]+)'))
    # Any href is accepted as long as "Previous" appears later in the same tag.
    prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous"))
    help = 'Index format: n (unpadded)'
class TwoTwoOneFour(_BasicScraper):
latestUrl = 'http://www.nitrocosm.com/go/2214_classic/'
@ -78,44 +87,6 @@ class TheWhiteboard(_BasicScraper):
class _TheFallenAngel(_BasicScraper):
    """Shared base for comics hosted on www.thefallenangel.co.uk.

    Subclasses set shortName, which selects the comic's CGI directory.
    """
    imageSearch = compile(r'SRC="(http://www.thefallenangel.co.uk/\w+comics/.+?)"')
    prevSearch = compile(r' <a href="(http://www.thefallenangel.co.uk/.+?)"><img[^>]+?src="http://www.thefallenangel.co.uk/images/previousday.jpg"')
    help = 'Index format: yyyymmdd'

    @property
    def baseUrl(self):
        """Return the CGI script URL for this comic's shortName."""
        template = 'http://www.thefallenangel.co.uk/cgi-bin/%sautokeen/autokeenlite.cgi'
        return template % (self.shortName,)

    @property
    def stripUrl(self):
        """Return the strip URL template, which selects a strip by date."""
        return self.baseUrl + '?date=%s'

    def starter(self):
        """Return the URL to start from: the bare CGI script URL."""
        return self.baseUrl


class HighMaintenance(_TheFallenAngel):
    name = 'TheFallenAngel/HighMaintenance'
    shortName = 'hm'


class FAWK(_TheFallenAngel):
    name = 'TheFallenAngel/FAWK'
    shortName = 'fawk'


class MalloryChan(_TheFallenAngel):
    name = 'TheFallenAngel/MalloryChan'
    shortName = 'mallorychan'
class HMHigh(_BasicScraper):
name = 'TheFallenAngel/HMHigh'
latestUrl = 'http://www.thefallenangel.co.uk/hmhigh/'

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE
from ..scraper import _BasicScraper

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, sub
from ..scraper import _BasicScraper

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE, MULTILINE
from ..scraper import _BasicScraper

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, IGNORECASE, DOTALL
from ..scraper import _BasicScraper

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import _BasicScraper

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile, MULTILINE
from ..scraper import _BasicScraper

View file

@ -1,5 +1,7 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012 Bastian Kleineidam
from re import compile
from ..scraper import _BasicScraper

View file

@ -93,7 +93,7 @@ def fetchUrl(url, urlSearch):
if not searchUrl:
raise ValueError("Match empty URL at %s with pattern %s" % (url, urlSearch.pattern))
out.write('matched URL %r' % searchUrl, 2)
return urlparse.urljoin(baseUrl, searchUrl)
return normaliseURL(urlparse.urljoin(baseUrl, searchUrl))
return None
@ -106,7 +106,7 @@ def fetchUrls(url, imageSearch, prevSearch=None):
if not imageUrl:
raise ValueError("Match empty image URL at %s with pattern %s" % (url, imageSearch.pattern))
out.write('matched image URL %r' % imageUrl, 2)
imageUrls.add(urlparse.urljoin(baseUrl, imageUrl))
imageUrls.add(normaliseURL(urlparse.urljoin(baseUrl, imageUrl)))
if not imageUrls:
out.write("warning: no images found at %s with pattern %s" % (url, imageSearch.pattern))
if prevSearch is not None:
@ -117,12 +117,12 @@ def fetchUrls(url, imageSearch, prevSearch=None):
if not prevUrl:
raise ValueError("Match empty previous URL at %s with pattern %s" % (url, prevSearch.pattern))
out.write('matched previous URL %r' % prevUrl, 2)
prevUrl = urlparse.urljoin(baseUrl, prevUrl)
prevUrl = normaliseURL(urlparse.urljoin(baseUrl, prevUrl))
else:
out.write('no previous URL %s at %s' % (prevSearch.pattern, url), 2)
prevUrl = None
return imageUrls, prevUrl
return imageUrls
return imageUrls, None
def _unescape(text):
@ -150,7 +150,8 @@ def _unescape(text):
text = text.encode('utf-8')
text = urllib2.quote(text, safe=';/?:@&=+$,')
return text
return re.sub("&#?\w+;", _fixup, text)
return re.sub(r"&#?\w+;", _fixup, text)
def normaliseURL(url):
"""
@ -159,24 +160,24 @@ def normaliseURL(url):
"""
# XXX: brutal hack
url = _unescape(url)
url = url.replace(' ', '%20')
pu = list(urlparse.urlparse(url))
segments = pu[2].replace(' ', '%20').split('/')
segments = pu[2].split('/')
while segments and segments[0] == '':
del segments[0]
pu[2] = '/' + '/'.join(segments)
pu[2] = '/' + '/'.join(segments).replace(' ', '%20')
# remove leading '&' from query
if pu[3].startswith('&'):
pu[3] = pu[3][1:]
if pu[4].startswith('&'):
pu[4] = pu[4][1:]
# remove anchor
pu[5] = ""
return urlparse.urlunparse(pu)
def urlopen(url, referrer=None, retries=3, retry_wait_seconds=5):
out.write('Open URL %s' % url, 2)
assert retries >= 0, 'invalid retry value %r' % retries
assert retry_wait_seconds > 0, 'invalid retry seconds value %r' % retry_wait_seconds
# Work around urllib2 brokenness
url = normaliseURL(url)
req = urllib2.Request(url)
if referrer:
req.add_header('Referer', referrer)
@ -185,13 +186,14 @@ def urlopen(url, referrer=None, retries=3, retry_wait_seconds=5):
while True:
try:
return urllib2.urlopen(req)
except IOError as msg:
out.write('URL retrieval of %s failed: %s' % (url, msg))
except IOError as err:
msg = 'URL retrieval of %s failed: %s' % (url, err)
out.write(msg)
out.write('waiting %d seconds and retrying (%d)' % (retry_wait_seconds, tries), 2)
time.sleep(retry_wait_seconds)
tries += 1
if tries >= retries:
raise
raise IOError(msg)
def get_columns (fp):
@ -212,6 +214,7 @@ def get_columns (fp):
suffixes = ('B', 'kB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB')
def saneDataSize(size):
if size == 0:
return 'unk B'
@ -221,6 +224,7 @@ def saneDataSize(size):
factor = 1024 ** index
return '%0.3f %s' % (float(size) / factor, suffixes[index])
def splitpath(path):
c = []
head, tail = os.path.split(path)
@ -229,10 +233,10 @@ def splitpath(path):
head, tail = os.path.split(head)
return c
def getRelativePath(basepath, path):
basepath = splitpath(os.path.abspath(basepath))
path = splitpath(os.path.abspath(path))
afterCommon = False
for c in basepath:
if afterCommon or path[0] != c:
@ -240,9 +244,9 @@ def getRelativePath(basepath, path):
afterCommon = True
else:
del path[0]
return os.path.join(*path)
def getQueryParams(url):
query = urlparse.urlsplit(url)[3]
out.write('Extracting query parameters from %r (%r)...' % (url, query), 3)
@ -267,7 +271,7 @@ I can work with ;) .
etype = sys.exc_info()[0]
if evalue is None:
evalue = sys.exc_info()[1]
print >> out, etype, evalue
print(etype, evalue, file=out)
if tb is None:
tb = sys.exc_info()[2]
traceback.print_exception(etype, evalue, tb, None, out)

View file

@ -29,19 +29,26 @@ class _ComicTester(TestCase):
images += 1
self.save(image)
if num > 0:
# test that the stripUrl regex matches the retrieved strip URL
urlmatch = re.escape(self.scraperclass.stripUrl)
urlmatch = urlmatch.replace(r"\%s", r".+")
urlmatch = "^%s$" % urlmatch
ro = re.compile(urlmatch)
mo = ro.search(strip.stripUrl)
self.check(mo is not None, 'strip URL %r does not match stripUrl pattern %s' % (strip.stripUrl, urlmatch))
self.check_stripurl(strip)
else:
empty += 1
num += 1
self.check(num >= 4, 'traversal failed after %d strips, check the prevSearch pattern.' % num)
if self.scraperclass.prevSearch:
self.check(num >= 4, 'traversal failed after %d strips, check the prevSearch pattern.' % num)
self.check(empty <= 1, 'failed to find images on %d pages, check the imageSearch pattern.' % empty)
def check_stripurl(self, strip):
    """Check that the retrieved strip URL fits the scraper's stripUrl template.

    Does nothing when the scraper class has no stripUrl (no indexing
    support). Otherwise the result is reported through self.check().
    """
    stripUrl = self.scraperclass.stripUrl
    if not stripUrl:
        # no indexing support
        return
    # Escape the literal pieces around each '%s' separately and join them
    # with a wildcard. Escaping the whole template and then looking for an
    # escaped placeholder depends on re.escape() escaping '%', which newer
    # Python versions no longer do.
    literals = [re.escape(piece) for piece in stripUrl.split("%s")]
    urlmatch = "^%s$" % ".+".join(literals)
    mo = re.search(urlmatch, strip.stripUrl)
    self.check(mo is not None, 'strip URL %r does not match stripUrl pattern %s' % (strip.stripUrl, urlmatch))
def save(self, image):
# create a temporary directory
tmpdir = tempfile.mkdtemp()