Fix some comics.
This commit is contained in:
parent
54eaadf4fc
commit
958a788550
40 changed files with 823 additions and 1245 deletions
|
@ -1,5 +1,7 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
import urllib2
|
||||
import os
|
||||
import locale
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
"""
|
||||
Define basic configuration data like version or application name.
|
||||
"""
|
||||
|
|
|
@ -25,21 +25,16 @@ def regexNamer(regex):
|
|||
return _namer
|
||||
|
||||
|
||||
def constStarter(latestUrl):
    """Start from constant URL.

    Returns a ``staticmethod`` object, meant to be assigned as a class
    attribute, whose call takes no arguments and returns ``latestUrl``
    unchanged. No network access happens here.
    """
    @staticmethod
    def _starter():
        # The URL is captured by the closure; nothing is looked up at call time.
        return latestUrl
    return _starter
|
||||
|
||||
|
||||
def bounceStarter(latestUrl, nextSearch):
    """Get start URL by "bouncing" back and forth one time.

    The returned ``classmethod`` first searches ``latestUrl`` with the
    class's ``prevSearch`` pattern, then searches the page found that way
    with ``nextSearch``, and returns the resulting URL.

    Raises:
        ValueError: if either search yields a false value. The message names
            the pattern and the page that was searched.

    NOTE(review): ``fetchUrl`` is defined outside this block; it presumably
    fetches the page and returns the URL matched by the pattern, or a false
    value when nothing matches -- confirm against its definition.
    """
    @classmethod
    def _starter(cls):
        prevUrl = fetchUrl(latestUrl, cls.prevSearch)
        if not prevUrl:
            raise ValueError("could not find prevSearch pattern %r in %s" % (cls.prevSearch.pattern, latestUrl))
        url = fetchUrl(prevUrl, nextSearch)
        if not url:
            # Report the page that was actually searched (the previous page),
            # not latestUrl, so the error points at the right document.
            raise ValueError("could not find nextSearch pattern %r in %s" % (nextSearch.pattern, prevUrl))
        return url
    return _starter
|
||||
|
||||
|
@ -48,7 +43,10 @@ def indirectStarter(baseUrl, latestSearch):
|
|||
"""Get start URL by indirection."""
|
||||
@staticmethod
|
||||
def _starter():
|
||||
return fetchUrl(baseUrl, latestSearch)
|
||||
url = fetchUrl(baseUrl, latestSearch)
|
||||
if not url:
|
||||
raise ValueError("could not find latestSearch pattern %r in %s" % (latestSearch.pattern, baseUrl))
|
||||
return url
|
||||
return _starter
|
||||
|
||||
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, MULTILINE
|
||||
from ..util import tagre
|
||||
from ..scraper import _BasicScraper
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile
|
||||
|
||||
from ..util import tagre
|
||||
|
|
|
@ -1,51 +1,28 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..helpers import constStarter, bounceStarter
|
||||
from ..helpers import bounceStarter, indirectStarter
|
||||
from ..util import tagre, getQueryParams
|
||||
|
||||
|
||||
class CalvinAndHobbes(_BasicScraper):
|
||||
starter = bounceStarter('http://www.gocomics.com/calvinandhobbes/',
|
||||
compile(tagre("a", "href", "(/calvinandhobbes/\d+/\d+/\d+)")+"Next feature</a>"))
|
||||
stripUrl = 'http://www.gocomics.com/calvinandhobbes/%s'
|
||||
imageSearch = compile(tagre("img", "src", "(http://assets\.amuniversal\.com/[a-f0-9]+)"))
|
||||
prevSearch = compile(tagre("a", "href", "(/calvinandhobbes/\d+/\d+/\d+)")+"Previous feature</a>")
|
||||
help = 'Index format: yyyy/mm/dd'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
prefix, year, month, day = pageUrl.rsplit('/', 3)
|
||||
return "%s%s%s.gif" % (year, month, day)
|
||||
|
||||
|
||||
class CandyCartoon(_BasicScraper):
|
||||
latestUrl = 'http://www.candycartoon.com/'
|
||||
stripUrl = latestUrl + 'archives/%s.html'
|
||||
imageSearch = compile(r'<img alt="[^"]*" src="(http://www\.candycartoon\.com/archives/[^"]+)"')
|
||||
prevSearch = compile(r'<a href="(http://www\.candycartoon\.com/archives/\d{6}\.html)">prev')
|
||||
help = 'Index format: nnnnnn'
|
||||
|
||||
|
||||
|
||||
class CaptainSNES(_BasicScraper):
|
||||
latestUrl = 'http://captainsnes.com/'
|
||||
stripUrl = latestUrl + '?date=%s'
|
||||
imageSearch = compile(r'<img src=\'(http://www.captainsnes.com/comics/.+?)\'')
|
||||
prevSearch = compile(r'<a href="http://www.captainsnes.com/(.+?)"><span class="prev">')
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
latestUrl = 'http://www.captainsnes.com/'
|
||||
stripUrl = latestUrl + '%s/'
|
||||
imageSearch = compile(r"<img src='(http://www\.captainsnes\.com/comics/[^']+)'")
|
||||
prevSearch = compile(r'<a href="(http://www\.captainsnes\.com/[^"]+)"><span class="prev">')
|
||||
help = 'Index format: yyyy/mm/dd/nnn-stripname'
|
||||
|
||||
|
||||
class CaribbeanBlue(_BasicScraper):
|
||||
latestUrl = 'http://cblue.katbox.net/'
|
||||
stripUrl = latestUrl + 'index.php?strip_id=%s'
|
||||
imageSearch = compile(r'="(.+?strips/.+?)"')
|
||||
prevSearch = compile(r'<a href="(.+?)"><img src="images/navigation_back.png"')
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
stripUrl = latestUrl + 'archive/%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://cblue\.katbox\.net/wp-content/webcomic/cblue/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://cblue\.katbox\.net/archive/[^"]+)', after="previous"))
|
||||
help = 'Index format: nnn-stripname'
|
||||
|
||||
|
||||
class Catena(_BasicScraper):
|
||||
|
@ -56,15 +33,6 @@ class Catena(_BasicScraper):
|
|||
help = 'Index format: yyyy/mm/dd/<name>'
|
||||
|
||||
|
||||
class Catharsis(_BasicScraper):
|
||||
latestUrl = 'http://catharsiscomic.com/'
|
||||
stripUrl = latestUrl + 'archive.php?strip=%s'
|
||||
imageSearch = compile(r'<img src="(strips/.+?)"')
|
||||
prevSearch = compile(r'<a href="(.+?)".+"Previous')
|
||||
help = 'Index format: yymmdd-<your guess>.html'
|
||||
|
||||
|
||||
|
||||
class ChasingTheSunset(_BasicScraper):
|
||||
latestUrl = 'http://www.fantasycomic.com/'
|
||||
stripUrl = latestUrl + 'index.php?p=c%s'
|
||||
|
@ -89,7 +57,6 @@ class Chisuji(_BasicScraper):
|
|||
help = 'Index format: yyyy/mm/dd/strip-name'
|
||||
|
||||
|
||||
|
||||
class ChugworthAcademy(_BasicScraper):
|
||||
latestUrl = 'http://chugworth.com/'
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
|
@ -98,7 +65,6 @@ class ChugworthAcademy(_BasicScraper):
|
|||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
|
||||
class ChugworthAcademyArchive(_BasicScraper):
|
||||
latestUrl = 'http://chugworth.com/archive/?strip_id=422'
|
||||
stripUrl = 'http://chugworth.com/archive/?strip_id=%s'
|
||||
|
@ -107,7 +73,6 @@ class ChugworthAcademyArchive(_BasicScraper):
|
|||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
||||
class CigarroAndCerveja(_BasicScraper):
|
||||
latestUrl = 'http://www.cigarro.ca/'
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
|
@ -116,15 +81,6 @@ class CigarroAndCerveja(_BasicScraper):
|
|||
help = 'Index format: non'
|
||||
|
||||
|
||||
# XXX move
|
||||
class TinyKittenTeeth(_BasicScraper):
|
||||
latestUrl = 'http://www.tinykittenteeth.com/'
|
||||
stripUrl = latestUrl + 'index.php?current=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.tinykittenteeth\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous"))
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
class Comedity(_BasicScraper):
|
||||
latestUrl = 'http://www.comedity.com/'
|
||||
stripUrl = latestUrl + 'index.php?strip_id=%s'
|
||||
|
@ -135,31 +91,28 @@ class Comedity(_BasicScraper):
|
|||
|
||||
class Commissioned(_BasicScraper):
|
||||
latestUrl = 'http://www.commissionedcomic.com/'
|
||||
stripUrl = latestUrl + 'index.php?strip=%s'
|
||||
imageSearch = compile(r'<img src="(http://www.commissionedcomic.com/comics/.+?)"')
|
||||
prevSearch = compile(r'<a href="(.+?)">‹</a>')
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.commissionedcomic\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.commissionedcomic\.com/\?p=\d+)', after="prev"))
|
||||
help = 'Index format: n'
|
||||
|
||||
|
||||
|
||||
class CoolCatStudio(_BasicScraper):
|
||||
latestUrl = 'http://www.coolcatstudio.com/'
|
||||
stripUrl = latestUrl + 'strips-cat/ccs%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www.coolcatstudio.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.coolcatstudio\.com/strips-cat/[^"]+)', before="cniprevt"))
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.coolcatstudio\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.coolcatstudio\.com/strips-cat/[^"]+)', before="prev"))
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
|
||||
class CourtingDisaster(_BasicScraper):
|
||||
latestUrl = 'http://www.courting-disaster.com/'
|
||||
stripUrl = latestUrl + 'archive/%s.html'
|
||||
imageSearch = compile(r'(/comics/.+?)"')
|
||||
prevSearch = compile(r'<a href="(.+?)"><img src="/images/previous.gif"[^>]+?>')
|
||||
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/archive/\d+\.html)') + tagre("img", "src", r'/images/previous\.gif'))
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
|
||||
class CrapIDrewOnMyLunchBreak(_BasicScraper):
|
||||
latestUrl = 'http://crap.jinwicked.com/'
|
||||
stripUrl = latestUrl + '%s'
|
||||
|
@ -168,7 +121,6 @@ class CrapIDrewOnMyLunchBreak(_BasicScraper):
|
|||
help = 'Index format: yyyy/mm/dd/name'
|
||||
|
||||
|
||||
|
||||
class CtrlAltDel(_BasicScraper):
|
||||
latestUrl = 'http://www.cad-comic.com/cad/'
|
||||
stripUrl = latestUrl + '%s'
|
||||
|
@ -186,34 +138,31 @@ class CtrlAltDelSillies(CtrlAltDel):
|
|||
class Curvy(_BasicScraper):
|
||||
latestUrl = 'http://www.c.urvy.org/'
|
||||
stripUrl = latestUrl + '?date=%s'
|
||||
imageSearch = compile(r'(/c/.+?)"')
|
||||
prevSearch = compile(r'(/\?date=.+?)"><< Previous page')
|
||||
imageSearch = compile(tagre("img", "src", r'(/c/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/\?date=\d+)') + tagre("img", "src", "/nav/prev\.png"))
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
def cloneManga(name, shortName, lastStrip=None):
    """Create a scraper class for a comic hosted on manga.clone-army.org.

    Args:
        name: comic name; used in the class name ``CloneManga_<name>`` and in
            the scraper's ``name`` attribute ``CloneManga/<name>``.
        shortName: name of the comic's PHP page and of its image directory.
        lastStrip: index of the final strip. When given, ``latestUrl`` is
            fixed to that strip's URL; when ``None``, a starter is installed
            that looks for the link around the ``last.gif`` image.

    Returns:
        A new subclass of ``_BasicScraper``.
    """
    url = 'http://manga.clone-army.org'
    baseUrl = '%s/%s.php' % (url, shortName)

    def namer(self, imageUrl, pageUrl):
        # File name is the zero-padded value of the ``page`` query parameter.
        return '%03d' % int(getQueryParams(pageUrl)['page'][0])

    attrs = dict(
        name='CloneManga/' + name,
        stripUrl=baseUrl + '?page=%s',
        imageSearch=compile(tagre("img", "src", r'((?:%s/)?%s/[^"]+)' % (url, shortName), after="center")),
        prevSearch=compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"previous\.gif")),
        help='Index format: n',
        namer=namer,
    )
    if lastStrip is None:
        # No known final strip: find the newest page via the "last" link.
        attrs['starter'] = indirectStarter(baseUrl, compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"last\.gif")))
    else:
        # Known final strip: start directly from its URL.
        attrs['latestUrl'] = attrs['stripUrl'] % lastStrip
    return type('CloneManga_%s' % name, (_BasicScraper,), attrs)
|
||||
|
||||
|
||||
anm = cloneManga('AprilAndMay', 'anm')
|
||||
|
@ -233,148 +182,14 @@ class CatAndGirl(_BasicScraper):
|
|||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
def comicsDotCom(name, section):
|
||||
latestUrl = 'http://www.gocomics.com/%s' % name
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
prefix, year, month, day = pageUrl.split('/', 3)
|
||||
return "%s_%s%s%s.gif" % (name, year, month, day)
|
||||
|
||||
return type('GoComicsDotCom_%s' % name,
|
||||
(_BasicScraper,),
|
||||
dict(
|
||||
name='GoComicsDotCom/' + name,
|
||||
stripUrl=latestUrl + '/%s',
|
||||
imageSearch=compile(tagre("img", "src", r'(http://assets\.amuniversal\.com/[0-9a-f]+)')),
|
||||
prevSearch=compile(tagre("a", "href", "(/%s/\d+/\d+/\d+)")+"Previous"),
|
||||
help='Index format: yyyy/mm/dd',
|
||||
namer=namer)
|
||||
)
|
||||
|
||||
# http://www.gocomics.com/features
|
||||
# XXX
|
||||
|
||||
# http://www.gocomics.com/explore/editorial_list
|
||||
# XXX
|
||||
|
||||
# http://www.gocomics.com/explore/sherpa_list
|
||||
# XXX
|
||||
|
||||
acaseinpoint = comicsDotCom('acaseinpoint', 'comics')
|
||||
agnes = comicsDotCom('agnes', 'creators')
|
||||
alleyoop = comicsDotCom('alleyoop', 'comics')
|
||||
andycapp = comicsDotCom('andycapp', 'creators')
|
||||
arlonjanis = comicsDotCom('arlonjanis', 'comics')
|
||||
ballardst = comicsDotCom('ballardst', 'creators')
|
||||
barkeaterlake = comicsDotCom('barkeaterlake', 'comics')
|
||||
bc = comicsDotCom('bc', 'creators')
|
||||
ben = comicsDotCom('ben', 'comics')
|
||||
betty = comicsDotCom('betty', 'comics')
|
||||
bignate = comicsDotCom('bignate', 'comics')
|
||||
bonanas = comicsDotCom('bonanas', 'wash')
|
||||
bornloser = comicsDotCom('bornloser', 'comics')
|
||||
buckets = comicsDotCom('buckets', 'comics')
|
||||
candorville = comicsDotCom('candorville', 'wash')
|
||||
cheapthrills = comicsDotCom('cheapthrills', 'wash')
|
||||
chickweed = comicsDotCom('chickweed', 'comics')
|
||||
committed = comicsDotCom('committed', 'comics')
|
||||
dilbert = comicsDotCom('dilbert', 'comics')
|
||||
drabble = comicsDotCom('drabble', 'comics')
|
||||
fatcats = comicsDotCom('fatcats', 'comics')
|
||||
ferdnand = comicsDotCom('ferdnand', 'comics')
|
||||
flightdeck = comicsDotCom('flightdeck', 'creators')
|
||||
floandfriends = comicsDotCom('floandfriends', 'creators')
|
||||
franknernest = comicsDotCom('franknernest', 'comics')
|
||||
frazz = comicsDotCom('frazz', 'comics')
|
||||
geech = comicsDotCom('geech', 'comics')
|
||||
genepool = comicsDotCom('genepool', 'wash')
|
||||
getfuzzy = comicsDotCom('getfuzzy', 'comics')
|
||||
gofish = comicsDotCom('gofish', 'comics')
|
||||
graffiti = comicsDotCom('graffiti', 'comics')
|
||||
grandave = comicsDotCom('grandave', 'comics')
|
||||
grizzwells = comicsDotCom('grizzwells', 'comics')
|
||||
heathcliff = comicsDotCom('heathcliff', 'creators')
|
||||
hedge = comicsDotCom('hedge', 'comics')
|
||||
herbnjamaal = comicsDotCom('herbnjamaal', 'creators')
|
||||
herman = comicsDotCom('herman', 'comics')
|
||||
humblestumble = comicsDotCom('humblestumble', 'comics')
|
||||
janesworld = comicsDotCom('janesworld', 'comics')
|
||||
jumpstart = comicsDotCom('jumpstart', 'comics')
|
||||
kitncarlyle = comicsDotCom('kitncarlyle', 'comics')
|
||||
liberty = comicsDotCom('liberty', 'creators')
|
||||
lilabner = comicsDotCom('lilabner', 'comics')
|
||||
luann = comicsDotCom('luann', 'comics')
|
||||
marmaduke = comicsDotCom('marmaduke', 'comics')
|
||||
meg = comicsDotCom('meg', 'comics')
|
||||
moderatelyconfused = comicsDotCom('moderatelyconfused', 'comics')
|
||||
momma = comicsDotCom('momma', 'creators')
|
||||
monty = comicsDotCom('monty', 'comics')
|
||||
motley = comicsDotCom('motley', 'comics')
|
||||
nancy = comicsDotCom('nancy', 'comics')
|
||||
naturalselection = comicsDotCom('naturalselection', 'creators')
|
||||
offthemark = comicsDotCom('offthemark', 'comics')
|
||||
onebighappy = comicsDotCom('onebighappy', 'creators')
|
||||
othercoast = comicsDotCom('othercoast', 'creators')
|
||||
pcnpixel = comicsDotCom('pcnpixel', 'wash')
|
||||
peanuts = comicsDotCom('peanuts', 'comics')
|
||||
pearls = comicsDotCom('pearls', 'comics')
|
||||
pibgorn = comicsDotCom('pibgorn', 'comics')
|
||||
pickles = comicsDotCom('pickles', 'wash')
|
||||
raisingduncan = comicsDotCom('raisingduncan', 'comics')
|
||||
reality = comicsDotCom('reality', 'comics')
|
||||
redandrover = comicsDotCom('redandrover', 'wash')
|
||||
ripleys = comicsDotCom('ripleys', 'comics')
|
||||
roseisrose = comicsDotCom('roseisrose', 'comics')
|
||||
rubes = comicsDotCom('rubes', 'creators')
|
||||
rudypark = comicsDotCom('rudypark', 'comics')
|
||||
shirleynson = comicsDotCom('shirleynson', 'comics')
|
||||
soup2nutz = comicsDotCom('soup2nutz', 'comics')
|
||||
speedbump = comicsDotCom('speedbump', 'creators')
|
||||
spotthefrog = comicsDotCom('spotthefrog', 'comics')
|
||||
strangebrew = comicsDotCom('strangebrew', 'creators')
|
||||
sunshineclub = comicsDotCom('sunshineclub', 'comics')
|
||||
tarzan = comicsDotCom('tarzan', 'comics')
|
||||
thatslife = comicsDotCom('thatslife', 'wash')
|
||||
wizardofid = comicsDotCom('wizardofid', 'creators')
|
||||
workingdaze = comicsDotCom('workingdaze', 'comics')
|
||||
workingitout = comicsDotCom('workingitout', 'creators')
|
||||
|
||||
|
||||
def creators(name, shortname):
|
||||
return type('Creators_%s' % name,
|
||||
(_BasicScraper,),
|
||||
dict(
|
||||
name='Creators/' + name,
|
||||
latestUrl='http://www.creators.com/comics_show.cfm?ComicName=%s' % (shortname,),
|
||||
stripUrl=None,
|
||||
imageSearch=compile(tagre("img", "src", r'(\d{4}/[^"]+/[^"]+\.[^"]+)')),
|
||||
prevSearch=compile(tagre("a", "href", r'(comics_show\.cfm\?next=\d+&ComicName=[^"]+)', after='Previous Comic')),
|
||||
help='Indexing unsupported')
|
||||
)
|
||||
|
||||
|
||||
arc = creators('Archie', 'arc')
|
||||
shg = creators('AskShagg', 'shg')
|
||||
hev = creators('ForHeavensSake', 'hev')
|
||||
rug = creators('Rugrats', 'rug')
|
||||
sou = creators('StateOfTheUnion', 'sou')
|
||||
din = creators('TheDinetteSet', 'din')
|
||||
lil = creators('TheMeaningOfLila', 'lil')
|
||||
wee = creators('WeePals', 'wee')
|
||||
zhi = creators('ZackHill', 'zhi')
|
||||
|
||||
|
||||
|
||||
class CyanideAndHappiness(_BasicScraper):
|
||||
latestUrl = 'http://www.explosm.net/comics'
|
||||
stripUrl = latestUrl + '/%s'
|
||||
imageSearch = compile(r'<img alt="Cyanide and Happiness, a daily webcomic" src="(http:\/\/www\.explosm\.net/db/files/Comics/\w+/\S+\.\w+)"')
|
||||
prevSearch = compile(r'<a href="(/comics/\d+/?)">< Previous</a>')
|
||||
latestUrl = 'http://www.explosm.net/comics/'
|
||||
stripUrl = latestUrl + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http:\/\/www\.explosm\.net/db/files/Comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/comics/\d+/)', before="prev"))
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
|
||||
class CrimsonDark(_BasicScraper):
|
||||
latestUrl = 'http://www.davidcsimon.com/crimsondark/'
|
||||
stripUrl = latestUrl + 'index.php?view=comic&strip_id=%s'
|
||||
|
@ -383,16 +198,6 @@ class CrimsonDark(_BasicScraper):
|
|||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
|
||||
class CrimesOfCybeleCity(_BasicScraper):
|
||||
latestUrl = 'http://www.pulledpunches.com/crimes/'
|
||||
stripUrl = 'http://www.beaglespace.com/pulledpunches/crimes/?p=%s'
|
||||
imageSearch = compile(r'<img src="(http://www\.beaglespace\.com/pulledpunches/crimes/comics/[^"]+)"')
|
||||
prevSearch = compile(r'<a href="(http://www\.beaglespace\.com/pulledpunches/crimes/\?p=\d+)"><img src="back1\.gif"')
|
||||
help = 'Index format: nn'
|
||||
|
||||
|
||||
|
||||
class CatsAndCameras(_BasicScraper):
|
||||
latestUrl = 'http://catsncameras.com/cnc/'
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
|
@ -401,7 +206,6 @@ class CatsAndCameras(_BasicScraper):
|
|||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
||||
class CowboyJedi(_BasicScraper):
|
||||
latestUrl = 'http://www.cowboyjedi.com/'
|
||||
stripUrl = latestUrl + '%s'
|
||||
|
@ -410,7 +214,6 @@ class CowboyJedi(_BasicScraper):
|
|||
help = 'Index format: yyyy/mm/dd/strip-name'
|
||||
|
||||
|
||||
|
||||
class CasuallyKayla(_BasicScraper):
|
||||
latestUrl = 'http://casuallykayla.com/'
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
|
@ -419,7 +222,6 @@ class CasuallyKayla(_BasicScraper):
|
|||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
||||
class Collar6(_BasicScraper):
|
||||
latestUrl = 'http://collar6.com/'
|
||||
stripUrl = latestUrl + 'archive/%s'
|
||||
|
@ -428,7 +230,6 @@ class Collar6(_BasicScraper):
|
|||
help = 'Index format: <name>'
|
||||
|
||||
|
||||
|
||||
class Chester5000XYV(_BasicScraper):
|
||||
latestUrl = 'http://jessfink.com/Chester5000XYV/'
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
|
@ -437,7 +238,6 @@ class Chester5000XYV(_BasicScraper):
|
|||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
||||
class CalamitiesOfNature(_BasicScraper):
|
||||
latestUrl = 'http://www.calamitiesofnature.com/'
|
||||
stripUrl = latestUrl + 'archive/?c=%s'
|
||||
|
@ -446,14 +246,13 @@ class CalamitiesOfNature(_BasicScraper):
|
|||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
||||
class Champ2010(_BasicScraper):
|
||||
latestUrl = 'http://www.jedcollins.com/champ2010/'
|
||||
stripUrl = 'http://jedcollins.com/champ2010/?p=%s'
|
||||
imageSearch = compile(r'<img src="(http://jedcollins.com/champ2010/comics/.+?)"')
|
||||
prevSearch = compile(r'<div class="nav-previous"><a href="(http://jedcollins.com/champ2010/.+?)"')
|
||||
help = 'Index format: nnn'
|
||||
|
||||
# the latest URL is hard coded since the comic is discontinued
|
||||
latestUrl = 'http://jedcollins.com/champ2010/champ-12-30-10.html'
|
||||
stripUrl = 'http://jedcollins.com/champ2010/champ-%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://jedcollins\.com/champ2010/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://jedcollins\.com/champ2010/[^"]+)', after="Previous"))
|
||||
help = 'Index format: yy-dd-mm'
|
||||
|
||||
|
||||
class Chucklebrain(_BasicScraper):
|
||||
|
@ -464,7 +263,6 @@ class Chucklebrain(_BasicScraper):
|
|||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
||||
class CompanyY(_BasicScraper):
|
||||
latestUrl = 'http://company-y.com/'
|
||||
stripUrl = latestUrl + '%s/'
|
||||
|
@ -473,32 +271,21 @@ class CompanyY(_BasicScraper):
|
|||
help = 'Index format: yyyy/mm/dd/strip-name'
|
||||
|
||||
|
||||
|
||||
class CorydonCafe(_BasicScraper):
|
||||
starter = bounceStarter('http://corydoncafe.com/', compile(r' href="(\./comic-\d+.html)">Next></a>'))
|
||||
stripUrl = 'http://corydoncafe.com/comic-%s.html'
|
||||
imageSearch = compile(r'<img src=\'(\./comics/.+?)\' ')
|
||||
prevSearch = compile(r' href="(\./comic-\d+.html)"><Previous</a>')
|
||||
help = 'Index format: nnn'
|
||||
starter = bounceStarter('http://corydoncafe.com/', compile(tagre("a", "href", r"(http://corydoncafe\.com/\d+/[^']+)", after="next", quote="'")))
|
||||
stripUrl = 'http://corydoncafe.com/%s.php'
|
||||
imageSearch = compile(tagre("img", "src", r"(\./[^']+)", quote="'"))
|
||||
prevSearch = compile(tagre("a", "href", r"(http://corydoncafe\.com/\d+/[^']+)", after="prev", quote="'"))
|
||||
help = 'Index format: yyyy/stripname'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
return pageUrl.split('/')[-1].split('.')[0]
|
||||
|
||||
|
||||
|
||||
class CraftedFables(_BasicScraper):
|
||||
latestUrl = 'http://www.craftedfables.com/'
|
||||
stripUrl = 'http://www.caf-fiends.net/craftedfables/?p=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.caf-fiends\.net/craftedfables/comics/[^"]+)'))
|
||||
prevSearch = compile(r'<a href="(http://www.caf-fiends.net/craftedfables/.+?)"><span class="prev">')
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
||||
class Currhue(_BasicScraper):
|
||||
latestUrl = 'http://www.currhue.com/'
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.currhue\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.currhue.com/.+?)"')
|
||||
help = 'Index format: nnn'
|
||||
|
|
87
dosagelib/plugins/comicsdotcom.py
Normal file
87
dosagelib/plugins/comicsdotcom.py
Normal file
|
@ -0,0 +1,87 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
from re import compile, sub
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
|
||||
def comicsDotCom(name, section):
    """Create a scraper class for a comic hosted on www.gocomics.com.

    Args:
        name: the comic's path component on gocomics.com (may contain
            characters such as ``-``). It is used verbatim in URLs and in
            image file names; a copy with everything except ``[0-9a-zA-Z_]``
            removed is used for the class name and scraper name.
        section: accepted for call compatibility; not used by this function.

    Returns:
        A new subclass of ``_BasicScraper`` named ``GoComicsDotCom_<classname>``.
    """
    baseUrl = 'http://www.gocomics.com/'
    classname = sub("[^0-9a-zA-Z_]", "", name)

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        # The date is the LAST three path components (.../yyyy/mm/dd), so the
        # URL must be split from the right; splitting from the left would
        # pick up the scheme and host instead.
        prefix, year, month, day = pageUrl.rsplit('/', 3)
        return "%s_%s%s%s.gif" % (name, year, month, day)

    return type('GoComicsDotCom_%s' % classname,
        (_BasicScraper,),
        dict(
            latestUrl=baseUrl + name,
            name='GoComicsDotCom/' + classname,
            stripUrl=baseUrl + name + '/%s',
            imageSearch=compile(tagre("img", "src", r'(http://assets\.amuniversal\.com/[0-9a-f]+)')),
            prevSearch=compile(tagre("a", "href", r'(/[^"]+/\d+/\d+/\d+)', after="prev")),
            help='Index format: yyyy/mm/dd',
            namer=namer)
    )
|
||||
|
||||
# http://www.gocomics.com/features
|
||||
# XXX
|
||||
|
||||
# http://www.gocomics.com/explore/editorial_list
|
||||
# XXX
|
||||
|
||||
# http://www.gocomics.com/explore/sherpa_list
|
||||
# XXX
|
||||
|
||||
agnes = comicsDotCom('agnes', 'creators')
|
||||
andycapp = comicsDotCom('andycapp', 'creators')
|
||||
barkeaterlake = comicsDotCom('barkeaterlake', 'comics')
|
||||
bc = comicsDotCom('bc', 'creators')
|
||||
ben = comicsDotCom('ben', 'comics')
|
||||
betty = comicsDotCom('betty', 'comics')
|
||||
bignate = comicsDotCom('bignate', 'comics')
|
||||
bonanas = comicsDotCom('bonanas', 'wash')
|
||||
thebornloser = comicsDotCom('the-born-loser', 'comics')
|
||||
thebuckets = comicsDotCom('thebuckets', 'comics')
|
||||
candorville = comicsDotCom('candorville', 'wash')
|
||||
calvinandhobbes = comicsDotCom('calvinandhobbes', 'comics')
|
||||
chickweed = comicsDotCom('9chickweedlane', 'comics')
|
||||
committed = comicsDotCom('committed', 'comics')
|
||||
dilbert = comicsDotCom('dilbert', 'comics')
|
||||
drabble = comicsDotCom('drabble', 'comics')
|
||||
floandfriends = comicsDotCom('floandfriends', 'creators')
|
||||
frazz = comicsDotCom('frazz', 'comics')
|
||||
geech = comicsDotCom('geech', 'comics')
|
||||
getfuzzy = comicsDotCom('getfuzzy', 'comics')
|
||||
graffiti = comicsDotCom('graffiti', 'comics')
|
||||
grandave = comicsDotCom('grand-avenue', 'comics')
|
||||
heathcliff = comicsDotCom('heathcliff', 'creators')
|
||||
herman = comicsDotCom('herman', 'comics')
|
||||
janesworld = comicsDotCom('janesworld', 'comics')
|
||||
jumpstart = comicsDotCom('jumpstart', 'comics')
|
||||
kitandcarlyle = comicsDotCom('kitandcarlyle', 'comics')
|
||||
luann = comicsDotCom('luann', 'comics')
|
||||
marmaduke = comicsDotCom('marmaduke', 'comics')
|
||||
moderatelyconfused = comicsDotCom('moderately-confused', 'comics')
|
||||
momma = comicsDotCom('momma', 'creators')
|
||||
monty = comicsDotCom('monty', 'comics')
|
||||
nancy = comicsDotCom('nancy', 'comics')
|
||||
offthemark = comicsDotCom('offthemark', 'comics')
|
||||
onebighappy = comicsDotCom('onebighappy', 'creators')
|
||||
peanuts = comicsDotCom('peanuts', 'comics')
|
||||
pearlsbeforeswine = comicsDotCom('pearlsbeforeswine', 'comics')
|
||||
pibgorn = comicsDotCom('pibgorn', 'comics')
|
||||
pickles = comicsDotCom('pickles', 'wash')
|
||||
redandrover = comicsDotCom('redandrover', 'wash')
|
||||
roseisrose = comicsDotCom('roseisrose', 'comics')
|
||||
rubes = comicsDotCom('rubes', 'creators')
|
||||
rudypark = comicsDotCom('rudypark', 'comics')
|
||||
speedbump = comicsDotCom('speedbump', 'creators')
|
||||
strangebrew = comicsDotCom('strangebrew', 'creators')
|
||||
tarzan = comicsDotCom('tarzan', 'comics')
|
||||
wizardofid = comicsDotCom('wizardofid', 'creators')
|
||||
workingdaze = comicsDotCom('working-daze', 'comics')
|
||||
workingitout = comicsDotCom('workingitout', 'creators')
|
81
dosagelib/plugins/creators.py
Normal file
81
dosagelib/plugins/creators.py
Normal file
|
@ -0,0 +1,81 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
|
||||
def creators(name, shortname):
    """Create a scraper class for a comic hosted on www.creators.com.

    Args:
        name: comic name; used in the class name ``Creators_<name>`` and in
            the scraper's ``name`` attribute ``Creators/<name>``.
        shortname: the comic's path component under ``/comics/`` on the site.

    Returns:
        A new subclass of ``_BasicScraper``.
    """
    # This module does not import re.compile at file level; without this the
    # name ``compile`` would resolve to the builtin compile(), which fails
    # when called with a single pattern argument.
    from re import compile

    baseUrl = 'http://www.creators.com/comics/'
    return type('Creators_%s' % name,
        (_BasicScraper,),
        dict(
            name='Creators/' + name,
            latestUrl='%s%s.html' % (baseUrl, shortname),
            # '%%s' leaves a literal '%s' placeholder for the strip index.
            stripUrl='%s%s/%%s.html' % (baseUrl, shortname),
            imageSearch=compile(tagre("img", "src", r'(/comics/\d+/[^"]+)')),
            prevSearch=compile(tagre("a", "href", r'(/comics/%s/\d+\.html)' % shortname) +
                tagre("img", "src", r'/img_comics/arrow_l\.gif')),
            help='Index format: n')
    )
|
||||
|
||||
|
||||
# for a complete list see http://www.creators.com/comics/cat-seeall.html
|
||||
comics = {
|
||||
'Agnes': 'agnes',
|
||||
'AndyCapp': 'andy-capp',
|
||||
'Archie': 'archie',
|
||||
'AskShagg': 'ask-shagg',
|
||||
'BallardStreet': 'ballard-street',
|
||||
'BC': 'bc',
|
||||
'TheBarn': 'the-barn',
|
||||
'CafeConLeche': 'cafe-con-leche',
|
||||
'ChuckleBros': 'chuckle-bros',
|
||||
'DaddysHome': 'daddys-home',
|
||||
'DiamondLil': 'diamond-lil',
|
||||
'TheDinetteSet': 'dinette-set',
|
||||
'DogEatDoug': 'dog-eat-doug',
|
||||
'DogsOfCKennel': 'dogs-of-c-kennel',
|
||||
'DonaldDuck': 'donald-duck',
|
||||
'FloAndFriends': 'flo-and-friends',
|
||||
'Flare': 'flare',
|
||||
'FlightDeck': 'flight-deck',
|
||||
'ForHeavensSake': 'for-heavens-sake',
|
||||
'FreeRange': 'free-range',
|
||||
'GirlsAndSports': 'girls-and-sports',
|
||||
'Heathcliff': 'heathcliff',
|
||||
'HerbAndJamaal': 'herb-and-jamaal',
|
||||
'HopeAndDeath': 'hope-and-death',
|
||||
'LibertyMeadows': 'liberty-meadows',
|
||||
'TheMeaningOfLila': 'meaning-of-lila',
|
||||
'MickeyMouse': 'mickey-mouse',
|
||||
'Momma': 'momma',
|
||||
'NestHeads': 'nest-heads',
|
||||
'OneBigHappy': 'one-big-happy',
|
||||
'OnAClaireDay': 'on-a-clair-day',
|
||||
'TheOtherCoast': 'other-coast',
|
||||
'TheQuigmans': 'quigmans',
|
||||
'Rubes': 'rubes',
|
||||
'Rugrats': 'rugrats',
|
||||
'ScaryGary': 'scary-gary',
|
||||
'SpeedBump': 'speed-bump',
|
||||
'StrangeBrew': 'strange-brew',
|
||||
'ThinLines': 'thin-lines',
|
||||
'WeePals': 'wee-pals',
|
||||
'WizardOfId': 'wizard-of-id',
|
||||
'WorkingItOut': 'working-it-out',
|
||||
'ZackHill': 'zack-hill',
|
||||
'BCSpanish': 'bc-spanish',
|
||||
'WizardOfIdSpanish': 'wizard-of-id-spanish',
|
||||
'ArchieSpanish': 'archie-spanish',
|
||||
'HeathcliffSpanish': 'heathcliff-spanish',
|
||||
'RugratsSpanish': 'rugrats-spanish',
|
||||
'LongStoryShort': 'long-story-short',
|
||||
'Recess': 'recess',
|
||||
'HomeOffice': 'stay-at-home-dad',
|
||||
'OffCenter': 'off-center',
|
||||
'GirlsAndSportsSpanish': 'girls-and-sports-spanish',
|
||||
}
|
||||
|
||||
for name, shortname in comics.items():
|
||||
globals()[name] = creators(name, shortname)
|
|
@ -1,9 +1,11 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..helpers import bounceStarter, indirectStarter
|
||||
from ..helpers import indirectStarter
|
||||
from ..util import tagre, getQueryParams
|
||||
|
||||
|
||||
|
@ -11,9 +13,9 @@ from ..util import tagre, getQueryParams
|
|||
class DMFA(_BasicScraper):
    # Scraper settings for the comic at www.missmab.com.
    latestUrl = 'http://www.missmab.com/'
    stripUrl = latestUrl + 'Comics/Vol_%s.php'
    # Capture the whole image path, which starts with either "Comics/" or
    # "Vol"; the alternation is non-capturing so group 1 is the full path.
    imageSearch = compile(tagre("img", "src", r'((?:Comics/|Vol)[^"]+)'))
    # Capture the complete href (the quantifier belongs inside the group;
    # outside it only the last character would be captured). The link is
    # identified by the "previous" image, optionally under "../".
    prevSearch = compile(tagre("a", "href", r'([^"]+)')+
        tagre("img", "src", r'(?:\.\./)?Images/comicprev\.gif'))
    help = 'Index format: nnn (normally, some specials)'
|
||||
|
||||
|
||||
|
@ -27,10 +29,10 @@ class DandyAndCompany(_BasicScraper):
|
|||
|
||||
class DarkWings(_BasicScraper):
|
||||
latestUrl = 'http://www.flowerlarkstudios.com/dark-wings/'
|
||||
stripUrl = latestUrl + 'archive.php?day=%s'
|
||||
imageSearch = compile(r'(comics/.+?)" W')
|
||||
prevSearch = compile(r"first_day.+?/(archive.+?)'.+?previous_day")
|
||||
help = 'Index format: yyyymmdd'
|
||||
stripUrl = latestUrl + '%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.flowerlarkstudios\.com/dark-wings/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.flowerlarkstudios\.com/dark-wings/[^"]+)', after="navi-prev"))
|
||||
help = 'Index format: yyyy/mm/dd/page-nn-mm'
|
||||
|
||||
|
||||
class DeathToTheExtremist(_BasicScraper):
|
||||
|
@ -44,12 +46,11 @@ class DeathToTheExtremist(_BasicScraper):
|
|||
class DeepFried(_BasicScraper):
|
||||
latestUrl = 'http://www.whatisdeepfried.com/'
|
||||
stripUrl = latestUrl + '%s'
|
||||
imageSearch = compile(r'(http://www.whatisdeepfried.com/comics/.+?)"')
|
||||
prevSearch = compile(r'"(http://www.whatisdeepfried.com/.+?)"><span class="prev">')
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.whatisdeepfried\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.whatisdeepfried\.com/[^"]+)', after="prev"))
|
||||
help = 'Index format: non'
|
||||
|
||||
|
||||
|
||||
class DoemainOfOurOwn(_BasicScraper):
|
||||
latestUrl = 'http://www.doemain.com/'
|
||||
stripUrl = latestUrl + 'index.cgi/%s'
|
||||
|
@ -58,7 +59,6 @@ class DoemainOfOurOwn(_BasicScraper):
|
|||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
|
||||
class DrFun(_BasicScraper):
|
||||
latestUrl = 'http://www.ibiblio.org/Dave/ar00502.htm'
|
||||
stripUrl = 'http://www.ibiblio.org/Dave/ar%s.htm'
|
||||
|
@ -95,26 +95,17 @@ class DreamKeepersPrelude(_BasicScraper):
|
|||
|
||||
class Drowtales(_BasicScraper):
|
||||
latestUrl = 'http://www.drowtales.com/mainarchive.php'
|
||||
stripUrl = latestUrl + '?location=%s'
|
||||
imageSearch = compile(r'src=".(/tmpmanga/.+?)"')
|
||||
prevSearch = compile(r'<a href="mainarchive.php(\?location=\d+)"><img src="[^"]*previousday\.gif"')
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class DungeonCrawlInc(_BasicScraper):
|
||||
latestUrl = 'http://www.dungeoncrawlinc.com/latest.html'
|
||||
stripUrl = 'http://www.dungeoncrawlinc.com/comic%s'
|
||||
imageSearch = compile(r'src="(.+?/DCI_.+?)"')
|
||||
prevSearch = compile(r'<a href="(.+?)">.+?back')
|
||||
help = 'Index format: nnn.html'
|
||||
|
||||
stripUrl = latestUrl + '?sid=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'("http://www.drowtales.com/mainarchive/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(\?sid=\d+)', before="link_prev_top"))
|
||||
help = 'Index format: number'
|
||||
|
||||
|
||||
class DieselSweeties(_BasicScraper):
|
||||
latestUrl = 'http://www.dieselsweeties.com/'
|
||||
stripUrl = latestUrl + 'archive/%s'
|
||||
imageSearch = compile(r'src="(/hstrips/.+?)"')
|
||||
prevSearch = compile(r'href="(/archive/.+?)">(<img src="http://www.dieselsweeties.com/ximages/blackbackarrow160.png|previous webcomic)')
|
||||
imageSearch = compile(tagre("img", "src", r'(/hstrips/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/archive/\d+)') + tagre("img", "src", r'http://www\.dieselsweeties\.com/ximages/blackbackarrow160.png'))
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
@classmethod
|
||||
|
@ -136,14 +127,12 @@ class DominicDeegan(_BasicScraper):
|
|||
return getQueryParams(imageUrl)['save-as'][0].rsplit('.', 1)[0]
|
||||
|
||||
|
||||
|
||||
class DorkTower(_BasicScraper):
|
||||
latestUrl = 'http://www.dorktower.com/'
|
||||
stripUrl = None
|
||||
imageSearch = compile(r'<img src="(http://www\.dorktower\.com/images/comics/[^"]+)"')
|
||||
prevSearch = compile(r'<a href="(/previous\.php\?[^"]+)"')
|
||||
help = 'Index format: None'
|
||||
|
||||
stripUrl = latestUrl + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.dorktower\.com/files/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.dorktower\.com/[^"]+)')+"Previous")
|
||||
help = 'Index format: yyyy/mm/dd/stripname-dd-mm-yy'
|
||||
|
||||
|
||||
class DresdenCodak(_BasicScraper):
|
||||
|
@ -153,33 +142,3 @@ class DresdenCodak(_BasicScraper):
|
|||
prevSearch = compile(r'<a href="http://dresdencodak.com(/.*?)"><img src=http://dresdencodak.com/m_prev.png>')
|
||||
starter = indirectStarter('http://dresdencodak.com/', compile(r'<div id="preview"><a href="http://dresdencodak.com/(\d+/\d+/\d+/.*?)">'))
|
||||
|
||||
|
||||
|
||||
class DonkBirds(_BasicScraper):
|
||||
latestUrl = 'http://www.donkbirds.com/'
|
||||
stripUrl = latestUrl + 'index.php?date=%s'
|
||||
imageSearch = compile(r'<img src="(strips/.+?)"')
|
||||
prevSearch = compile(r'<a href="(.+?)">Previous</a>')
|
||||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
|
||||
class DrawnByDrunks(_BasicScraper):
|
||||
starter = bounceStarter('http://www.drawnbydrunks.co.uk/', compile(r'<div class="nav-last"><a href="(.+?)">'))
|
||||
stripUrl = 'http://www.drawnbydrunks.co.uk/?p=%s'
|
||||
imageSearch = compile(r'<img src="(http://www.drawnbydrunks.co.uk/comics/.+?)"')
|
||||
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
|
||||
help = 'Index format: nnn'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
return pageUrl.split('=')[-1]
|
||||
|
||||
|
||||
|
||||
class DeathCord(_BasicScraper):
|
||||
latestUrl = 'http://deathchord.com/index.php'
|
||||
stripUrl = 'http://deathchord.com/__.php?comicID=%s'
|
||||
imageSearch = compile(r'<img src="(http://deathchord.com/kill/\d+.+?)"')
|
||||
prevSearch = compile(r'</a>?.+?<a href="(http://deathchord.com/.+?)"><img[^>]+?alt="Previous" />')
|
||||
help = 'Index format: nnn'
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, IGNORECASE
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, IGNORECASE
|
||||
|
||||
from ..helpers import indirectStarter
|
||||
|
@ -9,28 +11,19 @@ from ..util import tagre
|
|||
|
||||
class EerieCuties(_BasicScraper):
|
||||
latestUrl = 'http://www.eeriecuties.com/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch = compile(r'(/comics/.+?)"')
|
||||
prevSearch = compile(r'(/d/.+?.html).+?/previous_day.gif')
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class EdgeTheDevilhunter(_BasicScraper):
|
||||
name = 'KeenSpot/EdgeTheDevilhunter'
|
||||
latestUrl = 'http://www.edgethedevilhunter.com/'
|
||||
stripUrl = latestUrl + 'comics/%s'
|
||||
imageSearch = compile(r'(http://www.edgethedevilhunter.com/comics/.+?)" alt')
|
||||
prevSearch = compile(r'(http://www.edgethedevilhunter.com/comics/.+?)"><span class="prev')
|
||||
help = 'Index format: mmddyyyy or name'
|
||||
|
||||
stripUrl = latestUrl + 'strips-ec/%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://ace\.eeriecuties\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)', before="prev"))
|
||||
help = 'Index format: stripname'
|
||||
|
||||
|
||||
class Eriadan(_BasicScraper):
|
||||
stripUrl = 'http://www.shockdom.com/eriadan/?p=%s'
|
||||
latestUrl = 'http://www.shockdom.com/webcomics/eriadan/'
|
||||
stripUrl = latestUrl + '%s'
|
||||
# XXX fix image search
|
||||
imageSearch = compile(r'title="[^"]+?" src="http://www\.shockdom\.com/eriadan/(wp-content/uploads/.+?)"')
|
||||
prevSearch = compile(r"<link rel='prev' title='.+?' href='http://www\.shockdom\.com/eriadan/(\?p=.+?)'")
|
||||
starter = indirectStarter('http://www.shockdom.com/eriadan/', compile(r'<ul class="latest2">[^<]+?<li class="list-title"><a href="(http://www\.shockdom.com/eriadan/\?p=.+?)"'))
|
||||
help = 'Index format: nnn (unpadded)'
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
|
||||
help = 'Index format: yyyy/mm/dd/nnn (unpadded)'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
|
@ -57,16 +50,6 @@ class ElGoonishShiveNP(_BasicScraper):
|
|||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
|
||||
class ElsieHooper(_BasicScraper):
|
||||
latestUrl = 'http://www.elsiehooper.com/todaysserial.htm'
|
||||
stripUrl = 'http://www.elsiehooper.com/comics/comic%s.htm'
|
||||
imageSearch = compile(r'<img src="(/comics_/.+?)">')
|
||||
prevSearch = compile(r'<A href="(.+?)"><IMG (height=27 src="/images/previous.gif"|src="/images/previous.gif")', IGNORECASE)
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
||||
class EmergencyExit(_BasicScraper):
|
||||
latestUrl = 'http://www.eecomics.net/'
|
||||
stripUrl = None
|
||||
|
@ -79,27 +62,17 @@ class EmergencyExit(_BasicScraper):
|
|||
|
||||
class ErrantStory(_BasicScraper):
|
||||
latestUrl = 'http://www.errantstory.com/'
|
||||
stripUrl = latestUrl + 'archive.php?date=%s'
|
||||
stripUrl = latestUrl + '%s'
|
||||
imageSearch = compile(r'<img[^>]+?src="([^"]*?comics/.+?)"')
|
||||
prevSearch = compile(r'><a href="(.+?)"><Previous</a>')
|
||||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
|
||||
class EternalVenture(_BasicScraper):
|
||||
latestUrl = 'http://www.pulledpunches.com/venture/'
|
||||
stripUrl = 'http://www.beaglespace.com/pulledpunches/venture/?p=%s'
|
||||
imageSearch = compile(r'<img src="(http://www.beaglespace.com/pulledpunches/venture/comics/.+?)"')
|
||||
prevSearch = compile(r'id="prev"><a href="(http://www.beaglespace.com/pulledpunches/venture/.+?)" ')
|
||||
help = 'Index format: nn'
|
||||
|
||||
help = 'Index format: yyyy-mm-dd/num'
|
||||
|
||||
|
||||
class Evercrest(_BasicScraper):
|
||||
latestUrl = 'http://www.evercrest.com/archives/20030308'
|
||||
stripUrl = latestUrl + '%s'
|
||||
stripUrl = 'http://www.evercrest.com/archives/%s'
|
||||
imageSearch = compile(r'<img.+?src="([^"]*/(images/oldstrips|archives/i)/[^"]*)"')
|
||||
prevSearch = compile(r'<a.+?href="(http://www.evercrest.com/archives/\d+)">< Previous')
|
||||
prevSearch = compile(r'<a.+?href="(http://www\.evercrest\.com/archives/\d+)">< Previous')
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
|
@ -113,26 +86,25 @@ class EverybodyLovesEricRaymond(_BasicScraper):
|
|||
|
||||
class EvilDiva(_BasicScraper):
|
||||
latestUrl = 'http://www.evildivacomics.com/'
|
||||
stripUrl = latestUrl + '%s.html'
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
imageSearch = compile(r'(/comics/.+?)"')
|
||||
prevSearch = compile(r'http.+?com/(.+?)".+?"prev')
|
||||
help = 'Index format: cpn (unpadded)'
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
class EvilInc(_BasicScraper):
|
||||
latestUrl = 'http://www.evil-comic.com/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch=compile(tagre("img", "src", r'(/comic[s|/][^"]+)'))
|
||||
prevSearch=compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif")
|
||||
help='Index format: yyyymmdd'
|
||||
stripUrl = latestUrl + 'archive/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'/images/previous\.gif'))
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class Exiern(_BasicScraper):
|
||||
latestUrl = 'http://www.exiern.com/'
|
||||
stripUrl = latestUrl + 'comic/%s'
|
||||
imageSearch = compile(r'<img src="(http://www.exiern.com/comics/.+?)"')
|
||||
prevSearch = compile(r'<a href="(http://www.exiern.com/.+?)" class="navi navi-prev"')
|
||||
help = 'Index format: ChapterName-StripName'
|
||||
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.exiern\.com/comics/[^"])'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.exiern\.com/[^"]+)', after="prev"))
|
||||
help = 'Index format: n'
|
||||
|
||||
|
||||
class ExiernDarkReflections(_BasicScraper):
|
||||
|
@ -143,31 +115,27 @@ class ExiernDarkReflections(_BasicScraper):
|
|||
help = 'Index format: n'
|
||||
|
||||
|
||||
|
||||
class ExtraLife(_BasicScraper):
|
||||
latestUrl = 'http://www.myextralife.com/'
|
||||
stripUrl = latestUrl + 'comic/%s/'
|
||||
imageSearch = compile(r'<img src="(http://www.myextralife.com/comics/.+?)"')
|
||||
prevSearch = compile(r'<div class="nav-previous"><a href="(http://www.myextralife.com/comic/.+?)"')
|
||||
help = 'Index format: mmddyyyy'
|
||||
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.myextralife\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
|
||||
help = 'Index format: stripname'
|
||||
|
||||
|
||||
class EyeOfRamalach(_BasicScraper):
|
||||
latestUrl = 'http://theeye.katbox.net/'
|
||||
stripUrl = latestUrl + 'index.php?strip_id=%s'
|
||||
imageSearch = compile(r'="(.+?strips/.+?)"')
|
||||
prevSearch = compile(r'(index.php\?strip_id=.+?)".+?navigation_back')
|
||||
stripUrl = latestUrl + 'archive/%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://theeye\.katbox\.net/wp-content/webcomic/theeye/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://theeye\.katbox\.net/archive/[^"]+)', after="previous"))
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
class EarthsongSaga(_BasicScraper):
|
||||
latestUrl = 'http://www.earthsongsaga.com/'
|
||||
starter = indirectStarter('http://www.earthsongsaga.com/', compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'[^"]+current\.jpg')))
|
||||
stripUrl = None
|
||||
imageSearch = compile(r'<img src="((?:\.\./)?images/vol\d+/ch\d+/\d+\.\w+)"')
|
||||
prevSearch = compile(r'<a href="([^"]+\.html)"[^>]*><img src="(?:(?:\.\.)?/)?images/testing/prev')
|
||||
starter = indirectStarter('http://www.earthsongsaga.com/',
|
||||
compile(r'a href="(.+?)".+?current-page.jpg'))
|
||||
imageSearch = compile(tagre("img", "src", r'((?:\.\./)?images/vol\d+/ch\d+/\d+\.\w+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous"))
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
|
@ -175,19 +143,18 @@ class EarthsongSaga(_BasicScraper):
|
|||
return 'vol%02d_ch%02d_%02d' % (int(imgmatch.group(1)), int(imgmatch.group(2)), int(imgmatch.group(3)))
|
||||
|
||||
|
||||
|
||||
class ExploitationNow(_BasicScraper):
|
||||
latestUrl = 'http://exploitationnow.com/'
|
||||
stripUrl = latestUrl + 'comic.php?date=%s'
|
||||
imageSearch = compile(r'src="(comics/.+?)"')
|
||||
prevSearch = compile(r' <a href="(.+?)" title="\[Back\]">')
|
||||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
latestUrl = 'http://www.exploitationnow.com/'
|
||||
stripUrl = latestUrl + '%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.exploitationnow\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.exploitationnow\.com/[^"]+)', after="navi-prev"))
|
||||
help = 'Index format: yyyy-mm-dd/num'
|
||||
|
||||
|
||||
class Ellerbisms(_BasicScraper):
|
||||
latestUrl = 'http://www.ellerbisms.com/'
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
imageSearch = compile(r'<img src="(http://www.ellerbisms.com/comics/.+?)"')
|
||||
prevSearch = compile(r'<a href="(http://www.ellerbisms.com/.+?)"><span class="prev">')
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.ellerbisms\.com/wp-content/uploads/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.ellerbisms\.com/[^"]+)', after="prev"))
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, IGNORECASE, MULTILINE
|
||||
|
||||
from ..util import tagre
|
||||
|
@ -32,35 +34,24 @@ class FeyWinds(_BasicScraper):
|
|||
compile(r'(comic/page.php\?id.+?)"'))
|
||||
|
||||
|
||||
|
||||
class FightCastOrEvade(_BasicScraper):
|
||||
latestUrl = 'http://www.fightcastorevade.net/'
|
||||
stripUrl = latestUrl + 'd/%s'
|
||||
imageSearch = compile(tagre("img", "src", r'"(http://www\.fightcastorevade\.net/comics/[^"]+)'))
|
||||
prevSearch = compile(r'"(.+?/d/.+?)".+?previous')
|
||||
help = 'Index format: yyyymmdd.html'
|
||||
|
||||
|
||||
|
||||
class FilibusterCartoons(_BasicScraper):
|
||||
latestUrl = 'http://www.filibustercartoons.com/'
|
||||
stripUrl = latestUrl + 'index.php/%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.filibustercartoons\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(r'<a href="(.+?)"><img src=\'(.+?/arrow-left.gif)\'')
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.filibustercartoons\.com/[^"]+)', after="prev"))
|
||||
help = 'Index format: yyyy/mm/dd/name'
|
||||
|
||||
|
||||
|
||||
class FlakyPastry(_BasicScraper):
|
||||
latestUrl = 'http://flakypastry.runningwithpencils.com/index.php'
|
||||
stripUrl = 'http://flakypastry.runningwithpencils.com/comic.php\?strip_id=%s'
|
||||
stripUrl = 'http://flakypastry.runningwithpencils.com/comic.php?strip_id=%s'
|
||||
imageSearch = compile(r'<img src="(comics/.+?)"')
|
||||
prevSearch = compile(r'<a href="(.+?)".+?btn_back')
|
||||
help = 'Index format: nnnn'
|
||||
|
||||
|
||||
# XXX move to keenspot
|
||||
class Flipside(_BasicScraper):
|
||||
latestUrl = 'http://www.flipsidecomics.com/comic.php'
|
||||
latestUrl = 'http://flipside.keenspot.com/comic.php'
|
||||
stripUrl = latestUrl + '?i=%s'
|
||||
imageSearch = compile(r'<IMG SRC="(comic/.+?)"')
|
||||
prevSearch = compile(r'<A HREF="(comic.php\?i=\d+?)"><')
|
||||
|
@ -72,20 +63,9 @@ class Footloose(_BasicScraper):
|
|||
stripUrl = 'http://footloosecomic.com/footloose/pages.php?page=%s'
|
||||
imageSearch = compile(r'<img src="/footloose/(.+?)"')
|
||||
prevSearch = compile(r'(?:first.+?[^>]).+?(/footloose/.+?)".+?(?:prev)')
|
||||
# prevSearch = compile(r'(?:first.+?[^>]).+?(/footloose/.+?html).+?(?:prev|Prev)')
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
|
||||
class FragileGravity(_BasicScraper):
|
||||
latestUrl = 'http://www.fragilegravity.com/'
|
||||
stripUrl = latestUrl + 'core.php?archive=%s'
|
||||
imageSearch = compile(r'<IMG SRC="(strips/.+?)"')
|
||||
prevSearch = compile(r'<A HREF="(.+?)"\nonMouseover="window.status=\'Previous Strip', MULTILINE | IGNORECASE)
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
|
||||
class Freefall(_BasicScraper):
|
||||
latestUrl = 'http://freefall.purrsia.com/default.htm'
|
||||
stripUrl = 'http://freefall.purrsia.com/ff%s/fc%s.htm'
|
||||
|
@ -94,7 +74,6 @@ class Freefall(_BasicScraper):
|
|||
help = 'Index format: nnnn/nnnnn'
|
||||
|
||||
|
||||
|
||||
class FantasyRealms(_BasicScraper):
|
||||
stripUrl = 'http://www.fantasyrealmsonline.com/manga/%s.php'
|
||||
imageSearch = compile(r'<img src="(\d{1,4}.\w{3,4})" width="540"', IGNORECASE)
|
||||
|
@ -104,14 +83,6 @@ class FantasyRealms(_BasicScraper):
|
|||
compile(r'<a href="(manga/.+?)"><img src="preview.jpg"', IGNORECASE))
|
||||
|
||||
|
||||
|
||||
class FullFrontalNerdity(_BasicScraper):
|
||||
latestUrl = 'http://nodwick.humor.gamespy.com/ffn/index.php'
|
||||
stripUrl = None
|
||||
imageSearch = compile(r'<img src="(http://nodwick.humor.gamespy.com/ffn/strips/[^"]*)"', IGNORECASE)
|
||||
prevSearch = compile(r'<a href="(index.php\?date=[0-9-]*)"><img src="back.jpg"', IGNORECASE)
|
||||
|
||||
|
||||
class FunInJammies(_BasicScraper):
|
||||
latestUrl = 'http://www.funinjammies.com/'
|
||||
stripUrl = latestUrl + 'comic.php?issue=%s'
|
||||
|
@ -120,7 +91,6 @@ class FunInJammies(_BasicScraper):
|
|||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
|
||||
class Fallen(_BasicScraper):
|
||||
stripUrl = 'http://www.fallencomic.com/pages/part%s/%s-p%s.htm'
|
||||
imageSearch = compile(r'<IMG SRC="(page/.+?)"', IGNORECASE)
|
||||
|
@ -140,19 +110,12 @@ class Fallen(_BasicScraper):
|
|||
self.currentUrl = self.stripUrl % (part, index, part)
|
||||
|
||||
|
||||
|
||||
class FoxTails(_BasicScraper):
|
||||
latestUrl = 'http://www.magickitsune.com/strips/current.html'
|
||||
stripUrl = 'http://www.magickitsune.com/strips/%s'
|
||||
imageSearch = compile(r'<img src=(img/.+?)[ |>]', IGNORECASE)
|
||||
prevSearch = compile(r'(?<=first.gif)*(?<=</td>)*<a.*href=\'(.+?)\'.+?<img.+?src=\'../img/prev.gif\'>', IGNORECASE)
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class FredoAndPidjin(_BasicScraper):
|
||||
homepage = 'http://www.pidjin.net/'
|
||||
stripUrl = None
|
||||
help = 'Index format: yyyy/mm/dd/name'
|
||||
imageSearch = compile(tagre('img', 'src', '(http://cdn\.pidjin\.net/wp-content/uploads/\d\d\d\d/\d\d/\d+[^"]+\.png)'))
|
||||
prevSearch = compile(tagre('a', 'href', '([^"]+)')+"Prev</a>")
|
||||
starter = indirectStarter(homepage,
|
||||
compile(tagre('a', 'href', "("+homepage+r'\d\d\d\d/\d\d/\d\d/[^"]+/)')))
|
||||
|
||||
|
|
45
dosagelib/plugins/fallenangel.py
Normal file
45
dosagelib/plugins/fallenangel.py
Normal file
|
@ -0,0 +1,45 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
def fallenangel(name, shortname):
|
||||
pass # XXX
|
||||
|
||||
class _TheFallenAngel(_BasicScraper):
|
||||
imageSearch = compile(r'SRC="(http://www.thefallenangel.co.uk/\w+comics/.+?)"')
|
||||
prevSearch = compile(r' <a href="(http://www.thefallenangel.co.uk/.+?)"><img[^>]+?src="http://www.thefallenangel.co.uk/images/previousday.jpg"')
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
@property
|
||||
def baseUrl(self):
|
||||
return 'http://www.thefallenangel.co.uk/cgi-bin/%sautokeen/autokeenlite.cgi' % (self.shortName,)
|
||||
|
||||
|
||||
@property
|
||||
def stripUrl(self):
|
||||
return self.baseUrl + '?date=%s'
|
||||
|
||||
|
||||
def starter(self):
|
||||
return self.baseUrl
|
||||
|
||||
|
||||
|
||||
class HighMaintenance(_TheFallenAngel):
|
||||
name = 'TheFallenAngel/HighMaintenance'
|
||||
shortName = 'hm'
|
||||
|
||||
|
||||
|
||||
class FAWK(_TheFallenAngel):
|
||||
name = 'TheFallenAngel/FAWK'
|
||||
shortName = 'fawk'
|
||||
|
||||
|
||||
|
||||
class MalloryChan(_TheFallenAngel):
|
||||
name = 'TheFallenAngel/MalloryChan'
|
||||
shortName = 'mallorychan'
|
||||
|
||||
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
|
@ -9,17 +11,18 @@ from ..util import tagre
|
|||
|
||||
class Galaxion(_BasicScraper):
|
||||
latestUrl = 'http://galaxioncomics.com/'
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
imageSearch = compile(r'(wordpress/comics/.+?)"')
|
||||
prevSearch = compile(r'\| <a href="http://galaxioncomics.com/(\?p=.+?)".+?vious.gif')
|
||||
help = 'Index format: non'
|
||||
stripUrl = latestUrl + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://galaxioncomics\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://galaxioncomics\.com/[^"]+)', after="prev"))
|
||||
help = 'Index format: n-comic/book-n/chapter-n/title-nnn'
|
||||
|
||||
|
||||
class Garanos(_BasicScraper):
|
||||
latestUrl = 'http://www.garanos.com/'
|
||||
stripUrl = latestUrl + 'pages/page-%s'
|
||||
imageSearch = compile(r'<img src=.+?(/pages/.+?)"')
|
||||
prevSearch = compile(r'<a href="(http://www.garanos.com/pages/page-.../)">◄ Previous<')
|
||||
starter = indirectStarter('http://garanos.alexheberling.com/pages/page-1/',
|
||||
compile(tagre("a", "href", r'(http://garanos\.alexheberling\.com/pages/[^"]+)', after="navi-last")))
|
||||
stripUrl = 'http://garanos.alexheberling.com/pages/page-%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://garanos\.alexheberling\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://garanos\.alexheberling\.com/pages/[^"]+)', after="prev"))
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
|
@ -31,41 +34,30 @@ class GUComics(_BasicScraper):
|
|||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
|
||||
class GenrezvousPoint(_BasicScraper):
|
||||
latestUrl = 'http://genrezvouspoint.com/'
|
||||
latestUrl = 'http://www.genrezvouspoint.com/'
|
||||
stripUrl = latestUrl + 'index.php?comicID=%s'
|
||||
imageSearch = compile(r'<img src=\'(comics/.+?)\'')
|
||||
prevSearch = compile(r' <a[^>]+?href="(.+?)">PREVIOUS</a>')
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
||||
class GirlGenius(_BasicScraper):
|
||||
latestUrl = 'http://girlgeniusonline.com/comic.php'
|
||||
stripUrl = 'http://www.girlgeniusonline.com/comic.php\?date=%s'
|
||||
stripUrl = 'http://www.girlgeniusonline.com/comic.php?date=%s'
|
||||
imageSearch = compile(r"(/ggmain/strips/.+?)'")
|
||||
prevSearch = compile(r"</a> <a href=.+?(/comic.php\?date=.+?)'.+?Previous")
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
|
||||
class GirlsWithSlingshots(_BasicScraper):
|
||||
latestUrl = 'http://www.daniellecorsetto.com/gws.html'
|
||||
stripUrl = 'http://www.daniellecorsetto.com/GWS%s.html'
|
||||
imageSearch = compile(r'<img src="(images/gws/GWS\d{3}.jpg)"')
|
||||
prevSearch = compile(r'(archive.php\?today=\d{3}&comic=\d{3})"[^>]*><img[^>]+src="images/gwsmenu/back_off.jpg"')
|
||||
latestUrl = 'http://www.girlswithslingshots.com/'
|
||||
stripUrl = latestUrl + 'comic/gws-%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://cdn\.girlswithslingshots\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.girlswithslingshots\.com/comic/[^"]+)', after="prev"))
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
class Girly(_BasicScraper):
|
||||
latestUrl = 'http://girlyyy.com/'
|
||||
stripUrl = latestUrl + 'go/%s'
|
||||
imageSearch = compile(r'<img src="(http://girlyyy.com/comics/.+?)"')
|
||||
prevSearch = compile(r'<a href="(.+?)"> < prev')
|
||||
|
||||
help = 'Index format: nnn'
|
||||
|
||||
class GleefulNihilism(_BasicScraper):
|
||||
latestUrl = 'http://gleefulnihilism.com/'
|
||||
stripUrl = latestUrl + 'comics/%s/'
|
||||
|
@ -82,7 +74,6 @@ class Goats(_BasicScraper):
|
|||
help = 'Index format: yymmdd'
|
||||
|
||||
|
||||
|
||||
class GoneWithTheBlastwave(_BasicScraper):
|
||||
starter = indirectStarter('http://www.blastwave-comic.com/index.php?p=comic&nro=1',
|
||||
compile(r'href="(index.php\?p=comic&nro=\d+)"><img src="images/page/default/latest'))
|
||||
|
@ -96,34 +87,30 @@ class GoneWithTheBlastwave(_BasicScraper):
|
|||
return '%02d' % int(compile(r'nro=(\d+)').search(pageUrl).group(1))
|
||||
|
||||
|
||||
|
||||
class GunnerkrigCourt(_BasicScraper):
|
||||
latestUrl = 'http://www.gunnerkrigg.com/index2.php'
|
||||
stripUrl = 'http://www.gunnerkrigg.com/archive_page.php\?comicID=%s'
|
||||
stripUrl = 'http://www.gunnerkrigg.com/archive_page.php?comicID=%s'
|
||||
imageSearch = compile(r'<img src="(.+?//comics/.+?)"')
|
||||
prevSearch = compile(r'<.+?(/archive_page.php\?comicID=.+?)".+?prev')
|
||||
help = 'Index format: n'
|
||||
|
||||
|
||||
|
||||
class Gunshow(_BasicScraper):
|
||||
latestUrl = 'http://gunshowcomic.com/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch = compile(r'src="(/comics/.+?)"')
|
||||
prevSearch = compile(r'(/d/\d+\.html)"><img[^>]+?src="/images/previous_day')
|
||||
help = 'Index format: yyyy/mm/dd'
|
||||
|
||||
stripUrl = latestUrl + '%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://gunshowcomic\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'[^"]+menu/small/previous\.gif'))
|
||||
help = 'Index format: n'
|
||||
|
||||
|
||||
class GleefulNihilism(_BasicScraper):
|
||||
latestUrl = 'http://gleefulnihilism.com/'
|
||||
stripUrl = latestUrl + 'comics/2009/12/01/just-one-of-the-perks/%s'
|
||||
imageSearch = compile(r'<img src="(http://gleefulnihilism.com/comics/.+?)"')
|
||||
prevSearch = compile(r'<a href="(.+?)"[^>]+?>Previous</a>')
|
||||
stripUrl = latestUrl + 'comics/%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://gleefulnihilism\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://gleefulnihilism\.com/comics/[^"]+)', after="Previous"))
|
||||
help = 'Index format: yyyy/mm/dd/strip-name'
|
||||
|
||||
|
||||
|
||||
class GastroPhobia(_BasicScraper):
|
||||
latestUrl = 'http://www.gastrophobia.com/'
|
||||
stripUrl = latestUrl + 'index.php?date=%s'
|
||||
|
@ -132,7 +119,6 @@ class GastroPhobia(_BasicScraper):
|
|||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
|
||||
class Geeks(_BasicScraper):
|
||||
latestUrl = 'http://sevenfloorsdown.com/geeks/'
|
||||
stripUrl = latestUrl + 'archives/%s'
|
||||
|
@ -141,7 +127,6 @@ class Geeks(_BasicScraper):
|
|||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
||||
class GlassHalfEmpty(_BasicScraper):
|
||||
latestUrl = 'http://www.defectivity.com/ghe/index.php'
|
||||
stripUrl = latestUrl + '?strip_id=%s'
|
||||
|
|
|
@ -1,67 +1,22 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
|
||||
|
||||
class HappyMedium(_BasicScraper):
|
||||
latestUrl = 'http://happymedium.fast-bee.com/'
|
||||
stripUrl = latestUrl + '%s'
|
||||
imageSearch = compile(r'(/comics/.+?)"')
|
||||
prevSearch = compile(r'com(/.+?)".+?"prev">◄')
|
||||
help = 'Index format: yyyy/mm/chapter-n-page-n'
|
||||
|
||||
|
||||
|
||||
class Heliothaumic(_BasicScraper):
|
||||
latestUrl = 'http://thaumic.net/'
|
||||
stripUrl = latestUrl + '%s'
|
||||
imageSearch = compile(r'<img src="(http://thaumic.net/comics/.+?)"')
|
||||
prevSearch = compile(r'<div class="nav-previous"><a href="(http://thaumic.net/.+?)">')
|
||||
help = 'Index format: yyyy/mm/dd/n(unpadded)-comicname'
|
||||
|
||||
|
||||
|
||||
class Housd(_BasicScraper):
|
||||
latestUrl = 'http://housd.net/archive_page.php?comicID=1284'
|
||||
stripUrl = 'http://housd.net/archive_page.php?comicID=%s'
|
||||
imageSearch = compile(r'"(.+?/comics/.+?)"')
|
||||
prevSearch = compile(r'"(h.+?comicID=.+?)".+?prev')
|
||||
help = 'Index format: nnnn'
|
||||
|
||||
|
||||
|
||||
class HateSong(_BasicScraper):
|
||||
latestUrl = 'http://hatesong.com/'
|
||||
stripUrl = latestUrl + '%s/'
|
||||
imageSearch = compile(r'src="(http://www.hatesong.com/strips/.+?)"')
|
||||
prevSearch = compile(r'<div class="headernav"><a href="(http://hatesong.com/\d{4}/\d{2}/\d{2})')
|
||||
help = 'Index format: yyyy/mm/dd'
|
||||
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
class HorribleVille(_BasicScraper):
|
||||
latestUrl = 'http://horribleville.com/d/20090517.html'
|
||||
stripUrl = 'http://horribleville.com/d/%s.html'
|
||||
imageSearch = compile(r'src="(/comics/.+?)"')
|
||||
prevSearch = compile(r'(\d+\.html)"><img[^>]+?src="/images/previous_day.png"')
|
||||
help = 'Index format: yyyy/mm/dd'
|
||||
|
||||
latestUrl = 'http://horribleville.com/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/d/[^"]+)') + tagre("img", "src", r'/images/previous\.png'))
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class HelpDesk(_BasicScraper):
|
||||
latestUrl = 'http://www.ubersoft.net/'
|
||||
stripUrl = latestUrl + 'comic/hd/%s/%s/%s'
|
||||
imageSearch = compile(r'src="(http://www.ubersoft.net/files/comics/hd/hd\d{8}.png)')
|
||||
prevSearch = compile(r'<a href="(/comic/.+?)">(.+?)previous</a>')
|
||||
help = 'Index format: yyyy/mm/name'
|
||||
latestUrl = 'https://www.eviscerati.org/comics?page=78'
|
||||
stripUrl = 'https://www.eviscerati.org/comics?page=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(https://www\.eviscerati\.org/files/comics/[^"]+)'))
|
||||
# The pager link has the form "/comics?page=<number>"; match the number with
# \d+ ("%d+" would only match a literal percent sign followed by the letter d).
prevSearch = compile(tagre("li", "class", r'pager-previous[^"]+') + tagre("a", "href", r'(/comics\?page=\d+)'))
|
||||
help = 'Index format: n'
|
||||
|
||||
|
||||
|
||||
class HardGraft(_BasicScraper):
|
||||
latestUrl = 'http://hard-graft.net/'
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
imageSearch = compile(r'<img src="(http://hard-graft.net/comics/.+?)"')
|
||||
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)"')
|
||||
help = 'Index format: nnn'
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
from re import compile, IGNORECASE
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile
|
||||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
|
||||
|
@ -16,7 +17,7 @@ class IDreamOfAJeanieBottle(_BasicScraper):
|
|||
|
||||
class IrregularWebcomic(_BasicScraper):
|
||||
latestUrl = 'http://www.irregularwebcomic.net/'
|
||||
stripUrl = latestUrl + 'cgi-bin/comic.pl?comic=%s'
|
||||
stripUrl = latestUrl + '%s.html'
|
||||
imageSearch = compile(r'<img .*src="(.*comics/.*(png|jpg|gif))".*>')
|
||||
prevSearch = compile(r'<a href="(/\d+\.html|/cgi-bin/comic\.pl\?comic=\d+)">Previous ')
|
||||
help = 'Index format: nnn'
|
||||
|
@ -30,54 +31,6 @@ class InsideOut(_BasicScraper):
|
|||
help = 'Index format: n_comic_name'
|
||||
|
||||
|
||||
|
||||
class InkTank(_BasicScraper):
|
||||
shortName = 'inktank'
|
||||
|
||||
def starter(self):
|
||||
return self.baseUrl + self.shortName + '/'
|
||||
|
||||
|
||||
def inkTank(name, shortName):
|
||||
@classmethod
|
||||
def _namer(cls, imageUrl, pageUrl):
|
||||
return '20%s-%s' % (imageUrl[-6:-4], imageUrl[-12:-7])
|
||||
|
||||
baseUrl = 'http://www.inktank.com/%s/' % (shortName,)
|
||||
return type('InkTank_%s' % name,
|
||||
(_BasicScraper,),
|
||||
dict(
|
||||
name='InkTank/' + name,
|
||||
latestUrl=baseUrl,
|
||||
stripUrl=baseUrl + 'd/%s.html',
|
||||
imageSearch=compile(r'<IMG SRC="(/images/[^/]+/cartoons/\d{2}-\d{2}-\d{2}.+?)"'),
|
||||
prevSearch=compile(r'<A HREF="(/[^/]+/index.cfm\?nav=\d+?)"><IMG SRC="/images/nav_last.gif"'),
|
||||
help='Index format: n (unpadded)')
|
||||
)
|
||||
|
||||
|
||||
at = inkTank('AngstTechnology', 'AT')
|
||||
ww = inkTank('WeakEndWarriors', 'WW')
|
||||
swo = inkTank('SorryWereOpen', 'SWO')
|
||||
|
||||
|
||||
|
||||
class IlmanNaista(_BasicScraper):
|
||||
latestUrl = 'http://kvantti.tky.fi/in/archive_end.shtml'
|
||||
stripUrl = 'http://kvantti.tky.fi/in/%s.shtml'
|
||||
imageSearch = compile(r'<img src="(kuvat/in_.+?)"', IGNORECASE)
|
||||
prevSearch = compile(r'<a href="(\d+.shtml)"><img width="90" height="45" src="deco/edellinen.png" alt="Edellinen"/></a>')
|
||||
|
||||
|
||||
|
||||
class ICantDrawFeet(_BasicScraper):
|
||||
latestUrl = 'http://icantdrawfeet.com/'
|
||||
stripUrl = 'http://icantdrawfeet.com/%s'
|
||||
imageSearch = compile(r'src="(http://icantdrawfeet.com/comics/.+?)"')
|
||||
prevSearch = compile(r'<a href="(http://icantdrawfeet.com/.+?)"><img src="http://icantdrawfeet.com/pageimages/prev.png"')
|
||||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
|
||||
|
||||
class ItsWalky(_BasicScraper):
|
||||
latestUrl = 'http://www.itswalky.com/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
|
|
|
@ -1,26 +1,18 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, MULTILINE
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
|
||||
|
||||
class Jack(_BasicScraper):
|
||||
latestUrl = 'http://www.pholph.com/'
|
||||
stripUrl = latestUrl + 'strip.php?id=5&sid=%s'
|
||||
imageSearch = compile(r'<img src="(./artwork/.+?/Jack.+?)"')
|
||||
prevSearch = compile(r'\|<a href="(.+?)">Previous Strip</a>')
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
class JerkCity(_BasicScraper):
|
||||
latestUrl = 'http://www.jerkcity.com/'
|
||||
stripUrl = latestUrl + 'jerkcity%s'
|
||||
imageSearch = compile(r'"jerkcity.+?">.+?"(/jerkcity.+?)"')
|
||||
prevSearch = compile(r'"(jerkcity.+?)">.+?"/jerkcity.+?"')
|
||||
help = 'Index format: unknown'
|
||||
|
||||
stripUrl = latestUrl + '_jerkcity%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(/jerkcity[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/_jerkcity[^"]+)') + r'<<Previous')
|
||||
help = 'Index format: n'
|
||||
|
||||
|
||||
class JoeAndMonkey(_BasicScraper):
|
||||
|
@ -31,10 +23,10 @@ class JoeAndMonkey(_BasicScraper):
|
|||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
||||
class JoyOfTech(_BasicScraper):
|
||||
latestUrl = 'http://www.geekculture.com/joyoftech/index.html'
|
||||
stripUrl = 'http://www.geekculture.com/joyoftech/joyarchives/%s.html'
|
||||
imageSearch = compile(r'<img src="(joyimages/.+?|../joyimages/.+?)" alt="The Joy')
|
||||
prevSearch = compile(r'<a href="((?:joyarchives/)?\w+\.\w{3,4})">(?:<font[^>]*>)?<img[^>]*><br>[\s\n]*Previous Joy', MULTILINE)
|
||||
latestUrl = 'http://www.geekculture.com/joyoftech/'
|
||||
stripUrl = latestUrl + 'joyarchives/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(joyimages/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(joyarchives/[^"]+)') + r'.+?Previous', MULTILINE)
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
|
|
@ -1,24 +1,12 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
from re import compile, IGNORECASE
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, IGNORECASE
|
||||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
class KernelPanic(_BasicScraper):
|
||||
latestUrl = 'http://www.ubersoft.net/kpanic/'
|
||||
stripUrl = latestUrl + 'd/%s'
|
||||
imageSearch = compile(r'src="(.+?/kp/kp.+?)" ')
|
||||
prevSearch = compile(r'<li class="previous"><a href="(.+?)">')
|
||||
help = 'Index format: yyyymmdd.html'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
return imageUrl.split('/')[-1].split('.')[0]
|
||||
|
||||
|
||||
|
||||
class Key(_BasicScraper):
|
||||
latestUrl = 'http://key.shadilyn.com/latestpage.html'
|
||||
stripUrl = 'http://key.shadilyn.com/pages/%s.html'
|
||||
|
@ -27,9 +15,8 @@ class Key(_BasicScraper):
|
|||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
||||
class Krakow(_BasicScraper):
|
||||
latestUrl = 'http://www.krakowstudios.com/'
|
||||
latestUrl = 'http://www.krakow.krakowstudios.com/'
|
||||
stripUrl = latestUrl + 'archive.php?date=%s'
|
||||
imageSearch = compile(r'<img src="(comics/.+?)"')
|
||||
prevSearch = compile(r'<a href="(archive\.php\?date=.+?)"><img border=0 name=previous_day')
|
||||
|
@ -38,10 +25,10 @@ class Krakow(_BasicScraper):
|
|||
|
||||
class Kukuburi(_BasicScraper):
|
||||
latestUrl = 'http://www.kukuburi.com/current/'
|
||||
stripUrl = 'http://thaumic.net/%s'
|
||||
stripUrl = 'http://www.kukuburi.com/v2/%s/'
|
||||
imageSearch = compile(r'img src="(http://www.kukuburi.com/../comics/.+?)"')
|
||||
prevSearch = compile(r'nav-previous.+?"(http.+?)"')
|
||||
help = 'Index format: non'
|
||||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
|
||||
|
||||
class KevinAndKell(_BasicScraper):
|
||||
|
@ -61,12 +48,3 @@ class KillerKomics(_BasicScraper):
|
|||
imageSearch = compile(r'<img src="(http://www.killerkomics.com/FichiersUpload/Comics/.+?)"')
|
||||
prevSearch = compile(r'<div id="precedent"><a href="(.+?)"')
|
||||
help = 'Index format: strip-name'
|
||||
|
||||
|
||||
class KrazyLarry(_BasicScraper):
|
||||
latestUrl = 'http://www.krazylarry.com/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(/comic[s|/][^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif")
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
|
|
@ -1,33 +1,31 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
from re import compile, IGNORECASE
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile
|
||||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
def keenSpot(comics):
|
||||
class _KeenSpotScraper(_BasicScraper):
|
||||
stripUrl = property(lambda self: self.baseUrl + 'd/%s.html')
|
||||
imageSearch = compile(r'<img[^>]+?src="([^"]*?comics/.+?)"', IGNORECASE)
|
||||
prevSearch = compile(r'<a href="([^"]*?d/\d{8}\.html)"[^>]*>(?:<img[^>]+?(?:name="previous_day"|alt="Previous"|src="[^"]*back[^"]*")|Previous comic)', IGNORECASE)
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
for name, urls in keenspotComics.items():
|
||||
def keenSpot(name, urls):
|
||||
if not isinstance(urls, tuple):
|
||||
baseUrl = latestUrl = urls
|
||||
else:
|
||||
baseUrl, latestUrl = urls
|
||||
|
||||
comics[name] = type('KeenSpot_%s' % name,
|
||||
(_KeenSpotScraper,),
|
||||
return type('KeenSpot_%s' % name,
|
||||
(_BasicScraper,),
|
||||
dict(
|
||||
name='KeenSpot/' + name,
|
||||
latestUrl=latestUrl or baseUrl
|
||||
latestUrl=latestUrl,
|
||||
stripUrl=baseUrl + 'd/%s.html',
|
||||
imageSearch = compile(tagre("img", "src", r'([^"]*comics/[^"]+)')),
|
||||
# The value pattern must not start with its own double quote: tagre() takes a
# separate `quote` argument, and every other caller passes the bare value.
prevSearch = compile(tagre("a", "href", r'([^"]*d/\d{8}\.html)') +
    '(?:<img[^>]+?(?:name="previous_day"|alt="Previous"|src="[^"]*back[^"]*")|Previous comic)'),
|
||||
help = 'Index format: yyyymmdd',
|
||||
)
|
||||
)
|
||||
|
||||
return comics
|
||||
|
||||
|
||||
keenspotComics = {
|
||||
'13thLabour': 'http://the13labour.comicgenesis.com/',
|
||||
|
@ -1524,4 +1522,5 @@ keenspotComics = {
|
|||
'ZuraZura': 'http://zurazura.comicgenesis.com/',
|
||||
}
|
||||
|
||||
globals().update(keenSpot(keenspotComics))
|
||||
for name, urls in keenspotComics.items():
|
||||
globals()[name] = keenSpot(name, urls)
|
||||
|
|
|
@ -1,34 +1,27 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
from re import compile
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile
|
||||
from ..scraper import _BasicScraper
|
||||
from ..helpers import indirectStarter
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
class LasLindas(_BasicScraper):
|
||||
latestUrl = 'http://www.katbox.net/laslindas/'
|
||||
stripUrl = latestUrl + 'index.php?strip_id=%s'
|
||||
imageSearch = compile(r'"(istrip_files/strips/.+?)"')
|
||||
prevSearch = compile(r'<a href="(.+?)"><[^>]+?alt="Back"')
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
|
||||
class LastBlood(_BasicScraper):
|
||||
latestUrl = 'http://www.lastblood.net/main/'
|
||||
stripUrl = latestUrl + '%s'
|
||||
imageSearch = compile(r'(/comicfolder/.+?)" alt')
|
||||
prevSearch = compile(r'Previous Comic:</small><br />« <a href="(.+?)">')
|
||||
help = 'Index format: yyyy/mm/dd/(page number and name)'
|
||||
latestUrl = 'http://laslindas.katbox.net/'
|
||||
stripUrl = latestUrl + 'archive/%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://laslindas\.katbox\.net/wp-content/webcomic/las-lindas/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://laslindas\.katbox\.net/archive/[^"]+)', after="previous"))
|
||||
help = 'Index format: stripname'
|
||||
|
||||
|
||||
|
||||
class LesbianPiratesFromOuterSpace(_BasicScraper):
|
||||
latestUrl = 'http://rosalarian.com/lesbianpirates/'
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
imageSearch = compile(r'(/lesbianpirates/comics/.+?)"')
|
||||
prevSearch = compile(r'/(\?p=.+?)">«')
|
||||
stripUrl = latestUrl + 'index.php?p=%s'
|
||||
# Capture only the relative image path; a double quote inside the group would
# end up as the first character of the extracted URL.
imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/index\.php\?id=\d+)', after="prev"))
|
||||
help = 'Index format: n'
|
||||
|
||||
|
||||
|
@ -55,34 +48,15 @@ class LookingForGroup(_BasicScraper):
|
|||
return self.nameSearch.search(pageUrl).group(1)
|
||||
|
||||
|
||||
|
||||
class Loserz(_BasicScraper):
|
||||
latestUrl = 'http://bukucomics.com/loserz/'
|
||||
stripUrl = latestUrl + 'go/%s'
|
||||
imageSearch = compile(r'<img src="(http://bukucomics.com/loserz/comics/.+?)"')
|
||||
prevSearch = compile(r'<a href="(.+?)"> < ')
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
|
||||
class LittleGamers(_BasicScraper):
|
||||
latestUrl = 'http://www.little-gamers.com/'
|
||||
stripUrl = latestUrl + '%s'
|
||||
imageSearch = compile(r'<img src="(http://www.little-gamers.com/comics/[^"]+)"')
|
||||
prevSearch = compile(r'href="(.+?)"><img id="comic-nav-prev"')
|
||||
stripUrl = latestUrl + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.little-gamers\.com/comics/[^"]+)'))
|
||||
# Escape the dots of the host name so they match literally.
prevSearch = compile(tagre("a", "href", r'(http://www\.little-gamers\.com/[^"]+)', before="comic-nav-prev-link"))
|
||||
help = 'Index format: yyyy/mm/dd/name'
|
||||
|
||||
|
||||
|
||||
class LegoRobot(_BasicScraper):
|
||||
latestUrl = 'http://www.legorobotcomics.com/'
|
||||
stripUrl = latestUrl + '?id=%s'
|
||||
imageSearch = compile(r'id="the_comic" src="(comics/.+?)"')
|
||||
prevSearch = compile(r'(\?id=\d+)"><img src="images/back.png"')
|
||||
help = 'Index format: nnnn'
|
||||
|
||||
|
||||
|
||||
class LeastICouldDo(_BasicScraper):
|
||||
latestUrl = 'http://www.leasticoulddo.com/'
|
||||
stripUrl = latestUrl + 'comic/%s'
|
||||
|
|
|
@ -1,28 +1,21 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, IGNORECASE
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..helpers import queryNamer
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
class MadamAndEve(_BasicScraper):
|
||||
# broken links - disable for now
|
||||
class _MadamAndEve(_BasicScraper):
|
||||
latestUrl = 'http://www.madamandeve.co.za/week_of_cartns.php'
|
||||
stripUrl = None
|
||||
imageSearch = compile(r'<IMG BORDER="0" SRC="(cartoons/me\d{6}\.(gif|jpg))">')
|
||||
prevSearch = compile(r'<a href="(weekend_cartoon.php)"')
|
||||
|
||||
|
||||
class MagicHigh(_BasicScraper):
|
||||
latestUrl = 'http://www.doomnstuff.com/magichigh/index.php'
|
||||
stripUrl = latestUrl + '?strip_id=%s'
|
||||
imageSearch = compile(r'(istrip_files/strips/.+?)"')
|
||||
prevSearch = compile(r'First .+?"(/magichigh.+?)".+?top_back')
|
||||
help = 'Index format: n'
|
||||
|
||||
|
||||
|
||||
class Marilith(_BasicScraper):
|
||||
latestUrl = 'http://www.marilith.com/'
|
||||
stripUrl = latestUrl + 'archive.php?date=%s'
|
||||
|
@ -31,13 +24,12 @@ class Marilith(_BasicScraper):
|
|||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
|
||||
class MarryMe(_BasicScraper):
|
||||
latestUrl = 'http://marrymemovie.com/main/'
|
||||
stripUrl = latestUrl + '%s'
|
||||
imageSearch = compile(r'(/comicfolder/.+?)"')
|
||||
prevSearch = compile(r'Previous Comic:</small><br />« <a href="(.+?)">')
|
||||
help = 'Index format: good luck !'
|
||||
latestUrl = 'http://marryme.keenspot.com/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("link", "href", r'(/d/[^"]+)', before="prev"))
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class Meek(_BasicScraper):
|
||||
|
@ -49,22 +41,13 @@ class Meek(_BasicScraper):
|
|||
|
||||
|
||||
class MegaTokyo(_BasicScraper):
|
||||
latestUrl = 'http://www.megatokyo.com/'
|
||||
latestUrl = 'http://megatokyo.com/'
|
||||
stripUrl = latestUrl + 'strip/%s'
|
||||
imageSearch = compile(r'"(strips/.+?)"', IGNORECASE)
|
||||
prevSearch = compile(r'"(./strip/\d+?)">Prev')
|
||||
help = 'Index format: nnnn'
|
||||
|
||||
|
||||
class MyPrivateLittleHell(_BasicScraper):
|
||||
latestUrl = 'http://mutt.purrsia.com/mplh/'
|
||||
stripUrl = latestUrl + '?date=%s'
|
||||
imageSearch = compile(r'<img.+?src="(comics/.+?)"')
|
||||
prevSearch = compile(r'<a.+?href="(\?date=\d+/\d+/\d+)">Prev</a>')
|
||||
help = 'Index format: mm/dd/yyyy'
|
||||
|
||||
|
||||
|
||||
class MacHall(_BasicScraper):
|
||||
latestUrl = 'http://www.machall.com/'
|
||||
stripUrl = latestUrl + 'view.php?date=%s'
|
||||
|
@ -75,43 +58,33 @@ class MacHall(_BasicScraper):
|
|||
|
||||
class Melonpool(_BasicScraper):
|
||||
latestUrl = 'http://www.melonpool.com/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(/comic[s|/][^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif")
|
||||
help = 'Index format: yyyymmdd'
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.melonpool\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.melonpool\.com/\?p=\d+)', after="prev"))
|
||||
help = 'Index format: n'
|
||||
|
||||
|
||||
class Misfile(_BasicScraper):
|
||||
latestUrl = 'http://www.misfile.com/'
|
||||
stripUrl = latestUrl + '?page=%s'
|
||||
imageSearch = compile(r'<img src="(overlay\.php\?pageCalled=\d+)">')
|
||||
prevSearch = compile(r'<a href="(\?page=\d+)"><img src="/images/back\.gif"')
|
||||
help = 'Index format: n (unpadded)'
|
||||
namer = queryNamer('pageCalled')
|
||||
|
||||
stripUrl = latestUrl + '?date=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("link", "href", r'([^"]+)', before="Previous"))
|
||||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
class MysteriesOfTheArcana(_BasicScraper):
|
||||
latestUrl = 'http://mysteriesofthearcana.com/'
|
||||
stripUrl = latestUrl + 'index.php?action=comics&cid='
|
||||
imageSearch = compile(r'(image.php\?type=com&i=.+?)"')
|
||||
prevSearch = compile(r'(index.php\?action=comics&cid=.+?)".+?show_prev1')
|
||||
stripUrl = latestUrl + 'index.php?action=comics&cid=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(image\.php\?type=com&i=[^"]+)'))
|
||||
# Capture the link target of the anchor marked with the previous-navigation
# marker; an empty group "()" could only ever yield an empty URL.
# NOTE(review): "navprevius" is kept as written - confirm it matches the
# marker used in the site's markup.
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="navprevius"))
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
|
||||
# XXX move to keenspot?
|
||||
class MysticRevolution(_BasicScraper):
|
||||
latestUrl = 'http://www.mysticrev.com/index.php'
|
||||
latestUrl = 'http://mysticrevolution.keenspot.com/'
|
||||
stripUrl = latestUrl + '?cid=%s'
|
||||
imageSearch = compile(r'(comics/.+?)"')
|
||||
prevSearch = compile(r'(\?cid=.+?)".+?prev.gif')
|
||||
imageSearch = compile(tagre("img", "src", r'(http://cdn\.mysticrevolution\.keenspot\.com/comics/[^"]+)'))
|
||||
# The "?cid=<n>" target lives in the href attribute of the <link> tag; the
# "prev" marker is expected earlier in the same tag (before="prev").
prevSearch = compile(tagre("link", "href", r'(\?cid=\d+)', before="prev"))
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
|
||||
class MontyAndWooly(_BasicScraper):
|
||||
latestUrl = 'http://www.montyandwoolley.co.uk/'
|
||||
stripUrl = latestUrl + '%s'
|
||||
imageSearch = compile(r'<img src="(http://montyandwoolley.co.uk/.+?)"')
|
||||
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
|
||||
help = 'Index format: yyyy/mm/dd/strip-name'
|
||||
|
|
|
@ -1,21 +1,21 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
from re import compile, IGNORECASE
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, sub
|
||||
from ..scraper import _BasicScraper
|
||||
from ..helpers import indirectStarter, _PHPScraper
|
||||
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
class NamirDeiter(_BasicScraper):
|
||||
latestUrl = 'http://www.namirdeiter.com/'
|
||||
stripUrl = latestUrl + 'comics/index.php?date=%s'
|
||||
imageSearch = compile(r'<img.+?(/comics/\d{8}.+?)[\'|\"]')
|
||||
prevSearch = compile(r'(/comics/index.php\?date=.+?|http://www\.namirdeiter\.com/comics/index.php\?date=.+?)[\'|\"].+?previous')
|
||||
# Image files are named by a multi-digit number (the strip index is a
# yyyymmdd date), so allow one or more digits rather than exactly one.
imageSearch = compile(tagre("img", "src", r'(http://www\.namirdeiter\.com/comics/\d+\.jpg)', quote=""))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.namirdeiter\.com/comics/index\.php\?date=\d+)', quote="'")+"Previous")
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
|
||||
class NeoEarth(_BasicScraper):
|
||||
latestUrl = 'http://www.neo-earth.com/NE/'
|
||||
stripUrl = latestUrl + 'index.php?date=%s'
|
||||
|
@ -24,23 +24,11 @@ class NeoEarth(_BasicScraper):
|
|||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
|
||||
class Nervillsaga(_BasicScraper):
|
||||
latestUrl = 'http://www.nervillsaga.com/'
|
||||
stripUrl = latestUrl + 'index.php?s=%s'
|
||||
imageSearch = compile(r'"(pic/.+?)"')
|
||||
prevSearch = compile(r'"(.+?)">Previous')
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
||||
class NewAdventuresOfBobbin(_BasicScraper):
|
||||
latestUrl = 'http://www.bobbin-comic.com/'
|
||||
stripUrl = latestUrl + 'wordpress/?p=%s'
|
||||
imageSearch = compile(r'<img src="(http://www.bobbin-comic.com/wordpress/comics/.+?)"')
|
||||
prevSearch = compile(r'<a href="(.+?)"><span class="prev">')
|
||||
help = 'Index format: n'
|
||||
|
||||
latestUrl = 'http://www.bobbin-comic.com/bobbin_strips/'
|
||||
imageSearch = compile(tagre("a", "href", r'(\d+\.gif)'))
|
||||
prevSearch = None
|
||||
help = 'Index format: none'
|
||||
|
||||
|
||||
class NewWorld(_BasicScraper):
|
||||
|
@ -51,25 +39,22 @@ class NewWorld(_BasicScraper):
|
|||
help = 'Index format: yyyy/mm/dd/stripn'
|
||||
|
||||
|
||||
|
||||
class Nicky510(_BasicScraper):
|
||||
latestUrl = 'http://www.nicky510.com/'
|
||||
stripUrl = latestUrl + '%s'
|
||||
imageSearch = compile(r'(http://www.nicky510.com/comics/.+?)"')
|
||||
prevSearch = compile(r'<a href="(http://www.nicky510.com/.+?)" class="navi navi-prev"')
|
||||
help = 'Index format: yyyy/mm/dd/stripname/'
|
||||
|
||||
latestUrl = 'http://www.nickyitis.com/'
|
||||
stripUrl = latestUrl + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.nickyitis\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.nickyitis\.com/comic/[^"]+)', after="Previous"))
|
||||
help = 'Index format: stripname'
|
||||
|
||||
|
||||
class NoNeedForBushido(_BasicScraper):
|
||||
latestUrl = 'http://www.noneedforbushido.com/latest/'
|
||||
stripUrl = 'http://www.noneedforbushido.com/%s'
|
||||
imageSearch = compile(r'<div class="comics"><img src="([^"]+)"')
|
||||
prevSearch = compile(r'<a href="([^"]+)" title="[^"]*" class="previous-comic-link')
|
||||
latestUrl = 'http://noneedforbushido.com/latest/'
|
||||
# latestUrl ends in 'latest/', so appending the index to it produced
# '.../latest/<index>/'. Strip pages are presumably addressed from the site
# root (index format yyyy/comic/nnn), so build the URL from the host directly.
stripUrl = 'http://noneedforbushido.com/%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://noneedforbushido\.com/comics/comic/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://noneedforbushido\.com/[^"]+)', after="previous-comic-link"))
|
||||
help = 'Index format: yyyy/comic/nnn'
|
||||
|
||||
|
||||
|
||||
class Nukees(_BasicScraper):
|
||||
latestUrl = 'http://www.nukees.com/'
|
||||
stripUrl = latestUrl + 'd/%s'
|
||||
|
@ -79,82 +64,58 @@ class Nukees(_BasicScraper):
|
|||
|
||||
|
||||
|
||||
class _NuklearPower(_BasicScraper):
|
||||
imageSearch = compile(r'<img src="(http://www.nuklearpower.com/comics/.+?)"')
|
||||
prevSearch = compile(r'><a href="(.+?)">Previous</a>')
|
||||
help = 'Index format: yyyy/mm/dd/name'
|
||||
def nuklearpower(name, shortname):
|
||||
baseUrl = 'http://www.nuklearpower.com/'
|
||||
latestUrl = "%s%s/" % (baseUrl, shortname)
|
||||
classname = sub("[^0-9a-zA-Z_]", "", name)
|
||||
|
||||
@property
|
||||
def baseUrl(self):
|
||||
return 'http://www.nuklearpower.com/%s/' % (self.shortName,)
|
||||
|
||||
def starter(self):
|
||||
return self.baseUrl
|
||||
|
||||
@property
|
||||
def stripUrl(self):
|
||||
return self.baseUrl + '%s'
|
||||
globals()[classname] = type('NuklearPower_%s' % classname,
|
||||
(_BasicScraper,),
|
||||
dict(
|
||||
name='NuklearPower/' + classname,
|
||||
latestUrl = latestUrl,
|
||||
stripUrl = latestUrl + '%s',
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.nuklearpower\.com/comics/[^"]+)')),
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "Previous"),
|
||||
help = 'Index format: yyyy/mm/dd/name',
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
npstrips = {
|
||||
'8BitTheater': '8-bit-theater',
|
||||
'Warbot': 'warbot',
|
||||
'HowIKilledYourMaster': 'hikym',
|
||||
'AtomicRobo': 'atomic-robo',
|
||||
}
|
||||
|
||||
class NP8BitTheater(_NuklearPower):
|
||||
name = 'NuklearPower/8BitTheater'
|
||||
shortName = '8-bit-theater'
|
||||
|
||||
|
||||
|
||||
class NPWarbot(_NuklearPower):
|
||||
name = 'NuklearPower/Warbot'
|
||||
shortName = 'warbot'
|
||||
|
||||
|
||||
|
||||
class NPHIKYM(_NuklearPower):
|
||||
name = 'NuklearPower/HowIKilledYourMaster'
|
||||
shortName = 'hikym'
|
||||
|
||||
|
||||
|
||||
class NPAtomicRobo(_NuklearPower):
|
||||
name = 'NuklearPower/AtomicRobo'
|
||||
shortName = 'atomic-robo'
|
||||
|
||||
for name, shortname in npstrips.items():
|
||||
nuklearpower(name, shortname)
|
||||
|
||||
|
||||
class NekoTheKitty(_PHPScraper):
|
||||
basePath = 'http://www.nekothekitty.net/cusp/'
|
||||
latestUrl = 'latest.php'
|
||||
prevSearch = compile(r"<a href=\"(http://www\.nekothekitty\.net/cusp/daily\.php\?date=\d+)\"><img[^>]+alt='Previous Comic'")
|
||||
latestUrl = basePath
|
||||
# Previous-strip link: an anchor to a comic page immediately followed by the
# "smallprev" navigation image. Dots are escaped so they match literally.
prevSearch = compile(tagre("a", "href", r'(http://www\.nekothekitty\.net/comics/[^"]+)') +
    tagre("img", "src", r'http://www\.nekothekitty\.net/files/smallprev\.png'))
|
||||
|
||||
|
||||
|
||||
class NichtLustig(_BasicScraper):
|
||||
stripUrl = 'http://www.nichtlustig.de/toondb/%s.html'
|
||||
imageSearch = compile(r'<img src="([^"]+)" id="cartoon"', IGNORECASE)
|
||||
prevSearch = compile(r'<a href="(\d+\.html)"[^<>]*><img[^<>]*id="pfeil_links', IGNORECASE)
|
||||
# Raw string so the regex backslashes (\(, \., \d) are not interpreted as
# string escape sequences; the compiled pattern is unchanged.
imageSearch = compile(r'background-image:url\((http://static\.nichtlustig\.de/comics/full/\d+\.jpg)')
|
||||
prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
|
||||
help = 'Index format: yymmdd'
|
||||
starter = indirectStarter('http://www.nichtlustig.de/main.html',
|
||||
compile(r'<a href="([^"]*toondb/\d+\.html)"', IGNORECASE))
|
||||
|
||||
|
||||
class NinthElsewhere(_BasicScraper):
|
||||
latestUrl = 'http://www.9thelsewhere.com/icenter.html'
|
||||
stripUrl = 'http://www.9thelsewhere.com/%s/9e%s_%s.html'
|
||||
imageSearch = compile(r'<img src="([^"]*9e\d+_\d+\.jpg)"')
|
||||
prevSearch = compile(r'<a href="([^"]+\.html)">\s*PREV')
|
||||
help = 'Index format: year-chapter-page'
|
||||
|
||||
def setStrip(self, index):
|
||||
self.currentUrl = self.stripUrl % tuple(map(int, index.split('-')))
|
||||
compile(tagre("a", "href", r'([^"]*toondb/\d+\.html)')))
|
||||
|
||||
|
||||
class Nodwick(_BasicScraper):
|
||||
stripUrl = None
|
||||
imageSearch = compile(r'<img src="(http://nodwick.humor.gamespy.com/gamespyarchive/strips/[^"]*)"', IGNORECASE)
|
||||
prevSearch = compile(r'<a href="(index.php\?date=[0-9-]*)"><img src="back.jpg"', IGNORECASE)
|
||||
starter = indirectStarter('http://nodwick.humor.gamespy.com/gamespyarchive/index.php', prevSearch)
|
||||
help = 'Index format: None'
|
||||
|
||||
latestUrl = 'http://comic.nodwick.com/'
|
||||
stripUrl = latestUrl + "?p=%s"
|
||||
imageSearch = compile(tagre("img", "src", r'(http://comic\.nodwick\.com/nodwickstrips/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://comic\.nodwick\.com/\?p=\d+)', after="prev"))
|
||||
help = 'Index format: stripnumber'
|
||||
|
||||
|
||||
class NekkoAndJoruba(_BasicScraper):
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile
|
||||
|
||||
from ..util import tagre
|
||||
|
|
|
@ -1,57 +1,47 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
from re import compile, IGNORECASE
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile
|
||||
from ..scraper import _BasicScraper
|
||||
from ..helpers import indirectStarter
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
class OctopusPie(_BasicScraper):
|
||||
starter = indirectStarter('http://www.octopuspie.com/2007-05-14/001-pea-wiggle/',
|
||||
compile(r'<a href="(http://www.octopuspie.com/.+?)"><b>latest comic</b>', IGNORECASE))
|
||||
starter = indirectStarter('http://www.octopuspie.com/',
|
||||
compile(tagre("a", "href", r'(http://www\.octopuspie\.com/[^"]+)') +
|
||||
tagre("img", "src", r'http://www\.octopuspie\.com/junk/latest\.png')))
|
||||
stripUrl = 'http://www.octopuspie.com/%s'
|
||||
imageSearch = compile(r'<img src="(http://www.octopuspie.com/strippy/.+?)"')
|
||||
prevSearch = compile(r'<link rel=\'prev\'[^>]+?href=\'(http://www.octopuspie.com/.+?)\'')
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.octopuspie\.com/strippy/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.octopuspie\.com/[^"]+)', after="prev"))
|
||||
help = 'Index format: yyyy-mm-dd/nnn-strip-name'
|
||||
|
||||
|
||||
|
||||
class OddFish(_BasicScraper):
|
||||
latestUrl = 'http://www.odd-fish.net/'
|
||||
stripUrl = latestUrl + 'viewing.php?&comic_id=%s'
|
||||
imageSearch = compile(r'<img src="(images/\d{1,4}.\w{3,4})" ')
|
||||
prevSearch = compile(r'<a href="(.+?)"><img src="http://www.odd-fishing.net/i/older.gif" ')
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
|
||||
class OhMyGods(_BasicScraper):
|
||||
latestUrl = 'http://ohmygods.co.uk/'
|
||||
stripUrl = latestUrl + 'strips/%s'
|
||||
imageSearch = compile(r'<p class="omgs-strip"><img src="(/system/files/.+?)"')
|
||||
prevSearch = compile(r'<li class="custom_pager_prev"><a href="(/strips/.+?)"')
|
||||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
stripUrl = latestUrl + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.odd-fish\.net/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.odd-fish\.net/[^"]+)', after="navi-prev"))
|
||||
help = 'Index format: stripname'
|
||||
|
||||
|
||||
class OnTheEdge(_BasicScraper):
|
||||
latestUrl = 'http://www.ontheedgecomics.com/'
|
||||
latestUrl = 'http://ontheedgecomics.com/'
|
||||
stripUrl = 'http://ontheedgecomics.com/comic/ote%s'
|
||||
imageSearch = compile(r'<img src="(http://ontheedgecomics.com/comics/.+?)"')
|
||||
prevSearch = compile(r'<a href="([^"]+)" rel="prev">')
|
||||
help = 'Index format: nnn (unpadded)'
|
||||
|
||||
|
||||
|
||||
class OneQuestion(_BasicScraper):
|
||||
latestUrl = 'http://onequestioncomic.com/'
|
||||
stripUrl = latestUrl + 'comics/%s/'
|
||||
imageSearch = compile(r'(istrip_files.+?)"')
|
||||
prevSearch = compile(r'First.+?"(comic.php.+?)".+?previous.png')
|
||||
stripUrl = latestUrl + 'comic.php?strip_id=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(istrip_files/strips/\d+\.jpg)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(comic\.php\?strip_id=\d+)') + tagre("img", "src", r'img/arrow_prev\.jpg'))
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
|
||||
class OurHomePlanet(_BasicScraper):
|
||||
latestUrl = 'http://gdk.gd-kun.net/'
|
||||
stripUrl = latestUrl + '%s.html'
|
||||
|
@ -61,22 +51,19 @@ class OurHomePlanet(_BasicScraper):
|
|||
|
||||
|
||||
class OkCancel(_BasicScraper):
|
||||
stripUrl = 'http://www.ok-cancel.com/comic/%s.html'
|
||||
imageSearch = compile(r'src="(http://www.ok-cancel.com/strips/okcancel\d{8}.gif)"', IGNORECASE)
|
||||
prevSearch = compile(r'<div class="previous"><a href="(http://www.ok-cancel.com/comic/\d{1,4}.html)">', IGNORECASE)
|
||||
starter = indirectStarter('http://www.ok-cancel.com/', prevSearch)
|
||||
stripUrl = 'http://okcancel.com/comic/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://okcancel\.com/strips/okcancel\d{8}\.gif)'))
|
||||
prevSearch = compile(tagre("div", "class", "previous") + tagre("a", "href", r'(http://okcancel\.com/comic/\d{1,4}\.html)'))
|
||||
starter = indirectStarter('http://okcancel.com/', prevSearch)
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
|
||||
class Oglaf(_BasicScraper):
|
||||
starter = indirectStarter('http://oglaf.com/',
|
||||
compile(r'<a href="(.+?)"><img src="over18.gif"', IGNORECASE))
|
||||
stripUrl = 'http://oglaf.com/%s.html'
|
||||
imageSearch = compile(r'/><img src="(.+?)"[^>]+?width="760" height="596"', IGNORECASE)
|
||||
prevSearch = compile(r'<a href="(.+?)"[^>]+?><img src="prev.gif"', IGNORECASE)
|
||||
help = 'Index format: nn'
|
||||
|
||||
latestUrl = 'http://oglaf.com/'
|
||||
stripUrl = latestUrl + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(/media/comic/[^"]+)', before="strip"))
|
||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("div", "id", "pvs"))
|
||||
help = 'Index format: stripname/nn'
|
||||
|
||||
|
||||
class OverCompensating(_BasicScraper):
|
||||
|
|
|
@ -1,17 +1,19 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
from re import compile, IGNORECASE
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, IGNORECASE
|
||||
from ..scraper import _BasicScraper
|
||||
from ..helpers import bounceStarter, queryNamer
|
||||
from ..helpers import bounceStarter, queryNamer, indirectStarter
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
class PartiallyClips(_BasicScraper):
|
||||
latestUrl = 'http://www.partiallyclips.com/'
|
||||
stripUrl = latestUrl + 'index.php?id=%s'
|
||||
imageSearch = compile(r'"(http://www.partiallyclips.com/storage/.+?)"')
|
||||
prevSearch = compile(r'"(index.php\?id=.+?)".+?prev')
|
||||
help = 'Index format: nnnn'
|
||||
latestUrl = 'http://partiallyclips.com/'
|
||||
stripUrl = latestUrl + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://partiallyclips\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://partiallyclips\.com/[^"]+)', before="prev"))
|
||||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
|
||||
|
||||
|
||||
|
@ -33,20 +35,23 @@ class PebbleVersion(_BasicScraper):
|
|||
|
||||
|
||||
class PennyAndAggie(_BasicScraper):
|
||||
latestUrl = 'http://www.pennyandaggie.com/index.php'
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
imageSearch = compile(r'src=".+?(/comics/.+?)"')
|
||||
prevSearch = compile(r"</a><a href='(index.php\?p=.+?)'.+?prev")
|
||||
baseUrl = 'http://www.pennyandaggie.com/'
|
||||
stripUrl = baseUrl + 'index.php?p=%s'
|
||||
imageSearch = compile(tagre("a", "href", r'(http://www\.pennyandaggie\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r"(index\.php\?p=\d+)", quote="'") +
|
||||
tagre("img", "src", r'http://pennyandaggie\.com/images/previous_day\.gif', quote=""))
|
||||
starter = indirectStarter(baseUrl, prevSearch)
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
|
||||
class PennyArcade(_BasicScraper):
|
||||
starter = bounceStarter('http://www.penny-arcade.com/comic/',
|
||||
compile(r'<a href="(/comic/[^"]+)">Next</a>'))
|
||||
stripUrl = 'http://www.penny-arcade.com/comic/%s/'
|
||||
imageSearch = compile(r'(?<!<!--)<img src="(http://art\.penny-arcade\.com/photos/[^"]+)"')
|
||||
prevSearch = compile(r'<a href="(/comic/[^"]+)">Back</a>')
|
||||
baseUrl = 'http://penny-arcade.com/comic/'
|
||||
starter = bounceStarter(baseUrl,
|
||||
compile(tagre("a", "href", r'(http://penny-arcade\.com/comic/[^"]+)', before="bntNext"))
|
||||
)
|
||||
stripUrl = baseUrl + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://art\.penny-arcade\.com/photos/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://penny-arcade\.com/comic/[^"]+)', before="bntPrev"))
|
||||
help = 'Index format: yyyy/mm/dd'
|
||||
|
||||
@classmethod
|
||||
|
@ -58,25 +63,17 @@ class PennyArcade(_BasicScraper):
|
|||
|
||||
class PeppermintSaga(_BasicScraper):
|
||||
latestUrl = 'http://www.pepsaga.com/'
|
||||
stripUrl = latestUrl + 'comics/%s/'
|
||||
imageSearch = compile(r'src=.+?(http.+?/comics/.+?)"')
|
||||
prevSearch = compile(r'First</a><a href="(http://www.pepsaga.com/comics/.+?/)"')
|
||||
help = 'Index format: non'
|
||||
|
||||
|
||||
class PerkiGoth(_BasicScraper):
|
||||
latestUrl = 'http://mutt.purrsia.com/main.php'
|
||||
stripUrl = latestUrl + '?date=%s'
|
||||
imageSearch = compile(r'<img.+?src="(comics/.+?)"')
|
||||
prevSearch = compile(r'<a.+?href="(\?date=\d+/\d+/\d+)">Prev</a>')
|
||||
help = 'Index format: mm/dd/yyyy'
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.pepsaga\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.pepsaga\.com/\?p=\d+)', after="prev"))
|
||||
help = 'Index format: number'
|
||||
|
||||
|
||||
class Pixel(_BasicScraper):
|
||||
latestUrl = 'http://www.chrisdlugosz.net/pixel/'
|
||||
stripUrl = latestUrl + '%s.shtml'
|
||||
imageSearch = compile(r'<IMG SRC="(\d+\.png)" ALT=""><BR><BR>')
|
||||
prevSearch = compile(r'<A HREF="(\d+\.shtml)"><IMG SRC="_prev.png" BORDER=0 ALT=""></A>')
|
||||
latestUrl = 'http://pixelcomic.net/'
|
||||
stripUrl = latestUrl + '%s.php'
|
||||
imageSearch = compile(tagre("img", "src", r'(\d+\.png)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://pixelcomic\.net/\d+\.php)', before="prev"))
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
@ -91,19 +88,22 @@ class PiledHigherAndDeeper(_BasicScraper):
|
|||
|
||||
|
||||
class Precocious(_BasicScraper):
|
||||
latestUrl = 'http://www.precociouscomic.com/'
|
||||
stripUrl = latestUrl + 'comic.php?page=%s'
|
||||
imageSearch = compile(r'(archive/strips/.+?)"')
|
||||
prevSearch = compile(r'First.+?(comic.php\?page=.+?)">Previous<')
|
||||
help = 'Index format: n (unpadded)'
|
||||
baseUrl = 'http://www.precociouscomic.com/'
|
||||
starter = indirectStarter(baseUrl,
|
||||
compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/next_arrow\.png"))
|
||||
)
|
||||
stripUrl = baseUrl + 'archive/comic/%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(/comics/\d+\.jpg)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/archive/comic/[^"]+)') + tagre("img", "src", r"/templates/precocious_main/images/back_arrow\.png"))
|
||||
help = 'Index format: yyyy/mm/dd'
|
||||
|
||||
|
||||
class PvPonline(_BasicScraper):
|
||||
latestUrl = 'http://www.pvponline.com/'
|
||||
stripUrl = None
|
||||
imageSearch = compile(r'<img src="(http://www.pvponline.com/comics/pvp\d{8}\..+?)"', IGNORECASE)
|
||||
prevSearch = compile(r'<a href="(http://www.pvponline.com/[^"]+)"[^>]*>‹ Previous', IGNORECASE)
|
||||
help = 'Index format: yyyymmdd'
|
||||
latestUrl = 'http://pvponline.com/comic'
# latestUrl has no trailing slash, so add one before the index; strip pages
# live under /comic/<yyyy/mm/dd/stripname>, which is what prevSearch matches.
stripUrl = latestUrl + '/%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://newcdn\.pvponline\.com/img/comic/pvp\d+\.jpg)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://pvponline\.com/comic/[^"]+)', after="Previous"))
|
||||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
|
||||
|
||||
|
||||
|
@ -113,7 +113,7 @@ def pensAndTales(name, baseUrl):
|
|||
dict(
|
||||
name='PensAndTales/' + name,
|
||||
latestUrl=baseUrl,
|
||||
stripUrl=baseUrl + '?date=',
|
||||
stripUrl=baseUrl + '?date=%s',
|
||||
imageSearch=compile(r'<img[^>]+?src="([^"]*?comics/.+?)"', IGNORECASE),
|
||||
prevSearch=compile(r'<a href="([^"]*?\?date=\d+)">(:?<img[^>]+?alt=")?Previous Comic', IGNORECASE),
|
||||
help='Index format: yyyymmdd')
|
||||
|
@ -126,30 +126,26 @@ def pensAndTales(name, baseUrl):
|
|||
# strangekith = pensAndTales('Strangekith', 'http://strangekith.pensandtales.com/')
|
||||
# XXX: comic broken
|
||||
# fireflycross = pensAndTales('FireflyCross', 'http://fireflycross.pensandtales.com/')
|
||||
thosedestined = pensAndTales('ThoseDestined', 'http://thosedestined.pensandtales.com/')
|
||||
evilish = pensAndTales('Evilish', 'http://evilish.pensandtales.com/')
|
||||
redallover = pensAndTales('RedAllOver', 'http://redallover.pensandtales.com/')
|
||||
stickyevil = pensAndTales('StickyEvil', 'http://stickyevil.pensandtales.com/')
|
||||
# XXX: moved / layout changed
|
||||
#ynt = pensAndTales('YamiNoTainai', 'http://ynt.pensandtales.com/')
|
||||
earthbound = pensAndTales('Earthbound', 'http://earthbound.pensandtales.com/')
|
||||
|
||||
|
||||
|
||||
class ProperBarn(_BasicScraper):
|
||||
latestUrl = 'http://www.nitrocosm.com/go/gag/'
|
||||
stripUrl = latestUrl + '%s/'
|
||||
imageSearch = compile(r'<img class="gallery_display" src="([^"]+)"')
|
||||
prevSearch = compile(r'<a href="([^"]+)"[^>]*><button type="submit" class="nav_btn_previous">')
|
||||
# Escape the dot before the extension so it only matches a literal ".png".
imageSearch = compile(tagre("img", "src", r'(http://content\.nitrocosm\.com/gag/\d+\.png)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.nitrocosm\.com/go/gag/\d+/)', after="nav_btn_previous"))
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
||||
class PunksAndNerds(_BasicScraper):
|
||||
latestUrl = 'http://www.punksandnerds.com/'
|
||||
stripUrl = latestUrl + '?id=%s/'
|
||||
imageSearch = compile(r'<img src="(http://www.punksandnerds.com/img/comic/.+?)"')
|
||||
prevSearch = compile(r'<td><a href="(.+?)"[^>]+?><img src="backcomic.gif"')
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.punksandnerds\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.punksandnerds\.com/\?p=\d+)', after="navi-prev"))
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
from re import compile
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile
|
||||
from ..scraper import _BasicScraper
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
class QuestionableContent(_BasicScraper):
|
||||
|
@ -13,10 +15,9 @@ class QuestionableContent(_BasicScraper):
|
|||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
|
||||
class Qwantz(_BasicScraper):
|
||||
latestUrl = 'http://www.qwantz.com/index.php'
|
||||
stripUrl = latestUrl + '?comic=%s'
|
||||
imageSearch = compile(r'<img src="(http://www.qwantz.com/comics/.+?)" class="comic"')
|
||||
prevSearch = compile(r'"><a href="(.+?)">← previous</a>')
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.qwantz\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.qwantz\.com/index\.php\?comic=\d+)', before="prev"))
|
||||
help = 'Index format: n'
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
from re import compile
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile
|
||||
from ..scraper import _BasicScraper
|
||||
from ..helpers import bounceStarter
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
class RadioactivePanda(_BasicScraper):
|
||||
|
@ -14,32 +16,31 @@ class RadioactivePanda(_BasicScraper):
|
|||
help = 'Index format: n (no padding)'
|
||||
|
||||
|
||||
# XXX add other comics at http://petitesymphony.com/comics/
|
||||
class Rascals(_BasicScraper):
|
||||
latestUrl = 'http://petitesymphony.com/rascals'
|
||||
stripUrl = 'http://petitesymphony.com/comic/rascals/%s'
|
||||
imageSearch = compile(r'(http://petitesymphony.com/comics/.+?)"')
|
||||
prevSearch = compile(r"KR-nav-previous.><a href=.(http.+?).>")
|
||||
help = 'Index format: non'
|
||||
latestUrl = 'http://rascals.petitesymphony.com/'
# latestUrl already ends with a slash; do not start the suffix with another one.
stripUrl = latestUrl + 'comic/rascals-pg-%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://rascals\.petitesymphony\.com/files/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://rascals\.petitesymphony\.com/comic/[^"]+)', after="Previous"))
|
||||
help = 'Index format: num'
|
||||
|
||||
|
||||
class RealLife(_BasicScraper):
|
||||
latestUrl = 'http://www.reallifecomics.com/'
|
||||
stripUrl = latestUrl + 'achive/%s.html'
|
||||
imageSearch = compile(r'"(/comics/.+?)"')
|
||||
prevSearch = compile(r'"(/archive/.+?)".+?nav_previous')
|
||||
stripUrl = latestUrl + 'archive/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
|
||||
# Escape the dot before "html" so it only matches a literal ".html".
prevSearch = compile(tagre("a", "href", r'(/archive/\d+\.html)') + tagre("img", "src", r'/images/nav_prev\.png'))
|
||||
# User-facing hint for the strip index; the stray closing parenthesis was a typo.
help = 'Index format: yymmdd'
|
||||
|
||||
|
||||
|
||||
class RedString(_BasicScraper):
|
||||
latestUrl = 'http://www.redstring.strawberrycomics.com/'
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
imageSearch = compile(r'<img src="(http://www.redstring.strawberrycomics.com/comics/.+?)"')
|
||||
prevSearch = compile(r'<a href="(.+?)">Previous Comic</a>')
|
||||
stripUrl = latestUrl + 'index.php?id=%s'
|
||||
# The captured value must not start with a quote character: the capture is
# used as the (relative) image URL.
# NOTE(review): assumes tagre() supplies the quotes around the attribute value - confirm.
imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/index\.php\?id=\d+)', after="prev"))
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
||||
class Roza(_BasicScraper):
|
||||
latestUrl = 'http://www.junglestudio.com/roza/index.php'
|
||||
stripUrl = latestUrl + '?date=%s'
|
||||
|
@ -58,10 +59,3 @@ class RedMeat(_BasicScraper):
|
|||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
return imageUrl.split('/')[-2]
|
||||
|
||||
class RunningWild(_BasicScraper):
|
||||
latestUrl = 'http://runningwild.katbox.net/'
|
||||
stripUrl = latestUrl + 'index.php?strip_id=%s'
|
||||
imageSearch = compile(r'="(.+?strips/.+?)"')
|
||||
prevSearch = compile(r'(index.php\?strip_id=.+?)".+?navigation_back')
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
|
|
@ -1,22 +1,22 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, MULTILINE, IGNORECASE, sub
|
||||
from os.path import splitext
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..helpers import bounceStarter, indirectStarter
|
||||
from ..helpers import indirectStarter
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
class SailorsunOrg(_BasicScraper):
|
||||
latestUrl = 'http://www.sailorsun.org/'
|
||||
stripUrl = latestUrl + 'browse.php?comicID=%s'
|
||||
imageSearch = compile(r'(comics/.+?)"')
|
||||
prevSearch = compile(r'/(browse.php.+?)".+?/prev.gif')
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://sailorsun\.org/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://sailorsun\.org/\?p=\d+)', after="prev"))
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
|
||||
class SamAndFuzzy(_BasicScraper):
|
||||
latestUrl = 'http://www.samandfuzzy.com/'
|
||||
stripUrl = 'http://samandfuzzy.com/%s'
|
||||
|
@ -25,53 +25,51 @@ class SamAndFuzzy(_BasicScraper):
|
|||
help = 'Index format: nnnn'
|
||||
|
||||
|
||||
|
||||
class SarahZero(_BasicScraper):
|
||||
latestUrl = 'http://www.sarahzero.com/'
|
||||
stripUrl = latestUrl + 'sz_%s.html'
|
||||
imageSearch = compile(r'<img src="(z_(?:(?:spreads)|(?:temp)).+?)" alt=""')
|
||||
prevSearch = compile(r'onmouseout="changeImages\(\'sz_05_nav\',\'z_site/sz_05_nav.gif\'\);return true" href="(sz_.+?)">')
|
||||
imageSearch = compile(tagre("img", "src", r'(z_spreads/sz_[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(sz_\d+\.html)') + tagre("img", "src", r'z_site/sz_05_nav\.gif'))
|
||||
help = 'Index format: nnnn'
|
||||
|
||||
|
||||
|
||||
class ScaryGoRound(_BasicScraper):
|
||||
latestUrl = 'http://www.scarygoround.com/'
|
||||
stripUrl = latestUrl + '?date=%s'
|
||||
imageSearch = compile(r'<img src="(strips/\d{8}\..{3})"')
|
||||
prevSearch = compile(r'f><a href="(.+?)"><img src="site-images/previous.png"')
|
||||
imageSearch = compile(tagre("img", "src", r'(strips/\d+\.png)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(\?date=\d+)') + "Previous")
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
class SchlockMercenary(_BasicScraper):
|
||||
latestUrl = 'http://www.schlockmercenary.com/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(/comic[s|/][^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif")
|
||||
help = 'Index format: yyyymmdd'
|
||||
stripUrl = latestUrl + '%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://static\.schlockmercenary\.com/comics/[^"]+)'))
|
||||
# "/d+" matched literal "d" characters; strip pages are addressed by date
# (yyyy-mm-dd, see the help text), so match a dashed date path instead.
# NOTE(review): verify against the live page markup.
prevSearch = compile(tagre("a", "href", r'(/\d+-\d+-\d+)', after="nav-previous"))
|
||||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
|
||||
class SchoolBites(_BasicScraper):
|
||||
latestUrl = 'http://www.schoolbites.net/'
|
||||
latestUrl = 'http://schoolbites.net/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch = compile(r'(/comics/.+?)"')
|
||||
prevSearch = compile(r'first_day.+?(/d/.+?.html).+?/previous_day.gif')
|
||||
imageSearch = compile(tagre("img", "src", r'(http://cdn\.schoolbites\.net/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://schoolbites\.net/d/\d+\.html)', after="prev"))
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class Sheldon(_BasicScraper):
|
||||
latestUrl = 'http://www.sheldoncomics.com/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(/comic[s|/][^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif")
|
||||
help = 'Index format: yyyymmdd'
|
||||
stripUrl = latestUrl + 'archive/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(/strips/[^"]+)'))
|
||||
# The capture group around the archive path was missing its opening "(",
# which made the pattern fail to compile.
prevSearch = compile(tagre("a", "href", r'(/archive/\d+\.html)', after="sidenav-prev"))
|
||||
help = 'Index format: yymmdd'
|
||||
|
||||
|
||||
class Shortpacked(_BasicScraper):
|
||||
latestUrl = 'http://www.shortpacked.com/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(/comic[s|/][^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif")
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.shortpacked\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.shortpacked\.com/\d+/comic/[^"]+)', after="prev"))
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
|
@ -85,12 +83,11 @@ class SinFest(_BasicScraper):
|
|||
|
||||
|
||||
class SlightlyDamned(_BasicScraper):
|
||||
latestUrl = 'http://raizap.com/sdamned/index.php'
|
||||
stripUrl = 'http://raizap.com/sdamned/pages.php\?comicID=%s'
|
||||
imageSearch = compile(r'"(.+?comics2/.+?)"')
|
||||
prevSearch = compile(r'</a>.+?(pages.php\?comicID=.+?)".+?back1')
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
latestUrl = 'http://www.sdamned.com/'
|
||||
stripUrl = latestUrl + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.sdamned\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.sdamned\.com/[^"]+)', after="prev"))
|
||||
help = 'Index format: yyyy/mm/number'
|
||||
|
||||
|
||||
class SluggyFreelance(_BasicScraper):
|
||||
|
@ -103,81 +100,51 @@ class SluggyFreelance(_BasicScraper):
|
|||
|
||||
|
||||
class SodiumEyes(_BasicScraper):
|
||||
stripUrl = 'http://sodiumeyes.com/%s'
|
||||
imageSearch = compile(r'(/comic/.+?)"')
|
||||
prevSearch = compile(r'"http://sodiumeyes.com/(.+?/)"><.+?comic-prev')
|
||||
help = 'Index format: nnn'
|
||||
starter = indirectStarter('http://sodiumeyes.com/',
|
||||
compile(r'<a href="http://sodiumeyes.com/(\d\d\d\d.+?/)">'))
|
||||
|
||||
latestUrl = 'http://sodiumeyes.com/'
|
||||
stripUrl = latestUrl + '%s/'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://sodiumeyes\.com/comic/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://sodiumeyes\.com/[^"]+)', after="prev"))
|
||||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
|
||||
|
||||
class SpareParts(_BasicScraper):
|
||||
latestUrl = 'http://www.sparepartscomics.com/'
|
||||
stripUrl = latestUrl + 'comics/\\?date=s%'
|
||||
imageSearch = compile(r'(/comics/2.+?)[" ]')
|
||||
prevSearch = compile(r'(/comics/.+?|index.php\?.+?)".+?Prev')
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class StarslipCrisis(_BasicScraper):
|
||||
latestUrl = 'http://www.starslipcrisis.com/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch = compile(tagre("img", "src", r'(/comic[s|/][^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'[^"]*(/d/\d+\.s?html)')+r"[^>]+/images/(?:nav_02|previous_day)\.gif")
|
||||
baseUrl = 'http://www.sparepartscomics.com/'
latestUrl = baseUrl + 'comics/?date=20080328'
# The placeholder was written as "s%", which is not a valid format
# specifier; "%s" is substituted with the strip index (yyyymmdd).
stripUrl = baseUrl + 'comics/?date=%s'
|
||||
# Capture the image URL in a group, like every other imageSearch pattern here.
# NOTE(review): presumably the scraper reads group 1 of the match - confirm.
imageSearch = compile(tagre("img", "src", r'(http://www\.sparepartscomics\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)') + "Previous Comic")
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
|
||||
class Stubble(_BasicScraper):
|
||||
latestUrl = 'http://www.stubblecomics.com/d/20051230.html'
|
||||
stripUrl = 'http://www.stubblecomics.com/d/%s.html'
|
||||
imageSearch = compile(r'"(/comics/.*?)"')
|
||||
prevSearch = compile(r'"(.*?)".*?backarrow')
|
||||
help = 'Index format: yyyymmdd'
|
||||
|
||||
latestUrl = 'http://stubblecomics.com/'
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://stubblecomics\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://stubblecomics\.com/\?p=\d+)', after="navi-prev"))
|
||||
help = 'Index format: number'
|
||||
|
||||
|
||||
class StrawberryDeathCake(_BasicScraper):
|
||||
latestUrl = 'http://rainchildstudios.com/strawberry/'
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
imageSearch = compile(r'/(comics/.+?)"')
|
||||
prevSearch = compile(r'strawberry/(\?p=.+?)".+?span class="prev"')
|
||||
help = 'Index format: n (good luck)'
|
||||
|
||||
latestUrl = 'http://strawberrydeathcake.com/'
|
||||
stripUrl = latestUrl + 'archive/%s/'
|
||||
# Capture the image URL in a group, like every other imageSearch pattern here.
# NOTE(review): presumably the scraper reads group 1 of the match - confirm.
imageSearch = compile(tagre("img", "src", r'(http://strawberrydeathcake\.com/wp-content/webcomic/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://strawberrydeathcake\.com/archive/[^"]+)', after="previous"))
|
||||
help = 'Index format: stripname'
|
||||
|
||||
|
||||
class SuburbanTribe(_BasicScraper):
|
||||
latestUrl = 'http://www.pixelwhip.com/'
|
||||
stripUrl = latestUrl + '?p%s'
|
||||
imageSearch = compile(r'<img src="(http://www.pixelwhip.com/comics/.+?)"')
|
||||
prevSearch = compile(r'<div class="nav-previous"><a href="([^"]+)" rel="prev">')
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
imageSearch = compile(tagre("img", "src", r'(http://www\.pixelwhip\.com/comics/[^"]+)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(http://www\.pixelwhip\.com/\?p=\d+)', after="prev"))
|
||||
help = 'Index format: nnnn'
|
||||
|
||||
|
||||
|
||||
class SuccubusJustice(_BasicScraper):
|
||||
latestUrl = 'http://www.succubus-justice.com/Com%20main%20frame.htm'
|
||||
stripUrl = 'http://www.succubus-justice.com/%s%%20frame.htm'
|
||||
imageSearch = compile(r'<p align="center"><img src="(/\d+.\w{3,4})"')
|
||||
prevSearch = compile(r'<a href="(/[\w%]+\.htm|[\w%]+\.htm)"[^>]+?><img src="124.gif"')
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
||||
class Supafine(_BasicScraper):
|
||||
latestUrl = 'http://www.supafine.com/comics/classic.php'
|
||||
stripUrl = latestUrl + '?comicID=%s'
|
||||
imageSearch = compile(r'<img src="(http://www.supafine.com/comics/.+?)"')
|
||||
prevSearch = compile(r'<a href="(http://www.supafine.com/comics/classic.php\?.+?)"><img src="http://supafine.com/comikaze/images/previous.gif" ')
|
||||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
|
||||
class SomethingPositive(_BasicScraper):
|
||||
latestUrl = 'http://www.somethingpositive.net/'
|
||||
stripUrl = latestUrl + 'sp%s.shtml'
|
||||
imageSearch = compile(r'<img src="(/arch/sp\d+.\w{3,4}|/sp\d+.\w{3,4})"')
|
||||
prevSearch = compile(r'<a \n?href="(sp\d{8}\.shtml)">(<font size=1\nface=".+?"\nSTYLE=".+?">Previous|<img src="images2/previous|<img src="images/previous.gif")', MULTILINE | IGNORECASE)
|
||||
imageSearch = compile(tagre("img", "src", r'(sp\d+\.png)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(sp\d+\.shtml)') + "Previous")
|
||||
help = 'Index format: mmddyyyy'
|
||||
|
||||
@classmethod
|
||||
|
@ -202,48 +169,6 @@ class SexyLosers(_BasicScraper):
|
|||
|
||||
|
||||
|
||||
def smackJeeves(names):
|
||||
# XXX mature content can be viewed directly with:
|
||||
# http://www.smackjeeves.com/mature.php?ref=<percent-encoded-url>
|
||||
class _SJScraper(_BasicScraper):
|
||||
stripUrl = property(lambda self: self.baseUrl + self.shortName)
|
||||
imageSearch = compile(r'<img src="(http://www\.smackjeeves\.com/images/uploaded/comics/[^"]*)"', IGNORECASE)
|
||||
prevSearch = compile(r'<a href="(/comics/\d+/[^"]*)"><img[^>]*alt="< Previous"', IGNORECASE)
|
||||
help = 'Index format: nnnn (some increasing number)'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
return pageUrl.split('/')[-2]
|
||||
|
||||
|
||||
def makeScraper(shortName):
|
||||
baseUrl = 'http://%s.smackjeeves.com/comics/' % shortName
|
||||
return type('SmackJeeves_%s' % shortName,
|
||||
(_SJScraper,),
|
||||
dict(
|
||||
name='SmackJeeves/' + shortName,
|
||||
baseUrl=baseUrl,
|
||||
starter=bounceStarter(baseUrl, compile(r'<a href="(/comics/\d+/[^"]*)"><img[^>]*alt="Next >"', IGNORECASE)))
|
||||
)
|
||||
return dict((name, makeScraper(name)) for name in names)
|
||||
|
||||
|
||||
globals().update(smackJeeves([
|
||||
'20galaxies',
|
||||
'axe13',
|
||||
'beartholomew',
|
||||
'bliss',
|
||||
'durian',
|
||||
'heard',
|
||||
'mpmcomic',
|
||||
'nlmo-project',
|
||||
'paranoidloyd',
|
||||
'thatdreamagain',
|
||||
'wowcomics',
|
||||
]))
|
||||
|
||||
|
||||
|
||||
class StarCrossdDestiny(_BasicScraper):
|
||||
latestUrl = 'http://www.starcrossd.net/comic.html'
|
||||
stripUrl = 'http://www.starcrossd.net/archives/%s.html'
|
||||
|
@ -263,19 +188,6 @@ class StarCrossdDestiny(_BasicScraper):
|
|||
return directory + '-' + filename
|
||||
|
||||
|
||||
|
||||
class SGVY(_BasicScraper):
|
||||
stripUrl = 'http://www.sgvy.com/Edda%s/Issue%s/Page%s.html'
|
||||
imageSearch = compile(r'"comic" src="((?:\.\./)+images/sgvy/sgvy-[-\w\d]+\.\w+)"')
|
||||
prevSearch = compile(r'<a href="((?:\.\./)+(?:Edda\d+|Holiday)/(?:Issue\d+/Page\d+|Cover)\.html)">Prev</a>')
|
||||
help = 'Index format: edda-issue-page'
|
||||
|
||||
starter = indirectStarter('http://www.sgvy.com/', compile(r'<a href="(archives/(?:Edda\d+|Holiday)/(?:Issue\d+/Page\d+|Cover)\.html)">'))
|
||||
|
||||
def setStrip(self, index):
|
||||
self.currentUrl = self.stripUrl % tuple(map(int, index.split('-')))
|
||||
|
||||
|
||||
class Spamusement(_BasicScraper):
|
||||
stripUrl = 'http://spamusement.com/index.php/comics/view/%s'
|
||||
imageSearch = compile(r'<img src="(http://spamusement.com/gfx/\d+\..+?)"', IGNORECASE)
|
||||
|
@ -285,63 +197,14 @@ class Spamusement(_BasicScraper):
|
|||
starter = indirectStarter('http://spamusement.com/', prevSearch)
|
||||
|
||||
|
||||
|
||||
def snafuComics():
|
||||
class _SnafuComics(_BasicScraper):
|
||||
imageSearch = compile(r'<img src=http://\w+\.snafu-comics\.com/(comics/\d{6}_\w*\.\w{3,4})')
|
||||
prevSearch = compile(r'<a href="(\?comic_id=\d+)">Previous</a>')
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
@property
|
||||
def stripUrl(self):
|
||||
return self.latestUrl + 'index.php?strip_id=%s'
|
||||
|
||||
comics = {
|
||||
'Grim': 'grim',
|
||||
'KOF': 'kof',
|
||||
'PowerPuffGirls': 'ppg',
|
||||
'Snafu': 'www',
|
||||
'Tin': 'tin',
|
||||
'TW': 'tw',
|
||||
'Sugar': 'sugar',
|
||||
'SF': 'sf',
|
||||
'Titan': 'titan',
|
||||
'EA': 'ea',
|
||||
'Zim': 'zim',
|
||||
'Soul': 'soul',
|
||||
'FT': 'ft',
|
||||
'Bunnywith': 'bunnywith',
|
||||
'Braindead': 'braindead',
|
||||
}
|
||||
|
||||
url = 'http://%s.snafu-comics.com/'
|
||||
return dict((name, type('SnafuComics_%s' % name,
|
||||
(_SnafuComics,),
|
||||
dict(name='SnafuComics/' + name,
|
||||
latestUrl=url % host)))
|
||||
for name, host in comics.items())
|
||||
|
||||
globals().update(snafuComics())
|
||||
|
||||
|
||||
|
||||
class SosiaalisestiRajoittuneet(_BasicScraper):
|
||||
latestUrl = 'http://sosiaalisestirajoittuneet.fi/index_nocomment.php'
|
||||
stripUrl = latestUrl + '?date=%s'
|
||||
imageSearch = compile(r'<img src="(strips/web/\d+.jpg)" alt=".*?" />')
|
||||
prevSearch = compile(r'<a href="(index_nocomment\.php\?date=\d+)"><img\s+src="images/active_edellinen\.gif"', MULTILINE)
|
||||
|
||||
|
||||
|
||||
class StrangeCandy(_BasicScraper):
|
||||
latestUrl = 'http://www.strangecandy.net/'
|
||||
stripUrl = latestUrl + 'd/%s.html'
|
||||
imageSearch = compile(r'src="(http://www.strangecandy.net/comics/\d{8}.\w{1,4})"')
|
||||
prevSearch = compile(r'<a href="(http://www.strangecandy.net/d/\d{8}.html)"><img[^>]+?src="http://www.strangecandy.net/images/previous_day.gif"')
|
||||
imageSearch = compile(tagre("img", "src", r'(/comics/\d+\.jpg)'))
|
||||
prevSearch = compile(tagre("a", "href", r'(/d/\d+\.html)') + tagre("img", "alt", "Previous comic"))
|
||||
help = 'Index format: yyyyddmm'
|
||||
|
||||
|
||||
|
||||
class SMBC(_BasicScraper):
|
||||
latestUrl = 'http://www.smbc-comics.com/'
|
||||
stripUrl = latestUrl + 'index.php?db=comics&id=%s'
|
||||
|
@ -357,21 +220,3 @@ class SomethingLikeLife(_BasicScraper):
|
|||
imageSearch = compile(r'<img src="(http://www.pulledpunches.com/comics/[^"]*)"')
|
||||
prevSearch = compile(r'</a> <a href="(http://www.pulledpunches.com/\?p=[^"]*)"><img src="back1.gif"')
|
||||
help = 'Index format: nn'
|
||||
|
||||
|
||||
|
||||
class StickEmUpComics(_BasicScraper):
|
||||
latestUrl = 'http://stickemupcomics.com/'
|
||||
stripUrl = latestUrl + '%s'
|
||||
imageSearch = compile(r'<img src="(http://stickemupcomics.com/comics/.+?)"')
|
||||
prevSearch = compile(r'<a href="(.+?)"><span class="prev">')
|
||||
help = 'Index format: yyyy/mm/dd/stripname'
|
||||
|
||||
|
||||
|
||||
class SexDemonBag(_BasicScraper):
|
||||
latestUrl = 'http://www.sexdemonbag.com/'
|
||||
stripUrl = latestUrl + '?p=%s'
|
||||
imageSearch = compile(r'<img src="(http://www.sexdemonbag.com/comics/.+?)"')
|
||||
prevSearch = compile(r'<div class="nav-previous"><a href="(.+?)">')
|
||||
help = 'Index format: nnn'
|
||||
|
|
49
dosagelib/plugins/smackjeeves.py
Normal file
49
dosagelib/plugins/smackjeeves.py
Normal file
|
@ -0,0 +1,49 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
from re import compile
|
||||
from ..scraper import _BasicScraper
|
||||
from ..helpers import bounceStarter
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
def smackJeeves(names):
    """Create one scraper class for each SmackJeeves comic in ``names``.

    Each entry of ``names`` is used as the subdomain of
    ``http://<name>.smackjeeves.com/comics/``. The result is a dict that
    maps every given name to its generated scraper class.
    """
    # XXX mature content can be viewed directly with:
    # http://www.smackjeeves.com/mature.php?ref=<percent-encoded-url>
    class _SJScraper(_BasicScraper):
        # Shared settings for all generated SmackJeeves scrapers.
        imageSearch = compile(tagre("img", "src", r'(http://www\.smackjeeves\.com/images/uploaded/comics/[^"]*)'))
        prevSearch = compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + '<img[^>]*alt="< Previous"')
        help = 'Index format: nnnn (some increasing number)'

        @property
        def stripUrl(self):
            # NOTE(review): shortName is not set by makeScraper below;
            # presumably it is provided elsewhere - confirm.
            return self.baseUrl + self.shortName

        @classmethod
        def namer(cls, imageUrl, pageUrl):
            """Name the image after the second-to-last path part of the page URL."""
            urlParts = pageUrl.split('/')
            return urlParts[-2]

    def makeScraper(shortName):
        """Build the scraper class for the comic hosted at ``shortName``."""
        baseUrl = 'http://%s.smackjeeves.com/comics/' % shortName
        nextSearch = compile(tagre("a", "href", r'(/comics/\d+/[^"]*)') + '<img[^>]*alt="Next >"')
        attrs = {
            'name': 'SmackJeeves/' + shortName,
            'baseUrl': baseUrl,
            'starter': bounceStarter(baseUrl, nextSearch),
        }
        return type('SmackJeeves_%s' % shortName, (_SJScraper,), attrs)

    scrapers = {}
    for comicName in names:
        scrapers[comicName] = makeScraper(comicName)
    return scrapers
|
||||
|
||||
|
||||
# Add one generated scraper class per listed comic to this module's
# namespace; smackJeeves() returns them keyed by the bare comic name.
globals().update(smackJeeves([
    '20galaxies',
    'axe13',
    'beartholomew',
    'bliss',
    'durian',
    'heard',
    'mpmcomic',
    'nlmo-project',
    'paranoidloyd',
    'thatdreamagain',
    'wowcomics',
]))
|
42
dosagelib/plugins/snafu.py
Normal file
42
dosagelib/plugins/snafu.py
Normal file
|
@ -0,0 +1,42 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
|
||||
def snafuComics():
    """Build the scraper classes for the comics hosted on snafu-comics.com.

    Returns:
        dict mapping each comic name (for example 'Grim') to a newly
        created subclass of the shared scraper below. Each subclass has
        ``name`` set to 'SnafuComics/<name>' and ``latestUrl`` set to the
        comic's own subdomain.
    """
    # This module does not import re. Without this local import the bare
    # name ``compile`` resolves to the builtin compile(), which raises
    # TypeError when called with only a pattern string.
    from re import compile

    class _SnafuComics(_BasicScraper):
        # Group 1 captures the site-relative image path
        # (comics/<six digits>_<word chars>.<3-4 char extension>).
        imageSearch = compile(r'<img src=http://\w+\.snafu-comics\.com/(comics/\d{6}_\w*\.\w{3,4})')
        # Group 1 captures the relative query string of the "Previous" link.
        prevSearch = compile(r'<a href="(\?comic_id=\d+)">Previous</a>')
        help = 'Index format: n (unpadded)'

        @property
        def stripUrl(self):
            # latestUrl is supplied per comic by the generated subclasses.
            return self.latestUrl + 'index.php?strip_id=%s'

    # Comic name -> subdomain of snafu-comics.com.
    comics = {
        'Grim': 'grim',
        'KOF': 'kof',
        'PowerPuffGirls': 'ppg',
        'Snafu': 'www',
        'Tin': 'tin',
        'TW': 'tw',
        'Sugar': 'sugar',
        'SF': 'sf',
        'Titan': 'titan',
        'EA': 'ea',
        'Zim': 'zim',
        'Soul': 'soul',
        'FT': 'ft',
        'Bunnywith': 'bunnywith',
        'Braindead': 'braindead',
    }

    url = 'http://%s.snafu-comics.com/'
    return dict((name, type('SnafuComics_%s' % name,
                            (_SnafuComics,),
                            dict(name='SnafuComics/' + name,
                                 latestUrl=url % host)))
                for name, host in comics.items())
|
||||
|
||||
# Add the generated Snafu scraper classes to this module's namespace,
# keyed by comic name as returned by snafuComics().
globals().update(snafuComics())
|
|
@ -1,9 +1,11 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
from re import compile, IGNORECASE
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, IGNORECASE
|
||||
from ..scraper import _BasicScraper
|
||||
from ..helpers import indirectStarter
|
||||
from ..util import tagre
|
||||
|
||||
|
||||
class TalesOfPylea(_BasicScraper):
|
||||
|
@ -59,6 +61,13 @@ class Thorn(_BasicScraper):
|
|||
help = 'Index format: nnn'
|
||||
|
||||
|
||||
class TinyKittenTeeth(_BasicScraper):
    """Scraper settings for the comic at www.tinykittenteeth.com."""
    # Front page of the site.
    latestUrl = 'http://www.tinykittenteeth.com/'
    # URL template for one strip; the %s placeholder presumably takes the
    # index described in `help` -- confirm against _BasicScraper.
    stripUrl = latestUrl + 'index.php?current=%s'
    # Pattern built by tagre() for an <img> whose src is an image under
    # /comics/ on this site (exact tagre() semantics are defined elsewhere).
    imageSearch = compile(tagre("img", "src", r'(http://www\.tinykittenteeth\.com/comics/[^"]+)'))
    # Pattern built by tagre() for an <a href> qualified by after="Previous";
    # presumably the "Previous" navigation link -- verify against tagre().
    prevSearch = compile(tagre("a", "href", r'([^"]+)', after="Previous"))
    help = 'Index format: n (unpadded)'
|
||||
|
||||
|
||||
class TwoTwoOneFour(_BasicScraper):
|
||||
latestUrl = 'http://www.nitrocosm.com/go/2214_classic/'
|
||||
|
@ -78,44 +87,6 @@ class TheWhiteboard(_BasicScraper):
|
|||
|
||||
|
||||
|
||||
class _TheFallenAngel(_BasicScraper):
    """Shared settings for comics served by thefallenangel.co.uk's autokeen CGI.

    Subclasses supply ``shortName``, which selects the per-comic CGI
    directory used by ``baseUrl``.
    """
    help = 'Index format: yyyymmdd'
    # Group 1 captures the comic image URL.
    imageSearch = compile(r'SRC="(http://www.thefallenangel.co.uk/\w+comics/.+?)"')
    # Group 1 captures the href of the link wrapping the previous-day button.
    prevSearch = compile(r' <a href="(http://www.thefallenangel.co.uk/.+?)"><img[^>]+?src="http://www.thefallenangel.co.uk/images/previousday.jpg"')

    @property
    def baseUrl(self):
        """URL of this comic's autokeen CGI script."""
        template = 'http://www.thefallenangel.co.uk/cgi-bin/%sautokeen/autokeenlite.cgi'
        return template % (self.shortName,)

    @property
    def stripUrl(self):
        """URL template for a single strip: the CGI URL plus a date query."""
        return ''.join((self.baseUrl, '?date=%s'))

    def starter(self):
        """Return the CGI script URL as the starting page."""
        return self.baseUrl
|
||||
|
||||
|
||||
|
||||
class HighMaintenance(_TheFallenAngel):
    """The 'HighMaintenance' comic on thefallenangel.co.uk."""
    name = 'TheFallenAngel/HighMaintenance'
    # Interpolated into the CGI path by the inherited baseUrl property.
    shortName = 'hm'
|
||||
|
||||
|
||||
|
||||
class FAWK(_TheFallenAngel):
    """The 'FAWK' comic on thefallenangel.co.uk."""
    name = 'TheFallenAngel/FAWK'
    # Interpolated into the CGI path by the inherited baseUrl property.
    shortName = 'fawk'
|
||||
|
||||
|
||||
|
||||
class MalloryChan(_TheFallenAngel):
    """The 'MalloryChan' comic on thefallenangel.co.uk."""
    name = 'TheFallenAngel/MalloryChan'
    # Interpolated into the CGI path by the inherited baseUrl property.
    shortName = 'mallorychan'
|
||||
|
||||
|
||||
|
||||
class HMHigh(_BasicScraper):
|
||||
name = 'TheFallenAngel/HMHigh'
|
||||
latestUrl = 'http://www.thefallenangel.co.uk/hmhigh/'
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, IGNORECASE
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, sub
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, IGNORECASE, MULTILINE
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, IGNORECASE, DOTALL
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile, MULTILINE
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
|
||||
from re import compile
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
|
|
|
@ -93,7 +93,7 @@ def fetchUrl(url, urlSearch):
|
|||
if not searchUrl:
|
||||
raise ValueError("Match empty URL at %s with pattern %s" % (url, urlSearch.pattern))
|
||||
out.write('matched URL %r' % searchUrl, 2)
|
||||
return urlparse.urljoin(baseUrl, searchUrl)
|
||||
return normaliseURL(urlparse.urljoin(baseUrl, searchUrl))
|
||||
return None
|
||||
|
||||
|
||||
|
@ -106,7 +106,7 @@ def fetchUrls(url, imageSearch, prevSearch=None):
|
|||
if not imageUrl:
|
||||
raise ValueError("Match empty image URL at %s with pattern %s" % (url, imageSearch.pattern))
|
||||
out.write('matched image URL %r' % imageUrl, 2)
|
||||
imageUrls.add(urlparse.urljoin(baseUrl, imageUrl))
|
||||
imageUrls.add(normaliseURL(urlparse.urljoin(baseUrl, imageUrl)))
|
||||
if not imageUrls:
|
||||
out.write("warning: no images found at %s with pattern %s" % (url, imageSearch.pattern))
|
||||
if prevSearch is not None:
|
||||
|
@ -117,12 +117,12 @@ def fetchUrls(url, imageSearch, prevSearch=None):
|
|||
if not prevUrl:
|
||||
raise ValueError("Match empty previous URL at %s with pattern %s" % (url, prevSearch.pattern))
|
||||
out.write('matched previous URL %r' % prevUrl, 2)
|
||||
prevUrl = urlparse.urljoin(baseUrl, prevUrl)
|
||||
prevUrl = normaliseURL(urlparse.urljoin(baseUrl, prevUrl))
|
||||
else:
|
||||
out.write('no previous URL %s at %s' % (prevSearch.pattern, url), 2)
|
||||
prevUrl = None
|
||||
return imageUrls, prevUrl
|
||||
return imageUrls
|
||||
return imageUrls, None
|
||||
|
||||
|
||||
def _unescape(text):
|
||||
|
@ -150,7 +150,8 @@ def _unescape(text):
|
|||
text = text.encode('utf-8')
|
||||
text = urllib2.quote(text, safe=';/?:@&=+$,')
|
||||
return text
|
||||
return re.sub("&#?\w+;", _fixup, text)
|
||||
return re.sub(r"&#?\w+;", _fixup, text)
|
||||
|
||||
|
||||
def normaliseURL(url):
|
||||
"""
|
||||
|
@ -159,24 +160,24 @@ def normaliseURL(url):
|
|||
"""
|
||||
# XXX: brutal hack
|
||||
url = _unescape(url)
|
||||
url = url.replace(' ', '%20')
|
||||
|
||||
pu = list(urlparse.urlparse(url))
|
||||
segments = pu[2].replace(' ', '%20').split('/')
|
||||
segments = pu[2].split('/')
|
||||
while segments and segments[0] == '':
|
||||
del segments[0]
|
||||
pu[2] = '/' + '/'.join(segments)
|
||||
pu[2] = '/' + '/'.join(segments).replace(' ', '%20')
|
||||
# remove leading '&' from query
|
||||
if pu[3].startswith('&'):
|
||||
pu[3] = pu[3][1:]
|
||||
if pu[4].startswith('&'):
|
||||
pu[4] = pu[4][1:]
|
||||
# remove anchor
|
||||
pu[5] = ""
|
||||
return urlparse.urlunparse(pu)
|
||||
|
||||
|
||||
def urlopen(url, referrer=None, retries=3, retry_wait_seconds=5):
|
||||
out.write('Open URL %s' % url, 2)
|
||||
assert retries >= 0, 'invalid retry value %r' % retries
|
||||
assert retry_wait_seconds > 0, 'invalid retry seconds value %r' % retry_wait_seconds
|
||||
# Work around urllib2 brokenness
|
||||
url = normaliseURL(url)
|
||||
req = urllib2.Request(url)
|
||||
if referrer:
|
||||
req.add_header('Referer', referrer)
|
||||
|
@ -185,13 +186,14 @@ def urlopen(url, referrer=None, retries=3, retry_wait_seconds=5):
|
|||
while True:
|
||||
try:
|
||||
return urllib2.urlopen(req)
|
||||
except IOError as msg:
|
||||
out.write('URL retrieval of %s failed: %s' % (url, msg))
|
||||
except IOError as err:
|
||||
msg = 'URL retrieval of %s failed: %s' % (url, err)
|
||||
out.write(msg)
|
||||
out.write('waiting %d seconds and retrying (%d)' % (retry_wait_seconds, tries), 2)
|
||||
time.sleep(retry_wait_seconds)
|
||||
tries += 1
|
||||
if tries >= retries:
|
||||
raise
|
||||
raise IOError(msg)
|
||||
|
||||
|
||||
def get_columns (fp):
|
||||
|
@ -212,6 +214,7 @@ def get_columns (fp):
|
|||
|
||||
suffixes = ('B', 'kB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB')
|
||||
|
||||
|
||||
def saneDataSize(size):
|
||||
if size == 0:
|
||||
return 'unk B'
|
||||
|
@ -221,6 +224,7 @@ def saneDataSize(size):
|
|||
factor = 1024 ** index
|
||||
return '%0.3f %s' % (float(size) / factor, suffixes[index])
|
||||
|
||||
|
||||
def splitpath(path):
|
||||
c = []
|
||||
head, tail = os.path.split(path)
|
||||
|
@ -229,10 +233,10 @@ def splitpath(path):
|
|||
head, tail = os.path.split(head)
|
||||
return c
|
||||
|
||||
|
||||
def getRelativePath(basepath, path):
|
||||
basepath = splitpath(os.path.abspath(basepath))
|
||||
path = splitpath(os.path.abspath(path))
|
||||
|
||||
afterCommon = False
|
||||
for c in basepath:
|
||||
if afterCommon or path[0] != c:
|
||||
|
@ -240,9 +244,9 @@ def getRelativePath(basepath, path):
|
|||
afterCommon = True
|
||||
else:
|
||||
del path[0]
|
||||
|
||||
return os.path.join(*path)
|
||||
|
||||
|
||||
def getQueryParams(url):
|
||||
query = urlparse.urlsplit(url)[3]
|
||||
out.write('Extracting query parameters from %r (%r)...' % (url, query), 3)
|
||||
|
@ -267,7 +271,7 @@ I can work with ;) .
|
|||
etype = sys.exc_info()[0]
|
||||
if evalue is None:
|
||||
evalue = sys.exc_info()[1]
|
||||
print >> out, etype, evalue
|
||||
print(etype, evalue, file=out)
|
||||
if tb is None:
|
||||
tb = sys.exc_info()[2]
|
||||
traceback.print_exception(etype, evalue, tb, None, out)
|
||||
|
|
|
@ -29,6 +29,18 @@ class _ComicTester(TestCase):
|
|||
images += 1
|
||||
self.save(image)
|
||||
if num > 0:
|
||||
self.check_stripurl(strip)
|
||||
else:
|
||||
empty += 1
|
||||
num += 1
|
||||
if self.scraperclass.prevSearch:
|
||||
self.check(num >= 4, 'traversal failed after %d strips, check the prevSearch pattern.' % num)
|
||||
self.check(empty <= 1, 'failed to find images on %d pages, check the imageSearch pattern.' % empty)
|
||||
|
||||
def check_stripurl(self, strip):
|
||||
if not self.scraperclass.stripUrl:
|
||||
# no indexing support
|
||||
return
|
||||
# test that the stripUrl regex matches the retrieved strip URL
|
||||
urlmatch = re.escape(self.scraperclass.stripUrl)
|
||||
urlmatch = urlmatch.replace(r"\%s", r".+")
|
||||
|
@ -36,11 +48,6 @@ class _ComicTester(TestCase):
|
|||
ro = re.compile(urlmatch)
|
||||
mo = ro.search(strip.stripUrl)
|
||||
self.check(mo is not None, 'strip URL %r does not match stripUrl pattern %s' % (strip.stripUrl, urlmatch))
|
||||
else:
|
||||
empty += 1
|
||||
num += 1
|
||||
self.check(num >= 4, 'traversal failed after %d strips, check the prevSearch pattern.' % num)
|
||||
self.check(empty <= 1, 'failed to find images on %d pages, check the imageSearch pattern.' % empty)
|
||||
|
||||
def save(self, image):
|
||||
# create a temporary directory
|
||||
|
|
Loading…
Reference in a new issue