Fix comics.
This commit is contained in:
parent
45df462a47
commit
387dff79a9
33 changed files with 372 additions and 241 deletions
22
dosage
22
dosage
|
@ -98,15 +98,19 @@ def getComics(options, comics):
|
||||||
else:
|
else:
|
||||||
strips = scraperobj.getCurrentStrips()
|
strips = scraperobj.getCurrentStrips()
|
||||||
first = True
|
first = True
|
||||||
for strip in strips:
|
try:
|
||||||
_errors, skipped = saveComicStrip(strip, options.basepath)
|
for strip in strips:
|
||||||
errors += _errors
|
_errors, skipped = saveComicStrip(strip, options.basepath)
|
||||||
if not first and scraperobj.indexes:
|
errors += _errors
|
||||||
# stop when indexed retrieval skipped all images for one
|
if not first and scraperobj.indexes:
|
||||||
# comie strip (except the first one)
|
# stop when indexed retrieval skipped all images for one
|
||||||
out.write("Stop retrieval because image file already exists")
|
# comie strip (except the first one)
|
||||||
break
|
out.write("Stop retrieval because image file already exists")
|
||||||
first = False
|
break
|
||||||
|
first = False
|
||||||
|
except IOError as msg:
|
||||||
|
out.write('Error getting strip: %s' % msg)
|
||||||
|
errors += 1
|
||||||
events.getHandler().end()
|
events.getHandler().end()
|
||||||
return errors
|
return errors
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,7 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: iso-8859-1 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012 Bastian Kleineidam
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
import re
|
|
||||||
|
|
||||||
from .util import fetchUrl, getQueryParams
|
from .util import fetchUrl, getQueryParams
|
||||||
from .scraper import _BasicScraper
|
|
||||||
|
|
||||||
def queryNamer(paramName, usePageUrl=False):
|
def queryNamer(paramName, usePageUrl=False):
|
||||||
"""Get name from URL query part."""
|
"""Get name from URL query part."""
|
||||||
|
@ -81,19 +78,3 @@ class IndirectLatestMixin(object):
|
||||||
|
|
||||||
latestUrl = property(getLatestUrl)
|
latestUrl = property(getLatestUrl)
|
||||||
|
|
||||||
|
|
||||||
class _PHPScraper(_BasicScraper):
|
|
||||||
"""
|
|
||||||
Scraper for comics using phpComic/CUSP.
|
|
||||||
|
|
||||||
This provides an easy way to define scrapers for webcomics using phpComic.
|
|
||||||
"""
|
|
||||||
imageUrl = property(lambda self: self.basePath + 'daily.php?date=%s')
|
|
||||||
imageSearch = property(lambda self: re.compile(r'<img alt=[^>]+ src="(%scomics/\d{6}\..+?)">' % (self.basePath,)))
|
|
||||||
|
|
||||||
help = 'Index format: yymmdd'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def starter(cls):
|
|
||||||
"""Get starter URL."""
|
|
||||||
return cls.basePath + cls.latestUrl
|
|
||||||
|
|
|
@ -5,8 +5,8 @@
|
||||||
from re import compile
|
from re import compile
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
from ..helpers import bounceStarter, indirectStarter
|
from ..helpers import bounceStarter
|
||||||
from ..util import tagre, getQueryParams
|
from ..util import tagre
|
||||||
|
|
||||||
|
|
||||||
class CaptainSNES(_BasicScraper):
|
class CaptainSNES(_BasicScraper):
|
||||||
|
@ -144,37 +144,6 @@ class Curvy(_BasicScraper):
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
def cloneManga(name, shortName, lastStrip=None):
|
|
||||||
url = 'http://manga.clone-army.org'
|
|
||||||
baseUrl = '%s/%s.php' % (url, shortName)
|
|
||||||
|
|
||||||
def namer(self, imageUrl, pageUrl):
|
|
||||||
return '%03d' % int(getQueryParams(pageUrl)['page'][0])
|
|
||||||
|
|
||||||
attrs = dict(
|
|
||||||
name='CloneManga/' + name,
|
|
||||||
stripUrl = baseUrl + '?page=%s',
|
|
||||||
imageSearch=compile(tagre("img", "src", r'((?:%s/)?%s/[^"]+)' % (url, shortName), after="center")),
|
|
||||||
prevSearch=compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"previous\.gif")),
|
|
||||||
help='Index format: n',
|
|
||||||
namer=namer,
|
|
||||||
)
|
|
||||||
if lastStrip is None:
|
|
||||||
attrs['starter'] = indirectStarter(baseUrl, compile(tagre("a", "href", r'([^"]+)')+tagre("img", "src", r"last\.gif")))
|
|
||||||
else:
|
|
||||||
attrs['latestUrl'] = attrs['stripUrl'] % lastStrip
|
|
||||||
return type('CloneManga_%s' % name, (_BasicScraper,), attrs)
|
|
||||||
|
|
||||||
|
|
||||||
anm = cloneManga('AprilAndMay', 'anm')
|
|
||||||
kanami = cloneManga('Kanami', 'kanami')
|
|
||||||
momoka = cloneManga('MomokaCorner', 'momoka')
|
|
||||||
nana = cloneManga('NanasEverydayLife', 'nana', '78')
|
|
||||||
pxi = cloneManga('PaperEleven', 'pxi', '311')
|
|
||||||
t42r = cloneManga('Tomoyo42sRoom', 't42r')
|
|
||||||
penny = cloneManga('PennyTribute', 'penny')
|
|
||||||
|
|
||||||
|
|
||||||
class CatAndGirl(_BasicScraper):
|
class CatAndGirl(_BasicScraper):
|
||||||
latestUrl = 'http://catandgirl.com/'
|
latestUrl = 'http://catandgirl.com/'
|
||||||
stripUrl = latestUrl + '?p=%s'
|
stripUrl = latestUrl + '?p=%s'
|
||||||
|
|
61
dosagelib/plugins/clonemanga.py
Normal file
61
dosagelib/plugins/clonemanga.py
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
# -*- coding: iso-8859-1 -*-
|
||||||
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
from re import compile
|
||||||
|
from ..scraper import make_scraper
|
||||||
|
from ..util import tagre, getQueryParams, fetchUrl
|
||||||
|
|
||||||
|
|
||||||
|
_linkTag = tagre("a", "href", r'([^"]+)')
|
||||||
|
_prevSearch = compile(_linkTag + tagre("img", "src", r"previous\.gif"))
|
||||||
|
_nextSearch = compile(_linkTag + tagre("img", "src", r"next\.gif"))
|
||||||
|
_lastSearch = compile(_linkTag + tagre("img", "src", r"last\.gif"))
|
||||||
|
|
||||||
|
def add(name, shortName, imageFolder=None, lastStrip=None):
    """Create a CloneManga scraper class and register it in this module.

    The class is built with make_scraper() and stored in globals() under
    the name 'CloneManga_<name>'.

    @param name: comic name; used in the class name and in the scraper
        name 'CloneManga/<name>'
    @param shortName: base name of the comic's .php page on
        manga.clone-army.org
    @param imageFolder: folder name matched in the image URLs; defaults to
        shortName when None
    @param lastStrip: if given, the latest URL is fixed to the strip URL of
        this index (presumably for comics that are no longer updated);
        if None, the latest strip is looked up at runtime by _starter()
    """
    classname = 'CloneManga_%s' % name
    _url = 'http://manga.clone-army.org'
    baseUrl = '%s/%s.php' % (_url, shortName)
    if imageFolder is None:
        imageFolder = shortName

    @classmethod
    def namer(cls, imageUrl, pageUrl):
        """Name the image after the zero-padded 'page' query parameter."""
        return '%03d' % int(getQueryParams(pageUrl)['page'][0])

    @classmethod
    def _starter(cls):
        """Get the URL of the latest strip.

        Raises ValueError if the needed navigation link is not found.
        """
        # first, try hopping to previous and next comic
        prevUrl = fetchUrl(baseUrl, _prevSearch)
        if not prevUrl:
            # no previous link found, try hopping to last comic
            lastUrl = fetchUrl(baseUrl, _lastSearch)
            if not lastUrl:
                raise ValueError("could not find lastSearch pattern %r in %s" % (_lastSearch.pattern, baseUrl))
            return lastUrl
        nextUrl = fetchUrl(prevUrl, _nextSearch)
        if not nextUrl:
            # Report the page that was searched. The fetch result is kept in
            # its own variable so the message does not print the empty result.
            raise ValueError("could not find nextSearch pattern %r in %s" % (_nextSearch.pattern, prevUrl))
        return nextUrl

    attrs = dict(
        name='CloneManga/' + name,
        stripUrl=baseUrl + '?page=%s',
        imageSearch=compile(tagre("img", "src", r'((?:%s/)?%s/[^"]+)' % (_url, imageFolder), after="center")),
        prevSearch=_prevSearch,
        help='Index format: n',
        namer=namer,
    )
    if lastStrip is None:
        # no fixed end known: find the latest strip dynamically
        attrs['starter'] = _starter
    else:
        attrs['latestUrl'] = attrs['stripUrl'] % lastStrip
    globals()[classname] = make_scraper(classname, **attrs)
|
||||||
|
|
||||||
|
|
||||||
|
add('AprilAndMay', 'anm', imageFolder='AAM')
|
||||||
|
add('Kanami', 'kanami')
|
||||||
|
add('MomokaCorner', 'momoka')
|
||||||
|
add('NanasEverydayLife', 'nana', lastStrip='78')
|
||||||
|
add('PaperEleven', 'pxi', imageFolder='papereleven', lastStrip='311')
|
||||||
|
add('Tomoyo42sRoom', 't42r')
|
||||||
|
add('PennyTribute', 'penny')
|
|
@ -6,25 +6,26 @@ from re import compile
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
from ..helpers import indirectStarter
|
from ..helpers import indirectStarter
|
||||||
from ..util import tagre, getQueryParams
|
from ..util import tagre
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class DMFA(_BasicScraper):
|
class DMFA(_BasicScraper):
|
||||||
latestUrl = 'http://www.missmab.com/'
|
latestUrl = 'http://www.missmab.com/'
|
||||||
stripUrl = latestUrl + 'Comics/Vol_%s.php'
|
stripUrl = latestUrl + 'Comics/Vol_%s.php'
|
||||||
imageSearch = compile(tagre("img", "src", r'((?:Comics/|Vol)[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'((?:Comics/|Vol)[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"])+')+
|
multipleImagesPerStrip = True
|
||||||
tagre("img", "src", r'(?:../)?Images/comicprev.gif'))
|
prevSearch = compile(tagre("a", "href", r'((?:Comics/)?Vol[^"]+)')+
|
||||||
|
tagre("img", "src", r'(?:../)?Images/comicprev\.gif'))
|
||||||
help = 'Index format: nnn (normally, some specials)'
|
help = 'Index format: nnn (normally, some specials)'
|
||||||
|
|
||||||
|
|
||||||
class DandyAndCompany(_BasicScraper):
|
class DandyAndCompany(_BasicScraper):
|
||||||
latestUrl = 'http://www.dandyandcompany.com/'
|
latestUrl = 'http://www.dandyandcompany.com/'
|
||||||
stripUrl = latestUrl + '%s'
|
stripUrl = None
|
||||||
imageSearch = compile(tagre("img", "src", r'([^"]*/strips/[^"]+)'))
|
multipleImagesPerStrip = True
|
||||||
prevSearch = compile(r'<a href="(.*)" class="prev"')
|
imageSearch = compile(tagre("a", "href", r'(http://\d+\.bp\.blogspot\.com/[^"]+)', after="imageanchor"))
|
||||||
help = 'Index format: yyyy/mm/dd'
|
prevSearch = compile(tagre("a", "href", r"([^']+)", quote="'", after="Older Posts"))
|
||||||
|
help = 'Index format: none'
|
||||||
|
|
||||||
|
|
||||||
class DarkWings(_BasicScraper):
|
class DarkWings(_BasicScraper):
|
||||||
|
@ -63,11 +64,11 @@ class DrFun(_BasicScraper):
|
||||||
latestUrl = 'http://www.ibiblio.org/Dave/ar00502.htm'
|
latestUrl = 'http://www.ibiblio.org/Dave/ar00502.htm'
|
||||||
stripUrl = 'http://www.ibiblio.org/Dave/ar%s.htm'
|
stripUrl = 'http://www.ibiblio.org/Dave/ar%s.htm'
|
||||||
imageSearch = compile(r'<A HREF= "(Dr-Fun/df\d{6}/df.+?)">')
|
imageSearch = compile(r'<A HREF= "(Dr-Fun/df\d{6}/df.+?)">')
|
||||||
|
multipleImagesPerStrip = True
|
||||||
prevSearch = compile(r'<A HREF="(.+?)">Previous Week,')
|
prevSearch = compile(r'<A HREF="(.+?)">Previous Week,')
|
||||||
help = 'Index format: nnnnn'
|
help = 'Index format: nnnnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Dracula(_BasicScraper):
|
class Dracula(_BasicScraper):
|
||||||
latestUrl = 'http://draculacomic.net/'
|
latestUrl = 'http://draculacomic.net/'
|
||||||
stripUrl = latestUrl + 'comic.php?comicID=%s'
|
stripUrl = latestUrl + 'comic.php?comicID=%s'
|
||||||
|
@ -76,7 +77,6 @@ class Dracula(_BasicScraper):
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class DragonTails(_BasicScraper):
|
class DragonTails(_BasicScraper):
|
||||||
latestUrl = 'http://www.dragon-tails.com/'
|
latestUrl = 'http://www.dragon-tails.com/'
|
||||||
stripUrl = latestUrl + 'archive.php?date=%s'
|
stripUrl = latestUrl + 'archive.php?date=%s'
|
||||||
|
@ -96,7 +96,7 @@ class DreamKeepersPrelude(_BasicScraper):
|
||||||
class Drowtales(_BasicScraper):
|
class Drowtales(_BasicScraper):
|
||||||
latestUrl = 'http://www.drowtales.com/mainarchive.php'
|
latestUrl = 'http://www.drowtales.com/mainarchive.php'
|
||||||
stripUrl = latestUrl + '?sid=%s'
|
stripUrl = latestUrl + '?sid=%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'("http://www.drowtales.com/mainarchive/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://www\.drowtales\.com/mainarchive/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(\?sid=\d+)', before="link_prev_top"))
|
prevSearch = compile(tagre("a", "href", r'(\?sid=\d+)', before="link_prev_top"))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
|
@ -105,7 +105,8 @@ class DieselSweeties(_BasicScraper):
|
||||||
latestUrl = 'http://www.dieselsweeties.com/'
|
latestUrl = 'http://www.dieselsweeties.com/'
|
||||||
stripUrl = latestUrl + 'archive/%s'
|
stripUrl = latestUrl + 'archive/%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/hstrips/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(/hstrips/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/archive/\d+)') + tagre("img", "src", r'http://www\.dieselsweeties\.com/ximages/blackbackarrow160.png'))
|
prevSearch = compile(tagre("a", "href", r'(/archive/\d+)') +
|
||||||
|
tagre("img", "src", r'(?:http://www\.dieselsweeties\.com/ximages/blackbackarrow160.png|/ximages/prev\.gif)'))
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -118,14 +119,10 @@ class DieselSweeties(_BasicScraper):
|
||||||
class DominicDeegan(_BasicScraper):
|
class DominicDeegan(_BasicScraper):
|
||||||
latestUrl = 'http://www.dominic-deegan.com/'
|
latestUrl = 'http://www.dominic-deegan.com/'
|
||||||
stripUrl = latestUrl + 'view.php?date=%s'
|
stripUrl = latestUrl + 'view.php?date=%s'
|
||||||
imageSearch = compile(r'<img src="(.+?save-as=.+?)" alt')
|
imageSearch = compile(tagre("img", "src", r'(comics/\d+\.gif)'))
|
||||||
prevSearch = compile(r'"(view.php\?date=.+?)".+?prev21')
|
prevSearch = compile(r'"(view.php\?date=.+?)".+?prev21')
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def namer(cls, imageUrl, pageUrl):
|
|
||||||
return getQueryParams(imageUrl)['save-as'][0].rsplit('.', 1)[0]
|
|
||||||
|
|
||||||
|
|
||||||
class DorkTower(_BasicScraper):
|
class DorkTower(_BasicScraper):
|
||||||
latestUrl = 'http://www.dorktower.com/'
|
latestUrl = 'http://www.dorktower.com/'
|
||||||
|
|
|
@ -35,7 +35,7 @@ def add(name):
|
||||||
return url
|
return url
|
||||||
url = fetchUrl(url, _nextSearch)
|
url = fetchUrl(url, _nextSearch)
|
||||||
if not url:
|
if not url:
|
||||||
raise ValueError("could not find nextSearch pattern %r in %s" % (_nextSearch.pattern, _url))
|
raise ValueError("could not find nextSearch pattern %r in %s" % (_nextSearch.pattern, url))
|
||||||
return url
|
return url
|
||||||
|
|
||||||
globals()[classname] = make_scraper(classname,
|
globals()[classname] = make_scraper(classname,
|
||||||
|
|
|
@ -20,16 +20,10 @@ class EerieCuties(_BasicScraper):
|
||||||
class Eriadan(_BasicScraper):
|
class Eriadan(_BasicScraper):
|
||||||
latestUrl = 'http://www.shockdom.com/webcomics/eriadan/'
|
latestUrl = 'http://www.shockdom.com/webcomics/eriadan/'
|
||||||
stripUrl = latestUrl + '%s'
|
stripUrl = latestUrl + '%s'
|
||||||
# XXX fix image search
|
imageSearch = compile(tagre("img", "src", r'(http://www\.shockdom\.com/webcomics/eriadan/files/[^"]+)', after='alt=""'))
|
||||||
imageSearch = compile(r'title="[^"]+?" src="http://www\.shockdom\.com/eriadan/(wp-content/uploads/.+?)"')
|
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
|
prevSearch = compile(tagre("a", "href", r'([^"]+)', after="prev"))
|
||||||
help = 'Index format: yyyy/mm/dd/nnn (unpadded)'
|
help = 'Index format: yyyy/mm/dd/nnn (unpadded)'
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def namer(cls, imageUrl, pageUrl):
|
|
||||||
return '%d' % (int(compile(r'p=(\d+)').search(pageUrl).group(1)))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ElGoonishShive(_BasicScraper):
|
class ElGoonishShive(_BasicScraper):
|
||||||
name = 'KeenSpot/ElGoonishShive'
|
name = 'KeenSpot/ElGoonishShive'
|
||||||
|
@ -40,7 +34,6 @@ class ElGoonishShive(_BasicScraper):
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ElGoonishShiveNP(_BasicScraper):
|
class ElGoonishShiveNP(_BasicScraper):
|
||||||
name = 'KeenSpot/ElGoonishShiveNP'
|
name = 'KeenSpot/ElGoonishShiveNP'
|
||||||
latestUrl = 'http://www.egscomics.com/egsnp/'
|
latestUrl = 'http://www.egscomics.com/egsnp/'
|
||||||
|
@ -52,12 +45,10 @@ class ElGoonishShiveNP(_BasicScraper):
|
||||||
|
|
||||||
class EmergencyExit(_BasicScraper):
|
class EmergencyExit(_BasicScraper):
|
||||||
latestUrl = 'http://www.eecomics.net/'
|
latestUrl = 'http://www.eecomics.net/'
|
||||||
stripUrl = None
|
stripUrl = latestUrl + "?strip_id=%s"
|
||||||
imageSearch = compile(r'"(comics/.+?)"')
|
imageSearch = compile(r'"(comics/.+?)"')
|
||||||
prevSearch = compile(r'START.+?"(.+?)"')
|
prevSearch = compile(tagre("a", "href", r'(\?strip_id=\d+)') + tagre("img", "alt", r"Prior"))
|
||||||
# XXX ?
|
help = 'Index format: n'
|
||||||
help = 'God help us now!'
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ErrantStory(_BasicScraper):
|
class ErrantStory(_BasicScraper):
|
||||||
|
@ -102,7 +93,7 @@ class EvilInc(_BasicScraper):
|
||||||
class Exiern(_BasicScraper):
|
class Exiern(_BasicScraper):
|
||||||
latestUrl = 'http://www.exiern.com/'
|
latestUrl = 'http://www.exiern.com/'
|
||||||
stripUrl = latestUrl + '?p=%s'
|
stripUrl = latestUrl + '?p=%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.exiern\.com/comics/[^"])'))
|
imageSearch = compile(tagre("img", "src", r'(http://www\.exiern\.com/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://www\.exiern\.com/[^"]+)', after="prev"))
|
prevSearch = compile(tagre("a", "href", r'(http://www\.exiern\.com/[^"]+)', after="prev"))
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
@ -154,7 +145,6 @@ class ExploitationNow(_BasicScraper):
|
||||||
class Ellerbisms(_BasicScraper):
|
class Ellerbisms(_BasicScraper):
|
||||||
latestUrl = 'http://www.ellerbisms.com/'
|
latestUrl = 'http://www.ellerbisms.com/'
|
||||||
stripUrl = latestUrl + '?p=%s'
|
stripUrl = latestUrl + '?p=%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.ellerbisms\.com/wp-content/uploads/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://www\.ellerbisms\.com/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://www\.ellerbisms\.com/[^"]+)', after="prev"))
|
prevSearch = compile(tagre("a", "href", r'(http://www\.ellerbisms\.com/[^"]+)', after="prev"))
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
|
@ -49,12 +49,12 @@ class FlakyPastry(_BasicScraper):
|
||||||
prevSearch = compile(r'<a href="(.+?)".+?btn_back')
|
prevSearch = compile(r'<a href="(.+?)".+?btn_back')
|
||||||
help = 'Index format: nnnn'
|
help = 'Index format: nnnn'
|
||||||
|
|
||||||
# XXX move to keenspot
|
|
||||||
class Flipside(_BasicScraper):
|
class Flipside(_BasicScraper):
|
||||||
latestUrl = 'http://flipside.keenspot.com/comic.php'
|
latestUrl = 'http://flipside.keenspot.com/comic.php'
|
||||||
stripUrl = latestUrl + '?i=%s'
|
stripUrl = latestUrl + '?i=%s'
|
||||||
imageSearch = compile(r'<IMG SRC="(comic/.+?)"')
|
imageSearch = compile(tagre("img", "src", r'(http://cdn\.flipside\.keenspot\.com/comic/[^"]+)'))
|
||||||
prevSearch = compile(r'<A HREF="(comic.php\?i=\d+?)"><')
|
prevSearch = compile(tagre("a", "href", r'(http://flipside\.keenspot\.com/comic\.php\?i=\d+)', after="prev"))
|
||||||
help = 'Index format: nnnn'
|
help = 'Index format: nnnn'
|
||||||
|
|
||||||
|
|
||||||
|
@ -114,7 +114,8 @@ class FredoAndPidjin(_BasicScraper):
|
||||||
homepage = 'http://www.pidjin.net/'
|
homepage = 'http://www.pidjin.net/'
|
||||||
stripUrl = None
|
stripUrl = None
|
||||||
help = 'Index format: yyyy/mm/dd/name'
|
help = 'Index format: yyyy/mm/dd/name'
|
||||||
imageSearch = compile(tagre('img', 'src', '(http://cdn\.pidjin\.net/wp-content/uploads/\d\d\d\d/\d\d/\d+[^"]+\.png)'))
|
imageSearch = compile(tagre('img', 'src', '(http://cdn\.pidjin\.net/wp-content/uploads/\d+/\d+/[^"]+\.png)'))
|
||||||
|
multipleImagesPerStrip = True
|
||||||
prevSearch = compile(tagre('a', 'href', '([^"]+)')+"Prev</a>")
|
prevSearch = compile(tagre('a', 'href', '([^"]+)')+"Prev</a>")
|
||||||
starter = indirectStarter(homepage,
|
starter = indirectStarter(homepage,
|
||||||
compile(tagre('a', 'href', "("+homepage+r'\d\d\d\d/\d\d/\d\d/[^"]+/)')))
|
compile(tagre('a', 'href', "("+homepage+r'\d\d\d\d/\d\d/\d\d/[^"]+/)')))
|
||||||
|
|
|
@ -12,7 +12,7 @@ _prevSearch = compile(r' <a href="(http://www\.thefallenangel\.co\.uk/.+?)"><img
|
||||||
|
|
||||||
def add(name, shortname):
|
def add(name, shortname):
|
||||||
latestUrl = 'http://www.thefallenangel.co.uk/cgi-bin/%sautokeen/autokeenlite.cgi' % shortname
|
latestUrl = 'http://www.thefallenangel.co.uk/cgi-bin/%sautokeen/autokeenlite.cgi' % shortname
|
||||||
classname = asciify(name)
|
classname = "FallenAngel_" + asciify(name)
|
||||||
globals()[classname] = make_scraper(classname,
|
globals()[classname] = make_scraper(classname,
|
||||||
latestUrl = latestUrl,
|
latestUrl = latestUrl,
|
||||||
stripUrl = latestUrl + '?date=%s',
|
stripUrl = latestUrl + '?date=%s',
|
||||||
|
|
|
@ -34,17 +34,9 @@ class GUComics(_BasicScraper):
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
class GenrezvousPoint(_BasicScraper):
|
|
||||||
latestUrl = 'http://www.genrezvouspoint.com/'
|
|
||||||
stripUrl = latestUrl + 'index.php?comicID=%s'
|
|
||||||
imageSearch = compile(r'<img src=\'(comics/.+?)\'')
|
|
||||||
prevSearch = compile(r' <a[^>]+?href="(.+?)">PREVIOUS</a>')
|
|
||||||
help = 'Index format: nnn'
|
|
||||||
|
|
||||||
|
|
||||||
class GirlGenius(_BasicScraper):
|
class GirlGenius(_BasicScraper):
|
||||||
latestUrl = 'http://girlgeniusonline.com/comic.php'
|
latestUrl = 'http://girlgeniusonline.com/comic.php'
|
||||||
stripUrl = 'http://www.girlgeniusonline.com/comic.php?date=%s'
|
stripUrl = latestUrl + '?date=%s'
|
||||||
imageSearch = compile(r"(/ggmain/strips/.+?)'")
|
imageSearch = compile(r"(/ggmain/strips/.+?)'")
|
||||||
prevSearch = compile(r"</a> <a href=.+?(/comic.php\?date=.+?)'.+?Previous")
|
prevSearch = compile(r"</a> <a href=.+?(/comic.php\?date=.+?)'.+?Previous")
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
@ -99,7 +91,8 @@ class Gunshow(_BasicScraper):
|
||||||
latestUrl = 'http://gunshowcomic.com/'
|
latestUrl = 'http://gunshowcomic.com/'
|
||||||
stripUrl = latestUrl + '%s'
|
stripUrl = latestUrl + '%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://gunshowcomic\.com/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://gunshowcomic\.com/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'[^"]+menu/small/previous\.gif'))
|
multipleImagesPerStrip = True
|
||||||
|
prevSearch = compile(tagre("a", "href", r'([^"]+)') + tagre("img", "src", r'[^"]*menu/small/previous\.gif'))
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
|
|
||||||
|
@ -131,7 +124,7 @@ class GlassHalfEmpty(_BasicScraper):
|
||||||
latestUrl = 'http://www.defectivity.com/ghe/index.php'
|
latestUrl = 'http://www.defectivity.com/ghe/index.php'
|
||||||
stripUrl = latestUrl + '?strip_id=%s'
|
stripUrl = latestUrl + '?strip_id=%s'
|
||||||
imageSearch = compile(r'src="(comics/.+?)"')
|
imageSearch = compile(r'src="(comics/.+?)"')
|
||||||
prevSearch = compile(r'</a><a href="(.+?)"><img src="\.\./images/onback\.jpg"')
|
prevSearch = compile(tagre("a", "href", r'(\?strip_id=\d+)') + tagre("img", "src", r'\.\./images/arrowbuttons/onback\.jpg'))
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -11,12 +11,3 @@ class HorribleVille(_BasicScraper):
|
||||||
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/d/[^"]+)') + tagre("img", "src", r'/images/previous\.png'))
|
prevSearch = compile(tagre("a", "href", r'(/d/[^"]+)') + tagre("img", "src", r'/images/previous\.png'))
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
class HelpDesk(_BasicScraper):
|
|
||||||
latestUrl = 'https://www.eviscerati.org/comics?page=78'
|
|
||||||
stripUrl = 'https://www.eviscerati.org/comics?page=%s'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(https://www\.eviscerati\.org/files/comics/[^"]+)'))
|
|
||||||
prevSearch = compile(tagre("li", "class", r'pager-previous[^"]+') + tagre("a", "href", r'(/comics\?page=%d+)'))
|
|
||||||
help = 'Index format: n'
|
|
||||||
|
|
||||||
|
|
|
@ -9,9 +9,9 @@ from ..util import tagre
|
||||||
|
|
||||||
class IDreamOfAJeanieBottle(_BasicScraper):
|
class IDreamOfAJeanieBottle(_BasicScraper):
|
||||||
latestUrl = 'http://jeaniebottle.com/'
|
latestUrl = 'http://jeaniebottle.com/'
|
||||||
stripUrl = latestUrl + 'review.php?comicID='
|
stripUrl = latestUrl + '?p=%s'
|
||||||
imageSearch = compile(r'(/comics/.+?)"')
|
imageSearch = compile(r'(/comics/.+?)"')
|
||||||
prevSearch = compile(r'First".+?(review.php.+?)".+?prev_a.gif')
|
prevSearch = compile(tagre("a", "href", r'(http://jeaniebottle\.com/\?p=\d+)', after="prev"))
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012 Bastian Kleineidam
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
|
||||||
from re import compile, MULTILINE
|
from re import compile
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
|
|
||||||
|
@ -21,12 +21,3 @@ class JoeAndMonkey(_BasicScraper):
|
||||||
imageSearch = compile(r'"(/comic/[^"]+)"')
|
imageSearch = compile(r'"(/comic/[^"]+)"')
|
||||||
prevSearch = compile(r"<a href='(/\d+)'>Previous")
|
prevSearch = compile(r"<a href='(/\d+)'>Previous")
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
class JoyOfTech(_BasicScraper):
|
|
||||||
latestUrl = 'http://www.geekculture.com/joyoftech/'
|
|
||||||
stripUrl = latestUrl + 'joyarchives/%s.html'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(joyimages/[^"]+)'))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(joyarchives/[^"]+)') + r'.+?Previous', MULTILINE)
|
|
||||||
help = 'Index format: nnn'
|
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
|
|
||||||
from re import compile, IGNORECASE
|
from re import compile, IGNORECASE
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
|
from ..util import tagre
|
||||||
|
|
||||||
class Key(_BasicScraper):
|
class Key(_BasicScraper):
|
||||||
latestUrl = 'http://key.shadilyn.com/latestpage.html'
|
latestUrl = 'http://key.shadilyn.com/latestpage.html'
|
||||||
|
@ -25,7 +25,7 @@ class Krakow(_BasicScraper):
|
||||||
class Kukuburi(_BasicScraper):
|
class Kukuburi(_BasicScraper):
|
||||||
latestUrl = 'http://www.kukuburi.com/current/'
|
latestUrl = 'http://www.kukuburi.com/current/'
|
||||||
stripUrl = 'http://www.kukuburi.com/v2/%s/'
|
stripUrl = 'http://www.kukuburi.com/v2/%s/'
|
||||||
imageSearch = compile(r'img src="(http://www.kukuburi.com/../comics/.+?)"')
|
imageSearch = compile(tagre("img", "src", r'(http://www\.kukuburi\.com/v2/comics/[^"]+)', after='alt="[^"]'))
|
||||||
prevSearch = compile(r'nav-previous.+?"(http.+?)"')
|
prevSearch = compile(r'nav-previous.+?"(http.+?)"')
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
help = 'Index format: yyyy/mm/dd/stripname'
|
||||||
|
|
||||||
|
|
|
@ -16,16 +16,6 @@ class LasLindas(_BasicScraper):
|
||||||
help = 'Index format: stripname'
|
help = 'Index format: stripname'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class LesbianPiratesFromOuterSpace(_BasicScraper):
|
|
||||||
latestUrl = 'http://rosalarian.com/lesbianpirates/'
|
|
||||||
stripUrl = latestUrl + 'index.php?p=%s'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'("comics/[^"]+)'))
|
|
||||||
prevSearch = compile(tagre("a", "href", r'(/index\.php\?id=\d+)', after="prev"))
|
|
||||||
help = 'Index format: n'
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Lint(_BasicScraper):
|
class Lint(_BasicScraper):
|
||||||
latestUrl = 'http://www.purnicellin.com/lint/'
|
latestUrl = 'http://www.purnicellin.com/lint/'
|
||||||
stripUrl = latestUrl + '%s'
|
stripUrl = latestUrl + '%s'
|
||||||
|
@ -34,7 +24,6 @@ class Lint(_BasicScraper):
|
||||||
help = 'Index format: yyyy/mm/dd/num-name'
|
help = 'Index format: yyyy/mm/dd/num-name'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class LookingForGroup(_BasicScraper):
|
class LookingForGroup(_BasicScraper):
|
||||||
latestUrl = 'http://www.lfgcomic.com/page/latest'
|
latestUrl = 'http://www.lfgcomic.com/page/latest'
|
||||||
stripUrl = 'http://www.lfgcomic.com/page/%s'
|
stripUrl = 'http://www.lfgcomic.com/page/%s'
|
||||||
|
@ -51,8 +40,8 @@ class LookingForGroup(_BasicScraper):
|
||||||
class LittleGamers(_BasicScraper):
|
class LittleGamers(_BasicScraper):
|
||||||
latestUrl = 'http://www.little-gamers.com/'
|
latestUrl = 'http://www.little-gamers.com/'
|
||||||
stripUrl = latestUrl + '%s/'
|
stripUrl = latestUrl + '%s/'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.little-gamers\.com/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://little-gamers\.com/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://www.little-gamers.com/[^"]+)', before="comic-nav-prev-link"))
|
prevSearch = compile(tagre("a", "href", r'(http://www\.little-gamers.com/[^"]+)', before="comic-nav-prev-link"))
|
||||||
help = 'Index format: yyyy/mm/dd/name'
|
help = 'Index format: yyyy/mm/dd/name'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -67,8 +67,8 @@ class Melonpool(_BasicScraper):
|
||||||
class Misfile(_BasicScraper):
|
class Misfile(_BasicScraper):
|
||||||
latestUrl = 'http://www.misfile.com/'
|
latestUrl = 'http://www.misfile.com/'
|
||||||
stripUrl = latestUrl + '?date=%s'
|
stripUrl = latestUrl + '?date=%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r"(comics/[^']+)", quote="'"))
|
||||||
prevSearch = compile(tagre("link", "href", r'([^"]+)', before="Previous"))
|
prevSearch = compile(tagre("link", "href", r"([^']+)", quote="'", before="Previous"))
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
|
@ -76,15 +76,6 @@ class MysteriesOfTheArcana(_BasicScraper):
|
||||||
latestUrl = 'http://mysteriesofthearcana.com/'
|
latestUrl = 'http://mysteriesofthearcana.com/'
|
||||||
stripUrl = latestUrl + 'index.php?action=comics&cid=%s'
|
stripUrl = latestUrl + 'index.php?action=comics&cid=%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(image\.php\?type=com&i=[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(image\.php\?type=com&i=[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'()', after="navprevius"))
|
prevSearch = compile(tagre("a", "href", r'(index\.php[^"]+)', after="navprevious"))
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# XXX move to keenspot?
|
|
||||||
class MysticRevolution(_BasicScraper):
|
|
||||||
latestUrl = 'http://mysticrevolution.keenspot.com/'
|
|
||||||
stripUrl = latestUrl + '?cid=%s'
|
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://cdn\.mysticrevolution\.keenspot\.com/comics/[^"]+)'))
|
|
||||||
prevSearch = compile(tagre("link", "rel", r'(\?cid=\d+)', before="prev"))
|
|
||||||
help = 'Index format: n (unpadded)'
|
|
||||||
|
|
|
@ -4,14 +4,14 @@
|
||||||
|
|
||||||
from re import compile
|
from re import compile
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
from ..helpers import indirectStarter, _PHPScraper
|
from ..helpers import indirectStarter, bounceStarter
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
|
|
||||||
|
|
||||||
class NamirDeiter(_BasicScraper):
|
class NamirDeiter(_BasicScraper):
|
||||||
latestUrl = 'http://www.namirdeiter.com/'
|
latestUrl = 'http://www.namirdeiter.com/'
|
||||||
stripUrl = latestUrl + 'comics/index.php?date=%s'
|
stripUrl = latestUrl + 'comics/index.php?date=%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.namirdeiter\.com/comics/\d\.jpg)', quote=""))
|
imageSearch = compile(tagre("img", "src", r"'?(http://www\.namirdeiter\.com/comics/\d+\.jpg)'?", quote=""))
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://www\.namirdeiter\.com/comics/index\.php\?date=\d+)', quote="'")+"Previous")
|
prevSearch = compile(tagre("a", "href", r'(http://www\.namirdeiter\.com/comics/index\.php\?date=\d+)', quote="'")+"Previous")
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
@ -63,17 +63,19 @@ class Nukees(_BasicScraper):
|
||||||
help = 'Index format: yyyymmdd.html'
|
help = 'Index format: yyyymmdd.html'
|
||||||
|
|
||||||
|
|
||||||
|
class NekoTheKitty(_BasicScraper):
|
||||||
class NekoTheKitty(_PHPScraper):
|
basePath = 'http://www.nekothekitty.net/'
|
||||||
basePath = 'http://www.nekothekitty.net/cusp/'
|
stripUrl = basePath + 'comics/%s'
|
||||||
latestUrl = basePath
|
starter = bounceStarter(basePath, compile(tagre("a", "href", r'(http://www\.nekothekitty\.net/comics/[^"]+)') +
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://www.nekothekitty.net/comics/[^"]+)') +
|
tagre("img", "src", r'http://www\.nekothekitty\.net/files/smallnext.png')))
|
||||||
|
imageSearch = compile(tagre("img", "src", r'(http://(?:img\d+|www)\.smackjeeves\.com/images/uploaded/comics/[^"]+)'))
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(http://www\.nekothekitty\.net/comics/[^"]+)') +
|
||||||
tagre("img", "src", r'http://www\.nekothekitty\.net/files/smallprev.png'))
|
tagre("img", "src", r'http://www\.nekothekitty\.net/files/smallprev.png'))
|
||||||
|
help = 'Index format: n/n-name'
|
||||||
|
|
||||||
|
|
||||||
class NichtLustig(_BasicScraper):
|
class NichtLustig(_BasicScraper):
|
||||||
stripUrl = 'http://www.nichtlustig.de/toondb/%s.html'
|
stripUrl = 'http://static.nichtlustig.de/toondb/%s.html'
|
||||||
imageSearch = compile('background-image:url\((http://static\.nichtlustig\.de/comics/full/\d+\.jpg)')
|
imageSearch = compile('background-image:url\((http://static\.nichtlustig\.de/comics/full/\d+\.jpg)')
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
|
prevSearch = compile(tagre("a", "href", r'(http://static\.nichtlustig\.de/toondb/\d+\.html)'))
|
||||||
help = 'Index format: yymmdd'
|
help = 'Index format: yymmdd'
|
||||||
|
@ -101,6 +103,7 @@ class NekkoAndJoruba(_BasicScraper):
|
||||||
class NobodyScores(_BasicScraper):
|
class NobodyScores(_BasicScraper):
|
||||||
latestUrl = 'http://nobodyscores.loosenutstudio.com/'
|
latestUrl = 'http://nobodyscores.loosenutstudio.com/'
|
||||||
stripUrl = latestUrl + 'index.php?id=%s'
|
stripUrl = latestUrl + 'index.php?id=%s'
|
||||||
imageSearch = compile(r'><img src="(http://nobodyscores\.loosenutstudio\.com/comix/.+?)"')
|
imageSearch = compile(tagre("img", "src", r'(http://nobodyscores\.loosenutstudio\.com/comix/[^"]+)'))
|
||||||
|
multipleImagesPerStrip = True
|
||||||
prevSearch = compile(r'<a href="(http://nobodyscores\.loosenutstudio\.com/index.php.+?)">the one before </a>')
|
prevSearch = compile(r'<a href="(http://nobodyscores\.loosenutstudio\.com/index.php.+?)">the one before </a>')
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
|
@ -28,7 +28,7 @@ class OddFish(_BasicScraper):
|
||||||
|
|
||||||
class OnTheEdge(_BasicScraper):
|
class OnTheEdge(_BasicScraper):
|
||||||
latestUrl = 'http://ontheedgecomics.com/'
|
latestUrl = 'http://ontheedgecomics.com/'
|
||||||
stripUrl = 'http://ontheedgecomics.com/comic/ote%s'
|
stripUrl = 'http://ontheedgecomics.com/comic/%s'
|
||||||
imageSearch = compile(r'<img src="(http://ontheedgecomics.com/comics/.+?)"')
|
imageSearch = compile(r'<img src="(http://ontheedgecomics.com/comics/.+?)"')
|
||||||
prevSearch = compile(r'<a href="([^"]+)" rel="prev">')
|
prevSearch = compile(r'<a href="([^"]+)" rel="prev">')
|
||||||
help = 'Index format: nnn (unpadded)'
|
help = 'Index format: nnn (unpadded)'
|
||||||
|
|
|
@ -12,11 +12,10 @@ class PartiallyClips(_BasicScraper):
|
||||||
latestUrl = 'http://partiallyclips.com/'
|
latestUrl = 'http://partiallyclips.com/'
|
||||||
stripUrl = latestUrl + '%s/'
|
stripUrl = latestUrl + '%s/'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://partiallyclips\.com/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://partiallyclips\.com/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://partiallyclips\.com/[^"]+)', before="prev"))
|
prevSearch = compile(tagre("a", "href", r'(http://partiallyclips\.com/[^"]+)', after="prev"))
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
help = 'Index format: yyyy/mm/dd/stripname'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class PastelDefender(_BasicScraper):
|
class PastelDefender(_BasicScraper):
|
||||||
latestUrl = 'http://www.pasteldefender.com/coverbackcover.html'
|
latestUrl = 'http://www.pasteldefender.com/coverbackcover.html'
|
||||||
stripUrl = 'http://www.pasteldefender.com/%s.html'
|
stripUrl = 'http://www.pasteldefender.com/%s.html'
|
||||||
|
@ -25,7 +24,6 @@ class PastelDefender(_BasicScraper):
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class PebbleVersion(_BasicScraper):
|
class PebbleVersion(_BasicScraper):
|
||||||
latestUrl = 'http://www.pebbleversion.com/'
|
latestUrl = 'http://www.pebbleversion.com/'
|
||||||
stripUrl = latestUrl + 'Archives/Strip%s.html'
|
stripUrl = latestUrl + 'Archives/Strip%s.html'
|
||||||
|
@ -37,7 +35,7 @@ class PebbleVersion(_BasicScraper):
|
||||||
class PennyAndAggie(_BasicScraper):
|
class PennyAndAggie(_BasicScraper):
|
||||||
baseUrl = 'http://www.pennyandaggie.com/'
|
baseUrl = 'http://www.pennyandaggie.com/'
|
||||||
stripUrl = baseUrl + 'index.php?p=%s'
|
stripUrl = baseUrl + 'index.php?p=%s'
|
||||||
imageSearch = compile(tagre("a", "href", r'(http://www\.pennyandaggie\.com/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://www\.pennyandaggie\.com/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r"(index\.php\?p=\d+)", quote="'") +
|
prevSearch = compile(tagre("a", "href", r"(index\.php\?p=\d+)", quote="'") +
|
||||||
tagre("img", "src", r'http://pennyandaggie\.com/images/previous_day\.gif', quote=""))
|
tagre("img", "src", r'http://pennyandaggie\.com/images/previous_day\.gif', quote=""))
|
||||||
starter = indirectStarter(baseUrl, prevSearch)
|
starter = indirectStarter(baseUrl, prevSearch)
|
||||||
|
@ -47,20 +45,19 @@ class PennyAndAggie(_BasicScraper):
|
||||||
class PennyArcade(_BasicScraper):
|
class PennyArcade(_BasicScraper):
|
||||||
baseUrl = 'http://penny-arcade.com/comic/'
|
baseUrl = 'http://penny-arcade.com/comic/'
|
||||||
starter = bounceStarter(baseUrl,
|
starter = bounceStarter(baseUrl,
|
||||||
compile(tagre("a", "href", r'(http://penny-arcade\.com/comic/[^"]+)', before="bntNext"))
|
compile(tagre("a", "href", r'(http://penny-arcade\.com/comic/[^"]+)', before="btnNext"))
|
||||||
)
|
)
|
||||||
stripUrl = baseUrl + '%s/'
|
stripUrl = baseUrl + '%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://art\.penny-arcade\.com/photos/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://art\.penny-arcade\.com/photos/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://penny-arcade\.com/comic/[^"]+)', before="bntPrev"))
|
prevSearch = compile(tagre("a", "href", r'(http://penny-arcade\.com/comic/[^"]+)', before="btnPrev"))
|
||||||
help = 'Index format: yyyy/mm/dd'
|
help = 'Index format: yyyy/mm/dd'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def namer(cls, imageUrl, pageUrl):
|
def namer(cls, imageUrl, pageUrl):
|
||||||
yyyy, mm, dd = pageUrl.split('/')[-4:-1]
|
dummy, yyyy, mm, dd = pageUrl.rsplit('/', 3)
|
||||||
return '%04d%02d%02d' % (int(yyyy), int(mm), int(dd))
|
return '%04d%02d%02d' % (int(yyyy), int(mm), int(dd))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class PeppermintSaga(_BasicScraper):
|
class PeppermintSaga(_BasicScraper):
|
||||||
latestUrl = 'http://www.pepsaga.com/'
|
latestUrl = 'http://www.pepsaga.com/'
|
||||||
stripUrl = latestUrl + '?p=%s'
|
stripUrl = latestUrl + '?p=%s'
|
||||||
|
@ -101,7 +98,7 @@ class Precocious(_BasicScraper):
|
||||||
class PvPonline(_BasicScraper):
|
class PvPonline(_BasicScraper):
|
||||||
latestUrl = 'http://pvponline.com/comic'
|
latestUrl = 'http://pvponline.com/comic'
|
||||||
stripUrl = latestUrl + '%s'
|
stripUrl = latestUrl + '%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://newcdn\.pvponline\.com/img/comic/pvp\d+\.jpg)'))
|
imageSearch = compile(tagre("img", "src", r'(http://newcdn\.pvponline\.com/img/comic/pvp[^"]+\.jpg)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://pvponline\.com/comic/[^"]+)', after="Previous"))
|
prevSearch = compile(tagre("a", "href", r'(http://pvponline\.com/comic/[^"]+)', after="Previous"))
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
help = 'Index format: yyyy/mm/dd/stripname'
|
||||||
|
|
||||||
|
@ -135,7 +132,7 @@ evilish = pensAndTales('Evilish', 'http://evilish.pensandtales.com/')
|
||||||
class ProperBarn(_BasicScraper):
|
class ProperBarn(_BasicScraper):
|
||||||
latestUrl = 'http://www.nitrocosm.com/go/gag/'
|
latestUrl = 'http://www.nitrocosm.com/go/gag/'
|
||||||
stripUrl = latestUrl + '%s/'
|
stripUrl = latestUrl + '%s/'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://content\.nitrocosm\.com/gag/\d+.png)'))
|
imageSearch = compile(tagre("img", "src", r'(http://content\.nitrocosm\.com/gag/\d+\.[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://www\.nitrocosm\.com/go/gag/\d+/)', after="nav_btn_previous"))
|
prevSearch = compile(tagre("a", "href", r'(http://www\.nitrocosm\.com/go/gag/\d+/)', after="nav_btn_previous"))
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
|
@ -19,7 +19,7 @@ class RadioactivePanda(_BasicScraper):
|
||||||
# XXX add other comics at http://petitesymphony.com/comics/
|
# XXX add other comics at http://petitesymphony.com/comics/
|
||||||
class Rascals(_BasicScraper):
|
class Rascals(_BasicScraper):
|
||||||
latestUrl = 'http://rascals.petitesymphony.com/'
|
latestUrl = 'http://rascals.petitesymphony.com/'
|
||||||
stripUrl = latestUrl + '/comic/rascals-pg-%s/'
|
stripUrl = latestUrl + 'comic/rascals-pg-%s/'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://rascals\.petitesymphony\.com/files/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://rascals\.petitesymphony\.com/files/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://rascals\.petitesymphony\.com/comic/[^"]+)', after="Previous"))
|
prevSearch = compile(tagre("a", "href", r'(http://rascals\.petitesymphony\.com/comic/[^"]+)', after="Previous"))
|
||||||
help = 'Index format: num'
|
help = 'Index format: num'
|
||||||
|
@ -36,7 +36,7 @@ class RealLife(_BasicScraper):
|
||||||
class RedString(_BasicScraper):
|
class RedString(_BasicScraper):
|
||||||
latestUrl = 'http://www.redstring.strawberrycomics.com/'
|
latestUrl = 'http://www.redstring.strawberrycomics.com/'
|
||||||
stripUrl = latestUrl + 'index.php?id=%s'
|
stripUrl = latestUrl + 'index.php?id=%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'("comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/index\.php\?id=\d+)', after="prev"))
|
prevSearch = compile(tagre("a", "href", r'(/index\.php\?id=\d+)', after="prev"))
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,7 @@ from ..util import tagre
|
||||||
|
|
||||||
|
|
||||||
class SailorsunOrg(_BasicScraper):
|
class SailorsunOrg(_BasicScraper):
|
||||||
latestUrl = 'http://www.sailorsun.org/'
|
latestUrl = 'http://sailorsun.org/'
|
||||||
stripUrl = latestUrl + '?p=%s'
|
stripUrl = latestUrl + '?p=%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://sailorsun\.org/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://sailorsun\.org/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://sailorsun\.org/\?p=\d+)', after="prev"))
|
prevSearch = compile(tagre("a", "href", r'(http://sailorsun\.org/\?p=\d+)', after="prev"))
|
||||||
|
@ -28,7 +28,7 @@ class SamAndFuzzy(_BasicScraper):
|
||||||
class SarahZero(_BasicScraper):
|
class SarahZero(_BasicScraper):
|
||||||
latestUrl = 'http://www.sarahzero.com/'
|
latestUrl = 'http://www.sarahzero.com/'
|
||||||
stripUrl = latestUrl + 'sz_%s.html'
|
stripUrl = latestUrl + 'sz_%s.html'
|
||||||
imageSearch = compile(tagre("img", "src", r'(z_spreads/sz_[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(z_(?:spreads|decoy)/sz_[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(sz_\d+\.html)') + tagre("img", "src", r'z_site/sz_05_nav\.gif'))
|
prevSearch = compile(tagre("a", "href", r'(sz_\d+\.html)') + tagre("img", "src", r'z_site/sz_05_nav\.gif'))
|
||||||
help = 'Index format: nnnn'
|
help = 'Index format: nnnn'
|
||||||
|
|
||||||
|
@ -45,7 +45,8 @@ class SchlockMercenary(_BasicScraper):
|
||||||
latestUrl = 'http://www.schlockmercenary.com/'
|
latestUrl = 'http://www.schlockmercenary.com/'
|
||||||
stripUrl = latestUrl + '%s'
|
stripUrl = latestUrl + '%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://static\.schlockmercenary\.com/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://static\.schlockmercenary\.com/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(/d+)', after="nav-previous"))
|
multipleImagesPerStrip = True
|
||||||
|
prevSearch = compile(tagre("a", "href", r'(/\d+-\d+-\d+)', quote="'", after="nav-previous"))
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
|
@ -102,7 +103,7 @@ class SluggyFreelance(_BasicScraper):
|
||||||
class SodiumEyes(_BasicScraper):
|
class SodiumEyes(_BasicScraper):
|
||||||
latestUrl = 'http://sodiumeyes.com/'
|
latestUrl = 'http://sodiumeyes.com/'
|
||||||
stripUrl = latestUrl + '%s/'
|
stripUrl = latestUrl + '%s/'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://sodiumeyes\.com/comic/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(http://sodiumeyes\.com/comic/[^ ]+)', quote=""))
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://sodiumeyes\.com/[^"]+)', after="prev"))
|
prevSearch = compile(tagre("a", "href", r'(http://sodiumeyes\.com/[^"]+)', after="prev"))
|
||||||
help = 'Index format: yyyy/mm/dd/stripname'
|
help = 'Index format: yyyy/mm/dd/stripname'
|
||||||
|
|
||||||
|
@ -110,9 +111,9 @@ class SodiumEyes(_BasicScraper):
|
||||||
class SpareParts(_BasicScraper):
|
class SpareParts(_BasicScraper):
|
||||||
baseUrl = 'http://www.sparepartscomics.com/'
|
baseUrl = 'http://www.sparepartscomics.com/'
|
||||||
latestUrl = baseUrl + 'comics/?date=20080328'
|
latestUrl = baseUrl + 'comics/?date=20080328'
|
||||||
stripUrl = baseUrl + 'comics/?date=s%'
|
stripUrl = baseUrl + 'comics/index.php?date=%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'http://www\.sparepartscomics\.com/comics/[^"]+'))
|
imageSearch = compile(tagre("img", "src", r'(http://www\.sparepartscomics\.com/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)') + "Previous Comic")
|
prevSearch = compile(tagre("a", "href", r'(index\.php\?date=\d+)', quote="'") + "Previous Comic")
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
@ -127,7 +128,7 @@ class Stubble(_BasicScraper):
|
||||||
class StrawberryDeathCake(_BasicScraper):
|
class StrawberryDeathCake(_BasicScraper):
|
||||||
latestUrl = 'http://strawberrydeathcake.com/'
|
latestUrl = 'http://strawberrydeathcake.com/'
|
||||||
stripUrl = latestUrl + 'archive/%s/'
|
stripUrl = latestUrl + 'archive/%s/'
|
||||||
imageSearch = compile(tagre("img", "src", r'http://strawberrydeathcake\.com/wp-content/webcomic/[^"]+'))
|
imageSearch = compile(tagre("img", "src", r'(http://strawberrydeathcake\.com/wp-content/webcomic/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://strawberrydeathcake\.com/archive/[^"]+)', after="previous"))
|
prevSearch = compile(tagre("a", "href", r'(http://strawberrydeathcake\.com/archive/[^"]+)', after="previous"))
|
||||||
help = 'Index format: stripname'
|
help = 'Index format: stripname'
|
||||||
|
|
||||||
|
@ -144,7 +145,8 @@ class SomethingPositive(_BasicScraper):
|
||||||
latestUrl = 'http://www.somethingpositive.net/'
|
latestUrl = 'http://www.somethingpositive.net/'
|
||||||
stripUrl = latestUrl + 'sp%s.shtml'
|
stripUrl = latestUrl + 'sp%s.shtml'
|
||||||
imageSearch = compile(tagre("img", "src", r'(sp\d+\.png)'))
|
imageSearch = compile(tagre("img", "src", r'(sp\d+\.png)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(sp\d+\.shtml)') + "Previous")
|
prevSearch = compile(tagre("a", "href", r'(sp\d+\.shtml)') +
|
||||||
|
"(?:" + tagre("img", "src", r'images/previous\.gif') + "|Previous)")
|
||||||
help = 'Index format: mmddyyyy'
|
help = 'Index format: mmddyyyy'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -152,7 +154,6 @@ class SomethingPositive(_BasicScraper):
|
||||||
return pageUrl.split('/')[-1].split('.')[0]
|
return pageUrl.split('/')[-1].split('.')[0]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class SexyLosers(_BasicScraper):
|
class SexyLosers(_BasicScraper):
|
||||||
stripUrl = 'http://www.sexylosers.com/%s.html'
|
stripUrl = 'http://www.sexylosers.com/%s.html'
|
||||||
imageSearch = compile(r'<img src\s*=\s*"\s*(comics/[\w\.]+?)"', IGNORECASE)
|
imageSearch = compile(r'<img src\s*=\s*"\s*(comics/[\w\.]+?)"', IGNORECASE)
|
||||||
|
@ -172,7 +173,7 @@ class SexyLosers(_BasicScraper):
|
||||||
class StarCrossdDestiny(_BasicScraper):
|
class StarCrossdDestiny(_BasicScraper):
|
||||||
latestUrl = 'http://www.starcrossd.net/comic.html'
|
latestUrl = 'http://www.starcrossd.net/comic.html'
|
||||||
stripUrl = 'http://www.starcrossd.net/archives/%s.html'
|
stripUrl = 'http://www.starcrossd.net/archives/%s.html'
|
||||||
imageSearch = compile(r'<img src="(http://www\.starcrossd\.net/(?:ch1|strips|book2)/[^"]+)">')
|
imageSearch = compile(tagre("img", "src", r'(http://www\.starcrossd\.net/(?:ch1|strips|book2)/[^"]+)'))
|
||||||
prevSearch = compile(r'<a href="(http://www\.starcrossd\.net/(?:ch1/)?archives/\d+\.html)"[^>]*"[^"]*"[^>]*>prev', IGNORECASE)
|
prevSearch = compile(r'<a href="(http://www\.starcrossd\.net/(?:ch1/)?archives/\d+\.html)"[^>]*"[^"]*"[^>]*>prev', IGNORECASE)
|
||||||
help = 'Index format: nnnnnnnn'
|
help = 'Index format: nnnnnnnn'
|
||||||
|
|
||||||
|
@ -212,11 +213,3 @@ class SMBC(_BasicScraper):
|
||||||
prevSearch = compile(r'131,13,216,84"\n\s+href="(.+?)#comic"\n>', MULTILINE)
|
prevSearch = compile(r'131,13,216,84"\n\s+href="(.+?)#comic"\n>', MULTILINE)
|
||||||
help = 'Index format: nnnn'
|
help = 'Index format: nnnn'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class SomethingLikeLife(_BasicScraper):
|
|
||||||
latestUrl = 'http://www.pulledpunches.com/'
|
|
||||||
stripUrl = latestUrl + '?p=%s'
|
|
||||||
imageSearch = compile(r'<img src="(http://www.pulledpunches.com/comics/[^"]*)"')
|
|
||||||
prevSearch = compile(r'</a> <a href="(http://www.pulledpunches.com/\?p=[^"]*)"><img src="back1.gif"')
|
|
||||||
help = 'Index format: nn'
|
|
||||||
|
|
|
@ -10,7 +10,7 @@ from ..util import tagre
|
||||||
|
|
||||||
class TheNoob(_BasicScraper):
|
class TheNoob(_BasicScraper):
|
||||||
latestUrl = 'http://www.thenoobcomic.com/index.php'
|
latestUrl = 'http://www.thenoobcomic.com/index.php'
|
||||||
stripUrl = latestUrl + '?pos=%'
|
stripUrl = latestUrl + '?pos=%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/headquarters/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(/headquarters/comics/[^"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'(\?pos=\d+)', before="comic_nav_previous_button"))
|
prevSearch = compile(tagre("a", "href", r'(\?pos=\d+)', before="comic_nav_previous_button"))
|
||||||
help = 'Index format: nnnn'
|
help = 'Index format: nnnn'
|
||||||
|
@ -19,7 +19,7 @@ class TheNoob(_BasicScraper):
|
||||||
|
|
||||||
class TheOrderOfTheStick(_BasicScraper):
|
class TheOrderOfTheStick(_BasicScraper):
|
||||||
latestUrl = 'http://www.giantitp.com/comics/oots0863.html'
|
latestUrl = 'http://www.giantitp.com/comics/oots0863.html'
|
||||||
stripUrl = latestUrl + 'comics/oots%s.html'
|
stripUrl = 'http://www.giantitp.com/comics/oots%s.html'
|
||||||
imageSearch = compile(r'<IMG src="(/comics/images/.+?)">')
|
imageSearch = compile(r'<IMG src="(/comics/images/.+?)">')
|
||||||
prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
|
prevSearch = compile(r'<A href="(/comics/oots\d{4}\.html)"><IMG src="/Images/redesign/ComicNav_Back.gif"')
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
@ -31,7 +31,8 @@ class TheParkingLotIsFull(_BasicScraper):
|
||||||
latestUrl = 'http://plif.courageunfettered.com/archive/arch2002.htm'
|
latestUrl = 'http://plif.courageunfettered.com/archive/arch2002.htm'
|
||||||
stripUrl = 'http://plif.courageunfettered.com/archive/arch%s.htm'
|
stripUrl = 'http://plif.courageunfettered.com/archive/arch%s.htm'
|
||||||
imageSearch = compile(r'<td align="center"><A TARGET=_parent HREF="(wc\d+\..+?)">')
|
imageSearch = compile(r'<td align="center"><A TARGET=_parent HREF="(wc\d+\..+?)">')
|
||||||
prevSearch = compile(r'-\s*\n\s*<A HREF="(arch\d{4}\.htm)">\d{4}</A>')
|
multipleImagesPerStrip = True
|
||||||
|
prevSearch = compile(r'\d{4} -\s+<A HREF="(arch\d{4}\.htm)">\d{4}')
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
|
|
||||||
|
|
||||||
|
@ -40,7 +41,7 @@ class TheWotch(_BasicScraper):
|
||||||
latestUrl = 'http://www.thewotch.com/'
|
latestUrl = 'http://www.thewotch.com/'
|
||||||
stripUrl = latestUrl + '?date=%s'
|
stripUrl = latestUrl + '?date=%s'
|
||||||
imageSearch = compile(r"<img.+?src='(comics/.+?)'")
|
imageSearch = compile(r"<img.+?src='(comics/.+?)'")
|
||||||
prevSearch = compile(r"<link rel='Previous' href='(\?date=\d+-\d+-\d+)'")
|
prevSearch = compile(r"<link rel='Previous' href='(/\?date=\d+-\d+-\d+)'")
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -6,12 +6,12 @@ from re import compile
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
from ..scraper import _BasicScraper
|
||||||
from ..helpers import bounceStarter, indirectStarter
|
from ..helpers import bounceStarter, indirectStarter
|
||||||
from ..util import getQueryParams
|
from ..util import getQueryParams, tagre
|
||||||
|
|
||||||
|
|
||||||
class Undertow(_BasicScraper):
|
class Undertow(_BasicScraper):
|
||||||
stripUrl = 'http://undertow.dreamshards.org/%s'
|
stripUrl = 'http://undertow.dreamshards.org/%s'
|
||||||
imageSearch = compile(r'<img src="(.+?)"')
|
imageSearch = compile(tagre("img", "src", r'([^"]+\.jpg)'))
|
||||||
prevSearch = compile(r'href="(.+?)".+?teynpoint')
|
prevSearch = compile(r'href="(.+?)".+?teynpoint')
|
||||||
help = 'Index format: good luck !'
|
help = 'Index format: good luck !'
|
||||||
starter = indirectStarter('http://undertow.dreamshards.org/',
|
starter = indirectStarter('http://undertow.dreamshards.org/',
|
||||||
|
|
|
@ -36,6 +36,7 @@ class WhyTheLongFace(_BasicScraper):
|
||||||
latestUrl = 'http://www.absurdnotions.org/wtlf200709.html'
|
latestUrl = 'http://www.absurdnotions.org/wtlf200709.html'
|
||||||
stripUrl = 'http://www.absurdnotions.org/wtlf%s.html'
|
stripUrl = 'http://www.absurdnotions.org/wtlf%s.html'
|
||||||
imageSearch = compile(r'<img src="(http://www.absurdnotions.org/wtlf.+?|lf\d+.\w{1,4})"', IGNORECASE)
|
imageSearch = compile(r'<img src="(http://www.absurdnotions.org/wtlf.+?|lf\d+.\w{1,4})"', IGNORECASE)
|
||||||
|
multipleImagesPerStrip = True
|
||||||
prevSearch = compile(r'HREF="(.+?)"><IMG SRC="nprev.gif" ')
|
prevSearch = compile(r'HREF="(.+?)"><IMG SRC="nprev.gif" ')
|
||||||
help = 'Index format: yyyymm'
|
help = 'Index format: yyyymm'
|
||||||
|
|
||||||
|
@ -66,7 +67,7 @@ class WorldOfWarcraftEh(_BasicScraper):
|
||||||
class Wulffmorgenthaler(_BasicScraper):
|
class Wulffmorgenthaler(_BasicScraper):
|
||||||
latestUrl = 'http://wumocomicstrip.com/'
|
latestUrl = 'http://wumocomicstrip.com/'
|
||||||
stripUrl = latestUrl + '%s/'
|
stripUrl = latestUrl + '%s/'
|
||||||
imageSearch = compile(tagre("img", "src", r'(/img/strip/thumb/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r'(/img/strip/[^/"]+)'))
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<span>Previous")
|
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<span>Previous")
|
||||||
help = 'Index format: yyyy/mm/dd'
|
help = 'Index format: yyyy/mm/dd'
|
||||||
|
|
||||||
|
|
|
@ -2,14 +2,15 @@
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012 Bastian Kleineidam
|
# Copyright (C) 2012 Bastian Kleineidam
|
||||||
|
|
||||||
from re import compile, IGNORECASE
|
from re import compile
|
||||||
|
from ..util import tagre
|
||||||
from ..scraper import make_scraper
|
from ..scraper import make_scraper
|
||||||
from ..helpers import bounceStarter
|
from ..helpers import bounceStarter
|
||||||
|
|
||||||
|
|
||||||
_imageSearch = compile(r'SRC="(http://www\.wlpcomics\.com/adult/.+?|http://www\.wlpcomics\.com/general/.+?)"', IGNORECASE)
|
_imageSearch = compile(tagre("img", "src", r'(http://www\.wlpcomics\.com/(?:adult|general)/[^"]+)'))
|
||||||
_prevSearch = compile(r'</a> <A HREF="(\w+.html)">Previous Page</a>', IGNORECASE)
|
_prevSearch = compile(tagre("a", "href", r'(\w+.html)') + 'Previous')
|
||||||
_nextSearch = compile(r'</a> <A HREF="(\w+.html)">Next Page</a>', IGNORECASE)
|
_nextSearch = compile(tagre("a", "href", r'(\w+.html)') + 'Next')
|
||||||
|
|
||||||
|
|
||||||
def add(name, path):
|
def add(name, path):
|
||||||
|
@ -35,4 +36,3 @@ add('ChichiChan', 'adult/chichi/')
|
||||||
add('ChocolateMilkMaid', 'adult/cm/')
|
add('ChocolateMilkMaid', 'adult/cm/')
|
||||||
add('MaidAttack', 'general/maidattack/')
|
add('MaidAttack', 'general/maidattack/')
|
||||||
add('ShadowChasers', 'general/shadowchasers/')
|
add('ShadowChasers', 'general/shadowchasers/')
|
||||||
add('Stellar', 'adult/stellar/')
|
|
||||||
|
|
|
@ -19,8 +19,8 @@ class YAFGC(_BasicScraper):
|
||||||
class YouSayItFirst(_BasicScraper):
|
class YouSayItFirst(_BasicScraper):
|
||||||
latestUrl = 'http://www.yousayitfirst.com/'
|
latestUrl = 'http://www.yousayitfirst.com/'
|
||||||
stripUrl = latestUrl + 'comics/index.php?date=%s'
|
stripUrl = latestUrl + 'comics/index.php?date=%s'
|
||||||
imageSearch = compile(tagre("img", "src", r'(http://www\.yousayitfirst\.com/comics/[^"]+)'))
|
imageSearch = compile(tagre("img", "src", r"(http://www\.yousayitfirst\.com/comics/[^>']+)", quote="'?"))
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://www\.yousayitfirst\.com/comics/index\.php\?date=\d+)') + "Previous")
|
prevSearch = compile(tagre("a", "href", r'(http://www\.yousayitfirst\.com/comics/index\.php\?date=\d+)', quote="'") + "Previous")
|
||||||
help = 'Index format: yyyymmdd'
|
help = 'Index format: yyyymmdd'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,11 @@ class Zapiro(_BasicScraper):
|
||||||
prevSearch = compile(tagre("a", "href", r'(http://mg\.co\.za/cartoon/[^"]+)')+"Older")
|
prevSearch = compile(tagre("a", "href", r'(http://mg\.co\.za/cartoon/[^"]+)')+"Older")
|
||||||
help = 'Index format: yyyy-mm-dd-stripname'
|
help = 'Index format: yyyy-mm-dd-stripname'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def namer(cls, imageUrl, pageUrl):
|
||||||
|
name = imageUrl.split('/')[-3]
|
||||||
|
return name
|
||||||
|
|
||||||
|
|
||||||
class ZombieHunters(_BasicScraper):
|
class ZombieHunters(_BasicScraper):
|
||||||
latestUrl = 'http://www.thezombiehunters.com/'
|
latestUrl = 'http://www.thezombiehunters.com/'
|
||||||
|
|
|
@ -78,6 +78,7 @@ class _BasicScraper(object):
|
||||||
while url:
|
while url:
|
||||||
imageUrls, prevUrl = fetchUrls(url, self.imageSearch, self.prevSearch)
|
imageUrls, prevUrl = fetchUrls(url, self.imageSearch, self.prevSearch)
|
||||||
prevUrl = self.prevUrlModifier(prevUrl)
|
prevUrl = self.prevUrlModifier(prevUrl)
|
||||||
|
out.write("Matched previous URL %s" % prevUrl, 2)
|
||||||
seen_urls.add(url)
|
seen_urls.add(url)
|
||||||
yield self.getComicStrip(url, imageUrls)
|
yield self.getComicStrip(url, imageUrls)
|
||||||
# avoid recursive URL loops
|
# avoid recursive URL loops
|
||||||
|
|
|
@ -163,7 +163,7 @@ def normaliseURL(url):
|
||||||
|
|
||||||
pu = list(urlparse.urlparse(url))
|
pu = list(urlparse.urlparse(url))
|
||||||
segments = pu[2].split('/')
|
segments = pu[2].split('/')
|
||||||
while segments and segments[0] == '':
|
while segments and segments[0] in ('', '..'):
|
||||||
del segments[0]
|
del segments[0]
|
||||||
pu[2] = '/' + '/'.join(segments).replace(' ', '%20')
|
pu[2] = '/' + '/'.join(segments).replace(' ', '%20')
|
||||||
# remove leading '&' from query
|
# remove leading '&' from query
|
||||||
|
|
|
@ -16,14 +16,137 @@ json_file = __file__.replace(".py", ".json")
|
||||||
|
|
||||||
# names of comics to exclude
|
# names of comics to exclude
|
||||||
exclude_comics = [
|
exclude_comics = [
|
||||||
"Twonks_and_Plonkers", # broken images, no real content
|
|
||||||
"U_Chuu_No_Hoshi_Hotoshi_Tsuko", # broken images
|
|
||||||
"Red_Dog_Venue", # start page is broken
|
|
||||||
"Monster_Lover", # start page is broken
|
"Monster_Lover", # start page is broken
|
||||||
"Legacy_of_Blaze", # broken images
|
"Legacy_of_Blaze", # broken images
|
||||||
"Dead_Strangers", # broken images
|
"Dead_Strangers", # broken images
|
||||||
"Crack", # broken images
|
"Crack", # broken images
|
||||||
"Iron_Wolf", # broken images
|
"Iron_Wolf", # broken images
|
||||||
|
"A_Call_to_Destiny__NC_17", # start page requires login
|
||||||
|
"A_Call_to_Destiny_Reloaded", # start page requires login
|
||||||
|
"A_Day_in_the_Life_for_Erik", # broken images
|
||||||
|
"A_Fairly_Twisted_Reality", # start page requires login
|
||||||
|
"Al_and_Scout", # broken images
|
||||||
|
"ANGELOU_____Las_aventuras_de_Nikole", # broken images
|
||||||
|
"Apartment_408_Full_Size", # broken images
|
||||||
|
"Apple_Valley", # broken images
|
||||||
|
"Apt_408_Minis", # broken images
|
||||||
|
"atxs", # broken images
|
||||||
|
"A_Word_Of_Wisdom", # broken images
|
||||||
|
"Brathalla", # broken images
|
||||||
|
"Binary_Souls_Other_Dimensions", # broken images
|
||||||
|
"BK_Shattered_Hate", # broken images
|
||||||
|
"Chomp", # broken images
|
||||||
|
"Chu_and_Kenny", # broken images
|
||||||
|
"Coga_Suro_2", # broken images
|
||||||
|
"Creepy_Girl_and_Her_Zombie_Dog", # broken images
|
||||||
|
"CuoreVoodoo", # broken images
|
||||||
|
"dairyaire", # broken images
|
||||||
|
"DIS", # broken images
|
||||||
|
"Dot_TXT", # broken images
|
||||||
|
"Dreadnought_Invasion_Six", # broken images
|
||||||
|
"Emerald_Winter", # broken images
|
||||||
|
"Enter_the_Duck_2", # broken images
|
||||||
|
"ffff", # broken images
|
||||||
|
"Function_Over_Fashion", # broken images
|
||||||
|
"Funday_Morning", # broken images
|
||||||
|
"greys_journey", # broken images
|
||||||
|
"Head_over_Heart", # broken images
|
||||||
|
"Hurrocks_Fardel", # broken images
|
||||||
|
"Bhaddland", # start page requires login
|
||||||
|
"Bouncing_Orbs_of_Beauty", # start page requires login
|
||||||
|
"Busty_Solar", # start page requires login
|
||||||
|
"Illusional_Beauty", # broken images
|
||||||
|
"Indigo_Bunting__Vampire", # start page requires login
|
||||||
|
"Irrumator", # start page requires login
|
||||||
|
"Its_A_Boy_Thing", # start page requires login
|
||||||
|
"Kokuahiru_comics", # start page requires login
|
||||||
|
"Inside_OuT", # broken images
|
||||||
|
"Journey_to_Raifina", # broken images
|
||||||
|
"KALA_dan", # broken images
|
||||||
|
"Live_to_tell", # start page requires login
|
||||||
|
"Locoma", # broken images
|
||||||
|
"London_Underworld", # broken images
|
||||||
|
"Louder_Than_Bombs", # broken images
|
||||||
|
"Lucky_Dawg", # broken images
|
||||||
|
"Mario_in_Johto", # broken images
|
||||||
|
"Master", # start page requires login
|
||||||
|
"Mastermind_BTRN", # broken images
|
||||||
|
"MAYA_____The_legend_of_Wolf", # broken images
|
||||||
|
"Megaman_Zero", # broken images
|
||||||
|
"Monster_Lover_Destinys_Path", # start page requires login
|
||||||
|
"M_Organ_Art", # start page requires login
|
||||||
|
"Morning_Squirtz", # start page requires login
|
||||||
|
"MOSAIC", # broken images
|
||||||
|
"My_Angel_and_My_Devil", # broken images
|
||||||
|
"Nemution_Jewel", # start page requires login
|
||||||
|
"Nemution_Redux", # start page requires login
|
||||||
|
"New_Pages", # broken images
|
||||||
|
"Ninja_Shizatch", # broken images
|
||||||
|
"Normalcy_is_for_Wimps", # broken images
|
||||||
|
"MIKYAGU", # broken images
|
||||||
|
"One_Third_Of_Your_Life_Is_Spent_Sleeping_One_Third_Of_Your_Life_Is_Spent_Working_And_Half_Of_One_Third_Is_Spent_Waiting_The_Question_Is_It_Really_Your_Life", # broken images
|
||||||
|
"OTENBA_Files", # start page requires login
|
||||||
|
"Panacea", # start page requires login
|
||||||
|
"Parker_Lot", # broken images
|
||||||
|
"Peter_And_The_Wolf", # start page requires login
|
||||||
|
"Perspectives", # broken images
|
||||||
|
"Pokemon_Sinnoh_Surfer", # broken images
|
||||||
|
"Pokemon_World_Trainers", # broken images
|
||||||
|
"Potpourri_of_Lascivious_Whimsy", # start page requires login
|
||||||
|
"Pr0nCrest", # start page requires login
|
||||||
|
"punished_girls", # start page requires login
|
||||||
|
"Powerjeff", # broken images
|
||||||
|
"Comicarotica", # start page requires login
|
||||||
|
"Dark_Sisters", # start page requires login
|
||||||
|
"Death_P0rn", # start page requires login
|
||||||
|
"Dreams_in_Synergy", # broken images
|
||||||
|
"GNight_Shade", # start page requires login
|
||||||
|
"GRIND", # start page requires login
|
||||||
|
"HUSS", # start page requires login
|
||||||
|
"Red_Dog_Venue", # start page is broken
|
||||||
|
"rubber_girls", # start page requires login
|
||||||
|
"Robomeks", # broken images
|
||||||
|
"Robot_Friday", # broken images
|
||||||
|
"SFA", # start page requires login
|
||||||
|
"Shadow_Root", # start page requires login
|
||||||
|
"Shiro_Karasu", # start page requires login
|
||||||
|
"Shelter_of_Wings", # broken images
|
||||||
|
"Some_Notes", # broken images
|
||||||
|
"Sonic_Advanced_Online", # broken images
|
||||||
|
"Sonic_and_tails_corner", # broken images
|
||||||
|
"Sonic_Unreal", # broken images
|
||||||
|
"Tales_of_Schlock", # start page requires login
|
||||||
|
"Splices_of_Life", # broken images
|
||||||
|
"STARSEARCHERS", # broken images
|
||||||
|
"Ted_The_Terrible_Superhero", # broken images
|
||||||
|
"Terra_online_comic", # broken images
|
||||||
|
"The_Auragon_Base", # broken images
|
||||||
|
"The_Bend", # broken images
|
||||||
|
"The_Chronicles_of_Drew", # broken images
|
||||||
|
"The_Devils_Horn", # broken images
|
||||||
|
"The_Dragon_and_the_Lemur", # start page requires login
|
||||||
|
"The_Fighting_Stranger", # broken images
|
||||||
|
"The_Mighty_Omega", # broken images
|
||||||
|
"The_Misadventures_of_Everyone", # start page requires login
|
||||||
|
"The_NEW_Life_Of_TimmY", # broken images
|
||||||
|
"The_SSA", # broken images
|
||||||
|
"Tony_The_Hedgehog", # broken images
|
||||||
|
"Trapped_in_a_Comic", # start page requires login
|
||||||
|
"Unsound_of_Mind", # broken images
|
||||||
|
"Vampire_Chronicles__Dark_Lust", # start page requires login
|
||||||
|
"WarMage", # start page requires login
|
||||||
|
"Watashi_No_Ame", # broken images
|
||||||
|
"Weave", # broken images
|
||||||
|
"Weirdlings", # template error
|
||||||
|
"Welcome_To_Border_City", # broken images
|
||||||
|
"what_comes_first", # start page requires login
|
||||||
|
"Within_Shadows", # broken images
|
||||||
|
"Xolta", # start page requires login
|
||||||
|
"XTIN__The_Dragons_Dream_World", # start page requires login
|
||||||
|
"X_UP", # start page requires login
|
||||||
|
"Zandars_Saga", # start page requires login
|
||||||
|
"Twonks_and_Plonkers", # broken images, no real content
|
||||||
|
"U_Chuu_No_Hoshi_Hotoshi_Tsuko", # broken images
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,22 @@ url_matcher = re.compile(tagre("a", "href", r'(/[^"]+)', after="alpha_list") + r
|
||||||
# names of comics to exclude
|
# names of comics to exclude
|
||||||
exclude_comics = [
|
exclude_comics = [
|
||||||
"FrikkFrakkAndFrank", # too few comics
|
"FrikkFrakkAndFrank", # too few comics
|
||||||
|
"Apocalypseharry", # too few comics
|
||||||
|
"BatkidandBatrat", # too few comics
|
||||||
|
"BETWEENTHELINES", # comic unavailable
|
||||||
|
"Bonner", # missing page
|
||||||
|
"Buster", # comic unavailable
|
||||||
|
"DALTONDOG", # comic unavailable
|
||||||
|
"DellAndSteve", # too few comics
|
||||||
|
"Dilbert", # redirect
|
||||||
|
"InkeeDoodles", # comic unavailable
|
||||||
|
"MaggiesComics", # too few comics
|
||||||
|
"OfMiceandMud", # too few comics
|
||||||
|
"OysterWar", # too few comics
|
||||||
|
"PIGTIMES", # comic unavailable
|
||||||
|
"PS", # comic unavailable
|
||||||
|
"SherpaAid", # comic unavailable
|
||||||
|
"SparComics", # comic unavailable
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -21,27 +21,59 @@ num_matcher = re.compile(r'Number of Days: (\d+)')
|
||||||
|
|
||||||
# names of comics to exclude
|
# names of comics to exclude
|
||||||
exclude_comics = [
|
exclude_comics = [
|
||||||
|
"10", # page is gone
|
||||||
|
"54sinRed", # page is 403 forbidden
|
||||||
|
"6D4", # redirected to another page
|
||||||
|
"AaaSoCAwesomenessandaSliceofCheese", # broken images
|
||||||
|
"AcrossthePond", # page moved
|
||||||
|
"ACDeceptibotscomic", # no images
|
||||||
|
"AdamandSei", # page has 403 forbidden
|
||||||
|
"AdamsRoadGang", # page is gone
|
||||||
|
"ADVENTURERS", # page is gone
|
||||||
|
"AiYaiYai", # page moved
|
||||||
|
"AlltheCommies", # missing images
|
||||||
|
"AltaModaMetro", # page redirected
|
||||||
|
"AltarGirl", # page redirected
|
||||||
|
"Amerika", # no images
|
||||||
|
"Angels", # page has 403 forbidden
|
||||||
|
"AngryDMonkey", # page redirected
|
||||||
|
"Angst", # page redirected
|
||||||
|
"Animenifesto", # too few images
|
||||||
|
"Anna", # no images
|
||||||
|
"Arcana", # archive broken
|
||||||
|
"Area15", # no images
|
||||||
|
"BaidheTu", # no images
|
||||||
|
"BasilFlint", # page redirected
|
||||||
|
"beerkada", # no images
|
||||||
|
"BelovedLeader", # broken images
|
||||||
|
"BigMouthComics", # page does not follow standard layout
|
||||||
|
"", # page is gone
|
||||||
|
"", # page is gone
|
||||||
|
"", # page is gone
|
||||||
|
"BlueZombie", # broken page
|
||||||
|
"BoomerExpress", # redirection to another page
|
||||||
|
"DungeonDamage", # page does not follow standard layout
|
||||||
|
"EarthRiser", # redirects to a new page
|
||||||
|
"FaultyLogic", # page does not follow standard layout
|
||||||
|
"GoForIt", # page is gone
|
||||||
"JuvenileDiversion", # page moved
|
"JuvenileDiversion", # page moved
|
||||||
"JustWeird", # page has 403 forbidden
|
"JustWeird", # page has 403 forbidden
|
||||||
|
"Michikomonogatari", # page does not follow standard layout
|
||||||
"MobileMadness", # page does not follow standard layout
|
"MobileMadness", # page does not follow standard layout
|
||||||
"KnightsOfTheNexus", # page does not follow standard layout
|
"KnightsOfTheNexus", # page does not follow standard layout
|
||||||
"RogerAndDominic", # page does not follow standard layout
|
"RogerAndDominic", # page does not follow standard layout
|
||||||
"TheAvatar", # page does not follow standard layout
|
|
||||||
"Michikomonogatari", # page does not follow standard layout
|
|
||||||
"DungeonDamage", # page does not follow standard layout
|
|
||||||
"SaveMeGebus", # page does not follow standard layout
|
"SaveMeGebus", # page does not follow standard layout
|
||||||
"BlueZombie", # broken page
|
"TheAvatar", # page does not follow standard layout
|
||||||
"BoomerExpress", # redirection to another page
|
|
||||||
"FaultyLogic", # page does not follow standard layout
|
|
||||||
"EarthRiser", # redirects to a new page
|
|
||||||
"GoForIt", # page is gone
|
|
||||||
"ACDeceptibotscomic", # no images
|
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# links to last valid strips
|
||||||
url_overrides = {
|
url_overrides = {
|
||||||
# link to last valid strip
|
|
||||||
"BallofYarn": "http://ballofyarn.comicgenesis.com/d/20020624.html",
|
"BallofYarn": "http://ballofyarn.comicgenesis.com/d/20020624.html",
|
||||||
|
"AmazonSpaceRangers": "http://amazons.comicgenesis.com/d/20051015.html",
|
||||||
|
"ArroganceinSimplicity": "http://arrogance.comicgenesis.com/d/20030217.html",
|
||||||
|
"ATasteofEvil": "http://atasteofevil.comicgenesis.com/d/20050314.html",
|
||||||
|
"": "",
|
||||||
|
"": "",
|
||||||
}
|
}
|
||||||
|
|
||||||
def handle_url(url, res):
|
def handle_url(url, res):
|
||||||
|
|
|
@ -33,30 +33,31 @@ class _ComicTester(TestCase):
|
||||||
# at least 5 strips from the start, and find strip images
|
# at least 5 strips from the start, and find strip images
|
||||||
# on at least 4 pages.
|
# on at least 4 pages.
|
||||||
scraperobj = self.scraperclass()
|
scraperobj = self.scraperclass()
|
||||||
num = empty = 0
|
num = 0
|
||||||
max_strips = 5
|
max_strips = 5
|
||||||
for strip in islice(scraperobj.getAllStrips(), 0, max_strips):
|
for strip in islice(scraperobj.getAllStrips(), 0, max_strips):
|
||||||
images = 0
|
images = 0
|
||||||
for image in strip.getImages():
|
for image in strip.getImages():
|
||||||
images += 1
|
images += 1
|
||||||
self.save(image)
|
self.save(image)
|
||||||
if images == 0:
|
self.check(images > 0, 'failed to find images at %s' % strip.stripUrl)
|
||||||
empty += 1
|
if not self.scraperclass.multipleImagesPerStrip:
|
||||||
|
self.check(images == 1, 'found %d instead of 1 image at %s' % (images, strip.stripUrl))
|
||||||
if num > 0:
|
if num > 0:
|
||||||
self.check_stripurl(strip)
|
self.check_stripurl(strip)
|
||||||
num += 1
|
num += 1
|
||||||
if self.scraperclass.prevSearch:
|
if self.scraperclass.prevSearch:
|
||||||
self.check(num >= 4, 'traversal failed after %d strips, check the prevSearch pattern.' % num)
|
self.check(num >= 4, 'traversal failed after %d strips, check the prevSearch pattern at %s.' % (num, strip.stripUrl))
|
||||||
# check that at exactly or for multiple pages at least 5 images are saved
|
# Check that exactly 5 images are saved, or at least 5 for scrapers with multiple images per strip.
|
||||||
|
# This is different than the image number check above since it checks saved files,
|
||||||
|
# i.e. it detects duplicate filenames.
|
||||||
saved_images = self.get_saved_images()
|
saved_images = self.get_saved_images()
|
||||||
num_images = len(saved_images)
|
num_images = len(saved_images)
|
||||||
|
attrs = (num_images, saved_images, max_strips, self.tmpdir)
|
||||||
if self.scraperclass.multipleImagesPerStrip:
|
if self.scraperclass.multipleImagesPerStrip:
|
||||||
self.check(num_images >= max_strips,
|
self.check(num_images >= max_strips, 'saved %d %s instead of at least %d images in %s' % attrs)
|
||||||
'saved %d %s instead of at least %d images in %s' % (num_images, saved_images, max_strips, self.tmpdir))
|
|
||||||
else:
|
else:
|
||||||
self.check(num_images == max_strips,
|
self.check(num_images == max_strips, 'saved %d %s instead of %d images in %s' % attrs)
|
||||||
'saved %d %s instead of %d images in %s' % (num_images, saved_images, max_strips, self.tmpdir))
|
|
||||||
self.check(empty == 0, 'failed to find images on %d pages, check the imageSearch pattern.' % empty)
|
|
||||||
|
|
||||||
def check_stripurl(self, strip):
|
def check_stripurl(self, strip):
|
||||||
if not self.scraperclass.stripUrl:
|
if not self.scraperclass.stripUrl:
|
||||||
|
|
Loading…
Reference in a new issue