Add firstStripUrl to scrapers.
This commit is contained in:
parent
312d117ff3
commit
23a1acd398
1 changed file with 9 additions and 0 deletions
|
@ -16,6 +16,9 @@ class _BasicScraper(object):
|
||||||
@type stripUrl: C{string}
|
@type stripUrl: C{string}
|
||||||
@cvar stripUrl: A string that is interpolated with the strip index
|
@cvar stripUrl: A string that is interpolated with the strip index
|
||||||
to yield the URL for a particular strip.
|
to yield the URL for a particular strip.
|
||||||
|
@type firstStripUrl: C{string} optional
|
||||||
|
@cvar firstStripUrl: Stop searching for previous URLs at this URL.
|
||||||
|
If not set and no previous URL is found, a warning is printed.
|
||||||
@type imageSearch: C{regex}
|
@type imageSearch: C{regex}
|
||||||
@cvar imageSearch: A compiled regex that will locate the strip image URL
|
@cvar imageSearch: A compiled regex that will locate the strip image URL
|
||||||
when applied to the strip page.
|
when applied to the strip page.
|
||||||
|
@ -24,6 +27,9 @@ class _BasicScraper(object):
|
||||||
previous strip when applied to a strip page.
|
previous strip when applied to a strip page.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
# stop at this URL
|
||||||
|
firstStripUrl = None
|
||||||
|
|
||||||
# if more than one image per URL is expected
|
# if more than one image per URL is expected
|
||||||
multipleImagesPerStrip = False
|
multipleImagesPerStrip = False
|
||||||
|
|
||||||
|
@ -101,6 +107,9 @@ class _BasicScraper(object):
|
||||||
data, baseUrl = getPageContent(url, self.session)
|
data, baseUrl = getPageContent(url, self.session)
|
||||||
imageUrls = set(fetchUrls(url, data, baseUrl, self.imageSearch))
|
imageUrls = set(fetchUrls(url, data, baseUrl, self.imageSearch))
|
||||||
yield self.getComicStrip(url, imageUrls)
|
yield self.getComicStrip(url, imageUrls)
|
||||||
|
if self.firstStripUrl == url:
|
||||||
|
out.debug("Stop at first URL %s" % url)
|
||||||
|
break
|
||||||
prevUrl = None
|
prevUrl = None
|
||||||
if self.prevSearch:
|
if self.prevSearch:
|
||||||
try:
|
try:
|
||||||
|
|
Loading…
Reference in a new issue