Add firstStripUrl to scrapers.

This commit is contained in:
Bastian Kleineidam 2013-02-13 19:59:59 +01:00
parent 312d117ff3
commit 23a1acd398

View file

@ -16,6 +16,9 @@ class _BasicScraper(object):
@type stripUrl: C{string} @type stripUrl: C{string}
@cvar stripUrl: A string that is interpolated with the strip index @cvar stripUrl: A string that is interpolated with the strip index
to yield the URL for a particular strip. to yield the URL for a particular strip.
@type firstStripUrl: C{string} optional
@cvar firstStripUrl: Stop searching for previous URLs at this URL.
If not set and no previous URL is found, a warning is printed.
@type imageSearch: C{regex} @type imageSearch: C{regex}
@cvar imageSearch: A compiled regex that will locate the strip image URL @cvar imageSearch: A compiled regex that will locate the strip image URL
when applied to the strip page. when applied to the strip page.
@ -24,6 +27,9 @@ class _BasicScraper(object):
previous strip when applied to a strip page. previous strip when applied to a strip page.
''' '''
# stop at this URL
firstStripUrl = None
# if more than one image per URL is expected # if more than one image per URL is expected
multipleImagesPerStrip = False multipleImagesPerStrip = False
@ -101,6 +107,9 @@ class _BasicScraper(object):
data, baseUrl = getPageContent(url, self.session) data, baseUrl = getPageContent(url, self.session)
imageUrls = set(fetchUrls(url, data, baseUrl, self.imageSearch)) imageUrls = set(fetchUrls(url, data, baseUrl, self.imageSearch))
yield self.getComicStrip(url, imageUrls) yield self.getComicStrip(url, imageUrls)
if self.firstStripUrl == url:
out.debug("Stop at first URL %s" % url)
break
prevUrl = None prevUrl = None
if self.prevSearch: if self.prevSearch:
try: try: