Provide HTML page data for image URL modifier function.
This commit is contained in:
parent
e9e4b278fb
commit
0e5c59133c
2 changed files with 7 additions and 4 deletions
|
@ -271,7 +271,7 @@ class BrentalFlossFit(BrentalFloss):
|
||||||
return prevUrl.replace("\n", "")
|
return prevUrl.replace("\n", "")
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def imageUrlModifier(cls, url):
|
def imageUrlModifier(cls, url, data):
|
||||||
if url:
|
if url:
|
||||||
return url.replace("\n", "")
|
return url.replace("\n", "")
|
||||||
|
|
||||||
|
|
|
@ -111,7 +111,10 @@ class _BasicScraper(object):
|
||||||
def getComicStrip(self, url, data, baseUrl):
|
def getComicStrip(self, url, data, baseUrl):
|
||||||
"""Get comic strip downloader for given URL and data."""
|
"""Get comic strip downloader for given URL and data."""
|
||||||
imageUrls = fetchUrls(url, data, baseUrl, self.imageSearch)
|
imageUrls = fetchUrls(url, data, baseUrl, self.imageSearch)
|
||||||
imageUrls = set(map(self.imageUrlModifier, imageUrls))
|
# map modifier function on image URLs
|
||||||
|
imageUrls = [self.imageUrlModifier(x, data) for x in imageUrls]
|
||||||
|
# remove duplicate URLs
|
||||||
|
imageUrls = set(imageUrls)
|
||||||
if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
|
if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
|
||||||
patterns = [x.pattern for x in makeSequence(self.imageSearch)]
|
patterns = [x.pattern for x in makeSequence(self.imageSearch)]
|
||||||
out.warn(u"found %d images instead of 1 at %s with patterns %s" % (len(imageUrls), url, patterns))
|
out.warn(u"found %d images instead of 1 at %s with patterns %s" % (len(imageUrls), url, patterns))
|
||||||
|
@ -234,10 +237,10 @@ class _BasicScraper(object):
|
||||||
return prevUrl
|
return prevUrl
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def imageUrlModifier(cls, imageUrl):
|
def imageUrlModifier(cls, imageUrl, data):
|
||||||
"""Optional modification of parsed image URLs. Useful if the URL
|
"""Optional modification of parsed image URLs. Useful if the URL
|
||||||
needs to be fixed before usage. The default implementation does
|
needs to be fixed before usage. The default implementation does
|
||||||
not modify the URL.
|
not modify the URL. The given data is the URL page data.
|
||||||
"""
|
"""
|
||||||
return imageUrl
|
return imageUrl
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue