From 0e5c59133c223a512eca892b52332ed03b3cbda3 Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Wed, 4 Dec 2013 17:54:55 +0100 Subject: [PATCH] Provide HTML page data for image URL modifier function. --- dosagelib/plugins/b.py | 2 +- dosagelib/scraper.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dosagelib/plugins/b.py b/dosagelib/plugins/b.py index 0fd95a721..ddeb1f9cc 100644 --- a/dosagelib/plugins/b.py +++ b/dosagelib/plugins/b.py @@ -271,7 +271,7 @@ class BrentalFlossFit(BrentalFloss): return prevUrl.replace("\n", "") @classmethod - def imageUrlModifier(cls, url): + def imageUrlModifier(cls, url, data): if url: return url.replace("\n", "") diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py index 234db1b8f..b9336f838 100644 --- a/dosagelib/scraper.py +++ b/dosagelib/scraper.py @@ -111,7 +111,10 @@ class _BasicScraper(object): def getComicStrip(self, url, data, baseUrl): """Get comic strip downloader for given URL and data.""" imageUrls = fetchUrls(url, data, baseUrl, self.imageSearch) - imageUrls = set(map(self.imageUrlModifier, imageUrls)) + # map modifier function on image URLs + imageUrls = [self.imageUrlModifier(x, data) for x in imageUrls] + # remove duplicate URLs + imageUrls = set(imageUrls) if len(imageUrls) > 1 and not self.multipleImagesPerStrip: patterns = [x.pattern for x in makeSequence(self.imageSearch)] out.warn(u"found %d images instead of 1 at %s with patterns %s" % (len(imageUrls), url, patterns)) @@ -234,10 +237,10 @@ class _BasicScraper(object): return prevUrl @classmethod - def imageUrlModifier(cls, imageUrl): + def imageUrlModifier(cls, imageUrl, data): """Optional modification of parsed image URLs. Useful if the URL needs to be fixed before usage. The default implementation does - not modify the URL. + not modify the URL. The given data is the URL page data. """ return imageUrl