From 3a929ceea6cf41e7ec465161cd9db59bb595cea4 Mon Sep 17 00:00:00 2001
From: Bastian Kleineidam
Date: Thu, 24 Jul 2014 20:49:57 +0200
Subject: [PATCH] Allow comic text to be optional. Patch from TobiX

---
Note: with optional=True, fetchText() falls through and implicitly returns
None when the pattern does not match; the caller in scraper.py guards the
result with `if text:` before unescaping it.

 dosagelib/scraper.py | 7 ++++++-
 dosagelib/util.py    | 5 +++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py
index 060f7ffe3..3a85cbb69 100644
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@@ -71,6 +71,11 @@ class _BasicScraper(object):
     # sometimes comic strips have additional text info for each comic
     textSearch = None
 
+    # Is the additional text required or optional? When it is required (the
+    # default), you see an error message whenever a comic page is encountered
+    # that does not have the text
+    textOptional = False
+
     # usually the index format help
     help = ''
 
@@ -123,7 +128,7 @@ class _BasicScraper(object):
             patterns = [x.pattern for x in makeSequence(self.imageSearch)]
             out.warn(u"found no images at %s with patterns %s" % (url, patterns))
         if self.textSearch:
-            text = fetchText(url, data, self.textSearch)
+            text = fetchText(url, data, self.textSearch, optional=self.textOptional)
             if text:
                 text = unescape(text).strip()
         else:
diff --git a/dosagelib/util.py b/dosagelib/util.py
index 351142891..d1201806c 100644
--- a/dosagelib/util.py
+++ b/dosagelib/util.py
@@ -251,14 +251,15 @@ def fetchUrl(url, data, baseUrl, urlSearch):
     return fetchUrls(url, data, baseUrl, urlSearch)[0]
 
 
-def fetchText(url, data, textSearch):
+def fetchText(url, data, textSearch, optional=False):
     """Search text entry for given text pattern in a HTML page."""#
     match = textSearch.search(data)
     if match:
         text = match.group(1)
         out.debug(u'matched text %r with pattern %s' % (text, textSearch.pattern))
         return text
-    raise ValueError("Pattern %s not found at URL %s." % (textSearch.pattern, url))
+    if not optional:
+        raise ValueError("Pattern %s not found at URL %s." % (textSearch.pattern, url))
 
 
 _htmlparser = HTMLParser()