Allow comic text to be optional. Patch from TobiX

This commit is contained in:
Bastian Kleineidam 2014-07-24 20:49:57 +02:00
parent 950dd2932c
commit 3a929ceea6
2 changed files with 9 additions and 3 deletions

View file

@@ -71,6 +71,11 @@ class _BasicScraper(object):
# sometimes comic strips have additional text info for each comic # sometimes comic strips have additional text info for each comic
textSearch = None textSearch = None
# Is the additional text required or optional? When it is required (the
# default), you see an error message whenever a comic page is encountered
# that does not have the text
textOptional = False
# usually the index format help # usually the index format help
help = '' help = ''
@@ -123,7 +128,7 @@ class _BasicScraper(object):
patterns = [x.pattern for x in makeSequence(self.imageSearch)] patterns = [x.pattern for x in makeSequence(self.imageSearch)]
out.warn(u"found no images at %s with patterns %s" % (url, patterns)) out.warn(u"found no images at %s with patterns %s" % (url, patterns))
if self.textSearch: if self.textSearch:
text = fetchText(url, data, self.textSearch) text = fetchText(url, data, self.textSearch, optional=self.textOptional)
if text: if text:
text = unescape(text).strip() text = unescape(text).strip()
else: else:

View file

@@ -251,14 +251,15 @@ def fetchUrl(url, data, baseUrl, urlSearch):
return fetchUrls(url, data, baseUrl, urlSearch)[0] return fetchUrls(url, data, baseUrl, urlSearch)[0]
def fetchText(url, data, textSearch): def fetchText(url, data, textSearch, optional=False):
"""Search text entry for given text pattern in a HTML page."""# """Search text entry for given text pattern in a HTML page."""#
match = textSearch.search(data) match = textSearch.search(data)
if match: if match:
text = match.group(1) text = match.group(1)
out.debug(u'matched text %r with pattern %s' % (text, textSearch.pattern)) out.debug(u'matched text %r with pattern %s' % (text, textSearch.pattern))
return text return text
raise ValueError("Pattern %s not found at URL %s." % (textSearch.pattern, url)) if not optional:
raise ValueError("Pattern %s not found at URL %s." % (textSearch.pattern, url))
_htmlparser = HTMLParser() _htmlparser = HTMLParser()