Allow comic text to be optional. Patch from TobiX
This commit is contained in:
parent
950dd2932c
commit
3a929ceea6
2 changed files with 9 additions and 3 deletions
|
@ -71,6 +71,11 @@ class _BasicScraper(object):
|
|||
# sometimes comic strips have additional text info for each comic
|
||||
textSearch = None
|
||||
|
||||
# Is the additional text required or optional? When it is required (the
|
||||
# default), you see an error message whenever a comic page is encountered
|
||||
# that does not have the text
|
||||
textOptional = False
|
||||
|
||||
# help text for the user, usually describing the index format
|
||||
help = ''
|
||||
|
||||
|
@ -123,7 +128,7 @@ class _BasicScraper(object):
|
|||
patterns = [x.pattern for x in makeSequence(self.imageSearch)]
|
||||
out.warn(u"found no images at %s with patterns %s" % (url, patterns))
|
||||
if self.textSearch:
|
||||
text = fetchText(url, data, self.textSearch)
|
||||
text = fetchText(url, data, self.textSearch, optional=self.textOptional)
|
||||
if text:
|
||||
text = unescape(text).strip()
|
||||
else:
|
||||
|
|
|
@ -251,14 +251,15 @@ def fetchUrl(url, data, baseUrl, urlSearch):
|
|||
return fetchUrls(url, data, baseUrl, urlSearch)[0]
|
||||
|
||||
|
||||
def fetchText(url, data, textSearch):
|
||||
def fetchText(url, data, textSearch, optional=False):
|
||||
"""Search text entry for given text pattern in an HTML page."""
|
||||
match = textSearch.search(data)
|
||||
if match:
|
||||
text = match.group(1)
|
||||
out.debug(u'matched text %r with pattern %s' % (text, textSearch.pattern))
|
||||
return text
|
||||
raise ValueError("Pattern %s not found at URL %s." % (textSearch.pattern, url))
|
||||
if not optional:
|
||||
raise ValueError("Pattern %s not found at URL %s." % (textSearch.pattern, url))
|
||||
|
||||
|
||||
_htmlparser = HTMLParser()
|
||||
|
|
Loading…
Reference in a new issue