Allow comic text to be optional. Patch from TobiX
This commit is contained in:
parent
950dd2932c
commit
3a929ceea6
2 changed files with 9 additions and 3 deletions
|
@@ -71,6 +71,11 @@ class _BasicScraper(object):
|
||||||
# sometimes comic strips have additional text info for each comic
|
# sometimes comic strips have additional text info for each comic
|
||||||
textSearch = None
|
textSearch = None
|
||||||
|
|
||||||
|
# Is the additional text required or optional? When it is required (the
|
||||||
|
# default), you see an error message whenever a comic page is encountered
|
||||||
|
# that does not have the text
|
||||||
|
textOptional = False
|
||||||
|
|
||||||
# usually the index format help
|
# usually the index format help
|
||||||
help = ''
|
help = ''
|
||||||
|
|
||||||
|
@@ -123,7 +128,7 @@ class _BasicScraper(object):
|
||||||
patterns = [x.pattern for x in makeSequence(self.imageSearch)]
|
patterns = [x.pattern for x in makeSequence(self.imageSearch)]
|
||||||
out.warn(u"found no images at %s with patterns %s" % (url, patterns))
|
out.warn(u"found no images at %s with patterns %s" % (url, patterns))
|
||||||
if self.textSearch:
|
if self.textSearch:
|
||||||
text = fetchText(url, data, self.textSearch)
|
text = fetchText(url, data, self.textSearch, optional=self.textOptional)
|
||||||
if text:
|
if text:
|
||||||
text = unescape(text).strip()
|
text = unescape(text).strip()
|
||||||
else:
|
else:
|
||||||
|
|
|
@@ -251,14 +251,15 @@ def fetchUrl(url, data, baseUrl, urlSearch):
|
||||||
return fetchUrls(url, data, baseUrl, urlSearch)[0]
|
return fetchUrls(url, data, baseUrl, urlSearch)[0]
|
||||||
|
|
||||||
|
|
||||||
def fetchText(url, data, textSearch):
|
def fetchText(url, data, textSearch, optional=False):
|
||||||
"""Search text entry for given text pattern in a HTML page."""#
|
"""Search text entry for given text pattern in a HTML page."""#
|
||||||
match = textSearch.search(data)
|
match = textSearch.search(data)
|
||||||
if match:
|
if match:
|
||||||
text = match.group(1)
|
text = match.group(1)
|
||||||
out.debug(u'matched text %r with pattern %s' % (text, textSearch.pattern))
|
out.debug(u'matched text %r with pattern %s' % (text, textSearch.pattern))
|
||||||
return text
|
return text
|
||||||
raise ValueError("Pattern %s not found at URL %s." % (textSearch.pattern, url))
|
if not optional:
|
||||||
|
raise ValueError("Pattern %s not found at URL %s." % (textSearch.pattern, url))
|
||||||
|
|
||||||
|
|
||||||
_htmlparser = HTMLParser()
|
_htmlparser = HTMLParser()
|
||||||
|
|
Loading…
Reference in a new issue