Bugfix: Don't assume RE patterns in base class.
This commit is contained in:
parent
e92a3fb3a1
commit
17bc454132
2 changed files with 15 additions and 5 deletions
|
@ -19,7 +19,7 @@ except ImportError:
|
|||
|
||||
from . import loader, configuration, util
|
||||
from .util import (getPageContent, makeSequence, get_system_uid, urlopen,
|
||||
getDirname, unescape, tagre, normaliseURL)
|
||||
getDirname, unescape, tagre, normaliseURL, prettyMatcherList)
|
||||
from .comic import ComicStrip
|
||||
from .output import out
|
||||
from .events import getHandler
|
||||
|
@ -131,14 +131,12 @@ class Scraper(object):
|
|||
# remove duplicate URLs
|
||||
imageUrls = set(imageUrls)
|
||||
if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
|
||||
patterns = [x.pattern for x in makeSequence(self.imageSearch)]
|
||||
out.warn(u"Found %d images instead of 1 at %s with expressions %s" % (len(imageUrls), url, patterns))
|
||||
out.warn(u"Found %d images instead of 1 at %s with expressions %s" % (len(imageUrls), url, prettyMatcherList(self.imageSearch)))
|
||||
image = sorted(imageUrls)[0]
|
||||
out.warn(u"Choosing image %s" % image)
|
||||
imageUrls = (image,)
|
||||
elif not imageUrls:
|
||||
patterns = [x.pattern for x in makeSequence(self.imageSearch)]
|
||||
out.warn(u"Found no images at %s with expressions %s" % (url, patterns))
|
||||
out.warn(u"Found no images at %s with expressions %s" % (url, prettyMatcherList(self.imageSearch)))
|
||||
if self.textSearch:
|
||||
text = self.fetchText(url, data, self.textSearch, optional=self.textOptional)
|
||||
else:
|
||||
|
|
|
@ -217,6 +217,18 @@ def makeSequence(item):
|
|||
return (item,)
|
||||
|
||||
|
||||
def prettyMatcherList(things):
    """Build a readable string from one or more matcher objects.

    The argument is normalised with makeSequence() before it is iterated.
    For each item that has a ``pattern`` attribute (such as a compiled
    regular expression) that attribute is shown; every other item is shown
    as-is. Items that are not text are rendered with ``%s`` formatting
    instead of being handed to ``join`` unchanged, so assembling a log
    message does not raise TypeError on an unexpected matcher type.

    Returns the items wrapped in single quotes, separated by ", " and
    enclosed in parentheses, e.g. "('first', 'second')".
    """
    norm = []
    for x in makeSequence(things):
        shown = x.pattern if hasattr(x, 'pattern') else x
        # "%s" returns text unchanged and stringifies anything else, which
        # keeps the join below from failing on non-string items.
        norm.append("%s" % (shown,))
    return "('%s')" % "', '".join(norm)
|
||||
|
||||
|
||||
_htmlparser = HTMLParser()
|
||||
def unescape(text):
|
||||
"""Replace HTML entities and character references."""
|
||||
|
|
Loading…
Reference in a new issue