Bugfix: Don't assume RE patterns in base class.

2014-10-13 22:29:47 +02:00 · 2014-10-13 22:29:47 +02:00 · 17bc454132
commit 17bc454132
parent e92a3fb3a1
2 changed files with 15 additions and 5 deletions
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@@ -19,7 +19,7 @@ except ImportError:
 from . import loader, configuration, util
 from .util import (getPageContent, makeSequence, get_system_uid, urlopen,
-        getDirname, unescape, tagre, normaliseURL)
+        getDirname, unescape, tagre, normaliseURL, prettyMatcherList)
 from .comic import ComicStrip
 from .output import out
 from .events import getHandler
@@ -131,14 +131,12 @@ class Scraper(object):
        # remove duplicate URLs
        imageUrls = set(imageUrls)
        if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
-            patterns = [x.pattern for x in makeSequence(self.imageSearch)]
+            out.warn(u"Found %d images instead of 1 at %s with expressions %s" % (len(imageUrls), url, prettyMatcherList(self.imageSearch)))
-            out.warn(u"Found %d images instead of 1 at %s with expressions %s" % (len(imageUrls), url, patterns))
            image = sorted(imageUrls)[0]
            out.warn(u"Choosing image %s" % image)
            imageUrls = (image,)
        elif not imageUrls:
-            patterns = [x.pattern for x in makeSequence(self.imageSearch)]
+            out.warn(u"Found no images at %s with expressions %s" % (url, prettyMatcherList(self.imageSearch)))
-            out.warn(u"Found no images at %s with expressions %s" % (url, patterns))
        if self.textSearch:
            text = self.fetchText(url, data, self.textSearch, optional=self.textOptional)
        else:
--- a/dosagelib/util.py
+++ b/dosagelib/util.py
@@ -217,6 +217,18 @@ def makeSequence(item):
    return (item,)
 def prettyMatcherList(things):
    """Build a readable, quoted listing of matcher objects for log messages.

    ``things`` is passed through makeSequence() before being iterated.
    An entry that exposes a ``pattern`` attribute (such as a compiled
    regular expression) is shown by that attribute; any other entry is
    shown as itself.

    Returns a string of the form ``('first', 'second')``.
    """
    norm = []
    for x in makeSequence(things):
        # Compiled regular expressions carry their source text in ``pattern``.
        shown = x.pattern if hasattr(x, 'pattern') else x
        # Coerce through %-formatting so that an entry which is not a text
        # string (e.g. None or a bytes pattern) cannot make the join below
        # raise TypeError; this helper only feeds diagnostic output.
        norm.append("%s" % (shown,))
    return "('%s')" % "', '".join(norm)
 _htmlparser = HTMLParser()
 def unescape(text):
    """Replace HTML entities and character references."""