From 17bc45413263f4d8de1302848ccf11eff36682ae Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Mon, 13 Oct 2014 22:29:47 +0200 Subject: [PATCH] Bugfix: Don't assume RE patterns in base class. --- dosagelib/scraper.py | 8 +++----- dosagelib/util.py | 12 ++++++++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py index 0a4ea33ad..bdfad25a8 100644 --- a/dosagelib/scraper.py +++ b/dosagelib/scraper.py @@ -19,7 +19,7 @@ except ImportError: from . import loader, configuration, util from .util import (getPageContent, makeSequence, get_system_uid, urlopen, - getDirname, unescape, tagre, normaliseURL) + getDirname, unescape, tagre, normaliseURL, prettyMatcherList) from .comic import ComicStrip from .output import out from .events import getHandler @@ -131,14 +131,12 @@ class Scraper(object): # remove duplicate URLs imageUrls = set(imageUrls) if len(imageUrls) > 1 and not self.multipleImagesPerStrip: - patterns = [x.pattern for x in makeSequence(self.imageSearch)] - out.warn(u"Found %d images instead of 1 at %s with expressions %s" % (len(imageUrls), url, patterns)) + out.warn(u"Found %d images instead of 1 at %s with expressions %s" % (len(imageUrls), url, prettyMatcherList(self.imageSearch))) image = sorted(imageUrls)[0] out.warn(u"Choosing image %s" % image) imageUrls = (image,) elif not imageUrls: - patterns = [x.pattern for x in makeSequence(self.imageSearch)] - out.warn(u"Found no images at %s with expressions %s" % (url, patterns)) + out.warn(u"Found no images at %s with expressions %s" % (url, prettyMatcherList(self.imageSearch))) if self.textSearch: text = self.fetchText(url, data, self.textSearch, optional=self.textOptional) else: diff --git a/dosagelib/util.py b/dosagelib/util.py index d586da69a..42556fe0e 100644 --- a/dosagelib/util.py +++ b/dosagelib/util.py @@ -217,6 +217,18 @@ def makeSequence(item): return (item,) +def prettyMatcherList(things): + """Try to construct a nicely-formatted string for a list of matcher + objects. Those may be compiled regular expressions or strings...""" + norm = [] + for x in makeSequence(things): + if hasattr(x, 'pattern'): + norm.append(x.pattern) + else: + norm.append(x) + return "('%s')" % "', '".join(norm) + + _htmlparser = HTMLParser() def unescape(text): """Replace HTML entities and character references."""