Bugfix: Don't assume RE patterns in base class.
This commit is contained in:
parent
e92a3fb3a1
commit
17bc454132
2 changed files with 15 additions and 5 deletions
|
@ -19,7 +19,7 @@ except ImportError:
|
||||||
|
|
||||||
from . import loader, configuration, util
|
from . import loader, configuration, util
|
||||||
from .util import (getPageContent, makeSequence, get_system_uid, urlopen,
|
from .util import (getPageContent, makeSequence, get_system_uid, urlopen,
|
||||||
getDirname, unescape, tagre, normaliseURL)
|
getDirname, unescape, tagre, normaliseURL, prettyMatcherList)
|
||||||
from .comic import ComicStrip
|
from .comic import ComicStrip
|
||||||
from .output import out
|
from .output import out
|
||||||
from .events import getHandler
|
from .events import getHandler
|
||||||
|
@ -131,14 +131,12 @@ class Scraper(object):
|
||||||
# remove duplicate URLs
|
# remove duplicate URLs
|
||||||
imageUrls = set(imageUrls)
|
imageUrls = set(imageUrls)
|
||||||
if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
|
if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
|
||||||
patterns = [x.pattern for x in makeSequence(self.imageSearch)]
|
out.warn(u"Found %d images instead of 1 at %s with expressions %s" % (len(imageUrls), url, prettyMatcherList(self.imageSearch)))
|
||||||
out.warn(u"Found %d images instead of 1 at %s with expressions %s" % (len(imageUrls), url, patterns))
|
|
||||||
image = sorted(imageUrls)[0]
|
image = sorted(imageUrls)[0]
|
||||||
out.warn(u"Choosing image %s" % image)
|
out.warn(u"Choosing image %s" % image)
|
||||||
imageUrls = (image,)
|
imageUrls = (image,)
|
||||||
elif not imageUrls:
|
elif not imageUrls:
|
||||||
patterns = [x.pattern for x in makeSequence(self.imageSearch)]
|
out.warn(u"Found no images at %s with expressions %s" % (url, prettyMatcherList(self.imageSearch)))
|
||||||
out.warn(u"Found no images at %s with expressions %s" % (url, patterns))
|
|
||||||
if self.textSearch:
|
if self.textSearch:
|
||||||
text = self.fetchText(url, data, self.textSearch, optional=self.textOptional)
|
text = self.fetchText(url, data, self.textSearch, optional=self.textOptional)
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -217,6 +217,18 @@ def makeSequence(item):
|
||||||
return (item,)
|
return (item,)
|
||||||
|
|
||||||
|
|
||||||
|
def prettyMatcherList(things):
    """Try to construct a nicely-formatted string for a list of matcher
    objects. Those may be compiled regular expressions or strings...

    ``things`` is passed through makeSequence() before being iterated.
    Objects that have a ``pattern`` attribute are shown by that attribute;
    everything else is shown by its own string form. The result looks like
    ``('first', 'second')``.
    """
    norm = []
    for x in makeSequence(things):
        # Prefer the .pattern attribute when the matcher has one; any
        # other matcher is displayed as-is.
        shown = x.pattern if hasattr(x, 'pattern') else x
        # str.join() raises TypeError on non-string items. This helper only
        # feeds warning messages, so coerce with %-formatting rather than
        # fail; the 1-tuple keeps a tuple-valued matcher from being
        # unpacked as several format arguments.
        norm.append("%s" % (shown,))
    return "('%s')" % "', '".join(norm)
|
||||||
|
|
||||||
|
|
||||||
_htmlparser = HTMLParser()
|
_htmlparser = HTMLParser()
|
||||||
def unescape(text):
|
def unescape(text):
|
||||||
"""Replace HTML entities and character references."""
|
"""Replace HTML entities and character references."""
|
||||||
|
|
Loading…
Reference in a new issue