From 2c0ca04882c9809cc282693c6da82ceb788808d6 Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Wed, 3 Apr 2013 20:32:19 +0200 Subject: [PATCH] Fix warning for scrapers with multiple image patterns. --- dosagelib/scraper.py | 8 +++++--- dosagelib/util.py | 15 ++++++++++----- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py index 5e3acbf31..04b463664 100644 --- a/dosagelib/scraper.py +++ b/dosagelib/scraper.py @@ -4,7 +4,7 @@ import requests import time from . import loader -from .util import fetchUrl, fetchUrls, getPageContent +from .util import fetchUrl, fetchUrls, getPageContent, makeList from .comic import ComicStrip from .output import out from .events import getHandler @@ -104,9 +104,11 @@ class _BasicScraper(object): imageUrls = fetchUrls(url, data, baseUrl, self.imageSearch) imageUrls = set(map(self.imageUrlModifier, imageUrls)) if len(imageUrls) > 1 and not self.multipleImagesPerStrip: - out.warn("found %d images instead of 1 at %s with %s" % (len(imageUrls), url, self.imageSearch.pattern)) + patterns = [x.pattern for x in makeList(self.imageSearch)] + out.warn("found %d images instead of 1 at %s with patterns %s" % (len(imageUrls), url, patterns)) elif not imageUrls: - out.warn("found no images at %s with %s" % (url, self.imageSearch.pattern)) + patterns = [x.pattern for x in makeList(self.imageSearch)] + out.warn("found no images at %s with patterns %s" % (url, patterns)) return ComicStrip(self.getName(), url, imageUrls, self.namer, self.session) def getStrips(self, maxstrips=None): diff --git a/dosagelib/util.py b/dosagelib/util.py index 056393e58..76bbb9213 100644 --- a/dosagelib/util.py +++ b/dosagelib/util.py @@ -136,20 +136,25 @@ def getImageObject(url, referrer, session, max_content_bytes=MaxImageBytes): return urlopen(url, session, referrer=referrer, max_content_bytes=max_content_bytes) +def makeList(item): + """If item is already a list or tuple, return it. 
+ Else return a list with item as single element.""" + if isinstance(item, (list, tuple)): + return item + return [item] + + def fetchUrls(url, data, baseUrl, urlSearch): """Search all entries for given URL pattern(s) in a HTML page.""" searchUrls = [] - if isinstance(urlSearch, (types.ListType, types.TupleType)): - searches = urlSearch - else: - searches = [urlSearch] + searches = makeList(urlSearch) for search in searches: for match in search.finditer(data): searchUrl = match.group(1) if not searchUrl: raise ValueError("Pattern %s matched empty URL at %s." % (search.pattern, url)) out.debug('matched URL %r with pattern %s' % (searchUrl, search.pattern)) - searchUrls.append(normaliseURL(urlparse.urljoin(baseUrl, searchUrl))) + searchUrls.append(normaliseURL(urljoin(baseUrl, searchUrl))) if searchUrls: # do not search other links if one pattern matched break