Fix warning for scrapers with multiple image patterns.
This commit is contained in:
parent
f53a516219
commit
2c0ca04882
2 changed files with 15 additions and 8 deletions
|
@ -4,7 +4,7 @@
|
|||
import requests
|
||||
import time
|
||||
from . import loader
|
||||
from .util import fetchUrl, fetchUrls, getPageContent
|
||||
from .util import fetchUrl, fetchUrls, getPageContent, makeList
|
||||
from .comic import ComicStrip
|
||||
from .output import out
|
||||
from .events import getHandler
|
||||
|
@ -104,9 +104,11 @@ class _BasicScraper(object):
|
|||
imageUrls = fetchUrls(url, data, baseUrl, self.imageSearch)
|
||||
imageUrls = set(map(self.imageUrlModifier, imageUrls))
|
||||
if len(imageUrls) > 1 and not self.multipleImagesPerStrip:
|
||||
out.warn("found %d images instead of 1 at %s with %s" % (len(imageUrls), url, self.imageSearch.pattern))
|
||||
patterns = [x.pattern for x in makeList(self.imageSearch)]
|
||||
out.warn("found %d images instead of 1 at %s with patterns %s" % (len(imageUrls), url, patterns))
|
||||
elif not imageUrls:
|
||||
out.warn("found no images at %s with %s" % (url, self.imageSearch.pattern))
|
||||
patterns = [x.pattern for x in makeList(self.imageSearch)]
|
||||
out.warn("found no images at %s with patterns %s" % (url, patterns))
|
||||
return ComicStrip(self.getName(), url, imageUrls, self.namer, self.session)
|
||||
|
||||
def getStrips(self, maxstrips=None):
|
||||
|
|
|
@ -136,20 +136,25 @@ def getImageObject(url, referrer, session, max_content_bytes=MaxImageBytes):
|
|||
return urlopen(url, session, referrer=referrer, max_content_bytes=max_content_bytes)
|
||||
|
||||
|
||||
def makeList(item):
    """Return item as a sequence suitable for iteration.

    If item is already a list or tuple, it is returned unchanged (the
    same object, not a copy). Any other value, including None or a
    string, is wrapped in a new single-element list.

    @param item: a list, a tuple, or an arbitrary single value
    @return: item itself if it is a list or tuple, else [item]
    """
    if isinstance(item, (list, tuple)):
        # Already a sequence: hand it back untouched so callers keep
        # their original object and its type.
        return item
    return [item]
|
||||
|
||||
|
||||
def fetchUrls(url, data, baseUrl, urlSearch):
|
||||
"""Search all entries for given URL pattern(s) in a HTML page."""
|
||||
searchUrls = []
|
||||
if isinstance(urlSearch, (types.ListType, types.TupleType)):
|
||||
searches = urlSearch
|
||||
else:
|
||||
searches = [urlSearch]
|
||||
searches = makeList(urlSearch)
|
||||
for search in searches:
|
||||
for match in search.finditer(data):
|
||||
searchUrl = match.group(1)
|
||||
if not searchUrl:
|
||||
raise ValueError("Pattern %s matched empty URL at %s." % (search.pattern, url))
|
||||
out.debug('matched URL %r with pattern %s' % (searchUrl, search.pattern))
|
||||
searchUrls.append(normaliseURL(urlparse.urljoin(baseUrl, searchUrl)))
|
||||
searchUrls.append(normaliseURL(urljoin(baseUrl, searchUrl)))
|
||||
if searchUrls:
|
||||
# do not search other links if one pattern matched
|
||||
break
|
||||
|
|
Loading…
Reference in a new issue