Allow selected strips without images.
This commit is contained in:
parent
4f03963b9e
commit
6155b022a6
2 changed files with 23 additions and 7 deletions
|
@ -33,6 +33,9 @@ class _BasicScraper(object):
|
|||
# if more than one image per URL is expected
|
||||
multipleImagesPerStrip = False
|
||||
|
||||
# set of URLs that have no image (e.g. only a video link)
|
||||
noImageUrls = set()
|
||||
|
||||
# set to False if previous URLs do not match the strip URL (i.e. because of redirects)
|
||||
prevUrlMatchesStripUrl = True
|
||||
|
||||
|
@ -80,9 +83,17 @@ class _BasicScraper(object):
|
|||
if self.indexes:
|
||||
for index in self.indexes:
|
||||
url = self.stripUrl % index
|
||||
yield self.getStrip(url)
|
||||
if url in self.noImageUrls:
|
||||
out.info('Skipping no-image URL %s' % url)
|
||||
else:
|
||||
yield self.getStrip(url)
|
||||
|
||||
else:
|
||||
yield self.getStrip(self.getLatestUrl())
|
||||
url = self.getLatestUrl()
|
||||
if url in self.noImageUrls:
|
||||
out.info('Skipping no-image URL %s' % url)
|
||||
else:
|
||||
yield self.getStrip(self.getLatestUrl())
|
||||
|
||||
def getStrip(self, url):
|
||||
"""Get comic strip for given URL."""
|
||||
|
@ -123,8 +134,11 @@ class _BasicScraper(object):
|
|||
seen_urls = set()
|
||||
while url:
|
||||
data, baseUrl = getPageContent(url, self.session)
|
||||
imageUrls = set(fetchUrls(url, data, baseUrl, self.imageSearch))
|
||||
yield self.getComicStrip(url, imageUrls)
|
||||
if url in self.noImageUrls:
|
||||
out.info('Skipping no-image URL %s' % url)
|
||||
else:
|
||||
imageUrls = set(fetchUrls(url, data, baseUrl, self.imageSearch))
|
||||
yield self.getComicStrip(url, imageUrls)
|
||||
if self.firstStripUrl == url:
|
||||
out.debug("Stop at first URL %s" % url)
|
||||
break
|
||||
|
|
|
@ -53,11 +53,13 @@ class _ComicTester(TestCase):
|
|||
# i.e. it detects duplicate filenames.
|
||||
saved_images = self.get_saved_images()
|
||||
num_images = len(saved_images)
|
||||
attrs = (num_images, saved_images, max_strips, self.tmpdir)
|
||||
# subtract the number of URLs with no image from the expected image number
|
||||
num_images_expected = max_strips - len(scraperobj.noImageUrls)
|
||||
attrs = (num_images, saved_images, num_images_expected, self.tmpdir)
|
||||
if self.scraperclass.multipleImagesPerStrip:
|
||||
self.check(num_images >= max_strips, 'saved %d %s instead of at least %d images in %s' % attrs)
|
||||
self.check(num_images >= num_images_expected, 'saved %d %s instead of at least %d images in %s' % attrs)
|
||||
else:
|
||||
self.check(num_images == max_strips, 'saved %d %s instead of %d images in %s' % attrs)
|
||||
self.check(num_images == num_images_expected, 'saved %d %s instead of %d images in %s' % attrs)
|
||||
|
||||
def check_stripurl(self, strip):
|
||||
if not self.scraperclass.stripUrl:
|
||||
|
|
Loading…
Reference in a new issue