Allow selected strips without images.

This commit is contained in:
Bastian Kleineidam 2013-02-18 20:03:27 +01:00
parent 4f03963b9e
commit 6155b022a6
2 changed files with 23 additions and 7 deletions

View file

@@ -33,6 +33,9 @@ class _BasicScraper(object):
     # if more than one image per URL is expected
     multipleImagesPerStrip = False
+    # set of URLs that have no image (eg. only a video link)
+    noImageUrls = set()
     # set to False if previous URLs do not match the strip URL (ie. because of redirects)
     prevUrlMatchesStripUrl = True
@@ -80,7 +83,15 @@ class _BasicScraper(object):
         if self.indexes:
             for index in self.indexes:
                 url = self.stripUrl % index
-                yield self.getStrip(url)
-        else:
-            yield self.getStrip(self.getLatestUrl())
+                if url in self.noImageUrls:
+                    out.info('Skipping no-image URL %s' % url)
+                else:
+                    yield self.getStrip(url)
+        else:
+            url = self.getLatestUrl()
+            if url in self.noImageUrls:
+                out.info('Skipping no-image URL %s' % url)
+            else:
+                yield self.getStrip(self.getLatestUrl())
@@ -123,6 +134,9 @@ class _BasicScraper(object):
         seen_urls = set()
         while url:
             data, baseUrl = getPageContent(url, self.session)
-            imageUrls = set(fetchUrls(url, data, baseUrl, self.imageSearch))
-            yield self.getComicStrip(url, imageUrls)
+            if url in self.noImageUrls:
+                out.info('Skipping no-image URL %s' % url)
+            else:
+                imageUrls = set(fetchUrls(url, data, baseUrl, self.imageSearch))
+                yield self.getComicStrip(url, imageUrls)
             if self.firstStripUrl == url:

View file

@@ -53,11 +53,13 @@ class _ComicTester(TestCase):
         # ie. it detects duplicate filenames.
         saved_images = self.get_saved_images()
         num_images = len(saved_images)
-        attrs = (num_images, saved_images, max_strips, self.tmpdir)
+        # subtract the number of URLs with no image from the expected image number
+        num_images_expected = max_strips - len(scraperobj.noImageUrls)
+        attrs = (num_images, saved_images, num_images_expected, self.tmpdir)
         if self.scraperclass.multipleImagesPerStrip:
-            self.check(num_images >= max_strips, 'saved %d %s instead of at least %d images in %s' % attrs)
+            self.check(num_images >= num_images_expected, 'saved %d %s instead of at least %d images in %s' % attrs)
         else:
-            self.check(num_images == max_strips, 'saved %d %s instead of %d images in %s' % attrs)
+            self.check(num_images == num_images_expected, 'saved %d %s instead of %d images in %s' % attrs)

     def check_stripurl(self, strip):
         if not self.scraperclass.stripUrl: