Speed up comic module tests.

The tests now plant a fake cached image before each download, so the
request carries an If-Modified-Since header and most web servers answer
304 instead of sending the comic image at all. This cuts down the
amount of data the comic module tests need to fetch.
Tobias Gruetzmacher 2016-08-01 00:44:34 +02:00
parent 4f80016bf0
commit fb37f946e0
2 changed files with 30 additions and 24 deletions
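For context, this is the conditional-GET handshake the change relies on, as a
minimal sketch using requests; fetch_if_unchanged is an illustrative name, not
part of dosage, whose connect() wraps this differently:

from email.utils import formatdate

import requests

def fetch_if_unchanged(url, mtime):
    """Fetch url only if it changed after mtime (a Unix timestamp)."""
    headers = {'If-Modified-Since': formatdate(mtime, usegmt=True)}
    resp = requests.get(url, headers=headers)
    if resp.status_code == 304:  # Not Modified: the response has no body
        return None              # no image data crossed the wire
    return resp.content

When the server answers 304, only headers travel over the wire, which is what
makes the tests cheap.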


@@ -92,24 +92,21 @@ class ComicImage(object):
     def save(self, basepath):
         """Save comic URL to filename on disk."""
-        comicdir = self.scraper.get_download_dir(basepath)
-        if not os.path.isdir(comicdir):
-            os.makedirs(comicdir)
-        fnbase = os.path.join(comicdir, self.filename)
+        fnbase = self._fnbase(basepath)
         exist = [x for x in glob.glob(fnbase + ".*") if not x.endswith(".txt")]
         out.info(u"Get image URL %s" % self.url, level=1)
         if len(exist) == 1:
             lastchange = os.path.getmtime(exist[0])
             self.connect(datetime.utcfromtimestamp(lastchange))
             if self.urlobj.status_code == 304:  # Not modified
-                self.exist_err(exist[0])
+                self._exist_err(exist[0])
                 return exist[0], False
         else:
             self.connect()
         fn = fnbase + self.ext
         # compare with >= since content length could be the compressed size
         if os.path.isfile(fn) and os.path.getsize(fn) >= self.contentLength:
-            self.exist_err(fn)
+            self._exist_err(fn)
             return fn, False
         out.debug(u'Writing comic to file %s...' % fn)
         with self.fileout(fn) as f:
@@ -144,5 +141,13 @@ class ComicImage(object):
         else:
             out.info(u"Saved %s (%s)." % (filename, strsize(size)))

-    def exist_err(self, fn):
+    def _exist_err(self, fn):
         out.info(u'Skipping existing file "%s".' % fn)
+
+    def _fnbase(self, basepath):
+        '''Determine the target base name of this comic file and make sure the
+        directory exists.'''
+        comicdir = self.scraper.get_download_dir(basepath)
+        if not os.path.isdir(comicdir):
+            os.makedirs(comicdir)
+        return os.path.join(comicdir, self.filename)
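Read together, save() now boils down to the following skip logic. This is an
illustrative condensation reusing fetch_if_unchanged from the sketch above;
the real method also handles text output, progress reporting and more:

import glob
import os

import requests

def save_sketch(fnbase, ext, url):
    """Condensed version of the skip logic in save() (illustrative only)."""
    # Any previously saved file (but not .txt metadata) counts as a cache hit.
    exist = [x for x in glob.glob(fnbase + '.*') if not x.endswith('.txt')]
    if len(exist) == 1:
        # A cached copy exists: fetch only if the server has something newer.
        data = fetch_if_unchanged(url, os.path.getmtime(exist[0]))
        if data is None:  # 304 Not Modified: the cached file is still current
            return exist[0], False
    else:
        data = requests.get(url).content  # no cache: unconditional download
    fn = fnbase + ext
    with open(fn, 'wb') as f:
        f.write(data)
    return fn, True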


@@ -6,7 +6,6 @@
 from __future__ import absolute_import, division, print_function

 import re
-import os
 import multiprocessing
 from six.moves.urllib.parse import urlsplit
@@ -31,14 +30,6 @@ def get_lock(host):
     return _locks[host]

-def _get_saved_images(outdir, scraperobj):
-    """Get saved images."""
-    dirs = tuple(scraperobj.name.split('/'))
-    files = os.listdir(os.path.join(outdir, *dirs))
-    files = [x for x in files if not x.endswith(".txt")]
-    return files
-
 def test_comicmodule(tmpdir, scraperobj):
     '''Test a scraper. It must be able to traverse backward for at least 5
     strips from the start, and find strip images on at least 4 pages.'''
@@ -55,8 +46,10 @@ def test_comicmodule(tmpdir, scraperobj):
 def _test_comic(outdir, scraperobj):
     num_strips = 0
     strip = None
+    files = []
     for strip in scraperobj.getStrips(MaxStrips):
-        _check_strip(outdir, strip, scraperobj.multipleImagesPerStrip)
+        files.append(_check_strip(outdir, strip,
+                                  scraperobj.multipleImagesPerStrip))

         if num_strips > 0:
             _check_stripurl(strip, scraperobj)
@@ -72,34 +65,42 @@ def _test_comic(outdir, scraperobj):
         msg += " Check the prevSearch pattern at %s" % strip.strip_url
     assert num_strips == num_strips_expected, msg
     if strip:
-        _check_scraperesult(outdir, num_strips_expected, strip, scraperobj)
+        _check_scraperesult(files, num_strips_expected, strip, scraperobj)

 def _check_strip(outdir, strip, multipleImagesPerStrip):
     '''Check that a specific page yields images and the comic module correctly
     declares if there are multiple images per page.'''
     images = []
+    files = []
     for image in strip.getImages():
         images.append(image.url)
-        image.save(outdir)
+        # write a fake image (to download less)
+        fakeimg = image._fnbase(outdir) + '.fake'
+        with open(fakeimg, 'w') as f:
+            f.write("fake image for testing")
+        fn, _ = image.save(outdir)
+        files.append(fn)
     assert images, 'failed to find images at %s' % strip.strip_url
     if not multipleImagesPerStrip:
         assert len(images) == 1, 'found more than 1 image at %s: %s' % (
             strip.strip_url, images)
+    return files

-def _check_scraperesult(outdir, num_images_expected, strip, scraperobj):
+def _check_scraperesult(saved_images, num_images_expected, strip, scraperobj):
     '''Check that exactly or for multiple pages at least num_strips images are
     saved. This checks saved files, ie. it detects duplicate filenames.'''
-    saved_images = _get_saved_images(outdir, scraperobj)
     num_images = len(saved_images)
-    attrs = (num_images, saved_images, num_images_expected, outdir)
+    attrs = (num_images, saved_images, num_images_expected)
     if scraperobj.multipleImagesPerStrip:
-        err = 'saved %d %s instead of at least %d images in %s' % attrs
+        err = 'saved %d %s instead of at least %d images' % attrs
         assert num_images >= num_images_expected, err
     else:
-        err = 'saved %d %s instead of %d images in %s' % attrs
+        err = 'saved %d %s instead of %d images' % attrs
         assert num_images == num_images_expected, err