From fb37f946e0b2a5d406d85f773911b41563a69314 Mon Sep 17 00:00:00 2001
From: Tobias Gruetzmacher <tobias-git@23.gs>
Date: Mon, 1 Aug 2016 00:44:34 +0200
Subject: [PATCH] Speed up comic module tests.

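This writes a fake image file before each download in the comic module
tests, so that an If-Modified-Since header is sent and most web servers
can answer with "304 Not Modified" instead of sending the comic image.
This should also reduce the amount of data that needs to be fetched for
comic module tests.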
---
 dosagelib/comic.py   | 19 ++++++++++++-------
 tests/test_comics.py | 35 ++++++++++++++++++-----------------
 2 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/dosagelib/comic.py b/dosagelib/comic.py
index 5a4605f3d..a94413cbb 100755
--- a/dosagelib/comic.py
+++ b/dosagelib/comic.py
@@ -92,24 +92,21 @@ class ComicImage(object):
 
     def save(self, basepath):
         """Save comic URL to filename on disk."""
-        comicdir = self.scraper.get_download_dir(basepath)
-        if not os.path.isdir(comicdir):
-            os.makedirs(comicdir)
-        fnbase = os.path.join(comicdir, self.filename)
+        fnbase = self._fnbase(basepath)
         exist = [x for x in glob.glob(fnbase + ".*") if not x.endswith(".txt")]
         out.info(u"Get image URL %s" % self.url, level=1)
         if len(exist) == 1:
             lastchange = os.path.getmtime(exist[0])
             self.connect(datetime.utcfromtimestamp(lastchange))
             if self.urlobj.status_code == 304:  # Not modified
-                self.exist_err(exist[0])
+                self._exist_err(exist[0])
                 return exist[0], False
         else:
             self.connect()
         fn = fnbase + self.ext
         # compare with >= since content length could be the compressed size
         if os.path.isfile(fn) and os.path.getsize(fn) >= self.contentLength:
-            self.exist_err(fn)
+            self._exist_err(fn)
             return fn, False
         out.debug(u'Writing comic to file %s...' % fn)
         with self.fileout(fn) as f:
@@ -144,5 +141,13 @@ class ComicImage(object):
         else:
             out.info(u"Saved %s (%s)." % (filename, strsize(size)))
 
-    def exist_err(self, fn):
+    def _exist_err(self, fn):
         out.info(u'Skipping existing file "%s".' % fn)
+
+    def _fnbase(self, basepath):
+        '''Determine the target base name of this comic file and make sure the
+        directory exists.'''
+        comicdir = self.scraper.get_download_dir(basepath)
+        if not os.path.isdir(comicdir):
+            os.makedirs(comicdir)
+        return os.path.join(comicdir, self.filename)
diff --git a/tests/test_comics.py b/tests/test_comics.py
index 8aa6369cb..50b7c14f1 100644
--- a/tests/test_comics.py
+++ b/tests/test_comics.py
@@ -6,7 +6,6 @@
 from __future__ import absolute_import, division, print_function
 
 import re
-import os
 import multiprocessing
 from six.moves.urllib.parse import urlsplit
 
@@ -31,14 +30,6 @@ def get_lock(host):
     return _locks[host]
 
 
-def _get_saved_images(outdir, scraperobj):
-    """Get saved images."""
-    dirs = tuple(scraperobj.name.split('/'))
-    files = os.listdir(os.path.join(outdir, *dirs))
-    files = [x for x in files if not x.endswith(".txt")]
-    return files
-
-
 def test_comicmodule(tmpdir, scraperobj):
     '''Test a scraper. It must be able to traverse backward for at least 5
     strips from the start, and find strip images on at least 4 pages.'''
@@ -55,8 +46,10 @@ def test_comicmodule(tmpdir, scraperobj):
 def _test_comic(outdir, scraperobj):
     num_strips = 0
     strip = None
+    files = []
     for strip in scraperobj.getStrips(MaxStrips):
-        _check_strip(outdir, strip, scraperobj.multipleImagesPerStrip)
+        files.append(_check_strip(outdir, strip,
+                                  scraperobj.multipleImagesPerStrip))
 
         if num_strips > 0:
             _check_stripurl(strip, scraperobj)
@@ -72,34 +65,42 @@ def _test_comic(outdir, scraperobj):
             msg += " Check the prevSearch pattern at %s" % strip.strip_url
         assert num_strips == num_strips_expected, msg
         if strip:
-            _check_scraperesult(outdir, num_strips_expected, strip, scraperobj)
+            _check_scraperesult(files, num_strips_expected, strip, scraperobj)
 
 
 def _check_strip(outdir, strip, multipleImagesPerStrip):
     '''Check that a specific page yields images and the comic module correctly
     declares if there are multiple images per page.'''
     images = []
+    files = []
     for image in strip.getImages():
         images.append(image.url)
-        image.save(outdir)
+
+        # write a fake image (to download less)
+        fakeimg = image._fnbase(outdir) + '.fake'
+        with open(fakeimg, 'w') as f:
+            f.write("fake image for testing")
+
+        fn, _ = image.save(outdir)
+        files.append(fn)
     assert images, 'failed to find images at %s' % strip.strip_url
     if not multipleImagesPerStrip:
         assert len(images) == 1, 'found more than 1 image at %s: %s' % (
                 strip.strip_url, images)
+    return files
 
 
-def _check_scraperesult(outdir, num_images_expected, strip, scraperobj):
+def _check_scraperesult(saved_images, num_images_expected, strip, scraperobj):
     '''Check that exactly or for multiple pages at least num_strips images are
     saved. This checks saved files, ie. it detects duplicate filenames.'''
-    saved_images = _get_saved_images(outdir, scraperobj)
     num_images = len(saved_images)
 
-    attrs = (num_images, saved_images, num_images_expected, outdir)
+    attrs = (num_images, saved_images, num_images_expected)
     if scraperobj.multipleImagesPerStrip:
-        err = 'saved %d %s instead of at least %d images in %s' % attrs
+        err = 'saved %d %s instead of at least %d images' % attrs
         assert num_images >= num_images_expected, err
     else:
-        err = 'saved %d %s instead of %d images in %s' % attrs
+        err = 'saved %d %s instead of %d images' % attrs
         assert num_images == num_images_expected, err