Refactor comic module test.
All those create-classes-on-the-fly games make my head hurt ;)
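For context: the pattern being removed built one unittest TestCase subclass per scraper at import time with type() and stuffed it into globals(); the replacement is a single test function that pytest parametrizes once per scraper. A minimal self-contained sketch of both patterns follows; FakeScraper and the other names here are illustrative stand-ins, not the module's real scraper API:

import unittest


class FakeScraper:
    """Illustrative stand-in for a dosagelib scraper class."""
    name = 'Example'


# Before: shared logic lives in a base TestCase, and one subclass per
# scraper is created on the fly with type().
class _BaseTester(unittest.TestCase):
    scraperclass = None

    def test_comic(self):
        if self.scraperclass is None:
            self.skipTest('base class')
        self.assertTrue(self.scraperclass.name)


TestExample = type('TestExample', (_BaseTester,), {'scraperclass': FakeScraper})


# After: one plain test function; the pytest_generate_tests() hook turns
# each scraper class into its own parametrized test case.
def pytest_generate_tests(metafunc):
    if 'scraperclass' in metafunc.fixturenames:
        metafunc.parametrize('scraperclass', [FakeScraper])


def test_comicmodule(scraperclass):
    assert scraperclass.name

The parametrized form keeps one copy of the test logic and drops the base-class skip dance, while each scraper still shows up as a separate test case in the report.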
parent 68d4dd463a
commit f0e61a614f
1 changed file with 82 additions and 114 deletions
@@ -1,16 +1,17 @@
-# -*- coding: iso-8859-1 -*-
+# -*- coding: utf-8 -*-
 # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
+# Copyright (C) 2015 Tobias Gruetzmacher
 import tempfile
 import shutil
 import re
 import os
 import multiprocessing
+import pytest
 try:
     from urllib.parse import urlsplit
 except ImportError:
     from urlparse import urlsplit
-from unittest import TestCase
 from dosagelib import scraper


@@ -30,133 +31,100 @@ def get_lock(host):
         _locks[host] = multiprocessing.BoundedSemaphore(MaxConnections)
     return _locks[host]

+@pytest.yield_fixture
+def tmpdir():
+    tmpdir = tempfile.mkdtemp()
+    yield tmpdir
+    shutil.rmtree(tmpdir)

-class _ComicTester(TestCase):
-    """Basic comic test class."""
-    scraperclass=None
+def get_saved_images(tmpdir, scraper, filtertxt=False):
+    """Get saved images."""
+    dirs = tuple(scraper.getName().split('/'))
+    files = os.listdir(os.path.join(tmpdir, *dirs))
+    if filtertxt:
+        files = [x for x in files if not x.endswith(".txt")]
+    return files

-    def setUp(self):
-        if self.scraperclass is not None:
-            self.name = self.scraperclass.getName()
-            self.url = self.scraperclass.starter()
-            # create a temporary directory for images
-            self.tmpdir = tempfile.mkdtemp()
-        else:
-            self.tmpdir = None
+def test_comicmodule(tmpdir, scraperclass):
+    # Test a scraper. It must be able to traverse backward for
+    # at least 5 strips from the start, and find strip images
+    # on at least 4 pages.
+    scraperobj = scraperclass()
+    # Limit number of connections to one host.
+    host = get_host(scraperobj.url)
+    try:
+        with get_lock(host):
+            _test_comic(tmpdir, scraperobj)
+    except OSError:
+        # interprocess lock not supported
+        _test_comic(tmpdir, scraperobj)

-    def tearDown(self):
-        if self.tmpdir is not None:
-            shutil.rmtree(self.tmpdir)
+def _test_comic(tmpdir, scraperobj):
+    num_strips = 0
+    max_strips = 5
+    strip = None
+    for strip in scraperobj.getStrips(max_strips):
+        images = []
+        for image in strip.getImages():
+            images.append(image.url)
+            image.save(tmpdir)
+        assert images, 'failed to find images at %s' % strip.stripUrl
+        if not scraperobj.multipleImagesPerStrip:
+            assert len(images) == 1, 'found more than 1 image at %s: %s' % (strip.stripUrl, images)
+        if num_strips > 0 and scraperobj.prevUrlMatchesStripUrl:
+            check_stripurl(strip, scraperobj)
+        num_strips += 1
+    if scraperobj.prevSearch and not scraperobj.hitFirstStripUrl:
+        # check strips
+        num_strips_expected = max_strips - len(scraperobj.skippedUrls)
+        msg = 'Traversed %d strips instead of %d.' % (num_strips, num_strips_expected)
+        if strip:
+            msg += " Check the prevSearch pattern at %s" % strip.stripUrl
+        assert num_strips == num_strips_expected, msg
+        # check images
+        if strip:
+            check_scraperesult(tmpdir, num_strips_expected, strip, scraperobj)

-    def get_saved_images(self, filtertxt=False):
-        """Get saved images."""
-        dirs = tuple(self.name.split('/'))
-        files = os.listdir(os.path.join(self.tmpdir, *dirs))
-        if filtertxt:
-            files = [x for x in files if not x.endswith(".txt")]
-        return files
+def check_scraperesult(tmpdir, num_images_expected, strip, scraperobj):
+    # Check that exactly or for multiple pages at least num_strips images are saved.
+    # This checks saved files, ie. it detects duplicate filenames.
+    saved_images = get_saved_images(tmpdir, scraperobj, filtertxt=bool(scraperobj.textSearch))
+    num_images = len(saved_images)
+    # subtract the number of skipped URLs with no image from the expected image number
+    attrs = (num_images, saved_images, num_images_expected, tmpdir)
+    if scraperobj.multipleImagesPerStrip:
+        assert num_images >= num_images_expected, 'saved %d %s instead of at least %d images in %s' % attrs
+    else:
+        assert num_images == num_images_expected, 'saved %d %s instead of %d images in %s' % attrs

-    def test_comic(self):
-        if self.scraperclass is None:
-            # only run subclasses
-            import pytest
-            pytest.skip("base class")
-        # Test a scraper. It must be able to traverse backward for
-        # at least 5 strips from the start, and find strip images
-        # on at least 4 pages.
-        scraperobj = self.scraperclass()
-        # Limit number of connections to one host.
-        host = get_host(scraperobj.url)
-        try:
-            with get_lock(host):
-                self._test_comic(scraperobj)
-        except OSError:
-            # interprocess lock not supported
-            self._test_comic(scraperobj)
+def check_stripurl(strip, scraperobj):
+    if not scraperobj.stripUrl:
+        # no indexing support
+        return
+    # test that the stripUrl regex matches the retrieved strip URL
+    urlmatch = re.escape(scraperobj.stripUrl)
+    urlmatch = urlmatch.replace(r"\%s", r".+")
+    urlmatch = "^%s$" % urlmatch
+    ro = re.compile(urlmatch)
+    mo = ro.search(strip.stripUrl)
+    assert mo is not None, 'strip URL %r does not match stripUrl pattern %s' % (strip.stripUrl, urlmatch)

-    def _test_comic(self, scraperobj):
-        num_strips = 0
-        max_strips = 5
-        strip = None
-        for strip in scraperobj.getStrips(max_strips):
-            images = []
-            for image in strip.getImages():
-                images.append(image.url)
-                self.save(image)
-            self.check(images, 'failed to find images at %s' % strip.stripUrl)
-            if not self.scraperclass.multipleImagesPerStrip:
-                self.check(len(images) == 1, 'found more than 1 image at %s: %s' % (strip.stripUrl, images))
-            if num_strips > 0 and self.scraperclass.prevUrlMatchesStripUrl:
-                self.check_stripurl(strip)
-            num_strips += 1
-        if self.scraperclass.prevSearch and not scraperobj.hitFirstStripUrl:
-            # check strips
-            num_strips_expected = max_strips - len(scraperobj.skippedUrls)
-            msg = 'Traversed %d strips instead of %d.' % (num_strips, num_strips_expected)
-            if strip:
-                msg += " Check the prevSearch pattern at %s" % strip.stripUrl
-            self.check(num_strips == num_strips_expected, msg)
-            # check images
-            if strip:
-                self.check_scraperesult(num_strips_expected, strip, scraperobj)
-
-    def check_scraperesult(self, num_images_expected, strip, scraperobj):
-        # Check that exactly or for multiple pages at least num_strips images are saved.
-        # This checks saved files, ie. it detects duplicate filenames.
-        saved_images = self.get_saved_images(filtertxt=bool(scraperobj.textSearch))
-        num_images = len(saved_images)
-        # subtract the number of skipped URLs with no image from the expected image number
-        attrs = (num_images, saved_images, num_images_expected, self.tmpdir)
-        if self.scraperclass.multipleImagesPerStrip:
-            self.check(num_images >= num_images_expected, 'saved %d %s instead of at least %d images in %s' % attrs)
-        else:
-            self.check(num_images == num_images_expected, 'saved %d %s instead of %d images in %s' % attrs)
-
-    def check_stripurl(self, strip):
-        if not self.scraperclass.stripUrl:
-            # no indexing support
-            return
-        # test that the stripUrl regex matches the retrieved strip URL
-        urlmatch = re.escape(self.scraperclass.stripUrl)
-        urlmatch = urlmatch.replace(r"\%s", r".+")
-        urlmatch = "^%s$" % urlmatch
-        ro = re.compile(urlmatch)
-        mo = ro.search(strip.stripUrl)
-        self.check(mo is not None, 'strip URL %r does not match stripUrl pattern %s' % (strip.stripUrl, urlmatch))
-
-    def save(self, image):
-        try:
-            image.save(self.tmpdir)
-        except Exception as msg:
-            self.check(False, 'could not save %s at %s to %s: %s' % (image.url, image.referrer, self.tmpdir, msg))
-
-    def check(self, condition, msg):
-        self.assertTrue(condition, "%s %s %s" % (self.name, self.url, msg))
-
-
-def make_comic_tester(name, **kwargs):
-    """Create and return a _ComicTester class with given name and attributes."""
-    return type(name, (_ComicTester,), kwargs)
-
-
-def generate_comic_testers():
-    """For each comic scraper, create a test class."""
-    g = globals()
+def get_test_scraperclasses():
+    """Return scrapers that should be tested."""
     if "TESTALL" in os.environ:
         # test all comics (this will take some time)
         scraperclasses = scraper.get_scraperclasses()
     else:
         # Get limited number of scraper tests on Travis builds to make
         # it faster
-        testscrapernames = ['AbstruseGoose', 'xkcd']
+        testscrapernames = ['AbstruseGoose', 'GoComics/CalvinandHobbes', 'xkcd']
         scraperclasses = [
             scraperclass for scraperclass in scraper.get_scraperclasses()
             if scraperclass.getName() in testscrapernames
         ]
     scraperclasses.sort(key=lambda scraperclass: scraperclass.__name__)
-    for scraperclass in scraperclasses:
-        name = 'Test'+scraperclass.__name__
-        g[name] = make_comic_tester(name, scraperclass=scraperclass)
+    return scraperclasses
+
+
+def pytest_generate_tests(metafunc):
+    if 'scraperclass' in metafunc.fixturenames:
+        metafunc.parametrize('scraperclass', get_test_scraperclasses())

-generate_comic_testers()
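A note on the tmpdir fixture added above: in a yield-style pytest fixture, the code before the yield is setup, the yielded value is what gets injected into the test, and the code after the yield is teardown that runs whether the test passes or fails. A minimal sketch of the same pattern; the fixture name scratch_dir is hypothetical, chosen to avoid clashing with pytest's built-in tmpdir, and @pytest.yield_fixture was the spelling current at the time (later pytest releases accept plain @pytest.fixture for yield-style fixtures):

import os
import shutil
import tempfile

import pytest


@pytest.yield_fixture
def scratch_dir():
    path = tempfile.mkdtemp()  # setup: a fresh directory per test
    yield path                 # the yielded value is what the test receives
    shutil.rmtree(path)        # teardown: runs after the test, pass or fail


def test_writes_file(scratch_dir):
    target = os.path.join(scratch_dir, 'out.txt')
    with open(target, 'w') as f:
        f.write('ok')
    assert os.path.exists(target)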