Refactor comic module test.

All those create-classes-on-the-fly games make my head hurt ;)
This commit is contained in:
Tobias Gruetzmacher 2015-07-17 23:33:25 +02:00
parent 68d4dd463a
commit f0e61a614f

View file

@@ -1,16 +1,17 @@
# -*- coding: iso-8859-1 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam # Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015 Tobias Gruetzmacher
import tempfile import tempfile
import shutil import shutil
import re import re
import os import os
import multiprocessing import multiprocessing
import pytest
try: try:
from urllib.parse import urlsplit from urllib.parse import urlsplit
except ImportError: except ImportError:
from urlparse import urlsplit from urlparse import urlsplit
from unittest import TestCase
from dosagelib import scraper from dosagelib import scraper
@@ -30,51 +31,35 @@ def get_lock(host):
_locks[host] = multiprocessing.BoundedSemaphore(MaxConnections) _locks[host] = multiprocessing.BoundedSemaphore(MaxConnections)
return _locks[host] return _locks[host]
@pytest.yield_fixture
def tmpdir():
tmpdir = tempfile.mkdtemp()
yield tmpdir
shutil.rmtree(tmpdir)
class _ComicTester(TestCase): def get_saved_images(tmpdir, scraper, filtertxt=False):
"""Basic comic test class."""
scraperclass=None
def setUp(self):
if self.scraperclass is not None:
self.name = self.scraperclass.getName()
self.url = self.scraperclass.starter()
# create a temporary directory for images
self.tmpdir = tempfile.mkdtemp()
else:
self.tmpdir = None
def tearDown(self):
if self.tmpdir is not None:
shutil.rmtree(self.tmpdir)
def get_saved_images(self, filtertxt=False):
"""Get saved images.""" """Get saved images."""
dirs = tuple(self.name.split('/')) dirs = tuple(scraper.getName().split('/'))
files = os.listdir(os.path.join(self.tmpdir, *dirs)) files = os.listdir(os.path.join(tmpdir, *dirs))
if filtertxt: if filtertxt:
files = [x for x in files if not x.endswith(".txt")] files = [x for x in files if not x.endswith(".txt")]
return files return files
def test_comic(self): def test_comicmodule(tmpdir, scraperclass):
if self.scraperclass is None:
# only run subclasses
import pytest
pytest.skip("base class")
# Test a scraper. It must be able to traverse backward for # Test a scraper. It must be able to traverse backward for
# at least 5 strips from the start, and find strip images # at least 5 strips from the start, and find strip images
# on at least 4 pages. # on at least 4 pages.
scraperobj = self.scraperclass() scraperobj = scraperclass()
# Limit number of connections to one host. # Limit number of connections to one host.
host = get_host(scraperobj.url) host = get_host(scraperobj.url)
try: try:
with get_lock(host): with get_lock(host):
self._test_comic(scraperobj) _test_comic(tmpdir, scraperobj)
except OSError: except OSError:
# interprocess lock not supported # interprocess lock not supported
self._test_comic(scraperobj) _test_comic(tmpdir, scraperobj)
def _test_comic(self, scraperobj): def _test_comic(tmpdir, scraperobj):
num_strips = 0 num_strips = 0
max_strips = 5 max_strips = 5
strip = None strip = None
@@ -82,81 +67,64 @@ class _ComicTester(TestCase):
images = [] images = []
for image in strip.getImages(): for image in strip.getImages():
images.append(image.url) images.append(image.url)
self.save(image) image.save(tmpdir)
self.check(images, 'failed to find images at %s' % strip.stripUrl) assert images, 'failed to find images at %s' % strip.stripUrl
if not self.scraperclass.multipleImagesPerStrip: if not scraperobj.multipleImagesPerStrip:
self.check(len(images) == 1, 'found more than 1 image at %s: %s' % (strip.stripUrl, images)) assert len(images) == 1, 'found more than 1 image at %s: %s' % (strip.stripUrl, images)
if num_strips > 0 and self.scraperclass.prevUrlMatchesStripUrl: if num_strips > 0 and scraperobj.prevUrlMatchesStripUrl:
self.check_stripurl(strip) check_stripurl(strip, scraperobj)
num_strips += 1 num_strips += 1
if self.scraperclass.prevSearch and not scraperobj.hitFirstStripUrl: if scraperobj.prevSearch and not scraperobj.hitFirstStripUrl:
# check strips # check strips
num_strips_expected = max_strips - len(scraperobj.skippedUrls) num_strips_expected = max_strips - len(scraperobj.skippedUrls)
msg = 'Traversed %d strips instead of %d.' % (num_strips, num_strips_expected) msg = 'Traversed %d strips instead of %d.' % (num_strips, num_strips_expected)
if strip: if strip:
msg += " Check the prevSearch pattern at %s" % strip.stripUrl msg += " Check the prevSearch pattern at %s" % strip.stripUrl
self.check(num_strips == num_strips_expected, msg) assert num_strips == num_strips_expected, msg
# check images # check images
if strip: if strip:
self.check_scraperesult(num_strips_expected, strip, scraperobj) check_scraperesult(tmpdir, num_strips_expected, strip, scraperobj)
def check_scraperesult(self, num_images_expected, strip, scraperobj): def check_scraperesult(tmpdir, num_images_expected, strip, scraperobj):
# Check that exactly or for multiple pages at least num_strips images are saved. # Check that exactly or for multiple pages at least num_strips images are saved.
# This checks saved files, ie. it detects duplicate filenames. # This checks saved files, ie. it detects duplicate filenames.
saved_images = self.get_saved_images(filtertxt=bool(scraperobj.textSearch)) saved_images = get_saved_images(tmpdir, scraperobj, filtertxt=bool(scraperobj.textSearch))
num_images = len(saved_images) num_images = len(saved_images)
# subtract the number of skipped URLs with no image from the expected image number # subtract the number of skipped URLs with no image from the expected image number
attrs = (num_images, saved_images, num_images_expected, self.tmpdir) attrs = (num_images, saved_images, num_images_expected, tmpdir)
if self.scraperclass.multipleImagesPerStrip: if scraperobj.multipleImagesPerStrip:
self.check(num_images >= num_images_expected, 'saved %d %s instead of at least %d images in %s' % attrs) assert num_images >= num_images_expected, 'saved %d %s instead of at least %d images in %s' % attrs
else: else:
self.check(num_images == num_images_expected, 'saved %d %s instead of %d images in %s' % attrs) assert num_images == num_images_expected, 'saved %d %s instead of %d images in %s' % attrs
def check_stripurl(self, strip): def check_stripurl(strip, scraperobj):
if not self.scraperclass.stripUrl: if not scraperobj.stripUrl:
# no indexing support # no indexing support
return return
# test that the stripUrl regex matches the retrieved strip URL # test that the stripUrl regex matches the retrieved strip URL
urlmatch = re.escape(self.scraperclass.stripUrl) urlmatch = re.escape(scraperobj.stripUrl)
urlmatch = urlmatch.replace(r"\%s", r".+") urlmatch = urlmatch.replace(r"\%s", r".+")
urlmatch = "^%s$" % urlmatch urlmatch = "^%s$" % urlmatch
ro = re.compile(urlmatch) ro = re.compile(urlmatch)
mo = ro.search(strip.stripUrl) mo = ro.search(strip.stripUrl)
self.check(mo is not None, 'strip URL %r does not match stripUrl pattern %s' % (strip.stripUrl, urlmatch)) assert mo is not None, 'strip URL %r does not match stripUrl pattern %s' % (strip.stripUrl, urlmatch)
def save(self, image): def get_test_scraperclasses():
try: """Return scrapers that should be tested."""
image.save(self.tmpdir)
except Exception as msg:
self.check(False, 'could not save %s at %s to %s: %s' % (image.url, image.referrer, self.tmpdir, msg))
def check(self, condition, msg):
self.assertTrue(condition, "%s %s %s" % (self.name, self.url, msg))
def make_comic_tester(name, **kwargs):
"""Create and return a _ComicTester class with given name and attributes."""
return type(name, (_ComicTester,), kwargs)
def generate_comic_testers():
"""For each comic scraper, create a test class."""
g = globals()
if "TESTALL" in os.environ: if "TESTALL" in os.environ:
# test all comics (this will take some time) # test all comics (this will take some time)
scraperclasses = scraper.get_scraperclasses() scraperclasses = scraper.get_scraperclasses()
else: else:
# Get limited number of scraper tests on Travis builds to make # Get limited number of scraper tests on Travis builds to make
# it faster # it faster
testscrapernames = ['AbstruseGoose', 'xkcd'] testscrapernames = ['AbstruseGoose', 'GoComics/CalvinandHobbes', 'xkcd']
scraperclasses = [ scraperclasses = [
scraperclass for scraperclass in scraper.get_scraperclasses() scraperclass for scraperclass in scraper.get_scraperclasses()
if scraperclass.getName() in testscrapernames if scraperclass.getName() in testscrapernames
] ]
scraperclasses.sort(key=lambda scraperclass: scraperclass.__name__) return scraperclasses
for scraperclass in scraperclasses:
name = 'Test'+scraperclass.__name__
g[name] = make_comic_tester(name, scraperclass=scraperclass)
def pytest_generate_tests(metafunc):
if 'scraperclass' in metafunc.fixturenames:
metafunc.parametrize('scraperclass', get_test_scraperclasses())
generate_comic_testers()