Refactor comic module test.
All those create-classes-on-the-fly games make my head hurt ;)
This commit is contained in:
parent
68d4dd463a
commit
f0e61a614f
1 changed files with 82 additions and 114 deletions
|
@ -1,16 +1,17 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015 Tobias Gruetzmacher
|
||||||
import tempfile
|
import tempfile
|
||||||
import shutil
|
import shutil
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
|
import pytest
|
||||||
try:
|
try:
|
||||||
from urllib.parse import urlsplit
|
from urllib.parse import urlsplit
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from urlparse import urlsplit
|
from urlparse import urlsplit
|
||||||
from unittest import TestCase
|
|
||||||
from dosagelib import scraper
|
from dosagelib import scraper
|
||||||
|
|
||||||
|
|
||||||
|
@ -30,51 +31,35 @@ def get_lock(host):
|
||||||
_locks[host] = multiprocessing.BoundedSemaphore(MaxConnections)
|
_locks[host] = multiprocessing.BoundedSemaphore(MaxConnections)
|
||||||
return _locks[host]
|
return _locks[host]
|
||||||
|
|
||||||
|
@pytest.yield_fixture
|
||||||
|
def tmpdir():
|
||||||
|
tmpdir = tempfile.mkdtemp()
|
||||||
|
yield tmpdir
|
||||||
|
shutil.rmtree(tmpdir)
|
||||||
|
|
||||||
class _ComicTester(TestCase):
|
def get_saved_images(tmpdir, scraper, filtertxt=False):
|
||||||
"""Basic comic test class."""
|
|
||||||
scraperclass=None
|
|
||||||
|
|
||||||
def setUp(self):
|
|
||||||
if self.scraperclass is not None:
|
|
||||||
self.name = self.scraperclass.getName()
|
|
||||||
self.url = self.scraperclass.starter()
|
|
||||||
# create a temporary directory for images
|
|
||||||
self.tmpdir = tempfile.mkdtemp()
|
|
||||||
else:
|
|
||||||
self.tmpdir = None
|
|
||||||
|
|
||||||
def tearDown(self):
|
|
||||||
if self.tmpdir is not None:
|
|
||||||
shutil.rmtree(self.tmpdir)
|
|
||||||
|
|
||||||
def get_saved_images(self, filtertxt=False):
|
|
||||||
"""Get saved images."""
|
"""Get saved images."""
|
||||||
dirs = tuple(self.name.split('/'))
|
dirs = tuple(scraper.getName().split('/'))
|
||||||
files = os.listdir(os.path.join(self.tmpdir, *dirs))
|
files = os.listdir(os.path.join(tmpdir, *dirs))
|
||||||
if filtertxt:
|
if filtertxt:
|
||||||
files = [x for x in files if not x.endswith(".txt")]
|
files = [x for x in files if not x.endswith(".txt")]
|
||||||
return files
|
return files
|
||||||
|
|
||||||
def test_comic(self):
|
def test_comicmodule(tmpdir, scraperclass):
|
||||||
if self.scraperclass is None:
|
|
||||||
# only run subclasses
|
|
||||||
import pytest
|
|
||||||
pytest.skip("base class")
|
|
||||||
# Test a scraper. It must be able to traverse backward for
|
# Test a scraper. It must be able to traverse backward for
|
||||||
# at least 5 strips from the start, and find strip images
|
# at least 5 strips from the start, and find strip images
|
||||||
# on at least 4 pages.
|
# on at least 4 pages.
|
||||||
scraperobj = self.scraperclass()
|
scraperobj = scraperclass()
|
||||||
# Limit number of connections to one host.
|
# Limit number of connections to one host.
|
||||||
host = get_host(scraperobj.url)
|
host = get_host(scraperobj.url)
|
||||||
try:
|
try:
|
||||||
with get_lock(host):
|
with get_lock(host):
|
||||||
self._test_comic(scraperobj)
|
_test_comic(tmpdir, scraperobj)
|
||||||
except OSError:
|
except OSError:
|
||||||
# interprocess lock not supported
|
# interprocess lock not supported
|
||||||
self._test_comic(scraperobj)
|
_test_comic(tmpdir, scraperobj)
|
||||||
|
|
||||||
def _test_comic(self, scraperobj):
|
def _test_comic(tmpdir, scraperobj):
|
||||||
num_strips = 0
|
num_strips = 0
|
||||||
max_strips = 5
|
max_strips = 5
|
||||||
strip = None
|
strip = None
|
||||||
|
@ -82,81 +67,64 @@ class _ComicTester(TestCase):
|
||||||
images = []
|
images = []
|
||||||
for image in strip.getImages():
|
for image in strip.getImages():
|
||||||
images.append(image.url)
|
images.append(image.url)
|
||||||
self.save(image)
|
image.save(tmpdir)
|
||||||
self.check(images, 'failed to find images at %s' % strip.stripUrl)
|
assert images, 'failed to find images at %s' % strip.stripUrl
|
||||||
if not self.scraperclass.multipleImagesPerStrip:
|
if not scraperobj.multipleImagesPerStrip:
|
||||||
self.check(len(images) == 1, 'found more than 1 image at %s: %s' % (strip.stripUrl, images))
|
assert len(images) == 1, 'found more than 1 image at %s: %s' % (strip.stripUrl, images)
|
||||||
if num_strips > 0 and self.scraperclass.prevUrlMatchesStripUrl:
|
if num_strips > 0 and scraperobj.prevUrlMatchesStripUrl:
|
||||||
self.check_stripurl(strip)
|
check_stripurl(strip, scraperobj)
|
||||||
num_strips += 1
|
num_strips += 1
|
||||||
if self.scraperclass.prevSearch and not scraperobj.hitFirstStripUrl:
|
if scraperobj.prevSearch and not scraperobj.hitFirstStripUrl:
|
||||||
# check strips
|
# check strips
|
||||||
num_strips_expected = max_strips - len(scraperobj.skippedUrls)
|
num_strips_expected = max_strips - len(scraperobj.skippedUrls)
|
||||||
msg = 'Traversed %d strips instead of %d.' % (num_strips, num_strips_expected)
|
msg = 'Traversed %d strips instead of %d.' % (num_strips, num_strips_expected)
|
||||||
if strip:
|
if strip:
|
||||||
msg += " Check the prevSearch pattern at %s" % strip.stripUrl
|
msg += " Check the prevSearch pattern at %s" % strip.stripUrl
|
||||||
self.check(num_strips == num_strips_expected, msg)
|
assert num_strips == num_strips_expected, msg
|
||||||
# check images
|
# check images
|
||||||
if strip:
|
if strip:
|
||||||
self.check_scraperesult(num_strips_expected, strip, scraperobj)
|
check_scraperesult(tmpdir, num_strips_expected, strip, scraperobj)
|
||||||
|
|
||||||
def check_scraperesult(self, num_images_expected, strip, scraperobj):
|
def check_scraperesult(tmpdir, num_images_expected, strip, scraperobj):
|
||||||
# Check that exactly num_strips images are saved, or at least num_strips for comics with multiple images per strip.
|
# Check that exactly num_strips images are saved, or at least num_strips for comics with multiple images per strip.
|
||||||
# This checks saved files, i.e. it detects duplicate filenames.
|
# This checks saved files, i.e. it detects duplicate filenames.
|
||||||
saved_images = self.get_saved_images(filtertxt=bool(scraperobj.textSearch))
|
saved_images = get_saved_images(tmpdir, scraperobj, filtertxt=bool(scraperobj.textSearch))
|
||||||
num_images = len(saved_images)
|
num_images = len(saved_images)
|
||||||
# subtract the number of skipped URLs with no image from the expected image number
|
# subtract the number of skipped URLs with no image from the expected image number
|
||||||
attrs = (num_images, saved_images, num_images_expected, self.tmpdir)
|
attrs = (num_images, saved_images, num_images_expected, tmpdir)
|
||||||
if self.scraperclass.multipleImagesPerStrip:
|
if scraperobj.multipleImagesPerStrip:
|
||||||
self.check(num_images >= num_images_expected, 'saved %d %s instead of at least %d images in %s' % attrs)
|
assert num_images >= num_images_expected, 'saved %d %s instead of at least %d images in %s' % attrs
|
||||||
else:
|
else:
|
||||||
self.check(num_images == num_images_expected, 'saved %d %s instead of %d images in %s' % attrs)
|
assert num_images == num_images_expected, 'saved %d %s instead of %d images in %s' % attrs
|
||||||
|
|
||||||
def check_stripurl(self, strip):
|
def check_stripurl(strip, scraperobj):
|
||||||
if not self.scraperclass.stripUrl:
|
if not scraperobj.stripUrl:
|
||||||
# no indexing support
|
# no indexing support
|
||||||
return
|
return
|
||||||
# test that the stripUrl regex matches the retrieved strip URL
|
# test that the stripUrl regex matches the retrieved strip URL
|
||||||
urlmatch = re.escape(self.scraperclass.stripUrl)
|
urlmatch = re.escape(scraperobj.stripUrl)
|
||||||
urlmatch = urlmatch.replace(r"\%s", r".+")
|
urlmatch = urlmatch.replace(r"\%s", r".+")
|
||||||
urlmatch = "^%s$" % urlmatch
|
urlmatch = "^%s$" % urlmatch
|
||||||
ro = re.compile(urlmatch)
|
ro = re.compile(urlmatch)
|
||||||
mo = ro.search(strip.stripUrl)
|
mo = ro.search(strip.stripUrl)
|
||||||
self.check(mo is not None, 'strip URL %r does not match stripUrl pattern %s' % (strip.stripUrl, urlmatch))
|
assert mo is not None, 'strip URL %r does not match stripUrl pattern %s' % (strip.stripUrl, urlmatch)
|
||||||
|
|
||||||
def save(self, image):
|
def get_test_scraperclasses():
|
||||||
try:
|
"""Return scrapers that should be tested."""
|
||||||
image.save(self.tmpdir)
|
|
||||||
except Exception as msg:
|
|
||||||
self.check(False, 'could not save %s at %s to %s: %s' % (image.url, image.referrer, self.tmpdir, msg))
|
|
||||||
|
|
||||||
def check(self, condition, msg):
|
|
||||||
self.assertTrue(condition, "%s %s %s" % (self.name, self.url, msg))
|
|
||||||
|
|
||||||
|
|
||||||
def make_comic_tester(name, **kwargs):
|
|
||||||
"""Create and return a _ComicTester class with given name and attributes."""
|
|
||||||
return type(name, (_ComicTester,), kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
def generate_comic_testers():
|
|
||||||
"""For each comic scraper, create a test class."""
|
|
||||||
g = globals()
|
|
||||||
if "TESTALL" in os.environ:
|
if "TESTALL" in os.environ:
|
||||||
# test all comics (this will take some time)
|
# test all comics (this will take some time)
|
||||||
scraperclasses = scraper.get_scraperclasses()
|
scraperclasses = scraper.get_scraperclasses()
|
||||||
else:
|
else:
|
||||||
# Get limited number of scraper tests on Travis builds to make
|
# Get limited number of scraper tests on Travis builds to make
|
||||||
# it faster
|
# it faster
|
||||||
testscrapernames = ['AbstruseGoose', 'xkcd']
|
testscrapernames = ['AbstruseGoose', 'GoComics/CalvinandHobbes', 'xkcd']
|
||||||
scraperclasses = [
|
scraperclasses = [
|
||||||
scraperclass for scraperclass in scraper.get_scraperclasses()
|
scraperclass for scraperclass in scraper.get_scraperclasses()
|
||||||
if scraperclass.getName() in testscrapernames
|
if scraperclass.getName() in testscrapernames
|
||||||
]
|
]
|
||||||
scraperclasses.sort(key=lambda scraperclass: scraperclass.__name__)
|
return scraperclasses
|
||||||
for scraperclass in scraperclasses:
|
|
||||||
name = 'Test'+scraperclass.__name__
|
|
||||||
g[name] = make_comic_tester(name, scraperclass=scraperclass)
|
|
||||||
|
|
||||||
|
def pytest_generate_tests(metafunc):
|
||||||
|
if 'scraperclass' in metafunc.fixturenames:
|
||||||
|
metafunc.parametrize('scraperclass', get_test_scraperclasses())
|
||||||
|
|
||||||
generate_comic_testers()
|
|
||||||
|
|
Loading…
Reference in a new issue