Tests: Keep comics of the same module in the same process

This allows our host-based throttling to be effective and keeps
cross-process locks to a minimum.
This commit is contained in:
Tobias Gruetzmacher 2019-12-03 23:35:41 +01:00
parent a347bebfe3
commit 18f8e093a7
3 changed files with 23 additions and 15 deletions

View file

@ -55,6 +55,7 @@ bash =
argcomplete
test =
pytest-cov
pytest-xdist
responses
[bdist_wheel]

View file

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2018 Tobias Gruetzmacher
# Copyright (C) 2015-2019 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
@ -10,15 +10,10 @@ import multiprocessing
from six.moves.urllib.parse import urlsplit
def get_host(url):
    """Return the lower-cased network-location part of ``url``.

    The value is the ``netloc`` component produced by ``urlsplit``: it is an
    empty string when the URL has no ``//host`` section, and it still carries
    any port or credentials that were written in the URL.
    """
    # Use the named attribute instead of the positional index [1].
    return urlsplit(url).netloc.lower()
# Dictionary with per-host locks.
_locks = {}
# Allowed number of connections per host
MaxConnections = 4
MaxConnections = 2
# Maximum number of strips to get to test a comic
MaxStrips = 5
@ -30,16 +25,12 @@ def get_lock(host):
return _locks[host]
def test_comicmodule(tmpdir, scraperobj, worker_id):
    '''Test a scraper. It must be able to traverse backward for at least 5
    strips from the start, and find strip images on at least 4 pages.'''
    # NOTE(review): worker_id is requested but never read in this body;
    # presumably it is the pytest-xdist fixture -- confirm why it is needed.
    # Limit number of connections to one host.
    # hostname is None when the URL has no network location; how get_lock
    # treats that is not visible from here.
    host = urlsplit(scraperobj.url).hostname
    with get_lock(host):
        _test_comic(str(tmpdir), scraperobj)

View file

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
# Copyright (C) 2015-2019 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
@ -9,6 +9,9 @@ import re
import operator
import os
import pytest
from xdist.dsession import LoadScopeScheduling
from dosagelib import scraper
@ -45,3 +48,16 @@ def pytest_generate_tests(metafunc):
if 'scraperobj' in metafunc.fixturenames:
metafunc.parametrize('scraperobj', get_test_scrapers(),
ids=operator.attrgetter('name'))
class LoadModScheduling(LoadScopeScheduling):
    """Load scheduling that groups tests by comic module (see xdist)."""

    def _split_scope(self, nodeid):
        """Return the scheduling scope derived from ``nodeid``.

        The scope is the text before the first ``::``, followed by ``::``
        and whatever precedes the first ``/`` in the remainder. A node id
        without ``::`` raises ``ValueError`` from the tuple unpacking.
        """
        module_part, test_part = nodeid.split("::", 1)
        scope_tail = test_part.split("/", 1)[0]
        return "::".join((module_part, scope_tail))
@pytest.hookimpl(trylast=True)
def pytest_xdist_make_scheduler(config, log):
    """Hand pytest-xdist a ``LoadModScheduling`` instance as its scheduler.

    ``config`` and ``log`` are passed straight through to the scheduler's
    constructor. ``trylast=True`` asks the plugin manager to call this
    implementation after other implementations of the same hook; it
    replaces the deprecated marker form ``@pytest.mark.trylast``.
    """
    return LoadModScheduling(config, log)