From 0428fd52b37a8c6c29196b6e99c65f82ef4dcefb Mon Sep 17 00:00:00 2001
From: Tobias Gruetzmacher
Date: Tue, 19 Jan 2021 00:20:09 +0100
Subject: [PATCH] Speed up comic checks by avoiding redundant tests

We don't need to test the "navigation" of each and every comic on the
same hosting site if those tests give us no new information (this is
true for most "modern" sites, which don't allow individual designs/HTML
per comic).
---
 tests/modules/check_comics.py | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/tests/modules/check_comics.py b/tests/modules/check_comics.py
index 0e11a9d7a..d90126faf 100644
--- a/tests/modules/check_comics.py
+++ b/tests/modules/check_comics.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2021 Tobias Gruetzmacher
 import json
 import multiprocessing
 import os
@@ -20,6 +20,17 @@ MaxStrips = 5
 ARCHIVE_ORG_MATCH = re.compile(r'(?<=web\.archive\.org/web)/\d+/')
 # Matches some (maybe-escaped - because Python 2) printf-style format specifiers
 PRINTF_MATCH = re.compile(r'\\?%[0-9]*[sd]')
+# Classes where the modules are very similar, so that testing the history of
+# each module doesn't give us much new information
+standardized_modules = {
+    'ComicSherpa',
+    'ComicsKingdom',
+    'GoComics',
+    'MangaDex',
+    'WebToons',
+}
+# Classes already seen during this test run
+seen_modules = set()


 def get_lock(host):
@@ -35,15 +46,23 @@ def test_comicmodule(tmpdir, scraperobj, worker_id):
     # Limit number of connections to one host.
     host = urlsplit(scraperobj.url).hostname
     with get_lock(host):
-        _test_comic(str(tmpdir), scraperobj)
+        maxstrips = MaxStrips
+        parts = scraperobj.name.split('/', maxsplit=1)
+        if len(parts) > 1 and parts[0] in standardized_modules:
+            if parts[0] in seen_modules:
+                maxstrips = 1
+            else:
+                seen_modules.add(parts[0])
+
+        _test_comic(str(tmpdir), scraperobj, maxstrips)


-def _test_comic(outdir, scraperobj):
+def _test_comic(outdir, scraperobj, maxstrips):
     num_strips = 0
     strip = None
     files = []
     PROXYMAP.apply(scraperobj.name)
-    for strip in scraperobj.getStrips(MaxStrips):
+    for strip in scraperobj.getStrips(maxstrips):
         files.append(_check_strip(outdir, strip,
                                   scraperobj.multipleImagesPerStrip))

@@ -54,7 +73,7 @@ def _test_comic(outdir, scraperobj):
     if scraperobj.prevSearch and not scraperobj.hitFirstStripUrl:
         # subtract the number of skipped URLs with no image from the expected
         # image number
-        num_strips_expected = MaxStrips - len(scraperobj.skippedUrls)
+        num_strips_expected = maxstrips - len(scraperobj.skippedUrls)
         msg = 'Traversed %d strips instead of %d.' % (num_strips, num_strips_expected)

     if strip:
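
For illustration, here is a minimal standalone sketch of the gating logic
this patch adds. The helper name strips_to_check and the comic names in the
demo calls are assumptions for illustration only; standardized_modules,
seen_modules, MaxStrips, and the name-splitting logic mirror the patch:

    # Minimal sketch (not part of the patch) of the per-host test gating.
    # strips_to_check and the example module names below are hypothetical.
    MaxStrips = 5
    standardized_modules = {'ComicSherpa', 'ComicsKingdom', 'GoComics',
                            'MangaDex', 'WebToons'}
    seen_modules = set()

    def strips_to_check(name):
        # Module names look like 'Host/Comic'; split off the host class.
        parts = name.split('/', maxsplit=1)
        if len(parts) > 1 and parts[0] in standardized_modules:
            if parts[0] in seen_modules:
                return 1  # host already exercised once: one strip is enough
            seen_modules.add(parts[0])
        return MaxStrips

    assert strips_to_check('GoComics/FirstComic') == 5   # full navigation test
    assert strips_to_check('GoComics/SecondComic') == 1  # redundant, shortened
    assert strips_to_check('SomeOtherHost/Comic') == 5   # not standardized

One caveat worth noting: since seen_modules is module-level state and the
suite appears to run under pytest-xdist (the worker_id fixture), each worker
process keeps its own set, so a standardized host may still get one full
navigation test per worker rather than one per run.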