From 99b72c90be6416b1b415538b2d7b9eed67680cc3 Mon Sep 17 00:00:00 2001
From: Tobias Gruetzmacher
Date: Sat, 4 Jun 2022 10:56:25 +0200
Subject: [PATCH] Remove unused multi-match logic

---
 dosagelib/cmd.py          | 13 ++++-------
 dosagelib/director.py     | 23 ++++++++++----------
 dosagelib/scraper.py      | 45 +++++++++++++++++----------------------
 tests/modules/conftest.py |  4 ++--
 tests/test_comicnames.py  |  8 +++----
 tests/test_scraper.py     | 17 +++++++--------
 6 files changed, 49 insertions(+), 61 deletions(-)

diff --git a/dosagelib/cmd.py b/dosagelib/cmd.py
index b59606c7d..b6c766b7d 100644
--- a/dosagelib/cmd.py
+++ b/dosagelib/cmd.py
@@ -11,7 +11,7 @@ from platformdirs import PlatformDirs
 from . import events, configuration, singleton, director
 from . import AppName, __version__
 from .output import out
-from .scraper import scrapers as allscrapers
+from .scraper import scrapers as scrapercache
 from .util import internal_error, strlimit
 
 
@@ -99,10 +99,6 @@ def setup_options():
     # used for development testing prev/next matching
     parser.add_argument('--dry-run', action='store_true',
         help=argparse.SUPPRESS)
-    # multimatch is only used for development, eg. testing if all comics of
-    # a scripted plugin are working
-    parser.add_argument('--multimatch', action='store_true',
-        help=argparse.SUPPRESS)
     # List all comic modules, even those normally suppressed, because they
     # are not "real" (moved & removed)
     parser.add_argument('--list-all', action='store_true',
@@ -200,8 +196,7 @@ def vote_comics(options):
     errors = 0
     try:
         for scraperobj in director.getScrapers(options.comic, options.basepath,
-                                               options.adult,
-                                               options.multimatch):
+                                               options.adult):
             errors += vote_comic(scraperobj)
     except ValueError as msg:
         out.exception(msg)
@@ -228,7 +223,7 @@ def vote_comic(scraperobj):
 def run(options):
     """Execute comic commands."""
     set_output_info(options)
-    allscrapers.adddir(user_plugin_path)
+    scrapercache.adddir(user_plugin_path)
     # ensure only one instance of dosage is running
     if not options.allow_multiple:
         singleton.SingleInstance()
@@ -257,7 +252,7 @@ def do_list(column_list=True, verbose=False, listall=False):
     out.info(u'Comics tagged with [{}] require age confirmation'
              ' with the --adult option.'.format(TAG_ADULT))
     out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG)
-    scrapers = sorted(allscrapers.get(listall),
+    scrapers = sorted(scrapercache.all(listall),
                       key=lambda s: s.name.lower())
     if column_list:
         num, disabled = do_column_list(scrapers)
diff --git a/dosagelib/director.py b/dosagelib/director.py
index 5c0516967..d0e6b7152 100644
--- a/dosagelib/director.py
+++ b/dosagelib/director.py
@@ -11,7 +11,7 @@ from typing import Dict
 from urllib.parse import urlparse
 
 from .output import out
-from .scraper import scrapers as allscrapers
+from .scraper import scrapers as scrapercache
 from . import events
 
 
@@ -160,7 +160,7 @@ def getComics(options):
     errors = 0
     try:
         for scraperobj in getScrapers(options.comic, options.basepath,
-                                      options.adult, options.multimatch):
+                                      options.adult):
             jobs.put(scraperobj)
         # start threads
         num_threads = min(options.parallel, jobs.qsize())
@@ -186,7 +186,7 @@
     return errors
 
 
-def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listing=False):
+def getScrapers(comics, basepath=None, adult=True, listing=False):
     """Get scraper objects for the given comics."""
     if '@' in comics:
         # only scrapers whose directory already exists
@@ -211,18 +211,17 @@ def getScrapers(comics, basepath=None, adult=True, listi
         else:
             name = comic
             indexes = None
-        found_scrapers = allscrapers.find(name, multiple_allowed=multiple_allowed)
-        for scraperobj in found_scrapers:
-            if shouldRunScraper(scraperobj, adult, listing):
-                # FIXME: Find a better way to work with indexes
-                scraperobj.indexes = indexes
-                if scraperobj not in scrapers:
-                    scrapers.add(scraperobj)
-                    yield scraperobj
+        scraper = scrapercache.find(name)
+        if shouldRunScraper(scraper, adult, listing):
+            # FIXME: Find a better way to work with indexes
+            scraper.indexes = indexes
+            if scraper not in scrapers:
+                scrapers.add(scraper)
+                yield scraper
 
 
 def get_existing_comics(basepath=None, adult=True, listing=False):
-    for scraperobj in allscrapers.get(include_removed=True):
+    for scraperobj in scrapercache.all(include_removed=True):
         dirname = scraperobj.get_download_dir(basepath)
         if os.path.isdir(dirname):
             if shouldRunScraper(scraperobj, adult, listing):
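[Aside, not part of the patch: with multiple_allowed gone, every comic name
handed to getScrapers() must resolve to exactly one module. A minimal sketch
of the resulting caller contract, assuming a dosage checkout on sys.path;
'xkcd' stands in for any real module name:

    from dosagelib import director

    # getScrapers() is a generator; each yielded object is the single
    # scraper matched for one command-line comic name. An ambiguous name
    # now surfaces as a ValueError from the cache instead of expanding
    # into several scrapers.
    for scraper in director.getScrapers(['xkcd'], adult=True):
        print(scraper.name)
]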
""" if not comic: raise ValueError("empty comic name") candidates = [] cname = comic.lower() - for scrapers in self.get(include_removed=True): - lname = scrapers.name.lower() + for scraper in self.all(include_removed=True): + lname = scraper.name.lower() if lname == cname: # perfect match - if not multiple_allowed: - return [scrapers] - else: - candidates.append(scrapers) - elif cname in lname and scrapers.url: - candidates.append(scrapers) - if len(candidates) > 1 and not multiple_allowed: + return scraper + elif cname in lname and scraper.url: + candidates.append(scraper) + if len(candidates) > 1: comics = ", ".join(x.name for x in candidates) raise ValueError('multiple comics found: %s' % comics) elif not candidates: raise ValueError('comic %r not found' % comic) - return candidates + return candidates[0] - def load(self): + def load(self) -> None: out.debug("Loading comic modules...") modules = 0 classes = 0 @@ -583,7 +578,7 @@ class Cache: out.debug("... %d scrapers loaded from %d classes in %d modules." % ( len(self.data), classes, modules)) - def adddir(self, path): + def adddir(self, path) -> None: """Add an additional directory with python modules to the scraper list. These are handled as if the were part of the plugins package. """ @@ -603,7 +598,7 @@ class Cache: out.debug("Added %d user classes from %d modules." % ( classes, modules)) - def addmodule(self, module): + def addmodule(self, module) -> int: """Adds all valid plugin classes from the specified module to the cache. @return: number of classes added """ @@ -613,8 +608,8 @@ class Cache: self.data.extend(plugin.getmodules()) return classes - def get(self, include_removed=False): - """Find all comic scraper classes in the plugins directory. + def all(self, include_removed=False) -> List[Scraper]: + """Return all comic scraper classes in the plugins directory. 
diff --git a/tests/modules/conftest.py b/tests/modules/conftest.py
index 35a6d99a2..43498c062 100644
--- a/tests/modules/conftest.py
+++ b/tests/modules/conftest.py
@@ -16,7 +16,7 @@ def get_test_scrapers():
     """Return scrapers that should be tested."""
     if 'TESTALL' in os.environ:
         # test all comics (this will take some time)
-        return scrapers.get()
+        return scrapers.all()
     elif 'TESTCOMICS' in os.environ:
         scraper_pattern = os.environ['TESTCOMICS']
     else:
@@ -33,7 +33,7 @@ def get_test_scrapers():
     matcher = re.compile(scraper_pattern)
 
     return [
-        scraperobj for scraperobj in scrapers.get()
+        scraperobj for scraperobj in scrapers.all()
         if matcher.match(scraperobj.name)
     ]
 
diff --git a/tests/test_comicnames.py b/tests/test_comicnames.py
index 7d094e0bf..8d3b11e5e 100644
--- a/tests/test_comicnames.py
+++ b/tests/test_comicnames.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 import re
 
 from dosagelib.scraper import scrapers
@@ -11,7 +11,7 @@ from dosagelib.plugins import old
 
 class TestComicNames(object):
     def test_names(self):
-        for scraperobj in scrapers.get():
+        for scraperobj in scrapers.all():
             name = scraperobj.name
             assert name.count('/') <= 1
             if '/' in name:
@@ -21,10 +21,10 @@ class TestComicNames(object):
             assert re.sub("[^0-9a-zA-Z_]", "", comicname) == comicname
 
     def test_renamed(self):
-        for scraperobj in scrapers.get(include_removed=True):
+        for scraperobj in scrapers.all(include_removed=True):
             if not isinstance(scraperobj, old.Renamed):
                 continue
             assert len(scraperobj.getDisabledReasons()) > 0
             # Renamed scraper should only point to a non-disabled scraper
-            newscraper = scrapers.find(scraperobj.newname)[0]
+            newscraper = scrapers.find(scraperobj.newname)
             assert len(newscraper.getDisabledReasons()) == 0
diff --git a/tests/test_scraper.py b/tests/test_scraper.py
index 9c48be900..8c0caaadd 100644
--- a/tests/test_scraper.py
+++ b/tests/test_scraper.py
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2013-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 from pathlib import Path
 
 import pytest
@@ -12,24 +12,23 @@ class TestScraper(object):
     """Test scraper module functions."""
 
     def test_get_scrapers(self):
-        for scraperobj in scrapers.get():
+        for scraperobj in scrapers.all():
             scraperobj.indexes = ["bla"]
             assert scraperobj.url, "missing url in %s" % scraperobj.name
 
     def test_find_scrapers_single(self):
-        result = scrapers.find("xkcd")
-        assert len(result) == 1
+        assert scrapers.find("xkcd")
 
     def test_find_scrapers_multi(self):
-        result = scrapers.find("a", multiple_allowed=True)
-        assert len(result) > 1
+        with pytest.raises(ValueError, match='multiple comics found'):
+            scrapers.find("a")
 
     def test_find_scrapers_error(self):
         with pytest.raises(ValueError, match='empty comic name'):
             scrapers.find('')
 
     def test_user_dir(self):
-        oldlen = len(scrapers.get())
+        oldlen = len(scrapers.all())
         scrapers.adddir(Path(__file__).parent / 'mocks' / 'extra')
-        assert len(scrapers.get()) == oldlen + 1
-        assert len(scrapers.find('AnotherDummyTestScraper')) == 1
+        assert len(scrapers.all()) == oldlen + 1
+        assert scrapers.find('AnotherDummyTestScraper')
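[Aside, not part of the patch: third-party code built on the old Cache API
needs two mechanical changes, sketched here with the hypothetical module
name 'SomeComic':

    from dosagelib.scraper import scrapers

    # Before this patch: scrapers.find('SomeComic') returned a list, so
    # callers wrote scrapers.find('SomeComic')[0], and scrapers.get()
    # listed the loaded modules.
    scraper = scrapers.find('SomeComic')  # now a single Scraper, or ValueError
    comics = scrapers.all()               # renamed from scrapers.get()
]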