Remove unused multi-match logic

Tobias Gruetzmacher 2022-06-04 10:56:25 +02:00
parent 680ba0969e
commit 99b72c90be
6 changed files with 49 additions and 61 deletions
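
In short, this commit narrows the scraper-lookup API: Cache.find() loses its multiple_allowed parameter and now returns a single Scraper instead of a list, Cache.get() is renamed to Cache.all(), and the hidden --multimatch development flag is dropped. A minimal before/after sketch of a call site (the API names come from the diff below; the surrounding setup is illustrative):

    from dosagelib.scraper import scrapers

    # Before this commit: find() returned a list, optionally with
    # several matches:
    #   matches = scrapers.find("xkcd", multiple_allowed=False)
    #   scraper = matches[0]

    # After this commit: find() returns exactly one Scraper and raises
    # ValueError for empty, unknown, or ambiguous names.
    scraper = scrapers.find("xkcd")

    # get() is renamed to all(); the include_removed flag is unchanged.
    for s in scrapers.all(include_removed=True):
        print(s.name)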

Changed file 1 of 6

@@ -11,7 +11,7 @@ from platformdirs import PlatformDirs
 from . import events, configuration, singleton, director
 from . import AppName, __version__
 from .output import out
-from .scraper import scrapers as allscrapers
+from .scraper import scrapers as scrapercache
 from .util import internal_error, strlimit
@@ -99,10 +99,6 @@ def setup_options():
     # used for development testing prev/next matching
     parser.add_argument('--dry-run', action='store_true',
         help=argparse.SUPPRESS)
-    # multimatch is only used for development, eg. testing if all comics of
-    # a scripted plugin are working
-    parser.add_argument('--multimatch', action='store_true',
-        help=argparse.SUPPRESS)
     # List all comic modules, even those normally suppressed, because they
     # are not "real" (moved & removed)
     parser.add_argument('--list-all', action='store_true',
@@ -200,8 +196,7 @@ def vote_comics(options):
     errors = 0
     try:
         for scraperobj in director.getScrapers(options.comic, options.basepath,
-                                               options.adult,
-                                               options.multimatch):
+                                               options.adult):
             errors += vote_comic(scraperobj)
     except ValueError as msg:
         out.exception(msg)
@@ -228,7 +223,7 @@ def vote_comic(scraperobj):
 def run(options):
     """Execute comic commands."""
     set_output_info(options)
-    allscrapers.adddir(user_plugin_path)
+    scrapercache.adddir(user_plugin_path)
     # ensure only one instance of dosage is running
     if not options.allow_multiple:
         singleton.SingleInstance()
@@ -257,7 +252,7 @@ def do_list(column_list=True, verbose=False, listall=False):
     out.info(u'Comics tagged with [{}] require age confirmation'
             ' with the --adult option.'.format(TAG_ADULT))
     out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG)
-    scrapers = sorted(allscrapers.get(listall),
+    scrapers = sorted(scrapercache.all(listall),
                      key=lambda s: s.name.lower())
     if column_list:
         num, disabled = do_column_list(scrapers)
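
A side note on the pattern visible in this file: help=argparse.SUPPRESS keeps an option parseable while hiding it from --help, which is how dosage conceals its development-only flags. A self-contained illustration (the parser and flag here are invented for the demo):

    import argparse

    parser = argparse.ArgumentParser(prog='demo')
    parser.add_argument('--dry-run', action='store_true',
                        help=argparse.SUPPRESS)  # hidden, but still parsed
    print(parser.parse_args(['--dry-run']).dry_run)  # True
    parser.print_help()  # --dry-run does not appear in the help text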

Changed file 2 of 6

@@ -11,7 +11,7 @@ from typing import Dict
 from urllib.parse import urlparse
 from .output import out
-from .scraper import scrapers as allscrapers
+from .scraper import scrapers as scrapercache
 from . import events
@@ -160,7 +160,7 @@ def getComics(options):
     errors = 0
     try:
         for scraperobj in getScrapers(options.comic, options.basepath,
-                                      options.adult, options.multimatch):
+                                      options.adult):
             jobs.put(scraperobj)
         # start threads
         num_threads = min(options.parallel, jobs.qsize())
@@ -186,7 +186,7 @@ def getComics(options):
     return errors


-def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listing=False):
+def getScrapers(comics, basepath=None, adult=True, listing=False):
     """Get scraper objects for the given comics."""
     if '@' in comics:
         # only scrapers whose directory already exists
@@ -211,18 +211,17 @@ def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listing=False):
             else:
                 name = comic
                 indexes = None
-            found_scrapers = allscrapers.find(name, multiple_allowed=multiple_allowed)
-            for scraperobj in found_scrapers:
-                if shouldRunScraper(scraperobj, adult, listing):
-                    # FIXME: Find a better way to work with indexes
-                    scraperobj.indexes = indexes
-                    if scraperobj not in scrapers:
-                        scrapers.add(scraperobj)
-                        yield scraperobj
+            scraper = scrapercache.find(name)
+            if shouldRunScraper(scraper, adult, listing):
+                # FIXME: Find a better way to work with indexes
+                scraper.indexes = indexes
+                if scraper not in scrapers:
+                    scrapers.add(scraper)
+                    yield scraper


 def get_existing_comics(basepath=None, adult=True, listing=False):
-    for scraperobj in allscrapers.get(include_removed=True):
+    for scraperobj in scrapercache.all(include_removed=True):
         dirname = scraperobj.get_download_dir(basepath)
         if os.path.isdir(dirname):
             if shouldRunScraper(scraperobj, adult, listing):
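
With the multi-match path gone, getScrapers() resolves each name through scrapercache.find() and simply lets its ValueError propagate on empty, unknown, or ambiguous input. A hedged usage sketch (the comic list and basepath are invented for illustration):

    from dosagelib.director import getScrapers

    try:
        # "name:index1,index2" suffixes populate scraper.indexes,
        # per the parsing shown in the hunk above.
        for scraper in getScrapers(['xkcd'], basepath='Comics'):
            print(scraper.name, scraper.indexes)
    except ValueError as exc:
        # raised for empty, unknown, or ambiguous comic names
        print('lookup failed:', exc)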

Changed file 3 of 6

@@ -7,7 +7,7 @@ import os
 import re
 import warnings
 from urllib.parse import urljoin
-from typing import Optional, Union, Pattern, Sequence
+from typing import Dict, List, Optional, Union, Pattern, Sequence

 import lxml
 from lxml.html.defs import link_attrs as html_link_attrs
@@ -541,38 +541,33 @@ class Cache:
     slow.
     """
     def __init__(self):
-        self.data = []
+        self.data: List[Scraper] = []
         self.userdirs = set()

-    def find(self, comic, multiple_allowed=False):
-        """Get a list comic scraper objects.
-
-        Can return more than one entry if multiple_allowed is True, else it raises
-        a ValueError if multiple modules match. The match is a case insensitive
-        substring search.
+    def find(self, comic: str) -> Scraper:
+        """Find a comic scraper object based on its name. This prefers a
+        perfect match, but falls back to a substring match, if that is unique.
+        Otherwise a ValueError is thrown.
         """
         if not comic:
             raise ValueError("empty comic name")
         candidates = []
         cname = comic.lower()
-        for scrapers in self.get(include_removed=True):
-            lname = scrapers.name.lower()
+        for scraper in self.all(include_removed=True):
+            lname = scraper.name.lower()
             if lname == cname:
                 # perfect match
-                if not multiple_allowed:
-                    return [scrapers]
-                else:
-                    candidates.append(scrapers)
-            elif cname in lname and scrapers.url:
-                candidates.append(scrapers)
-        if len(candidates) > 1 and not multiple_allowed:
+                return scraper
+            elif cname in lname and scraper.url:
+                candidates.append(scraper)
+        if len(candidates) > 1:
             comics = ", ".join(x.name for x in candidates)
             raise ValueError('multiple comics found: %s' % comics)
         elif not candidates:
             raise ValueError('comic %r not found' % comic)
-        return candidates
+        return candidates[0]

-    def load(self):
+    def load(self) -> None:
         out.debug("Loading comic modules...")
         modules = 0
         classes = 0
@@ -583,7 +578,7 @@ class Cache:
         out.debug("... %d scrapers loaded from %d classes in %d modules." % (
             len(self.data), classes, modules))

-    def adddir(self, path):
+    def adddir(self, path) -> None:
         """Add an additional directory with python modules to the scraper list.
         These are handled as if they were part of the plugins package.
         """
@@ -603,7 +598,7 @@ class Cache:
         out.debug("Added %d user classes from %d modules." % (
             classes, modules))

-    def addmodule(self, module):
+    def addmodule(self, module) -> int:
         """Adds all valid plugin classes from the specified module to the cache.
         @return: number of classes added
         """
@@ -613,8 +608,8 @@ class Cache:
             self.data.extend(plugin.getmodules())
         return classes

-    def get(self, include_removed=False):
-        """Find all comic scraper classes in the plugins directory.
+    def all(self, include_removed=False) -> List[Scraper]:
+        """Return all comic scraper classes in the plugins directory.
         @return: list of Scraper classes
         @rtype: list of Scraper
         """
@@ -625,9 +620,9 @@ class Cache:
         else:
             return [x for x in self.data if x.url]

-    def validate(self):
+    def validate(self) -> None:
         """Check for duplicate scraper names."""
-        d = {}
+        d: Dict[str, Scraper] = {}
         for scraper in self.data:
             name = scraper.name.lower()
             if name in d:
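
The lookup policy find() now implements (exact name first, then a unique case-insensitive substring match, with ValueError on ambiguity or no hit) can be distilled into a standalone sketch. Plain strings stand in for Scraper objects here, and the scraper.url guard on substring candidates is omitted:

    def find(names, query):
        """Minimal model of Cache.find() after this commit."""
        if not query:
            raise ValueError("empty comic name")
        q = query.lower()
        candidates = []
        for name in names:
            if name.lower() == q:
                return name               # perfect match wins immediately
            elif q in name.lower():
                candidates.append(name)   # substring matches are fallbacks
        if len(candidates) > 1:
            raise ValueError('multiple comics found: %s' % ", ".join(candidates))
        elif not candidates:
            raise ValueError('comic %r not found' % query)
        return candidates[0]

    names = ['xkcd', 'KeyShanDee', 'SandraAndWoo']
    print(find(names, 'xkcd'))   # exact match: 'xkcd'
    print(find(names, 'woo'))    # unique substring: 'SandraAndWoo'
    # find(names, 'and')         # raises: multiple comics found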

Changed file 4 of 6

@@ -16,7 +16,7 @@ def get_test_scrapers():
     """Return scrapers that should be tested."""
     if 'TESTALL' in os.environ:
         # test all comics (this will take some time)
-        return scrapers.get()
+        return scrapers.all()
     elif 'TESTCOMICS' in os.environ:
         scraper_pattern = os.environ['TESTCOMICS']
     else:
@@ -33,7 +33,7 @@ def get_test_scrapers():
     matcher = re.compile(scraper_pattern)
     return [
-        scraperobj for scraperobj in scrapers.get()
+        scraperobj for scraperobj in scrapers.all()
         if matcher.match(scraperobj.name)
     ]
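
Note that the filter above uses matcher.match(), which anchors the TESTCOMICS pattern at the start of each scraper name rather than searching anywhere inside it:

    import re

    matcher = re.compile('Garfield')
    print(bool(matcher.match('Garfield')))       # True: anchored at the start
    print(bool(matcher.match('NotGarfield')))    # False: match() is not search()
    print(bool(matcher.search('NotGarfield')))   # True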

Changed file 5 of 6

@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 import re

 from dosagelib.scraper import scrapers
@@ -11,7 +11,7 @@ from dosagelib.plugins import old
 class TestComicNames(object):

     def test_names(self):
-        for scraperobj in scrapers.get():
+        for scraperobj in scrapers.all():
             name = scraperobj.name
             assert name.count('/') <= 1
             if '/' in name:
@@ -21,10 +21,10 @@ class TestComicNames(object):
             assert re.sub("[^0-9a-zA-Z_]", "", comicname) == comicname

     def test_renamed(self):
-        for scraperobj in scrapers.get(include_removed=True):
+        for scraperobj in scrapers.all(include_removed=True):
             if not isinstance(scraperobj, old.Renamed):
                 continue
             assert len(scraperobj.getDisabledReasons()) > 0
             # Renamed scraper should only point to a non-disabled scraper
-            newscraper = scrapers.find(scraperobj.newname)[0]
+            newscraper = scrapers.find(scraperobj.newname)
             assert len(newscraper.getDisabledReasons()) == 0

Changed file 6 of 6

@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2013-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 from pathlib import Path

 import pytest
@@ -12,24 +12,23 @@ class TestScraper(object):
     """Test scraper module functions."""

     def test_get_scrapers(self):
-        for scraperobj in scrapers.get():
+        for scraperobj in scrapers.all():
             scraperobj.indexes = ["bla"]
             assert scraperobj.url, "missing url in %s" % scraperobj.name

     def test_find_scrapers_single(self):
-        result = scrapers.find("xkcd")
-        assert len(result) == 1
+        assert scrapers.find("xkcd")

     def test_find_scrapers_multi(self):
-        result = scrapers.find("a", multiple_allowed=True)
-        assert len(result) > 1
+        with pytest.raises(ValueError, match='multiple comics found'):
+            scrapers.find("a")

     def test_find_scrapers_error(self):
         with pytest.raises(ValueError, match='empty comic name'):
             scrapers.find('')

     def test_user_dir(self):
-        oldlen = len(scrapers.get())
+        oldlen = len(scrapers.all())
         scrapers.adddir(Path(__file__).parent / 'mocks' / 'extra')
-        assert len(scrapers.get()) == oldlen + 1
-        assert len(scrapers.find('AnotherDummyTestScraper')) == 1
+        assert len(scrapers.all()) == oldlen + 1
+        assert scrapers.find('AnotherDummyTestScraper')
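
The rewritten assertions rely on pytest.raises(..., match=...), which applies re.search to the string form of the raised exception, so a distinctive substring such as 'multiple comics found' suffices. A self-contained equivalent of the new test shape (the helper below is a stand-in, not dosage code):

    import pytest

    def ambiguous_lookup():
        # stands in for scrapers.find("a") matching several comics
        raise ValueError('multiple comics found: A, B')

    def test_ambiguous_lookup():
        with pytest.raises(ValueError, match='multiple comics found'):
            ambiguous_lookup()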