From 99b72c90be6416b1b415538b2d7b9eed67680cc3 Mon Sep 17 00:00:00 2001
From: Tobias Gruetzmacher
Date: Sat, 4 Jun 2022 10:56:25 +0200
Subject: [PATCH] Remove unused multi-match logic

---
 dosagelib/cmd.py          | 13 ++++-------
 dosagelib/director.py     | 23 ++++++++++----------
 dosagelib/scraper.py      | 45 +++++++++++++++++----------------------
 tests/modules/conftest.py |  4 ++--
 tests/test_comicnames.py  |  8 +++----
 tests/test_scraper.py     | 17 +++++++--------
 6 files changed, 49 insertions(+), 61 deletions(-)

diff --git a/dosagelib/cmd.py b/dosagelib/cmd.py
index b59606c7d..b6c766b7d 100644
--- a/dosagelib/cmd.py
+++ b/dosagelib/cmd.py
@@ -11,7 +11,7 @@ from platformdirs import PlatformDirs
 from . import events, configuration, singleton, director
 from . import AppName, __version__
 from .output import out
-from .scraper import scrapers as allscrapers
+from .scraper import scrapers as scrapercache
 from .util import internal_error, strlimit
 
 
@@ -99,10 +99,6 @@ def setup_options():
     # used for development testing prev/next matching
     parser.add_argument('--dry-run', action='store_true',
         help=argparse.SUPPRESS)
-    # multimatch is only used for development, eg. testing if all comics of
-    # a scripted plugin are working
-    parser.add_argument('--multimatch', action='store_true',
-        help=argparse.SUPPRESS)
     # List all comic modules, even those normally suppressed, because they
     # are not "real" (moved & removed)
     parser.add_argument('--list-all', action='store_true',
@@ -200,8 +196,7 @@ def vote_comics(options):
     errors = 0
     try:
         for scraperobj in director.getScrapers(options.comic, options.basepath,
-                                               options.adult,
-                                               options.multimatch):
+                                               options.adult):
             errors += vote_comic(scraperobj)
     except ValueError as msg:
         out.exception(msg)
@@ -228,7 +223,7 @@ def vote_comic(scraperobj):
 def run(options):
     """Execute comic commands."""
     set_output_info(options)
-    allscrapers.adddir(user_plugin_path)
+    scrapercache.adddir(user_plugin_path)
     # ensure only one instance of dosage is running
     if not options.allow_multiple:
         singleton.SingleInstance()
@@ -257,7 +252,7 @@ def do_list(column_list=True, verbose=False, listall=False):
     out.info(u'Comics tagged with [{}] require age confirmation'
              ' with the --adult option.'.format(TAG_ADULT))
     out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG)
-    scrapers = sorted(allscrapers.get(listall),
+    scrapers = sorted(scrapercache.all(listall),
                       key=lambda s: s.name.lower())
     if column_list:
         num, disabled = do_column_list(scrapers)
diff --git a/dosagelib/director.py b/dosagelib/director.py
index 5c0516967..d0e6b7152 100644
--- a/dosagelib/director.py
+++ b/dosagelib/director.py
@@ -11,7 +11,7 @@ from typing import Dict
 from urllib.parse import urlparse
 
 from .output import out
-from .scraper import scrapers as allscrapers
+from .scraper import scrapers as scrapercache
 from . import events
 
 
@@ -160,7 +160,7 @@ def getComics(options):
     errors = 0
     try:
         for scraperobj in getScrapers(options.comic, options.basepath,
-                                      options.adult, options.multimatch):
+                                      options.adult):
             jobs.put(scraperobj)
         # start threads
         num_threads = min(options.parallel, jobs.qsize())
@@ -186,7 +186,7 @@
     return errors
 
 
-def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listing=False):
+def getScrapers(comics, basepath=None, adult=True, listing=False):
     """Get scraper objects for the given comics."""
     if '@' in comics:
         # only scrapers whose directory already exists
@@ -211,18 +211,17 @@ def getScrapers(comics, basepath=None, adult=True, listi
         else:
             name = comic
             indexes = None
-        found_scrapers = allscrapers.find(name, multiple_allowed=multiple_allowed)
-        for scraperobj in found_scrapers:
-            if shouldRunScraper(scraperobj, adult, listing):
-                # FIXME: Find a better way to work with indexes
-                scraperobj.indexes = indexes
-                if scraperobj not in scrapers:
-                    scrapers.add(scraperobj)
-                    yield scraperobj
+        scraper = scrapercache.find(name)
+        if shouldRunScraper(scraper, adult, listing):
+            # FIXME: Find a better way to work with indexes
+            scraper.indexes = indexes
+            if scraper not in scrapers:
+                scrapers.add(scraper)
+                yield scraper
 
 
 def get_existing_comics(basepath=None, adult=True, listing=False):
-    for scraperobj in allscrapers.get(include_removed=True):
+    for scraperobj in scrapercache.all(include_removed=True):
         dirname = scraperobj.get_download_dir(basepath)
         if os.path.isdir(dirname):
             if shouldRunScraper(scraperobj, adult, listing):
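[Aside, not part of the patch: with multiple_allowed gone, every comic name
handed to getScrapers() must resolve to exactly one module. A minimal sketch
of the resulting caller contract, assuming a dosage checkout on sys.path;
'xkcd' stands in for any real module name:

    from dosagelib import director

    # getScrapers() is a generator; each yielded object is the single
    # scraper matched for one command-line comic name. An ambiguous name
    # now surfaces as a ValueError from the cache instead of expanding
    # into several scrapers.
    for scraper in director.getScrapers(['xkcd'], adult=True):
        print(scraper.name)
]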
""" if not comic: raise ValueError("empty comic name") candidates = [] cname = comic.lower() - for scrapers in self.get(include_removed=True): - lname = scrapers.name.lower() + for scraper in self.all(include_removed=True): + lname = scraper.name.lower() if lname == cname: # perfect match - if not multiple_allowed: - return [scrapers] - else: - candidates.append(scrapers) - elif cname in lname and scrapers.url: - candidates.append(scrapers) - if len(candidates) > 1 and not multiple_allowed: + return scraper + elif cname in lname and scraper.url: + candidates.append(scraper) + if len(candidates) > 1: comics = ", ".join(x.name for x in candidates) raise ValueError('multiple comics found: %s' % comics) elif not candidates: raise ValueError('comic %r not found' % comic) - return candidates + return candidates[0] - def load(self): + def load(self) -> None: out.debug("Loading comic modules...") modules = 0 classes = 0 @@ -583,7 +578,7 @@ class Cache: out.debug("... %d scrapers loaded from %d classes in %d modules." % ( len(self.data), classes, modules)) - def adddir(self, path): + def adddir(self, path) -> None: """Add an additional directory with python modules to the scraper list. These are handled as if the were part of the plugins package. """ @@ -603,7 +598,7 @@ class Cache: out.debug("Added %d user classes from %d modules." % ( classes, modules)) - def addmodule(self, module): + def addmodule(self, module) -> int: """Adds all valid plugin classes from the specified module to the cache. @return: number of classes added """ @@ -613,8 +608,8 @@ class Cache: self.data.extend(plugin.getmodules()) return classes - def get(self, include_removed=False): - """Find all comic scraper classes in the plugins directory. + def all(self, include_removed=False) -> List[Scraper]: + """Return all comic scraper classes in the plugins directory. 
diff --git a/tests/modules/conftest.py b/tests/modules/conftest.py
index 35a6d99a2..43498c062 100644
--- a/tests/modules/conftest.py
+++ b/tests/modules/conftest.py
@@ -16,7 +16,7 @@ def get_test_scrapers():
     """Return scrapers that should be tested."""
     if 'TESTALL' in os.environ:
         # test all comics (this will take some time)
-        return scrapers.get()
+        return scrapers.all()
     elif 'TESTCOMICS' in os.environ:
         scraper_pattern = os.environ['TESTCOMICS']
     else:
@@ -33,7 +33,7 @@ def get_test_scrapers():
     matcher = re.compile(scraper_pattern)
 
     return [
-        scraperobj for scraperobj in scrapers.get()
+        scraperobj for scraperobj in scrapers.all()
         if matcher.match(scraperobj.name)
     ]
 
diff --git a/tests/test_comicnames.py b/tests/test_comicnames.py
index 7d094e0bf..8d3b11e5e 100644
--- a/tests/test_comicnames.py
+++ b/tests/test_comicnames.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 import re
 
 from dosagelib.scraper import scrapers
@@ -11,7 +11,7 @@ from dosagelib.plugins import old
 
 class TestComicNames(object):
     def test_names(self):
-        for scraperobj in scrapers.get():
+        for scraperobj in scrapers.all():
             name = scraperobj.name
             assert name.count('/') <= 1
             if '/' in name:
@@ -21,10 +21,10 @@ class TestComicNames(object):
             assert re.sub("[^0-9a-zA-Z_]", "", comicname) == comicname
 
     def test_renamed(self):
-        for scraperobj in scrapers.get(include_removed=True):
+        for scraperobj in scrapers.all(include_removed=True):
             if not isinstance(scraperobj, old.Renamed):
                 continue
             assert len(scraperobj.getDisabledReasons()) > 0
             # Renamed scraper should only point to a non-disabled scraper
-            newscraper = scrapers.find(scraperobj.newname)[0]
+            newscraper = scrapers.find(scraperobj.newname)
             assert len(newscraper.getDisabledReasons()) == 0
diff --git a/tests/test_scraper.py b/tests/test_scraper.py
index 9c48be900..8c0caaadd 100644
--- a/tests/test_scraper.py
+++ b/tests/test_scraper.py
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2013-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 from pathlib import Path
 
 import pytest
@@ -12,24 +12,23 @@ class TestScraper(object):
     """Test scraper module functions."""
 
     def test_get_scrapers(self):
-        for scraperobj in scrapers.get():
+        for scraperobj in scrapers.all():
             scraperobj.indexes = ["bla"]
             assert scraperobj.url, "missing url in %s" % scraperobj.name
 
     def test_find_scrapers_single(self):
-        result = scrapers.find("xkcd")
-        assert len(result) == 1
+        assert scrapers.find("xkcd")
 
     def test_find_scrapers_multi(self):
-        result = scrapers.find("a", multiple_allowed=True)
-        assert len(result) > 1
+        with pytest.raises(ValueError, match='multiple comics found'):
+            scrapers.find("a")
 
     def test_find_scrapers_error(self):
         with pytest.raises(ValueError, match='empty comic name'):
             scrapers.find('')
 
     def test_user_dir(self):
-        oldlen = len(scrapers.get())
+        oldlen = len(scrapers.all())
         scrapers.adddir(Path(__file__).parent / 'mocks' / 'extra')
-        assert len(scrapers.get()) == oldlen + 1
-        assert len(scrapers.find('AnotherDummyTestScraper')) == 1
+        assert len(scrapers.all()) == oldlen + 1
+        assert scrapers.find('AnotherDummyTestScraper')
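[Aside, not part of the patch: third-party code built on the old Cache API
needs two mechanical changes, sketched here with the hypothetical module
name 'SomeComic':

    from dosagelib.scraper import scrapers

    # Before this patch: scrapers.find('SomeComic') returned a list, so
    # callers wrote scrapers.find('SomeComic')[0], and scrapers.get()
    # listed the loaded modules.
    scraper = scrapers.find('SomeComic')  # now a single Scraper, or ValueError
    comics = scrapers.all()               # renamed from scrapers.get()
]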