Remove unused multi-match logic

Tobias Gruetzmacher 2022-06-04 10:56:25 +02:00
parent 680ba0969e
commit 99b72c90be
6 changed files with 49 additions and 61 deletions
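
In short, this commit narrows the scraper-lookup API: Cache.find() loses its multiple_allowed parameter and now returns a single Scraper instead of a list, Cache.get() is renamed to Cache.all(), and the hidden --multimatch development flag is dropped. A minimal before/after sketch of a call site (the API names come from the diff below; the surrounding setup is illustrative):

    from dosagelib.scraper import scrapers

    # Before this commit: find() returned a list, optionally with
    # several matches:
    #   matches = scrapers.find("xkcd", multiple_allowed=False)
    #   scraper = matches[0]

    # After this commit: find() returns exactly one Scraper and raises
    # ValueError for empty, unknown, or ambiguous names.
    scraper = scrapers.find("xkcd")

    # get() is renamed to all(); the include_removed flag is unchanged.
    for s in scrapers.all(include_removed=True):
        print(s.name)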

Changed file 1 of 6

@@ -11,7 +11,7 @@ from platformdirs import PlatformDirs
 from . import events, configuration, singleton, director
 from . import AppName, __version__
 from .output import out
-from .scraper import scrapers as allscrapers
+from .scraper import scrapers as scrapercache
 from .util import internal_error, strlimit
@@ -99,10 +99,6 @@ def setup_options():
     # used for development testing prev/next matching
     parser.add_argument('--dry-run', action='store_true',
         help=argparse.SUPPRESS)
-    # multimatch is only used for development, eg. testing if all comics of
-    # a scripted plugin are working
-    parser.add_argument('--multimatch', action='store_true',
-        help=argparse.SUPPRESS)
     # List all comic modules, even those normally suppressed, because they
     # are not "real" (moved & removed)
     parser.add_argument('--list-all', action='store_true',
@@ -200,8 +196,7 @@ def vote_comics(options):
     errors = 0
     try:
         for scraperobj in director.getScrapers(options.comic, options.basepath,
-                                               options.adult,
-                                               options.multimatch):
+                                               options.adult):
             errors += vote_comic(scraperobj)
     except ValueError as msg:
         out.exception(msg)
@@ -228,7 +223,7 @@ def vote_comic(scraperobj):
 def run(options):
     """Execute comic commands."""
     set_output_info(options)
-    allscrapers.adddir(user_plugin_path)
+    scrapercache.adddir(user_plugin_path)
     # ensure only one instance of dosage is running
     if not options.allow_multiple:
         singleton.SingleInstance()
@@ -257,7 +252,7 @@ def do_list(column_list=True, verbose=False, listall=False):
     out.info(u'Comics tagged with [{}] require age confirmation'
             ' with the --adult option.'.format(TAG_ADULT))
     out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG)
-    scrapers = sorted(allscrapers.get(listall),
+    scrapers = sorted(scrapercache.all(listall),
                      key=lambda s: s.name.lower())
     if column_list:
         num, disabled = do_column_list(scrapers)
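
A side note on the pattern visible in this file: help=argparse.SUPPRESS keeps an option parseable while hiding it from --help, which is how dosage conceals its development-only flags. A self-contained illustration (the parser and flag here are invented for the demo):

    import argparse

    parser = argparse.ArgumentParser(prog='demo')
    parser.add_argument('--dry-run', action='store_true',
                        help=argparse.SUPPRESS)  # hidden, but still parsed
    print(parser.parse_args(['--dry-run']).dry_run)  # True
    parser.print_help()  # --dry-run does not appear in the help text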

Changed file 2 of 6

@@ -11,7 +11,7 @@ from typing import Dict
 from urllib.parse import urlparse
 from .output import out
-from .scraper import scrapers as allscrapers
+from .scraper import scrapers as scrapercache
 from . import events
@@ -160,7 +160,7 @@ def getComics(options):
     errors = 0
     try:
         for scraperobj in getScrapers(options.comic, options.basepath,
-                                      options.adult, options.multimatch):
+                                      options.adult):
             jobs.put(scraperobj)
         # start threads
         num_threads = min(options.parallel, jobs.qsize())
@@ -186,7 +186,7 @@ def getComics(options):
     return errors


-def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listing=False):
+def getScrapers(comics, basepath=None, adult=True, listing=False):
     """Get scraper objects for the given comics."""
     if '@' in comics:
         # only scrapers whose directory already exists
@@ -211,18 +211,17 @@ def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listing=False):
             else:
                 name = comic
                 indexes = None
-            found_scrapers = allscrapers.find(name, multiple_allowed=multiple_allowed)
-            for scraperobj in found_scrapers:
-                if shouldRunScraper(scraperobj, adult, listing):
-                    # FIXME: Find a better way to work with indexes
-                    scraperobj.indexes = indexes
-                    if scraperobj not in scrapers:
-                        scrapers.add(scraperobj)
-                        yield scraperobj
+            scraper = scrapercache.find(name)
+            if shouldRunScraper(scraper, adult, listing):
+                # FIXME: Find a better way to work with indexes
+                scraper.indexes = indexes
+                if scraper not in scrapers:
+                    scrapers.add(scraper)
+                    yield scraper


 def get_existing_comics(basepath=None, adult=True, listing=False):
-    for scraperobj in allscrapers.get(include_removed=True):
+    for scraperobj in scrapercache.all(include_removed=True):
         dirname = scraperobj.get_download_dir(basepath)
         if os.path.isdir(dirname):
             if shouldRunScraper(scraperobj, adult, listing):
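
With the multi-match path gone, getScrapers() resolves each name through scrapercache.find() and simply lets its ValueError propagate on empty, unknown, or ambiguous input. A hedged usage sketch (the comic list and basepath are invented for illustration):

    from dosagelib.director import getScrapers

    try:
        # "name:index1,index2" suffixes populate scraper.indexes,
        # per the parsing shown in the hunk above.
        for scraper in getScrapers(['xkcd'], basepath='Comics'):
            print(scraper.name, scraper.indexes)
    except ValueError as exc:
        # raised for empty, unknown, or ambiguous comic names
        print('lookup failed:', exc)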

Changed file 3 of 6

@@ -7,7 +7,7 @@ import os
 import re
 import warnings
 from urllib.parse import urljoin
-from typing import Optional, Union, Pattern, Sequence
+from typing import Dict, List, Optional, Union, Pattern, Sequence

 import lxml
 from lxml.html.defs import link_attrs as html_link_attrs
@@ -541,38 +541,33 @@ class Cache:
     slow.
     """
     def __init__(self):
-        self.data = []
+        self.data: List[Scraper] = []
         self.userdirs = set()

-    def find(self, comic, multiple_allowed=False):
-        """Get a list comic scraper objects.
-
-        Can return more than one entry if multiple_allowed is True, else it raises
-        a ValueError if multiple modules match. The match is a case insensitive
-        substring search.
+    def find(self, comic: str) -> Scraper:
+        """Find a comic scraper object based on its name. This prefers a
+        perfect match, but falls back to a substring match, if that is unique.
+        Otherwise a ValueError is thrown.
         """
         if not comic:
             raise ValueError("empty comic name")
         candidates = []
         cname = comic.lower()
-        for scrapers in self.get(include_removed=True):
-            lname = scrapers.name.lower()
+        for scraper in self.all(include_removed=True):
+            lname = scraper.name.lower()
             if lname == cname:
                 # perfect match
-                if not multiple_allowed:
-                    return [scrapers]
-                else:
-                    candidates.append(scrapers)
-            elif cname in lname and scrapers.url:
-                candidates.append(scrapers)
-        if len(candidates) > 1 and not multiple_allowed:
+                return scraper
+            elif cname in lname and scraper.url:
+                candidates.append(scraper)
+        if len(candidates) > 1:
             comics = ", ".join(x.name for x in candidates)
             raise ValueError('multiple comics found: %s' % comics)
         elif not candidates:
             raise ValueError('comic %r not found' % comic)
-        return candidates
+        return candidates[0]

-    def load(self):
+    def load(self) -> None:
         out.debug("Loading comic modules...")
         modules = 0
         classes = 0
@@ -583,7 +578,7 @@ class Cache:
         out.debug("... %d scrapers loaded from %d classes in %d modules." % (
             len(self.data), classes, modules))

-    def adddir(self, path):
+    def adddir(self, path) -> None:
         """Add an additional directory with python modules to the scraper list.
         These are handled as if they were part of the plugins package.
         """
@@ -603,7 +598,7 @@ class Cache:
         out.debug("Added %d user classes from %d modules." % (
             classes, modules))

-    def addmodule(self, module):
+    def addmodule(self, module) -> int:
         """Adds all valid plugin classes from the specified module to the cache.
         @return: number of classes added
         """
@@ -613,8 +608,8 @@ class Cache:
             self.data.extend(plugin.getmodules())
         return classes

-    def get(self, include_removed=False):
-        """Find all comic scraper classes in the plugins directory.
+    def all(self, include_removed=False) -> List[Scraper]:
+        """Return all comic scraper classes in the plugins directory.
         @return: list of Scraper classes
         @rtype: list of Scraper
         """
@@ -625,9 +620,9 @@ class Cache:
         else:
             return [x for x in self.data if x.url]

-    def validate(self):
+    def validate(self) -> None:
         """Check for duplicate scraper names."""
-        d = {}
+        d: Dict[str, Scraper] = {}
         for scraper in self.data:
             name = scraper.name.lower()
             if name in d:
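
The lookup policy find() now implements (exact name first, then a unique case-insensitive substring match, with ValueError on ambiguity or no hit) can be distilled into a standalone sketch. Plain strings stand in for Scraper objects here, and the scraper.url guard on substring candidates is omitted:

    def find(names, query):
        """Minimal model of Cache.find() after this commit."""
        if not query:
            raise ValueError("empty comic name")
        q = query.lower()
        candidates = []
        for name in names:
            if name.lower() == q:
                return name               # perfect match wins immediately
            elif q in name.lower():
                candidates.append(name)   # substring matches are fallbacks
        if len(candidates) > 1:
            raise ValueError('multiple comics found: %s' % ", ".join(candidates))
        elif not candidates:
            raise ValueError('comic %r not found' % query)
        return candidates[0]

    names = ['xkcd', 'KeyShanDee', 'SandraAndWoo']
    print(find(names, 'xkcd'))   # exact match: 'xkcd'
    print(find(names, 'woo'))    # unique substring: 'SandraAndWoo'
    # find(names, 'and')         # raises: multiple comics found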

Changed file 4 of 6

@@ -16,7 +16,7 @@ def get_test_scrapers():
     """Return scrapers that should be tested."""
     if 'TESTALL' in os.environ:
         # test all comics (this will take some time)
-        return scrapers.get()
+        return scrapers.all()
     elif 'TESTCOMICS' in os.environ:
         scraper_pattern = os.environ['TESTCOMICS']
     else:
@@ -33,7 +33,7 @@ def get_test_scrapers():
     matcher = re.compile(scraper_pattern)
     return [
-        scraperobj for scraperobj in scrapers.get()
+        scraperobj for scraperobj in scrapers.all()
         if matcher.match(scraperobj.name)
     ]
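
Note that the filter above uses matcher.match(), which anchors the TESTCOMICS pattern at the start of each scraper name rather than searching anywhere inside it:

    import re

    matcher = re.compile('Garfield')
    print(bool(matcher.match('Garfield')))       # True: anchored at the start
    print(bool(matcher.match('NotGarfield')))    # False: match() is not search()
    print(bool(matcher.search('NotGarfield')))   # True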

Changed file 5 of 6

@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 import re

 from dosagelib.scraper import scrapers
@@ -11,7 +11,7 @@ from dosagelib.plugins import old
 class TestComicNames(object):

     def test_names(self):
-        for scraperobj in scrapers.get():
+        for scraperobj in scrapers.all():
             name = scraperobj.name
             assert name.count('/') <= 1
             if '/' in name:
@@ -21,10 +21,10 @@ class TestComicNames(object):
             assert re.sub("[^0-9a-zA-Z_]", "", comicname) == comicname

     def test_renamed(self):
-        for scraperobj in scrapers.get(include_removed=True):
+        for scraperobj in scrapers.all(include_removed=True):
             if not isinstance(scraperobj, old.Renamed):
                 continue
             assert len(scraperobj.getDisabledReasons()) > 0
             # Renamed scraper should only point to a non-disabled scraper
-            newscraper = scrapers.find(scraperobj.newname)[0]
+            newscraper = scrapers.find(scraperobj.newname)
             assert len(newscraper.getDisabledReasons()) == 0

Changed file 6 of 6

@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2013-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 from pathlib import Path

 import pytest
@@ -12,24 +12,23 @@ class TestScraper(object):
     """Test scraper module functions."""

     def test_get_scrapers(self):
-        for scraperobj in scrapers.get():
+        for scraperobj in scrapers.all():
             scraperobj.indexes = ["bla"]
             assert scraperobj.url, "missing url in %s" % scraperobj.name

     def test_find_scrapers_single(self):
-        result = scrapers.find("xkcd")
-        assert len(result) == 1
+        assert scrapers.find("xkcd")

     def test_find_scrapers_multi(self):
-        result = scrapers.find("a", multiple_allowed=True)
-        assert len(result) > 1
+        with pytest.raises(ValueError, match='multiple comics found'):
+            scrapers.find("a")

     def test_find_scrapers_error(self):
         with pytest.raises(ValueError, match='empty comic name'):
             scrapers.find('')

     def test_user_dir(self):
-        oldlen = len(scrapers.get())
+        oldlen = len(scrapers.all())
         scrapers.adddir(Path(__file__).parent / 'mocks' / 'extra')
-        assert len(scrapers.get()) == oldlen + 1
-        assert len(scrapers.find('AnotherDummyTestScraper')) == 1
+        assert len(scrapers.all()) == oldlen + 1
+        assert scrapers.find('AnotherDummyTestScraper')
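
The rewritten assertions rely on pytest.raises(..., match=...), which applies re.search to the string form of the raised exception, so a distinctive substring such as 'multiple comics found' suffices. A self-contained equivalent of the new test shape (the helper below is a stand-in, not dosage code):

    import pytest

    def ambiguous_lookup():
        # stands in for scrapers.find("a") matching several comics
        raise ValueError('multiple comics found: A, B')

    def test_ambiguous_lookup():
        with pytest.raises(ValueError, match='multiple comics found'):
            ambiguous_lookup()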