Remove unused multi-match logic

Tobias Gruetzmacher 2022-06-04 10:56:25 +02:00
parent 680ba0969e
commit 99b72c90be
6 changed files with 49 additions and 61 deletions
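In short, this commit removes the multi-match code path: the development-only --multimatch option is gone, the scraper cache import is renamed to scrapercache, scrapers.get() becomes scrapers.all(), and Cache.find() returns a single Scraper instead of a list. A minimal sketch of the resulting API, assuming the dosagelib package from this repository (the comic name "xkcd" is just an example, and the cache is assumed to load its comic modules on first use):

    from dosagelib.scraper import scrapers as scrapercache

    # all() replaces the old get(); it lists every active scraper class.
    for scraper in scrapercache.all():
        print(scraper.name)

    # find() now resolves a name to exactly one Scraper: an exact
    # (case-insensitive) match wins, a unique substring match is accepted,
    # and anything else raises ValueError.
    xkcd = scrapercache.find("xkcd")
    print(xkcd.url)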


@@ -11,7 +11,7 @@ from platformdirs import PlatformDirs
from . import events, configuration, singleton, director
from . import AppName, __version__
from .output import out
-from .scraper import scrapers as allscrapers
+from .scraper import scrapers as scrapercache
from .util import internal_error, strlimit
@@ -99,10 +99,6 @@ def setup_options():
# used for development testing prev/next matching
parser.add_argument('--dry-run', action='store_true',
help=argparse.SUPPRESS)
-# multimatch is only used for development, eg. testing if all comics of
-# a scripted plugin are working
-parser.add_argument('--multimatch', action='store_true',
-help=argparse.SUPPRESS)
# List all comic modules, even those normally suppressed, because they
# are not "real" (moved & removed)
parser.add_argument('--list-all', action='store_true',
@@ -200,8 +196,7 @@ def vote_comics(options):
errors = 0
try:
for scraperobj in director.getScrapers(options.comic, options.basepath,
-options.adult,
-options.multimatch):
+options.adult):
errors += vote_comic(scraperobj)
except ValueError as msg:
out.exception(msg)
@@ -228,7 +223,7 @@ def vote_comic(scraperobj):
def run(options):
"""Execute comic commands."""
set_output_info(options)
-allscrapers.adddir(user_plugin_path)
+scrapercache.adddir(user_plugin_path)
# ensure only one instance of dosage is running
if not options.allow_multiple:
singleton.SingleInstance()
@@ -257,7 +252,7 @@ def do_list(column_list=True, verbose=False, listall=False):
out.info(u'Comics tagged with [{}] require age confirmation'
' with the --adult option.'.format(TAG_ADULT))
out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG)
-scrapers = sorted(allscrapers.get(listall),
+scrapers = sorted(scrapercache.all(listall),
key=lambda s: s.name.lower())
if column_list:
num, disabled = do_column_list(scrapers)


@@ -11,7 +11,7 @@ from typing import Dict
from urllib.parse import urlparse
from .output import out
-from .scraper import scrapers as allscrapers
+from .scraper import scrapers as scrapercache
from . import events
@@ -160,7 +160,7 @@ def getComics(options):
errors = 0
try:
for scraperobj in getScrapers(options.comic, options.basepath,
-options.adult, options.multimatch):
+options.adult):
jobs.put(scraperobj)
# start threads
num_threads = min(options.parallel, jobs.qsize())
@@ -186,7 +186,7 @@ def getComics(options):
return errors
-def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listing=False):
+def getScrapers(comics, basepath=None, adult=True, listing=False):
"""Get scraper objects for the given comics."""
if '@' in comics:
# only scrapers whose directory already exists
@@ -211,18 +211,17 @@ def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listing=False):
else:
name = comic
indexes = None
-found_scrapers = allscrapers.find(name, multiple_allowed=multiple_allowed)
-for scraperobj in found_scrapers:
-if shouldRunScraper(scraperobj, adult, listing):
-# FIXME: Find a better way to work with indexes
-scraperobj.indexes = indexes
-if scraperobj not in scrapers:
-scrapers.add(scraperobj)
-yield scraperobj
+scraper = scrapercache.find(name)
+if shouldRunScraper(scraper, adult, listing):
+# FIXME: Find a better way to work with indexes
+scraper.indexes = indexes
+if scraper not in scrapers:
+scrapers.add(scraper)
+yield scraper
def get_existing_comics(basepath=None, adult=True, listing=False):
-for scraperobj in allscrapers.get(include_removed=True):
+for scraperobj in scrapercache.all(include_removed=True):
dirname = scraperobj.get_download_dir(basepath)
if os.path.isdir(dirname):
if shouldRunScraper(scraperobj, adult, listing):
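With multiple_allowed dropped, getScrapers() keeps its generator shape, but every requested name must now resolve to exactly one scraper. A hedged usage sketch of the new signature; the comic list and base path are made-up example values:

    from dosagelib import director

    # "@" still expands to all comics that already have a download directory
    # under basepath; plain names must each match a single comic module.
    for scraperobj in director.getScrapers(["xkcd"], basepath="Comics", adult=True):
        print(scraperobj.name, scraperobj.indexes)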


@@ -7,7 +7,7 @@ import os
import re
import warnings
from urllib.parse import urljoin
-from typing import Optional, Union, Pattern, Sequence
+from typing import Dict, List, Optional, Union, Pattern, Sequence
import lxml
from lxml.html.defs import link_attrs as html_link_attrs
@@ -541,38 +541,33 @@ class Cache:
slow.
"""
def __init__(self):
-self.data = []
+self.data: List[Scraper] = []
self.userdirs = set()
-def find(self, comic, multiple_allowed=False):
-"""Get a list comic scraper objects.
-Can return more than one entry if multiple_allowed is True, else it raises
-a ValueError if multiple modules match. The match is a case insensitive
-substring search.
+def find(self, comic: str) -> Scraper:
+"""Find a comic scraper object based on its name. This prefers a
+perfect match, but falls back to a substring match, if that is unique.
+Otherwise a ValueError is thrown.
"""
if not comic:
raise ValueError("empty comic name")
candidates = []
cname = comic.lower()
-for scrapers in self.get(include_removed=True):
-lname = scrapers.name.lower()
+for scraper in self.all(include_removed=True):
+lname = scraper.name.lower()
if lname == cname:
# perfect match
-if not multiple_allowed:
-return [scrapers]
-else:
-candidates.append(scrapers)
-elif cname in lname and scrapers.url:
-candidates.append(scrapers)
-if len(candidates) > 1 and not multiple_allowed:
+return scraper
+elif cname in lname and scraper.url:
+candidates.append(scraper)
+if len(candidates) > 1:
comics = ", ".join(x.name for x in candidates)
raise ValueError('multiple comics found: %s' % comics)
elif not candidates:
raise ValueError('comic %r not found' % comic)
-return candidates
+return candidates[0]
-def load(self):
+def load(self) -> None:
out.debug("Loading comic modules...")
modules = 0
classes = 0
@@ -583,7 +578,7 @@ class Cache:
out.debug("... %d scrapers loaded from %d classes in %d modules." % (
len(self.data), classes, modules))
-def adddir(self, path):
+def adddir(self, path) -> None:
"""Add an additional directory with python modules to the scraper list.
These are handled as if they were part of the plugins package.
"""
@@ -603,7 +598,7 @@ class Cache:
out.debug("Added %d user classes from %d modules." % (
classes, modules))
-def addmodule(self, module):
+def addmodule(self, module) -> int:
"""Adds all valid plugin classes from the specified module to the cache.
@return: number of classes added
"""
@@ -613,8 +608,8 @@ class Cache:
self.data.extend(plugin.getmodules())
return classes
-def get(self, include_removed=False):
-"""Find all comic scraper classes in the plugins directory.
+def all(self, include_removed=False) -> List[Scraper]:
+"""Return all comic scraper classes in the plugins directory.
@return: list of Scraper classes
@rtype: list of Scraper
"""
@@ -625,9 +620,9 @@ class Cache:
else:
return [x for x in self.data if x.url]
-def validate(self):
+def validate(self) -> None:
"""Check for duplicate scraper names."""
-d = {}
+d: Dict[str, Scraper] = {}
for scraper in self.data:
name = scraper.name.lower()
if name in d:
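The new find() contract (an exact name match wins, a unique substring match is accepted, anything else raises ValueError) is what the test changes further down exercise. A small illustrative sketch; the lookup strings are examples only:

    from dosagelib.scraper import scrapers

    print(scrapers.find("xkcd").name)   # exact, case-insensitive match

    try:
        scrapers.find("a")              # substring of many comic names
    except ValueError as exc:
        print(exc)                      # "multiple comics found: ..."

    try:
        scrapers.find("")               # empty names are rejected outright
    except ValueError as exc:
        print(exc)                      # prints: empty comic name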


@@ -16,7 +16,7 @@ def get_test_scrapers():
"""Return scrapers that should be tested."""
if 'TESTALL' in os.environ:
# test all comics (this will take some time)
-return scrapers.get()
+return scrapers.all()
elif 'TESTCOMICS' in os.environ:
scraper_pattern = os.environ['TESTCOMICS']
else:
@@ -33,7 +33,7 @@ def get_test_scrapers():
matcher = re.compile(scraper_pattern)
return [
-scraperobj for scraperobj in scrapers.get()
+scraperobj for scraperobj in scrapers.all()
if matcher.match(scraperobj.name)
]


@@ -1,7 +1,7 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
import re
from dosagelib.scraper import scrapers
@@ -11,7 +11,7 @@ from dosagelib.plugins import old
class TestComicNames(object):
def test_names(self):
-for scraperobj in scrapers.get():
+for scraperobj in scrapers.all():
name = scraperobj.name
assert name.count('/') <= 1
if '/' in name:
@@ -21,10 +21,10 @@ class TestComicNames(object):
assert re.sub("[^0-9a-zA-Z_]", "", comicname) == comicname
def test_renamed(self):
-for scraperobj in scrapers.get(include_removed=True):
+for scraperobj in scrapers.all(include_removed=True):
if not isinstance(scraperobj, old.Renamed):
continue
assert len(scraperobj.getDisabledReasons()) > 0
# Renamed scraper should only point to a non-disabled scraper
-newscraper = scrapers.find(scraperobj.newname)[0]
+newscraper = scrapers.find(scraperobj.newname)
assert len(newscraper.getDisabledReasons()) == 0


@@ -1,6 +1,6 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2013-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
from pathlib import Path
import pytest
@@ -12,24 +12,23 @@ class TestScraper(object):
"""Test scraper module functions."""
def test_get_scrapers(self):
-for scraperobj in scrapers.get():
+for scraperobj in scrapers.all():
scraperobj.indexes = ["bla"]
assert scraperobj.url, "missing url in %s" % scraperobj.name
def test_find_scrapers_single(self):
result = scrapers.find("xkcd")
assert len(result) == 1
assert scrapers.find("xkcd")
def test_find_scrapers_multi(self):
result = scrapers.find("a", multiple_allowed=True)
assert len(result) > 1
with pytest.raises(ValueError, match='multiple comics found'):
scrapers.find("a")
def test_find_scrapers_error(self):
with pytest.raises(ValueError, match='empty comic name'):
scrapers.find('')
def test_user_dir(self):
-oldlen = len(scrapers.get())
+oldlen = len(scrapers.all())
scrapers.adddir(Path(__file__).parent / 'mocks' / 'extra')
-assert len(scrapers.get()) == oldlen + 1
-assert len(scrapers.find('AnotherDummyTestScraper')) == 1
+assert len(scrapers.all()) == oldlen + 1
+assert scrapers.find('AnotherDummyTestScraper')