Remove unused multi-match logic
This commit is contained in:
parent
680ba0969e
commit
99b72c90be
6 changed files with 49 additions and 61 deletions
|
@ -11,7 +11,7 @@ from platformdirs import PlatformDirs
|
|||
from . import events, configuration, singleton, director
|
||||
from . import AppName, __version__
|
||||
from .output import out
|
||||
from .scraper import scrapers as allscrapers
|
||||
from .scraper import scrapers as scrapercache
|
||||
from .util import internal_error, strlimit
|
||||
|
||||
|
||||
|
@ -99,10 +99,6 @@ def setup_options():
|
|||
# used for development testing prev/next matching
|
||||
parser.add_argument('--dry-run', action='store_true',
|
||||
help=argparse.SUPPRESS)
|
||||
# multimatch is only used for development, eg. testing if all comics of
|
||||
# a scripted plugin are working
|
||||
parser.add_argument('--multimatch', action='store_true',
|
||||
help=argparse.SUPPRESS)
|
||||
# List all comic modules, even those normally suppressed, because they
|
||||
# are not "real" (moved & removed)
|
||||
parser.add_argument('--list-all', action='store_true',
|
||||
|
@ -200,8 +196,7 @@ def vote_comics(options):
|
|||
errors = 0
|
||||
try:
|
||||
for scraperobj in director.getScrapers(options.comic, options.basepath,
|
||||
options.adult,
|
||||
options.multimatch):
|
||||
options.adult):
|
||||
errors += vote_comic(scraperobj)
|
||||
except ValueError as msg:
|
||||
out.exception(msg)
|
||||
|
@ -228,7 +223,7 @@ def vote_comic(scraperobj):
|
|||
def run(options):
|
||||
"""Execute comic commands."""
|
||||
set_output_info(options)
|
||||
allscrapers.adddir(user_plugin_path)
|
||||
scrapercache.adddir(user_plugin_path)
|
||||
# ensure only one instance of dosage is running
|
||||
if not options.allow_multiple:
|
||||
singleton.SingleInstance()
|
||||
|
@ -257,7 +252,7 @@ def do_list(column_list=True, verbose=False, listall=False):
|
|||
out.info(u'Comics tagged with [{}] require age confirmation'
|
||||
' with the --adult option.'.format(TAG_ADULT))
|
||||
out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG)
|
||||
scrapers = sorted(allscrapers.get(listall),
|
||||
scrapers = sorted(scrapercache.all(listall),
|
||||
key=lambda s: s.name.lower())
|
||||
if column_list:
|
||||
num, disabled = do_column_list(scrapers)
|
||||
|
|
|
@ -11,7 +11,7 @@ from typing import Dict
|
|||
from urllib.parse import urlparse
|
||||
|
||||
from .output import out
|
||||
from .scraper import scrapers as allscrapers
|
||||
from .scraper import scrapers as scrapercache
|
||||
from . import events
|
||||
|
||||
|
||||
|
@ -160,7 +160,7 @@ def getComics(options):
|
|||
errors = 0
|
||||
try:
|
||||
for scraperobj in getScrapers(options.comic, options.basepath,
|
||||
options.adult, options.multimatch):
|
||||
options.adult):
|
||||
jobs.put(scraperobj)
|
||||
# start threads
|
||||
num_threads = min(options.parallel, jobs.qsize())
|
||||
|
@ -186,7 +186,7 @@ def getComics(options):
|
|||
return errors
|
||||
|
||||
|
||||
def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listing=False):
|
||||
def getScrapers(comics, basepath=None, adult=True, listing=False):
|
||||
"""Get scraper objects for the given comics."""
|
||||
if '@' in comics:
|
||||
# only scrapers whose directory already exists
|
||||
|
@ -211,18 +211,17 @@ def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listi
|
|||
else:
|
||||
name = comic
|
||||
indexes = None
|
||||
found_scrapers = allscrapers.find(name, multiple_allowed=multiple_allowed)
|
||||
for scraperobj in found_scrapers:
|
||||
if shouldRunScraper(scraperobj, adult, listing):
|
||||
# FIXME: Find a better way to work with indexes
|
||||
scraperobj.indexes = indexes
|
||||
if scraperobj not in scrapers:
|
||||
scrapers.add(scraperobj)
|
||||
yield scraperobj
|
||||
scraper = scrapercache.find(name)
|
||||
if shouldRunScraper(scraper, adult, listing):
|
||||
# FIXME: Find a better way to work with indexes
|
||||
scraper.indexes = indexes
|
||||
if scraper not in scrapers:
|
||||
scrapers.add(scraper)
|
||||
yield scraper
|
||||
|
||||
|
||||
def get_existing_comics(basepath=None, adult=True, listing=False):
|
||||
for scraperobj in allscrapers.get(include_removed=True):
|
||||
for scraperobj in scrapercache.all(include_removed=True):
|
||||
dirname = scraperobj.get_download_dir(basepath)
|
||||
if os.path.isdir(dirname):
|
||||
if shouldRunScraper(scraperobj, adult, listing):
|
||||
|
|
|
@ -7,7 +7,7 @@ import os
|
|||
import re
|
||||
import warnings
|
||||
from urllib.parse import urljoin
|
||||
from typing import Optional, Union, Pattern, Sequence
|
||||
from typing import Dict, List, Optional, Union, Pattern, Sequence
|
||||
|
||||
import lxml
|
||||
from lxml.html.defs import link_attrs as html_link_attrs
|
||||
|
@ -541,38 +541,33 @@ class Cache:
|
|||
slow.
|
||||
"""
|
||||
def __init__(self):
|
||||
self.data = []
|
||||
self.data: List[Scraper] = []
|
||||
self.userdirs = set()
|
||||
|
||||
def find(self, comic, multiple_allowed=False):
|
||||
"""Get a list comic scraper objects.
|
||||
|
||||
Can return more than one entry if multiple_allowed is True, else it raises
|
||||
a ValueError if multiple modules match. The match is a case insensitive
|
||||
substring search.
|
||||
def find(self, comic: str) -> Scraper:
|
||||
"""Find a comic scraper object based on its name. This prefers a
|
||||
perfect match, but falls back to a substring match, if that is unique.
|
||||
Otherwise a ValueError is thrown.
|
||||
"""
|
||||
if not comic:
|
||||
raise ValueError("empty comic name")
|
||||
candidates = []
|
||||
cname = comic.lower()
|
||||
for scrapers in self.get(include_removed=True):
|
||||
lname = scrapers.name.lower()
|
||||
for scraper in self.all(include_removed=True):
|
||||
lname = scraper.name.lower()
|
||||
if lname == cname:
|
||||
# perfect match
|
||||
if not multiple_allowed:
|
||||
return [scrapers]
|
||||
else:
|
||||
candidates.append(scrapers)
|
||||
elif cname in lname and scrapers.url:
|
||||
candidates.append(scrapers)
|
||||
if len(candidates) > 1 and not multiple_allowed:
|
||||
return scraper
|
||||
elif cname in lname and scraper.url:
|
||||
candidates.append(scraper)
|
||||
if len(candidates) > 1:
|
||||
comics = ", ".join(x.name for x in candidates)
|
||||
raise ValueError('multiple comics found: %s' % comics)
|
||||
elif not candidates:
|
||||
raise ValueError('comic %r not found' % comic)
|
||||
return candidates
|
||||
return candidates[0]
|
||||
|
||||
def load(self):
|
||||
def load(self) -> None:
|
||||
out.debug("Loading comic modules...")
|
||||
modules = 0
|
||||
classes = 0
|
||||
|
@ -583,7 +578,7 @@ class Cache:
|
|||
out.debug("... %d scrapers loaded from %d classes in %d modules." % (
|
||||
len(self.data), classes, modules))
|
||||
|
||||
def adddir(self, path):
|
||||
def adddir(self, path) -> None:
|
||||
"""Add an additional directory with python modules to the scraper list.
|
||||
These are handled as if they were part of the plugins package.
|
||||
"""
|
||||
|
@ -603,7 +598,7 @@ class Cache:
|
|||
out.debug("Added %d user classes from %d modules." % (
|
||||
classes, modules))
|
||||
|
||||
def addmodule(self, module):
|
||||
def addmodule(self, module) -> int:
|
||||
"""Adds all valid plugin classes from the specified module to the cache.
|
||||
@return: number of classes added
|
||||
"""
|
||||
|
@ -613,8 +608,8 @@ class Cache:
|
|||
self.data.extend(plugin.getmodules())
|
||||
return classes
|
||||
|
||||
def get(self, include_removed=False):
|
||||
"""Find all comic scraper classes in the plugins directory.
|
||||
def all(self, include_removed=False) -> List[Scraper]:
|
||||
"""Return all comic scraper classes in the plugins directory.
|
||||
@return: list of Scraper classes
|
||||
@rtype: list of Scraper
|
||||
"""
|
||||
|
@ -625,9 +620,9 @@ class Cache:
|
|||
else:
|
||||
return [x for x in self.data if x.url]
|
||||
|
||||
def validate(self):
|
||||
def validate(self) -> None:
|
||||
"""Check for duplicate scraper names."""
|
||||
d = {}
|
||||
d: Dict[str, Scraper] = {}
|
||||
for scraper in self.data:
|
||||
name = scraper.name.lower()
|
||||
if name in d:
|
||||
|
|
|
@ -16,7 +16,7 @@ def get_test_scrapers():
|
|||
"""Return scrapers that should be tested."""
|
||||
if 'TESTALL' in os.environ:
|
||||
# test all comics (this will take some time)
|
||||
return scrapers.get()
|
||||
return scrapers.all()
|
||||
elif 'TESTCOMICS' in os.environ:
|
||||
scraper_pattern = os.environ['TESTCOMICS']
|
||||
else:
|
||||
|
@ -33,7 +33,7 @@ def get_test_scrapers():
|
|||
|
||||
matcher = re.compile(scraper_pattern)
|
||||
return [
|
||||
scraperobj for scraperobj in scrapers.get()
|
||||
scraperobj for scraperobj in scrapers.all()
|
||||
if matcher.match(scraperobj.name)
|
||||
]
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
||||
import re
|
||||
|
||||
from dosagelib.scraper import scrapers
|
||||
|
@ -11,7 +11,7 @@ from dosagelib.plugins import old
|
|||
class TestComicNames(object):
|
||||
|
||||
def test_names(self):
|
||||
for scraperobj in scrapers.get():
|
||||
for scraperobj in scrapers.all():
|
||||
name = scraperobj.name
|
||||
assert name.count('/') <= 1
|
||||
if '/' in name:
|
||||
|
@ -21,10 +21,10 @@ class TestComicNames(object):
|
|||
assert re.sub("[^0-9a-zA-Z_]", "", comicname) == comicname
|
||||
|
||||
def test_renamed(self):
|
||||
for scraperobj in scrapers.get(include_removed=True):
|
||||
for scraperobj in scrapers.all(include_removed=True):
|
||||
if not isinstance(scraperobj, old.Renamed):
|
||||
continue
|
||||
assert len(scraperobj.getDisabledReasons()) > 0
|
||||
# Renamed scraper should only point to a non-disabled scraper
|
||||
newscraper = scrapers.find(scraperobj.newname)[0]
|
||||
newscraper = scrapers.find(scraperobj.newname)
|
||||
assert len(newscraper.getDisabledReasons()) == 0
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2013-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
||||
# Copyright (C) 2015-2022 Tobias Gruetzmacher
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
@ -12,24 +12,23 @@ class TestScraper(object):
|
|||
"""Test scraper module functions."""
|
||||
|
||||
def test_get_scrapers(self):
|
||||
for scraperobj in scrapers.get():
|
||||
for scraperobj in scrapers.all():
|
||||
scraperobj.indexes = ["bla"]
|
||||
assert scraperobj.url, "missing url in %s" % scraperobj.name
|
||||
|
||||
def test_find_scrapers_single(self):
|
||||
result = scrapers.find("xkcd")
|
||||
assert len(result) == 1
|
||||
assert scrapers.find("xkcd")
|
||||
|
||||
def test_find_scrapers_multi(self):
|
||||
result = scrapers.find("a", multiple_allowed=True)
|
||||
assert len(result) > 1
|
||||
with pytest.raises(ValueError, match='multiple comics found'):
|
||||
scrapers.find("a")
|
||||
|
||||
def test_find_scrapers_error(self):
|
||||
with pytest.raises(ValueError, match='empty comic name'):
|
||||
scrapers.find('')
|
||||
|
||||
def test_user_dir(self):
|
||||
oldlen = len(scrapers.get())
|
||||
oldlen = len(scrapers.all())
|
||||
scrapers.adddir(Path(__file__).parent / 'mocks' / 'extra')
|
||||
assert len(scrapers.get()) == oldlen + 1
|
||||
assert len(scrapers.find('AnotherDummyTestScraper')) == 1
|
||||
assert len(scrapers.all()) == oldlen + 1
|
||||
assert scrapers.find('AnotherDummyTestScraper')
|
||||
|
|
Loading…
Reference in a new issue