Remove unused multi-match logic
parent 680ba0969e
commit 99b72c90be
6 changed files with 49 additions and 61 deletions
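In short: `Cache.find()` now returns a single scraper and raises `ValueError` on an ambiguous or unknown name, `Cache.get()` is renamed to `Cache.all()`, and the development-only `--multimatch` option is removed. A minimal sketch of the caller-side migration (names taken from the diff below; the lookup string is illustrative):

```python
from dosagelib.scraper import scrapers

# Before: find() returned a list, and multiple_allowed=True permitted
# ambiguous matches for development testing:
#   matches = scrapers.find("xkcd")   # -> [<Scraper>]
#   scraper = matches[0]

# After: find() returns exactly one Scraper, or raises ValueError if
# the name is ambiguous or unknown.
scraper = scrapers.find("xkcd")
print(scraper.name)
```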
@@ -11,7 +11,7 @@ from platformdirs import PlatformDirs
 from . import events, configuration, singleton, director
 from . import AppName, __version__
 from .output import out
-from .scraper import scrapers as allscrapers
+from .scraper import scrapers as scrapercache
 from .util import internal_error, strlimit


@@ -99,10 +99,6 @@ def setup_options():
     # used for development testing prev/next matching
     parser.add_argument('--dry-run', action='store_true',
                         help=argparse.SUPPRESS)
-    # multimatch is only used for development, eg. testing if all comics of
-    # a scripted plugin are working
-    parser.add_argument('--multimatch', action='store_true',
-                        help=argparse.SUPPRESS)
     # List all comic modules, even those normally suppressed, because they
     # are not "real" (moved & removed)
     parser.add_argument('--list-all', action='store_true',
@@ -200,8 +196,7 @@ def vote_comics(options):
     errors = 0
     try:
         for scraperobj in director.getScrapers(options.comic, options.basepath,
-                                               options.adult,
-                                               options.multimatch):
+                                               options.adult):
             errors += vote_comic(scraperobj)
     except ValueError as msg:
         out.exception(msg)
@@ -228,7 +223,7 @@ def vote_comic(scraperobj):
 def run(options):
     """Execute comic commands."""
     set_output_info(options)
-    allscrapers.adddir(user_plugin_path)
+    scrapercache.adddir(user_plugin_path)
     # ensure only one instance of dosage is running
     if not options.allow_multiple:
         singleton.SingleInstance()
@@ -257,7 +252,7 @@ def do_list(column_list=True, verbose=False, listall=False):
     out.info(u'Comics tagged with [{}] require age confirmation'
              ' with the --adult option.'.format(TAG_ADULT))
     out.info(u'Non-english comics are tagged with [%s].' % TAG_LANG)
-    scrapers = sorted(allscrapers.get(listall),
+    scrapers = sorted(scrapercache.all(listall),
                       key=lambda s: s.name.lower())
     if column_list:
         num, disabled = do_column_list(scrapers)
@@ -11,7 +11,7 @@ from typing import Dict
 from urllib.parse import urlparse

 from .output import out
-from .scraper import scrapers as allscrapers
+from .scraper import scrapers as scrapercache
 from . import events


@@ -160,7 +160,7 @@ def getComics(options):
     errors = 0
     try:
         for scraperobj in getScrapers(options.comic, options.basepath,
-                                      options.adult, options.multimatch):
+                                      options.adult):
            jobs.put(scraperobj)
    # start threads
    num_threads = min(options.parallel, jobs.qsize())
@@ -186,7 +186,7 @@ def getComics(options):
     return errors


-def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listing=False):
+def getScrapers(comics, basepath=None, adult=True, listing=False):
     """Get scraper objects for the given comics."""
     if '@' in comics:
         # only scrapers whose directory already exists
@@ -211,18 +211,17 @@ def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False, listing=False):
         else:
             name = comic
             indexes = None
-        found_scrapers = allscrapers.find(name, multiple_allowed=multiple_allowed)
-        for scraperobj in found_scrapers:
-            if shouldRunScraper(scraperobj, adult, listing):
-                # FIXME: Find a better way to work with indexes
-                scraperobj.indexes = indexes
-                if scraperobj not in scrapers:
-                    scrapers.add(scraperobj)
-                    yield scraperobj
+        scraper = scrapercache.find(name)
+        if shouldRunScraper(scraper, adult, listing):
+            # FIXME: Find a better way to work with indexes
+            scraper.indexes = indexes
+            if scraper not in scrapers:
+                scrapers.add(scraper)
+                yield scraper


 def get_existing_comics(basepath=None, adult=True, listing=False):
-    for scraperobj in allscrapers.get(include_removed=True):
+    for scraperobj in scrapercache.all(include_removed=True):
         dirname = scraperobj.get_download_dir(basepath)
         if os.path.isdir(dirname):
             if shouldRunScraper(scraperobj, adult, listing):
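Since `find()` can now raise on ambiguous input, the `getScrapers()` rewrite above simply propagates the `ValueError` to its callers, which already catch it (see `vote_comics()` and `getComics()`). A minimal sketch of that caller pattern; the comic name and basepath are illustrative:

```python
from dosagelib import director

# An ambiguous or unknown comic name now surfaces from find() as a
# ValueError, which the existing handlers in this commit catch.
try:
    for scraperobj in director.getScrapers(["xkcd"], "Comics", adult=True):
        print("would process", scraperobj.name)
except ValueError as msg:
    print("error:", msg)
```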
@@ -7,7 +7,7 @@ import os
 import re
 import warnings
 from urllib.parse import urljoin
-from typing import Optional, Union, Pattern, Sequence
+from typing import Dict, List, Optional, Union, Pattern, Sequence

 import lxml
 from lxml.html.defs import link_attrs as html_link_attrs
@@ -541,38 +541,33 @@ class Cache:
     slow.
     """
     def __init__(self):
-        self.data = []
+        self.data: List[Scraper] = []
         self.userdirs = set()

-    def find(self, comic, multiple_allowed=False):
-        """Get a list of comic scraper objects.
-
-        Can return more than one entry if multiple_allowed is True, else it raises
-        a ValueError if multiple modules match. The match is a case insensitive
-        substring search.
+    def find(self, comic: str) -> Scraper:
+        """Find a comic scraper object based on its name. This prefers a
+        perfect match, but falls back to a substring match, if that is unique.
+        Otherwise a ValueError is thrown.
         """
         if not comic:
             raise ValueError("empty comic name")
         candidates = []
         cname = comic.lower()
-        for scrapers in self.get(include_removed=True):
-            lname = scrapers.name.lower()
+        for scraper in self.all(include_removed=True):
+            lname = scraper.name.lower()
         if lname == cname:
                 # perfect match
-                if not multiple_allowed:
-                    return [scrapers]
-                else:
-                    candidates.append(scrapers)
-            elif cname in lname and scrapers.url:
-                candidates.append(scrapers)
-        if len(candidates) > 1 and not multiple_allowed:
+                return scraper
+            elif cname in lname and scraper.url:
+                candidates.append(scraper)
+        if len(candidates) > 1:
             comics = ", ".join(x.name for x in candidates)
             raise ValueError('multiple comics found: %s' % comics)
         elif not candidates:
             raise ValueError('comic %r not found' % comic)
-        return candidates
+        return candidates[0]

-    def load(self):
+    def load(self) -> None:
         out.debug("Loading comic modules...")
         modules = 0
         classes = 0
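The new matching rules are: an exact case-insensitive name match wins immediately; otherwise a unique substring match is accepted; anything else raises `ValueError`. A standalone sketch of that logic (hypothetical helper, not the module code; it omits the `scraper.url` filter the real method applies to substring candidates):

```python
def find_unique(names, query):
    """Resolve query against names: exact match first, then unique substring."""
    q = query.lower()
    candidates = []
    for name in names:
        lname = name.lower()
        if lname == q:
            return name                # perfect match wins immediately
        if q in lname:
            candidates.append(name)    # collect substring matches
    if len(candidates) > 1:
        raise ValueError('multiple comics found: %s' % ", ".join(candidates))
    if not candidates:
        raise ValueError('comic %r not found' % query)
    return candidates[0]

assert find_unique(["xkcd", "Oglaf"], "XK") == "xkcd"
```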
@@ -583,7 +578,7 @@ class Cache:
         out.debug("... %d scrapers loaded from %d classes in %d modules." % (
             len(self.data), classes, modules))

-    def adddir(self, path):
+    def adddir(self, path) -> None:
         """Add an additional directory with python modules to the scraper list.
         These are handled as if they were part of the plugins package.
         """
@@ -603,7 +598,7 @@ class Cache:
         out.debug("Added %d user classes from %d modules." % (
             classes, modules))

-    def addmodule(self, module):
+    def addmodule(self, module) -> int:
         """Adds all valid plugin classes from the specified module to the cache.
         @return: number of classes added
         """
@@ -613,8 +608,8 @@ class Cache:
         self.data.extend(plugin.getmodules())
         return classes

-    def get(self, include_removed=False):
-        """Find all comic scraper classes in the plugins directory.
+    def all(self, include_removed=False) -> List[Scraper]:
+        """Return all comic scraper classes in the plugins directory.
         @return: list of Scraper classes
         @rtype: list of Scraper
         """
@@ -625,9 +620,9 @@ class Cache:
         else:
             return [x for x in self.data if x.url]

-    def validate(self):
+    def validate(self) -> None:
         """Check for duplicate scraper names."""
-        d = {}
+        d: Dict[str, Scraper] = {}
         for scraper in self.data:
             name = scraper.name.lower()
             if name in d:
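Callers that want the full list now use `all()` instead of `get()`; a minimal usage sketch (illustrative, assuming the module cache has been loaded):

```python
from dosagelib.scraper import scrapers

active = scrapers.all()                          # scrapers with a URL
everything = scrapers.all(include_removed=True)  # plus moved/removed stubs
print("%d active of %d total modules" % (len(active), len(everything)))
```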
@@ -16,7 +16,7 @@ def get_test_scrapers():
     """Return scrapers that should be tested."""
     if 'TESTALL' in os.environ:
         # test all comics (this will take some time)
-        return scrapers.get()
+        return scrapers.all()
     elif 'TESTCOMICS' in os.environ:
         scraper_pattern = os.environ['TESTCOMICS']
     else:
@@ -33,7 +33,7 @@ def get_test_scrapers():

     matcher = re.compile(scraper_pattern)
     return [
-        scraperobj for scraperobj in scrapers.get()
+        scraperobj for scraperobj in scrapers.all()
         if matcher.match(scraperobj.name)
     ]

@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 import re

 from dosagelib.scraper import scrapers
@@ -11,7 +11,7 @@ from dosagelib.plugins import old
 class TestComicNames(object):

     def test_names(self):
-        for scraperobj in scrapers.get():
+        for scraperobj in scrapers.all():
             name = scraperobj.name
             assert name.count('/') <= 1
             if '/' in name:
@@ -21,10 +21,10 @@ class TestComicNames(object):
             assert re.sub("[^0-9a-zA-Z_]", "", comicname) == comicname

     def test_renamed(self):
-        for scraperobj in scrapers.get(include_removed=True):
+        for scraperobj in scrapers.all(include_removed=True):
             if not isinstance(scraperobj, old.Renamed):
                 continue
             assert len(scraperobj.getDisabledReasons()) > 0
             # Renamed scraper should only point to a non-disabled scraper
-            newscraper = scrapers.find(scraperobj.newname)[0]
+            newscraper = scrapers.find(scraperobj.newname)
             assert len(newscraper.getDisabledReasons()) == 0
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2013-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
+# Copyright (C) 2015-2022 Tobias Gruetzmacher
 from pathlib import Path

 import pytest
@@ -12,24 +12,23 @@ class TestScraper(object):
     """Test scraper module functions."""

     def test_get_scrapers(self):
-        for scraperobj in scrapers.get():
+        for scraperobj in scrapers.all():
             scraperobj.indexes = ["bla"]
             assert scraperobj.url, "missing url in %s" % scraperobj.name

     def test_find_scrapers_single(self):
-        result = scrapers.find("xkcd")
-        assert len(result) == 1
+        assert scrapers.find("xkcd")

     def test_find_scrapers_multi(self):
-        result = scrapers.find("a", multiple_allowed=True)
-        assert len(result) > 1
+        with pytest.raises(ValueError, match='multiple comics found'):
+            scrapers.find("a")

     def test_find_scrapers_error(self):
         with pytest.raises(ValueError, match='empty comic name'):
             scrapers.find('')

     def test_user_dir(self):
-        oldlen = len(scrapers.get())
+        oldlen = len(scrapers.all())
         scrapers.adddir(Path(__file__).parent / 'mocks' / 'extra')
-        assert len(scrapers.get()) == oldlen + 1
+        assert len(scrapers.all()) == oldlen + 1
-        assert len(scrapers.find('AnotherDummyTestScraper')) == 1
+        assert scrapers.find('AnotherDummyTestScraper')
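As the updated `test_find_scrapers_multi` shows, an ambiguous lookup now fails loudly instead of returning a list; user code that previously took the first match should catch the error instead. A minimal sketch (hypothetical snippet):

```python
from dosagelib.scraper import scrapers

try:
    scraper = scrapers.find("a")  # substring of many comic names
except ValueError as exc:
    print("ambiguous or unknown name:", exc)
```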