Fix basepath prefix removal

This was a funny bug that has existed for 9 years. It only manifests if
a scraper name starts with the configured basepath. Since the default
basepath is `Comics`, this could happen when the user wants to fetch
something like `ComicsKingdom/Tiger`. Internally, dosage removed the
prefix matching the basepath, so it ended up with `Kingdom/Tiger`.
This causes no problems as long as the remaining substring still
identifies exactly one supported comic, but if it matches multiple
comics, an error is thrown. The basepath prefix is now only removed
when it is followed by a path separator (`/` or `\`).
This commit is contained in:
Tobias Gruetzmacher 2022-06-05 23:55:18 +02:00
parent 0d8871b253
commit 0d8e1b4a6f
2 changed files with 13 additions and 4 deletions

View file

@@ -4,10 +4,11 @@
# Copyright (C) 2015-2022 Tobias Gruetzmacher # Copyright (C) 2015-2022 Tobias Gruetzmacher
# Copyright (C) 2019-2020 Daniel Ring # Copyright (C) 2019-2020 Daniel Ring
import os import os
import re
import threading import threading
import _thread import _thread
from queue import Queue, Empty from queue import Queue, Empty
from typing import Dict from typing import Collection, Dict
from urllib.parse import urlparse from urllib.parse import urlparse
from .output import out from .output import out
@@ -186,7 +187,7 @@ def getComics(options):
return errors return errors
def getScrapers(comics, basepath=None, adult=True, listing=False): def getScrapers(comics: Collection[str], basepath: str, adult=True, listing=False):
"""Get scraper objects for the given comics.""" """Get scraper objects for the given comics."""
if '@' in comics: if '@' in comics:
# only scrapers whose directory already exists # only scrapers whose directory already exists
@@ -198,13 +199,14 @@ def getScrapers(comics, basepath=None, adult=True, listing=False):
# get only selected comic scrapers # get only selected comic scrapers
# store them in a set to eliminate duplicates # store them in a set to eliminate duplicates
scrapers = set() scrapers = set()
basere = re.compile(r'^' + re.escape(basepath) + r'[/\\]')
for comic in comics: for comic in comics:
# Helpful when using shell completion to pick comics to get # Helpful when using shell completion to pick comics to get
comic = comic.rstrip(os.path.sep) comic = comic.rstrip(os.path.sep)
if basepath and comic.startswith(basepath): if basere.match(comic):
# make the following command work: # make the following command work:
# find Comics -type d | xargs -n1 -P10 dosage -b Comics # find Comics -type d | xargs -n1 -P10 dosage -b Comics
comic = comic[len(basepath):].lstrip(os.sep) comic = comic[len(basepath) + 1:].lstrip(os.sep)
if ':' in comic: if ':' in comic:
name, index = comic.split(':', 1) name, index = comic.split(':', 1)
indexes = index.split(',') indexes = index.split(',')

View file

@@ -94,6 +94,13 @@ class TestDosage(object):
out, err = capfd.readouterr() out, err = capfd.readouterr()
assert re.match(r'([0-9][0-9]:){2}.. xkcd>', out) assert re.match(r'([0-9][0-9]:){2}.. xkcd>', out)
def test_broken_basepath_removal(self):
assert cmd('-m', 'Comicsxkcd') == 2
def test_working_basepath_removal(self):
cmd_ok('-m', 'Comics/xkcd')
cmd_ok('-m', 'Comics\\xkcd')
def test_no_comics_specified(self): def test_no_comics_specified(self):
cmd_err() cmd_err()