Fix basepath prefix removal

This was a funny bug that had existed for 9 years. It only manifests if a scraper name starts with the configured basepath. Since the default basepath is `Comics`, this might manifest if the user wants to fetch something like `ComicsKingdom/Tiger`. Internally, dosage was removing the prefix matching the basepath, so we ended up with `Kingdom/Tiger`. This doesn't cause any problems as long as the remaining substring is still unique among all supported comics, but if it matches multiple comics, an error is thrown.
2022-06-05 23:55:18 +02:00 · 2022-06-05 23:55:18 +02:00 · 0d8e1b4a6f
commit 0d8e1b4a6f
parent 0d8871b253
2 changed files with 13 additions and 4 deletions
--- a/dosagelib/director.py
+++ b/dosagelib/director.py
@ -4,10 +4,11 @@
 # Copyright (C) 2015-2022 Tobias Gruetzmacher
 # Copyright (C) 2019-2020 Daniel Ring
 import os
+import re
 import threading
 import _thread
 from queue import Queue, Empty
-from typing import Dict
+from typing import Collection, Dict
 from urllib.parse import urlparse

 from .output import out
@ -186,7 +187,7 @@ def getComics(options):
    return errors


-def getScrapers(comics, basepath=None, adult=True, listing=False):
+def getScrapers(comics: Collection[str], basepath: str, adult=True, listing=False):
    """Get scraper objects for the given comics."""
    if '@' in comics:
        # only scrapers whose directory already exists
@ -198,13 +199,14 @@ def getScrapers(comics, basepath=None, adult=True, listing=False):
        # get only selected comic scrapers
        # store them in a set to eliminate duplicates
        scrapers = set()
+        basere = re.compile(r'^' + re.escape(basepath) + r'[/\\]')
        for comic in comics:
            # Helpful when using shell completion to pick comics to get
            comic = comic.rstrip(os.path.sep)
-            if basepath and comic.startswith(basepath):
+            if basere.match(comic):
                # make the following command work:
                # find Comics -type d | xargs -n1 -P10 dosage -b Comics
-                comic = comic[len(basepath):].lstrip(os.sep)
+                comic = comic[len(basepath) + 1:].lstrip(os.sep)
            if ':' in comic:
                name, index = comic.split(':', 1)
                indexes = index.split(',')
--- a/tests/test_dosage.py
+++ b/tests/test_dosage.py
@ -94,6 +94,13 @@ class TestDosage(object):
        out, err = capfd.readouterr()
        assert re.match(r'([0-9][0-9]:){2}.. xkcd>', out)

+    def test_broken_basepath_removal(self):
+        assert cmd('-m', 'Comicsxkcd') == 2
+
+    def test_working_basepath_removal(self):
+        cmd_ok('-m', 'Comics/xkcd')
+        cmd_ok('-m', 'Comics\\xkcd')
+
    def test_no_comics_specified(self):
        cmd_err()