From 0d8e1b4a6f5c12d389c9df821f30afc535889900 Mon Sep 17 00:00:00 2001
From: Tobias Gruetzmacher <tobias-git@23.gs>
Date: Sun, 5 Jun 2022 23:55:18 +0200
Subject: [PATCH] Fix basepath prefix removal

This was a funny bug that has existed for 9 years. It only manifests if
a scraper name starts with the configured basepath. Since the default
basepath is `Comics`, it can show up when the user wants to fetch
something like `ComicsKingdom/Tiger`. Internally, dosage removed any
prefix matching the basepath, so the name became `Kingdom/Tiger`.
This doesn't cause any problems as long as the shortened name is still
unique among all supported comics, but if it matches multiple comics, an
error is thrown. The prefix is now only removed when it is followed by a
path separator (`/` or `\`).
---
 dosagelib/director.py | 10 ++++++----
 tests/test_dosage.py  |  7 +++++++
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/dosagelib/director.py b/dosagelib/director.py
index d0e6b7152..225e8de3c 100644
--- a/dosagelib/director.py
+++ b/dosagelib/director.py
@@ -4,10 +4,11 @@
 # Copyright (C) 2015-2022 Tobias Gruetzmacher
 # Copyright (C) 2019-2020 Daniel Ring
 import os
+import re
 import threading
 import _thread
 from queue import Queue, Empty
-from typing import Dict
+from typing import Collection, Dict
 from urllib.parse import urlparse
 
 from .output import out
@@ -186,7 +187,7 @@ def getComics(options):
     return errors
 
 
-def getScrapers(comics, basepath=None, adult=True, listing=False):
+def getScrapers(comics: Collection[str], basepath: str, adult=True, listing=False):
     """Get scraper objects for the given comics."""
     if '@' in comics:
         # only scrapers whose directory already exists
@@ -198,13 +199,14 @@ def getScrapers(comics, basepath=None, adult=True, listing=False):
         # get only selected comic scrapers
         # store them in a set to eliminate duplicates
         scrapers = set()
+        basere = re.compile(r'^' + re.escape(basepath) + r'[/\\]')
         for comic in comics:
             # Helpful when using shell completion to pick comics to get
             comic = comic.rstrip(os.path.sep)
-            if basepath and comic.startswith(basepath):
+            if basere.match(comic):
                 # make the following command work:
                 # find Comics -type d | xargs -n1 -P10 dosage -b Comics
-                comic = comic[len(basepath):].lstrip(os.sep)
+                comic = comic[len(basepath) + 1:].lstrip(os.sep)
             if ':' in comic:
                 name, index = comic.split(':', 1)
                 indexes = index.split(',')
diff --git a/tests/test_dosage.py b/tests/test_dosage.py
index 59bff0b89..75ee30fd2 100644
--- a/tests/test_dosage.py
+++ b/tests/test_dosage.py
@@ -94,6 +94,13 @@ class TestDosage(object):
         out, err = capfd.readouterr()
         assert re.match(r'([0-9][0-9]:){2}.. xkcd>', out)
 
+    def test_broken_basepath_removal(self):
+        assert cmd('-m', 'Comicsxkcd') == 2
+
+    def test_working_basepath_removal(self):
+        cmd_ok('-m', 'Comics/xkcd')
+        cmd_ok('-m', 'Comics\\xkcd')
+
     def test_no_comics_specified(self):
         cmd_err()