From 4f932803a34f67bd7a9d5ea0fa381d2f6741bf64 Mon Sep 17 00:00:00 2001
From: Tobias Gruetzmacher <tobias-git@23.gs>
Date: Sat, 10 Jun 2023 15:05:57 +0200
Subject: [PATCH] Extend scraper API with an extract_image_urls method

This is just a light wrapper around fetchUrls, but it frees comic
modules from second-guessing the purpose for which fetchUrls was called
when they override that API. And yes, some comic modules already got
this wrong; they are now all fixed.
---
 dosagelib/plugins/e.py        | 23 ++++++++---------
 dosagelib/plugins/f.py        | 12 ++++-----
 dosagelib/plugins/l.py        | 16 ++++++------
 dosagelib/plugins/m.py        | 14 +++++------
 dosagelib/plugins/mangadex.py | 24 +++++++++---------
 dosagelib/plugins/o.py        | 47 ++++++++++++-----------------------
 dosagelib/plugins/s.py        | 24 +++++++++---------
 dosagelib/plugins/tapas.py    | 14 +++++------
 dosagelib/plugins/u.py        | 27 ++++++++++----------
 dosagelib/plugins/webtoons.py | 12 ++++-----
 dosagelib/scraper.py          | 14 ++++++++---
 11 files changed, 107 insertions(+), 120 deletions(-)

diff --git a/dosagelib/plugins/e.py b/dosagelib/plugins/e.py
index 44341f711..d423528dd 100644
--- a/dosagelib/plugins/e.py
+++ b/dosagelib/plugins/e.py
@@ -1,13 +1,13 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
-# Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2022 Tobias Gruetzmacher
-# Copyright (C) 2019-2020 Daniel Ring
+# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
+# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
+# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2019 Daniel Ring
 import os
 from re import compile, IGNORECASE
 
 from ..helpers import bounceStarter, indirectStarter
-from ..scraper import _BasicScraper, _ParserScraper
+from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
 from ..util import tagre
 from .common import ComicControlScraper, WordPressScraper, WordPressNavi
 
@@ -99,7 +99,7 @@ class EmergencyExit(_BasicScraper):
     help = 'Index format: n'
 
 
-class Erfworld(_ParserScraper):
+class Erfworld(ParserScraper):
     stripUrl = 'https://archives.erfworld.com/%s'
     url = stripUrl % 'getLatestPage.php'
     firstStripUrl = stripUrl % 'Kickstarter+Stories/1'
@@ -111,12 +111,9 @@ class Erfworld(_ParserScraper):
     textOptional = True
     starter = bounceStarter
 
-    def fetchUrls(self, url, data, urlSearch):
-        # Return the main logo for text-only pages
-        try:
-            return super().fetchUrls(url, data, urlSearch)
-        except ValueError:
-            return super().fetchUrls(url, data, '//li[@class="erf-logo"]//img')
+    def shouldSkipUrl(self, url, data):
+        """Skip pages without images."""
+        return not data.xpath(self.imageSearch)
 
     def namer(self, imageUrl, pageUrl):
         # Fix inconsistent filenames
@@ -138,7 +135,7 @@ class Erfworld(_ParserScraper):
             return self.stripUrl % 'Book+0/81'
         elif url == self.stripUrl % 'Book+0/1':
             return self.stripUrl % 'Kickstarter+Stories/54'
-        return super(Erfworld, self).getPrevUrl(url, data)
+        return super().getPrevUrl(url, data)
 
 
 class ErmaFelnaEDF(_ParserScraper):
diff --git a/dosagelib/plugins/f.py b/dosagelib/plugins/f.py
index 2bef57265..2a8d04572 100644
--- a/dosagelib/plugins/f.py
+++ b/dosagelib/plugins/f.py
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
-# Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2021 Tobias Gruetzmacher
-# Copyright (C) 2019-2020 Daniel Ring
+# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
+# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
+# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2019 Daniel Ring
 from re import compile, escape
 
 from ..util import tagre
@@ -240,7 +240,7 @@ class FunInJammies(WordPressScraper):
     help = 'Index format: n (unpadded)'
 
 
-class FurPiled(_ParserScraper):
+class FurPiled(ParserScraper):
     stripUrl = ('https://web.archive.org/web/20160404074145/'
         'http://www.liondogworks.com/images/fp-%03d.jpg')
     url = stripUrl % 427
@@ -254,7 +254,7 @@ class FurPiled(_ParserScraper):
             nextStrip = nextStrip - 1
         return self.stripUrl % nextStrip
 
-    def fetchUrls(self, url, data, urlSearch):
+    def extract_image_urls(self, url, data):
         # URLs are direct links to images
         return [url]
 
diff --git a/dosagelib/plugins/l.py b/dosagelib/plugins/l.py
index f28bccc93..d75126782 100644
--- a/dosagelib/plugins/l.py
+++ b/dosagelib/plugins/l.py
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
-# Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2022 Tobias Gruetzmacher
-# Copyright (C) 2019-2020 Daniel Ring
+# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
+# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
+# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2019 Daniel Ring
 from re import compile
 
 from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
@@ -86,7 +86,7 @@ class LifeAintNoPonyFarm(WordPressScraper):
     endOfLife = True
 
 
-class LifeAsRendered(_ParserScraper):
+class LifeAsRendered(ParserScraper):
     # Reverse navigation doesn't work properly, so search forward instead
     stripUrl = 'https://kittyredden.com/LAR/%s/'
     url = stripUrl % '0100'
@@ -121,11 +121,11 @@ class LifeAsRendered(_ParserScraper):
         filename = imageUrl.rsplit('/', 1)[-1]
         return filename.replace('ReN', 'N').replace('N01P', 'A02S')
 
-    def fetchUrls(self, url, data, urlSearch):
+    def extract_image_urls(self, url, data):
         # Fix missing image link
-        if 'LAR/0403' in url and urlSearch == self.imageSearch:
+        if 'LAR/0403' in url:
             return [self.stripUrl.rstrip('/') % 'A04/A04P03.png']
-        return super(LifeAsRendered, self).fetchUrls(url, data, urlSearch)
+        return super().extract_image_urls(url, data)
 
     def getPrevUrl(self, url, data):
         # Fix broken navigation links
diff --git a/dosagelib/plugins/m.py b/dosagelib/plugins/m.py
index baf4d7f6e..8dac3469b 100644
--- a/dosagelib/plugins/m.py
+++ b/dosagelib/plugins/m.py
@@ -1,13 +1,13 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
-# Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2022 Tobias Gruetzmacher
-# Copyright (C) 2019-2020 Daniel Ring
+# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
+# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
+# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2019 Daniel Ring
 import json
 from re import compile, escape, IGNORECASE
 
 from ..helpers import indirectStarter
-from ..scraper import _BasicScraper, _ParserScraper
+from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
 from ..util import tagre
 from ..xml import NS
 from .common import ComicControlScraper, WordPressScraper, WordPressWebcomic
@@ -233,7 +233,7 @@ class MyCartoons(_BasicScraper):
     lang = 'de'
 
 
-class MyLifeWithFel(_ParserScraper):
+class MyLifeWithFel(ParserScraper):
     baseUrl = 'https://www.mylifewithfel.com/'
     stripUrl = baseUrl + 'api/posts/%s'
     firstStripUrl = stripUrl % '1'
@@ -249,7 +249,7 @@ class MyLifeWithFel(_ParserScraper):
     def getPrevUrl(self, url, data):
         return self.stripUrl % json.loads(data.text_content())['previous']['id']
 
-    def fetchUrls(self, url, data, urlSearch):
+    def extract_image_urls(self, url, data):
         return [self.baseUrl + json.loads(data.text_content())['post']['image']]
 
     def namer(self, imageUrl, pageUrl):
diff --git a/dosagelib/plugins/mangadex.py b/dosagelib/plugins/mangadex.py
index 1bd6e9ea9..91c64a25d 100644
--- a/dosagelib/plugins/mangadex.py
+++ b/dosagelib/plugins/mangadex.py
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2019-2022 Tobias Gruetzmacher
-# Copyright (C) 2019-2020 Daniel Ring
+# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2019 Daniel Ring
 import json
 
 from ..scraper import ParserScraper
@@ -71,24 +71,24 @@ class MangaDex(ParserScraper):
             return None
         return self.stripUrl % self.chapters[self.chapters.index(chapter[0]) - 1]['id']
 
-    def fetchUrls(self, url, data, urlSearch):
+    def extract_image_urls(self, url, data):
         # Retrieve chapter metadata from API
-        chapterData = json.loads(data.text_content())
-        self.chapter = chapterData['data']
-        cdnData = self.session.get(self.cdnUrl % self.chapter['id'])
-        cdnData.raise_for_status()
-        cdnBlock = cdnData.json()
+        chapters = json.loads(data.text_content())
+        self.chapter = chapters['data']
+        cdnresponse = self.session.get(self.cdnUrl % self.chapter['id'])
+        cdnresponse.raise_for_status()
+        cdnblock = cdnresponse.json()
 
         # Save link order for position-based filenames
-        imageUrl = self.imageUrl % cdnBlock['chapter']['hash']
-        self.imageUrls = [imageUrl % page for page in cdnBlock['chapter']['data']]
-        return self.imageUrls
+        urltemplate = self.imageUrl % cdnblock['chapter']['hash']
+        self._cached_image_urls = [urltemplate % page for page in cdnblock['chapter']['data']]
+        return self._cached_image_urls
 
     def namer(self, imageUrl, pageUrl):
         # Construct filename from episode number and page index in array
         chapter = self.chapter['attributes']['chapter']
         chapterNum = chapter if chapter is not None else 0
-        pageNum = self.imageUrls.index(imageUrl)
+        pageNum = self._cached_image_urls.index(imageUrl)
         pageExt = imageUrl.rsplit('.')[-1]
         return '%s-%02d.%s' % (chapterNum, pageNum, pageExt)
 
diff --git a/dosagelib/plugins/o.py b/dosagelib/plugins/o.py
index 954d5d69b..5706d2ba2 100644
--- a/dosagelib/plugins/o.py
+++ b/dosagelib/plugins/o.py
@@ -1,12 +1,12 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
-# Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
-# Copyright (C) 2019-2020 Daniel Ring
+# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
+# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
+# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2019 Daniel Ring
 from re import compile, escape
 
 from ..helpers import bounceStarter, indirectStarter
-from ..scraper import _BasicScraper, _ParserScraper
+from ..scraper import ParserScraper, _BasicScraper, _ParserScraper
 from ..util import tagre
 from .common import WordPressScraper, WordPressNavi
 
@@ -21,7 +21,7 @@ class OctopusPie(_ParserScraper):
     help = 'Index format: yyyy-mm-dd/nnn-strip-name'
 
 
-class OffWhite(_ParserScraper):
+class OffWhite(ParserScraper):
     baseUrl = 'https://web.archive.org/web/20200627222318/http://off-white.eu/'
     stripUrl = baseUrl + 'comic/%s/'
     firstStripUrl = stripUrl % 'prologue-page-1-2'
@@ -32,18 +32,6 @@ class OffWhite(_ParserScraper):
     starter = indirectStarter
     endOfLife = True
 
-    def fetchUrls(self, url, data, urlSearch):
-        # Fix missing page
-        if url == self.stripUrl % 'page-37':
-            return [self.baseUrl + 'ow_v2/wp-content/uploads/2011/01/new-037.jpg']
-        return super(OffWhite, self).fetchUrls(url, data, urlSearch)
-
-    def getPrevUrl(self, url, data):
-        # Fix missing page
-        if url == self.stripUrl % 'page-37':
-            return self.stripUrl % 'page-36'
-        return super(OffWhite, self).getPrevUrl(url, data)
-
 
 class Oglaf(_ParserScraper):
     url = 'http://oglaf.com/'
@@ -55,19 +43,16 @@ class Oglaf(_ParserScraper):
     multipleImagesPerStrip = True
     adult = True
 
-    def fetchUrls(self, url, data, search):
-        urls = []
-        urls.extend(super(Oglaf, self).fetchUrls(url, data, search))
-        if search == self.imageSearch:
-            try:
-                nexturls = self.fetchUrls(url, data, self.nextSearch)
-            except ValueError:
-                pass
-            else:
-                while nexturls and nexturls[0].startswith(url):
-                    data = self.getPage(nexturls[0])
-                    urls.extend(super(Oglaf, self).fetchUrls(nexturls, data, search))
-                    nexturls = self.fetchUrls(url, data, self.nextSearch)
+    def extract_image_urls(self, url, data):
+        urls = super().extract_image_urls(url, data)
+        try:
+            nexturl = self.fetchUrls(url, data, self.nextSearch)[0]
+            while nexturl.startswith(url):
+                data = self.getPage(nexturl)
+                urls.extend(super().extract_image_urls(url, data))
+                nexturl = self.fetchUrls(url, data, self.nextSearch)[0]
+        except ValueError:
+            pass
         return urls
 
 
diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py
index 131873281..b1516b16e 100644
--- a/dosagelib/plugins/s.py
+++ b/dosagelib/plugins/s.py
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
-# Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2022 Tobias Gruetzmacher
-# Copyright (C) 2019-2020 Daniel Ring
+# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
+# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
+# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2019 Daniel Ring
 from re import compile, escape, IGNORECASE, sub
 from os.path import splitext
 
@@ -436,20 +436,20 @@ class Sorcery101(WordPressWebcomic):
     help = 'Index format: stripname'
 
 
-class SpaceFurries(_ParserScraper):
-    url = 'http://www.spacefurrs.org/'
+class SpaceFurries(ParserScraper):
+    url = 'https://www.spacefurrs.org/'
     firstStripUrl = url
     multipleImagesPerStrip = True
     adult = True
     endOfLife = True
 
-    def fetchUrls(self, url, data, urlSearch):
+    def extract_image_urls(self, url, data):
         # Website requires JS, so build the list of image URLs manually
-        imageUrls = []
-        currentPage = int(data.xpath('//input[@name="pagnum"]')[0].get('value'))
-        for page in reversed(range(1, currentPage + 1)):
-            imageUrls.append(self.url + 'comics/' + str(page) + '.jpg')
-        return imageUrls
+        imageurls = []
+        current = int(data.xpath('//input[@name="pagnum"]')[0].get('value'))
+        for page in reversed(range(1, current + 1)):
+            imageurls.append(self.url + 'comics/' + str(page) + '.jpg')
+        return imageurls
 
 
 class SpaceJunkArlia(_ParserScraper):
diff --git a/dosagelib/plugins/tapas.py b/dosagelib/plugins/tapas.py
index a1db5bdb0..f3c6088fb 100644
--- a/dosagelib/plugins/tapas.py
+++ b/dosagelib/plugins/tapas.py
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2019-2022 Tobias Gruetzmacher
-# Copyright (C) 2019-2022 Daniel Ring
+# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2019 Daniel Ring
 from ..output import out
 from ..scraper import ParserScraper
 from ..xml import NS
@@ -37,10 +37,10 @@ class Tapas(ParserScraper):
             self.firstStripUrl = self.stripUrl % apiData['prev_ep_id']
         return self.stripUrl % apiData['prev_ep_id']
 
-    def fetchUrls(self, url, data, urlSearch):
+    def extract_image_urls(self, url, data):
         # Save link order for position-based filenames
-        self.imageUrls = super().fetchUrls(url, data, urlSearch)
-        return self.imageUrls
+        self._cached_image_urls = super().extract_image_urls(url, data)
+        return self._cached_image_urls
 
     def shouldSkipUrl(self, url, data):
         if data.xpath('//button[d:class("js-have-to-sign")]', namespaces=NS):
@@ -51,9 +51,9 @@ class Tapas(ParserScraper):
     def namer(self, imageUrl, pageUrl):
         # Construct filename from episode number and image position on page
         episodeNum = pageUrl.rsplit('/', 1)[-1]
-        imageNum = self.imageUrls.index(imageUrl)
+        imageNum = self._cached_image_urls.index(imageUrl)
         imageExt = pageUrl.rsplit('.', 1)[-1]
-        if len(self.imageUrls) > 1:
+        if len(self._cached_image_urls) > 1:
             filename = "%s-%d.%s" % (episodeNum, imageNum, imageExt)
         else:
             filename = "%s.%s" % (episodeNum, imageExt)
diff --git a/dosagelib/plugins/u.py b/dosagelib/plugins/u.py
index 12eeb0bbd..99e31d682 100644
--- a/dosagelib/plugins/u.py
+++ b/dosagelib/plugins/u.py
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
-# Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
-# Copyright (C) 2019-2020 Daniel Ring
+# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
+# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
+# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2019 Daniel Ring
 import json
 from re import compile
 from urllib.parse import urljoin
@@ -29,7 +29,7 @@ class UberQuest(ParserScraper):
     def getPrevUrl(self, url, data):
         return self.stripUrl % json.loads(data.text_content())[0]['prev_id']
 
-    def fetchUrls(self, url, data, urlSearch):
+    def extract_image_urls(self, url, data):
         return [json.loads(data.text_content())[0]['attachment']]
 
     def namer(self, imageUrl, pageUrl):
@@ -80,7 +80,7 @@ class UnicornJelly(BasicScraper):
 
 
 class Unsounded(ParserScraper):
-    url = 'http://www.casualvillain.com/Unsounded/'
+    url = 'https://www.casualvillain.com/Unsounded/'
     startUrl = url + 'comic+index/'
     stripUrl = url + 'comic/ch%s/ch%s_%s.html'
     firstStripUrl = stripUrl % ('01', '01', '01')
@@ -91,18 +91,17 @@ class Unsounded(ParserScraper):
     starter = indirectStarter
     help = 'Index format: chapter-page'
 
-    def fetchUrls(self, url, data, urlSearch):
-        imageUrls = super(Unsounded, self).fetchUrls(url, data, urlSearch)
+    def extract_image_urls(self, url, data):
+        imageUrls = super().extract_image_urls(url, data)
         # Include background for multi-image pages
         imageRegex = compile(r'background-image: url\((pageart/.*)\)')
         for match in imageRegex.finditer(str(etree.tostring(data))):
-            print(match)
-            searchUrls.append(normaliseURL(urljoin(data[1], match.group(1))))
+            imageUrls.append(normaliseURL(urljoin(data[1], match.group(1))))
         return imageUrls
 
-    def namer(self, imageUrl, pageUrl):
-        filename = imageUrl.rsplit('/', 1)[-1]
-        pagename = pageUrl.rsplit('/', 1)[-1]
+    def namer(self, image_url, page_url):
+        filename = image_url.rsplit('/', 1)[-1]
+        pagename = page_url.rsplit('/', 1)[-1]
         if pagename.split('.', 1)[0] != filename.split('.', 1)[0]:
             filename = pagename.split('_', 1)[0] + '_' + filename
         return filename
@@ -111,7 +110,7 @@ class Unsounded(ParserScraper):
         # Fix missing navigation links between chapters
         if 'ch13/you_let_me_fall' in url:
             return self.stripUrl % ('13', '13', '85')
-        return super(Unsounded, self).getPrevUrl(url, data)
+        return super().getPrevUrl(url, data)
 
     def getIndexStripUrl(self, index):
         chapter, num = index.split('-')
diff --git a/dosagelib/plugins/webtoons.py b/dosagelib/plugins/webtoons.py
index ecd3f47c2..d78c6229b 100644
--- a/dosagelib/plugins/webtoons.py
+++ b/dosagelib/plugins/webtoons.py
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2019-2022 Tobias Gruetzmacher
-# Copyright (C) 2019-2022 Daniel Ring
+# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2019 Daniel Ring
 from ..scraper import ParserScraper
 
 
@@ -29,18 +29,18 @@ class WebToons(ParserScraper):
         self.endOfLife = (listPage.xpath('//div[@id="_asideDetail"]//span[@class="txt_ico_completed2"]') != [])
         return self.stripUrl % currentEpisode
 
-    def fetchUrls(self, url, data, urlSearch):
+    def extract_image_urls(self, url, data):
         # Save link order for position-based filenames
-        self.imageUrls = super().fetchUrls(url, data, urlSearch)
+        self._cached_image_urls = super().extract_image_urls(url, data)
         # Update firstStripUrl with the correct episode title
         if url.rsplit('=', 1)[-1] == '1':
             self.firstStripUrl = url
-        return self.imageUrls
+        return self._cached_image_urls
 
     def namer(self, imageUrl, pageUrl):
         # Construct filename from episode number and image position on page
         episodeNum = pageUrl.rsplit('=', 1)[-1]
-        imageNum = self.imageUrls.index(imageUrl)
+        imageNum = self._cached_image_urls.index(imageUrl)
         imageExt = pageUrl.rsplit('.', 1)[-1].split('?', 1)[0]
         return "%s-%03d.%s" % (episodeNum, imageNum, imageExt)
 
diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py
index 7078f842b..5a411b9b4 100644
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
-# Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2022 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
+# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
+# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
 from __future__ import annotations
 
 import html
@@ -137,7 +137,7 @@ class Scraper:
 
     def getComicStrip(self, url, data):
         """Get comic strip downloader for given URL and data."""
-        imageUrls = self.fetchUrls(url, data, self.imageSearch)
+        imageUrls = self.extract_image_urls(url, data)
         # map modifier function on image URLs
         imageUrls = [self.imageUrlModifier(x, data) for x in imageUrls]
         # remove duplicate URLs
@@ -325,6 +325,12 @@ class Scraper:
         """
         return get_page(url, self.session, allow_errors=self.allow_errors)
 
+    def extract_image_urls(self, url, data):
+        """
+        Extract image URLs from page data using the class's imageSearch attribute.
+        """
+        return self.fetchUrls(url, data, self.imageSearch)
+
     def fetchUrls(self, url, data, urlsearch):
         raise ValueError("No implementation for fetchUrls!")