diff --git a/dosagelib/plugins/u.py b/dosagelib/plugins/u.py
index 99e31d682..c26c96ff4 100644
--- a/dosagelib/plugins/u.py
+++ b/dosagelib/plugins/u.py
@@ -4,9 +4,9 @@
 # SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
 # SPDX-FileCopyrightText: © 2019 Daniel Ring
 import json
+import re
+from contextlib import suppress
 from re import compile
-from urllib.parse import urljoin
-from lxml import etree
 
 from ..scraper import BasicScraper, ParserScraper
 from ..helpers import indirectStarter
@@ -89,15 +89,33 @@ class Unsounded(ParserScraper):
     latestSearch = '//div[@id="chapter_box"][1]//a[last()]'
     multipleImagesPerStrip = True
     starter = indirectStarter
+    # [^)]* keeps the match inside this url(...) even if more CSS follows
+    style_bg_regex = re.compile(r'background-image: url\(([^)]*pageart/[^)]*)\)')
     help = 'Index format: chapter-page'
 
     def extract_image_urls(self, url, data):
-        imageUrls = super().extract_image_urls(url, data)
+        """Collect the parent scraper's image URLs plus the comic div's CSS background."""
+        urls = []
+        with suppress(ValueError):
+            urls.extend(super().extract_image_urls(url, data))
         # Include background for multi-image pages
-        imageRegex = compile(r'background-image: url\((pageart/.*)\)')
-        for match in imageRegex.finditer(str(etree.tostring(data))):
-            imageUrls.append(normaliseURL(urljoin(data[1], match.group(1))))
-        return imageUrls
+        cssbg = self.extract_css_bg(data)
+        if cssbg:
+            urls.append(cssbg)
+        if not urls:
+            raise ValueError(f'No comic found at {url!r}')
+        return urls
+
+    def extract_css_bg(self, page) -> str | None:
+        """Return the pageart URL from the comic div's inline style, or None."""
+        comicdivs = page.xpath('//div[@id="comic"]')
+        if comicdivs:
+            style = comicdivs[0].attrib.get('style')
+            if style:
+                hit = self.style_bg_regex.search(style)
+                if hit:
+                    return hit.group(1).strip('\'" ')
+        return None
 
     def namer(self, image_url, page_url):
         filename = image_url.rsplit('/', 1)[-1]
diff --git a/tests/responses/unsounded-17-137.html.gz b/tests/responses/unsounded-17-137.html.gz
new file mode 100644
index 000000000..b69a44451
Binary files /dev/null and b/tests/responses/unsounded-17-137.html.gz differ
diff --git a/tests/responses/unsounded-17-92.html.gz b/tests/responses/unsounded-17-92.html.gz
new file mode 100644
index 000000000..3b28c44be
Binary files /dev/null and b/tests/responses/unsounded-17-92.html.gz differ
diff --git a/tests/responses/unsounded-root.html.gz b/tests/responses/unsounded-root.html.gz
new file mode 100644
index 000000000..7a0731d2c
Binary files /dev/null and b/tests/responses/unsounded-root.html.gz differ
diff --git a/tests/test_modules.py b/tests/test_modules.py
index c6ec1e021..71228ed8d 100644
--- a/tests/test_modules.py
+++ b/tests/test_modules.py
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2019-2021 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
 import re
 
 import pytest
@@ -15,7 +15,7 @@ def cmd(*options):
 
 
 @pytest.mark.usefixtures('_nosleep', '_noappdirs')
-class TestModules(object):
+class TestModules:
     """Test that specific comic modules work correctly."""
 
     @responses.activate
@@ -40,9 +40,26 @@ class TestModules(object):
         cmd('--basepath', str(tmpdir), 'CalvinAndHobbesEnEspanol')
         cmd('--basepath', str(tmpdir), 'CalvinAndHobbesEnEspanol:2012/07/22')
 
+    @responses.activate
+    def test_unsounded(self, tmpdir, capfd):
+        """Run the Unsounded module against mocked pages and expect no ERROR output."""
+        httpmocks.page('https://www.casualvillain.com/Unsounded/comic+index/',
+            'unsounded-root')
+        httpmocks.page('https://www.casualvillain.com/Unsounded/comic/ch17/ch17_92.html',
+            'unsounded-17-92')
+        httpmocks.page('https://www.casualvillain.com/Unsounded/comic/ch17/ch17_137.html',
+            'unsounded-17-137')
+        httpmocks.jpeg(re.compile(r'.*/pageart/ch\d+_\d+\.jpg'))
+
+        cmd('--basepath', str(tmpdir), 'Unsounded')
+        cmd('--basepath', str(tmpdir), 'Unsounded:17-92')
+
+        out = capfd.readouterr().out
+        assert 'ERROR' not in out
+
     @responses.activate
     @pytest.mark.skip(reason="SoloeLeveling was removed, so we have no way to test this...")
-    def test_sololeveling_geoblock(self, tmpdir):
+    def test_sololeveling_geoblock(self):
         from dosagelib.plugins.s import SoloLeveling
         from dosagelib.scraper import GeoblockedException
 