From 3722fbe7e4aafce7486e99ad3b000b8b1c87a1d2 Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Sun, 18 Feb 2024 18:02:02 +0100 Subject: [PATCH] Update joinPathPartsNamer: Remove defaults --- dosagelib/helpers.py | 42 ++++++++++++++++++------------ dosagelib/plugins/c.py | 4 +-- dosagelib/plugins/comicskingdom.py | 7 +---- dosagelib/plugins/f.py | 4 +-- dosagelib/plugins/t.py | 2 +- dosagelib/plugins/z.py | 4 +-- tests/test_helpers.py | 12 +++++---- 7 files changed, 41 insertions(+), 34 deletions(-) diff --git a/dosagelib/helpers.py b/dosagelib/helpers.py index d53e04cfb..b3e4f00cc 100644 --- a/dosagelib/helpers.py +++ b/dosagelib/helpers.py @@ -1,39 +1,49 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs -# Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2020 Tobias Gruetzmacher -# Copyright (C) 2019-2020 Daniel Ring +# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs +# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam +# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2019 Daniel Ring +from __future__ import annotations + +from typing import Protocol + from .util import getQueryParams +from .scraper import Scraper -def queryNamer(param, use_page_url=False): +class Namer(Protocol): + """A protocol for generic callbacks to name web comic images.""" + def __call__(_, self: Scraper, image_url: str, page_url: str) -> str | None: + ... + + +def queryNamer(param, use_page_url=False) -> Namer: """Get name from URL query part.""" - def _namer(self, image_url, page_url): + def _namer(self, image_url: str, page_url: str) -> str | None: """Get URL query part.""" url = page_url if use_page_url else image_url return getQueryParams(url)[param][0] return _namer -def regexNamer(regex, use_page_url=False): +def regexNamer(regex, use_page_url=False) -> Namer: """Get name from regular expression.""" - def _namer(self, image_url, page_url): + def _namer(self, image_url: str, page_url: str) -> str | None: """Get first regular expression group.""" url = page_url if use_page_url else image_url mo = regex.search(url) - if mo: - return mo.group(1) + return mo.group(1) if mo else None return _namer -def joinPathPartsNamer(pageurlparts, imageurlparts=(-1,), joinchar='_'): +def joinPathPartsNamer(pageparts=(), imageparts=(), joinchar='_') -> Namer: """Get name by mashing path parts together with underscores.""" - def _namer(self, imageurl, pageurl): + def _namer(self: Scraper, image_url: str, page_url: str) -> str | None: # Split and drop host name - pageurlsplit = pageurl.split('/')[3:] - imageurlsplit = imageurl.split('/')[3:] - joinparts = ([pageurlsplit[i] for i in pageurlparts] + - [imageurlsplit[i] for i in imageurlparts]) + pagesplit = page_url.split('/')[3:] + imagesplit = image_url.split('/')[3:] + joinparts = ([pagesplit[i] for i in pageparts] + + [imagesplit[i] for i in imageparts]) return joinchar.join(joinparts) return _namer diff --git a/dosagelib/plugins/c.py b/dosagelib/plugins/c.py index c596ede60..27f7278d2 100644 --- a/dosagelib/plugins/c.py +++ b/dosagelib/plugins/c.py @@ -404,7 +404,7 @@ class CrossTimeCafe(_ParserScraper): class CSectionComics(WordPressScraper): url = 'https://www.csectioncomics.com/' firstStripUrl = url + 'comics/one-day-in-country' - namer = joinPathPartsNamer((), (-3, -2, -1)) + namer = joinPathPartsNamer(imageparts=(-3, -2, -1)) multipleImagesPerStrip = True @@ -466,7 +466,7 @@ class CyanideAndHappiness(ParserScraper): prevSearch = '//div[@type="comic"]//a[*[local-name()="svg" and @rotate="180deg"]]' nextSearch = '//div[@type="comic"]//a[*[local-name()="svg" and @rotate="0deg"]]' starter = bounceStarter - namer = joinPathPartsNamer((), range(-4, 0)) + namer = joinPathPartsNamer(imageparts=range(-4, 0)) class CynWolf(_ParserScraper): diff --git a/dosagelib/plugins/comicskingdom.py b/dosagelib/plugins/comicskingdom.py index 818a37fa7..0a792dfd5 100644 --- a/dosagelib/plugins/comicskingdom.py +++ b/dosagelib/plugins/comicskingdom.py @@ -1,11 +1,6 @@ # SPDX-License-Identifier: MIT # SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher # SPDX-FileCopyrightText: © 2019 Thomas W. Littauer -try: - from importlib_resources import as_file, files -except ImportError: - from importlib.resources import as_file, files - from ..helpers import bounceStarter, joinPathPartsNamer from ..scraper import ParserScraper @@ -15,7 +10,7 @@ class ComicsKingdom(ParserScraper): prevSearch = '//a[./img[contains(@alt, "Previous")]]' nextSearch = '//a[./img[contains(@alt, "Next")]]' starter = bounceStarter - namer = joinPathPartsNamer((-2, -1), ()) + namer = joinPathPartsNamer(pageparts=(-2, -1)) help = 'Index format: yyyy-mm-dd' def __init__(self, name, path, lang=None): diff --git a/dosagelib/plugins/f.py b/dosagelib/plugins/f.py index d3f45ac98..01c43da33 100644 --- a/dosagelib/plugins/f.py +++ b/dosagelib/plugins/f.py @@ -171,7 +171,7 @@ class Fragile(_ParserScraper): endOfLife = True -class FredoAndPidjin(_ParserScraper): +class FredoAndPidjin(ParserScraper): url = 'https://www.pidjin.net/' stripUrl = url + '%s/' firstStripUrl = stripUrl % '2006/02/19/goofy-monday' @@ -180,7 +180,7 @@ class FredoAndPidjin(_ParserScraper): prevSearch = '//span[d:class("prev")]/a' latestSearch = '//section[d:class("latest")]//a' starter = indirectStarter - namer = joinPathPartsNamer((0, 1, 2)) + namer = joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,)) class Freefall(_ParserScraper): diff --git a/dosagelib/plugins/t.py b/dosagelib/plugins/t.py index ee6801a21..1919e274a 100644 --- a/dosagelib/plugins/t.py +++ b/dosagelib/plugins/t.py @@ -272,7 +272,7 @@ class ToonHole(ParserScraper): prevSearch = '//a[@rel="prev"]' latestSearch = '//a[@rel="bookmark"]' starter = indirectStarter - namer = joinPathPartsNamer((), (-3, -2, -1)) + namer = joinPathPartsNamer(imageparts=(-3, -2, -1)) class TrippingOverYou(_BasicScraper): diff --git a/dosagelib/plugins/z.py b/dosagelib/plugins/z.py index f7556110a..f5ef8e954 100644 --- a/dosagelib/plugins/z.py +++ b/dosagelib/plugins/z.py @@ -23,7 +23,7 @@ class Zapiro(ParserScraper): imageSearch = '//div[@id="cartoon"]/img' prevSearch = '//a[d:class("left")]' nextSearch = '//a[d:class("right")]' - namer = joinPathPartsNamer((-1,), ()) + namer = joinPathPartsNamer(pageparts=(-1,)) class ZenPencils(WordPressNavi): @@ -60,7 +60,7 @@ class Zwarwald(BasicScraper): tagre("img", "src", r'http://zwarwald\.de/images/prev\.jpg', quote="'")) - namer = joinPathPartsNamer((), (-3, -2, -1)) + namer = joinPathPartsNamer(imageparts=(-3, -2, -1)) help = 'Index format: number' def shouldSkipUrl(self, url, data): diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 598a74fc4..8c13c89ca 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -1,9 +1,9 @@ # SPDX-License-Identifier: MIT -# Copyright (C) 2019 Tobias Gruetzmacher +# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher from dosagelib.helpers import joinPathPartsNamer, queryNamer -class TestNamer(object): +class TestNamer: """ Tests for comic namer. """ @@ -16,6 +16,8 @@ class TestNamer(object): def test_joinPathPartsNamer(self): imgurl = 'https://HOST/wp-content/uploads/2019/02/tennis5wp-1.png' pageurl = 'https://HOST/2019/03/11/12450/' - assert joinPathPartsNamer((0, 1, 2))(self, imgurl, pageurl) == '2019_03_11_tennis5wp-1.png' - assert joinPathPartsNamer((0, 1, 2), (-1,), '-')(self, imgurl, pageurl) == '2019-03-11-tennis5wp-1.png' - assert joinPathPartsNamer((0, -2), ())(self, imgurl, pageurl) == '2019_12450' + assert joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,))(self, + imgurl, pageurl) == '2019_03_11_tennis5wp-1.png' + assert joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,), joinchar='-')(self, + imgurl, pageurl) == '2019-03-11-tennis5wp-1.png' + assert joinPathPartsNamer(pageparts=(0, -2))(self, imgurl, pageurl) == '2019_12450'