Update joinPathPartsNamer: Remove defaults
This commit is contained in:
parent
15423eab21
commit
3722fbe7e4
7 changed files with 41 additions and 34 deletions
|
@ -1,39 +1,49 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2020 Tobias Gruetzmacher
|
||||
# Copyright (C) 2019-2020 Daniel Ring
|
||||
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
|
||||
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
|
||||
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Daniel Ring
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Protocol
|
||||
|
||||
from .util import getQueryParams
|
||||
from .scraper import Scraper
|
||||
|
||||
|
||||
def queryNamer(param, use_page_url=False):
|
||||
class Namer(Protocol):
|
||||
"""A protocol for generic callbacks to name web comic images."""
|
||||
def __call__(_, self: Scraper, image_url: str, page_url: str) -> str | None:
|
||||
...
|
||||
|
||||
|
||||
def queryNamer(param, use_page_url=False) -> Namer:
|
||||
"""Get name from URL query part."""
|
||||
def _namer(self, image_url, page_url):
|
||||
def _namer(self, image_url: str, page_url: str) -> str | None:
|
||||
"""Get URL query part."""
|
||||
url = page_url if use_page_url else image_url
|
||||
return getQueryParams(url)[param][0]
|
||||
return _namer
|
||||
|
||||
|
||||
def regexNamer(regex, use_page_url=False):
|
||||
def regexNamer(regex, use_page_url=False) -> Namer:
|
||||
"""Get name from regular expression."""
|
||||
def _namer(self, image_url, page_url):
|
||||
def _namer(self, image_url: str, page_url: str) -> str | None:
|
||||
"""Get first regular expression group."""
|
||||
url = page_url if use_page_url else image_url
|
||||
mo = regex.search(url)
|
||||
if mo:
|
||||
return mo.group(1)
|
||||
return mo.group(1) if mo else None
|
||||
return _namer
|
||||
|
||||
|
||||
def joinPathPartsNamer(pageurlparts, imageurlparts=(-1,), joinchar='_'):
|
||||
def joinPathPartsNamer(pageparts=(), imageparts=(), joinchar='_') -> Namer:
|
||||
"""Get name by mashing path parts together with underscores."""
|
||||
def _namer(self, imageurl, pageurl):
|
||||
def _namer(self: Scraper, image_url: str, page_url: str) -> str | None:
|
||||
# Split and drop host name
|
||||
pageurlsplit = pageurl.split('/')[3:]
|
||||
imageurlsplit = imageurl.split('/')[3:]
|
||||
joinparts = ([pageurlsplit[i] for i in pageurlparts] +
|
||||
[imageurlsplit[i] for i in imageurlparts])
|
||||
pagesplit = page_url.split('/')[3:]
|
||||
imagesplit = image_url.split('/')[3:]
|
||||
joinparts = ([pagesplit[i] for i in pageparts] +
|
||||
[imagesplit[i] for i in imageparts])
|
||||
return joinchar.join(joinparts)
|
||||
return _namer
|
||||
|
||||
|
|
|
@ -404,7 +404,7 @@ class CrossTimeCafe(_ParserScraper):
|
|||
class CSectionComics(WordPressScraper):
|
||||
url = 'https://www.csectioncomics.com/'
|
||||
firstStripUrl = url + 'comics/one-day-in-country'
|
||||
namer = joinPathPartsNamer((), (-3, -2, -1))
|
||||
namer = joinPathPartsNamer(imageparts=(-3, -2, -1))
|
||||
multipleImagesPerStrip = True
|
||||
|
||||
|
||||
|
@ -466,7 +466,7 @@ class CyanideAndHappiness(ParserScraper):
|
|||
prevSearch = '//div[@type="comic"]//a[*[local-name()="svg" and @rotate="180deg"]]'
|
||||
nextSearch = '//div[@type="comic"]//a[*[local-name()="svg" and @rotate="0deg"]]'
|
||||
starter = bounceStarter
|
||||
namer = joinPathPartsNamer((), range(-4, 0))
|
||||
namer = joinPathPartsNamer(imageparts=range(-4, 0))
|
||||
|
||||
|
||||
class CynWolf(_ParserScraper):
|
||||
|
|
|
@ -1,11 +1,6 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Thomas W. Littauer
|
||||
try:
|
||||
from importlib_resources import as_file, files
|
||||
except ImportError:
|
||||
from importlib.resources import as_file, files
|
||||
|
||||
from ..helpers import bounceStarter, joinPathPartsNamer
|
||||
from ..scraper import ParserScraper
|
||||
|
||||
|
@ -15,7 +10,7 @@ class ComicsKingdom(ParserScraper):
|
|||
prevSearch = '//a[./img[contains(@alt, "Previous")]]'
|
||||
nextSearch = '//a[./img[contains(@alt, "Next")]]'
|
||||
starter = bounceStarter
|
||||
namer = joinPathPartsNamer((-2, -1), ())
|
||||
namer = joinPathPartsNamer(pageparts=(-2, -1))
|
||||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
def __init__(self, name, path, lang=None):
|
||||
|
|
|
@ -171,7 +171,7 @@ class Fragile(_ParserScraper):
|
|||
endOfLife = True
|
||||
|
||||
|
||||
class FredoAndPidjin(_ParserScraper):
|
||||
class FredoAndPidjin(ParserScraper):
|
||||
url = 'https://www.pidjin.net/'
|
||||
stripUrl = url + '%s/'
|
||||
firstStripUrl = stripUrl % '2006/02/19/goofy-monday'
|
||||
|
@ -180,7 +180,7 @@ class FredoAndPidjin(_ParserScraper):
|
|||
prevSearch = '//span[d:class("prev")]/a'
|
||||
latestSearch = '//section[d:class("latest")]//a'
|
||||
starter = indirectStarter
|
||||
namer = joinPathPartsNamer((0, 1, 2))
|
||||
namer = joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,))
|
||||
|
||||
|
||||
class Freefall(_ParserScraper):
|
||||
|
|
|
@ -272,7 +272,7 @@ class ToonHole(ParserScraper):
|
|||
prevSearch = '//a[@rel="prev"]'
|
||||
latestSearch = '//a[@rel="bookmark"]'
|
||||
starter = indirectStarter
|
||||
namer = joinPathPartsNamer((), (-3, -2, -1))
|
||||
namer = joinPathPartsNamer(imageparts=(-3, -2, -1))
|
||||
|
||||
|
||||
class TrippingOverYou(_BasicScraper):
|
||||
|
|
|
@ -23,7 +23,7 @@ class Zapiro(ParserScraper):
|
|||
imageSearch = '//div[@id="cartoon"]/img'
|
||||
prevSearch = '//a[d:class("left")]'
|
||||
nextSearch = '//a[d:class("right")]'
|
||||
namer = joinPathPartsNamer((-1,), ())
|
||||
namer = joinPathPartsNamer(pageparts=(-1,))
|
||||
|
||||
|
||||
class ZenPencils(WordPressNavi):
|
||||
|
@ -60,7 +60,7 @@ class Zwarwald(BasicScraper):
|
|||
tagre("img", "src",
|
||||
r'http://zwarwald\.de/images/prev\.jpg',
|
||||
quote="'"))
|
||||
namer = joinPathPartsNamer((), (-3, -2, -1))
|
||||
namer = joinPathPartsNamer(imageparts=(-3, -2, -1))
|
||||
help = 'Index format: number'
|
||||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (C) 2019 Tobias Gruetzmacher
|
||||
# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
|
||||
from dosagelib.helpers import joinPathPartsNamer, queryNamer
|
||||
|
||||
|
||||
class TestNamer(object):
|
||||
class TestNamer:
|
||||
"""
|
||||
Tests for comic namer.
|
||||
"""
|
||||
|
@ -16,6 +16,8 @@ class TestNamer(object):
|
|||
def test_joinPathPartsNamer(self):
|
||||
imgurl = 'https://HOST/wp-content/uploads/2019/02/tennis5wp-1.png'
|
||||
pageurl = 'https://HOST/2019/03/11/12450/'
|
||||
assert joinPathPartsNamer((0, 1, 2))(self, imgurl, pageurl) == '2019_03_11_tennis5wp-1.png'
|
||||
assert joinPathPartsNamer((0, 1, 2), (-1,), '-')(self, imgurl, pageurl) == '2019-03-11-tennis5wp-1.png'
|
||||
assert joinPathPartsNamer((0, -2), ())(self, imgurl, pageurl) == '2019_12450'
|
||||
assert joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,))(self,
|
||||
imgurl, pageurl) == '2019_03_11_tennis5wp-1.png'
|
||||
assert joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,), joinchar='-')(self,
|
||||
imgurl, pageurl) == '2019-03-11-tennis5wp-1.png'
|
||||
assert joinPathPartsNamer(pageparts=(0, -2))(self, imgurl, pageurl) == '2019_12450'
|
||||
|
|
Loading…
Reference in a new issue