Update joinPathPartsNamer: Remove defaults
parent 15423eab21
commit 3722fbe7e4
7 changed files with 41 additions and 34 deletions
@@ -1,39 +1,49 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
-# Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2020 Tobias Gruetzmacher
-# Copyright (C) 2019-2020 Daniel Ring
+# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
+# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
+# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2019 Daniel Ring
+from __future__ import annotations
+
+from typing import Protocol
+
 from .util import getQueryParams
+from .scraper import Scraper
+
+
+class Namer(Protocol):
+    """A protocol for generic callbacks to name web comic images."""
+
+    def __call__(_, self: Scraper, image_url: str, page_url: str) -> str | None:
+        ...
 
 
-def queryNamer(param, use_page_url=False):
+def queryNamer(param, use_page_url=False) -> Namer:
     """Get name from URL query part."""
-    def _namer(self, image_url, page_url):
+    def _namer(self, image_url: str, page_url: str) -> str | None:
         """Get URL query part."""
         url = page_url if use_page_url else image_url
         return getQueryParams(url)[param][0]
     return _namer
 
 
-def regexNamer(regex, use_page_url=False):
+def regexNamer(regex, use_page_url=False) -> Namer:
     """Get name from regular expression."""
-    def _namer(self, image_url, page_url):
+    def _namer(self, image_url: str, page_url: str) -> str | None:
         """Get first regular expression group."""
         url = page_url if use_page_url else image_url
         mo = regex.search(url)
-        if mo:
-            return mo.group(1)
+        return mo.group(1) if mo else None
     return _namer
 
 
-def joinPathPartsNamer(pageurlparts, imageurlparts=(-1,), joinchar='_'):
+def joinPathPartsNamer(pageparts=(), imageparts=(), joinchar='_') -> Namer:
    """Get name by mashing path parts together with underscores."""
-    def _namer(self, imageurl, pageurl):
+    def _namer(self: Scraper, image_url: str, page_url: str) -> str | None:
         # Split and drop host name
-        pageurlsplit = pageurl.split('/')[3:]
-        imageurlsplit = imageurl.split('/')[3:]
-        joinparts = ([pageurlsplit[i] for i in pageurlparts] +
-                     [imageurlsplit[i] for i in imageurlparts])
+        pagesplit = page_url.split('/')[3:]
+        imagesplit = image_url.split('/')[3:]
+        joinparts = ([pagesplit[i] for i in pageparts] +
+                     [imagesplit[i] for i in imageparts])
         return joinchar.join(joinparts)
     return _namer
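With the new `Namer` protocol, the factories' return type is checkable: any callable taking a scraper instance plus image and page URL and returning an optional string qualifies. A minimal sketch of what now type-checks — the `upper_namer` helper and the list are illustrative only, not part of this commit; the future import keeps the `str | None` syntax working on older interpreters, matching the diff above:

from __future__ import annotations

from dosagelib.helpers import Namer, joinPathPartsNamer

def upper_namer(self, image_url: str, page_url: str) -> str | None:
    # A hand-rolled namer also satisfies the protocol: it receives the
    # scraper instance plus both URLs and returns the file name.
    return image_url.rsplit('/', 1)[-1].upper()

namers: list[Namer] = [
    upper_namer,
    joinPathPartsNamer(imageparts=(-1,)),  # keyword arguments, no implied default
]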
@@ -404,7 +404,7 @@ class CrossTimeCafe(_ParserScraper):
 class CSectionComics(WordPressScraper):
     url = 'https://www.csectioncomics.com/'
     firstStripUrl = url + 'comics/one-day-in-country'
-    namer = joinPathPartsNamer((), (-3, -2, -1))
+    namer = joinPathPartsNamer(imageparts=(-3, -2, -1))
     multipleImagesPerStrip = True
 
 
@@ -466,7 +466,7 @@ class CyanideAndHappiness(ParserScraper):
     prevSearch = '//div[@type="comic"]//a[*[local-name()="svg" and @rotate="180deg"]]'
     nextSearch = '//div[@type="comic"]//a[*[local-name()="svg" and @rotate="0deg"]]'
     starter = bounceStarter
-    namer = joinPathPartsNamer((), range(-4, 0))
+    namer = joinPathPartsNamer(imageparts=range(-4, 0))
 
 
 class CynWolf(_ParserScraper):
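With the defaults gone, each call site states its part tuples explicitly. `imageparts=(-3, -2, -1)` builds the name from the last three path segments of the image URL; a quick sketch of the arithmetic, with a made-up WordPress-style upload URL:

image_url = 'https://example.com/wp-content/uploads/2021/04/strip.png'  # made-up
parts = image_url.split('/')[3:]   # drop 'https:', '' and the host name
# parts == ['wp-content', 'uploads', '2021', '04', 'strip.png']
name = '_'.join(parts[i] for i in (-3, -2, -1))
assert name == '2021_04_strip.png'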
@@ -1,11 +1,6 @@
 # SPDX-License-Identifier: MIT
 # SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
 # SPDX-FileCopyrightText: © 2019 Thomas W. Littauer
-try:
-    from importlib_resources import as_file, files
-except ImportError:
-    from importlib.resources import as_file, files
-
 from ..helpers import bounceStarter, joinPathPartsNamer
 from ..scraper import ParserScraper
 
@@ -15,7 +10,7 @@ class ComicsKingdom(ParserScraper):
     prevSearch = '//a[./img[contains(@alt, "Previous")]]'
     nextSearch = '//a[./img[contains(@alt, "Next")]]'
     starter = bounceStarter
-    namer = joinPathPartsNamer((-2, -1), ())
+    namer = joinPathPartsNamer(pageparts=(-2, -1))
     help = 'Index format: yyyy-mm-dd'
 
     def __init__(self, name, path, lang=None):
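ComicsKingdom names strips from the page URL instead: `pageparts=(-2, -1)` takes the last two path segments, which per the help string should be the comic slug and the yyyy-mm-dd date. A sketch under that assumption — the URL shape below is invented, not taken from this diff:

page_url = 'https://comicskingdom.com/mutts/2023-01-15'  # assumed URL shape
parts = page_url.split('/')[3:]    # ['mutts', '2023-01-15']
name = '_'.join(parts[i] for i in (-2, -1))
assert name == 'mutts_2023-01-15'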
@@ -171,7 +171,7 @@ class Fragile(_ParserScraper):
     endOfLife = True
 
 
-class FredoAndPidjin(_ParserScraper):
+class FredoAndPidjin(ParserScraper):
     url = 'https://www.pidjin.net/'
     stripUrl = url + '%s/'
     firstStripUrl = stripUrl % '2006/02/19/goofy-monday'
@@ -180,7 +180,7 @@ class FredoAndPidjin(_ParserScraper):
     prevSearch = '//span[d:class("prev")]/a'
     latestSearch = '//section[d:class("latest")]//a'
     starter = indirectStarter
-    namer = joinPathPartsNamer((0, 1, 2))
+    namer = joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,))
 
 
 class Freefall(_ParserScraper):
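The FredoAndPidjin hunk shows most clearly why the defaults went away: the old `joinPathPartsNamer((0, 1, 2))` silently appended the image file name through the implicit `imageurlparts=(-1,)`, whereas the new call spells out both tuples. A before/after sketch — the page URL is the firstStripUrl above, the image URL is invented for illustration:

page_url = 'https://www.pidjin.net/2006/02/19/goofy-monday/'
image_url = 'https://www.pidjin.net/media/strip.png'  # invented

pagesplit = page_url.split('/')[3:]    # ['2006', '02', '19', 'goofy-monday', '']
imagesplit = image_url.split('/')[3:]  # ['media', 'strip.png']

# old: joinPathPartsNamer((0, 1, 2))             -- imageurlparts=(-1,) implied
# new: joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,))  -- explicit
name = '_'.join([pagesplit[i] for i in (0, 1, 2)] + [imagesplit[i] for i in (-1,)])
assert name == '2006_02_19_strip.png'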
@@ -272,7 +272,7 @@ class ToonHole(ParserScraper):
     prevSearch = '//a[@rel="prev"]'
     latestSearch = '//a[@rel="bookmark"]'
     starter = indirectStarter
-    namer = joinPathPartsNamer((), (-3, -2, -1))
+    namer = joinPathPartsNamer(imageparts=(-3, -2, -1))
 
 
 class TrippingOverYou(_BasicScraper):
@@ -23,7 +23,7 @@ class Zapiro(ParserScraper):
     imageSearch = '//div[@id="cartoon"]/img'
     prevSearch = '//a[d:class("left")]'
     nextSearch = '//a[d:class("right")]'
-    namer = joinPathPartsNamer((-1,), ())
+    namer = joinPathPartsNamer(pageparts=(-1,))
 
 
 class ZenPencils(WordPressNavi):
@@ -60,7 +60,7 @@ class Zwarwald(BasicScraper):
         tagre("img", "src",
               r'http://zwarwald\.de/images/prev\.jpg',
               quote="'"))
-    namer = joinPathPartsNamer((), (-3, -2, -1))
+    namer = joinPathPartsNamer(imageparts=(-3, -2, -1))
     help = 'Index format: number'
 
     def shouldSkipUrl(self, url, data):
@@ -1,9 +1,9 @@
 # SPDX-License-Identifier: MIT
-# Copyright (C) 2019 Tobias Gruetzmacher
+# SPDX-FileCopyrightText: © 2019 Tobias Gruetzmacher
 from dosagelib.helpers import joinPathPartsNamer, queryNamer
 
 
-class TestNamer(object):
+class TestNamer:
     """
     Tests for comic namer.
     """
@@ -16,6 +16,8 @@ class TestNamer(object):
     def test_joinPathPartsNamer(self):
         imgurl = 'https://HOST/wp-content/uploads/2019/02/tennis5wp-1.png'
         pageurl = 'https://HOST/2019/03/11/12450/'
-        assert joinPathPartsNamer((0, 1, 2))(self, imgurl, pageurl) == '2019_03_11_tennis5wp-1.png'
-        assert joinPathPartsNamer((0, 1, 2), (-1,), '-')(self, imgurl, pageurl) == '2019-03-11-tennis5wp-1.png'
-        assert joinPathPartsNamer((0, -2), ())(self, imgurl, pageurl) == '2019_12450'
+        assert joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,))(self,
+            imgurl, pageurl) == '2019_03_11_tennis5wp-1.png'
+        assert joinPathPartsNamer(pageparts=(0, 1, 2), imageparts=(-1,), joinchar='-')(self,
+            imgurl, pageurl) == '2019-03-11-tennis5wp-1.png'
+        assert joinPathPartsNamer(pageparts=(0, -2))(self, imgurl, pageurl) == '2019_12450'
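One subtlety in the last assertion: the trailing slash on pageurl leaves an empty string at the end of the split, so the numeric strip id sits at index -2, not -1. A quick check in a plain interpreter:

>>> parts = 'https://HOST/2019/03/11/12450/'.split('/')[3:]
>>> parts
['2019', '03', '11', '12450', '']
>>> '_'.join(parts[i] for i in (0, -2))   # index -1 would give a trailing '_'
'2019_12450'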