From 328b3cd072f9e821b369915fb024f79a735f07c0 Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Sun, 30 Jun 2019 20:52:15 +0200 Subject: [PATCH] Add new namer "joinPathPartsNamer" Additionally, switch some comics which benefit from it to the new namer. This fixes #127. --- dosagelib/helpers.py | 14 +++++++++++++- dosagelib/plugins/f.py | 6 +++--- dosagelib/plugins/s.py | 8 ++------ dosagelib/plugins/z.py | 14 ++++---------- 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/dosagelib/helpers.py b/dosagelib/helpers.py index d07e5e9f1..1f649f03f 100644 --- a/dosagelib/helpers.py +++ b/dosagelib/helpers.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2017 Tobias Gruetzmacher +# Copyright (C) 2015-2019 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function @@ -28,6 +28,18 @@ def regexNamer(regex, use_page_url=False): return _namer +def joinPathPartsNamer(pageurlparts, imageurlparts=(-1,), joinchar='_'): + """Get name by mashing path parts together with underscores.""" + def _namer(self, imageurl, pageurl): + # Split and drop host name + pageurlsplit = pageurl.split('/')[3:] + imageurlsplit = imageurl.split('/')[3:] + joinparts = ([pageurlsplit[i] for i in pageurlparts] + + [imageurlsplit[i] for i in imageurlparts]) + return joinchar.join(joinparts) + return _namer + + def bounceStarter(self): """Get start URL by "bouncing" back and forth one time. diff --git a/dosagelib/plugins/f.py b/dosagelib/plugins/f.py index 0b3acaf94..4a7ec7e7f 100644 --- a/dosagelib/plugins/f.py +++ b/dosagelib/plugins/f.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2017 Tobias Gruetzmacher +# Copyright (C) 2015-2019 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function from re import compile, escape, IGNORECASE from ..util import tagre from ..scraper import _BasicScraper, _ParserScraper -from ..helpers import indirectStarter, xpath_class +from ..helpers import indirectStarter, joinPathPartsNamer, xpath_class from .common import _WPNaviIn, _WordPressScraper @@ -135,7 +135,7 @@ class FredoAndPidjin(_ParserScraper): prevSearch = '//span[%s]/a' % xpath_class("prev") latestSearch = '//section[%s]//a' % xpath_class("latest") starter = indirectStarter - + namer = joinPathPartsNamer((0, 1, 2)) class Freefall(_BasicScraper): url = 'http://freefall.purrsia.com/default.htm' diff --git a/dosagelib/plugins/s.py b/dosagelib/plugins/s.py index b6f40c748..13f2c5200 100644 --- a/dosagelib/plugins/s.py +++ b/dosagelib/plugins/s.py @@ -10,7 +10,7 @@ from os.path import splitext import datetime from ..scraper import _BasicScraper, _ParserScraper -from ..helpers import indirectStarter, bounceStarter, xpath_class +from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer, xpath_class from ..util import tagre from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, WP_LATEST_SEARCH @@ -162,11 +162,7 @@ class SexyLosers(_ParserScraper): latestSearch = '//a[@rel="bookmark"]' help = 'Index format: nnn' starter = indirectStarter - - def namer(self, image_url, page_url): - index = page_url.rsplit('/', 2)[1] - title = image_url.rsplit('/', 1)[1] - return index + '-' + title + namer = joinPathPartsNamer((-2,), (-1,), '-') class Sharksplode(_WordPressScraper): diff --git a/dosagelib/plugins/z.py b/dosagelib/plugins/z.py index 8cb8256be..e01906175 100644 --- a/dosagelib/plugins/z.py +++ b/dosagelib/plugins/z.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2017 Tobias Gruetzmacher +# Copyright (C) 2015-2019 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function @@ -9,7 +9,7 @@ from re import compile, escape from ..scraper import _BasicScraper, _ParserScraper from ..util import tagre -from ..helpers import bounceStarter, xpath_class +from ..helpers import bounceStarter, joinPathPartsNamer, xpath_class from .common import _WPNavi @@ -26,10 +26,7 @@ class Zapiro(_ParserScraper): imageSearch = '//div[@id="cartoon"]/img' prevSearch = '//a[%s]' % xpath_class('left') nextSearch = '//a[%s]' % xpath_class('right') - - def namer(self, image_url, page_url): - parts = page_url.rsplit('/', 1) - return parts[1] + namer = joinPathPartsNamer((-1,), ()) class ZenPencils(_WPNavi): @@ -65,6 +62,7 @@ class Zwarwald(_BasicScraper): tagre("img", "src", r'http://zwarwald\.de/images/prev\.jpg', quote="'")) + namer = joinPathPartsNamer((), (-3, -2, -1)) help = 'Index format: number' def shouldSkipUrl(self, url, data): @@ -77,7 +75,3 @@ class Zwarwald(_BasicScraper): self.stripUrl % "368", self.stripUrl % '495', ) - - def namer(self, image_url, page_url): - prefix, year, month, name = image_url.rsplit('/', 3) - return "%s_%s_%s" % (year, month, name)