Add new namer "joinPathPartsNamer"

Additionally, switch some comics which benefit from it to the new namer. This fixes #127.
2019-06-30 20:52:15 +02:00 · 2019-06-30 20:52:15 +02:00 · 328b3cd072
commit 328b3cd072
parent a7b6393d6f
4 changed files with 22 additions and 20 deletions
--- a/dosagelib/helpers.py
+++ b/dosagelib/helpers.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2017 Tobias Gruetzmacher
+# Copyright (C) 2015-2019 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@ -28,6 +28,18 @@ def regexNamer(regex, use_page_url=False):
    return _namer


+def joinPathPartsNamer(pageurlparts, imageurlparts=(-1,), joinchar='_'):
+    """Get name by joining chosen page/image URL path parts with joinchar."""
+    def _namer(self, imageurl, pageurl):
+        # Split on '/'; [3:] drops scheme, empty part and host of a full URL
+        pageurlsplit = pageurl.split('/')[3:]
+        imageurlsplit = imageurl.split('/')[3:]
+        joinparts = ([pageurlsplit[i] for i in pageurlparts] +
+            [imageurlsplit[i] for i in imageurlparts])
+        return joinchar.join(joinparts)
+    return _namer
+
+
 def bounceStarter(self):
    """Get start URL by "bouncing" back and forth one time.

--- a/dosagelib/plugins/f.py
+++ b/dosagelib/plugins/f.py
@ -1,14 +1,14 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2017 Tobias Gruetzmacher
+# Copyright (C) 2015-2019 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function
 from re import compile, escape, IGNORECASE

 from ..util import tagre
 from ..scraper import _BasicScraper, _ParserScraper
-from ..helpers import indirectStarter, xpath_class
+from ..helpers import indirectStarter, joinPathPartsNamer, xpath_class
 from .common import _WPNaviIn, _WordPressScraper


@ -135,7 +135,7 @@ class FredoAndPidjin(_ParserScraper):
    prevSearch = '//span[%s]/a' % xpath_class("prev")
    latestSearch = '//section[%s]//a' % xpath_class("latest")
    starter = indirectStarter
-
+    namer = joinPathPartsNamer((0, 1, 2))

 class Freefall(_BasicScraper):
    url = 'http://freefall.purrsia.com/default.htm'
--- a/dosagelib/plugins/s.py
+++ b/dosagelib/plugins/s.py
@ -10,7 +10,7 @@ from os.path import splitext
 import datetime

 from ..scraper import _BasicScraper, _ParserScraper
-from ..helpers import indirectStarter, bounceStarter, xpath_class
+from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer, xpath_class
 from ..util import tagre
 from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, WP_LATEST_SEARCH

@ -162,11 +162,7 @@ class SexyLosers(_ParserScraper):
    latestSearch = '//a[@rel="bookmark"]'
    help = 'Index format: nnn'
    starter = indirectStarter
-
-    def namer(self, image_url, page_url):
-        index = page_url.rsplit('/', 2)[1]
-        title = image_url.rsplit('/', 1)[1]
-        return index + '-' + title
+    namer = joinPathPartsNamer((-2,), (-1,), '-')


 class Sharksplode(_WordPressScraper):
--- a/dosagelib/plugins/z.py
+++ b/dosagelib/plugins/z.py
@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2017 Tobias Gruetzmacher
+# Copyright (C) 2015-2019 Tobias Gruetzmacher

 from __future__ import absolute_import, division, print_function

@ -9,7 +9,7 @@ from re import compile, escape

 from ..scraper import _BasicScraper, _ParserScraper
 from ..util import tagre
-from ..helpers import bounceStarter, xpath_class
+from ..helpers import bounceStarter, joinPathPartsNamer, xpath_class
 from .common import _WPNavi


@ -26,10 +26,7 @@ class Zapiro(_ParserScraper):
    imageSearch = '//div[@id="cartoon"]/img'
    prevSearch = '//a[%s]' % xpath_class('left')
    nextSearch = '//a[%s]' % xpath_class('right')
-
-    def namer(self, image_url, page_url):
-        parts = page_url.rsplit('/', 1)
-        return parts[1]
+    namer = joinPathPartsNamer((-1,), ())


 class ZenPencils(_WPNavi):
@ -65,6 +62,7 @@ class Zwarwald(_BasicScraper):
                         tagre("img", "src",
                               r'http://zwarwald\.de/images/prev\.jpg',
                               quote="'"))
+    namer = joinPathPartsNamer((), (-3, -2, -1))
    help = 'Index format: number'

    def shouldSkipUrl(self, url, data):
@ -77,7 +75,3 @@ class Zwarwald(_BasicScraper):
            self.stripUrl % "368",
            self.stripUrl % '495',
        )
-
-    def namer(self, image_url, page_url):
-        prefix, year, month, name = image_url.rsplit('/', 3)
-        return "%s_%s_%s" % (year, month, name)