Add new namer "joinPathPartsNamer"

Additionally, switch some comics which benefit from it to the new namer.
This fixes #127.
This commit is contained in:
Tobias Gruetzmacher 2019-06-30 20:52:15 +02:00
parent a7b6393d6f
commit 328b3cd072
4 changed files with 22 additions and 20 deletions

View file

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2017 Tobias Gruetzmacher
# Copyright (C) 2015-2019 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
@ -28,6 +28,18 @@ def regexNamer(regex, use_page_url=False):
return _namer
def joinPathPartsNamer(pageurlparts, imageurlparts=(-1,), joinchar='_'):
    """Build a namer that joins selected URL path pieces into one name.

    Both URLs are split on '/' and the first three pieces (the scheme,
    the empty piece between the two slashes, and the host name) are
    dropped. The remaining pieces are then selected by index; negative
    indices count from the end. Note that a URL ending in '/' has an
    empty string as its last piece.

    pageurlparts: indices of the page URL pieces to use (may be empty).
    imageurlparts: indices of the image URL pieces to use; by default
        only the last piece.
    joinchar: separator placed between the selected pieces.

    Returns a function with the namer signature (self, imageurl, pageurl)
    that yields the page pieces followed by the image pieces, joined by
    joinchar. That function raises IndexError if a requested index does
    not exist in the corresponding URL.
    """
    def _namer(self, imageurl, pageurl):
        # Split and drop scheme and host name
        pageurlsplit = pageurl.split('/')[3:]
        imageurlsplit = imageurl.split('/')[3:]
        joinparts = ([pageurlsplit[i] for i in pageurlparts] +
                     [imageurlsplit[i] for i in imageurlparts])
        return joinchar.join(joinparts)
    return _namer
def bounceStarter(self):
"""Get start URL by "bouncing" back and forth one time.

View file

@ -1,14 +1,14 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2017 Tobias Gruetzmacher
# Copyright (C) 2015-2019 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile, escape, IGNORECASE
from ..util import tagre
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter, xpath_class
from ..helpers import indirectStarter, joinPathPartsNamer, xpath_class
from .common import _WPNaviIn, _WordPressScraper
@ -135,7 +135,7 @@ class FredoAndPidjin(_ParserScraper):
prevSearch = '//span[%s]/a' % xpath_class("prev")
latestSearch = '//section[%s]//a' % xpath_class("latest")
starter = indirectStarter
namer = joinPathPartsNamer((0, 1, 2))
class Freefall(_BasicScraper):
url = 'http://freefall.purrsia.com/default.htm'

View file

@ -10,7 +10,7 @@ from os.path import splitext
import datetime
from ..scraper import _BasicScraper, _ParserScraper
from ..helpers import indirectStarter, bounceStarter, xpath_class
from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer, xpath_class
from ..util import tagre
from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, WP_LATEST_SEARCH
@ -162,11 +162,7 @@ class SexyLosers(_ParserScraper):
latestSearch = '//a[@rel="bookmark"]'
help = 'Index format: nnn'
starter = indirectStarter
def namer(self, image_url, page_url):
    """Name the image '<index>-<title>'.

    index is the second-to-last '/'-separated piece of page_url (for a
    page URL ending in '/', that is the last non-empty path segment);
    title is everything after the last '/' of image_url. Raises
    IndexError if either URL has too few slashes.
    """
    index = page_url.rsplit('/', 2)[1]
    title = image_url.rsplit('/', 1)[1]
    return index + '-' + title
namer = joinPathPartsNamer((-2,), (-1,), '-')
class Sharksplode(_WordPressScraper):

View file

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2017 Tobias Gruetzmacher
# Copyright (C) 2015-2019 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
@ -9,7 +9,7 @@ from re import compile, escape
from ..scraper import _BasicScraper, _ParserScraper
from ..util import tagre
from ..helpers import bounceStarter, xpath_class
from ..helpers import bounceStarter, joinPathPartsNamer, xpath_class
from .common import _WPNavi
@ -26,10 +26,7 @@ class Zapiro(_ParserScraper):
imageSearch = '//div[@id="cartoon"]/img'
prevSearch = '//a[%s]' % xpath_class('left')
nextSearch = '//a[%s]' % xpath_class('right')
def namer(self, image_url, page_url):
    """Name the image after the part of page_url following its last '/'.

    image_url is not used. Raises IndexError if page_url contains no '/'.
    """
    return page_url.rsplit('/', 1)[1]
namer = joinPathPartsNamer((-1,), ())
class ZenPencils(_WPNavi):
@ -65,6 +62,7 @@ class Zwarwald(_BasicScraper):
tagre("img", "src",
r'http://zwarwald\.de/images/prev\.jpg',
quote="'"))
namer = joinPathPartsNamer((), (-3, -2, -1))
help = 'Index format: number'
def shouldSkipUrl(self, url, data):
@ -77,7 +75,3 @@ class Zwarwald(_BasicScraper):
self.stripUrl % "368",
self.stripUrl % '495',
)
def namer(self, image_url, page_url):
    """Name the image '<year>_<month>_<name>'.

    The three values are the last three '/'-separated pieces of
    image_url; the variable names reflect the expected URL layout, which
    is not checked here. page_url is not used. Raises ValueError if
    image_url contains fewer than three '/' characters.
    """
    # The leading piece (everything before the last three) is discarded.
    year, month, name = image_url.rsplit('/', 3)[1:]
    return "%s_%s_%s" % (year, month, name)