Add new namer "joinPathPartsNamer"
Additionally, switch some comics which benefit from it to the new namer. This fixes #127.
This commit is contained in:
parent
a7b6393d6f
commit
328b3cd072
4 changed files with 22 additions and 20 deletions
|
@ -1,7 +1,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
||||
# Copyright (C) 2015-2019 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
|
@ -28,6 +28,18 @@ def regexNamer(regex, use_page_url=False):
|
|||
return _namer
|
||||
|
||||
|
||||
def joinPathPartsNamer(pageurlparts, imageurlparts=(-1,), joinchar='_'):
    """Build a namer that joins selected URL path parts into a file name.

    pageurlparts and imageurlparts are iterables of integer indexes
    (negative indexes count from the end). They select from the
    '/'-separated parts of the page URL and of the image URL that remain
    after the first three parts are dropped (for an absolute URL these are
    the scheme, the empty part between the two slashes and the host name).
    The selected page parts come first, followed by the selected image
    parts, all joined with joinchar.

    The returned function has the signature (self, imageurl, pageurl), so
    it can be assigned directly as a "namer" class attribute. An index
    that does not exist in the split URL raises IndexError.
    """
    # Materialize the selections once: a one-shot iterable (iterator or
    # generator) would otherwise be exhausted by the first call and every
    # later call would silently produce a different name.
    pageurlparts = tuple(pageurlparts)
    imageurlparts = tuple(imageurlparts)

    def _namer(self, imageurl, pageurl):
        # Split and drop host name
        pageurlsplit = pageurl.split('/')[3:]
        imageurlsplit = imageurl.split('/')[3:]
        joinparts = ([pageurlsplit[i] for i in pageurlparts] +
                     [imageurlsplit[i] for i in imageurlparts])
        return joinchar.join(joinparts)

    return _namer
|
||||
|
||||
|
||||
def bounceStarter(self):
|
||||
"""Get start URL by "bouncing" back and forth one time.
|
||||
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
||||
# Copyright (C) 2015-2019 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
from re import compile, escape, IGNORECASE
|
||||
|
||||
from ..util import tagre
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter, xpath_class
|
||||
from ..helpers import indirectStarter, joinPathPartsNamer, xpath_class
|
||||
from .common import _WPNaviIn, _WordPressScraper
|
||||
|
||||
|
||||
|
@ -135,7 +135,7 @@ class FredoAndPidjin(_ParserScraper):
|
|||
prevSearch = '//span[%s]/a' % xpath_class("prev")
|
||||
latestSearch = '//section[%s]//a' % xpath_class("latest")
|
||||
starter = indirectStarter
|
||||
|
||||
namer = joinPathPartsNamer((0, 1, 2))
|
||||
|
||||
class Freefall(_BasicScraper):
|
||||
url = 'http://freefall.purrsia.com/default.htm'
|
||||
|
|
|
@ -10,7 +10,7 @@ from os.path import splitext
|
|||
import datetime
|
||||
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..helpers import indirectStarter, bounceStarter, xpath_class
|
||||
from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer, xpath_class
|
||||
from ..util import tagre
|
||||
from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, WP_LATEST_SEARCH
|
||||
|
||||
|
@ -162,11 +162,7 @@ class SexyLosers(_ParserScraper):
|
|||
latestSearch = '//a[@rel="bookmark"]'
|
||||
help = 'Index format: nnn'
|
||||
starter = indirectStarter
|
||||
|
||||
def namer(self, image_url, page_url):
    """Return '<page part>-<image file name>' for a strip.

    The page part is the second-to-last '/'-separated piece of page_url;
    the image file name is everything after the last '/' in image_url.
    """
    page_pieces = page_url.rsplit('/', 2)
    image_pieces = image_url.rsplit('/', 1)
    # Index 1 of each split: the piece right after the first cut.
    return '-'.join((page_pieces[1], image_pieces[1]))
|
||||
namer = joinPathPartsNamer((-2,), (-1,), '-')
|
||||
|
||||
|
||||
class Sharksplode(_WordPressScraper):
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
||||
# Copyright (C) 2015-2019 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
|
@ -9,7 +9,7 @@ from re import compile, escape
|
|||
|
||||
from ..scraper import _BasicScraper, _ParserScraper
|
||||
from ..util import tagre
|
||||
from ..helpers import bounceStarter, xpath_class
|
||||
from ..helpers import bounceStarter, joinPathPartsNamer, xpath_class
|
||||
from .common import _WPNavi
|
||||
|
||||
|
||||
|
@ -26,10 +26,7 @@ class Zapiro(_ParserScraper):
|
|||
imageSearch = '//div[@id="cartoon"]/img'
|
||||
prevSearch = '//a[%s]' % xpath_class('left')
|
||||
nextSearch = '//a[%s]' % xpath_class('right')
|
||||
|
||||
def namer(self, image_url, page_url):
    """Return the text after the last '/' of page_url as the name.

    image_url is not used. A page_url without any '/' raises IndexError.
    """
    return page_url.rsplit('/', 1)[1]
|
||||
namer = joinPathPartsNamer((-1,), ())
|
||||
|
||||
|
||||
class ZenPencils(_WPNavi):
|
||||
|
@ -65,6 +62,7 @@ class Zwarwald(_BasicScraper):
|
|||
tagre("img", "src",
|
||||
r'http://zwarwald\.de/images/prev\.jpg',
|
||||
quote="'"))
|
||||
namer = joinPathPartsNamer((), (-3, -2, -1))
|
||||
help = 'Index format: number'
|
||||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
|
@ -77,7 +75,3 @@ class Zwarwald(_BasicScraper):
|
|||
self.stripUrl % "368",
|
||||
self.stripUrl % '495',
|
||||
)
|
||||
|
||||
def namer(self, image_url, page_url):
    """Return '<a>_<b>_<c>' built from the last three parts of image_url.

    The original code names these parts year, month and file name.
    page_url is not used.
    """
    # Unpacking into exactly four names means an image_url with fewer
    # than three '/' separators fails with ValueError.
    _, year, month, filename = image_url.rsplit('/', 3)
    name_parts = (year, month, filename)
    return "%s_%s_%s" % name_parts
|
||||
|
|
Loading…
Reference in a new issue