Add new namer "joinPathPartsNamer"
Additionally, switch some comics which benefit from it to the new namer. This fixes #127.
This commit is contained in:
parent
a7b6393d6f
commit
328b3cd072
4 changed files with 22 additions and 20 deletions
|
@ -1,7 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
# Copyright (C) 2015-2019 Tobias Gruetzmacher
|
||||||
|
|
||||||
from __future__ import absolute_import, division, print_function
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
|
@ -28,6 +28,18 @@ def regexNamer(regex, use_page_url=False):
|
||||||
return _namer
|
return _namer
|
||||||
|
|
||||||
|
|
||||||
|
def joinPathPartsNamer(pageurlparts, imageurlparts=(-1,), joinchar='_'):
    """Build a namer that joins selected URL path parts into a name.

    pageurlparts and imageurlparts are iterables of indices (negative
    indices count from the end) into the '/'-separated parts of the page
    URL and the image URL. The parts picked from the page URL come first,
    followed by the parts picked from the image URL; all of them are
    joined with joinchar.

    The returned function has the signature (self, imageurl, pageurl), so
    it can be assigned as a class attribute. An index that does not exist
    in the split URL raises IndexError.
    """
    # Snapshot the selectors once: the returned namer is called repeatedly,
    # and a one-shot iterable (generator, iterator) would otherwise be
    # exhausted after the first call.
    pageindexes = tuple(pageurlparts)
    imageindexes = tuple(imageurlparts)

    def _namer(self, imageurl, pageurl):
        # Drop the first three '/'-separated pieces; for an absolute URL
        # like 'http://host/a/b' these are the scheme, the empty string
        # between the two slashes, and the host name.
        pageurlsplit = pageurl.split('/')[3:]
        imageurlsplit = imageurl.split('/')[3:]
        joinparts = [pageurlsplit[i] for i in pageindexes]
        joinparts.extend(imageurlsplit[i] for i in imageindexes)
        return joinchar.join(joinparts)

    return _namer
|
||||||
|
|
||||||
|
|
||||||
def bounceStarter(self):
|
def bounceStarter(self):
|
||||||
"""Get start URL by "bouncing" back and forth one time.
|
"""Get start URL by "bouncing" back and forth one time.
|
||||||
|
|
||||||
|
|
|
@ -1,14 +1,14 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
# Copyright (C) 2015-2019 Tobias Gruetzmacher
|
||||||
|
|
||||||
from __future__ import absolute_import, division, print_function
|
from __future__ import absolute_import, division, print_function
|
||||||
from re import compile, escape, IGNORECASE
|
from re import compile, escape, IGNORECASE
|
||||||
|
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..helpers import indirectStarter, xpath_class
|
from ..helpers import indirectStarter, joinPathPartsNamer, xpath_class
|
||||||
from .common import _WPNaviIn, _WordPressScraper
|
from .common import _WPNaviIn, _WordPressScraper
|
||||||
|
|
||||||
|
|
||||||
|
@ -135,7 +135,7 @@ class FredoAndPidjin(_ParserScraper):
|
||||||
prevSearch = '//span[%s]/a' % xpath_class("prev")
|
prevSearch = '//span[%s]/a' % xpath_class("prev")
|
||||||
latestSearch = '//section[%s]//a' % xpath_class("latest")
|
latestSearch = '//section[%s]//a' % xpath_class("latest")
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
|
namer = joinPathPartsNamer((0, 1, 2))
|
||||||
|
|
||||||
class Freefall(_BasicScraper):
|
class Freefall(_BasicScraper):
|
||||||
url = 'http://freefall.purrsia.com/default.htm'
|
url = 'http://freefall.purrsia.com/default.htm'
|
||||||
|
|
|
@ -10,7 +10,7 @@ from os.path import splitext
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..helpers import indirectStarter, bounceStarter, xpath_class
|
from ..helpers import indirectStarter, bounceStarter, joinPathPartsNamer, xpath_class
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, WP_LATEST_SEARCH
|
from .common import _ComicControlScraper, _WordPressScraper, _WPNavi, WP_LATEST_SEARCH
|
||||||
|
|
||||||
|
@ -162,11 +162,7 @@ class SexyLosers(_ParserScraper):
|
||||||
latestSearch = '//a[@rel="bookmark"]'
|
latestSearch = '//a[@rel="bookmark"]'
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
|
namer = joinPathPartsNamer((-2,), (-1,), '-')
|
||||||
def namer(self, image_url, page_url):
|
|
||||||
index = page_url.rsplit('/', 2)[1]
|
|
||||||
title = image_url.rsplit('/', 1)[1]
|
|
||||||
return index + '-' + title
|
|
||||||
|
|
||||||
|
|
||||||
class Sharksplode(_WordPressScraper):
|
class Sharksplode(_WordPressScraper):
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2015-2017 Tobias Gruetzmacher
|
# Copyright (C) 2015-2019 Tobias Gruetzmacher
|
||||||
|
|
||||||
from __future__ import absolute_import, division, print_function
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ from re import compile, escape
|
||||||
|
|
||||||
from ..scraper import _BasicScraper, _ParserScraper
|
from ..scraper import _BasicScraper, _ParserScraper
|
||||||
from ..util import tagre
|
from ..util import tagre
|
||||||
from ..helpers import bounceStarter, xpath_class
|
from ..helpers import bounceStarter, joinPathPartsNamer, xpath_class
|
||||||
from .common import _WPNavi
|
from .common import _WPNavi
|
||||||
|
|
||||||
|
|
||||||
|
@ -26,10 +26,7 @@ class Zapiro(_ParserScraper):
|
||||||
imageSearch = '//div[@id="cartoon"]/img'
|
imageSearch = '//div[@id="cartoon"]/img'
|
||||||
prevSearch = '//a[%s]' % xpath_class('left')
|
prevSearch = '//a[%s]' % xpath_class('left')
|
||||||
nextSearch = '//a[%s]' % xpath_class('right')
|
nextSearch = '//a[%s]' % xpath_class('right')
|
||||||
|
namer = joinPathPartsNamer((-1,), ())
|
||||||
def namer(self, image_url, page_url):
|
|
||||||
parts = page_url.rsplit('/', 1)
|
|
||||||
return parts[1]
|
|
||||||
|
|
||||||
|
|
||||||
class ZenPencils(_WPNavi):
|
class ZenPencils(_WPNavi):
|
||||||
|
@ -65,6 +62,7 @@ class Zwarwald(_BasicScraper):
|
||||||
tagre("img", "src",
|
tagre("img", "src",
|
||||||
r'http://zwarwald\.de/images/prev\.jpg',
|
r'http://zwarwald\.de/images/prev\.jpg',
|
||||||
quote="'"))
|
quote="'"))
|
||||||
|
namer = joinPathPartsNamer((), (-3, -2, -1))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
|
@ -77,7 +75,3 @@ class Zwarwald(_BasicScraper):
|
||||||
self.stripUrl % "368",
|
self.stripUrl % "368",
|
||||||
self.stripUrl % '495',
|
self.stripUrl % '495',
|
||||||
)
|
)
|
||||||
|
|
||||||
def namer(self, image_url, page_url):
|
|
||||||
prefix, year, month, name = image_url.rsplit('/', 3)
|
|
||||||
return "%s_%s_%s" % (year, month, name)
|
|
||||||
|
|
Loading…
Reference in a new issue