Refactor: url modifiers to normal methods.

As before, these modifiers might need access to information from the
instance in order to implement #42, so they should be normal methods.
Tobias Gruetzmacher 2016-04-21 21:28:41 +02:00
parent c3f32dfef7
commit 0d436b8ca9
4 changed files with 18 additions and 20 deletions

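Why normal methods help here: a classmethod only sees the class, while an instance method can also consult per-instance state. A minimal, self-contained sketch (hypothetical class and attribute names, not code from this repository):

    class UrlModifierBase(object):
        def prevUrlModifier(self, prev_url):
            # Default hook: pass the parsed previous URL through unchanged.
            return prev_url

    class MirroredComic(UrlModifierBase):
        def __init__(self, mirror_host):
            # Per-instance state that a classmethod could not access.
            self.mirror_host = mirror_host

        def prevUrlModifier(self, prev_url):
            # Rewrite the host based on how this particular instance was configured.
            if prev_url:
                return prev_url.replace("example.com", self.mirror_host)
            return prev_url

    comic = MirroredComic("mirror.example.org")
    print(comic.prevUrlModifier("http://example.com/d/20160421.html"))
    # -> http://mirror.example.org/d/20160421.html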
View file

@@ -1,6 +1,9 @@
-# -*- coding: iso-8859-1 -*-
+# -*- coding: utf-8 -*-
 # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
+# Copyright (C) 2015-2016 Tobias Gruetzmacher
+
+from __future__ import absolute_import, division, print_function
 
 from re import compile
 from ..scraper import make_scraper
@@ -21,10 +24,9 @@ def add(name, url):
     else:
         stripUrl = url + 'd/%s.html'
 
-    @classmethod
-    def _prevUrlModifier(cls, prevUrl):
-        if prevUrl:
-            return prevUrl.replace("keenspace.com", "comicgenesis.com"
+    def _prevUrlModifier(self, prev_url):
+        if prev_url:
+            return prev_url.replace("keenspace.com", "comicgenesis.com"
                 ).replace("keenspot.com", "comicgenesis.com"
                 ).replace("toonspace.com", "comicgenesis.com"
                 ).replace("comicgen.com", "comicgenesis.com")

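For context, the chained replace() calls above simply collapse the old Keenspace/Keenspot host names onto comicgenesis.com. The same chain as a standalone sketch (sample URL made up):

    def normalize_comicgenesis(prev_url):
        # Map the legacy hosts onto the current comicgenesis.com domain.
        if prev_url:
            return prev_url.replace("keenspace.com", "comicgenesis.com"
                ).replace("keenspot.com", "comicgenesis.com"
                ).replace("toonspace.com", "comicgenesis.com"
                ).replace("comicgen.com", "comicgenesis.com")
        return prev_url

    print(normalize_comicgenesis("http://somecomic.keenspace.com/d/20160421.html"))
    # -> http://somecomic.comicgenesis.com/d/20160421.html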
View file

@@ -89,16 +89,15 @@ class PennyArcade(_BasicScraper):
     starter = bounceStarter
     help = 'Index format: yyyy/mm/dd/'
 
-    @classmethod
-    def prevUrlModifier(cls, prevUrl):
-        if prevUrl:
-            dummy, yyyy, mm, dd = prevUrl.rsplit('/', 3)
+    def prevUrlModifier(self, prev_url):
+        if prev_url:
+            dummy, yyyy, mm, dd = prev_url.rsplit('/', 3)
             try:
                 int(dd)
             except ValueError:
                 # URL has form yyyy/mm/dd/stripname
-                prevUrl = "%s/%s/%s" % (dummy, yyyy, mm)
-        return prevUrl
+                prev_url = "%s/%s/%s" % (dummy, yyyy, mm)
+        return prev_url
 
     def namer(self, image_url, page_url):
         p = page_url.split('/')

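The PennyArcade hook keeps only the yyyy/mm/dd part of the previous URL: rsplit('/', 3) takes the last three path segments, and if the last one is not a number it must be a strip name and is dropped. A standalone sketch of that logic (sample URL made up):

    def trim_prev_url(prev_url):
        # Split off the last three path segments from the right.
        dummy, yyyy, mm, dd = prev_url.rsplit('/', 3)
        try:
            int(dd)
        except ValueError:
            # URL has the form yyyy/mm/dd/stripname, so drop the strip name.
            prev_url = "%s/%s/%s" % (dummy, yyyy, mm)
        return prev_url

    print(trim_prev_url("http://penny-arcade.com/comic/2016/04/21/some-strip"))
    # -> http://penny-arcade.com/comic/2016/04/21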
View file

@@ -31,8 +31,7 @@ class Xkcd(_BasicScraper):
         name = image_url.rsplit('/', 1)[-1].split('.')[0]
         return '%03d-%s' % (index, name)
 
-    @classmethod
-    def imageUrlModifier(cls, url, data):
+    def imageUrlModifier(self, url, data):
         if url and '/large/' in data:
             return url.replace(".png", "_large.png")
         return url

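The Xkcd hook only rewrites the image URL when the fetched page data mentions a /large/ variant, in which case the _large.png rendering is requested instead. A standalone sketch (the HTML snippet is made up):

    def pick_image_url(url, data):
        # Switch to the large rendering if the page advertises one.
        if url and '/large/' in data:
            return url.replace(".png", "_large.png")
        return url

    page_data = '<a href="http://xkcd.com/980/large/">View bigger</a>'
    print(pick_image_url("http://imgs.xkcd.com/comics/money.png", page_data))
    # -> http://imgs.xkcd.com/comics/money_large.png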
View file

@@ -241,21 +241,19 @@ class Scraper(object):
         """Return filename for given image and page URL."""
         return None
 
-    @classmethod
-    def prevUrlModifier(cls, prevUrl):
+    def prevUrlModifier(self, prev_url):
         """Optional modification of parsed previous URLs. Useful if
         there are domain redirects. The default implementation does
         not modify the URL.
         """
-        return prevUrl
+        return prev_url
 
-    @classmethod
-    def imageUrlModifier(cls, imageUrl, data):
+    def imageUrlModifier(self, image_url, data):
         """Optional modification of parsed image URLs. Useful if the URL
         needs to be fixed before usage. The default implementation does
         not modify the URL. The given data is the URL page data.
         """
-        return imageUrl
+        return image_url
 
     def vote(self):
         """Cast a public vote for this comic."""
@@ -469,7 +467,7 @@ class _ParserScraper(Scraper):
                         if attrib in match.attrib:
                             searchUrl = match.get(attrib)
                 except AttributeError:
-                    searchUrls = str(match)
+                    searchUrl = str(match)
                 out.debug(u'Matched URL %r with pattern %s' %
                           (searchUrl, search))
                 searchUrls.append(searchUrl)
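The last hunk is a small fix on top of the refactor: the text-node fallback in _ParserScraper assigned str(match) to the list variable searchUrls instead of the single-URL variable searchUrl, which the following lines log and append. A simplified sketch of the fixed flow (helper name and inputs are illustrative, not the actual dosage API):

    def collect_search_urls(matches, attrib='href'):
        # XPath results can be elements (with .get()) or plain strings;
        # after the fix both end up as entries in searchUrls.
        searchUrls = []
        for match in matches:
            try:
                searchUrl = match.get(attrib)
            except AttributeError:
                # Plain string result: use its text as the URL.
                searchUrl = str(match)
            searchUrls.append(searchUrl)
        return searchUrls

    print(collect_search_urls(["http://example.com/comic/41", "http://example.com/comic/42"]))
    # -> ['http://example.com/comic/41', 'http://example.com/comic/42']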