Refactor: convert URL modifiers to normal methods.

As before, implementing #42 may require these methods to access information
from the instance, so they should be normal methods instead of classmethods.
This commit is contained in:
Tobias Gruetzmacher 2016-04-21 21:28:41 +02:00
parent c3f32dfef7
commit 0d436b8ca9
4 changed files with 18 additions and 20 deletions

View file

@ -1,6 +1,9 @@
# -*- coding: iso-8859-1 -*-
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
from re import compile
from ..scraper import make_scraper
@ -21,10 +24,9 @@ def add(name, url):
else:
stripUrl = url + 'd/%s.html'
@classmethod
def _prevUrlModifier(cls, prevUrl):
if prevUrl:
return prevUrl.replace("keenspace.com", "comicgenesis.com"
def _prevUrlModifier(self, prev_url):
if prev_url:
return prev_url.replace("keenspace.com", "comicgenesis.com"
).replace("keenspot.com", "comicgenesis.com"
).replace("toonspace.com", "comicgenesis.com"
).replace("comicgen.com", "comicgenesis.com")

View file

@ -89,16 +89,15 @@ class PennyArcade(_BasicScraper):
starter = bounceStarter
help = 'Index format: yyyy/mm/dd/'
@classmethod
def prevUrlModifier(cls, prevUrl):
if prevUrl:
dummy, yyyy, mm, dd = prevUrl.rsplit('/', 3)
def prevUrlModifier(self, prev_url):
if prev_url:
dummy, yyyy, mm, dd = prev_url.rsplit('/', 3)
try:
int(dd)
except ValueError:
# URL has form yyyy/mm/dd/stripname
prevUrl = "%s/%s/%s" % (dummy, yyyy, mm)
return prevUrl
prev_url = "%s/%s/%s" % (dummy, yyyy, mm)
return prev_url
def namer(self, image_url, page_url):
p = page_url.split('/')

View file

@ -31,8 +31,7 @@ class Xkcd(_BasicScraper):
name = image_url.rsplit('/', 1)[-1].split('.')[0]
return '%03d-%s' % (index, name)
@classmethod
def imageUrlModifier(cls, url, data):
def imageUrlModifier(self, url, data):
if url and '/large/' in data:
return url.replace(".png", "_large.png")
return url

View file

@ -241,21 +241,19 @@ class Scraper(object):
"""Return filename for given image and page URL."""
return None
@classmethod
def prevUrlModifier(cls, prevUrl):
def prevUrlModifier(self, prev_url):
"""Optional modification of parsed previous URLs. Useful if
there are domain redirects. The default implementation does
not modify the URL.
"""
return prevUrl
return prev_url
@classmethod
def imageUrlModifier(cls, imageUrl, data):
def imageUrlModifier(self, image_url, data):
"""Optional modification of parsed image URLs. Useful if the URL
needs to be fixed before usage. The default implementation does
not modify the URL. The given data is the URL page data.
"""
return imageUrl
return image_url
def vote(self):
"""Cast a public vote for this comic."""
@ -469,7 +467,7 @@ class _ParserScraper(Scraper):
if attrib in match.attrib:
searchUrl = match.get(attrib)
except AttributeError:
searchUrls = str(match)
searchUrl = str(match)
out.debug(u'Matched URL %r with pattern %s' %
(searchUrl, search))
searchUrls.append(searchUrl)