Refactor: convert URL modifiers to normal methods.
As before, to implement #42, these modifiers might need to access information from the instance, so they should be normal (instance) methods rather than classmethods.
This commit is contained in:
parent
c3f32dfef7
commit
0d436b8ca9
4 changed files with 18 additions and 20 deletions
|
@ -1,6 +1,9 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
from re import compile
|
||||
from ..scraper import make_scraper
|
||||
|
@ -21,10 +24,9 @@ def add(name, url):
|
|||
else:
|
||||
stripUrl = url + 'd/%s.html'
|
||||
|
||||
@classmethod
|
||||
def _prevUrlModifier(cls, prevUrl):
|
||||
if prevUrl:
|
||||
return prevUrl.replace("keenspace.com", "comicgenesis.com"
|
||||
def _prevUrlModifier(self, prev_url):
|
||||
if prev_url:
|
||||
return prev_url.replace("keenspace.com", "comicgenesis.com"
|
||||
).replace("keenspot.com", "comicgenesis.com"
|
||||
).replace("toonspace.com", "comicgenesis.com"
|
||||
).replace("comicgen.com", "comicgenesis.com")
|
||||
|
|
|
@ -89,16 +89,15 @@ class PennyArcade(_BasicScraper):
|
|||
starter = bounceStarter
|
||||
help = 'Index format: yyyy/mm/dd/'
|
||||
|
||||
@classmethod
|
||||
def prevUrlModifier(cls, prevUrl):
|
||||
if prevUrl:
|
||||
dummy, yyyy, mm, dd = prevUrl.rsplit('/', 3)
|
||||
def prevUrlModifier(self, prev_url):
|
||||
if prev_url:
|
||||
dummy, yyyy, mm, dd = prev_url.rsplit('/', 3)
|
||||
try:
|
||||
int(dd)
|
||||
except ValueError:
|
||||
# URL has form yyyy/mm/dd/stripname
|
||||
prevUrl = "%s/%s/%s" % (dummy, yyyy, mm)
|
||||
return prevUrl
|
||||
prev_url = "%s/%s/%s" % (dummy, yyyy, mm)
|
||||
return prev_url
|
||||
|
||||
def namer(self, image_url, page_url):
|
||||
p = page_url.split('/')
|
||||
|
|
|
@ -31,8 +31,7 @@ class Xkcd(_BasicScraper):
|
|||
name = image_url.rsplit('/', 1)[-1].split('.')[0]
|
||||
return '%03d-%s' % (index, name)
|
||||
|
||||
@classmethod
|
||||
def imageUrlModifier(cls, url, data):
|
||||
def imageUrlModifier(self, url, data):
|
||||
if url and '/large/' in data:
|
||||
return url.replace(".png", "_large.png")
|
||||
return url
|
||||
|
|
|
@ -241,21 +241,19 @@ class Scraper(object):
|
|||
"""Return filename for given image and page URL."""
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def prevUrlModifier(cls, prevUrl):
|
||||
def prevUrlModifier(self, prev_url):
|
||||
"""Optional modification of parsed previous URLs. Useful if
|
||||
there are domain redirects. The default implementation does
|
||||
not modify the URL.
|
||||
"""
|
||||
return prevUrl
|
||||
return prev_url
|
||||
|
||||
@classmethod
|
||||
def imageUrlModifier(cls, imageUrl, data):
|
||||
def imageUrlModifier(self, image_url, data):
|
||||
"""Optional modification of parsed image URLs. Useful if the URL
|
||||
needs to be fixed before usage. The default implementation does
|
||||
not modify the URL. The given data is the URL page data.
|
||||
"""
|
||||
return imageUrl
|
||||
return image_url
|
||||
|
||||
def vote(self):
|
||||
"""Cast a public vote for this comic."""
|
||||
|
@ -469,7 +467,7 @@ class _ParserScraper(Scraper):
|
|||
if attrib in match.attrib:
|
||||
searchUrl = match.get(attrib)
|
||||
except AttributeError:
|
||||
searchUrls = str(match)
|
||||
searchUrl = str(match)
|
||||
out.debug(u'Matched URL %r with pattern %s' %
|
||||
(searchUrl, search))
|
||||
searchUrls.append(searchUrl)
|
||||
|
|
Loading…
Reference in a new issue