Refactor: convert URL modifiers to normal methods.
As before, to implement #42 these might need to access information from the instance, so they should be normal (instance) methods instead of class methods.
This commit is contained in:
parent
c3f32dfef7
commit
0d436b8ca9
4 changed files with 18 additions and 20 deletions
|
@ -1,6 +1,9 @@
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
from re import compile
|
from re import compile
|
||||||
from ..scraper import make_scraper
|
from ..scraper import make_scraper
|
||||||
|
@ -21,10 +24,9 @@ def add(name, url):
|
||||||
else:
|
else:
|
||||||
stripUrl = url + 'd/%s.html'
|
stripUrl = url + 'd/%s.html'
|
||||||
|
|
||||||
@classmethod
|
def _prevUrlModifier(self, prev_url):
|
||||||
def _prevUrlModifier(cls, prevUrl):
|
if prev_url:
|
||||||
if prevUrl:
|
return prev_url.replace("keenspace.com", "comicgenesis.com"
|
||||||
return prevUrl.replace("keenspace.com", "comicgenesis.com"
|
|
||||||
).replace("keenspot.com", "comicgenesis.com"
|
).replace("keenspot.com", "comicgenesis.com"
|
||||||
).replace("toonspace.com", "comicgenesis.com"
|
).replace("toonspace.com", "comicgenesis.com"
|
||||||
).replace("comicgen.com", "comicgenesis.com")
|
).replace("comicgen.com", "comicgenesis.com")
|
||||||
|
|
|
@ -89,16 +89,15 @@ class PennyArcade(_BasicScraper):
|
||||||
starter = bounceStarter
|
starter = bounceStarter
|
||||||
help = 'Index format: yyyy/mm/dd/'
|
help = 'Index format: yyyy/mm/dd/'
|
||||||
|
|
||||||
@classmethod
|
def prevUrlModifier(self, prev_url):
|
||||||
def prevUrlModifier(cls, prevUrl):
|
if prev_url:
|
||||||
if prevUrl:
|
dummy, yyyy, mm, dd = prev_url.rsplit('/', 3)
|
||||||
dummy, yyyy, mm, dd = prevUrl.rsplit('/', 3)
|
|
||||||
try:
|
try:
|
||||||
int(dd)
|
int(dd)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
# URL has form yyyy/mm/dd/stripname
|
# URL has form yyyy/mm/dd/stripname
|
||||||
prevUrl = "%s/%s/%s" % (dummy, yyyy, mm)
|
prev_url = "%s/%s/%s" % (dummy, yyyy, mm)
|
||||||
return prevUrl
|
return prev_url
|
||||||
|
|
||||||
def namer(self, image_url, page_url):
|
def namer(self, image_url, page_url):
|
||||||
p = page_url.split('/')
|
p = page_url.split('/')
|
||||||
|
|
|
@ -31,8 +31,7 @@ class Xkcd(_BasicScraper):
|
||||||
name = image_url.rsplit('/', 1)[-1].split('.')[0]
|
name = image_url.rsplit('/', 1)[-1].split('.')[0]
|
||||||
return '%03d-%s' % (index, name)
|
return '%03d-%s' % (index, name)
|
||||||
|
|
||||||
@classmethod
|
def imageUrlModifier(self, url, data):
|
||||||
def imageUrlModifier(cls, url, data):
|
|
||||||
if url and '/large/' in data:
|
if url and '/large/' in data:
|
||||||
return url.replace(".png", "_large.png")
|
return url.replace(".png", "_large.png")
|
||||||
return url
|
return url
|
||||||
|
|
|
@ -241,21 +241,19 @@ class Scraper(object):
|
||||||
"""Return filename for given image and page URL."""
|
"""Return filename for given image and page URL."""
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@classmethod
|
def prevUrlModifier(self, prev_url):
|
||||||
def prevUrlModifier(cls, prevUrl):
|
|
||||||
"""Optional modification of parsed previous URLs. Useful if
|
"""Optional modification of parsed previous URLs. Useful if
|
||||||
there are domain redirects. The default implementation does
|
there are domain redirects. The default implementation does
|
||||||
not modify the URL.
|
not modify the URL.
|
||||||
"""
|
"""
|
||||||
return prevUrl
|
return prev_url
|
||||||
|
|
||||||
@classmethod
|
def imageUrlModifier(self, image_url, data):
|
||||||
def imageUrlModifier(cls, imageUrl, data):
|
|
||||||
"""Optional modification of parsed image URLs. Useful if the URL
|
"""Optional modification of parsed image URLs. Useful if the URL
|
||||||
needs to be fixed before usage. The default implementation does
|
needs to be fixed before usage. The default implementation does
|
||||||
not modify the URL. The given data is the URL page data.
|
not modify the URL. The given data is the URL page data.
|
||||||
"""
|
"""
|
||||||
return imageUrl
|
return image_url
|
||||||
|
|
||||||
def vote(self):
|
def vote(self):
|
||||||
"""Cast a public vote for this comic."""
|
"""Cast a public vote for this comic."""
|
||||||
|
@ -469,7 +467,7 @@ class _ParserScraper(Scraper):
|
||||||
if attrib in match.attrib:
|
if attrib in match.attrib:
|
||||||
searchUrl = match.get(attrib)
|
searchUrl = match.get(attrib)
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
searchUrls = str(match)
|
searchUrl = str(match)
|
||||||
out.debug(u'Matched URL %r with pattern %s' %
|
out.debug(u'Matched URL %r with pattern %s' %
|
||||||
(searchUrl, search))
|
(searchUrl, search))
|
||||||
searchUrls.append(searchUrl)
|
searchUrls.append(searchUrl)
|
||||||
|
|
Loading…
Reference in a new issue