From 0d436b8ca98de841f4e8fc3447af4b2aa16eea79 Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Thu, 21 Apr 2016 21:28:41 +0200 Subject: [PATCH] Refactor: url modifiers to normal methods. As before, to implement #42 these might want to access information from the instance, so they should be normal methods. --- dosagelib/plugins/comicgenesis.py | 12 +++++++----- dosagelib/plugins/p.py | 11 +++++------ dosagelib/plugins/x.py | 3 +-- dosagelib/scraper.py | 12 +++++------- 4 files changed, 18 insertions(+), 20 deletions(-) diff --git a/dosagelib/plugins/comicgenesis.py b/dosagelib/plugins/comicgenesis.py index 3e195e047..87d821531 100644 --- a/dosagelib/plugins/comicgenesis.py +++ b/dosagelib/plugins/comicgenesis.py @@ -1,6 +1,9 @@ -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam +# Copyright (C) 2015-2016 Tobias Gruetzmacher + +from __future__ import absolute_import, division, print_function from re import compile from ..scraper import make_scraper @@ -21,10 +24,9 @@ def add(name, url): else: stripUrl = url + 'd/%s.html' - @classmethod - def _prevUrlModifier(cls, prevUrl): - if prevUrl: - return prevUrl.replace("keenspace.com", "comicgenesis.com" + def _prevUrlModifier(self, prev_url): + if prev_url: + return prev_url.replace("keenspace.com", "comicgenesis.com" ).replace("keenspot.com", "comicgenesis.com" ).replace("toonspace.com", "comicgenesis.com" ).replace("comicgen.com", "comicgenesis.com") diff --git a/dosagelib/plugins/p.py b/dosagelib/plugins/p.py index b3f749ef6..c87271025 100644 --- a/dosagelib/plugins/p.py +++ b/dosagelib/plugins/p.py @@ -89,16 +89,15 @@ class PennyArcade(_BasicScraper): starter = bounceStarter help = 'Index format: yyyy/mm/dd/' - @classmethod - def prevUrlModifier(cls, prevUrl): - if prevUrl: - dummy, yyyy, mm, dd = prevUrl.rsplit('/', 3) + def prevUrlModifier(self, prev_url): + if prev_url: + dummy, yyyy, mm, dd = prev_url.rsplit('/', 3) try: int(dd) except ValueError: # URL has form yyyy/mm/dd/stripname - prevUrl = "%s/%s/%s" % (dummy, yyyy, mm) - return prevUrl + prev_url = "%s/%s/%s" % (dummy, yyyy, mm) + return prev_url def namer(self, image_url, page_url): p = page_url.split('/') diff --git a/dosagelib/plugins/x.py b/dosagelib/plugins/x.py index c7c58daef..162a75a83 100644 --- a/dosagelib/plugins/x.py +++ b/dosagelib/plugins/x.py @@ -31,8 +31,7 @@ class Xkcd(_BasicScraper): name = image_url.rsplit('/', 1)[-1].split('.')[0] return '%03d-%s' % (index, name) - @classmethod - def imageUrlModifier(cls, url, data): + def imageUrlModifier(self, url, data): if url and '/large/' in data: return url.replace(".png", "_large.png") return url diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py index 48966413a..200ea460a 100644 --- a/dosagelib/scraper.py +++ b/dosagelib/scraper.py @@ -241,21 +241,19 @@ class Scraper(object): """Return filename for given image and page URL.""" return None - @classmethod - def prevUrlModifier(cls, prevUrl): + def prevUrlModifier(self, prev_url): """Optional modification of parsed previous URLs. Useful if there are domain redirects. The default implementation does not modify the URL. """ - return prevUrl + return prev_url - @classmethod - def imageUrlModifier(cls, imageUrl, data): + def imageUrlModifier(self, image_url, data): """Optional modification of parsed image URLs. Useful if the URL needs to be fixed before usage. The default implementation does not modify the URL. The given data is the URL page data. """ - return imageUrl + return image_url def vote(self): """Cast a public vote for this comic.""" @@ -469,7 +467,7 @@ class _ParserScraper(Scraper): if attrib in match.attrib: searchUrl = match.get(attrib) except AttributeError: - searchUrls = str(match) + searchUrl = str(match) out.debug(u'Matched URL %r with pattern %s' % (searchUrl, search)) searchUrls.append(searchUrl)