Refactor: Make namer an instance method (instead of a classmethod).

Once #42 is implemented, the naming of files might differ between instances
of a comic module, so the namer logically belongs to the instance, not the class.
This commit is contained in:
Tobias Gruetzmacher 2016-04-21 08:20:49 +02:00
parent 5bd2a49f48
commit c3f32dfef7
27 changed files with 100 additions and 142 deletions

View file

@ -8,22 +8,20 @@ from __future__ import absolute_import, division, print_function
from .util import getQueryParams from .util import getQueryParams
def queryNamer(paramName, usePageUrl=False): def queryNamer(param, use_page_url=False):
"""Get name from URL query part.""" """Get name from URL query part."""
@classmethod def _namer(self, image_url, page_url):
def _namer(cls, imageUrl, pageUrl):
"""Get URL query part.""" """Get URL query part."""
url = pageUrl if usePageUrl else imageUrl url = page_url if use_page_url else image_url
return getQueryParams(url)[paramName][0] return getQueryParams(url)[param][0]
return _namer return _namer
def regexNamer(regex, usePageUrl=False): def regexNamer(regex, use_page_url=False):
"""Get name from regular expression.""" """Get name from regular expression."""
@classmethod def _namer(self, image_url, page_url):
def _namer(cls, imageUrl, pageUrl):
"""Get first regular expression group.""" """Get first regular expression group."""
url = pageUrl if usePageUrl else imageUrl url = page_url if use_page_url else image_url
mo = regex.search(url) mo = regex.search(url)
if mo: if mo:
return mo.group(1) return mo.group(1)

View file

@ -28,8 +28,7 @@ class AbstruseGoose(_BasicScraper):
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
textSearch = compile(tagre("img", "title", r'([^"]+)')) textSearch = compile(tagre("img", "title", r'([^"]+)'))
@classmethod def namer(self, image_url, page_url):
def namer(cls, image_url, page_url):
index = int(page_url.rstrip('/').split('/')[-1]) index = int(page_url.rstrip('/').split('/')[-1])
name = image_url.split('/')[-1].split('.')[0] name = image_url.split('/')[-1].split('.')[0]
return 'c%03d-%s' % (index, name) return 'c%03d-%s' % (index, name)
@ -300,8 +299,7 @@ class AstronomyPOTD(_ParserScraper):
"""Skip pages without images.""" """Skip pages without images."""
return data.xpath('//iframe') # videos return data.xpath('//iframe') # videos
@classmethod def namer(self, image_url, page_url):
def namer(cls, image_url, page_url):
return '%s-%s' % (page_url.split('/')[-1].split('.')[0][2:], return '%s-%s' % (page_url.split('/')[-1].split('.')[0][2:],
image_url.split('/')[-1].split('.')[0]) image_url.split('/')[-1].split('.')[0])

View file

@ -76,11 +76,10 @@ class Beetlebum(_BasicScraper):
help = 'Index format: yyyy/mm/dd/striptitle' help = 'Index format: yyyy/mm/dd/striptitle'
lang = 'de' lang = 'de'
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): indexes = tuple(page_url.rstrip('/').split('/')[-4:])
indexes = tuple(pageUrl.rstrip('/').split('/')[-4:])
name = '%s-%s-%s-%s' % indexes name = '%s-%s-%s-%s' % indexes
name = name + '_' + imageUrl.split('/')[-1] name = name + '_' + image_url.split('/')[-1]
return name return name
@ -154,8 +153,7 @@ class BloomingFaeries(_ParserScraper):
imageSearch = '//div[@id="comic"]//img' imageSearch = '//div[@id="comic"]//img'
prevSearch = WP_PREV_SEARCH prevSearch = WP_PREV_SEARCH
@classmethod def namer(self, image_url, page_url):
def namer(cls, image_url, page_url):
return "_".join(image_url.rsplit('/', 3)[1:]) return "_".join(image_url.rsplit('/', 3)[1:])
@ -197,9 +195,8 @@ class BoyOnAStickAndSlither(_BasicScraper):
"<span>Next page") "<span>Next page")
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): return page_url.rsplit('/')[-1]
return pageUrl.rsplit('/')[-1]
class BratHalla(_WordPressScraper): class BratHalla(_WordPressScraper):

View file

@ -57,9 +57,8 @@ class Carciphona(_BasicScraper):
r'(view\.php\?page=[0-9]+[^"]*)')) r'(view\.php\?page=[0-9]+[^"]*)'))
starter = indirectStarter starter = indirectStarter
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): ip = image_url.split('/')
ip = imageUrl.split('/')
return "volume_%s_page_%s" % (ip[-2], ip[-1]) return "volume_%s_page_%s" % (ip[-2], ip[-1])
@ -262,9 +261,8 @@ class CorydonCafe(_ParserScraper):
latestSearch = '//ul//a' latestSearch = '//ul//a'
help = 'Index format: yyyy/stripname' help = 'Index format: yyyy/stripname'
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): return page_url.split('/')[-1].split('.')[0]
return pageUrl.split('/')[-1].split('.')[0]
class CourtingDisaster(_WordPressScraper): class CourtingDisaster(_WordPressScraper):
@ -349,10 +347,9 @@ class CyanideAndHappiness(_BasicScraper):
"""Skip pages without images.""" """Skip pages without images."""
return "/comics/play-button.png" in data[0] return "/comics/play-button.png" in data[0]
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): imgname = image_url.split('/')[-1]
imgname = imageUrl.split('/')[-1]
# only get the first 100 chars for the image name # only get the first 100 chars for the image name
imgname = imgname[:100] imgname = imgname[:100]
imgnum = pageUrl.split('/')[-2] imgnum = page_url.split('/')[-2]
return '%s_%s' % (imgnum, imgname) return '%s_%s' % (imgnum, imgname)

View file

@ -24,9 +24,8 @@ def add(name, shortName, imageFolder=None, lastStrip=None):
if imageFolder is None: if imageFolder is None:
imageFolder = shortName imageFolder = shortName
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): return '%03d' % int(getQueryParams(page_url)['page'][0])
return '%03d' % int(getQueryParams(pageUrl)['page'][0])
def _starter(self): def _starter(self):
# first, try hopping to previous and next comic # first, try hopping to previous and next comic

View file

@ -22,12 +22,11 @@ class _ComicFury(_ParserScraper):
help = 'Index format: n' help = 'Index format: n'
starter = bounceStarter starter = bounceStarter
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): parts = page_url.split('/')
parts = pageUrl.split('/') path, ext = os.path.splitext(image_url)
path, ext = os.path.splitext(imageUrl)
num = parts[-1] num = parts[-1]
return "%s_%s%s" % (cls.__name__[2:], num, ext) return "%s_%s%s" % (self.__class__.__name__[2:], num, ext)
@property @property
def url(self): def url(self):

View file

@ -34,10 +34,9 @@ class DamnLol(_BasicScraper):
help = 'Index format: stripname-number' help = 'Index format: stripname-number'
starter = bounceStarter starter = bounceStarter
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): ext = image_url.rsplit('.', 1)[1]
ext = imageUrl.rsplit('.', 1)[1] path = page_url.rsplit('/', 1)[1][:-5]
path = pageUrl.rsplit('/', 1)[1][:-5]
stripname, number = path.rsplit('-', 1) stripname, number = path.rsplit('-', 1)
return '%s-%s.%s' % (number, stripname, ext) return '%s-%s.%s' % (number, stripname, ext)
@ -136,10 +135,9 @@ class DieselSweeties(_BasicScraper):
tagre("img", "src", r'(?:http://www\.dieselsweeties\.com/ximages/blackbackarrow160.png|/ximages/prev\.gif)')) tagre("img", "src", r'(?:http://www\.dieselsweeties\.com/ximages/blackbackarrow160.png|/ximages/prev\.gif)'))
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): index = int(image_url.split('/')[-1].split('.')[0])
index = int(imageUrl.split('/')[-1].split('.')[0]) return 'sw%02d' % index
return 'sw%02d' % (index,)
class Dilbert(_BasicScraper): class Dilbert(_BasicScraper):
@ -154,9 +152,8 @@ class Dilbert(_BasicScraper):
after="Click to see")) after="Click to see"))
help = 'Index format: yyyy-mm-dd' help = 'Index format: yyyy-mm-dd'
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): name = page_url.rsplit("/", 1)[1]
name = pageUrl.rsplit("/", 1)[1]
return "%s" % name return "%s" % name

View file

@ -24,10 +24,9 @@ def add(name, path):
classname = '_DrunkDuck_%s' % name classname = '_DrunkDuck_%s' % name
_url = 'http://www.theduckwebcomics.com/%s/' % path _url = 'http://www.theduckwebcomics.com/%s/' % path
@classmethod def _namer(self, image_url, page_url):
def _namer(cls, imageUrl, pageUrl): index = int(page_url.rstrip('/').split('/')[-1])
index = int(pageUrl.rstrip('/').split('/')[-1]) ext = image_url.rsplit('.')[-1]
ext = imageUrl.rsplit('.')[-1]
return '%d.%s' % (index, ext) return '%d.%s' % (index, ext)
def _starter(self): def _starter(self):

View file

@ -26,13 +26,12 @@ class EarthsongSaga(_ParserScraper):
return [x.replace('earthsongsaga.com/../', return [x.replace('earthsongsaga.com/../',
'earthsongsaga.com/') for x in urls] 'earthsongsaga.com/') for x in urls]
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl):
imgmatch = compile(r'images/vol(\d+)/ch(\d+)/(\d+)\.\w+$', imgmatch = compile(r'images/vol(\d+)/ch(\d+)/(\d+)\.\w+$',
IGNORECASE).search(imageUrl) IGNORECASE).search(image_url)
if not imgmatch: if not imgmatch:
imgmatch = compile(r'images/vol(\d+)/ch(\d+)/ch(\d+)cover\.\w+$', imgmatch = compile(r'images/vol(\d+)/ch(\d+)/ch(\d+)cover\.\w+$',
IGNORECASE).search(imageUrl) IGNORECASE).search(image_url)
suffix = "cover" suffix = "cover"
else: else:
suffix = "" suffix = ""

View file

@ -146,9 +146,8 @@ class GoneWithTheBlastwave(_BasicScraper):
r'<img src="images/page/default/latest') r'<img src="images/page/default/latest')
help = 'Index format: n' help = 'Index format: n'
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): return '%02d' % int(compile(r'nro=(\d+)').search(page_url).group(1))
return '%02d' % int(compile(r'nro=(\d+)').search(pageUrl).group(1))
class GrrlPower(_WordPressScraper): class GrrlPower(_WordPressScraper):

View file

@ -26,10 +26,9 @@ class _GoComics(_ParserScraper):
def url(self): def url(self):
return 'http://www.gocomics.com/' + self.path return 'http://www.gocomics.com/' + self.path
@classmethod def namer(self, image_url, page_url):
def namer(cls, image_url, page_url):
prefix, year, month, day = page_url.rsplit('/', 3) prefix, year, month, day = page_url.rsplit('/', 3)
return "%s_%s%s%s.gif" % (cls.__name__[2:], year, month, day) return "%s_%s%s%s.gif" % (self.__class__.__name__[2:], year, month, day)
def getIndexStripUrl(self, index): def getIndexStripUrl(self, index):
return self.url + self.path + '/%s' % index return self.url + self.path + '/%s' % index

View file

@ -51,10 +51,9 @@ class HarkAVagrant(_BasicScraper):
tagre("img", "src", "buttonnext.png")) tagre("img", "src", "buttonnext.png"))
help = 'Index format: number' help = 'Index format: number'
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): filename = image_url.rsplit('/', 1)[1]
filename = imageUrl.rsplit('/', 1)[1] num = page_url.rsplit('=', 1)[1]
num = pageUrl.rsplit('=', 1)[1]
return '%s-%s' % (num, filename) return '%s-%s' % (num, filename)

View file

@ -26,11 +26,10 @@ class Lackadaisy(_BasicScraper):
help = 'Index format: n' help = 'Index format: n'
starter = bounceStarter starter = bounceStarter
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl):
"""Use comic id for filename.""" """Use comic id for filename."""
num = pageUrl.rsplit('=', 1)[-1] num = page_url.rsplit('=', 1)[-1]
ext = imageUrl.rsplit('.', 1)[-1] ext = image_url.rsplit('.', 1)[-1]
return 'lackadaisy_%s.%s' % (num, ext) return 'lackadaisy_%s.%s' % (num, ext)

View file

@ -82,9 +82,8 @@ class MarriedToTheSea(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "&lt;&lt; Yesterday") prevSearch = compile(tagre("a", "href", r'([^"]+)') + "&lt;&lt; Yesterday")
help = 'Index format: mmddyy' help = 'Index format: mmddyy'
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): unused, date, filename = image_url.rsplit('/', 2)
unused, date, filename = imageUrl.rsplit('/', 2)
return '%s-%s' % (date, filename) return '%s-%s' % (date, filename)

View file

@ -38,9 +38,8 @@ class NatalieDee(_BasicScraper):
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "&lt;&lt; Yesterday") prevSearch = compile(tagre("a", "href", r'([^"]+)') + "&lt;&lt; Yesterday")
help = 'Index format: mmddyy' help = 'Index format: mmddyy'
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): unused, date, filename = image_url.rsplit('/', 2)
unused, date, filename = imageUrl.rsplit('/', 2)
return '%s-%s' % (date, filename) return '%s-%s' % (date, filename)

View file

@ -74,13 +74,12 @@ class OnTheFastrack(_BasicScraper):
url = 'http://onthefastrack.com/' url = 'http://onthefastrack.com/'
stripUrl = url + 'comics/%s' stripUrl = url + 'comics/%s'
firstStripUrl = stripUrl % 'november-13-2000' firstStripUrl = stripUrl % 'november-13-2000'
imageSearch = compile(r'(http://safr\.kingfeatures\.com/idn/cnfeed/zone/js/content\.php\?file=.+)"') imageSearch = compile(r'(https://safr\.kingfeatures\.com/idn/cnfeed/zone/js/content\.php\?file=.+)"')
prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url) prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url)
help = 'Index format: monthname-dd-yyyy' help = 'Index format: monthname-dd-yyyy'
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): name = page_url.rsplit('/', 3)[2]
name = pageUrl.rsplit('/', 3)[2]
if name == "onthefastrack.com": if name == "onthefastrack.com":
import datetime import datetime
name = datetime.date.today().strftime("%B-%d-%Y") name = datetime.date.today().strftime("%B-%d-%Y")

View file

@ -100,9 +100,8 @@ class PennyArcade(_BasicScraper):
prevUrl = "%s/%s/%s" % (dummy, yyyy, mm) prevUrl = "%s/%s/%s" % (dummy, yyyy, mm)
return prevUrl return prevUrl
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): p = page_url.split('/')
p = pageUrl.split('/')
return '%04d%02d%02d' % (int(p[4]), int(p[5]), int(p[6])) return '%04d%02d%02d' % (int(p[4]), int(p[5]), int(p[6]))
@ -161,7 +160,7 @@ class PiledHigherAndDeeper(_BasicScraper):
prevSearch = compile(r'<a href=((comics/)?archive\.php\?comicid=\d+)>.*<img [^>]*prev_button\.gif') prevSearch = compile(r'<a href=((comics/)?archive\.php\?comicid=\d+)>.*<img [^>]*prev_button\.gif')
nextSearch = compile(r'<a href=(archive\.php\?comicid=\d+)>.*<img [^>]*next_button\.gif') nextSearch = compile(r'<a href=(archive\.php\?comicid=\d+)>.*<img [^>]*next_button\.gif')
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
namer = queryNamer('comicid', usePageUrl=True) namer = queryNamer('comicid', use_page_url=True)
class Pimpette(_ParserScraper): class Pimpette(_ParserScraper):

View file

@ -197,10 +197,9 @@ class SexyLosers(_BasicScraper):
help = 'Index format: nnn' help = 'Index format: nnn'
starter = indirectStarter starter = indirectStarter
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): index = page_url.split('/')[-1].split('.')[0]
index = pageUrl.split('/')[-1].split('.')[0] title = image_url.split('/')[-1].split('.')[0]
title = imageUrl.split('/')[-1].split('.')[0]
return index + '-' + title return index + '-' + title
@ -228,9 +227,8 @@ class ShermansLagoon(_BasicScraper):
prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url) prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url)
help = 'Index format: monthname-day-year' help = 'Index format: monthname-day-year'
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): name = page_url.rsplit('/', 3)[2]
name = pageUrl.rsplit('/', 3)[2]
if name == "shermanslagoon.com": if name == "shermanslagoon.com":
name = datetime.date.today().strftime("%B-%d-%Y").lower() name = datetime.date.today().strftime("%B-%d-%Y").lower()
# name is monthname-day-year # name is monthname-day-year
@ -309,10 +307,9 @@ class SMBC(_ParserScraper):
help = 'Index format: nnnn' help = 'Index format: nnnn'
textSearch = '//img[@id="comic"]/@title' textSearch = '//img[@id="comic"]/@title'
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl):
"""Remove random noise from name.""" """Remove random noise from name."""
return imageUrl.rsplit('-', 1)[-1] return image_url.rsplit('-', 1)[-1]
def shouldSkipUrl(self, url, data): def shouldSkipUrl(self, url, data):
"""Skip promo or missing update pages.""" """Skip promo or missing update pages."""
@ -339,11 +336,10 @@ class SnowFlame(_WordPressScraper):
def getIndexStripUrl(self, index): def getIndexStripUrl(self, index):
return self.stripUrl % tuple(index.split('-')) return self.stripUrl % tuple(index.split('-'))
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): prefix, filename = image_url.rsplit('/', 1)
prefix, filename = imageUrl.rsplit('/', 1)
ro = compile(r'snowflame-([^-]+)-([^-]+)') ro = compile(r'snowflame-([^-]+)-([^-]+)')
mo = ro.search(pageUrl) mo = ro.search(page_url)
chapter = mo.group(1) chapter = mo.group(1)
page = mo.group(2) page = mo.group(2)
return "%s-%s-%s" % (chapter, page, filename) return "%s-%s-%s" % (chapter, page, filename)
@ -434,15 +430,14 @@ class StarCrossdDestiny(_ParserScraper):
prevSearch = '//a[text()="prev"]' prevSearch = '//a[text()="prev"]'
help = 'Index format: nnnnnnnn' help = 'Index format: nnnnnnnn'
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): if image_url.find('ch1') == -1:
if imageUrl.find('ch1') == -1:
# At first all images were stored in a strips/ directory but # At first all images were stored in a strips/ directory but
# that was changed with the introduction of book2 # that was changed with the introduction of book2
imageUrl = sub('(?:strips)|(?:images)', 'book1', imageUrl) image_url = sub('(?:strips)|(?:images)', 'book1', image_url)
elif not imageUrl.find('strips') == -1: elif not image_url.find('strips') == -1:
imageUrl = imageUrl.replace('strips/', '') image_url = image_url.replace('strips/', '')
directory, filename = imageUrl.split('/')[-2:] directory, filename = image_url.split('/')[-2:]
filename, extension = splitext(filename) filename, extension = splitext(filename)
return directory + '-' + filename return directory + '-' + filename
@ -519,12 +514,11 @@ class StuffNoOneToldMe(_BasicScraper):
multipleImagesPerStrip = True multipleImagesPerStrip = True
help = 'Index format: yyyy/mm/stripname' help = 'Index format: yyyy/mm/stripname'
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl):
"""Use page URL to construct meaningful image name.""" """Use page URL to construct meaningful image name."""
parts, year, month, stripname = pageUrl.rsplit('/', 3) parts, year, month, stripname = page_url.rsplit('/', 3)
stripname = stripname.rsplit('.', 1)[0] stripname = stripname.rsplit('.', 1)[0]
parts, imagename = imageUrl.rsplit('/', 1) parts, imagename = image_url.rsplit('/', 1)
return '%s-%s-%s-%s' % (year, month, stripname, imagename) return '%s-%s-%s-%s' % (year, month, stripname, imagename)
def shouldSkipUrl(self, url, data): def shouldSkipUrl(self, url, data):

View file

@ -68,8 +68,7 @@ class _SmackJeeves(_ParserScraper):
else: else:
return self.fetchUrl(prevurl, data, self.nextSearch) return self.fetchUrl(prevurl, data, self.nextSearch)
@classmethod def namer(self, image_url, page_url):
def namer(cls, image_url, page_url):
parts = page_url.split('/') parts = page_url.split('/')
name = parts[-2] name = parts[-2]
num = parts[-3] num = parts[-3]

View file

@ -20,8 +20,7 @@ class _Snafu(_ParserScraper):
def name(self): def name(self):
return 'SnafuComics/' + super(_Snafu, self).name return 'SnafuComics/' + super(_Snafu, self).name
@classmethod def namer(self, image_url, page_url):
def namer(cls, image_url, page_url):
year, month, name = image_url.rsplit('/', 3)[1:] year, month, name = image_url.rsplit('/', 3)[1:]
return "%04s_%02s_%s" % (year, month, name) return "%04s_%02s_%s" % (year, month, name)

View file

@ -85,9 +85,8 @@ class TheOrderOfTheStick(_BasicScraper):
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
starter = indirectStarter starter = indirectStarter
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): return page_url.rsplit('/', 1)[-1][:-5]
return pageUrl.rsplit('/', 1)[-1][:-5]
class TheParkingLotIsFull(_BasicScraper): class TheParkingLotIsFull(_BasicScraper):
@ -123,11 +122,10 @@ class TheThinHLine(_BasicScraper):
pageData = self.getPage(pageUrl) pageData = self.getPage(pageUrl)
return super(TheThinHLine, self).getComicStrip(pageUrl, pageData) return super(TheThinHLine, self).getComicStrip(pageUrl, pageData)
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl):
"""Use page URL sequence which is apparently increasing.""" """Use page URL sequence which is apparently increasing."""
num = pageUrl.split('/')[-1] num = page_url.split('/')[-1]
ext = imageUrl.rsplit('.', 1)[1] ext = image_url.rsplit('.', 1)[1]
return "thethinhline-%s.%s" % (num, ext) return "thethinhline-%s.%s" % (num, ext)

View file

@ -60,6 +60,5 @@ class ViiviJaWagner(_BasicScraper):
help = 'Index format: none' help = 'Index format: none'
lang = 'fi' lang = 'fi'
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): return image_url.split('=')[1]
return imageUrl.split('=')[1]

View file

@ -64,10 +64,9 @@ class WebDesignerCOTW(_BasicScraper):
"""Skip non-comic URLs.""" """Skip non-comic URLs."""
return 'comics-of-the-week' not in url return 'comics-of-the-week' not in url
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): imagename = image_url.rsplit('/', 1)[1]
imagename = imageUrl.rsplit('/', 1)[1] week = compile(r'week-(\d+)').search(page_url).group(1)
week = compile(r'week-(\d+)').search(pageUrl).group(1)
return "%s-%s" % (week, imagename) return "%s-%s" % (week, imagename)

View file

@ -20,8 +20,7 @@ class _WLPComics(_ParserScraper):
def name(self): def name(self):
return 'WLP/' + super(_WLPComics, self).name return 'WLP/' + super(_WLPComics, self).name
@classmethod def namer(self, image_url, page_url):
def namer(cls, image_url, page_url):
return (page_url.rsplit('/', 1)[-1].split('.')[0] + '_' + return (page_url.rsplit('/', 1)[-1].split('.')[0] + '_' +
image_url.rsplit('/', 1)[-1]) image_url.rsplit('/', 1)[-1])

View file

@ -26,8 +26,7 @@ class Xkcd(_BasicScraper):
textSearch = compile(tagre("img", "title", r'([^"]+)', textSearch = compile(tagre("img", "title", r'([^"]+)',
before=r'//imgs\.xkcd\.com/comics/')) before=r'//imgs\.xkcd\.com/comics/'))
@classmethod def namer(self, image_url, page_url):
def namer(cls, image_url, page_url):
index = int(page_url.rstrip('/').rsplit('/', 1)[-1]) index = int(page_url.rstrip('/').rsplit('/', 1)[-1])
name = image_url.rsplit('/', 1)[-1].split('.')[0] name = image_url.rsplit('/', 1)[-1].split('.')[0]
return '%03d-%s' % (index, name) return '%03d-%s' % (index, name)

View file

@ -34,9 +34,8 @@ class Zapiro(_BasicScraper):
r'(http://mg\.co\.za/cartoon/[^"]+)')) r'(http://mg\.co\.za/cartoon/[^"]+)'))
help = 'Index format: yyyy-mm-dd-stripname' help = 'Index format: yyyy-mm-dd-stripname'
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): name = image_url.split('/')[-3]
name = imageUrl.split('/')[-3]
return name return name
@ -84,7 +83,6 @@ class Zwarwald(_BasicScraper):
self.stripUrl % '495', self.stripUrl % '495',
) )
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl): prefix, year, month, name = image_url.rsplit('/', 3)
prefix, year, month, name = imageUrl.rsplit('/', 3)
return "%s_%s_%s" % (year, month, name) return "%s_%s_%s" % (year, month, name)

View file

@ -237,8 +237,7 @@ class Scraper(object):
"""Get starter URL from where to scrape comic strips.""" """Get starter URL from where to scrape comic strips."""
return self.url return self.url
@classmethod def namer(self, image_url, page_url):
def namer(cls, imageUrl, pageUrl):
"""Return filename for given image and page URL.""" """Return filename for given image and page URL."""
return None return None