Refactor: Make namer a method.
When #42 is realized, the naming of files might differ between comic modules, so the namer's logical location is the instance, not the class.
This commit is contained in:
parent
5bd2a49f48
commit
c3f32dfef7
27 changed files with 100 additions and 142 deletions
|
@ -8,22 +8,20 @@ from __future__ import absolute_import, division, print_function
|
|||
from .util import getQueryParams
|
||||
|
||||
|
||||
def queryNamer(paramName, usePageUrl=False):
|
||||
def queryNamer(param, use_page_url=False):
|
||||
"""Get name from URL query part."""
|
||||
@classmethod
|
||||
def _namer(cls, imageUrl, pageUrl):
|
||||
def _namer(self, image_url, page_url):
|
||||
"""Get URL query part."""
|
||||
url = pageUrl if usePageUrl else imageUrl
|
||||
return getQueryParams(url)[paramName][0]
|
||||
url = page_url if use_page_url else image_url
|
||||
return getQueryParams(url)[param][0]
|
||||
return _namer
|
||||
|
||||
|
||||
def regexNamer(regex, usePageUrl=False):
|
||||
def regexNamer(regex, use_page_url=False):
|
||||
"""Get name from regular expression."""
|
||||
@classmethod
|
||||
def _namer(cls, imageUrl, pageUrl):
|
||||
def _namer(self, image_url, page_url):
|
||||
"""Get first regular expression group."""
|
||||
url = pageUrl if usePageUrl else imageUrl
|
||||
url = page_url if use_page_url else image_url
|
||||
mo = regex.search(url)
|
||||
if mo:
|
||||
return mo.group(1)
|
||||
|
|
|
@ -28,8 +28,7 @@ class AbstruseGoose(_BasicScraper):
|
|||
help = 'Index format: n (unpadded)'
|
||||
textSearch = compile(tagre("img", "title", r'([^"]+)'))
|
||||
|
||||
@classmethod
|
||||
def namer(cls, image_url, page_url):
|
||||
def namer(self, image_url, page_url):
|
||||
index = int(page_url.rstrip('/').split('/')[-1])
|
||||
name = image_url.split('/')[-1].split('.')[0]
|
||||
return 'c%03d-%s' % (index, name)
|
||||
|
@ -300,8 +299,7 @@ class AstronomyPOTD(_ParserScraper):
|
|||
"""Skip pages without images."""
|
||||
return data.xpath('//iframe') # videos
|
||||
|
||||
@classmethod
|
||||
def namer(cls, image_url, page_url):
|
||||
def namer(self, image_url, page_url):
|
||||
return '%s-%s' % (page_url.split('/')[-1].split('.')[0][2:],
|
||||
image_url.split('/')[-1].split('.')[0])
|
||||
|
||||
|
|
|
@ -76,11 +76,10 @@ class Beetlebum(_BasicScraper):
|
|||
help = 'Index format: yyyy/mm/dd/striptitle'
|
||||
lang = 'de'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
indexes = tuple(pageUrl.rstrip('/').split('/')[-4:])
|
||||
def namer(self, image_url, page_url):
|
||||
indexes = tuple(page_url.rstrip('/').split('/')[-4:])
|
||||
name = '%s-%s-%s-%s' % indexes
|
||||
name = name + '_' + imageUrl.split('/')[-1]
|
||||
name = name + '_' + image_url.split('/')[-1]
|
||||
return name
|
||||
|
||||
|
||||
|
@ -154,8 +153,7 @@ class BloomingFaeries(_ParserScraper):
|
|||
imageSearch = '//div[@id="comic"]//img'
|
||||
prevSearch = WP_PREV_SEARCH
|
||||
|
||||
@classmethod
|
||||
def namer(cls, image_url, page_url):
|
||||
def namer(self, image_url, page_url):
|
||||
return "_".join(image_url.rsplit('/', 3)[1:])
|
||||
|
||||
|
||||
|
@ -197,9 +195,8 @@ class BoyOnAStickAndSlither(_BasicScraper):
|
|||
"<span>Next page")
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
return pageUrl.rsplit('/')[-1]
|
||||
def namer(self, image_url, page_url):
|
||||
return page_url.rsplit('/')[-1]
|
||||
|
||||
|
||||
class BratHalla(_WordPressScraper):
|
||||
|
|
|
@ -57,9 +57,8 @@ class Carciphona(_BasicScraper):
|
|||
r'(view\.php\?page=[0-9]+[^"]*)'))
|
||||
starter = indirectStarter
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
ip = imageUrl.split('/')
|
||||
def namer(self, image_url, page_url):
|
||||
ip = image_url.split('/')
|
||||
return "volume_%s_page_%s" % (ip[-2], ip[-1])
|
||||
|
||||
|
||||
|
@ -262,9 +261,8 @@ class CorydonCafe(_ParserScraper):
|
|||
latestSearch = '//ul//a'
|
||||
help = 'Index format: yyyy/stripname'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
return pageUrl.split('/')[-1].split('.')[0]
|
||||
def namer(self, image_url, page_url):
|
||||
return page_url.split('/')[-1].split('.')[0]
|
||||
|
||||
|
||||
class CourtingDisaster(_WordPressScraper):
|
||||
|
@ -349,10 +347,9 @@ class CyanideAndHappiness(_BasicScraper):
|
|||
"""Skip pages without images."""
|
||||
return "/comics/play-button.png" in data[0]
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
imgname = imageUrl.split('/')[-1]
|
||||
def namer(self, image_url, page_url):
|
||||
imgname = image_url.split('/')[-1]
|
||||
# only get the first 100 chars for the image name
|
||||
imgname = imgname[:100]
|
||||
imgnum = pageUrl.split('/')[-2]
|
||||
imgnum = page_url.split('/')[-2]
|
||||
return '%s_%s' % (imgnum, imgname)
|
||||
|
|
|
@ -24,9 +24,8 @@ def add(name, shortName, imageFolder=None, lastStrip=None):
|
|||
if imageFolder is None:
|
||||
imageFolder = shortName
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
return '%03d' % int(getQueryParams(pageUrl)['page'][0])
|
||||
def namer(self, image_url, page_url):
|
||||
return '%03d' % int(getQueryParams(page_url)['page'][0])
|
||||
|
||||
def _starter(self):
|
||||
# first, try hopping to previous and next comic
|
||||
|
|
|
@ -22,12 +22,11 @@ class _ComicFury(_ParserScraper):
|
|||
help = 'Index format: n'
|
||||
starter = bounceStarter
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
parts = pageUrl.split('/')
|
||||
path, ext = os.path.splitext(imageUrl)
|
||||
def namer(self, image_url, page_url):
|
||||
parts = page_url.split('/')
|
||||
path, ext = os.path.splitext(image_url)
|
||||
num = parts[-1]
|
||||
return "%s_%s%s" % (cls.__name__[2:], num, ext)
|
||||
return "%s_%s%s" % (self.__class__.__name__[2:], num, ext)
|
||||
|
||||
@property
|
||||
def url(self):
|
||||
|
|
|
@ -34,10 +34,9 @@ class DamnLol(_BasicScraper):
|
|||
help = 'Index format: stripname-number'
|
||||
starter = bounceStarter
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
ext = imageUrl.rsplit('.', 1)[1]
|
||||
path = pageUrl.rsplit('/', 1)[1][:-5]
|
||||
def namer(self, image_url, page_url):
|
||||
ext = image_url.rsplit('.', 1)[1]
|
||||
path = page_url.rsplit('/', 1)[1][:-5]
|
||||
stripname, number = path.rsplit('-', 1)
|
||||
return '%s-%s.%s' % (number, stripname, ext)
|
||||
|
||||
|
@ -136,10 +135,9 @@ class DieselSweeties(_BasicScraper):
|
|||
tagre("img", "src", r'(?:http://www\.dieselsweeties\.com/ximages/blackbackarrow160.png|/ximages/prev\.gif)'))
|
||||
help = 'Index format: n (unpadded)'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
index = int(imageUrl.split('/')[-1].split('.')[0])
|
||||
return 'sw%02d' % (index,)
|
||||
def namer(self, image_url, page_url):
|
||||
index = int(image_url.split('/')[-1].split('.')[0])
|
||||
return 'sw%02d' % index
|
||||
|
||||
|
||||
class Dilbert(_BasicScraper):
|
||||
|
@ -154,9 +152,8 @@ class Dilbert(_BasicScraper):
|
|||
after="Click to see"))
|
||||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
name = pageUrl.rsplit("/", 1)[1]
|
||||
def namer(self, image_url, page_url):
|
||||
name = page_url.rsplit("/", 1)[1]
|
||||
return "%s" % name
|
||||
|
||||
|
||||
|
|
|
@ -24,10 +24,9 @@ def add(name, path):
|
|||
classname = '_DrunkDuck_%s' % name
|
||||
_url = 'http://www.theduckwebcomics.com/%s/' % path
|
||||
|
||||
@classmethod
|
||||
def _namer(cls, imageUrl, pageUrl):
|
||||
index = int(pageUrl.rstrip('/').split('/')[-1])
|
||||
ext = imageUrl.rsplit('.')[-1]
|
||||
def _namer(self, image_url, page_url):
|
||||
index = int(page_url.rstrip('/').split('/')[-1])
|
||||
ext = image_url.rsplit('.')[-1]
|
||||
return '%d.%s' % (index, ext)
|
||||
|
||||
def _starter(self):
|
||||
|
|
|
@ -26,13 +26,12 @@ class EarthsongSaga(_ParserScraper):
|
|||
return [x.replace('earthsongsaga.com/../',
|
||||
'earthsongsaga.com/') for x in urls]
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
def namer(self, image_url, page_url):
|
||||
imgmatch = compile(r'images/vol(\d+)/ch(\d+)/(\d+)\.\w+$',
|
||||
IGNORECASE).search(imageUrl)
|
||||
IGNORECASE).search(image_url)
|
||||
if not imgmatch:
|
||||
imgmatch = compile(r'images/vol(\d+)/ch(\d+)/ch(\d+)cover\.\w+$',
|
||||
IGNORECASE).search(imageUrl)
|
||||
IGNORECASE).search(image_url)
|
||||
suffix = "cover"
|
||||
else:
|
||||
suffix = ""
|
||||
|
|
|
@ -146,9 +146,8 @@ class GoneWithTheBlastwave(_BasicScraper):
|
|||
r'<img src="images/page/default/latest')
|
||||
help = 'Index format: n'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
return '%02d' % int(compile(r'nro=(\d+)').search(pageUrl).group(1))
|
||||
def namer(self, image_url, page_url):
|
||||
return '%02d' % int(compile(r'nro=(\d+)').search(page_url).group(1))
|
||||
|
||||
|
||||
class GrrlPower(_WordPressScraper):
|
||||
|
|
|
@ -26,10 +26,9 @@ class _GoComics(_ParserScraper):
|
|||
def url(self):
|
||||
return 'http://www.gocomics.com/' + self.path
|
||||
|
||||
@classmethod
|
||||
def namer(cls, image_url, page_url):
|
||||
def namer(self, image_url, page_url):
|
||||
prefix, year, month, day = page_url.rsplit('/', 3)
|
||||
return "%s_%s%s%s.gif" % (cls.__name__[2:], year, month, day)
|
||||
return "%s_%s%s%s.gif" % (self.__class__.__name__[2:], year, month, day)
|
||||
|
||||
def getIndexStripUrl(self, index):
|
||||
return self.url + self.path + '/%s' % index
|
||||
|
|
|
@ -51,10 +51,9 @@ class HarkAVagrant(_BasicScraper):
|
|||
tagre("img", "src", "buttonnext.png"))
|
||||
help = 'Index format: number'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
filename = imageUrl.rsplit('/', 1)[1]
|
||||
num = pageUrl.rsplit('=', 1)[1]
|
||||
def namer(self, image_url, page_url):
|
||||
filename = image_url.rsplit('/', 1)[1]
|
||||
num = page_url.rsplit('=', 1)[1]
|
||||
return '%s-%s' % (num, filename)
|
||||
|
||||
|
||||
|
|
|
@ -26,11 +26,10 @@ class Lackadaisy(_BasicScraper):
|
|||
help = 'Index format: n'
|
||||
starter = bounceStarter
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
def namer(self, image_url, page_url):
|
||||
"""Use comic id for filename."""
|
||||
num = pageUrl.rsplit('=', 1)[-1]
|
||||
ext = imageUrl.rsplit('.', 1)[-1]
|
||||
num = page_url.rsplit('=', 1)[-1]
|
||||
ext = image_url.rsplit('.', 1)[-1]
|
||||
return 'lackadaisy_%s.%s' % (num, ext)
|
||||
|
||||
|
||||
|
|
|
@ -82,9 +82,8 @@ class MarriedToTheSea(_BasicScraper):
|
|||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<< Yesterday")
|
||||
help = 'Index format: mmddyy'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
unused, date, filename = imageUrl.rsplit('/', 2)
|
||||
def namer(self, image_url, page_url):
|
||||
unused, date, filename = image_url.rsplit('/', 2)
|
||||
return '%s-%s' % (date, filename)
|
||||
|
||||
|
||||
|
|
|
@ -38,9 +38,8 @@ class NatalieDee(_BasicScraper):
|
|||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<< Yesterday")
|
||||
help = 'Index format: mmddyy'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
unused, date, filename = imageUrl.rsplit('/', 2)
|
||||
def namer(self, image_url, page_url):
|
||||
unused, date, filename = image_url.rsplit('/', 2)
|
||||
return '%s-%s' % (date, filename)
|
||||
|
||||
|
||||
|
|
|
@ -74,13 +74,12 @@ class OnTheFastrack(_BasicScraper):
|
|||
url = 'http://onthefastrack.com/'
|
||||
stripUrl = url + 'comics/%s'
|
||||
firstStripUrl = stripUrl % 'november-13-2000'
|
||||
imageSearch = compile(r'(http://safr\.kingfeatures\.com/idn/cnfeed/zone/js/content\.php\?file=.+)"')
|
||||
imageSearch = compile(r'(https://safr\.kingfeatures\.com/idn/cnfeed/zone/js/content\.php\?file=.+)"')
|
||||
prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url)
|
||||
help = 'Index format: monthname-dd-yyyy'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
name = pageUrl.rsplit('/', 3)[2]
|
||||
def namer(self, image_url, page_url):
|
||||
name = page_url.rsplit('/', 3)[2]
|
||||
if name == "onthefastrack.com":
|
||||
import datetime
|
||||
name = datetime.date.today().strftime("%B-%d-%Y")
|
||||
|
|
|
@ -100,9 +100,8 @@ class PennyArcade(_BasicScraper):
|
|||
prevUrl = "%s/%s/%s" % (dummy, yyyy, mm)
|
||||
return prevUrl
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
p = pageUrl.split('/')
|
||||
def namer(self, image_url, page_url):
|
||||
p = page_url.split('/')
|
||||
return '%04d%02d%02d' % (int(p[4]), int(p[5]), int(p[6]))
|
||||
|
||||
|
||||
|
@ -161,7 +160,7 @@ class PiledHigherAndDeeper(_BasicScraper):
|
|||
prevSearch = compile(r'<a href=((comics/)?archive\.php\?comicid=\d+)>.*<img [^>]*prev_button\.gif')
|
||||
nextSearch = compile(r'<a href=(archive\.php\?comicid=\d+)>.*<img [^>]*next_button\.gif')
|
||||
help = 'Index format: n (unpadded)'
|
||||
namer = queryNamer('comicid', usePageUrl=True)
|
||||
namer = queryNamer('comicid', use_page_url=True)
|
||||
|
||||
|
||||
class Pimpette(_ParserScraper):
|
||||
|
|
|
@ -197,10 +197,9 @@ class SexyLosers(_BasicScraper):
|
|||
help = 'Index format: nnn'
|
||||
starter = indirectStarter
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
index = pageUrl.split('/')[-1].split('.')[0]
|
||||
title = imageUrl.split('/')[-1].split('.')[0]
|
||||
def namer(self, image_url, page_url):
|
||||
index = page_url.split('/')[-1].split('.')[0]
|
||||
title = image_url.split('/')[-1].split('.')[0]
|
||||
return index + '-' + title
|
||||
|
||||
|
||||
|
@ -228,9 +227,8 @@ class ShermansLagoon(_BasicScraper):
|
|||
prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url)
|
||||
help = 'Index format: monthname-day-year'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
name = pageUrl.rsplit('/', 3)[2]
|
||||
def namer(self, image_url, page_url):
|
||||
name = page_url.rsplit('/', 3)[2]
|
||||
if name == "shermanslagoon.com":
|
||||
name = datetime.date.today().strftime("%B-%d-%Y").lower()
|
||||
# name is monthname-day-year
|
||||
|
@ -309,10 +307,9 @@ class SMBC(_ParserScraper):
|
|||
help = 'Index format: nnnn'
|
||||
textSearch = '//img[@id="comic"]/@title'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
def namer(self, image_url, page_url):
|
||||
"""Remove random noise from name."""
|
||||
return imageUrl.rsplit('-', 1)[-1]
|
||||
return image_url.rsplit('-', 1)[-1]
|
||||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
"""Skip promo or missing update pages."""
|
||||
|
@ -339,11 +336,10 @@ class SnowFlame(_WordPressScraper):
|
|||
def getIndexStripUrl(self, index):
|
||||
return self.stripUrl % tuple(index.split('-'))
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
prefix, filename = imageUrl.rsplit('/', 1)
|
||||
def namer(self, image_url, page_url):
|
||||
prefix, filename = image_url.rsplit('/', 1)
|
||||
ro = compile(r'snowflame-([^-]+)-([^-]+)')
|
||||
mo = ro.search(pageUrl)
|
||||
mo = ro.search(page_url)
|
||||
chapter = mo.group(1)
|
||||
page = mo.group(2)
|
||||
return "%s-%s-%s" % (chapter, page, filename)
|
||||
|
@ -434,15 +430,14 @@ class StarCrossdDestiny(_ParserScraper):
|
|||
prevSearch = '//a[text()="prev"]'
|
||||
help = 'Index format: nnnnnnnn'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
if imageUrl.find('ch1') == -1:
|
||||
def namer(self, image_url, page_url):
|
||||
if image_url.find('ch1') == -1:
|
||||
# At first all images were stored in a strips/ directory but
|
||||
# that was changed with the introduction of book2
|
||||
imageUrl = sub('(?:strips)|(?:images)', 'book1', imageUrl)
|
||||
elif not imageUrl.find('strips') == -1:
|
||||
imageUrl = imageUrl.replace('strips/', '')
|
||||
directory, filename = imageUrl.split('/')[-2:]
|
||||
image_url = sub('(?:strips)|(?:images)', 'book1', image_url)
|
||||
elif not image_url.find('strips') == -1:
|
||||
image_url = image_url.replace('strips/', '')
|
||||
directory, filename = image_url.split('/')[-2:]
|
||||
filename, extension = splitext(filename)
|
||||
return directory + '-' + filename
|
||||
|
||||
|
@ -519,12 +514,11 @@ class StuffNoOneToldMe(_BasicScraper):
|
|||
multipleImagesPerStrip = True
|
||||
help = 'Index format: yyyy/mm/stripname'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
def namer(self, image_url, page_url):
|
||||
"""Use page URL to construct meaningful image name."""
|
||||
parts, year, month, stripname = pageUrl.rsplit('/', 3)
|
||||
parts, year, month, stripname = page_url.rsplit('/', 3)
|
||||
stripname = stripname.rsplit('.', 1)[0]
|
||||
parts, imagename = imageUrl.rsplit('/', 1)
|
||||
parts, imagename = image_url.rsplit('/', 1)
|
||||
return '%s-%s-%s-%s' % (year, month, stripname, imagename)
|
||||
|
||||
def shouldSkipUrl(self, url, data):
|
||||
|
|
|
@ -68,8 +68,7 @@ class _SmackJeeves(_ParserScraper):
|
|||
else:
|
||||
return self.fetchUrl(prevurl, data, self.nextSearch)
|
||||
|
||||
@classmethod
|
||||
def namer(cls, image_url, page_url):
|
||||
def namer(self, image_url, page_url):
|
||||
parts = page_url.split('/')
|
||||
name = parts[-2]
|
||||
num = parts[-3]
|
||||
|
|
|
@ -20,8 +20,7 @@ class _Snafu(_ParserScraper):
|
|||
def name(self):
|
||||
return 'SnafuComics/' + super(_Snafu, self).name
|
||||
|
||||
@classmethod
|
||||
def namer(cls, image_url, page_url):
|
||||
def namer(self, image_url, page_url):
|
||||
year, month, name = image_url.rsplit('/', 3)[1:]
|
||||
return "%04s_%02s_%s" % (year, month, name)
|
||||
|
||||
|
|
|
@ -85,9 +85,8 @@ class TheOrderOfTheStick(_BasicScraper):
|
|||
help = 'Index format: n (unpadded)'
|
||||
starter = indirectStarter
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
return pageUrl.rsplit('/', 1)[-1][:-5]
|
||||
def namer(self, image_url, page_url):
|
||||
return page_url.rsplit('/', 1)[-1][:-5]
|
||||
|
||||
|
||||
class TheParkingLotIsFull(_BasicScraper):
|
||||
|
@ -123,11 +122,10 @@ class TheThinHLine(_BasicScraper):
|
|||
pageData = self.getPage(pageUrl)
|
||||
return super(TheThinHLine, self).getComicStrip(pageUrl, pageData)
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
def namer(self, image_url, page_url):
|
||||
"""Use page URL sequence which is apparently increasing."""
|
||||
num = pageUrl.split('/')[-1]
|
||||
ext = imageUrl.rsplit('.', 1)[1]
|
||||
num = page_url.split('/')[-1]
|
||||
ext = image_url.rsplit('.', 1)[1]
|
||||
return "thethinhline-%s.%s" % (num, ext)
|
||||
|
||||
|
||||
|
|
|
@ -60,6 +60,5 @@ class ViiviJaWagner(_BasicScraper):
|
|||
help = 'Index format: none'
|
||||
lang = 'fi'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
return imageUrl.split('=')[1]
|
||||
def namer(self, image_url, page_url):
|
||||
return image_url.split('=')[1]
|
||||
|
|
|
@ -64,10 +64,9 @@ class WebDesignerCOTW(_BasicScraper):
|
|||
"""Skip non-comic URLs."""
|
||||
return 'comics-of-the-week' not in url
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
imagename = imageUrl.rsplit('/', 1)[1]
|
||||
week = compile(r'week-(\d+)').search(pageUrl).group(1)
|
||||
def namer(self, image_url, page_url):
|
||||
imagename = image_url.rsplit('/', 1)[1]
|
||||
week = compile(r'week-(\d+)').search(page_url).group(1)
|
||||
return "%s-%s" % (week, imagename)
|
||||
|
||||
|
||||
|
|
|
@ -20,8 +20,7 @@ class _WLPComics(_ParserScraper):
|
|||
def name(self):
|
||||
return 'WLP/' + super(_WLPComics, self).name
|
||||
|
||||
@classmethod
|
||||
def namer(cls, image_url, page_url):
|
||||
def namer(self, image_url, page_url):
|
||||
return (page_url.rsplit('/', 1)[-1].split('.')[0] + '_' +
|
||||
image_url.rsplit('/', 1)[-1])
|
||||
|
||||
|
|
|
@ -26,8 +26,7 @@ class Xkcd(_BasicScraper):
|
|||
textSearch = compile(tagre("img", "title", r'([^"]+)',
|
||||
before=r'//imgs\.xkcd\.com/comics/'))
|
||||
|
||||
@classmethod
|
||||
def namer(cls, image_url, page_url):
|
||||
def namer(self, image_url, page_url):
|
||||
index = int(page_url.rstrip('/').rsplit('/', 1)[-1])
|
||||
name = image_url.rsplit('/', 1)[-1].split('.')[0]
|
||||
return '%03d-%s' % (index, name)
|
||||
|
|
|
@ -34,9 +34,8 @@ class Zapiro(_BasicScraper):
|
|||
r'(http://mg\.co\.za/cartoon/[^"]+)'))
|
||||
help = 'Index format: yyyy-mm-dd-stripname'
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
name = imageUrl.split('/')[-3]
|
||||
def namer(self, image_url, page_url):
|
||||
name = image_url.split('/')[-3]
|
||||
return name
|
||||
|
||||
|
||||
|
@ -84,7 +83,6 @@ class Zwarwald(_BasicScraper):
|
|||
self.stripUrl % '495',
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
prefix, year, month, name = imageUrl.rsplit('/', 3)
|
||||
def namer(self, image_url, page_url):
|
||||
prefix, year, month, name = image_url.rsplit('/', 3)
|
||||
return "%s_%s_%s" % (year, month, name)
|
||||
|
|
|
@ -237,8 +237,7 @@ class Scraper(object):
|
|||
"""Get starter URL from where to scrape comic strips."""
|
||||
return self.url
|
||||
|
||||
@classmethod
|
||||
def namer(cls, imageUrl, pageUrl):
|
||||
def namer(self, image_url, page_url):
|
||||
"""Return filename for given image and page URL."""
|
||||
return None
|
||||
|
||||
|
|
Loading…
Reference in a new issue