Refactor: Make namer a method.
When #42 is realized, the naming of files might differ between comic modules, so the namer's logical location is the instance, not the class.
This commit is contained in:
parent
5bd2a49f48
commit
c3f32dfef7
27 changed files with 100 additions and 142 deletions
|
@ -8,22 +8,20 @@ from __future__ import absolute_import, division, print_function
|
||||||
from .util import getQueryParams
|
from .util import getQueryParams
|
||||||
|
|
||||||
|
|
||||||
def queryNamer(paramName, usePageUrl=False):
|
def queryNamer(param, use_page_url=False):
|
||||||
"""Get name from URL query part."""
|
"""Get name from URL query part."""
|
||||||
@classmethod
|
def _namer(self, image_url, page_url):
|
||||||
def _namer(cls, imageUrl, pageUrl):
|
|
||||||
"""Get URL query part."""
|
"""Get URL query part."""
|
||||||
url = pageUrl if usePageUrl else imageUrl
|
url = page_url if use_page_url else image_url
|
||||||
return getQueryParams(url)[paramName][0]
|
return getQueryParams(url)[param][0]
|
||||||
return _namer
|
return _namer
|
||||||
|
|
||||||
|
|
||||||
def regexNamer(regex, usePageUrl=False):
|
def regexNamer(regex, use_page_url=False):
|
||||||
"""Get name from regular expression."""
|
"""Get name from regular expression."""
|
||||||
@classmethod
|
def _namer(self, image_url, page_url):
|
||||||
def _namer(cls, imageUrl, pageUrl):
|
|
||||||
"""Get first regular expression group."""
|
"""Get first regular expression group."""
|
||||||
url = pageUrl if usePageUrl else imageUrl
|
url = page_url if use_page_url else image_url
|
||||||
mo = regex.search(url)
|
mo = regex.search(url)
|
||||||
if mo:
|
if mo:
|
||||||
return mo.group(1)
|
return mo.group(1)
|
||||||
|
|
|
@ -28,8 +28,7 @@ class AbstruseGoose(_BasicScraper):
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
textSearch = compile(tagre("img", "title", r'([^"]+)'))
|
textSearch = compile(tagre("img", "title", r'([^"]+)'))
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, image_url, page_url):
|
|
||||||
index = int(page_url.rstrip('/').split('/')[-1])
|
index = int(page_url.rstrip('/').split('/')[-1])
|
||||||
name = image_url.split('/')[-1].split('.')[0]
|
name = image_url.split('/')[-1].split('.')[0]
|
||||||
return 'c%03d-%s' % (index, name)
|
return 'c%03d-%s' % (index, name)
|
||||||
|
@ -300,8 +299,7 @@ class AstronomyPOTD(_ParserScraper):
|
||||||
"""Skip pages without images."""
|
"""Skip pages without images."""
|
||||||
return data.xpath('//iframe') # videos
|
return data.xpath('//iframe') # videos
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, image_url, page_url):
|
|
||||||
return '%s-%s' % (page_url.split('/')[-1].split('.')[0][2:],
|
return '%s-%s' % (page_url.split('/')[-1].split('.')[0][2:],
|
||||||
image_url.split('/')[-1].split('.')[0])
|
image_url.split('/')[-1].split('.')[0])
|
||||||
|
|
||||||
|
|
|
@ -76,11 +76,10 @@ class Beetlebum(_BasicScraper):
|
||||||
help = 'Index format: yyyy/mm/dd/striptitle'
|
help = 'Index format: yyyy/mm/dd/striptitle'
|
||||||
lang = 'de'
|
lang = 'de'
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
indexes = tuple(page_url.rstrip('/').split('/')[-4:])
|
||||||
indexes = tuple(pageUrl.rstrip('/').split('/')[-4:])
|
|
||||||
name = '%s-%s-%s-%s' % indexes
|
name = '%s-%s-%s-%s' % indexes
|
||||||
name = name + '_' + imageUrl.split('/')[-1]
|
name = name + '_' + image_url.split('/')[-1]
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
|
||||||
|
@ -154,8 +153,7 @@ class BloomingFaeries(_ParserScraper):
|
||||||
imageSearch = '//div[@id="comic"]//img'
|
imageSearch = '//div[@id="comic"]//img'
|
||||||
prevSearch = WP_PREV_SEARCH
|
prevSearch = WP_PREV_SEARCH
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, image_url, page_url):
|
|
||||||
return "_".join(image_url.rsplit('/', 3)[1:])
|
return "_".join(image_url.rsplit('/', 3)[1:])
|
||||||
|
|
||||||
|
|
||||||
|
@ -197,9 +195,8 @@ class BoyOnAStickAndSlither(_BasicScraper):
|
||||||
"<span>Next page")
|
"<span>Next page")
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
return page_url.rsplit('/')[-1]
|
||||||
return pageUrl.rsplit('/')[-1]
|
|
||||||
|
|
||||||
|
|
||||||
class BratHalla(_WordPressScraper):
|
class BratHalla(_WordPressScraper):
|
||||||
|
|
|
@ -57,9 +57,8 @@ class Carciphona(_BasicScraper):
|
||||||
r'(view\.php\?page=[0-9]+[^"]*)'))
|
r'(view\.php\?page=[0-9]+[^"]*)'))
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
ip = image_url.split('/')
|
||||||
ip = imageUrl.split('/')
|
|
||||||
return "volume_%s_page_%s" % (ip[-2], ip[-1])
|
return "volume_%s_page_%s" % (ip[-2], ip[-1])
|
||||||
|
|
||||||
|
|
||||||
|
@ -262,9 +261,8 @@ class CorydonCafe(_ParserScraper):
|
||||||
latestSearch = '//ul//a'
|
latestSearch = '//ul//a'
|
||||||
help = 'Index format: yyyy/stripname'
|
help = 'Index format: yyyy/stripname'
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
return page_url.split('/')[-1].split('.')[0]
|
||||||
return pageUrl.split('/')[-1].split('.')[0]
|
|
||||||
|
|
||||||
|
|
||||||
class CourtingDisaster(_WordPressScraper):
|
class CourtingDisaster(_WordPressScraper):
|
||||||
|
@ -349,10 +347,9 @@ class CyanideAndHappiness(_BasicScraper):
|
||||||
"""Skip pages without images."""
|
"""Skip pages without images."""
|
||||||
return "/comics/play-button.png" in data[0]
|
return "/comics/play-button.png" in data[0]
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
imgname = image_url.split('/')[-1]
|
||||||
imgname = imageUrl.split('/')[-1]
|
|
||||||
# only get the first 100 chars for the image name
|
# only get the first 100 chars for the image name
|
||||||
imgname = imgname[:100]
|
imgname = imgname[:100]
|
||||||
imgnum = pageUrl.split('/')[-2]
|
imgnum = page_url.split('/')[-2]
|
||||||
return '%s_%s' % (imgnum, imgname)
|
return '%s_%s' % (imgnum, imgname)
|
||||||
|
|
|
@ -24,9 +24,8 @@ def add(name, shortName, imageFolder=None, lastStrip=None):
|
||||||
if imageFolder is None:
|
if imageFolder is None:
|
||||||
imageFolder = shortName
|
imageFolder = shortName
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
return '%03d' % int(getQueryParams(page_url)['page'][0])
|
||||||
return '%03d' % int(getQueryParams(pageUrl)['page'][0])
|
|
||||||
|
|
||||||
def _starter(self):
|
def _starter(self):
|
||||||
# first, try hopping to previous and next comic
|
# first, try hopping to previous and next comic
|
||||||
|
|
|
@ -22,12 +22,11 @@ class _ComicFury(_ParserScraper):
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
starter = bounceStarter
|
starter = bounceStarter
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
parts = page_url.split('/')
|
||||||
parts = pageUrl.split('/')
|
path, ext = os.path.splitext(image_url)
|
||||||
path, ext = os.path.splitext(imageUrl)
|
|
||||||
num = parts[-1]
|
num = parts[-1]
|
||||||
return "%s_%s%s" % (cls.__name__[2:], num, ext)
|
return "%s_%s%s" % (self.__class__.__name__[2:], num, ext)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def url(self):
|
def url(self):
|
||||||
|
|
|
@ -34,10 +34,9 @@ class DamnLol(_BasicScraper):
|
||||||
help = 'Index format: stripname-number'
|
help = 'Index format: stripname-number'
|
||||||
starter = bounceStarter
|
starter = bounceStarter
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
ext = image_url.rsplit('.', 1)[1]
|
||||||
ext = imageUrl.rsplit('.', 1)[1]
|
path = page_url.rsplit('/', 1)[1][:-5]
|
||||||
path = pageUrl.rsplit('/', 1)[1][:-5]
|
|
||||||
stripname, number = path.rsplit('-', 1)
|
stripname, number = path.rsplit('-', 1)
|
||||||
return '%s-%s.%s' % (number, stripname, ext)
|
return '%s-%s.%s' % (number, stripname, ext)
|
||||||
|
|
||||||
|
@ -136,10 +135,9 @@ class DieselSweeties(_BasicScraper):
|
||||||
tagre("img", "src", r'(?:http://www\.dieselsweeties\.com/ximages/blackbackarrow160.png|/ximages/prev\.gif)'))
|
tagre("img", "src", r'(?:http://www\.dieselsweeties\.com/ximages/blackbackarrow160.png|/ximages/prev\.gif)'))
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
index = int(image_url.split('/')[-1].split('.')[0])
|
||||||
index = int(imageUrl.split('/')[-1].split('.')[0])
|
return 'sw%02d' % index
|
||||||
return 'sw%02d' % (index,)
|
|
||||||
|
|
||||||
|
|
||||||
class Dilbert(_BasicScraper):
|
class Dilbert(_BasicScraper):
|
||||||
|
@ -154,9 +152,8 @@ class Dilbert(_BasicScraper):
|
||||||
after="Click to see"))
|
after="Click to see"))
|
||||||
help = 'Index format: yyyy-mm-dd'
|
help = 'Index format: yyyy-mm-dd'
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
name = page_url.rsplit("/", 1)[1]
|
||||||
name = pageUrl.rsplit("/", 1)[1]
|
|
||||||
return "%s" % name
|
return "%s" % name
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -24,10 +24,9 @@ def add(name, path):
|
||||||
classname = '_DrunkDuck_%s' % name
|
classname = '_DrunkDuck_%s' % name
|
||||||
_url = 'http://www.theduckwebcomics.com/%s/' % path
|
_url = 'http://www.theduckwebcomics.com/%s/' % path
|
||||||
|
|
||||||
@classmethod
|
def _namer(self, image_url, page_url):
|
||||||
def _namer(cls, imageUrl, pageUrl):
|
index = int(page_url.rstrip('/').split('/')[-1])
|
||||||
index = int(pageUrl.rstrip('/').split('/')[-1])
|
ext = image_url.rsplit('.')[-1]
|
||||||
ext = imageUrl.rsplit('.')[-1]
|
|
||||||
return '%d.%s' % (index, ext)
|
return '%d.%s' % (index, ext)
|
||||||
|
|
||||||
def _starter(self):
|
def _starter(self):
|
||||||
|
|
|
@ -26,13 +26,12 @@ class EarthsongSaga(_ParserScraper):
|
||||||
return [x.replace('earthsongsaga.com/../',
|
return [x.replace('earthsongsaga.com/../',
|
||||||
'earthsongsaga.com/') for x in urls]
|
'earthsongsaga.com/') for x in urls]
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
|
||||||
imgmatch = compile(r'images/vol(\d+)/ch(\d+)/(\d+)\.\w+$',
|
imgmatch = compile(r'images/vol(\d+)/ch(\d+)/(\d+)\.\w+$',
|
||||||
IGNORECASE).search(imageUrl)
|
IGNORECASE).search(image_url)
|
||||||
if not imgmatch:
|
if not imgmatch:
|
||||||
imgmatch = compile(r'images/vol(\d+)/ch(\d+)/ch(\d+)cover\.\w+$',
|
imgmatch = compile(r'images/vol(\d+)/ch(\d+)/ch(\d+)cover\.\w+$',
|
||||||
IGNORECASE).search(imageUrl)
|
IGNORECASE).search(image_url)
|
||||||
suffix = "cover"
|
suffix = "cover"
|
||||||
else:
|
else:
|
||||||
suffix = ""
|
suffix = ""
|
||||||
|
|
|
@ -146,9 +146,8 @@ class GoneWithTheBlastwave(_BasicScraper):
|
||||||
r'<img src="images/page/default/latest')
|
r'<img src="images/page/default/latest')
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
return '%02d' % int(compile(r'nro=(\d+)').search(page_url).group(1))
|
||||||
return '%02d' % int(compile(r'nro=(\d+)').search(pageUrl).group(1))
|
|
||||||
|
|
||||||
|
|
||||||
class GrrlPower(_WordPressScraper):
|
class GrrlPower(_WordPressScraper):
|
||||||
|
|
|
@ -26,10 +26,9 @@ class _GoComics(_ParserScraper):
|
||||||
def url(self):
|
def url(self):
|
||||||
return 'http://www.gocomics.com/' + self.path
|
return 'http://www.gocomics.com/' + self.path
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, image_url, page_url):
|
|
||||||
prefix, year, month, day = page_url.rsplit('/', 3)
|
prefix, year, month, day = page_url.rsplit('/', 3)
|
||||||
return "%s_%s%s%s.gif" % (cls.__name__[2:], year, month, day)
|
return "%s_%s%s%s.gif" % (self.__class__.__name__[2:], year, month, day)
|
||||||
|
|
||||||
def getIndexStripUrl(self, index):
|
def getIndexStripUrl(self, index):
|
||||||
return self.url + self.path + '/%s' % index
|
return self.url + self.path + '/%s' % index
|
||||||
|
|
|
@ -51,10 +51,9 @@ class HarkAVagrant(_BasicScraper):
|
||||||
tagre("img", "src", "buttonnext.png"))
|
tagre("img", "src", "buttonnext.png"))
|
||||||
help = 'Index format: number'
|
help = 'Index format: number'
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
filename = image_url.rsplit('/', 1)[1]
|
||||||
filename = imageUrl.rsplit('/', 1)[1]
|
num = page_url.rsplit('=', 1)[1]
|
||||||
num = pageUrl.rsplit('=', 1)[1]
|
|
||||||
return '%s-%s' % (num, filename)
|
return '%s-%s' % (num, filename)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -26,11 +26,10 @@ class Lackadaisy(_BasicScraper):
|
||||||
help = 'Index format: n'
|
help = 'Index format: n'
|
||||||
starter = bounceStarter
|
starter = bounceStarter
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
|
||||||
"""Use comic id for filename."""
|
"""Use comic id for filename."""
|
||||||
num = pageUrl.rsplit('=', 1)[-1]
|
num = page_url.rsplit('=', 1)[-1]
|
||||||
ext = imageUrl.rsplit('.', 1)[-1]
|
ext = image_url.rsplit('.', 1)[-1]
|
||||||
return 'lackadaisy_%s.%s' % (num, ext)
|
return 'lackadaisy_%s.%s' % (num, ext)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -82,9 +82,8 @@ class MarriedToTheSea(_BasicScraper):
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<< Yesterday")
|
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<< Yesterday")
|
||||||
help = 'Index format: mmddyy'
|
help = 'Index format: mmddyy'
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
unused, date, filename = image_url.rsplit('/', 2)
|
||||||
unused, date, filename = imageUrl.rsplit('/', 2)
|
|
||||||
return '%s-%s' % (date, filename)
|
return '%s-%s' % (date, filename)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -38,9 +38,8 @@ class NatalieDee(_BasicScraper):
|
||||||
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<< Yesterday")
|
prevSearch = compile(tagre("a", "href", r'([^"]+)') + "<< Yesterday")
|
||||||
help = 'Index format: mmddyy'
|
help = 'Index format: mmddyy'
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
unused, date, filename = image_url.rsplit('/', 2)
|
||||||
unused, date, filename = imageUrl.rsplit('/', 2)
|
|
||||||
return '%s-%s' % (date, filename)
|
return '%s-%s' % (date, filename)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -74,13 +74,12 @@ class OnTheFastrack(_BasicScraper):
|
||||||
url = 'http://onthefastrack.com/'
|
url = 'http://onthefastrack.com/'
|
||||||
stripUrl = url + 'comics/%s'
|
stripUrl = url + 'comics/%s'
|
||||||
firstStripUrl = stripUrl % 'november-13-2000'
|
firstStripUrl = stripUrl % 'november-13-2000'
|
||||||
imageSearch = compile(r'(http://safr\.kingfeatures\.com/idn/cnfeed/zone/js/content\.php\?file=.+)"')
|
imageSearch = compile(r'(https://safr\.kingfeatures\.com/idn/cnfeed/zone/js/content\.php\?file=.+)"')
|
||||||
prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url)
|
prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url)
|
||||||
help = 'Index format: monthname-dd-yyyy'
|
help = 'Index format: monthname-dd-yyyy'
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
name = page_url.rsplit('/', 3)[2]
|
||||||
name = pageUrl.rsplit('/', 3)[2]
|
|
||||||
if name == "onthefastrack.com":
|
if name == "onthefastrack.com":
|
||||||
import datetime
|
import datetime
|
||||||
name = datetime.date.today().strftime("%B-%d-%Y")
|
name = datetime.date.today().strftime("%B-%d-%Y")
|
||||||
|
|
|
@ -100,9 +100,8 @@ class PennyArcade(_BasicScraper):
|
||||||
prevUrl = "%s/%s/%s" % (dummy, yyyy, mm)
|
prevUrl = "%s/%s/%s" % (dummy, yyyy, mm)
|
||||||
return prevUrl
|
return prevUrl
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
p = page_url.split('/')
|
||||||
p = pageUrl.split('/')
|
|
||||||
return '%04d%02d%02d' % (int(p[4]), int(p[5]), int(p[6]))
|
return '%04d%02d%02d' % (int(p[4]), int(p[5]), int(p[6]))
|
||||||
|
|
||||||
|
|
||||||
|
@ -161,7 +160,7 @@ class PiledHigherAndDeeper(_BasicScraper):
|
||||||
prevSearch = compile(r'<a href=((comics/)?archive\.php\?comicid=\d+)>.*<img [^>]*prev_button\.gif')
|
prevSearch = compile(r'<a href=((comics/)?archive\.php\?comicid=\d+)>.*<img [^>]*prev_button\.gif')
|
||||||
nextSearch = compile(r'<a href=(archive\.php\?comicid=\d+)>.*<img [^>]*next_button\.gif')
|
nextSearch = compile(r'<a href=(archive\.php\?comicid=\d+)>.*<img [^>]*next_button\.gif')
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
namer = queryNamer('comicid', usePageUrl=True)
|
namer = queryNamer('comicid', use_page_url=True)
|
||||||
|
|
||||||
|
|
||||||
class Pimpette(_ParserScraper):
|
class Pimpette(_ParserScraper):
|
||||||
|
|
|
@ -197,10 +197,9 @@ class SexyLosers(_BasicScraper):
|
||||||
help = 'Index format: nnn'
|
help = 'Index format: nnn'
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
index = page_url.split('/')[-1].split('.')[0]
|
||||||
index = pageUrl.split('/')[-1].split('.')[0]
|
title = image_url.split('/')[-1].split('.')[0]
|
||||||
title = imageUrl.split('/')[-1].split('.')[0]
|
|
||||||
return index + '-' + title
|
return index + '-' + title
|
||||||
|
|
||||||
|
|
||||||
|
@ -228,9 +227,8 @@ class ShermansLagoon(_BasicScraper):
|
||||||
prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url)
|
prevSearch = compile(r'id="previouscomic" class="button white"><a href="(%scomics/[a-z0-9-]+/)"' % url)
|
||||||
help = 'Index format: monthname-day-year'
|
help = 'Index format: monthname-day-year'
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
name = page_url.rsplit('/', 3)[2]
|
||||||
name = pageUrl.rsplit('/', 3)[2]
|
|
||||||
if name == "shermanslagoon.com":
|
if name == "shermanslagoon.com":
|
||||||
name = datetime.date.today().strftime("%B-%d-%Y").lower()
|
name = datetime.date.today().strftime("%B-%d-%Y").lower()
|
||||||
# name is monthname-day-year
|
# name is monthname-day-year
|
||||||
|
@ -309,10 +307,9 @@ class SMBC(_ParserScraper):
|
||||||
help = 'Index format: nnnn'
|
help = 'Index format: nnnn'
|
||||||
textSearch = '//img[@id="comic"]/@title'
|
textSearch = '//img[@id="comic"]/@title'
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
|
||||||
"""Remove random noise from name."""
|
"""Remove random noise from name."""
|
||||||
return imageUrl.rsplit('-', 1)[-1]
|
return image_url.rsplit('-', 1)[-1]
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
"""Skip promo or missing update pages."""
|
"""Skip promo or missing update pages."""
|
||||||
|
@ -339,11 +336,10 @@ class SnowFlame(_WordPressScraper):
|
||||||
def getIndexStripUrl(self, index):
|
def getIndexStripUrl(self, index):
|
||||||
return self.stripUrl % tuple(index.split('-'))
|
return self.stripUrl % tuple(index.split('-'))
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
prefix, filename = image_url.rsplit('/', 1)
|
||||||
prefix, filename = imageUrl.rsplit('/', 1)
|
|
||||||
ro = compile(r'snowflame-([^-]+)-([^-]+)')
|
ro = compile(r'snowflame-([^-]+)-([^-]+)')
|
||||||
mo = ro.search(pageUrl)
|
mo = ro.search(page_url)
|
||||||
chapter = mo.group(1)
|
chapter = mo.group(1)
|
||||||
page = mo.group(2)
|
page = mo.group(2)
|
||||||
return "%s-%s-%s" % (chapter, page, filename)
|
return "%s-%s-%s" % (chapter, page, filename)
|
||||||
|
@ -434,15 +430,14 @@ class StarCrossdDestiny(_ParserScraper):
|
||||||
prevSearch = '//a[text()="prev"]'
|
prevSearch = '//a[text()="prev"]'
|
||||||
help = 'Index format: nnnnnnnn'
|
help = 'Index format: nnnnnnnn'
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
if image_url.find('ch1') == -1:
|
||||||
if imageUrl.find('ch1') == -1:
|
|
||||||
# At first all images were stored in a strips/ directory but
|
# At first all images were stored in a strips/ directory but
|
||||||
# that was changed with the introduction of book2
|
# that was changed with the introduction of book2
|
||||||
imageUrl = sub('(?:strips)|(?:images)', 'book1', imageUrl)
|
image_url = sub('(?:strips)|(?:images)', 'book1', image_url)
|
||||||
elif not imageUrl.find('strips') == -1:
|
elif not image_url.find('strips') == -1:
|
||||||
imageUrl = imageUrl.replace('strips/', '')
|
image_url = image_url.replace('strips/', '')
|
||||||
directory, filename = imageUrl.split('/')[-2:]
|
directory, filename = image_url.split('/')[-2:]
|
||||||
filename, extension = splitext(filename)
|
filename, extension = splitext(filename)
|
||||||
return directory + '-' + filename
|
return directory + '-' + filename
|
||||||
|
|
||||||
|
@ -519,12 +514,11 @@ class StuffNoOneToldMe(_BasicScraper):
|
||||||
multipleImagesPerStrip = True
|
multipleImagesPerStrip = True
|
||||||
help = 'Index format: yyyy/mm/stripname'
|
help = 'Index format: yyyy/mm/stripname'
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
|
||||||
"""Use page URL to construct meaningful image name."""
|
"""Use page URL to construct meaningful image name."""
|
||||||
parts, year, month, stripname = pageUrl.rsplit('/', 3)
|
parts, year, month, stripname = page_url.rsplit('/', 3)
|
||||||
stripname = stripname.rsplit('.', 1)[0]
|
stripname = stripname.rsplit('.', 1)[0]
|
||||||
parts, imagename = imageUrl.rsplit('/', 1)
|
parts, imagename = image_url.rsplit('/', 1)
|
||||||
return '%s-%s-%s-%s' % (year, month, stripname, imagename)
|
return '%s-%s-%s-%s' % (year, month, stripname, imagename)
|
||||||
|
|
||||||
def shouldSkipUrl(self, url, data):
|
def shouldSkipUrl(self, url, data):
|
||||||
|
|
|
@ -68,8 +68,7 @@ class _SmackJeeves(_ParserScraper):
|
||||||
else:
|
else:
|
||||||
return self.fetchUrl(prevurl, data, self.nextSearch)
|
return self.fetchUrl(prevurl, data, self.nextSearch)
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, image_url, page_url):
|
|
||||||
parts = page_url.split('/')
|
parts = page_url.split('/')
|
||||||
name = parts[-2]
|
name = parts[-2]
|
||||||
num = parts[-3]
|
num = parts[-3]
|
||||||
|
|
|
@ -20,8 +20,7 @@ class _Snafu(_ParserScraper):
|
||||||
def name(self):
|
def name(self):
|
||||||
return 'SnafuComics/' + super(_Snafu, self).name
|
return 'SnafuComics/' + super(_Snafu, self).name
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, image_url, page_url):
|
|
||||||
year, month, name = image_url.rsplit('/', 3)[1:]
|
year, month, name = image_url.rsplit('/', 3)[1:]
|
||||||
return "%04s_%02s_%s" % (year, month, name)
|
return "%04s_%02s_%s" % (year, month, name)
|
||||||
|
|
||||||
|
|
|
@ -85,9 +85,8 @@ class TheOrderOfTheStick(_BasicScraper):
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
starter = indirectStarter
|
starter = indirectStarter
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
return page_url.rsplit('/', 1)[-1][:-5]
|
||||||
return pageUrl.rsplit('/', 1)[-1][:-5]
|
|
||||||
|
|
||||||
|
|
||||||
class TheParkingLotIsFull(_BasicScraper):
|
class TheParkingLotIsFull(_BasicScraper):
|
||||||
|
@ -123,11 +122,10 @@ class TheThinHLine(_BasicScraper):
|
||||||
pageData = self.getPage(pageUrl)
|
pageData = self.getPage(pageUrl)
|
||||||
return super(TheThinHLine, self).getComicStrip(pageUrl, pageData)
|
return super(TheThinHLine, self).getComicStrip(pageUrl, pageData)
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
|
||||||
"""Use page URL sequence which is apparently increasing."""
|
"""Use page URL sequence which is apparently increasing."""
|
||||||
num = pageUrl.split('/')[-1]
|
num = page_url.split('/')[-1]
|
||||||
ext = imageUrl.rsplit('.', 1)[1]
|
ext = image_url.rsplit('.', 1)[1]
|
||||||
return "thethinhline-%s.%s" % (num, ext)
|
return "thethinhline-%s.%s" % (num, ext)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -60,6 +60,5 @@ class ViiviJaWagner(_BasicScraper):
|
||||||
help = 'Index format: none'
|
help = 'Index format: none'
|
||||||
lang = 'fi'
|
lang = 'fi'
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
return image_url.split('=')[1]
|
||||||
return imageUrl.split('=')[1]
|
|
||||||
|
|
|
@ -64,10 +64,9 @@ class WebDesignerCOTW(_BasicScraper):
|
||||||
"""Skip non-comic URLs."""
|
"""Skip non-comic URLs."""
|
||||||
return 'comics-of-the-week' not in url
|
return 'comics-of-the-week' not in url
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
imagename = image_url.rsplit('/', 1)[1]
|
||||||
imagename = imageUrl.rsplit('/', 1)[1]
|
week = compile(r'week-(\d+)').search(page_url).group(1)
|
||||||
week = compile(r'week-(\d+)').search(pageUrl).group(1)
|
|
||||||
return "%s-%s" % (week, imagename)
|
return "%s-%s" % (week, imagename)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -20,8 +20,7 @@ class _WLPComics(_ParserScraper):
|
||||||
def name(self):
|
def name(self):
|
||||||
return 'WLP/' + super(_WLPComics, self).name
|
return 'WLP/' + super(_WLPComics, self).name
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, image_url, page_url):
|
|
||||||
return (page_url.rsplit('/', 1)[-1].split('.')[0] + '_' +
|
return (page_url.rsplit('/', 1)[-1].split('.')[0] + '_' +
|
||||||
image_url.rsplit('/', 1)[-1])
|
image_url.rsplit('/', 1)[-1])
|
||||||
|
|
||||||
|
|
|
@ -26,8 +26,7 @@ class Xkcd(_BasicScraper):
|
||||||
textSearch = compile(tagre("img", "title", r'([^"]+)',
|
textSearch = compile(tagre("img", "title", r'([^"]+)',
|
||||||
before=r'//imgs\.xkcd\.com/comics/'))
|
before=r'//imgs\.xkcd\.com/comics/'))
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, image_url, page_url):
|
|
||||||
index = int(page_url.rstrip('/').rsplit('/', 1)[-1])
|
index = int(page_url.rstrip('/').rsplit('/', 1)[-1])
|
||||||
name = image_url.rsplit('/', 1)[-1].split('.')[0]
|
name = image_url.rsplit('/', 1)[-1].split('.')[0]
|
||||||
return '%03d-%s' % (index, name)
|
return '%03d-%s' % (index, name)
|
||||||
|
|
|
@ -34,9 +34,8 @@ class Zapiro(_BasicScraper):
|
||||||
r'(http://mg\.co\.za/cartoon/[^"]+)'))
|
r'(http://mg\.co\.za/cartoon/[^"]+)'))
|
||||||
help = 'Index format: yyyy-mm-dd-stripname'
|
help = 'Index format: yyyy-mm-dd-stripname'
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
name = image_url.split('/')[-3]
|
||||||
name = imageUrl.split('/')[-3]
|
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
|
||||||
|
@ -84,7 +83,6 @@ class Zwarwald(_BasicScraper):
|
||||||
self.stripUrl % '495',
|
self.stripUrl % '495',
|
||||||
)
|
)
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
prefix, year, month, name = image_url.rsplit('/', 3)
|
||||||
prefix, year, month, name = imageUrl.rsplit('/', 3)
|
|
||||||
return "%s_%s_%s" % (year, month, name)
|
return "%s_%s_%s" % (year, month, name)
|
||||||
|
|
|
@ -237,8 +237,7 @@ class Scraper(object):
|
||||||
"""Get starter URL from where to scrape comic strips."""
|
"""Get starter URL from where to scrape comic strips."""
|
||||||
return self.url
|
return self.url
|
||||||
|
|
||||||
@classmethod
|
def namer(self, image_url, page_url):
|
||||||
def namer(cls, imageUrl, pageUrl):
|
|
||||||
"""Return filename for given image and page URL."""
|
"""Return filename for given image and page URL."""
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue