Added comic ScurryAndCover...

- Yay, funky JavaScript parsing!
- Start page isn't latest comic...

Updated-by: Tobias Gruetzmacher <tobias-git@23.gs>
This commit is contained in:
Damjan Košir 2015-11-06 23:47:24 +13:00 committed by Tobias Gruetzmacher
parent fa98f6ddbf
commit af2e57d850

View file

@ -144,6 +144,40 @@ class Science(_BasicScraper):
help = 'Index format: stripname'
class ScurryAndCover(_ParserScraper):
    """Scraper for the comic hosted at scurry.ink.

    The image file name is read from a JavaScript variable (``fileRoot``)
    in an inline script of the page, so image lookup is special-cased in
    fetchUrls(). The start page is not the latest comic, so starter()
    follows the "next" links until none is left.
    """
    url = 'http://scurry.ink'
    prevSearch = '//div[@id="prevpage"]/..'
    nextSearch = '//div[@id="nextpage"]/..'
    # Sentinel, not a real search expression: fetchUrls() compares against
    # it to recognise image lookups.
    imageSearch = 'MARKER'

    @classmethod
    def fetchUrls(cls, url, data, urlSearch):
        """Return a list of URLs found in the page for the given search.

        Every search other than cls.imageSearch is delegated to the parent
        class. For the image search, the inline scripts of the page body are
        scanned for ``var fileRoot = '...'`` and the image URL is built from
        the first value found.

        url: address of the page being examined.
        data: parsed page; must provide an xpath() method.
        urlSearch: search expression, or cls.imageSearch for the image.

        Raises ValueError if no inline script defines fileRoot.
        """
        if urlSearch != cls.imageSearch:
            return super(ScurryAndCover, cls).fetchUrls(url, data, urlSearch)

        # get javascript element and parse a variable value
        scripts = data.xpath('//body/script[@type="text/javascript"]')
        regex = compile("var fileRoot = '([^']+)")
        for script in scripts:
            # .text is None for script elements without inline content
            # (e.g. those loaded via src), which the regex cannot handle.
            match = regex.search(script.text or '')
            if match:
                image = match.group(1)
                return [cls.url + '/images/pages/' + image + '-xsmall.png']
        # NOTE(review): ValueError is what starter() expects from a failed
        # fetchUrl(); presumably the base fetchUrls() signals "no match" the
        # same way -- confirm against _ParserScraper.
        raise ValueError(
            "No fileRoot variable found in page scripts at %s" % url)

    @classmethod
    def starter(cls):
        """Go forward as far as possible, then start.

        Beginning at cls.url, follow the nextSearch link page by page and
        return the URL of the first page on which no next link is found.
        """
        url = cls.url
        while True:
            data = cls.getPage(url)
            try:
                url = cls.fetchUrl(url, data, cls.nextSearch)
            except ValueError:
                # No further "next" link: this is the newest page.
                break
        return url
class SequentialArt(_BasicScraper):
url = 'http://www.collectedcurios.com/sequentialart.php'
stripUrl = url + '?s=%s'