Added comic ScurryAndCover...
- Yay, funky JavaScript parsing! - Start page isn't latest comic... Updated-by: Tobias Gruetzmacher <tobias-git@23.gs>
This commit is contained in:
parent
fa98f6ddbf
commit
af2e57d850
1 changed files with 34 additions and 0 deletions
|
@ -144,6 +144,40 @@ class Science(_BasicScraper):
|
|||
help = 'Index format: stripname'
|
||||
|
||||
|
||||
class ScurryAndCover(_ParserScraper):
    """Scraper for the comic hosted at scurry.ink.

    Two things are special-cased here:

    * The image name is not taken from the page via XPath; it is read from
      a JavaScript variable (``fileRoot``) inside an inline ``<script>``,
      see :meth:`fetchUrls`.
    * The start page is not the latest comic, so :meth:`starter` follows
      the "next" links until there are none left.
    """

    url = 'http://scurry.ink'
    prevSearch = '//div[@id="prevpage"]/..'
    nextSearch = '//div[@id="nextpage"]/..'
    # Not a real search expression: a sentinel that fetchUrls() compares
    # against to recognise an image lookup.
    imageSearch = 'MARKER'

    @classmethod
    def fetchUrls(cls, url, data, urlSearch):
        """Return the URLs found on a page for the given search.

        Any search other than ``cls.imageSearch`` is delegated unchanged to
        the parent class.  For the image search, the inline scripts in the
        page body are scanned for ``var fileRoot = '...'`` and a single image
        URL is built from the first value found.

        :param url: URL of the page ``data`` was loaded from.
        :param data: parsed page; must support ``.xpath()``.
        :param urlSearch: search expression selecting what to look for.
        :returns: list of URLs (exactly one for the image search).
        :raises ValueError: if no inline script defines ``fileRoot``.
        """
        if urlSearch != cls.imageSearch:
            return super(ScurryAndCover, cls).fetchUrls(url, data, urlSearch)

        # get javascript element and parse a variable value
        file_root = compile("var fileRoot = '([^']+)")
        for script in data.xpath('//body/script[@type="text/javascript"]'):
            # A script element without inline text (e.g. one that only has
            # a src attribute) has text None; treat that as empty instead
            # of letting the regex call fail with a TypeError.
            match = file_root.search(script.text or '')
            if match:
                return [cls.url + '/images/pages/' + match.group(1) +
                        '-xsmall.png']

        # Previously this fell through and returned None.  starter() below
        # treats ValueError as the "nothing found" signal from the fetch
        # helpers, so report a missing image the same way.
        raise ValueError(
            "No fileRoot variable found in scripts at URL %s." % url)

    @classmethod
    def starter(cls):
        """Go forward as far as possible, then start.

        Begins at ``cls.url`` and keeps following the "next" link; the last
        page that was reached before the lookup failed is returned.
        """
        url = cls.url
        while True:
            data = cls.getPage(url)
            try:
                url = cls.fetchUrl(url, data, cls.nextSearch)
            except ValueError:
                # No "next" link on this page: url still holds the latest.
                break
        return url
|
||||
|
||||
|
||||
class SequentialArt(_BasicScraper):
|
||||
url = 'http://www.collectedcurios.com/sequentialart.php'
|
||||
stripUrl = url + '?s=%s'
|
||||
|
|
Loading…
Reference in a new issue