class ScurryAndCover(_ParserScraper):
    """Scraper for the comic hosted at scurry.ink.

    The page image is not referenced by a plain element in the markup;
    its name is stored in a JavaScript variable (``fileRoot``) inside an
    inline script, so image lookup is done by parsing the script text.
    The start page is not the latest comic, so ``starter`` walks the
    "next" links until there are none left.
    """
    url = 'http://scurry.ink'
    prevSearch = '//div[@id="prevpage"]/..'
    nextSearch = '//div[@id="nextpage"]/..'
    # Sentinel rather than a real XPath: fetchUrls() compares against it to
    # decide when to use the JavaScript parsing path instead of the parent's
    # search.
    imageSearch = 'MARKER'

    @classmethod
    def fetchUrls(cls, url, data, urlSearch):
        """Return the URLs found on a page for the given search.

        Any search other than ``cls.imageSearch`` is delegated to the
        parent class. For the image search, the inline scripts in the page
        body are scanned for ``var fileRoot = '...'`` and the image URL is
        built from the first value found.

        :param url: URL of the page being searched.
        :param data: parsed page; must provide an ``xpath`` method.
        :param urlSearch: the search to perform.
        :returns: a one-element list with the image URL.
        :raises ValueError: if no script defines ``fileRoot``.
        """
        if urlSearch != cls.imageSearch:
            return super(ScurryAndCover, cls).fetchUrls(url, data, urlSearch)

        # get javascript elements and parse a variable value
        scripts = data.xpath('//body/script[@type="text/javascript"]')
        regex = compile(r"var fileRoot = '([^']+)")
        for script in scripts:
            # A script element without inline text (e.g. one that only has
            # a src attribute) may have text set to None; treat it as empty
            # instead of letting the regex raise TypeError.
            match = regex.search(script.text or '')
            if match:
                return [cls.url + '/images/pages/' + match.group(1) +
                        '-xsmall.png']

        # Signal a failed search with ValueError (the exception starter()
        # already handles for searches) instead of implicitly returning None.
        raise ValueError(
            "JavaScript variable fileRoot not found at URL %s." % url)

    @classmethod
    def starter(cls):
        """Go forward as far as possible, then start.

        Follows ``nextSearch`` links from ``cls.url`` until the search
        raises ValueError (no further page), and returns the last URL
        reached.
        """
        url = cls.url
        while True:
            data = cls.getPage(url)
            try:
                url = cls.fetchUrl(url, data, cls.nextSearch)
            except ValueError:
                break
        return url