Added comic ScurryAndCover...

- Yay, funky JavaScript parsing! - Start page isn't latest comic... Updated-by: Tobias Gruetzmacher <tobias-git@23.gs>
2015-11-06 23:47:24 +13:00 · 2015-11-06 23:47:24 +13:00 · af2e57d850
commit af2e57d850
parent fa98f6ddbf
1 changed files with 34 additions and 0 deletions
--- a/dosagelib/plugins/s.py
+++ b/dosagelib/plugins/s.py
@ -144,6 +144,40 @@ class Science(_BasicScraper):
    help = 'Index format: stripname'


+class ScurryAndCover(_ParserScraper):
+    """Scraper for the ScurryAndCover comic at scurry.ink.
+
+    The image file name is not looked up with an XPath expression;
+    instead its base name is read from a ``fileRoot`` variable assigned
+    in an inline JavaScript block, so ``fetchUrls`` is overridden for
+    image lookups. ``starter`` is overridden to walk the "next" links
+    from the start page until none is left.
+    """
+
+    url = 'http://scurry.ink'
+    prevSearch = '//div[@id="prevpage"]/..'
+    nextSearch = '//div[@id="nextpage"]/..'
+    # Sentinel rather than a real search expression: fetchUrls() compares
+    # its urlSearch argument against it to recognise image lookups.
+    imageSearch = 'MARKER'
+
+    @classmethod
+    def fetchUrls(cls, url, data, urlSearch):
+        """Return the URLs matching urlSearch on the parsed page.
+
+        Any search other than ``cls.imageSearch`` is delegated to the
+        parent class. For the image search, the inline scripts in the
+        page body are scanned for ``var fileRoot = '...'`` and a
+        one-element list with the image URL built from the first match
+        is returned.
+
+        Raises ValueError if no script defines ``fileRoot``, mirroring
+        the error that ``starter`` expects from a failed URL lookup.
+        """
+        if urlSearch != cls.imageSearch:
+            return super(ScurryAndCover, cls).fetchUrls(url, data, urlSearch)
+
+        # get javascript element and parse a variable value
+        scripts = data.xpath('//body/script[@type="text/javascript"]')
+
+        regex = compile("var fileRoot = '([^']+)")
+        for script in scripts:
+            # A script element without inline text has text None; fall
+            # back to '' so the regex does not raise TypeError.
+            match = regex.search(script.text or '')
+            if match:
+                image = match.group(1)
+                return [cls.url + '/images/pages/' + image + '-xsmall.png']
+
+        # Fail loudly instead of implicitly returning None.
+        raise ValueError("No fileRoot variable found at %s." % url)
+
+    @classmethod
+    def starter(cls):
+        """Go forward as far as possible, then start.
+
+        Begins at ``cls.url`` and keeps following the ``nextSearch``
+        link until ``fetchUrl`` raises ValueError, then returns the
+        last URL reached.
+        """
+        url = cls.url
+        while True:
+            data = cls.getPage(url)
+            try:
+                url = cls.fetchUrl(url, data, cls.nextSearch)
+            except ValueError:
+                # no further "next" link on this page
+                break
+        return url
+
+
 class SequentialArt(_BasicScraper):
    url = 'http://www.collectedcurios.com/sequentialart.php'
    stripUrl = url + '?s=%s'