xkcd now done with xpaths

2016-08-18 21:28:25 +12:00 · 2016-08-18 21:28:25 +12:00 · c04c62e92b
commit c04c62e92b
parent 9ba184eb43
1 changed file with 6 additions and 12 deletions
--- a/dosagelib/plugins/x.py
+++ b/dosagelib/plugins/x.py
@@ -5,26 +5,20 @@
 from __future__ import absolute_import, division, print_function
-from re import compile
+from ..scraper import _ParserScraper
 from ..scraper import _BasicScraper
 from ..helpers import bounceStarter
 from ..util import tagre
-
+class Xkcd(_ParserScraper):
 class Xkcd(_BasicScraper):
    name = 'xkcd'
    url = 'http://xkcd.com/'
    starter = bounceStarter
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '1'
-    imageSearch = compile(tagre("img", "src",
+    imageSearch = '//div[@id="comic"]/img'
-                                r'(//imgs\.xkcd\.com/comics/[^"]+)'))
+    prevSearch = '//a[@rel="prev"]'
-    prevSearch = compile(tagre("a", "href", r'(/\d+/)', before="prev"))
+    nextSearch = '//a[@rel="next"]'
    nextSearch = compile(tagre("a", "href", r'(/\d+/)', before="next"))
    help = 'Index format: n (unpadded)'
-    textSearch = compile(tagre("img", "title", r'([^"]+)',
+    textSearch = '//div[@id="comic"]/img/@title'
                               before=r'//imgs\.xkcd\.com/comics/'))
    def namer(self, image_url, page_url):
        index = int(page_url.rstrip('/').rsplit('/', 1)[-1])