xkcd now done with xpaths

2016-08-18 21:28:25 +12:00 · 2016-08-18 21:28:25 +12:00 · c04c62e92b
commit c04c62e92b
parent 9ba184eb43
1 changed files with 6 additions and 12 deletions
--- a/dosagelib/plugins/x.py
+++ b/dosagelib/plugins/x.py
@@ -5,26 +5,20 @@

 from __future__ import absolute_import, division, print_function

-from re import compile
-
-from ..scraper import _BasicScraper
+from ..scraper import _ParserScraper
 from ..helpers import bounceStarter
-from ..util import tagre

-
-class Xkcd(_BasicScraper):
+class Xkcd(_ParserScraper):
    name = 'xkcd'
    url = 'http://xkcd.com/'
    starter = bounceStarter
    stripUrl = url + '%s/'
    firstStripUrl = stripUrl % '1'
-    imageSearch = compile(tagre("img", "src",
-                                r'(//imgs\.xkcd\.com/comics/[^"]+)'))
-    prevSearch = compile(tagre("a", "href", r'(/\d+/)', before="prev"))
-    nextSearch = compile(tagre("a", "href", r'(/\d+/)', before="next"))
+    imageSearch = '//div[@id="comic"]/img'
+    prevSearch = '//a[@rel="prev"]'
+    nextSearch = '//a[@rel="next"]'
    help = 'Index format: n (unpadded)'
-    textSearch = compile(tagre("img", "title", r'([^"]+)',
-                               before=r'//imgs\.xkcd\.com/comics/'))
+    textSearch = '//div[@id="comic"]/img/@title'

    def namer(self, image_url, page_url):
        index = int(page_url.rstrip('/').rsplit('/', 1)[-1])