xkcd now done with xpaths

This commit is contained in:
Damjan Košir 2016-08-18 21:28:25 +12:00
parent 9ba184eb43
commit c04c62e92b

View file

@ -5,26 +5,20 @@
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
from re import compile from ..scraper import _ParserScraper
from ..scraper import _BasicScraper
from ..helpers import bounceStarter from ..helpers import bounceStarter
from ..util import tagre
class Xkcd(_ParserScraper):
class Xkcd(_BasicScraper):
name = 'xkcd' name = 'xkcd'
url = 'http://xkcd.com/' url = 'http://xkcd.com/'
starter = bounceStarter starter = bounceStarter
stripUrl = url + '%s/' stripUrl = url + '%s/'
firstStripUrl = stripUrl % '1' firstStripUrl = stripUrl % '1'
imageSearch = compile(tagre("img", "src", imageSearch = '//div[@id="comic"]/img'
r'(//imgs\.xkcd\.com/comics/[^"]+)')) prevSearch = '//a[@rel="prev"]'
prevSearch = compile(tagre("a", "href", r'(/\d+/)', before="prev")) nextSearch = '//a[@rel="next"]'
nextSearch = compile(tagre("a", "href", r'(/\d+/)', before="next"))
help = 'Index format: n (unpadded)' help = 'Index format: n (unpadded)'
textSearch = compile(tagre("img", "title", r'([^"]+)', textSearch = '//div[@id="comic"]/img/@title'
before=r'//imgs\.xkcd\.com/comics/'))
def namer(self, image_url, page_url): def namer(self, image_url, page_url):
index = int(page_url.rstrip('/').rsplit('/', 1)[-1]) index = int(page_url.rstrip('/').rsplit('/', 1)[-1])