xkcd now done with xpaths
This commit is contained in:
parent
9ba184eb43
commit
c04c62e92b
1 changed file with 6 additions and 12 deletions
|
@@ -5,26 +5,20 @@
|
||||||
|
|
||||||
from __future__ import absolute_import, division, print_function
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
from re import compile
|
from ..scraper import _ParserScraper
|
||||||
|
|
||||||
from ..scraper import _BasicScraper
|
|
||||||
from ..helpers import bounceStarter
|
from ..helpers import bounceStarter
|
||||||
from ..util import tagre
|
|
||||||
|
|
||||||
|
class Xkcd(_ParserScraper):
|
||||||
class Xkcd(_BasicScraper):
|
|
||||||
name = 'xkcd'
|
name = 'xkcd'
|
||||||
url = 'http://xkcd.com/'
|
url = 'http://xkcd.com/'
|
||||||
starter = bounceStarter
|
starter = bounceStarter
|
||||||
stripUrl = url + '%s/'
|
stripUrl = url + '%s/'
|
||||||
firstStripUrl = stripUrl % '1'
|
firstStripUrl = stripUrl % '1'
|
||||||
imageSearch = compile(tagre("img", "src",
|
imageSearch = '//div[@id="comic"]/img'
|
||||||
r'(//imgs\.xkcd\.com/comics/[^"]+)'))
|
prevSearch = '//a[@rel="prev"]'
|
||||||
prevSearch = compile(tagre("a", "href", r'(/\d+/)', before="prev"))
|
nextSearch = '//a[@rel="next"]'
|
||||||
nextSearch = compile(tagre("a", "href", r'(/\d+/)', before="next"))
|
|
||||||
help = 'Index format: n (unpadded)'
|
help = 'Index format: n (unpadded)'
|
||||||
textSearch = compile(tagre("img", "title", r'([^"]+)',
|
textSearch = '//div[@id="comic"]/img/@title'
|
||||||
before=r'//imgs\.xkcd\.com/comics/'))
|
|
||||||
|
|
||||||
def namer(self, image_url, page_url):
|
def namer(self, image_url, page_url):
|
||||||
index = int(page_url.rstrip('/').rsplit('/', 1)[-1])
|
index = int(page_url.rstrip('/').rsplit('/', 1)[-1])
|
||||||
|
|
Loading…
Reference in a new issue