Register EXSLT extensions by default.

This allows comic module authors to use the full power of regular
expressions in XPath expressions; see http://exslt.org/regexp/regexp.html
for usage. Please be aware that these functions use the prefix re: instead
of regexp: here.
This commit is contained in:
Tobias Gruetzmacher 2016-04-19 23:48:14 +02:00
parent 4204f5f1e4
commit df46907f39

View file

@ -1,7 +1,9 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
# Copyright (C) 2014-2016 Tobias Gruetzmacher
# Copyright (C) 2015-2016 Tobias Gruetzmacher
from __future__ import absolute_import, division, print_function
import time
import random
@ -428,6 +430,10 @@ class _ParserScraper(Scraper):
XML_DECL = re.compile(
r'^(<\?xml[^>]+)\s+encoding\s*=\s*["\'][^"\']*["\'](\s*\?>|)', re.U)
NS = {
"re": "http://exslt.org/regular-expressions"
}
# Switch between CSS and XPath selectors for this class. Since CSS needs
# another Python module, XPath is the default for now.
css = False
@ -455,7 +461,8 @@ class _ParserScraper(Scraper):
if cls.css:
searchFun = data.cssselect
else:
searchFun = data.xpath
def searchFun(s):
return data.xpath(s, namespaces=cls.NS)
searches = makeSequence(urlSearch)
for search in searches:
for match in searchFun(search):