Register EXSLT extensions by default.
This allows comic module authors to use the full power of regular expressions in XPath expressions; see http://exslt.org/regexp/regexp.html for usage. Please be aware that the EXSLT functions are registered under the prefix re: instead of regexp: here.
This commit is contained in:
parent
4204f5f1e4
commit
df46907f39
1 changed file with 9 additions and 2 deletions
|
@ -1,7 +1,9 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2014 Bastian Kleineidam
|
# Copyright (C) 2012-2014 Bastian Kleineidam
|
||||||
# Copyright (C) 2014-2016 Tobias Gruetzmacher
|
# Copyright (C) 2015-2016 Tobias Gruetzmacher
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function
|
||||||
|
|
||||||
import time
|
import time
|
||||||
import random
|
import random
|
||||||
|
@ -428,6 +430,10 @@ class _ParserScraper(Scraper):
|
||||||
XML_DECL = re.compile(
|
XML_DECL = re.compile(
|
||||||
r'^(<\?xml[^>]+)\s+encoding\s*=\s*["\'][^"\']*["\'](\s*\?>|)', re.U)
|
r'^(<\?xml[^>]+)\s+encoding\s*=\s*["\'][^"\']*["\'](\s*\?>|)', re.U)
|
||||||
|
|
||||||
|
NS = {
|
||||||
|
"re": "http://exslt.org/regular-expressions"
|
||||||
|
}
|
||||||
|
|
||||||
# Switch between CSS and XPath selectors for this class. Since CSS needs
|
# Switch between CSS and XPath selectors for this class. Since CSS needs
|
||||||
# another Python module, XPath is the default for now.
|
# another Python module, XPath is the default for now.
|
||||||
css = False
|
css = False
|
||||||
|
@ -455,7 +461,8 @@ class _ParserScraper(Scraper):
|
||||||
if cls.css:
|
if cls.css:
|
||||||
searchFun = data.cssselect
|
searchFun = data.cssselect
|
||||||
else:
|
else:
|
||||||
searchFun = data.xpath
|
def searchFun(s):
|
||||||
|
return data.xpath(s, namespaces=cls.NS)
|
||||||
searches = makeSequence(urlSearch)
|
searches = makeSequence(urlSearch)
|
||||||
for search in searches:
|
for search in searches:
|
||||||
for match in searchFun(search):
|
for match in searchFun(search):
|
||||||
|
|
Loading…
Reference in a new issue