Register EXSLT extensions by default.

This allows comic module authors to use the full power of regular expressions in XPath expressions; see http://exslt.org/regexp/regexp.html for usage. Please be aware that these functions use the prefix re: instead of regexp: here.
2016-04-19 23:48:14 +02:00 · 2016-04-19 23:48:14 +02:00 · df46907f39
commit df46907f39
parent 4204f5f1e4
1 changed files with 9 additions and 2 deletions
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@@ -1,7 +1,9 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2014-2016 Tobias Gruetzmacher
+# Copyright (C) 2015-2016 Tobias Gruetzmacher
+
+from __future__ import absolute_import, division, print_function

 import time
 import random
@@ -428,6 +430,10 @@ class _ParserScraper(Scraper):
    XML_DECL = re.compile(
        r'^(<\?xml[^>]+)\s+encoding\s*=\s*["\'][^"\']*["\'](\s*\?>|)', re.U)

+    NS = {
+        "re": "http://exslt.org/regular-expressions"
+    }
+
    # Switch between CSS and XPath selectors for this class. Since CSS needs
    # another Python module, XPath is the default for now.
    css = False
@@ -455,7 +461,8 @@ class _ParserScraper(Scraper):
        if cls.css:
            searchFun = data.cssselect
        else:
-            searchFun = data.xpath
+            def searchFun(s):
+                return data.xpath(s, namespaces=cls.NS)
        searches = makeSequence(urlSearch)
        for search in searches:
            for match in searchFun(search):