From df46907f39309ab747a97069c3c96562c2b19fab Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Tue, 19 Apr 2016 23:48:14 +0200 Subject: [PATCH] Register EXSLT extensions by default. This allows comic module authors to use the full power of regular expressions in XPath expressions; see http://exslt.org/regexp/regexp.html for usage. Please be aware that these use the prefix re: instead of regexp: here. --- dosagelib/scraper.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py index 79f113614..438f42d66 100644 --- a/dosagelib/scraper.py +++ b/dosagelib/scraper.py @@ -1,7 +1,9 @@ # -*- coding: utf-8 -*- # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2014-2016 Tobias Gruetzmacher +# Copyright (C) 2015-2016 Tobias Gruetzmacher + +from __future__ import absolute_import, division, print_function import time import random @@ -428,6 +430,10 @@ class _ParserScraper(Scraper): XML_DECL = re.compile( r'^(<\?xml[^>]+)\s+encoding\s*=\s*["\'][^"\']*["\'](\s*\?>|)', re.U) + NS = { + "re": "http://exslt.org/regular-expressions" + } + # Switch between CSS and XPath selectors for this class. Since CSS needs # another Python module, XPath is the default for now. css = False @@ -455,7 +461,8 @@ class _ParserScraper(Scraper): if cls.css: searchFun = data.cssselect else: - searchFun = data.xpath + def searchFun(s): + return data.xpath(s, namespaces=cls.NS) searches = makeSequence(urlSearch) for search in searches: for match in searchFun(search):