A Python3 fix.
This commit is contained in:
parent
97522bc5ae
commit
0fbc005377
1 changed files with 12 additions and 3 deletions
|
@ -59,6 +59,17 @@ if hasattr(requests, 'adapters'):
|
||||||
requests.adapters.DEFAULT_RETRIES = MaxRetries
|
requests.adapters.DEFAULT_RETRIES = MaxRetries
|
||||||
|
|
||||||
|
|
||||||
|
def unicode_safe(text, encoding=UrlEncoding, errors='ignore'):
    """Return text as a Unicode string, decoding it first when necessary.

    @param text: value to convert; returned unchanged when it already is
        an instance of the interpreter's Unicode text type
    @param encoding: codec passed to text.decode()
    @param errors: error handling scheme passed to text.decode()
    @return: the Unicode version of text
    """
    try:
        # The ``unicode`` builtin only exists on Python 2.
        unicode_type = unicode
    except NameError:
        # The name lookup failed, so fall back to ``str`` as the text type.
        unicode_type = str
    if not isinstance(text, unicode_type):
        return text.decode(encoding, errors)
    return text
|
||||||
|
|
||||||
|
|
||||||
def tagre(tag, attribute, value, quote='"', before="", after=""):
|
def tagre(tag, attribute, value, quote='"', before="", after=""):
|
||||||
"""Return a regular expression matching the given HTML tag, attribute
|
"""Return a regular expression matching the given HTML tag, attribute
|
||||||
and value. It matches the tag and attribute names case insensitive,
|
and value. It matches the tag and attribute names case insensitive,
|
||||||
|
@ -192,9 +203,7 @@ def normaliseURL(url):
|
||||||
"""Removes any leading empty segments to avoid breaking urllib2; also replaces
|
"""Removes any leading empty segments to avoid breaking urllib2; also replaces
|
||||||
HTML entities and character references.
|
HTML entities and character references.
|
||||||
"""
|
"""
|
||||||
# XXX does not work for python3
|
url = unicode_safe(url)
|
||||||
if isinstance(url, unicode):
|
|
||||||
url = url.encode(UrlEncoding, 'ignore')
|
|
||||||
# XXX: brutal hack
|
# XXX: brutal hack
|
||||||
url = unescape(url)
|
url = unescape(url)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue