A Python3 fix.

This commit is contained in:
Bastian Kleineidam 2013-04-05 18:57:44 +02:00
parent 97522bc5ae
commit 0fbc005377

View file

@ -59,6 +59,17 @@ if hasattr(requests, 'adapters'):
requests.adapters.DEFAULT_RETRIES = MaxRetries requests.adapters.DEFAULT_RETRIES = MaxRetries
def unicode_safe(text, encoding=UrlEncoding, errors='ignore'):
    """Return text as a Unicode string, decoding it only when needed.

    The Unicode type is the ``unicode`` name when it resolves, and ``str``
    when looking it up raises NameError. A value that is already an
    instance of that type is returned unchanged; anything else is decoded
    by calling its ``decode`` method with the given encoding and error
    handler.
    """
    # Resolve the Unicode string type at call time so the same code runs
    # whether or not the ``unicode`` name is available.
    try:
        unicode_type = unicode
    except NameError:
        unicode_type = str
    already_unicode = isinstance(text, unicode_type)
    return text if already_unicode else text.decode(encoding, errors)
def tagre(tag, attribute, value, quote='"', before="", after=""): def tagre(tag, attribute, value, quote='"', before="", after=""):
"""Return a regular expression matching the given HTML tag, attribute """Return a regular expression matching the given HTML tag, attribute
and value. It matches the tag and attribute names case insensitive, and value. It matches the tag and attribute names case insensitive,
@ -192,9 +203,7 @@ def normaliseURL(url):
"""Removes any leading empty segments to avoid breaking urllib2; also replaces """Removes any leading empty segments to avoid breaking urllib2; also replaces
HTML entities and character references. HTML entities and character references.
""" """
# XXX does not work for python3 url = unicode_safe(url)
if isinstance(url, unicode):
url = url.encode(UrlEncoding, 'ignore')
# XXX: brutal hack # XXX: brutal hack
url = unescape(url) url = unescape(url)