Stripping should be done in normaliseURL.

This commit is contained in:
Bastian Kleineidam 2014-06-08 10:12:33 +02:00
parent c528fd1822
commit 687d27d534
2 changed files with 6 additions and 4 deletions

View file

@ -42,7 +42,7 @@ class ComicImage(object):
"""Set URL and filename.""" """Set URL and filename."""
self.name = name self.name = name
self.referrer = referrer self.referrer = referrer
self.url = url.strip() self.url = url
self.dirname = dirname self.dirname = dirname
filename = getFilename(filename) filename = getFilename(filename)
self.filename, self.ext = os.path.splitext(filename) self.filename, self.ext = os.path.splitext(filename)

View file

@ -270,10 +270,12 @@ def unescape(text):
_nopathquote_chars = "-;/=,~*+()@!" _nopathquote_chars = "-;/=,~*+()@!"
def normaliseURL(url): def normaliseURL(url):
"""Removes any leading empty segments to avoid breaking urllib2; also replaces """Normalising
HTML entities and character references. - strips any leading or trailing whitespace,
- replaces HTML entities and character references,
- removes any leading empty segments to avoid breaking urllib2.
""" """
url = unicode_safe(url) url = unicode_safe(url).strip()
# XXX: brutal hack # XXX: brutal hack
url = unescape(url) url = unescape(url)