Some Python3 fixes.
This commit is contained in:
parent
2c0ca04882
commit
0054ebfe0b
2 changed files with 29 additions and 16 deletions
|
@ -2,7 +2,10 @@
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
import urllib
|
# Bind ``url_quote`` to the percent-encoding helper on both major Python
# versions.  ``quote`` is a function, not a submodule, so it must be pulled in
# with ``from ... import``: the form ``import urllib.parse.quote`` raises
# ImportError on Python 3, which would fall through to the Python 2 branch and
# fail there as well.
try:
    # Python 3 location.
    from urllib.parse import quote as url_quote
except ImportError:
    # Python 2 location.
    from urllib import quote as url_quote
|
||||||
import codecs
|
import codecs
|
||||||
import json
|
import json
|
||||||
from . import rss, util, configuration
|
from . import rss, util, configuration
|
||||||
|
@ -22,13 +25,13 @@ class EventHandler(object):
|
||||||
This is used as a halfway sane default when the base URL is not
|
This is used as a halfway sane default when the base URL is not
|
||||||
provided; not perfect, but should work in most cases.'''
|
provided; not perfect, but should work in most cases.'''
|
||||||
components = util.splitpath(os.path.abspath(self.basepath))
|
components = util.splitpath(os.path.abspath(self.basepath))
|
||||||
url = '/'.join([urllib.quote(component, '') for component in components])
|
url = '/'.join([url_quote(component, '') for component in components])
|
||||||
return 'file:///' + url + '/'
|
return 'file:///' + url + '/'
|
||||||
|
|
||||||
def getUrlFromFilename(self, filename):
|
def getUrlFromFilename(self, filename):
|
||||||
"""Construct URL from filename."""
|
"""Construct URL from filename."""
|
||||||
components = util.splitpath(util.getRelativePath(self.basepath, filename))
|
components = util.splitpath(util.getRelativePath(self.basepath, filename))
|
||||||
url = '/'.join([urllib.quote(component, '') for component in components])
|
url = '/'.join([url_quote(component, '') for component in components])
|
||||||
return self.baseurl + url
|
return self.baseurl + url
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
|
|
|
@ -2,9 +2,18 @@
|
||||||
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
|
||||||
# Copyright (C) 2012-2013 Bastian Kleineidam
|
# Copyright (C) 2012-2013 Bastian Kleineidam
|
||||||
from __future__ import division, print_function
|
from __future__ import division, print_function
|
||||||
|
try:
|
||||||
import urllib, urlparse
|
from urllib.parse import quote as url_quote, unquote as url_unquote
|
||||||
import robotparser
|
except ImportError:
|
||||||
|
from urllib import quote as url_quote, unquote as url_unquote
|
||||||
|
try:
|
||||||
|
from urllib.parse import urlparse, urlunparse, urljoin, urlsplit
|
||||||
|
except ImportError:
|
||||||
|
from urlparse import urlparse, urlunparse, urljoin, urlsplit
|
||||||
|
try:
|
||||||
|
from urllib import robotparser
|
||||||
|
except ImportError:
|
||||||
|
import robotparser
|
||||||
import requests
|
import requests
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
@ -12,9 +21,10 @@ import cgi
|
||||||
import re
|
import re
|
||||||
import traceback
|
import traceback
|
||||||
import time
|
import time
|
||||||
import types
|
try:
|
||||||
from htmlentitydefs import name2codepoint
|
from html.entities import name2codepoint
|
||||||
|
except ImportError:
|
||||||
|
from htmlentitydefs import name2codepoint
|
||||||
from .decorators import memoized
|
from .decorators import memoized
|
||||||
from .output import out
|
from .output import out
|
||||||
from .configuration import UserAgent, AppName, App, SupportUrl
|
from .configuration import UserAgent, AppName, App, SupportUrl
|
||||||
|
@ -205,7 +215,7 @@ def normaliseURL(url):
|
||||||
# XXX: brutal hack
|
# XXX: brutal hack
|
||||||
url = unescape(url)
|
url = unescape(url)
|
||||||
|
|
||||||
pu = list(urlparse.urlparse(url))
|
pu = list(urlparse(url))
|
||||||
segments = pu[2].split('/')
|
segments = pu[2].split('/')
|
||||||
while segments and segments[0] in ('', '..'):
|
while segments and segments[0] in ('', '..'):
|
||||||
del segments[0]
|
del segments[0]
|
||||||
|
@ -215,13 +225,13 @@ def normaliseURL(url):
|
||||||
pu[4] = pu[4][1:]
|
pu[4] = pu[4][1:]
|
||||||
# remove anchor
|
# remove anchor
|
||||||
pu[5] = ""
|
pu[5] = ""
|
||||||
return urlparse.urlunparse(pu)
|
return urlunparse(pu)
|
||||||
|
|
||||||
|
|
||||||
def get_roboturl(url):
|
def get_roboturl(url):
|
||||||
"""Get robots.txt URL from given URL."""
|
"""Get robots.txt URL from given URL."""
|
||||||
pu = urlparse.urlparse(url)
|
pu = urlparse(url)
|
||||||
return urlparse.urlunparse((pu[0], pu[1], "/robots.txt", "", "", ""))
|
return urlunparse((pu[0], pu[1], "/robots.txt", "", "", ""))
|
||||||
|
|
||||||
|
|
||||||
def check_robotstxt(url, session):
|
def check_robotstxt(url, session):
|
||||||
|
@ -323,7 +333,7 @@ def getRelativePath(basepath, path):
|
||||||
|
|
||||||
def getQueryParams(url):
|
def getQueryParams(url):
|
||||||
"""Get URL query parameters."""
|
"""Get URL query parameters."""
|
||||||
query = urlparse.urlsplit(url)[3]
|
query = urlsplit(url)[3]
|
||||||
out.debug('Extracting query parameters from %r (%r)...' % (url, query))
|
out.debug('Extracting query parameters from %r (%r)...' % (url, query))
|
||||||
return cgi.parse_qs(query)
|
return cgi.parse_qs(query)
|
||||||
|
|
||||||
|
@ -413,7 +423,7 @@ def asciify(name):
|
||||||
def unquote(text):
|
def unquote(text):
|
||||||
"""Replace all percent-encoded entities in text."""
|
"""Replace all percent-encoded entities in text."""
|
||||||
while '%' in text:
|
while '%' in text:
|
||||||
newtext = urllib.unquote(text)
|
newtext = url_unquote(text)
|
||||||
if newtext == text:
|
if newtext == text:
|
||||||
break
|
break
|
||||||
text = newtext
|
text = newtext
|
||||||
|
@ -422,7 +432,7 @@ def unquote(text):
|
||||||
|
|
||||||
def quote(text, safechars='/'):
|
def quote(text, safechars='/'):
|
||||||
"""Percent-encode given text."""
|
"""Percent-encode given text."""
|
||||||
return urllib.quote(text, safechars)
|
return url_quote(text, safechars)
|
||||||
|
|
||||||
|
|
||||||
def strsize (b):
|
def strsize (b):
|
||||||
|
|
Loading…
Reference in a new issue