Some Python3 fixes.

This commit is contained in:
Bastian Kleineidam 2013-04-03 20:32:43 +02:00
parent 2c0ca04882
commit 0054ebfe0b
2 changed files with 29 additions and 16 deletions

View file

@@ -2,7 +2,10 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
import os import os
import time import time
import urllib try:
import urllib.parse.quote as url_quote
except ImportError:
from urllib import quote as url_quote
import codecs import codecs
import json import json
from . import rss, util, configuration from . import rss, util, configuration
@@ -22,13 +25,13 @@ class EventHandler(object):
This is used as a halfway sane default when the base URL is not This is used as a halfway sane default when the base URL is not
provided; not perfect, but should work in most cases.''' provided; not perfect, but should work in most cases.'''
components = util.splitpath(os.path.abspath(self.basepath)) components = util.splitpath(os.path.abspath(self.basepath))
url = '/'.join([urllib.quote(component, '') for component in components]) url = '/'.join([url_quote(component, '') for component in components])
return 'file:///' + url + '/' return 'file:///' + url + '/'
def getUrlFromFilename(self, filename): def getUrlFromFilename(self, filename):
"""Construct URL from filename.""" """Construct URL from filename."""
components = util.splitpath(util.getRelativePath(self.basepath, filename)) components = util.splitpath(util.getRelativePath(self.basepath, filename))
url = '/'.join([urllib.quote(component, '') for component in components]) url = '/'.join([url_quote(component, '') for component in components])
return self.baseurl + url return self.baseurl + url
def start(self): def start(self):

View file

@@ -2,9 +2,18 @@
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
from __future__ import division, print_function from __future__ import division, print_function
try:
import urllib, urlparse from urllib.parse import quote as url_quote, unquote as url_unquote
import robotparser except ImportError:
from urllib import quote as url_quote, unquote as url_unquote
try:
from urllib.parse import urlparse, urlunparse, urljoin, urlsplit
except ImportError:
from urlparse import urlparse, urlunparse, urljoin, urlsplit
try:
from urllib import robotparser
except ImportError:
import robotparser
import requests import requests
import sys import sys
import os import os
@@ -12,9 +21,10 @@ import cgi
import re import re
import traceback import traceback
import time import time
import types try:
from htmlentitydefs import name2codepoint from html.entities import name2codepoint
except ImportError:
from htmlentitydefs import name2codepoint
from .decorators import memoized from .decorators import memoized
from .output import out from .output import out
from .configuration import UserAgent, AppName, App, SupportUrl from .configuration import UserAgent, AppName, App, SupportUrl
@@ -205,7 +215,7 @@ def normaliseURL(url):
# XXX: brutal hack # XXX: brutal hack
url = unescape(url) url = unescape(url)
pu = list(urlparse.urlparse(url)) pu = list(urlparse(url))
segments = pu[2].split('/') segments = pu[2].split('/')
while segments and segments[0] in ('', '..'): while segments and segments[0] in ('', '..'):
del segments[0] del segments[0]
@@ -215,13 +225,13 @@ def normaliseURL(url):
pu[4] = pu[4][1:] pu[4] = pu[4][1:]
# remove anchor # remove anchor
pu[5] = "" pu[5] = ""
return urlparse.urlunparse(pu) return urlunparse(pu)
def get_roboturl(url): def get_roboturl(url):
"""Get robots.txt URL from given URL.""" """Get robots.txt URL from given URL."""
pu = urlparse.urlparse(url) pu = urlparse(url)
return urlparse.urlunparse((pu[0], pu[1], "/robots.txt", "", "", "")) return urlunparse((pu[0], pu[1], "/robots.txt", "", "", ""))
def check_robotstxt(url, session): def check_robotstxt(url, session):
@@ -323,7 +333,7 @@ def getRelativePath(basepath, path):
def getQueryParams(url): def getQueryParams(url):
"""Get URL query parameters.""" """Get URL query parameters."""
query = urlparse.urlsplit(url)[3] query = urlsplit(url)[3]
out.debug('Extracting query parameters from %r (%r)...' % (url, query)) out.debug('Extracting query parameters from %r (%r)...' % (url, query))
return cgi.parse_qs(query) return cgi.parse_qs(query)
@@ -413,7 +423,7 @@ def asciify(name):
def unquote(text): def unquote(text):
"""Replace all percent-encoded entities in text.""" """Replace all percent-encoded entities in text."""
while '%' in text: while '%' in text:
newtext = urllib.unquote(text) newtext = url_unquote(text)
if newtext == text: if newtext == text:
break break
text = newtext text = newtext
@@ -422,7 +432,7 @@ def unquote(text):
def quote(text, safechars='/'): def quote(text, safechars='/'):
"""Percent-encode given text.""" """Percent-encode given text."""
return urllib.quote(text, safechars) return url_quote(text, safechars)
def strsize (b): def strsize (b):