From e5e7dfacd667c0ff5377aca66a0adcf39ab74f8b Mon Sep 17 00:00:00 2001
From: Tobias Gruetzmacher
Date: Tue, 3 Dec 2019 20:27:37 +0100
Subject: [PATCH] Move basic HTTP setup into a new module

We now subclass requests' Session to make further extensions of the HTTP
flow possible.
---
 dosagelib/http.py     | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 dosagelib/scraper.py  |  6 +++---
 dosagelib/updater.py  | 17 ++++++++++-------
 dosagelib/util.py     | 23 -----------------------
 scripts/scriptutil.py |  7 +++----
 5 files changed, 62 insertions(+), 37 deletions(-)
 create mode 100644 dosagelib/http.py

diff --git a/dosagelib/http.py b/dosagelib/http.py
new file mode 100644
index 000000000..209a93681
--- /dev/null
+++ b/dosagelib/http.py
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2019 Tobias Gruetzmacher
+
+from __future__ import absolute_import, division, print_function
+
+import requests
+from requests.adapters import HTTPAdapter
+from requests.packages.urllib3.util.retry import Retry
+
+from .configuration import UserAgent
+
+# Default number of retries
+MaxRetries = 3
+
+# Factor for retry backoff (see urllib3.util.retry, this default means
+# 2s, 4s, 8s)
+RetryBackoffFactor = 2
+
+# Default connection timeout
+ConnectionTimeoutSecs = 60
+
+
+class Session(requests.Session):
+    """A requests session with retries, a User-Agent and a default timeout."""
+
+    def __init__(self):
+        super(Session, self).__init__()
+
+        # Retry failed requests with exponential backoff on both schemes.
+        retry = Retry(MaxRetries, backoff_factor=RetryBackoffFactor)
+        self.mount('http://', HTTPAdapter(max_retries=retry))
+        self.mount('https://', HTTPAdapter(max_retries=retry))
+        self.headers.update({'User-Agent': UserAgent})
+
+    def send(self, request, **kwargs):
+        """Send a prepared request, applying the default timeout if unset."""
+        # requests' Session.request() always passes a ``timeout`` keyword
+        # (``None`` when the caller gave none), so test the value instead of
+        # the key's presence; otherwise the default would never be applied.
+        if kwargs.get('timeout') is None:
+            kwargs['timeout'] = ConnectionTimeoutSecs
+        return super(Session, self).send(request, **kwargs)
+
+
+# A default session for cookie and connection sharing
+default_session = Session()
diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py
index 0607a3f6e..8083f0712 100644
--- a/dosagelib/scraper.py
+++ b/dosagelib/scraper.py
@@ -24,9 +24,9 @@
try: except ImportError: pycountry = None -from . import loader, configuration, languages +from . import configuration, http, languages, loader from .util import (get_page, makeSequence, get_system_uid, unescape, tagre, - normaliseURL, prettyMatcherList, requests_session, uniq) + normaliseURL, prettyMatcherList, uniq) from .comic import ComicStrip from .output import out from .events import getHandler @@ -85,7 +85,7 @@ class Scraper(object): allow_errors = () # HTTP session for configuration & cookies - session = requests_session() + session = http.default_session @classmethod def getmodules(cls): diff --git a/dosagelib/updater.py b/dosagelib/updater.py index ce2392b9b..8dfb90064 100644 --- a/dosagelib/updater.py +++ b/dosagelib/updater.py @@ -1,16 +1,18 @@ # -*- coding: utf-8 -*- # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2017 Tobias Gruetzmacher +# Copyright (C) 2015-2019 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function import os + +from distutils.version import StrictVersion + import dosagelib from dosagelib import configuration -from .util import urlopen -from distutils.version import StrictVersion -import requests +from . 
import http + UPDATE_URL = "https://api.github.com/repos/webcomics/dosage/releases/latest" @@ -38,13 +40,14 @@ def check_update(): def get_online_version(): """Download update info and parse it.""" - session = requests.session() - page = urlopen(UPDATE_URL, session).json() + page = http.default_session.get(UPDATE_URL).json() version, url = None, None version = page['tag_name'] if os.name == 'nt': - url = next((x['browser_download_url'] for x in page['assets'] if x['content_type'] == 'application/x-msdos-program'), configuration.Url) + url = next((x['browser_download_url'] for x in page['assets'] if + x['content_type'] == 'application/x-msdos-program'), + configuration.Url) else: url = page['tarball_url'] return version, url diff --git a/dosagelib/util.py b/dosagelib/util.py index 723091fdc..2ac5c7dad 100644 --- a/dosagelib/util.py +++ b/dosagelib/util.py @@ -9,8 +9,6 @@ from six.moves.urllib.parse import ( quote as url_quote, unquote as url_unquote, urlparse, urlunparse, urlsplit) from six.moves.urllib_robotparser import RobotFileParser import requests -from requests.adapters import HTTPAdapter -from requests.packages.urllib3.util.retry import Retry import sys import os import cgi @@ -40,16 +38,6 @@ from . import AppName # Maximum content size for HTML pages MaxContentBytes = 1024 * 1024 * 3 # 3 MB -# Default number of retries -MaxRetries = 3 - -# Factor for retry backoff (see urllib3.util.retry, this default means -# 2s, 4s, 8s) -RetryBackoffFactor = 2 - -# Default connection timeout -ConnectionTimeoutSecs = 60 - # The character set to encode non-ASCII characters in a URL. 
See also # http://tools.ietf.org/html/rfc2396#section-2.1 # Note that the encoding is not really specified, but most browsers @@ -59,15 +47,6 @@ ConnectionTimeoutSecs = 60 UrlEncoding = "utf-8" -def requests_session(): - s = requests.Session() - retry = Retry(MaxRetries, backoff_factor=RetryBackoffFactor) - s.mount('http://', HTTPAdapter(max_retries=retry)) - s.mount('https://', HTTPAdapter(max_retries=retry)) - s.headers.update({'User-Agent': UserAgent}) - return s - - def get_system_uid(): """Get a (probably) unique ID to identify a system. Used to differentiate votes. @@ -285,8 +264,6 @@ def urlopen(url, session, referrer=None, max_content_bytes=None, kwargs['headers']['Referer'] = referrer out.debug(u'Sending headers %s' % kwargs['headers'], level=3) out.debug(u'Sending cookies %s' % session.cookies) - if 'timeout' not in kwargs: - kwargs['timeout'] = ConnectionTimeoutSecs if 'data' not in kwargs: method = 'GET' else: diff --git a/scripts/scriptutil.py b/scripts/scriptutil.py index f82c2e257..33d4e5163 100644 --- a/scripts/scriptutil.py +++ b/scripts/scriptutil.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs # Copyright (C) 2012-2014 Bastian Kleineidam -# Copyright (C) 2015-2017 Tobias Gruetzmacher +# Copyright (C) 2015-2019 Tobias Gruetzmacher from __future__ import absolute_import, division, print_function @@ -17,13 +17,12 @@ try: except ImportError: from os import rename -import requests from lxml import html sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) # noqa from dosagelib.util import unescape, get_page -from dosagelib import scraper +from dosagelib import scraper, http def first_lower(x): @@ -39,7 +38,7 @@ class ComicListUpdater(object): def __init__(self, name): self.json = name.replace(".py", ".json") - self.session = requests.Session() + self.session = http.default_session self.sleep = 0 def get_url(self, url, expand=True):