From 45162bf7f27340c15ab80144fa943c540622794b Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Tue, 15 Feb 2022 00:18:48 +0100 Subject: [PATCH] Enhance default session with host-specific options This makes much more sense than building custom sessions inside specific modules. --- dosagelib/data/__init__.py | 0 dosagelib/http.py | 10 +++++++++- dosagelib/plugins/comicskingdom.py | 23 ++++++++--------------- 3 files changed, 17 insertions(+), 16 deletions(-) create mode 100644 dosagelib/data/__init__.py diff --git a/dosagelib/data/__init__.py b/dosagelib/data/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/dosagelib/http.py b/dosagelib/http.py index 1ece66ff5..aa5b4e5e8 100644 --- a/dosagelib/http.py +++ b/dosagelib/http.py @@ -31,7 +31,7 @@ class Session(requests.Session): longer delays. """ def __init__(self): - super(Session, self).__init__() + super().__init__() retry = Retry(MaxRetries, backoff_factor=RetryBackoffFactor) self.mount('http://', HTTPAdapter(max_retries=retry)) @@ -39,6 +39,7 @@ class Session(requests.Session): self.headers.update({'User-Agent': UserAgent}) self.throttles = collections.defaultdict(lambda: RandomThrottle()) + self.host_options = {} def send(self, request, **kwargs): if 'timeout' not in kwargs: @@ -46,6 +47,8 @@ class Session(requests.Session): hostname = urlparse(request.url).hostname self.throttles[hostname].delay() + if hostname in self.host_options: + kwargs.update(self.host_options[hostname]) return super(Session, self).send(request, **kwargs) @@ -54,6 +57,11 @@ class Session(requests.Session): """ self.throttles[hostname] = RandomThrottle(th_min, th_max) + def add_host_options(self, hostname, options): + """Adds custom options for a specific host: Might overwrite the existing one. 
+ """ + self.host_options[hostname] = options + class RandomThrottle(object): def __init__(self, th_min=0.0, th_max=0.3): diff --git a/dosagelib/plugins/comicskingdom.py b/dosagelib/plugins/comicskingdom.py index 4222fe0b2..cebe1cb16 100644 --- a/dosagelib/plugins/comicskingdom.py +++ b/dosagelib/plugins/comicskingdom.py @@ -3,15 +3,10 @@ # Copyright (C) 2019 Thomas W. Littauer import re -try: - from functools import cached_property -except ImportError: - from cached_property import cached_property from importlib.resources import path as get_path from ..scraper import _BasicScraper from ..helpers import bounceStarter, joinPathPartsNamer -from ..http import Session class ComicsKingdom(_BasicScraper): @@ -22,21 +17,19 @@ class ComicsKingdom(_BasicScraper): namer = joinPathPartsNamer((-2, -1), ()) help = 'Index format: yyyy-mm-dd' - @cached_property - def session(self): - '''Use our own isolated session (ComicsKingdom screws up their TLS setup - from time to time, this should "fix" it)''' - s = Session() - # slightly iffy hack taken from certifi - self.cert_ctx = get_path("dosagelib.data", "godaddy-bundle-g2-2031.pem") - s.verify = str(self.cert_ctx.__enter__()) - return s - def __init__(self, name, path): super(ComicsKingdom, self).__init__('ComicsKingdom/' + name) self.url = 'https://comicskingdom.com/' + path self.stripUrl = self.url + '/%s' + # slightly iffy hack taken from certifi + # We need our own certificate bundle since ComicsKingdom screws up their + # TLS setup from time to time, this should "fix" it + self.cert_ctx = get_path('dosagelib.data', 'godaddy-bundle-g2-2031.pem') + self.session.add_host_options('comicskingdom.com', { + 'verify': str(self.cert_ctx.__enter__()), + }) + def link_modifier(self, url, tourl): if self.url not in tourl: tourl = self.url + '/' + tourl.rsplit("/", 1)[1]