Enhance default session with host-specific options
This makes much more sense than building custom sessions inside specific modules.
This commit is contained in:
parent
6041c1e03b
commit
45162bf7f2
3 changed files with 17 additions and 16 deletions
0
dosagelib/data/__init__.py
Normal file
0
dosagelib/data/__init__.py
Normal file
|
@ -31,7 +31,7 @@ class Session(requests.Session):
|
|||
longer delays.
|
||||
"""
|
||||
def __init__(self):
|
||||
super(Session, self).__init__()
|
||||
super().__init__()
|
||||
|
||||
retry = Retry(MaxRetries, backoff_factor=RetryBackoffFactor)
|
||||
self.mount('http://', HTTPAdapter(max_retries=retry))
|
||||
|
@ -39,6 +39,7 @@ class Session(requests.Session):
|
|||
self.headers.update({'User-Agent': UserAgent})
|
||||
|
||||
self.throttles = collections.defaultdict(lambda: RandomThrottle())
|
||||
self.host_options = {}
|
||||
|
||||
def send(self, request, **kwargs):
|
||||
if 'timeout' not in kwargs:
|
||||
|
@ -46,6 +47,8 @@ class Session(requests.Session):
|
|||
|
||||
hostname = urlparse(request.url).hostname
|
||||
self.throttles[hostname].delay()
|
||||
if hostname in self.host_options:
|
||||
kwargs.update(self.host_options[hostname])
|
||||
|
||||
return super(Session, self).send(request, **kwargs)
|
||||
|
||||
|
@ -54,6 +57,11 @@ class Session(requests.Session):
|
|||
"""
|
||||
self.throttles[hostname] = RandomThrottle(th_min, th_max)
|
||||
|
||||
def add_host_options(self, hostname, options):
|
||||
"""Adds custom options for a specific host: Might overwrite the existing one.
|
||||
"""
|
||||
self.host_options[hostname] = options
|
||||
|
||||
|
||||
class RandomThrottle(object):
|
||||
def __init__(self, th_min=0.0, th_max=0.3):
|
||||
|
|
|
@ -3,15 +3,10 @@
|
|||
# Copyright (C) 2019 Thomas W. Littauer
|
||||
import re
|
||||
|
||||
try:
|
||||
from functools import cached_property
|
||||
except ImportError:
|
||||
from cached_property import cached_property
|
||||
from importlib.resources import path as get_path
|
||||
|
||||
from ..scraper import _BasicScraper
|
||||
from ..helpers import bounceStarter, joinPathPartsNamer
|
||||
from ..http import Session
|
||||
|
||||
|
||||
class ComicsKingdom(_BasicScraper):
|
||||
|
@ -22,21 +17,19 @@ class ComicsKingdom(_BasicScraper):
|
|||
namer = joinPathPartsNamer((-2, -1), ())
|
||||
help = 'Index format: yyyy-mm-dd'
|
||||
|
||||
@cached_property
|
||||
def session(self):
|
||||
'''Use our own isolated session (ComicsKingdom screws up their TLS setup
|
||||
from time to time, this should "fix" it)'''
|
||||
s = Session()
|
||||
# slightly iffy hack taken from certifi
|
||||
self.cert_ctx = get_path("dosagelib.data", "godaddy-bundle-g2-2031.pem")
|
||||
s.verify = str(self.cert_ctx.__enter__())
|
||||
return s
|
||||
|
||||
def __init__(self, name, path):
|
||||
super(ComicsKingdom, self).__init__('ComicsKingdom/' + name)
|
||||
self.url = 'https://comicskingdom.com/' + path
|
||||
self.stripUrl = self.url + '/%s'
|
||||
|
||||
# slightly iffy hack taken from certifi
|
||||
# We need our own certificate bundle since ComicsKingdom screws up their
|
||||
# TLS setup from time to time, this should "fix" it
|
||||
self.cert_ctx = get_path('dosagelib.data', 'godaddy-bundle-g2-2031.pem')
|
||||
self.session.add_host_options('comicskingdom.com', {
|
||||
'verify': str(self.cert_ctx.__enter__()),
|
||||
})
|
||||
|
||||
def link_modifier(self, url, tourl):
|
||||
if self.url not in tourl:
|
||||
tourl = self.url + '/' + tourl.rsplit("/", 1)[1]
|
||||
|
|
Loading…
Reference in a new issue