Wait some time between requests.

This commit is contained in:
Bastian Kleineidam 2014-01-05 16:23:45 +01:00
parent 1affe58370
commit b6c913e2d5
2 changed files with 7 additions and 6 deletions

View file

@ -1,11 +1,13 @@
Dosage 2.10 (released xx.xx.2014) Dosage 2.10 (released xx.xx.2014)
Features: Features:
- comics: Comic strips are downloaded in parallel. - comics: Comic strips are downloaded in parallel. To prevent overload
of comic sites, no more than one download thread per host is allowed.
Changes: Changes:
- cmdline: Ensure only one instance of dosage is running to prevent - cmdline: Ensure only one instance of dosage is running to prevent
accidental DoS when fetching multiple comics of one site. accidental DoS when fetching multiple comics of one site.
- comics: Wait up to 1 second between two URL page downloads.
Dosage 2.9 (released 22.12.2013) Dosage 2.9 (released 22.12.2013)

View file

@ -3,6 +3,7 @@
# Copyright (C) 2012-2013 Bastian Kleineidam # Copyright (C) 2012-2013 Bastian Kleineidam
import requests import requests
import time import time
import random
import os import os
from . import loader, configuration from . import loader, configuration
from .util import (fetchUrl, fetchUrls, fetchText, getPageContent, from .util import (fetchUrl, fetchUrls, fetchText, getPageContent,
@ -73,9 +74,6 @@ class _BasicScraper(object):
# usually the index format help # usually the index format help
help = '' help = ''
# wait time between downloading comic strips
waitSeconds = 0
# HTTP session storing cookies # HTTP session storing cookies
session = requests.session() session = requests.session()
@ -189,8 +187,9 @@ class _BasicScraper(object):
out.warn(u"Already seen previous URL %r" % prevUrl) out.warn(u"Already seen previous URL %r" % prevUrl)
break break
url = prevUrl url = prevUrl
if url and self.waitSeconds: if url:
time.sleep(self.waitSeconds) # wait up to 1 second for next URL
time.sleep(random.random())
def getPrevUrl(self, url, data, baseUrl): def getPrevUrl(self, url, data, baseUrl):
"""Find previous URL.""" """Find previous URL."""