Wait some time between requests.

This commit is contained in:
Bastian Kleineidam 2014-01-05 16:23:45 +01:00
parent 1affe58370
commit b6c913e2d5
2 changed files with 7 additions and 6 deletions

View file

@ -1,11 +1,13 @@
Dosage 2.10 (released xx.xx.2014)
Features:
- comics: Comic strips are downloaded in parallel.
- comics: Comic strips are downloaded in parallel. To prevent overload
of comic sites, no more than one download thread per host is allowed.
Changes:
- cmdline: Ensure only one instance of dosage is running to prevent
accidental DoS when fetching multiple comics of one site.
- comics: Wait a random time of up to 1 second between consecutive page downloads.
Dosage 2.9 (released 22.12.2013)

View file

@ -3,6 +3,7 @@
# Copyright (C) 2012-2013 Bastian Kleineidam
import requests
import time
import random
import os
from . import loader, configuration
from .util import (fetchUrl, fetchUrls, fetchText, getPageContent,
@ -73,9 +74,6 @@ class _BasicScraper(object):
# usually the index format help
help = ''
# wait time between downloading comic strips
waitSeconds = 0
# HTTP session storing cookies
session = requests.session()
@ -189,8 +187,9 @@ class _BasicScraper(object):
out.warn(u"Already seen previous URL %r" % prevUrl)
break
url = prevUrl
if url and self.waitSeconds:
time.sleep(self.waitSeconds)
if url:
# wait up to 1 second for next URL
time.sleep(random.random())
def getPrevUrl(self, url, data, baseUrl):
"""Find previous URL."""