From b6c913e2d5afb3b73ddf34a7b17c54cd1f2d45b1 Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Sun, 5 Jan 2014 16:23:45 +0100 Subject: [PATCH] Wait some time between requests. --- doc/changelog.txt | 4 +++- dosagelib/scraper.py | 9 ++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/doc/changelog.txt b/doc/changelog.txt index 96510be15..b4a3979f9 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -1,11 +1,13 @@ Dosage 2.10 (released xx.xx.2014) Features: -- comics: Comic strips are downloaded in parallel. +- comics: Comic strips are downloaded in parallel. To prevent overload + of comic sites, no more than one download thread per host is allowed. Changes: - cmdline: Ensure only one instance of dosage is running to prevent accidental DoS when fetching multiple comics of one site. +- comics: Wait up to 1 second between two URL page downloads. Dosage 2.9 (released 22.12.2013) diff --git a/dosagelib/scraper.py b/dosagelib/scraper.py index edfd70181..0567ba700 100644 --- a/dosagelib/scraper.py +++ b/dosagelib/scraper.py @@ -3,6 +3,7 @@ # Copyright (C) 2012-2013 Bastian Kleineidam import requests import time +import random import os from . import loader, configuration from .util import (fetchUrl, fetchUrls, fetchText, getPageContent, @@ -73,9 +74,6 @@ class _BasicScraper(object): # usually the index format help help = '' - # wait time between downloading comic strips - waitSeconds = 0 - # HTTP session storing cookies session = requests.session() @@ -189,8 +187,9 @@ class _BasicScraper(object): out.warn(u"Already seen previous URL %r" % prevUrl) break url = prevUrl - if url and self.waitSeconds: - time.sleep(self.waitSeconds) + if url: + # wait up to 1 second for next URL + time.sleep(random.random()) def getPrevUrl(self, url, data, baseUrl): """Find previous URL."""