Wait some time between requests.
This commit is contained in:
parent
1affe58370
commit
b6c913e2d5
2 changed files with 7 additions and 6 deletions
|
@ -1,11 +1,13 @@
|
||||||
Dosage 2.10 (released xx.xx.2014)
|
Dosage 2.10 (released xx.xx.2014)
|
||||||
|
|
||||||
Features:
|
Features:
|
||||||
- comics: Comic strips are downloaded in parallel.
|
- comics: Comic strips are downloaded in parallel. To prevent overload
|
||||||
|
of comic sites, no more than one download thread per host is allowed.
|
||||||
|
|
||||||
Changes:
|
Changes:
|
||||||
- cmdline: Ensure only one instance of dosage is running to prevent
|
- cmdline: Ensure only one instance of dosage is running to prevent
|
||||||
accidental DoS when fetching multiple comics of one site.
|
accidental DoS when fetching multiple comics of one site.
|
||||||
|
- comics: Wait up to 1 second between two URL page downloads.
|
||||||
|
|
||||||
|
|
||||||
Dosage 2.9 (released 22.12.2013)
|
Dosage 2.9 (released 22.12.2013)
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
# Copyright (C) 2012-2013 Bastian Kleineidam
|
# Copyright (C) 2012-2013 Bastian Kleineidam
|
||||||
import requests
|
import requests
|
||||||
import time
|
import time
|
||||||
|
import random
|
||||||
import os
|
import os
|
||||||
from . import loader, configuration
|
from . import loader, configuration
|
||||||
from .util import (fetchUrl, fetchUrls, fetchText, getPageContent,
|
from .util import (fetchUrl, fetchUrls, fetchText, getPageContent,
|
||||||
|
@ -73,9 +74,6 @@ class _BasicScraper(object):
|
||||||
# usually the index format help
|
# usually the index format help
|
||||||
help = ''
|
help = ''
|
||||||
|
|
||||||
# wait time between downloading comic strips
|
|
||||||
waitSeconds = 0
|
|
||||||
|
|
||||||
# HTTP session storing cookies
|
# HTTP session storing cookies
|
||||||
session = requests.session()
|
session = requests.session()
|
||||||
|
|
||||||
|
@ -189,8 +187,9 @@ class _BasicScraper(object):
|
||||||
out.warn(u"Already seen previous URL %r" % prevUrl)
|
out.warn(u"Already seen previous URL %r" % prevUrl)
|
||||||
break
|
break
|
||||||
url = prevUrl
|
url = prevUrl
|
||||||
if url and self.waitSeconds:
|
if url:
|
||||||
time.sleep(self.waitSeconds)
|
# wait up to 1 second for next URL
|
||||||
|
time.sleep(random.random())
|
||||||
|
|
||||||
def getPrevUrl(self, url, data, baseUrl):
|
def getPrevUrl(self, url, data, baseUrl):
|
||||||
"""Find previous URL."""
|
"""Find previous URL."""
|
||||||
|
|
Loading…
Reference in a new issue