diff --git a/scripts/scriptutil.py b/scripts/scriptutil.py
index 33a67769a..f82c2e257 100644
--- a/scripts/scriptutil.py
+++ b/scripts/scriptutil.py
@@ -8,6 +8,7 @@ from __future__ import absolute_import, division, print_function
 import os
 import re
 import sys
+import time
 import json
 import codecs
 
@@ -39,6 +40,7 @@ class ComicListUpdater(object):
     def __init__(self, name):
         self.json = name.replace(".py", ".json")
         self.session = requests.Session()
+        self.sleep = 0
 
     def get_url(self, url, expand=True):
         """Get an HTML page and parse it with LXML."""
@@ -47,6 +49,8 @@ class ComicListUpdater(object):
             data = html.document_fromstring(get_page(url, self.session).text)
             if expand:
                 data.make_links_absolute(url)
+            if self.sleep > 0:
+                time.sleep(self.sleep)
             return data
         except IOError as msg:
             print("ERROR:", msg, file=sys.stderr)
diff --git a/scripts/smackjeeves.py b/scripts/smackjeeves.py
index 10f01424d..690604729 100755
--- a/scripts/smackjeeves.py
+++ b/scripts/smackjeeves.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (C) 2004-2008 Tristan Seligmann and Jonathan Jacobs
 # Copyright (C) 2012-2014 Bastian Kleineidam
-# Copyright (C) 2015-2016 Tobias Gruetzmacher
+# Copyright (C) 2015-2017 Tobias Gruetzmacher
 """
 Script to get a list of smackjeeves.com comics and save the info in a
 JSON file for further processing.
@@ -115,6 +115,10 @@ class SmackJeevesUpdater(ComicListUpdater):
         "Razor",
     )
 
+    def __init__(self, name):
+        super(SmackJeevesUpdater, self).__init__(name)
+        self.sleep = 2
+
     def handle_url(self, url):
         """Parse one search result page."""
         data = self.get_url(url)