From f16e860f1e58d63cbc6893aa1d8f8c1f603b7439 Mon Sep 17 00:00:00 2001
From: Bastian Kleineidam
Date: Wed, 13 Feb 2013 17:52:07 +0100
Subject: [PATCH] Only cache robots.txt URL on memoize.

---
 dosagelib/decorators.py | 6 +++---
 dosagelib/util.py       | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/dosagelib/decorators.py b/dosagelib/decorators.py
index ced98e702..76f5f7312 100644
--- a/dosagelib/decorators.py
+++ b/dosagelib/decorators.py
@@ -11,18 +11,18 @@ class memoized (object):
         self.func = func
         self.cache = {}
 
-    def __call__(self, *args):
+    def __call__(self, *args, **kwargs):
         """Lookup and return cached result if found. Else call stored
         function with given arguments."""
         try:
             return self.cache[args]
         except KeyError:
-            self.cache[args] = value = self.func(*args)
+            self.cache[args] = value = self.func(*args, **kwargs)
             return value
         except TypeError:
             # uncachable -- for instance, passing a list as an argument.
             # Better to not cache than to blow up entirely.
-            return self.func(*args)
+            return self.func(*args, **kwargs)
 
     def __repr__(self):
         """Return the function's docstring."""
diff --git a/dosagelib/util.py b/dosagelib/util.py
index e1f541ceb..573b0e6ae 100644
--- a/dosagelib/util.py
+++ b/dosagelib/util.py
@@ -203,13 +203,13 @@ def check_robotstxt(url, session):
     @raises: IOError if URL is not allowed
     """
     roboturl = get_roboturl(url)
-    rp = get_robotstxt_parser(roboturl, session)
+    rp = get_robotstxt_parser(roboturl, session=session)
     if not rp.can_fetch(UserAgent, url):
         raise IOError("%s is disallowed by robots.txt" % url)
 
 
 @memoized
-def get_robotstxt_parser(url, session):
+def get_robotstxt_parser(url, session=None):
     """Get a RobotFileParser for the given robots.txt URL."""
     rp = robotparser.RobotFileParser()
     req = urlopen(url, session, max_content_bytes=MaxContentBytes, raise_for_status=False)
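
Note (not part of the patch): the memoize cache above is keyed only on positional
arguments, so passing the session as a keyword argument keeps the parser cache keyed
by the robots.txt URL alone. Below is a minimal, self-contained sketch of that
behavior; fetch_parser is a hypothetical stand-in for get_robotstxt_parser, and the
memoized class is condensed from the patched dosagelib/decorators.py.

    class memoized(object):
        """Cache a function's return value per set of positional arguments."""
        def __init__(self, func):
            self.func = func
            self.cache = {}

        def __call__(self, *args, **kwargs):
            try:
                # Only the positional args form the cache key; kwargs are ignored.
                return self.cache[args]
            except KeyError:
                self.cache[args] = value = self.func(*args, **kwargs)
                return value
            except TypeError:
                # Uncachable arguments (e.g. a list) -- call through without caching.
                return self.func(*args, **kwargs)

    @memoized
    def fetch_parser(url, session=None):
        print("fetching", url)
        return object()  # stand-in for a RobotFileParser

    s1, s2 = object(), object()  # stand-ins for two different HTTP sessions
    fetch_parser("http://example.com/robots.txt", session=s1)  # prints "fetching ..."
    fetch_parser("http://example.com/robots.txt", session=s2)  # cache hit: same URL key

Because the session no longer participates in the cache key, each robots.txt is
fetched at most once per URL regardless of which session issues the request.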