Only cache the robots.txt URL when memoizing.

Bastian Kleineidam 2013-02-13 17:52:07 +01:00
parent a8f0a4f2c8
commit f16e860f1e
2 changed files with 5 additions and 5 deletions

@@ -11,18 +11,18 @@ class memoized (object):
         self.func = func
         self.cache = {}
 
-    def __call__(self, *args):
+    def __call__(self, *args, **kwargs):
         """Lookup and return cached result if found. Else call stored
         function with given arguments."""
         try:
             return self.cache[args]
         except KeyError:
-            self.cache[args] = value = self.func(*args)
+            self.cache[args] = value = self.func(*args, **kwargs)
             return value
         except TypeError:
             # uncachable -- for instance, passing a list as an argument.
             # Better to not cache than to blow up entirely.
-            return self.func(*args)
+            return self.func(*args, **kwargs)
 
     def __repr__(self):
         """Return the function's docstring."""
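
The effect of the decorator change, as a minimal standalone sketch (fetch_parser
and the example URL are hypothetical stand-ins, not identifiers from this
repository): keyword arguments are forwarded to the wrapped function but are
deliberately left out of the cache key, so calls that differ only in keyword
arguments share one cached result.

class memoized(object):
    """Cache a function's return value, keyed on positional arguments only."""

    def __init__(self, func):
        self.func = func
        self.cache = {}

    def __call__(self, *args, **kwargs):
        try:
            return self.cache[args]
        except KeyError:
            # First call with these positional args: compute and cache.
            self.cache[args] = value = self.func(*args, **kwargs)
            return value
        except TypeError:
            # Uncachable args (e.g. a list): call through without caching.
            return self.func(*args, **kwargs)

calls = []

@memoized
def fetch_parser(url, session=None):
    # Stand-in for a parser factory such as get_robotstxt_parser.
    calls.append(url)
    return "parser for " + url

fetch_parser("http://example.com/robots.txt", session=object())
fetch_parser("http://example.com/robots.txt", session=object())
assert len(calls) == 1  # second call is a cache hit despite a different session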
@@ -203,13 +203,13 @@ def check_robotstxt(url, session):
     @raises: IOError if URL is not allowed
     """
     roboturl = get_roboturl(url)
-    rp = get_robotstxt_parser(roboturl, session)
+    rp = get_robotstxt_parser(roboturl, session=session)
     if not rp.can_fetch(UserAgent, url):
         raise IOError("%s is disallowed by robots.txt" % url)
 
 
 @memoized
-def get_robotstxt_parser(url, session):
+def get_robotstxt_parser(url, session=None):
     """Get a RobotFileParser for the given robots.txt URL."""
     rp = robotparser.RobotFileParser()
     req = urlopen(url, session, max_content_bytes=MaxContentBytes, raise_for_status=False)
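
For contrast, a sketch of the old call pattern, reusing the memoized class and
the hypothetical fetch_parser naming from the sketch above: a session passed
positionally becomes part of the cache key, so every new session object forces
a fresh robots.txt fetch. Passing session=session as a keyword, as
check_robotstxt now does, keeps the cache keyed on the URL alone.

calls_old = []

@memoized
def fetch_parser_old(url, session):
    calls_old.append(url)
    return "parser for " + url

fetch_parser_old("http://example.com/robots.txt", object())
fetch_parser_old("http://example.com/robots.txt", object())
assert len(calls_old) == 2  # distinct sessions defeat the cache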