Minor useragent refactoring
This commit is contained in:
parent
4c2a339e25
commit
93fe5d5987
1 changed file with 3 additions and 3 deletions
|
@ -305,7 +305,7 @@ def check_robotstxt(url, session):
|
||||||
roboturl = get_roboturl(url)
|
roboturl = get_roboturl(url)
|
||||||
rp = get_robotstxt_parser(roboturl, session=session)
|
rp = get_robotstxt_parser(roboturl, session=session)
|
||||||
if not rp.can_fetch(UserAgent, str(url)):
|
if not rp.can_fetch(UserAgent, str(url)):
|
||||||
raise IOError("%s is disallowed by robots.txt" % url)
|
raise IOError("%s is disallowed by %s" % (url, roboturl))
|
||||||
|
|
||||||
|
|
||||||
@memoized
|
@memoized
|
||||||
|
@ -329,10 +329,10 @@ def get_robotstxt_parser(url, session=None):
|
||||||
|
|
||||||
def urlopen(url, session, referrer=None, max_content_bytes=None,
|
def urlopen(url, session, referrer=None, max_content_bytes=None,
|
||||||
timeout=ConnectionTimeoutSecs, raise_for_status=True,
|
timeout=ConnectionTimeoutSecs, raise_for_status=True,
|
||||||
stream=False, data=None):
|
stream=False, data=None, useragent=UserAgent):
|
||||||
"""Open an URL and return the response object."""
|
"""Open an URL and return the response object."""
|
||||||
out.debug(u'Open URL %s' % url)
|
out.debug(u'Open URL %s' % url)
|
||||||
headers = {'User-Agent': UserAgent}
|
headers = {'User-Agent': useragent}
|
||||||
if referrer:
|
if referrer:
|
||||||
headers['Referer'] = referrer
|
headers['Referer'] = referrer
|
||||||
out.debug(u'Sending headers %s' % headers, level=3)
|
out.debug(u'Sending headers %s' % headers, level=3)
|
||||||
|
|
Loading…
Reference in a new issue