mirror of
https://github.com/python/cpython.git
synced 2025-11-01 14:11:41 +00:00
#4108: the first default entry (User-agent: *) wins.
This commit is contained in:
parent
70120e202d
commit
0a0fc07d37
3 changed files with 21 additions and 2 deletions
|
|
@@ -216,6 +216,20 @@ def RobotTest(index, robots_txt, good_urls, bad_urls,
|
||||||
|
|
||||||
RobotTest(14, doc, good, bad)
|
RobotTest(14, doc, good, bad)
|
||||||
|
|
||||||
|
# 15. For issue #4108 (obey first * entry)
|
||||||
|
doc = """
|
||||||
|
User-agent: *
|
||||||
|
Disallow: /some/path
|
||||||
|
|
||||||
|
User-agent: *
|
||||||
|
Disallow: /another/path
|
||||||
|
"""
|
||||||
|
|
||||||
|
good = ['/another/path']
|
||||||
|
bad = ['/some/path']
|
||||||
|
|
||||||
|
RobotTest(15, doc, good, bad)
|
||||||
|
|
||||||
|
|
||||||
class NetworkTestCase(unittest.TestCase):
|
class NetworkTestCase(unittest.TestCase):
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -66,7 +66,9 @@ def read(self):
|
||||||
def _add_entry(self, entry):
|
def _add_entry(self, entry):
|
||||||
if "*" in entry.useragents:
|
if "*" in entry.useragents:
|
||||||
# the default entry is considered last
|
# the default entry is considered last
|
||||||
self.default_entry = entry
|
if self.default_entry is None:
|
||||||
|
# the first default entry wins
|
||||||
|
self.default_entry = entry
|
||||||
else:
|
else:
|
||||||
self.entries.append(entry)
|
self.entries.append(entry)
|
||||||
|
|
||||||
|
|
@@ -118,7 +120,7 @@ def parse(self, lines):
|
||||||
entry.rulelines.append(RuleLine(line[1], True))
|
entry.rulelines.append(RuleLine(line[1], True))
|
||||||
state = 2
|
state = 2
|
||||||
if state == 2:
|
if state == 2:
|
||||||
self.entries.append(entry)
|
self._add_entry(entry)
|
||||||
|
|
||||||
|
|
||||||
def can_fetch(self, useragent, url):
|
def can_fetch(self, useragent, url):
|
||||||
|
|
|
||||||
|
|
@@ -475,6 +475,9 @@ C-API
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #4108: In urllib.robotparser, if there are multiple 'User-agent: *'
|
||||||
|
entries, consider the first one.
|
||||||
|
|
||||||
- Issue #6630: Allow customizing regex flags when subclassing the
|
- Issue #6630: Allow customizing regex flags when subclassing the
|
||||||
string.Template class.
|
string.Template class.
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue