mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	#17403: urllib.robotparser normalizes the URLs before adding them to the rule line.
This helps in handling certain types of invalid URLs in a conservative manner.
This commit is contained in:
		
							parent
							
								
									eb4c9c77b8
								
							
						
					
					
						commit
						c70a6ae49b
					
				
					 3 changed files with 17 additions and 0 deletions
				
			
		| 
						 | 
					@ -234,6 +234,18 @@ def RobotTest(index, robots_txt, good_urls, bad_urls,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
RobotTest(15, doc, good, bad)
 | 
					RobotTest(15, doc, good, bad)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# 16. Empty query (issue #17403). Normalizing the url first.
 | 
				
			||||||
 | 
					doc = """
 | 
				
			||||||
 | 
					User-agent: *
 | 
				
			||||||
 | 
					Allow: /some/path?
 | 
				
			||||||
 | 
					Disallow: /another/path?
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					good = ['/some/path?']
 | 
				
			||||||
 | 
					bad = ['/another/path?']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					RobotTest(16, doc, good, bad)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class NetworkTestCase(unittest.TestCase):
 | 
					class NetworkTestCase(unittest.TestCase):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -157,6 +157,7 @@ def __init__(self, path, allowance):
 | 
				
			||||||
        if path == '' and not allowance:
 | 
					        if path == '' and not allowance:
 | 
				
			||||||
            # an empty value means allow all
 | 
					            # an empty value means allow all
 | 
				
			||||||
            allowance = True
 | 
					            allowance = True
 | 
				
			||||||
 | 
					        path = urllib.parse.urlunparse(urllib.parse.urlparse(path))
 | 
				
			||||||
        self.path = urllib.parse.quote(path)
 | 
					        self.path = urllib.parse.quote(path)
 | 
				
			||||||
        self.allowance = allowance
 | 
					        self.allowance = allowance
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -24,6 +24,10 @@ Core and Builtins
 | 
				
			||||||
Library
 | 
					Library
 | 
				
			||||||
-------
 | 
					-------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- Issue #17403: urllib.robotparser normalizes the URLs before adding to
 | 
				
			||||||
 | 
					  ruleline. This helps in handling certain types of invalid URLs in a conservative
 | 
				
			||||||
 | 
					  manner.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- Issue #18025: Fixed a segfault in io.BufferedIOBase.readinto() when raw
 | 
					- Issue #18025: Fixed a segfault in io.BufferedIOBase.readinto() when raw
 | 
				
			||||||
  stream's read() returns more bytes than requested.
 | 
					  stream's read() returns more bytes than requested.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue