mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 21:51:50 +00:00 
			
		
		
		
	Bug 3347: robotparser failed because it didn't convert bytes to a string.
The solution is to decode the bytes to text as UTF-8. I'm not entirely sure this is safe, but robots.txt appears to be expected to be ASCII.
This commit is contained in:
		
							parent
							
								
									48577d1944
								
							
						
					
					
						commit
						73fd46d24e
					
				
					 2 changed files with 18 additions and 5 deletions
				
			
		|  | @ -136,8 +136,9 @@ def RobotTest(index, robots_txt, good_urls, bad_urls, | |||
| 
 | ||||
| RobotTest(7, doc, good, bad) | ||||
| 
 | ||||
| class TestCase(unittest.TestCase): | ||||
|     def runTest(self): | ||||
| class NetworkTestCase(unittest.TestCase): | ||||
| 
 | ||||
|     def testPasswordProtectedSite(self): | ||||
|         support.requires('network') | ||||
|         # whole site is password-protected. | ||||
|         url = 'http://mueblesmoraleda.com' | ||||
|  | @ -146,9 +147,17 @@ def runTest(self): | |||
|         parser.read() | ||||
|         self.assertEqual(parser.can_fetch("*", url+"/robots.txt"), False) | ||||
| 
 | ||||
|     def testPythonOrg(self): | ||||
|         support.requires('network') | ||||
|         parser = urllib.robotparser.RobotFileParser( | ||||
|             "http://www.python.org/robots.txt") | ||||
|         parser.read() | ||||
|         self.assertTrue(parser.can_fetch("*", | ||||
|                                          "http://www.python.org/robots.txt")) | ||||
| 
 | ||||
| def test_main(): | ||||
|     support.run_unittest(NetworkTestCase) | ||||
|     support.run_unittest(tests) | ||||
|     TestCase().run() | ||||
| 
 | ||||
| if __name__=='__main__': | ||||
|     support.Verbose = 1 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Jeremy Hylton
						Jeremy Hylton