mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	Close issue 3437 - missing state change when Allow lines are processed.
Adds test cases which use Allow: as well.
This commit is contained in:
		
							parent
							
								
									4b99e9b479
								
							
						
					
					
						commit
						1ef19f0de1
					
				
					 2 changed files with 74 additions and 0 deletions
				
			
		|  | @ -76,6 +76,10 @@ def parse(self, lines): | ||||||
|         """parse the input lines from a robots.txt file. |         """parse the input lines from a robots.txt file. | ||||||
|            We allow that a user-agent: line is not preceded by |            We allow that a user-agent: line is not preceded by | ||||||
|            one or more blank lines.""" |            one or more blank lines.""" | ||||||
|  |         # states: | ||||||
|  |         #   0: start state | ||||||
|  |         #   1: saw user-agent line | ||||||
|  |         #   2: saw an allow or disallow line | ||||||
|         state = 0 |         state = 0 | ||||||
|         linenumber = 0 |         linenumber = 0 | ||||||
|         entry = Entry() |         entry = Entry() | ||||||
|  | @ -114,6 +118,7 @@ def parse(self, lines): | ||||||
|                 elif line[0] == "allow": |                 elif line[0] == "allow": | ||||||
|                     if state != 0: |                     if state != 0: | ||||||
|                         entry.rulelines.append(RuleLine(line[1], True)) |                         entry.rulelines.append(RuleLine(line[1], True)) | ||||||
|  |                         state = 2 | ||||||
|         if state == 2: |         if state == 2: | ||||||
|             self.entries.append(entry) |             self.entries.append(entry) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -134,6 +134,75 @@ def RobotTest(index, robots_txt, good_urls, bad_urls, | ||||||
| 
 | 
 | ||||||
| RobotTest(7, doc, good, bad) | RobotTest(7, doc, good, bad) | ||||||
| 
 | 
 | ||||||
|  | # From Google: http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=40364 | ||||||
|  | 
 | ||||||
|  | # 8. | ||||||
|  | doc = """ | ||||||
|  | User-agent: Googlebot | ||||||
|  | Allow: /folder1/myfile.html | ||||||
|  | Disallow: /folder1/ | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | good = ['/folder1/myfile.html'] | ||||||
|  | bad = ['/folder1/anotherfile.html'] | ||||||
|  | 
 | ||||||
|  | RobotTest(8, doc, good, bad, agent="Googlebot") | ||||||
|  | 
 | ||||||
|  | # 9.  This file is incorrect because "Googlebot" is a substring of | ||||||
|  | #     "Googlebot-Mobile", so the first entry also matches Googlebot-Mobile | ||||||
|  | #     and test 10 works just like test 9. | ||||||
|  | doc = """ | ||||||
|  | User-agent: Googlebot | ||||||
|  | Disallow: / | ||||||
|  | 
 | ||||||
|  | User-agent: Googlebot-Mobile | ||||||
|  | Allow: / | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | good = [] | ||||||
|  | bad = ['/something.jpg'] | ||||||
|  | 
 | ||||||
|  | RobotTest(9, doc, good, bad, agent="Googlebot") | ||||||
|  | 
 | ||||||
|  | good = [] | ||||||
|  | bad = ['/something.jpg'] | ||||||
|  | 
 | ||||||
|  | RobotTest(10, doc, good, bad, agent="Googlebot-Mobile") | ||||||
|  | 
 | ||||||
|  | # 11.  Get the order correct. | ||||||
|  | doc = """ | ||||||
|  | User-agent: Googlebot-Mobile | ||||||
|  | Allow: / | ||||||
|  | 
 | ||||||
|  | User-agent: Googlebot | ||||||
|  | Disallow: / | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | good = [] | ||||||
|  | bad = ['/something.jpg'] | ||||||
|  | 
 | ||||||
|  | RobotTest(11, doc, good, bad, agent="Googlebot") | ||||||
|  | 
 | ||||||
|  | good = ['/something.jpg'] | ||||||
|  | bad = [] | ||||||
|  | 
 | ||||||
|  | RobotTest(12, doc, good, bad, agent="Googlebot-Mobile") | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # 13.  Google also got the order wrong in #8.  You need to specify the | ||||||
|  | #      URLs from more specific to more general. | ||||||
|  | doc = """ | ||||||
|  | User-agent: Googlebot | ||||||
|  | Allow: /folder1/myfile.html | ||||||
|  | Disallow: /folder1/ | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | good = ['/folder1/myfile.html'] | ||||||
|  | bad = ['/folder1/anotherfile.html'] | ||||||
|  | 
 | ||||||
|  | RobotTest(13, doc, good, bad, agent="googlebot") | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| class TestCase(unittest.TestCase): | class TestCase(unittest.TestCase): | ||||||
|     def runTest(self): |     def runTest(self): | ||||||
|         test_support.requires('network') |         test_support.requires('network') | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Skip Montanaro
						Skip Montanaro