mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 21:51:50 +00:00 
			
		
		
		
	Give in to tabnanny
This commit is contained in:
		
							parent
							
								
									36dfbcf323
								
							
						
					
					
						commit
						986abac1ba
					
				
					 7 changed files with 907 additions and 1098 deletions
				
			
		|  | @ -9,79 +9,79 @@ | |||
| class RobotFileParser: | ||||
| 
 | ||||
|     def __init__(self): | ||||
| 	self.rules = {} | ||||
| 	self.debug = 0 | ||||
| 	self.url = '' | ||||
| 	self.last_checked = 0 | ||||
|         self.rules = {} | ||||
|         self.debug = 0 | ||||
|         self.url = '' | ||||
|         self.last_checked = 0 | ||||
| 
 | ||||
|     def mtime(self): | ||||
| 	return self.last_checked | ||||
|         return self.last_checked | ||||
| 
 | ||||
|     def modified(self): | ||||
| 	import time | ||||
| 	self.last_checked = time.time() | ||||
|         import time | ||||
|         self.last_checked = time.time() | ||||
| 
 | ||||
|     def set_url(self, url): | ||||
| 	self.url = url | ||||
| ## 	import urlmisc | ||||
| ## 	self.url = urlmisc.canonical_url(url) | ||||
|         self.url = url | ||||
| ##      import urlmisc | ||||
| ##      self.url = urlmisc.canonical_url(url) | ||||
| 
 | ||||
|     def read(self): | ||||
| 	import urllib | ||||
| 	self.parse(urllib.urlopen(self.url).readlines()) | ||||
|         import urllib | ||||
|         self.parse(urllib.urlopen(self.url).readlines()) | ||||
| 
 | ||||
|     def parse(self, lines): | ||||
| 	import regsub, string, regex | ||||
| 	active = [] | ||||
| 	for line in lines: | ||||
| 	    if self.debug: print '>', line, | ||||
| 	    # blank line terminates current record | ||||
| 	    if not line[:-1]: | ||||
| 		active = [] | ||||
| 		continue | ||||
| 	    # remove optional comment and strip line | ||||
| 	    line = string.strip(line[:string.find(line, '#')]) | ||||
| 	    if not line: | ||||
| 		continue | ||||
| 	    line = regsub.split(line, ' *: *') | ||||
| 	    if len(line) == 2: | ||||
| 		line[0] = string.lower(line[0]) | ||||
| 		if line[0] == 'user-agent': | ||||
| 		    # this record applies to this user agent | ||||
| 		    if self.debug: print '>> user-agent:', line[1] | ||||
| 		    active.append(line[1]) | ||||
| 		    if not self.rules.has_key(line[1]): | ||||
| 			self.rules[line[1]] = [] | ||||
| 		elif line[0] == 'disallow': | ||||
| 		    if line[1]: | ||||
| 			if self.debug: print '>> disallow:', line[1] | ||||
| 			for agent in active: | ||||
| 			    self.rules[agent].append(regex.compile(line[1])) | ||||
| 		    else: | ||||
| 			pass | ||||
| 			for agent in active: | ||||
| 			    if self.debug: print '>> allow', agent | ||||
| 			    self.rules[agent] = [] | ||||
| 		else: | ||||
| 		    if self.debug: print '>> unknown:', line | ||||
|         import regsub, string, regex | ||||
|         active = [] | ||||
|         for line in lines: | ||||
|             if self.debug: print '>', line, | ||||
|             # blank line terminates current record | ||||
|             if not line[:-1]: | ||||
|                 active = [] | ||||
|                 continue | ||||
|             # remove optional comment and strip line | ||||
|             line = string.strip(line[:string.find(line, '#')]) | ||||
|             if not line: | ||||
|                 continue | ||||
|             line = regsub.split(line, ' *: *') | ||||
|             if len(line) == 2: | ||||
|                 line[0] = string.lower(line[0]) | ||||
|                 if line[0] == 'user-agent': | ||||
|                     # this record applies to this user agent | ||||
|                     if self.debug: print '>> user-agent:', line[1] | ||||
|                     active.append(line[1]) | ||||
|                     if not self.rules.has_key(line[1]): | ||||
|                         self.rules[line[1]] = [] | ||||
|                 elif line[0] == 'disallow': | ||||
|                     if line[1]: | ||||
|                         if self.debug: print '>> disallow:', line[1] | ||||
|                         for agent in active: | ||||
|                             self.rules[agent].append(regex.compile(line[1])) | ||||
|                     else: | ||||
|                         pass | ||||
|                         for agent in active: | ||||
|                             if self.debug: print '>> allow', agent | ||||
|                             self.rules[agent] = [] | ||||
|                 else: | ||||
|                     if self.debug: print '>> unknown:', line | ||||
| 
 | ||||
| 	self.modified() | ||||
|         self.modified() | ||||
| 
 | ||||
|     # returns true if agent is allowed to fetch url | ||||
|     def can_fetch(self, agent, url): | ||||
| 	import urlparse | ||||
| 	ag = agent | ||||
| 	if not self.rules.has_key(ag): ag = '*' | ||||
| 	if not self.rules.has_key(ag): | ||||
| 	    if self.debug: print '>> allowing', url, 'fetch by', agent | ||||
| 	    return 1 | ||||
| 	path = urlparse.urlparse(url)[2] | ||||
| 	for rule in self.rules[ag]: | ||||
| 	    if rule.match(path) != -1: | ||||
| 		if self.debug: print '>> disallowing', url, 'fetch by', agent | ||||
| 		return 0 | ||||
| 	if self.debug: print '>> allowing', url, 'fetch by', agent | ||||
| 	return 1 | ||||
|         import urlparse | ||||
|         ag = agent | ||||
|         if not self.rules.has_key(ag): ag = '*' | ||||
|         if not self.rules.has_key(ag): | ||||
|             if self.debug: print '>> allowing', url, 'fetch by', agent | ||||
|             return 1 | ||||
|         path = urlparse.urlparse(url)[2] | ||||
|         for rule in self.rules[ag]: | ||||
|             if rule.match(path) != -1: | ||||
|                 if self.debug: print '>> disallowing', url, 'fetch by', agent | ||||
|                 return 0 | ||||
|         if self.debug: print '>> allowing', url, 'fetch by', agent | ||||
|         return 1 | ||||
| 
 | ||||
| def test(): | ||||
|     rp = RobotFileParser() | ||||
|  | @ -91,7 +91,7 @@ def test(): | |||
|     print rp.rules | ||||
|     print rp.can_fetch('*', 'http://www.calendar.com/concerts/') | ||||
|     print rp.can_fetch('Musi-Cal-Robot', | ||||
| 		       'http://dolphin:80/cgi-bin/music-search?performer=Rolling+Stones') | ||||
|                        'http://dolphin:80/cgi-bin/music-search?performer=Rolling+Stones') | ||||
| 
 | ||||
|     print rp.can_fetch('Lycos', 'http://www/~skip/volkswagen/') | ||||
|     print rp.can_fetch('Lycos', 'http://www/~skip/volkswagen/vanagon-list-001') | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Guido van Rossum
						Guido van Rossum