| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  | """Gopher protocol client interface.""" | 
					
						
							| 
									
										
										
										
											1994-02-21 16:36:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-01-23 15:35:05 +00:00
										 |  |  | __all__ = ["send_selector","send_query"] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1994-02-21 16:36:04 +00:00
										 |  |  | # Default selector, host and port | 
					
						
							|  |  |  | DEF_SELECTOR = '1/' | 
					
						
							|  |  |  | DEF_HOST     = 'gopher.micro.umn.edu' | 
					
						
							|  |  |  | DEF_PORT     = 70 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Recognized file types | 
					
						
							|  |  |  | A_TEXT       = '0' | 
					
						
							|  |  |  | A_MENU       = '1' | 
					
						
							|  |  |  | A_CSO        = '2' | 
					
						
							|  |  |  | A_ERROR      = '3' | 
					
						
							|  |  |  | A_MACBINHEX  = '4' | 
					
						
							|  |  |  | A_PCBINHEX   = '5' | 
					
						
							|  |  |  | A_UUENCODED  = '6' | 
					
						
							|  |  |  | A_INDEX      = '7' | 
					
						
							|  |  |  | A_TELNET     = '8' | 
					
						
							|  |  |  | A_BINARY     = '9' | 
					
						
							|  |  |  | A_DUPLICATE  = '+' | 
					
						
							|  |  |  | A_SOUND      = 's' | 
					
						
							|  |  |  | A_EVENT      = 'e' | 
					
						
							|  |  |  | A_CALENDAR   = 'c' | 
					
						
							|  |  |  | A_HTML       = 'h' | 
					
						
							|  |  |  | A_TN3270     = 'T' | 
					
						
							|  |  |  | A_MIME       = 'M' | 
					
						
							|  |  |  | A_IMAGE      = 'I' | 
					
						
							|  |  |  | A_WHOIS      = 'w' | 
					
						
							|  |  |  | A_QUERY      = 'q' | 
					
						
							|  |  |  | A_GIF        = 'g' | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  | A_HTML       = 'h'          # HTML file | 
					
						
							|  |  |  | A_WWW        = 'w'          # WWW address | 
					
						
							| 
									
										
										
										
											1994-02-21 16:36:04 +00:00
										 |  |  | A_PLUS_IMAGE = ':' | 
					
						
							|  |  |  | A_PLUS_MOVIE = ';' | 
					
						
							|  |  |  | A_PLUS_SOUND = '<' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | _names = dir() | 
					
						
							| 
									
										
										
										
											1998-01-19 21:59:48 +00:00
										 |  |  | _type_to_name_map = {} | 
					
						
							| 
									
										
										
										
											1994-02-21 16:36:04 +00:00
										 |  |  | def type_to_name(gtype): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |     """Map all file types to strings; unknown types become TYPE='x'.""" | 
					
						
							|  |  |  |     global _type_to_name_map | 
					
						
							|  |  |  |     if _type_to_name_map=={}: | 
					
						
							|  |  |  |         for name in _names: | 
					
						
							|  |  |  |             if name[:2] == 'A_': | 
					
						
							|  |  |  |                 _type_to_name_map[eval(name)] = name[2:] | 
					
						
							| 
									
										
										
										
											2002-06-01 14:18:47 +00:00
										 |  |  |     if gtype in _type_to_name_map: | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |         return _type_to_name_map[gtype] | 
					
						
							| 
									
										
										
										
											2004-02-12 17:35:32 +00:00
										 |  |  |     return 'TYPE=%r' % (gtype,) | 
					
						
							| 
									
										
										
										
											1994-02-21 16:36:04 +00:00
										 |  |  | 
 | 
					
						
# Names for characters and strings
# CRLF ends every request sent by send_selector() and is stripped from
# each line read by get_directory() and get_alt_textfile().
CRLF = '\r\n'
# TAB separates the fields of a directory line (see get_directory()).
TAB = '\t'
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1994-05-06 14:28:19 +00:00
										 |  |  | def send_selector(selector, host, port = 0): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |     """Send a selector to a given host and port, return a file with the reply.""" | 
					
						
							|  |  |  |     import socket | 
					
						
							|  |  |  |     if not port: | 
					
						
							| 
									
										
										
										
											2001-02-09 10:10:02 +00:00
										 |  |  |         i = host.find(':') | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |         if i >= 0: | 
					
						
							| 
									
										
										
										
											2001-02-09 10:10:02 +00:00
										 |  |  |             host, port = host[:i], int(host[i+1:]) | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |     if not port: | 
					
						
							|  |  |  |         port = DEF_PORT | 
					
						
							|  |  |  |     elif type(port) == type(''): | 
					
						
							| 
									
										
										
										
											2001-02-09 10:10:02 +00:00
										 |  |  |         port = int(port) | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |     s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) | 
					
						
							| 
									
										
										
										
											2000-03-28 21:45:46 +00:00
										 |  |  |     s.connect((host, port)) | 
					
						
							| 
									
										
										
										
											2002-02-16 23:06:19 +00:00
										 |  |  |     s.sendall(selector + CRLF) | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |     s.shutdown(1) | 
					
						
							|  |  |  |     return s.makefile('rb') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1994-05-06 14:28:19 +00:00
										 |  |  | def send_query(selector, query, host, port = 0): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |     """Send a selector and a query string.""" | 
					
						
							|  |  |  |     return send_selector(selector + '\t' + query, host, port) | 
					
						
							| 
									
										
										
										
											1994-02-21 16:36:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-01-19 21:59:48 +00:00
										 |  |  | def path_to_selector(path): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |     """Takes a path as returned by urlparse and returns the appropriate selector.""" | 
					
						
							|  |  |  |     if path=="/": | 
					
						
							|  |  |  |         return "/" | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         return path[2:] # Cuts initial slash and data type identifier | 
					
						
							| 
									
										
										
										
											1998-01-19 21:59:48 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def path_to_datatype_name(path): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |     """Takes a path as returned by urlparse and maps it to a string.
 | 
					
						
							|  |  |  |     See section 3.4 of RFC 1738 for details."""
 | 
					
						
							|  |  |  |     if path=="/": | 
					
						
							|  |  |  |         # No way to tell, although "INDEX" is likely | 
					
						
							|  |  |  |         return "TYPE='unknown'" | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         return type_to_name(path[1]) | 
					
						
							| 
									
										
										
										
											1998-03-26 20:56:10 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1994-02-21 16:36:04 +00:00
										 |  |  | # The following functions interpret the data returned by the gopher | 
					
						
							|  |  |  | # server according to the expected type, e.g. textfile or directory | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_directory(f): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |     """Get a directory in the form of a list of entries.""" | 
					
						
							| 
									
										
										
										
											2003-09-22 12:43:16 +00:00
										 |  |  |     entries = [] | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |     while 1: | 
					
						
							|  |  |  |         line = f.readline() | 
					
						
							|  |  |  |         if not line: | 
					
						
							|  |  |  |             print '(Unexpected EOF from server)' | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         if line[-2:] == CRLF: | 
					
						
							|  |  |  |             line = line[:-2] | 
					
						
							|  |  |  |         elif line[-1:] in CRLF: | 
					
						
							|  |  |  |             line = line[:-1] | 
					
						
							|  |  |  |         if line == '.': | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         if not line: | 
					
						
							|  |  |  |             print '(Empty line from server)' | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         gtype = line[0] | 
					
						
							| 
									
										
										
										
											2001-02-09 10:10:02 +00:00
										 |  |  |         parts = line[1:].split(TAB) | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |         if len(parts) < 4: | 
					
						
							| 
									
										
										
										
											2004-02-12 17:35:32 +00:00
										 |  |  |             print '(Bad line from server: %r)' % (line,) | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |             continue | 
					
						
							|  |  |  |         if len(parts) > 4: | 
					
						
							|  |  |  |             if parts[4:] != ['+']: | 
					
						
							|  |  |  |                 print '(Extra info from server:', | 
					
						
							|  |  |  |                 print parts[4:], ')' | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             parts.append('') | 
					
						
							|  |  |  |         parts.insert(0, gtype) | 
					
						
							| 
									
										
										
										
											2003-09-22 12:43:16 +00:00
										 |  |  |         entries.append(parts) | 
					
						
							|  |  |  |     return entries | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1994-02-21 16:36:04 +00:00
										 |  |  | def get_textfile(f): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |     """Get a text file as a list of lines, with trailing CRLF stripped.""" | 
					
						
							| 
									
										
										
										
											2003-09-22 12:43:16 +00:00
										 |  |  |     lines = [] | 
					
						
							|  |  |  |     get_alt_textfile(f, lines.append) | 
					
						
							|  |  |  |     return lines | 
					
						
							| 
									
										
										
										
											1994-02-21 16:36:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def get_alt_textfile(f, func): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |     """Get a text file and pass each line to a function, with trailing CRLF stripped.""" | 
					
						
							|  |  |  |     while 1: | 
					
						
							|  |  |  |         line = f.readline() | 
					
						
							|  |  |  |         if not line: | 
					
						
							|  |  |  |             print '(Unexpected EOF from server)' | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         if line[-2:] == CRLF: | 
					
						
							|  |  |  |             line = line[:-2] | 
					
						
							|  |  |  |         elif line[-1:] in CRLF: | 
					
						
							|  |  |  |             line = line[:-1] | 
					
						
							|  |  |  |         if line == '.': | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         if line[:2] == '..': | 
					
						
							|  |  |  |             line = line[1:] | 
					
						
							|  |  |  |         func(line) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1994-02-21 16:36:04 +00:00
										 |  |  | def get_binary(f): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |     """Get a binary file as one solid data block.""" | 
					
						
							|  |  |  |     data = f.read() | 
					
						
							|  |  |  |     return data | 
					
						
							| 
									
										
										
										
											1994-02-21 16:36:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def get_alt_binary(f, func, blocksize): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |     """Get a binary file and pass each block to a function.""" | 
					
						
							|  |  |  |     while 1: | 
					
						
							|  |  |  |         data = f.read(blocksize) | 
					
						
							|  |  |  |         if not data: | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         func(data) | 
					
						
							| 
									
										
										
										
											1994-02-21 16:36:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def test(): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |     """Trivial test program.""" | 
					
						
							|  |  |  |     import sys | 
					
						
							|  |  |  |     import getopt | 
					
						
							|  |  |  |     opts, args = getopt.getopt(sys.argv[1:], '') | 
					
						
							|  |  |  |     selector = DEF_SELECTOR | 
					
						
							|  |  |  |     type = selector[0] | 
					
						
							|  |  |  |     host = DEF_HOST | 
					
						
							|  |  |  |     if args: | 
					
						
							|  |  |  |         host = args[0] | 
					
						
							|  |  |  |         args = args[1:] | 
					
						
							|  |  |  |     if args: | 
					
						
							|  |  |  |         type = args[0] | 
					
						
							|  |  |  |         args = args[1:] | 
					
						
							|  |  |  |         if len(type) > 1: | 
					
						
							|  |  |  |             type, selector = type[0], type | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             selector = '' | 
					
						
							|  |  |  |             if args: | 
					
						
							|  |  |  |                 selector = args[0] | 
					
						
							|  |  |  |                 args = args[1:] | 
					
						
							|  |  |  |         query = '' | 
					
						
							|  |  |  |         if args: | 
					
						
							|  |  |  |             query = args[0] | 
					
						
							|  |  |  |             args = args[1:] | 
					
						
							|  |  |  |     if type == A_INDEX: | 
					
						
							|  |  |  |         f = send_query(selector, query, host) | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         f = send_selector(selector, host) | 
					
						
							|  |  |  |     if type == A_TEXT: | 
					
						
							| 
									
										
										
										
											2003-09-22 12:43:16 +00:00
										 |  |  |         lines = get_textfile(f) | 
					
						
							|  |  |  |         for item in lines: print item | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |     elif type in (A_MENU, A_INDEX): | 
					
						
							| 
									
										
										
										
											2003-09-22 12:43:16 +00:00
										 |  |  |         entries = get_directory(f) | 
					
						
							|  |  |  |         for item in entries: print item | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |     else: | 
					
						
							|  |  |  |         data = get_binary(f) | 
					
						
							| 
									
										
										
										
											2004-02-12 17:35:32 +00:00
										 |  |  |         print 'binary data:', len(data), 'bytes:', repr(data[:100])[:40] | 
					
						
							| 
									
										
										
										
											1994-02-21 16:36:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # Run the test when run as script | 
					
						
							|  |  |  | if __name__ == '__main__': | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  |     test() |