# Module 'parser'
#
# Parse S-expressions output by the Panel Editor
# (which is written in Scheme so it can't help writing S-expressions).
#
# See notes at end of file.


whitespace = ' \t\n'
operators = '()\''
separators = operators + whitespace + ';' + '"'


# Tokenize a string.
# Return a list of tokens (strings).
#
def tokenize_string(s):
    tokens = []
    while s:
        c = s[:1]
        if c in whitespace:
            s = s[1:]
        elif c == ';':
            s = ''
        elif c == '"':
            n = len(s)
            i = 1
            while i < n:
                c = s[i]
                i = i+1
                if c == '"': break
                if c == '\\': i = i+1
            tokens.append(s[:i])
            s = s[i:]
        elif c in operators:
            tokens.append(c)
            s = s[1:]
        else:
            n = len(s)
            i = 1
            while i < n:
                if s[i] in separators: break
                i = i+1
            tokens.append(s[:i])
            s = s[i:]
    return tokens
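
# For example (an illustrative note, not part of the original comments):
#
#       tokenize_string('(name "a b") ; comment')
#
# returns the token list
#
#       ['(', 'name', '"a b"', ')']
#
# String tokens keep their surrounding double quotes, and everything from
# the semicolon onwards is discarded.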
					
						
# Tokenize a whole file (given as file object, not as file name).
# Return a list of tokens (strings).
#
def tokenize_file(fp):
    tokens = []
    while 1:
        line = fp.readline()
        if not line: break
        tokens = tokens + tokenize_string(line)
    return tokens
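
# Another illustrative note (not original): when a file containing the
# two lines
#
#       (a ; comment
#       b)
#
# is tokenized, the result is ['(', 'a', 'b', ')']: the comment ends at
# the newline because the input is tokenized line by line.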
					
						
# Exception raised by parse_expr.
#
class syntax_error(Exception):
    pass


# Parse an S-expression.
# Input is a list of tokens as returned by tokenize_*().
# Return a pair (expr, tokens)
# where expr is a list representing the S-expression,
# and tokens contains the remaining tokens.
# May raise syntax_error.
#
def parse_expr(tokens):
    if (not tokens) or tokens[0] != '(':
        raise syntax_error('expected "("')
    tokens = tokens[1:]
    expr = []
    while 1:
        if not tokens:
            raise syntax_error('missing ")"')
        if tokens[0] == ')':
            return expr, tokens[1:]
        elif tokens[0] == '(':
            subexpr, tokens = parse_expr(tokens)
            expr.append(subexpr)
        else:
            expr.append(tokens[0])
            tokens = tokens[1:]


# Parse a file (given as file object, not as file name).
# Return a list of parsed S-expressions found at the top level.
#
def parse_file(fp):
    tokens = tokenize_file(fp)
    exprlist = []
    while tokens:
        expr, tokens = parse_expr(tokens)
        exprlist.append(expr)
    return exprlist


# EXAMPLE:
#
# The input
#       '(hip (hop hur-ray))'
#
# passed to tokenize_string() returns the token list
#       ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
#
# When this is passed to parse_expr() it returns the expression
#       ['hip', ['hop', 'hur-ray']]
# plus an empty token list (because there are no tokens left).
#
# When a file containing the example is passed to parse_file() it returns
# a list whose only element is the output of parse_expr() above:
#       [['hip', ['hop', 'hur-ray']]]


# TOKENIZING:
#
# Comments start with a semicolon (;) and continue to the end of the line.
#
# Tokens are separated by whitespace, except that the following characters
# always form a separate token (outside strings):
#       ( ) '
# Strings are enclosed in double quotes (") and backslash (\) is used
# as the escape character in strings.
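

# A minimal smoke test (hypothetical, not part of the original module),
# assuming Python 3: it replays the EXAMPLE above and uses io.StringIO to
# feed parse_file() a file object.
if __name__ == '__main__':
    import io

    # Tokenize and parse the documented example.
    tokens = tokenize_string('(hip (hop hur-ray))')
    assert tokens == ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
    expr, rest = parse_expr(tokens)
    assert expr == ['hip', ['hop', 'hur-ray']]
    assert rest == []

    # parse_file() accepts any object with a readline() method.
    assert parse_file(io.StringIO('(hip (hop hur-ray))')) == \
            [['hip', ['hop', 'hur-ray']]]

    # Unbalanced input is reported via syntax_error.
    try:
        parse_expr(['(', 'unclosed'])
    except syntax_error:
        pass
    else:
        assert False, 'expected syntax_error'

    print('parser: self-test passed')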