# Module 'parser'
#
# Parse S-expressions output by the Panel Editor
# (which is written in Scheme so it can't help writing S-expressions).
#
# See notes at end of file.


whitespace = ' \t\n'
operators = '()\''
separators = operators + whitespace + ';' + '"'


# Tokenize a string.
# Return a list of tokens (strings).
#
def tokenize_string(s):
    tokens = []
    while s:
        c = s[:1]
        if c in whitespace:
            s = s[1:]
        elif c == ';':
            # A comment runs to the end of the line; drop the rest.
            s = ''
        elif c == '"':
            # A string literal: scan to the closing quote, honoring
            # backslash escapes; the quotes stay in the token.
            n = len(s)
            i = 1
            while i < n:
                c = s[i]
                i = i+1
                if c == '"': break
                if c == '\\': i = i+1
            tokens.append(s[:i])
            s = s[i:]
        elif c in operators:
            # '(', ')' and quote (') are always single-character tokens.
            tokens.append(c)
            s = s[1:]
        else:
            # Anything else is an atom: scan to the next separator.
            n = len(s)
            i = 1
            while i < n:
                if s[i] in separators: break
                i = i+1
            tokens.append(s[:i])
            s = s[i:]
    return tokens
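
# For example,
#     tokenize_string('(put "a \\" b") ; trailing comment')
# returns the token list
#     ['(', 'put', '"a \\" b"', ')']
# (the comment is dropped and the escaped quote stays inside the string token).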


# Tokenize a whole file (given as file object, not as file name).
# Return a list of tokens (strings).
#
def tokenize_file(fp):
    tokens = []
    while True:
        line = fp.readline()
        if not line: break
        tokens = tokens + tokenize_string(line)
    return tokens
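
# For example, any object with a readline() method will do:
#     import io
#     tokenize_file(io.StringIO('(a b)\n(c)\n'))
# returns ['(', 'a', 'b', ')', '(', 'c', ')'].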


# Exception raised by parse_expr().
#
class syntax_error(Exception):
    pass


# Parse an S-expression.
# Input is a list of tokens as returned by tokenize_*().
# Return a pair (expr, tokens)
# where expr is a list representing the S-expression,
# and tokens contains the remaining tokens.
# May raise syntax_error.
#
def parse_expr(tokens):
    if (not tokens) or tokens[0] != '(':
        raise syntax_error('expected "("')
    tokens = tokens[1:]
    expr = []
    while True:
        if not tokens:
            raise syntax_error('missing ")"')
        if tokens[0] == ')':
            return expr, tokens[1:]
        elif tokens[0] == '(':
            # A nested S-expression: recurse and append the sublist.
            subexpr, tokens = parse_expr(tokens)
            expr.append(subexpr)
        else:
            # An atom or string token: append it unchanged.
            expr.append(tokens[0])
            tokens = tokens[1:]
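
# For example,
#     parse_expr(['(', 'a', ')', '(', 'b', ')'])
# returns (['a'], ['(', 'b', ')']): the first expression is parsed and
# the tokens of the second are left over.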


# Parse a file (given as file object, not as file name).
# Return a list of parsed S-expressions found at the top level.
#
def parse_file(fp):
    tokens = tokenize_file(fp)
    exprlist = []
    while tokens:
        expr, tokens = parse_expr(tokens)
        exprlist.append(expr)
    return exprlist


# EXAMPLE:
#
# The input
#       '(hip (hop hur-ray))'
#
# passed to tokenize_string() returns the token list
#       ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
#
# When this is passed to parse_expr() it returns the expression
#       ['hip', ['hop', 'hur-ray']]
# plus an empty token list (because there are no tokens left).
#
# When a file containing the example is passed to parse_file() it returns
# a list whose only element is the output of parse_expr() above:
#       [['hip', ['hop', 'hur-ray']]]


# TOKENIZING:
#
# Comments start with a semicolon (;) and continue to the end of the line.
#
# Tokens are separated by whitespace, except that the following characters
# always form a separate token (outside strings):
#       ( ) '
# Strings are enclosed in double quotes (") and backslash (\) is used
# as the escape character in strings.
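

# A minimal self-test sketch (not part of the original module): it runs
# the EXAMPLE above through the tokenizer and the parser.
if __name__ == '__main__':
    import io

    sample = '(hip (hop hur-ray))'
    tokens = tokenize_string(sample)
    assert tokens == ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
    expr, rest = parse_expr(tokens)
    assert expr == ['hip', ['hop', 'hur-ray']] and rest == []
    assert parse_file(io.StringIO(sample)) == [['hip', ['hop', 'hur-ray']]]
    print('parser self-test passed')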
