# Module 'parser'
#
# Parse S-expressions output by the Panel Editor
# (which is written in Scheme so it can't help writing S-expressions).
#
# See notes at end of file.


whitespace = ' \t\n'
operators = '()\''
separators = operators + whitespace + ';' + '"'


# Tokenize a string.
# Return a list of tokens (strings).
#
def tokenize_string(s):
	tokens = []
	while s:
		c = s[:1]
		if c in whitespace:
			s = s[1:]		# skip whitespace
		elif c == ';':
			s = ''			# comment runs to end of line
		elif c == '"':
			# String literal: scan to the closing quote,
			# honoring backslash escapes; keep the quotes.
			n = len(s)
			i = 1
			while i < n:
				c = s[i]
				i = i+1
				if c == '"': break
				if c == '\\': i = i+1
			tokens.append(s[:i])
			s = s[i:]
		elif c in operators:
			tokens.append(c)	# single-character token
			s = s[1:]
		else:
			# Atom: runs until the next separator.
			n = len(s)
			i = 1
			while i < n:
				if s[i] in separators: break
				i = i+1
			tokens.append(s[:i])
			s = s[i:]
	return tokens
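
# For example (illustrative):
#	tokenize_string('(hip "ho ho") ; cheers')
# returns
#	['(', 'hip', '"ho ho"', ')']
# (string tokens keep their quotes; the comment is dropped).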


# Tokenize a whole file (given as file object, not as file name).
# Return a list of tokens (strings).
#
def tokenize_file(fp):
	tokens = []
	while 1:
		line = fp.readline()
		if not line: break
		tokens = tokens + tokenize_string(line)
	return tokens
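
# For example (illustrative), with fp = io.StringIO('(a)\n(b)\n'),
# tokenize_file(fp) returns ['(', 'a', ')', '(', 'b', ')'].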


# Exception raised by parse_expr().
#
class syntax_error(Exception):
	pass


# Parse an S-expression.
# Input is a list of tokens as returned by tokenize_*().
# Return a pair (expr, tokens)
# where expr is a list representing the S-expression,
# and tokens contains the remaining tokens.
# May raise syntax_error.
#
def parse_expr(tokens):
	if (not tokens) or tokens[0] != '(':
		raise syntax_error('expected "("')
	tokens = tokens[1:]
	expr = []
	while 1:
		if not tokens:
			raise syntax_error('missing ")"')
		if tokens[0] == ')':
			return expr, tokens[1:]
		elif tokens[0] == '(':
			# Nested list: recurse; parse_expr also returns
			# the tokens remaining after the sub-expression.
			subexpr, tokens = parse_expr(tokens)
			expr.append(subexpr)
		else:
			expr.append(tokens[0])	# atom
			tokens = tokens[1:]
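
# For example (illustrative):
#	parse_expr(['(', 'a', ')', 'x'])
# returns the pair
#	(['a'], ['x'])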


# Parse a file (given as file object, not as file name).
# Return a list of parsed S-expressions found at the top level.
#
def parse_file(fp):
	tokens = tokenize_file(fp)
	exprlist = []
	while tokens:
		expr, tokens = parse_expr(tokens)
		exprlist.append(expr)
	return exprlist
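
# For example (illustrative): a file whose text is '(a) (b c)'
# parses to [['a'], ['b', 'c']].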


# EXAMPLE:
#
# The input
#	'(hip (hop hur-ray))'
#
# passed to tokenize_string() returns the token list
#	['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
#
# When this is passed to parse_expr() it returns the expression
#	['hip', ['hop', 'hur-ray']]
# plus an empty token list (because there are no tokens left).
#
# When a file containing the example is passed to parse_file() it returns
# a list whose only element is the output of parse_expr() above:
#	[['hip', ['hop', 'hur-ray']]]


# TOKENIZING:
#
# Comments start with a semicolon (;) and continue until the end of the line.
#
# Tokens are separated by whitespace, except that the following characters
# always form a separate token (outside strings):
#	( ) '
# Strings are enclosed in double quotes (") and backslash (\) is used
# as the escape character in strings.
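

# A minimal demo sketch: run this file directly to reproduce the
# EXAMPLE above.  io.StringIO stands in for a real file object here
# (an assumption for illustration only).
if __name__ == '__main__':
	import io
	s = '(hip (hop hur-ray))'
	print(tokenize_string(s))
	# -> ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
	expr, rest = parse_expr(tokenize_string(s))
	print(expr, rest)
	# -> ['hip', ['hop', 'hur-ray']] []
	print(parse_file(io.StringIO(s + '\n')))
	# -> [['hip', ['hop', 'hur-ray']]]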