#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
#   Doc/library/token-list.inc
#   Include/token.h
#   Parser/token.c
#   Lib/token.py
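#
# Invocation sketch (inferred from main() below; the build-system wiring is not
# shown here): the first argument selects which generator runs, and any extra
# arguments override the default input and output paths, e.g.
#
#   python3 Tools/scripts/generate_token.py py Grammar/Tokens Lib/token.py
#   python3 Tools/scripts/generate_token.py h Grammar/Tokens Include/token.h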


NT_OFFSET = 256

def load_tokens(path):
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                string = eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok
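
# Illustrative input in the form load_tokens() parses (format inferred from the
# code above, not copied from the real Grammar/Tokens file): a token name,
# optionally followed by a quoted operator string; '#' starts a comment.
#
#   ENDMARKER
#   LPAR      '('
#   EQEQUAL   '=='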
|  | 
 | ||
|  | 
 | ||
|  | def update_file(file, content): | ||
|  |     try: | ||
|  |         with open(file, 'r') as fobj: | ||
|  |             if fobj.read() == content: | ||
|  |                 return False | ||
|  |     except (OSError, ValueError): | ||
|  |         pass | ||
|  |     with open(file, 'w') as fobj: | ||
|  |         fobj.write(content) | ||
|  |     return True | ||
|  | 
 | ||
|  | 
 | ||
|  | token_h_template = """\
 | ||
|  | /* Auto-generated by Tools/scripts/generate_token.py */ | ||
|  | 
 | ||
|  | /* Token types */ | ||
|  | #ifndef Py_LIMITED_API | ||
|  | #ifndef Py_TOKEN_H | ||
|  | #define Py_TOKEN_H | ||
|  | #ifdef __cplusplus | ||
|  | extern "C" { | ||
|  | #endif | ||
|  | 
 | ||
|  | #undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */ | ||
|  | 
 | ||
|  | %s\ | ||
|  | #define N_TOKENS        %d | ||
|  | #define NT_OFFSET       %d | ||
|  | 
 | ||
|  | /* Special definitions for cooperation with parser */ | ||
|  | 
 | ||
|  | #define ISTERMINAL(x)           ((x) < NT_OFFSET) | ||
|  | #define ISNONTERMINAL(x)        ((x) >= NT_OFFSET) | ||
|  | #define ISEOF(x)                ((x) == ENDMARKER) | ||
|  | 
 | ||
|  | 
 | ||
|  | PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */ | ||
|  | PyAPI_FUNC(int) PyToken_OneChar(int); | ||
|  | PyAPI_FUNC(int) PyToken_TwoChars(int, int); | ||
|  | PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int); | ||
|  | 
 | ||
|  | #ifdef __cplusplus | ||
|  | } | ||
|  | #endif | ||
|  | #endif /* !Py_TOKEN_H */ | ||
|  | #endif /* Py_LIMITED_API */ | ||
|  | """
 | ||
|  | 
 | ||
|  | def make_h(infile, outfile='Include/token.h'): | ||
|  |     tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) | ||
|  | 
 | ||
|  |     defines = [] | ||
|  |     for value, name in enumerate(tok_names[:ERRORTOKEN + 1]): | ||
|  |         defines.append("#define %-15s %d\n" % (name, value)) | ||
|  | 
 | ||
|  |     if update_file(outfile, token_h_template % ( | ||
|  |             ''.join(defines), | ||
|  |             len(tok_names), | ||
|  |             NT_OFFSET | ||
|  |         )): | ||
|  |         print("%s regenerated from %s" % (outfile, infile)) | ||
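
# For illustration (hand-expanded from the format string above, assuming
# ENDMARKER is the first name listed in Grammar/Tokens): each generated define
# pads the token name to a 15-character field, e.g.
#
#   #define ENDMARKER       0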


token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

def generate_chars_to_token(mapping, n=1):
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)
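
# Hand-traced example (not part of the original script): for the two-character
# mapping {'=': {'=': 'EQEQUAL'}} the function emits nested C switch statements:
#
#     switch (c1) {
#     case '=':
#         switch (c2) {
#         case '=': return EQEQUAL;
#         }
#         break;
#     }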

def make_c(infile, outfile='Parser/token.c'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    string_to_tok['<>'] = string_to_tok['!=']
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""

def make_rst(infile, outfile='Doc/library/token-list.inc'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))
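
# Hand-traced sample of the generated reST (illustrative token name; the real
# output depends on Grammar/Tokens):
#
#   .. data:: LPAR
#
#      Token value for ``"("``.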


token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''

def make_py(infile, outfile='Lib/token.py'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    constants.insert(ERRORTOKEN,
        "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
        )):
        print("%s regenerated from %s" % (outfile, infile))
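
# Illustrative EXACT_TOKEN_TYPES entry in the format produced above (assuming
# Grammar/Tokens maps '!=' to NOTEQUAL):
#
#       '!=': NOTEQUAL,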


def main(op, infile='Grammar/Tokens', *args):
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])