#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
#   Doc/library/token-list.inc
#   Include/token.h
#   Parser/token.c
#   Lib/token.py
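#
# Usage (see main() at the bottom): the first argument picks one of the
# make_h/make_c/make_rst/make_py generators, optionally followed by the
# input and output paths, e.g.:
#
#   python3 generate_token.py py Grammar/Tokens Lib/token.py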


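# Token values below NT_OFFSET are terminals; grammar non-terminals are
# numbered from NT_OFFSET up (see the ISTERMINAL/ISNONTERMINAL
# definitions generated below).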
NT_OFFSET = 256

def load_tokens(path):
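    """Load token definitions from *path* (Grammar/Tokens format).

    Each significant line names a token and, optionally, its quoted
    operator string (e.g. ``LPAR '('``); token values are assigned in
    file order.  Returns (tok_names, ERRORTOKEN, string_to_tok).
    """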
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                string = eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok


def update_file(file, content):
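    """Write *content* to *file*; return True if the file changed.

    The file is left untouched (and False is returned) when it already
    holds exactly *content*.
    """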
    try:
        with open(file, 'r') as fobj:
            if fobj.read() == content:
                return False
    except (OSError, ValueError):
        pass
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True


token_h_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

/* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS        %d
#define NT_OFFSET       %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x)           ((x) < NT_OFFSET)
#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
#define ISEOF(x)                ((x) == ENDMARKER)
#define ISWHITESPACE(x)         ((x) == ENDMARKER || \\
                                 (x) == NEWLINE   || \\
                                 (x) == INDENT    || \\
                                 (x) == DEDENT)


PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
"""

def make_h(infile, outfile='Include/token.h'):
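    """Regenerate Include/token.h from the token definitions."""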
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        defines.append("#define %-15s %d\n" % (name, value))

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

def generate_chars_to_token(mapping, n=1):
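    """Render a nested C ``switch`` over the *n*-th operator character.

    *mapping* maps each character either to a token name (a leaf) or to
    another dict keyed by the following character; recursion emits one
    switch level per extra character.
    """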
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)

def make_c(infile, outfile='Parser/token.c'):
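    """Regenerate Parser/token.c from the token definitions."""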
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    string_to_tok['<>'] = string_to_tok['!=']
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""

def make_rst(infile, outfile='Doc/library/token-list.inc'):
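    """Regenerate Doc/library/token-list.inc from the token definitions."""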
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))


token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''

def make_py(infile, outfile='Lib/token.py'):
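    """Regenerate Lib/token.py from the token definitions."""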
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    constants.insert(ERRORTOKEN,
        "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
        )):
        print("%s regenerated from %s" % (outfile, infile))


def main(op, infile='Grammar/Tokens', *args):
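    """Dispatch to make_<op> (one of make_h, make_c, make_rst, make_py)."""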
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])