Tools/build/generate_token.py, mirror of https://github.com/python/cpython.git (synced 2025-10-30 21:21:22 +00:00)
319 lines, 8.3 KiB, Python, executable file
#! /usr/bin/env python3
# This script generates token-related files from Grammar/Tokens:
#
#   make_rst:
#       Doc/library/token-list.inc
#       Doc/library/token.rst  (checked, not generated)
#   make_h:
#       Include/internal/pycore_token.h
#   make_c:
#       Parser/token.c
#   make_py:
#       Lib/token.py
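#
# Grammar/Tokens format: one token name per line, optionally followed by
# the token's quoted string.  A few illustrative entries (a sample, not
# the full file):
#
#     ENDMARKER
#     NAME
#     LPAR                    '('
#     NOTEQUAL                '!='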

import re

SCRIPT_NAME = 'Tools/build/generate_token.py'
AUTO_GENERATED_BY_SCRIPT = f'Auto-generated by {SCRIPT_NAME}'
NT_OFFSET = 256  # values >= NT_OFFSET denote grammar nonterminals, not tokens

def load_tokens(path):
    """Parse a Grammar/Tokens-style file.

    Return (tok_names, ERRORTOKEN, string_to_tok): the ordered token
    names, the numeric value of ERRORTOKEN, and a map from each fixed
    token string to its token value.
    """
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                # The token string is stored as a quoted Python literal,
                # e.g. '('; eval() unquotes it.
                string = eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok
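
# Usage sketch (illustrative; path relative to a CPython checkout):
#
#     tok_names, ERRORTOKEN, string_to_tok = load_tokens('Grammar/Tokens')
#     tok_names[ERRORTOKEN]   # 'ERRORTOKEN'
#     string_to_tok['(']      # value of the LPAR token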


def update_file(file, content):
    """Write *content* to *file* only if it differs; return True if written."""
    try:
        with open(file) as fobj:
            if fobj.read() == content:
                return False
    except (OSError, ValueError):
        pass
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True
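
# Note: skipping identical writes keeps the file's mtime unchanged,
# presumably so make-style builds do not rebuild dependents needlessly.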


token_h_template = f"""\
// {AUTO_GENERATED_BY_SCRIPT}
"""
token_h_template += """\

/* Token types */
#ifndef Py_INTERNAL_TOKEN_H
#define Py_INTERNAL_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE define"
#endif

#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS        %d
#define NT_OFFSET       %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x)           ((x) < NT_OFFSET)
#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
#define ISEOF(x)                ((x) == ENDMARKER)
#define ISWHITESPACE(x)         ((x) == ENDMARKER || \\
                                 (x) == NEWLINE   || \\
                                 (x) == INDENT    || \\
                                 (x) == DEDENT)
#define ISSTRINGLIT(x)          ((x) == STRING           || \\
                                 (x) == FSTRING_MIDDLE)


// Export these 4 symbols for 'test_peg_generator'
PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) _PyToken_OneChar(int);
PyAPI_FUNC(int) _PyToken_TwoChars(int, int);
PyAPI_FUNC(int) _PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif  // !Py_INTERNAL_TOKEN_H
"""

def make_h(infile, outfile='Include/internal/pycore_token.h'):
    """Regenerate the internal C header with one #define per token."""
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = []
    # Only the tokens the C tokenizer knows about (everything up to and
    # including ERRORTOKEN) get a #define.
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        defines.append("#define %-15s %d\n" % (name, value))

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
        )):
        print("%s regenerated from %s" % (outfile, infile))
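
# Illustrative output of the loop above (values track Grammar/Tokens):
#
#     #define ENDMARKER       0
#     #define NAME            1
#     #define NUMBER          2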


token_c_template = f"""\
/* {AUTO_GENERATED_BY_SCRIPT} */
"""
token_c_template += """\

#include "Python.h"
#include "pycore_token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
_PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
_PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
_PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

def generate_chars_to_token(mapping, n=1):
    """Emit a (possibly nested) C switch mapping characters to token names."""
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            # An inner dict means more characters follow: recurse into a
            # switch on the next character.
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)
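
# For example, generate_chars_to_token({'!': {'=': 'NOTEQUAL'}}) returns
# roughly:
#
#     switch (c1) {
#     case '!':
#         switch (c2) {
#         case '=': return NOTEQUAL;
#         }
#         break;
#     }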

def make_c(infile, outfile='Parser/token.c'):
    """Regenerate the token-name table and the one/two/three-char lookups."""
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    # '<>' is tokenized the same as '!=' (kept for
    # `from __future__ import barry_as_FLUFL`; see PEP 401).
    string_to_tok['<>'] = string_to_tok['!=']
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        # Build a trie keyed first by token length, then by each character.
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_inc_template = f"""\
.. {AUTO_GENERATED_BY_SCRIPT}

.. list-table::
   :align: left
   :header-rows: 1

   * - Token
     - Value
%s
"""

def make_rst(infile, outfile='Doc/library/token-list.inc',
             rstfile='Doc/library/token.rst'):
    """Regenerate the token table for the docs and cross-check token.rst."""
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    needs_handwritten_doc = set()

    names = []
    for value, name in enumerate(tok_names):
        if value in tok_to_string:
            assert name.isupper()
            names.append(f'   * - .. data:: {name}')
            names.append(f'     - ``"{tok_to_string[value]}"``')
        else:
            # Tokens without a fixed string must be documented by hand
            # in token.rst.
            needs_handwritten_doc.add(name)

    has_handwritten_doc = set()
    with open(rstfile) as fileobj:
        tokendef_re = re.compile(r'.. data:: ([0-9A-Z_]+)\s*')
        for line in fileobj:
            if match := tokendef_re.fullmatch(line):
                has_handwritten_doc.add(match[1])

    # Exclude non-token constants in token.py
    has_handwritten_doc -= {'N_TOKENS', 'NT_OFFSET', 'EXACT_TOKEN_TYPES'}

    if needs_handwritten_doc != has_handwritten_doc:
        message_parts = [f'ERROR: {rstfile} does not document all tokens!']
        undocumented = needs_handwritten_doc - has_handwritten_doc
        extra = has_handwritten_doc - needs_handwritten_doc
        if undocumented:
            message_parts.append(f'Undocumented tokens: {undocumented}')
        if extra:
            message_parts.append(f'Documented nonexistent tokens: {extra}')
        exit('\n'.join(message_parts))

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))
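
# Each token with a fixed string becomes one list-table row, e.g.:
#
#     * - .. data:: LPAR
#       - ``"("``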


token_py_template = f'''\
"""Token constants."""
# {AUTO_GENERATED_BY_SCRIPT}
'''
token_py_template += '''
__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF',
           'EXACT_TOKEN_TYPES']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''

def make_py(infile, outfile='Lib/token.py'):
    """Regenerate Lib/token.py with the token constants and exact-token map."""
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    constants.insert(ERRORTOKEN,
        "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
        )):
        print("%s regenerated from %s" % (outfile, infile))
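
# Illustrative slice of the generated Lib/token.py:
#
#     ENDMARKER = 0
#     NAME = 1
#     ...
#     EXACT_TOKEN_TYPES = {
#         '!=': NOTEQUAL,
#         ...
#     }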


def main(op, infile='Grammar/Tokens', *args):
    """Dispatch to make_rst/make_h/make_c/make_py based on *op*."""
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])
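
# Example invocations from a CPython source checkout (normally driven by
# the build system; shown for illustration):
#
#     python3 Tools/build/generate_token.py h Grammar/Tokens
#     python3 Tools/build/generate_token.py py Grammar/Tokens Lib/token.py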
