mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	 e4c431ecf5
			
		
	
	
		e4c431ecf5
		
			
		
	
	
	
	
		
			
			This is partly a cleanup of the code. It also is preparation for getting the variables from the source (cross-platform) rather than from the symbols. The change only touches the tool (and its tests).
		
			
				
	
	
		
			339 lines
		
	
	
	
		
			9.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			339 lines
		
	
	
	
		
			9.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import re
 | |
| import shlex
 | |
| import subprocess
 | |
| 
 | |
| from ..common.info import UNKNOWN
 | |
| 
 | |
| from . import source
 | |
| 
 | |
| 
 | |
| IDENTIFIER = r'(?:[a-zA-z]|_+[a-zA-Z0-9]\w*)'
 | |
| 
 | |
| TYPE_QUAL = r'(?:const|volatile)'
 | |
| 
 | |
| VAR_TYPE_SPEC = r'''(?:
 | |
|         void |
 | |
|         (?:
 | |
|          (?:(?:un)?signed\s+)?
 | |
|          (?:
 | |
|           char |
 | |
|           short |
 | |
|           int |
 | |
|           long |
 | |
|           long\s+int |
 | |
|           long\s+long
 | |
|           ) |
 | |
|          ) |
 | |
|         float |
 | |
|         double |
 | |
|         {IDENTIFIER} |
 | |
|         (?:struct|union)\s+{IDENTIFIER}
 | |
|         )'''
 | |
| 
 | |
| POINTER = rf'''(?:
 | |
|         (?:\s+const)?\s*[*]
 | |
|         )'''
 | |
| 
 | |
| #STRUCT = r'''(?:
 | |
| #        (?:struct|(struct\s+%s))\s*[{]
 | |
| #            [^}]*
 | |
| #        [}]
 | |
| #        )''' % (IDENTIFIER)
 | |
| #UNION = r'''(?:
 | |
| #        (?:union|(union\s+%s))\s*[{]
 | |
| #            [^}]*
 | |
| #        [}]
 | |
| #        )''' % (IDENTIFIER)
 | |
| #DECL_SPEC = rf'''(?:
 | |
| #        ({VAR_TYPE_SPEC}) |
 | |
| #        ({STRUCT}) |
 | |
| #        ({UNION})
 | |
| #        )'''
 | |
| 
 | |
| FUNC_START = rf'''(?:
 | |
|         (?:
 | |
|           (?:
 | |
|             extern |
 | |
|             static |
 | |
|             static\s+inline
 | |
|            )\s+
 | |
|          )?
 | |
|         #(?:const\s+)?
 | |
|         {VAR_TYPE_SPEC}
 | |
|         )'''
 | |
| #GLOBAL_VAR_START = rf'''(?:
 | |
| #        (?:
 | |
| #          (?:
 | |
| #            extern |
 | |
| #            static
 | |
| #           )\s+
 | |
| #         )?
 | |
| #        (?:
 | |
| #           {TYPE_QUAL}
 | |
| #           (?:\s+{TYPE_QUAL})?
 | |
| #         )?\s+
 | |
| #        {VAR_TYPE_SPEC}
 | |
| #        )'''
 | |
| GLOBAL_DECL_START_RE = re.compile(rf'''
 | |
|         ^
 | |
|         (?:
 | |
|             ({FUNC_START})
 | |
|          )
 | |
|         ''', re.VERBOSE)
 | |
| 
 | |
| LOCAL_VAR_START = rf'''(?:
 | |
|         (?:
 | |
|           (?:
 | |
|             register |
 | |
|             static
 | |
|            )\s+
 | |
|          )?
 | |
|         (?:
 | |
|           (?:
 | |
|             {TYPE_QUAL}
 | |
|             (?:\s+{TYPE_QUAL})?
 | |
|            )\s+
 | |
|          )?
 | |
|         {VAR_TYPE_SPEC}
 | |
|         {POINTER}?
 | |
|         )'''
 | |
| LOCAL_STMT_START_RE = re.compile(rf'''
 | |
|         ^
 | |
|         (?:
 | |
|             ({LOCAL_VAR_START})
 | |
|          )
 | |
|         ''', re.VERBOSE)
 | |
| 
 | |
| 
 | |
| def iter_global_declarations(lines):
 | |
|     """Yield (decl, body) for each global declaration in the given lines.
 | |
| 
 | |
|     For function definitions the header is reduced to one line and
 | |
|     the body is provided as-is.  For other compound declarations (e.g.
 | |
|     struct) the entire declaration is reduced to one line and "body"
 | |
|     is None.  Likewise for simple declarations (e.g. variables).
 | |
| 
 | |
|     Declarations inside function bodies are ignored, though their text
 | |
|     is provided in the function body.
 | |
|     """
 | |
|     # XXX Bail out upon bogus syntax.
 | |
|     lines = source.iter_clean_lines(lines)
 | |
|     for line in lines:
 | |
|         if not GLOBAL_DECL_START_RE.match(line):
 | |
|             continue
 | |
|         # We only need functions here, since we only need locals for now.
 | |
|         if line.endswith(';'):
 | |
|             continue
 | |
|         if line.endswith('{') and '(' not in line:
 | |
|             continue
 | |
| 
 | |
|         # Capture the function.
 | |
|         # (assume no func is a one-liner)
 | |
|         decl = line
 | |
|         while '{' not in line:  # assume no inline structs, etc.
 | |
|             try:
 | |
|                 line = next(lines)
 | |
|             except StopIteration:
 | |
|                 return
 | |
|             decl += ' ' + line
 | |
| 
 | |
|         body, end = _extract_block(lines)
 | |
|         if end is None:
 | |
|             return
 | |
|         assert end == '}'
 | |
|         yield (f'{decl}\n{body}\n{end}', body)
 | |
| 
 | |
| 
 | |
| def iter_local_statements(lines):
 | |
|     """Yield (lines, blocks) for each statement in the given lines.
 | |
| 
 | |
|     For simple statements, "blocks" is None and the statement is reduced
 | |
|     to a single line.  For compound statements, "blocks" is a pair of
 | |
|     (header, body) for each block in the statement.  The headers are
 | |
|     reduced to a single line each, but the bpdies are provided as-is.
 | |
|     """
 | |
|     # XXX Bail out upon bogus syntax.
 | |
|     lines = source.iter_clean_lines(lines)
 | |
|     for line in lines:
 | |
|         if not LOCAL_STMT_START_RE.match(line):
 | |
|             continue
 | |
| 
 | |
|         stmt = line
 | |
|         blocks = None
 | |
|         if not line.endswith(';'):
 | |
|             # XXX Support compound & multiline simple statements.
 | |
|             #blocks = []
 | |
|             continue
 | |
| 
 | |
|         yield (stmt, blocks)
 | |
| 
 | |
| 
 | |
| def _extract_block(lines):
 | |
|     end = None
 | |
|     depth = 1
 | |
|     body = []
 | |
|     for line in lines:
 | |
|         depth += line.count('{') - line.count('}')
 | |
|         if depth == 0:
 | |
|             end = line
 | |
|             break
 | |
|         body.append(line)
 | |
|     return '\n'.join(body), end
 | |
| 
 | |
| 
 | |
| def parse_func(stmt, body):
 | |
|     """Return (name, signature) for the given function definition."""
 | |
|     header, _, end = stmt.partition(body)
 | |
|     assert end.strip() == '}'
 | |
|     assert header.strip().endswith('{')
 | |
|     header, _, _= header.rpartition('{')
 | |
| 
 | |
|     signature = ' '.join(header.strip().splitlines())
 | |
| 
 | |
|     _, _, name = signature.split('(')[0].strip().rpartition(' ')
 | |
|     assert name
 | |
| 
 | |
|     return name, signature
 | |
| 
 | |
| 
 | |
| #TYPE_SPEC = rf'''(?:
 | |
| #        )'''
 | |
| #VAR_DECLARATOR = rf'''(?:
 | |
| #        )'''
 | |
| #VAR_DECL = rf'''(?:
 | |
| #            {TYPE_SPEC}+
 | |
| #            {VAR_DECLARATOR}
 | |
| #            \s*
 | |
| #        )'''
 | |
| #VAR_DECLARATION = rf'''(?:
 | |
| #            {VAR_DECL}
 | |
| #            (?: = [^=] [^;]* )?
 | |
| #            ;
 | |
| #        )'''
 | |
| #
 | |
| #
 | |
| #def parse_variable(decl, *, inFunc=False):
 | |
| #    """Return [(name, storage, vartype)] for the given variable declaration."""
 | |
| #    ...
 | |
| 
 | |
| 
 | |
| def _parse_var(stmt):
 | |
|     """Return (name, vartype) for the given variable declaration."""
 | |
|     stmt = stmt.rstrip(';')
 | |
|     m = LOCAL_STMT_START_RE.match(stmt)
 | |
|     assert m
 | |
|     vartype = m.group(0)
 | |
|     name = stmt[len(vartype):].partition('=')[0].strip()
 | |
| 
 | |
|     if name.startswith('('):
 | |
|         name, _, after = name[1:].partition(')')
 | |
|         assert after
 | |
|         name = name.replace('*', '* ')
 | |
|         inside, _, name = name.strip().rpartition(' ')
 | |
|         vartype = f'{vartype} ({inside.strip()}){after}'
 | |
|     else:
 | |
|         name = name.replace('*', '* ')
 | |
|         before, _, name = name.rpartition(' ')
 | |
|         vartype = f'{vartype} {before}'
 | |
| 
 | |
|     vartype = vartype.strip()
 | |
|     while '  ' in vartype:
 | |
|         vartype = vartype.replace('  ', ' ')
 | |
| 
 | |
|     return name, vartype
 | |
| 
 | |
| 
 | |
| def extract_storage(decl, *, infunc=None):
 | |
|     """Return (storage, vartype) based on the given declaration.
 | |
| 
 | |
|     The default storage is "implicit" (or "local" if infunc is True).
 | |
|     """
 | |
|     if decl == UNKNOWN:
 | |
|         return decl
 | |
|     if decl.startswith('static '):
 | |
|         return 'static'
 | |
|         #return 'static', decl.partition(' ')[2].strip()
 | |
|     elif decl.startswith('extern '):
 | |
|         return 'extern'
 | |
|         #return 'extern', decl.partition(' ')[2].strip()
 | |
|     elif re.match('.*\b(static|extern)\b', decl):
 | |
|         raise NotImplementedError
 | |
|     elif infunc:
 | |
|         return 'local'
 | |
|     else:
 | |
|         return 'implicit'
 | |
| 
 | |
| 
 | |
| def parse_compound(stmt, blocks):
 | |
|     """Return (headers, bodies) for the given compound statement."""
 | |
|     # XXX Identify declarations inside compound statements
 | |
|     # (if/switch/for/while).
 | |
|     raise NotImplementedError
 | |
| 
 | |
| 
 | |
| def iter_variables(filename, *,
 | |
|                    preprocessed=False,
 | |
|                    _iter_source_lines=source.iter_lines,
 | |
|                    _iter_global=iter_global_declarations,
 | |
|                    _iter_local=iter_local_statements,
 | |
|                    _parse_func=parse_func,
 | |
|                    _parse_var=_parse_var,
 | |
|                    _parse_compound=parse_compound,
 | |
|                    ):
 | |
|     """Yield (funcname, name, vartype) for every variable in the given file."""
 | |
|     if preprocessed:
 | |
|         raise NotImplementedError
 | |
|     lines = _iter_source_lines(filename)
 | |
|     for stmt, body in _iter_global(lines):
 | |
|         # At the file top-level we only have to worry about vars & funcs.
 | |
|         if not body:
 | |
|             name, vartype = _parse_var(stmt)
 | |
|             if name:
 | |
|                 yield (None, name, vartype)
 | |
|         else:
 | |
|             funcname, _ = _parse_func(stmt, body)
 | |
|             localvars = _iter_locals(body,
 | |
|                                      _iter_statements=_iter_local,
 | |
|                                      _parse_var=_parse_var,
 | |
|                                      _parse_compound=_parse_compound,
 | |
|                                      )
 | |
|             for name, vartype in localvars:
 | |
|                 yield (funcname, name, vartype)
 | |
| 
 | |
| 
 | |
| def _iter_locals(lines, *,
 | |
|                  _iter_statements=iter_local_statements,
 | |
|                  _parse_var=_parse_var,
 | |
|                  _parse_compound=parse_compound,
 | |
|                  ):
 | |
|     compound = [lines]
 | |
|     while compound:
 | |
|         body = compound.pop(0)
 | |
|         bodylines = body.splitlines()
 | |
|         for stmt, blocks in _iter_statements(bodylines):
 | |
|             if not blocks:
 | |
|                 name, vartype = _parse_var(stmt)
 | |
|                 if name:
 | |
|                     yield (name, vartype)
 | |
|             else:
 | |
|                 headers, bodies = _parse_compound(stmt, blocks)
 | |
|                 for header in headers:
 | |
|                     for line in header:
 | |
|                         name, vartype = _parse_var(line)
 | |
|                         if name:
 | |
|                             yield (name, vartype)
 | |
|                 compound.extend(bodies)
 | |
| 
 | |
| 
 | |
| def iter_all(filename, *,
 | |
|              preprocessed=False,
 | |
|              ):
 | |
|     """Yield a Declaration for each one found.
 | |
| 
 | |
|     If there are duplicates, due to preprocessor conditionals, then
 | |
|     they are checked to make sure they are the same.
 | |
|     """
 | |
|     # XXX For the moment we cheat.
 | |
|     for funcname, name, decl in iter_variables(filename,
 | |
|                                                preprocessed=preprocessed):
 | |
|         yield 'variable', funcname, name, decl
 |