mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 07:31:38 +00:00 
			
		
		
		
	
		
			
	
	
		
			279 lines
		
	
	
	
		
			9.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			279 lines
		
	
	
	
		
			9.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| 
								 | 
							
								import re
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								from ._regexes import (
							 | 
						||
| 
								 | 
							
								    LOCAL as _LOCAL,
							 | 
						||
| 
								 | 
							
								    LOCAL_STATICS as _LOCAL_STATICS,
							 | 
						||
| 
								 | 
							
								)
							 | 
						||
| 
								 | 
							
								from ._common import (
							 | 
						||
| 
								 | 
							
								    log_match,
							 | 
						||
| 
								 | 
							
								    parse_var_decl,
							 | 
						||
| 
								 | 
							
								    set_capture_groups,
							 | 
						||
| 
								 | 
							
								    match_paren,
							 | 
						||
| 
								 | 
							
								)
							 | 
						||
| 
								 | 
							
								from ._compound_decl_body import DECL_BODY_PARSERS
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Group names handed to set_capture_groups() for the _LOCAL pattern.
								# They are listed in the same order in which parse_function_body()
								# unpacks m.groups(), so the two must be kept in sync.
								_LOCAL_GROUPS = (
								    'EMPTY',
								    # inline compound type declaration
								    'INLINE_LEADING',
								    'INLINE_PRE',
								    'INLINE_KIND',
								    'INLINE_NAME',
								    # variable declaration
								    'STORAGE',
								    'VAR_DECL',
								    'VAR_INIT',
								    'VAR_ENDING',
								    # compound statements
								    'COMPOUND_BARE',
								    'COMPOUND_LABELED',
								    'COMPOUND_PAREN',
								    # blocks and simple statements
								    'BLOCK_LEADING',
								    'BLOCK_OPEN',
								    'SIMPLE_STMT',
								    'SIMPLE_ENDING',
								    'BLOCK_CLOSE',
								)
								LOCAL = set_capture_groups(_LOCAL, _LOCAL_GROUPS)
								# Anchored at the start of the remaining text, skipping leading whitespace.
								LOCAL_RE = re.compile(rf'^ \s* {LOCAL}', flags=re.VERBOSE)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Note that parse_function_body() still has trouble with a few files
							 | 
						||
| 
								 | 
							
								# in the CPython codebase.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def parse_function_body(source, name, anon_name):
								    """Placeholder for function-body parsing; not implemented.

								    Always raises NotImplementedError.  Note that a second definition
								    of parse_function_body() further down in this module rebinds the
								    name, so this three-argument stub is not the one callers get.
								    """
								    # XXX
								    raise NotImplementedError()
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def parse_function_body(name, text, resolve, source, anon_name, parent):
								    """Yield ``(item, text)`` pairs for the statements in a function body.

								    Currently disabled: the ``raise`` below fires the first time the
								    generator is advanced, so everything after it is unreachable and is
								    kept only as a reference for the intended implementation.

								    Args:
								        name: Presumably the enclosing function's name on entry; it is
								            rebound for each declaration in the (disabled) loop.
								        text: Source text still to be matched against LOCAL_RE.
								        resolve: Callable invoked as ``resolve(kind, data, name, text)``
								            or ``resolve(kind, data, name, text, parent)``; where its
								            result is used, it becomes the yielded item.
								        source: Passed through to continue_text() and to the inline
								            declaration body parsers.
								        anon_name: Called with ``'inline-'`` to name anonymous inline
								            declarations.
								        parent: Passed to resolve() for statements and variables.

								    Raises:
								        NotImplementedError: Always, on the first iteration.
								    """
								    raise NotImplementedError
								    # NOTE(review): continue_text() is used below but is not imported in
								    # the visible part of this module -- confirm before re-enabling.
								    # For now we do not worry about locals declared in for loop "headers".
								    depth = 1
								    while depth > 0:
								        m = LOCAL_RE.match(text)
								        while not m:
								            # No match yet: pull in more text and try again.
								            text, resolve = continue_text(source, text or '{', resolve)
								            m = LOCAL_RE.match(text)
								        text = text[m.end():]
								        # The order here must match the group names given to LOCAL.
								        (
								         empty,
								         inline_leading, inline_pre, inline_kind, inline_name,
								         storage, decl,
								         var_init, var_ending,
								         compound_bare, compound_labeled, compound_paren,
								         block_leading, block_open,
								         simple_stmt, simple_ending,
								         block_close,
								         ) = m.groups()

								        if empty:
								            log_match('', m)
								            resolve(None, None, None, text)
								            yield None, text
								        elif inline_kind:
								            log_match('', m)
								            kind = inline_kind
								            name = inline_name or anon_name('inline-')
								            data = []  # members
								            # We must set the internal "text" from _iter_source() to the
								            # start of the inline compound body,
								            # Note that this is effectively like a forward reference that
								            # we do not emit.
								            resolve(kind, None, name, text, None)
								            _parse_body = DECL_BODY_PARSERS[kind]
								            ident = f'{kind} {name}'
								            for member, inline, text in _parse_body(text, resolve, source, anon_name, ident):
								                if member:
								                    data.append(member)
								                if inline:
								                    yield from inline
								            # un-inline the decl.  Note that it might not actually be inline.
								            # We handle the case in the "maybe_inline_actual" branch.
								            text = f'{inline_leading or ""} {inline_pre or ""} {kind} {name} {text}'
								            # XXX Should "parent" really be None for inline type decls?
								            yield resolve(kind, data, name, text, None), text
								        elif block_close:
								            log_match('', m)
								            depth -= 1
								            resolve(None, None, None, text)
								            # XXX This isn't great.  Calling resolve() should have
								            # cleared the closing bracket.  However, some code relies
								            # on the yielded value instead of the resolved one.  That
								            # needs to be fixed.
								            yield None, text
								        elif compound_bare:
								            log_match('', m)
								            yield resolve('statement', compound_bare, None, text, parent), text
								        elif compound_labeled:
								            log_match('', m)
								            yield resolve('statement', compound_labeled, None, text, parent), text
								        elif compound_paren:
								            log_match('', m)
								            try:
								                pos = match_paren(text)
								            except ValueError:
								                # The parenthesized head is incomplete: put the keyword
								                # back and fetch more text before retrying.
								                text = f'{compound_paren} {text}'
								                #resolve(None, None, None, text)
								                text, resolve = continue_text(source, text, resolve)
								                yield None, text
								            else:
								                head = text[:pos]
								                text = text[pos:]
								                if compound_paren == 'for':
								                    # XXX Parse "head" as a compound statement.
								                    stmt1, stmt2, stmt3 = head.split(';', 2)
								                    data = {
								                        'compound': compound_paren,
								                        'statements': (stmt1, stmt2, stmt3),
								                    }
								                else:
								                    data = {
								                        'compound': compound_paren,
								                        'statement': head,
								                    }
								                yield resolve('statement', data, None, text, parent), text
								        elif block_open:
								            log_match('', m)
								            depth += 1
								            if block_leading:
								                # An inline block: the last evaluated expression is used
								                # in place of the block.
								                # XXX Combine it with the remainder after the block close.
								                stmt = f'{block_open}{{<expr>}}...;'
								                yield resolve('statement', stmt, None, text, parent), text
								            else:
								                resolve(None, None, None, text)
								                yield None, text
								        elif simple_ending:
								            log_match('', m)
								            yield resolve('statement', simple_stmt, None, text, parent), text
								        elif var_ending:
								            log_match('', m)
								            kind = 'variable'
								            _, name, vartype = parse_var_decl(decl)
								            data = {
								                'storage': storage,
								                'vartype': vartype,
								            }
								            if var_ending == ',':
								                # It was a multi-declaration, so queue up the next one.
								                _, qual, typespec, _ = vartype.values()
								                text = f'{storage or ""} {qual or ""} {typespec} {text}'
								            yield resolve(kind, data, name, text, parent), text
								            if var_init:
								                # The initializer is emitted as a separate statement.
								                _data = f'{name} = {var_init.strip()}'
								                yield resolve('statement', _data, None, text, parent), text
								        else:
								            # This should be unreachable.
								            raise NotImplementedError
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#############################
							 | 
						||
| 
								 | 
							
								# static local variables
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Group names handed to set_capture_groups() for the _LOCAL_STATICS
								# pattern.  They are listed in the same order in which
								# _parse_next_local_static() unpacks m.groups(), so keep them in sync.
								_LOCAL_STATICS_GROUPS = (
								    # inline compound type declaration
								    'INLINE_LEADING',
								    'INLINE_PRE',
								    'INLINE_KIND',
								    'INLINE_NAME',
								    # static variable declaration
								    'STATIC_DECL',
								    'STATIC_INIT',
								    'STATIC_ENDING',
								    # delimiters
								    'DELIM_LEADING',
								    'BLOCK_OPEN',
								    'BLOCK_CLOSE',
								    'STMT_END',
								)
								LOCAL_STATICS = set_capture_groups(_LOCAL_STATICS, _LOCAL_STATICS_GROUPS)
								# Anchored at the start of the line text, skipping leading whitespace.
								LOCAL_STATICS_RE = re.compile(rf'^ \s* {LOCAL_STATICS}', flags=re.VERBOSE)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def parse_function_statics(source, func, anon_name):
								    """Yield the items found while scanning a function body for statics.

								    Lines are pulled from "source" until one matches LOCAL_STATICS_RE;
								    the match is then handed to _parse_next_local_static(), which
								    reports each item together with the updated brace depth.  Scanning
								    stops once the depth drops to zero or "source" is exhausted.

								    Args:
								        source: Iterable of source-info objects.  Each must provide
								            ``.text`` and ``.done()``; it is also passed on to any
								            nested body parser.
								        func: Passed through to _parse_next_local_static(), which uses
								            it as the parent of resolved variables.
								        anon_name: Passed through to _parse_next_local_static() for
								            naming anonymous inline declarations.

								    Yields:
								        Every non-None, non-callable item reported by
								        _parse_next_local_static(), plus everything produced by the
								        callable items (nested body parsers) it reports.
								    """
								    # For now we do not worry about locals declared in for loop "headers".
								    depth = 1
								    # Bind "srcinfo" up front: if "source" yields nothing at all, the
								    # "ran out of lines" branch below would otherwise hit an unbound name.
								    srcinfo = None
								    while depth > 0:
								        for srcinfo in source:
								            m = LOCAL_STATICS_RE.match(srcinfo.text)
								            if m:
								                break
								        else:
								            # We ran out of lines.
								            if srcinfo is not None:
								                srcinfo.done()
								            return
								        for item, depth in _parse_next_local_static(m, srcinfo,
								                                                    anon_name, func, depth):
								            if callable(item):
								                # A nested body parser: let it consume from "source".
								                parse_body = item
								                yield from parse_body(source)
								            elif item is not None:
								                yield item
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def _parse_next_local_static(m, srcinfo, anon_name, func, depth):
								    """Process one LOCAL_STATICS_RE match, yielding ``(item, depth)`` pairs.

								    Exactly one of three cases is handled per match:

								    * an inline compound declaration: yields the forward declaration
								      from srcinfo.resolve(), then a callable that parses the body;
								    * a static variable declaration: yields the resolved variable and
								      then queues the text to look at next via srcinfo.advance();
								    * anything else (block open/close or statement end): adjusts
								      "depth", advances past the match and yields ``(None, depth)``.

								    Args:
								        m: Match object produced by LOCAL_STATICS_RE.
								        srcinfo: Source-info object for the matched line; its ``text``,
								            ``resolve``, ``nest``, ``resume`` and ``advance`` members
								            are used here.
								        anon_name: Called with ``'inline-'`` when an inline
								            declaration has no name.
								        func: Used as ``parent`` when resolving a static variable.
								        depth: Current brace nesting depth.

								    Raises:
								        NotImplementedError: If no group of the match is recognized.
								    """
								    (
								        inline_leading, inline_pre, inline_kind, inline_name,
								        static_decl, static_init, static_ending,
								        _delim_leading, block_open, block_close, stmt_end,
								    ) = m.groups()
								    remainder = srcinfo.text[m.end():]

								    if inline_kind:
								        log_match('func inline', m)
								        kind = inline_kind
								        name = inline_name or anon_name('inline-')
								        # Immediately emit a forward declaration.
								        yield srcinfo.resolve(kind, name=name, data=None), depth

								        # un-inline the decl.  Note that it might not actually be inline.
								        # We handle the case in the "maybe_inline_actual" branch.
								        uninlined = f'{inline_leading or ""} {inline_pre or ""} {kind} {name}'
								        srcinfo.nest(remainder, uninlined)

								        def parse_body(source):
								            """Parse the inline body, collecting its fields as members."""
								            members = []
								            body_items = DECL_BODY_PARSERS[kind](
								                source, anon_name, f'{kind} {name}')
								            for entry in body_items:
								                if entry.kind != 'field':
								                    # Not a member of this declaration: pass it on.
								                    yield entry
								                    continue
								                members.append(entry)
								            # XXX Should "parent" really be None for inline type decls?
								            yield srcinfo.resolve(kind, members, name, parent=None)

								            srcinfo.resume()

								        yield parse_body, depth
								        return

								    if static_decl:
								        log_match('local variable', m)
								        _, name, data = parse_var_decl(static_decl)

								        yield srcinfo.resolve('variable', data, name, parent=func), depth

								        # Decide what text should be examined next.
								        if static_init:
								            pending = f'{name} {static_init} {remainder}'
								        elif static_ending == ',':
								            # It was a multi-declaration, so queue up the next one.
								            _, qual, typespec, _ = data.values()
								            pending = f'static {qual or ""} {typespec} {remainder}'
								        else:
								            pending = ''
								        srcinfo.advance(pending)
								        return

								    log_match('func other', m)
								    if block_open:
								        depth += 1
								    elif block_close:
								        depth -= 1
								    elif not stmt_end:
								        # This should be unreachable.
								        raise NotImplementedError
								    srcinfo.advance(remainder)
								    yield None, depth
							 |