diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index 16d42c010f6..b5e3a84b455 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -978,6 +978,12 @@ their subgroups based on the types of the contained exceptions. raises a :exc:`TypeError` if any contained exception is not an :exc:`Exception` subclass. + .. impl-detail:: + + The ``excs`` parameter may be any sequence, but lists and tuples are + specifically processed more efficiently here. For optimal performance, + pass a tuple as ``excs``. + .. attribute:: message The ``msg`` argument to the constructor. This is a read-only attribute. diff --git a/Include/cpython/pyerrors.h b/Include/cpython/pyerrors.h index 6b63d304b0d..be2e3b641c2 100644 --- a/Include/cpython/pyerrors.h +++ b/Include/cpython/pyerrors.h @@ -18,6 +18,7 @@ typedef struct { PyException_HEAD PyObject *msg; PyObject *excs; + PyObject *excs_str; } PyBaseExceptionGroupObject; typedef struct { diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 08d71070ddc..22df26bd37a 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -135,6 +135,13 @@ struct _ts { /* Pointer to currently executing frame. */ struct _PyInterpreterFrame *current_frame; + /* Pointer to the base frame (bottommost sentinel frame). + Used by profilers to validate complete stack unwinding. + Points to the embedded base_frame in _PyThreadStateImpl. + The frame is embedded there rather than here because _PyInterpreterFrame + is defined in internal headers that cannot be exposed in the public API. 
*/ + struct _PyInterpreterFrame *base_frame; + struct _PyInterpreterFrame *last_profiled_frame; Py_tracefunc c_profilefunc; diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index bfd86c08887..1cdc4449b17 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ -102,6 +102,7 @@ typedef struct _Py_DebugOffsets { uint64_t next; uint64_t interp; uint64_t current_frame; + uint64_t base_frame; uint64_t last_profiled_frame; uint64_t thread_id; uint64_t native_thread_id; @@ -273,6 +274,7 @@ typedef struct _Py_DebugOffsets { .next = offsetof(PyThreadState, next), \ .interp = offsetof(PyThreadState, interp), \ .current_frame = offsetof(PyThreadState, current_frame), \ + .base_frame = offsetof(PyThreadState, base_frame), \ .last_profiled_frame = offsetof(PyThreadState, last_profiled_frame), \ .thread_id = offsetof(PyThreadState, thread_id), \ .native_thread_id = offsetof(PyThreadState, native_thread_id), \ diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index 50048801b2e..c4f723ac8ab 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -10,6 +10,7 @@ extern "C" { #include "pycore_brc.h" // struct _brc_thread_state #include "pycore_freelist_state.h" // struct _Py_freelists +#include "pycore_interpframe_structs.h" // _PyInterpreterFrame #include "pycore_mimalloc.h" // struct _mimalloc_thread_state #include "pycore_qsbr.h" // struct qsbr #include "pycore_uop.h" // struct _PyUOpInstruction @@ -61,6 +62,10 @@ typedef struct _PyThreadStateImpl { // semi-public fields are in PyThreadState. PyThreadState base; + // Embedded base frame - sentinel at the bottom of the frame stack. + // Used by profiling/sampling to detect incomplete stack traces. + _PyInterpreterFrame base_frame; + // The reference count field is used to synchronize deallocation of the // thread state during runtime finalization. 
Py_ssize_t refcount; diff --git a/InternalDocs/frames.md b/InternalDocs/frames.md index d56e5481d3c..60ab2055afa 100644 --- a/InternalDocs/frames.md +++ b/InternalDocs/frames.md @@ -111,6 +111,35 @@ ### Shim frames instruction which cleans up the shim frame and returns. +### Base frame + +Each thread state contains an embedded `_PyInterpreterFrame` called the "base frame" +that serves as a sentinel at the bottom of the frame stack. This frame is allocated +in `_PyThreadStateImpl` (the internal extension of `PyThreadState`) and initialized +when the thread state is created. The `owner` field is set to `FRAME_OWNED_BY_INTERPRETER`. + +External profilers and sampling tools can validate that they have successfully unwound +the complete call stack by checking that the frame chain terminates at the base frame. +The `PyThreadState.base_frame` pointer provides the expected address to compare against. +If a stack walk doesn't reach this frame, the sample is incomplete (possibly due to a +race condition) and should be discarded. + +The base frame is embedded in `_PyThreadStateImpl` rather than `PyThreadState` because +`_PyInterpreterFrame` is defined in internal headers that cannot be exposed in the +public API. A pointer (`PyThreadState.base_frame`) is provided for profilers to access +the address without needing internal headers. + +See the initialization in `new_threadstate()` in [Python/pystate.c](../Python/pystate.c). + +#### How profilers should use the base frame + +External profilers should read `tstate->base_frame` before walking the stack, then +walk from `tstate->current_frame` following `frame->previous` pointers until reaching +a frame with `owner == FRAME_OWNED_BY_INTERPRETER`. After the walk, verify that the +last frame address matches `base_frame`. If not, discard the sample as incomplete +since the frame chain may have been in an inconsistent state due to concurrent updates. 
+ + ### Remote Profiling Frame Cache The `last_profiled_frame` field in `PyThreadState` supports an optimization for diff --git a/Lib/argparse.py b/Lib/argparse.py index 9e2e076936c..398825508f5 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -189,6 +189,8 @@ def __init__( self._whitespace_matcher = _re.compile(r'\s+', _re.ASCII) self._long_break_matcher = _re.compile(r'\n\n\n+') + self._set_color(False) + def _set_color(self, color): from _colorize import can_colorize, decolor, get_theme @@ -334,31 +336,15 @@ def _format_usage(self, usage, actions, groups, prefix): elif usage is None: prog = '%(prog)s' % dict(prog=self._prog) - # split optionals from positionals - optionals = [] - positionals = [] - for action in actions: - if action.option_strings: - optionals.append(action) - else: - positionals.append(action) - + parts, pos_start = self._get_actions_usage_parts(actions, groups) # build full usage string - format = self._format_actions_usage - action_usage = format(optionals + positionals, groups) - usage = ' '.join([s for s in [prog, action_usage] if s]) + usage = ' '.join(filter(None, [prog, *parts])) # wrap the usage parts if it's too long text_width = self._width - self._current_indent if len(prefix) + len(self._decolor(usage)) > text_width: # break usage into wrappable parts - # keep optionals and positionals together to preserve - # mutually exclusive group formatting (gh-75949) - all_actions = optionals + positionals - parts, pos_start = self._get_actions_usage_parts_with_split( - all_actions, groups, len(optionals) - ) opt_parts = parts[:pos_start] pos_parts = parts[pos_start:] @@ -417,125 +403,114 @@ def get_lines(parts, indent, prefix=None): # prefix with 'usage:' return f'{t.usage}{prefix}{t.reset}{usage}\n\n' - def _format_actions_usage(self, actions, groups): - return ' '.join(self._get_actions_usage_parts(actions, groups)) - def _is_long_option(self, string): return len(string) > 2 def _get_actions_usage_parts(self, actions, groups): - parts, 
_ = self._get_actions_usage_parts_with_split(actions, groups) - return parts - - def _get_actions_usage_parts_with_split(self, actions, groups, opt_count=None): """Get usage parts with split index for optionals/positionals. Returns (parts, pos_start) where pos_start is the index in parts - where positionals begin. When opt_count is None, pos_start is None. + where positionals begin. This preserves mutually exclusive group formatting across the optionals/positionals boundary (gh-75949). """ - # find group indices and identify actions in groups - group_actions = set() - inserts = {} + actions = [action for action in actions if action.help is not SUPPRESS] + # group actions by mutually exclusive groups + action_groups = dict.fromkeys(actions) for group in groups: - if not group._group_actions: - raise ValueError(f'empty group {group}') - - if all(action.help is SUPPRESS for action in group._group_actions): - continue - - try: - start = min(actions.index(item) for item in group._group_actions) - except ValueError: - continue - else: - end = start + len(group._group_actions) - if set(actions[start:end]) == set(group._group_actions): - group_actions.update(group._group_actions) - inserts[start, end] = group + for action in group._group_actions: + if action in action_groups: + action_groups[action] = group + # positional arguments keep their position + positionals = [] + for action in actions: + if not action.option_strings: + group = action_groups.pop(action) + if group: + group_actions = [ + action2 for action2 in group._group_actions + if action2.option_strings and + action_groups.pop(action2, None) + ] + [action] + positionals.append((group.required, group_actions)) + else: + positionals.append((None, [action])) + # the remaining optional arguments are sorted by the position of + # the first option in the group + optionals = [] + for action in actions: + if action.option_strings and action in action_groups: + group = action_groups.pop(action) + if group: + 
group_actions = [action] + [ + action2 for action2 in group._group_actions + if action2.option_strings and + action_groups.pop(action2, None) + ] + optionals.append((group.required, group_actions)) + else: + optionals.append((None, [action])) # collect all actions format strings parts = [] t = self._theme - for action in actions: + pos_start = None + for i, (required, group) in enumerate(optionals + positionals): + start = len(parts) + if i == len(optionals): + pos_start = start + in_group = len(group) > 1 + for action in group: + # produce all arg strings + if not action.option_strings: + default = self._get_default_metavar_for_positional(action) + part = self._format_args(action, default) + # if it's in a group, strip the outer [] + if in_group: + if part[0] == '[' and part[-1] == ']': + part = part[1:-1] + part = t.summary_action + part + t.reset - # suppressed arguments are marked with None - if action.help is SUPPRESS: - part = None - - # produce all arg strings - elif not action.option_strings: - default = self._get_default_metavar_for_positional(action) - part = self._format_args(action, default) - # if it's in a group, strip the outer [] - if action in group_actions: - if part[0] == '[' and part[-1] == ']': - part = part[1:-1] - part = t.summary_action + part + t.reset - - # produce the first way to invoke the option in brackets - else: - option_string = action.option_strings[0] - if self._is_long_option(option_string): - option_color = t.summary_long_option + # produce the first way to invoke the option in brackets else: - option_color = t.summary_short_option + option_string = action.option_strings[0] + if self._is_long_option(option_string): + option_color = t.summary_long_option + else: + option_color = t.summary_short_option - # if the Optional doesn't take a value, format is: - # -s or --long - if action.nargs == 0: - part = action.format_usage() - part = f"{option_color}{part}{t.reset}" + # if the Optional doesn't take a value, format is: + # -s or 
--long + if action.nargs == 0: + part = action.format_usage() + part = f"{option_color}{part}{t.reset}" - # if the Optional takes a value, format is: - # -s ARGS or --long ARGS - else: - default = self._get_default_metavar_for_optional(action) - args_string = self._format_args(action, default) - part = ( - f"{option_color}{option_string} " - f"{t.summary_label}{args_string}{t.reset}" - ) + # if the Optional takes a value, format is: + # -s ARGS or --long ARGS + else: + default = self._get_default_metavar_for_optional(action) + args_string = self._format_args(action, default) + part = ( + f"{option_color}{option_string} " + f"{t.summary_label}{args_string}{t.reset}" + ) - # make it look optional if it's not required or in a group - if not action.required and action not in group_actions: - part = '[%s]' % part + # make it look optional if it's not required or in a group + if not (action.required or required or in_group): + part = '[%s]' % part - # add the action string to the list - parts.append(part) + # add the action string to the list + parts.append(part) - # group mutually exclusive actions - inserted_separators_indices = set() - for start, end in sorted(inserts, reverse=True): - group = inserts[start, end] - group_parts = [item for item in parts[start:end] if item is not None] - group_size = len(group_parts) - if group.required: - open, close = "()" if group_size > 1 else ("", "") - else: - open, close = "[]" - group_parts[0] = open + group_parts[0] - group_parts[-1] = group_parts[-1] + close - for i, part in enumerate(group_parts[:-1], start=start): - # insert a separator if not already done in a nested group - if i not in inserted_separators_indices: - parts[i] = part + ' |' - inserted_separators_indices.add(i) - parts[start + group_size - 1] = group_parts[-1] - for i in range(start + group_size, end): - parts[i] = None + if in_group: + parts[start] = ('(' if required else '[') + parts[start] + for i in range(start, len(parts) - 1): + parts[i] += ' |' + 
parts[-1] += ')' if required else ']' - # if opt_count is provided, calculate where positionals start in - # the final parts list (for wrapping onto separate lines). - # Count before filtering None entries since indices shift after. - if opt_count is not None: - pos_start = sum(1 for p in parts[:opt_count] if p is not None) - else: - pos_start = None - - # return the usage parts and split point (gh-75949) - return [item for item in parts if item is not None], pos_start + if pos_start is None: + pos_start = len(parts) + return parts, pos_start def _format_text(self, text): if '%(prog)' in text: diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 248a92db74e..7c5eed21219 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -3398,12 +3398,11 @@ def test_help_subparser_all_mutually_exclusive_group_members_suppressed(self): ''' self.assertEqual(cmd_foo.format_help(), textwrap.dedent(expected)) - def test_empty_group(self): + def test_usage_empty_group(self): # See issue 26952 - parser = argparse.ArgumentParser() + parser = ErrorRaisingArgumentParser(prog='PROG') group = parser.add_mutually_exclusive_group() - with self.assertRaises(ValueError): - parser.parse_args(['-h']) + self.assertEqual(parser.format_usage(), 'usage: PROG [-h]\n') def test_nested_mutex_groups(self): parser = argparse.ArgumentParser(prog='PROG') @@ -3671,25 +3670,29 @@ def get_parser(self, required): group.add_argument('-b', action='store_true', help='b help') parser.add_argument('-y', action='store_true', help='y help') group.add_argument('-c', action='store_true', help='c help') + parser.add_argument('-z', action='store_true', help='z help') return parser failures = ['-a -b', '-b -c', '-a -c', '-a -b -c'] successes = [ - ('-a', NS(a=True, b=False, c=False, x=False, y=False)), - ('-b', NS(a=False, b=True, c=False, x=False, y=False)), - ('-c', NS(a=False, b=False, c=True, x=False, y=False)), - ('-a -x', NS(a=True, b=False, c=False, x=True, y=False)), - ('-y 
-b', NS(a=False, b=True, c=False, x=False, y=True)), - ('-x -y -c', NS(a=False, b=False, c=True, x=True, y=True)), + ('-a', NS(a=True, b=False, c=False, x=False, y=False, z=False)), + ('-b', NS(a=False, b=True, c=False, x=False, y=False, z=False)), + ('-c', NS(a=False, b=False, c=True, x=False, y=False, z=False)), + ('-a -x', NS(a=True, b=False, c=False, x=True, y=False, z=False)), + ('-y -b', NS(a=False, b=True, c=False, x=False, y=True, z=False)), + ('-x -y -c', NS(a=False, b=False, c=True, x=True, y=True, z=False)), ] successes_when_not_required = [ - ('', NS(a=False, b=False, c=False, x=False, y=False)), - ('-x', NS(a=False, b=False, c=False, x=True, y=False)), - ('-y', NS(a=False, b=False, c=False, x=False, y=True)), + ('', NS(a=False, b=False, c=False, x=False, y=False, z=False)), + ('-x', NS(a=False, b=False, c=False, x=True, y=False, z=False)), + ('-y', NS(a=False, b=False, c=False, x=False, y=True, z=False)), ] - usage_when_required = usage_when_not_required = '''\ - usage: PROG [-h] [-x] [-a] [-b] [-y] [-c] + usage_when_not_required = '''\ + usage: PROG [-h] [-x] [-a | -b | -c] [-y] [-z] + ''' + usage_when_required = '''\ + usage: PROG [-h] [-x] (-a | -b | -c) [-y] [-z] ''' help = '''\ @@ -3700,6 +3703,7 @@ def get_parser(self, required): -b b help -y y help -c c help + -z z help ''' @@ -3753,23 +3757,27 @@ def get_parser(self, required): group.add_argument('a', nargs='?', help='a help') group.add_argument('-b', action='store_true', help='b help') group.add_argument('-c', action='store_true', help='c help') + parser.add_argument('-z', action='store_true', help='z help') return parser failures = ['X A -b', '-b -c', '-c X A'] successes = [ - ('X A', NS(a='A', b=False, c=False, x='X', y=False)), - ('X -b', NS(a=None, b=True, c=False, x='X', y=False)), - ('X -c', NS(a=None, b=False, c=True, x='X', y=False)), - ('X A -y', NS(a='A', b=False, c=False, x='X', y=True)), - ('X -y -b', NS(a=None, b=True, c=False, x='X', y=True)), + ('X A', NS(a='A', b=False, 
c=False, x='X', y=False, z=False)), + ('X -b', NS(a=None, b=True, c=False, x='X', y=False, z=False)), + ('X -c', NS(a=None, b=False, c=True, x='X', y=False, z=False)), + ('X A -y', NS(a='A', b=False, c=False, x='X', y=True, z=False)), + ('X -y -b', NS(a=None, b=True, c=False, x='X', y=True, z=False)), ] successes_when_not_required = [ - ('X', NS(a=None, b=False, c=False, x='X', y=False)), - ('X -y', NS(a=None, b=False, c=False, x='X', y=True)), + ('X', NS(a=None, b=False, c=False, x='X', y=False, z=False)), + ('X -y', NS(a=None, b=False, c=False, x='X', y=True, z=False)), ] - usage_when_required = usage_when_not_required = '''\ - usage: PROG [-h] [-y] [-b] [-c] x [a] + usage_when_not_required = '''\ + usage: PROG [-h] [-y] [-z] x [-b | -c | a] + ''' + usage_when_required = '''\ + usage: PROG [-h] [-y] [-z] x (-b | -c | a) ''' help = '''\ @@ -3782,6 +3790,7 @@ def get_parser(self, required): -y y help -b b help -c c help + -z z help ''' @@ -4989,9 +4998,9 @@ def test_mutex_groups_with_mixed_optionals_positionals_wrap(self): g.add_argument('positional', nargs='?') usage = textwrap.dedent('''\ - usage: PROG [-h] [-v | -q | -x [EXTRA_LONG_OPTION_NAME] | - -y [YET_ANOTHER_LONG_OPTION] | - positional] + usage: PROG [-h] + [-v | -q | -x [EXTRA_LONG_OPTION_NAME] | + -y [YET_ANOTHER_LONG_OPTION] | positional] ''') self.assertEqual(parser.format_usage(), usage) @@ -5685,6 +5694,11 @@ def custom_formatter(prog): a-very-long-command command that does something ''')) + def test_direct_formatter_instantiation(self): + formatter = argparse.HelpFormatter(prog="program") + formatter.add_usage(usage=None, actions=[], groups=[]) + help_text = formatter.format_help() + self.assertEqual(help_text, "usage: program\n") # ===================================== # Optional/Positional constructor tests diff --git a/Lib/test/test_ast/test_ast.py b/Lib/test/test_ast/test_ast.py index 608ffdfad12..d2b76b46dbe 100644 --- a/Lib/test/test_ast/test_ast.py +++ b/Lib/test/test_ast/test_ast.py @@ 
-992,7 +992,8 @@ def next(self): @skip_wasi_stack_overflow() @skip_emscripten_stack_overflow() def test_ast_recursion_limit(self): - crash_depth = 500_000 + # Android test devices have less memory. + crash_depth = 100_000 if sys.platform == "android" else 500_000 success_depth = 200 if _testinternalcapi is not None: remaining = _testinternalcapi.get_c_recursion_remaining() diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 30f21875b22..fa611f480d6 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -728,7 +728,8 @@ def test_yet_more_evil_still_undecodable(self): def test_compiler_recursion_limit(self): # Compiler frames are small limit = 100 - crash_depth = limit * 5000 + # Android test devices have less memory. + crash_depth = limit * (1000 if sys.platform == "android" else 5000) success_depth = limit def check_limit(prefix, repeated, mode="single"): @@ -1036,11 +1037,13 @@ def test_path_like_objects(self): # An implicit test for PyUnicode_FSDecoder(). compile("42", FakePath("test_compile_pathlike"), "single") + # bpo-31113: Stack overflow when compile a long sequence of + # complex statements. @support.requires_resource('cpu') def test_stack_overflow(self): - # bpo-31113: Stack overflow when compile a long sequence of - # complex statements. - compile("if a: b\n" * 200000, "", "exec") + # Android test devices have less memory. + size = 100_000 if sys.platform == "android" else 200_000 + compile("if a: b\n" * size, "", "exec") # Multiple users rely on the fact that CPython does not generate # bytecode for dead code blocks. See bpo-37500 for more context. 
diff --git a/Lib/test/test_exception_group.py b/Lib/test/test_exception_group.py index 5df2c41c6b5..ace7ec72917 100644 --- a/Lib/test/test_exception_group.py +++ b/Lib/test/test_exception_group.py @@ -1,4 +1,4 @@ -import collections.abc +import collections import types import unittest from test.support import skip_emscripten_stack_overflow, skip_wasi_stack_overflow, exceeds_recursion_limit @@ -193,6 +193,77 @@ class MyEG(ExceptionGroup): "MyEG('flat', [ValueError(1), TypeError(2)]), " "TypeError(2)])")) + def test_exceptions_mutation(self): + class MyEG(ExceptionGroup): + pass + + excs = [ValueError(1), TypeError(2)] + eg = MyEG('test', excs) + + self.assertEqual(repr(eg), "MyEG('test', [ValueError(1), TypeError(2)])") + excs.clear() + + # Ensure that clearing the exceptions sequence doesn't change the repr. + self.assertEqual(repr(eg), "MyEG('test', [ValueError(1), TypeError(2)])") + + # Ensure that the args are still as passed. + self.assertEqual(eg.args, ('test', [])) + + excs = (ValueError(1), KeyboardInterrupt(2)) + eg = BaseExceptionGroup('test', excs) + + # Ensure that immutable sequences still work fine. + self.assertEqual( + repr(eg), + "BaseExceptionGroup('test', (ValueError(1), KeyboardInterrupt(2)))" + ) + + # Test non-standard custom sequences. + excs = collections.deque([ValueError(1), TypeError(2)]) + eg = ExceptionGroup('test', excs) + + self.assertEqual( + repr(eg), + "ExceptionGroup('test', deque([ValueError(1), TypeError(2)]))" + ) + excs.clear() + + # Ensure that clearing the exceptions sequence doesn't change the repr. 
+ self.assertEqual( + repr(eg), + "ExceptionGroup('test', deque([ValueError(1), TypeError(2)]))" + ) + + def test_repr_raises(self): + class MySeq(collections.abc.Sequence): + def __init__(self, raises): + self.raises = raises + + def __len__(self): + return 1 + + def __getitem__(self, index): + if index == 0: + return ValueError(1) + raise IndexError + + def __repr__(self): + if self.raises: + raise self.raises + return None + + seq = MySeq(None) + with self.assertRaisesRegex( + TypeError, + r".*MySeq\.__repr__\(\) must return a str, not NoneType" + ): + ExceptionGroup("test", seq) + + seq = MySeq(ValueError) + with self.assertRaises(ValueError): + BaseExceptionGroup("test", seq) + + def create_simple_eg(): excs = [] diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 971708b3e09..53fdf166e25 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -1,7 +1,7 @@ -import contextlib import unittest import os import textwrap +import contextlib import importlib import sys import socket @@ -216,33 +216,13 @@ def requires_subinterpreters(meth): # Simple wrapper functions for RemoteUnwinder # ============================================================================ -# Errors that can occur transiently when reading process memory without synchronization -RETRIABLE_ERRORS = ( - "Task list appears corrupted", - "Invalid linked list structure reading remote memory", - "Unknown error reading memory", - "Unhandled frame owner", - "Failed to parse initial frame", - "Failed to process frame chain", - "Failed to unwind stack", -) - - -def _is_retriable_error(exc): - """Check if an exception is a transient error that should be retried.""" - msg = str(exc) - return any(msg.startswith(err) or err in msg for err in RETRIABLE_ERRORS) - - def get_stack_trace(pid): for _ in busy_retry(SHORT_TIMEOUT): try: unwinder = RemoteUnwinder(pid, all_threads=True, debug=True) return unwinder.get_stack_trace() 
except RuntimeError as e: - if _is_retriable_error(e): - continue - raise + continue raise RuntimeError("Failed to get stack trace after retries") @@ -252,9 +232,7 @@ def get_async_stack_trace(pid): unwinder = RemoteUnwinder(pid, debug=True) return unwinder.get_async_stack_trace() except RuntimeError as e: - if _is_retriable_error(e): - continue - raise + continue raise RuntimeError("Failed to get async stack trace after retries") @@ -264,9 +242,7 @@ def get_all_awaited_by(pid): unwinder = RemoteUnwinder(pid, debug=True) return unwinder.get_all_awaited_by() except RuntimeError as e: - if _is_retriable_error(e): - continue - raise + continue raise RuntimeError("Failed to get all awaited_by after retries") @@ -2334,18 +2310,13 @@ def make_unwinder(cache_frames=True): def _get_frames_with_retry(self, unwinder, required_funcs): """Get frames containing required_funcs, with retry for transient errors.""" for _ in range(MAX_TRIES): - try: + with contextlib.suppress(OSError, RuntimeError): traces = unwinder.get_stack_trace() for interp in traces: for thread in interp.threads: funcs = {f.funcname for f in thread.frame_info} if required_funcs.issubset(funcs): return thread.frame_info - except RuntimeError as e: - if _is_retriable_error(e): - pass - else: - raise time.sleep(0.1) return None @@ -2868,70 +2839,39 @@ def foo2(): make_unwinder, ): unwinder = make_unwinder(cache_frames=True) - buffer = b"" - def recv_msg(): - """Receive a single message from socket.""" - nonlocal buffer - while b"\n" not in buffer: - chunk = client_socket.recv(256) - if not chunk: - return None - buffer += chunk - msg, buffer = buffer.split(b"\n", 1) - return msg - - def get_thread_frames(target_funcs): - """Get frames for thread matching target functions.""" - retries = 0 - for _ in busy_retry(SHORT_TIMEOUT): - if retries >= 5: - break - retries += 1 - # On Windows, ReadProcessMemory can fail with OSError - # (WinError 299) when frame pointers are in flux - with contextlib.suppress(RuntimeError, 
OSError): - traces = unwinder.get_stack_trace() - for interp in traces: - for thread in interp.threads: - funcs = [f.funcname for f in thread.frame_info] - if any(f in funcs for f in target_funcs): - return funcs - return None + # Message dispatch table: signal -> required functions for that thread + dispatch = { + b"t1:baz1": {"baz1", "bar1", "foo1"}, + b"t2:baz2": {"baz2", "bar2", "foo2"}, + b"t1:blech1": {"blech1", "foo1"}, + b"t2:blech2": {"blech2", "foo2"}, + } # Track results for each sync point results = {} - # Process 4 sync points: baz1, baz2, blech1, blech2 - # With the lock, threads are serialized - handle one at a time - for _ in range(4): - msg = recv_msg() - self.assertIsNotNone(msg, "Expected message from subprocess") + # Process 4 sync points (order depends on thread scheduling) + buffer = _wait_for_signal(client_socket, b"\n") + for i in range(4): + # Extract first message from buffer + msg, sep, buffer = buffer.partition(b"\n") + self.assertIn(msg, dispatch, f"Unexpected message: {msg!r}") - # Determine which thread/function and take snapshot - if msg == b"t1:baz1": - funcs = get_thread_frames(["baz1", "bar1", "foo1"]) - self.assertIsNotNone(funcs, "Thread 1 not found at baz1") - results["t1:baz1"] = funcs - elif msg == b"t2:baz2": - funcs = get_thread_frames(["baz2", "bar2", "foo2"]) - self.assertIsNotNone(funcs, "Thread 2 not found at baz2") - results["t2:baz2"] = funcs - elif msg == b"t1:blech1": - funcs = get_thread_frames(["blech1", "foo1"]) - self.assertIsNotNone(funcs, "Thread 1 not found at blech1") - results["t1:blech1"] = funcs - elif msg == b"t2:blech2": - funcs = get_thread_frames(["blech2", "foo2"]) - self.assertIsNotNone(funcs, "Thread 2 not found at blech2") - results["t2:blech2"] = funcs + # Sample frames for the thread at this sync point + required_funcs = dispatch[msg] + frames = self._get_frames_with_retry(unwinder, required_funcs) + self.assertIsNotNone(frames, f"Thread not found for {msg!r}") + results[msg] = [f.funcname for f 
in frames] - # Release thread to continue + # Release thread and wait for next message (if not last) client_socket.sendall(b"k") + if i < 3: + buffer += _wait_for_signal(client_socket, b"\n") # Validate Phase 1: baz snapshots - t1_baz = results.get("t1:baz1") - t2_baz = results.get("t2:baz2") + t1_baz = results.get(b"t1:baz1") + t2_baz = results.get(b"t2:baz2") self.assertIsNotNone(t1_baz, "Missing t1:baz1 snapshot") self.assertIsNotNone(t2_baz, "Missing t2:baz2 snapshot") @@ -2956,8 +2896,8 @@ def get_thread_frames(target_funcs): self.assertNotIn("foo1", t2_baz) # Validate Phase 2: blech snapshots (cache invalidation test) - t1_blech = results.get("t1:blech1") - t2_blech = results.get("t2:blech2") + t1_blech = results.get(b"t1:blech1") + t2_blech = results.get(b"t2:blech2") self.assertIsNotNone(t1_blech, "Missing t1:blech1 snapshot") self.assertIsNotNone(t2_blech, "Missing t2:blech2 snapshot") diff --git a/Lib/test/test_profiling/test_sampling_profiler/helpers.py b/Lib/test/test_profiling/test_sampling_profiler/helpers.py index f1c01afd0fa..0e32d8dd9ea 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/helpers.py +++ b/Lib/test/test_profiling/test_sampling_profiler/helpers.py @@ -38,12 +38,88 @@ SubprocessInfo = namedtuple("SubprocessInfo", ["process", "socket"]) +def _wait_for_signal(sock, expected_signals, timeout=SHORT_TIMEOUT): + """ + Wait for expected signal(s) from a socket with proper timeout and EOF handling. 
+ + Args: + sock: Connected socket to read from + expected_signals: Single bytes object or list of bytes objects to wait for + timeout: Socket timeout in seconds + + Returns: + bytes: Complete accumulated response buffer + + Raises: + RuntimeError: If connection closed before signal received or timeout + """ + if isinstance(expected_signals, bytes): + expected_signals = [expected_signals] + + sock.settimeout(timeout) + buffer = b"" + + while True: + # Check if all expected signals are in buffer + if all(sig in buffer for sig in expected_signals): + return buffer + + try: + chunk = sock.recv(4096) + if not chunk: + raise RuntimeError( + f"Connection closed before receiving expected signals. " + f"Expected: {expected_signals}, Got: {buffer[-200:]!r}" + ) + buffer += chunk + except socket.timeout: + raise RuntimeError( + f"Timeout waiting for signals. " + f"Expected: {expected_signals}, Got: {buffer[-200:]!r}" + ) from None + except OSError as e: + raise RuntimeError( + f"Socket error while waiting for signals: {e}. " + f"Expected: {expected_signals}, Got: {buffer[-200:]!r}" + ) from None + + +def _cleanup_sockets(*sockets): + """Safely close multiple sockets, ignoring errors.""" + for sock in sockets: + if sock is not None: + try: + sock.close() + except OSError: + pass + + +def _cleanup_process(proc, timeout=SHORT_TIMEOUT): + """Terminate a process gracefully, escalating to kill if needed.""" + if proc.poll() is not None: + return + proc.terminate() + try: + proc.wait(timeout=timeout) + return + except subprocess.TimeoutExpired: + pass + proc.kill() + try: + proc.wait(timeout=timeout) + except subprocess.TimeoutExpired: + pass # Process refuses to die, nothing more we can do + + @contextlib.contextmanager -def test_subprocess(script): +def test_subprocess(script, wait_for_working=False): """Context manager to create a test subprocess with socket synchronization. 
Args: - script: Python code to execute in the subprocess + script: Python code to execute in the subprocess. If wait_for_working + is True, script should send b"working" after starting work. + wait_for_working: If True, wait for both "ready" and "working" signals. + Default False for backward compatibility. Yields: SubprocessInfo: Named tuple with process and socket objects @@ -80,19 +156,18 @@ def test_subprocess(script): # Wait for process to connect and send ready signal client_socket, _ = server_socket.accept() server_socket.close() - response = client_socket.recv(1024) - if response != b"ready": - raise RuntimeError( - f"Unexpected response from subprocess: {response!r}" - ) + server_socket = None + + # Wait for ready signal, and optionally working signal + if wait_for_working: + _wait_for_signal(client_socket, [b"ready", b"working"]) + else: + _wait_for_signal(client_socket, b"ready") yield SubprocessInfo(proc, client_socket) finally: - if client_socket is not None: - client_socket.close() - if proc.poll() is None: - proc.kill() - proc.wait() + _cleanup_sockets(client_socket, server_socket) + _cleanup_process(proc) def close_and_unlink(file): diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_advanced.py b/Lib/test/test_profiling/test_sampling_profiler/test_advanced.py index 94946d74aa4..843fb3b7416 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_advanced.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_advanced.py @@ -39,32 +39,26 @@ def setUpClass(cls): import gc class ExpensiveGarbage: - """Class that triggers GC with expensive finalizer (callback).""" def __init__(self): self.cycle = self def __del__(self): - # CPU-intensive work in the finalizer callback result = 0 for i in range(100000): result += i * i if i % 1000 == 0: result = result % 1000000 -def main_loop(): - """Main loop that triggers GC with expensive callback.""" - while True: - ExpensiveGarbage() - gc.collect() - -if __name__ == "__main__": - 
main_loop() +_test_sock.sendall(b"working") +while True: + ExpensiveGarbage() + gc.collect() ''' def test_gc_frames_enabled(self): """Test that GC frames appear when gc tracking is enabled.""" with ( - test_subprocess(self.gc_test_script) as subproc, + test_subprocess(self.gc_test_script, wait_for_working=True) as subproc, io.StringIO() as captured_output, mock.patch("sys.stdout", captured_output), ): @@ -94,7 +88,7 @@ def test_gc_frames_enabled(self): def test_gc_frames_disabled(self): """Test that GC frames do not appear when gc tracking is disabled.""" with ( - test_subprocess(self.gc_test_script) as subproc, + test_subprocess(self.gc_test_script, wait_for_working=True) as subproc, io.StringIO() as captured_output, mock.patch("sys.stdout", captured_output), ): @@ -133,18 +127,13 @@ def setUpClass(cls): cls.native_test_script = """ import operator -def main_loop(): - while True: - # Native code in the middle of the stack: - operator.call(inner) - def inner(): - # Python code at the top of the stack: for _ in range(1_000_0000): pass -if __name__ == "__main__": - main_loop() +_test_sock.sendall(b"working") +while True: + operator.call(inner) """ def test_native_frames_enabled(self): @@ -154,10 +143,7 @@ def test_native_frames_enabled(self): ) self.addCleanup(close_and_unlink, collapsed_file) - with ( - test_subprocess(self.native_test_script) as subproc, - ): - # Suppress profiler output when testing file export + with test_subprocess(self.native_test_script, wait_for_working=True) as subproc: with ( io.StringIO() as captured_output, mock.patch("sys.stdout", captured_output), @@ -199,7 +185,7 @@ def test_native_frames_enabled(self): def test_native_frames_disabled(self): """Test that native frames do not appear when native tracking is disabled.""" with ( - test_subprocess(self.native_test_script) as subproc, + test_subprocess(self.native_test_script, wait_for_working=True) as subproc, io.StringIO() as captured_output, mock.patch("sys.stdout", captured_output), ): 
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py index 40acbe87512..cc8cd4ff095 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py @@ -39,6 +39,9 @@ # Duration for profiling tests - long enough for process to complete naturally PROFILING_TIMEOUT = str(int(SHORT_TIMEOUT)) +# Duration for profiling in tests - short enough to complete quickly +PROFILING_DURATION_SEC = 2 + @skip_if_not_supported @unittest.skipIf( @@ -355,23 +358,14 @@ def total_occurrences(func): self.assertEqual(total_occurrences(main_key), 2) -@requires_subprocess() -@skip_if_not_supported -class TestSampleProfilerIntegration(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.test_script = ''' -import time -import os - +# Shared workload functions for test scripts +_WORKLOAD_FUNCTIONS = ''' def slow_fibonacci(n): - """Recursive fibonacci - should show up prominently in profiler.""" if n <= 1: return n return slow_fibonacci(n-1) + slow_fibonacci(n-2) def cpu_intensive_work(): - """CPU intensive work that should show in profiler.""" result = 0 for i in range(10000): result += i * i @@ -379,33 +373,48 @@ def cpu_intensive_work(): result = result % 1000000 return result -def main_loop(): - """Main test loop.""" - max_iterations = 1000 - - for iteration in range(max_iterations): +def do_work(): + iteration = 0 + while True: if iteration % 2 == 0: - result = slow_fibonacci(15) + slow_fibonacci(15) else: - result = cpu_intensive_work() + cpu_intensive_work() + iteration += 1 +''' -if __name__ == "__main__": - main_loop() + +@requires_subprocess() +@skip_if_not_supported +class TestSampleProfilerIntegration(unittest.TestCase): + @classmethod + def setUpClass(cls): + # Test script for use with test_subprocess() - signals when work starts + cls.test_script = _WORKLOAD_FUNCTIONS + ''' 
+_test_sock.sendall(b"working") +do_work() +''' + # CLI test script - runs for fixed duration (no socket sync) + cls.cli_test_script = ''' +import time +''' + _WORKLOAD_FUNCTIONS.replace( + 'while True:', 'end_time = time.time() + 30\n while time.time() < end_time:' +) + ''' +do_work() ''' def test_sampling_basic_functionality(self): with ( - test_subprocess(self.test_script) as subproc, + test_subprocess(self.test_script, wait_for_working=True) as subproc, io.StringIO() as captured_output, mock.patch("sys.stdout", captured_output), ): try: - # Sample for up to SHORT_TIMEOUT seconds, but process exits after fixed iterations collector = PstatsCollector(sample_interval_usec=1000, skip_idle=False) profiling.sampling.sample.sample( subproc.process.pid, collector, - duration_sec=SHORT_TIMEOUT, + duration_sec=PROFILING_DURATION_SEC, ) collector.print_stats(show_summary=False) except PermissionError: @@ -427,7 +436,7 @@ def test_sampling_with_pstats_export(self): ) self.addCleanup(close_and_unlink, pstats_out) - with test_subprocess(self.test_script) as subproc: + with test_subprocess(self.test_script, wait_for_working=True) as subproc: # Suppress profiler output when testing file export with ( io.StringIO() as captured_output, @@ -438,7 +447,7 @@ def test_sampling_with_pstats_export(self): profiling.sampling.sample.sample( subproc.process.pid, collector, - duration_sec=1, + duration_sec=PROFILING_DURATION_SEC, ) collector.export(pstats_out.name) except PermissionError: @@ -472,7 +481,7 @@ def test_sampling_with_collapsed_export(self): self.addCleanup(close_and_unlink, collapsed_file) with ( - test_subprocess(self.test_script) as subproc, + test_subprocess(self.test_script, wait_for_working=True) as subproc, ): # Suppress profiler output when testing file export with ( @@ -484,7 +493,7 @@ def test_sampling_with_collapsed_export(self): profiling.sampling.sample.sample( subproc.process.pid, collector, - duration_sec=1, + duration_sec=PROFILING_DURATION_SEC, ) 
collector.export(collapsed_file.name) except PermissionError: @@ -522,7 +531,7 @@ def test_sampling_with_collapsed_export(self): def test_sampling_all_threads(self): with ( - test_subprocess(self.test_script) as subproc, + test_subprocess(self.test_script, wait_for_working=True) as subproc, # Suppress profiler output io.StringIO() as captured_output, mock.patch("sys.stdout", captured_output), @@ -532,7 +541,7 @@ def test_sampling_all_threads(self): profiling.sampling.sample.sample( subproc.process.pid, collector, - duration_sec=1, + duration_sec=PROFILING_DURATION_SEC, all_threads=True, ) collector.print_stats(show_summary=False) @@ -544,12 +553,16 @@ def test_sampling_all_threads(self): def test_sample_target_script(self): script_file = tempfile.NamedTemporaryFile(delete=False) - script_file.write(self.test_script.encode("utf-8")) + script_file.write(self.cli_test_script.encode("utf-8")) script_file.flush() self.addCleanup(close_and_unlink, script_file) - # Sample for up to SHORT_TIMEOUT seconds, but process exits after fixed iterations - test_args = ["profiling.sampling.sample", "run", "-d", PROFILING_TIMEOUT, script_file.name] + # Sample for PROFILING_DURATION_SEC seconds + test_args = [ + "profiling.sampling.sample", "run", + "-d", str(PROFILING_DURATION_SEC), + script_file.name + ] with ( mock.patch("sys.argv", test_args), @@ -579,13 +592,13 @@ def test_sample_target_module(self): module_path = os.path.join(tempdir.name, "test_module.py") with open(module_path, "w") as f: - f.write(self.test_script) + f.write(self.cli_test_script) test_args = [ "profiling.sampling.cli", "run", "-d", - PROFILING_TIMEOUT, + str(PROFILING_DURATION_SEC), "-m", "test_module", ] @@ -626,8 +639,10 @@ def test_invalid_pid(self): profiling.sampling.sample.sample(-1, collector, duration_sec=1) def test_process_dies_during_sampling(self): + # Use wait_for_working=False since this simple script doesn't send "working" with test_subprocess( - "import time; time.sleep(0.5); exit()" + "import 
time; time.sleep(0.5); exit()", + wait_for_working=False ) as subproc: with ( io.StringIO() as captured_output, @@ -650,7 +665,11 @@ def test_process_dies_during_sampling(self): self.assertIn("Error rate", output) def test_is_process_running(self): - with test_subprocess("import time; time.sleep(1000)") as subproc: + # Use wait_for_working=False since this simple script doesn't send "working" + with test_subprocess( + "import time; time.sleep(1000)", + wait_for_working=False + ) as subproc: try: profiler = SampleProfiler( pid=subproc.process.pid, @@ -677,7 +696,11 @@ def test_is_process_running(self): @unittest.skipUnless(sys.platform == "linux", "Only valid on Linux") def test_esrch_signal_handling(self): - with test_subprocess("import time; time.sleep(1000)") as subproc: + # Use wait_for_working=False since this simple script doesn't send "working" + with test_subprocess( + "import time; time.sleep(1000)", + wait_for_working=False + ) as subproc: try: unwinder = _remote_debugging.RemoteUnwinder( subproc.process.pid @@ -789,38 +812,34 @@ class TestAsyncAwareProfilingIntegration(unittest.TestCase): @classmethod def setUpClass(cls): + # Async test script that runs indefinitely until killed. + # Sends "working" signal AFTER tasks are created and scheduled. 
cls.async_script = ''' import asyncio async def sleeping_leaf(): - """Leaf task that just sleeps - visible in 'all' mode.""" - for _ in range(50): + while True: await asyncio.sleep(0.02) async def cpu_leaf(): - """Leaf task that does CPU work - visible in both modes.""" total = 0 - for _ in range(200): + while True: for i in range(10000): total += i * i await asyncio.sleep(0) - return total async def supervisor(): - """Middle layer that spawns leaf tasks.""" tasks = [ asyncio.create_task(sleeping_leaf(), name="Sleeper-0"), asyncio.create_task(sleeping_leaf(), name="Sleeper-1"), asyncio.create_task(sleeping_leaf(), name="Sleeper-2"), asyncio.create_task(cpu_leaf(), name="Worker"), ] + await asyncio.sleep(0) # Let tasks get scheduled + _test_sock.sendall(b"working") await asyncio.gather(*tasks) -async def main(): - await supervisor() - -if __name__ == "__main__": - asyncio.run(main()) +asyncio.run(supervisor()) ''' def _collect_async_samples(self, async_aware_mode): @@ -828,13 +847,13 @@ def _collect_async_samples(self, async_aware_mode): Returns a dict mapping function names to their sample counts. 
""" - with test_subprocess(self.async_script) as subproc: + with test_subprocess(self.async_script, wait_for_working=True) as subproc: try: collector = CollapsedStackCollector(1000, skip_idle=False) profiling.sampling.sample.sample( subproc.process.pid, collector, - duration_sec=SHORT_TIMEOUT, + duration_sec=PROFILING_DURATION_SEC, async_aware=async_aware_mode, ) except PermissionError: diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py index 1b0e21a5fe4..c0457ee7eb8 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py @@ -143,27 +143,16 @@ def cpu_active_worker(): while True: x += 1 -def main(): - # Start both threads - idle_thread = threading.Thread(target=idle_worker) - cpu_thread = threading.Thread(target=cpu_active_worker) - idle_thread.start() - cpu_thread.start() - - # Wait for CPU thread to be running, then signal test - cpu_ready.wait() - _test_sock.sendall(b"threads_ready") - - idle_thread.join() - cpu_thread.join() - -main() - +idle_thread = threading.Thread(target=idle_worker) +cpu_thread = threading.Thread(target=cpu_active_worker) +idle_thread.start() +cpu_thread.start() +cpu_ready.wait() +_test_sock.sendall(b"working") +idle_thread.join() +cpu_thread.join() """ - with test_subprocess(cpu_vs_idle_script) as subproc: - # Wait for signal that threads are running - response = subproc.socket.recv(1024) - self.assertEqual(response, b"threads_ready") + with test_subprocess(cpu_vs_idle_script, wait_for_working=True) as subproc: with ( io.StringIO() as captured_output, @@ -365,26 +354,16 @@ def gil_holding_work(): while True: x += 1 -def main(): - # Start both threads - idle_thread = threading.Thread(target=gil_releasing_work) - cpu_thread = threading.Thread(target=gil_holding_work) - idle_thread.start() - cpu_thread.start() - - # Wait for GIL-holding thread to be running, then signal 
test - gil_ready.wait() - _test_sock.sendall(b"threads_ready") - - idle_thread.join() - cpu_thread.join() - -main() +idle_thread = threading.Thread(target=gil_releasing_work) +cpu_thread = threading.Thread(target=gil_holding_work) +idle_thread.start() +cpu_thread.start() +gil_ready.wait() +_test_sock.sendall(b"working") +idle_thread.join() +cpu_thread.join() """ - with test_subprocess(gil_test_script) as subproc: - # Wait for signal that threads are running - response = subproc.socket.recv(1024) - self.assertEqual(response, b"threads_ready") + with test_subprocess(gil_test_script, wait_for_working=True) as subproc: with ( io.StringIO() as captured_output, diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-19-16-40-24.gh-issue-141732.PTetqp.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-19-16-40-24.gh-issue-141732.PTetqp.rst new file mode 100644 index 00000000000..08420fd5f4d --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-19-16-40-24.gh-issue-141732.PTetqp.rst @@ -0,0 +1,2 @@ +Ensure the :meth:`~object.__repr__` for :exc:`ExceptionGroup` and :exc:`BaseExceptionGroup` does +not change when the exception sequence that was originally passed in to its constructor is subsequently mutated. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst new file mode 100644 index 00000000000..a4a29e40027 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst @@ -0,0 +1,6 @@ +Add incomplete sample detection to prevent corrupted profiling data. Each +thread state now contains an embedded base frame (sentinel at the bottom of +the frame stack) with owner type ``FRAME_OWNED_BY_INTERPRETER``. The profiler +validates that stack unwinding terminates at this sentinel frame.
Samples that +fail to reach the base frame (due to race conditions, memory corruption, or +other errors) are now rejected rather than being included as spurious data. diff --git a/Misc/NEWS.d/next/Library/2025-12-07-17-30-05.gh-issue-142346.okcAAp.rst b/Misc/NEWS.d/next/Library/2025-12-07-17-30-05.gh-issue-142346.okcAAp.rst new file mode 100644 index 00000000000..cf570f314c0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-07-17-30-05.gh-issue-142346.okcAAp.rst @@ -0,0 +1,3 @@ +Fix usage formatting for mutually exclusive groups in :mod:`argparse` +when they are preceded by positional arguments or followed or intermixed +with other optional arguments. diff --git a/Modules/_remote_debugging/_remote_debugging.h b/Modules/_remote_debugging/_remote_debugging.h index 1f4fe527ba2..0aa98349296 100644 --- a/Modules/_remote_debugging/_remote_debugging.h +++ b/Modules/_remote_debugging/_remote_debugging.h @@ -413,6 +413,7 @@ extern int process_frame_chain( uintptr_t initial_frame_addr, StackChunkList *chunks, PyObject *frame_info, + uintptr_t base_frame_addr, uintptr_t gc_frame, uintptr_t last_profiled_frame, int *stopped_at_cached_frame, diff --git a/Modules/_remote_debugging/frames.c b/Modules/_remote_debugging/frames.c index 10e2cf672dd..abde60c4576 100644 --- a/Modules/_remote_debugging/frames.c +++ b/Modules/_remote_debugging/frames.c @@ -154,14 +154,13 @@ is_frame_valid( void* frame = (void*)frame_addr; - if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) { - return 0; // C frame + char owner = GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner); + if (owner == FRAME_OWNED_BY_INTERPRETER) { + return 0; // C frame or sentinel base frame } - if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR - && GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_THREAD) { - PyErr_Format(PyExc_RuntimeError, 
"Unhandled frame owner %d.\n", - GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner)); + if (owner != FRAME_OWNED_BY_GENERATOR && owner != FRAME_OWNED_BY_THREAD) { + PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", owner); set_exception_cause(unwinder, PyExc_RuntimeError, "Unhandled frame owner type in async frame"); return -1; } @@ -260,6 +259,7 @@ process_frame_chain( uintptr_t initial_frame_addr, StackChunkList *chunks, PyObject *frame_info, + uintptr_t base_frame_addr, uintptr_t gc_frame, uintptr_t last_profiled_frame, int *stopped_at_cached_frame, @@ -269,6 +269,7 @@ process_frame_chain( { uintptr_t frame_addr = initial_frame_addr; uintptr_t prev_frame_addr = 0; + uintptr_t last_frame_addr = 0; // Track last frame visited for validation const size_t MAX_FRAMES = 1024 + 512; size_t frame_count = 0; @@ -296,6 +297,7 @@ process_frame_chain( PyObject *frame = NULL; uintptr_t next_frame_addr = 0; uintptr_t stackpointer = 0; + last_frame_addr = frame_addr; // Remember this frame address if (++frame_count > MAX_FRAMES) { PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)"); @@ -303,7 +305,6 @@ process_frame_chain( return -1; } - // Try chunks first, fallback to direct memory read if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, &stackpointer, chunks) < 0) { PyErr_Clear(); uintptr_t address_of_code_object = 0; @@ -378,6 +379,17 @@ process_frame_chain( frame_addr = next_frame_addr; } + // Validate we reached the base frame (sentinel at bottom of stack) + // Only validate if we walked the full chain (didn't stop at cached frame) + // and base_frame_addr is provided (non-zero) + int stopped_early = stopped_at_cached_frame && *stopped_at_cached_frame; + if (!stopped_early && base_frame_addr != 0 && last_frame_addr != base_frame_addr) { + PyErr_Format(PyExc_RuntimeError, + "Incomplete sample: did not reach base frame (expected 0x%lx, got 0x%lx)", + base_frame_addr, 
last_frame_addr); + return -1; + } + return 0; } @@ -541,7 +553,7 @@ collect_frames_with_cache( Py_ssize_t frames_before = PyList_GET_SIZE(frame_info); int stopped_at_cached = 0; - if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, gc_frame, + if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, 0, gc_frame, last_profiled_frame, &stopped_at_cached, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) { return -1; @@ -563,7 +575,7 @@ collect_frames_with_cache( // Cache miss - continue walking from last_profiled_frame to get the rest STATS_INC(unwinder, frame_cache_misses); Py_ssize_t frames_before_walk = PyList_GET_SIZE(frame_info); - if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, gc_frame, + if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, 0, gc_frame, 0, NULL, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) { return -1; } diff --git a/Modules/_remote_debugging/threads.c b/Modules/_remote_debugging/threads.c index ce013f902d1..69819eb8dcd 100644 --- a/Modules/_remote_debugging/threads.c +++ b/Modules/_remote_debugging/threads.c @@ -380,6 +380,7 @@ unwind_stack_for_thread( } uintptr_t frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.current_frame); + uintptr_t base_frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.base_frame); frame_info = PyList_New(0); if (!frame_info) { @@ -411,9 +412,9 @@ unwind_stack_for_thread( PyErr_Clear(); // Non-fatal } } else { - // No caching - process entire frame chain + // No caching - process entire frame chain with base_frame validation if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info, - gc_frame, 0, NULL, NULL, NULL, 0) < 0) { + base_frame_addr, gc_frame, 0, NULL, NULL, NULL, 0) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain"); goto error; } diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 244d8f39e2b..9a43057b383 100644 --- 
a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -694,12 +694,12 @@ PyTypeObject _PyExc_ ## EXCNAME = { \ #define ComplexExtendsException(EXCBASE, EXCNAME, EXCSTORE, EXCNEW, \ EXCMETHODS, EXCMEMBERS, EXCGETSET, \ - EXCSTR, EXCDOC) \ + EXCSTR, EXCREPR, EXCDOC) \ static PyTypeObject _PyExc_ ## EXCNAME = { \ PyVarObject_HEAD_INIT(NULL, 0) \ # EXCNAME, \ sizeof(Py ## EXCSTORE ## Object), 0, \ - EXCSTORE ## _dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + EXCSTORE ## _dealloc, 0, 0, 0, 0, EXCREPR, 0, 0, 0, 0, 0, \ EXCSTR, 0, 0, 0, \ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, \ PyDoc_STR(EXCDOC), EXCSTORE ## _traverse, \ @@ -792,7 +792,7 @@ StopIteration_traverse(PyObject *op, visitproc visit, void *arg) } ComplexExtendsException(PyExc_Exception, StopIteration, StopIteration, - 0, 0, StopIteration_members, 0, 0, + 0, 0, StopIteration_members, 0, 0, 0, "Signal the end from iterator.__next__()."); @@ -865,7 +865,7 @@ static PyMemberDef SystemExit_members[] = { }; ComplexExtendsException(PyExc_BaseException, SystemExit, SystemExit, - 0, 0, SystemExit_members, 0, 0, + 0, 0, SystemExit_members, 0, 0, 0, "Request to exit from the interpreter."); /* @@ -890,6 +890,7 @@ BaseExceptionGroup_new(PyTypeObject *type, PyObject *args, PyObject *kwds) PyObject *message = NULL; PyObject *exceptions = NULL; + PyObject *exceptions_str = NULL; if (!PyArg_ParseTuple(args, "UO:BaseExceptionGroup.__new__", @@ -905,6 +906,18 @@ BaseExceptionGroup_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; } + /* Save initial exceptions sequence as a string in case sequence is mutated */ + if (!PyList_Check(exceptions) && !PyTuple_Check(exceptions)) { + exceptions_str = PyObject_Repr(exceptions); + if (exceptions_str == NULL) { + /* We don't hold a reference to exceptions, so clear it before + * attempting a decref in the cleanup. 
+ */ + exceptions = NULL; + goto error; + } + } + exceptions = PySequence_Tuple(exceptions); if (!exceptions) { return NULL; @@ -988,9 +1001,11 @@ BaseExceptionGroup_new(PyTypeObject *type, PyObject *args, PyObject *kwds) self->msg = Py_NewRef(message); self->excs = exceptions; + self->excs_str = exceptions_str; return (PyObject*)self; error: - Py_DECREF(exceptions); + Py_XDECREF(exceptions); + Py_XDECREF(exceptions_str); return NULL; } @@ -1029,6 +1044,7 @@ BaseExceptionGroup_clear(PyObject *op) PyBaseExceptionGroupObject *self = PyBaseExceptionGroupObject_CAST(op); Py_CLEAR(self->msg); Py_CLEAR(self->excs); + Py_CLEAR(self->excs_str); return BaseException_clear(op); } @@ -1046,6 +1062,7 @@ BaseExceptionGroup_traverse(PyObject *op, visitproc visit, void *arg) PyBaseExceptionGroupObject *self = PyBaseExceptionGroupObject_CAST(op); Py_VISIT(self->msg); Py_VISIT(self->excs); + Py_VISIT(self->excs_str); return BaseException_traverse(op, visit, arg); } @@ -1063,6 +1080,54 @@ BaseExceptionGroup_str(PyObject *op) self->msg, num_excs, num_excs > 1 ? "s" : ""); } +static PyObject * +BaseExceptionGroup_repr(PyObject *op) +{ + PyBaseExceptionGroupObject *self = PyBaseExceptionGroupObject_CAST(op); + assert(self->msg); + + PyObject *exceptions_str = NULL; + + /* Use the saved exceptions string for custom sequences. */ + if (self->excs_str) { + exceptions_str = Py_NewRef(self->excs_str); + } + else { + assert(self->excs); + + /* Older versions delegated to BaseException, inserting the current + * value of self.args[1]; but this can be mutable and go out-of-sync + * with self.exceptions. Instead, use self.exceptions for accuracy, + * making it look like self.args[1] for backwards compatibility. 
*/ + if (PyList_Check(PyTuple_GET_ITEM(self->args, 1))) { + PyObject *exceptions_list = PySequence_List(self->excs); + if (!exceptions_list) { + return NULL; + } + + exceptions_str = PyObject_Repr(exceptions_list); + Py_DECREF(exceptions_list); + } + else { + exceptions_str = PyObject_Repr(self->excs); + } + + if (!exceptions_str) { + return NULL; + } + } + + assert(exceptions_str != NULL); + + const char *name = _PyType_Name(Py_TYPE(self)); + PyObject *repr = PyUnicode_FromFormat( + "%s(%R, %U)", name, + self->msg, exceptions_str); + + Py_DECREF(exceptions_str); + return repr; +} + /*[clinic input] @critical_section BaseExceptionGroup.derive @@ -1697,7 +1762,7 @@ static PyMethodDef BaseExceptionGroup_methods[] = { ComplexExtendsException(PyExc_BaseException, BaseExceptionGroup, BaseExceptionGroup, BaseExceptionGroup_new /* new */, BaseExceptionGroup_methods, BaseExceptionGroup_members, - 0 /* getset */, BaseExceptionGroup_str, + 0 /* getset */, BaseExceptionGroup_str, BaseExceptionGroup_repr, "A combination of multiple unrelated exceptions."); /* @@ -2425,7 +2490,7 @@ static PyGetSetDef OSError_getset[] = { ComplexExtendsException(PyExc_Exception, OSError, OSError, OSError_new, OSError_methods, OSError_members, OSError_getset, - OSError_str, + OSError_str, 0, "Base class for I/O related errors."); @@ -2566,7 +2631,7 @@ static PyMethodDef NameError_methods[] = { ComplexExtendsException(PyExc_Exception, NameError, NameError, 0, NameError_methods, NameError_members, - 0, BaseException_str, "Name not found globally."); + 0, BaseException_str, 0, "Name not found globally."); /* * UnboundLocalError extends NameError @@ -2700,7 +2765,7 @@ static PyMethodDef AttributeError_methods[] = { ComplexExtendsException(PyExc_Exception, AttributeError, AttributeError, 0, AttributeError_methods, AttributeError_members, - 0, BaseException_str, "Attribute not found."); + 0, BaseException_str, 0, "Attribute not found."); /* * SyntaxError extends Exception @@ -2899,7 +2964,7 @@ static 
PyMemberDef SyntaxError_members[] = { ComplexExtendsException(PyExc_Exception, SyntaxError, SyntaxError, 0, 0, SyntaxError_members, 0, - SyntaxError_str, "Invalid syntax."); + SyntaxError_str, 0, "Invalid syntax."); /* @@ -2959,7 +3024,7 @@ KeyError_str(PyObject *op) } ComplexExtendsException(PyExc_LookupError, KeyError, BaseException, - 0, 0, 0, 0, KeyError_str, "Mapping key not found."); + 0, 0, 0, 0, KeyError_str, 0, "Mapping key not found."); /* diff --git a/Python/ceval.c b/Python/ceval.c index 382ae210ebb..a1d54bd058b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3352,6 +3352,9 @@ PyEval_MergeCompilerFlags(PyCompilerFlags *cf) { PyThreadState *tstate = _PyThreadState_GET(); _PyInterpreterFrame *current_frame = tstate->current_frame; + if (current_frame == tstate->base_frame) { + current_frame = NULL; + } int result = cf->cf_flags != 0; if (current_frame != NULL) { diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 67368b5ce07..2527dca71d7 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -2571,7 +2571,7 @@ Py_EndInterpreter(PyThreadState *tstate) if (tstate != _PyThreadState_GET()) { Py_FatalError("thread is not current"); } - if (tstate->current_frame != NULL) { + if (tstate->current_frame != tstate->base_frame) { Py_FatalError("thread still has a frame"); } interp->finalizing = 1; diff --git a/Python/pystate.c b/Python/pystate.c index c12a1418e74..2956e785405 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1482,7 +1482,31 @@ init_threadstate(_PyThreadStateImpl *_tstate, // This is cleared when PyGILState_Ensure() creates the thread state. 
tstate->gilstate_counter = 1; - tstate->current_frame = NULL; + // Initialize the embedded base frame - sentinel at the bottom of the frame stack + _tstate->base_frame.previous = NULL; + _tstate->base_frame.f_executable = PyStackRef_None; + _tstate->base_frame.f_funcobj = PyStackRef_NULL; + _tstate->base_frame.f_globals = NULL; + _tstate->base_frame.f_builtins = NULL; + _tstate->base_frame.f_locals = NULL; + _tstate->base_frame.frame_obj = NULL; + _tstate->base_frame.instr_ptr = NULL; + _tstate->base_frame.stackpointer = _tstate->base_frame.localsplus; + _tstate->base_frame.return_offset = 0; + _tstate->base_frame.owner = FRAME_OWNED_BY_INTERPRETER; + _tstate->base_frame.visited = 0; +#ifdef Py_DEBUG + _tstate->base_frame.lltrace = 0; +#endif +#ifdef Py_GIL_DISABLED + _tstate->base_frame.tlbc_index = 0; +#endif + _tstate->base_frame.localsplus[0] = PyStackRef_NULL; + + // current_frame starts pointing to the base frame + tstate->current_frame = &_tstate->base_frame; + // base_frame pointer for profilers to validate stack unwinding + tstate->base_frame = &_tstate->base_frame; tstate->datastack_chunk = NULL; tstate->datastack_top = NULL; tstate->datastack_limit = NULL; @@ -1660,7 +1684,7 @@ PyThreadState_Clear(PyThreadState *tstate) int verbose = _PyInterpreterState_GetConfig(tstate->interp)->verbose; - if (verbose && tstate->current_frame != NULL) { + if (verbose && tstate->current_frame != tstate->base_frame) { /* bpo-20526: After the main thread calls _PyInterpreterState_SetFinalizing() in Py_FinalizeEx() (or in Py_EndInterpreter() for subinterpreters), diff --git a/Python/traceback.c b/Python/traceback.c index 48f9b4d04c6..8af63c22a9f 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -1036,7 +1036,7 @@ static int dump_frame(int fd, _PyInterpreterFrame *frame) { if (frame->owner == FRAME_OWNED_BY_INTERPRETER) { - /* Ignore trampoline frame */ + /* Ignore trampoline frames and base frame sentinel */ return 0; }