gh-137400: Fix thread-safety issues when profiling all threads (gh-137518)

There were a few thread-safety issues when profiling or tracing all
threads via PyEval_SetProfileAllThreads or PyEval_SetTraceAllThreads:

* The loop over thread states could crash if a thread exits concurrently
  (in both the free threading and default build)
* The modification of `c_profilefunc` and `c_tracefunc` wasn't
  thread-safe on the free threading build.
This commit is contained in:
Sam Gross 2025-08-13 14:15:12 -04:00 committed by GitHub
parent 923d68655b
commit a10152f8fd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 431 additions and 241 deletions

View file

@ -1040,6 +1040,8 @@ set_version_raw(uintptr_t *ptr, uint32_t version)
static void
set_global_version(PyThreadState *tstate, uint32_t version)
{
ASSERT_WORLD_STOPPED();
assert((version & _PY_EVAL_EVENTS_MASK) == 0);
PyInterpreterState *interp = tstate->interp;
set_version_raw(&interp->ceval.instrumentation_version, version);
@ -1939,28 +1941,26 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp)
static int
instrument_all_executing_code_objects(PyInterpreterState *interp) {
instrument_all_executing_code_objects(PyInterpreterState *interp)
{
ASSERT_WORLD_STOPPED();
_PyRuntimeState *runtime = &_PyRuntime;
HEAD_LOCK(runtime);
PyThreadState* ts = PyInterpreterState_ThreadHead(interp);
HEAD_UNLOCK(runtime);
while (ts) {
int err = 0;
_Py_FOR_EACH_TSTATE_BEGIN(interp, ts) {
_PyInterpreterFrame *frame = ts->current_frame;
while (frame) {
if (frame->owner < FRAME_OWNED_BY_INTERPRETER) {
if (instrument_lock_held(_PyFrame_GetCode(frame), interp)) {
return -1;
err = instrument_lock_held(_PyFrame_GetCode(frame), interp);
if (err) {
goto done;
}
}
frame = frame->previous;
}
HEAD_LOCK(runtime);
ts = PyThreadState_Next(ts);
HEAD_UNLOCK(runtime);
}
return 0;
done:
_Py_FOR_EACH_TSTATE_END(interp);
return err;
}
static void
@ -2006,6 +2006,7 @@ check_tool(PyInterpreterState *interp, int tool_id)
int
_PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events)
{
ASSERT_WORLD_STOPPED();
assert(0 <= tool_id && tool_id < PY_MONITORING_TOOL_IDS);
PyThreadState *tstate = _PyThreadState_GET();
PyInterpreterState *interp = tstate->interp;
@ -2014,33 +2015,28 @@ _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events)
return -1;
}
int res;
_PyEval_StopTheWorld(interp);
uint32_t existing_events = get_events(&interp->monitors, tool_id);
if (existing_events == events) {
res = 0;
goto done;
return 0;
}
set_events(&interp->monitors, tool_id, events);
uint32_t new_version = global_version(interp) + MONITORING_VERSION_INCREMENT;
if (new_version == 0) {
PyErr_Format(PyExc_OverflowError, "events set too many times");
res = -1;
goto done;
return -1;
}
set_global_version(tstate, new_version);
#ifdef _Py_TIER2
_Py_Executors_InvalidateAll(interp, 1);
#endif
res = instrument_all_executing_code_objects(interp);
done:
_PyEval_StartTheWorld(interp);
return res;
return instrument_all_executing_code_objects(interp);
}
int
_PyMonitoring_SetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEventSet events)
{
ASSERT_WORLD_STOPPED();
assert(0 <= tool_id && tool_id < PY_MONITORING_TOOL_IDS);
PyInterpreterState *interp = _PyInterpreterState_GET();
assert(events < (1 << _PY_MONITORING_LOCAL_EVENTS));
@ -2052,11 +2048,8 @@ _PyMonitoring_SetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEvent
return -1;
}
int res;
_PyEval_StopTheWorld(interp);
if (allocate_instrumentation_data(code)) {
res = -1;
goto done;
return -1;
}
code->_co_monitoring->tool_versions[tool_id] = interp->monitoring_tool_versions[tool_id];
@ -2064,16 +2057,11 @@ _PyMonitoring_SetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEvent
_Py_LocalMonitors *local = &code->_co_monitoring->local_monitors;
uint32_t existing_events = get_local_events(local, tool_id);
if (existing_events == events) {
res = 0;
goto done;
return 0;
}
set_local_events(local, tool_id, events);
res = force_instrument_lock_held(code, interp);
done:
_PyEval_StartTheWorld(interp);
return res;
return force_instrument_lock_held(code, interp);
}
int
@ -2105,11 +2093,12 @@ int _PyMonitoring_ClearToolId(int tool_id)
}
}
_PyEval_StopTheWorld(interp);
if (_PyMonitoring_SetEvents(tool_id, 0) < 0) {
_PyEval_StartTheWorld(interp);
return -1;
}
_PyEval_StopTheWorld(interp);
uint32_t version = global_version(interp) + MONITORING_VERSION_INCREMENT;
if (version == 0) {
PyErr_Format(PyExc_OverflowError, "events set too many times");
@ -2346,7 +2335,11 @@ monitoring_set_events_impl(PyObject *module, int tool_id, int event_set)
event_set &= ~(1 << PY_MONITORING_EVENT_BRANCH);
event_set |= (1 << PY_MONITORING_EVENT_BRANCH_RIGHT) | (1 << PY_MONITORING_EVENT_BRANCH_LEFT);
}
if (_PyMonitoring_SetEvents(tool_id, event_set)) {
PyInterpreterState *interp = _PyInterpreterState_GET();
_PyEval_StopTheWorld(interp);
int err = _PyMonitoring_SetEvents(tool_id, event_set);
_PyEval_StartTheWorld(interp);
if (err) {
return NULL;
}
Py_RETURN_NONE;
@ -2427,7 +2420,11 @@ monitoring_set_local_events_impl(PyObject *module, int tool_id,
return NULL;
}
if (_PyMonitoring_SetLocalEvents((PyCodeObject*)code, tool_id, event_set)) {
PyInterpreterState *interp = _PyInterpreterState_GET();
_PyEval_StopTheWorld(interp);
int err = _PyMonitoring_SetLocalEvents((PyCodeObject*)code, tool_id, event_set);
_PyEval_StartTheWorld(interp);
if (err) {
return NULL;
}
Py_RETURN_NONE;