diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 3b251d3814b..9bfb4f4f3a4 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -169,7 +169,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_PUSH_FRAME] = 0, + [_PUSH_FRAME] = HAS_ESCAPES_FLAG, [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_CALL_STR_1] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_TUPLE_1] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6df99d64653..c48f0a17c60 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -805,7 +805,8 @@ dummy_func( #if TIER_ONE assert(frame != &entry_frame); #endif - STORE_SP(); + SYNC_SP(); + _PyFrame_SetStackPointer(frame, stack_pointer); assert(EMPTY()); _Py_LeaveRecursiveCallPy(tstate); // GH-99729: We need to unlink the frame *before* clearing it: @@ -3154,7 +3155,8 @@ dummy_func( // Write it out explicitly because it's subtly different. // Eventually this should be the only occurrence of this code. assert(tstate->interp->eval_frame == NULL); - STORE_SP(); + SYNC_SP(); + _PyFrame_SetStackPointer(frame, stack_pointer); new_frame->previous = frame; CALL_STAT_INC(inlined_py_calls); frame = tstate->current_frame = new_frame; @@ -4013,20 +4015,27 @@ dummy_func( ///////// Tier-2 only opcodes ///////// op (_GUARD_IS_TRUE_POP, (flag -- )) { - DEOPT_IF(Py_IsFalse(flag)); + SYNC_SP(); + DEOPT_IF(!Py_IsTrue(flag)); assert(Py_IsTrue(flag)); } op (_GUARD_IS_FALSE_POP, (flag -- )) { - DEOPT_IF(Py_IsTrue(flag)); + SYNC_SP(); + DEOPT_IF(!Py_IsFalse(flag)); assert(Py_IsFalse(flag)); } op (_GUARD_IS_NONE_POP, (val -- )) { - DEOPT_IF(!Py_IsNone(val)); + SYNC_SP(); + if (!Py_IsNone(val)) { + Py_DECREF(val); + DEOPT_IF(1); + } } op (_GUARD_IS_NOT_NONE_POP, (val -- )) { + SYNC_SP(); DEOPT_IF(Py_IsNone(val)); Py_DECREF(val); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 6060bebca9a..2b4399b25ba 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3318,35 +3318,38 @@ case _GUARD_IS_TRUE_POP: { PyObject *flag; flag = stack_pointer[-1]; - if (Py_IsFalse(flag)) goto deoptimize; - assert(Py_IsTrue(flag)); stack_pointer += -1; + if (!Py_IsTrue(flag)) goto deoptimize; + assert(Py_IsTrue(flag)); break; } case _GUARD_IS_FALSE_POP: { PyObject *flag; flag = stack_pointer[-1]; - if (Py_IsTrue(flag)) goto deoptimize; - assert(Py_IsFalse(flag)); stack_pointer += -1; + if (!Py_IsFalse(flag)) goto deoptimize; + assert(Py_IsFalse(flag)); break; } case _GUARD_IS_NONE_POP: { PyObject *val; val = stack_pointer[-1]; - if (!Py_IsNone(val)) goto deoptimize; stack_pointer += -1; + if (!Py_IsNone(val)) { + Py_DECREF(val); + if (1) goto deoptimize; + } break; } case _GUARD_IS_NOT_NONE_POP: { PyObject *val; val = stack_pointer[-1]; + stack_pointer += -1; if (Py_IsNone(val)) goto deoptimize; Py_DECREF(val); - stack_pointer += -1; break; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 236ae266971..1551a5ef61f 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -481,18 +481,19 @@ translate_bytecode_to_trace( goto done; } uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_likely]; - _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; DPRINTF(2, "%s(%d): counter=%x, bitcount=%d, likely=%d, confidence=%d, uopcode=%s\n", _PyOpcode_OpName[opcode], oparg, counter, bitcount, jump_likely, confidence, _PyUOpName(uopcode)); - ADD_TO_TRACE(uopcode, max_length, 0, target); + _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; + _Py_CODEUNIT *target_instr = next_instr + oparg; if (jump_likely) { - _Py_CODEUNIT *target_instr = next_instr + oparg; DPRINTF(2, "Jump likely (%x = %d bits), continue at byte offset %d\n", instr[1].cache, bitcount, 2 * INSTR_IP(target_instr, code)); instr = target_instr; + ADD_TO_TRACE(uopcode, max_length, 0, INSTR_IP(next_instr, code)); goto top; } + ADD_TO_TRACE(uopcode, max_length, 0, INSTR_IP(target_instr, code)); break; } diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 7ed3b571365..b80fa66e2a1 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -464,7 +464,7 @@ def compute_properties(op: parser.InstDef) -> Properties: ends_with_eval_breaker=eval_breaker_at_end(op), needs_this=variable_used(op, "this_instr"), always_exits=always_exits(op), - stores_sp=variable_used(op, "STORE_SP"), + stores_sp=variable_used(op, "SYNC_SP"), tier_one_only=variable_used(op, "TIER_ONE_ONLY"), uses_co_consts=variable_used(op, "FRAME_CO_CONSTS"), uses_co_names=variable_used(op, "FRAME_CO_NAMES"), diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index c6c602c7122..2fc2ab11532 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -124,7 +124,7 @@ def replace_decrefs( out.emit(f"Py_DECREF({var.name});\n") -def replace_store_sp( +def replace_sync_sp( out: CWriter, tkn: Token, tkn_iter: Iterator[Token], @@ -135,9 +135,7 @@ def replace_store_sp( next(tkn_iter) next(tkn_iter) next(tkn_iter) - out.emit_at("", tkn) stack.flush(out) - out.emit("_PyFrame_SetStackPointer(frame, stack_pointer);\n") def replace_check_eval_breaker( @@ -160,7 +158,7 @@ def replace_check_eval_breaker( "ERROR_IF": replace_error, "DECREF_INPUTS": replace_decrefs, "CHECK_EVAL_BREAKER": replace_check_eval_breaker, - "STORE_SP": replace_store_sp, + "SYNC_SP": replace_sync_sp, } ReplacementFunctionType = Callable[ diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md index e5a48999e96..e87aff43762 100644 --- a/Tools/cases_generator/interpreter_definition.md +++ b/Tools/cases_generator/interpreter_definition.md @@ -6,7 +6,7 @@ ## Abstract bytecode instructions, the dispatching mechanism, error handling, and tracing and instrumentation are all intermixed. -This document proposes defining a custom C-like DSL for defining the +This document proposes defining a custom C-like DSL for defining the instruction semantics and tools for generating the code deriving from the instruction definitions. @@ -46,7 +46,7 @@ ## Rationale As we improve the performance of CPython, we need to optimize larger regions of code, use more complex optimizations and, ultimately, translate to machine -code. +code. All of these steps introduce the possibility of more bugs, and require more code to be written. One way to mitigate this is through the use of code generators. @@ -62,7 +62,7 @@ ## Rationale Rewriting all the instructions is tedious and error-prone, and changing the instructions is a maintenance headache as both versions need to be kept in sync. -By using a code generator and using a common source for the instructions, or +By using a code generator and using a common source for the instructions, or parts of instructions, we can reduce the potential for errors considerably. @@ -75,7 +75,7 @@ ### Syntax Each op definition has a kind, a name, a stack and instruction stream effect, and a piece of C code describing its semantics:: - + ``` file: (definition | family | pseudo)+ @@ -86,7 +86,7 @@ ### Syntax "op" "(" NAME "," stack_effect ")" "{" C-code "}" | "macro" "(" NAME ")" "=" uop ("+" uop)* ";" - + stack_effect: "(" [inputs] "--" [outputs] ")" @@ -201,6 +201,7 @@ ### Special functions/macros * `DEOPT_IF(cond, instruction)`. Deoptimize if `cond` is met. * `ERROR_IF(cond, label)`. Jump to error handler at `label` if `cond` is true. * `DECREF_INPUTS()`. Generate `Py_DECREF()` calls for the input stack effects. +* `SYNC_SP()`. Synchronizes the physical stack pointer with the stack effects. Note that the use of `DECREF_INPUTS()` is optional -- manual calls to `Py_DECREF()` or other approaches are also acceptable @@ -445,7 +446,7 @@ ## Generating the interpreter stack_pointer += 1; } s1 = res; - } + } next_instr += (1 + 1 + 2 + 1 + 4); stack_pointer[-1] = s1; DISPATCH(); diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index 6633950aada..f62ece43c1b 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -169,6 +169,7 @@ def push(self, var: StackItem) -> str: return "" def flush(self, out: CWriter) -> None: + out.start_line() for var in self.variables: if not var.peek: cast = "(PyObject *)" if var.type else "" @@ -189,6 +190,7 @@ def flush(self, out: CWriter) -> None: self.base_offset.clear() self.top_offset.clear() self.peek_offset.clear() + out.start_line() def as_comment(self) -> str: return f"/* Variables: {[v.name for v in self.variables]}. Base offset: {self.base_offset.to_c()}. Top offset: {self.top_offset.to_c()} */"