GH-112354: _GUARD_IS_TRUE_POP side-exits to target the next instruction, not themselves. (GH-114078)

This commit is contained in:
Mark Shannon 2024-01-15 11:41:06 +00:00 committed by GitHub
parent 2010d45327
commit ac10947ba7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 40 additions and 26 deletions

View file

@ -169,7 +169,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
[_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
[_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
[_PUSH_FRAME] = 0,
[_PUSH_FRAME] = HAS_ESCAPES_FLAG,
[_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG,
[_CALL_STR_1] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_CALL_TUPLE_1] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,

View file

@ -805,7 +805,8 @@ dummy_func(
#if TIER_ONE
assert(frame != &entry_frame);
#endif
STORE_SP();
SYNC_SP();
_PyFrame_SetStackPointer(frame, stack_pointer);
assert(EMPTY());
_Py_LeaveRecursiveCallPy(tstate);
// GH-99729: We need to unlink the frame *before* clearing it:
@ -3154,7 +3155,8 @@ dummy_func(
// Write it out explicitly because it's subtly different.
// Eventually this should be the only occurrence of this code.
assert(tstate->interp->eval_frame == NULL);
STORE_SP();
SYNC_SP();
_PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->previous = frame;
CALL_STAT_INC(inlined_py_calls);
frame = tstate->current_frame = new_frame;
@ -4013,20 +4015,27 @@ dummy_func(
///////// Tier-2 only opcodes /////////
op (_GUARD_IS_TRUE_POP, (flag -- )) {
DEOPT_IF(Py_IsFalse(flag));
SYNC_SP();
DEOPT_IF(!Py_IsTrue(flag));
assert(Py_IsTrue(flag));
}
op (_GUARD_IS_FALSE_POP, (flag -- )) {
DEOPT_IF(Py_IsTrue(flag));
SYNC_SP();
DEOPT_IF(!Py_IsFalse(flag));
assert(Py_IsFalse(flag));
}
op (_GUARD_IS_NONE_POP, (val -- )) {
DEOPT_IF(!Py_IsNone(val));
SYNC_SP();
if (!Py_IsNone(val)) {
Py_DECREF(val);
DEOPT_IF(1);
}
}
op (_GUARD_IS_NOT_NONE_POP, (val -- )) {
SYNC_SP();
DEOPT_IF(Py_IsNone(val));
Py_DECREF(val);
}

View file

@ -3318,35 +3318,38 @@
case _GUARD_IS_TRUE_POP: {
PyObject *flag;
flag = stack_pointer[-1];
if (Py_IsFalse(flag)) goto deoptimize;
assert(Py_IsTrue(flag));
stack_pointer += -1;
if (!Py_IsTrue(flag)) goto deoptimize;
assert(Py_IsTrue(flag));
break;
}
case _GUARD_IS_FALSE_POP: {
PyObject *flag;
flag = stack_pointer[-1];
if (Py_IsTrue(flag)) goto deoptimize;
assert(Py_IsFalse(flag));
stack_pointer += -1;
if (!Py_IsFalse(flag)) goto deoptimize;
assert(Py_IsFalse(flag));
break;
}
case _GUARD_IS_NONE_POP: {
PyObject *val;
val = stack_pointer[-1];
if (!Py_IsNone(val)) goto deoptimize;
stack_pointer += -1;
if (!Py_IsNone(val)) {
Py_DECREF(val);
if (1) goto deoptimize;
}
break;
}
case _GUARD_IS_NOT_NONE_POP: {
PyObject *val;
val = stack_pointer[-1];
stack_pointer += -1;
if (Py_IsNone(val)) goto deoptimize;
Py_DECREF(val);
stack_pointer += -1;
break;
}

View file

@ -481,18 +481,19 @@ translate_bytecode_to_trace(
goto done;
}
uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_likely];
_Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
DPRINTF(2, "%s(%d): counter=%x, bitcount=%d, likely=%d, confidence=%d, uopcode=%s\n",
_PyOpcode_OpName[opcode], oparg,
counter, bitcount, jump_likely, confidence, _PyUOpName(uopcode));
ADD_TO_TRACE(uopcode, max_length, 0, target);
_Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
_Py_CODEUNIT *target_instr = next_instr + oparg;
if (jump_likely) {
_Py_CODEUNIT *target_instr = next_instr + oparg;
DPRINTF(2, "Jump likely (%x = %d bits), continue at byte offset %d\n",
instr[1].cache, bitcount, 2 * INSTR_IP(target_instr, code));
instr = target_instr;
ADD_TO_TRACE(uopcode, max_length, 0, INSTR_IP(next_instr, code));
goto top;
}
ADD_TO_TRACE(uopcode, max_length, 0, INSTR_IP(target_instr, code));
break;
}

View file

@ -464,7 +464,7 @@ def compute_properties(op: parser.InstDef) -> Properties:
ends_with_eval_breaker=eval_breaker_at_end(op),
needs_this=variable_used(op, "this_instr"),
always_exits=always_exits(op),
stores_sp=variable_used(op, "STORE_SP"),
stores_sp=variable_used(op, "SYNC_SP"),
tier_one_only=variable_used(op, "TIER_ONE_ONLY"),
uses_co_consts=variable_used(op, "FRAME_CO_CONSTS"),
uses_co_names=variable_used(op, "FRAME_CO_NAMES"),

View file

@ -124,7 +124,7 @@ def replace_decrefs(
out.emit(f"Py_DECREF({var.name});\n")
def replace_store_sp(
def replace_sync_sp(
out: CWriter,
tkn: Token,
tkn_iter: Iterator[Token],
@ -135,9 +135,7 @@ def replace_store_sp(
next(tkn_iter)
next(tkn_iter)
next(tkn_iter)
out.emit_at("", tkn)
stack.flush(out)
out.emit("_PyFrame_SetStackPointer(frame, stack_pointer);\n")
def replace_check_eval_breaker(
@ -160,7 +158,7 @@ def replace_check_eval_breaker(
"ERROR_IF": replace_error,
"DECREF_INPUTS": replace_decrefs,
"CHECK_EVAL_BREAKER": replace_check_eval_breaker,
"STORE_SP": replace_store_sp,
"SYNC_SP": replace_sync_sp,
}
ReplacementFunctionType = Callable[

View file

@ -6,7 +6,7 @@ ## Abstract
bytecode instructions, the dispatching mechanism, error handling, and
tracing and instrumentation are all intermixed.
This document proposes defining a custom C-like DSL for defining the
This document proposes defining a custom C-like DSL for defining the
instruction semantics and tools for generating the code deriving from
the instruction definitions.
@ -46,7 +46,7 @@ ## Rationale
As we improve the performance of CPython, we need to optimize larger regions
of code, use more complex optimizations and, ultimately, translate to machine
code.
code.
All of these steps introduce the possibility of more bugs, and require more code
to be written. One way to mitigate this is through the use of code generators.
@ -62,7 +62,7 @@ ## Rationale
Rewriting all the instructions is tedious and error-prone, and changing the
instructions is a maintenance headache as both versions need to be kept in sync.
By using a code generator and using a common source for the instructions, or
By using a code generator and using a common source for the instructions, or
parts of instructions, we can reduce the potential for errors considerably.
@ -75,7 +75,7 @@ ### Syntax
Each op definition has a kind, a name, a stack and instruction stream effect,
and a piece of C code describing its semantics::
```
file:
(definition | family | pseudo)+
@ -86,7 +86,7 @@ ### Syntax
"op" "(" NAME "," stack_effect ")" "{" C-code "}"
|
"macro" "(" NAME ")" "=" uop ("+" uop)* ";"
stack_effect:
"(" [inputs] "--" [outputs] ")"
@ -201,6 +201,7 @@ ### Special functions/macros
* `DEOPT_IF(cond, instruction)`. Deoptimize if `cond` is met.
* `ERROR_IF(cond, label)`. Jump to error handler at `label` if `cond` is true.
* `DECREF_INPUTS()`. Generate `Py_DECREF()` calls for the input stack effects.
* `SYNC_SP()`. Synchronizes the physical stack pointer with the stack effects.
Note that the use of `DECREF_INPUTS()` is optional -- manual calls
to `Py_DECREF()` or other approaches are also acceptable
@ -445,7 +446,7 @@ ## Generating the interpreter
stack_pointer += 1;
}
s1 = res;
}
}
next_instr += (1 + 1 + 2 + 1 + 4);
stack_pointer[-1] = s1;
DISPATCH();

View file

@ -169,6 +169,7 @@ def push(self, var: StackItem) -> str:
return ""
def flush(self, out: CWriter) -> None:
out.start_line()
for var in self.variables:
if not var.peek:
cast = "(PyObject *)" if var.type else ""
@ -189,6 +190,7 @@ def flush(self, out: CWriter) -> None:
self.base_offset.clear()
self.top_offset.clear()
self.peek_offset.clear()
out.start_line()
def as_comment(self) -> str:
return f"/* Variables: {[v.name for v in self.variables]}. Base offset: {self.base_offset.to_c()}. Top offset: {self.top_offset.to_c()} */"