mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 07:31:38 +00:00 
			
		
		
		
	GH-118093: Handle some polymorphism before requiring progress in tier two (GH-122843)
This commit is contained in:
		
							parent
							
								
									503af8fe9a
								
							
						
					
					
						commit
						9621a7d017
					
				
					 6 changed files with 73 additions and 42 deletions
				
			
		| 
						 | 
				
			
			@ -111,7 +111,8 @@ never_optimize(
 | 
			
		|||
    _PyInterpreterFrame *frame,
 | 
			
		||||
    _Py_CODEUNIT *instr,
 | 
			
		||||
    _PyExecutorObject **exec,
 | 
			
		||||
    int Py_UNUSED(stack_entries))
 | 
			
		||||
    int Py_UNUSED(stack_entries),
 | 
			
		||||
    bool Py_UNUSED(progress_needed))
 | 
			
		||||
{
 | 
			
		||||
    // This may be called if the optimizer is reset
 | 
			
		||||
    return 0;
 | 
			
		||||
| 
						 | 
				
			
			@ -176,32 +177,44 @@ _Py_SetTier2Optimizer(_PyOptimizerObject *optimizer)
 | 
			
		|||
int
 | 
			
		||||
_PyOptimizer_Optimize(
 | 
			
		||||
    _PyInterpreterFrame *frame, _Py_CODEUNIT *start,
 | 
			
		||||
    _PyStackRef *stack_pointer, _PyExecutorObject **executor_ptr)
 | 
			
		||||
    _PyStackRef *stack_pointer, _PyExecutorObject **executor_ptr, int chain_depth)
 | 
			
		||||
{
 | 
			
		||||
    // The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must*
 | 
			
		||||
    // make progress in order to avoid infinite loops or excessively-long
 | 
			
		||||
    // side-exit chains. We can only insert the executor into the bytecode if
 | 
			
		||||
    // this is true, since a deopt won't infinitely re-enter the executor:
 | 
			
		||||
    chain_depth %= MAX_CHAIN_DEPTH;
 | 
			
		||||
    bool progress_needed = chain_depth == 0;
 | 
			
		||||
    PyCodeObject *code = _PyFrame_GetCode(frame);
 | 
			
		||||
    assert(PyCode_Check(code));
 | 
			
		||||
    PyInterpreterState *interp = _PyInterpreterState_GET();
 | 
			
		||||
    if (!has_space_for_executor(code, start)) {
 | 
			
		||||
    if (progress_needed && !has_space_for_executor(code, start)) {
 | 
			
		||||
        return 0;
 | 
			
		||||
    }
 | 
			
		||||
    _PyOptimizerObject *opt = interp->optimizer;
 | 
			
		||||
    int err = opt->optimize(opt, frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
 | 
			
		||||
    int err = opt->optimize(opt, frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)), progress_needed);
 | 
			
		||||
    if (err <= 0) {
 | 
			
		||||
        return err;
 | 
			
		||||
    }
 | 
			
		||||
    assert(*executor_ptr != NULL);
 | 
			
		||||
    int index = get_index_for_executor(code, start);
 | 
			
		||||
    if (index < 0) {
 | 
			
		||||
        /* Out of memory. Don't raise and assume that the
 | 
			
		||||
         * error will show up elsewhere.
 | 
			
		||||
         *
 | 
			
		||||
         * If an optimizer has already produced an executor,
 | 
			
		||||
         * it might get confused by the executor disappearing,
 | 
			
		||||
         * but there is not much we can do about that here. */
 | 
			
		||||
        Py_DECREF(*executor_ptr);
 | 
			
		||||
        return 0;
 | 
			
		||||
    if (progress_needed) {
 | 
			
		||||
        int index = get_index_for_executor(code, start);
 | 
			
		||||
        if (index < 0) {
 | 
			
		||||
            /* Out of memory. Don't raise and assume that the
 | 
			
		||||
             * error will show up elsewhere.
 | 
			
		||||
             *
 | 
			
		||||
             * If an optimizer has already produced an executor,
 | 
			
		||||
             * it might get confused by the executor disappearing,
 | 
			
		||||
             * but there is not much we can do about that here. */
 | 
			
		||||
            Py_DECREF(*executor_ptr);
 | 
			
		||||
            return 0;
 | 
			
		||||
        }
 | 
			
		||||
        insert_executor(code, start, index, *executor_ptr);
 | 
			
		||||
    }
 | 
			
		||||
    insert_executor(code, start, index, *executor_ptr);
 | 
			
		||||
    else {
 | 
			
		||||
        (*executor_ptr)->vm_data.code = NULL;
 | 
			
		||||
    }
 | 
			
		||||
    (*executor_ptr)->vm_data.chain_depth = chain_depth;
 | 
			
		||||
    assert((*executor_ptr)->vm_data.valid);
 | 
			
		||||
    return 1;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -530,9 +543,9 @@ translate_bytecode_to_trace(
 | 
			
		|||
    _Py_CODEUNIT *instr,
 | 
			
		||||
    _PyUOpInstruction *trace,
 | 
			
		||||
    int buffer_size,
 | 
			
		||||
    _PyBloomFilter *dependencies)
 | 
			
		||||
    _PyBloomFilter *dependencies, bool progress_needed)
 | 
			
		||||
{
 | 
			
		||||
    bool progress_needed = true;
 | 
			
		||||
    bool first = true;
 | 
			
		||||
    PyCodeObject *code = _PyFrame_GetCode(frame);
 | 
			
		||||
    PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj;
 | 
			
		||||
    assert(PyFunction_Check(func));
 | 
			
		||||
| 
						 | 
				
			
			@ -576,7 +589,7 @@ translate_bytecode_to_trace(
 | 
			
		|||
        uint32_t opcode = instr->op.code;
 | 
			
		||||
        uint32_t oparg = instr->op.arg;
 | 
			
		||||
 | 
			
		||||
        if (!progress_needed && instr == initial_instr) {
 | 
			
		||||
        if (!first && instr == initial_instr) {
 | 
			
		||||
            // We have looped around to the start:
 | 
			
		||||
            RESERVE(1);
 | 
			
		||||
            ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
 | 
			
		||||
| 
						 | 
				
			
			@ -585,14 +598,6 @@ translate_bytecode_to_trace(
 | 
			
		|||
 | 
			
		||||
        DPRINTF(2, "%d: %s(%d)\n", target, _PyOpcode_OpName[opcode], oparg);
 | 
			
		||||
 | 
			
		||||
        if (opcode == ENTER_EXECUTOR) {
 | 
			
		||||
            assert(oparg < 256);
 | 
			
		||||
            _PyExecutorObject *executor = code->co_executors->executors[oparg];
 | 
			
		||||
            opcode = executor->vm_data.opcode;
 | 
			
		||||
            DPRINTF(2, "  * ENTER_EXECUTOR -> %s\n",  _PyOpcode_OpName[opcode]);
 | 
			
		||||
            oparg = executor->vm_data.oparg;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (opcode == EXTENDED_ARG) {
 | 
			
		||||
            instr++;
 | 
			
		||||
            opcode = instr->op.code;
 | 
			
		||||
| 
						 | 
				
			
			@ -602,13 +607,27 @@ translate_bytecode_to_trace(
 | 
			
		|||
                goto done;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        if (opcode == ENTER_EXECUTOR) {
 | 
			
		||||
            // We have a couple of options here. We *could* peek "underneath"
 | 
			
		||||
            // this executor and continue tracing, which could give us a longer,
 | 
			
		||||
            // more optimizeable trace (at the expense of lots of duplicated
 | 
			
		||||
            // tier two code). Instead, we choose to just end here and stitch to
 | 
			
		||||
            // the other trace, which allows a side-exit traces to rejoin the
 | 
			
		||||
            // "main" trace periodically (and also helps protect us against
 | 
			
		||||
            // pathological behavior where the amount of tier two code explodes
 | 
			
		||||
            // for a medium-length, branchy code path). This seems to work
 | 
			
		||||
            // better in practice, but in the future we could be smarter about
 | 
			
		||||
            // what we do here:
 | 
			
		||||
            goto done;
 | 
			
		||||
        }
 | 
			
		||||
        assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
 | 
			
		||||
        RESERVE_RAW(2, "_CHECK_VALIDITY_AND_SET_IP");
 | 
			
		||||
        ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target);
 | 
			
		||||
 | 
			
		||||
        /* Special case the first instruction,
 | 
			
		||||
         * so that we can guarantee forward progress */
 | 
			
		||||
        if (progress_needed) {
 | 
			
		||||
        if (first && progress_needed) {
 | 
			
		||||
            assert(first);
 | 
			
		||||
            if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) {
 | 
			
		||||
                opcode = _PyOpcode_Deopt[opcode];
 | 
			
		||||
            }
 | 
			
		||||
| 
						 | 
				
			
			@ -903,7 +922,7 @@ translate_bytecode_to_trace(
 | 
			
		|||
        }
 | 
			
		||||
    top:
 | 
			
		||||
        // Jump here after _PUSH_FRAME or likely branches.
 | 
			
		||||
        progress_needed = false;
 | 
			
		||||
        first = false;
 | 
			
		||||
    }  // End for (;;)
 | 
			
		||||
 | 
			
		||||
done:
 | 
			
		||||
| 
						 | 
				
			
			@ -912,7 +931,7 @@ translate_bytecode_to_trace(
 | 
			
		|||
    }
 | 
			
		||||
    assert(code == initial_code);
 | 
			
		||||
    // Skip short traces where we can't even translate a single instruction:
 | 
			
		||||
    if (progress_needed) {
 | 
			
		||||
    if (first) {
 | 
			
		||||
        OPT_STAT_INC(trace_too_short);
 | 
			
		||||
        DPRINTF(2,
 | 
			
		||||
                "No trace for %s (%s:%d) at byte offset %d (no progress)\n",
 | 
			
		||||
| 
						 | 
				
			
			@ -1225,13 +1244,14 @@ uop_optimize(
 | 
			
		|||
    _PyInterpreterFrame *frame,
 | 
			
		||||
    _Py_CODEUNIT *instr,
 | 
			
		||||
    _PyExecutorObject **exec_ptr,
 | 
			
		||||
    int curr_stackentries)
 | 
			
		||||
    int curr_stackentries,
 | 
			
		||||
    bool progress_needed)
 | 
			
		||||
{
 | 
			
		||||
    _PyBloomFilter dependencies;
 | 
			
		||||
    _Py_BloomFilter_Init(&dependencies);
 | 
			
		||||
    _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH];
 | 
			
		||||
    OPT_STAT_INC(attempts);
 | 
			
		||||
    int length = translate_bytecode_to_trace(frame, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies);
 | 
			
		||||
    int length = translate_bytecode_to_trace(frame, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies, progress_needed);
 | 
			
		||||
    if (length <= 0) {
 | 
			
		||||
        // Error or nothing translated
 | 
			
		||||
        return length;
 | 
			
		||||
| 
						 | 
				
			
			@ -1328,7 +1348,8 @@ counter_optimize(
 | 
			
		|||
    _PyInterpreterFrame *frame,
 | 
			
		||||
    _Py_CODEUNIT *instr,
 | 
			
		||||
    _PyExecutorObject **exec_ptr,
 | 
			
		||||
    int Py_UNUSED(curr_stackentries)
 | 
			
		||||
    int Py_UNUSED(curr_stackentries),
 | 
			
		||||
    bool Py_UNUSED(progress_needed)
 | 
			
		||||
)
 | 
			
		||||
{
 | 
			
		||||
    PyCodeObject *code = _PyFrame_GetCode(frame);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue