mirror of
https://github.com/python/cpython.git
synced 2025-12-31 04:23:37 +00:00
gh-116168: Remove extra _CHECK_STACK_SPACE uops (#117242)
This merges all `_CHECK_STACK_SPACE` uops in a trace into a single `_CHECK_STACK_SPACE_OPERAND` uop that checks whether there is enough stack space for all calls included in the entire trace.
This commit is contained in:
parent
976bcb2379
commit
1c43468886
8 changed files with 493 additions and 125 deletions
215
Include/internal/pycore_uop_ids.h
generated
215
Include/internal/pycore_uop_ids.h
generated
|
|
@ -68,14 +68,15 @@ extern "C" {
|
|||
#define _CHECK_PEP_523 330
|
||||
#define _CHECK_PERIODIC 331
|
||||
#define _CHECK_STACK_SPACE 332
|
||||
#define _CHECK_VALIDITY 333
|
||||
#define _CHECK_VALIDITY_AND_SET_IP 334
|
||||
#define _COLD_EXIT 335
|
||||
#define _COMPARE_OP 336
|
||||
#define _COMPARE_OP_FLOAT 337
|
||||
#define _COMPARE_OP_INT 338
|
||||
#define _COMPARE_OP_STR 339
|
||||
#define _CONTAINS_OP 340
|
||||
#define _CHECK_STACK_SPACE_OPERAND 333
|
||||
#define _CHECK_VALIDITY 334
|
||||
#define _CHECK_VALIDITY_AND_SET_IP 335
|
||||
#define _COLD_EXIT 336
|
||||
#define _COMPARE_OP 337
|
||||
#define _COMPARE_OP_FLOAT 338
|
||||
#define _COMPARE_OP_INT 339
|
||||
#define _COMPARE_OP_STR 340
|
||||
#define _CONTAINS_OP 341
|
||||
#define _CONTAINS_OP_DICT CONTAINS_OP_DICT
|
||||
#define _CONTAINS_OP_SET CONTAINS_OP_SET
|
||||
#define _CONVERT_VALUE CONVERT_VALUE
|
||||
|
|
@ -87,47 +88,47 @@ extern "C" {
|
|||
#define _DELETE_GLOBAL DELETE_GLOBAL
|
||||
#define _DELETE_NAME DELETE_NAME
|
||||
#define _DELETE_SUBSCR DELETE_SUBSCR
|
||||
#define _DEOPT 341
|
||||
#define _DEOPT 342
|
||||
#define _DICT_MERGE DICT_MERGE
|
||||
#define _DICT_UPDATE DICT_UPDATE
|
||||
#define _END_SEND END_SEND
|
||||
#define _ERROR_POP_N 342
|
||||
#define _ERROR_POP_N 343
|
||||
#define _EXIT_INIT_CHECK EXIT_INIT_CHECK
|
||||
#define _FATAL_ERROR 343
|
||||
#define _FATAL_ERROR 344
|
||||
#define _FORMAT_SIMPLE FORMAT_SIMPLE
|
||||
#define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC
|
||||
#define _FOR_ITER 344
|
||||
#define _FOR_ITER 345
|
||||
#define _FOR_ITER_GEN FOR_ITER_GEN
|
||||
#define _FOR_ITER_TIER_TWO 345
|
||||
#define _FOR_ITER_TIER_TWO 346
|
||||
#define _GET_AITER GET_AITER
|
||||
#define _GET_ANEXT GET_ANEXT
|
||||
#define _GET_AWAITABLE GET_AWAITABLE
|
||||
#define _GET_ITER GET_ITER
|
||||
#define _GET_LEN GET_LEN
|
||||
#define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER
|
||||
#define _GUARD_BOTH_FLOAT 346
|
||||
#define _GUARD_BOTH_INT 347
|
||||
#define _GUARD_BOTH_UNICODE 348
|
||||
#define _GUARD_BUILTINS_VERSION 349
|
||||
#define _GUARD_DORV_NO_DICT 350
|
||||
#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 351
|
||||
#define _GUARD_GLOBALS_VERSION 352
|
||||
#define _GUARD_IS_FALSE_POP 353
|
||||
#define _GUARD_IS_NONE_POP 354
|
||||
#define _GUARD_IS_NOT_NONE_POP 355
|
||||
#define _GUARD_IS_TRUE_POP 356
|
||||
#define _GUARD_KEYS_VERSION 357
|
||||
#define _GUARD_NOT_EXHAUSTED_LIST 358
|
||||
#define _GUARD_NOT_EXHAUSTED_RANGE 359
|
||||
#define _GUARD_NOT_EXHAUSTED_TUPLE 360
|
||||
#define _GUARD_TYPE_VERSION 361
|
||||
#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 362
|
||||
#define _INIT_CALL_PY_EXACT_ARGS 363
|
||||
#define _INIT_CALL_PY_EXACT_ARGS_0 364
|
||||
#define _INIT_CALL_PY_EXACT_ARGS_1 365
|
||||
#define _INIT_CALL_PY_EXACT_ARGS_2 366
|
||||
#define _INIT_CALL_PY_EXACT_ARGS_3 367
|
||||
#define _INIT_CALL_PY_EXACT_ARGS_4 368
|
||||
#define _GUARD_BOTH_FLOAT 347
|
||||
#define _GUARD_BOTH_INT 348
|
||||
#define _GUARD_BOTH_UNICODE 349
|
||||
#define _GUARD_BUILTINS_VERSION 350
|
||||
#define _GUARD_DORV_NO_DICT 351
|
||||
#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 352
|
||||
#define _GUARD_GLOBALS_VERSION 353
|
||||
#define _GUARD_IS_FALSE_POP 354
|
||||
#define _GUARD_IS_NONE_POP 355
|
||||
#define _GUARD_IS_NOT_NONE_POP 356
|
||||
#define _GUARD_IS_TRUE_POP 357
|
||||
#define _GUARD_KEYS_VERSION 358
|
||||
#define _GUARD_NOT_EXHAUSTED_LIST 359
|
||||
#define _GUARD_NOT_EXHAUSTED_RANGE 360
|
||||
#define _GUARD_NOT_EXHAUSTED_TUPLE 361
|
||||
#define _GUARD_TYPE_VERSION 362
|
||||
#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 363
|
||||
#define _INIT_CALL_PY_EXACT_ARGS 364
|
||||
#define _INIT_CALL_PY_EXACT_ARGS_0 365
|
||||
#define _INIT_CALL_PY_EXACT_ARGS_1 366
|
||||
#define _INIT_CALL_PY_EXACT_ARGS_2 367
|
||||
#define _INIT_CALL_PY_EXACT_ARGS_3 368
|
||||
#define _INIT_CALL_PY_EXACT_ARGS_4 369
|
||||
#define _INSTRUMENTED_CALL INSTRUMENTED_CALL
|
||||
#define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX
|
||||
#define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW
|
||||
|
|
@ -144,65 +145,65 @@ extern "C" {
|
|||
#define _INSTRUMENTED_RETURN_CONST INSTRUMENTED_RETURN_CONST
|
||||
#define _INSTRUMENTED_RETURN_VALUE INSTRUMENTED_RETURN_VALUE
|
||||
#define _INSTRUMENTED_YIELD_VALUE INSTRUMENTED_YIELD_VALUE
|
||||
#define _INTERNAL_INCREMENT_OPT_COUNTER 369
|
||||
#define _IS_NONE 370
|
||||
#define _INTERNAL_INCREMENT_OPT_COUNTER 370
|
||||
#define _IS_NONE 371
|
||||
#define _IS_OP IS_OP
|
||||
#define _ITER_CHECK_LIST 371
|
||||
#define _ITER_CHECK_RANGE 372
|
||||
#define _ITER_CHECK_TUPLE 373
|
||||
#define _ITER_JUMP_LIST 374
|
||||
#define _ITER_JUMP_RANGE 375
|
||||
#define _ITER_JUMP_TUPLE 376
|
||||
#define _ITER_NEXT_LIST 377
|
||||
#define _ITER_NEXT_RANGE 378
|
||||
#define _ITER_NEXT_TUPLE 379
|
||||
#define _JUMP_TO_TOP 380
|
||||
#define _ITER_CHECK_LIST 372
|
||||
#define _ITER_CHECK_RANGE 373
|
||||
#define _ITER_CHECK_TUPLE 374
|
||||
#define _ITER_JUMP_LIST 375
|
||||
#define _ITER_JUMP_RANGE 376
|
||||
#define _ITER_JUMP_TUPLE 377
|
||||
#define _ITER_NEXT_LIST 378
|
||||
#define _ITER_NEXT_RANGE 379
|
||||
#define _ITER_NEXT_TUPLE 380
|
||||
#define _JUMP_TO_TOP 381
|
||||
#define _LIST_APPEND LIST_APPEND
|
||||
#define _LIST_EXTEND LIST_EXTEND
|
||||
#define _LOAD_ASSERTION_ERROR LOAD_ASSERTION_ERROR
|
||||
#define _LOAD_ATTR 381
|
||||
#define _LOAD_ATTR_CLASS 382
|
||||
#define _LOAD_ATTR_CLASS_0 383
|
||||
#define _LOAD_ATTR_CLASS_1 384
|
||||
#define _LOAD_ATTR 382
|
||||
#define _LOAD_ATTR_CLASS 383
|
||||
#define _LOAD_ATTR_CLASS_0 384
|
||||
#define _LOAD_ATTR_CLASS_1 385
|
||||
#define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN
|
||||
#define _LOAD_ATTR_INSTANCE_VALUE 385
|
||||
#define _LOAD_ATTR_INSTANCE_VALUE_0 386
|
||||
#define _LOAD_ATTR_INSTANCE_VALUE_1 387
|
||||
#define _LOAD_ATTR_METHOD_LAZY_DICT 388
|
||||
#define _LOAD_ATTR_METHOD_NO_DICT 389
|
||||
#define _LOAD_ATTR_METHOD_WITH_VALUES 390
|
||||
#define _LOAD_ATTR_MODULE 391
|
||||
#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 392
|
||||
#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 393
|
||||
#define _LOAD_ATTR_INSTANCE_VALUE 386
|
||||
#define _LOAD_ATTR_INSTANCE_VALUE_0 387
|
||||
#define _LOAD_ATTR_INSTANCE_VALUE_1 388
|
||||
#define _LOAD_ATTR_METHOD_LAZY_DICT 389
|
||||
#define _LOAD_ATTR_METHOD_NO_DICT 390
|
||||
#define _LOAD_ATTR_METHOD_WITH_VALUES 391
|
||||
#define _LOAD_ATTR_MODULE 392
|
||||
#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 393
|
||||
#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 394
|
||||
#define _LOAD_ATTR_PROPERTY LOAD_ATTR_PROPERTY
|
||||
#define _LOAD_ATTR_SLOT 394
|
||||
#define _LOAD_ATTR_SLOT_0 395
|
||||
#define _LOAD_ATTR_SLOT_1 396
|
||||
#define _LOAD_ATTR_WITH_HINT 397
|
||||
#define _LOAD_ATTR_SLOT 395
|
||||
#define _LOAD_ATTR_SLOT_0 396
|
||||
#define _LOAD_ATTR_SLOT_1 397
|
||||
#define _LOAD_ATTR_WITH_HINT 398
|
||||
#define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS
|
||||
#define _LOAD_CONST LOAD_CONST
|
||||
#define _LOAD_CONST_INLINE 398
|
||||
#define _LOAD_CONST_INLINE_BORROW 399
|
||||
#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 400
|
||||
#define _LOAD_CONST_INLINE_WITH_NULL 401
|
||||
#define _LOAD_CONST_INLINE 399
|
||||
#define _LOAD_CONST_INLINE_BORROW 400
|
||||
#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 401
|
||||
#define _LOAD_CONST_INLINE_WITH_NULL 402
|
||||
#define _LOAD_DEREF LOAD_DEREF
|
||||
#define _LOAD_FAST 402
|
||||
#define _LOAD_FAST_0 403
|
||||
#define _LOAD_FAST_1 404
|
||||
#define _LOAD_FAST_2 405
|
||||
#define _LOAD_FAST_3 406
|
||||
#define _LOAD_FAST_4 407
|
||||
#define _LOAD_FAST_5 408
|
||||
#define _LOAD_FAST_6 409
|
||||
#define _LOAD_FAST_7 410
|
||||
#define _LOAD_FAST 403
|
||||
#define _LOAD_FAST_0 404
|
||||
#define _LOAD_FAST_1 405
|
||||
#define _LOAD_FAST_2 406
|
||||
#define _LOAD_FAST_3 407
|
||||
#define _LOAD_FAST_4 408
|
||||
#define _LOAD_FAST_5 409
|
||||
#define _LOAD_FAST_6 410
|
||||
#define _LOAD_FAST_7 411
|
||||
#define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR
|
||||
#define _LOAD_FAST_CHECK LOAD_FAST_CHECK
|
||||
#define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST
|
||||
#define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF
|
||||
#define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS
|
||||
#define _LOAD_GLOBAL 411
|
||||
#define _LOAD_GLOBAL_BUILTINS 412
|
||||
#define _LOAD_GLOBAL_MODULE 413
|
||||
#define _LOAD_GLOBAL 412
|
||||
#define _LOAD_GLOBAL_BUILTINS 413
|
||||
#define _LOAD_GLOBAL_MODULE 414
|
||||
#define _LOAD_LOCALS LOAD_LOCALS
|
||||
#define _LOAD_NAME LOAD_NAME
|
||||
#define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR
|
||||
|
|
@ -216,49 +217,49 @@ extern "C" {
|
|||
#define _MATCH_SEQUENCE MATCH_SEQUENCE
|
||||
#define _NOP NOP
|
||||
#define _POP_EXCEPT POP_EXCEPT
|
||||
#define _POP_FRAME 414
|
||||
#define _POP_JUMP_IF_FALSE 415
|
||||
#define _POP_JUMP_IF_TRUE 416
|
||||
#define _POP_FRAME 415
|
||||
#define _POP_JUMP_IF_FALSE 416
|
||||
#define _POP_JUMP_IF_TRUE 417
|
||||
#define _POP_TOP POP_TOP
|
||||
#define _POP_TOP_LOAD_CONST_INLINE_BORROW 417
|
||||
#define _POP_TOP_LOAD_CONST_INLINE_BORROW 418
|
||||
#define _PUSH_EXC_INFO PUSH_EXC_INFO
|
||||
#define _PUSH_FRAME 418
|
||||
#define _PUSH_FRAME 419
|
||||
#define _PUSH_NULL PUSH_NULL
|
||||
#define _REPLACE_WITH_TRUE 419
|
||||
#define _REPLACE_WITH_TRUE 420
|
||||
#define _RESUME_CHECK RESUME_CHECK
|
||||
#define _SAVE_RETURN_OFFSET 420
|
||||
#define _SEND 421
|
||||
#define _SAVE_RETURN_OFFSET 421
|
||||
#define _SEND 422
|
||||
#define _SEND_GEN SEND_GEN
|
||||
#define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS
|
||||
#define _SET_ADD SET_ADD
|
||||
#define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE
|
||||
#define _SET_UPDATE SET_UPDATE
|
||||
#define _SIDE_EXIT 422
|
||||
#define _START_EXECUTOR 423
|
||||
#define _STORE_ATTR 424
|
||||
#define _STORE_ATTR_INSTANCE_VALUE 425
|
||||
#define _STORE_ATTR_SLOT 426
|
||||
#define _SIDE_EXIT 423
|
||||
#define _START_EXECUTOR 424
|
||||
#define _STORE_ATTR 425
|
||||
#define _STORE_ATTR_INSTANCE_VALUE 426
|
||||
#define _STORE_ATTR_SLOT 427
|
||||
#define _STORE_ATTR_WITH_HINT STORE_ATTR_WITH_HINT
|
||||
#define _STORE_DEREF STORE_DEREF
|
||||
#define _STORE_FAST 427
|
||||
#define _STORE_FAST_0 428
|
||||
#define _STORE_FAST_1 429
|
||||
#define _STORE_FAST_2 430
|
||||
#define _STORE_FAST_3 431
|
||||
#define _STORE_FAST_4 432
|
||||
#define _STORE_FAST_5 433
|
||||
#define _STORE_FAST_6 434
|
||||
#define _STORE_FAST_7 435
|
||||
#define _STORE_FAST 428
|
||||
#define _STORE_FAST_0 429
|
||||
#define _STORE_FAST_1 430
|
||||
#define _STORE_FAST_2 431
|
||||
#define _STORE_FAST_3 432
|
||||
#define _STORE_FAST_4 433
|
||||
#define _STORE_FAST_5 434
|
||||
#define _STORE_FAST_6 435
|
||||
#define _STORE_FAST_7 436
|
||||
#define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST
|
||||
#define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST
|
||||
#define _STORE_GLOBAL STORE_GLOBAL
|
||||
#define _STORE_NAME STORE_NAME
|
||||
#define _STORE_SLICE STORE_SLICE
|
||||
#define _STORE_SUBSCR 436
|
||||
#define _STORE_SUBSCR 437
|
||||
#define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT
|
||||
#define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT
|
||||
#define _SWAP SWAP
|
||||
#define _TO_BOOL 437
|
||||
#define _TO_BOOL 438
|
||||
#define _TO_BOOL_BOOL TO_BOOL_BOOL
|
||||
#define _TO_BOOL_INT TO_BOOL_INT
|
||||
#define _TO_BOOL_LIST TO_BOOL_LIST
|
||||
|
|
@ -268,12 +269,12 @@ extern "C" {
|
|||
#define _UNARY_NEGATIVE UNARY_NEGATIVE
|
||||
#define _UNARY_NOT UNARY_NOT
|
||||
#define _UNPACK_EX UNPACK_EX
|
||||
#define _UNPACK_SEQUENCE 438
|
||||
#define _UNPACK_SEQUENCE 439
|
||||
#define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST
|
||||
#define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE
|
||||
#define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE
|
||||
#define _WITH_EXCEPT_START WITH_EXCEPT_START
|
||||
#define MAX_UOP_ID 438
|
||||
#define MAX_UOP_ID 439
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
4
Include/internal/pycore_uop_metadata.h
generated
4
Include/internal/pycore_uop_metadata.h
generated
|
|
@ -228,6 +228,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
|
|||
[_GUARD_IS_NOT_NONE_POP] = HAS_EXIT_FLAG,
|
||||
[_JUMP_TO_TOP] = HAS_EVAL_BREAK_FLAG,
|
||||
[_SET_IP] = 0,
|
||||
[_CHECK_STACK_SPACE_OPERAND] = HAS_DEOPT_FLAG,
|
||||
[_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG,
|
||||
[_EXIT_TRACE] = HAS_EXIT_FLAG,
|
||||
[_CHECK_VALIDITY] = HAS_DEOPT_FLAG,
|
||||
|
|
@ -302,6 +303,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
|
|||
[_CHECK_PEP_523] = "_CHECK_PEP_523",
|
||||
[_CHECK_PERIODIC] = "_CHECK_PERIODIC",
|
||||
[_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE",
|
||||
[_CHECK_STACK_SPACE_OPERAND] = "_CHECK_STACK_SPACE_OPERAND",
|
||||
[_CHECK_VALIDITY] = "_CHECK_VALIDITY",
|
||||
[_CHECK_VALIDITY_AND_SET_IP] = "_CHECK_VALIDITY_AND_SET_IP",
|
||||
[_COLD_EXIT] = "_COLD_EXIT",
|
||||
|
|
@ -902,6 +904,8 @@ int _PyUop_num_popped(int opcode, int oparg)
|
|||
return 0;
|
||||
case _SET_IP:
|
||||
return 0;
|
||||
case _CHECK_STACK_SPACE_OPERAND:
|
||||
return 0;
|
||||
case _SAVE_RETURN_OFFSET:
|
||||
return 0;
|
||||
case _EXIT_TRACE:
|
||||
|
|
|
|||
|
|
@ -952,6 +952,269 @@ def testfunc(n):
|
|||
_, ex = self._run_with_optimizer(testfunc, 16)
|
||||
self.assertIsNone(ex)
|
||||
|
||||
def test_combine_stack_space_checks_sequential(self):
    # Two calls made one after the other inside the traced loop.  The
    # callee bodies are kept exactly as written: their frame sizes are
    # what the final assertion compares against.
    def dummy12(x):
        return x - 1
    def dummy13(y):
        z = y + 2
        return y, z
    def testfunc(n):
        a = 0
        for _ in range(n):
            b = dummy12(7)
            c, d = dummy13(9)
            a += b + c + d
        return a

    total, executor = self._run_with_optimizer(testfunc, 32)
    self.assertEqual(total, 832)
    self.assertIsNotNone(executor)

    # Reduce every trace entry to (uop name, operand); the two middle
    # fields of each entry are not needed here.
    name_operand_pairs = [
        (name, operand) for name, _, _, operand in executor
    ]
    occurrences = [name for name, _ in name_operand_pairs].count
    self.assertEqual(occurrences("_PUSH_FRAME"), 2)
    self.assertEqual(occurrences("_POP_FRAME"), 2)
    # Every per-call check must be gone, leaving one combined check.
    self.assertEqual(occurrences("_CHECK_STACK_SPACE"), 0)
    self.assertEqual(occurrences("_CHECK_STACK_SPACE_OPERAND"), 1)
    # sequential calls: max(12, 13) == 13
    expected_check = (
        "_CHECK_STACK_SPACE_OPERAND",
        _testinternalcapi.get_co_framesize(dummy13.__code__),
    )
    self.assertIn(expected_check, name_operand_pairs)
|
||||
|
||||
def test_combine_stack_space_checks_nested(self):
    # One call nested inside another: both frames are live at the same
    # time, so the combined check has to cover their sum.  The callee
    # bodies are kept exactly as written because their frame sizes are
    # what the final assertion compares against.
    def dummy12(x):
        return x + 3
    def dummy15(y):
        z = dummy12(y)
        return y, z
    def testfunc(n):
        a = 0
        for _ in range(n):
            b, c = dummy15(2)
            a += b + c
        return a

    total, executor = self._run_with_optimizer(testfunc, 32)
    self.assertEqual(total, 224)
    self.assertIsNotNone(executor)

    name_operand_pairs = [
        (name, operand) for name, _, _, operand in executor
    ]
    occurrences = [name for name, _ in name_operand_pairs].count
    self.assertEqual(occurrences("_PUSH_FRAME"), 2)
    self.assertEqual(occurrences("_POP_FRAME"), 2)
    self.assertEqual(occurrences("_CHECK_STACK_SPACE"), 0)
    self.assertEqual(occurrences("_CHECK_STACK_SPACE_OPERAND"), 1)
    # nested calls: 15 + 12 == 27
    framesize_of = _testinternalcapi.get_co_framesize
    deepest = framesize_of(dummy15.__code__) + framesize_of(dummy12.__code__)
    self.assertIn(
        ("_CHECK_STACK_SPACE_OPERAND", deepest), name_operand_pairs
    )
|
||||
|
||||
def test_combine_stack_space_checks_several_calls(self):
    # A shallow call followed by a call that itself makes two sequential
    # calls.  The callee bodies are kept exactly as written because
    # their frame sizes are what the final assertion compares against.
    def dummy12(x):
        return x + 3
    def dummy13(y):
        z = y + 2
        return y, z
    def dummy18(y):
        z = dummy12(y)
        x, w = dummy13(z)
        return z, x, w
    def testfunc(n):
        a = 0
        for _ in range(n):
            b = dummy12(5)
            c, d, e = dummy18(2)
            a += b + c + d + e
        return a

    total, executor = self._run_with_optimizer(testfunc, 32)
    self.assertEqual(total, 800)
    self.assertIsNotNone(executor)

    name_operand_pairs = [
        (name, operand) for name, _, _, operand in executor
    ]
    occurrences = [name for name, _ in name_operand_pairs].count
    self.assertEqual(occurrences("_PUSH_FRAME"), 4)
    self.assertEqual(occurrences("_POP_FRAME"), 4)
    self.assertEqual(occurrences("_CHECK_STACK_SPACE"), 0)
    self.assertEqual(occurrences("_CHECK_STACK_SPACE_OPERAND"), 1)
    # max(12, 18 + max(12, 13)) == 31
    framesize_of = _testinternalcapi.get_co_framesize
    deepest = framesize_of(dummy18.__code__) + framesize_of(dummy13.__code__)
    self.assertIn(
        ("_CHECK_STACK_SPACE_OPERAND", deepest), name_operand_pairs
    )
|
||||
|
||||
def test_combine_stack_space_checks_several_calls_different_order(self):
    # same as `several_calls` but with top-level calls reversed
    #
    # The callee bodies are kept exactly as written because their frame
    # sizes are what the final assertion compares against.
    def dummy12(x):
        return x + 3
    def dummy13(y):
        z = y + 2
        return y, z
    def dummy18(y):
        z = dummy12(y)
        x, w = dummy13(z)
        return z, x, w
    def testfunc(n):
        a = 0
        for _ in range(n):
            c, d, e = dummy18(2)
            b = dummy12(5)
            a += b + c + d + e
        return a

    total, executor = self._run_with_optimizer(testfunc, 32)
    self.assertEqual(total, 800)
    self.assertIsNotNone(executor)

    name_operand_pairs = [
        (name, operand) for name, _, _, operand in executor
    ]
    occurrences = [name for name, _ in name_operand_pairs].count
    self.assertEqual(occurrences("_PUSH_FRAME"), 4)
    self.assertEqual(occurrences("_POP_FRAME"), 4)
    self.assertEqual(occurrences("_CHECK_STACK_SPACE"), 0)
    self.assertEqual(occurrences("_CHECK_STACK_SPACE_OPERAND"), 1)
    # max(18 + max(12, 13), 12) == 31
    framesize_of = _testinternalcapi.get_co_framesize
    deepest = framesize_of(dummy18.__code__) + framesize_of(dummy13.__code__)
    self.assertIn(
        ("_CHECK_STACK_SPACE_OPERAND", deepest), name_operand_pairs
    )
|
||||
|
||||
def test_combine_stack_space_complex(self):
    # A tree of calls mixing sequential and nested invocations.  Every
    # helper ultimately returns its argument unchanged, so each loop
    # iteration adds 1 + 1 + 1 to the accumulator.
    def dummy0(x):
        return x
    def dummy1(x):
        return dummy0(x)
    def dummy2(x):
        return dummy1(x)
    def dummy3(x):
        return dummy0(x)
    def dummy4(x):
        y = dummy0(x)
        return dummy3(y)
    def dummy5(x):
        return dummy2(x)
    def dummy6(x):
        y = dummy5(x)
        z = dummy0(y)
        return dummy4(z)
    def testfunc(n):
        a = 0
        # Iterate n times (previously a hard-coded 32 that silently
        # ignored the argument), matching the sibling tests.
        for _ in range(n):
            b = dummy5(1)
            c = dummy0(1)
            d = dummy6(1)
            a += b + c + d
        return a

    res, ex = self._run_with_optimizer(testfunc, 32)
    self.assertEqual(res, 96)
    self.assertIsNotNone(ex)

    uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
    uop_names = [uop[0] for uop in uops_and_operands]
    # dummy5 chain (4 frames) + dummy0 (1) + dummy6 subtree (10) == 15
    self.assertEqual(uop_names.count("_PUSH_FRAME"), 15)
    self.assertEqual(uop_names.count("_POP_FRAME"), 15)

    self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0)
    self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1)
    # The deepest chain of simultaneously live frames is
    # dummy6 -> dummy5 -> dummy2 -> dummy1 -> dummy0.
    largest_stack = (
        _testinternalcapi.get_co_framesize(dummy6.__code__) +
        _testinternalcapi.get_co_framesize(dummy5.__code__) +
        _testinternalcapi.get_co_framesize(dummy2.__code__) +
        _testinternalcapi.get_co_framesize(dummy1.__code__) +
        _testinternalcapi.get_co_framesize(dummy0.__code__)
    )
    self.assertIn(
        ("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands
    )
|
||||
|
||||
def test_combine_stack_space_checks_large_framesize(self):
    # Create a function with a large framesize. This ensures _CHECK_STACK_SPACE is
    # actually doing its job. Note that the resulting trace hits
    # UOP_MAX_TRACE_LENGTH, but since all _CHECK_STACK_SPACEs happen early, this
    # test is still meaningful.
    repetitions = 10000
    ns = {}
    # Assemble the source with explicit indentation instead of relying on
    # the indentation of triple-quoted templates being preserved: if that
    # whitespace is lost, exec() fails with IndentationError.
    source_lines = ["def dummy_large(a0):"]
    source_lines.extend(
        f"    a{n+1} = a{n} + 1" for n in range(repetitions)
    )
    source_lines.append(f"    return a{repetitions-1}")
    exec("\n".join(source_lines), ns, ns)
    dummy_large = ns['dummy_large']

    # The generated function is:
    #
    # def dummy_large(a0):
    #     a1 = a0 + 1
    #     a2 = a1 + 1
    #     ....
    #     a10000 = a9999 + 1
    #     return a9999
    #
    # i.e. dummy_large(x) == x + repetitions - 1; the last assignment is
    # unused and only adds one more local.

    def dummy15(z):
        y = dummy_large(z)
        return y + 3

    def testfunc(n):
        b = 0
        for _ in range(n):
            b += dummy15(7)
        return b

    res, ex = self._run_with_optimizer(testfunc, 32)
    # dummy15(7) == 7 + (repetitions - 1) + 3 == repetitions + 9
    self.assertEqual(res, 32 * (repetitions + 9))
    self.assertIsNotNone(ex)

    uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
    uop_names = [uop[0] for uop in uops_and_operands]
    self.assertEqual(uop_names.count("_PUSH_FRAME"), 2)
    self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1)

    # this hits a different case during trace projection in refcount test runs only,
    # so we need to account for both possibilities
    leftover_checks = uop_names.count("_CHECK_STACK_SPACE")
    self.assertIn(leftover_checks, [0, 1])
    if leftover_checks == 0:
        # Both frames were folded into the combined check.
        largest_stack = (
            _testinternalcapi.get_co_framesize(dummy15.__code__) +
            _testinternalcapi.get_co_framesize(dummy_large.__code__)
        )
    else:
        # One per-call check survived; the combined check then carries
        # only dummy15's frame.
        largest_stack = _testinternalcapi.get_co_framesize(dummy15.__code__)
    self.assertIn(
        ("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands
    )
|
||||
|
||||
def test_combine_stack_space_checks_recursion(self):
    # A self-recursive callee.  Its body is kept exactly as written,
    # including the `while` that returns on its first iteration: the
    # shape of the resulting trace and the callee's frame size are what
    # the assertions below pin down.
    def dummy15(x):
        while x > 0:
            return dummy15(x - 1)
        return 42
    def testfunc(n):
        a = 0
        for _ in range(n):
            a += dummy15(n)
        return a

    total, executor = self._run_with_optimizer(testfunc, 32)
    self.assertEqual(total, 42 * 32)
    self.assertIsNotNone(executor)

    name_operand_pairs = [
        (name, operand) for name, _, _, operand in executor
    ]
    occurrences = [name for name, _ in name_operand_pairs].count
    self.assertEqual(occurrences("_PUSH_FRAME"), 2)
    self.assertEqual(occurrences("_POP_FRAME"), 0)
    # Here one plain check is expected to remain next to the combined one.
    self.assertEqual(occurrences("_CHECK_STACK_SPACE"), 1)
    self.assertEqual(occurrences("_CHECK_STACK_SPACE_OPERAND"), 1)
    expected_check = (
        "_CHECK_STACK_SPACE_OPERAND",
        _testinternalcapi.get_co_framesize(dummy15.__code__),
    )
    self.assertIn(expected_check, name_operand_pairs)
|
||||
|
||||
def test_many_nested(self):
|
||||
# overflow the trace_stack
|
||||
def dummy_a(x):
|
||||
|
|
@ -976,8 +1239,9 @@ def testfunc(n):
|
|||
a += dummy_h(n)
|
||||
return a
|
||||
|
||||
self._run_with_optimizer(testfunc, 32)
|
||||
|
||||
res, ex = self._run_with_optimizer(testfunc, 32)
|
||||
self.assertEqual(res, 32 * 32)
|
||||
self.assertIsNone(ex)
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -959,6 +959,17 @@ iframe_getlasti(PyObject *self, PyObject *frame)
|
|||
return PyLong_FromLong(PyUnstable_InterpreterFrame_GetLasti(f));
|
||||
}
|
||||
|
||||
/* Return the co_framesize field of a code object as a Python int.
 *
 * `arg` must be a code object; any other type sets TypeError and
 * returns NULL.  `self` is unused.
 */
static PyObject *
get_co_framesize(PyObject *self, PyObject *arg)
{
    if (PyCode_Check(arg)) {
        return PyLong_FromLong(((PyCodeObject *)arg)->co_framesize);
    }
    PyErr_SetString(PyExc_TypeError, "argument must be a code object");
    return NULL;
}
|
||||
|
||||
static PyObject *
|
||||
new_counter_optimizer(PyObject *self, PyObject *arg)
|
||||
{
|
||||
|
|
@ -1715,6 +1726,7 @@ static PyMethodDef module_functions[] = {
|
|||
{"iframe_getcode", iframe_getcode, METH_O, NULL},
|
||||
{"iframe_getline", iframe_getline, METH_O, NULL},
|
||||
{"iframe_getlasti", iframe_getlasti, METH_O, NULL},
|
||||
{"get_co_framesize", get_co_framesize, METH_O, NULL},
|
||||
{"get_optimizer", get_optimizer, METH_NOARGS, NULL},
|
||||
{"set_optimizer", set_optimizer, METH_O, NULL},
|
||||
{"new_counter_optimizer", new_counter_optimizer, METH_NOARGS, NULL},
|
||||
|
|
|
|||
|
|
@ -4094,6 +4094,12 @@ dummy_func(
|
|||
frame->instr_ptr = (_Py_CODEUNIT *)instr_ptr;
|
||||
}
|
||||
|
||||
// Tier-2-only uop taking its frame size from the instruction operand
// (`framesize`) rather than from a callable on the stack; it has no
// stack inputs or outputs.  Deoptimizes when either guard below fails.
tier2 op(_CHECK_STACK_SPACE_OPERAND, (framesize/2 --)) {
|
||||
assert(framesize <= INT_MAX);
|
||||
// Presumably checks that the thread's data stack can hold `framesize`
// more entries -- TODO confirm against _PyThreadState_HasStackSpace.
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, framesize));
|
||||
// Also bail out when at most one level of Python recursion remains.
DEOPT_IF(tstate->py_recursion_remaining <= 1);
|
||||
}
|
||||
|
||||
op(_SAVE_RETURN_OFFSET, (--)) {
|
||||
#if TIER_ONE
|
||||
frame->return_offset = (uint16_t)(next_instr - this_instr);
|
||||
|
|
|
|||
8
Python/executor_cases.c.h
generated
8
Python/executor_cases.c.h
generated
|
|
@ -3592,6 +3592,14 @@
|
|||
break;
|
||||
}
|
||||
|
||||
case _CHECK_STACK_SPACE_OPERAND: {
|
||||
uint32_t framesize = (uint32_t)CURRENT_OPERAND();
|
||||
assert(framesize <= INT_MAX);
|
||||
if (!_PyThreadState_HasStackSpace(tstate, framesize)) JUMP_TO_JUMP_TARGET();
|
||||
if (tstate->py_recursion_remaining <= 1) JUMP_TO_JUMP_TARGET();
|
||||
break;
|
||||
}
|
||||
|
||||
case _SAVE_RETURN_OFFSET: {
|
||||
oparg = CURRENT_OPARG();
|
||||
#if TIER_ONE
|
||||
|
|
|
|||
|
|
@ -529,14 +529,41 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
|
|||
}
|
||||
}
|
||||
}
|
||||
Py_FatalError("No terminating instruction");
|
||||
Py_UNREACHABLE();
|
||||
}
|
||||
|
||||
/* _PUSH_FRAME/_POP_FRAME's operand can be 0, a PyFunctionObject *, or a
|
||||
* PyCodeObject *. Retrieve the code object if possible.
|
||||
*/
|
||||
static PyCodeObject *
|
||||
get_co(_PyUOpInstruction *op)
|
||||
{
|
||||
assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME);
|
||||
PyCodeObject *co = NULL;
|
||||
uint64_t operand = op->operand;
|
||||
if (operand == 0) {
|
||||
return NULL;
|
||||
}
|
||||
if (operand & 1) {
|
||||
co = (PyCodeObject *)(operand & ~1);
|
||||
}
|
||||
else {
|
||||
PyFunctionObject *func = (PyFunctionObject *)operand;
|
||||
assert(PyFunction_Check(func));
|
||||
co = (PyCodeObject *)func->func_code;
|
||||
}
|
||||
assert(PyCode_Check(co));
|
||||
return co;
|
||||
}
|
||||
|
||||
static void
|
||||
peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size)
|
||||
{
|
||||
PyCodeObject *co = _PyFrame_GetCode(frame);
|
||||
int curr_space = 0;
|
||||
int max_space = 0;
|
||||
_PyUOpInstruction *first_valid_check_stack = NULL;
|
||||
_PyUOpInstruction *corresponding_check_stack = NULL;
|
||||
for (int pc = 0; pc < buffer_size; pc++) {
|
||||
int opcode = buffer[pc].opcode;
|
||||
switch(opcode) {
|
||||
|
|
@ -547,8 +574,7 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s
|
|||
buffer[pc].operand = (uintptr_t)val;
|
||||
break;
|
||||
}
|
||||
case _CHECK_PEP_523:
|
||||
{
|
||||
case _CHECK_PEP_523: {
|
||||
/* Setting the eval frame function invalidates
|
||||
* all executors, so no need to check dynamically */
|
||||
if (_PyInterpreterState_GET()->eval_frame == NULL) {
|
||||
|
|
@ -556,29 +582,72 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s
|
|||
}
|
||||
break;
|
||||
}
|
||||
case _PUSH_FRAME:
|
||||
case _POP_FRAME:
|
||||
{
|
||||
uint64_t operand = buffer[pc].operand;
|
||||
if (operand & 1) {
|
||||
co = (PyCodeObject *)(operand & ~1);
|
||||
assert(PyCode_Check(co));
|
||||
case _CHECK_STACK_SPACE: {
|
||||
assert(corresponding_check_stack == NULL);
|
||||
corresponding_check_stack = &buffer[pc];
|
||||
break;
|
||||
}
|
||||
case _PUSH_FRAME: {
|
||||
assert(corresponding_check_stack != NULL);
|
||||
co = get_co(&buffer[pc]);
|
||||
if (co == NULL) {
|
||||
// should be about to _EXIT_TRACE anyway
|
||||
goto finish;
|
||||
}
|
||||
else if (operand == 0) {
|
||||
co = NULL;
|
||||
int framesize = co->co_framesize;
|
||||
assert(framesize > 0);
|
||||
curr_space += framesize;
|
||||
if (curr_space < 0 || curr_space > INT32_MAX) {
|
||||
// won't fit in signed 32-bit int
|
||||
goto finish;
|
||||
}
|
||||
max_space = curr_space > max_space ? curr_space : max_space;
|
||||
if (first_valid_check_stack == NULL) {
|
||||
first_valid_check_stack = corresponding_check_stack;
|
||||
}
|
||||
else {
|
||||
PyFunctionObject *func = (PyFunctionObject *)operand;
|
||||
assert(PyFunction_Check(func));
|
||||
co = (PyCodeObject *)func->func_code;
|
||||
// delete all but the first valid _CHECK_STACK_SPACE
|
||||
corresponding_check_stack->opcode = _NOP;
|
||||
}
|
||||
corresponding_check_stack = NULL;
|
||||
break;
|
||||
}
|
||||
case _POP_FRAME: {
|
||||
assert(corresponding_check_stack == NULL);
|
||||
assert(co != NULL);
|
||||
int framesize = co->co_framesize;
|
||||
assert(framesize > 0);
|
||||
assert(framesize <= curr_space);
|
||||
curr_space -= framesize;
|
||||
co = get_co(&buffer[pc]);
|
||||
if (co == NULL) {
|
||||
// might be impossible, but bailing is still safe
|
||||
goto finish;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case _JUMP_TO_TOP:
|
||||
case _EXIT_TRACE:
|
||||
return;
|
||||
goto finish;
|
||||
#ifdef Py_DEBUG
|
||||
case _CHECK_STACK_SPACE_OPERAND: {
|
||||
/* We should never see _CHECK_STACK_SPACE_OPERANDs.
|
||||
* They are only created at the end of this pass. */
|
||||
Py_UNREACHABLE();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
Py_UNREACHABLE();
|
||||
finish:
|
||||
if (first_valid_check_stack != NULL) {
|
||||
assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE);
|
||||
assert(max_space > 0);
|
||||
assert(max_space <= INT_MAX);
|
||||
assert(max_space <= INT32_MAX);
|
||||
first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND;
|
||||
first_valid_check_stack->operand = max_space;
|
||||
}
|
||||
}
|
||||
|
||||
// 0 - failure, no error raised, just fall back to Tier 1
|
||||
|
|
|
|||
4
Python/optimizer_cases.c.h
generated
4
Python/optimizer_cases.c.h
generated
|
|
@ -1906,6 +1906,10 @@
|
|||
break;
|
||||
}
|
||||
|
||||
case _CHECK_STACK_SPACE_OPERAND: {
|
||||
break;
|
||||
}
|
||||
|
||||
case _SAVE_RETURN_OFFSET: {
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue