From d716faae086115303b2910d8f877bbf22d1448a5 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Tue, 4 Feb 2025 19:27:11 -0800 Subject: [PATCH 01/73] Experiment with borrowing load_fast --- Python/bytecodes.c | 9 +++++---- Python/executor_cases.c.h | 21 +++++++++++---------- Python/generated_cases.c.h | 9 +++++---- Tools/cases_generator/analyzer.py | 1 + 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 24aa7bbb87c193..f33054b4472957 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -262,12 +262,13 @@ dummy_func( ); ERROR_IF(1, error); } - value = PyStackRef_DUP(value_s); + // value = PyStackRef_DUP(value_s); + value = PyStackRef_DupDeferred(value_s); } replicate(8) pure inst(LOAD_FAST, (-- value)) { assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DUP(GETLOCAL(oparg)); + value = PyStackRef_DupDeferred(GETLOCAL(oparg)); } inst(LOAD_FAST_AND_CLEAR, (-- value)) { @@ -278,8 +279,8 @@ dummy_func( inst(LOAD_FAST_LOAD_FAST, ( -- value1, value2)) { uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; - value1 = PyStackRef_DUP(GETLOCAL(oparg1)); - value2 = PyStackRef_DUP(GETLOCAL(oparg2)); + value1 = PyStackRef_DupDeferred(GETLOCAL(oparg1)); + value2 = PyStackRef_DupDeferred(GETLOCAL(oparg2)); } family(LOAD_CONST, 0) = { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index e164f11620de41..5112aff070afef 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -87,7 +87,8 @@ stack_pointer = _PyFrame_GetStackPointer(frame); JUMP_TO_ERROR(); } - value = PyStackRef_DUP(value_s); + // value = PyStackRef_DUP(value_s); + value = PyStackRef_DupDeferred(value_s); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -99,7 +100,7 @@ oparg = 0; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DUP(GETLOCAL(oparg)); + value = PyStackRef_DupDeferred(GETLOCAL(oparg)); stack_pointer[0] = 
value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -111,7 +112,7 @@ oparg = 1; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DUP(GETLOCAL(oparg)); + value = PyStackRef_DupDeferred(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -123,7 +124,7 @@ oparg = 2; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DUP(GETLOCAL(oparg)); + value = PyStackRef_DupDeferred(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -135,7 +136,7 @@ oparg = 3; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DUP(GETLOCAL(oparg)); + value = PyStackRef_DupDeferred(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -147,7 +148,7 @@ oparg = 4; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DUP(GETLOCAL(oparg)); + value = PyStackRef_DupDeferred(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -159,7 +160,7 @@ oparg = 5; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DUP(GETLOCAL(oparg)); + value = PyStackRef_DupDeferred(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -171,7 +172,7 @@ oparg = 6; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DUP(GETLOCAL(oparg)); + value = PyStackRef_DupDeferred(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -183,7 +184,7 @@ oparg = 7; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DUP(GETLOCAL(oparg)); + value = PyStackRef_DupDeferred(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; 
assert(WITHIN_STACK_BOUNDS()); @@ -194,7 +195,7 @@ _PyStackRef value; oparg = CURRENT_OPARG(); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DUP(GETLOCAL(oparg)); + value = PyStackRef_DupDeferred(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 8c3c0e3910b8d1..dca46175bf367c 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -8776,7 +8776,7 @@ INSTRUCTION_STATS(LOAD_FAST); _PyStackRef value; assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DUP(GETLOCAL(oparg)); + value = PyStackRef_DupDeferred(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -8819,7 +8819,8 @@ stack_pointer = _PyFrame_GetStackPointer(frame); JUMP_TO_LABEL(error); } - value = PyStackRef_DUP(value_s); + // value = PyStackRef_DUP(value_s); + value = PyStackRef_DupDeferred(value_s); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -8838,8 +8839,8 @@ _PyStackRef value2; uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; - value1 = PyStackRef_DUP(GETLOCAL(oparg1)); - value2 = PyStackRef_DUP(GETLOCAL(oparg2)); + value1 = PyStackRef_DupDeferred(GETLOCAL(oparg1)); + value2 = PyStackRef_DupDeferred(GETLOCAL(oparg2)); stack_pointer[0] = value1; stack_pointer[1] = value2; stack_pointer += 2; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 162a0fdb2cc459..09833c0dd2691c 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -590,6 +590,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_CLEAR", "PyStackRef_CLOSE_SPECIALIZED", "PyStackRef_DUP", + "PyStackRef_DupDeferred", "PyStackRef_False", "PyStackRef_FromPyObjectImmortal", "PyStackRef_FromPyObjectNew", From 3736923a22c66fa14de630c48056121eedf9c9ee Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 
5 Feb 2025 12:36:05 -0800 Subject: [PATCH 02/73] Checkpoint poc --- Include/internal/pycore_opcode_metadata.h | 13 +- Include/internal/pycore_stackref.h | 31 +++- Include/internal/pycore_uop_ids.h | 1 + Include/internal/pycore_uop_metadata.h | 4 + Include/opcode_ids.h | 193 +++++++++++----------- Lib/_opcode_metadata.py | 193 +++++++++++----------- Programs/test_frozenmain.h | 14 +- Python/bytecodes.c | 21 ++- Python/executor_cases.c.h | 48 +++--- Python/generated_cases.c.h | 33 +++- Python/opcode_targets.h | 5 +- Python/optimizer_cases.c.h | 9 + Python/specialize.c | 56 +++++++ Tools/cases_generator/analyzer.py | 1 + 14 files changed, 379 insertions(+), 243 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index b6d85490eef1f3..95efc986a8a12f 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -335,6 +335,8 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 0; case LOAD_FAST_AND_CLEAR: return 0; + case LOAD_FAST_BORROW: + return 0; case LOAD_FAST_CHECK: return 0; case LOAD_FAST_LOAD_FAST: @@ -810,6 +812,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 1; case LOAD_FAST_AND_CLEAR: return 1; + case LOAD_FAST_BORROW: + return 1; case LOAD_FAST_CHECK: return 1; case LOAD_FAST_LOAD_FAST: @@ -1605,6 +1609,10 @@ int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect) { *effect = 1; return 0; } + case LOAD_FAST_BORROW: { + *effect = 1; + return 0; + } case LOAD_FAST_CHECK: { *effect = 1; return 0; @@ -2157,6 +2165,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, + [LOAD_FAST_BORROW] = { true, INSTR_FMT_IX, 0 }, [LOAD_FAST_CHECK] = { true, 
INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FROM_DICT_OR_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, @@ -2365,6 +2374,7 @@ _PyOpcode_macro_expansion[256] = { [LOAD_DEREF] = { .nuops = 1, .uops = { { _LOAD_DEREF, OPARG_SIMPLE, 0 } } }, [LOAD_FAST] = { .nuops = 1, .uops = { { _LOAD_FAST, OPARG_SIMPLE, 0 } } }, [LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { _LOAD_FAST_AND_CLEAR, OPARG_SIMPLE, 0 } } }, + [LOAD_FAST_BORROW] = { .nuops = 1, .uops = { { _LOAD_FAST_BORROW, OPARG_SIMPLE, 0 } } }, [LOAD_FAST_CHECK] = { .nuops = 1, .uops = { { _LOAD_FAST_CHECK, OPARG_SIMPLE, 0 } } }, [LOAD_FAST_LOAD_FAST] = { .nuops = 2, .uops = { { _LOAD_FAST, OPARG_TOP, 0 }, { _LOAD_FAST, OPARG_BOTTOM, 0 } } }, [LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_DEREF, OPARG_SIMPLE, 0 } } }, @@ -2591,6 +2601,7 @@ const char *_PyOpcode_OpName[266] = { [LOAD_DEREF] = "LOAD_DEREF", [LOAD_FAST] = "LOAD_FAST", [LOAD_FAST_AND_CLEAR] = "LOAD_FAST_AND_CLEAR", + [LOAD_FAST_BORROW] = "LOAD_FAST_BORROW", [LOAD_FAST_CHECK] = "LOAD_FAST_CHECK", [LOAD_FAST_LOAD_FAST] = "LOAD_FAST_LOAD_FAST", [LOAD_FROM_DICT_OR_DEREF] = "LOAD_FROM_DICT_OR_DEREF", @@ -2849,6 +2860,7 @@ const uint8_t _PyOpcode_Deopt[256] = { [LOAD_DEREF] = LOAD_DEREF, [LOAD_FAST] = LOAD_FAST, [LOAD_FAST_AND_CLEAR] = LOAD_FAST_AND_CLEAR, + [LOAD_FAST_BORROW] = LOAD_FAST_BORROW, [LOAD_FAST_CHECK] = LOAD_FAST_CHECK, [LOAD_FAST_LOAD_FAST] = LOAD_FAST_LOAD_FAST, [LOAD_FROM_DICT_OR_DEREF] = LOAD_FROM_DICT_OR_DEREF, @@ -2931,7 +2943,6 @@ const uint8_t _PyOpcode_Deopt[256] = { #endif // NEED_OPCODE_METADATA #define EXTRA_CASES \ - case 117: \ case 118: \ case 119: \ case 120: \ diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 92b10d21100a25..61b58b9d251d88 100644 --- 
a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -213,6 +213,25 @@ _PyStackRef_FromPyObjectSteal(PyObject *obj) } # define PyStackRef_FromPyObjectSteal(obj) _PyStackRef_FromPyObjectSteal(_PyObject_CAST(obj)) +static inline _PyStackRef +_PyStackRef_StealIfUnborrowed(_PyStackRef stackref) +{ + if (PyStackRef_IsNull(stackref)) { + return stackref; + } + if (PyStackRef_IsDeferred(stackref)) { + PyObject *obj = PyStackRef_AsPyObjectBorrow(stackref); + if (_Py_IsImmortal(obj) || _PyObject_HasDeferredRefcount(obj)) { + return stackref; + } + else { + fprintf(stderr, "===> Converting to strong reference\n"); + return (_PyStackRef){ .bits = (uintptr_t)(Py_NewRef(obj)) | Py_TAG_PTR }; + } + } + return stackref; +} + static inline _PyStackRef PyStackRef_FromPyObjectNew(PyObject *obj) { @@ -253,15 +272,21 @@ PyStackRef_DUP(_PyStackRef stackref) { assert(!PyStackRef_IsNull(stackref)); if (PyStackRef_IsDeferred(stackref)) { - assert(_Py_IsImmortal(PyStackRef_AsPyObjectBorrow(stackref)) || - _PyObject_HasDeferredRefcount(PyStackRef_AsPyObjectBorrow(stackref)) - ); + /* assert(_Py_IsImmortal(PyStackRef_AsPyObjectBorrow(stackref)) || */ + /* _PyObject_HasDeferredRefcount(PyStackRef_AsPyObjectBorrow(stackref)) */ + /* ); */ return stackref; } Py_INCREF(PyStackRef_AsPyObjectBorrow(stackref)); return stackref; } +static inline _PyStackRef +PyStackRef_DupDeferred(_PyStackRef stackref) +{ + return (_PyStackRef){ .bits = stackref.bits | Py_TAG_DEFERRED }; +} + // Convert a possibly deferred reference to a strong reference. 
static inline _PyStackRef PyStackRef_AsStrongReference(_PyStackRef stackref) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 5143b10def5396..34c2181253a62f 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -201,6 +201,7 @@ extern "C" { #define _LOAD_FAST_6 426 #define _LOAD_FAST_7 427 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR +#define _LOAD_FAST_BORROW LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 0013540c496938..b97b70b4bc2468 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -33,6 +33,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_FAST_6] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_7] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_BORROW] = 0, [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_CONST_MORTAL] = HAS_ARG_FLAG | HAS_CONST_FLAG, @@ -463,6 +464,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_FAST_6] = "_LOAD_FAST_6", [_LOAD_FAST_7] = "_LOAD_FAST_7", [_LOAD_FAST_AND_CLEAR] = "_LOAD_FAST_AND_CLEAR", + [_LOAD_FAST_BORROW] = "_LOAD_FAST_BORROW", [_LOAD_FAST_CHECK] = "_LOAD_FAST_CHECK", [_LOAD_FAST_LOAD_FAST] = "_LOAD_FAST_LOAD_FAST", [_LOAD_FROM_DICT_OR_DEREF] = "_LOAD_FROM_DICT_OR_DEREF", @@ -585,6 +587,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _LOAD_FAST: return 0; + case _LOAD_FAST_BORROW: + return 0; case _LOAD_FAST_AND_CLEAR: return 0; case _LOAD_FAST_LOAD_FAST: diff --git a/Include/opcode_ids.h b/Include/opcode_ids.h index a634d5e5a229c8..227b6d52bdcbba 100644 --- a/Include/opcode_ids.h +++ b/Include/opcode_ids.h @@ 
-32,101 +32,102 @@ extern "C" { #define GET_YIELD_FROM_ITER 19 #define INTERPRETER_EXIT 20 #define LOAD_BUILD_CLASS 21 -#define LOAD_LOCALS 22 -#define MAKE_FUNCTION 23 -#define MATCH_KEYS 24 -#define MATCH_MAPPING 25 -#define MATCH_SEQUENCE 26 -#define NOP 27 -#define NOT_TAKEN 28 -#define POP_EXCEPT 29 -#define POP_ITER 30 -#define POP_TOP 31 -#define PUSH_EXC_INFO 32 -#define PUSH_NULL 33 -#define RETURN_GENERATOR 34 -#define RETURN_VALUE 35 -#define SETUP_ANNOTATIONS 36 -#define STORE_SLICE 37 -#define STORE_SUBSCR 38 -#define TO_BOOL 39 -#define UNARY_INVERT 40 -#define UNARY_NEGATIVE 41 -#define UNARY_NOT 42 -#define WITH_EXCEPT_START 43 -#define BINARY_OP 44 -#define BUILD_LIST 45 -#define BUILD_MAP 46 -#define BUILD_SET 47 -#define BUILD_SLICE 48 -#define BUILD_STRING 49 -#define BUILD_TUPLE 50 -#define CALL 51 -#define CALL_INTRINSIC_1 52 -#define CALL_INTRINSIC_2 53 -#define CALL_KW 54 -#define COMPARE_OP 55 -#define CONTAINS_OP 56 -#define CONVERT_VALUE 57 -#define COPY 58 -#define COPY_FREE_VARS 59 -#define DELETE_ATTR 60 -#define DELETE_DEREF 61 -#define DELETE_FAST 62 -#define DELETE_GLOBAL 63 -#define DELETE_NAME 64 -#define DICT_MERGE 65 -#define DICT_UPDATE 66 -#define EXTENDED_ARG 67 -#define FOR_ITER 68 -#define GET_AWAITABLE 69 -#define IMPORT_FROM 70 -#define IMPORT_NAME 71 -#define IS_OP 72 -#define JUMP_BACKWARD 73 -#define JUMP_BACKWARD_NO_INTERRUPT 74 -#define JUMP_FORWARD 75 -#define LIST_APPEND 76 -#define LIST_EXTEND 77 -#define LOAD_ATTR 78 -#define LOAD_COMMON_CONSTANT 79 -#define LOAD_CONST 80 -#define LOAD_DEREF 81 -#define LOAD_FAST 82 -#define LOAD_FAST_AND_CLEAR 83 -#define LOAD_FAST_CHECK 84 -#define LOAD_FAST_LOAD_FAST 85 -#define LOAD_FROM_DICT_OR_DEREF 86 -#define LOAD_FROM_DICT_OR_GLOBALS 87 -#define LOAD_GLOBAL 88 -#define LOAD_NAME 89 -#define LOAD_SMALL_INT 90 -#define LOAD_SPECIAL 91 -#define LOAD_SUPER_ATTR 92 -#define MAKE_CELL 93 -#define MAP_ADD 94 -#define MATCH_CLASS 95 -#define POP_JUMP_IF_FALSE 96 -#define 
POP_JUMP_IF_NONE 97 -#define POP_JUMP_IF_NOT_NONE 98 -#define POP_JUMP_IF_TRUE 99 -#define RAISE_VARARGS 100 -#define RERAISE 101 -#define SEND 102 -#define SET_ADD 103 -#define SET_FUNCTION_ATTRIBUTE 104 -#define SET_UPDATE 105 -#define STORE_ATTR 106 -#define STORE_DEREF 107 -#define STORE_FAST 108 -#define STORE_FAST_LOAD_FAST 109 -#define STORE_FAST_STORE_FAST 110 -#define STORE_GLOBAL 111 -#define STORE_NAME 112 -#define SWAP 113 -#define UNPACK_EX 114 -#define UNPACK_SEQUENCE 115 -#define YIELD_VALUE 116 +#define LOAD_FAST_BORROW 22 +#define LOAD_LOCALS 23 +#define MAKE_FUNCTION 24 +#define MATCH_KEYS 25 +#define MATCH_MAPPING 26 +#define MATCH_SEQUENCE 27 +#define NOP 28 +#define NOT_TAKEN 29 +#define POP_EXCEPT 30 +#define POP_ITER 31 +#define POP_TOP 32 +#define PUSH_EXC_INFO 33 +#define PUSH_NULL 34 +#define RETURN_GENERATOR 35 +#define RETURN_VALUE 36 +#define SETUP_ANNOTATIONS 37 +#define STORE_SLICE 38 +#define STORE_SUBSCR 39 +#define TO_BOOL 40 +#define UNARY_INVERT 41 +#define UNARY_NEGATIVE 42 +#define UNARY_NOT 43 +#define WITH_EXCEPT_START 44 +#define BINARY_OP 45 +#define BUILD_LIST 46 +#define BUILD_MAP 47 +#define BUILD_SET 48 +#define BUILD_SLICE 49 +#define BUILD_STRING 50 +#define BUILD_TUPLE 51 +#define CALL 52 +#define CALL_INTRINSIC_1 53 +#define CALL_INTRINSIC_2 54 +#define CALL_KW 55 +#define COMPARE_OP 56 +#define CONTAINS_OP 57 +#define CONVERT_VALUE 58 +#define COPY 59 +#define COPY_FREE_VARS 60 +#define DELETE_ATTR 61 +#define DELETE_DEREF 62 +#define DELETE_FAST 63 +#define DELETE_GLOBAL 64 +#define DELETE_NAME 65 +#define DICT_MERGE 66 +#define DICT_UPDATE 67 +#define EXTENDED_ARG 68 +#define FOR_ITER 69 +#define GET_AWAITABLE 70 +#define IMPORT_FROM 71 +#define IMPORT_NAME 72 +#define IS_OP 73 +#define JUMP_BACKWARD 74 +#define JUMP_BACKWARD_NO_INTERRUPT 75 +#define JUMP_FORWARD 76 +#define LIST_APPEND 77 +#define LIST_EXTEND 78 +#define LOAD_ATTR 79 +#define LOAD_COMMON_CONSTANT 80 +#define LOAD_CONST 81 +#define LOAD_DEREF 82 
+#define LOAD_FAST 83 +#define LOAD_FAST_AND_CLEAR 84 +#define LOAD_FAST_CHECK 85 +#define LOAD_FAST_LOAD_FAST 86 +#define LOAD_FROM_DICT_OR_DEREF 87 +#define LOAD_FROM_DICT_OR_GLOBALS 88 +#define LOAD_GLOBAL 89 +#define LOAD_NAME 90 +#define LOAD_SMALL_INT 91 +#define LOAD_SPECIAL 92 +#define LOAD_SUPER_ATTR 93 +#define MAKE_CELL 94 +#define MAP_ADD 95 +#define MATCH_CLASS 96 +#define POP_JUMP_IF_FALSE 97 +#define POP_JUMP_IF_NONE 98 +#define POP_JUMP_IF_NOT_NONE 99 +#define POP_JUMP_IF_TRUE 100 +#define RAISE_VARARGS 101 +#define RERAISE 102 +#define SEND 103 +#define SET_ADD 104 +#define SET_FUNCTION_ATTRIBUTE 105 +#define SET_UPDATE 106 +#define STORE_ATTR 107 +#define STORE_DEREF 108 +#define STORE_FAST 109 +#define STORE_FAST_LOAD_FAST 110 +#define STORE_FAST_STORE_FAST 111 +#define STORE_GLOBAL 112 +#define STORE_NAME 113 +#define SWAP 114 +#define UNPACK_EX 115 +#define UNPACK_SEQUENCE 116 +#define YIELD_VALUE 117 #define RESUME 149 #define BINARY_OP_ADD_FLOAT 150 #define BINARY_OP_ADD_INT 151 @@ -243,7 +244,7 @@ extern "C" { #define SETUP_WITH 264 #define STORE_FAST_MAYBE_NULL 265 -#define HAVE_ARGUMENT 43 +#define HAVE_ARGUMENT 44 #define MIN_SPECIALIZED_OPCODE 150 #define MIN_INSTRUMENTED_OPCODE 234 diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py index 3dc69635cba39e..c7cf719251731b 100644 --- a/Lib/_opcode_metadata.py +++ b/Lib/_opcode_metadata.py @@ -233,101 +233,102 @@ 'GET_YIELD_FROM_ITER': 19, 'INTERPRETER_EXIT': 20, 'LOAD_BUILD_CLASS': 21, - 'LOAD_LOCALS': 22, - 'MAKE_FUNCTION': 23, - 'MATCH_KEYS': 24, - 'MATCH_MAPPING': 25, - 'MATCH_SEQUENCE': 26, - 'NOP': 27, - 'NOT_TAKEN': 28, - 'POP_EXCEPT': 29, - 'POP_ITER': 30, - 'POP_TOP': 31, - 'PUSH_EXC_INFO': 32, - 'PUSH_NULL': 33, - 'RETURN_GENERATOR': 34, - 'RETURN_VALUE': 35, - 'SETUP_ANNOTATIONS': 36, - 'STORE_SLICE': 37, - 'STORE_SUBSCR': 38, - 'TO_BOOL': 39, - 'UNARY_INVERT': 40, - 'UNARY_NEGATIVE': 41, - 'UNARY_NOT': 42, - 'WITH_EXCEPT_START': 43, - 'BINARY_OP': 44, - 'BUILD_LIST': 
45, - 'BUILD_MAP': 46, - 'BUILD_SET': 47, - 'BUILD_SLICE': 48, - 'BUILD_STRING': 49, - 'BUILD_TUPLE': 50, - 'CALL': 51, - 'CALL_INTRINSIC_1': 52, - 'CALL_INTRINSIC_2': 53, - 'CALL_KW': 54, - 'COMPARE_OP': 55, - 'CONTAINS_OP': 56, - 'CONVERT_VALUE': 57, - 'COPY': 58, - 'COPY_FREE_VARS': 59, - 'DELETE_ATTR': 60, - 'DELETE_DEREF': 61, - 'DELETE_FAST': 62, - 'DELETE_GLOBAL': 63, - 'DELETE_NAME': 64, - 'DICT_MERGE': 65, - 'DICT_UPDATE': 66, - 'EXTENDED_ARG': 67, - 'FOR_ITER': 68, - 'GET_AWAITABLE': 69, - 'IMPORT_FROM': 70, - 'IMPORT_NAME': 71, - 'IS_OP': 72, - 'JUMP_BACKWARD': 73, - 'JUMP_BACKWARD_NO_INTERRUPT': 74, - 'JUMP_FORWARD': 75, - 'LIST_APPEND': 76, - 'LIST_EXTEND': 77, - 'LOAD_ATTR': 78, - 'LOAD_COMMON_CONSTANT': 79, - 'LOAD_CONST': 80, - 'LOAD_DEREF': 81, - 'LOAD_FAST': 82, - 'LOAD_FAST_AND_CLEAR': 83, - 'LOAD_FAST_CHECK': 84, - 'LOAD_FAST_LOAD_FAST': 85, - 'LOAD_FROM_DICT_OR_DEREF': 86, - 'LOAD_FROM_DICT_OR_GLOBALS': 87, - 'LOAD_GLOBAL': 88, - 'LOAD_NAME': 89, - 'LOAD_SMALL_INT': 90, - 'LOAD_SPECIAL': 91, - 'LOAD_SUPER_ATTR': 92, - 'MAKE_CELL': 93, - 'MAP_ADD': 94, - 'MATCH_CLASS': 95, - 'POP_JUMP_IF_FALSE': 96, - 'POP_JUMP_IF_NONE': 97, - 'POP_JUMP_IF_NOT_NONE': 98, - 'POP_JUMP_IF_TRUE': 99, - 'RAISE_VARARGS': 100, - 'RERAISE': 101, - 'SEND': 102, - 'SET_ADD': 103, - 'SET_FUNCTION_ATTRIBUTE': 104, - 'SET_UPDATE': 105, - 'STORE_ATTR': 106, - 'STORE_DEREF': 107, - 'STORE_FAST': 108, - 'STORE_FAST_LOAD_FAST': 109, - 'STORE_FAST_STORE_FAST': 110, - 'STORE_GLOBAL': 111, - 'STORE_NAME': 112, - 'SWAP': 113, - 'UNPACK_EX': 114, - 'UNPACK_SEQUENCE': 115, - 'YIELD_VALUE': 116, + 'LOAD_FAST_BORROW': 22, + 'LOAD_LOCALS': 23, + 'MAKE_FUNCTION': 24, + 'MATCH_KEYS': 25, + 'MATCH_MAPPING': 26, + 'MATCH_SEQUENCE': 27, + 'NOP': 28, + 'NOT_TAKEN': 29, + 'POP_EXCEPT': 30, + 'POP_ITER': 31, + 'POP_TOP': 32, + 'PUSH_EXC_INFO': 33, + 'PUSH_NULL': 34, + 'RETURN_GENERATOR': 35, + 'RETURN_VALUE': 36, + 'SETUP_ANNOTATIONS': 37, + 'STORE_SLICE': 38, + 'STORE_SUBSCR': 39, + 'TO_BOOL': 
40, + 'UNARY_INVERT': 41, + 'UNARY_NEGATIVE': 42, + 'UNARY_NOT': 43, + 'WITH_EXCEPT_START': 44, + 'BINARY_OP': 45, + 'BUILD_LIST': 46, + 'BUILD_MAP': 47, + 'BUILD_SET': 48, + 'BUILD_SLICE': 49, + 'BUILD_STRING': 50, + 'BUILD_TUPLE': 51, + 'CALL': 52, + 'CALL_INTRINSIC_1': 53, + 'CALL_INTRINSIC_2': 54, + 'CALL_KW': 55, + 'COMPARE_OP': 56, + 'CONTAINS_OP': 57, + 'CONVERT_VALUE': 58, + 'COPY': 59, + 'COPY_FREE_VARS': 60, + 'DELETE_ATTR': 61, + 'DELETE_DEREF': 62, + 'DELETE_FAST': 63, + 'DELETE_GLOBAL': 64, + 'DELETE_NAME': 65, + 'DICT_MERGE': 66, + 'DICT_UPDATE': 67, + 'EXTENDED_ARG': 68, + 'FOR_ITER': 69, + 'GET_AWAITABLE': 70, + 'IMPORT_FROM': 71, + 'IMPORT_NAME': 72, + 'IS_OP': 73, + 'JUMP_BACKWARD': 74, + 'JUMP_BACKWARD_NO_INTERRUPT': 75, + 'JUMP_FORWARD': 76, + 'LIST_APPEND': 77, + 'LIST_EXTEND': 78, + 'LOAD_ATTR': 79, + 'LOAD_COMMON_CONSTANT': 80, + 'LOAD_CONST': 81, + 'LOAD_DEREF': 82, + 'LOAD_FAST': 83, + 'LOAD_FAST_AND_CLEAR': 84, + 'LOAD_FAST_CHECK': 85, + 'LOAD_FAST_LOAD_FAST': 86, + 'LOAD_FROM_DICT_OR_DEREF': 87, + 'LOAD_FROM_DICT_OR_GLOBALS': 88, + 'LOAD_GLOBAL': 89, + 'LOAD_NAME': 90, + 'LOAD_SMALL_INT': 91, + 'LOAD_SPECIAL': 92, + 'LOAD_SUPER_ATTR': 93, + 'MAKE_CELL': 94, + 'MAP_ADD': 95, + 'MATCH_CLASS': 96, + 'POP_JUMP_IF_FALSE': 97, + 'POP_JUMP_IF_NONE': 98, + 'POP_JUMP_IF_NOT_NONE': 99, + 'POP_JUMP_IF_TRUE': 100, + 'RAISE_VARARGS': 101, + 'RERAISE': 102, + 'SEND': 103, + 'SET_ADD': 104, + 'SET_FUNCTION_ATTRIBUTE': 105, + 'SET_UPDATE': 106, + 'STORE_ATTR': 107, + 'STORE_DEREF': 108, + 'STORE_FAST': 109, + 'STORE_FAST_LOAD_FAST': 110, + 'STORE_FAST_STORE_FAST': 111, + 'STORE_GLOBAL': 112, + 'STORE_NAME': 113, + 'SWAP': 114, + 'UNPACK_EX': 115, + 'UNPACK_SEQUENCE': 116, + 'YIELD_VALUE': 117, 'INSTRUMENTED_END_FOR': 234, 'INSTRUMENTED_POP_ITER': 235, 'INSTRUMENTED_END_SEND': 236, @@ -360,5 +361,5 @@ 'STORE_FAST_MAYBE_NULL': 265, } -HAVE_ARGUMENT = 43 +HAVE_ARGUMENT = 44 MIN_INSTRUMENTED_OPCODE = 234 diff --git a/Programs/test_frozenmain.h 
b/Programs/test_frozenmain.h index 0fe8d3d3f7d8c6..867a17220260b1 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -13,26 +13,26 @@ unsigned char M_test_frozenmain[] = { 80,5,89,6,12,0,80,6,89,5,89,6,44,26,0,0, 0,0,0,0,0,0,0,0,12,0,49,4,51,1,0,0, 0,0,0,0,31,0,73,26,0,0,9,0,30,0,80,0, - 35,0,41,7,78,122,18,70,114,111,122,101,110,32,72,101, - 108,108,111,32,87,111,114,108,100,122,8,115,121,115,46,97, + 35,0,41,7,78,218,18,70,114,111,122,101,110,32,72,101, + 108,108,111,32,87,111,114,108,100,218,8,115,121,115,46,97, 114,103,118,218,6,99,111,110,102,105,103,41,5,218,12,112, 114,111,103,114,97,109,95,110,97,109,101,218,10,101,120,101, 99,117,116,97,98,108,101,218,15,117,115,101,95,101,110,118, 105,114,111,110,109,101,110,116,218,17,99,111,110,102,105,103, 117,114,101,95,99,95,115,116,100,105,111,218,14,98,117,102, - 102,101,114,101,100,95,115,116,100,105,111,122,7,99,111,110, - 102,105,103,32,122,2,58,32,41,7,218,3,115,121,115,218, + 102,101,114,101,100,95,115,116,100,105,111,218,7,99,111,110, + 102,105,103,32,218,2,58,32,41,7,218,3,115,121,115,218, 17,95,116,101,115,116,105,110,116,101,114,110,97,108,99,97, 112,105,218,5,112,114,105,110,116,218,4,97,114,103,118,218, - 11,103,101,116,95,99,111,110,102,105,103,115,114,2,0,0, + 11,103,101,116,95,99,111,110,102,105,103,115,114,4,0,0, 0,218,3,107,101,121,169,0,243,0,0,0,0,218,18,116, 101,115,116,95,102,114,111,122,101,110,109,97,105,110,46,112, - 121,218,8,60,109,111,100,117,108,101,62,114,17,0,0,0, + 121,218,8,60,109,111,100,117,108,101,62,114,21,0,0,0, 1,0,0,0,115,94,0,0,0,240,3,1,1,1,243,8, 0,1,11,219,0,24,225,0,5,208,6,26,212,0,27,217, 0,5,128,106,144,35,151,40,145,40,212,0,27,216,9,26, 215,9,38,210,9,38,211,9,40,168,24,213,9,50,128,6, 243,2,6,12,2,128,67,241,14,0,5,10,136,71,144,67, 144,53,152,2,152,54,160,35,157,59,152,45,208,10,40,214, - 4,41,243,15,6,12,2,114,15,0,0,0, + 4,41,243,15,6,12,2,114,19,0,0,0, }; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 
f33054b4472957..c422c32c728d31 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -262,13 +262,16 @@ dummy_func( ); ERROR_IF(1, error); } - // value = PyStackRef_DUP(value_s); - value = PyStackRef_DupDeferred(value_s); + value = PyStackRef_DUP(value_s); } replicate(8) pure inst(LOAD_FAST, (-- value)) { assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DupDeferred(GETLOCAL(oparg)); + value = PyStackRef_DUP(GETLOCAL(oparg)); + } + + inst (LOAD_FAST_BORROW, (-- value)) { + value = PyStackRef_DupDeferred(value); } inst(LOAD_FAST_AND_CLEAR, (-- value)) { @@ -279,8 +282,8 @@ dummy_func( inst(LOAD_FAST_LOAD_FAST, ( -- value1, value2)) { uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; - value1 = PyStackRef_DupDeferred(GETLOCAL(oparg1)); - value2 = PyStackRef_DupDeferred(GETLOCAL(oparg2)); + value1 = PyStackRef_DUP(GETLOCAL(oparg1)); + value2 = PyStackRef_DUP(GETLOCAL(oparg2)); } family(LOAD_CONST, 0) = { @@ -329,7 +332,7 @@ dummy_func( replicate(8) inst(STORE_FAST, (value --)) { _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = value; + GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); DEAD(value); PyStackRef_XCLOSE(tmp); } @@ -342,7 +345,7 @@ dummy_func( uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; _PyStackRef tmp = GETLOCAL(oparg1); - GETLOCAL(oparg1) = value1; + GETLOCAL(oparg1) = _PyStackRef_StealIfUnborrowed(value1); DEAD(value1); value2 = PyStackRef_DUP(GETLOCAL(oparg2)); PyStackRef_XCLOSE(tmp); @@ -352,11 +355,11 @@ dummy_func( uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; _PyStackRef tmp = GETLOCAL(oparg1); - GETLOCAL(oparg1) = value1; + GETLOCAL(oparg1) = _PyStackRef_StealIfUnborrowed(value1); DEAD(value1); PyStackRef_XCLOSE(tmp); tmp = GETLOCAL(oparg2); - GETLOCAL(oparg2) = value2; + GETLOCAL(oparg2) = _PyStackRef_StealIfUnborrowed(value2); DEAD(value2); PyStackRef_XCLOSE(tmp); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 5112aff070afef..045c391b853d59 
100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -87,8 +87,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); JUMP_TO_ERROR(); } - // value = PyStackRef_DUP(value_s); - value = PyStackRef_DupDeferred(value_s); + value = PyStackRef_DUP(value_s); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -100,7 +99,7 @@ oparg = 0; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DupDeferred(GETLOCAL(oparg)); + value = PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -112,7 +111,7 @@ oparg = 1; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DupDeferred(GETLOCAL(oparg)); + value = PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -124,7 +123,7 @@ oparg = 2; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DupDeferred(GETLOCAL(oparg)); + value = PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -136,7 +135,7 @@ oparg = 3; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DupDeferred(GETLOCAL(oparg)); + value = PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -148,7 +147,7 @@ oparg = 4; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DupDeferred(GETLOCAL(oparg)); + value = PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -160,7 +159,7 @@ oparg = 5; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DupDeferred(GETLOCAL(oparg)); + value = PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer 
+= 1; assert(WITHIN_STACK_BOUNDS()); @@ -172,7 +171,7 @@ oparg = 6; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DupDeferred(GETLOCAL(oparg)); + value = PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -184,7 +183,7 @@ oparg = 7; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DupDeferred(GETLOCAL(oparg)); + value = PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -195,7 +194,16 @@ _PyStackRef value; oparg = CURRENT_OPARG(); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DupDeferred(GETLOCAL(oparg)); + value = PyStackRef_DUP(GETLOCAL(oparg)); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW: { + _PyStackRef value; + value = PyStackRef_DupDeferred(value); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -308,7 +316,7 @@ assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = value; + GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -323,7 +331,7 @@ assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = value; + GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -338,7 +346,7 @@ assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = value; + GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -353,7 +361,7 @@ 
assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = value; + GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -368,7 +376,7 @@ assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = value; + GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -383,7 +391,7 @@ assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = value; + GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -398,7 +406,7 @@ assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = value; + GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -413,7 +421,7 @@ assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = value; + GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -427,7 +435,7 @@ oparg = CURRENT_OPARG(); value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = value; + GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index dca46175bf367c..ae3e52e1944fd6 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -8776,7 +8776,7 @@ 
INSTRUCTION_STATS(LOAD_FAST); _PyStackRef value; assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - value = PyStackRef_DupDeferred(GETLOCAL(oparg)); + value = PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -8800,6 +8800,22 @@ DISPATCH(); } + TARGET(LOAD_FAST_BORROW) { + #if Py_TAIL_CALL_INTERP + int opcode = LOAD_FAST_BORROW; + (void)(opcode); + #endif + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(LOAD_FAST_BORROW); + _PyStackRef value; + value = PyStackRef_DupDeferred(value); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + DISPATCH(); + } + TARGET(LOAD_FAST_CHECK) { #if Py_TAIL_CALL_INTERP int opcode = LOAD_FAST_CHECK; @@ -8819,8 +8835,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); JUMP_TO_LABEL(error); } - // value = PyStackRef_DUP(value_s); - value = PyStackRef_DupDeferred(value_s); + value = PyStackRef_DUP(value_s); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -8839,8 +8854,8 @@ _PyStackRef value2; uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; - value1 = PyStackRef_DupDeferred(GETLOCAL(oparg1)); - value2 = PyStackRef_DupDeferred(GETLOCAL(oparg2)); + value1 = PyStackRef_DUP(GETLOCAL(oparg1)); + value2 = PyStackRef_DUP(GETLOCAL(oparg2)); stack_pointer[0] = value1; stack_pointer[1] = value2; stack_pointer += 2; @@ -10906,7 +10921,7 @@ _PyStackRef value; value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = value; + GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -10929,7 +10944,7 @@ uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; _PyStackRef tmp = GETLOCAL(oparg1); - GETLOCAL(oparg1) = value1; + GETLOCAL(oparg1) = _PyStackRef_StealIfUnborrowed(value1); value2 = PyStackRef_DUP(GETLOCAL(oparg2)); stack_pointer[-1] = value2; 
_PyFrame_SetStackPointer(frame, stack_pointer); @@ -10953,14 +10968,14 @@ uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; _PyStackRef tmp = GETLOCAL(oparg1); - GETLOCAL(oparg1) = value1; + GETLOCAL(oparg1) = _PyStackRef_StealIfUnborrowed(value1); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); tmp = GETLOCAL(oparg2); - GETLOCAL(oparg2) = value2; + GETLOCAL(oparg2) = _PyStackRef_StealIfUnborrowed(value2); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 0435d0841dbae1..daf2c164bbd70a 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -22,6 +22,7 @@ static void *opcode_targets[256] = { &&TARGET_GET_YIELD_FROM_ITER, &&TARGET_INTERPRETER_EXIT, &&TARGET_LOAD_BUILD_CLASS, + &&TARGET_LOAD_FAST_BORROW, &&TARGET_LOAD_LOCALS, &&TARGET_MAKE_FUNCTION, &&TARGET_MATCH_KEYS, @@ -148,7 +149,6 @@ static void *opcode_targets[256] = { &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, - &&_unknown_opcode, &&TARGET_RESUME, &&TARGET_BINARY_OP_ADD_FLOAT, &&TARGET_BINARY_OP_ADD_INT, @@ -414,6 +414,7 @@ Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_CONST_MORTAL(TAIL_CALL_PARA Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_DEREF(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_AND_CLEAR(TAIL_CALL_PARAMS); +Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_BORROW(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_CHECK(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_LOAD_FAST(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FROM_DICT_OR_DEREF(TAIL_CALL_PARAMS); @@ -648,6 +649,7 @@ static py_tail_call_funcptr 
INSTRUCTION_TABLE[256] = { [LOAD_DEREF] = _TAIL_CALL_LOAD_DEREF, [LOAD_FAST] = _TAIL_CALL_LOAD_FAST, [LOAD_FAST_AND_CLEAR] = _TAIL_CALL_LOAD_FAST_AND_CLEAR, + [LOAD_FAST_BORROW] = _TAIL_CALL_LOAD_FAST_BORROW, [LOAD_FAST_CHECK] = _TAIL_CALL_LOAD_FAST_CHECK, [LOAD_FAST_LOAD_FAST] = _TAIL_CALL_LOAD_FAST_LOAD_FAST, [LOAD_FROM_DICT_OR_DEREF] = _TAIL_CALL_LOAD_FROM_DICT_OR_DEREF, @@ -725,7 +727,6 @@ static py_tail_call_funcptr INSTRUCTION_TABLE[256] = { [UNPACK_SEQUENCE_TWO_TUPLE] = _TAIL_CALL_UNPACK_SEQUENCE_TWO_TUPLE, [WITH_EXCEPT_START] = _TAIL_CALL_WITH_EXCEPT_START, [YIELD_VALUE] = _TAIL_CALL_YIELD_VALUE, - [117] = _TAIL_CALL_UNKNOWN_OPCODE, [118] = _TAIL_CALL_UNKNOWN_OPCODE, [119] = _TAIL_CALL_UNKNOWN_OPCODE, [120] = _TAIL_CALL_UNKNOWN_OPCODE, diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 0372870b94ec0a..aa195669f370f9 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -47,6 +47,15 @@ break; } + case _LOAD_FAST_BORROW: { + JitOptSymbol *value; + value = sym_new_not_null(ctx); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _LOAD_FAST_AND_CLEAR: { JitOptSymbol *value; value = GETLOCAL(oparg); diff --git a/Python/specialize.c b/Python/specialize.c index c741c4f93f3138..27051eb2d65093 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -447,6 +447,25 @@ do { \ # define SPECIALIZATION_FAIL(opcode, kind) ((void)0) #endif +#define NUM_VARS 256 + +static inline void +set_mutated(bool *mutated, int i) +{ + if (i < 256) { + mutated[i] = true; + } +} + +static inline bool +get_mutated(bool *mutated, int i) +{ + if (i > NUM_VARS) { + return true; + } + return mutated[i]; +} + // Initialize warmup counters and optimize instructions. This cannot fail. 
void _PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, int enable_counters) @@ -463,6 +482,10 @@ _PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, int enable_counters } int opcode = 0; int oparg = 0; + bool mutated[NUM_VARS]; + for (int i = 0; i < NUM_VARS; i++) { + mutated[i] = false; + } /* The last code unit cannot have a cache, so we don't need to check it */ for (Py_ssize_t i = 0; i < size-1; i++) { opcode = instructions[i].op.code; @@ -471,6 +494,19 @@ _PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, int enable_counters if (caches) { // The initial value depends on the opcode switch (opcode) { + case LOAD_FAST_AND_CLEAR: + case DELETE_FAST: + case STORE_FAST: + case STORE_NAME: // probably not needed + set_mutated(mutated, oparg); + break; + case STORE_FAST_STORE_FAST: + set_mutated(mutated, oparg >> 4); + set_mutated(mutated, oparg & 15); + break; + case STORE_FAST_LOAD_FAST: + set_mutated(mutated, oparg >> 4); + break; case JUMP_BACKWARD: instructions[i + 1].counter = jump_counter; break; @@ -490,6 +526,26 @@ _PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, int enable_counters oparg = 0; } } + + /* The last code unit cannot have a cache, so we don't need to check it */ + opcode = 0; + oparg = 0; + int eligible = 0; + int total = 0; + for (Py_ssize_t i = 0; i < size-1; i++) { + opcode = instructions[i].op.code; + total++; + if (opcode == LOAD_FAST) { + oparg = (oparg << 8) | instructions[i].op.arg; + if (!get_mutated(mutated, oparg)) { + // instructions[i].op.code = LOAD_FAST_BORROW; + eligible++; + } + } + if (opcode != EXTENDED_ARG) { + oparg = 0; + } + } #endif /* ENABLE_SPECIALIZATION_FT */ } diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 09833c0dd2691c..2ff4c2bee7268d 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -591,6 +591,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_CLOSE_SPECIALIZED", 
"PyStackRef_DUP", "PyStackRef_DupDeferred", + "_PyStackRef_StealIfUnborrowed", "PyStackRef_False", "PyStackRef_FromPyObjectImmortal", "PyStackRef_FromPyObjectNew", From 7a142541c4c6c7e7e3991493ba887c904074de54 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 5 Feb 2025 21:53:34 -0800 Subject: [PATCH 03/73] Fix pyframe copy --- Include/internal/pycore_frame.h | 4 +- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_stackref.h | 1 - Include/internal/pycore_uop_metadata.h | 2 +- Include/opcode_ids.h | 128 +++++++++++----------- Lib/_opcode_metadata.py | 128 +++++++++++----------- Objects/codeobject.c | 1 + Programs/test_frozenmain.h | 14 +-- Python/bytecodes.c | 6 +- Python/executor_cases.c.h | 7 +- Python/generated_cases.c.h | 8 +- Python/opcode_targets.h | 2 +- Python/specialize.c | 11 +- 13 files changed, 158 insertions(+), 156 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 8cc3504723b64c..9912281ebd5b47 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -153,8 +153,8 @@ static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame * int stacktop = (int)(src->stackpointer - src->localsplus); assert(stacktop >= _PyFrame_GetCode(src)->co_nlocalsplus); dest->stackpointer = dest->localsplus + stacktop; - for (int i = 1; i < stacktop; i++) { - dest->localsplus[i] = src->localsplus[i]; + for (int i = 0; i < stacktop; i++) { + dest->localsplus[i] = _PyStackRef_StealIfUnborrowed(src->localsplus[i]); } // Don't leave a dangling pointer to the old frame when creating generators // and coroutines: diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 95efc986a8a12f..13d29259bda02b 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -2165,7 +2165,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [LOAD_DEREF] = { true, 
INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, - [LOAD_FAST_BORROW] = { true, INSTR_FMT_IX, 0 }, + [LOAD_FAST_BORROW] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FROM_DICT_OR_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 61b58b9d251d88..a5d265e17200ca 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -225,7 +225,6 @@ _PyStackRef_StealIfUnborrowed(_PyStackRef stackref) return stackref; } else { - fprintf(stderr, "===> Converting to strong reference\n"); return (_PyStackRef){ .bits = (uintptr_t)(Py_NewRef(obj)) | Py_TAG_PTR }; } } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index b97b70b4bc2468..2e9dad83827ff5 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -33,7 +33,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_FAST_6] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_7] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_BORROW] = 0, + [_LOAD_FAST_BORROW] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_CONST_MORTAL] = HAS_ARG_FLAG | HAS_CONST_FLAG, diff --git a/Include/opcode_ids.h b/Include/opcode_ids.h index 227b6d52bdcbba..b4da0f669158ba 100644 --- a/Include/opcode_ids.h +++ 
b/Include/opcode_ids.h @@ -32,69 +32,69 @@ extern "C" { #define GET_YIELD_FROM_ITER 19 #define INTERPRETER_EXIT 20 #define LOAD_BUILD_CLASS 21 -#define LOAD_FAST_BORROW 22 -#define LOAD_LOCALS 23 -#define MAKE_FUNCTION 24 -#define MATCH_KEYS 25 -#define MATCH_MAPPING 26 -#define MATCH_SEQUENCE 27 -#define NOP 28 -#define NOT_TAKEN 29 -#define POP_EXCEPT 30 -#define POP_ITER 31 -#define POP_TOP 32 -#define PUSH_EXC_INFO 33 -#define PUSH_NULL 34 -#define RETURN_GENERATOR 35 -#define RETURN_VALUE 36 -#define SETUP_ANNOTATIONS 37 -#define STORE_SLICE 38 -#define STORE_SUBSCR 39 -#define TO_BOOL 40 -#define UNARY_INVERT 41 -#define UNARY_NEGATIVE 42 -#define UNARY_NOT 43 -#define WITH_EXCEPT_START 44 -#define BINARY_OP 45 -#define BUILD_LIST 46 -#define BUILD_MAP 47 -#define BUILD_SET 48 -#define BUILD_SLICE 49 -#define BUILD_STRING 50 -#define BUILD_TUPLE 51 -#define CALL 52 -#define CALL_INTRINSIC_1 53 -#define CALL_INTRINSIC_2 54 -#define CALL_KW 55 -#define COMPARE_OP 56 -#define CONTAINS_OP 57 -#define CONVERT_VALUE 58 -#define COPY 59 -#define COPY_FREE_VARS 60 -#define DELETE_ATTR 61 -#define DELETE_DEREF 62 -#define DELETE_FAST 63 -#define DELETE_GLOBAL 64 -#define DELETE_NAME 65 -#define DICT_MERGE 66 -#define DICT_UPDATE 67 -#define EXTENDED_ARG 68 -#define FOR_ITER 69 -#define GET_AWAITABLE 70 -#define IMPORT_FROM 71 -#define IMPORT_NAME 72 -#define IS_OP 73 -#define JUMP_BACKWARD 74 -#define JUMP_BACKWARD_NO_INTERRUPT 75 -#define JUMP_FORWARD 76 -#define LIST_APPEND 77 -#define LIST_EXTEND 78 -#define LOAD_ATTR 79 -#define LOAD_COMMON_CONSTANT 80 -#define LOAD_CONST 81 -#define LOAD_DEREF 82 -#define LOAD_FAST 83 -#define LOAD_FAST_AND_CLEAR 84 +#define LOAD_LOCALS 22 +#define MAKE_FUNCTION 23 +#define MATCH_KEYS 24 +#define MATCH_MAPPING 25 +#define MATCH_SEQUENCE 26 +#define NOP 27 +#define NOT_TAKEN 28 +#define POP_EXCEPT 29 +#define POP_ITER 30 +#define POP_TOP 31 +#define PUSH_EXC_INFO 32 +#define PUSH_NULL 33 +#define RETURN_GENERATOR 34 +#define 
RETURN_VALUE 35 +#define SETUP_ANNOTATIONS 36 +#define STORE_SLICE 37 +#define STORE_SUBSCR 38 +#define TO_BOOL 39 +#define UNARY_INVERT 40 +#define UNARY_NEGATIVE 41 +#define UNARY_NOT 42 +#define WITH_EXCEPT_START 43 +#define BINARY_OP 44 +#define BUILD_LIST 45 +#define BUILD_MAP 46 +#define BUILD_SET 47 +#define BUILD_SLICE 48 +#define BUILD_STRING 49 +#define BUILD_TUPLE 50 +#define CALL 51 +#define CALL_INTRINSIC_1 52 +#define CALL_INTRINSIC_2 53 +#define CALL_KW 54 +#define COMPARE_OP 55 +#define CONTAINS_OP 56 +#define CONVERT_VALUE 57 +#define COPY 58 +#define COPY_FREE_VARS 59 +#define DELETE_ATTR 60 +#define DELETE_DEREF 61 +#define DELETE_FAST 62 +#define DELETE_GLOBAL 63 +#define DELETE_NAME 64 +#define DICT_MERGE 65 +#define DICT_UPDATE 66 +#define EXTENDED_ARG 67 +#define FOR_ITER 68 +#define GET_AWAITABLE 69 +#define IMPORT_FROM 70 +#define IMPORT_NAME 71 +#define IS_OP 72 +#define JUMP_BACKWARD 73 +#define JUMP_BACKWARD_NO_INTERRUPT 74 +#define JUMP_FORWARD 75 +#define LIST_APPEND 76 +#define LIST_EXTEND 77 +#define LOAD_ATTR 78 +#define LOAD_COMMON_CONSTANT 79 +#define LOAD_CONST 80 +#define LOAD_DEREF 81 +#define LOAD_FAST 82 +#define LOAD_FAST_AND_CLEAR 83 +#define LOAD_FAST_BORROW 84 #define LOAD_FAST_CHECK 85 #define LOAD_FAST_LOAD_FAST 86 #define LOAD_FROM_DICT_OR_DEREF 87 @@ -244,7 +244,7 @@ extern "C" { #define SETUP_WITH 264 #define STORE_FAST_MAYBE_NULL 265 -#define HAVE_ARGUMENT 44 +#define HAVE_ARGUMENT 43 #define MIN_SPECIALIZED_OPCODE 150 #define MIN_INSTRUMENTED_OPCODE 234 diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py index c7cf719251731b..ed1e1a6b9648c3 100644 --- a/Lib/_opcode_metadata.py +++ b/Lib/_opcode_metadata.py @@ -233,69 +233,69 @@ 'GET_YIELD_FROM_ITER': 19, 'INTERPRETER_EXIT': 20, 'LOAD_BUILD_CLASS': 21, - 'LOAD_FAST_BORROW': 22, - 'LOAD_LOCALS': 23, - 'MAKE_FUNCTION': 24, - 'MATCH_KEYS': 25, - 'MATCH_MAPPING': 26, - 'MATCH_SEQUENCE': 27, - 'NOP': 28, - 'NOT_TAKEN': 29, - 'POP_EXCEPT': 30, - 'POP_ITER': 31, 
- 'POP_TOP': 32, - 'PUSH_EXC_INFO': 33, - 'PUSH_NULL': 34, - 'RETURN_GENERATOR': 35, - 'RETURN_VALUE': 36, - 'SETUP_ANNOTATIONS': 37, - 'STORE_SLICE': 38, - 'STORE_SUBSCR': 39, - 'TO_BOOL': 40, - 'UNARY_INVERT': 41, - 'UNARY_NEGATIVE': 42, - 'UNARY_NOT': 43, - 'WITH_EXCEPT_START': 44, - 'BINARY_OP': 45, - 'BUILD_LIST': 46, - 'BUILD_MAP': 47, - 'BUILD_SET': 48, - 'BUILD_SLICE': 49, - 'BUILD_STRING': 50, - 'BUILD_TUPLE': 51, - 'CALL': 52, - 'CALL_INTRINSIC_1': 53, - 'CALL_INTRINSIC_2': 54, - 'CALL_KW': 55, - 'COMPARE_OP': 56, - 'CONTAINS_OP': 57, - 'CONVERT_VALUE': 58, - 'COPY': 59, - 'COPY_FREE_VARS': 60, - 'DELETE_ATTR': 61, - 'DELETE_DEREF': 62, - 'DELETE_FAST': 63, - 'DELETE_GLOBAL': 64, - 'DELETE_NAME': 65, - 'DICT_MERGE': 66, - 'DICT_UPDATE': 67, - 'EXTENDED_ARG': 68, - 'FOR_ITER': 69, - 'GET_AWAITABLE': 70, - 'IMPORT_FROM': 71, - 'IMPORT_NAME': 72, - 'IS_OP': 73, - 'JUMP_BACKWARD': 74, - 'JUMP_BACKWARD_NO_INTERRUPT': 75, - 'JUMP_FORWARD': 76, - 'LIST_APPEND': 77, - 'LIST_EXTEND': 78, - 'LOAD_ATTR': 79, - 'LOAD_COMMON_CONSTANT': 80, - 'LOAD_CONST': 81, - 'LOAD_DEREF': 82, - 'LOAD_FAST': 83, - 'LOAD_FAST_AND_CLEAR': 84, + 'LOAD_LOCALS': 22, + 'MAKE_FUNCTION': 23, + 'MATCH_KEYS': 24, + 'MATCH_MAPPING': 25, + 'MATCH_SEQUENCE': 26, + 'NOP': 27, + 'NOT_TAKEN': 28, + 'POP_EXCEPT': 29, + 'POP_ITER': 30, + 'POP_TOP': 31, + 'PUSH_EXC_INFO': 32, + 'PUSH_NULL': 33, + 'RETURN_GENERATOR': 34, + 'RETURN_VALUE': 35, + 'SETUP_ANNOTATIONS': 36, + 'STORE_SLICE': 37, + 'STORE_SUBSCR': 38, + 'TO_BOOL': 39, + 'UNARY_INVERT': 40, + 'UNARY_NEGATIVE': 41, + 'UNARY_NOT': 42, + 'WITH_EXCEPT_START': 43, + 'BINARY_OP': 44, + 'BUILD_LIST': 45, + 'BUILD_MAP': 46, + 'BUILD_SET': 47, + 'BUILD_SLICE': 48, + 'BUILD_STRING': 49, + 'BUILD_TUPLE': 50, + 'CALL': 51, + 'CALL_INTRINSIC_1': 52, + 'CALL_INTRINSIC_2': 53, + 'CALL_KW': 54, + 'COMPARE_OP': 55, + 'CONTAINS_OP': 56, + 'CONVERT_VALUE': 57, + 'COPY': 58, + 'COPY_FREE_VARS': 59, + 'DELETE_ATTR': 60, + 'DELETE_DEREF': 61, + 'DELETE_FAST': 62, + 
'DELETE_GLOBAL': 63, + 'DELETE_NAME': 64, + 'DICT_MERGE': 65, + 'DICT_UPDATE': 66, + 'EXTENDED_ARG': 67, + 'FOR_ITER': 68, + 'GET_AWAITABLE': 69, + 'IMPORT_FROM': 70, + 'IMPORT_NAME': 71, + 'IS_OP': 72, + 'JUMP_BACKWARD': 73, + 'JUMP_BACKWARD_NO_INTERRUPT': 74, + 'JUMP_FORWARD': 75, + 'LIST_APPEND': 76, + 'LIST_EXTEND': 77, + 'LOAD_ATTR': 78, + 'LOAD_COMMON_CONSTANT': 79, + 'LOAD_CONST': 80, + 'LOAD_DEREF': 81, + 'LOAD_FAST': 82, + 'LOAD_FAST_AND_CLEAR': 83, + 'LOAD_FAST_BORROW': 84, 'LOAD_FAST_CHECK': 85, 'LOAD_FAST_LOAD_FAST': 86, 'LOAD_FROM_DICT_OR_DEREF': 87, @@ -361,5 +361,5 @@ 'STORE_FAST_MAYBE_NULL': 265, } -HAVE_ARGUMENT = 44 +HAVE_ARGUMENT = 43 MIN_INSTRUMENTED_OPCODE = 234 diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 833c4d10ba8318..f608a199e761d1 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -542,6 +542,7 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) } co->_co_firsttraceable = entry_point; #ifdef Py_GIL_DISABLED + // fprintf(stderr, "== Quicken %s\n", PyUnicode_AsUTF8(co->co_qualname)); _PyCode_Quicken(_PyCode_CODE(co), Py_SIZE(co), interp->config.tlbc_enabled); #else _PyCode_Quicken(_PyCode_CODE(co), Py_SIZE(co), 1); diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index 867a17220260b1..1c5e26f3f594fa 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,16 +1,16 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { 227,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0, - 0,0,0,0,0,243,184,0,0,0,149,0,90,0,80,0, - 71,0,112,0,90,0,80,0,71,1,112,1,89,2,33,0, - 80,1,51,1,0,0,0,0,0,0,31,0,89,2,33,0, - 80,2,89,0,78,6,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,243,184,0,0,0,149,0,91,0,80,0, + 71,0,113,0,91,0,80,0,71,1,113,1,90,2,33,0, + 80,1,51,1,0,0,0,0,0,0,31,0,90,2,33,0, + 80,2,90,0,78,6,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,51,2,0,0,0,0,0,0, - 31,0,89,1,78,8,0,0,0,0,0,0,0,0,0,0, + 31,0,90,1,78,8,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,33,0,51,0,0,0,0,0, 0,0,80,3,44,26,0,0,0,0,0,0,0,0,0,0, - 112,5,80,4,16,0,68,24,0,0,112,6,89,2,33,0, - 80,5,89,6,12,0,80,6,89,5,89,6,44,26,0,0, + 113,5,80,4,16,0,68,24,0,0,113,6,90,2,33,0, + 80,5,90,6,12,0,80,6,90,5,90,6,44,26,0,0, 0,0,0,0,0,0,0,0,12,0,49,4,51,1,0,0, 0,0,0,0,31,0,73,26,0,0,9,0,30,0,80,0, 35,0,41,7,78,218,18,70,114,111,122,101,110,32,72,101, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c422c32c728d31..6b512b91091a90 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -271,7 +271,7 @@ dummy_func( } inst (LOAD_FAST_BORROW, (-- value)) { - value = PyStackRef_DupDeferred(value); + value = PyStackRef_DupDeferred(GETLOCAL(oparg)); } inst(LOAD_FAST_AND_CLEAR, (-- value)) { @@ -750,7 +750,7 @@ dummy_func( * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. */ - assert(Py_REFCNT(left_o) >= 2); + // assert(Py_REFCNT(left_o) >= 2); PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); DEAD(left); PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); @@ -1096,7 +1096,7 @@ dummy_func( // is pushed to a different frame, the callers' frame. inst(RETURN_VALUE, (retval -- res)) { assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef temp = retval; + _PyStackRef temp = _PyStackRef_StealIfUnborrowed(retval); DEAD(retval); SAVE_STACK(); assert(EMPTY()); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 045c391b853d59..2e4d15342d747c 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -203,7 +203,8 @@ case _LOAD_FAST_BORROW: { _PyStackRef value; - value = PyStackRef_DupDeferred(value); + oparg = CURRENT_OPARG(); + value = PyStackRef_DupDeferred(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -1016,7 +1017,7 @@ * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. 
*/ - assert(Py_REFCNT(left_o) >= 2); + // assert(Py_REFCNT(left_o) >= 2); PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); stack_pointer += -2; @@ -1661,7 +1662,7 @@ _PyStackRef res; retval = stack_pointer[-1]; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef temp = retval; + _PyStackRef temp = _PyStackRef_StealIfUnborrowed(retval); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ae3e52e1944fd6..7c2a29af6ccd30 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -369,7 +369,7 @@ * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. */ - assert(Py_REFCNT(left_o) >= 2); + // assert(Py_REFCNT(left_o) >= 2); PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); stack_pointer += -2; @@ -7230,7 +7230,7 @@ { retval = val; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef temp = retval; + _PyStackRef temp = _PyStackRef_StealIfUnborrowed(retval); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -8809,7 +8809,7 @@ next_instr += 1; INSTRUCTION_STATS(LOAD_FAST_BORROW); _PyStackRef value; - value = PyStackRef_DupDeferred(value); + value = PyStackRef_DupDeferred(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -10284,7 +10284,7 @@ _PyStackRef res; retval = stack_pointer[-1]; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef temp = retval; + _PyStackRef temp = _PyStackRef_StealIfUnborrowed(retval); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index daf2c164bbd70a..f50a4cdec2342a 100644 --- 
a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -22,7 +22,6 @@ static void *opcode_targets[256] = { &&TARGET_GET_YIELD_FROM_ITER, &&TARGET_INTERPRETER_EXIT, &&TARGET_LOAD_BUILD_CLASS, - &&TARGET_LOAD_FAST_BORROW, &&TARGET_LOAD_LOCALS, &&TARGET_MAKE_FUNCTION, &&TARGET_MATCH_KEYS, @@ -85,6 +84,7 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_DEREF, &&TARGET_LOAD_FAST, &&TARGET_LOAD_FAST_AND_CLEAR, + &&TARGET_LOAD_FAST_BORROW, &&TARGET_LOAD_FAST_CHECK, &&TARGET_LOAD_FAST_LOAD_FAST, &&TARGET_LOAD_FROM_DICT_OR_DEREF, diff --git a/Python/specialize.c b/Python/specialize.c index 27051eb2d65093..cb7b1762384dff 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -487,7 +487,7 @@ _PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, int enable_counters mutated[i] = false; } /* The last code unit cannot have a cache, so we don't need to check it */ - for (Py_ssize_t i = 0; i < size-1; i++) { + for (Py_ssize_t i = 0; i < size; i++) { opcode = instructions[i].op.code; int caches = _PyOpcode_Caches[opcode]; oparg = (oparg << 8) | instructions[i].op.arg; @@ -496,8 +496,8 @@ _PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, int enable_counters switch (opcode) { case LOAD_FAST_AND_CLEAR: case DELETE_FAST: + case MAKE_CELL: case STORE_FAST: - case STORE_NAME: // probably not needed set_mutated(mutated, oparg); break; case STORE_FAST_STORE_FAST: @@ -532,13 +532,13 @@ _PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, int enable_counters oparg = 0; int eligible = 0; int total = 0; - for (Py_ssize_t i = 0; i < size-1; i++) { + for (Py_ssize_t i = 0; i < size; i++) { opcode = instructions[i].op.code; - total++; if (opcode == LOAD_FAST) { oparg = (oparg << 8) | instructions[i].op.arg; + total++; if (!get_mutated(mutated, oparg)) { - // instructions[i].op.code = LOAD_FAST_BORROW; + instructions[i].op.code = LOAD_FAST_BORROW; eligible++; } } @@ -546,6 +546,7 @@ _PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, int 
enable_counters oparg = 0; } } + // fprintf(stderr, "== LF_SPEC %d %d\n", eligible, total); #endif /* ENABLE_SPECIALIZATION_FT */ } From b1607aa40bf7a293ca5019ba4e3bfa2570d8f5be Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 6 Feb 2025 13:19:50 -0800 Subject: [PATCH 04/73] Strengthen refs when frame is copied --- Python/frame.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Python/frame.c b/Python/frame.c index 68ac2acbaee342..6e97e0d08f1b64 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -55,6 +55,11 @@ take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame) frame = (_PyInterpreterFrame *)f->_f_frame_data; frame->stackpointer = (_PyStackRef *)(((char *)frame) + size); frame->f_executable = PyStackRef_DUP(frame->f_executable); + int stacktop = (int)(frame->stackpointer - frame->localsplus); + assert(stacktop >= _PyFrame_GetCode(frame)->co_nlocalsplus); + for (int i = 0; i < stacktop; i++) { + frame->localsplus[i] = _PyStackRef_StealIfUnborrowed(frame->localsplus[i]); + } f->f_frame = frame; frame->owner = FRAME_OWNED_BY_FRAME_OBJECT; if (_PyFrame_IsIncomplete(frame)) { From e765735d110653f1283c220df86344fd2102e307 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 6 Feb 2025 13:20:27 -0800 Subject: [PATCH 05/73] Cleanup --- Python/specialize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index cb7b1762384dff..a06f2f42ca6f55 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -452,7 +452,7 @@ do { \ static inline void set_mutated(bool *mutated, int i) { - if (i < 256) { + if (i < NUM_VARS) { mutated[i] = true; } } From 291ace9a7da91d23da094c315b8f4e89fe669431 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 6 Feb 2025 13:20:40 -0800 Subject: [PATCH 06/73] Consider all instructions when computing mutations derp --- Python/specialize.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/Python/specialize.c 
b/Python/specialize.c index a06f2f42ca6f55..cffb90338adfff 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -491,22 +491,26 @@ _PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, int enable_counters opcode = instructions[i].op.code; int caches = _PyOpcode_Caches[opcode]; oparg = (oparg << 8) | instructions[i].op.arg; + switch (opcode) { + case LOAD_FAST_AND_CLEAR: + case DELETE_FAST: + case MAKE_CELL: + case STORE_FAST: + set_mutated(mutated, oparg); + break; + case STORE_FAST_STORE_FAST: + set_mutated(mutated, oparg >> 4); + set_mutated(mutated, oparg & 15); + break; + case STORE_FAST_LOAD_FAST: + set_mutated(mutated, oparg >> 4); + break; + default: + break; + } if (caches) { // The initial value depends on the opcode switch (opcode) { - case LOAD_FAST_AND_CLEAR: - case DELETE_FAST: - case MAKE_CELL: - case STORE_FAST: - set_mutated(mutated, oparg); - break; - case STORE_FAST_STORE_FAST: - set_mutated(mutated, oparg >> 4); - set_mutated(mutated, oparg & 15); - break; - case STORE_FAST_LOAD_FAST: - set_mutated(mutated, oparg >> 4); - break; case JUMP_BACKWARD: instructions[i + 1].counter = jump_counter; break; @@ -534,8 +538,8 @@ _PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, int enable_counters int total = 0; for (Py_ssize_t i = 0; i < size; i++) { opcode = instructions[i].op.code; + oparg = (oparg << 8) | instructions[i].op.arg; if (opcode == LOAD_FAST) { - oparg = (oparg << 8) | instructions[i].op.arg; total++; if (!get_mutated(mutated, oparg)) { instructions[i].op.code = LOAD_FAST_BORROW; From 17d6dd60f176920fe63165b9540666a15558c3c0 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 6 Feb 2025 16:44:50 -0800 Subject: [PATCH 07/73] Add a super instruction --- Include/internal/pycore_opcode_metadata.h | 13 +++- Include/internal/pycore_uop_ids.h | 1 + Include/internal/pycore_uop_metadata.h | 4 ++ Include/opcode_ids.h | 67 ++++++++++--------- Lib/_opcode_metadata.py | 67 ++++++++++--------- Programs/test_frozenmain.h | 14 ++-- 
Python/bytecodes.c | 7 ++ Python/generated_cases.c.h | 21 ++++++ Python/opcode_targets.h | 5 +- Python/specialize.c | 7 ++ .../opcode_metadata_generator.py | 7 +- 11 files changed, 134 insertions(+), 79 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 13d29259bda02b..a3a326ea6c1a56 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -337,6 +337,8 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 0; case LOAD_FAST_BORROW: return 0; + case LOAD_FAST_BORROW_LOAD_FAST_BORROW: + return 0; case LOAD_FAST_CHECK: return 0; case LOAD_FAST_LOAD_FAST: @@ -814,6 +816,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 1; case LOAD_FAST_BORROW: return 1; + case LOAD_FAST_BORROW_LOAD_FAST_BORROW: + return 2; case LOAD_FAST_CHECK: return 1; case LOAD_FAST_LOAD_FAST: @@ -1613,6 +1617,10 @@ int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect) { *effect = 1; return 0; } + case LOAD_FAST_BORROW_LOAD_FAST_BORROW: { + *effect = 2; + return 0; + } case LOAD_FAST_CHECK: { *effect = 1; return 0; @@ -2166,6 +2174,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_BORROW] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, + [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FROM_DICT_OR_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, @@ -2375,6 +2384,7 @@ _PyOpcode_macro_expansion[256] = { [LOAD_FAST] = { .nuops = 1, .uops = { { 
_LOAD_FAST, OPARG_SIMPLE, 0 } } }, [LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { _LOAD_FAST_AND_CLEAR, OPARG_SIMPLE, 0 } } }, [LOAD_FAST_BORROW] = { .nuops = 1, .uops = { { _LOAD_FAST_BORROW, OPARG_SIMPLE, 0 } } }, + [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = { .nuops = 2, .uops = { { _LOAD_FAST_BORROW, OPARG_TOP, 0 }, { _LOAD_FAST_BORROW, OPARG_BOTTOM, 0 } } }, [LOAD_FAST_CHECK] = { .nuops = 1, .uops = { { _LOAD_FAST_CHECK, OPARG_SIMPLE, 0 } } }, [LOAD_FAST_LOAD_FAST] = { .nuops = 2, .uops = { { _LOAD_FAST, OPARG_TOP, 0 }, { _LOAD_FAST, OPARG_BOTTOM, 0 } } }, [LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_DEREF, OPARG_SIMPLE, 0 } } }, @@ -2602,6 +2612,7 @@ const char *_PyOpcode_OpName[266] = { [LOAD_FAST] = "LOAD_FAST", [LOAD_FAST_AND_CLEAR] = "LOAD_FAST_AND_CLEAR", [LOAD_FAST_BORROW] = "LOAD_FAST_BORROW", + [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = "LOAD_FAST_BORROW_LOAD_FAST_BORROW", [LOAD_FAST_CHECK] = "LOAD_FAST_CHECK", [LOAD_FAST_LOAD_FAST] = "LOAD_FAST_LOAD_FAST", [LOAD_FROM_DICT_OR_DEREF] = "LOAD_FROM_DICT_OR_DEREF", @@ -2861,6 +2872,7 @@ const uint8_t _PyOpcode_Deopt[256] = { [LOAD_FAST] = LOAD_FAST, [LOAD_FAST_AND_CLEAR] = LOAD_FAST_AND_CLEAR, [LOAD_FAST_BORROW] = LOAD_FAST_BORROW, + [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = LOAD_FAST_BORROW_LOAD_FAST_BORROW, [LOAD_FAST_CHECK] = LOAD_FAST_CHECK, [LOAD_FAST_LOAD_FAST] = LOAD_FAST_LOAD_FAST, [LOAD_FROM_DICT_OR_DEREF] = LOAD_FROM_DICT_OR_DEREF, @@ -2943,7 +2955,6 @@ const uint8_t _PyOpcode_Deopt[256] = { #endif // NEED_OPCODE_METADATA #define EXTRA_CASES \ - case 118: \ case 119: \ case 120: \ case 121: \ diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 34c2181253a62f..8998cdf160196a 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -202,6 +202,7 @@ extern "C" { #define _LOAD_FAST_7 427 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_BORROW LOAD_FAST_BORROW +#define 
_LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 2e9dad83827ff5..e3365e96d2c658 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -36,6 +36,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_FAST_BORROW] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, + [_LOAD_FAST_BORROW_LOAD_FAST_BORROW] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_CONST_MORTAL] = HAS_ARG_FLAG | HAS_CONST_FLAG, [_LOAD_CONST_IMMORTAL] = HAS_ARG_FLAG | HAS_CONST_FLAG, [_LOAD_SMALL_INT_0] = 0, @@ -465,6 +466,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_FAST_7] = "_LOAD_FAST_7", [_LOAD_FAST_AND_CLEAR] = "_LOAD_FAST_AND_CLEAR", [_LOAD_FAST_BORROW] = "_LOAD_FAST_BORROW", + [_LOAD_FAST_BORROW_LOAD_FAST_BORROW] = "_LOAD_FAST_BORROW_LOAD_FAST_BORROW", [_LOAD_FAST_CHECK] = "_LOAD_FAST_CHECK", [_LOAD_FAST_LOAD_FAST] = "_LOAD_FAST_LOAD_FAST", [_LOAD_FROM_DICT_OR_DEREF] = "_LOAD_FROM_DICT_OR_DEREF", @@ -593,6 +595,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _LOAD_FAST_LOAD_FAST: return 0; + case _LOAD_FAST_BORROW_LOAD_FAST_BORROW: + return 0; case _LOAD_CONST_MORTAL: return 0; case _LOAD_CONST_IMMORTAL: diff --git a/Include/opcode_ids.h b/Include/opcode_ids.h index b4da0f669158ba..2c48d883d4a0bc 100644 --- a/Include/opcode_ids.h +++ b/Include/opcode_ids.h @@ -95,39 +95,40 @@ extern "C" { #define LOAD_FAST 82 #define LOAD_FAST_AND_CLEAR 83 #define LOAD_FAST_BORROW 84 -#define LOAD_FAST_CHECK 85 -#define LOAD_FAST_LOAD_FAST 86 -#define LOAD_FROM_DICT_OR_DEREF 87 -#define LOAD_FROM_DICT_OR_GLOBALS 88 -#define LOAD_GLOBAL 89 -#define LOAD_NAME 90 -#define 
LOAD_SMALL_INT 91 -#define LOAD_SPECIAL 92 -#define LOAD_SUPER_ATTR 93 -#define MAKE_CELL 94 -#define MAP_ADD 95 -#define MATCH_CLASS 96 -#define POP_JUMP_IF_FALSE 97 -#define POP_JUMP_IF_NONE 98 -#define POP_JUMP_IF_NOT_NONE 99 -#define POP_JUMP_IF_TRUE 100 -#define RAISE_VARARGS 101 -#define RERAISE 102 -#define SEND 103 -#define SET_ADD 104 -#define SET_FUNCTION_ATTRIBUTE 105 -#define SET_UPDATE 106 -#define STORE_ATTR 107 -#define STORE_DEREF 108 -#define STORE_FAST 109 -#define STORE_FAST_LOAD_FAST 110 -#define STORE_FAST_STORE_FAST 111 -#define STORE_GLOBAL 112 -#define STORE_NAME 113 -#define SWAP 114 -#define UNPACK_EX 115 -#define UNPACK_SEQUENCE 116 -#define YIELD_VALUE 117 +#define LOAD_FAST_BORROW_LOAD_FAST_BORROW 85 +#define LOAD_FAST_CHECK 86 +#define LOAD_FAST_LOAD_FAST 87 +#define LOAD_FROM_DICT_OR_DEREF 88 +#define LOAD_FROM_DICT_OR_GLOBALS 89 +#define LOAD_GLOBAL 90 +#define LOAD_NAME 91 +#define LOAD_SMALL_INT 92 +#define LOAD_SPECIAL 93 +#define LOAD_SUPER_ATTR 94 +#define MAKE_CELL 95 +#define MAP_ADD 96 +#define MATCH_CLASS 97 +#define POP_JUMP_IF_FALSE 98 +#define POP_JUMP_IF_NONE 99 +#define POP_JUMP_IF_NOT_NONE 100 +#define POP_JUMP_IF_TRUE 101 +#define RAISE_VARARGS 102 +#define RERAISE 103 +#define SEND 104 +#define SET_ADD 105 +#define SET_FUNCTION_ATTRIBUTE 106 +#define SET_UPDATE 107 +#define STORE_ATTR 108 +#define STORE_DEREF 109 +#define STORE_FAST 110 +#define STORE_FAST_LOAD_FAST 111 +#define STORE_FAST_STORE_FAST 112 +#define STORE_GLOBAL 113 +#define STORE_NAME 114 +#define SWAP 115 +#define UNPACK_EX 116 +#define UNPACK_SEQUENCE 117 +#define YIELD_VALUE 118 #define RESUME 149 #define BINARY_OP_ADD_FLOAT 150 #define BINARY_OP_ADD_INT 151 diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py index ed1e1a6b9648c3..55b66ae78c044e 100644 --- a/Lib/_opcode_metadata.py +++ b/Lib/_opcode_metadata.py @@ -296,39 +296,40 @@ 'LOAD_FAST': 82, 'LOAD_FAST_AND_CLEAR': 83, 'LOAD_FAST_BORROW': 84, - 'LOAD_FAST_CHECK': 85, - 
'LOAD_FAST_LOAD_FAST': 86, - 'LOAD_FROM_DICT_OR_DEREF': 87, - 'LOAD_FROM_DICT_OR_GLOBALS': 88, - 'LOAD_GLOBAL': 89, - 'LOAD_NAME': 90, - 'LOAD_SMALL_INT': 91, - 'LOAD_SPECIAL': 92, - 'LOAD_SUPER_ATTR': 93, - 'MAKE_CELL': 94, - 'MAP_ADD': 95, - 'MATCH_CLASS': 96, - 'POP_JUMP_IF_FALSE': 97, - 'POP_JUMP_IF_NONE': 98, - 'POP_JUMP_IF_NOT_NONE': 99, - 'POP_JUMP_IF_TRUE': 100, - 'RAISE_VARARGS': 101, - 'RERAISE': 102, - 'SEND': 103, - 'SET_ADD': 104, - 'SET_FUNCTION_ATTRIBUTE': 105, - 'SET_UPDATE': 106, - 'STORE_ATTR': 107, - 'STORE_DEREF': 108, - 'STORE_FAST': 109, - 'STORE_FAST_LOAD_FAST': 110, - 'STORE_FAST_STORE_FAST': 111, - 'STORE_GLOBAL': 112, - 'STORE_NAME': 113, - 'SWAP': 114, - 'UNPACK_EX': 115, - 'UNPACK_SEQUENCE': 116, - 'YIELD_VALUE': 117, + 'LOAD_FAST_BORROW_LOAD_FAST_BORROW': 85, + 'LOAD_FAST_CHECK': 86, + 'LOAD_FAST_LOAD_FAST': 87, + 'LOAD_FROM_DICT_OR_DEREF': 88, + 'LOAD_FROM_DICT_OR_GLOBALS': 89, + 'LOAD_GLOBAL': 90, + 'LOAD_NAME': 91, + 'LOAD_SMALL_INT': 92, + 'LOAD_SPECIAL': 93, + 'LOAD_SUPER_ATTR': 94, + 'MAKE_CELL': 95, + 'MAP_ADD': 96, + 'MATCH_CLASS': 97, + 'POP_JUMP_IF_FALSE': 98, + 'POP_JUMP_IF_NONE': 99, + 'POP_JUMP_IF_NOT_NONE': 100, + 'POP_JUMP_IF_TRUE': 101, + 'RAISE_VARARGS': 102, + 'RERAISE': 103, + 'SEND': 104, + 'SET_ADD': 105, + 'SET_FUNCTION_ATTRIBUTE': 106, + 'SET_UPDATE': 107, + 'STORE_ATTR': 108, + 'STORE_DEREF': 109, + 'STORE_FAST': 110, + 'STORE_FAST_LOAD_FAST': 111, + 'STORE_FAST_STORE_FAST': 112, + 'STORE_GLOBAL': 113, + 'STORE_NAME': 114, + 'SWAP': 115, + 'UNPACK_EX': 116, + 'UNPACK_SEQUENCE': 117, + 'YIELD_VALUE': 118, 'INSTRUMENTED_END_FOR': 234, 'INSTRUMENTED_POP_ITER': 235, 'INSTRUMENTED_END_SEND': 236, diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index 1c5e26f3f594fa..93b0e993d37c7d 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,16 +1,16 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { 227,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0, - 
0,0,0,0,0,243,184,0,0,0,149,0,91,0,80,0, - 71,0,113,0,91,0,80,0,71,1,113,1,90,2,33,0, - 80,1,51,1,0,0,0,0,0,0,31,0,90,2,33,0, - 80,2,90,0,78,6,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,243,184,0,0,0,149,0,92,0,80,0, + 71,0,114,0,92,0,80,0,71,1,114,1,91,2,33,0, + 80,1,51,1,0,0,0,0,0,0,31,0,91,2,33,0, + 80,2,91,0,78,6,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,51,2,0,0,0,0,0,0, - 31,0,90,1,78,8,0,0,0,0,0,0,0,0,0,0, + 31,0,91,1,78,8,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,33,0,51,0,0,0,0,0, 0,0,80,3,44,26,0,0,0,0,0,0,0,0,0,0, - 113,5,80,4,16,0,68,24,0,0,113,6,90,2,33,0, - 80,5,90,6,12,0,80,6,90,5,90,6,44,26,0,0, + 114,5,80,4,16,0,68,24,0,0,114,6,91,2,33,0, + 80,5,91,6,12,0,80,6,91,5,91,6,44,26,0,0, 0,0,0,0,0,0,0,0,12,0,49,4,51,1,0,0, 0,0,0,0,31,0,73,26,0,0,9,0,30,0,80,0, 35,0,41,7,78,218,18,70,114,111,122,101,110,32,72,101, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6b512b91091a90..3514961070d1fb 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -286,6 +286,13 @@ dummy_func( value2 = PyStackRef_DUP(GETLOCAL(oparg2)); } + inst(LOAD_FAST_BORROW_LOAD_FAST_BORROW, ( -- value1, value2)) { + uint32_t oparg1 = oparg >> 4; + uint32_t oparg2 = oparg & 15; + value1 = PyStackRef_DupDeferred(GETLOCAL(oparg1)); + value2 = PyStackRef_DupDeferred(GETLOCAL(oparg2)); + } + family(LOAD_CONST, 0) = { LOAD_CONST_MORTAL, LOAD_CONST_IMMORTAL, diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7c2a29af6ccd30..e0362e2227f02c 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -8816,6 +8816,27 @@ DISPATCH(); } + TARGET(LOAD_FAST_BORROW_LOAD_FAST_BORROW) { + #if Py_TAIL_CALL_INTERP + int opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW; + (void)(opcode); + #endif + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(LOAD_FAST_BORROW_LOAD_FAST_BORROW); + _PyStackRef value1; + _PyStackRef value2; + uint32_t oparg1 = oparg >> 4; + uint32_t oparg2 = oparg & 15; + value1 = PyStackRef_DupDeferred(GETLOCAL(oparg1)); + value2 = 
PyStackRef_DupDeferred(GETLOCAL(oparg2)); + stack_pointer[0] = value1; + stack_pointer[1] = value2; + stack_pointer += 2; + assert(WITHIN_STACK_BOUNDS()); + DISPATCH(); + } + TARGET(LOAD_FAST_CHECK) { #if Py_TAIL_CALL_INTERP int opcode = LOAD_FAST_CHECK; diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index f50a4cdec2342a..7acb37eb27c313 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -85,6 +85,7 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_FAST, &&TARGET_LOAD_FAST_AND_CLEAR, &&TARGET_LOAD_FAST_BORROW, + &&TARGET_LOAD_FAST_BORROW_LOAD_FAST_BORROW, &&TARGET_LOAD_FAST_CHECK, &&TARGET_LOAD_FAST_LOAD_FAST, &&TARGET_LOAD_FROM_DICT_OR_DEREF, @@ -148,7 +149,6 @@ static void *opcode_targets[256] = { &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, - &&_unknown_opcode, &&TARGET_RESUME, &&TARGET_BINARY_OP_ADD_FLOAT, &&TARGET_BINARY_OP_ADD_INT, @@ -415,6 +415,7 @@ Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_DEREF(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_AND_CLEAR(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_BORROW(TAIL_CALL_PARAMS); +Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_BORROW_LOAD_FAST_BORROW(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_CHECK(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_LOAD_FAST(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FROM_DICT_OR_DEREF(TAIL_CALL_PARAMS); @@ -650,6 +651,7 @@ static py_tail_call_funcptr INSTRUCTION_TABLE[256] = { [LOAD_FAST] = _TAIL_CALL_LOAD_FAST, [LOAD_FAST_AND_CLEAR] = _TAIL_CALL_LOAD_FAST_AND_CLEAR, [LOAD_FAST_BORROW] = _TAIL_CALL_LOAD_FAST_BORROW, + [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = _TAIL_CALL_LOAD_FAST_BORROW_LOAD_FAST_BORROW, [LOAD_FAST_CHECK] = _TAIL_CALL_LOAD_FAST_CHECK, [LOAD_FAST_LOAD_FAST] = 
_TAIL_CALL_LOAD_FAST_LOAD_FAST, [LOAD_FROM_DICT_OR_DEREF] = _TAIL_CALL_LOAD_FROM_DICT_OR_DEREF, @@ -727,7 +729,6 @@ static py_tail_call_funcptr INSTRUCTION_TABLE[256] = { [UNPACK_SEQUENCE_TWO_TUPLE] = _TAIL_CALL_UNPACK_SEQUENCE_TWO_TUPLE, [WITH_EXCEPT_START] = _TAIL_CALL_WITH_EXCEPT_START, [YIELD_VALUE] = _TAIL_CALL_YIELD_VALUE, - [118] = _TAIL_CALL_UNKNOWN_OPCODE, [119] = _TAIL_CALL_UNKNOWN_OPCODE, [120] = _TAIL_CALL_UNKNOWN_OPCODE, [121] = _TAIL_CALL_UNKNOWN_OPCODE, diff --git a/Python/specialize.c b/Python/specialize.c index cffb90338adfff..b3d0496f1ade84 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -546,6 +546,13 @@ _PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, int enable_counters eligible++; } } + else if (opcode == LOAD_FAST_LOAD_FAST) { + total++; + if (!get_mutated(mutated, oparg >> 4) && !get_mutated(mutated, oparg & 15)) { + instructions[i].op.code = LOAD_FAST_BORROW_LOAD_FAST_BORROW; + eligible++; + } + } if (opcode != EXTENDED_ARG) { oparg = 0; } diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index bfee3c8aa301bb..42f062e8e02f6c 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -322,9 +322,10 @@ def generate_expansion_table(analysis: Analysis, out: CWriter) -> None: expansions: list[tuple[str, str, int]] = [] # [(name, size, offset), ...] 
if inst.is_super(): pieces = inst.name.split("_") - assert len(pieces) == 4, f"{inst.name} doesn't look like a super-instr" - name1 = "_".join(pieces[:2]) - name2 = "_".join(pieces[2:]) + assert len(pieces) % 2 == 0, f"{inst.name} doesn't look like a super-instr" + parts_per_piece = int(len(pieces) / 2) + name1 = "_".join(pieces[:parts_per_piece]) + name2 = "_".join(pieces[parts_per_piece:]) assert name1 in analysis.instructions, f"{name1} doesn't match any instr" assert name2 in analysis.instructions, f"{name2} doesn't match any instr" instr1 = analysis.instructions[name1] From 0a74052f8638617fa073d4cec1816157b17db51b Mon Sep 17 00:00:00 2001 From: Matt Page Date: Tue, 11 Feb 2025 16:52:23 -0800 Subject: [PATCH 08/73] Don't optimize during quickening --- Python/specialize.c | 70 +-------------------------------------------- 1 file changed, 1 insertion(+), 69 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index b3d0496f1ade84..c741c4f93f3138 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -447,25 +447,6 @@ do { \ # define SPECIALIZATION_FAIL(opcode, kind) ((void)0) #endif -#define NUM_VARS 256 - -static inline void -set_mutated(bool *mutated, int i) -{ - if (i < NUM_VARS) { - mutated[i] = true; - } -} - -static inline bool -get_mutated(bool *mutated, int i) -{ - if (i > NUM_VARS) { - return true; - } - return mutated[i]; -} - // Initialize warmup counters and optimize instructions. This cannot fail. 
void _PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, int enable_counters) @@ -482,32 +463,11 @@ _PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, int enable_counters } int opcode = 0; int oparg = 0; - bool mutated[NUM_VARS]; - for (int i = 0; i < NUM_VARS; i++) { - mutated[i] = false; - } /* The last code unit cannot have a cache, so we don't need to check it */ - for (Py_ssize_t i = 0; i < size; i++) { + for (Py_ssize_t i = 0; i < size-1; i++) { opcode = instructions[i].op.code; int caches = _PyOpcode_Caches[opcode]; oparg = (oparg << 8) | instructions[i].op.arg; - switch (opcode) { - case LOAD_FAST_AND_CLEAR: - case DELETE_FAST: - case MAKE_CELL: - case STORE_FAST: - set_mutated(mutated, oparg); - break; - case STORE_FAST_STORE_FAST: - set_mutated(mutated, oparg >> 4); - set_mutated(mutated, oparg & 15); - break; - case STORE_FAST_LOAD_FAST: - set_mutated(mutated, oparg >> 4); - break; - default: - break; - } if (caches) { // The initial value depends on the opcode switch (opcode) { @@ -530,34 +490,6 @@ _PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, int enable_counters oparg = 0; } } - - /* The last code unit cannot have a cache, so we don't need to check it */ - opcode = 0; - oparg = 0; - int eligible = 0; - int total = 0; - for (Py_ssize_t i = 0; i < size; i++) { - opcode = instructions[i].op.code; - oparg = (oparg << 8) | instructions[i].op.arg; - if (opcode == LOAD_FAST) { - total++; - if (!get_mutated(mutated, oparg)) { - instructions[i].op.code = LOAD_FAST_BORROW; - eligible++; - } - } - else if (opcode == LOAD_FAST_LOAD_FAST) { - total++; - if (!get_mutated(mutated, oparg >> 4) && !get_mutated(mutated, oparg & 15)) { - instructions[i].op.code = LOAD_FAST_BORROW_LOAD_FAST_BORROW; - eligible++; - } - } - if (opcode != EXTENDED_ARG) { - oparg = 0; - } - } - // fprintf(stderr, "== LF_SPEC %d %d\n", eligible, total); #endif /* ENABLE_SPECIALIZATION_FT */ } From afbfd886099d3d033c12376785889642bb9ea5d6 Mon Sep 17 00:00:00 
2001 From: Matt Page Date: Tue, 11 Feb 2025 15:02:34 -0800 Subject: [PATCH 09/73] Use abstract interpretation --- Python/flowgraph.c | 293 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 293 insertions(+) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index c5bdf105545459..c5a2df46ff3733 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2415,6 +2415,298 @@ insert_superinstructions(cfg_builder *g) return res; } +typedef struct { + // Index of instruction that produced the reference or -1. + int instr; + + // The local to which the reference refers or -1. + int local; +} ref; + +#define NOT_LOCAL -1 + +#define DUMMY_REF (ref){-1, NOT_LOCAL} + +typedef struct { + ref *refs; + Py_ssize_t size; + Py_ssize_t capacity; +} ref_stack; + +static bool +ref_stack_has_refs_from_instr(ref_stack *stack, int instr) +{ + for (Py_ssize_t i = 0; i < stack->size; i++) { + if (stack->refs[i].instr == instr) { + return true; + } + } + return false; +} + +static int +ref_stack_push(ref_stack *stack, ref r) +{ + if (stack->size == stack->capacity) { + Py_ssize_t new_cap = Py_MAX(32, stack->capacity * 2); + ref *refs = PyMem_Realloc(stack->refs, sizeof(*stack->refs) * new_cap); + if (refs == NULL) { + PyErr_NoMemory(); + return -1; + } + stack->refs = refs; + stack->capacity = new_cap; + } + stack->refs[stack->size] = r; + stack->size++; + return 0; +} + +static ref +ref_stack_pop(ref_stack *stack) +{ + assert(stack->size > 0); + stack->size--; + ref r = stack->refs[stack->size]; + return r; +} + +static void +ref_stack_swap_top(ref_stack *stack, Py_ssize_t off) +{ + Py_ssize_t idx = stack->size - off; + assert(idx >= 0 && idx < stack->size); + ref tmp = stack->refs[idx]; + stack->refs[idx] = stack->refs[stack->size - 1]; + stack->refs[stack->size - 1] = tmp; +} + +static ref +ref_stack_at(ref_stack *stack, Py_ssize_t idx) +{ + assert(idx >= 0 && idx < stack->size); + return stack->refs[idx]; +} + +static void +ref_stack_clear(ref_stack *stack) +{ + 
stack->size = 0; +} + +static void +ref_stack_fini(ref_stack *stack) +{ + if (stack->refs != NULL) { + PyMem_Free(stack->refs); + } + stack->refs = NULL; + stack->capacity = 0; + stack->size = 0; +} + +static void +kill_local(bool *has_killed_refs, Py_ssize_t size, ref_stack *refs, int local) +{ + for (Py_ssize_t i = 0; i < refs->size; i++) { + ref r = ref_stack_at(refs, i); + if (r.local == local) { + assert(r.instr >= 0); + has_killed_refs[r.instr] = true; + } + } +} + +static void +load_fast_push_block(basicblock ***sp, basicblock *target, int start_depth) +{ + assert(!target->b_visited || (target->b_startdepth == start_depth)); + if (!target->b_visited) { + assert(target->b_startdepth == -1); + target->b_startdepth = start_depth; + target->b_visited = 1; + *(*sp)++ = target; + } +} + +static int +optimize_load_fast(cfg_builder *g) +{ + int status; + ref_stack refs = {0}; + bool *has_killed_refs = NULL; + basicblock *entryblock = g->g_entryblock; + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + b->b_startdepth = -1; + } + basicblock **blocks = make_cfg_traversal_stack(entryblock); + if (blocks == NULL) { + status = ERROR; + goto done; + } + basicblock **sp = blocks; + *sp = entryblock; + sp++; + entryblock->b_startdepth = 0; + entryblock->b_visited = 1; + + while (sp != blocks) { + basicblock *block = *--sp; + assert(block->b_startdepth > -1); + + // Reset state that tracks which instructions produce references to + // locals that are on the stack while the local is overwritten. + int size = sizeof(*has_killed_refs) * block->b_iused; + bool *p = PyMem_Realloc(has_killed_refs, size); + if (p == NULL) { + PyErr_NoMemory(); + status = ERROR; + goto done; + } + else { + has_killed_refs = p; + } + memset(has_killed_refs, 0, size); + + // Reset the stack of refs. We don't track references on the stack + // across basic blocks, but the bytecode will expect their + // presence. Add dummy references as necessary. 
+ ref_stack_clear(&refs); + for (int i = 0; i < block->b_startdepth; i++) { + ref_stack_push(&refs, DUMMY_REF); + } + + for (int i = 0; i < block->b_iused; i++) { + cfg_instr *instr = &block->b_instr[i]; + int opcode = instr->i_opcode; + int oparg = instr->i_oparg; + assert(opcode != EXTENDED_ARG); + switch (opcode) { + case COPY: { + Py_ssize_t idx = refs.size - oparg; + ref r = ref_stack_at(&refs, idx); + if (ref_stack_push(&refs, r) < 0) { + status = ERROR; + goto done; + } + break; + } + + case LOAD_FAST: { + if (ref_stack_push(&refs, (ref){i, oparg}) < 0) { + status = ERROR; + goto done; + } + break; + } + + case LOAD_FAST_LOAD_FAST: { + if (ref_stack_push(&refs, (ref){i, oparg >> 4}) < 0) { + status = ERROR; + goto done; + } + if (ref_stack_push(&refs, (ref){i, oparg & 15}) < 0) { + status = ERROR; + goto done; + } + break; + } + + case RETURN_VALUE: { + // We need to return a new reference so there is no point + // optimizing the instruction that produced the returned + // reference. + ref r = ref_stack_pop(&refs); + if (r.local != NOT_LOCAL) { + assert(r.instr >= 0); + has_killed_refs[r.instr] = true; + } + break; + } + + case STORE_FAST: { + kill_local(has_killed_refs, block->b_iused, &refs, oparg); + ref_stack_pop(&refs); + break; + } + + case STORE_FAST_STORE_FAST: { + kill_local(has_killed_refs, block->b_iused, &refs, oparg >> 4); + kill_local(has_killed_refs, block->b_iused, &refs, oparg & 15); + ref_stack_pop(&refs); + ref_stack_pop(&refs); + break; + } + + case SWAP: { + ref_stack_swap_top(&refs, oparg); + break; + } + + default: { + int num_popped = _PyOpcode_num_popped(opcode, oparg); + int num_pushed = _PyOpcode_num_pushed(opcode, oparg); + if (HAS_TARGET(instr->i_opcode)) { + load_fast_push_block(&sp, instr->i_target, refs.size - num_popped + num_pushed); + } + if (!IS_BLOCK_PUSH_OPCODE(instr->i_opcode)) { + // Block push opcodes only affect the stack when jumping + // to the target. 
+ for (int j = 0; j < num_popped; j++) { + ref_stack_pop(&refs); + } + for (int j = 0; j < num_pushed; j++) { + if (ref_stack_push(&refs, (ref){i, NOT_LOCAL}) < 0) { + status = ERROR; + goto done; + } + } + } + break; + } + } + } + + // Optimize instructions + for (int i = 0; i < block->b_iused; i++) { + if (!has_killed_refs[i] && !ref_stack_has_refs_from_instr(&refs, i)) { + cfg_instr *instr = &block->b_instr[i]; + switch (instr->i_opcode) { + case LOAD_FAST: + instr->i_opcode = LOAD_FAST_BORROW; + break; + case LOAD_FAST_LOAD_FAST: + instr->i_opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW; + break; + default: + break; + } + } + } + + // Push fallthrough block + cfg_instr *term = basicblock_last_instr(block); + if (term != NULL && block->b_next != NULL && + !(IS_UNCONDITIONAL_JUMP_OPCODE(term->i_opcode) || + IS_SCOPE_EXIT_OPCODE(term->i_opcode))) { + assert(BB_HAS_FALLTHROUGH(block)); + load_fast_push_block(&sp, block->b_next, refs.size); + } + } + + status = SUCCESS; + +done: + ref_stack_fini(&refs); + if (has_killed_refs != NULL) { + PyMem_Free(has_killed_refs); + } + if (blocks != NULL) { + PyMem_Free(blocks); + } + return status; +} + // helper functions for add_checks_for_loads_of_unknown_variables static inline void maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp) @@ -3028,6 +3320,7 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache, add_checks_for_loads_of_uninitialized_variables( g->g_entryblock, nlocals, nparams)); RETURN_IF_ERROR(insert_superinstructions(g)); + RETURN_IF_ERROR(optimize_load_fast(g)); RETURN_IF_ERROR(push_cold_blocks_to_end(g)); RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno)); From 696c6306072133a0e1f5906e4a1fb79078a38541 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Tue, 11 Feb 2025 17:02:51 -0800 Subject: [PATCH 10/73] Fix test_generators Ref will be 2 if borrowed --- Lib/test/test_generators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/Lib/test/test_generators.py b/Lib/test/test_generators.py index bf4b88cd9c4450..8bce42f037478c 100644 --- a/Lib/test/test_generators.py +++ b/Lib/test/test_generators.py @@ -83,7 +83,7 @@ def gen(): g = gen() next(g) g.send(g) - self.assertGreater(sys.getrefcount(g), 2) + self.assertGreaterEqual(sys.getrefcount(g), 2) self.assertFalse(finalized) del g support.gc_collect() From 483ac7a93958395b9e10742fbbffc809199b109a Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 13 Feb 2025 13:54:15 -0800 Subject: [PATCH 11/73] Optimize returns --- Python/flowgraph.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index c5a2df46ff3733..fe8d7147a5535f 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2612,18 +2612,6 @@ optimize_load_fast(cfg_builder *g) break; } - case RETURN_VALUE: { - // We need to return a new reference so there is no point - // optimizing the instruction that produced the returned - // reference. - ref r = ref_stack_pop(&refs); - if (r.local != NOT_LOCAL) { - assert(r.instr >= 0); - has_killed_refs[r.instr] = true; - } - break; - } - case STORE_FAST: { kill_local(has_killed_refs, block->b_iused, &refs, oparg); ref_stack_pop(&refs); From 259d5db9670609c81b9bbfa2a6effeacf3c6de82 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 13 Feb 2025 13:55:39 -0800 Subject: [PATCH 12/73] Remove unused arg --- Python/flowgraph.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index fe8d7147a5535f..685380c5e9f1d1 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2506,7 +2506,7 @@ ref_stack_fini(ref_stack *stack) } static void -kill_local(bool *has_killed_refs, Py_ssize_t size, ref_stack *refs, int local) +kill_local(bool *has_killed_refs, ref_stack *refs, int local) { for (Py_ssize_t i = 0; i < refs->size; i++) { ref r = ref_stack_at(refs, i); @@ -2613,14 +2613,14 @@ optimize_load_fast(cfg_builder *g) } case STORE_FAST: { - 
kill_local(has_killed_refs, block->b_iused, &refs, oparg); + kill_local(has_killed_refs, &refs, oparg); ref_stack_pop(&refs); break; } case STORE_FAST_STORE_FAST: { - kill_local(has_killed_refs, block->b_iused, &refs, oparg >> 4); - kill_local(has_killed_refs, block->b_iused, &refs, oparg & 15); + kill_local(has_killed_refs, &refs, oparg >> 4); + kill_local(has_killed_refs, &refs, oparg & 15); ref_stack_pop(&refs); ref_stack_pop(&refs); break; From aeafa98b679410575b4ef4367d9f63326659daaf Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 14 Feb 2025 17:03:07 -0800 Subject: [PATCH 13/73] Make sure we convert borrowed refs on frame --- Include/internal/pycore_stackref.h | 10 ++++++++++ Python/frame.c | 7 +++++++ 2 files changed, 17 insertions(+) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index a5d265e17200ca..b1a47322114e7d 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -231,6 +231,16 @@ _PyStackRef_StealIfUnborrowed(_PyStackRef stackref) return stackref; } +static inline bool +_PyStackRef_IsBorrowed(_PyStackRef stackref) +{ + if (PyStackRef_IsNull(stackref) || !PyStackRef_IsDeferred(stackref)) { + return false; + } + PyObject *obj = PyStackRef_AsPyObjectBorrow(stackref); + return !(_Py_IsImmortal(obj) || _PyObject_HasDeferredRefcount(obj)); +} + static inline _PyStackRef PyStackRef_FromPyObjectNew(PyObject *obj) { diff --git a/Python/frame.c b/Python/frame.c index 6e97e0d08f1b64..213f375f31dfe0 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -57,6 +57,13 @@ take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame) frame->f_executable = PyStackRef_DUP(frame->f_executable); int stacktop = (int)(frame->stackpointer - frame->localsplus); assert(stacktop >= _PyFrame_GetCode(frame)->co_nlocalsplus); + // XXX - Maybe more optimal sequence to do here + if (_PyStackRef_IsBorrowed(frame->f_executable)) { + Py_INCREF(PyStackRef_AsPyObjectBorrow(frame->f_executable)); + } + if 
(_PyStackRef_IsBorrowed(frame->f_funcobj)) { + Py_INCREF(PyStackRef_AsPyObjectBorrow(frame->f_funcobj)); + } for (int i = 0; i < stacktop; i++) { frame->localsplus[i] = _PyStackRef_StealIfUnborrowed(frame->localsplus[i]); } From 85f9a64ab7b9d71caf85dea79b387eb3883327ae Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 14 Feb 2025 17:03:35 -0800 Subject: [PATCH 14/73] Don't test with malformed bytecode --- Lib/test/test_peepholer.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 98f6b29dc7fc5e..4ce3a940ea712b 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -1330,15 +1330,6 @@ def test_fold_tuple_of_constants(self): ] self.cfg_optimization_test(before, after, consts=[], expected_consts=[(1, 2, 3)]) - # not enough consts - same = [ - ('LOAD_SMALL_INT', 1, 0), - ('LOAD_SMALL_INT', 2, 0), - ('BUILD_TUPLE', 3, 0), - ('RETURN_VALUE', None, 0) - ] - self.cfg_optimization_test(same, same, consts=[]) - # not all consts same = [ ('LOAD_SMALL_INT', 1, 0), From b6ab2f7b3419684e997a13340ec6dc0ae3244c29 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 14 Feb 2025 18:04:38 -0800 Subject: [PATCH 15/73] Make sure we convert borrowed refs to func/code when copying generator frame --- Include/internal/pycore_frame.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 9912281ebd5b47..9f281c2a1b118f 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -156,6 +156,13 @@ static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame * for (int i = 0; i < stacktop; i++) { dest->localsplus[i] = _PyStackRef_StealIfUnborrowed(src->localsplus[i]); } + // XXX - More efficient version of this? 
+ if (_PyStackRef_IsBorrowed(dest->f_executable)) { + Py_INCREF(PyStackRef_AsPyObjectBorrow(dest->f_executable)); + } + if (_PyStackRef_IsBorrowed(dest->f_funcobj)) { + Py_INCREF(PyStackRef_AsPyObjectBorrow(dest->f_funcobj)); + } // Don't leave a dangling pointer to the old frame when creating generators // and coroutines: dest->previous = NULL; From fd1ad3d6f85ca015c9db1c06ee2247440e6a3d59 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 14 Feb 2025 18:06:20 -0800 Subject: [PATCH 16/73] Add support for disassembling LOAD_FAST_BORROW_LOAD_FAST_BORROW --- Lib/dis.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/dis.py b/Lib/dis.py index 109c986bbe3d7d..947b1b90fb49cc 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -48,6 +48,7 @@ LOAD_COMMON_CONSTANT = opmap['LOAD_COMMON_CONSTANT'] LOAD_SPECIAL = opmap['LOAD_SPECIAL'] LOAD_FAST_LOAD_FAST = opmap['LOAD_FAST_LOAD_FAST'] +LOAD_FAST_BORROW_LOAD_FAST_BORROW = opmap['LOAD_FAST_BORROW_LOAD_FAST_BORROW'] STORE_FAST_LOAD_FAST = opmap['STORE_FAST_LOAD_FAST'] STORE_FAST_STORE_FAST = opmap['STORE_FAST_STORE_FAST'] IS_OP = opmap['IS_OP'] @@ -598,7 +599,7 @@ def get_argval_argrepr(self, op, arg, offset): lbl = self.get_label_for_offset(argval) assert lbl is not None argrepr = f"to L{lbl}" - elif deop in (LOAD_FAST_LOAD_FAST, STORE_FAST_LOAD_FAST, STORE_FAST_STORE_FAST): + elif deop in (LOAD_FAST_LOAD_FAST, LOAD_FAST_BORROW_LOAD_FAST_BORROW, STORE_FAST_LOAD_FAST, STORE_FAST_STORE_FAST): arg1 = arg >> 4 arg2 = arg & 15 val1, argrepr1 = _get_name_info(arg1, self.varname_from_oparg) From eee2195fd2e25e97fca9c307c66ca37864e6f8d7 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 14 Feb 2025 18:07:09 -0800 Subject: [PATCH 17/73] Make sure exc_obj is always defined Otherwise, it ends up being loaded using `LOAD_FAST_CHECK`, which increfs and causes the refcount check to fail when it uses `LOAD_FAST_BORROW`. 
--- Lib/test/test_traceback.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 68c4fa117a90f5..44e7582cb5efad 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -3598,6 +3598,7 @@ def test_no_save_exc_type(self): self.assertIsNone(te.exc_type) def test_no_refs_to_exception_and_traceback_objects(self): + exc_obj = None try: 1/0 except Exception as e: From d75ec9ac79e29b7098b16181f307936e85f0c86b Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 19 Feb 2025 14:38:06 -0800 Subject: [PATCH 18/73] Make sure we store new stackrefs for frame executable/funcobj These need to be tagged appropriately, not just increfed, so that they are decrefed when the frame is destroyed. --- Include/internal/pycore_frame.h | 4 ++-- Python/frame.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 9f281c2a1b118f..1392f24161bea4 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -158,10 +158,10 @@ static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame * } // XXX - More efficient version of this? 
if (_PyStackRef_IsBorrowed(dest->f_executable)) { - Py_INCREF(PyStackRef_AsPyObjectBorrow(dest->f_executable)); + dest->f_executable = PyStackRef_FromPyObjectNew(PyStackRef_AsPyObjectBorrow(dest->f_executable)); } if (_PyStackRef_IsBorrowed(dest->f_funcobj)) { - Py_INCREF(PyStackRef_AsPyObjectBorrow(dest->f_funcobj)); + dest->f_funcobj = PyStackRef_FromPyObjectNew(PyStackRef_AsPyObjectBorrow(dest->f_funcobj)); } // Don't leave a dangling pointer to the old frame when creating generators // and coroutines: diff --git a/Python/frame.c b/Python/frame.c index 213f375f31dfe0..483e7cfe41e890 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -59,10 +59,10 @@ take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame) assert(stacktop >= _PyFrame_GetCode(frame)->co_nlocalsplus); // XXX - Maybe more optimal sequence to do here if (_PyStackRef_IsBorrowed(frame->f_executable)) { - Py_INCREF(PyStackRef_AsPyObjectBorrow(frame->f_executable)); + frame->f_executable = PyStackRef_FromPyObjectNew(PyStackRef_AsPyObjectBorrow(frame->f_executable)); } if (_PyStackRef_IsBorrowed(frame->f_funcobj)) { - Py_INCREF(PyStackRef_AsPyObjectBorrow(frame->f_funcobj)); + frame->f_funcobj = PyStackRef_FromPyObjectNew(PyStackRef_AsPyObjectBorrow(frame->f_funcobj)); } for (int i = 0; i < stacktop; i++) { frame->localsplus[i] = _PyStackRef_StealIfUnborrowed(frame->localsplus[i]); From 66f53513a38eebb7e7bdc9ca96ba76c1ff34ff78 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 19 Feb 2025 14:45:09 -0800 Subject: [PATCH 19/73] Remove refcount check This may be 1 if the `LOAD_FAST` is optimized to a `LOAD_FAST_BORROW`. It's not clear that this is testing anything useful, so remove it. 
--- Lib/test/test_ctypes/test_memfunctions.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/Lib/test/test_ctypes/test_memfunctions.py b/Lib/test/test_ctypes/test_memfunctions.py index 325487618137f6..e3cb5db775ef62 100644 --- a/Lib/test/test_ctypes/test_memfunctions.py +++ b/Lib/test/test_ctypes/test_memfunctions.py @@ -60,9 +60,6 @@ def test_cast(self): @support.refcount_test def test_string_at(self): s = string_at(b"foo bar") - # XXX The following may be wrong, depending on how Python - # manages string instances - self.assertEqual(2, sys.getrefcount(s)) self.assertTrue(s, "foo bar") self.assertEqual(string_at(b"foo bar", 7), b"foo bar") From 7ef6a0bfcdd52a3c37d21c28f1cd784312659b50 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 19 Feb 2025 14:49:25 -0800 Subject: [PATCH 20/73] Don't hardcode initial refcount in refcount tests The initial value will differ depending on whether an owned or borrowed reference is loaded onto the operand stack. --- Lib/test/test_ctypes/test_refcounts.py | 25 ++++++++++++------------- Lib/test/test_ctypes/test_stringptr.py | 4 ++-- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/Lib/test/test_ctypes/test_refcounts.py b/Lib/test/test_ctypes/test_refcounts.py index 012722d8486218..1fe4b3eca2c50e 100644 --- a/Lib/test/test_ctypes/test_refcounts.py +++ b/Lib/test/test_ctypes/test_refcounts.py @@ -24,36 +24,35 @@ def test_1(self): def callback(value): return value - self.assertEqual(sys.getrefcount(callback), 2) + orig_refcount = sys.getrefcount(callback) cb = MyCallback(callback) - self.assertGreater(sys.getrefcount(callback), 2) + self.assertGreater(sys.getrefcount(callback), orig_refcount) result = f(-10, cb) self.assertEqual(result, -18) cb = None gc.collect() - self.assertEqual(sys.getrefcount(callback), 2) + self.assertEqual(sys.getrefcount(callback), orig_refcount) @support.refcount_test def test_refcount(self): def func(*args): pass - # this is the standard refcount for func - 
self.assertEqual(sys.getrefcount(func), 2) + orig_refcount = sys.getrefcount(func) # the CFuncPtr instance holds at least one refcount on func: f = OtherCallback(func) - self.assertGreater(sys.getrefcount(func), 2) + self.assertGreater(sys.getrefcount(func), orig_refcount) # and may release it again del f - self.assertGreaterEqual(sys.getrefcount(func), 2) + self.assertGreaterEqual(sys.getrefcount(func), orig_refcount) # but now it must be gone gc.collect() - self.assertEqual(sys.getrefcount(func), 2) + self.assertEqual(sys.getrefcount(func), orig_refcount) class X(ctypes.Structure): _fields_ = [("a", OtherCallback)] @@ -61,27 +60,27 @@ class X(ctypes.Structure): x.a = OtherCallback(func) # the CFuncPtr instance holds at least one refcount on func: - self.assertGreater(sys.getrefcount(func), 2) + self.assertGreater(sys.getrefcount(func), orig_refcount) # and may release it again del x - self.assertGreaterEqual(sys.getrefcount(func), 2) + self.assertGreaterEqual(sys.getrefcount(func), orig_refcount) # and now it must be gone again gc.collect() - self.assertEqual(sys.getrefcount(func), 2) + self.assertEqual(sys.getrefcount(func), orig_refcount) f = OtherCallback(func) # the CFuncPtr instance holds at least one refcount on func: - self.assertGreater(sys.getrefcount(func), 2) + self.assertGreater(sys.getrefcount(func), orig_refcount) # create a cycle f.cycle = f del f gc.collect() - self.assertEqual(sys.getrefcount(func), 2) + self.assertEqual(sys.getrefcount(func), orig_refcount) class AnotherLeak(unittest.TestCase): diff --git a/Lib/test/test_ctypes/test_stringptr.py b/Lib/test/test_ctypes/test_stringptr.py index bb6045b250ffce..a6a2dec68df68e 100644 --- a/Lib/test/test_ctypes/test_stringptr.py +++ b/Lib/test/test_ctypes/test_stringptr.py @@ -20,9 +20,9 @@ class X(Structure): # NULL pointer access self.assertRaises(ValueError, getattr, x.str, "contents") b = create_string_buffer(b"Hello, World") - self.assertEqual(sys.getrefcount(b), 2) + orig_refcount = 
sys.getrefcount(b) x.str = b - self.assertEqual(sys.getrefcount(b), 3) + self.assertEqual(sys.getrefcount(b), orig_refcount + 1) # POINTER(c_char) and Python string is NOT compatible # POINTER(c_char) and create_string_buffer() is compatible From 2af2bbc1dc89b07fa80871c1740d37486778d55f Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 19 Feb 2025 15:58:50 -0800 Subject: [PATCH 21/73] Remove invalid bytecode from `test_peepholer` These don't push enough values on the stack. --- Lib/test/test_peepholer.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 4ce3a940ea712b..0c04d4b152e3c7 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -1370,16 +1370,6 @@ def test_optimize_if_const_list(self): ] self.cfg_optimization_test(same, same, consts=[]) - # not enough consts - same = [ - ('LOAD_SMALL_INT', 1, 0), - ('LOAD_SMALL_INT', 2, 0), - ('LOAD_SMALL_INT', 3, 0), - ('BUILD_LIST', 4, 0), - ('RETURN_VALUE', None, 0), - ] - self.cfg_optimization_test(same, same, consts=[]) - # not all consts same = [ ('LOAD_SMALL_INT', 1, 0), @@ -1420,16 +1410,6 @@ def test_optimize_if_const_set(self): ] self.cfg_optimization_test(same, same, consts=[]) - # not enough consts - same = [ - ('LOAD_SMALL_INT', 1, 0), - ('LOAD_SMALL_INT', 2, 0), - ('LOAD_SMALL_INT', 3, 0), - ('BUILD_SET', 4, 0), - ('RETURN_VALUE', None, 0), - ] - self.cfg_optimization_test(same, same, consts=[]) - # not all consts same = [ ('LOAD_SMALL_INT', 1, 0), From bf19b7d4d4d1230ae38d83dab0c0b11b6099ac2a Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 19 Feb 2025 15:59:24 -0800 Subject: [PATCH 22/73] Fix invalid bytecode in `test_peepholer.DirectCfgOptimizerTests.test_unconditional_jump_threading` Make sure we have a statically known stack depth --- Lib/test/test_peepholer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 
0c04d4b152e3c7..81ae15167509a8 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -2264,13 +2264,13 @@ def get_insts(lno1, lno2, op1, op2): return [ lbl2 := self.Label(), ('LOAD_NAME', 0, 10), + ('POP_TOP', None, 10), (op1, lbl1 := self.Label(), lno1), ('LOAD_NAME', 1, 20), lbl1, (op2, lbl2, lno2), ] - for op1 in ('JUMP', 'JUMP_NO_INTERRUPT'): for op2 in ('JUMP', 'JUMP_NO_INTERRUPT'): # different lines @@ -2280,6 +2280,7 @@ def get_insts(lno1, lno2, op1, op2): op = 'JUMP' if 'JUMP' in (op1, op2) else 'JUMP_NO_INTERRUPT' expected_insts = [ ('LOAD_NAME', 0, 10), + ('POP_TOP', None, 10), ('NOP', None, 4), (op, 0, 5), ] @@ -2296,6 +2297,7 @@ def get_insts(lno1, lno2, op1, op2): op = 'JUMP' if 'JUMP' in (op1, op2) else 'JUMP_NO_INTERRUPT' expected_insts = [ ('LOAD_NAME', 0, 10), + ('POP_TOP', None, 10), (op, 0, lno), ] self.cfg_optimization_test(insts, expected_insts, consts=list(range(5))) From a9bca037940922fd7c4f5f27c2a86df5efe6a945 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 19 Feb 2025 16:06:05 -0800 Subject: [PATCH 23/73] Fix tests that checked for `LOAD_FAST` instructions that are now optimized to borrowed variants --- Lib/test/test_peepholer.py | 46 +++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 81ae15167509a8..7fb0e16af32106 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -906,7 +906,7 @@ def test_load_fast_known_simple(self): def f(): x = 1 y = x + x - self.assertInBytecode(f, 'LOAD_FAST_LOAD_FAST') + self.assertInBytecode(f, 'LOAD_FAST_BORROW_LOAD_FAST_BORROW') def test_load_fast_unknown_simple(self): def f(): @@ -927,27 +927,27 @@ def f(): def test_load_fast_known_because_parameter(self): def f1(x): print(x) - self.assertInBytecode(f1, 'LOAD_FAST') + self.assertInBytecode(f1, 'LOAD_FAST_BORROW') self.assertNotInBytecode(f1, 'LOAD_FAST_CHECK') def f2(*, x): print(x) - 
self.assertInBytecode(f2, 'LOAD_FAST') + self.assertInBytecode(f2, 'LOAD_FAST_BORROW') self.assertNotInBytecode(f2, 'LOAD_FAST_CHECK') def f3(*args): print(args) - self.assertInBytecode(f3, 'LOAD_FAST') + self.assertInBytecode(f3, 'LOAD_FAST_BORROW') self.assertNotInBytecode(f3, 'LOAD_FAST_CHECK') def f4(**kwargs): print(kwargs) - self.assertInBytecode(f4, 'LOAD_FAST') + self.assertInBytecode(f4, 'LOAD_FAST_BORROW') self.assertNotInBytecode(f4, 'LOAD_FAST_CHECK') def f5(x=0): print(x) - self.assertInBytecode(f5, 'LOAD_FAST') + self.assertInBytecode(f5, 'LOAD_FAST_BORROW') self.assertNotInBytecode(f5, 'LOAD_FAST_CHECK') def test_load_fast_known_because_already_loaded(self): @@ -957,7 +957,7 @@ def f(): print(x) print(x) self.assertInBytecode(f, 'LOAD_FAST_CHECK') - self.assertInBytecode(f, 'LOAD_FAST') + self.assertInBytecode(f, 'LOAD_FAST_BORROW') def test_load_fast_known_multiple_branches(self): def f(): @@ -966,7 +966,7 @@ def f(): else: x = 2 print(x) - self.assertInBytecode(f, 'LOAD_FAST') + self.assertInBytecode(f, 'LOAD_FAST_BORROW') self.assertNotInBytecode(f, 'LOAD_FAST_CHECK') def test_load_fast_unknown_after_error(self): @@ -1010,12 +1010,12 @@ def f(): print(a00, a01, a62, a63) print(a64, a65, a78, a79) - self.assertInBytecode(f, 'LOAD_FAST_LOAD_FAST', ("a00", "a01")) + self.assertInBytecode(f, 'LOAD_FAST_BORROW_LOAD_FAST_BORROW', ("a00", "a01")) self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', "a00") self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', "a01") for i in 62, 63: # First 64 locals: analyze completely - self.assertInBytecode(f, 'LOAD_FAST', f"a{i:02}") + self.assertInBytecode(f, 'LOAD_FAST_BORROW', f"a{i:02}") self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}") for i in 64, 65, 78, 79: # Locals >=64 not in the same basicblock @@ -1023,14 +1023,14 @@ def f(): self.assertNotInBytecode(f, 'LOAD_FAST', f"a{i:02}") for i in 70, 71: # Locals >=64 in the same basicblock - self.assertInBytecode(f, 'LOAD_FAST', f"a{i:02}") + self.assertInBytecode(f, 
'LOAD_FAST_BORROW', f"a{i:02}") self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}") # del statements should invalidate within basicblocks. self.assertInBytecode(f, 'LOAD_FAST_CHECK', "a72") self.assertNotInBytecode(f, 'LOAD_FAST', "a72") # previous checked loads within a basicblock enable unchecked loads self.assertInBytecode(f, 'LOAD_FAST_CHECK', "a73") - self.assertInBytecode(f, 'LOAD_FAST', "a73") + self.assertInBytecode(f, 'LOAD_FAST_BORROW', "a73") def test_setting_lineno_no_undefined(self): code = textwrap.dedent("""\ @@ -1048,7 +1048,7 @@ def f(): ns = {} exec(code, ns) f = ns['f'] - self.assertInBytecode(f, "LOAD_FAST") + self.assertInBytecode(f, "LOAD_FAST_BORROW") self.assertNotInBytecode(f, "LOAD_FAST_CHECK") co_code = f.__code__.co_code def trace(frame, event, arg): @@ -1060,7 +1060,7 @@ def trace(frame, event, arg): sys.settrace(trace) result = f() self.assertIsNone(result) - self.assertInBytecode(f, "LOAD_FAST") + self.assertInBytecode(f, "LOAD_FAST_BORROW") self.assertNotInBytecode(f, "LOAD_FAST_CHECK") self.assertEqual(f.__code__.co_code, co_code) @@ -1080,7 +1080,7 @@ def f(): ns = {} exec(code, ns) f = ns['f'] - self.assertInBytecode(f, "LOAD_FAST") + self.assertInBytecode(f, "LOAD_FAST_BORROW") self.assertNotInBytecode(f, "LOAD_FAST_CHECK") co_code = f.__code__.co_code def trace(frame, event, arg): @@ -1094,7 +1094,7 @@ def trace(frame, event, arg): sys.settrace(trace) result = f() self.assertEqual(result, 4) - self.assertInBytecode(f, "LOAD_FAST") + self.assertInBytecode(f, "LOAD_FAST_BORROW") self.assertNotInBytecode(f, "LOAD_FAST_CHECK") self.assertEqual(f.__code__.co_code, co_code) @@ -1114,7 +1114,7 @@ def f(): ns = {} exec(code, ns) f = ns['f'] - self.assertInBytecode(f, "LOAD_FAST") + self.assertInBytecode(f, "LOAD_FAST_BORROW") self.assertNotInBytecode(f, "LOAD_FAST_CHECK") co_code = f.__code__.co_code def trace(frame, event, arg): @@ -1128,7 +1128,7 @@ def trace(frame, event, arg): sys.settrace(trace) result = f() 
self.assertEqual(result, 4) - self.assertInBytecode(f, "LOAD_FAST") + self.assertInBytecode(f, "LOAD_FAST_BORROW") self.assertNotInBytecode(f, "LOAD_FAST_CHECK") self.assertEqual(f.__code__.co_code, co_code) @@ -1146,7 +1146,7 @@ def f(): ns = {} exec(code, ns) f = ns['f'] - self.assertInBytecode(f, "LOAD_FAST") + self.assertInBytecode(f, "LOAD_FAST_BORROW") self.assertNotInBytecode(f, "LOAD_FAST_CHECK") return f @@ -1160,7 +1160,7 @@ def trace(frame, event, arg): return trace sys.settrace(trace) f() - self.assertInBytecode(f, "LOAD_FAST") + self.assertInBytecode(f, "LOAD_FAST_BORROW") self.assertNotInBytecode(f, "LOAD_FAST_CHECK") def test_initializing_local_does_not_add_check(self): @@ -1173,7 +1173,7 @@ def trace(frame, event, arg): return trace sys.settrace(trace) f() - self.assertInBytecode(f, "LOAD_FAST") + self.assertInBytecode(f, "LOAD_FAST_BORROW") self.assertNotInBytecode(f, "LOAD_FAST_CHECK") @@ -2325,9 +2325,9 @@ def test_list_to_tuple_get_iter(self): ] expected_insts = [ ("BUILD_LIST", 0, 1), - ("LOAD_FAST", 0, 2), + ("LOAD_FAST_BORROW", 0, 2), ("LIST_EXTEND", 1, 3), - ("LOAD_FAST", 1, 4), + ("LOAD_FAST_BORROW", 1, 4), ("LIST_EXTEND", 1, 5), ("NOP", None, 6), # ("CALL_INTRINSIC_1", INTRINSIC_LIST_TO_TUPLE, 6), ("GET_ITER", None, 7), From 293c3178e82ed6f1d7911aa064458e78c90a0c20 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 19 Feb 2025 17:00:14 -0800 Subject: [PATCH 24/73] Update disassembly in test_dis to match new bytecode --- Lib/test/test_dis.py | 189 ++++++++++++++++++++++--------------------- 1 file changed, 95 insertions(+), 94 deletions(-) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 27350120d667c2..3c5201a011c29f 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -51,10 +51,10 @@ def cm(cls, x): dis_c_instance_method = """\ %3d RESUME 0 -%3d LOAD_FAST 1 (x) +%3d LOAD_FAST_BORROW 1 (x) LOAD_SMALL_INT 1 COMPARE_OP 72 (==) - LOAD_FAST 0 (self) + LOAD_FAST_BORROW 0 (self) STORE_ATTR 0 (x) LOAD_CONST 0 (None) 
RETURN_VALUE @@ -62,10 +62,10 @@ def cm(cls, x): dis_c_instance_method_bytes = """\ RESUME 0 - LOAD_FAST 1 + LOAD_FAST_BORROW 1 LOAD_SMALL_INT 1 COMPARE_OP 72 (==) - LOAD_FAST 0 + LOAD_FAST_BORROW 0 STORE_ATTR 0 LOAD_CONST 0 RETURN_VALUE @@ -74,10 +74,10 @@ def cm(cls, x): dis_c_class_method = """\ %3d RESUME 0 -%3d LOAD_FAST 1 (x) +%3d LOAD_FAST_BORROW 1 (x) LOAD_SMALL_INT 1 COMPARE_OP 72 (==) - LOAD_FAST 0 (cls) + LOAD_FAST_BORROW 0 (cls) STORE_ATTR 0 (x) LOAD_CONST 0 (None) RETURN_VALUE @@ -86,7 +86,7 @@ def cm(cls, x): dis_c_static_method = """\ %3d RESUME 0 -%3d LOAD_FAST 0 (x) +%3d LOAD_FAST_BORROW 0 (x) LOAD_SMALL_INT 1 COMPARE_OP 72 (==) STORE_FAST 0 (x) @@ -114,7 +114,7 @@ def _f(a): %3d RESUME 0 %3d LOAD_GLOBAL 1 (print + NULL) - LOAD_FAST 0 (a) + LOAD_FAST_BORROW 0 (a) CALL 1 POP_TOP @@ -128,7 +128,7 @@ def _f(a): %3d 0 RESUME 0 %3d 2 LOAD_GLOBAL 1 (print + NULL) - 12 LOAD_FAST 0 (a) + 12 LOAD_FAST_BORROW 0 (a) 14 CALL 1 22 POP_TOP @@ -142,7 +142,7 @@ def _f(a): %-14s RESUME 0 %-14s LOAD_GLOBAL 1 (print + NULL) -%-14s LOAD_FAST 0 (a) +%-14s LOAD_FAST_BORROW 0 (a) %-14s CALL 1 %-14s POP_TOP @@ -153,7 +153,7 @@ def _f(a): dis_f_co_code = """\ RESUME 0 LOAD_GLOBAL 1 - LOAD_FAST 0 + LOAD_FAST_BORROW 0 CALL 1 POP_TOP LOAD_SMALL_INT 1 @@ -203,7 +203,7 @@ def bug1333982(x=[]): %3d LOAD_COMMON_CONSTANT 0 (AssertionError) LOAD_CONST 0 ( at 0x..., file "%s", line %d>) MAKE_FUNCTION - LOAD_FAST 0 (x) + LOAD_FAST_BORROW 0 (x) GET_ITER CALL 0 @@ -462,7 +462,7 @@ def foo(a: int, b: str) -> str: NOT_TAKEN STORE_FAST 0 (e) -%4d L4: LOAD_FAST 0 (e) +%4d L4: LOAD_FAST_BORROW 0 (e) LOAD_ATTR 2 (__traceback__) STORE_FAST 1 (tb) L5: POP_EXCEPT @@ -470,7 +470,7 @@ def foo(a: int, b: str) -> str: STORE_FAST 0 (e) DELETE_FAST 0 (e) -%4d LOAD_FAST 1 (tb) +%4d LOAD_FAST_BORROW 1 (tb) RETURN_VALUE -- L6: LOAD_CONST 0 (None) @@ -503,18 +503,18 @@ def _fstring(a, b, c, d): dis_fstring = """\ %3d RESUME 0 -%3d LOAD_FAST 0 (a) +%3d LOAD_FAST_BORROW 0 (a) FORMAT_SIMPLE LOAD_CONST 0 (' 
') - LOAD_FAST 1 (b) + LOAD_FAST_BORROW 1 (b) LOAD_CONST 1 ('4') FORMAT_WITH_SPEC LOAD_CONST 0 (' ') - LOAD_FAST 2 (c) + LOAD_FAST_BORROW 2 (c) CONVERT_VALUE 2 (repr) FORMAT_SIMPLE LOAD_CONST 0 (' ') - LOAD_FAST 3 (d) + LOAD_FAST_BORROW 3 (d) CONVERT_VALUE 2 (repr) LOAD_CONST 1 ('4') FORMAT_WITH_SPEC @@ -530,7 +530,7 @@ def _with(c): dis_with = """\ %4d RESUME 0 -%4d LOAD_FAST 0 (c) +%4d LOAD_FAST_BORROW 0 (c) COPY 1 LOAD_SPECIAL 1 (__exit__) SWAP 2 @@ -595,7 +595,7 @@ async def _asyncwith(c): POP_TOP L1: RESUME 0 -%4d LOAD_FAST 0 (c) +%4d LOAD_FAST_BORROW 0 (c) COPY 1 LOAD_SPECIAL 3 (__aexit__) SWAP 2 @@ -707,9 +707,9 @@ def _tryfinallyconst(b): %4d NOP -%4d L1: LOAD_FAST 0 (a) +%4d L1: LOAD_FAST_BORROW 0 (a) -%4d L2: LOAD_FAST 1 (b) +%4d L2: LOAD_FAST_BORROW 1 (b) PUSH_NULL CALL 0 POP_TOP @@ -717,7 +717,7 @@ def _tryfinallyconst(b): -- L3: PUSH_EXC_INFO -%4d LOAD_FAST 1 (b) +%4d LOAD_FAST_BORROW 1 (b) PUSH_NULL CALL 0 POP_TOP @@ -743,7 +743,7 @@ def _tryfinallyconst(b): %4d NOP -%4d LOAD_FAST 0 (b) +%4d LOAD_FAST_BORROW 0 (b) PUSH_NULL CALL 0 POP_TOP @@ -752,7 +752,7 @@ def _tryfinallyconst(b): -- L1: PUSH_EXC_INFO -%4d LOAD_FAST 0 (b) +%4d LOAD_FAST_BORROW 0 (b) PUSH_NULL CALL 0 POP_TOP @@ -798,7 +798,7 @@ def foo(x): SET_FUNCTION_ATTRIBUTE 8 (closure) STORE_FAST 1 (foo) -%4d LOAD_FAST 1 (foo) +%4d LOAD_FAST_BORROW 1 (foo) RETURN_VALUE """ % (_h.__code__.co_firstlineno, _h.__code__.co_firstlineno + 1, @@ -841,12 +841,12 @@ def foo(x): %4d RETURN_GENERATOR POP_TOP L1: RESUME 0 - LOAD_FAST 0 (.0) + LOAD_FAST_BORROW 0 (.0) GET_ITER L2: FOR_ITER 14 (to L3) STORE_FAST 1 (z) LOAD_DEREF 2 (x) - LOAD_FAST 1 (z) + LOAD_FAST_BORROW 1 (z) BINARY_OP 0 (+) YIELD_VALUE 0 RESUME 5 @@ -901,7 +901,7 @@ def loop_test(): STORE_FAST 0 (i) %3d LOAD_GLOBAL_MODULE 1 (load_test + NULL) - LOAD_FAST 0 (i) + LOAD_FAST_BORROW 0 (i) CALL_PY_GENERAL 1 POP_TOP JUMP_BACKWARD_{: <6} 16 (to L1) @@ -996,7 +996,8 @@ def test_boundaries(self): def test_widths(self): long_opcodes = 
set(['JUMP_BACKWARD_NO_INTERRUPT', - 'INSTRUMENTED_CALL_FUNCTION_EX']) + 'INSTRUMENTED_CALL_FUNCTION_EX', + 'LOAD_FAST_BORROW_LOAD_FAST_BORROW']) for op, opname in enumerate(dis.opname): if opname in long_opcodes or opname.startswith("INSTRUMENTED"): continue @@ -1706,8 +1707,8 @@ def _prepare_test_cases(): Instruction = dis.Instruction expected_opinfo_outer = [ - Instruction(opname='MAKE_CELL', opcode=93, arg=0, argval='a', argrepr='a', offset=0, start_offset=0, starts_line=True, line_number=None, label=None, positions=None, cache_info=None), - Instruction(opname='MAKE_CELL', opcode=93, arg=1, argval='b', argrepr='b', offset=2, start_offset=2, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), + Instruction(opname='MAKE_CELL', opcode=95, arg=0, argval='a', argrepr='a', offset=0, start_offset=0, starts_line=True, line_number=None, label=None, positions=None, cache_info=None), + Instruction(opname='MAKE_CELL', opcode=95, arg=1, argval='b', argrepr='b', offset=2, start_offset=2, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), Instruction(opname='RESUME', opcode=149, arg=0, argval=0, argrepr='', offset=4, start_offset=4, starts_line=True, line_number=1, label=None, positions=None, cache_info=None), Instruction(opname='LOAD_CONST', opcode=80, arg=3, argval=(3, 4), argrepr='(3, 4)', offset=6, start_offset=6, starts_line=True, line_number=2, label=None, positions=None, cache_info=None), Instruction(opname='LOAD_FAST', opcode=82, arg=0, argval='a', argrepr='a', offset=8, start_offset=8, starts_line=False, line_number=2, label=None, positions=None, cache_info=None), @@ -1715,27 +1716,27 @@ def _prepare_test_cases(): Instruction(opname='BUILD_TUPLE', opcode=50, arg=2, argval=2, argrepr='', offset=12, start_offset=12, starts_line=False, line_number=2, label=None, positions=None, cache_info=None), Instruction(opname='LOAD_CONST', opcode=80, arg=0, argval=code_object_f, argrepr=repr(code_object_f), offset=14, 
start_offset=14, starts_line=False, line_number=2, label=None, positions=None, cache_info=None), Instruction(opname='MAKE_FUNCTION', opcode=23, arg=None, argval=None, argrepr='', offset=16, start_offset=16, starts_line=False, line_number=2, label=None, positions=None, cache_info=None), - Instruction(opname='SET_FUNCTION_ATTRIBUTE', opcode=104, arg=8, argval=8, argrepr='closure', offset=18, start_offset=18, starts_line=False, line_number=2, label=None, positions=None, cache_info=None), - Instruction(opname='SET_FUNCTION_ATTRIBUTE', opcode=104, arg=1, argval=1, argrepr='defaults', offset=20, start_offset=20, starts_line=False, line_number=2, label=None, positions=None, cache_info=None), - Instruction(opname='STORE_FAST', opcode=108, arg=2, argval='f', argrepr='f', offset=22, start_offset=22, starts_line=False, line_number=2, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_GLOBAL', opcode=88, arg=1, argval='print', argrepr='print + NULL', offset=24, start_offset=24, starts_line=True, line_number=7, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='SET_FUNCTION_ATTRIBUTE', opcode=106, arg=8, argval=8, argrepr='closure', offset=18, start_offset=18, starts_line=False, line_number=2, label=None, positions=None, cache_info=None), + Instruction(opname='SET_FUNCTION_ATTRIBUTE', opcode=106, arg=1, argval=1, argrepr='defaults', offset=20, start_offset=20, starts_line=False, line_number=2, label=None, positions=None, cache_info=None), + Instruction(opname='STORE_FAST', opcode=110, arg=2, argval='f', argrepr='f', offset=22, start_offset=22, starts_line=False, line_number=2, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_GLOBAL', opcode=90, arg=1, argval='print', argrepr='print + NULL', offset=24, start_offset=24, starts_line=True, line_number=7, label=None, positions=None, 
cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), Instruction(opname='LOAD_DEREF', opcode=81, arg=0, argval='a', argrepr='a', offset=34, start_offset=34, starts_line=False, line_number=7, label=None, positions=None, cache_info=None), Instruction(opname='LOAD_DEREF', opcode=81, arg=1, argval='b', argrepr='b', offset=36, start_offset=36, starts_line=False, line_number=7, label=None, positions=None, cache_info=None), Instruction(opname='LOAD_CONST', opcode=80, arg=1, argval='', argrepr="''", offset=38, start_offset=38, starts_line=False, line_number=7, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_SMALL_INT', opcode=90, arg=1, argval=1, argrepr='', offset=40, start_offset=40, starts_line=False, line_number=7, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_SMALL_INT', opcode=92, arg=1, argval=1, argrepr='', offset=40, start_offset=40, starts_line=False, line_number=7, label=None, positions=None, cache_info=None), Instruction(opname='BUILD_LIST', opcode=45, arg=0, argval=0, argrepr='', offset=42, start_offset=42, starts_line=False, line_number=7, label=None, positions=None, cache_info=None), Instruction(opname='BUILD_MAP', opcode=46, arg=0, argval=0, argrepr='', offset=44, start_offset=44, starts_line=False, line_number=7, label=None, positions=None, cache_info=None), Instruction(opname='LOAD_CONST', opcode=80, arg=2, argval='Hello world!', argrepr="'Hello world!'", offset=46, start_offset=46, starts_line=False, line_number=7, label=None, positions=None, cache_info=None), Instruction(opname='CALL', opcode=51, arg=7, argval=7, argrepr='', offset=48, start_offset=48, starts_line=False, line_number=7, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), Instruction(opname='POP_TOP', opcode=31, arg=None, argval=None, argrepr='', offset=56, start_offset=56, 
starts_line=False, line_number=7, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_FAST', opcode=82, arg=2, argval='f', argrepr='f', offset=58, start_offset=58, starts_line=True, line_number=8, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST_BORROW', opcode=84, arg=2, argval='f', argrepr='f', offset=58, start_offset=58, starts_line=True, line_number=8, label=None, positions=None, cache_info=None), Instruction(opname='RETURN_VALUE', opcode=35, arg=None, argval=None, argrepr='', offset=60, start_offset=60, starts_line=False, line_number=8, label=None, positions=None, cache_info=None), ] expected_opinfo_f = [ Instruction(opname='COPY_FREE_VARS', opcode=59, arg=2, argval=2, argrepr='', offset=0, start_offset=0, starts_line=True, line_number=None, label=None, positions=None, cache_info=None), - Instruction(opname='MAKE_CELL', opcode=93, arg=0, argval='c', argrepr='c', offset=2, start_offset=2, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), - Instruction(opname='MAKE_CELL', opcode=93, arg=1, argval='d', argrepr='d', offset=4, start_offset=4, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), + Instruction(opname='MAKE_CELL', opcode=95, arg=0, argval='c', argrepr='c', offset=2, start_offset=2, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), + Instruction(opname='MAKE_CELL', opcode=95, arg=1, argval='d', argrepr='d', offset=4, start_offset=4, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), Instruction(opname='RESUME', opcode=149, arg=0, argval=0, argrepr='', offset=6, start_offset=6, starts_line=True, line_number=2, label=None, positions=None, cache_info=None), Instruction(opname='LOAD_CONST', opcode=80, arg=1, argval=(5, 6), argrepr='(5, 6)', offset=8, start_offset=8, starts_line=True, line_number=3, label=None, positions=None, cache_info=None), Instruction(opname='LOAD_FAST', 
opcode=82, arg=3, argval='a', argrepr='a', offset=10, start_offset=10, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), @@ -1745,29 +1746,29 @@ def _prepare_test_cases(): Instruction(opname='BUILD_TUPLE', opcode=50, arg=4, argval=4, argrepr='', offset=18, start_offset=18, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), Instruction(opname='LOAD_CONST', opcode=80, arg=0, argval=code_object_inner, argrepr=repr(code_object_inner), offset=20, start_offset=20, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), Instruction(opname='MAKE_FUNCTION', opcode=23, arg=None, argval=None, argrepr='', offset=22, start_offset=22, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), - Instruction(opname='SET_FUNCTION_ATTRIBUTE', opcode=104, arg=8, argval=8, argrepr='closure', offset=24, start_offset=24, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), - Instruction(opname='SET_FUNCTION_ATTRIBUTE', opcode=104, arg=1, argval=1, argrepr='defaults', offset=26, start_offset=26, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), - Instruction(opname='STORE_FAST', opcode=108, arg=2, argval='inner', argrepr='inner', offset=28, start_offset=28, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_GLOBAL', opcode=88, arg=1, argval='print', argrepr='print + NULL', offset=30, start_offset=30, starts_line=True, line_number=5, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='SET_FUNCTION_ATTRIBUTE', opcode=106, arg=8, argval=8, argrepr='closure', offset=24, start_offset=24, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), + Instruction(opname='SET_FUNCTION_ATTRIBUTE', opcode=106, arg=1, 
argval=1, argrepr='defaults', offset=26, start_offset=26, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), + Instruction(opname='STORE_FAST', opcode=110, arg=2, argval='inner', argrepr='inner', offset=28, start_offset=28, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_GLOBAL', opcode=90, arg=1, argval='print', argrepr='print + NULL', offset=30, start_offset=30, starts_line=True, line_number=5, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), Instruction(opname='LOAD_DEREF', opcode=81, arg=3, argval='a', argrepr='a', offset=40, start_offset=40, starts_line=False, line_number=5, label=None, positions=None, cache_info=None), Instruction(opname='LOAD_DEREF', opcode=81, arg=4, argval='b', argrepr='b', offset=42, start_offset=42, starts_line=False, line_number=5, label=None, positions=None, cache_info=None), Instruction(opname='LOAD_DEREF', opcode=81, arg=0, argval='c', argrepr='c', offset=44, start_offset=44, starts_line=False, line_number=5, label=None, positions=None, cache_info=None), Instruction(opname='LOAD_DEREF', opcode=81, arg=1, argval='d', argrepr='d', offset=46, start_offset=46, starts_line=False, line_number=5, label=None, positions=None, cache_info=None), Instruction(opname='CALL', opcode=51, arg=4, argval=4, argrepr='', offset=48, start_offset=48, starts_line=False, line_number=5, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), Instruction(opname='POP_TOP', opcode=31, arg=None, argval=None, argrepr='', offset=56, start_offset=56, starts_line=False, line_number=5, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_FAST', opcode=82, arg=2, argval='inner', argrepr='inner', offset=58, start_offset=58, starts_line=True, line_number=6, label=None, 
positions=None, cache_info=None), + Instruction(opname='LOAD_FAST_BORROW', opcode=84, arg=2, argval='inner', argrepr='inner', offset=58, start_offset=58, starts_line=True, line_number=6, label=None, positions=None, cache_info=None), Instruction(opname='RETURN_VALUE', opcode=35, arg=None, argval=None, argrepr='', offset=60, start_offset=60, starts_line=False, line_number=6, label=None, positions=None, cache_info=None), ] expected_opinfo_inner = [ Instruction(opname='COPY_FREE_VARS', opcode=59, arg=4, argval=4, argrepr='', offset=0, start_offset=0, starts_line=True, line_number=None, label=None, positions=None, cache_info=None), Instruction(opname='RESUME', opcode=149, arg=0, argval=0, argrepr='', offset=2, start_offset=2, starts_line=True, line_number=3, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_GLOBAL', opcode=88, arg=1, argval='print', argrepr='print + NULL', offset=4, start_offset=4, starts_line=True, line_number=4, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_GLOBAL', opcode=90, arg=1, argval='print', argrepr='print + NULL', offset=4, start_offset=4, starts_line=True, line_number=4, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), Instruction(opname='LOAD_DEREF', opcode=81, arg=2, argval='a', argrepr='a', offset=14, start_offset=14, starts_line=False, line_number=4, label=None, positions=None, cache_info=None), Instruction(opname='LOAD_DEREF', opcode=81, arg=3, argval='b', argrepr='b', offset=16, start_offset=16, starts_line=False, line_number=4, label=None, positions=None, cache_info=None), Instruction(opname='LOAD_DEREF', opcode=81, arg=4, argval='c', argrepr='c', offset=18, start_offset=18, starts_line=False, line_number=4, label=None, 
positions=None, cache_info=None), Instruction(opname='LOAD_DEREF', opcode=81, arg=5, argval='d', argrepr='d', offset=20, start_offset=20, starts_line=False, line_number=4, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_FAST_LOAD_FAST', opcode=85, arg=1, argval=('e', 'f'), argrepr='e, f', offset=22, start_offset=22, starts_line=False, line_number=4, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST_BORROW_LOAD_FAST_BORROW', opcode=85, arg=1, argval=('e', 'f'), argrepr='e, f', offset=22, start_offset=22, starts_line=False, line_number=4, label=None, positions=None, cache_info=None), Instruction(opname='CALL', opcode=51, arg=6, argval=6, argrepr='', offset=24, start_offset=24, starts_line=False, line_number=4, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), Instruction(opname='POP_TOP', opcode=31, arg=None, argval=None, argrepr='', offset=32, start_offset=32, starts_line=False, line_number=4, label=None, positions=None, cache_info=None), Instruction(opname='LOAD_CONST', opcode=80, arg=0, argval=None, argrepr='None', offset=34, start_offset=34, starts_line=False, line_number=4, label=None, positions=None, cache_info=None), @@ -1776,79 +1777,79 @@ def _prepare_test_cases(): expected_opinfo_jumpy = [ Instruction(opname='RESUME', opcode=149, arg=0, argval=0, argrepr='', offset=0, start_offset=0, starts_line=True, line_number=1, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_GLOBAL', opcode=88, arg=1, argval='range', argrepr='range + NULL', offset=2, start_offset=2, starts_line=True, line_number=3, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), - Instruction(opname='LOAD_SMALL_INT', opcode=90, arg=10, argval=10, argrepr='', offset=12, start_offset=12, starts_line=False, line_number=3, label=None, 
positions=None, cache_info=None), + Instruction(opname='LOAD_GLOBAL', opcode=90, arg=1, argval='range', argrepr='range + NULL', offset=2, start_offset=2, starts_line=True, line_number=3, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_SMALL_INT', opcode=92, arg=10, argval=10, argrepr='', offset=12, start_offset=12, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), Instruction(opname='CALL', opcode=51, arg=1, argval=1, argrepr='', offset=14, start_offset=14, starts_line=False, line_number=3, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), Instruction(opname='GET_ITER', opcode=16, arg=None, argval=None, argrepr='', offset=22, start_offset=22, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), Instruction(opname='FOR_ITER', opcode=68, arg=32, argval=92, argrepr='to L4', offset=24, start_offset=24, starts_line=False, line_number=3, label=1, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), - Instruction(opname='STORE_FAST', opcode=108, arg=0, argval='i', argrepr='i', offset=28, start_offset=28, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_GLOBAL', opcode=88, arg=3, argval='print', argrepr='print + NULL', offset=30, start_offset=30, starts_line=True, line_number=4, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), - Instruction(opname='LOAD_FAST', opcode=82, arg=0, argval='i', argrepr='i', offset=40, start_offset=40, starts_line=False, line_number=4, label=None, positions=None, cache_info=None), + Instruction(opname='STORE_FAST', opcode=110, arg=0, argval='i', argrepr='i', offset=28, 
start_offset=28, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_GLOBAL', opcode=90, arg=3, argval='print', argrepr='print + NULL', offset=30, start_offset=30, starts_line=True, line_number=4, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_FAST_BORROW', opcode=84, arg=0, argval='i', argrepr='i', offset=40, start_offset=40, starts_line=False, line_number=4, label=None, positions=None, cache_info=None), Instruction(opname='CALL', opcode=51, arg=1, argval=1, argrepr='', offset=42, start_offset=42, starts_line=False, line_number=4, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), Instruction(opname='POP_TOP', opcode=31, arg=None, argval=None, argrepr='', offset=50, start_offset=50, starts_line=False, line_number=4, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_FAST', opcode=82, arg=0, argval='i', argrepr='i', offset=52, start_offset=52, starts_line=True, line_number=5, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_SMALL_INT', opcode=90, arg=4, argval=4, argrepr='', offset=54, start_offset=54, starts_line=False, line_number=5, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST_BORROW', opcode=84, arg=0, argval='i', argrepr='i', offset=52, start_offset=52, starts_line=True, line_number=5, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_SMALL_INT', opcode=92, arg=4, argval=4, argrepr='', offset=54, start_offset=54, starts_line=False, line_number=5, label=None, positions=None, cache_info=None), Instruction(opname='COMPARE_OP', opcode=55, arg=18, argval='<', argrepr='bool(<)', offset=56, start_offset=56, starts_line=False, line_number=5, label=None, positions=None, cache_info=[('counter', 1, 
b'\x00\x00')]), - Instruction(opname='POP_JUMP_IF_FALSE', opcode=96, arg=3, argval=70, argrepr='to L2', offset=60, start_offset=60, starts_line=False, line_number=5, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='POP_JUMP_IF_FALSE', opcode=98, arg=3, argval=70, argrepr='to L2', offset=60, start_offset=60, starts_line=False, line_number=5, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), Instruction(opname='NOT_TAKEN', opcode=28, arg=None, argval=None, argrepr='', offset=64, start_offset=64, starts_line=False, line_number=5, label=None, positions=None, cache_info=None), Instruction(opname='JUMP_BACKWARD', opcode=73, arg=23, argval=24, argrepr='to L1', offset=66, start_offset=66, starts_line=True, line_number=6, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), - Instruction(opname='LOAD_FAST', opcode=82, arg=0, argval='i', argrepr='i', offset=70, start_offset=70, starts_line=True, line_number=7, label=2, positions=None, cache_info=None), - Instruction(opname='LOAD_SMALL_INT', opcode=90, arg=6, argval=6, argrepr='', offset=72, start_offset=72, starts_line=False, line_number=7, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST_BORROW', opcode=84, arg=0, argval='i', argrepr='i', offset=70, start_offset=70, starts_line=True, line_number=7, label=2, positions=None, cache_info=None), + Instruction(opname='LOAD_SMALL_INT', opcode=92, arg=6, argval=6, argrepr='', offset=72, start_offset=72, starts_line=False, line_number=7, label=None, positions=None, cache_info=None), Instruction(opname='COMPARE_OP', opcode=55, arg=148, argval='>', argrepr='bool(>)', offset=74, start_offset=74, starts_line=False, line_number=7, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), - Instruction(opname='POP_JUMP_IF_TRUE', opcode=99, arg=3, argval=88, argrepr='to L3', offset=78, start_offset=78, starts_line=False, line_number=7, label=None, positions=None, 
cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='POP_JUMP_IF_TRUE', opcode=101, arg=3, argval=88, argrepr='to L3', offset=78, start_offset=78, starts_line=False, line_number=7, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), Instruction(opname='NOT_TAKEN', opcode=28, arg=None, argval=None, argrepr='', offset=82, start_offset=82, starts_line=False, line_number=7, label=None, positions=None, cache_info=None), Instruction(opname='JUMP_BACKWARD', opcode=73, arg=32, argval=24, argrepr='to L1', offset=84, start_offset=84, starts_line=False, line_number=7, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), Instruction(opname='POP_TOP', opcode=31, arg=None, argval=None, argrepr='', offset=88, start_offset=88, starts_line=True, line_number=8, label=3, positions=None, cache_info=None), Instruction(opname='JUMP_FORWARD', opcode=75, arg=13, argval=118, argrepr='to L5', offset=90, start_offset=90, starts_line=False, line_number=8, label=None, positions=None, cache_info=None), Instruction(opname='END_FOR', opcode=9, arg=None, argval=None, argrepr='', offset=92, start_offset=92, starts_line=True, line_number=3, label=4, positions=None, cache_info=None), Instruction(opname='POP_ITER', opcode=30, arg=None, argval=None, argrepr='', offset=94, start_offset=94, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_GLOBAL', opcode=88, arg=3, argval='print', argrepr='print + NULL', offset=96, start_offset=96, starts_line=True, line_number=10, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_GLOBAL', opcode=90, arg=3, argval='print', argrepr='print + NULL', offset=96, start_offset=96, starts_line=True, line_number=10, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), 
('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), Instruction(opname='LOAD_CONST', opcode=80, arg=0, argval='I can haz else clause?', argrepr="'I can haz else clause?'", offset=106, start_offset=106, starts_line=False, line_number=10, label=None, positions=None, cache_info=None), Instruction(opname='CALL', opcode=51, arg=1, argval=1, argrepr='', offset=108, start_offset=108, starts_line=False, line_number=10, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), Instruction(opname='POP_TOP', opcode=31, arg=None, argval=None, argrepr='', offset=116, start_offset=116, starts_line=False, line_number=10, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_FAST_CHECK', opcode=84, arg=0, argval='i', argrepr='i', offset=118, start_offset=118, starts_line=True, line_number=11, label=5, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST_CHECK', opcode=86, arg=0, argval='i', argrepr='i', offset=118, start_offset=118, starts_line=True, line_number=11, label=5, positions=None, cache_info=None), Instruction(opname='TO_BOOL', opcode=39, arg=None, argval=None, argrepr='', offset=120, start_offset=120, starts_line=False, line_number=11, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('version', 2, b'\x00\x00\x00\x00')]), - Instruction(opname='POP_JUMP_IF_FALSE', opcode=96, arg=40, argval=212, argrepr='to L8', offset=128, start_offset=128, starts_line=False, line_number=11, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='POP_JUMP_IF_FALSE', opcode=98, arg=40, argval=212, argrepr='to L8', offset=128, start_offset=128, starts_line=False, line_number=11, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), Instruction(opname='NOT_TAKEN', opcode=28, arg=None, argval=None, argrepr='', offset=132, start_offset=132, starts_line=False, line_number=11, label=None, positions=None, 
cache_info=None), - Instruction(opname='LOAD_GLOBAL', opcode=88, arg=3, argval='print', argrepr='print + NULL', offset=134, start_offset=134, starts_line=True, line_number=12, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), - Instruction(opname='LOAD_FAST', opcode=82, arg=0, argval='i', argrepr='i', offset=144, start_offset=144, starts_line=False, line_number=12, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_GLOBAL', opcode=90, arg=3, argval='print', argrepr='print + NULL', offset=134, start_offset=134, starts_line=True, line_number=12, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_FAST_BORROW', opcode=84, arg=0, argval='i', argrepr='i', offset=144, start_offset=144, starts_line=False, line_number=12, label=None, positions=None, cache_info=None), Instruction(opname='CALL', opcode=51, arg=1, argval=1, argrepr='', offset=146, start_offset=146, starts_line=False, line_number=12, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), Instruction(opname='POP_TOP', opcode=31, arg=None, argval=None, argrepr='', offset=154, start_offset=154, starts_line=False, line_number=12, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_FAST', opcode=82, arg=0, argval='i', argrepr='i', offset=156, start_offset=156, starts_line=True, line_number=13, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_SMALL_INT', opcode=90, arg=1, argval=1, argrepr='', offset=158, start_offset=158, starts_line=False, line_number=13, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST_BORROW', opcode=84, arg=0, argval='i', argrepr='i', offset=156, 
start_offset=156, starts_line=True, line_number=13, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_SMALL_INT', opcode=92, arg=1, argval=1, argrepr='', offset=158, start_offset=158, starts_line=False, line_number=13, label=None, positions=None, cache_info=None), Instruction(opname='BINARY_OP', opcode=44, arg=23, argval=23, argrepr='-=', offset=160, start_offset=160, starts_line=False, line_number=13, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('descr', 4, b'\x00\x00\x00\x00\x00\x00\x00\x00')]), - Instruction(opname='STORE_FAST', opcode=108, arg=0, argval='i', argrepr='i', offset=172, start_offset=172, starts_line=False, line_number=13, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_FAST', opcode=82, arg=0, argval='i', argrepr='i', offset=174, start_offset=174, starts_line=True, line_number=14, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_SMALL_INT', opcode=90, arg=6, argval=6, argrepr='', offset=176, start_offset=176, starts_line=False, line_number=14, label=None, positions=None, cache_info=None), + Instruction(opname='STORE_FAST', opcode=110, arg=0, argval='i', argrepr='i', offset=172, start_offset=172, starts_line=False, line_number=13, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST_BORROW', opcode=84, arg=0, argval='i', argrepr='i', offset=174, start_offset=174, starts_line=True, line_number=14, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_SMALL_INT', opcode=92, arg=6, argval=6, argrepr='', offset=176, start_offset=176, starts_line=False, line_number=14, label=None, positions=None, cache_info=None), Instruction(opname='COMPARE_OP', opcode=55, arg=148, argval='>', argrepr='bool(>)', offset=178, start_offset=178, starts_line=False, line_number=14, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), - Instruction(opname='POP_JUMP_IF_FALSE', opcode=96, arg=3, argval=192, argrepr='to 
L6', offset=182, start_offset=182, starts_line=False, line_number=14, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='POP_JUMP_IF_FALSE', opcode=98, arg=3, argval=192, argrepr='to L6', offset=182, start_offset=182, starts_line=False, line_number=14, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), Instruction(opname='NOT_TAKEN', opcode=28, arg=None, argval=None, argrepr='', offset=186, start_offset=186, starts_line=False, line_number=14, label=None, positions=None, cache_info=None), Instruction(opname='JUMP_BACKWARD', opcode=73, arg=37, argval=118, argrepr='to L5', offset=188, start_offset=188, starts_line=True, line_number=15, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), - Instruction(opname='LOAD_FAST', opcode=82, arg=0, argval='i', argrepr='i', offset=192, start_offset=192, starts_line=True, line_number=16, label=6, positions=None, cache_info=None), - Instruction(opname='LOAD_SMALL_INT', opcode=90, arg=4, argval=4, argrepr='', offset=194, start_offset=194, starts_line=False, line_number=16, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST_BORROW', opcode=84, arg=0, argval='i', argrepr='i', offset=192, start_offset=192, starts_line=True, line_number=16, label=6, positions=None, cache_info=None), + Instruction(opname='LOAD_SMALL_INT', opcode=92, arg=4, argval=4, argrepr='', offset=194, start_offset=194, starts_line=False, line_number=16, label=None, positions=None, cache_info=None), Instruction(opname='COMPARE_OP', opcode=55, arg=18, argval='<', argrepr='bool(<)', offset=196, start_offset=196, starts_line=False, line_number=16, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), - Instruction(opname='POP_JUMP_IF_TRUE', opcode=99, arg=3, argval=210, argrepr='to L7', offset=200, start_offset=200, starts_line=False, line_number=16, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + 
Instruction(opname='POP_JUMP_IF_TRUE', opcode=101, arg=3, argval=210, argrepr='to L7', offset=200, start_offset=200, starts_line=False, line_number=16, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), Instruction(opname='NOT_TAKEN', opcode=28, arg=None, argval=None, argrepr='', offset=204, start_offset=204, starts_line=False, line_number=16, label=None, positions=None, cache_info=None), Instruction(opname='JUMP_BACKWARD', opcode=73, arg=46, argval=118, argrepr='to L5', offset=206, start_offset=206, starts_line=False, line_number=16, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), Instruction(opname='JUMP_FORWARD', opcode=75, arg=11, argval=234, argrepr='to L9', offset=210, start_offset=210, starts_line=True, line_number=17, label=7, positions=None, cache_info=None), - Instruction(opname='LOAD_GLOBAL', opcode=88, arg=3, argval='print', argrepr='print + NULL', offset=212, start_offset=212, starts_line=True, line_number=19, label=8, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_GLOBAL', opcode=90, arg=3, argval='print', argrepr='print + NULL', offset=212, start_offset=212, starts_line=True, line_number=19, label=8, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), Instruction(opname='LOAD_CONST', opcode=80, arg=1, argval='Who let lolcatz into this test suite?', argrepr="'Who let lolcatz into this test suite?'", offset=222, start_offset=222, starts_line=False, line_number=19, label=None, positions=None, cache_info=None), Instruction(opname='CALL', opcode=51, arg=1, argval=1, argrepr='', offset=224, start_offset=224, starts_line=False, line_number=19, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), 
Instruction(opname='POP_TOP', opcode=31, arg=None, argval=None, argrepr='', offset=232, start_offset=232, starts_line=False, line_number=19, label=None, positions=None, cache_info=None), Instruction(opname='NOP', opcode=27, arg=None, argval=None, argrepr='', offset=234, start_offset=234, starts_line=True, line_number=20, label=9, positions=None, cache_info=None), - Instruction(opname='LOAD_SMALL_INT', opcode=90, arg=1, argval=1, argrepr='', offset=236, start_offset=236, starts_line=True, line_number=21, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_SMALL_INT', opcode=90, arg=0, argval=0, argrepr='', offset=238, start_offset=238, starts_line=False, line_number=21, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_SMALL_INT', opcode=92, arg=1, argval=1, argrepr='', offset=236, start_offset=236, starts_line=True, line_number=21, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_SMALL_INT', opcode=92, arg=0, argval=0, argrepr='', offset=238, start_offset=238, starts_line=False, line_number=21, label=None, positions=None, cache_info=None), Instruction(opname='BINARY_OP', opcode=44, arg=11, argval=11, argrepr='/', offset=240, start_offset=240, starts_line=False, line_number=21, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('descr', 4, b'\x00\x00\x00\x00\x00\x00\x00\x00')]), Instruction(opname='POP_TOP', opcode=31, arg=None, argval=None, argrepr='', offset=252, start_offset=252, starts_line=False, line_number=21, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_FAST', opcode=82, arg=0, argval='i', argrepr='i', offset=254, start_offset=254, starts_line=True, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST_BORROW', opcode=84, arg=0, argval='i', argrepr='i', offset=254, start_offset=254, starts_line=True, line_number=25, label=None, positions=None, cache_info=None), Instruction(opname='COPY', opcode=58, arg=1, 
argval=1, argrepr='', offset=256, start_offset=256, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_SPECIAL', opcode=91, arg=1, argval=1, argrepr='__exit__', offset=258, start_offset=258, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), - Instruction(opname='SWAP', opcode=113, arg=2, argval=2, argrepr='', offset=260, start_offset=260, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), - Instruction(opname='SWAP', opcode=113, arg=3, argval=3, argrepr='', offset=262, start_offset=262, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_SPECIAL', opcode=91, arg=0, argval=0, argrepr='__enter__', offset=264, start_offset=264, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_SPECIAL', opcode=93, arg=1, argval=1, argrepr='__exit__', offset=258, start_offset=258, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='SWAP', opcode=115, arg=2, argval=2, argrepr='', offset=260, start_offset=260, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='SWAP', opcode=115, arg=3, argval=3, argrepr='', offset=262, start_offset=262, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_SPECIAL', opcode=93, arg=0, argval=0, argrepr='__enter__', offset=264, start_offset=264, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), Instruction(opname='CALL', opcode=51, arg=0, argval=0, argrepr='', offset=266, start_offset=266, starts_line=False, line_number=25, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), - Instruction(opname='STORE_FAST', opcode=108, arg=1, argval='dodgy', argrepr='dodgy', offset=274, 
start_offset=274, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_GLOBAL', opcode=88, arg=3, argval='print', argrepr='print + NULL', offset=276, start_offset=276, starts_line=True, line_number=26, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='STORE_FAST', opcode=110, arg=1, argval='dodgy', argrepr='dodgy', offset=274, start_offset=274, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_GLOBAL', opcode=90, arg=3, argval='print', argrepr='print + NULL', offset=276, start_offset=276, starts_line=True, line_number=26, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), Instruction(opname='LOAD_CONST', opcode=80, arg=2, argval='Never reach this', argrepr="'Never reach this'", offset=286, start_offset=286, starts_line=False, line_number=26, label=None, positions=None, cache_info=None), Instruction(opname='CALL', opcode=51, arg=1, argval=1, argrepr='', offset=288, start_offset=288, starts_line=False, line_number=26, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), Instruction(opname='POP_TOP', opcode=31, arg=None, argval=None, argrepr='', offset=296, start_offset=296, starts_line=False, line_number=26, label=None, positions=None, cache_info=None), @@ -1857,7 +1858,7 @@ def _prepare_test_cases(): Instruction(opname='LOAD_CONST', opcode=80, arg=3, argval=None, argrepr='None', offset=302, start_offset=302, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), Instruction(opname='CALL', opcode=51, arg=3, argval=3, argrepr='', offset=304, start_offset=304, starts_line=False, line_number=25, 
label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), Instruction(opname='POP_TOP', opcode=31, arg=None, argval=None, argrepr='', offset=312, start_offset=312, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_GLOBAL', opcode=88, arg=3, argval='print', argrepr='print + NULL', offset=314, start_offset=314, starts_line=True, line_number=28, label=10, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_GLOBAL', opcode=90, arg=3, argval='print', argrepr='print + NULL', offset=314, start_offset=314, starts_line=True, line_number=28, label=10, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), Instruction(opname='LOAD_CONST', opcode=80, arg=5, argval="OK, now we're done", argrepr='"OK, now we\'re done"', offset=324, start_offset=324, starts_line=False, line_number=28, label=None, positions=None, cache_info=None), Instruction(opname='CALL', opcode=51, arg=1, argval=1, argrepr='', offset=326, start_offset=326, starts_line=False, line_number=28, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), Instruction(opname='POP_TOP', opcode=31, arg=None, argval=None, argrepr='', offset=334, start_offset=334, starts_line=False, line_number=28, label=None, positions=None, cache_info=None), @@ -1866,9 +1867,9 @@ def _prepare_test_cases(): Instruction(opname='PUSH_EXC_INFO', opcode=32, arg=None, argval=None, argrepr='', offset=340, start_offset=340, starts_line=True, line_number=25, label=None, positions=None, cache_info=None), Instruction(opname='WITH_EXCEPT_START', opcode=43, arg=None, argval=None, argrepr='', offset=342, start_offset=342, 
starts_line=False, line_number=25, label=None, positions=None, cache_info=None), Instruction(opname='TO_BOOL', opcode=39, arg=None, argval=None, argrepr='', offset=344, start_offset=344, starts_line=False, line_number=25, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('version', 2, b'\x00\x00\x00\x00')]), - Instruction(opname='POP_JUMP_IF_TRUE', opcode=99, arg=2, argval=360, argrepr='to L11', offset=352, start_offset=352, starts_line=False, line_number=25, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='POP_JUMP_IF_TRUE', opcode=101, arg=2, argval=360, argrepr='to L11', offset=352, start_offset=352, starts_line=False, line_number=25, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), Instruction(opname='NOT_TAKEN', opcode=28, arg=None, argval=None, argrepr='', offset=356, start_offset=356, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), - Instruction(opname='RERAISE', opcode=101, arg=2, argval=2, argrepr='', offset=358, start_offset=358, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='RERAISE', opcode=103, arg=2, argval=2, argrepr='', offset=358, start_offset=358, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), Instruction(opname='POP_TOP', opcode=31, arg=None, argval=None, argrepr='', offset=360, start_offset=360, starts_line=False, line_number=25, label=11, positions=None, cache_info=None), Instruction(opname='POP_EXCEPT', opcode=29, arg=None, argval=None, argrepr='', offset=362, start_offset=362, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), Instruction(opname='POP_TOP', opcode=31, arg=None, argval=None, argrepr='', offset=364, start_offset=364, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), @@ -1877,32 +1878,32 @@ def _prepare_test_cases(): Instruction(opname='JUMP_BACKWARD_NO_INTERRUPT', 
opcode=74, arg=29, argval=314, argrepr='to L10', offset=370, start_offset=370, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), Instruction(opname='COPY', opcode=58, arg=3, argval=3, argrepr='', offset=372, start_offset=372, starts_line=True, line_number=None, label=None, positions=None, cache_info=None), Instruction(opname='POP_EXCEPT', opcode=29, arg=None, argval=None, argrepr='', offset=374, start_offset=374, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), - Instruction(opname='RERAISE', opcode=101, arg=1, argval=1, argrepr='', offset=376, start_offset=376, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), + Instruction(opname='RERAISE', opcode=103, arg=1, argval=1, argrepr='', offset=376, start_offset=376, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), Instruction(opname='PUSH_EXC_INFO', opcode=32, arg=None, argval=None, argrepr='', offset=378, start_offset=378, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_GLOBAL', opcode=88, arg=4, argval='ZeroDivisionError', argrepr='ZeroDivisionError', offset=380, start_offset=380, starts_line=True, line_number=22, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_GLOBAL', opcode=90, arg=4, argval='ZeroDivisionError', argrepr='ZeroDivisionError', offset=380, start_offset=380, starts_line=True, line_number=22, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), Instruction(opname='CHECK_EXC_MATCH', opcode=5, arg=None, argval=None, argrepr='', offset=390, start_offset=390, starts_line=False, line_number=22, label=None, positions=None, 
cache_info=None), - Instruction(opname='POP_JUMP_IF_FALSE', opcode=96, arg=15, argval=426, argrepr='to L12', offset=392, start_offset=392, starts_line=False, line_number=22, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='POP_JUMP_IF_FALSE', opcode=98, arg=15, argval=426, argrepr='to L12', offset=392, start_offset=392, starts_line=False, line_number=22, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), Instruction(opname='NOT_TAKEN', opcode=28, arg=None, argval=None, argrepr='', offset=396, start_offset=396, starts_line=False, line_number=22, label=None, positions=None, cache_info=None), Instruction(opname='POP_TOP', opcode=31, arg=None, argval=None, argrepr='', offset=398, start_offset=398, starts_line=False, line_number=22, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_GLOBAL', opcode=88, arg=3, argval='print', argrepr='print + NULL', offset=400, start_offset=400, starts_line=True, line_number=23, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_GLOBAL', opcode=90, arg=3, argval='print', argrepr='print + NULL', offset=400, start_offset=400, starts_line=True, line_number=23, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), Instruction(opname='LOAD_CONST', opcode=80, arg=4, argval='Here we go, here we go, here we go...', argrepr="'Here we go, here we go, here we go...'", offset=410, start_offset=410, starts_line=False, line_number=23, label=None, positions=None, cache_info=None), Instruction(opname='CALL', opcode=51, arg=1, argval=1, argrepr='', offset=412, start_offset=412, starts_line=False, line_number=23, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), 
('func_version', 2, b'\x00\x00\x00\x00')]), Instruction(opname='POP_TOP', opcode=31, arg=None, argval=None, argrepr='', offset=420, start_offset=420, starts_line=False, line_number=23, label=None, positions=None, cache_info=None), Instruction(opname='POP_EXCEPT', opcode=29, arg=None, argval=None, argrepr='', offset=422, start_offset=422, starts_line=False, line_number=23, label=None, positions=None, cache_info=None), Instruction(opname='JUMP_BACKWARD_NO_INTERRUPT', opcode=74, arg=56, argval=314, argrepr='to L10', offset=424, start_offset=424, starts_line=False, line_number=23, label=None, positions=None, cache_info=None), - Instruction(opname='RERAISE', opcode=101, arg=0, argval=0, argrepr='', offset=426, start_offset=426, starts_line=True, line_number=22, label=12, positions=None, cache_info=None), + Instruction(opname='RERAISE', opcode=103, arg=0, argval=0, argrepr='', offset=426, start_offset=426, starts_line=True, line_number=22, label=12, positions=None, cache_info=None), Instruction(opname='COPY', opcode=58, arg=3, argval=3, argrepr='', offset=428, start_offset=428, starts_line=True, line_number=None, label=None, positions=None, cache_info=None), Instruction(opname='POP_EXCEPT', opcode=29, arg=None, argval=None, argrepr='', offset=430, start_offset=430, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), - Instruction(opname='RERAISE', opcode=101, arg=1, argval=1, argrepr='', offset=432, start_offset=432, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), + Instruction(opname='RERAISE', opcode=103, arg=1, argval=1, argrepr='', offset=432, start_offset=432, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), Instruction(opname='PUSH_EXC_INFO', opcode=32, arg=None, argval=None, argrepr='', offset=434, start_offset=434, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_GLOBAL', opcode=88, arg=3, argval='print', 
argrepr='print + NULL', offset=436, start_offset=436, starts_line=True, line_number=28, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_GLOBAL', opcode=90, arg=3, argval='print', argrepr='print + NULL', offset=436, start_offset=436, starts_line=True, line_number=28, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), Instruction(opname='LOAD_CONST', opcode=80, arg=5, argval="OK, now we're done", argrepr='"OK, now we\'re done"', offset=446, start_offset=446, starts_line=False, line_number=28, label=None, positions=None, cache_info=None), Instruction(opname='CALL', opcode=51, arg=1, argval=1, argrepr='', offset=448, start_offset=448, starts_line=False, line_number=28, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), Instruction(opname='POP_TOP', opcode=31, arg=None, argval=None, argrepr='', offset=456, start_offset=456, starts_line=False, line_number=28, label=None, positions=None, cache_info=None), - Instruction(opname='RERAISE', opcode=101, arg=0, argval=0, argrepr='', offset=458, start_offset=458, starts_line=False, line_number=28, label=None, positions=None, cache_info=None), + Instruction(opname='RERAISE', opcode=103, arg=0, argval=0, argrepr='', offset=458, start_offset=458, starts_line=False, line_number=28, label=None, positions=None, cache_info=None), Instruction(opname='COPY', opcode=58, arg=3, argval=3, argrepr='', offset=460, start_offset=460, starts_line=True, line_number=None, label=None, positions=None, cache_info=None), Instruction(opname='POP_EXCEPT', opcode=29, arg=None, argval=None, argrepr='', offset=462, start_offset=462, starts_line=False, line_number=None, label=None, positions=None, 
cache_info=None), - Instruction(opname='RERAISE', opcode=101, arg=1, argval=1, argrepr='', offset=464, start_offset=464, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), + Instruction(opname='RERAISE', opcode=103, arg=1, argval=1, argrepr='', offset=464, start_offset=464, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), ] # One last piece of inspect fodder to check the default line number handling From a12ccd9e513b4af8701f9fcb3a6f5f8f2d8dcf6a Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 20 Feb 2025 20:10:15 -0800 Subject: [PATCH 25/73] Fix refleak in _BINARY_OP_INPLACE_ADD_UNICODE PyStackRef_AsPyObjectSteal creates a new reference if the stackref is deferred. This reference is leaked if we deopt before the corresponding decref. --- Python/bytecodes.c | 4 ++-- Python/executor_cases.c.h | 4 ++-- Python/generated_cases.c.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 3514961070d1fb..c372c3b13dce7c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -732,9 +732,8 @@ dummy_func( // At the end we just skip over the STORE_FAST. 
op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - PyObject *right_o = PyStackRef_AsPyObjectSteal(right); assert(PyUnicode_CheckExact(left_o)); - assert(PyUnicode_CheckExact(right_o)); + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(right))); int next_oparg; #if TIER_ONE @@ -761,6 +760,7 @@ dummy_func( PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); DEAD(left); PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); + PyObject *right_o = PyStackRef_AsPyObjectSteal(right); PyUnicode_Append(&temp, right_o); *target_local = PyStackRef_FromPyObjectSteal(temp); Py_DECREF(right_o); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 2e4d15342d747c..36f45c515a46ea 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -990,9 +990,8 @@ right = stack_pointer[-1]; left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - PyObject *right_o = PyStackRef_AsPyObjectSteal(right); assert(PyUnicode_CheckExact(left_o)); - assert(PyUnicode_CheckExact(right_o)); + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(right))); int next_oparg; #if TIER_ONE assert(next_instr->op.code == STORE_FAST); @@ -1020,6 +1019,7 @@ // assert(Py_REFCNT(left_o) >= 2); PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); + PyObject *right_o = PyStackRef_AsPyObjectSteal(right); stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e0362e2227f02c..6bce98aabebebc 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -341,9 +341,8 @@ // _BINARY_OP_INPLACE_ADD_UNICODE { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - PyObject *right_o = PyStackRef_AsPyObjectSteal(right); assert(PyUnicode_CheckExact(left_o)); - 
assert(PyUnicode_CheckExact(right_o)); + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(right))); int next_oparg; #if TIER_ONE assert(next_instr->op.code == STORE_FAST); @@ -372,6 +371,7 @@ // assert(Py_REFCNT(left_o) >= 2); PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); + PyObject *right_o = PyStackRef_AsPyObjectSteal(right); stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); From 1ef26c580b0fb321195ab3fd5c8d40acd5ff753e Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 21 Feb 2025 11:15:36 -0800 Subject: [PATCH 26/73] Create new references to fast locals overwritten via f_locals These may provide support for borrowed references contained in frames closer to the top of the call stack. Add them to a list attached to the frame when they are overwritten, to be destroyed when the frame is destroyed. --- Include/internal/pycore_frame.h | 6 ++++++ Lib/test/test_frame.py | 14 ++++++++++++++ Lib/test/test_sys.py | 2 +- Objects/frameobject.c | 18 +++++++++++++++++- 4 files changed, 38 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 1392f24161bea4..610324e6413782 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -31,6 +31,12 @@ struct _frame { PyEval_GetLocals requires a borrowed reference so the actual reference is stored here */ PyObject *f_locals_cache; + /* A list containing strong references to fast locals that were overwritten + * via f_locals. Borrowed references to these locals may exist in frames + * closer to the top of the stack. The references in this list act as + * "support" for the borrowed references, ensuring that they remain valid. 
+ */ + PyObject *f_overwritten_fast_locals; /* The frame data, if this frame object owns the frame */ PyObject *_f_frame_data[1]; }; diff --git a/Lib/test/test_frame.py b/Lib/test/test_frame.py index a6e11f1a5342b6..f0c27552f60c22 100644 --- a/Lib/test/test_frame.py +++ b/Lib/test/test_frame.py @@ -597,6 +597,20 @@ def make_frame(): with self.assertRaises(TypeError): FrameLocalsProxy(frame=sys._getframe()) # no keyword arguments + def test_overwrite_locals(self): + # Verify we do not crash if we overwrite a local passed as an argument + # from an ancestor in the call stack. + def f(): + xs = [1, 2, 3] + return g(xs) + + def g(xs): + f = sys._getframe() + f.f_back.f_locals["xs"] = None + return xs[1] + + self.assertEqual(f(), 2) + class FrameLocalsProxyMappingTests(mapping_tests.TestHashMappingProtocol): """Test that FrameLocalsProxy behaves like a Mapping (with exceptions)""" diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 87c0106ad30840..34c3ae15e0936a 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1643,7 +1643,7 @@ def func(): INTERPRETER_FRAME = '9PihcP' else: INTERPRETER_FRAME = '9PhcP' - check(x, size('3PiccPP' + INTERPRETER_FRAME + 'P')) + check(x, size('3PiccPPP' + INTERPRETER_FRAME + 'P')) # function def func(): pass check(func, size('16Pi')) diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 001b58dc052416..a55e9c590b63ee 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -243,7 +243,19 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value) Py_XINCREF(value); PyCell_SetTakeRef((PyCellObject *)cell, value); } else if (value != PyStackRef_AsPyObjectBorrow(oldvalue)) { - PyStackRef_XCLOSE(fast[i]); + if (!PyStackRef_IsNull(fast[i])) { + if (frame->f_overwritten_fast_locals == NULL) { + frame->f_overwritten_fast_locals = PyList_New(0); + if (frame->f_overwritten_fast_locals == NULL) { + return -1; + } + } + PyObject *obj = PyStackRef_AsPyObjectBorrow(fast[i]); + if 
(PyList_Append(frame->f_overwritten_fast_locals, obj) < 0) { + return -1; + } + PyStackRef_CLOSE(fast[i]); + } fast[i] = PyStackRef_FromPyObjectNew(value); } return 0; @@ -1806,6 +1818,7 @@ frame_dealloc(PyObject *op) Py_CLEAR(f->f_trace); Py_CLEAR(f->f_extra_locals); Py_CLEAR(f->f_locals_cache); + Py_CLEAR(f->f_overwritten_fast_locals); PyObject_GC_Del(f); Py_TRASHCAN_END; } @@ -1818,6 +1831,7 @@ frame_traverse(PyObject *op, visitproc visit, void *arg) Py_VISIT(f->f_trace); Py_VISIT(f->f_extra_locals); Py_VISIT(f->f_locals_cache); + Py_VISIT(f->f_overwritten_fast_locals); if (f->f_frame->owner != FRAME_OWNED_BY_FRAME_OBJECT) { return 0; } @@ -1832,6 +1846,7 @@ frame_tp_clear(PyObject *op) Py_CLEAR(f->f_trace); Py_CLEAR(f->f_extra_locals); Py_CLEAR(f->f_locals_cache); + Py_CLEAR(f->f_overwritten_fast_locals); /* locals and stack */ _PyStackRef *locals = _PyFrame_GetLocalsArray(f->f_frame); @@ -1973,6 +1988,7 @@ _PyFrame_New_NoTrack(PyCodeObject *code) f->f_lineno = 0; f->f_extra_locals = NULL; f->f_locals_cache = NULL; + f->f_overwritten_fast_locals = NULL; return f; } From 1eb92260de180fbe4484f22a54571cbb387ac07a Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 21 Feb 2025 11:45:00 -0800 Subject: [PATCH 27/73] Implement two missing opcodes in the static analysis `STORE_FAST_LOAD_FAST` and `LOAD_FAST_AND_CLEAR` both need to kill the local. 
--- Python/flowgraph.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 685380c5e9f1d1..213250b31de484 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2600,6 +2600,12 @@ optimize_load_fast(cfg_builder *g) break; } + case LOAD_FAST_AND_CLEAR: { + kill_local(has_killed_refs, &refs, oparg); + ref_stack_push(&refs, (ref){i, oparg}); + break; + } + case LOAD_FAST_LOAD_FAST: { if (ref_stack_push(&refs, (ref){i, oparg >> 4}) < 0) { status = ERROR; @@ -2618,6 +2624,13 @@ optimize_load_fast(cfg_builder *g) break; } + case STORE_FAST_LOAD_FAST: { + kill_local(has_killed_refs, &refs, oparg >> 4); + ref_stack_pop(&refs); + ref_stack_push(&refs, (ref){i, oparg & 15}); + break; + } + case STORE_FAST_STORE_FAST: { kill_local(has_killed_refs, &refs, oparg >> 4); kill_local(has_killed_refs, &refs, oparg & 15); From 7291c49222d5029b585f5a5dfa35684ff0d98f07 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 21 Feb 2025 13:10:50 -0800 Subject: [PATCH 28/73] Use g_block_list when resetting stack depth This ensures we hit all the blocks --- Python/flowgraph.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 213250b31de484..c3197f270b9b60 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2535,10 +2535,10 @@ optimize_load_fast(cfg_builder *g) int status; ref_stack refs = {0}; bool *has_killed_refs = NULL; - basicblock *entryblock = g->g_entryblock; - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) { b->b_startdepth = -1; } + basicblock *entryblock = g->g_entryblock; basicblock **blocks = make_cfg_traversal_stack(entryblock); if (blocks == NULL) { status = ERROR; From 90bf8dff4b045904cb726c60278ee5518a88f1ef Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 21 Feb 2025 13:28:42 -0800 Subject: [PATCH 29/73] Avoid reallocating state for each basic block --- 
Python/flowgraph.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index c3197f270b9b60..93039a5c88b30c 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2534,9 +2534,16 @@ optimize_load_fast(cfg_builder *g) { int status; ref_stack refs = {0}; - bool *has_killed_refs = NULL; + int max_instrs = 0; for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) { b->b_startdepth = -1; + max_instrs = Py_MAX(max_instrs, b->b_iused); + } + size_t has_killed_refs_size = max_instrs * sizeof(bool); + bool *has_killed_refs = PyMem_Calloc(max_instrs, has_killed_refs_size); + if (has_killed_refs == NULL) { + PyErr_NoMemory(); + return ERROR; } basicblock *entryblock = g->g_entryblock; basicblock **blocks = make_cfg_traversal_stack(entryblock); @@ -2556,17 +2563,7 @@ optimize_load_fast(cfg_builder *g) // Reset state that tracks which instructions produce references to // locals that are on the stack while the local is overwritten. - int size = sizeof(*has_killed_refs) * block->b_iused; - bool *p = PyMem_Realloc(has_killed_refs, size); - if (p == NULL) { - PyErr_NoMemory(); - status = ERROR; - goto done; - } - else { - has_killed_refs = p; - } - memset(has_killed_refs, 0, size); + memset(has_killed_refs, 0, has_killed_refs_size); // Reset the stack of refs. 
We don't track references on the stack // across basic blocks, but the bytecode will expect their From 9bfa922e37ba569ca0e58814a65ddfaa26244baf Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 21 Feb 2025 15:50:39 -0800 Subject: [PATCH 30/73] Generators --- Python/bytecodes.c | 14 ++++++++------ Python/executor_cases.c.h | 4 ++-- Python/generated_cases.c.h | 23 ++++++++++++----------- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c372c3b13dce7c..396563b40b29a4 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1201,6 +1201,8 @@ dummy_func( PyObject *retval_o; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); + _PyStackRef tmp = _PyStackRef_StealIfUnborrowed(v); + DEAD(v); if ((tstate->interp->eval_frame == NULL) && (Py_TYPE(receiver_o) == &PyGen_Type || Py_TYPE(receiver_o) == &PyCoro_Type) && ((PyGenObject *)receiver_o)->gi_frame_state < FRAME_EXECUTING) @@ -1208,7 +1210,7 @@ dummy_func( PyGenObject *gen = (PyGenObject *)receiver_o; _PyInterpreterFrame *gen_frame = &gen->gi_iframe; STACK_SHRINK(1); - _PyFrame_StackPush(gen_frame, v); + _PyFrame_StackPush(gen_frame, tmp); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; @@ -1224,7 +1226,7 @@ dummy_func( else { retval_o = PyObject_CallMethodOneArg(receiver_o, &_Py_ID(send), - PyStackRef_AsPyObjectBorrow(v)); + PyStackRef_AsPyObjectBorrow(tmp)); } if (retval_o == NULL) { int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); @@ -1237,11 +1239,11 @@ dummy_func( JUMPBY(oparg); } else { - PyStackRef_CLOSE(v); + PyStackRef_CLOSE(tmp); ERROR_IF(true, error); } } - PyStackRef_CLOSE(v); + PyStackRef_CLOSE(tmp); retval = PyStackRef_FromPyObjectSteal(retval_o); } @@ -1253,7 +1255,7 @@ dummy_func( DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING); STAT_INC(SEND, hit); gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, v); + 
_PyFrame_StackPush(gen_frame, _PyStackRef_StealIfUnborrowed(v)); DEAD(v); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; @@ -1300,7 +1302,7 @@ dummy_func( #endif RELOAD_STACK(); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); - value = temp; + value = _PyStackRef_StealIfUnborrowed(temp); LLTRACE_RESUME_FRAME(); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 36f45c515a46ea..a28daa07bcf6f7 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1796,7 +1796,7 @@ } STAT_INC(SEND, hit); gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, v); + _PyFrame_StackPush(gen_frame, _PyStackRef_StealIfUnborrowed(v)); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; @@ -1843,7 +1843,7 @@ #endif stack_pointer = _PyFrame_GetStackPointer(frame); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); - value = temp; + value = _PyStackRef_StealIfUnborrowed(temp); LLTRACE_RESUME_FRAME(); stack_pointer[0] = value; stack_pointer += 1; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 6bce98aabebebc..cef4cae85411dd 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -7314,7 +7314,7 @@ #endif stack_pointer = _PyFrame_GetStackPointer(frame); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); - value = temp; + value = _PyStackRef_StealIfUnborrowed(temp); LLTRACE_RESUME_FRAME(); } stack_pointer[0] = value; @@ -10362,6 +10362,7 @@ PyObject *receiver_o = PyStackRef_AsPyObjectBorrow(receiver); PyObject *retval_o; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); + _PyStackRef tmp = _PyStackRef_StealIfUnborrowed(v); if ((tstate->interp->eval_frame == NULL) && (Py_TYPE(receiver_o) == &PyGen_Type || Py_TYPE(receiver_o) == &PyCoro_Type) && ((PyGenObject *)receiver_o)->gi_frame_state < FRAME_EXECUTING) @@ -10369,7 +10370,7 @@ PyGenObject *gen = (PyGenObject *)receiver_o; _PyInterpreterFrame *gen_frame = 
&gen->gi_iframe; STACK_SHRINK(1); - _PyFrame_StackPush(gen_frame, v); + _PyFrame_StackPush(gen_frame, tmp); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; @@ -10380,15 +10381,19 @@ DISPATCH_INLINED(gen_frame); } if (PyStackRef_IsNone(v) && PyIter_Check(receiver_o)) { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); retval_o = Py_TYPE(receiver_o)->tp_iternext(receiver_o); stack_pointer = _PyFrame_GetStackPointer(frame); } else { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); retval_o = PyObject_CallMethodOneArg(receiver_o, &_Py_ID(send), - PyStackRef_AsPyObjectBorrow(v)); + PyStackRef_AsPyObjectBorrow(tmp)); stack_pointer = _PyFrame_GetStackPointer(frame); } if (retval_o == NULL) { @@ -10408,18 +10413,14 @@ JUMPBY(oparg); } else { - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(v); + PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); JUMP_TO_LABEL(error); } } - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(v); + PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); retval = PyStackRef_FromPyObjectSteal(retval_o); } @@ -10470,7 +10471,7 @@ } STAT_INC(SEND, hit); gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, v); + _PyFrame_StackPush(gen_frame, _PyStackRef_StealIfUnborrowed(v)); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; @@ -11974,7 +11975,7 @@ #endif stack_pointer = _PyFrame_GetStackPointer(frame); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); - value = temp; + value = _PyStackRef_StealIfUnborrowed(temp); LLTRACE_RESUME_FRAME(); stack_pointer[0] = value; stack_pointer += 1; From 
bf6222b590309966e23966495bf2f063cae19de3 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 24 Feb 2025 12:08:20 -0800 Subject: [PATCH 31/73] Move optimize after all other passes have run We need to run last, to ensure instructions are not inserted after we strength reduce load fast instructions. --- Lib/test/test_dis.py | 24 ++++++++++++------------ Lib/test/test_peepholer.py | 4 ++-- Python/flowgraph.c | 17 +++++++++-------- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 3c5201a011c29f..fc69bedb52cad0 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -462,7 +462,7 @@ def foo(a: int, b: str) -> str: NOT_TAKEN STORE_FAST 0 (e) -%4d L4: LOAD_FAST_BORROW 0 (e) +%4d L4: LOAD_FAST 0 (e) LOAD_ATTR 2 (__traceback__) STORE_FAST 1 (tb) L5: POP_EXCEPT @@ -470,7 +470,7 @@ def foo(a: int, b: str) -> str: STORE_FAST 0 (e) DELETE_FAST 0 (e) -%4d LOAD_FAST_BORROW 1 (tb) +%4d LOAD_FAST 1 (tb) RETURN_VALUE -- L6: LOAD_CONST 0 (None) @@ -717,7 +717,7 @@ def _tryfinallyconst(b): -- L3: PUSH_EXC_INFO -%4d LOAD_FAST_BORROW 1 (b) +%4d LOAD_FAST 1 (b) PUSH_NULL CALL 0 POP_TOP @@ -752,7 +752,7 @@ def _tryfinallyconst(b): -- L1: PUSH_EXC_INFO -%4d LOAD_FAST_BORROW 0 (b) +%4d LOAD_FAST 0 (b) PUSH_NULL CALL 0 POP_TOP @@ -791,7 +791,7 @@ def foo(x): %4d RESUME 0 -%4d LOAD_FAST 0 (y) +%4d LOAD_FAST_BORROW 0 (y) BUILD_TUPLE 1 LOAD_CONST 0 () MAKE_FUNCTION @@ -815,7 +815,7 @@ def foo(x): %4d RESUME 0 %4d LOAD_GLOBAL 1 (list + NULL) - LOAD_FAST 0 (x) + LOAD_FAST_BORROW 0 (x) BUILD_TUPLE 1 LOAD_CONST 1 ( at 0x..., file "%s", line %d>) MAKE_FUNCTION @@ -1711,8 +1711,8 @@ def _prepare_test_cases(): Instruction(opname='MAKE_CELL', opcode=95, arg=1, argval='b', argrepr='b', offset=2, start_offset=2, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), Instruction(opname='RESUME', opcode=149, arg=0, argval=0, argrepr='', offset=4, start_offset=4, starts_line=True, line_number=1, label=None, 
positions=None, cache_info=None), Instruction(opname='LOAD_CONST', opcode=80, arg=3, argval=(3, 4), argrepr='(3, 4)', offset=6, start_offset=6, starts_line=True, line_number=2, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_FAST', opcode=82, arg=0, argval='a', argrepr='a', offset=8, start_offset=8, starts_line=False, line_number=2, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_FAST', opcode=82, arg=1, argval='b', argrepr='b', offset=10, start_offset=10, starts_line=False, line_number=2, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST_BORROW', opcode=84, arg=0, argval='a', argrepr='a', offset=8, start_offset=8, starts_line=False, line_number=2, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST_BORROW', opcode=84, arg=1, argval='b', argrepr='b', offset=10, start_offset=10, starts_line=False, line_number=2, label=None, positions=None, cache_info=None), Instruction(opname='BUILD_TUPLE', opcode=50, arg=2, argval=2, argrepr='', offset=12, start_offset=12, starts_line=False, line_number=2, label=None, positions=None, cache_info=None), Instruction(opname='LOAD_CONST', opcode=80, arg=0, argval=code_object_f, argrepr=repr(code_object_f), offset=14, start_offset=14, starts_line=False, line_number=2, label=None, positions=None, cache_info=None), Instruction(opname='MAKE_FUNCTION', opcode=23, arg=None, argval=None, argrepr='', offset=16, start_offset=16, starts_line=False, line_number=2, label=None, positions=None, cache_info=None), @@ -1739,10 +1739,10 @@ def _prepare_test_cases(): Instruction(opname='MAKE_CELL', opcode=95, arg=1, argval='d', argrepr='d', offset=4, start_offset=4, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), Instruction(opname='RESUME', opcode=149, arg=0, argval=0, argrepr='', offset=6, start_offset=6, starts_line=True, line_number=2, label=None, positions=None, cache_info=None), Instruction(opname='LOAD_CONST', 
opcode=80, arg=1, argval=(5, 6), argrepr='(5, 6)', offset=8, start_offset=8, starts_line=True, line_number=3, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_FAST', opcode=82, arg=3, argval='a', argrepr='a', offset=10, start_offset=10, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_FAST', opcode=82, arg=4, argval='b', argrepr='b', offset=12, start_offset=12, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_FAST', opcode=82, arg=0, argval='c', argrepr='c', offset=14, start_offset=14, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), - Instruction(opname='LOAD_FAST', opcode=82, arg=1, argval='d', argrepr='d', offset=16, start_offset=16, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST_BORROW', opcode=84, arg=3, argval='a', argrepr='a', offset=10, start_offset=10, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST_BORROW', opcode=84, arg=4, argval='b', argrepr='b', offset=12, start_offset=12, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST_BORROW', opcode=84, arg=0, argval='c', argrepr='c', offset=14, start_offset=14, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST_BORROW', opcode=84, arg=1, argval='d', argrepr='d', offset=16, start_offset=16, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), Instruction(opname='BUILD_TUPLE', opcode=50, arg=4, argval=4, argrepr='', offset=18, start_offset=18, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), Instruction(opname='LOAD_CONST', opcode=80, arg=0, argval=code_object_inner, argrepr=repr(code_object_inner), offset=20, start_offset=20, 
starts_line=False, line_number=3, label=None, positions=None, cache_info=None), Instruction(opname='MAKE_FUNCTION', opcode=23, arg=None, argval=None, argrepr='', offset=22, start_offset=22, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 7fb0e16af32106..94fcccbed942bb 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -2325,9 +2325,9 @@ def test_list_to_tuple_get_iter(self): ] expected_insts = [ ("BUILD_LIST", 0, 1), - ("LOAD_FAST_BORROW", 0, 2), + ("LOAD_FAST", 0, 2), ("LIST_EXTEND", 1, 3), - ("LOAD_FAST_BORROW", 1, 4), + ("LOAD_FAST", 1, 4), ("LIST_EXTEND", 1, 5), ("NOP", None, 6), # ("CALL_INTRINSIC_1", INTRINSIC_LIST_TO_TUPLE, 6), ("GET_ITER", None, 7), diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 93039a5c88b30c..ad92fd527d066b 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -1,4 +1,3 @@ - #include #include "Python.h" @@ -571,6 +570,7 @@ normalize_jumps_in_block(cfg_builder *g, basicblock *b) { basicblock_addop(backwards_jump, NOT_TAKEN, 0, last->i_loc)); RETURN_IF_ERROR( basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc)); + backwards_jump->b_startdepth = target->b_startdepth; last->i_opcode = reversed_opcode; last->i_target = b->b_next; @@ -2520,10 +2520,8 @@ kill_local(bool *has_killed_refs, ref_stack *refs, int local) static void load_fast_push_block(basicblock ***sp, basicblock *target, int start_depth) { - assert(!target->b_visited || (target->b_startdepth == start_depth)); + assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth); if (!target->b_visited) { - assert(target->b_startdepth == -1); - target->b_startdepth = start_depth; target->b_visited = 1; *(*sp)++ = target; } @@ -2535,8 +2533,8 @@ optimize_load_fast(cfg_builder *g) int status; ref_stack refs = {0}; int max_instrs = 0; - for (basicblock *b = g->g_block_list; b != NULL; b = b->b_list) { - b->b_startdepth = 
-1; + basicblock *entryblock = g->g_entryblock; + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { max_instrs = Py_MAX(max_instrs, b->b_iused); } size_t has_killed_refs_size = max_instrs * sizeof(bool); @@ -2545,7 +2543,6 @@ optimize_load_fast(cfg_builder *g) PyErr_NoMemory(); return ERROR; } - basicblock *entryblock = g->g_entryblock; basicblock **blocks = make_cfg_traversal_stack(entryblock); if (blocks == NULL) { status = ERROR; @@ -3318,7 +3315,6 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache, add_checks_for_loads_of_uninitialized_variables( g->g_entryblock, nlocals, nparams)); RETURN_IF_ERROR(insert_superinstructions(g)); - RETURN_IF_ERROR(optimize_load_fast(g)); RETURN_IF_ERROR(push_cold_blocks_to_end(g)); RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno)); @@ -3650,6 +3646,11 @@ _PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g, RETURN_IF_ERROR(normalize_jumps(g)); assert(no_redundant_jumps(g)); + /* Can't modify the bytecode after inserting instructions that produce + * borrowed references. + */ + RETURN_IF_ERROR(optimize_load_fast(g)); + /* Can't modify the bytecode after computing jump offsets. */ if (_PyCfg_ToInstructionSequence(g, seq) < 0) { return ERROR; From dd97d0c11da80daad364e05a35c74de8da1be1ce Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 24 Feb 2025 12:10:27 -0800 Subject: [PATCH 32/73] Don't promote borrowed references in STORE_FAST This reduces the number of changes to the interpreter loop and leaves the various STORE_FAST instructions unmodified. It means that we cannot optimize LOAD_FAST instructions that are consumed by STORE_FAST instructions, but I think this is a net positive. It speeds up STORE_FAST and doesn't appreciably reduce the number of optimized instructions in the benchmark suite. 
--- Python/bytecodes.c | 8 ++++---- Python/executor_cases.c.h | 18 +++++++++--------- Python/flowgraph.c | 20 ++++++++++++++++---- Python/generated_cases.c.h | 8 ++++---- 4 files changed, 33 insertions(+), 21 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 396563b40b29a4..ea103c268a3aad 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -339,7 +339,7 @@ dummy_func( replicate(8) inst(STORE_FAST, (value --)) { _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); + GETLOCAL(oparg) = value; DEAD(value); PyStackRef_XCLOSE(tmp); } @@ -352,7 +352,7 @@ dummy_func( uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; _PyStackRef tmp = GETLOCAL(oparg1); - GETLOCAL(oparg1) = _PyStackRef_StealIfUnborrowed(value1); + GETLOCAL(oparg1) = value1; DEAD(value1); value2 = PyStackRef_DUP(GETLOCAL(oparg2)); PyStackRef_XCLOSE(tmp); @@ -362,11 +362,11 @@ dummy_func( uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; _PyStackRef tmp = GETLOCAL(oparg1); - GETLOCAL(oparg1) = _PyStackRef_StealIfUnborrowed(value1); + GETLOCAL(oparg1) = value1; DEAD(value1); PyStackRef_XCLOSE(tmp); tmp = GETLOCAL(oparg2); - GETLOCAL(oparg2) = _PyStackRef_StealIfUnborrowed(value2); + GETLOCAL(oparg2) = value2; DEAD(value2); PyStackRef_XCLOSE(tmp); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index a28daa07bcf6f7..dea2912a094e54 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -317,7 +317,7 @@ assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); + GETLOCAL(oparg) = value; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -332,7 +332,7 @@ assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); + GETLOCAL(oparg) = value; 
stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -347,7 +347,7 @@ assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); + GETLOCAL(oparg) = value; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -362,7 +362,7 @@ assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); + GETLOCAL(oparg) = value; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -377,7 +377,7 @@ assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); + GETLOCAL(oparg) = value; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -392,7 +392,7 @@ assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); + GETLOCAL(oparg) = value; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -407,7 +407,7 @@ assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); + GETLOCAL(oparg) = value; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -422,7 +422,7 @@ assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); + GETLOCAL(oparg) = value; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -436,7 +436,7 @@ oparg = CURRENT_OPARG(); value = stack_pointer[-1]; _PyStackRef tmp = 
GETLOCAL(oparg); - GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); + GETLOCAL(oparg) = value; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); diff --git a/Python/flowgraph.c b/Python/flowgraph.c index ad92fd527d066b..cd6ae83e0f31e0 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2614,13 +2614,19 @@ optimize_load_fast(cfg_builder *g) case STORE_FAST: { kill_local(has_killed_refs, &refs, oparg); - ref_stack_pop(&refs); + ref r = ref_stack_pop(&refs); + if (r.instr != -1) { + has_killed_refs[r.instr] = true; + } break; } case STORE_FAST_LOAD_FAST: { kill_local(has_killed_refs, &refs, oparg >> 4); - ref_stack_pop(&refs); + ref r = ref_stack_pop(&refs); + if (r.instr != -1) { + has_killed_refs[r.instr] = true; + } ref_stack_push(&refs, (ref){i, oparg & 15}); break; } @@ -2628,8 +2634,14 @@ optimize_load_fast(cfg_builder *g) case STORE_FAST_STORE_FAST: { kill_local(has_killed_refs, &refs, oparg >> 4); kill_local(has_killed_refs, &refs, oparg & 15); - ref_stack_pop(&refs); - ref_stack_pop(&refs); + ref r = ref_stack_pop(&refs); + if (r.instr != -1) { + has_killed_refs[r.instr] = true; + } + r = ref_stack_pop(&refs); + if (r.instr != -1) { + has_killed_refs[r.instr] = true; + } break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index cef4cae85411dd..28490e4559a409 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -10943,7 +10943,7 @@ _PyStackRef value; value = stack_pointer[-1]; _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = _PyStackRef_StealIfUnborrowed(value); + GETLOCAL(oparg) = value; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -10966,7 +10966,7 @@ uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; _PyStackRef tmp = GETLOCAL(oparg1); - GETLOCAL(oparg1) = _PyStackRef_StealIfUnborrowed(value1); + GETLOCAL(oparg1) = value1; value2 = PyStackRef_DUP(GETLOCAL(oparg2)); 
stack_pointer[-1] = value2; _PyFrame_SetStackPointer(frame, stack_pointer); @@ -10990,14 +10990,14 @@ uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; _PyStackRef tmp = GETLOCAL(oparg1); - GETLOCAL(oparg1) = _PyStackRef_StealIfUnborrowed(value1); + GETLOCAL(oparg1) = value1; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); tmp = GETLOCAL(oparg2); - GETLOCAL(oparg2) = _PyStackRef_StealIfUnborrowed(value2); + GETLOCAL(oparg2) = value2; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); From 6680709973f4034f2781a3639cfc8d77a6ec5526 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 24 Feb 2025 15:42:14 -0800 Subject: [PATCH 33/73] Track reasons for not being able to optimize instructions This is a little cleaner and makes debugging easier --- Python/flowgraph.c | 123 +++++++++++++++++++++++++-------------------- 1 file changed, 68 insertions(+), 55 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index cd6ae83e0f31e0..af39366eeaf52a 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2415,35 +2415,24 @@ insert_superinstructions(cfg_builder *g) return res; } +#define NOT_LOCAL -1 +#define DUMMY_INSTR -1 +#define DUMMY_REF (ref){DUMMY_INSTR, NOT_LOCAL} + typedef struct { - // Index of instruction that produced the reference or -1. + // Index of instruction that produced the reference or DUMMY_INSTR. int instr; - // The local to which the reference refers or -1. + // The local to which the reference refers or NOT_LOCAL. 
int local; } ref; -#define NOT_LOCAL -1 - -#define DUMMY_REF (ref){-1, NOT_LOCAL} - typedef struct { ref *refs; Py_ssize_t size; Py_ssize_t capacity; } ref_stack; -static bool -ref_stack_has_refs_from_instr(ref_stack *stack, int instr) -{ - for (Py_ssize_t i = 0; i < stack->size; i++) { - if (stack->refs[i].instr == instr) { - return true; - } - } - return false; -} - static int ref_stack_push(ref_stack *stack, ref r) { @@ -2505,18 +2494,36 @@ ref_stack_fini(ref_stack *stack) stack->size = 0; } +typedef enum { + // The loaded reference is still on the stack when the local is killed + LOCAL_KILLED_ON_STACK = 1, + // The loaded reference is stored into a local + STORED_AS_LOCAL = 2, + // The loaded reference is still on the stack at the end of the basic block + REF_UNCONSUMED = 4, +} LoadFastInstrFlag; + static void -kill_local(bool *has_killed_refs, ref_stack *refs, int local) +kill_local(uint8_t *instr_flags, ref_stack *refs, int local) { for (Py_ssize_t i = 0; i < refs->size; i++) { ref r = ref_stack_at(refs, i); if (r.local == local) { assert(r.instr >= 0); - has_killed_refs[r.instr] = true; + instr_flags[r.instr] |= LOCAL_KILLED_ON_STACK; } } } +static void +store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r) +{ + kill_local(instr_flags, refs, local); + if (r.instr != -1) { + instr_flags[r.instr] |= STORED_AS_LOCAL; + } +} + static void load_fast_push_block(basicblock ***sp, basicblock *target, int start_depth) { @@ -2537,9 +2544,9 @@ optimize_load_fast(cfg_builder *g) for (basicblock *b = entryblock; b != NULL; b = b->b_next) { max_instrs = Py_MAX(max_instrs, b->b_iused); } - size_t has_killed_refs_size = max_instrs * sizeof(bool); - bool *has_killed_refs = PyMem_Calloc(max_instrs, has_killed_refs_size); - if (has_killed_refs == NULL) { + size_t instr_flags_size = max_instrs * sizeof(bool); + uint8_t *instr_flags = PyMem_Calloc(max_instrs, instr_flags_size); + if (instr_flags == NULL) { PyErr_NoMemory(); return ERROR; } @@ -2558,9 +2565,8 @@ 
optimize_load_fast(cfg_builder *g) basicblock *block = *--sp; assert(block->b_startdepth > -1); - // Reset state that tracks which instructions produce references to - // locals that are on the stack while the local is overwritten. - memset(has_killed_refs, 0, has_killed_refs_size); + // Reset per-block state. + memset(instr_flags, 0, instr_flags_size); // Reset the stack of refs. We don't track references on the stack // across basic blocks, but the bytecode will expect their @@ -2595,8 +2601,11 @@ optimize_load_fast(cfg_builder *g) } case LOAD_FAST_AND_CLEAR: { - kill_local(has_killed_refs, &refs, oparg); - ref_stack_push(&refs, (ref){i, oparg}); + kill_local(instr_flags, &refs, oparg); + if (ref_stack_push(&refs, (ref){i, oparg}) < 0) { + status = ERROR; + goto done; + } break; } @@ -2613,35 +2622,30 @@ optimize_load_fast(cfg_builder *g) } case STORE_FAST: { - kill_local(has_killed_refs, &refs, oparg); ref r = ref_stack_pop(&refs); - if (r.instr != -1) { - has_killed_refs[r.instr] = true; - } + store_local(instr_flags, &refs, oparg, r); break; } case STORE_FAST_LOAD_FAST: { - kill_local(has_killed_refs, &refs, oparg >> 4); + // STORE_FAST ref r = ref_stack_pop(&refs); - if (r.instr != -1) { - has_killed_refs[r.instr] = true; + store_local(instr_flags, &refs, oparg >> 4, r); + // LOAD_FAST + if (ref_stack_push(&refs, (ref){i, oparg & 15}) < 0) { + status = ERROR; + goto done; } - ref_stack_push(&refs, (ref){i, oparg & 15}); break; } case STORE_FAST_STORE_FAST: { - kill_local(has_killed_refs, &refs, oparg >> 4); - kill_local(has_killed_refs, &refs, oparg & 15); + // STORE_FAST ref r = ref_stack_pop(&refs); - if (r.instr != -1) { - has_killed_refs[r.instr] = true; - } + store_local(instr_flags, &refs, oparg >> 4, r); + // STORE_FAST r = ref_stack_pop(&refs); - if (r.instr != -1) { - has_killed_refs[r.instr] = true; - } + store_local(instr_flags, &refs, oparg & 15, r); break; } @@ -2674,9 +2678,27 @@ optimize_load_fast(cfg_builder *g) } } + // Push fallthrough block 
+ cfg_instr *term = basicblock_last_instr(block); + if (term != NULL && block->b_next != NULL && + !(IS_UNCONDITIONAL_JUMP_OPCODE(term->i_opcode) || + IS_SCOPE_EXIT_OPCODE(term->i_opcode))) { + assert(BB_HAS_FALLTHROUGH(block)); + load_fast_push_block(&sp, block->b_next, refs.size); + } + + // Mark instructions that produce values that are on the stack at the + // end of the basic block + for (Py_ssize_t i = 0; i < refs.size; i++) { + ref r = ref_stack_at(&refs, i); + if (r.instr != -1) { + instr_flags[r.instr] |= REF_UNCONSUMED; + } + } + // Optimize instructions for (int i = 0; i < block->b_iused; i++) { - if (!has_killed_refs[i] && !ref_stack_has_refs_from_instr(&refs, i)) { + if (!instr_flags[i]) { cfg_instr *instr = &block->b_instr[i]; switch (instr->i_opcode) { case LOAD_FAST: @@ -2690,23 +2712,14 @@ optimize_load_fast(cfg_builder *g) } } } - - // Push fallthrough block - cfg_instr *term = basicblock_last_instr(block); - if (term != NULL && block->b_next != NULL && - !(IS_UNCONDITIONAL_JUMP_OPCODE(term->i_opcode) || - IS_SCOPE_EXIT_OPCODE(term->i_opcode))) { - assert(BB_HAS_FALLTHROUGH(block)); - load_fast_push_block(&sp, block->b_next, refs.size); - } } status = SUCCESS; done: ref_stack_fini(&refs); - if (has_killed_refs != NULL) { - PyMem_Free(has_killed_refs); + if (instr_flags != NULL) { + PyMem_Free(instr_flags); } if (blocks != NULL) { PyMem_Free(blocks); From 6568fd93c66a74bbece010843febae7ed022ca07 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 24 Feb 2025 15:53:19 -0800 Subject: [PATCH 34/73] Rename PyStackRef_DupDeferred --- Include/internal/pycore_stackref.h | 2 +- Python/bytecodes.c | 6 +++--- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 6 +++--- Tools/cases_generator/analyzer.py | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index b1a47322114e7d..4cca1d33a88997 100644 --- a/Include/internal/pycore_stackref.h +++ 
b/Include/internal/pycore_stackref.h @@ -291,7 +291,7 @@ PyStackRef_DUP(_PyStackRef stackref) } static inline _PyStackRef -PyStackRef_DupDeferred(_PyStackRef stackref) +PyStackRef_AsDeferred(_PyStackRef stackref) { return (_PyStackRef){ .bits = stackref.bits | Py_TAG_DEFERRED }; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index ea103c268a3aad..64d2eade40df68 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -271,7 +271,7 @@ dummy_func( } inst (LOAD_FAST_BORROW, (-- value)) { - value = PyStackRef_DupDeferred(GETLOCAL(oparg)); + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); } inst(LOAD_FAST_AND_CLEAR, (-- value)) { @@ -289,8 +289,8 @@ dummy_func( inst(LOAD_FAST_BORROW_LOAD_FAST_BORROW, ( -- value1, value2)) { uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; - value1 = PyStackRef_DupDeferred(GETLOCAL(oparg1)); - value2 = PyStackRef_DupDeferred(GETLOCAL(oparg2)); + value1 = PyStackRef_AsDeferred(GETLOCAL(oparg1)); + value2 = PyStackRef_AsDeferred(GETLOCAL(oparg2)); } family(LOAD_CONST, 0) = { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index dea2912a094e54..cd94d0a20c3fd3 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -204,7 +204,7 @@ case _LOAD_FAST_BORROW: { _PyStackRef value; oparg = CURRENT_OPARG(); - value = PyStackRef_DupDeferred(GETLOCAL(oparg)); + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 28490e4559a409..1d9d1021ab7692 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -8809,7 +8809,7 @@ next_instr += 1; INSTRUCTION_STATS(LOAD_FAST_BORROW); _PyStackRef value; - value = PyStackRef_DupDeferred(GETLOCAL(oparg)); + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -8828,8 +8828,8 @@ _PyStackRef value2; uint32_t oparg1 = oparg 
>> 4; uint32_t oparg2 = oparg & 15; - value1 = PyStackRef_DupDeferred(GETLOCAL(oparg1)); - value2 = PyStackRef_DupDeferred(GETLOCAL(oparg2)); + value1 = PyStackRef_AsDeferred(GETLOCAL(oparg1)); + value2 = PyStackRef_AsDeferred(GETLOCAL(oparg2)); stack_pointer[0] = value1; stack_pointer[1] = value2; stack_pointer += 2; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 2ff4c2bee7268d..0e1e20b49c6df0 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -584,13 +584,13 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyLong_FromLong", "PyLong_FromSsize_t", "PySlice_New", + "PyStackRef_AsDeferred", "PyStackRef_AsPyObjectBorrow", "PyStackRef_AsPyObjectNew", "PyStackRef_AsPyObjectSteal", "PyStackRef_CLEAR", "PyStackRef_CLOSE_SPECIALIZED", "PyStackRef_DUP", - "PyStackRef_DupDeferred", "_PyStackRef_StealIfUnborrowed", "PyStackRef_False", "PyStackRef_FromPyObjectImmortal", From 6fde7b082e068a53cadef6d4e22386f28695128a Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 24 Feb 2025 16:35:53 -0800 Subject: [PATCH 35/73] Rename _PyStackRef_StealIfUnborrowed --- Include/internal/pycore_frame.h | 16 +++++++--------- Include/internal/pycore_stackref.h | 29 +++++++++++------------------ Python/bytecodes.c | 8 ++++---- Python/executor_cases.c.h | 6 +++--- Python/frame.c | 13 +++++-------- Python/generated_cases.c.h | 12 ++++++------ Tools/cases_generator/analyzer.py | 2 +- 7 files changed, 37 insertions(+), 49 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 610324e6413782..f11f19ff21404e 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -152,23 +152,21 @@ _PyFrame_NumSlotsForCodeObject(PyCodeObject *code) return code->co_framesize - FRAME_SPECIALS_SIZE; } -static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest) +static inline void +_PyFrame_Copy(_PyInterpreterFrame *src, 
_PyInterpreterFrame *dest) { *dest = *src; assert(src->stackpointer != NULL); int stacktop = (int)(src->stackpointer - src->localsplus); assert(stacktop >= _PyFrame_GetCode(src)->co_nlocalsplus); dest->stackpointer = dest->localsplus + stacktop; + // The generator may outlive any references that were providing "support" + // for borrowed references in the frame. Convert them to strong references. for (int i = 0; i < stacktop; i++) { - dest->localsplus[i] = _PyStackRef_StealIfUnborrowed(src->localsplus[i]); - } - // XXX - More efficient version of this? - if (_PyStackRef_IsBorrowed(dest->f_executable)) { - dest->f_executable = PyStackRef_FromPyObjectNew(PyStackRef_AsPyObjectBorrow(dest->f_executable)); - } - if (_PyStackRef_IsBorrowed(dest->f_funcobj)) { - dest->f_funcobj = PyStackRef_FromPyObjectNew(PyStackRef_AsPyObjectBorrow(dest->f_funcobj)); + dest->localsplus[i] = _PyStackRef_NewIfBorrowedOrSteal(src->localsplus[i]); } + dest->f_executable = _PyStackRef_NewIfBorrowedOrSteal(dest->f_executable); + dest->f_funcobj = _PyStackRef_NewIfBorrowedOrSteal(dest->f_funcobj); // Don't leave a dangling pointer to the old frame when creating generators // and coroutines: dest->previous = NULL; diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 4cca1d33a88997..16b1b295c5cfa6 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -213,24 +213,6 @@ _PyStackRef_FromPyObjectSteal(PyObject *obj) } # define PyStackRef_FromPyObjectSteal(obj) _PyStackRef_FromPyObjectSteal(_PyObject_CAST(obj)) -static inline _PyStackRef -_PyStackRef_StealIfUnborrowed(_PyStackRef stackref) -{ - if (PyStackRef_IsNull(stackref)) { - return stackref; - } - if (PyStackRef_IsDeferred(stackref)) { - PyObject *obj = PyStackRef_AsPyObjectBorrow(stackref); - if (_Py_IsImmortal(obj) || _PyObject_HasDeferredRefcount(obj)) { - return stackref; - } - else { - return (_PyStackRef){ .bits = (uintptr_t)(Py_NewRef(obj)) | Py_TAG_PTR }; - } - 
} - return stackref; -} - static inline bool _PyStackRef_IsBorrowed(_PyStackRef stackref) { @@ -241,6 +223,17 @@ _PyStackRef_IsBorrowed(_PyStackRef stackref) return !(_Py_IsImmortal(obj) || _PyObject_HasDeferredRefcount(obj)); } + +static inline _PyStackRef +_PyStackRef_NewIfBorrowedOrSteal(_PyStackRef stackref) +{ + if (_PyStackRef_IsBorrowed(stackref)) { + PyObject *obj = PyStackRef_AsPyObjectBorrow(stackref); + return (_PyStackRef){ .bits = (uintptr_t)(Py_NewRef(obj)) | Py_TAG_PTR }; + } + return stackref; +} + static inline _PyStackRef PyStackRef_FromPyObjectNew(PyObject *obj) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 64d2eade40df68..0c011fda04c240 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1103,7 +1103,7 @@ dummy_func( // is pushed to a different frame, the callers' frame. inst(RETURN_VALUE, (retval -- res)) { assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef temp = _PyStackRef_StealIfUnborrowed(retval); + _PyStackRef temp = _PyStackRef_NewIfBorrowedOrSteal(retval); DEAD(retval); SAVE_STACK(); assert(EMPTY()); @@ -1201,7 +1201,7 @@ dummy_func( PyObject *retval_o; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef tmp = _PyStackRef_StealIfUnborrowed(v); + _PyStackRef tmp = _PyStackRef_NewIfBorrowedOrSteal(v); DEAD(v); if ((tstate->interp->eval_frame == NULL) && (Py_TYPE(receiver_o) == &PyGen_Type || Py_TYPE(receiver_o) == &PyCoro_Type) && @@ -1255,7 +1255,7 @@ dummy_func( DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING); STAT_INC(SEND, hit); gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, _PyStackRef_StealIfUnborrowed(v)); + _PyFrame_StackPush(gen_frame, _PyStackRef_NewIfBorrowedOrSteal(v)); DEAD(v); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; @@ -1302,7 +1302,7 @@ dummy_func( #endif RELOAD_STACK(); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); - value = _PyStackRef_StealIfUnborrowed(temp); + value = _PyStackRef_NewIfBorrowedOrSteal(temp); 
LLTRACE_RESUME_FRAME(); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index cd94d0a20c3fd3..cd20ecdbe7bbb2 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1662,7 +1662,7 @@ _PyStackRef res; retval = stack_pointer[-1]; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef temp = _PyStackRef_StealIfUnborrowed(retval); + _PyStackRef temp = _PyStackRef_NewIfBorrowedOrSteal(retval); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -1796,7 +1796,7 @@ } STAT_INC(SEND, hit); gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, _PyStackRef_StealIfUnborrowed(v)); + _PyFrame_StackPush(gen_frame, _PyStackRef_NewIfBorrowedOrSteal(v)); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; @@ -1843,7 +1843,7 @@ #endif stack_pointer = _PyFrame_GetStackPointer(frame); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); - value = _PyStackRef_StealIfUnborrowed(temp); + value = _PyStackRef_NewIfBorrowedOrSteal(temp); LLTRACE_RESUME_FRAME(); stack_pointer[0] = value; stack_pointer += 1; diff --git a/Python/frame.c b/Python/frame.c index 483e7cfe41e890..6ecfc4a3b90c6c 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -57,15 +57,12 @@ take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame) frame->f_executable = PyStackRef_DUP(frame->f_executable); int stacktop = (int)(frame->stackpointer - frame->localsplus); assert(stacktop >= _PyFrame_GetCode(frame)->co_nlocalsplus); - // XXX - Maybe more optimal sequence to do here - if (_PyStackRef_IsBorrowed(frame->f_executable)) { - frame->f_executable = PyStackRef_FromPyObjectNew(PyStackRef_AsPyObjectBorrow(frame->f_executable)); - } - if (_PyStackRef_IsBorrowed(frame->f_funcobj)) { - frame->f_funcobj = PyStackRef_FromPyObjectNew(PyStackRef_AsPyObjectBorrow(frame->f_funcobj)); - } + // The frame object may outlive any references that were providing 
"support" + // for borrowed references in the frame. Convert them to strong references. + frame->f_executable = _PyStackRef_NewIfBorrowedOrSteal(frame->f_executable); + frame->f_funcobj = _PyStackRef_NewIfBorrowedOrSteal(frame->f_funcobj); for (int i = 0; i < stacktop; i++) { - frame->localsplus[i] = _PyStackRef_StealIfUnborrowed(frame->localsplus[i]); + frame->localsplus[i] = _PyStackRef_NewIfBorrowedOrSteal(frame->localsplus[i]); } f->f_frame = frame; frame->owner = FRAME_OWNED_BY_FRAME_OBJECT; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 1d9d1021ab7692..56645c92914ee9 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -7230,7 +7230,7 @@ { retval = val; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef temp = _PyStackRef_StealIfUnborrowed(retval); + _PyStackRef temp = _PyStackRef_NewIfBorrowedOrSteal(retval); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -7314,7 +7314,7 @@ #endif stack_pointer = _PyFrame_GetStackPointer(frame); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); - value = _PyStackRef_StealIfUnborrowed(temp); + value = _PyStackRef_NewIfBorrowedOrSteal(temp); LLTRACE_RESUME_FRAME(); } stack_pointer[0] = value; @@ -10305,7 +10305,7 @@ _PyStackRef res; retval = stack_pointer[-1]; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef temp = _PyStackRef_StealIfUnborrowed(retval); + _PyStackRef temp = _PyStackRef_NewIfBorrowedOrSteal(retval); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -10362,7 +10362,7 @@ PyObject *receiver_o = PyStackRef_AsPyObjectBorrow(receiver); PyObject *retval_o; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef tmp = _PyStackRef_StealIfUnborrowed(v); + _PyStackRef tmp = _PyStackRef_NewIfBorrowedOrSteal(v); if ((tstate->interp->eval_frame == NULL) && (Py_TYPE(receiver_o) == &PyGen_Type || Py_TYPE(receiver_o) == &PyCoro_Type) && 
((PyGenObject *)receiver_o)->gi_frame_state < FRAME_EXECUTING) @@ -10471,7 +10471,7 @@ } STAT_INC(SEND, hit); gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, _PyStackRef_StealIfUnborrowed(v)); + _PyFrame_StackPush(gen_frame, _PyStackRef_NewIfBorrowedOrSteal(v)); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; @@ -11975,7 +11975,7 @@ #endif stack_pointer = _PyFrame_GetStackPointer(frame); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); - value = _PyStackRef_StealIfUnborrowed(temp); + value = _PyStackRef_NewIfBorrowedOrSteal(temp); LLTRACE_RESUME_FRAME(); stack_pointer[0] = value; stack_pointer += 1; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 0e1e20b49c6df0..69b49e6ce3fbe8 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -591,7 +591,6 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_CLEAR", "PyStackRef_CLOSE_SPECIALIZED", "PyStackRef_DUP", - "_PyStackRef_StealIfUnborrowed", "PyStackRef_False", "PyStackRef_FromPyObjectImmortal", "PyStackRef_FromPyObjectNew", @@ -649,6 +648,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "_PyObject_GetManagedDict", "_PyObject_InlineValues", "_PyObject_ManagedDictPointer", + "_PyStackRef_NewIfBorrowedOrSteal", "_PyThreadState_HasStackSpace", "_PyTuple_FromStackRefStealOnSuccess", "_PyTuple_ITEMS", From de13810c6bc04d6db4bfdbd4558845131abb2c3e Mon Sep 17 00:00:00 2001 From: Matt Page Date: Tue, 25 Feb 2025 11:46:40 -0800 Subject: [PATCH 36/73] Avoid extra copies in take_ownership --- Include/internal/pycore_frame.h | 13 ++++++++++--- Objects/genobject.c | 2 +- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 2 +- Python/frame.c | 25 ++++++++++++------------- Python/generated_cases.c.h | 2 +- 6 files changed, 26 insertions(+), 20 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 
f11f19ff21404e..3e8c1341082d7c 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -153,20 +153,27 @@ _PyFrame_NumSlotsForCodeObject(PyCodeObject *code) } static inline void -_PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest) +_PyFrame_CopyToHeap(_PyInterpreterFrame *src, _PyInterpreterFrame *dest) { *dest = *src; assert(src->stackpointer != NULL); int stacktop = (int)(src->stackpointer - src->localsplus); assert(stacktop >= _PyFrame_GetCode(src)->co_nlocalsplus); dest->stackpointer = dest->localsplus + stacktop; - // The generator may outlive any references that were providing "support" - // for borrowed references in the frame. Convert them to strong references. + // The destination frame may outlive any references that were providing + // "support" for borrowed references in the source frame. Convert any + // borrowed references that were copied into dest into strong references. for (int i = 0; i < stacktop; i++) { dest->localsplus[i] = _PyStackRef_NewIfBorrowedOrSteal(src->localsplus[i]); } dest->f_executable = _PyStackRef_NewIfBorrowedOrSteal(dest->f_executable); dest->f_funcobj = _PyStackRef_NewIfBorrowedOrSteal(dest->f_funcobj); +} + +static inline void +_PyFrame_CopyToNewGen(_PyInterpreterFrame *src, _PyInterpreterFrame *dest) +{ + _PyFrame_CopyToHeap(src, dest); // Don't leave a dangling pointer to the old frame when creating generators // and coroutines: dest->previous = NULL; diff --git a/Objects/genobject.c b/Objects/genobject.c index 79aed8571c35e7..81047ea46242cf 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -989,7 +989,7 @@ gen_new_with_qualname(PyTypeObject *type, PyFrameObject *f, assert(f->f_frame->frame_obj == NULL); assert(f->f_frame->owner == FRAME_OWNED_BY_FRAME_OBJECT); _PyInterpreterFrame *frame = &gen->gi_iframe; - _PyFrame_Copy((_PyInterpreterFrame *)f->_f_frame_data, frame); + _PyFrame_CopyToNewGen((_PyInterpreterFrame *)f->_f_frame_data, frame); gen->gi_frame_state = 
FRAME_CREATED; assert(frame->frame_obj == f); f->f_frame = frame; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 0c011fda04c240..d6d9f20b4b8211 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4694,7 +4694,7 @@ dummy_func( SAVE_STACK(); _PyInterpreterFrame *gen_frame = &gen->gi_iframe; frame->instr_ptr++; - _PyFrame_Copy(frame, gen_frame); + _PyFrame_CopyToNewGen(frame, gen_frame); assert(frame->frame_obj == NULL); gen->gi_frame_state = FRAME_CREATED; gen_frame->owner = FRAME_OWNED_BY_GENERATOR; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index cd20ecdbe7bbb2..c391ff30d4fc1b 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -6391,7 +6391,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); _PyInterpreterFrame *gen_frame = &gen->gi_iframe; frame->instr_ptr++; - _PyFrame_Copy(frame, gen_frame); + _PyFrame_CopyToNewGen(frame, gen_frame); assert(frame->frame_obj == NULL); gen->gi_frame_state = FRAME_CREATED; gen_frame->owner = FRAME_OWNED_BY_GENERATOR; diff --git a/Python/frame.c b/Python/frame.c index 6ecfc4a3b90c6c..86f3928bcedce1 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -50,20 +50,19 @@ take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame) { assert(frame->owner < FRAME_OWNED_BY_INTERPRETER); assert(frame->owner != FRAME_OWNED_BY_FRAME_OBJECT); - Py_ssize_t size = ((char*)frame->stackpointer) - (char *)frame; - memcpy((_PyInterpreterFrame *)f->_f_frame_data, frame, size); - frame = (_PyInterpreterFrame *)f->_f_frame_data; - frame->stackpointer = (_PyStackRef *)(((char *)frame) + size); + // While ownership of f_executable is transferred to the frame object, the + // reference in the frame will also be closed when the frame is popped from + // the stack. Create another reference to ensure that works correctly. + // + // This must happen in the source frame (not in the frame + // object). 
_PyFrame_CopyToHeap converts borrowed references in the source + // frame into strong references in the destination frame. duping the + // reference in the dest frame would result in a leak if the source was + // borrowed because the close on the source would not destroy the newly + // created reference. frame->f_executable = PyStackRef_DUP(frame->f_executable); - int stacktop = (int)(frame->stackpointer - frame->localsplus); - assert(stacktop >= _PyFrame_GetCode(frame)->co_nlocalsplus); - // The frame object may outlive any references that were providing "support" - // for borrowed references in the frame. Convert them to strong references. - frame->f_executable = _PyStackRef_NewIfBorrowedOrSteal(frame->f_executable); - frame->f_funcobj = _PyStackRef_NewIfBorrowedOrSteal(frame->f_funcobj); - for (int i = 0; i < stacktop; i++) { - frame->localsplus[i] = _PyStackRef_NewIfBorrowedOrSteal(frame->localsplus[i]); - } + _PyFrame_CopyToHeap(frame, (_PyInterpreterFrame *) f->_f_frame_data); + frame = (_PyInterpreterFrame *)f->_f_frame_data; f->f_frame = frame; frame->owner = FRAME_OWNED_BY_FRAME_OBJECT; if (_PyFrame_IsIncomplete(frame)) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 56645c92914ee9..8aad4fcf590bdc 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -10275,7 +10275,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); _PyInterpreterFrame *gen_frame = &gen->gi_iframe; frame->instr_ptr++; - _PyFrame_Copy(frame, gen_frame); + _PyFrame_CopyToNewGen(frame, gen_frame); assert(frame->frame_obj == NULL); gen->gi_frame_state = FRAME_CREATED; gen_frame->owner = FRAME_OWNED_BY_GENERATOR; From fdeae7dd5761577da0048edfb045276a5bc4783f Mon Sep 17 00:00:00 2001 From: Matt Page Date: Tue, 25 Feb 2025 15:05:36 -0800 Subject: [PATCH 37/73] Make the default build work --- Include/internal/pycore_stackref.h | 11 ++++++++--- Lib/test/test_dis.py | 2 +- Python/bytecodes.c | 10 ++++++++++ Python/executor_cases.c.h | 5 +++++ 
Python/generated_cases.c.h | 10 ++++++++++ 5 files changed, 34 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 16b1b295c5cfa6..f1ddac7361524e 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -146,6 +146,12 @@ PyStackRef_CLOSE(_PyStackRef ref) Py_DECREF(obj); } +static inline _PyStackRef +_PyStackRef_NewIfBorrowedOrSteal(_PyStackRef ref) +{ + return ref; +} + static inline _PyStackRef _PyStackRef_DUP(_PyStackRef ref, const char *filename, int linenumber) { @@ -274,9 +280,6 @@ PyStackRef_DUP(_PyStackRef stackref) { assert(!PyStackRef_IsNull(stackref)); if (PyStackRef_IsDeferred(stackref)) { - /* assert(_Py_IsImmortal(PyStackRef_AsPyObjectBorrow(stackref)) || */ - /* _PyObject_HasDeferredRefcount(PyStackRef_AsPyObjectBorrow(stackref)) */ - /* ); */ return stackref; } Py_INCREF(PyStackRef_AsPyObjectBorrow(stackref)); @@ -322,6 +325,8 @@ static const _PyStackRef PyStackRef_NULL = { .bits = 0 }; #define PyStackRef_DUP(stackref) PyStackRef_FromPyObjectSteal(Py_NewRef(PyStackRef_AsPyObjectBorrow(stackref))) +#define _PyStackRef_NewIfBorrowedOrSteal(stackref) stackref + #define PyStackRef_CLOSE_SPECIALIZED(stackref, dealloc) _Py_DECREF_SPECIALIZED(PyStackRef_AsPyObjectBorrow(stackref), dealloc) #endif // Py_GIL_DISABLED diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index fc69bedb52cad0..9ed1f6831f6151 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -877,7 +877,7 @@ def load_test(x, y=0): %3d LOAD_FAST_LOAD_FAST 1 (x, y) STORE_FAST_STORE_FAST 50 (b, a) -%3d LOAD_FAST_LOAD_FAST 35 (a, b) +%3d LOAD_FAST_BORROW_LOAD_FAST_BORROW 35 (a, b) BUILD_TUPLE 2 RETURN_VALUE """ % (load_test.__code__.co_firstlineno, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d6d9f20b4b8211..cdcbcdf140e578 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -271,7 +271,12 @@ dummy_func( } inst (LOAD_FAST_BORROW, (-- value)) { + 
assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif } inst(LOAD_FAST_AND_CLEAR, (-- value)) { @@ -289,8 +294,13 @@ dummy_func( inst(LOAD_FAST_BORROW_LOAD_FAST_BORROW, ( -- value1, value2)) { uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; + #ifdef Py_GIL_DISABLED value1 = PyStackRef_AsDeferred(GETLOCAL(oparg1)); value2 = PyStackRef_AsDeferred(GETLOCAL(oparg2)); + #else + value1 = PyStackRef_DUP(GETLOCAL(oparg1)); + value2 = PyStackRef_DUP(GETLOCAL(oparg2)); + #endif } family(LOAD_CONST, 0) = { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index c391ff30d4fc1b..86e91db75b4c5a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -204,7 +204,12 @@ case _LOAD_FAST_BORROW: { _PyStackRef value; oparg = CURRENT_OPARG(); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 8aad4fcf590bdc..957fd1118bde29 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -8809,7 +8809,12 @@ next_instr += 1; INSTRUCTION_STATS(LOAD_FAST_BORROW); _PyStackRef value; + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -8828,8 +8833,13 @@ _PyStackRef value2; uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; + #ifdef Py_GIL_DISABLED value1 = PyStackRef_AsDeferred(GETLOCAL(oparg1)); value2 = PyStackRef_AsDeferred(GETLOCAL(oparg2)); + #else + value1 = PyStackRef_DUP(GETLOCAL(oparg1)); + value2 = 
PyStackRef_DUP(GETLOCAL(oparg2)); + #endif stack_pointer[0] = value1; stack_pointer[1] = value2; stack_pointer += 2; From 1aed2811b581385bbee4fb4bc990f5dfcf1801de Mon Sep 17 00:00:00 2001 From: Matt Page Date: Tue, 25 Feb 2025 15:06:29 -0800 Subject: [PATCH 38/73] Add docs for new opcodes --- Doc/library/dis.rst | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index fca4a550f28441..85d5c24d090231 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -1402,6 +1402,13 @@ iterations of the loop. This opcode is now only used in situations where the local variable is guaranteed to be initialized. It cannot raise :exc:`UnboundLocalError`. +.. opcode:: LOAD_FAST_BORROW (var_num) + + Pushes a borrowed reference to the local ``co_varnames[var_num]`` onto the stack + in free-threaded builds. In default builds this is identical to ``LOAD_FAST``. + + .. versionadded:: 3.14 + .. opcode:: LOAD_FAST_LOAD_FAST (var_nums) Pushes references to ``co_varnames[var_nums >> 4]`` and @@ -1409,6 +1416,15 @@ iterations of the loop. .. versionadded:: 3.13 + +.. opcode:: LOAD_FAST_BORROW_LOAD_FAST_BORROW (var_nums) + + Pushes borrowed references to ``co_varnames[var_nums >> 4]`` and + ``co_varnames[var_nums & 15]`` onto the stack in free-threaded builds. This is + identical to ``LOAD_FAST_LOAD_FAST`` in default builds. + + .. versionadded:: 3.14 + .. opcode:: LOAD_FAST_CHECK (var_num) Pushes a reference to the local ``co_varnames[var_num]`` onto the stack, @@ -2023,4 +2039,3 @@ instructions: .. deprecated:: 3.13 All jumps are now relative. This list is empty. 
- From c332912eca1a47435c231436a607168885cdeff8 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Tue, 25 Feb 2025 23:11:24 -0800 Subject: [PATCH 39/73] Fix flag array size computation --- Python/flowgraph.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index af39366eeaf52a..66d3f269c371a4 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2544,8 +2544,8 @@ optimize_load_fast(cfg_builder *g) for (basicblock *b = entryblock; b != NULL; b = b->b_next) { max_instrs = Py_MAX(max_instrs, b->b_iused); } - size_t instr_flags_size = max_instrs * sizeof(bool); - uint8_t *instr_flags = PyMem_Calloc(max_instrs, instr_flags_size); + size_t instr_flags_size = max_instrs * sizeof(uint8_t); + uint8_t *instr_flags = PyMem_Malloc(instr_flags_size); if (instr_flags == NULL) { PyErr_NoMemory(); return ERROR; @@ -2566,7 +2566,7 @@ optimize_load_fast(cfg_builder *g) assert(block->b_startdepth > -1); // Reset per-block state. - memset(instr_flags, 0, instr_flags_size); + memset(instr_flags, 0, block->b_iused * sizeof(*instr_flags)); // Reset the stack of refs. 
We don't track references on the stack // across basic blocks, but the bytecode will expect their From 76a75a7e50296ff9865fa9c6468789e1e779060e Mon Sep 17 00:00:00 2001 From: Matt Page Date: Tue, 25 Feb 2025 22:52:33 -0800 Subject: [PATCH 40/73] Add a high level comment explaining our approach --- Python/flowgraph.c | 47 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 66d3f269c371a4..7a03c1a7eec4ea 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2496,11 +2496,11 @@ ref_stack_fini(ref_stack *stack) typedef enum { // The loaded reference is still on the stack when the local is killed - LOCAL_KILLED_ON_STACK = 1, + SUPPORT_KILLED = 1, // The loaded reference is stored into a local - STORED_AS_LOCAL = 2, + STORED_AS_LOCAL = 2, // The loaded reference is still on the stack at the end of the basic block - REF_UNCONSUMED = 4, + REF_UNCONSUMED = 4, } LoadFastInstrFlag; static void @@ -2510,7 +2510,7 @@ kill_local(uint8_t *instr_flags, ref_stack *refs, int local) ref r = ref_stack_at(refs, i); if (r.local == local) { assert(r.instr >= 0); - instr_flags[r.instr] |= LOCAL_KILLED_ON_STACK; + instr_flags[r.instr] |= SUPPORT_KILLED; } } } @@ -2519,7 +2519,7 @@ static void store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r) { kill_local(instr_flags, refs, local); - if (r.instr != -1) { + if (r.instr != DUMMY_INSTR) { instr_flags[r.instr] |= STORED_AS_LOCAL; } } @@ -2534,6 +2534,43 @@ load_fast_push_block(basicblock ***sp, basicblock *target, int start_depth) } } +/* + * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into weaker variants that + * load borrowed references onto the operand stack. + * + * This is only safe when we can prove that the reference in the frame outlives + * the borrowed reference produced by the instruction. We make this tractable + * by enforcing the following lifetimes: + * + * 1. 
Borrowed references loaded onto the operand stack live until the end of + * the instruction that consumes them from the stack. Any borrowed + * references that would escape into the heap (e.g. into frame objects or + * generators) are converted into new, strong references. + * + * 2. Locals live until they are either killed by an instruction + * (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten + * via `f_locals` is added to a list owned by the frame object. + * + * To simplify the problem of detecting which supporting references in the + * frame are killed by instructions that overwrite locals, we only allow + * borrowed references to be stored as a local in the frame if they were passed + * as an argument. {RETURN,YIELD}_VALUE convert borrowed references into new, + * strong references. + * + * Using the above, we can optimize any LOAD_FAST{_LOAD_FAST} instructions + * that meet the following criteria: + * + * 1. The produced reference must be consumed from the stack before the + * supporting reference in the frame is killed. + * + * 2. The produced reference cannot be stored as a local. + * + * We use abstract interpretation to identify instructions that meet these + * criteria. For each basic block, we simulate the effect the bytecode has on a + * stack of abstract references and note any instructions that violate the + * criteria above. Once we've processed all the instructions in a block, any + * non-violating LOAD_FAST{_LOAD_FAST} can be optimized. 
+ */ static int optimize_load_fast(cfg_builder *g) { From dd6426fd73a2dfda048926a60edfe19c683c5b41 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 28 Feb 2025 09:26:56 -0800 Subject: [PATCH 41/73] Bump magic number after merge (was bumped on main) --- Include/internal/pycore_magic_number.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_magic_number.h b/Include/internal/pycore_magic_number.h index e97d066ebf8e5c..cc96455852d03b 100644 --- a/Include/internal/pycore_magic_number.h +++ b/Include/internal/pycore_magic_number.h @@ -282,7 +282,7 @@ PC/launcher.c must also be updated. */ -#define PYC_MAGIC_NUMBER 3617 +#define PYC_MAGIC_NUMBER 3618 /* This is equivalent to converting PYC_MAGIC_NUMBER to 2 bytes (little-endian) and then appending b'\r\n'. */ #define PYC_MAGIC_NUMBER_TOKEN \ From bf68eb91b2941f28bc844916c695c30bd2e3c2d5 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 28 Feb 2025 10:49:00 -0800 Subject: [PATCH 42/73] Add more tests --- Include/internal/pycore_flowgraph.h | 3 + Lib/test/test_peepholer.py | 99 +++++++++++++++++++++++++++- Modules/_testinternalcapi.c | 18 +++++ Modules/clinic/_testinternalcapi.c.h | 59 ++++++++++++++++- Python/flowgraph.c | 32 ++++++++- 5 files changed, 207 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_flowgraph.h b/Include/internal/pycore_flowgraph.h index 5043260d2fd99f..3b218838b9e833 100644 --- a/Include/internal/pycore_flowgraph.h +++ b/Include/internal/pycore_flowgraph.h @@ -41,6 +41,9 @@ PyAPI_FUNC(PyObject*) _PyCompile_OptimizeCfg( PyObject *consts, int nlocals); +// Export for '_testinternalcapi' shared extension +PyAPI_FUNC(PyObject*) _PyCompile_OptimizeLoadFast(PyObject *instructions); + #ifdef __cplusplus } #endif diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 94fcccbed942bb..5f9c810df56f00 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -4,9 +4,14 @@ import sys import textwrap import 
unittest +try: + import _testinternalcapi +except ImportError: + _testinternalcapi = None from test import support -from test.support.bytecode_helper import BytecodeTestCase, CfgOptimizationTestCase +from test.support.bytecode_helper import ( + BytecodeTestCase, CfgOptimizationTestCase, CompilationStepTestCase) def compile_pattern_with_fast_locals(pattern): @@ -2353,5 +2358,97 @@ def test_list_to_tuple_get_iter_is_safe(self): self.assertEqual(items, []) +@unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi") +class OptimizeLoadFastTestCase(CompilationStepTestCase): + def check(self, insts, expected_insts): + self.check_instructions(insts) + self.check_instructions(expected_insts) + seq = self.seq_from_insts(insts) + opt_insts = _testinternalcapi.optimize_load_fast(seq) + expected_insts = self.seq_from_insts(expected_insts).get_instructions() + self.assertInstructionsMatch(opt_insts, expected_insts) + + def test_optimized(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_FAST", 1, 2), + ("BINARY_OP", 2, 3), + ] + expected = [ + ("LOAD_FAST_BORROW", 0, 1), + ("LOAD_FAST_BORROW", 1, 2), + ("BINARY_OP", 2, 3), + ] + self.check(insts, expected) + + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_CONST", 1, 2), + ("SWAP", 2, 3), + ("POP_TOP", None, 4), + ] + expected = [ + ("LOAD_FAST_BORROW", 0, 1), + ("LOAD_CONST", 1, 2), + ("SWAP", 2, 3), + ("POP_TOP", None, 4), + ] + self.check(insts, expected) + + def test_unoptimized_if_unconsumed(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_FAST", 1, 2), + ("POP_TOP", None, 3), + ] + expected = [ + ("LOAD_FAST", 0, 1), + ("LOAD_FAST_BORROW", 1, 2), + ("POP_TOP", None, 3), + ] + self.check(insts, expected) + + insts = [ + ("LOAD_FAST", 0, 1), + ("COPY", 1, 2), + ("POP_TOP", None, 3), + ] + self.check(insts, insts) + + def test_unoptimized_if_support_killed(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_CONST", 0, 2), + ("STORE_FAST", 0, 3), + ("POP_TOP", None, 4), + ] + self.check(insts, insts) + + insts = [ 
+ ("LOAD_FAST", 0, 1), + ("LOAD_CONST", 0, 2), + ("LOAD_CONST", 0, 3), + ("STORE_FAST_STORE_FAST", 0 << 4 | 1, 4), + ("POP_TOP", None, 5), + ] + self.check(insts, insts) + + def test_unoptimized_if_aliased(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("STORE_FAST", 1, 2), + ] + self.check(insts, insts) + + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_CONST", 0, 3), + ("STORE_FAST_STORE_FAST", 0 << 4 | 1, 4), + ] + self.check(insts, insts) + + + + if __name__ == "__main__": unittest.main() diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index c03652259d0e50..35bd8791421c51 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -748,6 +748,23 @@ _testinternalcapi_optimize_cfg_impl(PyObject *module, PyObject *instructions, return _PyCompile_OptimizeCfg(instructions, consts, nlocals); } +/*[clinic input] + +_testinternalcapi.optimize_load_fast -> object + + instructions: object + +Optimize LOAD_FAST{_LOAD_FAST} instructions. +[clinic start generated code]*/ + +static PyObject * +_testinternalcapi_optimize_load_fast_impl(PyObject *module, + PyObject *instructions) +/*[clinic end generated code: output=6f975349c976d017 input=c59f3eac68308c01]*/ +{ + return _PyCompile_OptimizeLoadFast(instructions); +} + static int get_nonnegative_int_from_dict(PyObject *dict, const char *key) { PyObject *obj = PyDict_GetItemString(dict, key); @@ -2023,6 +2040,7 @@ static PyMethodDef module_functions[] = { _TESTINTERNALCAPI_NEW_INSTRUCTION_SEQUENCE_METHODDEF _TESTINTERNALCAPI_COMPILER_CODEGEN_METHODDEF _TESTINTERNALCAPI_OPTIMIZE_CFG_METHODDEF + _TESTINTERNALCAPI_OPTIMIZE_LOAD_FAST_METHODDEF _TESTINTERNALCAPI_ASSEMBLE_CODE_OBJECT_METHODDEF {"get_interp_settings", get_interp_settings, METH_VARARGS, NULL}, {"clear_extension", clear_extension, METH_VARARGS, NULL}, diff --git a/Modules/clinic/_testinternalcapi.c.h b/Modules/clinic/_testinternalcapi.c.h index d98d69df22f982..901fae473ac407 100644 --- a/Modules/clinic/_testinternalcapi.c.h +++ 
b/Modules/clinic/_testinternalcapi.c.h @@ -225,6 +225,63 @@ _testinternalcapi_optimize_cfg(PyObject *module, PyObject *const *args, Py_ssize return return_value; } +PyDoc_STRVAR(_testinternalcapi_optimize_load_fast__doc__, +"optimize_load_fast($module, /, instructions)\n" +"--\n" +"\n" +"Optimize LOAD_FAST{_LOAD_FAST} instructions."); + +#define _TESTINTERNALCAPI_OPTIMIZE_LOAD_FAST_METHODDEF \ + {"optimize_load_fast", _PyCFunction_CAST(_testinternalcapi_optimize_load_fast), METH_FASTCALL|METH_KEYWORDS, _testinternalcapi_optimize_load_fast__doc__}, + +static PyObject * +_testinternalcapi_optimize_load_fast_impl(PyObject *module, + PyObject *instructions); + +static PyObject * +_testinternalcapi_optimize_load_fast(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(instructions), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"instructions", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "optimize_load_fast", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject *instructions; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + instructions = args[0]; + return_value = _testinternalcapi_optimize_load_fast_impl(module, instructions); + +exit: + return return_value; +} + PyDoc_STRVAR(_testinternalcapi_assemble_code_object__doc__, "assemble_code_object($module, /, filename, instructions, 
metadata)\n" "--\n" @@ -365,4 +422,4 @@ gh_119213_getargs(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO exit: return return_value; } -/*[clinic end generated code: output=ec77971c6c2663da input=a9049054013a1b77]*/ +/*[clinic end generated code: output=bbd9381589d0f959 input=a9049054013a1b77]*/ diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 7a03c1a7eec4ea..a12e54801bd9bb 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2572,12 +2572,15 @@ load_fast_push_block(basicblock ***sp, basicblock *target, int start_depth) * non-violating LOAD_FAST{_LOAD_FAST} can be optimized. */ static int -optimize_load_fast(cfg_builder *g) +optimize_load_fast(cfg_builder *g, bool compute_stackdepth) { int status; ref_stack refs = {0}; int max_instrs = 0; basicblock *entryblock = g->g_entryblock; + if (compute_stackdepth) { + calculate_stackdepth(g); + } for (basicblock *b = entryblock; b != NULL; b = b->b_next) { max_instrs = Py_MAX(max_instrs, b->b_iused); } @@ -2620,6 +2623,7 @@ optimize_load_fast(cfg_builder *g) assert(opcode != EXTENDED_ARG); switch (opcode) { case COPY: { + assert(oparg > 0); Py_ssize_t idx = refs.size - oparg; ref r = ref_stack_at(&refs, idx); if (ref_stack_push(&refs, r) < 0) { @@ -2687,6 +2691,7 @@ optimize_load_fast(cfg_builder *g) } case SWAP: { + assert(oparg >= 2); ref_stack_swap_top(&refs, oparg); break; } @@ -3711,7 +3716,7 @@ _PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g, /* Can't modify the bytecode after inserting instructions that produce * borrowed references. */ - RETURN_IF_ERROR(optimize_load_fast(g)); + RETURN_IF_ERROR(optimize_load_fast(g, /* compute_stackdepth */ false)); /* Can't modify the bytecode after computing jump offsets. 
*/ if (_PyCfg_ToInstructionSequence(g, seq) < 0) { @@ -3802,3 +3807,26 @@ _PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals) _PyCfgBuilder_Free(g); return res; } + +PyObject * +_PyCompile_OptimizeLoadFast(PyObject *seq) +{ + if (!_PyInstructionSequence_Check(seq)) { + PyErr_SetString(PyExc_ValueError, "expected an instruction sequence"); + return NULL; + } + + cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq); + if (g == NULL) { + return NULL; + } + + if (optimize_load_fast(g, /* compute_stackdepth */ true) != SUCCESS) { + _PyCfgBuilder_Free(g); + return NULL; + } + + PyObject *res = cfg_to_instruction_sequence(g); + _PyCfgBuilder_Free(g); + return res; +} From 474a587a3da34a8e0a6ffad9f456ab9f20c1e9f1 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 28 Feb 2025 11:13:50 -0800 Subject: [PATCH 43/73] Update commented out assertion --- Include/internal/pycore_stackref.h | 11 +++++++++-- Programs/test_frozenmain.h | 14 +++++++------- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 2 +- Tools/cases_generator/analyzer.py | 1 + 6 files changed, 20 insertions(+), 12 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index f1ddac7361524e..bf9b0018dcfb31 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -101,6 +101,12 @@ PyStackRef_IsNone(_PyStackRef ref) return _Py_stackref_get_object(ref) == Py_None; } +static inline bool +PyStackRef_IsBorrowed(_PyStackRef ref) +{ + return false; +} + static inline PyObject * _PyStackRef_AsPyObjectBorrow(_PyStackRef ref, const char *filename, int linenumber) { @@ -220,7 +226,7 @@ _PyStackRef_FromPyObjectSteal(PyObject *obj) # define PyStackRef_FromPyObjectSteal(obj) _PyStackRef_FromPyObjectSteal(_PyObject_CAST(obj)) static inline bool -_PyStackRef_IsBorrowed(_PyStackRef stackref) +PyStackRef_IsBorrowed(_PyStackRef stackref) { if (PyStackRef_IsNull(stackref) || 
!PyStackRef_IsDeferred(stackref)) { return false; @@ -233,7 +239,7 @@ _PyStackRef_IsBorrowed(_PyStackRef stackref) static inline _PyStackRef _PyStackRef_NewIfBorrowedOrSteal(_PyStackRef stackref) { - if (_PyStackRef_IsBorrowed(stackref)) { + if (PyStackRef_IsBorrowed(stackref)) { PyObject *obj = PyStackRef_AsPyObjectBorrow(stackref); return (_PyStackRef){ .bits = (uintptr_t)(Py_NewRef(obj)) | Py_TAG_PTR }; } @@ -310,6 +316,7 @@ static const _PyStackRef PyStackRef_NULL = { .bits = 0 }; #define PyStackRef_True ((_PyStackRef){.bits = (uintptr_t)&_Py_TrueStruct }) #define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) }) #define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) }) +#define PyStackRef_IsBorrowed(stackref) false #define PyStackRef_AsPyObjectBorrow(stackref) ((PyObject *)(stackref).bits) diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index 93b0e993d37c7d..d2b84321ca2825 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -13,26 +13,26 @@ unsigned char M_test_frozenmain[] = { 80,5,91,6,12,0,80,6,91,5,91,6,44,26,0,0, 0,0,0,0,0,0,0,0,12,0,49,4,51,1,0,0, 0,0,0,0,31,0,73,26,0,0,9,0,30,0,80,0, - 35,0,41,7,78,218,18,70,114,111,122,101,110,32,72,101, - 108,108,111,32,87,111,114,108,100,218,8,115,121,115,46,97, + 35,0,41,7,78,122,18,70,114,111,122,101,110,32,72,101, + 108,108,111,32,87,111,114,108,100,122,8,115,121,115,46,97, 114,103,118,218,6,99,111,110,102,105,103,41,5,218,12,112, 114,111,103,114,97,109,95,110,97,109,101,218,10,101,120,101, 99,117,116,97,98,108,101,218,15,117,115,101,95,101,110,118, 105,114,111,110,109,101,110,116,218,17,99,111,110,102,105,103, 117,114,101,95,99,95,115,116,100,105,111,218,14,98,117,102, - 102,101,114,101,100,95,115,116,100,105,111,218,7,99,111,110, - 102,105,103,32,218,2,58,32,41,7,218,3,115,121,115,218, + 102,101,114,101,100,95,115,116,100,105,111,122,7,99,111,110, + 102,105,103,32,122,2,58,32,41,7,218,3,115,121,115,218, 
17,95,116,101,115,116,105,110,116,101,114,110,97,108,99,97, 112,105,218,5,112,114,105,110,116,218,4,97,114,103,118,218, - 11,103,101,116,95,99,111,110,102,105,103,115,114,4,0,0, + 11,103,101,116,95,99,111,110,102,105,103,115,114,2,0,0, 0,218,3,107,101,121,169,0,243,0,0,0,0,218,18,116, 101,115,116,95,102,114,111,122,101,110,109,97,105,110,46,112, - 121,218,8,60,109,111,100,117,108,101,62,114,21,0,0,0, + 121,218,8,60,109,111,100,117,108,101,62,114,17,0,0,0, 1,0,0,0,115,94,0,0,0,240,3,1,1,1,243,8, 0,1,11,219,0,24,225,0,5,208,6,26,212,0,27,217, 0,5,128,106,144,35,151,40,145,40,212,0,27,216,9,26, 215,9,38,210,9,38,211,9,40,168,24,213,9,50,128,6, 243,2,6,12,2,128,67,241,14,0,5,10,136,71,144,67, 144,53,152,2,152,54,160,35,157,59,152,45,208,10,40,214, - 4,41,243,15,6,12,2,114,19,0,0,0, + 4,41,243,15,6,12,2,114,15,0,0,0, }; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index cdcbcdf140e578..ace65803d462c1 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -766,7 +766,7 @@ dummy_func( * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. */ - // assert(Py_REFCNT(left_o) >= 2); + assert(Py_REFCNT(left_o) >= 2 || PyStackRef_IsBorrowed(left)); PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); DEAD(left); PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 86e91db75b4c5a..3fa26f07b1a38f 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1021,7 +1021,7 @@ * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. 
*/ - // assert(Py_REFCNT(left_o) >= 2); + assert(Py_REFCNT(left_o) >= 2 || PyStackRef_IsBorrowed(left)); PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); PyObject *right_o = PyStackRef_AsPyObjectSteal(right); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 957fd1118bde29..bdb33273aab542 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -368,7 +368,7 @@ * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. */ - // assert(Py_REFCNT(left_o) >= 2); + assert(Py_REFCNT(left_o) >= 2 || PyStackRef_IsBorrowed(left)); PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); PyObject *right_o = PyStackRef_AsPyObjectSteal(right); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 69b49e6ce3fbe8..cea2cdfd58a328 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -595,6 +595,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_FromPyObjectImmortal", "PyStackRef_FromPyObjectNew", "PyStackRef_FromPyObjectSteal", + "PyStackRef_IsBorrowed", "PyStackRef_IsExactly", "PyStackRef_IsNone", "PyStackRef_IsTrue", From 4b3aacf424a5eef8d81478095064931e3ce02d23 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 28 Feb 2025 11:29:44 -0800 Subject: [PATCH 44/73] Add NEWS entry --- .../2025-02-28-11-29-35.gh-issue-130704.7RDVLE.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-02-28-11-29-35.gh-issue-130704.7RDVLE.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-28-11-29-35.gh-issue-130704.7RDVLE.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-28-11-29-35.gh-issue-130704.7RDVLE.rst new file mode 100644 index 00000000000000..528c58b54cd7de --- /dev/null +++ 
b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-28-11-29-35.gh-issue-130704.7RDVLE.rst @@ -0,0 +1,2 @@ +Optimize ``LOAD_FAST`` and its superinstruction form to reduce reference +counting overhead. From e6920370e52c4aca3eb54549a132a3a67948cfc8 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 28 Feb 2025 12:18:54 -0800 Subject: [PATCH 45/73] Remove debug print --- Objects/codeobject.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index f608a199e761d1..833c4d10ba8318 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -542,7 +542,6 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) } co->_co_firsttraceable = entry_point; #ifdef Py_GIL_DISABLED - // fprintf(stderr, "== Quicken %s\n", PyUnicode_AsUTF8(co->co_qualname)); _PyCode_Quicken(_PyCode_CODE(co), Py_SIZE(co), interp->config.tlbc_enabled); #else _PyCode_Quicken(_PyCode_CODE(co), Py_SIZE(co), 1); From f98d91d0877e8de486a397422d59b51a6dad191e Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 28 Feb 2025 15:52:20 -0800 Subject: [PATCH 46/73] Fix doctest --- Doc/library/dis.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 85d5c24d090231..06c872c4a920ef 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -76,7 +76,7 @@ the following command can be used to display the disassembly of 2 RESUME 0 3 LOAD_GLOBAL 1 (len + NULL) - LOAD_FAST 0 (alist) + LOAD_FAST_BORROW 0 (alist) CALL 1 RETURN_VALUE From 725dc8e5fea0f706c217be3c18b387e5a091f93f Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 28 Feb 2025 16:43:27 -0800 Subject: [PATCH 47/73] Fix JIT tests --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_ids.h | 108 +++++++++--------- Include/internal/pycore_uop_metadata.h | 35 +++++- Lib/test/test_capi/test_opt.py | 4 +- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 128 ++++++++++++++++++++++ Python/optimizer_analysis.c | 1 + 
Python/optimizer_bytecodes.c | 4 + Python/optimizer_cases.c.h | 2 +- 9 files changed, 230 insertions(+), 56 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index a3a326ea6c1a56..14657c5c4939dd 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -2173,7 +2173,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, - [LOAD_FAST_BORROW] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, + [LOAD_FAST_BORROW] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 8998cdf160196a..c6feeca3dc8a84 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -201,91 +201,99 @@ extern "C" { #define _LOAD_FAST_6 426 #define _LOAD_FAST_7 427 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR -#define _LOAD_FAST_BORROW LOAD_FAST_BORROW +#define _LOAD_FAST_BORROW 428 +#define _LOAD_FAST_BORROW_0 429 +#define _LOAD_FAST_BORROW_1 430 +#define _LOAD_FAST_BORROW_2 431 +#define _LOAD_FAST_BORROW_3 432 +#define _LOAD_FAST_BORROW_4 433 +#define _LOAD_FAST_BORROW_5 434 +#define _LOAD_FAST_BORROW_6 435 +#define _LOAD_FAST_BORROW_7 436 #define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST 
LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 428 -#define _LOAD_GLOBAL_BUILTINS 429 -#define _LOAD_GLOBAL_MODULE 430 +#define _LOAD_GLOBAL 437 +#define _LOAD_GLOBAL_BUILTINS 438 +#define _LOAD_GLOBAL_MODULE 439 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 431 -#define _LOAD_SMALL_INT_0 432 -#define _LOAD_SMALL_INT_1 433 -#define _LOAD_SMALL_INT_2 434 -#define _LOAD_SMALL_INT_3 435 +#define _LOAD_SMALL_INT 440 +#define _LOAD_SMALL_INT_0 441 +#define _LOAD_SMALL_INT_1 442 +#define _LOAD_SMALL_INT_2 443 +#define _LOAD_SMALL_INT_3 444 #define _LOAD_SPECIAL LOAD_SPECIAL #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 436 +#define _MAKE_CALLARGS_A_TUPLE 445 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 437 +#define _MAKE_WARM 446 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 438 -#define _MAYBE_EXPAND_METHOD_KW 439 -#define _MONITOR_CALL 440 -#define _MONITOR_CALL_KW 441 -#define _MONITOR_JUMP_BACKWARD 442 -#define _MONITOR_RESUME 443 +#define _MAYBE_EXPAND_METHOD 447 +#define _MAYBE_EXPAND_METHOD_KW 448 +#define _MONITOR_CALL 449 +#define _MONITOR_CALL_KW 450 +#define _MONITOR_JUMP_BACKWARD 451 +#define _MONITOR_RESUME 452 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 444 -#define _POP_JUMP_IF_TRUE 445 +#define _POP_JUMP_IF_FALSE 453 +#define _POP_JUMP_IF_TRUE 454 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 446 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 447 +#define _POP_TOP_LOAD_CONST_INLINE 455 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 456 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define 
_PUSH_FRAME 448 +#define _PUSH_FRAME 457 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 449 -#define _PY_FRAME_GENERAL 450 -#define _PY_FRAME_KW 451 -#define _QUICKEN_RESUME 452 -#define _REPLACE_WITH_TRUE 453 +#define _PUSH_NULL_CONDITIONAL 458 +#define _PY_FRAME_GENERAL 459 +#define _PY_FRAME_KW 460 +#define _QUICKEN_RESUME 461 +#define _REPLACE_WITH_TRUE 462 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 454 -#define _SEND 455 -#define _SEND_GEN_FRAME 456 +#define _SAVE_RETURN_OFFSET 463 +#define _SEND 464 +#define _SEND_GEN_FRAME 465 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 457 -#define _STORE_ATTR 458 -#define _STORE_ATTR_INSTANCE_VALUE 459 -#define _STORE_ATTR_SLOT 460 -#define _STORE_ATTR_WITH_HINT 461 +#define _START_EXECUTOR 466 +#define _STORE_ATTR 467 +#define _STORE_ATTR_INSTANCE_VALUE 468 +#define _STORE_ATTR_SLOT 469 +#define _STORE_ATTR_WITH_HINT 470 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 462 -#define _STORE_FAST_0 463 -#define _STORE_FAST_1 464 -#define _STORE_FAST_2 465 -#define _STORE_FAST_3 466 -#define _STORE_FAST_4 467 -#define _STORE_FAST_5 468 -#define _STORE_FAST_6 469 -#define _STORE_FAST_7 470 +#define _STORE_FAST 471 +#define _STORE_FAST_0 472 +#define _STORE_FAST_1 473 +#define _STORE_FAST_2 474 +#define _STORE_FAST_3 475 +#define _STORE_FAST_4 476 +#define _STORE_FAST_5 477 +#define _STORE_FAST_6 478 +#define _STORE_FAST_7 479 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 471 -#define _STORE_SUBSCR 472 +#define _STORE_SLICE 480 +#define _STORE_SUBSCR 481 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT 
STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 473 -#define _TO_BOOL 474 +#define _TIER2_RESUME_CHECK 482 +#define _TO_BOOL 483 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -295,13 +303,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 475 +#define _UNPACK_SEQUENCE 484 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 475 +#define MAX_UOP_ID 484 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index e3365e96d2c658..fe109de655a0e9 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -33,7 +33,15 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_FAST_6] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_7] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_BORROW] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, + [_LOAD_FAST_BORROW_0] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_BORROW_1] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_BORROW_2] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_BORROW_3] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_BORROW_4] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_BORROW_5] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_BORROW_6] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_BORROW_7] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_BORROW] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_BORROW_LOAD_FAST_BORROW] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, @@ -287,6 +295,7 
@@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = { [_LOAD_FAST] = 8, + [_LOAD_FAST_BORROW] = 8, [_LOAD_SMALL_INT] = 4, [_STORE_FAST] = 8, [_INIT_CALL_PY_EXACT_ARGS] = 5, @@ -466,6 +475,14 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_FAST_7] = "_LOAD_FAST_7", [_LOAD_FAST_AND_CLEAR] = "_LOAD_FAST_AND_CLEAR", [_LOAD_FAST_BORROW] = "_LOAD_FAST_BORROW", + [_LOAD_FAST_BORROW_0] = "_LOAD_FAST_BORROW_0", + [_LOAD_FAST_BORROW_1] = "_LOAD_FAST_BORROW_1", + [_LOAD_FAST_BORROW_2] = "_LOAD_FAST_BORROW_2", + [_LOAD_FAST_BORROW_3] = "_LOAD_FAST_BORROW_3", + [_LOAD_FAST_BORROW_4] = "_LOAD_FAST_BORROW_4", + [_LOAD_FAST_BORROW_5] = "_LOAD_FAST_BORROW_5", + [_LOAD_FAST_BORROW_6] = "_LOAD_FAST_BORROW_6", + [_LOAD_FAST_BORROW_7] = "_LOAD_FAST_BORROW_7", [_LOAD_FAST_BORROW_LOAD_FAST_BORROW] = "_LOAD_FAST_BORROW_LOAD_FAST_BORROW", [_LOAD_FAST_CHECK] = "_LOAD_FAST_CHECK", [_LOAD_FAST_LOAD_FAST] = "_LOAD_FAST_LOAD_FAST", @@ -589,6 +606,22 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _LOAD_FAST: return 0; + case _LOAD_FAST_BORROW_0: + return 0; + case _LOAD_FAST_BORROW_1: + return 0; + case _LOAD_FAST_BORROW_2: + return 0; + case _LOAD_FAST_BORROW_3: + return 0; + case _LOAD_FAST_BORROW_4: + return 0; + case _LOAD_FAST_BORROW_5: + return 0; + case _LOAD_FAST_BORROW_6: + return 0; + case _LOAD_FAST_BORROW_7: + return 0; case _LOAD_FAST_BORROW: return 0; case _LOAD_FAST_AND_CLEAR: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 2a9b777862c84a..de3cfa0878bf20 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -134,7 +134,7 @@ def testfunc(x): self.assertIsNotNone(ex) uops = get_opnames(ex) self.assertIn("_JUMP_TO_TOP", uops) - self.assertIn("_LOAD_FAST_0", uops) + self.assertIn("_LOAD_FAST_BORROW_0", uops) def test_extended_arg(self): "Check EXTENDED_ARG handling in superblock creation" @@ -180,7 +180,7 @@ def many_vars(): ex = 
get_first_executor(many_vars) self.assertIsNotNone(ex) - self.assertTrue(any((opcode, oparg, operand) == ("_LOAD_FAST", 259, 0) + self.assertTrue(any((opcode, oparg, operand) == ("_LOAD_FAST_BORROW", 259, 0) for opcode, oparg, _, operand in list(ex))) def test_unspecialized_unpack(self): diff --git a/Python/bytecodes.c b/Python/bytecodes.c index ace65803d462c1..33e1a68211712b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -270,7 +270,7 @@ dummy_func( value = PyStackRef_DUP(GETLOCAL(oparg)); } - inst (LOAD_FAST_BORROW, (-- value)) { + replicate(8) pure inst (LOAD_FAST_BORROW, (-- value)) { assert(!PyStackRef_IsNull(GETLOCAL(oparg))); #ifdef Py_GIL_DISABLED value = PyStackRef_AsDeferred(GETLOCAL(oparg)); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 3fa26f07b1a38f..c0c95a98b5bf11 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -201,6 +201,134 @@ break; } + case _LOAD_FAST_BORROW_0: { + _PyStackRef value; + oparg = 0; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_1: { + _PyStackRef value; + oparg = 1; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_2: { + _PyStackRef value; + oparg = 2; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif + stack_pointer[0] = 
value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_3: { + _PyStackRef value; + oparg = 3; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_4: { + _PyStackRef value; + oparg = 4; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_5: { + _PyStackRef value; + oparg = 5; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_6: { + _PyStackRef value; + oparg = 6; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_7: { + _PyStackRef value; + oparg = 7; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case 
_LOAD_FAST_BORROW: { _PyStackRef value; oparg = CURRENT_OPARG(); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 29a05088e62c45..d52c21f90f25fd 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -591,6 +591,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) if (last->opcode == _LOAD_CONST_INLINE || last->opcode == _LOAD_CONST_INLINE_BORROW || last->opcode == _LOAD_FAST || + last->opcode == _LOAD_FAST_BORROW || last->opcode == _COPY ) { last->opcode = _NOP; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index f3625a1492c47c..5c6988a48a103f 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -85,6 +85,10 @@ dummy_func(void) { value = GETLOCAL(oparg); } + op(_LOAD_FAST_BORROW, (-- value)) { + value = GETLOCAL(oparg); + } + op(_LOAD_FAST_AND_CLEAR, (-- value)) { value = GETLOCAL(oparg); JitOptSymbol *temp = sym_new_null(ctx); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index aa195669f370f9..8f0ede03ac8b8f 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -49,7 +49,7 @@ case _LOAD_FAST_BORROW: { JitOptSymbol *value; - value = sym_new_not_null(ctx); + value = GETLOCAL(oparg); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); From 03d35b23f40995772f1d963c7a7f9f06ef758af8 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 28 Feb 2025 18:54:58 -0800 Subject: [PATCH 48/73] Fix missed doctest --- Doc/library/dis.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 06c872c4a920ef..20b7e1f42cfc08 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -215,7 +215,7 @@ Example: ... 
RESUME LOAD_GLOBAL - LOAD_FAST + LOAD_FAST_BORROW CALL RETURN_VALUE From 39ff3f0f427fd20365c548bdcc2401358bf5b57c Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 28 Feb 2025 19:16:40 -0800 Subject: [PATCH 49/73] Fix narrowing --- Python/flowgraph.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index a12e54801bd9bb..408917160cc31b 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2525,7 +2525,7 @@ store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r) } static void -load_fast_push_block(basicblock ***sp, basicblock *target, int start_depth) +load_fast_push_block(basicblock ***sp, basicblock *target, Py_ssize_t start_depth) { assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth); if (!target->b_visited) { From f012a9f5570488e28cb759c9111f66ca4e25ef27 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 28 Feb 2025 19:25:30 -0800 Subject: [PATCH 50/73] Formatting --- Include/internal/pycore_frame.h | 3 ++- Python/flowgraph.c | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 3e8c1341082d7c..33183725c59874 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -164,7 +164,8 @@ _PyFrame_CopyToHeap(_PyInterpreterFrame *src, _PyInterpreterFrame *dest) // "support" for borrowed references in the source frame. Convert any // borrowed references that were copied into dest into strong references. 
for (int i = 0; i < stacktop; i++) { - dest->localsplus[i] = _PyStackRef_NewIfBorrowedOrSteal(src->localsplus[i]); + dest->localsplus[i] = + _PyStackRef_NewIfBorrowedOrSteal(src->localsplus[i]); } dest->f_executable = _PyStackRef_NewIfBorrowedOrSteal(dest->f_executable); dest->f_funcobj = _PyStackRef_NewIfBorrowedOrSteal(dest->f_funcobj); diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 408917160cc31b..8cfa82211ee034 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2525,7 +2525,8 @@ store_local(uint8_t *instr_flags, ref_stack *refs, int local, ref r) } static void -load_fast_push_block(basicblock ***sp, basicblock *target, Py_ssize_t start_depth) +load_fast_push_block(basicblock ***sp, basicblock *target, + Py_ssize_t start_depth) { assert(target->b_startdepth >= 0 && target->b_startdepth == start_depth); if (!target->b_visited) { @@ -3816,7 +3817,8 @@ _PyCompile_OptimizeLoadFast(PyObject *seq) return NULL; } - cfg_builder *g = _PyCfg_FromInstructionSequence((_PyInstructionSequence*)seq); + cfg_builder *g = + _PyCfg_FromInstructionSequence((_PyInstructionSequence *)seq); if (g == NULL) { return NULL; } From 8ea82b53d20659b91c87e16805afcd107c1f4ced Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 13 Mar 2025 16:23:19 -0700 Subject: [PATCH 51/73] Implement PyStackRef_{Is,Make}HeapSafe --- Include/internal/pycore_stackref.h | 29 ++---- Include/internal/pycore_uop_ids.h | 108 +++++++++++--------- Include/opcode_ids.h | 68 +++++++------ Lib/test/test_dis.py | 87 ++++++++-------- Objects/genobject.c | 2 +- Python/bytecodes.c | 10 +- Python/executor_cases.c.h | 156 +++++++++++++++++++++++++++-- Python/generated_cases.c.h | 82 +++++++++++---- Python/opcode_targets.h | 10 +- Tools/cases_generator/analyzer.py | 2 +- 10 files changed, 374 insertions(+), 180 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index a972e9f3f6839c..5ecd32619a23a8 100644 --- a/Include/internal/pycore_stackref.h +++ 
b/Include/internal/pycore_stackref.h @@ -167,12 +167,6 @@ PyStackRef_XCLOSE(_PyStackRef ref) Py_DECREF(obj); } -static inline _PyStackRef -_PyStackRef_NewIfBorrowedOrSteal(_PyStackRef ref) -{ - return ref; -} - static inline _PyStackRef _PyStackRef_DUP(_PyStackRef ref, const char *filename, int linenumber) { @@ -285,9 +279,18 @@ PyStackRef_IsBorrowed(_PyStackRef stackref) return !(_Py_IsImmortal(obj) || _PyObject_HasDeferredRefcount(obj)); } +static inline bool +PyStackRef_IsHeapSafe(_PyStackRef stackref) +{ + if (PyStackRef_IsDeferred(stackref)) { + PyObject *obj = PyStackRef_AsPyObjectBorrow(stackref); + return obj == NULL || _Py_IsImmortal(obj) || _PyObject_HasDeferredRefcount(obj); + } + return true; +} static inline _PyStackRef -_PyStackRef_NewIfBorrowedOrSteal(_PyStackRef stackref) +PyStackRef_MakeHeapSafe(_PyStackRef stackref) { if (PyStackRef_IsBorrowed(stackref)) { PyObject *obj = PyStackRef_AsPyObjectBorrow(stackref); @@ -365,18 +368,6 @@ PyStackRef_AsDeferred(_PyStackRef stackref) return (_PyStackRef){ .bits = stackref.bits | Py_TAG_DEFERRED }; } -static inline bool -PyStackRef_IsHeapSafe(_PyStackRef ref) -{ - return true; -} - -static inline _PyStackRef -PyStackRef_MakeHeapSafe(_PyStackRef ref) -{ - return ref; -} - // Convert a possibly deferred reference to a strong reference. 
static inline _PyStackRef PyStackRef_AsStrongReference(_PyStackRef stackref) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 095fd043090fbb..1ae9bdbc819b59 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -202,89 +202,99 @@ extern "C" { #define _LOAD_FAST_6 427 #define _LOAD_FAST_7 428 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR +#define _LOAD_FAST_BORROW 429 +#define _LOAD_FAST_BORROW_0 430 +#define _LOAD_FAST_BORROW_1 431 +#define _LOAD_FAST_BORROW_2 432 +#define _LOAD_FAST_BORROW_3 433 +#define _LOAD_FAST_BORROW_4 434 +#define _LOAD_FAST_BORROW_5 435 +#define _LOAD_FAST_BORROW_6 436 +#define _LOAD_FAST_BORROW_7 437 +#define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 429 -#define _LOAD_GLOBAL_BUILTINS 430 -#define _LOAD_GLOBAL_MODULE 431 +#define _LOAD_GLOBAL 438 +#define _LOAD_GLOBAL_BUILTINS 439 +#define _LOAD_GLOBAL_MODULE 440 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 432 -#define _LOAD_SMALL_INT_0 433 -#define _LOAD_SMALL_INT_1 434 -#define _LOAD_SMALL_INT_2 435 -#define _LOAD_SMALL_INT_3 436 +#define _LOAD_SMALL_INT 441 +#define _LOAD_SMALL_INT_0 442 +#define _LOAD_SMALL_INT_1 443 +#define _LOAD_SMALL_INT_2 444 +#define _LOAD_SMALL_INT_3 445 #define _LOAD_SPECIAL LOAD_SPECIAL #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 437 +#define _MAKE_CALLARGS_A_TUPLE 446 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 438 +#define _MAKE_WARM 447 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING 
MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 439 -#define _MAYBE_EXPAND_METHOD_KW 440 -#define _MONITOR_CALL 441 -#define _MONITOR_CALL_KW 442 -#define _MONITOR_JUMP_BACKWARD 443 -#define _MONITOR_RESUME 444 +#define _MAYBE_EXPAND_METHOD 448 +#define _MAYBE_EXPAND_METHOD_KW 449 +#define _MONITOR_CALL 450 +#define _MONITOR_CALL_KW 451 +#define _MONITOR_JUMP_BACKWARD 452 +#define _MONITOR_RESUME 453 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 445 -#define _POP_JUMP_IF_TRUE 446 +#define _POP_JUMP_IF_FALSE 454 +#define _POP_JUMP_IF_TRUE 455 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 447 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 448 +#define _POP_TOP_LOAD_CONST_INLINE 456 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 457 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 449 +#define _PUSH_FRAME 458 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 450 -#define _PY_FRAME_GENERAL 451 -#define _PY_FRAME_KW 452 -#define _QUICKEN_RESUME 453 -#define _REPLACE_WITH_TRUE 454 +#define _PUSH_NULL_CONDITIONAL 459 +#define _PY_FRAME_GENERAL 460 +#define _PY_FRAME_KW 461 +#define _QUICKEN_RESUME 462 +#define _REPLACE_WITH_TRUE 463 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 455 -#define _SEND 456 -#define _SEND_GEN_FRAME 457 +#define _SAVE_RETURN_OFFSET 464 +#define _SEND 465 +#define _SEND_GEN_FRAME 466 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 458 -#define _STORE_ATTR 459 -#define _STORE_ATTR_INSTANCE_VALUE 460 -#define _STORE_ATTR_SLOT 461 -#define _STORE_ATTR_WITH_HINT 462 +#define _START_EXECUTOR 467 +#define _STORE_ATTR 468 +#define _STORE_ATTR_INSTANCE_VALUE 469 +#define _STORE_ATTR_SLOT 470 +#define _STORE_ATTR_WITH_HINT 471 #define 
_STORE_DEREF STORE_DEREF -#define _STORE_FAST 463 -#define _STORE_FAST_0 464 -#define _STORE_FAST_1 465 -#define _STORE_FAST_2 466 -#define _STORE_FAST_3 467 -#define _STORE_FAST_4 468 -#define _STORE_FAST_5 469 -#define _STORE_FAST_6 470 -#define _STORE_FAST_7 471 +#define _STORE_FAST 472 +#define _STORE_FAST_0 473 +#define _STORE_FAST_1 474 +#define _STORE_FAST_2 475 +#define _STORE_FAST_3 476 +#define _STORE_FAST_4 477 +#define _STORE_FAST_5 478 +#define _STORE_FAST_6 479 +#define _STORE_FAST_7 480 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 472 -#define _STORE_SUBSCR 473 +#define _STORE_SLICE 481 +#define _STORE_SUBSCR 482 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 474 -#define _TO_BOOL 475 +#define _TIER2_RESUME_CHECK 483 +#define _TO_BOOL 484 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -294,13 +304,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 476 +#define _UNPACK_SEQUENCE 485 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 476 +#define MAX_UOP_ID 485 #ifdef __cplusplus } diff --git a/Include/opcode_ids.h b/Include/opcode_ids.h index e4e6a88276655e..898dc580f4148e 100644 --- a/Include/opcode_ids.h +++ b/Include/opcode_ids.h @@ -94,39 +94,41 @@ extern "C" { #define LOAD_DEREF 81 #define LOAD_FAST 82 #define LOAD_FAST_AND_CLEAR 83 -#define LOAD_FAST_CHECK 84 -#define LOAD_FAST_LOAD_FAST 85 -#define LOAD_FROM_DICT_OR_DEREF 86 -#define 
LOAD_FROM_DICT_OR_GLOBALS 87 -#define LOAD_GLOBAL 88 -#define LOAD_NAME 89 -#define LOAD_SMALL_INT 90 -#define LOAD_SPECIAL 91 -#define LOAD_SUPER_ATTR 92 -#define MAKE_CELL 93 -#define MAP_ADD 94 -#define MATCH_CLASS 95 -#define POP_JUMP_IF_FALSE 96 -#define POP_JUMP_IF_NONE 97 -#define POP_JUMP_IF_NOT_NONE 98 -#define POP_JUMP_IF_TRUE 99 -#define RAISE_VARARGS 100 -#define RERAISE 101 -#define SEND 102 -#define SET_ADD 103 -#define SET_FUNCTION_ATTRIBUTE 104 -#define SET_UPDATE 105 -#define STORE_ATTR 106 -#define STORE_DEREF 107 -#define STORE_FAST 108 -#define STORE_FAST_LOAD_FAST 109 -#define STORE_FAST_STORE_FAST 110 -#define STORE_GLOBAL 111 -#define STORE_NAME 112 -#define SWAP 113 -#define UNPACK_EX 114 -#define UNPACK_SEQUENCE 115 -#define YIELD_VALUE 116 +#define LOAD_FAST_BORROW 84 +#define LOAD_FAST_BORROW_LOAD_FAST_BORROW 85 +#define LOAD_FAST_CHECK 86 +#define LOAD_FAST_LOAD_FAST 87 +#define LOAD_FROM_DICT_OR_DEREF 88 +#define LOAD_FROM_DICT_OR_GLOBALS 89 +#define LOAD_GLOBAL 90 +#define LOAD_NAME 91 +#define LOAD_SMALL_INT 92 +#define LOAD_SPECIAL 93 +#define LOAD_SUPER_ATTR 94 +#define MAKE_CELL 95 +#define MAP_ADD 96 +#define MATCH_CLASS 97 +#define POP_JUMP_IF_FALSE 98 +#define POP_JUMP_IF_NONE 99 +#define POP_JUMP_IF_NOT_NONE 100 +#define POP_JUMP_IF_TRUE 101 +#define RAISE_VARARGS 102 +#define RERAISE 103 +#define SEND 104 +#define SET_ADD 105 +#define SET_FUNCTION_ATTRIBUTE 106 +#define SET_UPDATE 107 +#define STORE_ATTR 108 +#define STORE_DEREF 109 +#define STORE_FAST 110 +#define STORE_FAST_LOAD_FAST 111 +#define STORE_FAST_STORE_FAST 112 +#define STORE_GLOBAL 113 +#define STORE_NAME 114 +#define SWAP 115 +#define UNPACK_EX 116 +#define UNPACK_SEQUENCE 117 +#define YIELD_VALUE 118 #define RESUME 128 #define BINARY_OP_ADD_FLOAT 129 #define BINARY_OP_ADD_INT 130 diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 6e1d94bd535663..62bf0d3d8a465b 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -51,10 +51,10 @@ def 
cm(cls, x): dis_c_instance_method = """\ %3d RESUME 0 -%3d LOAD_FAST 1 (x) +%3d LOAD_FAST_BORROW 1 (x) LOAD_SMALL_INT 1 COMPARE_OP 72 (==) - LOAD_FAST 0 (self) + LOAD_FAST_BORROW 0 (self) STORE_ATTR 0 (x) LOAD_CONST 0 (None) RETURN_VALUE @@ -62,10 +62,10 @@ def cm(cls, x): dis_c_instance_method_bytes = """\ RESUME 0 - LOAD_FAST 1 + LOAD_FAST_BORROW 1 LOAD_SMALL_INT 1 COMPARE_OP 72 (==) - LOAD_FAST 0 + LOAD_FAST_BORROW 0 STORE_ATTR 0 LOAD_CONST 0 RETURN_VALUE @@ -74,10 +74,10 @@ def cm(cls, x): dis_c_class_method = """\ %3d RESUME 0 -%3d LOAD_FAST 1 (x) +%3d LOAD_FAST_BORROW 1 (x) LOAD_SMALL_INT 1 COMPARE_OP 72 (==) - LOAD_FAST 0 (cls) + LOAD_FAST_BORROW 0 (cls) STORE_ATTR 0 (x) LOAD_CONST 0 (None) RETURN_VALUE @@ -86,7 +86,7 @@ def cm(cls, x): dis_c_static_method = """\ %3d RESUME 0 -%3d LOAD_FAST 0 (x) +%3d LOAD_FAST_BORROW 0 (x) LOAD_SMALL_INT 1 COMPARE_OP 72 (==) STORE_FAST 0 (x) @@ -114,7 +114,7 @@ def _f(a): %3d RESUME 0 %3d LOAD_GLOBAL 1 (print + NULL) - LOAD_FAST 0 (a) + LOAD_FAST_BORROW 0 (a) CALL 1 POP_TOP @@ -128,7 +128,7 @@ def _f(a): %3d 0 RESUME 0 %3d 2 LOAD_GLOBAL 1 (print + NULL) - 12 LOAD_FAST 0 (a) + 12 LOAD_FAST_BORROW 0 (a) 14 CALL 1 22 POP_TOP @@ -142,7 +142,7 @@ def _f(a): %-14s RESUME 0 %-14s LOAD_GLOBAL 1 (print + NULL) -%-14s LOAD_FAST 0 (a) +%-14s LOAD_FAST_BORROW 0 (a) %-14s CALL 1 %-14s POP_TOP @@ -153,7 +153,7 @@ def _f(a): dis_f_co_code = """\ RESUME 0 LOAD_GLOBAL 1 - LOAD_FAST 0 + LOAD_FAST_BORROW 0 CALL 1 POP_TOP LOAD_SMALL_INT 1 @@ -203,7 +203,7 @@ def bug1333982(x=[]): %3d LOAD_COMMON_CONSTANT 0 (AssertionError) LOAD_CONST 0 ( at 0x..., file "%s", line %d>) MAKE_FUNCTION - LOAD_FAST 0 (x) + LOAD_FAST_BORROW 0 (x) GET_ITER CALL 0 @@ -503,18 +503,18 @@ def _fstring(a, b, c, d): dis_fstring = """\ %3d RESUME 0 -%3d LOAD_FAST 0 (a) +%3d LOAD_FAST_BORROW 0 (a) FORMAT_SIMPLE LOAD_CONST 0 (' ') - LOAD_FAST 1 (b) + LOAD_FAST_BORROW 1 (b) LOAD_CONST 1 ('4') FORMAT_WITH_SPEC LOAD_CONST 0 (' ') - LOAD_FAST 2 (c) + LOAD_FAST_BORROW 2 (c) 
CONVERT_VALUE 2 (repr) FORMAT_SIMPLE LOAD_CONST 0 (' ') - LOAD_FAST 3 (d) + LOAD_FAST_BORROW 3 (d) CONVERT_VALUE 2 (repr) LOAD_CONST 1 ('4') FORMAT_WITH_SPEC @@ -530,7 +530,7 @@ def _with(c): dis_with = """\ %4d RESUME 0 -%4d LOAD_FAST 0 (c) +%4d LOAD_FAST_BORROW 0 (c) COPY 1 LOAD_SPECIAL 1 (__exit__) SWAP 2 @@ -595,7 +595,7 @@ async def _asyncwith(c): POP_TOP L1: RESUME 0 -%4d LOAD_FAST 0 (c) +%4d LOAD_FAST_BORROW 0 (c) COPY 1 LOAD_SPECIAL 3 (__aexit__) SWAP 2 @@ -707,9 +707,9 @@ def _tryfinallyconst(b): %4d NOP -%4d L1: LOAD_FAST 0 (a) +%4d L1: LOAD_FAST_BORROW 0 (a) -%4d L2: LOAD_FAST 1 (b) +%4d L2: LOAD_FAST_BORROW 1 (b) PUSH_NULL CALL 0 POP_TOP @@ -743,7 +743,7 @@ def _tryfinallyconst(b): %4d NOP -%4d LOAD_FAST 0 (b) +%4d LOAD_FAST_BORROW 0 (b) PUSH_NULL CALL 0 POP_TOP @@ -791,14 +791,14 @@ def foo(x): %4d RESUME 0 -%4d LOAD_FAST 0 (y) +%4d LOAD_FAST_BORROW 0 (y) BUILD_TUPLE 1 LOAD_CONST 0 () MAKE_FUNCTION SET_FUNCTION_ATTRIBUTE 8 (closure) STORE_FAST 1 (foo) -%4d LOAD_FAST 1 (foo) +%4d LOAD_FAST_BORROW 1 (foo) RETURN_VALUE """ % (_h.__code__.co_firstlineno, _h.__code__.co_firstlineno + 1, @@ -815,7 +815,7 @@ def foo(x): %4d RESUME 0 %4d LOAD_GLOBAL 1 (list + NULL) - LOAD_FAST 0 (x) + LOAD_FAST_BORROW 0 (x) BUILD_TUPLE 1 LOAD_CONST 1 ( at 0x..., file "%s", line %d>) MAKE_FUNCTION @@ -841,12 +841,12 @@ def foo(x): %4d RETURN_GENERATOR POP_TOP L1: RESUME 0 - LOAD_FAST 0 (.0) + LOAD_FAST_BORROW 0 (.0) GET_ITER L2: FOR_ITER 14 (to L3) STORE_FAST 1 (z) LOAD_DEREF 2 (x) - LOAD_FAST 1 (z) + LOAD_FAST_BORROW 1 (z) BINARY_OP 0 (+) YIELD_VALUE 0 RESUME 5 @@ -996,6 +996,7 @@ def test_boundaries(self): def test_widths(self): long_opcodes = set(['JUMP_BACKWARD_NO_INTERRUPT', + 'LOAD_FAST_BORROW_LOAD_FAST_BORROW', 'INSTRUMENTED_CALL_FUNCTION_EX']) for op, opname in enumerate(dis.opname): if opname in long_opcodes or opname.startswith("INSTRUMENTED"): @@ -1735,8 +1736,8 @@ def _prepare_test_cases(): make_inst(opname='MAKE_CELL', arg=1, argval='b', argrepr='b', offset=2, 
start_offset=2, starts_line=False, line_number=None), make_inst(opname='RESUME', arg=0, argval=0, argrepr='', offset=4, start_offset=4, starts_line=True, line_number=1), make_inst(opname='LOAD_CONST', arg=3, argval=(3, 4), argrepr='(3, 4)', offset=6, start_offset=6, starts_line=True, line_number=2), - make_inst(opname='LOAD_FAST', arg=0, argval='a', argrepr='a', offset=8, start_offset=8, starts_line=False, line_number=2), - make_inst(opname='LOAD_FAST', arg=1, argval='b', argrepr='b', offset=10, start_offset=10, starts_line=False, line_number=2), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='a', argrepr='a', offset=8, start_offset=8, starts_line=False, line_number=2), + make_inst(opname='LOAD_FAST_BORROW', arg=1, argval='b', argrepr='b', offset=10, start_offset=10, starts_line=False, line_number=2), make_inst(opname='BUILD_TUPLE', arg=2, argval=2, argrepr='', offset=12, start_offset=12, starts_line=False, line_number=2), make_inst(opname='LOAD_CONST', arg=0, argval=code_object_f, argrepr=repr(code_object_f), offset=14, start_offset=14, starts_line=False, line_number=2), make_inst(opname='MAKE_FUNCTION', arg=None, argval=None, argrepr='', offset=16, start_offset=16, starts_line=False, line_number=2), @@ -1753,7 +1754,7 @@ def _prepare_test_cases(): make_inst(opname='LOAD_CONST', arg=2, argval='Hello world!', argrepr="'Hello world!'", offset=46, start_offset=46, starts_line=False, line_number=7), make_inst(opname='CALL', arg=7, argval=7, argrepr='', offset=48, start_offset=48, starts_line=False, line_number=7, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=56, start_offset=56, starts_line=False, line_number=7), - make_inst(opname='LOAD_FAST', arg=2, argval='f', argrepr='f', offset=58, start_offset=58, starts_line=True, line_number=8), + make_inst(opname='LOAD_FAST_BORROW', arg=2, argval='f', argrepr='f', offset=58, start_offset=58, starts_line=True, 
line_number=8), make_inst(opname='RETURN_VALUE', arg=None, argval=None, argrepr='', offset=60, start_offset=60, starts_line=False, line_number=8), ] @@ -1763,10 +1764,10 @@ def _prepare_test_cases(): make_inst(opname='MAKE_CELL', arg=1, argval='d', argrepr='d', offset=4, start_offset=4, starts_line=False, line_number=None), make_inst(opname='RESUME', arg=0, argval=0, argrepr='', offset=6, start_offset=6, starts_line=True, line_number=2), make_inst(opname='LOAD_CONST', arg=1, argval=(5, 6), argrepr='(5, 6)', offset=8, start_offset=8, starts_line=True, line_number=3), - make_inst(opname='LOAD_FAST', arg=3, argval='a', argrepr='a', offset=10, start_offset=10, starts_line=False, line_number=3), - make_inst(opname='LOAD_FAST', arg=4, argval='b', argrepr='b', offset=12, start_offset=12, starts_line=False, line_number=3), - make_inst(opname='LOAD_FAST', arg=0, argval='c', argrepr='c', offset=14, start_offset=14, starts_line=False, line_number=3), - make_inst(opname='LOAD_FAST', arg=1, argval='d', argrepr='d', offset=16, start_offset=16, starts_line=False, line_number=3), + make_inst(opname='LOAD_FAST_BORROW', arg=3, argval='a', argrepr='a', offset=10, start_offset=10, starts_line=False, line_number=3), + make_inst(opname='LOAD_FAST_BORROW', arg=4, argval='b', argrepr='b', offset=12, start_offset=12, starts_line=False, line_number=3), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='c', argrepr='c', offset=14, start_offset=14, starts_line=False, line_number=3), + make_inst(opname='LOAD_FAST_BORROW', arg=1, argval='d', argrepr='d', offset=16, start_offset=16, starts_line=False, line_number=3), make_inst(opname='BUILD_TUPLE', arg=4, argval=4, argrepr='', offset=18, start_offset=18, starts_line=False, line_number=3), make_inst(opname='LOAD_CONST', arg=0, argval=code_object_inner, argrepr=repr(code_object_inner), offset=20, start_offset=20, starts_line=False, line_number=3), make_inst(opname='MAKE_FUNCTION', arg=None, argval=None, argrepr='', offset=22, start_offset=22, 
starts_line=False, line_number=3), @@ -1780,7 +1781,7 @@ def _prepare_test_cases(): make_inst(opname='LOAD_DEREF', arg=1, argval='d', argrepr='d', offset=46, start_offset=46, starts_line=False, line_number=5), make_inst(opname='CALL', arg=4, argval=4, argrepr='', offset=48, start_offset=48, starts_line=False, line_number=5, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=56, start_offset=56, starts_line=False, line_number=5), - make_inst(opname='LOAD_FAST', arg=2, argval='inner', argrepr='inner', offset=58, start_offset=58, starts_line=True, line_number=6), + make_inst(opname='LOAD_FAST_BORROW', arg=2, argval='inner', argrepr='inner', offset=58, start_offset=58, starts_line=True, line_number=6), make_inst(opname='RETURN_VALUE', arg=None, argval=None, argrepr='', offset=60, start_offset=60, starts_line=False, line_number=6), ] @@ -1792,7 +1793,7 @@ def _prepare_test_cases(): make_inst(opname='LOAD_DEREF', arg=3, argval='b', argrepr='b', offset=16, start_offset=16, starts_line=False, line_number=4), make_inst(opname='LOAD_DEREF', arg=4, argval='c', argrepr='c', offset=18, start_offset=18, starts_line=False, line_number=4), make_inst(opname='LOAD_DEREF', arg=5, argval='d', argrepr='d', offset=20, start_offset=20, starts_line=False, line_number=4), - make_inst(opname='LOAD_FAST_LOAD_FAST', arg=1, argval=('e', 'f'), argrepr='e, f', offset=22, start_offset=22, starts_line=False, line_number=4), + make_inst(opname='LOAD_FAST_BORROW_LOAD_FAST_BORROW', arg=1, argval=('e', 'f'), argrepr='e, f', offset=22, start_offset=22, starts_line=False, line_number=4), make_inst(opname='CALL', arg=6, argval=6, argrepr='', offset=24, start_offset=24, starts_line=False, line_number=4, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=32, start_offset=32, starts_line=False, 
line_number=4), make_inst(opname='LOAD_CONST', arg=0, argval=None, argrepr='None', offset=34, start_offset=34, starts_line=False, line_number=4), @@ -1808,16 +1809,16 @@ def _prepare_test_cases(): make_inst(opname='FOR_ITER', arg=32, argval=92, argrepr='to L4', offset=24, start_offset=24, starts_line=False, line_number=3, label=1, cache_info=[('counter', 1, b'\x00\x00')]), make_inst(opname='STORE_FAST', arg=0, argval='i', argrepr='i', offset=28, start_offset=28, starts_line=False, line_number=3), make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=30, start_offset=30, starts_line=True, line_number=4, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), - make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=40, start_offset=40, starts_line=False, line_number=4), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=40, start_offset=40, starts_line=False, line_number=4), make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=42, start_offset=42, starts_line=False, line_number=4, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=50, start_offset=50, starts_line=False, line_number=4), - make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=52, start_offset=52, starts_line=True, line_number=5), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=52, start_offset=52, starts_line=True, line_number=5), make_inst(opname='LOAD_SMALL_INT', arg=4, argval=4, argrepr='', offset=54, start_offset=54, starts_line=False, line_number=5), make_inst(opname='COMPARE_OP', arg=18, argval='<', argrepr='bool(<)', offset=56, start_offset=56, starts_line=False, line_number=5, cache_info=[('counter', 1, b'\x00\x00')]), make_inst(opname='POP_JUMP_IF_FALSE', arg=3, argval=70, 
argrepr='to L2', offset=60, start_offset=60, starts_line=False, line_number=5, cache_info=[('counter', 1, b'\x00\x00')]), make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=64, start_offset=64, starts_line=False, line_number=5), make_inst(opname='JUMP_BACKWARD', arg=23, argval=24, argrepr='to L1', offset=66, start_offset=66, starts_line=True, line_number=6, cache_info=[('counter', 1, b'\x00\x00')]), - make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=70, start_offset=70, starts_line=True, line_number=7, label=2), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=70, start_offset=70, starts_line=True, line_number=7, label=2), make_inst(opname='LOAD_SMALL_INT', arg=6, argval=6, argrepr='', offset=72, start_offset=72, starts_line=False, line_number=7), make_inst(opname='COMPARE_OP', arg=148, argval='>', argrepr='bool(>)', offset=74, start_offset=74, starts_line=False, line_number=7, cache_info=[('counter', 1, b'\x00\x00')]), make_inst(opname='POP_JUMP_IF_TRUE', arg=3, argval=88, argrepr='to L3', offset=78, start_offset=78, starts_line=False, line_number=7, cache_info=[('counter', 1, b'\x00\x00')]), @@ -1836,20 +1837,20 @@ def _prepare_test_cases(): make_inst(opname='POP_JUMP_IF_FALSE', arg=40, argval=212, argrepr='to L8', offset=128, start_offset=128, starts_line=False, line_number=11, cache_info=[('counter', 1, b'\x00\x00')]), make_inst(opname='NOT_TAKEN', arg=None, argval=None, argrepr='', offset=132, start_offset=132, starts_line=False, line_number=11), make_inst(opname='LOAD_GLOBAL', arg=3, argval='print', argrepr='print + NULL', offset=134, start_offset=134, starts_line=True, line_number=12, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), - make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=144, start_offset=144, starts_line=False, line_number=12), + 
make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=144, start_offset=144, starts_line=False, line_number=12), make_inst(opname='CALL', arg=1, argval=1, argrepr='', offset=146, start_offset=146, starts_line=False, line_number=12, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=154, start_offset=154, starts_line=False, line_number=12), - make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=156, start_offset=156, starts_line=True, line_number=13), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=156, start_offset=156, starts_line=True, line_number=13), make_inst(opname='LOAD_SMALL_INT', arg=1, argval=1, argrepr='', offset=158, start_offset=158, starts_line=False, line_number=13), make_inst(opname='BINARY_OP', arg=23, argval=23, argrepr='-=', offset=160, start_offset=160, starts_line=False, line_number=13, cache_info=[('counter', 1, b'\x00\x00'), ('descr', 4, b'\x00\x00\x00\x00\x00\x00\x00\x00')]), make_inst(opname='STORE_FAST', arg=0, argval='i', argrepr='i', offset=172, start_offset=172, starts_line=False, line_number=13), - make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=174, start_offset=174, starts_line=True, line_number=14), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=174, start_offset=174, starts_line=True, line_number=14), make_inst(opname='LOAD_SMALL_INT', arg=6, argval=6, argrepr='', offset=176, start_offset=176, starts_line=False, line_number=14), make_inst(opname='COMPARE_OP', arg=148, argval='>', argrepr='bool(>)', offset=178, start_offset=178, starts_line=False, line_number=14, cache_info=[('counter', 1, b'\x00\x00')]), make_inst(opname='POP_JUMP_IF_FALSE', arg=3, argval=192, argrepr='to L6', offset=182, start_offset=182, starts_line=False, line_number=14, cache_info=[('counter', 1, b'\x00\x00')]), make_inst(opname='NOT_TAKEN', 
arg=None, argval=None, argrepr='', offset=186, start_offset=186, starts_line=False, line_number=14), make_inst(opname='JUMP_BACKWARD', arg=37, argval=118, argrepr='to L5', offset=188, start_offset=188, starts_line=True, line_number=15, cache_info=[('counter', 1, b'\x00\x00')]), - make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=192, start_offset=192, starts_line=True, line_number=16, label=6), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=192, start_offset=192, starts_line=True, line_number=16, label=6), make_inst(opname='LOAD_SMALL_INT', arg=4, argval=4, argrepr='', offset=194, start_offset=194, starts_line=False, line_number=16), make_inst(opname='COMPARE_OP', arg=18, argval='<', argrepr='bool(<)', offset=196, start_offset=196, starts_line=False, line_number=16, cache_info=[('counter', 1, b'\x00\x00')]), make_inst(opname='POP_JUMP_IF_TRUE', arg=3, argval=210, argrepr='to L7', offset=200, start_offset=200, starts_line=False, line_number=16, cache_info=[('counter', 1, b'\x00\x00')]), @@ -1865,7 +1866,7 @@ def _prepare_test_cases(): make_inst(opname='LOAD_SMALL_INT', arg=0, argval=0, argrepr='', offset=238, start_offset=238, starts_line=False, line_number=21), make_inst(opname='BINARY_OP', arg=11, argval=11, argrepr='/', offset=240, start_offset=240, starts_line=False, line_number=21, cache_info=[('counter', 1, b'\x00\x00'), ('descr', 4, b'\x00\x00\x00\x00\x00\x00\x00\x00')]), make_inst(opname='POP_TOP', arg=None, argval=None, argrepr='', offset=252, start_offset=252, starts_line=False, line_number=21), - make_inst(opname='LOAD_FAST', arg=0, argval='i', argrepr='i', offset=254, start_offset=254, starts_line=True, line_number=25), + make_inst(opname='LOAD_FAST_BORROW', arg=0, argval='i', argrepr='i', offset=254, start_offset=254, starts_line=True, line_number=25), make_inst(opname='COPY', arg=1, argval=1, argrepr='', offset=256, start_offset=256, starts_line=False, line_number=25), make_inst(opname='LOAD_SPECIAL', 
arg=1, argval=1, argrepr='__exit__', offset=258, start_offset=258, starts_line=False, line_number=25), make_inst(opname='SWAP', arg=2, argval=2, argrepr='', offset=260, start_offset=260, starts_line=False, line_number=25), diff --git a/Objects/genobject.c b/Objects/genobject.c index 81047ea46242cf..79aed8571c35e7 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -989,7 +989,7 @@ gen_new_with_qualname(PyTypeObject *type, PyFrameObject *f, assert(f->f_frame->frame_obj == NULL); assert(f->f_frame->owner == FRAME_OWNED_BY_FRAME_OBJECT); _PyInterpreterFrame *frame = &gen->gi_iframe; - _PyFrame_CopyToNewGen((_PyInterpreterFrame *)f->_f_frame_data, frame); + _PyFrame_Copy((_PyInterpreterFrame *)f->_f_frame_data, frame); gen->gi_frame_state = FRAME_CREATED; assert(frame->frame_obj == f); f->f_frame = frame; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 69656070100484..7c1d98f10ea9a1 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1125,7 +1125,7 @@ dummy_func( // is pushed to a different frame, the callers' frame. 
inst(RETURN_VALUE, (retval -- res)) { assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef temp = PyStackRef_MakeHeapSafe(temp); + _PyStackRef temp = PyStackRef_MakeHeapSafe(retval); DEAD(retval); SAVE_STACK(); assert(EMPTY()); @@ -1223,7 +1223,7 @@ dummy_func( PyObject *retval_o; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef tmp = _PyStackRef_NewIfBorrowedOrSteal(v); + _PyStackRef tmp = PyStackRef_MakeHeapSafe(v); DEAD(v); if ((tstate->interp->eval_frame == NULL) && (Py_TYPE(receiver_o) == &PyGen_Type || Py_TYPE(receiver_o) == &PyCoro_Type) && @@ -1277,7 +1277,7 @@ dummy_func( DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING); STAT_INC(SEND, hit); gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, _PyStackRef_NewIfBorrowedOrSteal(v)); + _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v)); DEAD(v); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; @@ -1324,7 +1324,7 @@ dummy_func( #endif RELOAD_STACK(); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); - value = _PyStackRef_NewIfBorrowedOrSteal(temp); + value = PyStackRef_MakeHeapSafe(temp); LLTRACE_RESUME_FRAME(); } @@ -4810,7 +4810,7 @@ dummy_func( SAVE_STACK(); _PyInterpreterFrame *gen_frame = &gen->gi_iframe; frame->instr_ptr++; - _PyFrame_CopyToNewGen(frame, gen_frame); + _PyFrame_Copy(frame, gen_frame); assert(frame->frame_obj == NULL); gen->gi_frame_state = FRAME_CREATED; gen_frame->owner = FRAME_OWNED_BY_GENERATOR; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index fb5b2240436f1c..26721ef5fbd585 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -201,6 +201,149 @@ break; } + case _LOAD_FAST_BORROW_0: { + _PyStackRef value; + oparg = 0; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif + stack_pointer[0] = value; + 
stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_1: { + _PyStackRef value; + oparg = 1; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_2: { + _PyStackRef value; + oparg = 2; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_3: { + _PyStackRef value; + oparg = 3; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_4: { + _PyStackRef value; + oparg = 4; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_5: { + _PyStackRef value; + oparg = 5; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_6: { + 
_PyStackRef value; + oparg = 6; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW_7: { + _PyStackRef value; + oparg = 7; + assert(oparg == CURRENT_OPARG()); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_BORROW: { + _PyStackRef value; + oparg = CURRENT_OPARG(); + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _LOAD_FAST_AND_CLEAR: { _PyStackRef value; oparg = CURRENT_OPARG(); @@ -1016,9 +1159,8 @@ right = stack_pointer[-1]; left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - PyObject *right_o = PyStackRef_AsPyObjectSteal(right); assert(PyUnicode_CheckExact(left_o)); - assert(PyUnicode_CheckExact(right_o)); + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(right))); int next_oparg; #if TIER_ONE assert(next_instr->op.code == STORE_FAST); @@ -1044,9 +1186,10 @@ * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. 
*/ - assert(Py_REFCNT(left_o) >= 2); + assert(Py_REFCNT(left_o) >= 2 || PyStackRef_IsBorrowed(left)); PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); + PyObject *right_o = PyStackRef_AsPyObjectSteal(right); stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -1694,8 +1837,7 @@ _PyStackRef res; retval = stack_pointer[-1]; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef temp = retval; - assert(PyStackRef_IsHeapSafe(temp)); + _PyStackRef temp = PyStackRef_MakeHeapSafe(retval); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -1829,7 +1971,7 @@ } STAT_INC(SEND, hit); gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, v); + _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v)); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; @@ -1876,7 +2018,7 @@ #endif stack_pointer = _PyFrame_GetStackPointer(frame); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); - value = temp; + value = PyStackRef_MakeHeapSafe(temp); LLTRACE_RESUME_FRAME(); stack_pointer[0] = value; stack_pointer += 1; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 8cdbaf1a4d5761..a2695573bb0d3a 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -340,9 +340,8 @@ // _BINARY_OP_INPLACE_ADD_UNICODE { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - PyObject *right_o = PyStackRef_AsPyObjectSteal(right); assert(PyUnicode_CheckExact(left_o)); - assert(PyUnicode_CheckExact(right_o)); + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(right))); int next_oparg; #if TIER_ONE assert(next_instr->op.code == STORE_FAST); @@ -369,9 +368,10 @@ * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. 
*/ - assert(Py_REFCNT(left_o) >= 2); + assert(Py_REFCNT(left_o) >= 2 || PyStackRef_IsBorrowed(left)); PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); + PyObject *right_o = PyStackRef_AsPyObjectSteal(right); stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -7325,8 +7325,7 @@ { retval = val; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef temp = retval; - assert(PyStackRef_IsHeapSafe(temp)); + _PyStackRef temp = PyStackRef_MakeHeapSafe(retval); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -7410,7 +7409,7 @@ #endif stack_pointer = _PyFrame_GetStackPointer(frame); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); - value = temp; + value = PyStackRef_MakeHeapSafe(temp); LLTRACE_RESUME_FRAME(); } stack_pointer[0] = value; @@ -8896,6 +8895,53 @@ DISPATCH(); } + TARGET(LOAD_FAST_BORROW) { + #if Py_TAIL_CALL_INTERP + int opcode = LOAD_FAST_BORROW; + (void)(opcode); + #endif + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(LOAD_FAST_BORROW); + _PyStackRef value; + assert(!PyStackRef_IsNull(GETLOCAL(oparg))); + #ifdef Py_GIL_DISABLED + value = PyStackRef_AsDeferred(GETLOCAL(oparg)); + #else + value = PyStackRef_DUP(GETLOCAL(oparg)); + #endif + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + DISPATCH(); + } + + TARGET(LOAD_FAST_BORROW_LOAD_FAST_BORROW) { + #if Py_TAIL_CALL_INTERP + int opcode = LOAD_FAST_BORROW_LOAD_FAST_BORROW; + (void)(opcode); + #endif + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(LOAD_FAST_BORROW_LOAD_FAST_BORROW); + _PyStackRef value1; + _PyStackRef value2; + uint32_t oparg1 = oparg >> 4; + uint32_t oparg2 = oparg & 15; + #ifdef Py_GIL_DISABLED + value1 = PyStackRef_AsDeferred(GETLOCAL(oparg1)); + value2 = PyStackRef_AsDeferred(GETLOCAL(oparg2)); + #else + value1 = 
PyStackRef_DUP(GETLOCAL(oparg1)); + value2 = PyStackRef_DUP(GETLOCAL(oparg2)); + #endif + stack_pointer[0] = value1; + stack_pointer[1] = value2; + stack_pointer += 2; + assert(WITHIN_STACK_BOUNDS()); + DISPATCH(); + } + TARGET(LOAD_FAST_CHECK) { #if Py_TAIL_CALL_INTERP int opcode = LOAD_FAST_CHECK; @@ -10364,8 +10410,7 @@ _PyStackRef res; retval = stack_pointer[-1]; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef temp = retval; - assert(PyStackRef_IsHeapSafe(temp)); + _PyStackRef temp = PyStackRef_MakeHeapSafe(retval); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -10422,6 +10467,7 @@ PyObject *receiver_o = PyStackRef_AsPyObjectBorrow(receiver); PyObject *retval_o; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); + _PyStackRef tmp = PyStackRef_MakeHeapSafe(v); if ((tstate->interp->eval_frame == NULL) && (Py_TYPE(receiver_o) == &PyGen_Type || Py_TYPE(receiver_o) == &PyCoro_Type) && ((PyGenObject *)receiver_o)->gi_frame_state < FRAME_EXECUTING) @@ -10429,7 +10475,7 @@ PyGenObject *gen = (PyGenObject *)receiver_o; _PyInterpreterFrame *gen_frame = &gen->gi_iframe; STACK_SHRINK(1); - _PyFrame_StackPush(gen_frame, v); + _PyFrame_StackPush(gen_frame, tmp); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; @@ -10440,15 +10486,19 @@ DISPATCH_INLINED(gen_frame); } if (PyStackRef_IsNone(v) && PyIter_Check(receiver_o)) { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); retval_o = Py_TYPE(receiver_o)->tp_iternext(receiver_o); stack_pointer = _PyFrame_GetStackPointer(frame); } else { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); retval_o = PyObject_CallMethodOneArg(receiver_o, &_Py_ID(send), - PyStackRef_AsPyObjectBorrow(v)); + PyStackRef_AsPyObjectBorrow(tmp)); stack_pointer = _PyFrame_GetStackPointer(frame); } 
if (retval_o == NULL) { @@ -10468,18 +10518,14 @@ JUMPBY(oparg); } else { - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(v); + PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); JUMP_TO_LABEL(error); } } - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(v); + PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); retval = PyStackRef_FromPyObjectSteal(retval_o); } @@ -10530,7 +10576,7 @@ } STAT_INC(SEND, hit); gen_frame = &gen->gi_iframe; - _PyFrame_StackPush(gen_frame, v); + _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v)); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; @@ -12051,7 +12097,7 @@ #endif stack_pointer = _PyFrame_GetStackPointer(frame); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); - value = temp; + value = PyStackRef_MakeHeapSafe(temp); LLTRACE_RESUME_FRAME(); stack_pointer[0] = value; stack_pointer += 1; diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index c0dac90aebd458..5b9fb794c6bddc 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -84,6 +84,8 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_DEREF, &&TARGET_LOAD_FAST, &&TARGET_LOAD_FAST_AND_CLEAR, + &&TARGET_LOAD_FAST_BORROW, + &&TARGET_LOAD_FAST_BORROW_LOAD_FAST_BORROW, &&TARGET_LOAD_FAST_CHECK, &&TARGET_LOAD_FAST_LOAD_FAST, &&TARGET_LOAD_FROM_DICT_OR_DEREF, @@ -126,8 +128,6 @@ static void *opcode_targets[256] = { &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, &&TARGET_RESUME, &&TARGET_BINARY_OP_ADD_FLOAT, &&TARGET_BINARY_OP_ADD_INT, @@ -414,6 +414,8 @@ Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_CONST_MORTAL(TAIL_CALL_PARA Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_DEREF(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static 
PyObject *_TAIL_CALL_LOAD_FAST(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_AND_CLEAR(TAIL_CALL_PARAMS); +Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_BORROW(TAIL_CALL_PARAMS); +Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_BORROW_LOAD_FAST_BORROW(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_CHECK(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FAST_LOAD_FAST(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_LOAD_FROM_DICT_OR_DEREF(TAIL_CALL_PARAMS); @@ -648,6 +650,8 @@ static py_tail_call_funcptr INSTRUCTION_TABLE[256] = { [LOAD_DEREF] = _TAIL_CALL_LOAD_DEREF, [LOAD_FAST] = _TAIL_CALL_LOAD_FAST, [LOAD_FAST_AND_CLEAR] = _TAIL_CALL_LOAD_FAST_AND_CLEAR, + [LOAD_FAST_BORROW] = _TAIL_CALL_LOAD_FAST_BORROW, + [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = _TAIL_CALL_LOAD_FAST_BORROW_LOAD_FAST_BORROW, [LOAD_FAST_CHECK] = _TAIL_CALL_LOAD_FAST_CHECK, [LOAD_FAST_LOAD_FAST] = _TAIL_CALL_LOAD_FAST_LOAD_FAST, [LOAD_FROM_DICT_OR_DEREF] = _TAIL_CALL_LOAD_FROM_DICT_OR_DEREF, @@ -725,8 +729,6 @@ static py_tail_call_funcptr INSTRUCTION_TABLE[256] = { [UNPACK_SEQUENCE_TWO_TUPLE] = _TAIL_CALL_UNPACK_SEQUENCE_TWO_TUPLE, [WITH_EXCEPT_START] = _TAIL_CALL_WITH_EXCEPT_START, [YIELD_VALUE] = _TAIL_CALL_YIELD_VALUE, - [117] = _TAIL_CALL_UNKNOWN_OPCODE, - [118] = _TAIL_CALL_UNKNOWN_OPCODE, [119] = _TAIL_CALL_UNKNOWN_OPCODE, [120] = _TAIL_CALL_UNKNOWN_OPCODE, [121] = _TAIL_CALL_UNKNOWN_OPCODE, diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index f1cd8337174df1..15ac008674ef1a 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -605,6 +605,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_IsTrue", "PyStackRef_IsFalse", "PyStackRef_IsNull", + "PyStackRef_MakeHeapSafe", "PyStackRef_None", "PyStackRef_TYPE", "PyStackRef_True", @@ -654,7 +655,6 @@ def has_error_without_pop(op: 
parser.CodeDef) -> bool: "_PyObject_InlineValues", "_PyObject_IsUniquelyReferenced", "_PyObject_ManagedDictPointer", - "_PyStackRef_NewIfBorrowedOrSteal", "_PyThreadState_HasStackSpace", "_PyTuple_FromStackRefStealOnSuccess", "_PyTuple_ITEMS", From 9bec9f50fd087e17bae3b3772728ef1d525779c0 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 13 Mar 2025 16:32:36 -0700 Subject: [PATCH 52/73] Add missing error handling --- Python/flowgraph.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 711e4e5426f89f..703763cc307691 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2631,7 +2631,9 @@ optimize_load_fast(cfg_builder *g, bool compute_stackdepth) int max_instrs = 0; basicblock *entryblock = g->g_entryblock; if (compute_stackdepth) { - calculate_stackdepth(g); + if (calculate_stackdepth(g) == ERROR) { + return ERROR; + } } for (basicblock *b = entryblock; b != NULL; b = b->b_next) { max_instrs = Py_MAX(max_instrs, b->b_iused); @@ -2665,7 +2667,10 @@ optimize_load_fast(cfg_builder *g, bool compute_stackdepth) // presence. Add dummy references as necessary. 
ref_stack_clear(&refs); for (int i = 0; i < block->b_startdepth; i++) { - ref_stack_push(&refs, DUMMY_REF); + if (ref_stack_push(&refs, DUMMY_REF) < 0) { + status = ERROR; + goto done; + } } for (int i = 0; i < block->b_iused; i++) { From 902ae849414a34f30f9689dcc63d0f3cf0872311 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 13 Mar 2025 16:34:32 -0700 Subject: [PATCH 53/73] Simplify frees --- Python/flowgraph.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 703763cc307691..46544e9285d283 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2817,12 +2817,8 @@ optimize_load_fast(cfg_builder *g, bool compute_stackdepth) done: ref_stack_fini(&refs); - if (instr_flags != NULL) { - PyMem_Free(instr_flags); - } - if (blocks != NULL) { - PyMem_Free(blocks); - } + PyMem_Free(instr_flags); + PyMem_Free(blocks); return status; } From b0ea38fe9d2454460cf7bb1adf69ca01409a7c2a Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 13 Mar 2025 16:45:16 -0700 Subject: [PATCH 54/73] Get rid of `PyStackRef_IsBorrowed` --- Include/internal/pycore_stackref.h | 24 ++++-------------------- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 2 +- Tools/cases_generator/analyzer.py | 1 - 5 files changed, 7 insertions(+), 24 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 5ecd32619a23a8..98673bec360fd0 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -104,12 +104,6 @@ PyStackRef_IsNone(_PyStackRef ref) return _Py_stackref_get_object(ref) == Py_None; } -static inline bool -PyStackRef_IsBorrowed(_PyStackRef ref) -{ - return false; -} - static inline PyObject * _PyStackRef_AsPyObjectBorrow(_PyStackRef ref, const char *filename, int linenumber) { @@ -269,16 +263,6 @@ _PyStackRef_FromPyObjectSteal(PyObject *obj) } # define PyStackRef_FromPyObjectSteal(obj) 
_PyStackRef_FromPyObjectSteal(_PyObject_CAST(obj)) -static inline bool -PyStackRef_IsBorrowed(_PyStackRef stackref) -{ - if (PyStackRef_IsNull(stackref) || !PyStackRef_IsDeferred(stackref)) { - return false; - } - PyObject *obj = PyStackRef_AsPyObjectBorrow(stackref); - return !(_Py_IsImmortal(obj) || _PyObject_HasDeferredRefcount(obj)); -} - static inline bool PyStackRef_IsHeapSafe(_PyStackRef stackref) { @@ -292,11 +276,11 @@ PyStackRef_IsHeapSafe(_PyStackRef stackref) static inline _PyStackRef PyStackRef_MakeHeapSafe(_PyStackRef stackref) { - if (PyStackRef_IsBorrowed(stackref)) { - PyObject *obj = PyStackRef_AsPyObjectBorrow(stackref); - return (_PyStackRef){ .bits = (uintptr_t)(Py_NewRef(obj)) | Py_TAG_PTR }; + if (PyStackRef_IsHeapSafe(stackref)) { + return stackref; } - return stackref; + PyObject *obj = PyStackRef_AsPyObjectBorrow(stackref); + return (_PyStackRef){ .bits = (uintptr_t)(Py_NewRef(obj)) | Py_TAG_PTR }; } static inline _PyStackRef diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7c1d98f10ea9a1..34d130d269201f 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -780,7 +780,7 @@ dummy_func( * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. */ - assert(Py_REFCNT(left_o) >= 2 || PyStackRef_IsBorrowed(left)); + assert(Py_REFCNT(left_o) >= 2 || !PyStackRef_IsHeapSafe(left)); PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); DEAD(left); PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 26721ef5fbd585..73b8692e540c77 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1186,7 +1186,7 @@ * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. 
*/ - assert(Py_REFCNT(left_o) >= 2 || PyStackRef_IsBorrowed(left)); + assert(Py_REFCNT(left_o) >= 2 || !PyStackRef_IsHeapSafe(left)); PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); PyObject *right_o = PyStackRef_AsPyObjectSteal(right); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index a2695573bb0d3a..3e38ae9fcfcb4d 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -368,7 +368,7 @@ * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. */ - assert(Py_REFCNT(left_o) >= 2 || PyStackRef_IsBorrowed(left)); + assert(Py_REFCNT(left_o) >= 2 || !PyStackRef_IsHeapSafe(left)); PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); PyObject *right_o = PyStackRef_AsPyObjectSteal(right); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 15ac008674ef1a..a97a8d078e0f3c 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -596,7 +596,6 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_FromPyObjectImmortal", "PyStackRef_FromPyObjectNew", "PyStackRef_FromPyObjectSteal", - "PyStackRef_IsBorrowed", "PyStackRef_IsExactly", "PyStackRef_FromPyObjectStealMortal", "PyStackRef_IsNone", From 1f5cfcdc8a44f86868924bc3d3e5b8d2e13b3dcf Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 13 Mar 2025 17:03:21 -0700 Subject: [PATCH 55/73] Use PyStackRef_Borrow as the new API --- Include/internal/pycore_stackref.h | 10 +++++- Lib/test/test_dis.py | 4 +-- Programs/test_frozenmain.h | 14 ++++---- Python/bytecodes.c | 15 ++------- Python/executor_cases.c.h | 54 +++++------------------------- Python/generated_cases.c.h | 15 ++------- Tools/cases_generator/analyzer.py | 2 +- 7 files changed, 34 insertions(+), 80 deletions(-) diff --git a/Include/internal/pycore_stackref.h 
b/Include/internal/pycore_stackref.h index 98673bec360fd0..9c2654e1324df7 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -178,6 +178,12 @@ PyStackRef_MakeHeapSafe(_PyStackRef ref) return ref; } +static inline _PyStackRef +PyStackRef_Borrow(_PyStackRef ref) +{ + return PyStackRef_DUP(ref) +} + #define PyStackRef_CLEAR(REF) \ do { \ _PyStackRef *_tmp_op_ptr = &(REF); \ @@ -347,7 +353,7 @@ PyStackRef_DUP(_PyStackRef stackref) } static inline _PyStackRef -PyStackRef_AsDeferred(_PyStackRef stackref) +PyStackRef_Borrow(_PyStackRef stackref) { return (_PyStackRef){ .bits = stackref.bits | Py_TAG_DEFERRED }; } @@ -432,6 +438,8 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { #endif +#define PyStackRef_Borrow(ref) PyStackRef_DUP(ref) + #ifdef _WIN32 #define PyStackRef_RefcountOnObject(REF) (((REF).bits & Py_TAG_BITS) == 0) #define PyStackRef_AsPyObjectBorrow BITS_TO_PTR_MASKED diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 62bf0d3d8a465b..bf2f250c04e1e5 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -877,7 +877,7 @@ def load_test(x, y=0): %3d LOAD_FAST_LOAD_FAST 1 (x, y) STORE_FAST_STORE_FAST 50 (b, a) -%3d LOAD_FAST_LOAD_FAST 35 (a, b) +%3d LOAD_FAST_BORROW_LOAD_FAST_BORROW 35 (a, b) BUILD_TUPLE 2 RETURN_VALUE """ % (load_test.__code__.co_firstlineno, @@ -901,7 +901,7 @@ def loop_test(): STORE_FAST 0 (i) %3d LOAD_GLOBAL_MODULE 1 (load_test + NULL) - LOAD_FAST 0 (i) + LOAD_FAST_BORROW 0 (i) CALL_PY_GENERAL 1 POP_TOP JUMP_BACKWARD_{: <6} 16 (to L1) diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index 55bf4fe26e967a..3d1069d5b29740 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,16 +1,16 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { 227,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0, - 0,0,0,0,0,243,184,0,0,0,128,0,90,0,80,0, - 71,0,112,0,90,0,80,0,71,1,112,1,89,2,32,0, - 
80,1,50,1,0,0,0,0,0,0,30,0,89,2,32,0, - 80,2,89,0,78,6,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,243,184,0,0,0,128,0,92,0,80,0, + 71,0,114,0,92,0,80,0,71,1,114,1,91,2,32,0, + 80,1,50,1,0,0,0,0,0,0,30,0,91,2,32,0, + 80,2,91,0,78,6,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,50,2,0,0,0,0,0,0, - 30,0,89,1,78,8,0,0,0,0,0,0,0,0,0,0, + 30,0,91,1,78,8,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,32,0,50,0,0,0,0,0, 0,0,80,3,43,26,0,0,0,0,0,0,0,0,0,0, - 112,5,80,4,15,0,68,24,0,0,112,6,89,2,32,0, - 80,5,89,6,11,0,80,6,89,5,89,6,43,26,0,0, + 114,5,80,4,15,0,68,24,0,0,114,6,91,2,32,0, + 80,5,91,6,11,0,80,6,91,5,91,6,43,26,0,0, 0,0,0,0,0,0,0,0,11,0,48,4,50,1,0,0, 0,0,0,0,30,0,73,26,0,0,8,0,29,0,80,0, 34,0,41,7,78,122,18,70,114,111,122,101,110,32,72,101, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 34d130d269201f..ea35fc06c807d5 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -272,11 +272,7 @@ dummy_func( replicate(8) pure inst (LOAD_FAST_BORROW, (-- value)) { assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - #ifdef Py_GIL_DISABLED - value = PyStackRef_AsDeferred(GETLOCAL(oparg)); - #else - value = PyStackRef_DUP(GETLOCAL(oparg)); - #endif + value = PyStackRef_Borrow(GETLOCAL(oparg)); } inst(LOAD_FAST_AND_CLEAR, (-- value)) { @@ -294,13 +290,8 @@ dummy_func( inst(LOAD_FAST_BORROW_LOAD_FAST_BORROW, ( -- value1, value2)) { uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; - #ifdef Py_GIL_DISABLED - value1 = PyStackRef_AsDeferred(GETLOCAL(oparg1)); - value2 = PyStackRef_AsDeferred(GETLOCAL(oparg2)); - #else - value1 = PyStackRef_DUP(GETLOCAL(oparg1)); - value2 = PyStackRef_DUP(GETLOCAL(oparg2)); - #endif + value1 = PyStackRef_Borrow(GETLOCAL(oparg1)); + value2 = PyStackRef_Borrow(GETLOCAL(oparg2)); } family(LOAD_CONST, 0) = { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 73b8692e540c77..c14701abf81f4a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -206,11 +206,7 @@ oparg = 0; assert(oparg == CURRENT_OPARG()); 
assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - #ifdef Py_GIL_DISABLED - value = PyStackRef_AsDeferred(GETLOCAL(oparg)); - #else - value = PyStackRef_DUP(GETLOCAL(oparg)); - #endif + value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -222,11 +218,7 @@ oparg = 1; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - #ifdef Py_GIL_DISABLED - value = PyStackRef_AsDeferred(GETLOCAL(oparg)); - #else - value = PyStackRef_DUP(GETLOCAL(oparg)); - #endif + value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -238,11 +230,7 @@ oparg = 2; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - #ifdef Py_GIL_DISABLED - value = PyStackRef_AsDeferred(GETLOCAL(oparg)); - #else - value = PyStackRef_DUP(GETLOCAL(oparg)); - #endif + value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -254,11 +242,7 @@ oparg = 3; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - #ifdef Py_GIL_DISABLED - value = PyStackRef_AsDeferred(GETLOCAL(oparg)); - #else - value = PyStackRef_DUP(GETLOCAL(oparg)); - #endif + value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -270,11 +254,7 @@ oparg = 4; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - #ifdef Py_GIL_DISABLED - value = PyStackRef_AsDeferred(GETLOCAL(oparg)); - #else - value = PyStackRef_DUP(GETLOCAL(oparg)); - #endif + value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -286,11 +266,7 @@ oparg = 5; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - #ifdef Py_GIL_DISABLED - value = PyStackRef_AsDeferred(GETLOCAL(oparg)); - #else - value = 
PyStackRef_DUP(GETLOCAL(oparg)); - #endif + value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -302,11 +278,7 @@ oparg = 6; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - #ifdef Py_GIL_DISABLED - value = PyStackRef_AsDeferred(GETLOCAL(oparg)); - #else - value = PyStackRef_DUP(GETLOCAL(oparg)); - #endif + value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -318,11 +290,7 @@ oparg = 7; assert(oparg == CURRENT_OPARG()); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - #ifdef Py_GIL_DISABLED - value = PyStackRef_AsDeferred(GETLOCAL(oparg)); - #else - value = PyStackRef_DUP(GETLOCAL(oparg)); - #endif + value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -333,11 +301,7 @@ _PyStackRef value; oparg = CURRENT_OPARG(); assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - #ifdef Py_GIL_DISABLED - value = PyStackRef_AsDeferred(GETLOCAL(oparg)); - #else - value = PyStackRef_DUP(GETLOCAL(oparg)); - #endif + value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 3e38ae9fcfcb4d..c80616be17c48c 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -8905,11 +8905,7 @@ INSTRUCTION_STATS(LOAD_FAST_BORROW); _PyStackRef value; assert(!PyStackRef_IsNull(GETLOCAL(oparg))); - #ifdef Py_GIL_DISABLED - value = PyStackRef_AsDeferred(GETLOCAL(oparg)); - #else - value = PyStackRef_DUP(GETLOCAL(oparg)); - #endif + value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -8928,13 +8924,8 @@ _PyStackRef value2; uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; - #ifdef Py_GIL_DISABLED - value1 = 
PyStackRef_AsDeferred(GETLOCAL(oparg1)); - value2 = PyStackRef_AsDeferred(GETLOCAL(oparg2)); - #else - value1 = PyStackRef_DUP(GETLOCAL(oparg1)); - value2 = PyStackRef_DUP(GETLOCAL(oparg2)); - #endif + value1 = PyStackRef_Borrow(GETLOCAL(oparg1)); + value2 = PyStackRef_Borrow(GETLOCAL(oparg2)); stack_pointer[0] = value1; stack_pointer[1] = value2; stack_pointer += 2; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index a97a8d078e0f3c..7f78192dd74dee 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -584,11 +584,11 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyLong_FromLong", "PyLong_FromSsize_t", "PySlice_New", - "PyStackRef_AsDeferred", "PyStackRef_AsPyObjectBorrow", "PyStackRef_AsPyObjectNew", "PyStackRef_FromPyObjectNewMortal", "PyStackRef_AsPyObjectSteal", + "PyStackRef_Borrow", "PyStackRef_CLEAR", "PyStackRef_CLOSE_SPECIALIZED", "PyStackRef_DUP", From d1e8e4502237c51d2c18834d9771e6a22ee11966 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 14 Mar 2025 15:59:18 -0700 Subject: [PATCH 56/73] Make the default build work --- Include/internal/pycore_stackref.h | 14 +++++++++----- Objects/floatobject.c | 31 ------------------------------ Python/gc.c | 8 +++++++- Python/marshal.c | 5 +++-- 4 files changed, 19 insertions(+), 39 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 9c2654e1324df7..2f3d1b58281e7e 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -425,7 +425,6 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { assert(!_Py_IsStaticImmortal(obj)); break; case Py_TAG_REFCNT: - assert(obj == NULL || _Py_IsImmortal(obj)); break; default: assert(0); @@ -438,17 +437,16 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { #endif -#define PyStackRef_Borrow(ref) PyStackRef_DUP(ref) - #ifdef _WIN32 -#define PyStackRef_RefcountOnObject(REF) (((REF).bits & 
Py_TAG_BITS) == 0) +#define PyStackRef_RefcountOnObject(REF) (((REF).bits & Py_TAG_REFCNT) == 0) #define PyStackRef_AsPyObjectBorrow BITS_TO_PTR_MASKED +#define PyStackRef_Borrow(REF) ((_PyStackRef){ .bits = ((REF).bits) | Py_TAG_REFCNT }) #else /* Does this ref not have an embedded refcount and thus not refer to a declared immmortal object? */ static inline int PyStackRef_RefcountOnObject(_PyStackRef ref) { - return (ref.bits & Py_TAG_BITS) == 0; + return (ref.bits & Py_TAG_REFCNT) == 0; } static inline PyObject * @@ -456,6 +454,12 @@ PyStackRef_AsPyObjectBorrow(_PyStackRef ref) { return BITS_TO_PTR_MASKED(ref); } + +static inline _PyStackRef +PyStackRef_Borrow(_PyStackRef ref) +{ + return (_PyStackRef){ .bits = ref.bits | Py_TAG_REFCNT }; +} #endif static inline PyObject * diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 324abddcf2a46d..ebaf715eb46a47 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -134,8 +134,6 @@ PyFloat_FromDouble(double fval) return (PyObject *) op; } -#ifdef Py_GIL_DISABLED - _PyStackRef _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value) { PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc); @@ -143,35 +141,6 @@ _PyStackRef _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef righ return PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(value)); } -#else // Py_GIL_DISABLED - -_PyStackRef _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value) -{ - PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - if (Py_REFCNT(left_o) == 1) { - ((PyFloatObject *)left_o)->ob_fval = value; - PyStackRef_CLOSE_SPECIALIZED(right, _PyFloat_ExactDealloc); - return left; - } - else if (Py_REFCNT(right_o) == 1) { - ((PyFloatObject *)right_o)->ob_fval = value; - PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc); - return right; - } - else { - PyObject *result = PyFloat_FromDouble(value); - 
PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc); - PyStackRef_CLOSE_SPECIALIZED(right, _PyFloat_ExactDealloc); - if (result == NULL) { - return PyStackRef_NULL; - } - return PyStackRef_FromPyObjectStealMortal(result); - } -} - -#endif // Py_GIL_DISABLED - static PyObject * float_from_string_inner(const char *s, Py_ssize_t len, void *obj) { diff --git a/Python/gc.c b/Python/gc.c index f2a88657e8d8d6..6493d43c650bfd 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -543,6 +543,12 @@ visit_decref(PyObject *op, void *parent) int _PyGC_VisitStackRef(_PyStackRef *ref, visitproc visit, void *arg) { + // This is a bit tricky! We want to ignore stackrefs with embedded + // refcounts when computing the incoming references, but otherwise treat + // them like normal. + if (!PyStackRef_RefcountOnObject(*ref) && (visit == visit_decref)) { + return 0; + } Py_VISIT(PyStackRef_AsPyObjectBorrow(*ref)); return 0; } @@ -553,7 +559,7 @@ _PyGC_VisitFrameStack(_PyInterpreterFrame *frame, visitproc visit, void *arg) _PyStackRef *ref = _PyFrame_GetLocalsArray(frame); /* locals and stack */ for (; ref < frame->stackpointer; ref++) { - Py_VISIT(PyStackRef_AsPyObjectBorrow(*ref)); + _Py_VISIT_STACKREF(*ref); } return 0; } diff --git a/Python/marshal.c b/Python/marshal.c index cf7011652513ae..7f65808e8ec620 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -382,11 +382,12 @@ w_ref(PyObject *v, char *flag, WFILE *p) if (p->version < 3 || p->hashtable == NULL) return 0; /* not writing object references */ - /* If it has only one reference, it definitely isn't shared. - * But we use TYPE_REF always for interned string, to PYC file stable + /* If it has only one reference, it definitely isn't shared. But we use + * TYPE_REF always for interned string and code objects, to PYC file stable * as possible. 
*/ if (Py_REFCNT(v) == 1 && + !PyCode_Check(v) && !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) { return 0; } From cc01a302802e39b97a37b22146a67b39caa49959 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 17 Mar 2025 11:25:30 -0700 Subject: [PATCH 57/73] Regen frozenmain --- Programs/test_frozenmain.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index 1c7f74cc8039f2..7a88c5ec671f54 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,16 +1,16 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { 227,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0, - 0,0,0,0,0,243,184,0,0,0,128,0,90,0,80,1, - 71,0,112,0,90,0,80,1,71,1,112,1,89,2,32,0, - 80,2,50,1,0,0,0,0,0,0,30,0,89,2,32,0, - 80,3,89,0,78,6,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,243,184,0,0,0,128,0,92,0,80,1, + 71,0,114,0,92,0,80,1,71,1,114,1,91,2,32,0, + 80,2,50,1,0,0,0,0,0,0,30,0,91,2,32,0, + 80,3,91,0,78,6,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,50,2,0,0,0,0,0,0, - 30,0,89,1,78,8,0,0,0,0,0,0,0,0,0,0, + 30,0,91,1,78,8,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,32,0,50,0,0,0,0,0, 0,0,80,4,43,26,0,0,0,0,0,0,0,0,0,0, - 112,5,80,5,15,0,68,24,0,0,112,6,89,2,32,0, - 80,6,89,6,11,0,80,7,89,5,89,6,43,26,0,0, + 114,5,80,5,15,0,68,24,0,0,114,6,91,2,32,0, + 80,6,91,6,11,0,80,7,91,5,91,6,43,26,0,0, 0,0,0,0,0,0,0,0,11,0,48,4,50,1,0,0, 0,0,0,0,30,0,73,26,0,0,8,0,29,0,80,1, 34,0,41,8,233,0,0,0,0,78,122,18,70,114,111,122, From 6c5faabc7ee32a4495fb2eb26c04cd508bf01757 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 17 Mar 2025 12:50:30 -0700 Subject: [PATCH 58/73] Add a workaround for failing tests rather than change marshal.c --- Lib/test/test_importlib/test_abc.py | 3 +++ Python/marshal.c | 5 ++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_importlib/test_abc.py b/Lib/test/test_importlib/test_abc.py index b1ab52f966ffdb..758a227c03e038 100644 --- 
a/Lib/test/test_importlib/test_abc.py +++ b/Lib/test/test_importlib/test_abc.py @@ -801,6 +801,9 @@ def verify_code(self, code_object, *, bytecode_written=False): data.extend(self.init._pack_uint32(0)) data.extend(self.init._pack_uint32(self.loader.source_mtime)) data.extend(self.init._pack_uint32(self.loader.source_size)) + # Make sure theres > 1 reference to code_object so that the + # marshaled representation below matches the cached representation + l = [code_object] data.extend(marshal.dumps(code_object)) self.assertEqual(self.loader.written[self.cached], bytes(data)) diff --git a/Python/marshal.c b/Python/marshal.c index 95f9f59209b380..b39c1a5b1ade50 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -384,12 +384,11 @@ w_ref(PyObject *v, char *flag, WFILE *p) if (p->version < 3 || p->hashtable == NULL) return 0; /* not writing object references */ - /* If it has only one reference, it definitely isn't shared. But we use - * TYPE_REF always for interned string and code objects, to PYC file stable + /* If it has only one reference, it definitely isn't shared. + * But we use TYPE_REF always for interned string, to PYC file stable * as possible. */ if (Py_REFCNT(v) == 1 && - !PyCode_Check(v) && !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) { return 0; } From 32bd0c68b82dccd7088f50c7354885cefbbfaf4f Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 17 Mar 2025 13:01:36 -0700 Subject: [PATCH 59/73] Update dis.rst to reflect support for LOAD_FAST_BORROW in the default build --- Doc/library/dis.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 865f2185cedd62..4fde5bf903880b 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -1404,8 +1404,8 @@ iterations of the loop. .. opcode:: LOAD_FAST_BORROW (var_num) - Pushes a borrowed reference to the local ``co_varnames[var_num]`` onto the stack - in free-threaded builds. 
In default builds this is identical to ``LOAD_FAST``. + Pushes a borrowed reference to the local ``co_varnames[var_num]`` onto the + stack. .. versionadded:: 3.14 @@ -1420,8 +1420,7 @@ iterations of the loop. .. opcode:: LOAD_FAST_BORROW_LOAD_FAST_BORROW (var_nums) Pushes borrowed references to ``co_varnames[var_nums >> 4]`` and - ``co_varnames[var_nums & 15]`` onto the stack in free-threaded builds. This is - identical to ``LOAD_FAST_LOAD_FAST`` in default builds. + ``co_varnames[var_nums & 15]`` onto the stack. .. versionadded:: 3.14 From fdb8a82d63ee7013ddc07357cc2bf0a28d62af77 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 17 Mar 2025 15:16:48 -0700 Subject: [PATCH 60/73] Exclude immortal objects when keeping overwritten locals alive --- Objects/frameobject.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 13c514dba5e0fd..9dfce54cb8f581 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -244,15 +244,15 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value) Py_XINCREF(value); PyCell_SetTakeRef((PyCellObject *)cell, value); } else if (value != PyStackRef_AsPyObjectBorrow(oldvalue)) { - if (!PyStackRef_IsNull(fast[i])) { + PyObject *old_obj = PyStackRef_AsPyObjectBorrow(fast[i]); + if (old_obj != NULL && !_Py_IsImmortal(old_obj)) { if (frame->f_overwritten_fast_locals == NULL) { frame->f_overwritten_fast_locals = PyList_New(0); if (frame->f_overwritten_fast_locals == NULL) { return -1; } } - PyObject *obj = PyStackRef_AsPyObjectBorrow(fast[i]); - if (PyList_Append(frame->f_overwritten_fast_locals, obj) < 0) { + if (PyList_Append(frame->f_overwritten_fast_locals, old_obj) < 0) { return -1; } PyStackRef_CLOSE(fast[i]); From a9620172a2f63d45605bc7c99ad083a74dfdfefe Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 17 Mar 2025 16:12:01 -0700 Subject: [PATCH 61/73] Use a tuple to store overwritten fast locals --- Include/internal/pycore_frame.h | 4 
++-- Objects/frameobject.c | 31 ++++++++++++++++++++++++------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index aba7648b1421fb..bdb739e1766f9f 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -32,9 +32,9 @@ struct _frame { PyEval_GetLocals requires a borrowed reference so the actual reference is stored here */ PyObject *f_locals_cache; - /* A list containing strong references to fast locals that were overwritten + /* A tuple containing strong references to fast locals that were overwritten * via f_locals. Borrowed references to these locals may exist in frames - * closer to the top of the stack. The references in this list act as + * closer to the top of the stack. The references in this tuple act as * "support" for the borrowed references, ensuring that they remain valid. */ PyObject *f_overwritten_fast_locals; diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 9dfce54cb8f581..1d383ba838e197 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -206,6 +206,29 @@ framelocalsproxy_getitem(PyObject *self, PyObject *key) return NULL; } +static int +add_overwritten_fast_local(PyFrameObject *frame, PyObject *obj) +{ + Py_ssize_t new_size = 1; + if (frame->f_overwritten_fast_locals != NULL) { + new_size = PyTuple_Size(frame->f_overwritten_fast_locals); + if (new_size == -1) { + return -1; + } + } + PyObject *new_tuple = PyTuple_New(new_size); + if (new_tuple == NULL) { + return -1; + } + for (Py_ssize_t i = 0; i < new_size - 1; i++) { + PyObject *o = PyTuple_GET_ITEM(frame->f_overwritten_fast_locals, i); + PyTuple_SET_ITEM(new_tuple, i, Py_NewRef(o)); + } + PyTuple_SET_ITEM(new_tuple, new_size - 1, Py_NewRef(obj)); + Py_XSETREF(frame->f_overwritten_fast_locals, new_tuple); + return 0; +} + static int framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value) { @@ -246,13 +269,7 @@ 
framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value) } else if (value != PyStackRef_AsPyObjectBorrow(oldvalue)) { PyObject *old_obj = PyStackRef_AsPyObjectBorrow(fast[i]); if (old_obj != NULL && !_Py_IsImmortal(old_obj)) { - if (frame->f_overwritten_fast_locals == NULL) { - frame->f_overwritten_fast_locals = PyList_New(0); - if (frame->f_overwritten_fast_locals == NULL) { - return -1; - } - } - if (PyList_Append(frame->f_overwritten_fast_locals, old_obj) < 0) { + if (add_overwritten_fast_local(frame, old_obj) < 0) { return -1; } PyStackRef_CLOSE(fast[i]); From 6c2f07deed4403489afb3c72c662c4ccd4d4067d Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 17 Mar 2025 16:26:27 -0700 Subject: [PATCH 62/73] Fix off-by-one error --- Lib/test/test_frame.py | 2 ++ Objects/frameobject.c | 12 ++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_frame.py b/Lib/test/test_frame.py index f0c27552f60c22..18ade18d1a1708 100644 --- a/Lib/test/test_frame.py +++ b/Lib/test/test_frame.py @@ -602,11 +602,13 @@ def test_overwrite_locals(self): # from an ancestor in the call stack. 
def f(): xs = [1, 2, 3] + ys = [4, 5, 6] return g(xs) def g(xs): f = sys._getframe() f.f_back.f_locals["xs"] = None + f.f_back.f_locals["ys"] = None return xs[1] self.assertEqual(f(), 2) diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 1d383ba838e197..53cc4f84363935 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -209,12 +209,16 @@ framelocalsproxy_getitem(PyObject *self, PyObject *key) static int add_overwritten_fast_local(PyFrameObject *frame, PyObject *obj) { - Py_ssize_t new_size = 1; - if (frame->f_overwritten_fast_locals != NULL) { - new_size = PyTuple_Size(frame->f_overwritten_fast_locals); - if (new_size == -1) { + Py_ssize_t new_size; + if (frame->f_overwritten_fast_locals == NULL) { + new_size = 1; + } + else { + Py_ssize_t size = PyTuple_Size(frame->f_overwritten_fast_locals); + if (size == -1) { return -1; } + new_size = size + 1; } PyObject *new_tuple = PyTuple_New(new_size); if (new_tuple == NULL) { From 5ff2dea14239bf7c3990e801d1bdb6ca3e4b4089 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 20 Mar 2025 15:56:15 -0700 Subject: [PATCH 63/73] Fix post-merge issues --- Include/internal/pycore_frame.h | 6 ++++++ Include/internal/pycore_opcode_metadata.h | 18 ++++++++++++++++-- Programs/test_frozenmain.h | 14 +++++++------- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index dde211c5eac015..8c410e9e208340 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -28,6 +28,12 @@ struct _frame { PyEval_GetLocals requires a borrowed reference so the actual reference is stored here */ PyObject *f_locals_cache; + /* A tuple containing strong references to fast locals that were overwritten + * via f_locals. Borrowed references to these locals may exist in frames + * closer to the top of the stack. The references in this tuple act as + * "support" for the borrowed references, ensuring that they remain valid. 
+ */ + PyObject *f_overwritten_fast_locals; /* The frame data, if this frame object owns the frame */ PyObject *_f_frame_data[1]; }; diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 096cd0b5e8db67..ab15d6a5a2ce0a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -335,6 +335,10 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 0; case LOAD_FAST_AND_CLEAR: return 0; + case LOAD_FAST_BORROW: + return 0; + case LOAD_FAST_BORROW_LOAD_FAST_BORROW: + return 0; case LOAD_FAST_CHECK: return 0; case LOAD_FAST_LOAD_FAST: @@ -810,6 +814,10 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 1; case LOAD_FAST_AND_CLEAR: return 1; + case LOAD_FAST_BORROW: + return 1; + case LOAD_FAST_BORROW_LOAD_FAST_BORROW: + return 2; case LOAD_FAST_CHECK: return 1; case LOAD_FAST_LOAD_FAST: @@ -1198,6 +1206,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, + [LOAD_FAST_BORROW] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, + [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FROM_DICT_OR_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, @@ -1406,6 +1416,8 @@ _PyOpcode_macro_expansion[256] = { [LOAD_DEREF] = { .nuops = 1, .uops = { { _LOAD_DEREF, OPARG_SIMPLE, 0 } } }, [LOAD_FAST] = { .nuops = 1, .uops = { { _LOAD_FAST, OPARG_SIMPLE, 0 } } }, 
[LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { _LOAD_FAST_AND_CLEAR, OPARG_SIMPLE, 0 } } }, + [LOAD_FAST_BORROW] = { .nuops = 1, .uops = { { _LOAD_FAST_BORROW, OPARG_SIMPLE, 0 } } }, + [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = { .nuops = 2, .uops = { { _LOAD_FAST_BORROW, OPARG_TOP, 0 }, { _LOAD_FAST_BORROW, OPARG_BOTTOM, 0 } } }, [LOAD_FAST_CHECK] = { .nuops = 1, .uops = { { _LOAD_FAST_CHECK, OPARG_SIMPLE, 0 } } }, [LOAD_FAST_LOAD_FAST] = { .nuops = 2, .uops = { { _LOAD_FAST, OPARG_TOP, 0 }, { _LOAD_FAST, OPARG_BOTTOM, 0 } } }, [LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_DEREF, OPARG_SIMPLE, 0 } } }, @@ -1632,6 +1644,8 @@ const char *_PyOpcode_OpName[266] = { [LOAD_DEREF] = "LOAD_DEREF", [LOAD_FAST] = "LOAD_FAST", [LOAD_FAST_AND_CLEAR] = "LOAD_FAST_AND_CLEAR", + [LOAD_FAST_BORROW] = "LOAD_FAST_BORROW", + [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = "LOAD_FAST_BORROW_LOAD_FAST_BORROW", [LOAD_FAST_CHECK] = "LOAD_FAST_CHECK", [LOAD_FAST_LOAD_FAST] = "LOAD_FAST_LOAD_FAST", [LOAD_FROM_DICT_OR_DEREF] = "LOAD_FROM_DICT_OR_DEREF", @@ -1890,6 +1904,8 @@ const uint8_t _PyOpcode_Deopt[256] = { [LOAD_DEREF] = LOAD_DEREF, [LOAD_FAST] = LOAD_FAST, [LOAD_FAST_AND_CLEAR] = LOAD_FAST_AND_CLEAR, + [LOAD_FAST_BORROW] = LOAD_FAST_BORROW, + [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = LOAD_FAST_BORROW_LOAD_FAST_BORROW, [LOAD_FAST_CHECK] = LOAD_FAST_CHECK, [LOAD_FAST_LOAD_FAST] = LOAD_FAST_LOAD_FAST, [LOAD_FROM_DICT_OR_DEREF] = LOAD_FROM_DICT_OR_DEREF, @@ -1972,8 +1988,6 @@ const uint8_t _PyOpcode_Deopt[256] = { #endif // NEED_OPCODE_METADATA #define EXTRA_CASES \ - case 117: \ - case 118: \ case 119: \ case 120: \ case 121: \ diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index 76a335086c9b11..8cedee31e08a00 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,16 +1,16 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { 227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0, - 
0,0,0,0,0,243,184,0,0,0,128,0,90,0,80,1, - 71,0,112,0,90,0,80,1,71,1,112,1,89,2,32,0, - 80,2,50,1,0,0,0,0,0,0,30,0,89,2,32,0, - 80,3,89,0,78,6,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,243,184,0,0,0,128,0,92,0,80,1, + 71,0,114,0,92,0,80,1,71,1,114,1,91,2,32,0, + 80,2,50,1,0,0,0,0,0,0,30,0,91,2,32,0, + 80,3,91,0,78,6,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,50,2,0,0,0,0,0,0, - 30,0,89,1,78,8,0,0,0,0,0,0,0,0,0,0, + 30,0,91,1,78,8,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,32,0,50,0,0,0,0,0, 0,0,80,4,43,26,0,0,0,0,0,0,0,0,0,0, - 112,5,80,7,15,0,68,24,0,0,112,6,89,2,32,0, - 80,5,89,6,11,0,80,6,89,5,89,6,43,26,0,0, + 114,5,80,7,15,0,68,24,0,0,114,6,91,2,32,0, + 80,5,91,6,11,0,80,6,91,5,91,6,43,26,0,0, 0,0,0,0,0,0,0,0,11,0,48,4,50,1,0,0, 0,0,0,0,30,0,73,26,0,0,8,0,29,0,80,1, 34,0,41,8,233,0,0,0,0,78,122,18,70,114,111,122, From 0c1e67f9a0e4218263dcec493578ba51c85f60f9 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 20 Mar 2025 16:08:34 -0700 Subject: [PATCH 64/73] English is hard --- Lib/test/test_importlib/test_abc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_importlib/test_abc.py b/Lib/test/test_importlib/test_abc.py index 758a227c03e038..070920d0da7e19 100644 --- a/Lib/test/test_importlib/test_abc.py +++ b/Lib/test/test_importlib/test_abc.py @@ -801,7 +801,7 @@ def verify_code(self, code_object, *, bytecode_written=False): data.extend(self.init._pack_uint32(0)) data.extend(self.init._pack_uint32(self.loader.source_mtime)) data.extend(self.init._pack_uint32(self.loader.source_size)) - # Make sure theres > 1 reference to code_object so that the + # Make sure there's > 1 reference to code_object so that the # marshaled representation below matches the cached representation l = [code_object] data.extend(marshal.dumps(code_object)) From ae2ec65139f27b6a5ec4fa162a8ffe611446c775 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 20 Mar 2025 16:10:23 -0700 Subject: [PATCH 65/73] Improve readability of test cases --- Lib/test/test_peepholer.py | 4 ++-- 1 
file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 13cf7d094b7c28..39db8f96f19007 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -2533,7 +2533,7 @@ def test_unoptimized_if_support_killed(self): ("LOAD_FAST", 0, 1), ("LOAD_CONST", 0, 2), ("LOAD_CONST", 0, 3), - ("STORE_FAST_STORE_FAST", 0 << 4 | 1, 4), + ("STORE_FAST_STORE_FAST", ((0 << 4) | 1), 4), ("POP_TOP", None, 5), ] self.check(insts, insts) @@ -2548,7 +2548,7 @@ def test_unoptimized_if_aliased(self): insts = [ ("LOAD_FAST", 0, 1), ("LOAD_CONST", 0, 3), - ("STORE_FAST_STORE_FAST", 0 << 4 | 1, 4), + ("STORE_FAST_STORE_FAST", ((0 << 4) | 1), 4), ] self.check(insts, insts) From 03c474ee491e615bd3a6ee02ad541c63e73fa31a Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 20 Mar 2025 16:17:19 -0700 Subject: [PATCH 66/73] Elaborate in the blurb --- .../2025-02-28-11-29-35.gh-issue-130704.7RDVLE.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-28-11-29-35.gh-issue-130704.7RDVLE.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-28-11-29-35.gh-issue-130704.7RDVLE.rst index 528c58b54cd7de..42fba2933c31b3 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-28-11-29-35.gh-issue-130704.7RDVLE.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-28-11-29-35.gh-issue-130704.7RDVLE.rst @@ -1,2 +1,4 @@ Optimize ``LOAD_FAST`` and its superinstruction form to reduce reference -counting overhead. +counting overhead. These instructions are replaced with faster variants that +load borrowed references onto the operand stack when we can prove that the +reference in the frame outlives the reference loaded onto the stack. 
From 60665c99bcd638041e9c5c83bb502761158b8279 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 20 Mar 2025 16:25:27 -0700 Subject: [PATCH 67/73] Remove parameter to calculate stackdepth --- Python/flowgraph.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 5895e323b09cd6..c79e460acf961b 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2740,17 +2740,12 @@ load_fast_push_block(basicblock ***sp, basicblock *target, * non-violating LOAD_FAST{_LOAD_FAST} can be optimized. */ static int -optimize_load_fast(cfg_builder *g, bool compute_stackdepth) +optimize_load_fast(cfg_builder *g) { int status; ref_stack refs = {0}; int max_instrs = 0; basicblock *entryblock = g->g_entryblock; - if (compute_stackdepth) { - if (calculate_stackdepth(g) == ERROR) { - return ERROR; - } - } for (basicblock *b = entryblock; b != NULL; b = b->b_next) { max_instrs = Py_MAX(max_instrs, b->b_iused); } @@ -3885,7 +3880,7 @@ _PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g, /* Can't modify the bytecode after inserting instructions that produce * borrowed references. */ - RETURN_IF_ERROR(optimize_load_fast(g, /* compute_stackdepth */ false)); + RETURN_IF_ERROR(optimize_load_fast(g)); /* Can't modify the bytecode after computing jump offsets. 
*/ if (_PyCfg_ToInstructionSequence(g, seq) < 0) { @@ -3991,7 +3986,12 @@ _PyCompile_OptimizeLoadFast(PyObject *seq) return NULL; } - if (optimize_load_fast(g, /* compute_stackdepth */ true) != SUCCESS) { + if (calculate_stackdepth(g) == ERROR) { + _PyCfgBuilder_Free(g); + return NULL; + } + + if (optimize_load_fast(g) != SUCCESS) { _PyCfgBuilder_Free(g); return NULL; } From ac8940bba04cb99fddd5d870903233022c9e7003 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 20 Mar 2025 16:28:49 -0700 Subject: [PATCH 68/73] Update comment --- Python/flowgraph.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index c79e460acf961b..0ad4144b0bc188 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2703,7 +2703,7 @@ load_fast_push_block(basicblock ***sp, basicblock *target, } /* - * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into weaker variants that + * Strength reduce LOAD_FAST{_LOAD_FAST} instructions into faster variants that * load borrowed references onto the operand stack. 
* * This is only safe when we can prove that the reference in the frame outlives From f12573fbcd9aa3e60150d1f04da4c5c660618f36 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 21 Mar 2025 11:39:40 -0700 Subject: [PATCH 69/73] Test optimize_load_fast as part of OptimizeCfg --- Include/internal/pycore_flowgraph.h | 3 -- Lib/test/test_peepholer.py | 38 ++++++++++++------ Modules/_testinternalcapi.c | 18 --------- Modules/clinic/_testinternalcapi.c.h | 59 +--------------------------- Python/flowgraph.c | 32 +++------------ 5 files changed, 33 insertions(+), 117 deletions(-) diff --git a/Include/internal/pycore_flowgraph.h b/Include/internal/pycore_flowgraph.h index 3b218838b9e833..5043260d2fd99f 100644 --- a/Include/internal/pycore_flowgraph.h +++ b/Include/internal/pycore_flowgraph.h @@ -41,9 +41,6 @@ PyAPI_FUNC(PyObject*) _PyCompile_OptimizeCfg( PyObject *consts, int nlocals); -// Export for '_testinternalcapi' shared extension -PyAPI_FUNC(PyObject*) _PyCompile_OptimizeLoadFast(PyObject *instructions); - #ifdef __cplusplus } #endif diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 39db8f96f19007..fa63b64efa840f 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -2435,9 +2435,9 @@ def test_list_to_tuple_get_iter(self): ] expected_insts = [ ("BUILD_LIST", 0, 1), - ("LOAD_FAST", 0, 2), + ("LOAD_FAST_BORROW", 0, 2), ("LIST_EXTEND", 1, 3), - ("LOAD_FAST", 1, 4), + ("LOAD_FAST_BORROW", 1, 4), ("LIST_EXTEND", 1, 5), ("NOP", None, 6), # ("CALL_INTRINSIC_1", INTRINSIC_LIST_TO_TUPLE, 6), ("GET_ITER", None, 7), @@ -2463,15 +2463,24 @@ def test_list_to_tuple_get_iter_is_safe(self): self.assertEqual(items, []) -@unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi") -class OptimizeLoadFastTestCase(CompilationStepTestCase): - def check(self, insts, expected_insts): - self.check_instructions(insts) - self.check_instructions(expected_insts) - seq = self.seq_from_insts(insts) - opt_insts = 
_testinternalcapi.optimize_load_fast(seq) - expected_insts = self.seq_from_insts(expected_insts).get_instructions() - self.assertInstructionsMatch(opt_insts, expected_insts) +class OptimizeLoadFastTestCase(DirectCfgOptimizerTests): + def make_bb(self, insts): + last_loc = insts[-1][2] + maxconst = 0 + for op, arg, _ in insts: + if op == "LOAD_CONST": + maxconst = max(maxconst, arg) + consts = [None for _ in range(maxconst + 1)] + return insts + [ + ("LOAD_CONST", 0, last_loc + 1), + ("RETURN_VALUE", None, last_loc + 2), + ], consts + + def check(self, insts, expected_insts, consts=None): + insts_bb, insts_consts = self.make_bb(insts) + expected_insts_bb, exp_consts = self.make_bb(expected_insts) + self.cfg_optimization_test(insts_bb, expected_insts_bb, + consts=insts_consts, expected_consts=exp_consts) def test_optimized(self): insts = [ @@ -2518,7 +2527,12 @@ def test_unoptimized_if_unconsumed(self): ("COPY", 1, 2), ("POP_TOP", None, 3), ] - self.check(insts, insts) + expected = [ + ("LOAD_FAST", 0, 1), + ("NOP", None, 2), + ("NOP", None, 3), + ] + self.check(insts, expected) def test_unoptimized_if_support_killed(self): insts = [ diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index af5670fe74f266..56e3408652a6a0 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -751,23 +751,6 @@ _testinternalcapi_optimize_cfg_impl(PyObject *module, PyObject *instructions, return _PyCompile_OptimizeCfg(instructions, consts, nlocals); } -/*[clinic input] - -_testinternalcapi.optimize_load_fast -> object - - instructions: object - -Optimize LOAD_FAST{_LOAD_FAST} instructions. 
-[clinic start generated code]*/ - -static PyObject * -_testinternalcapi_optimize_load_fast_impl(PyObject *module, - PyObject *instructions) -/*[clinic end generated code: output=6f975349c976d017 input=c59f3eac68308c01]*/ -{ - return _PyCompile_OptimizeLoadFast(instructions); -} - static int get_nonnegative_int_from_dict(PyObject *dict, const char *key) { PyObject *obj = PyDict_GetItemString(dict, key); @@ -2043,7 +2026,6 @@ static PyMethodDef module_functions[] = { _TESTINTERNALCAPI_NEW_INSTRUCTION_SEQUENCE_METHODDEF _TESTINTERNALCAPI_COMPILER_CODEGEN_METHODDEF _TESTINTERNALCAPI_OPTIMIZE_CFG_METHODDEF - _TESTINTERNALCAPI_OPTIMIZE_LOAD_FAST_METHODDEF _TESTINTERNALCAPI_ASSEMBLE_CODE_OBJECT_METHODDEF {"get_interp_settings", get_interp_settings, METH_VARARGS, NULL}, {"clear_extension", clear_extension, METH_VARARGS, NULL}, diff --git a/Modules/clinic/_testinternalcapi.c.h b/Modules/clinic/_testinternalcapi.c.h index 901fae473ac407..d98d69df22f982 100644 --- a/Modules/clinic/_testinternalcapi.c.h +++ b/Modules/clinic/_testinternalcapi.c.h @@ -225,63 +225,6 @@ _testinternalcapi_optimize_cfg(PyObject *module, PyObject *const *args, Py_ssize return return_value; } -PyDoc_STRVAR(_testinternalcapi_optimize_load_fast__doc__, -"optimize_load_fast($module, /, instructions)\n" -"--\n" -"\n" -"Optimize LOAD_FAST{_LOAD_FAST} instructions."); - -#define _TESTINTERNALCAPI_OPTIMIZE_LOAD_FAST_METHODDEF \ - {"optimize_load_fast", _PyCFunction_CAST(_testinternalcapi_optimize_load_fast), METH_FASTCALL|METH_KEYWORDS, _testinternalcapi_optimize_load_fast__doc__}, - -static PyObject * -_testinternalcapi_optimize_load_fast_impl(PyObject *module, - PyObject *instructions); - -static PyObject * -_testinternalcapi_optimize_load_fast(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - PyObject *return_value = NULL; - #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - - #define NUM_KEYWORDS 1 - static struct { - PyGC_Head _this_is_not_used; - 
PyObject_VAR_HEAD - PyObject *ob_item[NUM_KEYWORDS]; - } _kwtuple = { - .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) - .ob_item = { &_Py_ID(instructions), }, - }; - #undef NUM_KEYWORDS - #define KWTUPLE (&_kwtuple.ob_base.ob_base) - - #else // !Py_BUILD_CORE - # define KWTUPLE NULL - #endif // !Py_BUILD_CORE - - static const char * const _keywords[] = {"instructions", NULL}; - static _PyArg_Parser _parser = { - .keywords = _keywords, - .fname = "optimize_load_fast", - .kwtuple = KWTUPLE, - }; - #undef KWTUPLE - PyObject *argsbuf[1]; - PyObject *instructions; - - args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, - /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); - if (!args) { - goto exit; - } - instructions = args[0]; - return_value = _testinternalcapi_optimize_load_fast_impl(module, instructions); - -exit: - return return_value; -} - PyDoc_STRVAR(_testinternalcapi_assemble_code_object__doc__, "assemble_code_object($module, /, filename, instructions, metadata)\n" "--\n" @@ -422,4 +365,4 @@ gh_119213_getargs(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO exit: return return_value; } -/*[clinic end generated code: output=bbd9381589d0f959 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=ec77971c6c2663da input=a9049054013a1b77]*/ diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 0ad4144b0bc188..96c36010c7688c 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2717,7 +2717,7 @@ load_fast_push_block(basicblock ***sp, basicblock *target, * * 2. Locals live until they are either killed by an instruction * (e.g. STORE_FAST) or the frame is unwound. Any local that is overwritten - * via `f_locals` is added to a list owned by the frame object. + * via `f_locals` is added to a tuple owned by the frame object. 
* * To simplify the problem of detecting which supporting references in the * frame are killed by instructions that overwrite locals, we only allow @@ -3965,38 +3965,18 @@ _PyCompile_OptimizeCfg(PyObject *seq, PyObject *consts, int nlocals) nparams, firstlineno) < 0) { goto error; } - res = cfg_to_instruction_sequence(g); -error: - Py_DECREF(const_cache); - _PyCfgBuilder_Free(g); - return res; -} - -PyObject * -_PyCompile_OptimizeLoadFast(PyObject *seq) -{ - if (!_PyInstructionSequence_Check(seq)) { - PyErr_SetString(PyExc_ValueError, "expected an instruction sequence"); - return NULL; - } - - cfg_builder *g = - _PyCfg_FromInstructionSequence((_PyInstructionSequence *)seq); - if (g == NULL) { - return NULL; - } if (calculate_stackdepth(g) == ERROR) { - _PyCfgBuilder_Free(g); - return NULL; + goto error; } if (optimize_load_fast(g) != SUCCESS) { - _PyCfgBuilder_Free(g); - return NULL; + goto error; } - PyObject *res = cfg_to_instruction_sequence(g); + res = cfg_to_instruction_sequence(g); +error: + Py_DECREF(const_cache); _PyCfgBuilder_Free(g); return res; } From 44f7ffcfc5aac68c419ff1a8ccd7c9641ade56ed Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 21 Mar 2025 11:40:33 -0700 Subject: [PATCH 70/73] Remove test with invalid bytecode Not enough items on stack --- Lib/test/test_peepholer.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index fa63b64efa840f..e629fa8be98f9b 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -1437,19 +1437,6 @@ def test_fold_constant_intrinsic_list_to_tuple(self): ] self.cfg_optimization_test(before, after, consts=[], expected_consts=[(1, 2, 3)]) - # no sequence start - same = [ - ('LOAD_SMALL_INT', 1, 0), - ('LIST_APPEND', 1, 0), - ('LOAD_SMALL_INT', 2, 0), - ('LIST_APPEND', 1, 0), - ('LOAD_SMALL_INT', 3, 0), - ('LIST_APPEND', 1, 0), - ('CALL_INTRINSIC_1', INTRINSIC_LIST_TO_TUPLE, 0), - ('RETURN_VALUE', None, 0) - ] - 
self.cfg_optimization_test(same, same, consts=[]) - def test_optimize_if_const_list(self): before = [ ('NOP', None, 0), From 818e94e676125c87d26d505015c150e7d12c4033 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 21 Mar 2025 16:30:28 -0700 Subject: [PATCH 71/73] Add helper macro for pushing refs --- Python/flowgraph.c | 46 +++++++++++++++++----------------------------- 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 96c36010c7688c..2391754523d353 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2584,7 +2584,6 @@ insert_superinstructions(cfg_builder *g) #define NOT_LOCAL -1 #define DUMMY_INSTR -1 -#define DUMMY_REF (ref){DUMMY_INSTR, NOT_LOCAL} typedef struct { // Index of instruction that produced the reference or DUMMY_INSTR. @@ -2766,6 +2765,14 @@ optimize_load_fast(cfg_builder *g) entryblock->b_startdepth = 0; entryblock->b_visited = 1; + #define PUSH_REF(instr, local) \ + do { \ + if (ref_stack_push(&refs, (ref){(instr), (local)}) < 0) { \ + status = ERROR; \ + goto done; \ + } \ + } while(0) + while (sp != blocks) { basicblock *block = *--sp; assert(block->b_startdepth > -1); @@ -2778,10 +2785,7 @@ optimize_load_fast(cfg_builder *g) // presence. Add dummy references as necessary. 
ref_stack_clear(&refs); for (int i = 0; i < block->b_startdepth; i++) { - if (ref_stack_push(&refs, DUMMY_REF) < 0) { - status = ERROR; - goto done; - } + PUSH_REF(DUMMY_INSTR, NOT_LOCAL); } for (int i = 0; i < block->b_iused; i++) { @@ -2802,31 +2806,19 @@ optimize_load_fast(cfg_builder *g) } case LOAD_FAST: { - if (ref_stack_push(&refs, (ref){i, oparg}) < 0) { - status = ERROR; - goto done; - } + PUSH_REF(i, oparg); break; } case LOAD_FAST_AND_CLEAR: { kill_local(instr_flags, &refs, oparg); - if (ref_stack_push(&refs, (ref){i, oparg}) < 0) { - status = ERROR; - goto done; - } + PUSH_REF(i, oparg); break; } case LOAD_FAST_LOAD_FAST: { - if (ref_stack_push(&refs, (ref){i, oparg >> 4}) < 0) { - status = ERROR; - goto done; - } - if (ref_stack_push(&refs, (ref){i, oparg & 15}) < 0) { - status = ERROR; - goto done; - } + PUSH_REF(i, oparg >> 4); + PUSH_REF(i, oparg & 15); break; } @@ -2841,10 +2833,7 @@ optimize_load_fast(cfg_builder *g) ref r = ref_stack_pop(&refs); store_local(instr_flags, &refs, oparg >> 4, r); // LOAD_FAST - if (ref_stack_push(&refs, (ref){i, oparg & 15}) < 0) { - status = ERROR; - goto done; - } + PUSH_REF(i, oparg & 15); break; } @@ -2877,10 +2866,7 @@ optimize_load_fast(cfg_builder *g) ref_stack_pop(&refs); } for (int j = 0; j < num_pushed; j++) { - if (ref_stack_push(&refs, (ref){i, NOT_LOCAL}) < 0) { - status = ERROR; - goto done; - } + PUSH_REF(i, NOT_LOCAL); } } break; @@ -2924,6 +2910,8 @@ optimize_load_fast(cfg_builder *g) } } + #undef PUSH_REF + status = SUCCESS; done: From c30e1e9dc24e044d9753593f9b38bc2ab4a436bf Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 21 Mar 2025 18:15:21 -0700 Subject: [PATCH 72/73] Handle opcodes that leave at least one input on the stack --- Lib/test/test_peepholer.py | 119 +++++++++++++++++++++++++++++++++++++ Python/flowgraph.c | 99 ++++++++++++++++++++++++++---- 2 files changed, 207 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 
e629fa8be98f9b..3632325cf932d5 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -2553,6 +2553,125 @@ def test_unoptimized_if_aliased(self): ] self.check(insts, insts) + def test_consume_no_inputs(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("GET_LEN", None, 2), + ("STORE_FAST", 1 , 3), + ("STORE_FAST", 2, 4), + ] + self.check(insts, insts) + + def test_consume_some_inputs_no_outputs(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("GET_LEN", None, 2), + ("LIST_APPEND", 0, 3), + ] + self.check(insts, insts) + + def test_check_exc_match(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_FAST", 1, 2), + ("CHECK_EXC_MATCH", None, 3) + ] + expected = [ + ("LOAD_FAST", 0, 1), + ("LOAD_FAST_BORROW", 1, 2), + ("CHECK_EXC_MATCH", None, 3) + ] + self.check(insts, expected) + + def test_for_iter(self): + insts = [ + ("LOAD_FAST", 0, 1), + top := self.Label(), + ("FOR_ITER", end := self.Label(), 2), + ("STORE_FAST", 2, 3), + ("JUMP", top, 4), + end, + ("END_FOR", None, 5), + ("POP_TOP", None, 6), + ("LOAD_CONST", 0, 7), + ("RETURN_VALUE", None, 8), + ] + self.cfg_optimization_test(insts, insts, consts=[None]) + + def test_load_attr(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_ATTR", 0, 2), + ] + expected = [ + ("LOAD_FAST_BORROW", 0, 1), + ("LOAD_ATTR", 0, 2), + ] + self.check(insts, expected) + + # Method call, leaves self on stack unconsumed + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_ATTR", 1, 2), + ] + expected = [ + ("LOAD_FAST", 0, 1), + ("LOAD_ATTR", 1, 2), + ] + self.check(insts, expected) + + def test_super_attr(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_FAST", 1, 2), + ("LOAD_FAST", 2, 3), + ("LOAD_SUPER_ATTR", 0, 4), + ] + expected = [ + ("LOAD_FAST_BORROW", 0, 1), + ("LOAD_FAST_BORROW", 1, 2), + ("LOAD_FAST_BORROW", 2, 3), + ("LOAD_SUPER_ATTR", 0, 4), + ] + self.check(insts, expected) + + # Method call, leaves self on stack unconsumed + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_FAST", 1, 2), + ("LOAD_FAST", 2, 3), + 
("LOAD_SUPER_ATTR", 1, 4), + ] + expected = [ + ("LOAD_FAST_BORROW", 0, 1), + ("LOAD_FAST_BORROW", 1, 2), + ("LOAD_FAST", 2, 3), + ("LOAD_SUPER_ATTR", 1, 4), + ] + self.check(insts, expected) + + def test_send(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_FAST", 1, 2), + ("SEND", end := self.Label(), 3), + ("LOAD_CONST", 0, 4), + ("RETURN_VALUE", None, 5), + end, + ("LOAD_CONST", 0, 6), + ("RETURN_VALUE", None, 7) + ] + expected = [ + ("LOAD_FAST", 0, 1), + ("LOAD_FAST_BORROW", 1, 2), + ("SEND", end := self.Label(), 3), + ("LOAD_CONST", 0, 4), + ("RETURN_VALUE", None, 5), + end, + ("LOAD_CONST", 0, 6), + ("RETURN_VALUE", None, 7) + ] + self.cfg_optimization_test(insts, expected, consts=[None]) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 2391754523d353..a0d5690250cffb 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2794,17 +2794,7 @@ optimize_load_fast(cfg_builder *g) int oparg = instr->i_oparg; assert(opcode != EXTENDED_ARG); switch (opcode) { - case COPY: { - assert(oparg > 0); - Py_ssize_t idx = refs.size - oparg; - ref r = ref_stack_at(&refs, idx); - if (ref_stack_push(&refs, r) < 0) { - status = ERROR; - goto done; - } - break; - } - + // Opcodes that load and store locals case LOAD_FAST: { PUSH_REF(i, oparg); break; @@ -2847,12 +2837,99 @@ optimize_load_fast(cfg_builder *g) break; } + // Opcodes that shuffle values on the stack + case COPY: { + assert(oparg > 0); + Py_ssize_t idx = refs.size - oparg; + ref r = ref_stack_at(&refs, idx); + PUSH_REF(r.instr, r.local); + break; + } + case SWAP: { assert(oparg >= 2); ref_stack_swap_top(&refs, oparg); break; } + // We treat opcodes that do not consume all of their inputs on + // a case by case basis, as we have no generic way of knowing + // how many inputs should be left on the stack. 
+ + // Opcodes that consume no inputs + case GET_ANEXT: + case GET_LEN: + case IMPORT_FROM: + case MATCH_KEYS: + case MATCH_MAPPING: + case MATCH_SEQUENCE: + case WITH_EXCEPT_START: { + int num_popped = _PyOpcode_num_popped(opcode, oparg); + int num_pushed = _PyOpcode_num_pushed(opcode, oparg); + int net_pushed = num_pushed - num_popped; + assert(net_pushed >= 0); + for (int j = 0; j < net_pushed; j++) { + PUSH_REF(i, NOT_LOCAL); + } + break; + } + + // Opcodes that consume some inputs and push no new values + case DICT_MERGE: + case DICT_UPDATE: + case LIST_APPEND: + case LIST_EXTEND: + case MAP_ADD: + case RERAISE: + case SET_ADD: + case SET_UPDATE: { + int num_popped = _PyOpcode_num_popped(opcode, oparg); + int num_pushed = _PyOpcode_num_pushed(opcode, oparg); + int net_popped = num_popped - num_pushed; + assert(net_popped > 0); + for (int j = 0; j < net_popped; j++) { + ref_stack_pop(&refs); + } + break; + } + + // Opcodes that consume some inputs and push new values + case CHECK_EXC_MATCH: { + ref_stack_pop(&refs); + PUSH_REF(i, NOT_LOCAL); + break; + } + + case FOR_ITER: { + load_fast_push_block(&sp, instr->i_target, refs.size + 1); + PUSH_REF(i, NOT_LOCAL); + break; + } + + case LOAD_ATTR: + case LOAD_SUPER_ATTR: { + ref self = ref_stack_pop(&refs); + if (opcode == LOAD_SUPER_ATTR) { + ref_stack_pop(&refs); + ref_stack_pop(&refs); + } + PUSH_REF(i, NOT_LOCAL); + if (oparg & 1) { + // A method call; conservatively assume that self is pushed + // back onto the stack + PUSH_REF(self.instr, self.local); + } + break; + } + + case SEND: { + load_fast_push_block(&sp, instr->i_target, refs.size); + ref_stack_pop(&refs); + PUSH_REF(i, NOT_LOCAL); + break; + } + + // Opcodes that consume all of their inputs default: { int num_popped = _PyOpcode_num_popped(opcode, oparg); int num_pushed = _PyOpcode_num_pushed(opcode, oparg); From 80fc5aa616d1e31d780e8e66c3f8d88d7c1e63b5 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Sun, 23 Mar 2025 14:38:35 -0700 Subject: [PATCH 
73/73] Avoid having stackref only visible from the c stack --- Python/bytecodes.c | 11 ++++------- Python/generated_cases.c.h | 17 ++++++++--------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index ef4ca49ae85db3..e47190bc54d07c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1210,11 +1210,8 @@ dummy_func( op(_SEND, (receiver, v -- receiver, retval)) { PyObject *receiver_o = PyStackRef_AsPyObjectBorrow(receiver); - PyObject *retval_o; assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - _PyStackRef tmp = PyStackRef_MakeHeapSafe(v); - DEAD(v); if ((tstate->interp->eval_frame == NULL) && (Py_TYPE(receiver_o) == &PyGen_Type || Py_TYPE(receiver_o) == &PyCoro_Type) && ((PyGenObject *)receiver_o)->gi_frame_state < FRAME_EXECUTING) @@ -1222,7 +1219,7 @@ dummy_func( PyGenObject *gen = (PyGenObject *)receiver_o; _PyInterpreterFrame *gen_frame = &gen->gi_iframe; STACK_SHRINK(1); - _PyFrame_StackPush(gen_frame, tmp); + _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v)); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; @@ -1238,7 +1235,7 @@ dummy_func( else { retval_o = PyObject_CallMethodOneArg(receiver_o, &_Py_ID(send), - PyStackRef_AsPyObjectBorrow(tmp)); + PyStackRef_AsPyObjectBorrow(v)); } if (retval_o == NULL) { int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); @@ -1251,11 +1248,11 @@ dummy_func( JUMPBY(oparg); } else { - PyStackRef_CLOSE(tmp); + PyStackRef_CLOSE(v); ERROR_IF(true, error); } } - PyStackRef_CLOSE(tmp); + PyStackRef_CLOSE(v); retval = PyStackRef_FromPyObjectSteal(retval_o); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 4336f2e5d38496..9726010a48953b 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -10479,7 +10479,6 @@ PyObject *receiver_o = PyStackRef_AsPyObjectBorrow(receiver); PyObject *retval_o; assert(frame->owner != 
FRAME_OWNED_BY_INTERPRETER); - _PyStackRef tmp = PyStackRef_MakeHeapSafe(v); if ((tstate->interp->eval_frame == NULL) && (Py_TYPE(receiver_o) == &PyGen_Type || Py_TYPE(receiver_o) == &PyCoro_Type) && ((PyGenObject *)receiver_o)->gi_frame_state < FRAME_EXECUTING) @@ -10487,7 +10486,7 @@ PyGenObject *gen = (PyGenObject *)receiver_o; _PyInterpreterFrame *gen_frame = &gen->gi_iframe; STACK_SHRINK(1); - _PyFrame_StackPush(gen_frame, tmp); + _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v)); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; @@ -10498,19 +10497,15 @@ DISPATCH_INLINED(gen_frame); } if (PyStackRef_IsNone(v) && PyIter_Check(receiver_o)) { - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); retval_o = Py_TYPE(receiver_o)->tp_iternext(receiver_o); stack_pointer = _PyFrame_GetStackPointer(frame); } else { - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); retval_o = PyObject_CallMethodOneArg(receiver_o, &_Py_ID(send), - PyStackRef_AsPyObjectBorrow(tmp)); + PyStackRef_AsPyObjectBorrow(v)); stack_pointer = _PyFrame_GetStackPointer(frame); } if (retval_o == NULL) { @@ -10530,14 +10525,18 @@ JUMPBY(oparg); } else { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(tmp); + PyStackRef_CLOSE(v); stack_pointer = _PyFrame_GetStackPointer(frame); JUMP_TO_LABEL(error); } } + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(tmp); + PyStackRef_CLOSE(v); stack_pointer = _PyFrame_GetStackPointer(frame); retval = PyStackRef_FromPyObjectSteal(retval_o); }