diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 3344ede5e92c07..862796b6bb67f6 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1257,14 +1257,14 @@ extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256]; const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = { [BINARY_OP] = { .nuops = 1, .uops = { { _BINARY_OP, 0, 0 } } }, - [BINARY_OP_ADD_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_ADD_FLOAT, 0, 0 } } }, - [BINARY_OP_ADD_INT] = { .nuops = 2, .uops = { { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_ADD_INT, 0, 0 } } }, - [BINARY_OP_ADD_UNICODE] = { .nuops = 2, .uops = { { _GUARD_BOTH_UNICODE, 0, 0 }, { _BINARY_OP_ADD_UNICODE, 0, 0 } } }, - [BINARY_OP_INPLACE_ADD_UNICODE] = { .nuops = 2, .uops = { { _GUARD_BOTH_UNICODE, 0, 0 }, { _BINARY_OP_INPLACE_ADD_UNICODE, 0, 0 } } }, - [BINARY_OP_MULTIPLY_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_MULTIPLY_FLOAT, 0, 0 } } }, - [BINARY_OP_MULTIPLY_INT] = { .nuops = 2, .uops = { { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_MULTIPLY_INT, 0, 0 } } }, - [BINARY_OP_SUBTRACT_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_SUBTRACT_FLOAT, 0, 0 } } }, - [BINARY_OP_SUBTRACT_INT] = { .nuops = 2, .uops = { { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_SUBTRACT_INT, 0, 0 } } }, + [BINARY_OP_ADD_FLOAT] = { .nuops = 3, .uops = { { _NOP, 0, 0 }, { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_ADD_FLOAT, 0, 0 } } }, + [BINARY_OP_ADD_INT] = { .nuops = 3, .uops = { { _NOP, 0, 0 }, { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_ADD_INT, 0, 0 } } }, + [BINARY_OP_ADD_UNICODE] = { .nuops = 3, .uops = { { _NOP, 0, 0 }, { _GUARD_BOTH_UNICODE, 0, 0 }, { _BINARY_OP_ADD_UNICODE, 0, 0 } } }, + [BINARY_OP_INPLACE_ADD_UNICODE] = { .nuops = 4, .uops = { { _NOP, 0, 0 }, { _NOP, 0, 0 }, { _GUARD_BOTH_UNICODE, 0, 0 }, { _BINARY_OP_INPLACE_ADD_UNICODE, 0, 0 } } }, + [BINARY_OP_MULTIPLY_FLOAT] = { .nuops = 3, .uops = { { _NOP, 0, 0 }, { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_MULTIPLY_FLOAT, 0, 0 } } }, + [BINARY_OP_MULTIPLY_INT] = { .nuops = 3, .uops = { { _NOP, 0, 0 }, { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_MULTIPLY_INT, 0, 0 } } }, + [BINARY_OP_SUBTRACT_FLOAT] = { .nuops = 3, .uops = { { _NOP, 0, 0 }, { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_SUBTRACT_FLOAT, 0, 0 } } }, + [BINARY_OP_SUBTRACT_INT] = { .nuops = 3, .uops = { { _NOP, 0, 0 }, { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_SUBTRACT_INT, 0, 0 } } }, [BINARY_SLICE] = { .nuops = 1, .uops = { { _BINARY_SLICE, 0, 0 } } }, [BINARY_SUBSCR] = { .nuops = 1, .uops = { { _BINARY_SUBSCR, 0, 0 } } }, [BINARY_SUBSCR_DICT] = { .nuops = 1, .uops = { { _BINARY_SUBSCR_DICT, 0, 0 } } }, diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index f92c0a0cddf906..ce391da7aa4724 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -76,6 +76,7 @@ typedef struct _PyExecutorObject { size_t jit_size; void *jit_code; void *jit_side_entry; + PyObject *refs; _PyExitData exits[1]; } _PyExecutorObject; @@ -144,7 +145,7 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp); int _Py_uop_analyze_and_optimize(struct _PyInterpreterFrame *frame, _PyUOpInstruction *trace, int trace_len, int curr_stackentries, - _PyBloomFilter *dependencies); + _PyBloomFilter *dependencies, PyObject *new_refs); extern PyTypeObject _PyCounterExecutor_Type; extern PyTypeObject _PyCounterOptimizer_Type; diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index f1ab72180d714d..8df6355f28e17c 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -991,7 +991,8 @@ def testfunc(n): self.assertIsNotNone(ex) uops = get_opnames(ex) self.assertNotIn("_GUARD_BOTH_INT", uops) - self.assertIn("_BINARY_OP_ADD_INT", uops) + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + self.assertIn("_LOAD_CONST_INLINE_BORROW", uops) # Try again, but between the runs, set the global to a float. # This should result in no executor the second time. ns = {} diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-30-15-23-16.gh-issue-115506.NpDku1.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-30-15-23-16.gh-issue-115506.NpDku1.rst new file mode 100644 index 00000000000000..daf4edae4aee67 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-30-15-23-16.gh-issue-115506.NpDku1.rst @@ -0,0 +1 @@ +Improve constant propagation and folding in JIT-compiled code. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8535306d9c7a03..7d2bfaa5e28751 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -495,11 +495,11 @@ dummy_func( } macro(BINARY_OP_MULTIPLY_INT) = - _GUARD_BOTH_INT + unused/1 + _BINARY_OP_MULTIPLY_INT; + NOP + _GUARD_BOTH_INT + unused/1 + _BINARY_OP_MULTIPLY_INT; macro(BINARY_OP_ADD_INT) = - _GUARD_BOTH_INT + unused/1 + _BINARY_OP_ADD_INT; + NOP + _GUARD_BOTH_INT + unused/1 + _BINARY_OP_ADD_INT; macro(BINARY_OP_SUBTRACT_INT) = - _GUARD_BOTH_INT + unused/1 + _BINARY_OP_SUBTRACT_INT; + NOP + _GUARD_BOTH_INT + unused/1 + _BINARY_OP_SUBTRACT_INT; op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); @@ -558,11 +558,11 @@ dummy_func( } macro(BINARY_OP_MULTIPLY_FLOAT) = - _GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_MULTIPLY_FLOAT; + NOP + _GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_MULTIPLY_FLOAT; macro(BINARY_OP_ADD_FLOAT) = - _GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_ADD_FLOAT; + NOP + _GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_ADD_FLOAT; macro(BINARY_OP_SUBTRACT_FLOAT) = - _GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_SUBTRACT_FLOAT; + NOP + _GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_SUBTRACT_FLOAT; op(_GUARD_BOTH_UNICODE, (left, right -- left, right)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); @@ -585,7 +585,7 @@ dummy_func( } macro(BINARY_OP_ADD_UNICODE) = - _GUARD_BOTH_UNICODE + unused/1 + _BINARY_OP_ADD_UNICODE; + NOP + _GUARD_BOTH_UNICODE + unused/1 + _BINARY_OP_ADD_UNICODE; // This is a subtle one. It's a super-instruction for // BINARY_OP_ADD_UNICODE followed by STORE_FAST @@ -634,7 +634,7 @@ dummy_func( } macro(BINARY_OP_INPLACE_ADD_UNICODE) = - _GUARD_BOTH_UNICODE + unused/1 + _BINARY_OP_INPLACE_ADD_UNICODE; + NOP + NOP + _GUARD_BOTH_UNICODE + unused/1 + _BINARY_OP_INPLACE_ADD_UNICODE; family(BINARY_SUBSCR, INLINE_CACHE_ENTRIES_BINARY_SUBSCR) = { BINARY_SUBSCR_DICT, diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 1201fe82efb919..5475530f617da0 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -62,6 +62,9 @@ _PyStackRef left; _PyStackRef right; _PyStackRef res; + // _NOP + { + } // _GUARD_BOTH_FLOAT right = stack_pointer[-1]; left = stack_pointer[-2]; @@ -98,6 +101,9 @@ _PyStackRef left; _PyStackRef right; _PyStackRef res; + // _NOP + { + } // _GUARD_BOTH_INT right = stack_pointer[-1]; left = stack_pointer[-2]; @@ -133,6 +139,9 @@ _PyStackRef left; _PyStackRef right; _PyStackRef res; + // _NOP + { + } // _GUARD_BOTH_UNICODE right = stack_pointer[-1]; left = stack_pointer[-2]; @@ -167,6 +176,12 @@ static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 1, "incorrect cache size"); _PyStackRef left; _PyStackRef right; + // _NOP + { + } + // _NOP + { + } // _GUARD_BOTH_UNICODE right = stack_pointer[-1]; left = stack_pointer[-2]; @@ -229,6 +244,9 @@ _PyStackRef left; _PyStackRef right; _PyStackRef res; + // _NOP + { + } // _GUARD_BOTH_FLOAT right = stack_pointer[-1]; left = stack_pointer[-2]; @@ -265,6 +283,9 @@ _PyStackRef left; _PyStackRef right; _PyStackRef res; + // _NOP + { + } // _GUARD_BOTH_INT right = stack_pointer[-1]; left = stack_pointer[-2]; @@ -300,6 +321,9 @@ _PyStackRef left; _PyStackRef right; _PyStackRef res; + // _NOP + { + } // _GUARD_BOTH_FLOAT right = stack_pointer[-1]; left = stack_pointer[-2]; @@ -336,6 +360,9 @@ _PyStackRef left; _PyStackRef right; _PyStackRef res; + // _NOP + { + } // _GUARD_BOTH_INT right = stack_pointer[-1]; left = stack_pointer[-2]; diff --git a/Python/optimizer.c b/Python/optimizer.c index 978649faa04d45..4b405b0b818c08 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -257,6 +257,7 @@ uop_dealloc(_PyExecutorObject *self) { _PyObject_GC_UNTRACK(self); assert(self->vm_data.code == NULL); unlink_executor(self); + Py_CLEAR(self->refs); #ifdef _Py_JIT _PyJIT_Free(self); #endif @@ -360,6 +361,7 @@ static int executor_traverse(PyObject *o, visitproc visit, void *arg) { _PyExecutorObject *executor = (_PyExecutorObject *)o; + Py_VISIT(executor->refs); for (uint32_t i = 0; i < executor->exit_count; i++) { Py_VISIT(executor->exits[i].executor); } @@ -1066,6 +1068,7 @@ allocate_executor(int exit_count, int length) res->trace = (_PyUOpInstruction *)(res->exits + exit_count); res->code_size = length; res->exit_count = exit_count; + res->refs = NULL; return res; } @@ -1247,12 +1250,19 @@ uop_optimize( } assert(length < UOP_MAX_TRACE_LENGTH); OPT_STAT_INC(traces_created); + // These are any references that were created during optimization, and need + // to be kept alive until we build the executor's refs tuple: + PyObject *new_refs = PyList_New(0); + if (new_refs == NULL) { + return -1; + } char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE"); if (env_var == NULL || *env_var == '\0' || *env_var > '0') { length = _Py_uop_analyze_and_optimize(frame, buffer, length, - curr_stackentries, &dependencies); + curr_stackentries, &dependencies, new_refs); if (length <= 0) { + Py_DECREF(new_refs); return length; } } @@ -1274,13 +1284,39 @@ uop_optimize( assert(_PyOpcode_uop_name[buffer[pc].opcode]); assert(strncmp(_PyOpcode_uop_name[buffer[pc].opcode], _PyOpcode_uop_name[opcode], strlen(_PyOpcode_uop_name[opcode])) == 0); } + // We *might* want to de-duplicate these. In addition to making sure we do + // so in a way that preserves "equal" constants with different types (see + // _PyCode_ConstantKey), we *also* need to be careful to compare unknown + // objects by identity, since we don't want to invoke arbitary code in a + // __hash__/__eq__ implementation. It might be more trouble than it's worth: + int refs_needed = 0; + for (int i = 0; i < length; i++) { + if (buffer[i].opcode == _LOAD_CONST_INLINE) { + refs_needed++; + } + } + PyObject *refs = PyTuple_New(refs_needed); + if (refs == NULL) { + Py_DECREF(new_refs); + return -1; + } + int j = 0; + for (int i = 0; i < length; i++) { + if (buffer[i].opcode == _LOAD_CONST_INLINE) { + PyTuple_SET_ITEM(refs, j++, Py_NewRef(buffer[i].operand)); + } + } + Py_DECREF(new_refs); + assert(j == refs_needed); OPT_HIST(effective_trace_length(buffer, length), optimized_trace_length_hist); length = prepare_for_execution(buffer, length); assert(length <= UOP_MAX_TRACE_LENGTH); _PyExecutorObject *executor = make_executor_from_uops(buffer, length, &dependencies); if (executor == NULL) { + Py_DECREF(refs); return -1; } + executor->refs = refs; assert(length <= UOP_MAX_TRACE_LENGTH); *exec_ptr = executor; return 1; @@ -1584,6 +1620,7 @@ executor_clear(_PyExecutorObject *executor) * free the executor unless we hold a strong reference to it */ Py_INCREF(executor); + Py_CLEAR(executor->refs); for (uint32_t i = 0; i < executor->exit_count; i++) { executor->exits[i].temperature = initial_unreachable_backoff_counter(); Py_CLEAR(executor->exits[i].executor); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index b202b58a8b7214..0177ec1b56586d 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -300,10 +300,20 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define GETLOCAL(idx) ((ctx->frame->locals[idx])) -#define REPLACE_OP(INST, OP, ARG, OPERAND) \ - INST->opcode = OP; \ - INST->oparg = ARG; \ - INST->operand = OPERAND; +#define REPLACE_OP(INST, OP, ARG, OPERAND) \ + do { \ + (INST)->opcode = (OP); \ + (INST)->oparg = (ARG); \ + (INST)->operand = (OPERAND); \ + } while (0) + +#define REPLACE_OP_WITH_LOAD_CONST(INST, CONST) \ + do { \ + PyObject *o = (CONST); \ + int opcode = _Py_IsImmortal(o) ? _LOAD_CONST_INLINE_BORROW \ + : _LOAD_CONST_INLINE; \ + REPLACE_OP((INST), opcode, 0, (uintptr_t)o); \ + } while (0) /* Shortened forms for convenience, used in optimizer_bytecodes.c */ #define sym_is_not_null _Py_uop_sym_is_not_null @@ -392,7 +402,8 @@ optimize_uops( _PyUOpInstruction *trace, int trace_len, int curr_stacklen, - _PyBloomFilter *dependencies + _PyBloomFilter *dependencies, + PyObject *new_refs ) { @@ -524,6 +535,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) last_set_ip = pc; break; case _POP_TOP: + case _POP_TOP_LOAD_CONST_INLINE_BORROW: { _PyUOpInstruction *last = &buffer[pc-1]; while (last->opcode == _NOP) { @@ -535,9 +547,14 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) last->opcode == _COPY ) { last->opcode = _NOP; - buffer[pc].opcode = _NOP; + if (buffer[pc].opcode == _POP_TOP_LOAD_CONST_INLINE_BORROW) { + buffer[pc].opcode = _LOAD_CONST_INLINE_BORROW; + } + else { + buffer[pc].opcode = _NOP; + } } - if (last->opcode == _REPLACE_WITH_TRUE) { + if (last->opcode == _POP_TOP_LOAD_CONST_INLINE_BORROW) { last->opcode = _NOP; } break; @@ -580,7 +597,8 @@ _Py_uop_analyze_and_optimize( _PyUOpInstruction *buffer, int length, int curr_stacklen, - _PyBloomFilter *dependencies + _PyBloomFilter *dependencies, + PyObject *new_refs ) { OPT_STAT_INC(optimizer_attempts); @@ -592,7 +610,7 @@ _Py_uop_analyze_and_optimize( length = optimize_uops( _PyFrame_GetCode(frame), buffer, - length, curr_stacklen, dependencies); + length, curr_stacklen, dependencies, new_refs); if (length <= 0) { return length; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 9a1b9da52f4bb5..66efaf165dd53e 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -63,6 +63,7 @@ dummy_func(void) { _Py_UOpsContext *ctx; _PyUOpInstruction *this_instr; _PyBloomFilter *dependencies; + PyObject *new_refs; int modified; int curr_space; int max_space; @@ -81,6 +82,9 @@ dummy_func(void) { op(_LOAD_FAST, (-- value)) { value = GETLOCAL(oparg); + if (sym_is_const(value)) { + REPLACE_OP_WITH_LOAD_CONST(this_instr, sym_get_const(value)); + } } op(_LOAD_FAST_AND_CLEAR, (-- value)) { @@ -193,13 +197,15 @@ dummy_func(void) { assert(PyLong_CheckExact(sym_get_const(right))); PyObject *temp = _PyLong_Add((PyLongObject *)sym_get_const(left), (PyLongObject *)sym_get_const(right)); - if (temp == NULL) { + if (temp == NULL || _PyList_AppendTakeRef((PyListObject *)new_refs, temp)) { goto error; } + assert(this_instr[-2].opcode == _NOP); + assert(this_instr[-1].opcode == _NOP); + REPLACE_OP(this_instr - 2, _POP_TOP, 0, 0); + REPLACE_OP(this_instr - 1, _POP_TOP, 0, 0); + REPLACE_OP_WITH_LOAD_CONST(this_instr, temp); res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! } else { res = sym_new_type(ctx, &PyLong_Type); @@ -214,13 +220,15 @@ dummy_func(void) { assert(PyLong_CheckExact(sym_get_const(right))); PyObject *temp = _PyLong_Subtract((PyLongObject *)sym_get_const(left), (PyLongObject *)sym_get_const(right)); - if (temp == NULL) { + if (temp == NULL || _PyList_AppendTakeRef((PyListObject *)new_refs, temp)) { goto error; } + assert(this_instr[-2].opcode == _NOP); + assert(this_instr[-1].opcode == _NOP); + REPLACE_OP(this_instr - 2, _POP_TOP, 0, 0); + REPLACE_OP(this_instr - 1, _POP_TOP, 0, 0); + REPLACE_OP_WITH_LOAD_CONST(this_instr, temp); res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! } else { res = sym_new_type(ctx, &PyLong_Type); @@ -235,13 +243,15 @@ dummy_func(void) { assert(PyLong_CheckExact(sym_get_const(right))); PyObject *temp = _PyLong_Multiply((PyLongObject *)sym_get_const(left), (PyLongObject *)sym_get_const(right)); - if (temp == NULL) { + if (temp == NULL || _PyList_AppendTakeRef((PyListObject *)new_refs, temp)) { goto error; } + assert(this_instr[-2].opcode == _NOP); + assert(this_instr[-1].opcode == _NOP); + REPLACE_OP(this_instr - 2, _POP_TOP, 0, 0); + REPLACE_OP(this_instr - 1, _POP_TOP, 0, 0); + REPLACE_OP_WITH_LOAD_CONST(this_instr, temp); res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! } else { res = sym_new_type(ctx, &PyLong_Type); @@ -257,13 +267,15 @@ dummy_func(void) { PyObject *temp = PyFloat_FromDouble( PyFloat_AS_DOUBLE(sym_get_const(left)) + PyFloat_AS_DOUBLE(sym_get_const(right))); - if (temp == NULL) { + if (temp == NULL || _PyList_AppendTakeRef((PyListObject *)new_refs, temp)) { goto error; } + assert(this_instr[-2].opcode == _NOP); + assert(this_instr[-1].opcode == _NOP); + REPLACE_OP(this_instr - 2, _POP_TOP, 0, 0); + REPLACE_OP(this_instr - 1, _POP_TOP, 0, 0); + REPLACE_OP_WITH_LOAD_CONST(this_instr, temp); res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! } else { res = sym_new_type(ctx, &PyFloat_Type); @@ -279,13 +291,15 @@ dummy_func(void) { PyObject *temp = PyFloat_FromDouble( PyFloat_AS_DOUBLE(sym_get_const(left)) - PyFloat_AS_DOUBLE(sym_get_const(right))); - if (temp == NULL) { + if (temp == NULL || _PyList_AppendTakeRef((PyListObject *)new_refs, temp)) { goto error; } + assert(this_instr[-2].opcode == _NOP); + assert(this_instr[-1].opcode == _NOP); + REPLACE_OP(this_instr - 2, _POP_TOP, 0, 0); + REPLACE_OP(this_instr - 1, _POP_TOP, 0, 0); + REPLACE_OP_WITH_LOAD_CONST(this_instr, temp); res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! } else { res = sym_new_type(ctx, &PyFloat_Type); @@ -301,13 +315,15 @@ dummy_func(void) { PyObject *temp = PyFloat_FromDouble( PyFloat_AS_DOUBLE(sym_get_const(left)) * PyFloat_AS_DOUBLE(sym_get_const(right))); - if (temp == NULL) { + if (temp == NULL || _PyList_AppendTakeRef((PyListObject *)new_refs, temp)) { goto error; } + assert(this_instr[-2].opcode == _NOP); + assert(this_instr[-1].opcode == _NOP); + REPLACE_OP(this_instr - 2, _POP_TOP, 0, 0); + REPLACE_OP(this_instr - 1, _POP_TOP, 0, 0); + REPLACE_OP_WITH_LOAD_CONST(this_instr, temp); res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! } else { res = sym_new_type(ctx, &PyFloat_Type); @@ -318,15 +334,42 @@ dummy_func(void) { if (sym_is_const(left) && sym_is_const(right) && sym_matches_type(left, &PyUnicode_Type) && sym_matches_type(right, &PyUnicode_Type)) { PyObject *temp = PyUnicode_Concat(sym_get_const(left), sym_get_const(right)); - if (temp == NULL) { + if (temp == NULL || _PyList_AppendTakeRef((PyListObject *)new_refs, temp)) { + goto error; + } + assert(this_instr[-2].opcode == _NOP); + assert(this_instr[-1].opcode == _NOP); + REPLACE_OP(this_instr - 2, _POP_TOP, 0, 0); + REPLACE_OP(this_instr - 1, _POP_TOP, 0, 0); + REPLACE_OP_WITH_LOAD_CONST(this_instr, temp); + res = sym_new_const(ctx, temp); + } + else { + res = sym_new_type(ctx, &PyUnicode_Type); + } + } + + op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) { + _Py_UopsSymbol *res; + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyUnicode_Type) && sym_matches_type(right, &PyUnicode_Type)) { + PyObject *temp = PyUnicode_Concat(sym_get_const(left), sym_get_const(right)); + if (temp == NULL || _PyList_AppendTakeRef((PyListObject *)new_refs, temp)) { goto error; } + assert(this_instr[-3].opcode == _NOP); + assert(this_instr[-2].opcode == _NOP); + assert(this_instr[-1].opcode == _NOP); + REPLACE_OP(this_instr - 3, _POP_TOP, 0, 0); + REPLACE_OP(this_instr - 2, _POP_TOP, 0, 0); + REPLACE_OP_WITH_LOAD_CONST(this_instr - 1, temp); + REPLACE_OP(this_instr, _STORE_FAST, this_instr->operand, 0); res = sym_new_const(ctx, temp); - Py_DECREF(temp); } else { res = sym_new_type(ctx, &PyUnicode_Type); } + GETLOCAL(this_instr->operand) = res; } op(_BINARY_SUBSCR_INIT_CALL, (container, sub -- new_frame: _Py_UOpsAbstractFrame *)) { @@ -420,8 +463,7 @@ dummy_func(void) { op(_LOAD_CONST, (-- value)) { PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); - int opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; - REPLACE_OP(this_instr, opcode, 0, (uintptr_t)val); + REPLACE_OP_WITH_LOAD_CONST(this_instr, val); value = sym_new_const(ctx, val); } @@ -444,6 +486,9 @@ dummy_func(void) { } op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { + if (sym_is_const(bottom)) { + REPLACE_OP_WITH_LOAD_CONST(this_instr, sym_get_const(bottom)); + } assert(oparg > 0); top = bottom; } @@ -832,6 +877,12 @@ dummy_func(void) { self_or_null = sym_new_unknown(ctx); } + op(_REPLACE_WITH_TRUE, (value -- res)) { + (void)value; + REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)Py_True); + res = sym_new_const(ctx, Py_True); + } + op(_JUMP_TO_TOP, (--)) { ctx->done = true; } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 4d172e3c762704..78306c7d0aaf06 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -39,6 +39,9 @@ case _LOAD_FAST: { _Py_UopsSymbol *value; value = GETLOCAL(oparg); + if (sym_is_const(value)) { + REPLACE_OP_WITH_LOAD_CONST(this_instr, sym_get_const(value)); + } stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -59,8 +62,7 @@ case _LOAD_CONST: { _Py_UopsSymbol *value; PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg); - int opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; - REPLACE_OP(this_instr, opcode, 0, (uintptr_t)val); + REPLACE_OP_WITH_LOAD_CONST(this_instr, val); value = sym_new_const(ctx, val); stack_pointer[0] = value; stack_pointer += 1; @@ -187,8 +189,12 @@ } case _REPLACE_WITH_TRUE: { + _Py_UopsSymbol *value; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); + value = stack_pointer[-1]; + (void)value; + REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)Py_True); + res = sym_new_const(ctx, Py_True); stack_pointer[-1] = res; break; } @@ -244,13 +250,15 @@ assert(PyLong_CheckExact(sym_get_const(right))); PyObject *temp = _PyLong_Multiply((PyLongObject *)sym_get_const(left), (PyLongObject *)sym_get_const(right)); - if (temp == NULL) { + if (temp == NULL || _PyList_AppendTakeRef((PyListObject *)new_refs, temp)) { goto error; } + assert(this_instr[-2].opcode == _NOP); + assert(this_instr[-1].opcode == _NOP); + REPLACE_OP(this_instr - 2, _POP_TOP, 0, 0); + REPLACE_OP(this_instr - 1, _POP_TOP, 0, 0); + REPLACE_OP_WITH_LOAD_CONST(this_instr, temp); res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! } else { res = sym_new_type(ctx, &PyLong_Type); @@ -274,13 +282,15 @@ assert(PyLong_CheckExact(sym_get_const(right))); PyObject *temp = _PyLong_Add((PyLongObject *)sym_get_const(left), (PyLongObject *)sym_get_const(right)); - if (temp == NULL) { + if (temp == NULL || _PyList_AppendTakeRef((PyListObject *)new_refs, temp)) { goto error; } + assert(this_instr[-2].opcode == _NOP); + assert(this_instr[-1].opcode == _NOP); + REPLACE_OP(this_instr - 2, _POP_TOP, 0, 0); + REPLACE_OP(this_instr - 1, _POP_TOP, 0, 0); + REPLACE_OP_WITH_LOAD_CONST(this_instr, temp); res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! } else { res = sym_new_type(ctx, &PyLong_Type); @@ -304,13 +314,15 @@ assert(PyLong_CheckExact(sym_get_const(right))); PyObject *temp = _PyLong_Subtract((PyLongObject *)sym_get_const(left), (PyLongObject *)sym_get_const(right)); - if (temp == NULL) { + if (temp == NULL || _PyList_AppendTakeRef((PyListObject *)new_refs, temp)) { goto error; } + assert(this_instr[-2].opcode == _NOP); + assert(this_instr[-1].opcode == _NOP); + REPLACE_OP(this_instr - 2, _POP_TOP, 0, 0); + REPLACE_OP(this_instr - 1, _POP_TOP, 0, 0); + REPLACE_OP_WITH_LOAD_CONST(this_instr, temp); res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! } else { res = sym_new_type(ctx, &PyLong_Type); @@ -366,13 +378,15 @@ PyObject *temp = PyFloat_FromDouble( PyFloat_AS_DOUBLE(sym_get_const(left)) * PyFloat_AS_DOUBLE(sym_get_const(right))); - if (temp == NULL) { + if (temp == NULL || _PyList_AppendTakeRef((PyListObject *)new_refs, temp)) { goto error; } + assert(this_instr[-2].opcode == _NOP); + assert(this_instr[-1].opcode == _NOP); + REPLACE_OP(this_instr - 2, _POP_TOP, 0, 0); + REPLACE_OP(this_instr - 1, _POP_TOP, 0, 0); + REPLACE_OP_WITH_LOAD_CONST(this_instr, temp); res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! } else { res = sym_new_type(ctx, &PyFloat_Type); @@ -397,13 +411,15 @@ PyObject *temp = PyFloat_FromDouble( PyFloat_AS_DOUBLE(sym_get_const(left)) + PyFloat_AS_DOUBLE(sym_get_const(right))); - if (temp == NULL) { + if (temp == NULL || _PyList_AppendTakeRef((PyListObject *)new_refs, temp)) { goto error; } + assert(this_instr[-2].opcode == _NOP); + assert(this_instr[-1].opcode == _NOP); + REPLACE_OP(this_instr - 2, _POP_TOP, 0, 0); + REPLACE_OP(this_instr - 1, _POP_TOP, 0, 0); + REPLACE_OP_WITH_LOAD_CONST(this_instr, temp); res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! } else { res = sym_new_type(ctx, &PyFloat_Type); @@ -428,13 +444,15 @@ PyObject *temp = PyFloat_FromDouble( PyFloat_AS_DOUBLE(sym_get_const(left)) - PyFloat_AS_DOUBLE(sym_get_const(right))); - if (temp == NULL) { + if (temp == NULL || _PyList_AppendTakeRef((PyListObject *)new_refs, temp)) { goto error; } + assert(this_instr[-2].opcode == _NOP); + assert(this_instr[-1].opcode == _NOP); + REPLACE_OP(this_instr - 2, _POP_TOP, 0, 0); + REPLACE_OP(this_instr - 1, _POP_TOP, 0, 0); + REPLACE_OP_WITH_LOAD_CONST(this_instr, temp); res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! } else { res = sym_new_type(ctx, &PyFloat_Type); @@ -468,11 +486,15 @@ if (sym_is_const(left) && sym_is_const(right) && sym_matches_type(left, &PyUnicode_Type) && sym_matches_type(right, &PyUnicode_Type)) { PyObject *temp = PyUnicode_Concat(sym_get_const(left), sym_get_const(right)); - if (temp == NULL) { + if (temp == NULL || _PyList_AppendTakeRef((PyListObject *)new_refs, temp)) { goto error; } + assert(this_instr[-2].opcode == _NOP); + assert(this_instr[-1].opcode == _NOP); + REPLACE_OP(this_instr - 2, _POP_TOP, 0, 0); + REPLACE_OP(this_instr - 1, _POP_TOP, 0, 0); + REPLACE_OP_WITH_LOAD_CONST(this_instr, temp); res = sym_new_const(ctx, temp); - Py_DECREF(temp); } else { res = sym_new_type(ctx, &PyUnicode_Type); @@ -484,6 +506,30 @@ } case _BINARY_OP_INPLACE_ADD_UNICODE: { + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + _Py_UopsSymbol *res; + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyUnicode_Type) && sym_matches_type(right, &PyUnicode_Type)) { + PyObject *temp = PyUnicode_Concat(sym_get_const(left), sym_get_const(right)); + if (temp == NULL || _PyList_AppendTakeRef((PyListObject *)new_refs, temp)) { + goto error; + } + assert(this_instr[-3].opcode == _NOP); + assert(this_instr[-2].opcode == _NOP); + assert(this_instr[-1].opcode == _NOP); + REPLACE_OP(this_instr - 3, _POP_TOP, 0, 0); + REPLACE_OP(this_instr - 2, _POP_TOP, 0, 0); + REPLACE_OP_WITH_LOAD_CONST(this_instr - 1, temp); + REPLACE_OP(this_instr, _STORE_FAST, this_instr->operand, 0); + res = sym_new_const(ctx, temp); + } + else { + res = sym_new_type(ctx, &PyUnicode_Type); + } + GETLOCAL(this_instr->operand) = res; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); break; @@ -2155,6 +2201,9 @@ _Py_UopsSymbol *bottom; _Py_UopsSymbol *top; bottom = stack_pointer[-1 - (oparg-1)]; + if (sym_is_const(bottom)) { + REPLACE_OP_WITH_LOAD_CONST(this_instr, sym_get_const(bottom)); + } assert(oparg > 0); top = bottom; stack_pointer[0] = top;