Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-115419: Tidy up tier 2 optimizer. Merge peephole pass into main pass #117997

Merged
merged 1 commit into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 39 additions & 124 deletions Python/optimizer_analysis.c
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,30 @@ eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit)
}
}

/* Recover the code object referenced by a _PUSH_FRAME/_POP_FRAME uop.
 *
 * The operand of such a uop takes one of three forms:
 *   - 0: no object is recorded, NULL is returned;
 *   - a value with bit 0 set: a PyCodeObject * once that bit is cleared;
 *   - any other value: a PyFunctionObject *, whose func_code is returned.
 */
static PyCodeObject *
get_code(_PyUOpInstruction *op)
{
    assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME);
    uint64_t raw = op->operand;
    if (raw == 0) {
        return NULL;
    }
    PyCodeObject *code;
    if ((raw & 1) == 0) {
        /* Untagged operand: it is the function object itself. */
        PyFunctionObject *function = (PyFunctionObject *)raw;
        assert(PyFunction_Check(function));
        code = (PyCodeObject *)function->func_code;
    }
    else {
        /* Tagged operand: strip the low bit to get the code object. */
        code = (PyCodeObject *)(raw & ~1);
    }
    assert(PyCode_Check(code));
    return code;
}

/* 1 for success, 0 for not ready, cannot error at the moment. */
static int
optimize_uops(
Expand All @@ -376,6 +400,10 @@ optimize_uops(
_Py_UOpsContext context;
_Py_UOpsContext *ctx = &context;
uint32_t opcode = UINT16_MAX;
int curr_space = 0;
int max_space = 0;
_PyUOpInstruction *first_valid_check_stack = NULL;
_PyUOpInstruction *corresponding_check_stack = NULL;

if (_Py_uop_abstractcontext_init(ctx) < 0) {
goto out_of_space;
Expand Down Expand Up @@ -416,8 +444,7 @@ optimize_uops(
ctx->frame->stack_pointer = stack_pointer;
assert(STACK_LEVEL() >= 0);
}
_Py_uop_abstractcontext_fini(ctx);
return trace_len;
Py_UNREACHABLE();

out_of_space:
DPRINTF(3, "\n");
Expand All @@ -443,9 +470,17 @@ optimize_uops(
_Py_uop_abstractcontext_fini(ctx);
return 0;
done:
/* Cannot optimize further, but there would be no benefit
* in retrying later */
/* Either reached the end or cannot optimize further, but there
* would be no benefit in retrying later */
_Py_uop_abstractcontext_fini(ctx);
if (first_valid_check_stack != NULL) {
assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE);
assert(max_space > 0);
assert(max_space <= INT_MAX);
assert(max_space <= INT32_MAX);
first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND;
first_valid_check_stack->operand = max_space;
}
return trace_len;
}

Expand Down Expand Up @@ -532,124 +567,6 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
Py_UNREACHABLE();
}

/* Map the operand of a _PUSH_FRAME/_POP_FRAME uop to a code object.
 *
 * Returns NULL when the operand is 0. When the low bit of the operand is
 * set, the remaining bits are a PyCodeObject *; otherwise the operand is a
 * PyFunctionObject * and its func_code is returned.
 */
static PyCodeObject *
get_co(_PyUOpInstruction *op)
{
    assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME);
    const uint64_t tagged = op->operand;
    if (tagged == 0) {
        return NULL;
    }
    if (tagged & 1) {
        /* Low bit set: clear it to obtain the code object pointer. */
        PyCodeObject *direct = (PyCodeObject *)(tagged & ~1);
        assert(PyCode_Check(direct));
        return direct;
    }
    /* Low bit clear: the operand is a function object. */
    PyFunctionObject *callee = (PyFunctionObject *)tagged;
    assert(PyFunction_Check(callee));
    PyCodeObject *via_func = (PyCodeObject *)callee->func_code;
    assert(PyCode_Check(via_func));
    return via_func;
}

/* Peephole pass over the uop trace stored in buffer[0 .. buffer_size).
 *
 * `frame` supplies the code object of the frame the trace starts in; the
 * current code object is then tracked across _PUSH_FRAME/_POP_FRAME.
 *
 * Rewrites performed in place:
 *   - _LOAD_CONST becomes _LOAD_CONST_INLINE_BORROW (immortal constant) or
 *     _LOAD_CONST_INLINE, with the constant itself stored in the operand.
 *   - _CHECK_PEP_523 becomes _NOP when the interpreter has no custom
 *     eval_frame function installed.
 *   - Each _CHECK_STACK_SPACE is paired with the _PUSH_FRAME that follows
 *     it. The first paired check is turned into _CHECK_STACK_SPACE_OPERAND
 *     carrying the largest cumulative frame size seen; later ones are
 *     replaced by _NOP.
 *
 * The scan stops at _JUMP_TO_TOP or _EXIT_TRACE, or early when a code
 * object cannot be determined or the accumulated size would not fit in a
 * signed 32-bit integer. Falling off the end of the buffer is unreachable.
 */
static void
peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size)
{
    PyCodeObject *co = _PyFrame_GetCode(frame);
    int curr_space = 0;
    int max_space = 0;
    _PyUOpInstruction *first_valid_check_stack = NULL;
    _PyUOpInstruction *corresponding_check_stack = NULL;
    for (int pc = 0; pc < buffer_size; pc++) {
        int opcode = buffer[pc].opcode;
        switch(opcode) {
            case _LOAD_CONST: {
                assert(co != NULL);
                PyObject *val = PyTuple_GET_ITEM(co->co_consts, buffer[pc].oparg);
                buffer[pc].opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE;
                buffer[pc].operand = (uintptr_t)val;
                break;
            }
            case _CHECK_PEP_523: {
                /* Setting the eval frame function invalidates
                 * all executors, so no need to check dynamically */
                if (_PyInterpreterState_GET()->eval_frame == NULL) {
                    buffer[pc].opcode = _NOP;
                }
                break;
            }
            case _CHECK_STACK_SPACE: {
                /* Remember the check until its _PUSH_FRAME is reached. */
                assert(corresponding_check_stack == NULL);
                corresponding_check_stack = &buffer[pc];
                break;
            }
            case _PUSH_FRAME: {
                assert(corresponding_check_stack != NULL);
                co = get_co(&buffer[pc]);
                if (co == NULL) {
                    // should be about to _EXIT_TRACE anyway
                    goto finish;
                }
                int framesize = co->co_framesize;
                assert(framesize > 0);
                /* Add in 64 bits so the range test happens before any
                 * signed `int` overflow (undefined behaviour) can occur. */
                int64_t wanted_space = (int64_t)curr_space + (int64_t)framesize;
                if (wanted_space < 0 || wanted_space > INT32_MAX) {
                    // won't fit in signed 32-bit int
                    goto finish;
                }
                curr_space = (int)wanted_space;
                max_space = curr_space > max_space ? curr_space : max_space;
                if (first_valid_check_stack == NULL) {
                    first_valid_check_stack = corresponding_check_stack;
                }
                else {
                    // delete all but the first valid _CHECK_STACK_SPACE
                    corresponding_check_stack->opcode = _NOP;
                }
                corresponding_check_stack = NULL;
                break;
            }
            case _POP_FRAME: {
                assert(corresponding_check_stack == NULL);
                assert(co != NULL);
                /* Release the space of the frame being left before
                 * switching `co` to the code object named by the uop. */
                int framesize = co->co_framesize;
                assert(framesize > 0);
                assert(framesize <= curr_space);
                curr_space -= framesize;
                co = get_co(&buffer[pc]);
                if (co == NULL) {
                    // might be impossible, but bailing is still safe
                    goto finish;
                }
                break;
            }
            case _JUMP_TO_TOP:
            case _EXIT_TRACE:
                goto finish;
#ifdef Py_DEBUG
            case _CHECK_STACK_SPACE_OPERAND: {
                /* We should never see _CHECK_STACK_SPACE_OPERANDs.
                 * They are only created at the end of this pass. */
                Py_UNREACHABLE();
            }
#endif
        }
    }
    Py_UNREACHABLE();
finish:
    /* Fold every surviving stack check into the first one. */
    if (first_valid_check_stack != NULL) {
        assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE);
        assert(max_space > 0);
        assert(max_space <= INT_MAX);
        assert(max_space <= INT32_MAX);
        first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND;
        first_valid_check_stack->operand = max_space;
    }
}

// 0 - failure, no error raised, just fall back to Tier 1
// -1 - failure, and raise error
// > 0 - length of optimized trace
Expand All @@ -669,8 +586,6 @@ _Py_uop_analyze_and_optimize(
return err;
}

peephole_opt(frame, buffer, length);

length = optimize_uops(
_PyFrame_GetCode(frame), buffer,
length, curr_stacklen, dependencies);
Expand Down
83 changes: 78 additions & 5 deletions Python/optimizer_bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,14 @@ optimize_to_bool(
_Py_UopsSymbol **result_ptr);

extern void
eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit)
eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit);

extern PyCodeObject *get_code(_PyUOpInstruction *op);

static int
dummy_func(void) {

PyCodeObject *code;
PyCodeObject *co;
int oparg;
_Py_UopsSymbol *flag;
_Py_UopsSymbol *left;
Expand All @@ -54,10 +56,15 @@ dummy_func(void) {
_Py_UopsSymbol *top;
_Py_UopsSymbol *bottom;
_Py_UOpsAbstractFrame *frame;
_Py_UOpsAbstractFrame *new_frame;
_Py_UOpsContext *ctx;
_PyUOpInstruction *this_instr;
_PyBloomFilter *dependencies;
int modified;
int curr_space;
int max_space;
_PyUOpInstruction *first_valid_check_stack;
_PyUOpInstruction *corresponding_check_stack;

// BEGIN BYTECODES //

Expand Down Expand Up @@ -393,9 +400,10 @@ dummy_func(void) {
}

op(_LOAD_CONST, (-- value)) {
// There should be no LOAD_CONST. It should be all
// replaced by peephole_opt.
Py_UNREACHABLE();
PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg);
int opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE;
REPLACE_OP(this_instr, opcode, 0, (uintptr_t)val);
OUT_OF_SPACE_IF_NULL(value = sym_new_const(ctx, val));
}

op(_LOAD_CONST_INLINE, (ptr/4 -- value)) {
Expand Down Expand Up @@ -590,6 +598,32 @@ dummy_func(void) {
frame_pop(ctx);
stack_pointer = ctx->frame->stack_pointer;
res = retval;

/* Stack space handling */
assert(corresponding_check_stack == NULL);
assert(co != NULL);
int framesize = co->co_framesize;
assert(framesize > 0);
assert(framesize <= curr_space);
curr_space -= framesize;

co = get_code(this_instr);
if (co == NULL) {
// might be impossible, but bailing is still safe
goto done;
}
}

op(_CHECK_STACK_SPACE, ( --)) {
/* Record this instruction as the pending stack-space check. The assert
 * enforces that at most one check is pending at any time; the pointer is
 * consumed and reset to NULL elsewhere (see the _PUSH_FRAME handling). */
assert(corresponding_check_stack == NULL);
corresponding_check_stack = this_instr;
}

op (_CHECK_STACK_SPACE_OPERAND, ( -- )) {
/* NOTE(review): `framesize` is not declared in this block; presumably the
 * generated case code declares it for this uop, and the cast to void only
 * silences an unused-variable warning -- confirm against the generator. */
(void)framesize;
/* We should never see _CHECK_STACK_SPACE_OPERANDs.
* They are only created at the end of this pass. */
Py_UNREACHABLE();
}

op(_PUSH_FRAME, (new_frame: _Py_UOpsAbstractFrame * -- unused if (0))) {
Expand All @@ -598,6 +632,29 @@ dummy_func(void) {
ctx->frame = new_frame;
ctx->curr_frame_depth++;
stack_pointer = new_frame->stack_pointer;
co = get_code(this_instr);
if (co == NULL) {
// should be about to _EXIT_TRACE anyway
goto done;
}

/* Stack space handling */
int framesize = co->co_framesize;
assert(framesize > 0);
curr_space += framesize;
if (curr_space < 0 || curr_space > INT32_MAX) {
// won't fit in signed 32-bit int
goto done;
}
max_space = curr_space > max_space ? curr_space : max_space;
if (first_valid_check_stack == NULL) {
first_valid_check_stack = corresponding_check_stack;
}
else {
// delete all but the first valid _CHECK_STACK_SPACE
corresponding_check_stack->opcode = _NOP;
}
corresponding_check_stack = NULL;
}

op(_UNPACK_SEQUENCE, (seq -- values[oparg])) {
Expand Down Expand Up @@ -662,6 +719,22 @@ dummy_func(void) {
}
}

op(_CHECK_PEP_523, (--)) {
/* Setting the eval frame function invalidates
* all executors, so no need to check dynamically */
if (_PyInterpreterState_GET()->eval_frame == NULL) {
/* No custom eval_frame is installed: turn this guard into a _NOP.
 * NOTE(review): REPLACE_OP's arguments look like (instruction, opcode,
 * oparg, operand) -- confirm against the macro definition. */
REPLACE_OP(this_instr, _NOP, 0 ,0);
}
}

op(_JUMP_TO_TOP, (--)) {
/* This uop ends the analysis of the trace: leave via the `done` label of
 * the function these cases are expanded into. */
goto done;
}

op(_EXIT_TRACE, (--)) {
/* This uop ends the analysis of the trace: leave via the `done` label of
 * the function these cases are expanded into. */
goto done;
}


// END BYTECODES //

Expand Down
Loading
Loading