Skip to content

gh-126835: Set location for noped out instructions after constant folding in CFG. #130109

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions Lib/test/test_peepholer.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,83 @@ def test_folding_subscript(self):
self.assertInBytecode(code, 'BINARY_OP', argval=subscr_argval)
self.check_lnotab(code)

def test_constant_folding_remove_nop_location(self):
sources = [
"""
(-
-
-
1)
""",

"""
(1
+
2
+
3)
""",

"""
(1,
2,
3)[0]
""",

"""
[1,
2,
3]
""",

"""
{1,
2,
3}
""",

"""
1 in [
1,
2,
3
]
""",

"""
1 in {
1,
2,
3
}
""",

"""
for _ in [1,
2,
3]:
pass
""",

"""
for _ in [1,
2,
x]:
pass
""",

"""
for _ in {1,
2,
3}:
pass
"""
]

for source in sources:
code = compile(textwrap.dedent(source), '', 'single')
self.assertNotInBytecode(code, 'NOP')

def test_in_literal_list(self):
def containtest():
return x in [a, b]
Expand Down
85 changes: 31 additions & 54 deletions Python/flowgraph.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,12 @@ is_jump(cfg_instr *i)
_instr__ptr_->i_oparg = 0; \
} while (0);

#define INSTR_SET_LOC(I, LOC) \
do { \
cfg_instr *_instr__ptr_ = (I); \
_instr__ptr_->i_loc = (LOC); \
} while (0);

/***** Blocks *****/

/* Returns the offset of the next instruction in the current block's
Expand Down Expand Up @@ -1382,18 +1388,20 @@ get_constant_sequence(basicblock *bb, int start, int size,

/*
Walk basic block backwards starting from "start" and change "count" number of
non-NOP instructions to NOP's.
non-NOP instructions to NOP's and set their location to NO_LOCATION.
*/
static void
nop_out(basicblock *bb, int start, int count)
{
assert(start < bb->b_iused);
for (; count > 0; start--) {
assert(start >= 0);
if (bb->b_instr[start].i_opcode == NOP) {
cfg_instr *instr = &bb->b_instr[start];
if (instr->i_opcode == NOP) {
continue;
}
INSTR_SET_OP0(&bb->b_instr[start], NOP);
INSTR_SET_OP0(instr, NOP);
INSTR_SET_LOC(instr, NO_LOCATION);
count--;
}
}
Expand Down Expand Up @@ -1423,7 +1431,7 @@ fold_tuple_of_constants(basicblock *bb, int n, PyObject *consts, PyObject *const
int index = add_const(newconst, consts, const_cache);
RETURN_IF_ERROR(index);
nop_out(bb, n-1, seq_size);
INSTR_SET_OP1(&bb->b_instr[n], LOAD_CONST, index);
INSTR_SET_OP1(instr, LOAD_CONST, index);
return SUCCESS;
}

Expand Down Expand Up @@ -1479,40 +1487,16 @@ optimize_lists_and_sets(basicblock *bb, int i, int nextop,
else {
assert(i >= 2);
assert(instr->i_opcode == BUILD_LIST || instr->i_opcode == BUILD_SET);

INSTR_SET_LOC(&bb->b_instr[i-2], instr->i_loc);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we know for sure that the locations of instructions i-1 and i-2 are not already set here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure, but this ensures that new BUILD_LIST/BUILD_SET start from the original line.

Copy link
Contributor Author

@WolframAlph WolframAlph Feb 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Chances are we have some bugs with line numbers after optimizations but tests did not catch that. I would say we revisit this in near future. I am not sure we should spend too much time on it now.


INSTR_SET_OP1(&bb->b_instr[i-2], instr->i_opcode, 0);
INSTR_SET_OP1(&bb->b_instr[i-1], LOAD_CONST, index);
INSTR_SET_OP1(&bb->b_instr[i], instr->i_opcode == BUILD_LIST ? LIST_EXTEND : SET_UPDATE, 1);
}
return SUCCESS;
}

/*
Walk basic block backwards starting from "start" to collect instruction pair
that loads consts skipping NOP's in between.
*/
static bool
find_load_const_pair(basicblock *bb, int start, cfg_instr **first, cfg_instr **second)
{
cfg_instr *second_load_const = NULL;
while (start >= 0) {
cfg_instr *inst = &bb->b_instr[start--];
if (inst->i_opcode == NOP) {
continue;
}
if (!loads_const(inst->i_opcode)) {
return false;
}
if (second_load_const == NULL) {
second_load_const = inst;
continue;
}
*first = inst;
*second = second_load_const;
return true;
}
return false;
}

/* Determine opcode & oparg for freshly folded constant. */
static int
newop_from_folded(PyObject *newconst, PyObject *consts,
Expand All @@ -1534,27 +1518,25 @@ newop_from_folded(PyObject *newconst, PyObject *consts,
}

static int
optimize_if_const_op(basicblock *bb, int n, PyObject *consts, PyObject *const_cache)
optimize_if_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
{
cfg_instr *subscr = &bb->b_instr[n];
assert(subscr->i_opcode == BINARY_OP);
if (subscr->i_oparg != NB_SUBSCR) {
cfg_instr *binop = &bb->b_instr[i];
assert(binop->i_opcode == BINARY_OP);
if (binop->i_oparg != NB_SUBSCR) {
/* TODO: support other binary ops */
return SUCCESS;
}
cfg_instr *arg, *idx;
if (!find_load_const_pair(bb, n-1, &arg, &idx)) {
PyObject *pair;
RETURN_IF_ERROR(get_constant_sequence(bb, i-1, 2, consts, &pair));
if (pair == NULL) {
return SUCCESS;
}
PyObject *o = NULL, *key = NULL;
if ((o = get_const_value(arg->i_opcode, arg->i_oparg, consts)) == NULL
|| (key = get_const_value(idx->i_opcode, idx->i_oparg, consts)) == NULL)
{
goto error;
}
PyObject *newconst = PyObject_GetItem(o, key);
Py_DECREF(o);
Py_DECREF(key);
assert(PyTuple_CheckExact(pair) && PyTuple_Size(pair) == 2);
PyObject *left = PyTuple_GET_ITEM(pair, 0);
PyObject *right = PyTuple_GET_ITEM(pair, 1);
assert(left != NULL && right != NULL);
PyObject *newconst = PyObject_GetItem(left, right);
Comment on lines +1535 to +1538
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
PyObject *left = PyTuple_GET_ITEM(pair, 0);
PyObject *right = PyTuple_GET_ITEM(pair, 1);
assert(left != NULL && right != NULL);
PyObject *newconst = PyObject_GetItem(left, right);
PyObject *obj = PyTuple_GET_ITEM(pair, 0);
PyObject *attr = PyTuple_GET_ITEM(pair, 1);
assert(obj != NULL && attr != NULL);
PyObject *newconst = PyObject_GetItem(obj, attr);

I this clearer?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For subscript yes, but eventually there will be other binary ops as well. I could apply if you want.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, leave it.

Py_DECREF(pair);
if (newconst == NULL) {
if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
return ERROR;
Expand All @@ -1564,14 +1546,9 @@ optimize_if_const_op(basicblock *bb, int n, PyObject *consts, PyObject *const_ca
}
int newopcode, newoparg;
RETURN_IF_ERROR(newop_from_folded(newconst, consts, const_cache, &newopcode, &newoparg));
INSTR_SET_OP1(subscr, newopcode, newoparg);
INSTR_SET_OP0(arg, NOP);
INSTR_SET_OP0(idx, NOP);
nop_out(bb, i-1, 2);
INSTR_SET_OP1(binop, newopcode, newoparg);
return SUCCESS;
error:
Py_XDECREF(o);
Py_XDECREF(key);
return ERROR;
}

#define VISITED (-1)
Expand Down Expand Up @@ -2072,7 +2049,7 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
}
break;
case BINARY_OP:
RETURN_IF_ERROR(optimize_if_const_op(bb, i, consts, const_cache));
RETURN_IF_ERROR(optimize_if_const_binop(bb, i, consts, const_cache));
break;
}
}
Expand Down
Loading