-
-
Notifications
You must be signed in to change notification settings - Fork 31.6k
bpo-45152: refactor the dis module to make handling of hasconst opcod… #28258
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ | |
MAKE_FUNCTION = opmap['MAKE_FUNCTION'] | ||
MAKE_FUNCTION_FLAGS = ('defaults', 'kwdefaults', 'annotations', 'closure') | ||
|
||
LOAD_CONST = opmap['LOAD_CONST'] | ||
|
||
def _try_compile(source, name): | ||
"""Attempts to compile the given source, first as an expression and | ||
|
@@ -317,19 +318,32 @@ def get_instructions(x, *, first_line=None): | |
co.co_names, co.co_consts, | ||
linestarts, line_offset, co_positions=co.co_positions()) | ||
|
||
def _get_const_info(const_index, const_list): | ||
def _get_const_value(op, arg, co_consts): | ||
"""Helper to get the value of the const in a hasconst op. | ||
|
||
Returns the dereferenced constant if this is possible. | ||
Otherwise (if it is a LOAD_CONST and co_consts is not | ||
provided) returns the dis.UNKNOWN sentinel. | ||
""" | ||
assert op in hasconst | ||
|
||
argval = UNKNOWN | ||
if op == LOAD_CONST: | ||
if co_consts is not None: | ||
argval = co_consts[arg] | ||
return argval | ||
|
||
def _get_const_info(op, arg, co_consts): | ||
"""Helper to get optional details about const references | ||
|
||
Returns the dereferenced constant and its repr if the constant | ||
list is defined. | ||
Returns the dereferenced constant and its repr if the value | ||
can be calculated. | ||
Otherwise returns the sentinel value dis.UNKNOWN for the value | ||
and an empty string for its repr. | ||
""" | ||
if const_list is not None: | ||
argval = const_list[const_index] | ||
return argval, repr(argval) | ||
else: | ||
return UNKNOWN, '' | ||
argval = _get_const_value(op, arg, co_consts) | ||
argrepr = repr(argval) if argval is not UNKNOWN else '' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wondering if repr(UNKNOWN) should just return There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I ruled that out when I thought that the sentinel will be part of the external API, because it could be confusing. |
||
return argval, argrepr | ||
|
||
def _get_name_info(name_index, get_name, **extrainfo): | ||
"""Helper to get optional details about named references | ||
|
@@ -371,14 +385,14 @@ def parse_exception_table(code): | |
return entries | ||
|
||
def _get_instructions_bytes(code, varname_from_oparg=None, | ||
names=None, constants=None, | ||
names=None, co_consts=None, | ||
linestarts=None, line_offset=0, | ||
exception_entries=(), co_positions=None): | ||
"""Iterate over the instructions in a bytecode string. | ||
|
||
Generates a sequence of Instruction namedtuples giving the details of each | ||
opcode. Additional information about the code's runtime environment | ||
(e.g. variable names, constants) can be specified using optional | ||
(e.g. variable names, co_consts) can be specified using optional | ||
arguments. | ||
|
||
""" | ||
|
@@ -408,7 +422,7 @@ def _get_instructions_bytes(code, varname_from_oparg=None, | |
# raw name index for LOAD_GLOBAL, LOAD_CONST, etc. | ||
argval = arg | ||
if op in hasconst: | ||
argval, argrepr = _get_const_info(arg, constants) | ||
argval, argrepr = _get_const_info(op, arg, co_consts) | ||
elif op in hasname: | ||
argval, argrepr = _get_name_info(arg, get_name) | ||
elif op in hasjabs: | ||
|
@@ -457,7 +471,7 @@ def _disassemble_recursive(co, *, file=None, depth=None): | |
_disassemble_recursive(x, file=file, depth=depth) | ||
|
||
def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None, | ||
names=None, constants=None, linestarts=None, | ||
names=None, co_consts=None, linestarts=None, | ||
*, file=None, line_offset=0, exception_entries=(), | ||
co_positions=None): | ||
# Omit the line number column entirely if we have no line number info | ||
|
@@ -476,7 +490,7 @@ def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None, | |
else: | ||
offset_width = 4 | ||
for instr in _get_instructions_bytes(code, varname_from_oparg, names, | ||
constants, linestarts, | ||
co_consts, linestarts, | ||
line_offset=line_offset, exception_entries=exception_entries, | ||
co_positions=co_positions): | ||
new_source_line = (show_lineno and | ||
|
@@ -557,11 +571,13 @@ def _find_imports(co): | |
opargs = [(op, arg) for _, op, arg in _unpack_opargs(co.co_code) | ||
if op != EXTENDED_ARG] | ||
for i, (op, oparg) in enumerate(opargs): | ||
if (op == IMPORT_NAME and i >= 2 | ||
and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST): | ||
level = consts[opargs[i-2][1]] | ||
fromlist = consts[opargs[i-1][1]] | ||
yield (names[oparg], level, fromlist) | ||
if op == IMPORT_NAME and i >= 2: | ||
from_op = opargs[i-1] | ||
level_op = opargs[i-2] | ||
if (from_op[0] in hasconst and level_op[0] in hasconst): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. even this needs hasconst to have LOAD_NONE - the fromlist can be None. |
||
level = _get_const_value(level_op[0], level_op[1], consts) | ||
fromlist = _get_const_value(from_op[0], from_op[1], consts) | ||
yield (names[oparg], level, fromlist) | ||
|
||
def _find_store_names(co): | ||
"""Find names of variables which are written in the code | ||
|
@@ -635,7 +651,7 @@ def dis(self): | |
with io.StringIO() as output: | ||
_disassemble_bytes(co.co_code, | ||
varname_from_oparg=co._varname_from_oparg, | ||
names=co.co_names, constants=co.co_consts, | ||
names=co.co_names, co_consts=co.co_consts, | ||
linestarts=self._linestarts, | ||
line_offset=self._line_offset, | ||
file=output, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have a patch that exposes the get_const_value function from compile.c in the _opcode module, but I'm on the fence whether it's worth the complication it adds. Let me know if you'd like to see it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why would you do that? Surely it's easier to replicate its functionality in pure Python, even if we add some new ways to represent constants. It doesn't even take a code object currently. And TBH, should e.g. LOAD_NONE even be counted as having a constant? I'd expect HAS_CONST(LOAD_NONE) might well be False.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think LOAD_NONE should be HAS_CONST, because the peephole optimiser treats it the same as a LOAD_CONST, for things like folding const tuples and conditional jumps that depend on const.
So far I don't think opcode.hasconst is used for anything (it contains only LOAD_CONST). So I thought we would make it include all the splits of LOAD_CONST, and then HAS_CONST is a C macro generated from that (it's new, I just added it).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, I see. Currently we take (not the exact notation)
and transform it to
adding
(None, 42)
as a new constant toco_consts
. We want to still do that even when the first instruction is replaced byLOAD_NONE
. That makes sense!We still have the issue that we might end up with unused constants in
co_consts
due to this folding. Inada just fixed this for trailing unused constants in GH-28109 (bpo-45056) but for orphaned constants in the middle we have no solution (we'd have to renumber all the LOAD_CONST opargs). Fortunately I think it's not worth it, this is rare.So you've convinced me that LOAD_NONE should be in
opcode.hasconst
, but not that it's worth exposing that C function.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I agree it's OTT in this case. We'll just have to implement it twice.