Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-88116: Use a compact format to represent end line and column offsets. #91666

Merged
merged 19 commits into from
Apr 21, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 19 additions & 13 deletions Include/cpython/code.h
Original file line number Diff line number Diff line change
@@ -86,15 +86,7 @@ typedef uint16_t _Py_CODEUNIT;
PyObject *co_filename; /* unicode (where it was loaded from) */ \
PyObject *co_name; /* unicode (name, for reference) */ \
PyObject *co_qualname; /* unicode (qualname, for reference) */ \
PyObject *co_linetable; /* bytes (encoding addr<->lineno mapping) \
See Objects/lnotab_notes.txt for details. \
*/ \
PyObject *co_endlinetable; /* bytes object that holds end lineno for \
instructions separated across different \
lines */ \
PyObject *co_columntable; /* bytes object that holds start/end column \
offset each instruction */ \
\
PyObject *co_linetable; /* bytes object that holds location info */ \
PyObject *co_weakreflist; /* to support weakrefs to code objects */ \
/* Scratch space for extra data relating to the code object. \
Type is a void* to keep the format private in codeobject.c to force \
@@ -153,13 +145,13 @@ PyAPI_FUNC(PyCodeObject *) PyCode_New(
int, int, int, int, int, PyObject *, PyObject *,
PyObject *, PyObject *, PyObject *, PyObject *,
PyObject *, PyObject *, PyObject *, int, PyObject *,
PyObject *, PyObject *, PyObject *);
PyObject *);

PyAPI_FUNC(PyCodeObject *) PyCode_NewWithPosOnlyArgs(
int, int, int, int, int, int, PyObject *, PyObject *,
PyObject *, PyObject *, PyObject *, PyObject *,
PyObject *, PyObject *, PyObject *, int, PyObject *,
PyObject *, PyObject *, PyObject *);
PyObject *);
/* same as struct above */

/* Creates a new empty code object with the specified source location. */
@@ -176,8 +168,8 @@ PyAPI_FUNC(int) PyCode_Addr2Location(PyCodeObject *, int, int *, int *, int *, i
/* for internal use only */
struct _opaque {
int computed_line;
const char *lo_next;
const char *limit;
const uint8_t *lo_next;
const uint8_t *limit;
};

typedef struct _line_offsets {
@@ -210,6 +202,20 @@ PyAPI_FUNC(int) _PyCode_GetExtra(PyObject *code, Py_ssize_t index,
PyAPI_FUNC(int) _PyCode_SetExtra(PyObject *code, Py_ssize_t index,
void *extra);


typedef enum _PyCodeLocationInfoKind {
/* short forms are 0 to 9 */
PY_CODE_LOCATION_INFO_SHORT0 = 0,
/* one lineforms are 10 to 12 */
PY_CODE_LOCATION_INFO_ONE_LINE0 = 10,
PY_CODE_LOCATION_INFO_ONE_LINE1 = 11,
PY_CODE_LOCATION_INFO_ONE_LINE2 = 12,

PY_CODE_LOCATION_INFO_NO_COLUMNS = 13,
PY_CODE_LOCATION_INFO_LONG = 14,
PY_CODE_LOCATION_INFO_NONE = 15
} _PyCodeLocationInfoKind;

#ifdef __cplusplus
}
#endif
49 changes: 35 additions & 14 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
@@ -176,8 +176,6 @@ struct _PyCodeConstructor {
PyObject *code;
int firstlineno;
PyObject *linetable;
PyObject *endlinetable;
PyObject *columntable;

/* used by the code */
PyObject *consts;
@@ -221,21 +219,10 @@ extern PyObject* _PyCode_GetCellvars(PyCodeObject *);
extern PyObject* _PyCode_GetFreevars(PyCodeObject *);
extern PyObject* _PyCode_GetCode(PyCodeObject *);

/* Return the ending source code line number from a bytecode index. */
extern int _PyCode_Addr2EndLine(PyCodeObject *, int);

/* Return the ending source code line number from a bytecode index. */
extern int _PyCode_Addr2EndLine(PyCodeObject *, int);
/* Return the starting source code column offset from a bytecode index. */
extern int _PyCode_Addr2Offset(PyCodeObject *, int);
/* Return the ending source code column offset from a bytecode index. */
extern int _PyCode_Addr2EndOffset(PyCodeObject *, int);

/** API for initializing the line number tables. */
extern int _PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds);
extern int _PyCode_InitEndAddressRange(PyCodeObject* co, PyCodeAddressRange* bounds);

/** Out of process API for initializing the line number table. */
/** Out of process API for initializing the location table. */
extern void _PyLineTable_InitAddressRange(
const char *linetable,
Py_ssize_t length,
@@ -445,6 +432,40 @@ read_obj(uint16_t *p)
return (PyObject *)val;
}

static inline int
write_varint(uint8_t *ptr, unsigned int val)
{
int written = 1;
while (val >= 64) {
*ptr++ = 64 | (val & 63);
val >>= 6;
written++;
}
*ptr = val;
return written;
}

static inline int
write_signed_varint(uint8_t *ptr, int val)
{
if (val < 0) {
val = ((-val)<<1) | 1;
}
else {
val = val << 1;
}
return write_varint(ptr, val);
}

static inline int
write_location_entry_start(uint8_t *ptr, int code, int length)
{
assert((code & 15) == code);
*ptr = 128 | (code << 3) | (length - 1);
return 1;
}


#ifdef __cplusplus
}
#endif
2 changes: 1 addition & 1 deletion Lib/importlib/_bootstrap_external.py
Original file line number Diff line number Diff line change
@@ -402,7 +402,7 @@ def _write_atomic(path, data, mode=0o666):
# add JUMP_BACKWARD_NO_INTERRUPT, make JUMP_NO_INTERRUPT virtual)
# Python 3.11a7 3492 (make POP_JUMP_IF_NONE/NOT_NONE/TRUE/FALSE relative)
# Python 3.11a7 3493 (Make JUMP_IF_TRUE_OR_POP/JUMP_IF_FALSE_OR_POP relative)

# Python 3.11a7 3494 (New location info table)
# Python 3.12 will start with magic number 3500


169 changes: 130 additions & 39 deletions Lib/test/test_code.py
Original file line number Diff line number Diff line change
@@ -230,9 +230,7 @@ def func(): pass
co.co_name,
co.co_qualname,
co.co_firstlineno,
co.co_lnotab,
co.co_endlinetable,
co.co_columntable,
co.co_linetable,
co.co_exceptiontable,
co.co_freevars,
co.co_cellvars)
@@ -273,8 +271,6 @@ def func2():
("co_filename", "newfilename"),
("co_name", "newname"),
("co_linetable", code2.co_linetable),
("co_endlinetable", code2.co_endlinetable),
("co_columntable", code2.co_columntable),
):
with self.subTest(attr=attr, value=value):
new_code = code.replace(**{attr: value})
@@ -311,9 +307,7 @@ def func():
co.co_name,
co.co_qualname,
co.co_firstlineno,
co.co_lnotab,
co.co_endlinetable,
co.co_columntable,
co.co_linetable,
co.co_exceptiontable,
co.co_freevars,
co.co_cellvars,
@@ -391,14 +385,17 @@ def test_co_positions_artificial_instructions(self):
)

def test_endline_and_columntable_none_when_no_debug_ranges(self):
# Make sure that if `-X no_debug_ranges` is used, the endlinetable and
# columntable are None.
# Make sure that if `-X no_debug_ranges` is used, there is
# minimal debug info
code = textwrap.dedent("""
def f():
pass
assert f.__code__.co_endlinetable is None
assert f.__code__.co_columntable is None
positions = f.__code__.co_positions()
for line, end_line, column, end_column in positions:
assert line == end_line
assert column is None
assert end_column is None
""")
assert_python_ok('-X', 'no_debug_ranges', '-c', code)

@@ -408,8 +405,11 @@ def test_endline_and_columntable_none_when_no_debug_ranges_env(self):
def f():
pass
assert f.__code__.co_endlinetable is None
assert f.__code__.co_columntable is None
positions = f.__code__.co_positions()
for line, end_line, column, end_column in positions:
assert line == end_line
assert column is None
assert end_column is None
""")
assert_python_ok('-c', code, PYTHONNODEBUGRANGES='1')

@@ -421,35 +421,10 @@ def func():
x = 1
new_code = func.__code__.replace(co_linetable=b'')
positions = new_code.co_positions()
next(positions) # Skip RESUME at start
for line, end_line, column, end_column in positions:
self.assertIsNone(line)
self.assertEqual(end_line, new_code.co_firstlineno + 1)

@requires_debug_ranges()
def test_co_positions_empty_endlinetable(self):
def func():
x = 1
new_code = func.__code__.replace(co_endlinetable=b'')
positions = new_code.co_positions()
next(positions) # Skip RESUME at start
for line, end_line, column, end_column in positions:
self.assertEqual(line, new_code.co_firstlineno + 1)
self.assertIsNone(end_line)

@requires_debug_ranges()
def test_co_positions_empty_columntable(self):
def func():
x = 1
new_code = func.__code__.replace(co_columntable=b'')
positions = new_code.co_positions()
next(positions) # Skip RESUME at start
for line, end_line, column, end_column in positions:
self.assertEqual(line, new_code.co_firstlineno + 1)
self.assertEqual(end_line, new_code.co_firstlineno + 1)
self.assertIsNone(column)
self.assertIsNone(end_column)


def isinterned(s):
return s is sys.intern(('_' + s + '_')[1:-1])
@@ -527,6 +502,122 @@ def callback(code):
self.assertFalse(bool(coderef()))
self.assertTrue(self.called)

# Python implementation of location table parsing algorithm
def read(it):
return next(it)

def read_varint(it):
b = read(it)
val = b & 63;
shift = 0;
while b & 64:
b = read(it)
shift += 6
val |= (b&63) << shift
return val

def read_signed_varint(it):
uval = read_varint(it)
if uval & 1:
return -(uval >> 1)
else:
return uval >> 1

def parse_location_table(code):
line = code.co_firstlineno
it = iter(code.co_linetable)
while True:
try:
first_byte = read(it)
except StopIteration:
return
code = (first_byte >> 3) & 15
length = (first_byte & 7) + 1
if code == 15:
yield (code, length, None, None, None, None)
elif code == 14:
line_delta = read_signed_varint(it)
line += line_delta
end_line = line + read_varint(it)
col = read_varint(it)
if col == 0:
col = None
else:
col -= 1
end_col = read_varint(it)
if end_col == 0:
end_col = None
else:
end_col -= 1
yield (code, length, line, end_line, col, end_col)
elif code == 13: # No column
line_delta = read_signed_varint(it)
line += line_delta
yield (code, length, line, line, None, None)
elif code in (10, 11, 12): # new line
line_delta = code - 10
line += line_delta
column = read(it)
end_column = read(it)
yield (code, length, line, line, column, end_column)
else:
assert (0 <= code < 10)
second_byte = read(it)
column = code << 3 | (second_byte >> 4)
yield (code, length, line, line, column, column + (second_byte & 15))

def positions_from_location_table(code):
for _, length, line, end_line, col, end_col in parse_location_table(code):
for _ in range(length):
yield (line, end_line, col, end_col)

def misshappen():
"""
"""
x = (


4

+

y

)
y = (
a
+
b
+

d
)
return q if (

x

) else p


class CodeLocationTest(unittest.TestCase):

def check_positions(self, func):
pos1 = list(func.__code__.co_positions())
pos2 = list(positions_from_location_table(func.__code__))
for l1, l2 in zip(pos1, pos2):
self.assertEqual(l1, l2)
self.assertEqual(len(pos1), len(pos2))


def test_positions(self):
self.check_positions(parse_location_table)
self.check_positions(misshappen)


if check_impl_detail(cpython=True) and ctypes is not None:
py = ctypes.pythonapi
Loading