diff --git a/Doc/library/inspect.rst b/Doc/library/inspect.rst index 311523fb3aa3c8..58b84a35a890e3 100644 --- a/Doc/library/inspect.rst +++ b/Doc/library/inspect.rst @@ -164,6 +164,10 @@ attributes (see :ref:`import-mod-attrs` for module attributes): | | | read more :ref:`here | | | | `| +-----------+-------------------+---------------------------+ +| | co_lnotab | encoded mapping of line | +| | | numbers to bytecode | +| | | indices | ++-----------+-------------------+---------------------------+ | | co_freevars | tuple of names of free | | | | variables (referenced via | | | | a function's closure) | diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 625a394ba68b11..1d2ddf3507aee1 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -962,6 +962,7 @@ Internal types single: co_filename (code object attribute) single: co_firstlineno (code object attribute) single: co_flags (code object attribute) + single: co_lnotab (code object attribute) single: co_name (code object attribute) single: co_names (code object attribute) single: co_nlocals (code object attribute) @@ -988,7 +989,9 @@ Internal types a tuple containing the literals used by the bytecode; :attr:`co_names` is a tuple containing the names used by the bytecode; :attr:`co_filename` is the filename from which the code was compiled; :attr:`co_firstlineno` is - the first line number of the function; :attr:`co_stacksize` is the + the first line number of the function; :attr:`co_lnotab` is a string + encoding the mapping from bytecode offsets to line numbers (for details + see the source code of the interpreter); :attr:`co_stacksize` is the required stack size; :attr:`co_flags` is an integer encoding a number of flags for the interpreter. diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index b907a01f291476..45a5e5062d55b6 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -700,10 +700,6 @@ Removed *context* parameter instead. 
(Contributed by Victor Stinner in :gh:`94172`.) -* Remove ``co_lnotab`` attribute from code objects per :pep:`626`: - use ``co_lines`` method instead. - (Contributed by Nikita Sobolev in :gh:`101865`.) - Porting to Python 3.12 ====================== diff --git a/Lib/inspect.py b/Lib/inspect.py index 445425bab5a37c..8bb3a375735af6 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -499,6 +499,7 @@ def iscode(object): co_freevars tuple of names of free variables co_posonlyargcount number of positional only arguments co_kwonlyargcount number of keyword only arguments (not including ** arg) + co_lnotab encoded mapping of line numbers to bytecode indices co_name name with which this code object was defined co_names tuple of names other than arguments and function locals co_nlocals number of local variables @@ -1707,6 +1708,7 @@ def getframeinfo(frame, context=1): def getlineno(frame): """Get the line number from a frame object, allowing for optimization.""" + # FrameType.f_lineno is now a descriptor that grovels co_lnotab return frame.f_lineno _FrameInfo = namedtuple('_FrameInfo', ('frame',) + Traceback._fields) diff --git a/Lib/pydoc_data/topics.py b/Lib/pydoc_data/topics.py index 636fac4e4c822c..e7f403d3ffbf12 100644 --- a/Lib/pydoc_data/topics.py +++ b/Lib/pydoc_data/topics.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Autogenerated by Sphinx on Mon Feb 13 10:21:05 2023 +# Autogenerated by Sphinx on Tue Feb 7 13:18:04 2023 topics = {'assert': 'The "assert" statement\n' '**********************\n' '\n' @@ -13704,10 +13704,13 @@ ' bytecode; "co_filename" is the filename from which the code ' 'was\n' ' compiled; "co_firstlineno" is the first line number of the\n' - ' function; "co_stacksize" is the required stack size; ' - '"co_flags"\n' - ' is an integer encoding a number of flags for the ' - 'interpreter.\n' + ' function; "co_lnotab" is a string encoding the mapping from\n' + ' bytecode offsets to line numbers (for details see the source\n' + ' code of the 
interpreter); "co_stacksize" is the required ' + 'stack\n' + ' size; "co_flags" is an integer encoding a number of flags ' + 'for\n' + ' the interpreter.\n' '\n' ' The following flag bits are defined for "co_flags": bit ' '"0x04"\n' diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-02-13-10-35-52.gh-issue-101865.2kjTxP.rst b/Misc/NEWS.d/next/Core and Builtins/2023-02-13-10-35-52.gh-issue-101865.2kjTxP.rst deleted file mode 100644 index 40ea42803e040e..00000000000000 --- a/Misc/NEWS.d/next/Core and Builtins/2023-02-13-10-35-52.gh-issue-101865.2kjTxP.rst +++ /dev/null @@ -1 +0,0 @@ -Remove deprecated ``co_lnotab`` from code objects as per :pep:`626`. diff --git a/Misc/gdbinit b/Misc/gdbinit index 12230489441e4f..e8f62ba6476423 100644 --- a/Misc/gdbinit +++ b/Misc/gdbinit @@ -54,7 +54,25 @@ end # A rewrite of the Python interpreter's line number calculator in GDB's # command language define lineno - printf "%d", f->f_lineno + set $__continue = 1 + set $__co = f->f_code + set $__lasti = f->f_lasti + set $__sz = ((PyVarObject *)$__co->co_lnotab)->ob_size/2 + set $__p = (unsigned char *)((PyBytesObject *)$__co->co_lnotab)->ob_sval + set $__li = $__co->co_firstlineno + set $__ad = 0 + while ($__sz-1 >= 0 && $__continue) + set $__sz = $__sz - 1 + set $__ad = $__ad + *$__p + set $__p = $__p + 1 + if ($__ad > $__lasti) + set $__continue = 0 + else + set $__li = $__li + *$__p + set $__p = $__p + 1 + end + end + printf "%d", $__li end define pyframev diff --git a/Objects/codeobject.c b/Objects/codeobject.c index add3079a4d7fb9..ab31b6582cdaae 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1097,6 +1097,77 @@ _PyLineTable_NextAddressRange(PyCodeAddressRange *range) return 1; } +static int +emit_pair(PyObject **bytes, int *offset, int a, int b) +{ + Py_ssize_t len = PyBytes_GET_SIZE(*bytes); + if (*offset + 2 >= len) { + if (_PyBytes_Resize(bytes, len * 2) < 0) + return 0; + } + unsigned char *lnotab = (unsigned char *) PyBytes_AS_STRING(*bytes); + lnotab += 
*offset; + *lnotab++ = a; + *lnotab++ = b; + *offset += 2; + return 1; +} + +static int +emit_delta(PyObject **bytes, int bdelta, int ldelta, int *offset) +{ + while (bdelta > 255) { + if (!emit_pair(bytes, offset, 255, 0)) { + return 0; + } + bdelta -= 255; + } + while (ldelta > 127) { + if (!emit_pair(bytes, offset, bdelta, 127)) { + return 0; + } + bdelta = 0; + ldelta -= 127; + } + while (ldelta < -128) { + if (!emit_pair(bytes, offset, bdelta, -128)) { + return 0; + } + bdelta = 0; + ldelta += 128; + } + return emit_pair(bytes, offset, bdelta, ldelta); +} + +static PyObject * +decode_linetable(PyCodeObject *code) +{ + PyCodeAddressRange bounds; + PyObject *bytes; + int table_offset = 0; + int code_offset = 0; + int line = code->co_firstlineno; + bytes = PyBytes_FromStringAndSize(NULL, 64); + if (bytes == NULL) { + return NULL; + } + _PyCode_InitAddressRange(code, &bounds); + while (_PyLineTable_NextAddressRange(&bounds)) { + if (bounds.opaque.computed_line != line) { + int bdelta = bounds.ar_start - code_offset; + int ldelta = bounds.opaque.computed_line - line; + if (!emit_delta(&bytes, bdelta, ldelta, &table_offset)) { + Py_DECREF(bytes); + return NULL; + } + code_offset = bounds.ar_start; + line = bounds.opaque.computed_line; + } + } + _PyBytes_Resize(&bytes, table_offset); + return bytes; +} + typedef struct { PyObject_HEAD @@ -1810,6 +1881,12 @@ static PyMemberDef code_memberlist[] = { }; +static PyObject * +code_getlnotab(PyCodeObject *code, void *closure) +{ + return decode_linetable(code); +} + static PyObject * code_getvarnames(PyCodeObject *code, void *closure) { @@ -1842,6 +1919,7 @@ code_getcode(PyCodeObject *code, void *closure) } static PyGetSetDef code_getsetlist[] = { + {"co_lnotab", (getter)code_getlnotab, NULL, NULL}, {"_co_code_adaptive", (getter)code_getcodeadaptive, NULL, NULL}, // The following old names are kept for backward compatibility. 
{"co_varnames", (getter)code_getvarnames, NULL, NULL}, diff --git a/Objects/lnotab_notes.txt b/Objects/lnotab_notes.txt new file mode 100644 index 00000000000000..362b87a86a481f --- /dev/null +++ b/Objects/lnotab_notes.txt @@ -0,0 +1,225 @@ +Description of the internal format of the line number table + +Conceptually, the line number table consists of a sequence of triples: + start-offset (inclusive), end-offset (exclusive), line-number. + +Note that not all byte codes have a line number so we need to handle `None` for the line-number. + +However, storing the above sequence directly would be very inefficient as we would need 12 bytes per entry. + +First, note that the end of one entry is the same as the start of the next, so we can overlap entries. +Second, we don't really need arbitrary access to the sequence, so we can store deltas. + +We just need to store (end - start, line delta) pairs. The start offset of the first entry is always zero. + +Third, most deltas are small, so we can use a single byte for each value, as long as we allow several entries for the same line. + +Consider the following table + Start End Line + 0 6 1 + 6 50 2 + 50 350 7 + 350 360 No line number + 360 376 8 + 376 380 208 + +Stripping the redundant ends gives: + + End-Start Line-delta + 6 +1 + 44 +1 + 300 +5 + 10 No line number + 16 +1 + 4 +200 + + +Note that the end - start value is always positive. + +Finally, in order to fit into a single byte we need to convert start deltas to the range 0 <= delta <= 254, +and line deltas to the range -127 <= delta <= 127. +A line delta of -128 is used to indicate no line number. +Also note that a delta of zero indicates that there are no bytecodes in the given range, +which means we can use an invalid line number for that range.
+ +Final form: + + Start delta Line delta + 6 +1 + 44 +1 + 254 +5 + 46 0 + 10 -128 (No line number, treated as a delta of zero) + 16 +1 + 0 +127 (line 135, but the range is empty as no bytecodes are at line 135) + 4 +73 + +Iterating over the table. +------------------------- + +For the `co_lines` method we want to emit the full form, omitting the (350, 360, No line number) and empty entries. + +The code is as follows: + +def co_lines(code): + line = code.co_firstlineno + end = 0 + table_iter = iter(code.internal_line_table) + for sdelta, ldelta in table_iter: + if ldelta == 0: # No change to line number, just accumulate changes to end + end += sdelta + continue + start = end + end = start + sdelta + if ldelta == -128: # No valid line number -- skip entry + continue + line += ldelta + if end == start: # Empty range, omit. + continue + yield start, end, line + + + + +The historical co_lnotab format +------------------------------- + +Prior to 3.10, code objects stored a field named co_lnotab. +This was an array of unsigned bytes disguised as a Python bytes object. + +The old co_lnotab did not account for the presence of bytecodes without a line number, +nor was it well suited to tracing as a number of workarounds were required. + +The old format can still be accessed via `code.co_lnotab`, which is lazily computed from the new format. + +Below is the description of the old co_lnotab format: + + +The array is conceptually a compressed list of + (bytecode offset increment, line number increment) +pairs. The details are important and delicate, best illustrated by example: + + byte code offset source code line number + 0 1 + 6 2 + 50 7 + 350 207 + 361 208 + +Instead of storing these numbers literally, we compress the list by storing only +the difference from one row to the next. Conceptually, the stored list might +look like: + + 0, 1, 6, 1, 44, 5, 300, 200, 11, 1 + +The above doesn't really work, but it's a start.
An unsigned byte (byte code +offset) can't hold negative values, or values larger than 255, a signed byte +(line number) can't hold values larger than 127 or less than -128, and the +above example contains two such values. (Note that before 3.6, line number +was also encoded by an unsigned byte.) So we make two tweaks: + + (a) there's a deep assumption that byte code offsets increase monotonically, + and + (b) if byte code offset jumps by more than 255 from one row to the next, or if + source code line number jumps by more than 127 or less than -128 from one row + to the next, more than one pair is written to the table. In case #b, + there's no way to know from looking at the table later how many were written. + That's the delicate part. A user of co_lnotab desiring to find the source + line number corresponding to a bytecode address A should do something like + this: + + lineno = addr = 0 + for addr_incr, line_incr in co_lnotab: + addr += addr_incr + if addr > A: + return lineno + if line_incr >= 0x80: + line_incr -= 0x100 + lineno += line_incr + +(In C, this is implemented by PyCode_Addr2Line().) In order for this to work, +when the addr field increments by more than 255, the line # increment in each +pair generated must be 0 until the remaining addr increment is < 256. So, in +the example above, assemble_lnotab in compile.c should not (as was actually done +until 2.2) expand 300, 200 to + 255, 255, 45, 45, +but to + 255, 0, 45, 127, 0, 73. + +The above is sufficient to reconstruct line numbers for tracebacks, but not for +line tracing. Tracing is handled by PyCode_CheckLineNumber() in codeobject.c +and maybe_call_line_trace() in ceval.c. + +*** Tracing *** + +To a first approximation, we want to call the tracing function when the line +number of the current instruction changes. 
Re-computing the current line for +every instruction is a little slow, though, so each time we compute the line +number we save the bytecode indices where it's valid: + + *instr_lb <= frame->f_lasti < *instr_ub + +is true so long as execution does not change lines. That is, *instr_lb holds +the first bytecode index of the current line, and *instr_ub holds the first +bytecode index of the next line. As long as the above expression is true, +maybe_call_line_trace() does not need to call PyCode_CheckLineNumber(). Note +that the same line may appear multiple times in the lnotab, either because the +bytecode jumped more than 255 indices between line number changes or because +the compiler inserted the same line twice. Even in that case, *instr_ub holds +the first index of the next line. + +However, we don't *always* want to call the line trace function when the above +test fails. + +Consider this code: + +1: def f(a): +2: while a: +3: print(1) +4: break +5: else: +6: print(2) + +which compiles to this: + + 2 0 SETUP_LOOP 26 (to 28) + >> 2 LOAD_FAST 0 (a) + 4 POP_JUMP_IF_FALSE 18 + + 3 6 LOAD_GLOBAL 0 (print) + 8 LOAD_CONST 1 (1) + 10 CALL_NO_KW 1 + 12 POP_TOP + + 4 14 BREAK_LOOP + 16 JUMP_ABSOLUTE 2 + >> 18 POP_BLOCK + + 6 20 LOAD_GLOBAL 0 (print) + 22 LOAD_CONST 2 (2) + 24 CALL_NO_KW 1 + 26 POP_TOP + >> 28 LOAD_CONST 0 (None) + 30 RETURN_VALUE + +If 'a' is false, execution will jump to the POP_BLOCK instruction at offset 18 +and the co_lnotab will claim that execution has moved to line 4, which is wrong. +In this case, we could instead associate the POP_BLOCK with line 5, but that +would break jumps around loops without else clauses. + +We fix this by only calling the line trace function for a forward jump if the +co_lnotab indicates we have jumped to the *start* of a line, i.e. if the current +instruction offset matches the offset given for the start of a line by the +co_lnotab. 
For backward jumps, however, we always call the line trace function, +which lets a debugger stop on every evaluation of a loop guard (which usually +won't be the first opcode in a line). + +Why do we set f_lineno when tracing, and only just before calling the trace +function? Well, consider the code above when 'a' is true. If stepping through +this with 'n' in pdb, you would stop at line 1 with a "call" type event, then +line events on lines 2, 3, and 4, then a "return" type event -- but because the +code for the return actually falls in the range of the "line 6" opcodes, you +would be shown line 6 during this event. This is a change from the behaviour in +2.2 and before, and I've found it confusing in practice. By setting and using +f_lineno when tracing, one can report a line number different from that +suggested by f_lasti on this one occasion where it's desirable.