From 06a79d93045f67d7a30447a6d38076760637c8df Mon Sep 17 00:00:00 2001 From: sobolevn Date: Mon, 13 Feb 2023 10:36:41 +0300 Subject: [PATCH 1/7] gh-101865: Remove deprecated `co_lnotab` from code objects as per PEP626 --- Doc/library/inspect.rst | 4 - Doc/reference/datamodel.rst | 5 +- Doc/whatsnew/3.12.rst | 4 + Lib/inspect.py | 2 - Lib/pydoc_data/topics.py | 13 +- ...-02-13-10-35-52.gh-issue-101865.2kjTxP.rst | 1 + Misc/gdbinit | 20 +- Objects/codeobject.c | 78 ------ Objects/lnotab_notes.txt | 225 ------------------ 9 files changed, 12 insertions(+), 340 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-02-13-10-35-52.gh-issue-101865.2kjTxP.rst delete mode 100644 Objects/lnotab_notes.txt diff --git a/Doc/library/inspect.rst b/Doc/library/inspect.rst index 58b84a35a890e3..311523fb3aa3c8 100644 --- a/Doc/library/inspect.rst +++ b/Doc/library/inspect.rst @@ -164,10 +164,6 @@ attributes (see :ref:`import-mod-attrs` for module attributes): | | | read more :ref:`here | | | | `| +-----------+-------------------+---------------------------+ -| | co_lnotab | encoded mapping of line | -| | | numbers to bytecode | -| | | indices | -+-----------+-------------------+---------------------------+ | | co_freevars | tuple of names of free | | | | variables (referenced via | | | | a function's closure) | diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 1d2ddf3507aee1..625a394ba68b11 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -962,7 +962,6 @@ Internal types single: co_filename (code object attribute) single: co_firstlineno (code object attribute) single: co_flags (code object attribute) - single: co_lnotab (code object attribute) single: co_name (code object attribute) single: co_names (code object attribute) single: co_nlocals (code object attribute) @@ -989,9 +988,7 @@ Internal types a tuple containing the literals used by the bytecode; :attr:`co_names` is a tuple containing the names used by 
the bytecode; :attr:`co_filename` is the filename from which the code was compiled; :attr:`co_firstlineno` is - the first line number of the function; :attr:`co_lnotab` is a string - encoding the mapping from bytecode offsets to line numbers (for details - see the source code of the interpreter); :attr:`co_stacksize` is the + the first line number of the function; :attr:`co_stacksize` is the required stack size; :attr:`co_flags` is an integer encoding a number of flags for the interpreter. diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 45a5e5062d55b6..b907a01f291476 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -700,6 +700,10 @@ Removed *context* parameter instead. (Contributed by Victor Stinner in :gh:`94172`.) +* Remove ``co_lnotab`` attribute from code objects per :pep:`626`: + use ``co_lines`` method instead. + (Contributed by Nikita Sobolev in :gh:`101865`.) + Porting to Python 3.12 ====================== diff --git a/Lib/inspect.py b/Lib/inspect.py index 8bb3a375735af6..445425bab5a37c 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -499,7 +499,6 @@ def iscode(object): co_freevars tuple of names of free variables co_posonlyargcount number of positional only arguments co_kwonlyargcount number of keyword only arguments (not including ** arg) - co_lnotab encoded mapping of line numbers to bytecode indices co_name name with which this code object was defined co_names tuple of names other than arguments and function locals co_nlocals number of local variables @@ -1708,7 +1707,6 @@ def getframeinfo(frame, context=1): def getlineno(frame): """Get the line number from a frame object, allowing for optimization.""" - # FrameType.f_lineno is now a descriptor that grovels co_lnotab return frame.f_lineno _FrameInfo = namedtuple('_FrameInfo', ('frame',) + Traceback._fields) diff --git a/Lib/pydoc_data/topics.py b/Lib/pydoc_data/topics.py index e7f403d3ffbf12..636fac4e4c822c 100644 --- a/Lib/pydoc_data/topics.py +++ 
b/Lib/pydoc_data/topics.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Autogenerated by Sphinx on Tue Feb 7 13:18:04 2023 +# Autogenerated by Sphinx on Mon Feb 13 10:21:05 2023 topics = {'assert': 'The "assert" statement\n' '**********************\n' '\n' @@ -13704,13 +13704,10 @@ ' bytecode; "co_filename" is the filename from which the code ' 'was\n' ' compiled; "co_firstlineno" is the first line number of the\n' - ' function; "co_lnotab" is a string encoding the mapping from\n' - ' bytecode offsets to line numbers (for details see the source\n' - ' code of the interpreter); "co_stacksize" is the required ' - 'stack\n' - ' size; "co_flags" is an integer encoding a number of flags ' - 'for\n' - ' the interpreter.\n' + ' function; "co_stacksize" is the required stack size; ' + '"co_flags"\n' + ' is an integer encoding a number of flags for the ' + 'interpreter.\n' '\n' ' The following flag bits are defined for "co_flags": bit ' '"0x04"\n' diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-02-13-10-35-52.gh-issue-101865.2kjTxP.rst b/Misc/NEWS.d/next/Core and Builtins/2023-02-13-10-35-52.gh-issue-101865.2kjTxP.rst new file mode 100644 index 00000000000000..40ea42803e040e --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-02-13-10-35-52.gh-issue-101865.2kjTxP.rst @@ -0,0 +1 @@ +Remove deprecated ``co_lnotab`` from code objects as per :pep:`626`. 
diff --git a/Misc/gdbinit b/Misc/gdbinit index e8f62ba6476423..12230489441e4f 100644 --- a/Misc/gdbinit +++ b/Misc/gdbinit @@ -54,25 +54,7 @@ end # A rewrite of the Python interpreter's line number calculator in GDB's # command language define lineno - set $__continue = 1 - set $__co = f->f_code - set $__lasti = f->f_lasti - set $__sz = ((PyVarObject *)$__co->co_lnotab)->ob_size/2 - set $__p = (unsigned char *)((PyBytesObject *)$__co->co_lnotab)->ob_sval - set $__li = $__co->co_firstlineno - set $__ad = 0 - while ($__sz-1 >= 0 && $__continue) - set $__sz = $__sz - 1 - set $__ad = $__ad + *$__p - set $__p = $__p + 1 - if ($__ad > $__lasti) - set $__continue = 0 - else - set $__li = $__li + *$__p - set $__p = $__p + 1 - end - end - printf "%d", $__li + printf "%d", f->f_lineno end define pyframev diff --git a/Objects/codeobject.c b/Objects/codeobject.c index ab31b6582cdaae..add3079a4d7fb9 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1097,77 +1097,6 @@ _PyLineTable_NextAddressRange(PyCodeAddressRange *range) return 1; } -static int -emit_pair(PyObject **bytes, int *offset, int a, int b) -{ - Py_ssize_t len = PyBytes_GET_SIZE(*bytes); - if (*offset + 2 >= len) { - if (_PyBytes_Resize(bytes, len * 2) < 0) - return 0; - } - unsigned char *lnotab = (unsigned char *) PyBytes_AS_STRING(*bytes); - lnotab += *offset; - *lnotab++ = a; - *lnotab++ = b; - *offset += 2; - return 1; -} - -static int -emit_delta(PyObject **bytes, int bdelta, int ldelta, int *offset) -{ - while (bdelta > 255) { - if (!emit_pair(bytes, offset, 255, 0)) { - return 0; - } - bdelta -= 255; - } - while (ldelta > 127) { - if (!emit_pair(bytes, offset, bdelta, 127)) { - return 0; - } - bdelta = 0; - ldelta -= 127; - } - while (ldelta < -128) { - if (!emit_pair(bytes, offset, bdelta, -128)) { - return 0; - } - bdelta = 0; - ldelta += 128; - } - return emit_pair(bytes, offset, bdelta, ldelta); -} - -static PyObject * -decode_linetable(PyCodeObject *code) -{ - PyCodeAddressRange bounds; - 
PyObject *bytes; - int table_offset = 0; - int code_offset = 0; - int line = code->co_firstlineno; - bytes = PyBytes_FromStringAndSize(NULL, 64); - if (bytes == NULL) { - return NULL; - } - _PyCode_InitAddressRange(code, &bounds); - while (_PyLineTable_NextAddressRange(&bounds)) { - if (bounds.opaque.computed_line != line) { - int bdelta = bounds.ar_start - code_offset; - int ldelta = bounds.opaque.computed_line - line; - if (!emit_delta(&bytes, bdelta, ldelta, &table_offset)) { - Py_DECREF(bytes); - return NULL; - } - code_offset = bounds.ar_start; - line = bounds.opaque.computed_line; - } - } - _PyBytes_Resize(&bytes, table_offset); - return bytes; -} - typedef struct { PyObject_HEAD @@ -1881,12 +1810,6 @@ static PyMemberDef code_memberlist[] = { }; -static PyObject * -code_getlnotab(PyCodeObject *code, void *closure) -{ - return decode_linetable(code); -} - static PyObject * code_getvarnames(PyCodeObject *code, void *closure) { @@ -1919,7 +1842,6 @@ code_getcode(PyCodeObject *code, void *closure) } static PyGetSetDef code_getsetlist[] = { - {"co_lnotab", (getter)code_getlnotab, NULL, NULL}, {"_co_code_adaptive", (getter)code_getcodeadaptive, NULL, NULL}, // The following old names are kept for backward compatibility. {"co_varnames", (getter)code_getvarnames, NULL, NULL}, diff --git a/Objects/lnotab_notes.txt b/Objects/lnotab_notes.txt deleted file mode 100644 index 362b87a86a481f..00000000000000 --- a/Objects/lnotab_notes.txt +++ /dev/null @@ -1,225 +0,0 @@ -Description of the internal format of the line number table - -Conceptually, the line number table consists of a sequence of triples: - start-offset (inclusive), end-offset (exclusive), line-number. - -Note that not all byte codes have a line number so we need handle `None` for the line-number. - -However, storing the above sequence directly would be very inefficient as we would need 12 bytes per entry. - -First, note that the end of one entry is the same as the start of the next, so we can overlap entries. 
-Second, we don't really need arbitrary access to the sequence, so we can store deltas. - -We just need to store (end - start, line delta) pairs. The start offset of the first entry is always zero. - -Third, most deltas are small, so we can use a single byte for each value, as long we allow several entries for the same line. - -Consider the following table - Start End Line - 0 6 1 - 6 50 2 - 50 350 7 - 350 360 No line number - 360 376 8 - 376 380 208 - -Stripping the redundant ends gives: - - End-Start Line-delta - 6 +1 - 44 +1 - 300 +5 - 10 No line number - 16 +1 - 4 +200 - - -Note that the end - start value is always positive. - -Finally, in order to fit into a single byte we need to convert start deltas to the range 0 <= delta <= 254, -and line deltas to the range -127 <= delta <= 127. -A line delta of -128 is used to indicate no line number. -Also note that a delta of zero indicates that there are no bytecodes in the given range, -which means we can use an invalid line number for that range. - -Final form: - - Start delta Line delta - 6 +1 - 44 +1 - 254 +5 - 46 0 - 10 -128 (No line number, treated as a delta of zero) - 16 +1 - 0 +127 (line 135, but the range is empty as no bytecodes are at line 135) - 4 +73 - -Iterating over the table. -------------------------- - -For the `co_lines` attribute we want to emit the full form, omitting the (350, 360, No line number) and empty entries. - -The code is as follows: - -def co_lines(code): - line = code.co_firstlineno - end = 0 - table_iter = iter(code.internal_line_table): - for sdelta, ldelta in table_iter: - if ldelta == 0: # No change to line number, just accumulate changes to end - end += sdelta - continue - start = end - end = start + sdelta - if ldelta == -128: # No valid line number -- skip entry - continue - line += ldelta - if end == start: # Empty range, omit. 
- continue - yield start, end, line - - - - -The historical co_lnotab format -------------------------------- - -prior to 3.10 code objects stored a field named co_lnotab. -This was an array of unsigned bytes disguised as a Python bytes object. - -The old co_lnotab did not account for the presence of bytecodes without a line number, -nor was it well suited to tracing as a number of workarounds were required. - -The old format can still be accessed via `code.co_lnotab`, which is lazily computed from the new format. - -Below is the description of the old co_lnotab format: - - -The array is conceptually a compressed list of - (bytecode offset increment, line number increment) -pairs. The details are important and delicate, best illustrated by example: - - byte code offset source code line number - 0 1 - 6 2 - 50 7 - 350 207 - 361 208 - -Instead of storing these numbers literally, we compress the list by storing only -the difference from one row to the next. Conceptually, the stored list might -look like: - - 0, 1, 6, 1, 44, 5, 300, 200, 11, 1 - -The above doesn't really work, but it's a start. An unsigned byte (byte code -offset) can't hold negative values, or values larger than 255, a signed byte -(line number) can't hold values larger than 127 or less than -128, and the -above example contains two such values. (Note that before 3.6, line number -was also encoded by an unsigned byte.) So we make two tweaks: - - (a) there's a deep assumption that byte code offsets increase monotonically, - and - (b) if byte code offset jumps by more than 255 from one row to the next, or if - source code line number jumps by more than 127 or less than -128 from one row - to the next, more than one pair is written to the table. In case #b, - there's no way to know from looking at the table later how many were written. - That's the delicate part. 
A user of co_lnotab desiring to find the source - line number corresponding to a bytecode address A should do something like - this: - - lineno = addr = 0 - for addr_incr, line_incr in co_lnotab: - addr += addr_incr - if addr > A: - return lineno - if line_incr >= 0x80: - line_incr -= 0x100 - lineno += line_incr - -(In C, this is implemented by PyCode_Addr2Line().) In order for this to work, -when the addr field increments by more than 255, the line # increment in each -pair generated must be 0 until the remaining addr increment is < 256. So, in -the example above, assemble_lnotab in compile.c should not (as was actually done -until 2.2) expand 300, 200 to - 255, 255, 45, 45, -but to - 255, 0, 45, 127, 0, 73. - -The above is sufficient to reconstruct line numbers for tracebacks, but not for -line tracing. Tracing is handled by PyCode_CheckLineNumber() in codeobject.c -and maybe_call_line_trace() in ceval.c. - -*** Tracing *** - -To a first approximation, we want to call the tracing function when the line -number of the current instruction changes. Re-computing the current line for -every instruction is a little slow, though, so each time we compute the line -number we save the bytecode indices where it's valid: - - *instr_lb <= frame->f_lasti < *instr_ub - -is true so long as execution does not change lines. That is, *instr_lb holds -the first bytecode index of the current line, and *instr_ub holds the first -bytecode index of the next line. As long as the above expression is true, -maybe_call_line_trace() does not need to call PyCode_CheckLineNumber(). Note -that the same line may appear multiple times in the lnotab, either because the -bytecode jumped more than 255 indices between line number changes or because -the compiler inserted the same line twice. Even in that case, *instr_ub holds -the first index of the next line. - -However, we don't *always* want to call the line trace function when the above -test fails. 
- -Consider this code: - -1: def f(a): -2: while a: -3: print(1) -4: break -5: else: -6: print(2) - -which compiles to this: - - 2 0 SETUP_LOOP 26 (to 28) - >> 2 LOAD_FAST 0 (a) - 4 POP_JUMP_IF_FALSE 18 - - 3 6 LOAD_GLOBAL 0 (print) - 8 LOAD_CONST 1 (1) - 10 CALL_NO_KW 1 - 12 POP_TOP - - 4 14 BREAK_LOOP - 16 JUMP_ABSOLUTE 2 - >> 18 POP_BLOCK - - 6 20 LOAD_GLOBAL 0 (print) - 22 LOAD_CONST 2 (2) - 24 CALL_NO_KW 1 - 26 POP_TOP - >> 28 LOAD_CONST 0 (None) - 30 RETURN_VALUE - -If 'a' is false, execution will jump to the POP_BLOCK instruction at offset 18 -and the co_lnotab will claim that execution has moved to line 4, which is wrong. -In this case, we could instead associate the POP_BLOCK with line 5, but that -would break jumps around loops without else clauses. - -We fix this by only calling the line trace function for a forward jump if the -co_lnotab indicates we have jumped to the *start* of a line, i.e. if the current -instruction offset matches the offset given for the start of a line by the -co_lnotab. For backward jumps, however, we always call the line trace function, -which lets a debugger stop on every evaluation of a loop guard (which usually -won't be the first opcode in a line). - -Why do we set f_lineno when tracing, and only just before calling the trace -function? Well, consider the code above when 'a' is true. If stepping through -this with 'n' in pdb, you would stop at line 1 with a "call" type event, then -line events on lines 2, 3, and 4, then a "return" type event -- but because the -code for the return actually falls in the range of the "line 6" opcodes, you -would be shown line 6 during this event. This is a change from the behaviour in -2.2 and before, and I've found it confusing in practice. By setting and using -f_lineno when tracing, one can report a line number different from that -suggested by f_lasti on this one occasion where it's desirable. 
From dd65849ac6570f40b51fc11632841d51689d8eb1 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Tue, 21 Feb 2023 17:02:29 +0300 Subject: [PATCH 2/7] Revert "gh-101865: Remove deprecated `co_lnotab` from code objects as per PEP626" This reverts commit 06a79d93045f67d7a30447a6d38076760637c8df. --- Doc/library/inspect.rst | 4 + Doc/reference/datamodel.rst | 5 +- Doc/whatsnew/3.12.rst | 4 - Lib/inspect.py | 2 + Lib/pydoc_data/topics.py | 13 +- ...-02-13-10-35-52.gh-issue-101865.2kjTxP.rst | 1 - Misc/gdbinit | 20 +- Objects/codeobject.c | 78 ++++++ Objects/lnotab_notes.txt | 225 ++++++++++++++++++ 9 files changed, 340 insertions(+), 12 deletions(-) delete mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-02-13-10-35-52.gh-issue-101865.2kjTxP.rst create mode 100644 Objects/lnotab_notes.txt diff --git a/Doc/library/inspect.rst b/Doc/library/inspect.rst index 311523fb3aa3c8..58b84a35a890e3 100644 --- a/Doc/library/inspect.rst +++ b/Doc/library/inspect.rst @@ -164,6 +164,10 @@ attributes (see :ref:`import-mod-attrs` for module attributes): | | | read more :ref:`here | | | | `| +-----------+-------------------+---------------------------+ +| | co_lnotab | encoded mapping of line | +| | | numbers to bytecode | +| | | indices | ++-----------+-------------------+---------------------------+ | | co_freevars | tuple of names of free | | | | variables (referenced via | | | | a function's closure) | diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 625a394ba68b11..1d2ddf3507aee1 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -962,6 +962,7 @@ Internal types single: co_filename (code object attribute) single: co_firstlineno (code object attribute) single: co_flags (code object attribute) + single: co_lnotab (code object attribute) single: co_name (code object attribute) single: co_names (code object attribute) single: co_nlocals (code object attribute) @@ -988,7 +989,9 @@ Internal types a tuple containing the literals used by 
the bytecode; :attr:`co_names` is a tuple containing the names used by the bytecode; :attr:`co_filename` is the filename from which the code was compiled; :attr:`co_firstlineno` is - the first line number of the function; :attr:`co_stacksize` is the + the first line number of the function; :attr:`co_lnotab` is a string + encoding the mapping from bytecode offsets to line numbers (for details + see the source code of the interpreter); :attr:`co_stacksize` is the required stack size; :attr:`co_flags` is an integer encoding a number of flags for the interpreter. diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index b907a01f291476..45a5e5062d55b6 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -700,10 +700,6 @@ Removed *context* parameter instead. (Contributed by Victor Stinner in :gh:`94172`.) -* Remove ``co_lnotab`` attribute from code objects per :pep:`626`: - use ``co_lines`` method instead. - (Contributed by Nikita Sobolev in :gh:`101865`.) - Porting to Python 3.12 ====================== diff --git a/Lib/inspect.py b/Lib/inspect.py index 445425bab5a37c..8bb3a375735af6 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -499,6 +499,7 @@ def iscode(object): co_freevars tuple of names of free variables co_posonlyargcount number of positional only arguments co_kwonlyargcount number of keyword only arguments (not including ** arg) + co_lnotab encoded mapping of line numbers to bytecode indices co_name name with which this code object was defined co_names tuple of names other than arguments and function locals co_nlocals number of local variables @@ -1707,6 +1708,7 @@ def getframeinfo(frame, context=1): def getlineno(frame): """Get the line number from a frame object, allowing for optimization.""" + # FrameType.f_lineno is now a descriptor that grovels co_lnotab return frame.f_lineno _FrameInfo = namedtuple('_FrameInfo', ('frame',) + Traceback._fields) diff --git a/Lib/pydoc_data/topics.py b/Lib/pydoc_data/topics.py index 
636fac4e4c822c..e7f403d3ffbf12 100644 --- a/Lib/pydoc_data/topics.py +++ b/Lib/pydoc_data/topics.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Autogenerated by Sphinx on Mon Feb 13 10:21:05 2023 +# Autogenerated by Sphinx on Tue Feb 7 13:18:04 2023 topics = {'assert': 'The "assert" statement\n' '**********************\n' '\n' @@ -13704,10 +13704,13 @@ ' bytecode; "co_filename" is the filename from which the code ' 'was\n' ' compiled; "co_firstlineno" is the first line number of the\n' - ' function; "co_stacksize" is the required stack size; ' - '"co_flags"\n' - ' is an integer encoding a number of flags for the ' - 'interpreter.\n' + ' function; "co_lnotab" is a string encoding the mapping from\n' + ' bytecode offsets to line numbers (for details see the source\n' + ' code of the interpreter); "co_stacksize" is the required ' + 'stack\n' + ' size; "co_flags" is an integer encoding a number of flags ' + 'for\n' + ' the interpreter.\n' '\n' ' The following flag bits are defined for "co_flags": bit ' '"0x04"\n' diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-02-13-10-35-52.gh-issue-101865.2kjTxP.rst b/Misc/NEWS.d/next/Core and Builtins/2023-02-13-10-35-52.gh-issue-101865.2kjTxP.rst deleted file mode 100644 index 40ea42803e040e..00000000000000 --- a/Misc/NEWS.d/next/Core and Builtins/2023-02-13-10-35-52.gh-issue-101865.2kjTxP.rst +++ /dev/null @@ -1 +0,0 @@ -Remove deprecated ``co_lnotab`` from code objects as per :pep:`626`. 
diff --git a/Misc/gdbinit b/Misc/gdbinit index 12230489441e4f..e8f62ba6476423 100644 --- a/Misc/gdbinit +++ b/Misc/gdbinit @@ -54,7 +54,25 @@ end # A rewrite of the Python interpreter's line number calculator in GDB's # command language define lineno - printf "%d", f->f_lineno + set $__continue = 1 + set $__co = f->f_code + set $__lasti = f->f_lasti + set $__sz = ((PyVarObject *)$__co->co_lnotab)->ob_size/2 + set $__p = (unsigned char *)((PyBytesObject *)$__co->co_lnotab)->ob_sval + set $__li = $__co->co_firstlineno + set $__ad = 0 + while ($__sz-1 >= 0 && $__continue) + set $__sz = $__sz - 1 + set $__ad = $__ad + *$__p + set $__p = $__p + 1 + if ($__ad > $__lasti) + set $__continue = 0 + else + set $__li = $__li + *$__p + set $__p = $__p + 1 + end + end + printf "%d", $__li end define pyframev diff --git a/Objects/codeobject.c b/Objects/codeobject.c index add3079a4d7fb9..ab31b6582cdaae 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1097,6 +1097,77 @@ _PyLineTable_NextAddressRange(PyCodeAddressRange *range) return 1; } +static int +emit_pair(PyObject **bytes, int *offset, int a, int b) +{ + Py_ssize_t len = PyBytes_GET_SIZE(*bytes); + if (*offset + 2 >= len) { + if (_PyBytes_Resize(bytes, len * 2) < 0) + return 0; + } + unsigned char *lnotab = (unsigned char *) PyBytes_AS_STRING(*bytes); + lnotab += *offset; + *lnotab++ = a; + *lnotab++ = b; + *offset += 2; + return 1; +} + +static int +emit_delta(PyObject **bytes, int bdelta, int ldelta, int *offset) +{ + while (bdelta > 255) { + if (!emit_pair(bytes, offset, 255, 0)) { + return 0; + } + bdelta -= 255; + } + while (ldelta > 127) { + if (!emit_pair(bytes, offset, bdelta, 127)) { + return 0; + } + bdelta = 0; + ldelta -= 127; + } + while (ldelta < -128) { + if (!emit_pair(bytes, offset, bdelta, -128)) { + return 0; + } + bdelta = 0; + ldelta += 128; + } + return emit_pair(bytes, offset, bdelta, ldelta); +} + +static PyObject * +decode_linetable(PyCodeObject *code) +{ + PyCodeAddressRange bounds; + 
PyObject *bytes; + int table_offset = 0; + int code_offset = 0; + int line = code->co_firstlineno; + bytes = PyBytes_FromStringAndSize(NULL, 64); + if (bytes == NULL) { + return NULL; + } + _PyCode_InitAddressRange(code, &bounds); + while (_PyLineTable_NextAddressRange(&bounds)) { + if (bounds.opaque.computed_line != line) { + int bdelta = bounds.ar_start - code_offset; + int ldelta = bounds.opaque.computed_line - line; + if (!emit_delta(&bytes, bdelta, ldelta, &table_offset)) { + Py_DECREF(bytes); + return NULL; + } + code_offset = bounds.ar_start; + line = bounds.opaque.computed_line; + } + } + _PyBytes_Resize(&bytes, table_offset); + return bytes; +} + typedef struct { PyObject_HEAD @@ -1810,6 +1881,12 @@ static PyMemberDef code_memberlist[] = { }; +static PyObject * +code_getlnotab(PyCodeObject *code, void *closure) +{ + return decode_linetable(code); +} + static PyObject * code_getvarnames(PyCodeObject *code, void *closure) { @@ -1842,6 +1919,7 @@ code_getcode(PyCodeObject *code, void *closure) } static PyGetSetDef code_getsetlist[] = { + {"co_lnotab", (getter)code_getlnotab, NULL, NULL}, {"_co_code_adaptive", (getter)code_getcodeadaptive, NULL, NULL}, // The following old names are kept for backward compatibility. {"co_varnames", (getter)code_getvarnames, NULL, NULL}, diff --git a/Objects/lnotab_notes.txt b/Objects/lnotab_notes.txt new file mode 100644 index 00000000000000..362b87a86a481f --- /dev/null +++ b/Objects/lnotab_notes.txt @@ -0,0 +1,225 @@ +Description of the internal format of the line number table + +Conceptually, the line number table consists of a sequence of triples: + start-offset (inclusive), end-offset (exclusive), line-number. + +Note that not all byte codes have a line number so we need handle `None` for the line-number. + +However, storing the above sequence directly would be very inefficient as we would need 12 bytes per entry. + +First, note that the end of one entry is the same as the start of the next, so we can overlap entries. 
+Second, we don't really need arbitrary access to the sequence, so we can store deltas. + +We just need to store (end - start, line delta) pairs. The start offset of the first entry is always zero. + +Third, most deltas are small, so we can use a single byte for each value, as long we allow several entries for the same line. + +Consider the following table + Start End Line + 0 6 1 + 6 50 2 + 50 350 7 + 350 360 No line number + 360 376 8 + 376 380 208 + +Stripping the redundant ends gives: + + End-Start Line-delta + 6 +1 + 44 +1 + 300 +5 + 10 No line number + 16 +1 + 4 +200 + + +Note that the end - start value is always positive. + +Finally, in order to fit into a single byte we need to convert start deltas to the range 0 <= delta <= 254, +and line deltas to the range -127 <= delta <= 127. +A line delta of -128 is used to indicate no line number. +Also note that a delta of zero indicates that there are no bytecodes in the given range, +which means we can use an invalid line number for that range. + +Final form: + + Start delta Line delta + 6 +1 + 44 +1 + 254 +5 + 46 0 + 10 -128 (No line number, treated as a delta of zero) + 16 +1 + 0 +127 (line 135, but the range is empty as no bytecodes are at line 135) + 4 +73 + +Iterating over the table. +------------------------- + +For the `co_lines` attribute we want to emit the full form, omitting the (350, 360, No line number) and empty entries. + +The code is as follows: + +def co_lines(code): + line = code.co_firstlineno + end = 0 + table_iter = iter(code.internal_line_table): + for sdelta, ldelta in table_iter: + if ldelta == 0: # No change to line number, just accumulate changes to end + end += sdelta + continue + start = end + end = start + sdelta + if ldelta == -128: # No valid line number -- skip entry + continue + line += ldelta + if end == start: # Empty range, omit. 
+ continue + yield start, end, line + + + + +The historical co_lnotab format +------------------------------- + +prior to 3.10 code objects stored a field named co_lnotab. +This was an array of unsigned bytes disguised as a Python bytes object. + +The old co_lnotab did not account for the presence of bytecodes without a line number, +nor was it well suited to tracing as a number of workarounds were required. + +The old format can still be accessed via `code.co_lnotab`, which is lazily computed from the new format. + +Below is the description of the old co_lnotab format: + + +The array is conceptually a compressed list of + (bytecode offset increment, line number increment) +pairs. The details are important and delicate, best illustrated by example: + + byte code offset source code line number + 0 1 + 6 2 + 50 7 + 350 207 + 361 208 + +Instead of storing these numbers literally, we compress the list by storing only +the difference from one row to the next. Conceptually, the stored list might +look like: + + 0, 1, 6, 1, 44, 5, 300, 200, 11, 1 + +The above doesn't really work, but it's a start. An unsigned byte (byte code +offset) can't hold negative values, or values larger than 255, a signed byte +(line number) can't hold values larger than 127 or less than -128, and the +above example contains two such values. (Note that before 3.6, line number +was also encoded by an unsigned byte.) So we make two tweaks: + + (a) there's a deep assumption that byte code offsets increase monotonically, + and + (b) if byte code offset jumps by more than 255 from one row to the next, or if + source code line number jumps by more than 127 or less than -128 from one row + to the next, more than one pair is written to the table. In case #b, + there's no way to know from looking at the table later how many were written. + That's the delicate part. 
A user of co_lnotab desiring to find the source + line number corresponding to a bytecode address A should do something like + this: + + lineno = addr = 0 + for addr_incr, line_incr in co_lnotab: + addr += addr_incr + if addr > A: + return lineno + if line_incr >= 0x80: + line_incr -= 0x100 + lineno += line_incr + +(In C, this is implemented by PyCode_Addr2Line().) In order for this to work, +when the addr field increments by more than 255, the line # increment in each +pair generated must be 0 until the remaining addr increment is < 256. So, in +the example above, assemble_lnotab in compile.c should not (as was actually done +until 2.2) expand 300, 200 to + 255, 255, 45, 45, +but to + 255, 0, 45, 127, 0, 73. + +The above is sufficient to reconstruct line numbers for tracebacks, but not for +line tracing. Tracing is handled by PyCode_CheckLineNumber() in codeobject.c +and maybe_call_line_trace() in ceval.c. + +*** Tracing *** + +To a first approximation, we want to call the tracing function when the line +number of the current instruction changes. Re-computing the current line for +every instruction is a little slow, though, so each time we compute the line +number we save the bytecode indices where it's valid: + + *instr_lb <= frame->f_lasti < *instr_ub + +is true so long as execution does not change lines. That is, *instr_lb holds +the first bytecode index of the current line, and *instr_ub holds the first +bytecode index of the next line. As long as the above expression is true, +maybe_call_line_trace() does not need to call PyCode_CheckLineNumber(). Note +that the same line may appear multiple times in the lnotab, either because the +bytecode jumped more than 255 indices between line number changes or because +the compiler inserted the same line twice. Even in that case, *instr_ub holds +the first index of the next line. + +However, we don't *always* want to call the line trace function when the above +test fails. 
+ +Consider this code: + +1: def f(a): +2: while a: +3: print(1) +4: break +5: else: +6: print(2) + +which compiles to this: + + 2 0 SETUP_LOOP 26 (to 28) + >> 2 LOAD_FAST 0 (a) + 4 POP_JUMP_IF_FALSE 18 + + 3 6 LOAD_GLOBAL 0 (print) + 8 LOAD_CONST 1 (1) + 10 CALL_NO_KW 1 + 12 POP_TOP + + 4 14 BREAK_LOOP + 16 JUMP_ABSOLUTE 2 + >> 18 POP_BLOCK + + 6 20 LOAD_GLOBAL 0 (print) + 22 LOAD_CONST 2 (2) + 24 CALL_NO_KW 1 + 26 POP_TOP + >> 28 LOAD_CONST 0 (None) + 30 RETURN_VALUE + +If 'a' is false, execution will jump to the POP_BLOCK instruction at offset 18 +and the co_lnotab will claim that execution has moved to line 4, which is wrong. +In this case, we could instead associate the POP_BLOCK with line 5, but that +would break jumps around loops without else clauses. + +We fix this by only calling the line trace function for a forward jump if the +co_lnotab indicates we have jumped to the *start* of a line, i.e. if the current +instruction offset matches the offset given for the start of a line by the +co_lnotab. For backward jumps, however, we always call the line trace function, +which lets a debugger stop on every evaluation of a loop guard (which usually +won't be the first opcode in a line). + +Why do we set f_lineno when tracing, and only just before calling the trace +function? Well, consider the code above when 'a' is true. If stepping through +this with 'n' in pdb, you would stop at line 1 with a "call" type event, then +line events on lines 2, 3, and 4, then a "return" type event -- but because the +code for the return actually falls in the range of the "line 6" opcodes, you +would be shown line 6 during this event. This is a change from the behaviour in +2.2 and before, and I've found it confusing in practice. By setting and using +f_lineno when tracing, one can report a line number different from that +suggested by f_lasti on this one occasion where it's desirable. 
From 1a6b7bb5f7a22e1cdab3925239b2f662cde7de35 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Tue, 21 Feb 2023 17:22:40 +0300 Subject: [PATCH 3/7] Deprecate `co_lnotab` instead of removing it --- Doc/whatsnew/3.12.rst | 6 ++++++ Lib/test/test_code.py | 7 +++++++ .../2023-02-21-17-22-06.gh-issue-101865.fwrTOA.rst | 2 ++ Objects/codeobject.c | 7 +++++++ 4 files changed, 22 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-02-21-17-22-06.gh-issue-101865.fwrTOA.rst diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 45a5e5062d55b6..891c98d8d0a094 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -516,6 +516,12 @@ Pending Removal in Python 3.14 functions that have been deprecated since Python 2 but only gained a proper :exc:`DeprecationWarning` in 3.12. Remove them in 3.14. +* Accessing ``co_lnotab`` was deprecated in :pep:`626` since 3.10 + and was planned to be removed in 3.12 + but it only got a proper :exc:`DeprecationWarning` in 3.12. + Remove it in 3.14. + (Contributed by Nikita Sobolev in :gh:`101866`.) 
+ Pending Removal in Future Versions ---------------------------------- diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index 9c2ac83e1b69e3..06faa35a1d7c80 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -338,6 +338,13 @@ def func(): new_code = code = func.__code__.replace(co_linetable=b'') self.assertEqual(list(new_code.co_lines()), []) + def test_co_lnotab_is_deprecated(self): # TODO: remove in 3.14 + def func(): + pass + + with self.assertWarns(DeprecationWarning): + func.__code__.co_lnotab + def test_invalid_bytecode(self): def foo(): pass diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-02-21-17-22-06.gh-issue-101865.fwrTOA.rst b/Misc/NEWS.d/next/Core and Builtins/2023-02-21-17-22-06.gh-issue-101865.fwrTOA.rst new file mode 100644 index 00000000000000..876cc223a0e727 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-02-21-17-22-06.gh-issue-101865.fwrTOA.rst @@ -0,0 +1,2 @@ +Deprecate ``co_lnotab`` in code objects, schedule it for removal in Python +3.14 diff --git a/Objects/codeobject.c b/Objects/codeobject.c index ab31b6582cdaae..24c316f1442cd5 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1884,6 +1884,13 @@ static PyMemberDef code_memberlist[] = { static PyObject * code_getlnotab(PyCodeObject *code, void *closure) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "co_lnotab is deprecated since Python 3.12 " + "and will be removed in Python 3.14, " + "use co_lines instead.", + 1) < 0) { + return NULL; + } return decode_linetable(code); } From 67a1c98e8d5ceb6e3cb8d2c9994955e224119ebf Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Thu, 23 Feb 2023 00:02:24 +0300 Subject: [PATCH 4/7] Update gdbinit --- Misc/gdbinit | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/Misc/gdbinit b/Misc/gdbinit index e8f62ba6476423..12230489441e4f 100644 --- a/Misc/gdbinit +++ b/Misc/gdbinit @@ -54,25 +54,7 @@ end # A rewrite of the Python interpreter's line number calculator in 
GDB's # command language define lineno - set $__continue = 1 - set $__co = f->f_code - set $__lasti = f->f_lasti - set $__sz = ((PyVarObject *)$__co->co_lnotab)->ob_size/2 - set $__p = (unsigned char *)((PyBytesObject *)$__co->co_lnotab)->ob_sval - set $__li = $__co->co_firstlineno - set $__ad = 0 - while ($__sz-1 >= 0 && $__continue) - set $__sz = $__sz - 1 - set $__ad = $__ad + *$__p - set $__p = $__p + 1 - if ($__ad > $__lasti) - set $__continue = 0 - else - set $__li = $__li + *$__p - set $__p = $__p + 1 - end - end - printf "%d", $__li + printf "%d", f->f_lineno end define pyframev From 2fc556ec54b53a61b0d348ee13aa322567965fc0 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Sat, 18 Mar 2023 15:17:54 +0300 Subject: [PATCH 5/7] Address review --- Doc/whatsnew/3.12.rst | 2 +- Misc/gdbinit | 20 +++++++++++++++++++- Objects/codeobject.c | 2 +- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 9fe262a7bd6dc2..d2dc768e12873f 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -526,7 +526,7 @@ Pending Removal in Python 3.14 * Accessing ``co_lnotab`` was deprecated in :pep:`626` since 3.10 and was planned to be removed in 3.12 but it only got a proper :exc:`DeprecationWarning` in 3.12. - Remove it in 3.14. + May be removed in 3.14. (Contributed by Nikita Sobolev in :gh:`101866`.) 
Pending Removal in Future Versions diff --git a/Misc/gdbinit b/Misc/gdbinit index 12230489441e4f..e8f62ba6476423 100644 --- a/Misc/gdbinit +++ b/Misc/gdbinit @@ -54,7 +54,25 @@ end # A rewrite of the Python interpreter's line number calculator in GDB's # command language define lineno - printf "%d", f->f_lineno + set $__continue = 1 + set $__co = f->f_code + set $__lasti = f->f_lasti + set $__sz = ((PyVarObject *)$__co->co_lnotab)->ob_size/2 + set $__p = (unsigned char *)((PyBytesObject *)$__co->co_lnotab)->ob_sval + set $__li = $__co->co_firstlineno + set $__ad = 0 + while ($__sz-1 >= 0 && $__continue) + set $__sz = $__sz - 1 + set $__ad = $__ad + *$__p + set $__p = $__p + 1 + if ($__ad > $__lasti) + set $__continue = 0 + else + set $__li = $__li + *$__p + set $__p = $__p + 1 + end + end + printf "%d", $__li end define pyframev diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 1c040d8da6b0c0..257fd11af1ddb1 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1886,7 +1886,7 @@ code_getlnotab(PyCodeObject *code, void *closure) { if (PyErr_WarnEx(PyExc_DeprecationWarning, "co_lnotab is deprecated since Python 3.12 " - "and will be removed in Python 3.14, " + "and may be removed in Python 3.14, " "use co_lines instead.", 1) < 0) { return NULL; From cba6b24f2c25ff5b15c3c1476d740de7c18b0ee8 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Sun, 19 Mar 2023 15:54:11 +0300 Subject: [PATCH 6/7] Address review --- Doc/reference/datamodel.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index f447bbb1216d52..e77c7bb643d9c6 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -991,7 +991,8 @@ Internal types the filename from which the code was compiled; :attr:`co_firstlineno` is the first line number of the function; :attr:`co_lnotab` is a string encoding the mapping from bytecode offsets to line numbers (for details - see the source code of the 
interpreter); :attr:`co_stacksize` is the + see the source code of the interpreter; it is deprecated since 3.12 + and may be removed in 3.14); :attr:`co_stacksize` is the required stack size; :attr:`co_flags` is an integer encoding a number of flags for the interpreter. From 0966c8c1df364c4d1d0572c9566790d5c6837e2e Mon Sep 17 00:00:00 2001 From: sobolevn Date: Mon, 3 Apr 2023 17:36:32 +0300 Subject: [PATCH 7/7] Address review --- Objects/codeobject.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 77d1911dc19bb4..755d0b85e7cf30 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1922,9 +1922,7 @@ static PyObject * code_getlnotab(PyCodeObject *code, void *closure) { if (PyErr_WarnEx(PyExc_DeprecationWarning, - "co_lnotab is deprecated since Python 3.12 " - "and may be removed in Python 3.14, " - "use co_lines instead.", + "co_lnotab is deprecated, use co_lines instead.", 1) < 0) { return NULL; }