From 17fd03b1cdd97ce60ff081e0f209668e9a648ef9 Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Sun, 4 Jul 2021 15:08:52 -0400 Subject: [PATCH 1/5] bpo-43950: Print columns in tracebacks (PEP 657) The traceback.c and traceback.py mechanisms now utilize the newly added code.co_positions and PyCode_Addr2Location to print carets on the specific expressions involved in a traceback. Co-authored-by: Pablo Galindo Co-authored-by: Ammar Askar --- Doc/library/traceback.rst | 17 ++- Include/cpython/traceback.h | 2 +- Include/internal/pycore_traceback.h | 21 ++++ Lib/idlelib/idle_test/test_run.py | 6 +- Lib/test/test_cmd_line_script.py | 4 +- Lib/test/test_doctest.py | 1 + Lib/test/test_traceback.py | 176 ++++++++++++++++++++++++---- Lib/test/test_zipimport.py | 5 +- Lib/traceback.py | 89 ++++++++++++-- Parser/pegen.c | 26 +--- Python/_warnings.c | 2 +- Python/traceback.c | 73 ++++++++++-- 12 files changed, 347 insertions(+), 75 deletions(-) diff --git a/Doc/library/traceback.rst b/Doc/library/traceback.rst index bd53bc066becc5..1961b9a435bd35 100644 --- a/Doc/library/traceback.rst +++ b/Doc/library/traceback.rst @@ -447,37 +447,42 @@ The output for the example would look similar to this: *** print_tb: File "", line 10, in lumberjack() + ^^^^^^^^^^^^ *** print_exception: Traceback (most recent call last): File "", line 10, in lumberjack() + ^^^^^^^^^^^^ File "", line 4, in lumberjack bright_side_of_death() + ^^^^^^^^^^^^^^^^^^^^^^ IndexError: tuple index out of range *** print_exc: Traceback (most recent call last): File "", line 10, in lumberjack() + ^^^^^^^^^^^^ File "", line 4, in lumberjack bright_side_of_death() + ^^^^^^^^^^^^^^^^^^^^^^ IndexError: tuple index out of range *** format_exc, first and last line: Traceback (most recent call last): IndexError: tuple index out of range *** format_exception: ['Traceback (most recent call last):\n', - ' File "", line 10, in \n lumberjack()\n', - ' File "", line 4, in lumberjack\n bright_side_of_death()\n', - ' File "", line 7, in bright_side_of_death\n return tuple()[0]\n', + ' File "", line 10, in \n lumberjack()\n ^^^^^^^^^^^^\n', + ' File "", line 4, in lumberjack\n bright_side_of_death()\n ^^^^^^^^^^^^^^^^^^^^^^\n', + ' File "", line 7, in bright_side_of_death\n return tuple()[0]\n ^^^^^^^^^^\n', 'IndexError: tuple index out of range\n'] *** extract_tb: [, line 10 in >, , line 4 in lumberjack>, , line 7 in bright_side_of_death>] *** format_tb: - [' File "", line 10, in \n lumberjack()\n', - ' File "", line 4, in lumberjack\n bright_side_of_death()\n', - ' File "", line 7, in bright_side_of_death\n return tuple()[0]\n'] + [' File "", line 10, in \n lumberjack()\n ^^^^^^^^^^^^\n', + ' File "", line 4, in lumberjack\n bright_side_of_death()\n ^^^^^^^^^^^^^^^^^^^^^^\n', + ' File "", line 7, in bright_side_of_death\n return tuple()[0]\n ^^^^^^^^^^\n'] *** tb_lineno: 10 diff --git a/Include/cpython/traceback.h b/Include/cpython/traceback.h index aac5b42c344d3f..d0dde335cfee5b 100644 --- a/Include/cpython/traceback.h +++ b/Include/cpython/traceback.h @@ -10,5 +10,5 @@ typedef struct _traceback { int tb_lineno; } PyTracebackObject; -PyAPI_FUNC(int) _Py_DisplaySourceLine(PyObject *, PyObject *, int, int); +PyAPI_FUNC(int) _Py_DisplaySourceLine(PyObject *, PyObject *, int, int, int *, PyObject **); PyAPI_FUNC(void) _PyTraceback_Add(const char *, const char *, int); diff --git a/Include/internal/pycore_traceback.h b/Include/internal/pycore_traceback.h index 4d282308769dc8..55e9f072d96cd4 100644 --- a/Include/internal/pycore_traceback.h +++ b/Include/internal/pycore_traceback.h @@ -87,6 +87,27 @@ PyAPI_FUNC(PyObject*) _PyTraceBack_FromFrame( PyObject *tb_next, PyFrameObject *frame); +static inline Py_ssize_t +_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset) +{ + const char *str = PyUnicode_AsUTF8(line); + if (!str) { + return 0; + } + Py_ssize_t len = strlen(str); + if (col_offset > len + 1) { + col_offset = len + 1; + } + assert(col_offset >= 0); + PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace"); + if (!text) { + return 0; + } + Py_ssize_t size = PyUnicode_GET_LENGTH(text); + Py_DECREF(text); + return size; +} + #ifdef __cplusplus } #endif diff --git a/Lib/idlelib/idle_test/test_run.py b/Lib/idlelib/idle_test/test_run.py index ec4637c5ca617a..b289fa7cbd6481 100644 --- a/Lib/idlelib/idle_test/test_run.py +++ b/Lib/idlelib/idle_test/test_run.py @@ -33,9 +33,9 @@ def __eq__(self, other): run.print_exception() tb = output.getvalue().strip().splitlines() - self.assertEqual(11, len(tb)) - self.assertIn('UnhashableException: ex2', tb[3]) - self.assertIn('UnhashableException: ex1', tb[10]) + self.assertEqual(13, len(tb)) + self.assertIn('UnhashableException: ex2', tb[4]) + self.assertIn('UnhashableException: ex1', tb[12]) data = (('1/0', ZeroDivisionError, "division by zero\n"), ('abc', NameError, "name 'abc' is not defined. " diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py index 6ffec918ebbd59..e50c9925799917 100644 --- a/Lib/test/test_cmd_line_script.py +++ b/Lib/test/test_cmd_line_script.py @@ -548,10 +548,10 @@ def test_pep_409_verbiage(self): script_name = _make_test_script(script_dir, 'script', script) exitcode, stdout, stderr = assert_python_failure(script_name) text = stderr.decode('ascii').split('\n') - self.assertEqual(len(text), 5) + self.assertEqual(len(text), 6) self.assertTrue(text[0].startswith('Traceback')) self.assertTrue(text[1].startswith(' File ')) - self.assertTrue(text[3].startswith('NameError')) + self.assertTrue(text[4].startswith('NameError')) def test_non_ascii(self): # Mac OS X denies the creation of a file with an invalid UTF-8 name. diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py index 828a0ff56763a4..06d9d5d4ade83c 100644 --- a/Lib/test/test_doctest.py +++ b/Lib/test/test_doctest.py @@ -2835,6 +2835,7 @@ def test_unicode(): """ exec(compile(example.source, filename, "single", File "", line 1, in raise Exception('clé') + ^^^^^^^^^^^^^^^^^^^^^^ Exception: clé TestResults(failed=1, attempted=1) """ diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 78b2851d384942..54f592a8ea5001 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -17,8 +17,9 @@ test_code = namedtuple('code', ['co_filename', 'co_name']) +test_code.co_positions = lambda _: iter([(6, 6, 0, 0)]) test_frame = namedtuple('frame', ['f_code', 'f_globals', 'f_locals']) -test_tb = namedtuple('tb', ['tb_frame', 'tb_lineno', 'tb_next']) +test_tb = namedtuple('tb', ['tb_frame', 'tb_lineno', 'tb_next', 'tb_lasti']) class TracebackCases(unittest.TestCase): @@ -154,9 +155,9 @@ def do_test(firstlines, message, charset, lineno): self.assertTrue(stdout[2].endswith(err_line), "Invalid traceback line: {0!r} instead of {1!r}".format( stdout[2], err_line)) - self.assertTrue(stdout[3] == err_msg, + self.assertTrue(stdout[4] == err_msg, "Invalid error message: {0!r} instead of {1!r}".format( - stdout[3], err_msg)) + stdout[4], err_msg)) do_test("", "foo", "ascii", 3) for charset in ("ascii", "iso-8859-1", "utf-8", "GBK"): @@ -272,6 +273,114 @@ def test_signatures(self): '(exc, /, value=)') +class TracebackErrorLocationCaretTests(unittest.TestCase): + """ + Tests for printing code error expressions as part of PEP 657 + """ + def get_exception(self, callable): + try: + callable() + self.fail("No exception thrown.") + except: + return traceback.format_exc().splitlines()[:-1] + + callable_line = get_exception.__code__.co_firstlineno + 2 + + def test_basic_caret(self): + def f(): + raise ValueError("basic caret tests") + + lineno_f = f.__code__.co_firstlineno + expected_f = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ^^^^^^^^^^\n' + f' File "{__file__}", line {lineno_f+1}, in f\n' + ' raise ValueError("basic caret tests")\n' + ' ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n' + ) + result_lines = self.get_exception(f) + self.assertEqual(result_lines, expected_f.splitlines()) + + def test_line_with_unicode(self): + # Make sure that even if a line contains multi-byte unicode characters + # the correct carets are printed. + def f_with_unicode(): + raise ValueError("Ĥellö Wörld") + + lineno_f = f_with_unicode.__code__.co_firstlineno + expected_f = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ^^^^^^^^^^\n' + f' File "{__file__}", line {lineno_f+1}, in f_with_unicode\n' + ' raise ValueError("Ĥellö Wörld")\n' + ' ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n' + ) + result_lines = self.get_exception(f_with_unicode) + self.assertEqual(result_lines, expected_f.splitlines()) + + def test_caret_in_type_annotation(self): + def f_with_type(): + def foo(a: THIS_DOES_NOT_EXIST ) -> int: + return 0 + + lineno_f = f_with_type.__code__.co_firstlineno + expected_f = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ^^^^^^^^^^\n' + f' File "{__file__}", line {lineno_f+1}, in f_with_type\n' + ' def foo(a: THIS_DOES_NOT_EXIST ) -> int:\n' + ' ^^^^^^^^^^^^^^^^^^^\n' + ) + result_lines = self.get_exception(f_with_type) + self.assertEqual(result_lines, expected_f.splitlines()) + + def test_caret_multiline_expression(self): + # Make sure no carets are printed for expressions spanning multiple + # lines. + def f_with_multiline(): + raise ValueError( + "error over multiple lines" + ) + + lineno_f = f_with_multiline.__code__.co_firstlineno + expected_f = ( + 'Traceback (most recent call last):\n' + f' File "{__file__}", line {self.callable_line}, in get_exception\n' + ' callable()\n' + ' ^^^^^^^^^^\n' + f' File "{__file__}", line {lineno_f+1}, in f_with_multiline\n' + ' raise ValueError(\n' + ) + result_lines = self.get_exception(f_with_multiline) + self.assertEqual(result_lines, expected_f.splitlines()) + + +@cpython_only +class CPythonTracebackErrorCaretTests(TracebackErrorLocationCaretTests): + """ + Same set of tests as above but with Python's internal traceback printing. + """ + def get_exception(self, callable): + from _testcapi import traceback_print + try: + callable() + self.fail("No exception thrown.") + except: + type_, value, tb = sys.exc_info() + + file_ = StringIO() + traceback_print(tb, file_) + return file_.getvalue().splitlines() + + callable_line = get_exception.__code__.co_firstlineno + 3 + + class TracebackFormatTests(unittest.TestCase): def some_exception(self): @@ -315,9 +424,9 @@ def check_traceback_format(self, cleanup_func=None): # Make sure that the traceback is properly indented. tb_lines = python_fmt.splitlines() - self.assertEqual(len(tb_lines), 5) + self.assertEqual(len(tb_lines), 7) banner = tb_lines[0] - location, source_line = tb_lines[-2:] + location, source_line = tb_lines[-3], tb_lines[-2] self.assertTrue(banner.startswith('Traceback')) self.assertTrue(location.startswith(' File')) self.assertTrue(source_line.startswith(' raise')) @@ -381,12 +490,16 @@ def f(): 'Traceback (most recent call last):\n' f' File "{__file__}", line {lineno_f+5}, in _check_recursive_traceback_display\n' ' f()\n' + ' ^^^\n' f' File "{__file__}", line {lineno_f+1}, in f\n' ' f()\n' + ' ^^^\n' f' File "{__file__}", line {lineno_f+1}, in f\n' ' f()\n' + ' ^^^\n' f' File "{__file__}", line {lineno_f+1}, in f\n' ' f()\n' + ' ^^^\n' # XXX: The following line changes depending on whether the tests # are run through the interactive interpreter or with -m # It also varies depending on the platform (stack size) @@ -427,19 +540,24 @@ def g(count=10): result_g = ( f' File "{__file__}", line {lineno_g+2}, in g\n' ' return g(count-1)\n' + ' ^^^^^^^^^^\n' f' File "{__file__}", line {lineno_g+2}, in g\n' ' return g(count-1)\n' + ' ^^^^^^^^^^\n' f' File "{__file__}", line {lineno_g+2}, in g\n' ' return g(count-1)\n' + ' ^^^^^^^^^^\n' ' [Previous line repeated 7 more times]\n' f' File "{__file__}", line {lineno_g+3}, in g\n' ' raise ValueError\n' + ' ^^^^^^^^^^^^^^^^\n' 'ValueError\n' ) tb_line = ( 'Traceback (most recent call last):\n' f' File "{__file__}", line {lineno_g+7}, in _check_recursive_traceback_display\n' ' g()\n' + ' ^^^\n' ) expected = (tb_line + result_g).splitlines() actual = stderr_g.getvalue().splitlines() @@ -464,15 +582,20 @@ def h(count=10): 'Traceback (most recent call last):\n' f' File "{__file__}", line {lineno_h+7}, in _check_recursive_traceback_display\n' ' h()\n' + ' ^^^\n' f' File "{__file__}", line {lineno_h+2}, in h\n' ' return h(count-1)\n' + ' ^^^^^^^^^^\n' f' File "{__file__}", line {lineno_h+2}, in h\n' ' return h(count-1)\n' + ' ^^^^^^^^^^\n' f' File "{__file__}", line {lineno_h+2}, in h\n' ' return h(count-1)\n' + ' ^^^^^^^^^^\n' ' [Previous line repeated 7 more times]\n' f' File "{__file__}", line {lineno_h+3}, in h\n' ' g()\n' + ' ^^^\n' ) expected = (result_h + result_g).splitlines() actual = stderr_h.getvalue().splitlines() @@ -489,18 +612,23 @@ def h(count=10): result_g = ( f' File "{__file__}", line {lineno_g+2}, in g\n' ' return g(count-1)\n' + ' ^^^^^^^^^^\n' f' File "{__file__}", line {lineno_g+2}, in g\n' ' return g(count-1)\n' + ' ^^^^^^^^^^\n' f' File "{__file__}", line {lineno_g+2}, in g\n' ' return g(count-1)\n' + ' ^^^^^^^^^^\n' f' File "{__file__}", line {lineno_g+3}, in g\n' ' raise ValueError\n' + ' ^^^^^^^^^^^^^^^^\n' 'ValueError\n' ) tb_line = ( 'Traceback (most recent call last):\n' - f' File "{__file__}", line {lineno_g+71}, in _check_recursive_traceback_display\n' + f' File "{__file__}", line {lineno_g+81}, in _check_recursive_traceback_display\n' ' g(traceback._RECURSIVE_CUTOFF)\n' + ' ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n' ) expected = (tb_line + result_g).splitlines() actual = stderr_g.getvalue().splitlines() @@ -517,19 +645,24 @@ def h(count=10): result_g = ( f' File "{__file__}", line {lineno_g+2}, in g\n' ' return g(count-1)\n' + ' ^^^^^^^^^^\n' f' File "{__file__}", line {lineno_g+2}, in g\n' ' return g(count-1)\n' + ' ^^^^^^^^^^\n' f' File "{__file__}", line {lineno_g+2}, in g\n' ' return g(count-1)\n' + ' ^^^^^^^^^^\n' ' [Previous line repeated 1 more time]\n' f' File "{__file__}", line {lineno_g+3}, in g\n' ' raise ValueError\n' + ' ^^^^^^^^^^^^^^^^\n' 'ValueError\n' ) tb_line = ( 'Traceback (most recent call last):\n' - f' File "{__file__}", line {lineno_g+99}, in _check_recursive_traceback_display\n' + f' File "{__file__}", line {lineno_g+114}, in _check_recursive_traceback_display\n' ' g(traceback._RECURSIVE_CUTOFF + 1)\n' + ' ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n' ) expected = (tb_line + result_g).splitlines() actual = stderr_g.getvalue().splitlines() @@ -580,10 +713,10 @@ def __eq__(self, other): exception_print(exc_val) tb = stderr_f.getvalue().strip().splitlines() - self.assertEqual(11, len(tb)) - self.assertEqual(context_message.strip(), tb[5]) - self.assertIn('UnhashableException: ex2', tb[3]) - self.assertIn('UnhashableException: ex1', tb[10]) + self.assertEqual(13, len(tb)) + self.assertEqual(context_message.strip(), tb[6]) + self.assertIn('UnhashableException: ex2', tb[4]) + self.assertIn('UnhashableException: ex1', tb[12]) cause_message = ( @@ -613,8 +746,8 @@ def zero_div(self): def check_zero_div(self, msg): lines = msg.splitlines() - self.assertTrue(lines[-3].startswith(' File')) - self.assertIn('1/0 # In zero_div', lines[-2]) + self.assertTrue(lines[-4].startswith(' File')) + self.assertIn('1/0 # In zero_div', lines[-3]) self.assertTrue(lines[-1].startswith('ZeroDivisionError'), lines[-1]) def test_simple(self): @@ -623,11 +756,11 @@ def test_simple(self): except ZeroDivisionError as _: e = _ lines = self.get_report(e).splitlines() - self.assertEqual(len(lines), 4) + self.assertEqual(len(lines), 5) self.assertTrue(lines[0].startswith('Traceback')) self.assertTrue(lines[1].startswith(' File')) self.assertIn('1/0 # Marker', lines[2]) - self.assertTrue(lines[3].startswith('ZeroDivisionError')) + self.assertTrue(lines[4].startswith('ZeroDivisionError')) def test_cause(self): def inner_raise(): @@ -666,11 +799,11 @@ def test_context_suppression(self): except ZeroDivisionError as _: e = _ lines = self.get_report(e).splitlines() - self.assertEqual(len(lines), 4) + self.assertEqual(len(lines), 5) self.assertTrue(lines[0].startswith('Traceback')) self.assertTrue(lines[1].startswith(' File')) self.assertIn('ZeroDivisionError from None', lines[2]) - self.assertTrue(lines[3].startswith('ZeroDivisionError')) + self.assertTrue(lines[4].startswith('ZeroDivisionError')) def test_cause_and_context(self): # When both a cause and a context are set, only the cause should be @@ -1362,7 +1495,7 @@ def test_lookup_lines(self): e = Exception("uh oh") c = test_code('/foo.py', 'method') f = test_frame(c, None, None) - tb = test_tb(f, 6, None) + tb = test_tb(f, 6, None, 0) exc = traceback.TracebackException(Exception, e, tb, lookup_lines=False) self.assertEqual(linecache.cache, {}) linecache.updatecache('/foo.py', globals()) @@ -1373,7 +1506,7 @@ def test_locals(self): e = Exception("uh oh") c = test_code('/foo.py', 'method') f = test_frame(c, globals(), {'something': 1, 'other': 'string'}) - tb = test_tb(f, 6, None) + tb = test_tb(f, 6, None, 0) exc = traceback.TracebackException( Exception, e, tb, capture_locals=True) self.assertEqual( @@ -1384,7 +1517,7 @@ def test_no_locals(self): e = Exception("uh oh") c = test_code('/foo.py', 'method') f = test_frame(c, globals(), {'something': 1}) - tb = test_tb(f, 6, None) + tb = test_tb(f, 6, None, 0) exc = traceback.TracebackException(Exception, e, tb) self.assertEqual(exc.stack[0].locals, None) @@ -1405,8 +1538,9 @@ def f(): output = StringIO() exc.print(file=output) self.assertEqual( - output.getvalue().split('\n')[-4:], + output.getvalue().split('\n')[-5:], [' x/0', + ' ^^^', ' x = 12', 'ZeroDivisionError: division by zero', '']) diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py index dfc4a060ec6cc9..861ebe347b3ee9 100644 --- a/Lib/test/test_zipimport.py +++ b/Lib/test/test_zipimport.py @@ -716,7 +716,10 @@ def doTraceback(self, module): s = io.StringIO() print_tb(tb, 1, s) - self.assertTrue(s.getvalue().endswith(raise_src)) + self.assertTrue(s.getvalue().endswith( + ' def do_raise(): raise TypeError\n' + ' ^^^^^^^^^^^^^^^\n' + )) else: raise AssertionError("This ought to be impossible") diff --git a/Lib/traceback.py b/Lib/traceback.py index b4c7641addec77..cf1ba2a1a49ac9 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -69,7 +69,8 @@ def extract_tb(tb, limit=None): trace. The line is a string with leading and trailing whitespace stripped; if the source is not available it is None. """ - return StackSummary.extract(walk_tb(tb), limit=limit) + return StackSummary._extract_from_extended_frame_gen( + _walk_tb_with_full_positions(tb), limit=limit) # # Exception formatting and output. @@ -251,10 +252,12 @@ class FrameSummary: mapping the name to the repr() of the variable. """ - __slots__ = ('filename', 'lineno', 'name', '_line', 'locals') + __slots__ = ('filename', 'lineno', 'end_lineno', 'colno', 'end_colno', + 'name', '_line', 'locals') def __init__(self, filename, lineno, name, *, lookup_line=True, - locals=None, line=None): + locals=None, line=None, + end_lineno=None, colno=None, end_colno=None): """Construct a FrameSummary. :param lookup_line: If True, `linecache` is consulted for the source @@ -271,6 +274,9 @@ def __init__(self, filename, lineno, name, *, lookup_line=True, if lookup_line: self.line self.locals = {k: repr(v) for k, v in locals.items()} if locals else None + self.end_lineno = end_lineno + self.colno = colno + self.end_colno = end_colno def __eq__(self, other): if isinstance(other, FrameSummary): @@ -295,11 +301,17 @@ def __repr__(self): def __len__(self): return 4 + @property + def _original_line(self): + # Returns the line as-is from the source, without modifying whitespace. + self.line + return self._line + @property def line(self): if self._line is None: - self._line = linecache.getline(self.filename, self.lineno).strip() - return self._line + self._line = linecache.getline(self.filename, self.lineno) + return self._line.strip() def walk_stack(f): @@ -309,7 +321,7 @@ def walk_stack(f): current stack is used. Usually used with StackSummary.extract. """ if f is None: - f = sys._getframe().f_back.f_back + f = sys._getframe().f_back.f_back.f_back.f_back while f is not None: yield f, f.f_lineno f = f.f_back @@ -326,6 +338,27 @@ def walk_tb(tb): tb = tb.tb_next +def _walk_tb_with_full_positions(tb): + # Internal version of walk_tb that yields full code positions including + # end line and column information. + while tb is not None: + positions = _get_code_position(tb.tb_frame.f_code, tb.tb_lasti) + # Yield tb_lineno when co_positions does not have a line number to + # maintain behavior with walk_tb. + if positions[0] is None: + yield tb.tb_frame, (tb.tb_lineno, ) + positions[1:] + else: + yield tb.tb_frame, positions + tb = tb.tb_next + + +def _get_code_position(code, instruction_index): + if instruction_index < 0: + return (None, None, None, None) + positions_gen = code.co_positions() + return next(itertools.islice(positions_gen, instruction_index // 2, None)) + + _RECURSIVE_CUTOFF = 3 # Also hardcoded in traceback.c. class StackSummary(list): @@ -345,6 +378,21 @@ def extract(klass, frame_gen, *, limit=None, lookup_lines=True, :param capture_locals: If True, the local variables from each frame will be captured as object representations into the FrameSummary. """ + def extended_frame_gen(): + for f, lineno in frame_gen: + yield f, (lineno, None, None, None) + + return klass._extract_from_extended_frame_gen( + extended_frame_gen(), limit=limit, lookup_lines=lookup_lines, + capture_locals=capture_locals) + + @classmethod + def _extract_from_extended_frame_gen(klass, frame_gen, *, limit=None, + lookup_lines=True, capture_locals=False): + # Same as extract but operates on a frame generator that yields + # (frame, (lineno, end_lineno, colno, end_colno)) in the stack. + # Only lineno is required, the remaining fields can be empty if the + # information is not available. if limit is None: limit = getattr(sys, 'tracebacklimit', None) if limit is not None and limit < 0: @@ -357,7 +405,7 @@ def extract(klass, frame_gen, *, limit=None, lookup_lines=True, result = klass() fnames = set() - for f, lineno in frame_gen: + for f, (lineno, end_lineno, colno, end_colno) in frame_gen: co = f.f_code filename = co.co_filename name = co.co_name @@ -370,7 +418,8 @@ def extract(klass, frame_gen, *, limit=None, lookup_lines=True, else: f_locals = None result.append(FrameSummary( - filename, lineno, name, lookup_line=False, locals=f_locals)) + filename, lineno, name, lookup_line=False, locals=f_locals, + end_lineno=end_lineno, colno=colno, end_colno=end_colno)) for filename in fnames: linecache.checkcache(filename) # If immediate lookup was desired, trigger lookups now. @@ -437,6 +486,17 @@ def format(self): frame.filename, frame.lineno, frame.name)) if frame.line: row.append(' {}\n'.format(frame.line.strip())) + + stripped_characters = len(frame._original_line) - len(frame.line.lstrip()) + if frame.end_lineno == frame.lineno and frame.end_colno != 0: + colno = _byte_offset_to_character_offset(frame._original_line, frame.colno) + end_colno = _byte_offset_to_character_offset(frame._original_line, frame.end_colno) + + row.append(' ') + row.append(' ' * (colno - stripped_characters)) + row.append('^' * (end_colno - colno)) + row.append('\n') + if frame.locals: for name, value in sorted(frame.locals.items()): row.append(' {name} = {value}\n'.format(name=name, value=value)) @@ -450,6 +510,14 @@ def format(self): return result +def _byte_offset_to_character_offset(str, offset): + as_utf8 = str.encode('utf-8') + if offset > len(as_utf8): + offset = len(as_utf8) + + return len(as_utf8[:offset + 1].decode("utf-8")) + + class TracebackException: """An exception ready for rendering. @@ -491,8 +559,9 @@ def __init__(self, exc_type, exc_value, exc_traceback, *, limit=None, _seen.add(id(exc_value)) # TODO: locals. - self.stack = StackSummary.extract( - walk_tb(exc_traceback), limit=limit, lookup_lines=lookup_lines, + self.stack = StackSummary._extract_from_extended_frame_gen( + _walk_tb_with_full_positions(exc_traceback), + limit=limit, lookup_lines=lookup_lines, capture_locals=capture_locals) self.exc_type = exc_type # Capture now to permit freeing resources: only complication is in the diff --git a/Parser/pegen.c b/Parser/pegen.c index 3472d489e067d3..c79ee648d96e43 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -1,5 +1,6 @@ #include #include "pycore_ast.h" // _PyAST_Validate(), +#include "pycore_traceback.h" // _byte_offset_to_character_offset(), #include #include "tokenizer.h" @@ -139,27 +140,6 @@ _create_dummy_identifier(Parser *p) return _PyPegen_new_identifier(p, ""); } -static inline Py_ssize_t -byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset) -{ - const char *str = PyUnicode_AsUTF8(line); - if (!str) { - return 0; - } - Py_ssize_t len = strlen(str); - if (col_offset > len + 1) { - col_offset = len + 1; - } - assert(col_offset >= 0); - PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace"); - if (!text) { - return 0; - } - Py_ssize_t size = PyUnicode_GET_LENGTH(text); - Py_DECREF(text); - return size; -} - const char * _PyPegen_get_expr_name(expr_ty e) { @@ -498,9 +478,9 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, Py_ssize_t end_col_number = end_col_offset; if (p->tok->encoding != NULL) { - col_number = byte_offset_to_character_offset(error_line, col_offset); + col_number = _byte_offset_to_character_offset(error_line, col_offset); end_col_number = end_col_number > 0 ? - byte_offset_to_character_offset(error_line, end_col_offset) : + _byte_offset_to_character_offset(error_line, end_col_offset) : end_col_number; } tmp = Py_BuildValue("(OiiNii)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number); diff --git a/Python/_warnings.c b/Python/_warnings.c index 9c8815c1a3e204..9f68da208731e7 100644 --- a/Python/_warnings.c +++ b/Python/_warnings.c @@ -544,7 +544,7 @@ show_warning(PyObject *filename, int lineno, PyObject *text, PyFile_WriteString("\n", f_stderr); } else { - _Py_DisplaySourceLine(f_stderr, filename, lineno, 2); + _Py_DisplaySourceLine(f_stderr, filename, lineno, 2, NULL, NULL); } error: diff --git a/Python/traceback.c b/Python/traceback.c index f7dc5ad6864762..a7bb73cff17918 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -3,9 +3,11 @@ #include "Python.h" -#include "code.h" +#include "code.h" // PyCode_Addr2Line etc #include "pycore_interp.h" // PyInterpreterState.gc #include "frameobject.h" // PyFrame_GetBack() +#include "pycore_frame.h" // _PyFrame_GetCode() +#include "pycore_traceback.h" // _byte_offset_to_character_offset() #include "structmember.h" // PyMemberDef #include "osdefs.h" // SEP #ifdef HAVE_FCNTL_H @@ -370,7 +372,7 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject * } int -_Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent) +_Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent, int *truncation, PyObject **line) { int err = 0; int fd; @@ -461,6 +463,11 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent) return err; } + if (line) { + Py_INCREF(lineobj); + *line = lineobj; + } + /* remove the indentation of the line */ kind = PyUnicode_KIND(lineobj); data = PyUnicode_DATA(lineobj); @@ -480,6 +487,10 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent) } } + if (truncation != NULL) { + *truncation = i - indent; + } + /* Write some spaces before the line */ strcpy(buf, " "); assert (strlen(buf) == 10); @@ -501,8 +512,12 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent) return err; } +#define _TRACEBACK_SOURCE_LINE_INDENT 4 + +// TODO: Pick up filename and other stuff from the tb argument static int -tb_displayline(PyObject *f, PyObject *filename, int lineno, PyObject *name) +tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int lineno, + PyFrameObject *frame, PyObject *name) { int err; PyObject *line; @@ -517,9 +532,54 @@ tb_displayline(PyObject *f, PyObject *filename, int lineno, PyObject *name) Py_DECREF(line); if (err != 0) return err; + int truncation = _TRACEBACK_SOURCE_LINE_INDENT; + PyObject* source_line = NULL; /* ignore errors since we can't report them, can we? */ - if (_Py_DisplaySourceLine(f, filename, lineno, 4)) + if (!_Py_DisplaySourceLine(f, filename, lineno, _TRACEBACK_SOURCE_LINE_INDENT, &truncation, &source_line)) { + int code_offset = tb->tb_lasti; + if (PyCode_Addr2Line(_PyFrame_GetCode(frame), code_offset) != _PyCode_Addr2EndLine(_PyFrame_GetCode(frame), code_offset)) { + goto done; + } + + Py_ssize_t start_offset = (Py_ssize_t) _PyCode_Addr2Offset(_PyFrame_GetCode(frame), code_offset); + Py_ssize_t end_offset = (Py_ssize_t) _PyCode_Addr2EndOffset(_PyFrame_GetCode(frame), code_offset); + + if (start_offset < 0 || end_offset < 0) { + goto done; + } + if (end_offset == -1) { + // TODO: highlight from start_offset to the end of line + goto done; + } + // Convert the utf-8 byte offset to the actual character offset so we + // print the right number of carets. We do -1 here because the column + // offsets provided by _PyCode_Addr2Offset and _PyCode_Addr2EndOffset + // are 1-indexed, not 0-indexed. + start_offset = _byte_offset_to_character_offset(source_line, start_offset); + end_offset = _byte_offset_to_character_offset(source_line, end_offset); + + char offset = truncation; + while (++offset <= start_offset) { + err = PyFile_WriteString(" ", f); + if (err < 0) { + goto done; + } + } + while (++offset <= end_offset + 1) { + err = PyFile_WriteString("^", f); + if (err < 0) { + goto done; + } + } + err = PyFile_WriteString("\n", f); + } + + else { PyErr_Clear(); + } + +done: + Py_XDECREF(source_line); return err; } @@ -576,8 +636,8 @@ tb_printinternal(PyTracebackObject *tb, PyObject *f, long limit) } cnt++; if (err == 0 && cnt <= TB_RECURSIVE_CUTOFF) { - err = tb_displayline(f, code->co_filename, tb->tb_lineno, - code->co_name); + err = tb_displayline(tb, f, code->co_filename, tb->tb_lineno, + tb->tb_frame, code->co_name); if (err == 0) { err = PyErr_CheckSignals(); } @@ -926,4 +986,3 @@ _Py_DumpTracebackThreads(int fd, PyInterpreterState *interp, return NULL; } - From 9a317119bffc8b364670c51b81df440adc85b0e7 Mon Sep 17 00:00:00 2001 From: Ammar Askar Date: Sun, 4 Jul 2021 15:42:07 -0400 Subject: [PATCH 2/5] Address feedback from @isidentical --- Python/traceback.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/Python/traceback.c b/Python/traceback.c index a7bb73cff17918..b76b556fbf5efd 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -537,26 +537,31 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen /* ignore errors since we can't report them, can we? */ if (!_Py_DisplaySourceLine(f, filename, lineno, _TRACEBACK_SOURCE_LINE_INDENT, &truncation, &source_line)) { int code_offset = tb->tb_lasti; - if (PyCode_Addr2Line(_PyFrame_GetCode(frame), code_offset) != _PyCode_Addr2EndLine(_PyFrame_GetCode(frame), code_offset)) { + PyCodeObject* code = _PyFrame_GetCode(frame); + + int start_line; + int end_line; + int start_col_byte_offset; + int end_col_byte_offset; + if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset, + &end_line, &end_col_byte_offset)) { + goto done; + } + if (start_line != end_line) { goto done; } - Py_ssize_t start_offset = (Py_ssize_t) _PyCode_Addr2Offset(_PyFrame_GetCode(frame), code_offset); - Py_ssize_t end_offset = (Py_ssize_t) _PyCode_Addr2EndOffset(_PyFrame_GetCode(frame), code_offset); - - if (start_offset < 0 || end_offset < 0) { + if (start_col_byte_offset < 0 || end_col_byte_offset < 0) { goto done; } - if (end_offset == -1) { + if (end_col_byte_offset == -1) { // TODO: highlight from start_offset to the end of line goto done; } // Convert the utf-8 byte offset to the actual character offset so we - // print the right number of carets. We do -1 here because the column - // offsets provided by _PyCode_Addr2Offset and _PyCode_Addr2EndOffset - // are 1-indexed, not 0-indexed. - start_offset = _byte_offset_to_character_offset(source_line, start_offset); - end_offset = _byte_offset_to_character_offset(source_line, end_offset); + // print the right number of carets. + Py_ssize_t start_offset = _byte_offset_to_character_offset(source_line, start_col_byte_offset); + Py_ssize_t end_offset = _byte_offset_to_character_offset(source_line, end_col_byte_offset); char offset = truncation; while (++offset <= start_offset) { @@ -986,3 +991,4 @@ _Py_DumpTracebackThreads(int fd, PyInterpreterState *interp, return NULL; } + From 83f0260ac4b6fe4e005f961f4a529996a11c5202 Mon Sep 17 00:00:00 2001 From: Ammar Askar Date: Sun, 4 Jul 2021 16:09:35 -0400 Subject: [PATCH 3/5] Remove TODO about highlighting to end --- Python/traceback.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Python/traceback.c b/Python/traceback.c index b76b556fbf5efd..7193030491bd84 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -554,10 +554,6 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen if (start_col_byte_offset < 0 || end_col_byte_offset < 0) { goto done; } - if (end_col_byte_offset == -1) { - // TODO: highlight from start_offset to the end of line - goto done; - } // Convert the utf-8 byte offset to the actual character offset so we // print the right number of carets. Py_ssize_t start_offset = _byte_offset_to_character_offset(source_line, start_col_byte_offset); From 1a5a4cfc174602dc2f9439034121cda7dc9624b5 Mon Sep 17 00:00:00 2001 From: Ammar Askar Date: Sun, 4 Jul 2021 16:21:04 -0400 Subject: [PATCH 4/5] Remove stale TODO --- Python/traceback.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/traceback.c b/Python/traceback.c index 7193030491bd84..5c45af810e92f3 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -514,7 +514,6 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent, i #define _TRACEBACK_SOURCE_LINE_INDENT 4 -// TODO: Pick up filename and other stuff from the tb argument static int tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int lineno, PyFrameObject *frame, PyObject *name) @@ -535,7 +534,8 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen int truncation = _TRACEBACK_SOURCE_LINE_INDENT; PyObject* source_line = NULL; /* ignore errors since we can't report them, can we? */ - if (!_Py_DisplaySourceLine(f, filename, lineno, _TRACEBACK_SOURCE_LINE_INDENT, &truncation, &source_line)) { + if (!_Py_DisplaySourceLine(f, filename, lineno, _TRACEBACK_SOURCE_LINE_INDENT, + &truncation, &source_line)) { int code_offset = tb->tb_lasti; PyCodeObject* code = _PyFrame_GetCode(frame); From 58e2f7fc035aec5f777168737b3e0ddbfde985b8 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sun, 4 Jul 2021 21:19:31 +0100 Subject: [PATCH 5/5] Move byte_offset_to_character_offset to the parser API --- Include/internal/pycore_traceback.h | 21 --------------------- Parser/pegen.c | 26 +++++++++++++++++++++++--- Parser/pegen.h | 1 + Python/traceback.c | 6 +++--- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/Include/internal/pycore_traceback.h b/Include/internal/pycore_traceback.h index 55e9f072d96cd4..4d282308769dc8 100644 --- a/Include/internal/pycore_traceback.h +++ b/Include/internal/pycore_traceback.h @@ -87,27 +87,6 @@ PyAPI_FUNC(PyObject*) _PyTraceBack_FromFrame( PyObject *tb_next, PyFrameObject *frame); -static inline Py_ssize_t -_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset) -{ - const char *str = PyUnicode_AsUTF8(line); - if (!str) { - return 0; - } - Py_ssize_t len = strlen(str); - if (col_offset > len + 1) { - col_offset = len + 1; - } - assert(col_offset >= 0); - PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace"); - if (!text) { - return 0; - } - Py_ssize_t size = PyUnicode_GET_LENGTH(text); - Py_DECREF(text); - return size; -} - #ifdef __cplusplus } #endif diff --git a/Parser/pegen.c b/Parser/pegen.c index c79ee648d96e43..3e8ddfbf53cf75 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -1,6 +1,5 @@ #include #include "pycore_ast.h" // _PyAST_Validate(), -#include "pycore_traceback.h" // _byte_offset_to_character_offset(), #include #include "tokenizer.h" @@ -398,6 +397,27 @@ get_error_line(Parser *p, Py_ssize_t lineno) return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace"); } +Py_ssize_t +_PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset) +{ + const char *str = PyUnicode_AsUTF8(line); + if (!str) { + return 0; + } + Py_ssize_t len = strlen(str); + if (col_offset > len + 1) { + col_offset = len + 1; + } + assert(col_offset >= 0); + PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace"); + if (!text) { + return 0; + } + Py_ssize_t size = PyUnicode_GET_LENGTH(text); + Py_DECREF(text); + return size; +} + void * _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, Py_ssize_t lineno, Py_ssize_t col_offset, @@ -478,9 +498,9 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, Py_ssize_t end_col_number = end_col_offset; if (p->tok->encoding != NULL) { - col_number = _byte_offset_to_character_offset(error_line, col_offset); + col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset); end_col_number = end_col_number > 0 ? - _byte_offset_to_character_offset(error_line, end_col_offset) : + _PyPegen_byte_offset_to_character_offset(error_line, end_col_offset) : end_col_number; } tmp = Py_BuildValue("(OiiNii)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number); diff --git a/Parser/pegen.h b/Parser/pegen.h index eac73bba151bca..c09b4a2927562b 100644 --- a/Parser/pegen.h +++ b/Parser/pegen.h @@ -139,6 +139,7 @@ expr_ty _PyPegen_name_token(Parser *p); expr_ty _PyPegen_number_token(Parser *p); void *_PyPegen_string_token(Parser *p); const char *_PyPegen_get_expr_name(expr_ty); +Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset); void *_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...); void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, Py_ssize_t lineno, Py_ssize_t col_offset, diff --git a/Python/traceback.c b/Python/traceback.c index 5c45af810e92f3..a60f9916424337 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -7,7 +7,7 @@ #include "pycore_interp.h" // PyInterpreterState.gc #include "frameobject.h" // PyFrame_GetBack() #include "pycore_frame.h" // _PyFrame_GetCode() -#include "pycore_traceback.h" // _byte_offset_to_character_offset() +#include "../Parser/pegen.h" // _PyPegen_byte_offset_to_character_offset() #include "structmember.h" // PyMemberDef #include "osdefs.h" // SEP #ifdef HAVE_FCNTL_H @@ -556,8 +556,8 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen } // Convert the utf-8 byte offset to the actual character offset so we // print the right number of carets. - Py_ssize_t start_offset = _byte_offset_to_character_offset(source_line, start_col_byte_offset); - Py_ssize_t end_offset = _byte_offset_to_character_offset(source_line, end_col_byte_offset); + Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset); + Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset); char offset = truncation; while (++offset <= start_offset) {