Skip to content

gh-132449: Improve syntax error messages for keywords with typos #132450

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Apr 22, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions Grammar/python.gram
Original file line number Diff line number Diff line change
@@ -94,12 +94,18 @@ func_type[mod_ty]: '(' a=[type_expressions] ')' '->' b=expression NEWLINE* ENDMA
# GENERAL STATEMENTS
# ==================

statements[asdl_stmt_seq*]: a=statement+ { (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a) }
statements[asdl_stmt_seq*]: a=statement+ { _PyPegen_register_stmts(p, (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a)) }

statement[asdl_stmt_seq*]: a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) } | a[asdl_stmt_seq*]=simple_stmts { a }
statement[asdl_stmt_seq*]:
| a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) }
| a[asdl_stmt_seq*]=simple_stmts { a }

single_compound_stmt[asdl_stmt_seq*]:
| a=compound_stmt {
_PyPegen_register_stmts(p, (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a)) }

statement_newline[asdl_stmt_seq*]:
| a=compound_stmt NEWLINE { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) }
| a=single_compound_stmt NEWLINE { a }
| simple_stmts
| NEWLINE { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, CHECK(stmt_ty, _PyAST_Pass(EXTRA))) }
| ENDMARKER { _PyPegen_interactive_exit(p) }
1 change: 1 addition & 0 deletions Include/cpython/pyerrors.h
Original file line number Diff line number Diff line change
@@ -30,6 +30,7 @@ typedef struct {
PyObject *end_offset;
PyObject *text;
PyObject *print_file_and_line;
PyObject *metadata;
} PySyntaxErrorObject;

typedef struct {
15 changes: 7 additions & 8 deletions Lib/codeop.py
Original file line number Diff line number Diff line change
@@ -47,7 +47,7 @@
PyCF_ONLY_AST = 0x400
PyCF_ALLOW_INCOMPLETE_INPUT = 0x4000

def _maybe_compile(compiler, source, filename, symbol):
def _maybe_compile(compiler, source, filename, symbol, flags):
# Check for source consisting of only blank lines and comments.
for line in source.split("\n"):
line = line.strip()
@@ -61,10 +61,10 @@ def _maybe_compile(compiler, source, filename, symbol):
with warnings.catch_warnings():
warnings.simplefilter("ignore", (SyntaxWarning, DeprecationWarning))
try:
compiler(source, filename, symbol)
compiler(source, filename, symbol, flags=flags)
except SyntaxError: # Let other compile() errors propagate.
try:
compiler(source + "\n", filename, symbol)
compiler(source + "\n", filename, symbol, flags=flags)
return None
except _IncompleteInputError as e:
return None
@@ -74,14 +74,13 @@ def _maybe_compile(compiler, source, filename, symbol):

return compiler(source, filename, symbol, incomplete_input=False)

def _compile(source, filename, symbol, incomplete_input=True):
flags = 0
def _compile(source, filename, symbol, incomplete_input=True, *, flags=0):
if incomplete_input:
flags |= PyCF_ALLOW_INCOMPLETE_INPUT
flags |= PyCF_DONT_IMPLY_DEDENT
return compile(source, filename, symbol, flags)

def compile_command(source, filename="<input>", symbol="single"):
def compile_command(source, filename="<input>", symbol="single", flags=0):
r"""Compile a command and determine whether it is incomplete.

Arguments:
@@ -100,7 +99,7 @@ def compile_command(source, filename="<input>", symbol="single"):
syntax error (OverflowError and ValueError can be produced by
malformed literals).
"""
return _maybe_compile(_compile, source, filename, symbol)
return _maybe_compile(_compile, source, filename, symbol, flags)

class Compile:
"""Instances of this class behave much like the built-in compile
@@ -152,4 +151,4 @@ def __call__(self, source, filename="<input>", symbol="single"):
syntax error (OverflowError and ValueError can be produced by
malformed literals).
"""
return _maybe_compile(self.compiler, source, filename, symbol)
return _maybe_compile(self.compiler, source, filename, symbol, flags=self.compiler.flags)
2 changes: 1 addition & 1 deletion Lib/test/test_exceptions.py
Original file line number Diff line number Diff line change
@@ -2462,7 +2462,7 @@ def test_incorrect_constructor(self):
args = ("bad.py", 1, 2)
self.assertRaises(TypeError, SyntaxError, "bad bad", args)

args = ("bad.py", 1, 2, 4, 5, 6, 7)
args = ("bad.py", 1, 2, 4, 5, 6, 7, 8)
self.assertRaises(TypeError, SyntaxError, "bad bad", args)

args = ("bad.py", 1, 2, "abcdefg", 1)
126 changes: 125 additions & 1 deletion Lib/test/test_syntax.py
Original file line number Diff line number Diff line change
@@ -1189,7 +1189,7 @@
>>> with block ad something:
... pass
Traceback (most recent call last):
SyntaxError: invalid syntax
SyntaxError: invalid syntax. Did you mean 'and'?

>>> try
... pass
@@ -1713,6 +1713,130 @@
Traceback (most recent call last):
SyntaxError: expected one or more exception types

Check custom error messages for keywords with typos

>>> fur a in b:
... pass
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'for'?

>>> for a in b:
... pass
... elso:
... pass
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'else'?

>>> whille True:
... pass
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'while'?

>>> while True:
... pass
... elso:
... pass
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'else'?

>>> iff x > 5:
... pass
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'if'?

>>> if x:
... pass
... elseif y:
... pass
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'elif'?

>>> if x:
... pass
... elif y:
... pass
... elso:
... pass
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'else'?

>>> tyo:
... pass
... except y:
... pass
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'try'?

>>> classe MyClass:
... pass
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'class'?

>>> impor math
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'import'?

>>> form x import y
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'from'?

>>> defn calculate_sum(a, b):
... return a + b
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'def'?

>>> def foo():
... returm result
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'return'?

>>> lamda x: x ** 2
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'lambda'?

>>> def foo():
... yeld i
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'yield'?

>>> def foo():
... globel counter
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'global'?

>>> frum math import sqrt
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'from'?

>>> asynch def fetch_data():
... pass
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'async'?

>>> async def foo():
... awaid fetch_data()
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'await'?

>>> raisee ValueError("Error")
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'raise'?

>>> [
... x for x
... in range(3)
... of x
... ]
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'if'?

>>> [
... 123 fur x
... in range(3)
... if x
... ]
Traceback (most recent call last):
SyntaxError: invalid syntax. Did you mean 'for'?

>>> f(a=23, a=234)
Traceback (most recent call last):
100 changes: 100 additions & 0 deletions Lib/traceback.py
Original file line number Diff line number Diff line change
@@ -6,6 +6,10 @@
import sys
import textwrap
import warnings
import codeop
import keyword
import tokenize
import io
from contextlib import suppress
import _colorize
from _colorize import ANSIColors
@@ -1090,6 +1094,7 @@ def __init__(self, exc_type, exc_value, exc_traceback, *, limit=None,
self.end_offset = exc_value.end_offset
self.msg = exc_value.msg
self._is_syntax_error = True
self._exc_metadata = getattr(exc_value, "_metadata", None)
elif exc_type and issubclass(exc_type, ImportError) and \
getattr(exc_value, "name_from", None) is not None:
wrong_name = getattr(exc_value, "name_from", None)
@@ -1273,6 +1278,98 @@ def format_exception_only(self, *, show_group=False, _depth=0, **kwargs):
for ex in self.exceptions:
yield from ex.format_exception_only(show_group=show_group, _depth=_depth+1, colorize=colorize)

def _find_keyword_typos(self):
    """Turn a generic "invalid syntax" error into a keyword-typo suggestion.

    Tokenizes the source snippet described by ``self._exc_metadata`` (a
    ``(line, offset, source)`` triple), and for each NAME token that is not
    already a keyword, tries replacing it with a similar-looking keyword.
    The first replacement that makes the snippet compile wins: ``self.text``,
    ``self.offset``, ``self.end_offset``, ``self.lineno``, ``self.end_lineno``
    and ``self.msg`` are updated in place to point at the misspelled token
    and to carry the "Did you mean '<keyword>'?" message.

    Returns None in every case.  Nothing is modified when the error already
    has a custom message, when no metadata or source text is available, when
    the snippet is empty or longer than 1024 characters, or when no
    replacement compiles.

    Work is bounded: at most 10 candidate tokens are examined, with at most
    3 close matches each.
    """
    assert self._is_syntax_error
    try:
        import _suggestions
    except ImportError:
        _suggestions = None

    # Only try to find keyword typos if there is no custom message
    if self.msg != "invalid syntax" and "Perhaps you forgot a comma" not in self.msg:
        return

    if not self._exc_metadata:
        return

    # NOTE(review): `line`/`offset` look like the 1-based start line and the
    # column at which the failing statement begins -- confirm against the
    # code that fills in the exception metadata.
    line, offset, source = self._exc_metadata
    end_line = int(self.lineno) if self.lineno is not None else 0
    lines = None
    from_filename = False

    if source is None:
        if self.filename:
            try:
                # tokenize.open() decodes using the file's own coding
                # cookie / BOM (UTF-8 by default) rather than the locale
                # encoding, so token columns match what the parser saw.
                with tokenize.open(self.filename) as f:
                    lines = f.read().splitlines()
            except Exception:
                # Best effort: the file may not exist (e.g. "<stdin>") or be
                # unreadable; fall back to the single line in self.text.
                line, end_line, offset = 0, 1, 0
            else:
                from_filename = True
        if lines is None:
            if self.text is None:
                # No source text of any kind to inspect.
                return
            lines = self.text.splitlines()
    else:
        lines = source.splitlines()

    error_code = lines[line - 1 if line > 0 else 0:end_line]
    if not error_code:
        # Empty source or a line range outside the text: nothing to scan.
        return
    error_code[0] = error_code[0][offset:]
    error_code = textwrap.dedent('\n'.join(error_code))

    # Do not continue if the source is too large
    if len(error_code) > 1024:
        return

    # Imported lazily: only needed once we actually look for suggestions.
    import difflib

    error_lines = error_code.splitlines()
    tokens = tokenize.generate_tokens(io.StringIO(error_code).readline)
    tokens_left_to_process = 10
    for token in tokens:
        start, end = token.start, token.end
        if token.type != tokenize.NAME:
            continue
        # Only consider NAME tokens on the same line as the error
        if from_filename and start[0] + line != end_line + 1:
            continue
        wrong_name = token.string
        if wrong_name in keyword.kwlist:
            continue

        # Limit the number of valid tokens to consider to not spend
        # too much time in this function
        tokens_left_to_process -= 1
        if tokens_left_to_process < 0:
            break
        # Limit the number of possible matches to try
        matches = difflib.get_close_matches(wrong_name, keyword.kwlist, n=3)
        if not matches and _suggestions is not None:
            suggestion = _suggestions._generate_suggestions(keyword.kwlist, wrong_name)
            matches = [suggestion] if suggestion is not None else matches
        for suggestion in matches:
            if not suggestion or suggestion == wrong_name:
                continue
            # Try to replace the token with the keyword
            row = start[0] - 1
            the_lines = error_lines.copy()
            the_line = the_lines[row]
            the_lines[row] = the_line[:start[1]] + suggestion + the_line[end[1]:]
            code = '\n'.join(the_lines)

            # Check if it works
            try:
                codeop.compile_command(code, symbol="exec", flags=codeop.PyCF_ONLY_AST)
            except SyntaxError:
                continue

            # Keep token.line but handle offsets correctly
            self.text = token.line
            self.offset = start[1] + 1
            self.end_offset = end[1] + 1
            self.lineno = start[0]
            self.end_lineno = end[0]
            self.msg = f"invalid syntax. Did you mean '{suggestion}'?"
            return


def _format_syntax_error(self, stype, **kwargs):
"""Format SyntaxError exceptions (internal helper)."""
# Show exactly where the problem was found.
@@ -1299,6 +1396,9 @@ def _format_syntax_error(self, stype, **kwargs):
# text = " foo\n"
# rtext = " foo"
# ltext = "foo"
with suppress(Exception):
self._find_keyword_typos()
text = self.text
rtext = text.rstrip('\n')
ltext = rtext.lstrip(' \n\f')
spaces = len(rtext) - len(ltext)
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Syntax errors that look like misspellings of Python keywords now provide a
helpful fix suggestion for the typo. Contributed by Pablo Galindo Salgado.
Loading