Skip to content

Commit bd4c2a3

Browse files
committed
gh-132449: Improve syntax error messages for keywords with typos
Signed-off-by: Pablo Galindo <pablogsal@gmail.com>
1 parent e6ef47a commit bd4c2a3

File tree

10 files changed

+254
-20
lines changed

10 files changed

+254
-20
lines changed

Diff for: Grammar/python.gram

+5-3
Original file line number | Diff line number | Diff line change
@@ -87,16 +87,18 @@ _PyPegen_parse(Parser *p)
8787
# ==============
8888

8989
file[mod_ty]: a=[statements] ENDMARKER { _PyPegen_make_module(p, a) }
90-
interactive[mod_ty]: a=statement_newline { _PyAST_Interactive(a, p->arena) }
90+
interactive[mod_ty]: a=statement_newline { _PyAST_Interactive(_PyPegen_register_stmts(p, a), p->arena) }
9191
eval[mod_ty]: a=expressions NEWLINE* ENDMARKER { _PyAST_Expression(a, p->arena) }
9292
func_type[mod_ty]: '(' a=[type_expressions] ')' '->' b=expression NEWLINE* ENDMARKER { _PyAST_FunctionType(a, b, p->arena) }
9393

9494
# GENERAL STATEMENTS
9595
# ==================
9696

97-
statements[asdl_stmt_seq*]: a=statement+ { (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a) }
97+
statements[asdl_stmt_seq*]: a=statement+ { _PyPegen_register_stmts(p, (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a)) }
9898

99-
statement[asdl_stmt_seq*]: a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) } | a[asdl_stmt_seq*]=simple_stmts { a }
99+
statement[asdl_stmt_seq*]:
100+
| a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) }
101+
| a[asdl_stmt_seq*]=simple_stmts { a }
100102

101103
statement_newline[asdl_stmt_seq*]:
102104
| a=compound_stmt NEWLINE { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) }

Diff for: Include/cpython/pyerrors.h

+1
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ typedef struct {
3030
PyObject *end_offset;
3131
PyObject *text;
3232
PyObject *print_file_and_line;
33+
PyObject *metadata;
3334
} PySyntaxErrorObject;
3435

3536
typedef struct {

Diff for: Lib/codeop.py

+7-8
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@
4747
PyCF_ONLY_AST = 0x400
4848
PyCF_ALLOW_INCOMPLETE_INPUT = 0x4000
4949

50-
def _maybe_compile(compiler, source, filename, symbol):
50+
def _maybe_compile(compiler, source, filename, symbol, flags=0):
5151
# Check for source consisting of only blank lines and comments.
5252
for line in source.split("\n"):
5353
line = line.strip()
@@ -61,8 +61,8 @@ def _maybe_compile(compiler, source, filename, symbol):
6161
with warnings.catch_warnings():
6262
warnings.simplefilter("ignore", (SyntaxWarning, DeprecationWarning))
6363
try:
64-
compiler(source, filename, symbol)
65-
except SyntaxError: # Let other compile() errors propagate.
64+
compiler(source, filename, symbol, flags)
65+
except SyntaxError as e: # Let other compile() errors propagate.
6666
try:
6767
compiler(source + "\n", filename, symbol)
6868
return None
@@ -72,16 +72,15 @@ def _maybe_compile(compiler, source, filename, symbol):
7272
pass
7373
# fallthrough
7474

75-
return compiler(source, filename, symbol, incomplete_input=False)
75+
return compiler(source, filename, symbol, incomplete_input=False, flags=flags)
7676

77-
def _compile(source, filename, symbol, incomplete_input=True):
78-
flags = 0
77+
def _compile(source, filename, symbol, incomplete_input=True, flags=0):
7978
if incomplete_input:
8079
flags |= PyCF_ALLOW_INCOMPLETE_INPUT
8180
flags |= PyCF_DONT_IMPLY_DEDENT
8281
return compile(source, filename, symbol, flags)
8382

84-
def compile_command(source, filename="<input>", symbol="single"):
83+
def compile_command(source, filename="<input>", symbol="single", flags=0):
8584
r"""Compile a command and determine whether it is incomplete.
8685
8786
Arguments:
@@ -100,7 +99,7 @@ def compile_command(source, filename="<input>", symbol="single"):
10099
syntax error (OverflowError and ValueError can be produced by
101100
malformed literals).
102101
"""
103-
return _maybe_compile(_compile, source, filename, symbol)
102+
return _maybe_compile(_compile, source, filename, symbol, flags)
104103

105104
class Compile:
106105
"""Instances of this class behave much like the built-in compile

Diff for: Lib/test/test_syntax.py

+69-1
Original file line number | Diff line number | Diff line change
@@ -1189,7 +1189,7 @@
11891189
>>> with block ad something:
11901190
... pass
11911191
Traceback (most recent call last):
1192-
SyntaxError: invalid syntax
1192+
SyntaxError: invalid syntax. Did you mean 'and'?
11931193
11941194
>>> try
11951195
... pass
@@ -1713,6 +1713,74 @@
17131713
Traceback (most recent call last):
17141714
SyntaxError: expected one or more exception types
17151715
1716+
Check custom exceptions for keywords with typos
1717+
1718+
>>> fur a in b:
1719+
... pass
1720+
Traceback (most recent call last):
1721+
SyntaxError: invalid syntax. Did you mean 'for'?
1722+
1723+
>>> whille True:
1724+
... pass
1725+
Traceback (most recent call last):
1726+
SyntaxError: invalid syntax. Did you mean 'while'?
1727+
1728+
>>> iff x > 5:
1729+
... pass
1730+
Traceback (most recent call last):
1731+
SyntaxError: invalid syntax. Did you mean 'if'?
1732+
1733+
>>> classe MyClass:
1734+
... pass
1735+
Traceback (most recent call last):
1736+
SyntaxError: invalid syntax. Did you mean 'class'?
1737+
1738+
1739+
>>> impor math
1740+
Traceback (most recent call last):
1741+
SyntaxError: invalid syntax. Did you mean 'import'?
1742+
1743+
>>> defn calculate_sum(a, b):
1744+
... return a + b
1745+
Traceback (most recent call last):
1746+
SyntaxError: invalid syntax. Did you mean 'def'?
1747+
1748+
>>> def foo():
1749+
... returm result
1750+
Traceback (most recent call last):
1751+
SyntaxError: invalid syntax. Did you mean 'return'?
1752+
1753+
>>> lamda x: x ** 2
1754+
Traceback (most recent call last):
1755+
SyntaxError: invalid syntax. Did you mean 'lambda'?
1756+
1757+
>>> def foo():
1758+
... yeld i
1759+
Traceback (most recent call last):
1760+
SyntaxError: invalid syntax. Did you mean 'yield'?
1761+
1762+
>>> def foo():
1763+
... globel counter
1764+
Traceback (most recent call last):
1765+
SyntaxError: invalid syntax. Did you mean 'global'?
1766+
1767+
>>> frum math import sqrt
1768+
Traceback (most recent call last):
1769+
SyntaxError: invalid syntax. Did you mean 'from'?
1770+
1771+
>>> asynch def fetch_data():
1772+
... pass
1773+
Traceback (most recent call last):
1774+
SyntaxError: invalid syntax. Did you mean 'async'?
1775+
1776+
>>> async def foo():
1777+
... awaid fetch_data()
1778+
Traceback (most recent call last):
1779+
SyntaxError: invalid syntax. Did you mean 'await'?
1780+
1781+
>>> raisee ValueError("Error")
1782+
Traceback (most recent call last):
1783+
SyntaxError: invalid syntax. Did you mean 'raise'?
17161784
17171785
>>> f(a=23, a=234)
17181786
Traceback (most recent call last):

Diff for: Lib/traceback.py

+88
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,10 @@
66
import sys
77
import textwrap
88
import warnings
9+
import codeop
10+
import keyword
11+
import tokenize
12+
import io
913
from contextlib import suppress
1014
import _colorize
1115
from _colorize import ANSIColors
@@ -1090,6 +1094,7 @@ def __init__(self, exc_type, exc_value, exc_traceback, *, limit=None,
10901094
self.end_offset = exc_value.end_offset
10911095
self.msg = exc_value.msg
10921096
self._is_syntax_error = True
1097+
self._exc_metadata = getattr(exc_value, "_metadata", None)
10931098
elif exc_type and issubclass(exc_type, ImportError) and \
10941099
getattr(exc_value, "name_from", None) is not None:
10951100
wrong_name = getattr(exc_value, "name_from", None)
@@ -1272,6 +1277,86 @@ def format_exception_only(self, *, show_group=False, _depth=0, **kwargs):
12721277
if self.exceptions and show_group:
12731278
for ex in self.exceptions:
12741279
yield from ex.format_exception_only(show_group=show_group, _depth=_depth+1, colorize=colorize)
1280+
1281+
def _find_keyword_typos(self):
    """Turn a bare "invalid syntax" error into a "Did you mean ...?" hint.

    The code leading up to the error is tokenized and every NAME token
    that is not already a keyword is compared against ``keyword.kwlist``.
    When swapping the name for the suggested keyword yields code that
    parses, ``self.text``, ``self.lineno``, ``self.end_lineno``,
    ``self.offset``, ``self.end_offset`` and ``self.msg`` are rewritten to
    point at that token and to carry the suggestion.

    The method is best effort: it returns ``None`` in every case and
    leaves the error untouched when ``_suggestions`` is unavailable, when
    the error already has a custom message, when no parser metadata was
    recorded, when the snippet is larger than 1024 characters, or when
    the snippet cannot be tokenized.
    """
    try:
        import _suggestions
    except ImportError:
        return

    assert self._is_syntax_error

    # Only try to find keyword typos if there is no custom message
    if self.msg != "invalid syntax":
        return

    if not self._exc_metadata:
        return

    # NOTE(review): the metadata is presumably the (line, column, source)
    # of the last statement the parser completed before failing -- confirm
    # against the code that fills ``SyntaxError._metadata``.
    # The column is intentionally unused: slicing it off the first line
    # removed that line's indentation only, which left the following lines
    # looking unexpectedly indented once the snippet was dedented.
    line, _, source = self._exc_metadata
    end_line = int(self.lineno) if self.lineno is not None else 0
    lines = None
    from_filename = False

    if source is None:
        if self.filename:
            try:
                with open(self.filename) as f:
                    # Newline-free lines, like the other two sources below.
                    lines = f.read().splitlines()
            except Exception:
                # Deliberate best effort: any failure to read the file
                # falls back to the single line stored on the exception.
                line, end_line = 0, 1
            else:
                from_filename = True
        if lines is None:
            lines = (self.text or "").splitlines()
    else:
        lines = source.splitlines()

    # 0-based index of the first snippet line inside ``lines``.
    first = line - 1 if line > 0 else 0
    # Every entry of ``lines`` lacks its line terminator, so the snippet
    # has to be joined with explicit newlines to stay multi-line.
    error_code = textwrap.dedent('\n'.join(lines[first:end_line]))

    # Do not continue if the source is too large
    if len(error_code) > 1024:
        return

    error_lines = error_code.splitlines()
    tokens = tokenize.generate_tokens(io.StringIO(error_code).readline)
    # Upper bound on the number of candidate names that are examined.
    candidates_left = 10
    try:
        for token in tokens:
            if token.type != tokenize.NAME:
                continue
            start, end = token.start, token.end
            # When the snippet was read from the file, only names on the
            # line that raised the error are candidates.  ``first`` maps
            # the snippet's 1-based row back to the file's line number.
            if from_filename and first + start[0] != end_line:
                continue
            wrong_name = token.string
            if wrong_name in keyword.kwlist:
                continue

            # Only real candidates consume the budget, so punctuation and
            # correct keywords ahead of the typo cannot exhaust it.
            candidates_left -= 1
            if candidates_left < 0:
                break

            suggestion = _suggestions._generate_suggestions(
                keyword.kwlist, wrong_name)
            if not suggestion or suggestion == wrong_name:
                continue

            # Try to replace the token with the keyword
            the_lines = error_lines.copy()
            the_line = the_lines[start[0] - 1]
            the_lines[start[0] - 1] = (
                the_line[:start[1]] + suggestion + the_line[end[1]:]
            )
            code = '\n'.join(the_lines)

            # Check if it works
            try:
                codeop.compile_command(
                    code, symbol="exec", flags=codeop.PyCF_ONLY_AST)
            except (SyntaxError, ValueError, OverflowError):
                # ValueError/OverflowError come from malformed literals.
                continue

            # Keep token.line but handle offsets correctly
            self.text = token.line
            self.offset = start[1] + 1
            self.end_offset = end[1] + 1
            # NOTE(review): these rows are relative to the snippet, not to
            # the file -- confirm this is what the formatter expects.
            self.lineno = start[0]
            self.end_lineno = end[0]
            self.msg = f"invalid syntax. Did you mean '{suggestion}'?"
            return
    except (tokenize.TokenError, SyntaxError):
        # The snippet itself could not be tokenized; give up quietly.
        return
1359+
12751360

12761361
def _format_syntax_error(self, stype, **kwargs):
12771362
"""Format SyntaxError exceptions (internal helper)."""
@@ -1299,6 +1384,9 @@ def _format_syntax_error(self, stype, **kwargs):
12991384
# text = " foo\n"
13001385
# rtext = " foo"
13011386
# ltext = "foo"
1387+
with suppress(Exception):
1388+
self._find_keyword_typos()
1389+
text = self.text
13021390
rtext = text.rstrip('\n')
13031391
ltext = rtext.lstrip(' \n\f')
13041392
spaces = len(rtext) - len(ltext)

Diff for: Objects/exceptions.c

+7-2
Original file line number | Diff line number | Diff line change
@@ -2668,10 +2668,10 @@ SyntaxError_init(PyObject *op, PyObject *args, PyObject *kwds)
26682668

26692669
self->end_lineno = NULL;
26702670
self->end_offset = NULL;
2671-
if (!PyArg_ParseTuple(info, "OOOO|OO",
2671+
if (!PyArg_ParseTuple(info, "OOOO|OOO",
26722672
&self->filename, &self->lineno,
26732673
&self->offset, &self->text,
2674-
&self->end_lineno, &self->end_offset)) {
2674+
&self->end_lineno, &self->end_offset, &self->metadata)) {
26752675
Py_DECREF(info);
26762676
return -1;
26772677
}
@@ -2682,6 +2682,7 @@ SyntaxError_init(PyObject *op, PyObject *args, PyObject *kwds)
26822682
Py_INCREF(self->text);
26832683
Py_XINCREF(self->end_lineno);
26842684
Py_XINCREF(self->end_offset);
2685+
Py_XINCREF(self->metadata);
26852686
Py_DECREF(info);
26862687

26872688
if (self->end_lineno != NULL && self->end_offset == NULL) {
@@ -2704,6 +2705,7 @@ SyntaxError_clear(PyObject *op)
27042705
Py_CLEAR(self->end_offset);
27052706
Py_CLEAR(self->text);
27062707
Py_CLEAR(self->print_file_and_line);
2708+
Py_CLEAR(self->metadata);
27072709
return BaseException_clear(op);
27082710
}
27092711

@@ -2727,6 +2729,7 @@ SyntaxError_traverse(PyObject *op, visitproc visit, void *arg)
27272729
Py_VISIT(self->end_offset);
27282730
Py_VISIT(self->text);
27292731
Py_VISIT(self->print_file_and_line);
2732+
Py_VISIT(self->metadata);
27302733
return BaseException_traverse(op, visit, arg);
27312734
}
27322735

@@ -2822,6 +2825,8 @@ static PyMemberDef SyntaxError_members[] = {
28222825
{"print_file_and_line", _Py_T_OBJECT,
28232826
offsetof(PySyntaxErrorObject, print_file_and_line), 0,
28242827
PyDoc_STR("exception print_file_and_line")},
2828+
{"_metadata", _Py_T_OBJECT, offsetof(PySyntaxErrorObject, metadata), 0,
2829+
PyDoc_STR("exception private metadata")},
28252830
{NULL} /* Sentinel */
28262831
};
28272832

Diff for: Parser/action_helpers.c

+23
Original file line number | Diff line number | Diff line change
@@ -1711,3 +1711,26 @@ _PyPegen_checked_future_import(Parser *p, identifier module, asdl_alias_seq * na
17111711
}
17121712
return _PyAST_ImportFrom(module, names, level, lineno, col_offset, end_lineno, end_col_offset, arena);
17131713
}
1714+
1715+
stmt_ty
1716+
_PyPegen_register_stmt(Parser *p, stmt_ty stmt) {
1717+
p->last_stmt_location.lineno = stmt->lineno;
1718+
p->last_stmt_location.col_offset = stmt->col_offset;
1719+
p->last_stmt_location.end_lineno = stmt->end_lineno;
1720+
p->last_stmt_location.end_col_offset = stmt->end_col_offset;
1721+
return stmt;
1722+
}
1723+
1724+
asdl_stmt_seq*
1725+
_PyPegen_register_stmts(Parser *p, asdl_stmt_seq* stmts) {
1726+
Py_ssize_t len = asdl_seq_LEN(stmts);
1727+
if (len == 0) {
1728+
return stmts;
1729+
}
1730+
stmt_ty last_stmt = asdl_seq_GET(stmts, len - 1);
1731+
p->last_stmt_location.lineno = last_stmt->lineno;
1732+
p->last_stmt_location.col_offset = last_stmt->col_offset;
1733+
p->last_stmt_location.end_lineno = last_stmt->end_lineno;
1734+
p->last_stmt_location.end_col_offset = last_stmt->end_col_offset;
1735+
return stmts;
1736+
}

Diff for: Parser/parser.c

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)