Skip to content

Commit 3483299

Browse files
gh-81548: Deprecate octal escape sequences with value larger than 0o377 (GH-91668)
1 parent a055dac commit 3483299

File tree

8 files changed

+139
-18
lines changed

8 files changed

+139
-18
lines changed

Doc/reference/lexical_analysis.rst

+5
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,11 @@ Notes:
596596
(1)
597597
As in Standard C, up to three octal digits are accepted.
598598

599+
.. versionchanged:: 3.11
600+
Octal escapes with a value larger than ``0o377`` produce a :exc:`DeprecationWarning`.
601+
In a future Python version they will produce a :exc:`SyntaxWarning` and
602+
eventually a :exc:`SyntaxError`.
603+
599604
(2)
600605
Unlike in Standard C, exactly two hex digits are required.
601606

Doc/whatsnew/3.11.rst

+6
Original file line numberDiff line numberDiff line change
@@ -1055,6 +1055,12 @@ CPython bytecode changes
10551055
Deprecated
10561056
==========
10571057

1058+
* Octal escapes with a value larger than ``0o377`` now produce
1059+
a :exc:`DeprecationWarning`.
1060+
In a future Python version they will produce a :exc:`SyntaxWarning` and
1061+
eventually a :exc:`SyntaxError`.
1062+
(Contributed by Serhiy Storchaka in :issue:`81548`.)
1063+
10581064
* The :mod:`lib2to3` package and ``2to3`` tool are now deprecated and may not
10591065
be able to parse Python 3.10 or newer. See :pep:`617` (New PEG parser for
10601066
CPython). (Contributed by Victor Stinner in :issue:`40360`.)

Lib/test/test_codecs.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1193,7 +1193,6 @@ def test_escape(self):
11931193
check(br"[\418]", b"[!8]")
11941194
check(br"[\101]", b"[A]")
11951195
check(br"[\1010]", b"[A0]")
1196-
check(br"[\501]", b"[A]")
11971196
check(br"[\x41]", b"[A]")
11981197
check(br"[\x410]", b"[A0]")
11991198
for i in range(97, 123):
@@ -1209,6 +1208,9 @@ def test_escape(self):
12091208
check(br"\9", b"\\9")
12101209
with self.assertWarns(DeprecationWarning):
12111210
check(b"\\\xfa", b"\\\xfa")
1211+
for i in range(0o400, 0o1000):
1212+
with self.assertWarns(DeprecationWarning):
1213+
check(rb'\%o' % i, bytes([i & 0o377]))
12121214

12131215
def test_errors(self):
12141216
decode = codecs.escape_decode
@@ -2435,6 +2437,9 @@ def test_escape_decode(self):
24352437
check(br"\9", "\\9")
24362438
with self.assertWarns(DeprecationWarning):
24372439
check(b"\\\xfa", "\\\xfa")
2440+
for i in range(0o400, 0o1000):
2441+
with self.assertWarns(DeprecationWarning):
2442+
check(rb'\%o' % i, chr(i))
24382443

24392444
def test_decode_errors(self):
24402445
decode = codecs.unicode_escape_decode

Lib/test/test_string_literals.py

+53
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ def test_eval_str_invalid_escape(self):
116116
warnings.simplefilter('always', category=DeprecationWarning)
117117
eval("'''\n\\z'''")
118118
self.assertEqual(len(w), 1)
119+
self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
119120
self.assertEqual(w[0].filename, '<string>')
120121
self.assertEqual(w[0].lineno, 1)
121122

@@ -125,6 +126,32 @@ def test_eval_str_invalid_escape(self):
125126
eval("'''\n\\z'''")
126127
exc = cm.exception
127128
self.assertEqual(w, [])
129+
self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
130+
self.assertEqual(exc.filename, '<string>')
131+
self.assertEqual(exc.lineno, 1)
132+
self.assertEqual(exc.offset, 1)
133+
134+
def test_eval_str_invalid_octal_escape(self):
135+
for i in range(0o400, 0o1000):
136+
with self.assertWarns(DeprecationWarning):
137+
self.assertEqual(eval(r"'\%o'" % i), chr(i))
138+
139+
with warnings.catch_warnings(record=True) as w:
140+
warnings.simplefilter('always', category=DeprecationWarning)
141+
eval("'''\n\\407'''")
142+
self.assertEqual(len(w), 1)
143+
self.assertEqual(str(w[0].message),
144+
r"invalid octal escape sequence '\407'")
145+
self.assertEqual(w[0].filename, '<string>')
146+
self.assertEqual(w[0].lineno, 1)
147+
148+
with warnings.catch_warnings(record=True) as w:
149+
warnings.simplefilter('error', category=DeprecationWarning)
150+
with self.assertRaises(SyntaxError) as cm:
151+
eval("'''\n\\407'''")
152+
exc = cm.exception
153+
self.assertEqual(w, [])
154+
self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
128155
self.assertEqual(exc.filename, '<string>')
129156
self.assertEqual(exc.lineno, 1)
130157
self.assertEqual(exc.offset, 1)
@@ -166,6 +193,7 @@ def test_eval_bytes_invalid_escape(self):
166193
warnings.simplefilter('always', category=DeprecationWarning)
167194
eval("b'''\n\\z'''")
168195
self.assertEqual(len(w), 1)
196+
self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
169197
self.assertEqual(w[0].filename, '<string>')
170198
self.assertEqual(w[0].lineno, 1)
171199

@@ -175,6 +203,31 @@ def test_eval_bytes_invalid_escape(self):
175203
eval("b'''\n\\z'''")
176204
exc = cm.exception
177205
self.assertEqual(w, [])
206+
self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
207+
self.assertEqual(exc.filename, '<string>')
208+
self.assertEqual(exc.lineno, 1)
209+
210+
def test_eval_bytes_invalid_octal_escape(self):
211+
for i in range(0o400, 0o1000):
212+
with self.assertWarns(DeprecationWarning):
213+
self.assertEqual(eval(r"b'\%o'" % i), bytes([i & 0o377]))
214+
215+
with warnings.catch_warnings(record=True) as w:
216+
warnings.simplefilter('always', category=DeprecationWarning)
217+
eval("b'''\n\\407'''")
218+
self.assertEqual(len(w), 1)
219+
self.assertEqual(str(w[0].message),
220+
r"invalid octal escape sequence '\407'")
221+
self.assertEqual(w[0].filename, '<string>')
222+
self.assertEqual(w[0].lineno, 1)
223+
224+
with warnings.catch_warnings(record=True) as w:
225+
warnings.simplefilter('error', category=DeprecationWarning)
226+
with self.assertRaises(SyntaxError) as cm:
227+
eval("b'''\n\\407'''")
228+
exc = cm.exception
229+
self.assertEqual(w, [])
230+
self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
178231
self.assertEqual(exc.filename, '<string>')
179232
self.assertEqual(exc.lineno, 1)
180233

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Octal escapes with a value larger than ``0o377`` now produce a
2+
:exc:`DeprecationWarning`. In a future Python version they will produce a
3+
:exc:`SyntaxWarning` and eventually a :exc:`SyntaxError`.

Objects/bytesobject.c

+24-5
Original file line numberDiff line numberDiff line change
@@ -1113,6 +1113,12 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
11131113
if (s < end && '0' <= *s && *s <= '7')
11141114
c = (c<<3) + *s++ - '0';
11151115
}
1116+
if (c > 0377) {
1117+
if (*first_invalid_escape == NULL) {
1118+
*first_invalid_escape = s-3; /* Back up 3 chars, since we've
1119+
already incremented s. */
1120+
}
1121+
}
11161122
*p++ = c;
11171123
break;
11181124
case 'x':
@@ -1179,11 +1185,24 @@ PyObject *PyBytes_DecodeEscape(const char *s,
11791185
if (result == NULL)
11801186
return NULL;
11811187
if (first_invalid_escape != NULL) {
1182-
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1183-
"invalid escape sequence '\\%c'",
1184-
(unsigned char)*first_invalid_escape) < 0) {
1185-
Py_DECREF(result);
1186-
return NULL;
1188+
unsigned char c = *first_invalid_escape;
1189+
if ('4' <= c && c <= '7') {
1190+
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1191+
"invalid octal escape sequence '\\%.3s'",
1192+
first_invalid_escape) < 0)
1193+
{
1194+
Py_DECREF(result);
1195+
return NULL;
1196+
}
1197+
}
1198+
else {
1199+
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1200+
"invalid escape sequence '\\%c'",
1201+
c) < 0)
1202+
{
1203+
Py_DECREF(result);
1204+
return NULL;
1205+
}
11871206
}
11881207
}
11891208
return result;

Objects/unicodeobject.c

+24-5
Original file line numberDiff line numberDiff line change
@@ -6404,6 +6404,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
64046404
ch = (ch<<3) + *s++ - '0';
64056405
}
64066406
}
6407+
if (ch > 0377) {
6408+
if (*first_invalid_escape == NULL) {
6409+
*first_invalid_escape = s-3; /* Back up 3 chars, since we've
6410+
already incremented s. */
6411+
}
6412+
}
64076413
WRITE_CHAR(ch);
64086414
continue;
64096415

@@ -6554,11 +6560,24 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
65546560
if (result == NULL)
65556561
return NULL;
65566562
if (first_invalid_escape != NULL) {
6557-
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
6558-
"invalid escape sequence '\\%c'",
6559-
(unsigned char)*first_invalid_escape) < 0) {
6560-
Py_DECREF(result);
6561-
return NULL;
6563+
unsigned char c = *first_invalid_escape;
6564+
if ('4' <= c && c <= '7') {
6565+
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
6566+
"invalid octal escape sequence '\\%.3s'",
6567+
first_invalid_escape) < 0)
6568+
{
6569+
Py_DECREF(result);
6570+
return NULL;
6571+
}
6572+
}
6573+
else {
6574+
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
6575+
"invalid escape sequence '\\%c'",
6576+
c) < 0)
6577+
{
6578+
Py_DECREF(result);
6579+
return NULL;
6580+
}
65626581
}
65636582
}
65646583
return result;

Parser/string_parser.c

+18-7
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,15 @@
99
//// STRING HANDLING FUNCTIONS ////
1010

1111
static int
12-
warn_invalid_escape_sequence(Parser *p, unsigned char first_invalid_escape_char, Token *t)
12+
warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t)
1313
{
14+
unsigned char c = *first_invalid_escape;
15+
int octal = ('4' <= c && c <= '7');
1416
PyObject *msg =
15-
PyUnicode_FromFormat("invalid escape sequence '\\%c'", first_invalid_escape_char);
17+
octal
18+
? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'",
19+
first_invalid_escape)
20+
: PyUnicode_FromFormat("invalid escape sequence '\\%c'", c);
1621
if (msg == NULL) {
1722
return -1;
1823
}
@@ -27,7 +32,13 @@ warn_invalid_escape_sequence(Parser *p, unsigned char first_invalid_escape_char,
2732
since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
2833
error location, if p->known_err_token is not set. */
2934
p->known_err_token = t;
30-
RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", first_invalid_escape_char);
35+
if (octal) {
36+
RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'",
37+
first_invalid_escape);
38+
}
39+
else {
40+
RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
41+
}
3142
}
3243
Py_DECREF(msg);
3344
return -1;
@@ -118,7 +129,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
118129
v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape);
119130

120131
if (v != NULL && first_invalid_escape != NULL) {
121-
if (warn_invalid_escape_sequence(parser, *first_invalid_escape, t) < 0) {
132+
if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) {
122133
/* We have not decref u before because first_invalid_escape points
123134
inside u. */
124135
Py_XDECREF(u);
@@ -140,7 +151,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
140151
}
141152

142153
if (first_invalid_escape != NULL) {
143-
if (warn_invalid_escape_sequence(p, *first_invalid_escape, t) < 0) {
154+
if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) {
144155
Py_DECREF(result);
145156
return NULL;
146157
}
@@ -357,7 +368,7 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
357368
break;
358369
}
359370
}
360-
371+
361372
if (s == expr_end) {
362373
if (*expr_end == '!' || *expr_end == ':' || *expr_end == '=') {
363374
RAISE_SYNTAX_ERROR("f-string: expression required before '%c'", *expr_end);
@@ -465,7 +476,7 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw,
465476
decode_unicode_with_escapes(). */
466477
continue;
467478
}
468-
if (ch == '{' && warn_invalid_escape_sequence(p, ch, t) < 0) {
479+
if (ch == '{' && warn_invalid_escape_sequence(p, s-1, t) < 0) {
469480
return -1;
470481
}
471482
}

0 commit comments

Comments
 (0)