Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-81548: Deprecate octal sequences with value larger than 0o377 #91668

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Doc/reference/lexical_analysis.rst
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,11 @@ Notes:
(1)
As in Standard C, up to three octal digits are accepted.

.. versionchanged:: 3.11
Octal escapes with a value larger than ``0o377`` (for example, ``"\477"``)
produce a :exc:`DeprecationWarning`.
In a future Python version they will produce a :exc:`SyntaxWarning` and
eventually a :exc:`SyntaxError`.

(2)
Unlike in Standard C, exactly two hex digits are required.

Expand Down
6 changes: 6 additions & 0 deletions Doc/whatsnew/3.11.rst
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,12 @@ CPython bytecode changes
Deprecated
==========

* Octal escapes with a value larger than ``0o377`` now produce
a :exc:`DeprecationWarning`.
In a future Python version they will produce a :exc:`SyntaxWarning` and
eventually a :exc:`SyntaxError`.
(Contributed by Serhiy Storchaka in :issue:`81548`.)

* The :mod:`lib2to3` package and ``2to3`` tool are now deprecated and may not
be able to parse Python 3.10 or newer. See the :pep:`617` (New PEG parser for
CPython). (Contributed by Victor Stinner in :issue:`40360`.)
Expand Down
7 changes: 6 additions & 1 deletion Lib/test/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1193,7 +1193,6 @@ def test_escape(self):
check(br"[\418]", b"[!8]")
check(br"[\101]", b"[A]")
check(br"[\1010]", b"[A0]")
check(br"[\501]", b"[A]")
check(br"[\x41]", b"[A]")
check(br"[\x410]", b"[A0]")
for i in range(97, 123):
Expand All @@ -1209,6 +1208,9 @@ def test_escape(self):
check(br"\9", b"\\9")
with self.assertWarns(DeprecationWarning):
check(b"\\\xfa", b"\\\xfa")
for i in range(0o400, 0o1000):
with self.assertWarns(DeprecationWarning):
check(rb'\%o' % i, bytes([i & 0o377]))

def test_errors(self):
decode = codecs.escape_decode
Expand Down Expand Up @@ -2435,6 +2437,9 @@ def test_escape_decode(self):
check(br"\9", "\\9")
with self.assertWarns(DeprecationWarning):
check(b"\\\xfa", "\\\xfa")
for i in range(0o400, 0o1000):
with self.assertWarns(DeprecationWarning):
check(rb'\%o' % i, chr(i))

def test_decode_errors(self):
decode = codecs.unicode_escape_decode
Expand Down
53 changes: 53 additions & 0 deletions Lib/test/test_string_literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def test_eval_str_invalid_escape(self):
warnings.simplefilter('always', category=DeprecationWarning)
eval("'''\n\\z'''")
self.assertEqual(len(w), 1)
self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 1)

Expand All @@ -125,6 +126,32 @@ def test_eval_str_invalid_escape(self):
eval("'''\n\\z'''")
exc = cm.exception
self.assertEqual(w, [])
self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
self.assertEqual(exc.filename, '<string>')
self.assertEqual(exc.lineno, 1)
self.assertEqual(exc.offset, 1)

def test_eval_str_invalid_octal_escape(self):
    # Every three-digit octal escape from \400 through \777 must emit a
    # DeprecationWarning while still evaluating to the character whose
    # code point equals the octal value.
    for code in range(0o400, 0o1000):
        literal = r"'\%o'" % code
        with self.assertWarns(DeprecationWarning):
            self.assertEqual(eval(literal), chr(code))

    # The offending escape follows a newline inside a triple-quoted
    # literal; the checks below pin the location reported for it.
    source = "'''\n\\407'''"
    expected = r"invalid octal escape sequence '\407'"

    # With the warning always shown, exactly one warning is recorded.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always', category=DeprecationWarning)
        eval(source)
    self.assertEqual(len(caught), 1)
    recorded = caught[0]
    self.assertEqual(str(recorded.message), expected)
    self.assertEqual(recorded.filename, '<string>')
    self.assertEqual(recorded.lineno, 1)

    # With the warning turned into an error, eval() raises SyntaxError
    # and nothing is left in the recorded-warnings list.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('error', category=DeprecationWarning)
        with self.assertRaises(SyntaxError) as raised:
            eval(source)
        error = raised.exception
    self.assertEqual(caught, [])
    self.assertEqual(error.msg, expected)
    self.assertEqual(error.filename, '<string>')
    self.assertEqual(error.lineno, 1)
    self.assertEqual(error.offset, 1)
Expand Down Expand Up @@ -166,6 +193,7 @@ def test_eval_bytes_invalid_escape(self):
warnings.simplefilter('always', category=DeprecationWarning)
eval("b'''\n\\z'''")
self.assertEqual(len(w), 1)
self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 1)

Expand All @@ -175,6 +203,31 @@ def test_eval_bytes_invalid_escape(self):
eval("b'''\n\\z'''")
exc = cm.exception
self.assertEqual(w, [])
self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
self.assertEqual(exc.filename, '<string>')
self.assertEqual(exc.lineno, 1)

def test_eval_bytes_invalid_octal_escape(self):
    # Every three-digit octal escape from \400 through \777 must emit a
    # DeprecationWarning; in a bytes literal the value is reduced to its
    # low eight bits.
    for code in range(0o400, 0o1000):
        literal = r"b'\%o'" % code
        with self.assertWarns(DeprecationWarning):
            self.assertEqual(eval(literal), bytes([code & 0o377]))

    # The offending escape follows a newline inside a triple-quoted
    # bytes literal; the checks below pin the location reported for it.
    source = "b'''\n\\407'''"
    expected = r"invalid octal escape sequence '\407'"

    # With the warning always shown, exactly one warning is recorded.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always', category=DeprecationWarning)
        eval(source)
    self.assertEqual(len(caught), 1)
    recorded = caught[0]
    self.assertEqual(str(recorded.message), expected)
    self.assertEqual(recorded.filename, '<string>')
    self.assertEqual(recorded.lineno, 1)

    # With the warning turned into an error, eval() raises SyntaxError
    # and nothing is left in the recorded-warnings list.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('error', category=DeprecationWarning)
        with self.assertRaises(SyntaxError) as raised:
            eval(source)
        error = raised.exception
    self.assertEqual(caught, [])
    self.assertEqual(error.msg, expected)
    self.assertEqual(error.filename, '<string>')
    self.assertEqual(error.lineno, 1)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Octal escapes with a value larger than ``0o377`` now produce a
:exc:`DeprecationWarning`. In a future Python version they will produce a
:exc:`SyntaxWarning` and eventually a :exc:`SyntaxError`.
29 changes: 24 additions & 5 deletions Objects/bytesobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1113,6 +1113,12 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
if (s < end && '0' <= *s && *s <= '7')
c = (c<<3) + *s++ - '0';
}
if (c > 0377) {
if (*first_invalid_escape == NULL) {
*first_invalid_escape = s-3; /* Back up 3 chars, since we've
already incremented s. */
}
}
*p++ = c;
break;
case 'x':
Expand Down Expand Up @@ -1179,11 +1185,24 @@ PyObject *PyBytes_DecodeEscape(const char *s,
if (result == NULL)
return NULL;
if (first_invalid_escape != NULL) {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"invalid escape sequence '\\%c'",
(unsigned char)*first_invalid_escape) < 0) {
Py_DECREF(result);
return NULL;
unsigned char c = *first_invalid_escape;
if ('4' <= c && c <= '7') {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"invalid octal escape sequence '\\%.3s'",
first_invalid_escape) < 0)
{
Py_DECREF(result);
return NULL;
}
}
else {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"invalid escape sequence '\\%c'",
c) < 0)
{
Py_DECREF(result);
return NULL;
}
}
}
return result;
Expand Down
29 changes: 24 additions & 5 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -6403,6 +6403,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
ch = (ch<<3) + *s++ - '0';
}
}
if (ch > 0377) {
if (*first_invalid_escape == NULL) {
*first_invalid_escape = s-3; /* Back up 3 chars, since we've
already incremented s. */
}
}
WRITE_CHAR(ch);
continue;

Expand Down Expand Up @@ -6553,11 +6559,24 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
if (result == NULL)
return NULL;
if (first_invalid_escape != NULL) {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"invalid escape sequence '\\%c'",
(unsigned char)*first_invalid_escape) < 0) {
Py_DECREF(result);
return NULL;
unsigned char c = *first_invalid_escape;
if ('4' <= c && c <= '7') {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"invalid octal escape sequence '\\%.3s'",
first_invalid_escape) < 0)
{
Py_DECREF(result);
return NULL;
}
}
else {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"invalid escape sequence '\\%c'",
c) < 0)
{
Py_DECREF(result);
return NULL;
}
}
}
return result;
Expand Down
25 changes: 18 additions & 7 deletions Parser/string_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,15 @@
//// STRING HANDLING FUNCTIONS ////

static int
warn_invalid_escape_sequence(Parser *p, unsigned char first_invalid_escape_char, Token *t)
warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t)
{
unsigned char c = *first_invalid_escape;
int octal = ('4' <= c && c <= '7');
PyObject *msg =
PyUnicode_FromFormat("invalid escape sequence '\\%c'", first_invalid_escape_char);
octal
? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'",
first_invalid_escape)
: PyUnicode_FromFormat("invalid escape sequence '\\%c'", c);
if (msg == NULL) {
return -1;
}
Expand All @@ -27,7 +32,13 @@ warn_invalid_escape_sequence(Parser *p, unsigned char first_invalid_escape_char,
since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
error location, if p->known_err_token is not set. */
p->known_err_token = t;
RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", first_invalid_escape_char);
if (octal) {
RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'",
first_invalid_escape);
}
else {
RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
}
}
Py_DECREF(msg);
return -1;
Expand Down Expand Up @@ -118,7 +129,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape);

if (v != NULL && first_invalid_escape != NULL) {
if (warn_invalid_escape_sequence(parser, *first_invalid_escape, t) < 0) {
if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) {
/* We have not decref u before because first_invalid_escape points
inside u. */
Py_XDECREF(u);
Expand All @@ -140,7 +151,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
}

if (first_invalid_escape != NULL) {
if (warn_invalid_escape_sequence(p, *first_invalid_escape, t) < 0) {
if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) {
Py_DECREF(result);
return NULL;
}
Expand Down Expand Up @@ -357,7 +368,7 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
break;
}
}

if (s == expr_end) {
if (*expr_end == '!' || *expr_end == ':' || *expr_end == '=') {
RAISE_SYNTAX_ERROR("f-string: expression required before '%c'", *expr_end);
Expand Down Expand Up @@ -465,7 +476,7 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw,
decode_unicode_with_escapes(). */
continue;
}
if (ch == '{' && warn_invalid_escape_sequence(p, ch, t) < 0) {
if (ch == '{' && warn_invalid_escape_sequence(p, s-1, t) < 0) {
return -1;
}
}
Expand Down