Skip to content

Commit 110b6fe

Browse files
committed
#27364: Deprecate invalid escape sequences in str/bytes.
Patch by Emanuel Barry, reviewed by Serhiy Storchaka and Martin Panter.
1 parent 186122e commit 110b6fe

File tree

7 files changed

+48
-12
lines changed

7 files changed

+48
-12
lines changed

Doc/reference/lexical_analysis.rst

+4
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,10 @@ is more easily recognized as broken.) It is also important to note that the
560560
escape sequences only recognized in string literals fall into the category of
561561
unrecognized escapes for bytes literals.
562562

563+
.. versionchanged:: 3.6
564+
Unrecognized escape sequences produce a DeprecationWarning. In
565+
some future version of Python they will be a SyntaxError.
566+
563567
Even in a raw literal, quotes can be escaped with a backslash, but the
564568
backslash remains in the result; for example, ``r"\""`` is a valid string
565569
literal consisting of two characters: a backslash and a double quote; ``r"\"``

Doc/whatsnew/3.6.rst

+5
Original file line numberDiff line numberDiff line change
@@ -952,6 +952,11 @@ Deprecated features
952952
parameter will be dropped in a future Python release and likely earlier
953953
through third party tools. See :issue:`27919` for details.
954954

955+
* A backslash-character pair that is not a valid escape sequence now generates
956+
a DeprecationWarning. Although this will eventually become a SyntaxError,
957+
that will not be for several Python releases. (Contributed by Emanuel Barry
958+
in :issue:`27364`.)
959+
955960

956961
Deprecated Python behavior
957962
--------------------------

Lib/test/test_codecs.py

+24-11
Original file line numberDiff line numberDiff line change
@@ -1175,7 +1175,7 @@ def test_escape(self):
11751175
check(b"[\\\n]", b"[]")
11761176
check(br'[\"]', b'["]')
11771177
check(br"[\']", b"[']")
1178-
check(br"[\\]", br"[\]")
1178+
check(br"[\\]", b"[\\]")
11791179
check(br"[\a]", b"[\x07]")
11801180
check(br"[\b]", b"[\x08]")
11811181
check(br"[\t]", b"[\x09]")
@@ -1184,20 +1184,25 @@ def test_escape(self):
11841184
check(br"[\f]", b"[\x0c]")
11851185
check(br"[\r]", b"[\x0d]")
11861186
check(br"[\7]", b"[\x07]")
1187-
check(br"[\8]", br"[\8]")
11881187
check(br"[\78]", b"[\x078]")
11891188
check(br"[\41]", b"[!]")
11901189
check(br"[\418]", b"[!8]")
11911190
check(br"[\101]", b"[A]")
11921191
check(br"[\1010]", b"[A0]")
11931192
check(br"[\501]", b"[A]")
11941193
check(br"[\x41]", b"[A]")
1195-
check(br"[\X41]", br"[\X41]")
11961194
check(br"[\x410]", b"[A0]")
1197-
for b in range(256):
1198-
if b not in b'\n"\'\\abtnvfr01234567x':
1199-
b = bytes([b])
1200-
check(b'\\' + b, b'\\' + b)
1195+
for i in range(97, 123):
1196+
b = bytes([i])
1197+
if b not in b'abfnrtvx':
1198+
with self.assertWarns(DeprecationWarning):
1199+
check(b"\\" + b, b"\\" + b)
1200+
with self.assertWarns(DeprecationWarning):
1201+
check(b"\\" + b.upper(), b"\\" + b.upper())
1202+
with self.assertWarns(DeprecationWarning):
1203+
check(br"\8", b"\\8")
1204+
with self.assertWarns(DeprecationWarning):
1205+
check(br"\9", b"\\9")
12011206

12021207
def test_errors(self):
12031208
decode = codecs.escape_decode
@@ -2448,7 +2453,6 @@ def test_escape_decode(self):
24482453
check(br"[\f]", "[\x0c]")
24492454
check(br"[\r]", "[\x0d]")
24502455
check(br"[\7]", "[\x07]")
2451-
check(br"[\8]", r"[\8]")
24522456
check(br"[\78]", "[\x078]")
24532457
check(br"[\41]", "[!]")
24542458
check(br"[\418]", "[!8]")
@@ -2458,9 +2462,18 @@ def test_escape_decode(self):
24582462
check(br"[\x410]", "[A0]")
24592463
check(br"\u20ac", "\u20ac")
24602464
check(br"\U0001d120", "\U0001d120")
2461-
for b in range(256):
2462-
if b not in b'\n"\'\\abtnvfr01234567xuUN':
2463-
check(b'\\' + bytes([b]), '\\' + chr(b))
2465+
for i in range(97, 123):
2466+
b = bytes([i])
2467+
if b not in b'abfnrtuvx':
2468+
with self.assertWarns(DeprecationWarning):
2469+
check(b"\\" + b, "\\" + chr(i))
2470+
if b.upper() not in b'UN':
2471+
with self.assertWarns(DeprecationWarning):
2472+
check(b"\\" + b.upper(), "\\" + chr(i-32))
2473+
with self.assertWarns(DeprecationWarning):
2474+
check(br"\8", "\\8")
2475+
with self.assertWarns(DeprecationWarning):
2476+
check(br"\9", "\\9")
24642477

24652478
def test_decode_errors(self):
24662479
decode = codecs.unicode_escape_decode

Lib/test/test_unicode.py

+7
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import itertools
1111
import operator
1212
import struct
13+
import string
1314
import sys
1415
import unittest
1516
import warnings
@@ -2752,6 +2753,12 @@ def test_free_after_iterating(self):
27522753
support.check_free_after_iterating(self, iter, str)
27532754
support.check_free_after_iterating(self, reversed, str)
27542755

2756+
def test_invalid_sequences(self):
2757+
for letter in string.ascii_letters + "89": # 0-7 are octal escapes
2758+
if letter in "abfnrtuvxNU":
2759+
continue
2760+
with self.assertWarns(DeprecationWarning):
2761+
eval(r"'\%s'" % letter)
27552762

27562763
class StringModuleTest(unittest.TestCase):
27572764
def test_formatter_parser(self):

Misc/NEWS

+3
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #27364: A backslash-character pair that is not a valid escape sequence
14+
now generates a DeprecationWarning.
15+
1316
- Issue #27350: The `dict` implementation now follows PyPy's design. It is more compact
1417
and preserves insertion order.
1518
(Concept developed by Raymond Hettinger and patch by Inada Naoki.)

Objects/bytesobject.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -1207,8 +1207,9 @@ PyObject *PyBytes_DecodeEscape(const char *s,
12071207
break;
12081208

12091209
default:
1210+
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "invalid escape sequence '\\%c'", *(--s)) < 0)
1211+
goto failed;
12101212
*p++ = '\\';
1211-
s--;
12121213
goto non_esc; /* an arbitrary number of unescaped
12131214
UTF-8 bytes may follow. */
12141215
}

Objects/unicodeobject.c

+3
Original file line numberDiff line numberDiff line change
@@ -6065,6 +6065,9 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
60656065
goto error;
60666066

60676067
default:
6068+
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
6069+
"invalid escape sequence '\\%c'", c) < 0)
6070+
goto onError;
60686071
WRITE_ASCII_CHAR('\\');
60696072
WRITE_CHAR(c);
60706073
continue;

0 commit comments

Comments
 (0)