Skip to content

Commit

Permalink
#27364: Deprecate invalid escape strings in str/bytes.
Browse files Browse the repository at this point in the history
Patch by Emanuel Barry, reviewed by Serhiy Storchaka and Martin Panter.
  • Loading branch information
bitdancer committed Sep 8, 2016
1 parent 186122e commit 110b6fe
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 12 deletions.
4 changes: 4 additions & 0 deletions Doc/reference/lexical_analysis.rst
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,10 @@ is more easily recognized as broken.) It is also important to note that the
escape sequences only recognized in string literals fall into the category of
unrecognized escapes for bytes literals.

.. versionchanged:: 3.6
Unrecognized escape sequences produce a DeprecationWarning. In
some future version of Python they will be a SyntaxError.

Even in a raw literal, quotes can be escaped with a backslash, but the
backslash remains in the result; for example, ``r"\""`` is a valid string
literal consisting of two characters: a backslash and a double quote; ``r"\"``
Expand Down
5 changes: 5 additions & 0 deletions Doc/whatsnew/3.6.rst
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,11 @@ Deprecated features
parameter will be dropped in a future Python release and likely earlier
through third party tools. See :issue:`27919` for details.

* A backslash-character pair that is not a valid escape sequence now generates
a DeprecationWarning. Although this will eventually become a SyntaxError,
that will not be for several Python releases. (Contributed by Emanuel Barry
in :issue:`27364`.)


Deprecated Python behavior
--------------------------
Expand Down
35 changes: 24 additions & 11 deletions Lib/test/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1175,7 +1175,7 @@ def test_escape(self):
check(b"[\\\n]", b"[]")
check(br'[\"]', b'["]')
check(br"[\']", b"[']")
check(br"[\\]", br"[\]")
check(br"[\\]", b"[\\]")
check(br"[\a]", b"[\x07]")
check(br"[\b]", b"[\x08]")
check(br"[\t]", b"[\x09]")
Expand All @@ -1184,20 +1184,25 @@ def test_escape(self):
check(br"[\f]", b"[\x0c]")
check(br"[\r]", b"[\x0d]")
check(br"[\7]", b"[\x07]")
check(br"[\8]", br"[\8]")
check(br"[\78]", b"[\x078]")
check(br"[\41]", b"[!]")
check(br"[\418]", b"[!8]")
check(br"[\101]", b"[A]")
check(br"[\1010]", b"[A0]")
check(br"[\501]", b"[A]")
check(br"[\x41]", b"[A]")
check(br"[\X41]", br"[\X41]")
check(br"[\x410]", b"[A0]")
for b in range(256):
if b not in b'\n"\'\\abtnvfr01234567x':
b = bytes([b])
check(b'\\' + b, b'\\' + b)
for i in range(97, 123):
b = bytes([i])
if b not in b'abfnrtvx':
with self.assertWarns(DeprecationWarning):
check(b"\\" + b, b"\\" + b)
with self.assertWarns(DeprecationWarning):
check(b"\\" + b.upper(), b"\\" + b.upper())
with self.assertWarns(DeprecationWarning):
check(br"\8", b"\\8")
with self.assertWarns(DeprecationWarning):
check(br"\9", b"\\9")

def test_errors(self):
decode = codecs.escape_decode
Expand Down Expand Up @@ -2448,7 +2453,6 @@ def test_escape_decode(self):
check(br"[\f]", "[\x0c]")
check(br"[\r]", "[\x0d]")
check(br"[\7]", "[\x07]")
check(br"[\8]", r"[\8]")
check(br"[\78]", "[\x078]")
check(br"[\41]", "[!]")
check(br"[\418]", "[!8]")
Expand All @@ -2458,9 +2462,18 @@ def test_escape_decode(self):
check(br"[\x410]", "[A0]")
check(br"\u20ac", "\u20ac")
check(br"\U0001d120", "\U0001d120")
for b in range(256):
if b not in b'\n"\'\\abtnvfr01234567xuUN':
check(b'\\' + bytes([b]), '\\' + chr(b))
for i in range(97, 123):
b = bytes([i])
if b not in b'abfnrtuvx':
with self.assertWarns(DeprecationWarning):
check(b"\\" + b, "\\" + chr(i))
if b.upper() not in b'UN':
with self.assertWarns(DeprecationWarning):
check(b"\\" + b.upper(), "\\" + chr(i-32))
with self.assertWarns(DeprecationWarning):
check(br"\8", "\\8")
with self.assertWarns(DeprecationWarning):
check(br"\9", "\\9")

def test_decode_errors(self):
decode = codecs.unicode_escape_decode
Expand Down
7 changes: 7 additions & 0 deletions Lib/test/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import itertools
import operator
import struct
import string
import sys
import unittest
import warnings
Expand Down Expand Up @@ -2752,6 +2753,12 @@ def test_free_after_iterating(self):
support.check_free_after_iterating(self, iter, str)
support.check_free_after_iterating(self, reversed, str)

def test_invalid_sequences(self):
for letter in string.ascii_letters + "89": # 0-7 are octal escapes
if letter in "abfnrtuvxNU":
continue
with self.assertWarns(DeprecationWarning):
eval(r"'\%s'" % letter)

class StringModuleTest(unittest.TestCase):
def test_formatter_parser(self):
Expand Down
3 changes: 3 additions & 0 deletions Misc/NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1
Core and Builtins
-----------------

- Issue #27364: A backslash-character pair that is not a valid escape sequence
now generates a DeprecationWarning.

- Issue #27350: The `dict` implementation now follows PyPy's design. It is more
compact and preserves insertion order.
(Concept developed by Raymond Hettinger and patch by Inada Naoki.)
Expand Down
3 changes: 2 additions & 1 deletion Objects/bytesobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1207,8 +1207,9 @@ PyObject *PyBytes_DecodeEscape(const char *s,
break;

default:
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "invalid escape sequence '\\%c'", *(--s)) < 0)
goto failed;
*p++ = '\\';
s--;
goto non_esc; /* an arbitrary number of unescaped
UTF-8 bytes may follow. */
}
Expand Down
3 changes: 3 additions & 0 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -6065,6 +6065,9 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
goto error;

default:
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"invalid escape sequence '\\%c'", c) < 0)
goto onError;
WRITE_ASCII_CHAR('\\');
WRITE_CHAR(c);
continue;
Expand Down

0 comments on commit 110b6fe

Please sign in to comment.