From 45f0bcc81c40711ec5fda7a69696595292e07542 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 8 Sep 2022 11:20:06 +0300 Subject: [PATCH 1/6] gh-94808: Cover `%p` in `PyUnicode_FromFormat` --- Lib/test/test_unicode.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 63bccb72e04646..b28c3be8ed94b4 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2807,6 +2807,20 @@ def check_format(expected, format, *args): check_format('repr=abc', b'repr=%V', 'abc', b'xyz') + # test %p + # We cannot test the exact result, + # because it returns a hex representation of a C pointer, + # which is going to be different each time. But, we can test the format. + p_format1 = PyUnicode_FromFormat(b'%p', 'abc') + self.assertIsInstance(p_format1, str) + self.assertTrue(p_format1.startswith('0x')) + self.assertEqual(len(p_format1), 11) + + p_format2 = PyUnicode_FromFormat(b'repr=%p', b'xyz') + self.assertIsInstance(p_format2, str) + self.assertTrue(p_format2.startswith('repr=0x')) + self.assertEqual(len(p_format2), 11 + 5) + # Test string decode from parameter of %s using utf-8. # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of # '\u4eba\u6c11' From 9faf155ce00f5e599e12894802fe5a62d1f5cbb8 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 8 Sep 2022 12:09:58 +0300 Subject: [PATCH 2/6] Remove `len` assumptions from `%p` --- Lib/test/test_unicode.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index b28c3be8ed94b4..76076af39866a1 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2814,12 +2814,10 @@ def check_format(expected, format, *args): p_format1 = PyUnicode_FromFormat(b'%p', 'abc') self.assertIsInstance(p_format1, str) self.assertTrue(p_format1.startswith('0x')) - self.assertEqual(len(p_format1), 11) - p_format2 = PyUnicode_FromFormat(b'repr=%p', b'xyz') + p_format2 = PyUnicode_FromFormat(b'repr=%p', '123456', b'xyz') self.assertIsInstance(p_format2, str) self.assertTrue(p_format2.startswith('repr=0x')) - self.assertEqual(len(p_format2), 11 + 5) # Test string decode from parameter of %s using utf-8. # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of From ad127bd31cc5a566a3f1b1f0ea6c156630d81e43 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 8 Sep 2022 14:24:21 +0300 Subject: [PATCH 3/6] Use regex --- Lib/test/test_unicode.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 76076af39866a1..0fdddbd9688917 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2811,13 +2811,14 @@ def check_format(expected, format, *args): # We cannot test the exact result, # because it returns a hex representation of a C pointer, # which is going to be different each time. But, we can test the format. + p_format_regex = r'0x[a-zA-Z0-9]{8,}' p_format1 = PyUnicode_FromFormat(b'%p', 'abc') self.assertIsInstance(p_format1, str) - self.assertTrue(p_format1.startswith('0x')) + self.assertRegex(p_format1, p_format_regex) - p_format2 = PyUnicode_FromFormat(b'repr=%p', '123456', b'xyz') + p_format2 = PyUnicode_FromFormat(b'repr=%p', '123456', None, b'xyz') self.assertIsInstance(p_format2, str) - self.assertTrue(p_format2.startswith('repr=0x')) + self.assertRegex(p_format2, p_format_regex) # Test string decode from parameter of %s using utf-8. # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of From 0213fab433a46575676a3860a20cbe5cd837643e Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Fri, 7 Oct 2022 00:30:00 +0300 Subject: [PATCH 4/6] Address review --- Lib/test/test_unicode.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 0fdddbd9688917..24582b183f6718 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2816,9 +2816,15 @@ def check_format(expected, format, *args): self.assertIsInstance(p_format1, str) self.assertRegex(p_format1, p_format_regex) - p_format2 = PyUnicode_FromFormat(b'repr=%p', '123456', None, b'xyz') + p_format2 = PyUnicode_FromFormat(b'%p %p %p', '123456', None, b'xyz') self.assertIsInstance(p_format2, str) - self.assertRegex(p_format2, p_format_regex) + self.assertRegex(p_format2, + r'0x[a-zA-Z0-9]{8,} 0x[a-zA-Z0-9]{1,} 0x[a-zA-Z0-9]{8,}') + + # Extra args are ignored: + p_format3 = PyUnicode_FromFormat(b'%p', '123456', None, b'xyz') + self.assertIsInstance(p_format3, str) + self.assertRegex(p_format3, p_format_regex) # Test string decode from parameter of %s using utf-8. # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of From af4ba720358445df3127e33a21a50e299d2d1d81 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Fri, 7 Oct 2022 01:04:35 +0300 Subject: [PATCH 5/6] Update test_unicode.py --- Lib/test/test_unicode.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 33b1fbd9359a99..c8e2bca46f21ee 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2816,10 +2816,10 @@ def check_format(expected, format, *args): self.assertIsInstance(p_format1, str) self.assertRegex(p_format1, p_format_regex) - p_format2 = PyUnicode_FromFormat(b'%p %p %p', '123456', None, b'xyz') + p_format2 = PyUnicode_FromFormat(b'%p %p', '123456', b'xyz') self.assertIsInstance(p_format2, str) self.assertRegex(p_format2, - r'0x[a-zA-Z0-9]{8,} 0x[a-zA-Z0-9]{1,} 0x[a-zA-Z0-9]{8,}') + r'0x[a-zA-Z0-9]{8,} 0x[a-zA-Z0-9]{8,}') # Extra args are ignored: p_format3 = PyUnicode_FromFormat(b'%p', '123456', None, b'xyz') From cb76e610e39a3ce5fdc0b3f6e46083e7587ced26 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Fri, 7 Oct 2022 08:00:51 -0700 Subject: [PATCH 6/6] Update Lib/test/test_unicode.py --- Lib/test/test_unicode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index c8e2bca46f21ee..b9ee9d30318c46 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2811,7 +2811,7 @@ def check_format(expected, format, *args): # We cannot test the exact result, # because it returns a hex representation of a C pointer, # which is going to be different each time. But, we can test the format. - p_format_regex = r'0x[a-zA-Z0-9]{8,}' + p_format_regex = r'^0x[a-zA-Z0-9]{8,}$' p_format1 = PyUnicode_FromFormat(b'%p', 'abc') self.assertIsInstance(p_format1, str) self.assertRegex(p_format1, p_format_regex)