-
-
Notifications
You must be signed in to change notification settings - Fork 30.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
bpo-46659: Update the test on the mbcs codec alias #31168
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1904,7 +1904,10 @@ def test_basics(self): | |
name += "_codec" | ||
elif encoding == "latin_1": | ||
name = "latin_1" | ||
self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-")) | ||
# Skip the mbcs alias on Windows | ||
if name != "mbcs": | ||
self.assertEqual(encoding.replace("_", "-"), | ||
name.replace("_", "-")) | ||
|
||
(b, size) = codecs.getencoder(encoding)(s) | ||
self.assertEqual(size, len(s), "encoding=%r" % encoding) | ||
|
@@ -3188,11 +3191,13 @@ def test_incremental(self): | |
self.assertEqual(decoded, ('abc', 3)) | ||
|
||
def test_mbcs_alias(self): | ||
# Check that looking up our 'default' codepage will return | ||
# mbcs when we don't have a more specific one available | ||
with mock.patch('_winapi.GetACP', return_value=123): | ||
codec = codecs.lookup('cp123') | ||
self.assertEqual(codec.name, 'mbcs') | ||
# On Windows, the encoding name must be the ANSI code page | ||
encoding = locale.getpreferredencoding(False) | ||
self.assertTrue(encoding.startswith('cp'), encoding) | ||
|
||
# The encodings module create a "mbcs" alias to the ANSI code page | ||
codec = codecs.lookup(encoding) | ||
self.assertEqual(codec.name, "mbcs") | ||
Comment on lines
+3198
to
+3200
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This was never true before. With 1252 as my ANSI code page, I checked […]. The differences are that "mbcs" maps every byte, whereas our code-page encodings do not map undefined bytes, and the "replace" handler of "mbcs" uses a best-fit mapping (e.g. "α" -> "a") when encoding text, instead of mapping all undefined characters to "?". There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This issue is worse than I expected; I created https://bugs.python.org/issue46668 to discuss it. |
||
|
||
@support.bigmemtest(size=2**31, memuse=7, dry_run=False) | ||
def test_large_input(self, size): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will fail if `PYTHONUTF8` is set in the environment, because it overrides `getpreferredencoding(False)` and `_get_locale_encoding()`.