Skip to content

Commit 3529718

Browse files
authored
bpo-42236: os.device_encoding() respects UTF-8 Mode (GH-23119)
On Unix, the os.device_encoding() function now returns 'UTF-8' rather than the device encoding if the Python UTF-8 Mode is enabled.
1 parent 0001a1b commit 3529718

File tree

5 files changed

+52
-17
lines changed

5 files changed

+52
-17
lines changed

Doc/library/os.rst

+8
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ of the UTF-8 encoding:
113113
:ref:`error handler <error-handlers>` being enabled for :data:`sys.stdin`
114114
and :data:`sys.stdout` (:data:`sys.stderr` continues to use
115115
``backslashreplace`` as it does in the default locale-aware mode)
116+
* On Unix, :func:`os.device_encoding` returns ``'UTF-8'`` rather than the
117+
device encoding.
116118

117119
Note that the standard stream settings in UTF-8 mode can be overridden by
118120
:envvar:`PYTHONIOENCODING` (just as they can be in the default locale-aware
@@ -808,6 +810,12 @@ as internal buffering of data.
808810
Return a string describing the encoding of the device associated with *fd*
809811
if it is connected to a terminal; else return :const:`None`.
810812

813+
On Unix, if the :ref:`Python UTF-8 Mode <utf8-mode>` is enabled, return
814+
``'UTF-8'`` rather than the device encoding.
815+
816+
.. versionchanged:: 3.10
817+
On Unix, the function now implements the Python UTF-8 Mode.
818+
811819

812820
.. function:: dup(fd)
813821

Lib/test/test_utf8_mode.py

+27
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
"""
44

55
import locale
6+
import subprocess
67
import sys
78
import textwrap
89
import unittest
910
from test import support
1011
from test.support.script_helper import assert_python_ok, assert_python_failure
12+
from test.support import os_helper
1113

1214

1315
MS_WINDOWS = (sys.platform == 'win32')
@@ -250,6 +252,31 @@ def test_optim_level(self):
250252
out = self.get_output('-X', 'utf8', '-E', '-c', code)
251253
self.assertEqual(out, '1')
252254

255+
@unittest.skipIf(MS_WINDOWS,
256+
"os.device_encoding() doesn't implement "
257+
"the UTF-8 Mode on Windows")
258+
def test_device_encoding(self):
259+
# Use stdout as TTY
260+
if not sys.stdout.isatty():
261+
self.skipTest("sys.stdout is not a TTY")
262+
263+
filename = 'out.txt'
264+
self.addCleanup(os_helper.unlink, filename)
265+
266+
code = (f'import os, sys; fd = sys.stdout.fileno(); '
267+
f'out = open({filename!r}, "w", encoding="utf-8"); '
268+
f'print(os.isatty(fd), os.device_encoding(fd), file=out); '
269+
f'out.close()')
270+
cmd = [sys.executable, '-X', 'utf8', '-c', code]
271+
# The stdout TTY is inherited by the child process
272+
proc = subprocess.run(cmd, text=True)
273+
self.assertEqual(proc.returncode, 0, proc)
274+
275+
# In UTF-8 Mode, device_encoding(fd) returns "UTF-8" if fd is a TTY
276+
with open(filename, encoding="utf8") as fp:
277+
out = fp.read().rstrip()
278+
self.assertEqual(out, 'True UTF-8')
279+
253280

254281
if __name__ == "__main__":
255282
unittest.main()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
On Unix, the :func:`os.device_encoding` function now returns ``'UTF-8'`` rather
2+
than the device encoding if the :ref:`Python UTF-8 Mode <utf8-mode>` is
3+
enabled.

Python/fileutils.c

+7-11
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,6 @@ get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
5555
PyObject *
5656
_Py_device_encoding(int fd)
5757
{
58-
#if defined(MS_WINDOWS)
59-
UINT cp;
60-
#endif
6158
int valid;
6259
_Py_BEGIN_SUPPRESS_IPH
6360
valid = isatty(fd);
@@ -66,6 +63,7 @@ _Py_device_encoding(int fd)
6663
Py_RETURN_NONE;
6764

6865
#if defined(MS_WINDOWS)
66+
UINT cp;
6967
if (fd == 0)
7068
cp = GetConsoleCP();
7169
else if (fd == 1 || fd == 2)
@@ -74,16 +72,14 @@ _Py_device_encoding(int fd)
7472
cp = 0;
7573
/* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
7674
has no console */
77-
if (cp != 0)
78-
return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
79-
#elif defined(CODESET)
80-
{
81-
char *codeset = nl_langinfo(CODESET);
82-
if (codeset != NULL && codeset[0] != 0)
83-
return PyUnicode_FromString(codeset);
75+
if (cp == 0) {
76+
Py_RETURN_NONE;
8477
}
78+
79+
return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
80+
#else
81+
return _Py_GetLocaleEncodingObject();
8582
#endif
86-
Py_RETURN_NONE;
8783
}
8884

8985
#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)

Python/initconfig.c

+7-6
Original file line numberDiff line numberDiff line change
@@ -1515,8 +1515,8 @@ config_init_stdio_encoding(PyConfig *config,
15151515
{
15161516
PyStatus status;
15171517

1518-
/* If Py_SetStandardStreamEncoding() have been called, use these
1519-
parameters. */
1518+
/* If Py_SetStandardStreamEncoding() has been called, use its
1519+
arguments if they are not NULL. */
15201520
if (config->stdio_encoding == NULL && _Py_StandardStreamEncoding != NULL) {
15211521
status = CONFIG_SET_BYTES_STR(config, &config->stdio_encoding,
15221522
_Py_StandardStreamEncoding,
@@ -1535,6 +1535,7 @@ config_init_stdio_encoding(PyConfig *config,
15351535
}
15361536
}
15371537

1538+
// Exit if encoding and errors are defined
15381539
if (config->stdio_encoding != NULL && config->stdio_errors != NULL) {
15391540
return _PyStatus_OK();
15401541
}
@@ -1634,12 +1635,12 @@ config_get_fs_encoding(PyConfig *config, const PyPreConfig *preconfig,
16341635
if (preconfig->utf8_mode) {
16351636
return PyConfig_SetString(config, fs_encoding, L"utf-8");
16361637
}
1637-
else if (_Py_GetForceASCII()) {
1638+
1639+
if (_Py_GetForceASCII()) {
16381640
return PyConfig_SetString(config, fs_encoding, L"ascii");
16391641
}
1640-
else {
1641-
return config_get_locale_encoding(config, preconfig, fs_encoding);
1642-
}
1642+
1643+
return config_get_locale_encoding(config, preconfig, fs_encoding);
16431644
#endif // !MS_WINDOWS
16441645
}
16451646

0 commit comments

Comments
 (0)