Skip to content

Commit 74c72a2

Browse files
authored
gh-109425: regrtest decodes worker stdout with backslashreplace (#109428)
libregrtest now decodes stdout of test worker processes with the "backslashreplace" error handler to log corrupted stdout, instead of failing with an error and not logging the stdout.
1 parent 68a6f21 commit 74c72a2

File tree

3 files changed

+37
-16
lines changed

3 files changed

+37
-16
lines changed

Lib/test/libregrtest/run_workers.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,12 @@ def create_stdout(self, stack: contextlib.ExitStack) -> TextIO:
218218

219219
# gh-94026: Write stdout+stderr to a tempfile as workaround for
220220
# non-blocking pipes on Emscripten with NodeJS.
221-
stdout_file = tempfile.TemporaryFile('w+', encoding=encoding)
221+
# gh-109425: Use "backslashreplace" error handler: log corrupted
222+
# stdout+stderr, instead of failing with a UnicodeDecodeError and not
223+
# logging stdout+stderr at all.
224+
stdout_file = tempfile.TemporaryFile('w+',
225+
encoding=encoding,
226+
errors='backslashreplace')
222227
stack.enter_context(stdout_file)
223228
return stdout_file
224229

Lib/test/test_regrtest.py

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -421,10 +421,12 @@ def regex_search(self, regex, output):
421421
self.fail("%r not found in %r" % (regex, output))
422422
return match
423423

424-
def check_line(self, output, regex, full=False):
424+
def check_line(self, output, pattern, full=False, regex=True):
425+
if not regex:
426+
pattern = re.escape(pattern)
425427
if full:
426-
regex += '\n'
427-
regex = re.compile(r'^' + regex, re.MULTILINE)
428+
pattern += '\n'
429+
regex = re.compile(r'^' + pattern, re.MULTILINE)
428430
self.assertRegex(output, regex)
429431

430432
def parse_executed_tests(self, output):
@@ -1755,9 +1757,8 @@ def test_leak_tmp_file(self):
17551757
f"files (1): mytmpfile",
17561758
output)
17571759

1758-
def test_mp_decode_error(self):
1759-
# gh-101634: If a worker stdout cannot be decoded, report a failed test
1760-
# and a non-zero exit code.
1760+
def test_worker_decode_error(self):
1761+
# gh-109425: Use "backslashreplace" error handler to decode stdout.
17611762
if sys.platform == 'win32':
17621763
encoding = locale.getencoding()
17631764
else:
@@ -1767,29 +1768,41 @@ def test_mp_decode_error(self):
17671768
if encoding is None:
17681769
self.skipTest("cannot get regrtest worker encoding")
17691770

1770-
nonascii = b"byte:\xa0\xa9\xff\n"
1771+
nonascii = bytes(ch for ch in range(128, 256))
1772+
corrupted_output = b"nonascii:%s\n" % (nonascii,)
1773+
# gh-108989: On Windows, assertion errors are written in UTF-16: when
1774+
# decoded, each letter is followed by a NUL character.
1775+
assertion_failed = 'Assertion failed: tstate_is_alive(tstate)\n'
1776+
corrupted_output += assertion_failed.encode('utf-16-le')
17711777
try:
1772-
nonascii.decode(encoding)
1778+
corrupted_output.decode(encoding)
17731779
except UnicodeDecodeError:
17741780
pass
17751781
else:
1776-
self.skipTest(f"{encoding} can decode non-ASCII bytes {nonascii!a}")
1782+
self.skipTest(f"{encoding} can decode non-ASCII bytes")
1783+
1784+
expected_line = corrupted_output.decode(encoding, 'backslashreplace')
17771785

17781786
code = textwrap.dedent(fr"""
17791787
import sys
1788+
import unittest
1789+
1790+
class Tests(unittest.TestCase):
1791+
def test_pass(self):
1792+
pass
1793+
17801794
# bytes which cannot be decoded from UTF-8
1781-
nonascii = {nonascii!a}
1782-
sys.stdout.buffer.write(nonascii)
1795+
corrupted_output = {corrupted_output!a}
1796+
sys.stdout.buffer.write(corrupted_output)
17831797
sys.stdout.buffer.flush()
17841798
""")
17851799
testname = self.create_test(code=code)
17861800

1787-
output = self.run_tests("--fail-env-changed", "-v", "-j1", testname,
1788-
exitcode=EXITCODE_BAD_TEST)
1801+
output = self.run_tests("--fail-env-changed", "-v", "-j1", testname)
17891802
self.check_executed_tests(output, [testname],
1790-
failed=[testname],
17911803
parallel=True,
1792-
stats=0)
1804+
stats=1)
1805+
self.check_line(output, expected_line, regex=False)
17931806

17941807
def test_doctest(self):
17951808
code = textwrap.dedent(r'''
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
libregrtest now decodes stdout of test worker processes with the
2+
"backslashreplace" error handler to log corrupted stdout, instead of failing
3+
with an error and not logging the stdout. Patch by Victor Stinner.

0 commit comments

Comments
 (0)