Skip to content

GH-120754: Add a strace helper and test set of syscalls for open().read() #121143

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Aug 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
a44faf3
Add strace helper for tracing system calls made by python running spe…
cmaloney Jun 28, 2024
a462039
Update test_subprocess to use strace_helper
cmaloney Jun 28, 2024
2ab832d
Add test to FileIO that validates set of syscalls
cmaloney Jun 29, 2024
ef298f2
Move from assert to .assertEqual
cmaloney Jun 29, 2024
283a077
Allow libc to use different fstat variants
cmaloney Jun 29, 2024
3b6c094
Exit early if strace exited non-zero
cmaloney Jun 29, 2024
97b294f
Add myself to ACKS
cmaloney Jun 29, 2024
e5bdc6b
Add tests around pathlib read_*() behavior
cmaloney Jun 29, 2024
397cd9e
Remove subprocess._USE_VFORK strace test
cmaloney Jun 29, 2024
e88d414
Merge remote-tracking branch 'origin/main' into cmaloney/systrace_hel…
cmaloney Jul 4, 2024
d99157f
Update call sequence after gh-120755
cmaloney Jul 4, 2024
5664558
Add specific python bug links
cmaloney Jul 9, 2024
736d5bc
Reduce annotations, stay bytes longer, make raw_events non-private
cmaloney Jul 9, 2024
47ed7fe
Move _strace_working checks to all be in requires_strace
cmaloney Jul 9, 2024
55d1cec
formatting fixes, reduce annotations further
cmaloney Jul 10, 2024
6fe0961
Small cleanups from self review
cmaloney Jul 10, 2024
943b07d
Merge branch 'main' into cmaloney/systrace_helper_wip
cmaloney Jul 10, 2024
2ea2bc8
Adjust test cases to match more general system call shape
cmaloney Jul 10, 2024
cdf449a
raw_events -> event_bytes
cmaloney Jul 10, 2024
c44bca6
Add bits I forgot to commit
cmaloney Jul 10, 2024
0210d16
Merge remote-tracking branch 'main' into cmaloney/systrace_helper_wip
cmaloney Aug 1, 2024
a1b4028
Merge branch 'main' into cmaloney/systrace_helper_wip
cmaloney Aug 18, 2024
0c6ebe6
Switch to functools.cache, simplifying the code
cmaloney Aug 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 166 additions & 0 deletions Lib/test/support/strace_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
import re
import sys
import textwrap
import unittest
from dataclasses import dataclass
from functools import cache
from test import support
from test.support.script_helper import run_python_until_end

# Absolute path of the strace executable used to launch the traced interpreter.
_strace_binary = "/usr/bin/strace"
# Matches one strace event line of the form `syscall(args) = returncode`.
# The args group stops at the first ")", so it is only the text up to there.
_syscall_regex = re.compile(
    r"(?P<syscall>[^(]*)\((?P<args>[^)]*)\)\s*[=]\s*(?P<returncode>.+)")
# Matches the `+++ exited with N +++` line (as bytes) and captures N, the exit
# status of the traced program.
_returncode_regex = re.compile(
    br"\+\+\+ exited with (?P<returncode>\d+) \+\+\+")


@dataclass
class StraceEvent:
    """One system call parsed from a single line of strace output."""

    # Text found before the opening parenthesis of the event line.
    syscall: str
    # Argument text split on "," with surrounding whitespace stripped.
    args: list[str]
    # Everything that followed the "=" sign, kept as text.
    returncode: str


@dataclass
class StraceResult:
    """Outcome of one run of the Python interpreter under strace."""

    # Exit status of the strace process and of the traced interpreter.
    strace_returncode: int
    python_returncode: int

    # The event messages generated by strace. This is very similar to the
    # stderr strace produces with the returncode marker section removed.
    event_bytes: bytes
    # Raw captured standard output and standard error of the run.
    stdout: bytes
    stderr: bytes

    def events(self):
        """Parse event_bytes data into system calls for easier processing.

        Output of the program under inspection can mix into the strace
        output.  Bytes that are not valid UTF-8 are decoded with the
        ``surrogateescape`` error handler so that such output cannot make
        parsing raise UnicodeDecodeError.

        Lines that do not look like ``syscall(args) = returncode`` are
        skipped.  Returns a list of StraceEvent in the order the lines
        appear.
        """
        decoded_events = self.event_bytes.decode('utf-8', 'surrogateescape')
        parsed = []
        for line in decoded_events.splitlines():
            match = _syscall_regex.match(line)
            if not match:
                continue
            args = [arg.strip() for arg in match["args"].split(",")]
            parsed.append(
                StraceEvent(match["syscall"], args, match["returncode"]))
        return parsed

    def sections(self):
        """Find all "MARK <X>" writes and use them to make groups of events.

        This is useful to avoid variable / overhead events, like those at
        interpreter startup or when opening a file so a test can verify just
        the small case under study.

        Returns a dict mapping section name to the list of events seen while
        that section was current.  Events before the first mark are filed
        under "__startup".
        """
        current_section = "__startup"
        sections = {current_section: []}
        for event in self.events():
            is_mark = (event.syscall == 'write'
                       and len(event.args) > 2
                       and event.args[1].startswith("\"MARK "))
            if is_mark:
                # Found a new section, don't include the write in the section
                # but all events until next mark should be in that section
                current_section = event.args[1].split(
                    " ", 1)[1].removesuffix('\\n"')
                sections.setdefault(current_section, [])
            else:
                sections[current_section].append(event)

        return sections


@support.requires_subprocess()
def strace_python(code, strace_flags, check=True):
    """Run Python code under strace and return the trace.

    *code* is dedented and passed to the interpreter with ``-c``.
    *strace_flags* is any iterable of extra command-line arguments placed
    after the strace binary.  When *check* is true, a non-zero exit status
    from strace or from the traced interpreter is reported through
    ``res.fail(cmd_line)``.

    Returns a StraceResult.  If strace cannot be started or its output
    cannot be parsed, strace_returncode and python_returncode are set to
    `-1` and event_bytes holds a single synthetic ``error(...) = -1`` line.
    """
    res = None

    def _make_error(reason, details):
        # res is still None when launching strace itself failed, so there is
        # no captured output to report.  Use empty bytes (not str) so stdout
        # and stderr have the same type as on the success path.
        return StraceResult(
            strace_returncode=-1,
            python_returncode=-1,
            event_bytes=f"error({reason},details={details}) = -1".encode('utf-8'),
            stdout=res.out if res else b"",
            stderr=res.err if res else b"")

    # Run strace, and get out the raw text
    try:
        res, cmd_line = run_python_until_end(
            "-c",
            textwrap.dedent(code),
            # Unpacking accepts a list, tuple or any other iterable of flags.
            __run_using_command=[_strace_binary, *strace_flags])
    except OSError as err:
        return _make_error("Caught OSError", err)

    if check and res.rc:
        res.fail(cmd_line)

    # Get out program returncode: it is expected on the last line of stderr,
    # after all of the event lines.
    stripped = res.err.strip()
    output = stripped.rsplit(b"\n", 1)
    if len(output) != 2:
        return _make_error("Expected strace events and exit code line",
                           stripped[-50:])

    returncode_match = _returncode_regex.match(output[1])
    if not returncode_match:
        return _make_error("Expected to find returncode in last line.",
                           output[1][:50])

    python_returncode = int(returncode_match["returncode"])
    if check and python_returncode:
        res.fail(cmd_line)

    return StraceResult(strace_returncode=res.rc,
                        python_returncode=python_returncode,
                        event_bytes=output[0],
                        stdout=res.out,
                        stderr=res.err)


def _get_events(code, strace_flags, prelude, cleanup):
    """Trace *prelude*, *code* and *cleanup* and return the events of *code*.

    Each part is dedented and preceded by a print of a "MARK <name>" line,
    so the trace can be split into sections; only the events recorded in
    the "code" section are returned.
    """
    # NOTE: The flush is currently required to prevent the prints from getting
    # buffered and done all at once at exit
    script_lines = [
        "",
        'print("MARK prelude", flush=True)',
        textwrap.dedent(prelude),
        'print("MARK code", flush=True)',
        textwrap.dedent(code),
        'print("MARK cleanup", flush=True)',
        textwrap.dedent(cleanup),
        'print("MARK __shutdown", flush=True)',
        "",
    ]
    # The empty first and last items give the script its leading and
    # trailing newline.
    script = "\n".join(script_lines)
    result = strace_python(script, strace_flags)
    return result.sections()['code']


def get_syscalls(code, strace_flags, prelude="", cleanup=""):
    """Return the names of the syscalls a given chunk of python code makes.

    *prelude* runs before *code* and *cleanup* after it; the system calls
    made by those two parts are not included in the returned list.
    """
    syscall_names = []
    for event in _get_events(code, strace_flags,
                             prelude=prelude, cleanup=cleanup):
        syscall_names.append(event.syscall)
    return syscall_names


# Moderately expensive (spawns a subprocess), so share results when possible.
@cache
def _can_strace():
    """Return True if strace can trace a trivial Python program.

    Returns False, rather than raising, whenever strace or the traced
    interpreter reports failure, so callers can turn the result into a test
    skip.  The parse sanity check only runs once both return codes are 0:
    a failed run is not guaranteed to produce any parseable event lines.
    """
    res = strace_python("import sys; sys.exit(0)", [], check=False)
    if res.strace_returncode == 0 and res.python_returncode == 0:
        assert res.events(), "Should have parsed multiple calls"
        return True
    return False


def requires_strace():
    """Return a decorator that skips a test unless strace is usable.

    Off Linux the test is skipped without probing for strace.  On Linux
    _can_strace() is evaluated when this function is called, i.e. when the
    decorator is created.
    """
    on_linux = sys.platform == "linux"
    if on_linux:
        return unittest.skipUnless(_can_strace(), "Requires working strace")

    return unittest.skip("Linux only, requires strace.")


# Public names of this helper module.  get_syscalls is listed because test
# modules call it directly (strace_helper.get_syscalls(...)).
__all__ = [
    "get_syscalls",
    "requires_strace",
    "strace_python",
    "StraceEvent",
    "StraceResult",
]
93 changes: 92 additions & 1 deletion Lib/test/test_fileio.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from test.support import (
cpython_only, swap_attr, gc_collect, is_emscripten, is_wasi,
infinite_recursion,
infinite_recursion, strace_helper
)
from test.support.os_helper import (
TESTFN, TESTFN_ASCII, TESTFN_UNICODE, make_bad_fd,
Expand All @@ -24,6 +24,9 @@
import _pyio # Python implementation of io


# strace options passed by the syscall tests in this file: restrict tracing to
# the %file and %desc groups given to --trace.
_strace_flags=["--trace=%file,%desc"]


class AutoFileTests:
# file tests for which a test file is automatically set up

Expand Down Expand Up @@ -359,6 +362,94 @@ def testErrnoOnClosedReadinto(self, f):
a = array('b', b'x'*10)
f.readinto(a)

@strace_helper.requires_strace()
def test_syscalls_read(self):
    """Verify the system calls the I/O stack makes for common read cases.

    As parts of the I/O implementation change, these expectations will
    need to change too. The goal is to catch changes that unintentionally
    add extra system calls (ex. additional calls have been looked at in
    bpo-21679 and gh-120754).
    """
    self.f.write(b"Hello, World!")
    self.f.close()

    def check_readall(name, code, prelude="", cleanup=""):
        with self.subTest(name=name):
            syscalls = strace_helper.get_syscalls(code, _strace_flags,
                                                  prelude=prelude,
                                                  cleanup=cleanup)

            # There are a number of related syscalls used to implement
            # behaviors in a libc (ex. fstat, newfstatat, open, openat).
            # Allow any that use the same substring.
            def count_similarname(fragment):
                return sum(1 for sc in syscalls if fragment in sc)

            # Should open and close the file exactly once, and should only
            # have one fstat (bpo-21679, gh-120754).
            for fragment in ('open', 'close', 'fstat'):
                self.assertEqual(count_similarname(fragment), 1)

    # "open, read, close" file using different common patterns.
    # Each entry is (subtest name, code under study, prelude).
    pathlib_prelude = f"""from pathlib import Path; p = Path("{TESTFN}")"""
    read_patterns = [
        ("open builtin with default options",
         f"\nf = open('{TESTFN}')\nf.read()\nf.close()\n",
         ""),
        ("open in binary mode",
         f"\nf = open('{TESTFN}', 'rb')\nf.read()\nf.close()\n",
         ""),
        ("open in text mode",
         f"\nf = open('{TESTFN}', 'rt')\nf.read()\nf.close()\n",
         ""),
        ("pathlib read_bytes", "p.read_bytes()", pathlib_prelude),
        ("pathlib read_text", "p.read_text()", pathlib_prelude),
    ]
    for name, code, prelude in read_patterns:
        check_readall(name, code, prelude=prelude)

    # Focus on just `read()`: the open and close happen outside the
    # section under study.
    calls = strace_helper.get_syscalls(
        "f.read()",
        _strace_flags,
        prelude=f"f = open('{TESTFN}')",
        cleanup="f.close()",
    )
    # One to read all the bytes
    # One to read the EOF and get a size 0 return.
    self.assertEqual(calls.count("read"), 2)



class CAutoFileTests(AutoFileTests, unittest.TestCase):
FileIO = _io.FileIO
modulename = '_io'
Expand Down
53 changes: 21 additions & 32 deletions Lib/test/test_subprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from test.support import check_sanitizer
from test.support import import_helper
from test.support import os_helper
from test.support import strace_helper
from test.support import warnings_helper
from test.support.script_helper import assert_python_ok
import subprocess
Expand Down Expand Up @@ -3415,44 +3416,33 @@ def __del__(self):

@unittest.skipIf(not sysconfig.get_config_var("HAVE_VFORK"),
"vfork() not enabled by configure.")
@unittest.skipIf(sys.platform != "linux", "Linux only, requires strace.")
@strace_helper.requires_strace()
@mock.patch("subprocess._USE_POSIX_SPAWN", new=False)
def test_vfork_used_when_expected(self):
# This is a performance regression test to ensure we default to using
# vfork() when possible.
# Technically this test could pass when posix_spawn is used as well
# because libc tends to implement that internally using vfork. But
# that'd just be testing a libc+kernel implementation detail.
strace_binary = "/usr/bin/strace"
# The only system calls we are interested in.
strace_filter = "--trace=clone,clone2,clone3,fork,vfork,exit,exit_group"
true_binary = "/bin/true"
strace_command = [strace_binary, strace_filter]

try:
does_strace_work_process = subprocess.run(
strace_command + [true_binary],
stderr=subprocess.PIPE,
stdout=subprocess.DEVNULL,
)
rc = does_strace_work_process.returncode
stderr = does_strace_work_process.stderr
except OSError:
rc = -1
stderr = ""
if rc or (b"+++ exited with 0 +++" not in stderr):
self.skipTest("strace not found or not working as expected.")
# We are interested in the system calls:
# clone,clone2,clone3,fork,vfork,exit,exit_group
# Unfortunately, passing that list to strace with `--trace` fails because
# not all are supported on all platforms (ex. clone2 is ia64 only...)
# So instead use `%process`, which is recommended by strace and contains
# the above.
true_binary = "/bin/true"
strace_args = ["--trace=%process"]

with self.subTest(name="default_is_vfork"):
vfork_result = assert_python_ok(
"-c",
textwrap.dedent(f"""\
import subprocess
subprocess.check_call([{true_binary!r}])"""),
__run_using_command=strace_command,
vfork_result = strace_helper.strace_python(
f"""\
import subprocess
subprocess.check_call([{true_binary!r}])""",
strace_args
)
# Match both vfork() and clone(..., flags=...|CLONE_VFORK|...)
self.assertRegex(vfork_result.err, br"(?i)vfork")
self.assertRegex(vfork_result.event_bytes, br"(?i)vfork")
# Do NOT check that fork() or other clones did not happen.
# If the OS denies the vfork it'll fall back to plain fork().

Expand All @@ -3465,21 +3455,20 @@ def test_vfork_used_when_expected(self):
("setgroups", "", "extra_groups=[]", True),
):
with self.subTest(name=sub_name):
non_vfork_result = assert_python_ok(
"-c",
textwrap.dedent(f"""\
non_vfork_result = strace_helper.strace_python(
f"""\
import subprocess
{preamble}
try:
subprocess.check_call(
[{true_binary!r}], **dict({sp_kwarg}))
except PermissionError:
if not {expect_permission_error}:
raise"""),
__run_using_command=strace_command,
raise""",
strace_args
)
# Ensure neither vfork() nor clone(..., flags=...|CLONE_VFORK|...).
self.assertNotRegex(non_vfork_result.err, br"(?i)vfork")
self.assertNotRegex(non_vfork_result.event_bytes, br"(?i)vfork")


@unittest.skipUnless(mswindows, "Windows specific tests")
Expand Down
1 change: 1 addition & 0 deletions Misc/ACKS
Original file line number Diff line number Diff line change
Expand Up @@ -1164,6 +1164,7 @@ Grzegorz Makarewicz
David Malcolm
Greg Malcolm
William Mallard
Cody Maloney
Ken Manheimer
Vladimir Marangozov
Colin Marc
Expand Down
Loading