Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Anonymize sandbox paths instead of stripping completely #20492

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/notes/2.22.x.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ Setting [the `orphan_files_behaviour = "ignore"` option](https://www.pantsbuild.

### Plugin API changes

## Bugfixes

- Sandboxing: Log output from sandboxes will contain anonymized paths instead of completely removing the sandbox prefix. (#20492)
Comment on lines +71 to +73
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This sort of cross-cutting thing often goes in a dedicated "overall" section: https://github.com/pantsbuild/pants/blob/main/docs/notes/2.20.x.md#overall

Maybe you could add one after "new options system" and move it up there? (I personally find it easier to read release notes where the changes are grouped by area rather than by "size"/"type")

Also, I think this may soon (pantsbuild/pantsbuild.org#184) be rendered in places that don't auto-link the GitHub issues:

Suggested change
## Bugfixes
- Sandboxing: Log output from sandboxes will contain anonymized paths instead of completely removing the sandbox prefix. (#20492)
- Sandboxing: Log output from sandboxes will contain anonymized paths instead of completely removing the sandbox prefix. ([#20492](https://github.com/pantsbuild/pants/pull/20492))

(I note that we haven't historically been linking the PRs/issues for everything, although maybe it'd be good to do more consistently.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, I think this may soon (pantsbuild/pantsbuild.org#184) be rendered in places that don't auto-link the GitHub issues:

Or, maybe the doc site renderer we use (docusaurus) can do auto-linking. I'm not sure, but it seems plausible 🤔

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Had the same thought about auto linking during doc site rendering.. :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or, maybe the doc site renderer we use (docusaurus) can do auto-linking. I'm not sure, but it seems plausible

I did a quick look through their docs and source code, and I couldn't find any indication of auto-linking support, so we'd have to write a plugin (and/or contribute it upstream)... so, for now, let's just write the links manually to ensure they work everywhere?

I've filed pantsbuild/pantsbuild.org#203


## Full Changelog

For the full changelog, see the individual GitHub Releases for this series: https://github.com/pantsbuild/pants/releases
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def test_failing_source(rule_runner: RuleRunner) -> None:
rule_runner.write_files({"f.py": BAD_FILE, "BUILD": "python_sources(name='t')"})
tgt = rule_runner.get_target(Address("", target_name="t", relative_file_path="f.py"))
fmt_result = run_isort(rule_runner, [tgt])
assert fmt_result.stdout == "Fixing f.py\n"
assert fmt_result.stdout == "Fixing /<pants-sandbox-1>/f.py\n"
assert fmt_result.output == rule_runner.make_snapshot({"f.py": FIXED_BAD_FILE})
assert fmt_result.did_change is True

Expand All @@ -116,7 +116,7 @@ def test_multiple_targets(rule_runner: RuleRunner) -> None:
rule_runner.get_target(Address("", target_name="t", relative_file_path="bad.py")),
]
fmt_result = run_isort(rule_runner, tgts)
assert "Fixing bad.py\n" == fmt_result.stdout
assert "Fixing /<pants-sandbox-1>/bad.py\n" == fmt_result.stdout
assert fmt_result.output == rule_runner.make_snapshot(
{"good.py": GOOD_FILE, "bad.py": FIXED_BAD_FILE}
)
Expand All @@ -136,7 +136,7 @@ def test_config_file(rule_runner: RuleRunner, path: str, extra_args: list[str])
)
tgt = rule_runner.get_target(Address("", target_name="t", relative_file_path="f.py"))
fmt_result = run_isort(rule_runner, [tgt], extra_args=extra_args)
assert fmt_result.stdout == "Fixing f.py\n"
assert fmt_result.stdout == "Fixing /<pants-sandbox-1>/f.py\n"
assert fmt_result.output == rule_runner.make_snapshot({"f.py": FIXED_NEEDS_CONFIG_FILE})
assert fmt_result.did_change is True

Expand Down Expand Up @@ -171,7 +171,7 @@ def test_passthrough_args(rule_runner: RuleRunner) -> None:
rule_runner.write_files({"f.py": NEEDS_CONFIG_FILE, "BUILD": "python_sources(name='t')"})
tgt = rule_runner.get_target(Address("", target_name="t", relative_file_path="f.py"))
fmt_result = run_isort(rule_runner, [tgt], extra_args=["--isort-args='--combine-as'"])
assert fmt_result.stdout == "Fixing f.py\n"
assert fmt_result.stdout == "Fixing /<pants-sandbox-1>/f.py\n"
assert fmt_result.output == rule_runner.make_snapshot({"f.py": FIXED_NEEDS_CONFIG_FILE})
assert fmt_result.did_change is True

Expand Down Expand Up @@ -202,7 +202,9 @@ def test_stub_files(rule_runner: RuleRunner) -> None:
rule_runner.get_target(Address("", target_name="t", relative_file_path="bad.py")),
]
fmt_result = run_isort(rule_runner, bad_tgts)
assert fmt_result.stdout == "Fixing bad.py\nFixing bad.pyi\n"
assert (
fmt_result.stdout == "Fixing /<pants-sandbox-1>/bad.py\nFixing /<pants-sandbox-1>/bad.pyi\n"
)
assert fmt_result.output == rule_runner.make_snapshot(
{"bad.py": FIXED_BAD_FILE, "bad.pyi": FIXED_BAD_FILE}
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ def throwException = throw new IllegalArgumentException
"""

BAD_FILE_STDOUT = """\
Foo.scala:2:24: error: [DisableSyntax.throw] exceptions should be avoided, consider encoding the error in the return type instead
/<pants-sandbox-1>/Foo.scala:2:24: error: [DisableSyntax.throw] exceptions should be avoided, consider encoding the error in the return type instead
def throwException = throw new IllegalArgumentException
^^^^^
"""
Expand Down
31 changes: 22 additions & 9 deletions src/python/pants/util/strutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,17 +129,30 @@ def strip_prefix(string: str, prefix: str) -> str:


# NB: We allow bytes because `ProcessResult.std{err,out}` uses bytes.
def strip_v2_chroot_path(v: bytes | str) -> str:
"""Remove all instances of the chroot tmpdir path from the str so that it only uses relative
paths.

This is useful when a tool that is run with the V2 engine outputs absolute paths. It is
confusing for the user to see the absolute path in the final output because it is an
implementation detail that Pants copies their source code into a chroot.
def anonymize_v2_chroot_path(v: bytes | str) -> str:
"""Replace all instances of chroot tmpdir paths from the str to use cache-friendly anonymous
alternatives.

This is useful when a tool that is run with the V2 engine outputs absolute paths. These paths
are bad for caching, because they'll be unique for each run. They are also unstable. This
replacement will expose the sandbox usage to users, but using <pants-sandbox-1>, etc. makes them
distinct from most other paths, and makes it very clear if multiple sandboxes are being used.
"""
if isinstance(v, bytes):
v = v.decode()
return re.sub(r"/.*/pants-sandbox-[a-zA-Z0-9]+/", "", v)

found_sandbox_paths = re.findall(r"(/.*?/pants-sandbox-[a-zA-Z0-9]+)/", v)
sandbox_path_to_replacement: dict[str, str] = {}
for sandbox_path in found_sandbox_paths:
if sandbox_path not in sandbox_path_to_replacement:
sandbox_path_to_replacement[
sandbox_path
] = f"/<pants-sandbox-{len(sandbox_path_to_replacement)+1}>"

for sandbox, replacement in sandbox_path_to_replacement.items():
v = v.replace(sandbox, replacement)

return v


@dataclasses.dataclass(frozen=True)
Expand All @@ -154,7 +167,7 @@ class Simplifier:

def simplify(self, v: bytes | str) -> str:
chroot = (
strip_v2_chroot_path(v)
anonymize_v2_chroot_path(v)
if self.strip_chroot_path
else v.decode()
if isinstance(v, bytes)
Expand Down
51 changes: 38 additions & 13 deletions src/python/pants/util/strutil_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pants.util.frozendict import FrozenDict
from pants.util.strutil import (
Simplifier,
anonymize_v2_chroot_path,
bullet_list,
comma_separated_list,
docstring,
Expand All @@ -23,7 +24,6 @@
softwrap,
stable_hash,
strip_prefix,
strip_v2_chroot_path,
)


Expand Down Expand Up @@ -71,22 +71,26 @@ def test_strip_prefix() -> None:

def test_strip_chroot_path() -> None:
assert (
strip_v2_chroot_path(
anonymize_v2_chroot_path(
dedent(
"""\
Would reformat /private/var/folders/sx/pdpbqz4x5cscn9hhfpbsbqvm0000gn/T/pants-sandbox-3zt5Ph/src/python/example.py
Would reformat /var/folders/sx/pdpbqz4x5cscn9hhfpbsbqvm0000gn/T/pants-sandbox-OCnquv/test.py
Would reformat /custom-tmpdir/pants-sandbox-7zt4pH/custom_tmpdir.py
Would reformat /custom-tmpdir/pants-sandbox-7zt4pH/another_file.py
Would reformat /private/var/folders/sx/pdpbqz4x5cscn9hhfpbsbqvm0000gn/T/pants-sandbox-3zt5Ph/foobar.py

Some other output.
"""
)
)
== dedent(
"""\
Would reformat src/python/example.py
Would reformat test.py
Would reformat custom_tmpdir.py
Would reformat /<pants-sandbox-1>/src/python/example.py
Would reformat /<pants-sandbox-2>/test.py
Would reformat /<pants-sandbox-3>/custom_tmpdir.py
Would reformat /<pants-sandbox-3>/another_file.py
Would reformat /<pants-sandbox-1>/foobar.py

Some other output.
"""
Expand All @@ -95,33 +99,54 @@ def test_strip_chroot_path() -> None:

# A subdir must be prefixed with `pants-sandbox-`, then some characters after it.
assert (
strip_v2_chroot_path("/var/pants_sandbox_OCnquv/test.py")
anonymize_v2_chroot_path("/var/pants_sandbox_OCnquv/test.py")
== "/var/pants_sandbox_OCnquv/test.py"
)
assert (
strip_v2_chroot_path("/var/pants_sandboxOCnquv/test.py")
anonymize_v2_chroot_path("/var/pants_sandboxOCnquv/test.py")
== "/var/pants_sandboxOCnquv/test.py"
)
assert strip_v2_chroot_path("/var/pants-sandbox/test.py") == "/var/pants-sandbox/test.py"
assert anonymize_v2_chroot_path("/var/pants-sandbox/test.py") == "/var/pants-sandbox/test.py"

# Our heuristic requires absolute paths.
assert (
strip_v2_chroot_path("var/pants-sandbox-OCnquv/test.py")
anonymize_v2_chroot_path("var/pants-sandbox-OCnquv/test.py")
== "var/pants-sandbox-OCnquv/test.py"
)

# Confirm we can handle values with no chroot path.
assert strip_v2_chroot_path("") == ""
assert strip_v2_chroot_path("hello world") == "hello world"
assert anonymize_v2_chroot_path("") == ""
assert anonymize_v2_chroot_path("hello world") == "hello world"


def test_path_issue_strip_chroot_path() -> None:
# This was a specific bug observed where the PATH variable got anonymized incorrectly. In particular, the
# replacement crossed over into *another* PATH item, i.e, including `:`.
assert (
anonymize_v2_chroot_path(
dedent(
"""\
/var/pants-sandbox-123/red:/var/pants-sandbox-123/blue
/var/pants-sandbox-345/red:/var/pants-sandbox-456/blue
"""
)
)
== dedent(
"""\
/<pants-sandbox-1>/red:/<pants-sandbox-1>/blue
/<pants-sandbox-2>/red:/<pants-sandbox-3>/blue
"""
)
)


@pytest.mark.parametrize(
("strip_chroot_path", "strip_formatting", "expected"),
[
(False, False, "\033[0;31m/var/pants-sandbox-123/red/path.py\033[0m \033[1mbold\033[0m"),
(False, True, "/var/pants-sandbox-123/red/path.py bold"),
(True, False, "\033[0;31mred/path.py\033[0m \033[1mbold\033[0m"),
(True, True, "red/path.py bold"),
(True, False, "\033[0;31m/<pants-sandbox-1>/red/path.py\033[0m \033[1mbold\033[0m"),
(True, True, "/<pants-sandbox-1>/red/path.py bold"),
],
)
def test_simplifier(strip_chroot_path: bool, strip_formatting: bool, expected: str) -> None:
Expand Down
Loading