From 667f816ff1efb9186565692c04c8338ed6b8c11e Mon Sep 17 00:00:00 2001 From: sin-ack Date: Thu, 23 May 2024 15:43:19 +0000 Subject: [PATCH] fix(tar): append slash to top-level directory mtree entries bsdtar's mtree format has a quirk wherein entries without "/" in their first word are treated as "relative" entries, and "relative" directories will cause tar to "change directory" into the declared directory entry. If such a directory is followed by a "relative" entry, then the file will be created within the directory, instead of at top-level as expected. To mitigate, we append a slash to top-level directory entries. Fixes #851. --- BUILD.bazel | 38 ++++++++++++++++++++++++++ lib/private/modify_mtree.awk | 11 ++++++++ lib/private/tar.bzl | 10 +++++++ lib/tests/tar/BUILD.bazel | 49 ++++++++++++++++++++++++++++++++++ lib/tests/tar/expected13.mtree | 3 +++ lib/tests/tar/expected14.mtree | 3 +++ 6 files changed, 114 insertions(+) create mode 100644 lib/tests/tar/expected13.mtree create mode 100644 lib/tests/tar/expected14.mtree diff --git a/BUILD.bazel b/BUILD.bazel index aa37a1ed8..053f76664 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -5,6 +5,7 @@ load("@bazel_skylib//rules:copy_file.bzl", "copy_file") load("@bazel_skylib//rules:write_file.bzl", "write_file") load("@buildifier_prebuilt//:rules.bzl", "buildifier") load("//lib:diff_test.bzl", "diff_test") +load("//lib:tar.bzl", "mtree_spec") load("//lib:testing.bzl", "assert_contains") load("//lib:utils.bzl", "is_bazel_7_or_greater") load("//lib:write_source_files.bzl", "write_source_files") @@ -136,3 +137,40 @@ bzl_library( visibility = ["//visibility:public"], deps = ["@bazel_gazelle//:deps"], ) + +# Test case for mtree_spec: Ensure that multiple entries at the root directory are handled correctly (bug #851) +# See lib/tests/tar/BUILD.bazel for why this is here. +write_file( + name = "tar_test13_main", + out = "13project/__main__.py", + content = ["__main__.py"], +) + +write_file( + name = "tar_test13_bin", + out = "13project_bin", + content = ["project_bin"], +) + +mtree_spec( + name = "tar_test13_mtree_unsorted", + srcs = [ + ":tar_test13_main", + ":tar_test13_bin", + ] +) + +# NOTE: On some systems, the mtree_spec output can have a different order. +# In order to make the test more robust, we sort the mtree output. +genrule( + name = "tar_test13_mtree", + srcs = [":tar_test13_mtree_unsorted"], + outs = ["actual13.mtree"], + cmd = "sort $< > $@", +) + +diff_test( + name = "tar_test13", + file1 = "tar_test13_mtree", + file2 = "//lib/tests/tar:expected13.mtree", +) diff --git a/lib/private/modify_mtree.awk b/lib/private/modify_mtree.awk index 2fe659801..346321c0e 100644 --- a/lib/private/modify_mtree.awk +++ b/lib/private/modify_mtree.awk @@ -7,6 +7,17 @@ } else if (index($1, strip_prefix) == 1) { # this line starts with the strip_prefix sub("^" strip_prefix "/", ""); + + # NOTE: The mtree format treats file paths without slashes as "relative" entries. + # If a relative entry is a directory, then it will "change directory" to that + # directory, and any subsequent "relative" entries will be created inside that + # directory. This causes issues when there is a top-level directory that is + # followed by a top-level file, as the file will be created inside the directory. + # To avoid this, we append a slash to the directory path to make it a "full" entry. + components = split($1, _, "/"); + if ($0 ~ /type=dir/ && components == 1) { + $1 = $1 "/"; + } } else { # this line declares some path under a parent directory, which will be discarded next; diff --git a/lib/private/tar.bzl b/lib/private/tar.bzl index 89f7ad394..733ff6085 100644 --- a/lib/private/tar.bzl +++ b/lib/private/tar.bzl @@ -195,6 +195,16 @@ def _expand(file, expander, transform = to_repository_relative_path): segments = path.split("/") for i in range(1, len(segments)): parent = "/".join(segments[:i]) + + # NOTE: The mtree format treats file paths without slashes as "relative" entries. + # If a relative entry is a directory, then it will "change directory" to that + # directory, and any subsequent "relative" entries will be created inside that + # directory. This causes issues when there is a top-level directory that is + # followed by a top-level file, as the file will be created inside the directory. + # To avoid this, we append a slash to the directory path to make it a "full" entry. + if i == 1: + parent += "/" + lines.append(_mtree_line(parent, "dir")) lines.append(_mtree_line(_vis_encode(path), "file", content = _vis_encode(e.path))) diff --git a/lib/tests/tar/BUILD.bazel b/lib/tests/tar/BUILD.bazel index 1ec89762b..bbc32008d 100644 --- a/lib/tests/tar/BUILD.bazel +++ b/lib/tests/tar/BUILD.bazel @@ -358,3 +358,52 @@ diff_test( file1 = "modified2.mtree", file2 = "expected2.mtree", ) + +# Case 13: Ensure that multiple entries at the root directory are handled correctly (bug #851) +# NOTE: The mtree_spec part of this test is placed at the root BUILD.bazel because +# that's the only way to ensure that the mtree_spec generates single-component +# entries (which would trigger the bug). +exports_files(["expected13.mtree"]) + +# Case 14: Ensure mtree_mutate correctly handles prefix stripping for top-level directories (bug #851) + +write_file( + name = "test14_main", + out = "14project/__main__.py", + content = ["__main__.py"], +) + +write_file( + name = "test14_bin", + out = "14project_bin", + content = ["project_bin"], +) + +mtree_spec( + name = "mtree14", + srcs = [ + ":test14_main", + ":test14_bin", + ], +) + +mtree_mutate( + name = "strip_prefix14_unsorted", + mtree = "mtree14", + strip_prefix = "lib/tests/tar", +) + +# NOTE: On some systems, the mtree_spec output can have a different order. +# In order to make the test more robust, we sort the mtree output. +genrule( + name = "strip_prefix14", + srcs = [":strip_prefix14_unsorted"], + outs = ["actual14.mtree"], + cmd = "sort $< > $@", +) + +diff_test( + name = "test14", + file1 = ":strip_prefix14", + file2 = "expected14.mtree", +) diff --git a/lib/tests/tar/expected13.mtree b/lib/tests/tar/expected13.mtree new file mode 100644 index 000000000..4e154b04f --- /dev/null +++ b/lib/tests/tar/expected13.mtree @@ -0,0 +1,3 @@ +13project/ uid=0 gid=0 time=1672560000 mode=0755 type=dir +13project/__main__.py uid=0 gid=0 time=1672560000 mode=0755 type=file content=bazel-out/k8-opt/bin/13project/__main__.py +13project_bin uid=0 gid=0 time=1672560000 mode=0755 type=file content=bazel-out/k8-opt/bin/13project_bin diff --git a/lib/tests/tar/expected14.mtree b/lib/tests/tar/expected14.mtree new file mode 100644 index 000000000..a927a0dfe --- /dev/null +++ b/lib/tests/tar/expected14.mtree @@ -0,0 +1,3 @@ +14project/ uid=0 gid=0 time=1672560000 mode=0755 type=dir +14project/__main__.py uid=0 gid=0 time=1672560000 mode=0755 type=file content=bazel-out/k8-opt/bin/lib/tests/tar/14project/__main__.py +14project_bin uid=0 gid=0 time=1672560000 mode=0755 type=file content=bazel-out/k8-opt/bin/lib/tests/tar/14project_bin