diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD b/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD index a882276a8273a7..0c8809977e69f1 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD @@ -41,6 +41,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/rules:repository/repository_function", "//src/main/java/com/google/devtools/build/lib/rules:repository/workspace_base_rule", "//src/main/java/com/google/devtools/build/lib/rules:repository/workspace_configured_target_factory", + "//src/main/java/com/google/devtools/build/lib/unsafe:string", "//src/main/java/com/google/devtools/build/lib/util", "//src/main/java/com/google/devtools/build/lib/util:os", "//src/main/java/com/google/devtools/build/lib/vfs", diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java index f38e7d56894ac2..dd591595b11a19 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java @@ -15,8 +15,8 @@ package com.google.devtools.build.lib.bazel.repository; import static com.google.devtools.build.lib.bazel.repository.StripPrefixedPath.maybeDeprefixSymlink; +import static java.nio.charset.StandardCharsets.UTF_8; -import com.google.common.base.Optional; import com.google.common.io.ByteStreams; import com.google.devtools.build.lib.bazel.repository.DecompressorValue.Decompressor; import com.google.devtools.build.lib.vfs.FileSystemUtils; @@ -29,6 +29,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Map; +import java.util.Optional; import java.util.Set; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; @@ -54,20 +55,21 @@ public Path decompress(DecompressorDescriptor descriptor) Map symlinks = new HashMap<>(); try (InputStream decompressorStream = getDecompressorStream(descriptor)) { - TarArchiveInputStream tarStream = new TarArchiveInputStream(decompressorStream); + // USTAR tar headers use an unspecified encoding whereas PAX tar headers always use UTF-8. + // Since this would result in ambiguity with Bazel forcing the default JVM encoding to be + // ISO-8859-1, we explicitly specify the encoding as UTF-8. + TarArchiveInputStream tarStream = new TarArchiveInputStream(decompressorStream, UTF_8.name()); TarArchiveEntry entry; while ((entry = tarStream.getNextTarEntry()) != null) { String entryName = entry.getName(); entryName = renameFiles.getOrDefault(entryName, entryName); - StripPrefixedPath entryPath = StripPrefixedPath.maybeDeprefix(entryName, prefix); + StripPrefixedPath entryPath = + StripPrefixedPath.maybeDeprefix(entryName.getBytes(UTF_8), prefix); foundPrefix = foundPrefix || entryPath.foundPrefix(); if (prefix.isPresent() && !foundPrefix) { - Optional suggestion = - CouldNotFindPrefixException.maybeMakePrefixSuggestion(entryPath.getPathFragment()); - if (suggestion.isPresent()) { - availablePrefixes.add(suggestion.get()); - } + CouldNotFindPrefixException.maybeMakePrefixSuggestion(entryPath.getPathFragment()) + .ifPresent(availablePrefixes::add); } if (entryPath.skip()) { @@ -80,8 +82,9 @@ public Path decompress(DecompressorDescriptor descriptor) filePath.createDirectoryAndParents(); } else { if (entry.isSymbolicLink() || entry.isLink()) { - PathFragment targetName = PathFragment.create(entry.getLinkName()); - targetName = maybeDeprefixSymlink(targetName, prefix, descriptor.destinationPath()); + PathFragment targetName = + maybeDeprefixSymlink( + entry.getLinkName().getBytes(UTF_8), prefix, descriptor.destinationPath()); if (entry.isSymbolicLink()) { symlinks.put(filePath, targetName); } else { diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorDescriptor.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorDescriptor.java index 2676fdba14d091..c40d5ab35f38da 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorDescriptor.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorDescriptor.java @@ -15,10 +15,10 @@ package com.google.devtools.build.lib.bazel.repository; import com.google.auto.value.AutoValue; -import com.google.common.base.Optional; import com.google.common.collect.ImmutableMap; import com.google.devtools.build.lib.vfs.Path; import java.util.Map; +import java.util.Optional; /** Description of an archive to be decompressed. */ @AutoValue diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java index e32a2decb18741..9deddba06b887f 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java @@ -14,14 +14,17 @@ package com.google.devtools.build.lib.bazel.repository; +import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.UTF_8; + import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Optional; import com.google.devtools.build.lib.rules.repository.RepositoryFunction.RepositoryFunctionException; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; import com.google.devtools.build.skyframe.SkyFunctionException.Transience; import com.google.devtools.build.skyframe.SkyValue; import java.io.IOException; +import java.util.Optional; import java.util.Set; import net.starlark.java.eval.Starlark; @@ -59,9 +62,14 @@ private static String prepareErrorMessage(String prefix, Set availablePr } public static Optional maybeMakePrefixSuggestion(PathFragment pathFragment) { - return pathFragment.isMultiSegment() - ? Optional.of(pathFragment.getSegment(0)) - : Optional.absent(); + if (!pathFragment.isMultiSegment()) { + return Optional.empty(); + } + String rawFirstSegment = pathFragment.getSegment(0); + // Users can only specify prefixes from Starlark, where strings always use UTF-8, so we + // optimistically decode with it here even though we do not know the original encoding. + return Optional.of(new String(rawFirstSegment.getBytes(ISO_8859_1), UTF_8)); +// return Optional.of(rawFirstSegment); } } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPath.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPath.java index 77c5815e79a50d..5852b4d0ea5303 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPath.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPath.java @@ -14,11 +14,13 @@ package com.google.devtools.build.lib.bazel.repository; -import com.google.common.base.Optional; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + import com.google.common.base.Preconditions; import com.google.devtools.build.lib.concurrent.ThreadSafety; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; +import java.util.Optional; /** * Utility class for removing a prefix from an archive's path. @@ -36,17 +38,19 @@ public final class StripPrefixedPath { * could cause collisions, if a zip file had one entry for bin/some-binary and another entry for * /bin/some-binary. * - * Note that the prefix is stripped to move the files up one level, so if you have an entry + *

Note that the prefix is stripped to move the files up one level, so if you have an entry * "foo/../bar" and a prefix of "foo", the result will be "bar" not "../bar". */ - public static StripPrefixedPath maybeDeprefix(String entry, Optional prefix) { + public static StripPrefixedPath maybeDeprefix(byte[] entry, Optional prefix) { Preconditions.checkNotNull(entry); PathFragment entryPath = relativize(entry); - if (!prefix.isPresent()) { + if (prefix.isEmpty()) { return new StripPrefixedPath(entryPath, false, false); } - PathFragment prefixPath = relativize(prefix.get()); + // Bazel parses Starlark files, which are the ultimate source of prefixes, as Latin-1 + // (ISO-8859-1). + PathFragment prefixPath = relativize(prefix.get().getBytes(ISO_8859_1)); boolean found = false; boolean skip = false; if (entryPath.startsWith(prefixPath)) { @@ -64,8 +68,8 @@ public static StripPrefixedPath maybeDeprefix(String entry, Optional pre /** * Normalize the path and, if it is absolute, make it relative (e.g., /foo/bar becomes foo/bar). */ - private static PathFragment relativize(String path) { - PathFragment entryPath = PathFragment.create(path); + private static PathFragment relativize(byte[] path) { + PathFragment entryPath = createPathFragment(path); if (entryPath.isAbsolute()) { entryPath = entryPath.toRelative(); } @@ -79,10 +83,10 @@ private StripPrefixedPath(PathFragment pathFragment, boolean found, boolean skip } public static PathFragment maybeDeprefixSymlink( - PathFragment linkPathFragment, Optional prefix, Path root) { - boolean wasAbsolute = linkPathFragment.isAbsolute(); + byte[] rawTarget, Optional prefix, Path root) { + boolean wasAbsolute = createPathFragment(rawTarget).isAbsolute(); // Strip the prefix from the link path if set. - linkPathFragment = maybeDeprefix(linkPathFragment.getPathString(), prefix).getPathFragment(); + PathFragment linkPathFragment = maybeDeprefix(rawTarget, prefix).getPathFragment(); if (wasAbsolute) { // Recover the path to an absolute path as maybeDeprefix() relativize the path // even if the prefix is not set @@ -103,4 +107,10 @@ public boolean skip() { return skip; } + static PathFragment createPathFragment(byte[] rawBytes) { + // Bazel internally represents paths as raw bytes by using the Latin-1 encoding, which has the + // property that (new String(bytes, ISO_8859_1)).getBytes(ISO_8859_1)) equals bytes for every + // byte array bytes. + return PathFragment.create(new String(rawBytes, ISO_8859_1)); + } } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/ZipDecompressor.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/ZipDecompressor.java index 8990e7c4c08e64..4a8f7709a5798b 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/ZipDecompressor.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/ZipDecompressor.java @@ -15,9 +15,9 @@ package com.google.devtools.build.lib.bazel.repository; import static com.google.devtools.build.lib.bazel.repository.StripPrefixedPath.maybeDeprefixSymlink; +import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Optional; import com.google.common.base.Preconditions; import com.google.common.io.ByteStreams; import com.google.devtools.build.lib.bazel.repository.DecompressorValue.Decompressor; @@ -29,11 +29,11 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.nio.charset.Charset; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Map; +import java.util.Optional; import java.util.Set; import javax.annotation.Nullable; @@ -89,7 +89,8 @@ public Path decompress(DecompressorDescriptor descriptor) for (ZipFileEntry entry : entries) { String entryName = entry.getName(); entryName = renameFiles.getOrDefault(entryName, entryName); - StripPrefixedPath entryPath = StripPrefixedPath.maybeDeprefix(entryName, prefix); + StripPrefixedPath entryPath = + StripPrefixedPath.maybeDeprefix(entryName.getBytes(UTF_8), prefix); foundPrefix = foundPrefix || entryPath.foundPrefix(); if (entryPath.skip()) { continue; @@ -102,12 +103,9 @@ public Path decompress(DecompressorDescriptor descriptor) Set prefixes = new HashSet<>(); for (ZipFileEntry entry : entries) { StripPrefixedPath entryPath = - StripPrefixedPath.maybeDeprefix(entry.getName(), Optional.absent()); - Optional suggestion = - CouldNotFindPrefixException.maybeMakePrefixSuggestion(entryPath.getPathFragment()); - if (suggestion.isPresent()) { - prefixes.add(suggestion.get()); - } + StripPrefixedPath.maybeDeprefix(entry.getName().getBytes(UTF_8), Optional.empty()); + CouldNotFindPrefixException.maybeMakePrefixSuggestion(entryPath.getPathFragment()) + .ifPresent(prefixes::add); } throw new CouldNotFindPrefixException(prefix.get(), prefixes); } @@ -146,17 +144,22 @@ private static void extractZipEntry( // For symlinks, the "compressed data" is actually the target name. int read = reader.getInputStream(entry).read(buffer); Preconditions.checkState(read == buffer.length); - PathFragment target = PathFragment.create(new String(buffer, Charset.defaultCharset())); + + PathFragment target = StripPrefixedPath.createPathFragment(buffer); if (target.containsUplevelReferences()) { PathFragment pointsTo = strippedRelativePath.getParentDirectory().getRelative(target); if (pointsTo.containsUplevelReferences()) { - throw new IOException("Zip entries cannot refer to files outside of their directory: " - + reader.getFilename() + " has a symlink " + strippedRelativePath + " pointing to " - + target); + throw new IOException( + "Zip entries cannot refer to files outside of their directory: " + + reader.getFilename() + + " has a symlink " + + strippedRelativePath + + " pointing to " + + new String(buffer, UTF_8)); } } - target = maybeDeprefixSymlink(target, prefix, destinationDirectory); - symlinks.put(outputPath, target); + + symlinks.put(outputPath, maybeDeprefixSymlink(buffer, prefix, destinationDirectory)); } else { try (InputStream input = reader.getInputStream(entry); OutputStream output = outputPath.getOutputStream()) { diff --git a/src/test/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPathTest.java b/src/test/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPathTest.java index df1ea4db0fb19f..784b16348e0580 100644 --- a/src/test/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPathTest.java +++ b/src/test/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPathTest.java @@ -15,13 +15,14 @@ package com.google.devtools.build.lib.bazel.repository; import static com.google.common.truth.Truth.assertThat; +import static java.nio.charset.StandardCharsets.UTF_8; -import com.google.common.base.Optional; import com.google.devtools.build.lib.clock.BlazeClock; import com.google.devtools.build.lib.util.OS; import com.google.devtools.build.lib.vfs.DigestHashFunction; import com.google.devtools.build.lib.vfs.PathFragment; import com.google.devtools.build.lib.vfs.inmemoryfs.InMemoryFileSystem; +import java.util.Optional; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -33,31 +34,32 @@ public class StripPrefixedPathTest { @Test public void testStrip() { - StripPrefixedPath result = StripPrefixedPath.maybeDeprefix("foo/bar", Optional.of("foo")); + StripPrefixedPath result = + StripPrefixedPath.maybeDeprefix("foo/bar".getBytes(UTF_8), Optional.of("foo")); assertThat(PathFragment.create("bar")).isEqualTo(result.getPathFragment()); assertThat(result.foundPrefix()).isTrue(); assertThat(result.skip()).isFalse(); - result = StripPrefixedPath.maybeDeprefix("foo", Optional.of("foo")); + result = StripPrefixedPath.maybeDeprefix("foo".getBytes(UTF_8), Optional.of("foo")); assertThat(result.skip()).isTrue(); - result = StripPrefixedPath.maybeDeprefix("bar/baz", Optional.of("foo")); + result = StripPrefixedPath.maybeDeprefix("bar/baz".getBytes(UTF_8), Optional.of("foo")); assertThat(result.foundPrefix()).isFalse(); - result = StripPrefixedPath.maybeDeprefix("foof/bar", Optional.of("foo")); + result = StripPrefixedPath.maybeDeprefix("foof/bar".getBytes(UTF_8), Optional.of("foo")); assertThat(result.foundPrefix()).isFalse(); } @Test public void testAbsolute() { - StripPrefixedPath result = StripPrefixedPath.maybeDeprefix( - "/foo/bar", Optional.absent()); + StripPrefixedPath result = + StripPrefixedPath.maybeDeprefix("/foo/bar".getBytes(UTF_8), Optional.empty()); assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("foo/bar")); - result = StripPrefixedPath.maybeDeprefix("///foo/bar/baz", Optional.absent()); + result = StripPrefixedPath.maybeDeprefix("///foo/bar/baz".getBytes(UTF_8), Optional.empty()); assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("foo/bar/baz")); - result = StripPrefixedPath.maybeDeprefix("/foo/bar/baz", Optional.of("/foo")); + result = StripPrefixedPath.maybeDeprefix("/foo/bar/baz".getBytes(UTF_8), Optional.of("/foo")); assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("bar/baz")); } @@ -66,21 +68,21 @@ public void testWindowsAbsolute() { if (OS.getCurrent() != OS.WINDOWS) { return; } - StripPrefixedPath result = StripPrefixedPath.maybeDeprefix( - "c:/foo/bar", Optional.absent()); + StripPrefixedPath result = + StripPrefixedPath.maybeDeprefix("c:/foo/bar".getBytes(UTF_8), Optional.empty()); assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("foo/bar")); } @Test public void testNormalize() { - StripPrefixedPath result = StripPrefixedPath.maybeDeprefix( - "../bar", Optional.absent()); + StripPrefixedPath result = + StripPrefixedPath.maybeDeprefix("../bar".getBytes(UTF_8), Optional.empty()); assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("../bar")); - result = StripPrefixedPath.maybeDeprefix("foo/../baz", Optional.absent()); + result = StripPrefixedPath.maybeDeprefix("foo/../baz".getBytes(UTF_8), Optional.empty()); assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("baz")); - result = StripPrefixedPath.maybeDeprefix("foo/../baz", Optional.of("foo")); + result = StripPrefixedPath.maybeDeprefix("foo/../baz".getBytes(UTF_8), Optional.of("foo")); assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("baz")); } @@ -91,23 +93,23 @@ public void testDeprefixSymlink() { PathFragment relativeNoPrefix = StripPrefixedPath.maybeDeprefixSymlink( - PathFragment.create("a/b"), Optional.absent(), fileSystem.getPath("/usr")); + "a/b".getBytes(UTF_8), Optional.empty(), fileSystem.getPath("/usr")); // there is no attempt to get absolute path for the relative symlinks target path assertThat(relativeNoPrefix).isEqualTo(PathFragment.create("a/b")); PathFragment absoluteNoPrefix = StripPrefixedPath.maybeDeprefixSymlink( - PathFragment.create("/a/b"), Optional.absent(), fileSystem.getPath("/usr")); + "/a/b".getBytes(UTF_8), Optional.empty(), fileSystem.getPath("/usr")); assertThat(absoluteNoPrefix).isEqualTo(PathFragment.create("/usr/a/b")); PathFragment absolutePrefix = StripPrefixedPath.maybeDeprefixSymlink( - PathFragment.create("/root/a/b"), Optional.of("root"), fileSystem.getPath("/usr")); + "/root/a/b".getBytes(UTF_8), Optional.of("root"), fileSystem.getPath("/usr")); assertThat(absolutePrefix).isEqualTo(PathFragment.create("/usr/a/b")); PathFragment relativePrefix = StripPrefixedPath.maybeDeprefixSymlink( - PathFragment.create("root/a/b"), Optional.of("root"), fileSystem.getPath("/usr")); + "root/a/b".getBytes(UTF_8), Optional.of("root"), fileSystem.getPath("/usr")); // there is no attempt to get absolute path for the relative symlinks target path assertThat(relativePrefix).isEqualTo(PathFragment.create("a/b")); } diff --git a/src/test/shell/bazel/bazel_workspaces_test.sh b/src/test/shell/bazel/bazel_workspaces_test.sh index a0669133572c25..b073e2da91e844 100755 --- a/src/test/shell/bazel/bazel_workspaces_test.sh +++ b/src/test/shell/bazel/bazel_workspaces_test.sh @@ -449,6 +449,69 @@ function test_extract_rename_files() { ensure_output_contains_exactly_once "external/repo/out_dir/renamed-A.txt" "Second file: A" } +function test_extract_pax_tar_non_ascii_file_names() { + local archive_tar="${TEST_TMPDIR}/pax.tar" + + pushd "${TEST_TMPDIR}" + mkdir "Ä_pax_∅" + echo "bar" > "Ä_pax_∅/Ä_foo_∅.txt" + tar --format=pax -cvf pax.tar "Ä_pax_∅" + popd + + set_workspace_command " + repository_ctx.extract('${archive_tar}', 'out_dir', 'Ä_pax_∅/')" + + build_and_process_log --exclude_rule "repository @local_config_cc" + + ensure_contains_exactly 'location: .*repos.bzl:3:25' 1 + ensure_contains_atleast 'context: "repository @repo"' 2 + ensure_contains_exactly 'extract_event' 1 + + ensure_output_contains_exactly_once "external/repo/out_dir/Ä_foo_∅.txt" "bar" +} + +function test_extract_ustar_tar_non_ascii_file_names() { + local archive_tar="${TEST_TMPDIR}/ustar.tar" + + pushd "${TEST_TMPDIR}" + mkdir "Ä_ustar_∅" + echo "bar" > "Ä_ustar_∅/Ä_foo_∅.txt" + tar --format=ustar -cvf ustar.tar "Ä_ustar_∅" + popd + + set_workspace_command " + repository_ctx.extract('${archive_tar}', 'out_dir', 'Ä_ustar_∅/')" + + build_and_process_log --exclude_rule "repository @local_config_cc" + + ensure_contains_exactly 'location: .*repos.bzl:3:25' 1 + ensure_contains_atleast 'context: "repository @repo"' 2 + ensure_contains_exactly 'extract_event' 1 + + ensure_output_contains_exactly_once "external/repo/out_dir/Ä_foo_∅.txt" "bar" +} + +function test_extract_default_zip_non_ascii_file_names() { + local archive_tar="${TEST_TMPDIR}/default.zip" + + pushd "${TEST_TMPDIR}" + mkdir "Ä_default_∅" + echo "bar" > "Ä_default_∅/Ä_foo_∅.txt" + zip default.zip -r "Ä_default_∅" + popd + + set_workspace_command " + repository_ctx.extract('${archive_tar}', 'out_dir', 'Ä_default_∅/')" + + build_and_process_log --exclude_rule "repository @local_config_cc" + + ensure_contains_exactly 'location: .*repos.bzl:3:25' 1 + ensure_contains_atleast 'context: "repository @repo"' 2 + ensure_contains_exactly 'extract_event' 1 + + ensure_output_contains_exactly_once "external/repo/out_dir/Ä_foo_∅.txt" "bar" +} + function test_file() { set_workspace_command 'repository_ctx.file("filefile.sh", "echo filefile", True)'