From e9a4e93ed713fba03c83a4a37bed80624fba4d5c Mon Sep 17 00:00:00 2001 From: pcloudy Date: Wed, 7 Jul 2021 04:55:00 -0700 Subject: [PATCH] bzlmod: Add new features to http_archive rule Those features are needed by the Bzlmod system to fetch Bazel module dependencies from Bazel registries. - integrity: In addition to "sha256", users can specify the integrity value to verify the downloaded archive content. - remote_patches, remote_patch_strip: apply patche files downloaded from given URLs. This will be used to apply patch files hosted in Bazel registries. Related: https://github.com/bazelbuild/bazel/issues/13316 RELNOTES: None PiperOrigin-RevId: 383386903 --- .../bazel/repository/downloader/Checksum.java | 8 +- .../shell/bazel/external_integration_test.sh | 69 ++++++++ .../shell/bazel/external_patching_test.sh | 155 ++++++++++++++++++ tools/build_defs/repo/http.bzl | 32 +++- tools/build_defs/repo/utils.bzl | 40 ++++- 5 files changed, 294 insertions(+), 10 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/Checksum.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/Checksum.java index 8e05adf0247585..a63e1b4f4a5bf9 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/Checksum.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/Checksum.java @@ -25,6 +25,10 @@ public static final class InvalidChecksumException extends Exception { private InvalidChecksumException(KeyType keyType, String hash) { super("Invalid " + keyType + " checksum '" + hash + "'"); } + + private InvalidChecksumException(String msg) { + super(msg); + } } private final KeyType keyType; @@ -73,14 +77,14 @@ public static Checksum fromSubresourceIntegrity(String integrity) } if (keyType == null) { - throw new IllegalArgumentException( + throw new InvalidChecksumException( "Unsupported checksum algorithm: '" + integrity + "' (expected SHA-1, SHA-256, SHA-384, or SHA-512)"); } if (hash.length != expectedLength) { - throw new IllegalArgumentException( + throw new InvalidChecksumException( "Invalid " + keyType + " SRI checksum '" + integrity + "'"); } diff --git a/src/test/shell/bazel/external_integration_test.sh b/src/test/shell/bazel/external_integration_test.sh index c8745c7c82fca6..ec532fe80ac117 100755 --- a/src/test/shell/bazel/external_integration_test.sh +++ b/src/test/shell/bazel/external_integration_test.sh @@ -937,7 +937,76 @@ EOF shutdown_server } +function test_integrity_correct() { + REPO_PATH=$TEST_TMPDIR/repo + mkdir -p "$REPO_PATH" + cd "$REPO_PATH" + create_workspace_with_default_repos WORKSPACE + touch BUILD + zip -r repo.zip * + integrity="sha256-$(cat repo.zip | openssl dgst -sha256 -binary | openssl base64 -A)" + startup_server $PWD + cd - + + cat >> $(create_workspace_with_default_repos WORKSPACE) <> $(create_workspace_with_default_repos WORKSPACE) < $TEST_log 2>&1 && fail "Expected to fail" + expect_log "Unsupported checksum algorithm: 'a random string'" + shutdown_server +} + +function test_integrity_incorrect() { + REPO_PATH=$TEST_TMPDIR/repo + mkdir -p "$REPO_PATH" + cd "$REPO_PATH" + create_workspace_with_default_repos WORKSPACE + touch BUILD + zip -r repo.zip * + startup_server $PWD + cd - + + cat >> $(create_workspace_with_default_repos WORKSPACE) < $TEST_log 2>&1 && fail "Expected to fail" + expect_log "Error downloading \\[http://127.0.0.1:$fileserver_port/repo.zip\\] to" + # Bazel translates the integrity value back to the sha256 checksum. + expect_log "but wanted 61a6f762aaf60652cbf332879b8dcc2cfd81be2129a061da957d039eae77f0b0" + shutdown_server +} function test_same_name() { mkdir ext diff --git a/src/test/shell/bazel/external_patching_test.sh b/src/test/shell/bazel/external_patching_test.sh index 70003d2801d133..e2aaef77d6a49c 100755 --- a/src/test/shell/bazel/external_patching_test.sh +++ b/src/test/shell/bazel/external_patching_test.sh @@ -196,6 +196,161 @@ EOF expect_log 'Helpful message' } +test_remote_patch_on_top_of_local_patch() { + EXTREPODIR=`pwd` + EXTREPOURL="$(get_extrepourl ${EXTREPODIR})" + # Generate the remote patch file + cat > remote.patch <<'EOF' +--- a/foo.sh 2018-01-15 10:39:20.183909147 +0100 ++++ b/foo.sh 2018-01-15 10:43:35.331566052 +0100 +@@ -1,3 +1,3 @@ + #!/usr/bin/env sh + +-echo Here be dragons... ++echo There are dragons... +EOF + integrity="sha256-$(cat remote.patch | openssl dgst -sha256 -binary | openssl base64 -A)" + + mkdir main + cd main + + # Generate the local patch file + cat > local.patch <<'EOF' +--- foo.sh.orig 2021-07-05 15:16:49.000000000 +0200 ++++ foo.sh 2021-07-05 15:17:15.000000000 +0200 +@@ -1,3 +1,3 @@ +-#!/usr/bin/env sh ++#!/bin/sh + + echo There are dragons... +EOF + + cat > WORKSPACE < BUILD <<'EOF' +genrule( + name = "foo", + outs = ["foo.sh"], + srcs = ["@ext//:foo.sh"], + cmd = "cp $< $@; chmod u+x $@", + executable = True, +) +EOF + + bazel build :foo.sh + foopath=`bazel info bazel-bin`/foo.sh + grep -q 'There are' $foopath || fail "expected remote patch to be applied" + grep -q '/bin/sh' $foopath || fail "expected local patch to be applied" +} + +test_remote_patch_integrity_incorrect() { + EXTREPODIR=`pwd` + EXTREPOURL="$(get_extrepourl ${EXTREPODIR})" + # Generate the remote patch file + cat > remote.patch <<'EOF' +--- a/foo.sh 2018-01-15 10:39:20.183909147 +0100 ++++ b/foo.sh 2018-01-15 10:43:35.331566052 +0100 +@@ -1,3 +1,3 @@ + #!/usr/bin/env sh + +-echo Here be dragons... ++echo There are dragons... +EOF + + mkdir main + cd main + + cat > WORKSPACE < $TEST_log 2>&1 && fail "Expected to fail" + expect_log "Error downloading \\[.*/remote.patch\\] to" + expect_log "but wanted 61a6f762aaf60652cbf332879b8dcc2cfd81be2129a061da957d039eae77f0b0" +} + +test_remote_patches_with_same_base_name() { + EXTREPODIR=`pwd` + EXTREPOURL="$(get_extrepourl ${EXTREPODIR})" + + mkdir a + # Generate a remote patch file + cat > a/remote.patch <<'EOF' +--- a/foo.sh 2018-01-15 10:39:20.183909147 +0100 ++++ b/foo.sh 2018-01-15 10:43:35.331566052 +0100 +@@ -1,3 +1,3 @@ + #!/usr/bin/env sh + +-echo Here be dragons... ++echo There are dragons... +EOF + integrity_a="sha256-$(cat a/remote.patch | openssl dgst -sha256 -binary | openssl base64 -A)" + + mkdir b + # Generate another remote patch file with the same base name + cat > b/remote.patch <<'EOF' +--- a/foo.sh 2021-07-05 15:16:49.000000000 +0200 ++++ b/foo.sh 2021-07-05 15:17:15.000000000 +0200 +@@ -1,3 +1,3 @@ +-#!/usr/bin/env sh ++#!/bin/sh + + echo There are dragons... +EOF + integrity_b="sha256-$(cat b/remote.patch | openssl dgst -sha256 -binary | openssl base64 -A)" + + mkdir main + cd main + + cat > WORKSPACE < BUILD <<'EOF' +genrule( + name = "foo", + outs = ["foo.sh"], + srcs = ["@ext//:foo.sh"], + cmd = "cp $< $@; chmod u+x $@", + executable = True, +) +EOF + + bazel build :foo.sh + foopath=`bazel info bazel-bin`/foo.sh + grep -q 'There are' $foopath || fail "expected a/remote.patch to be applied" + grep -q '/bin/sh' $foopath || fail "expected b/remote.patch to be applied" +} + test_patch_git() { EXTREPODIR=`pwd` if $is_windows; then diff --git a/tools/build_defs/repo/http.bzl b/tools/build_defs/repo/http.bzl index 1affd6649a0340..a3d3aa4e73d3cf 100644 --- a/tools/build_defs/repo/http.bzl +++ b/tools/build_defs/repo/http.bzl @@ -116,11 +116,14 @@ def _http_archive_impl(ctx): ctx.attr.strip_prefix, canonical_id = ctx.attr.canonical_id, auth = auth, + integrity = ctx.attr.integrity, ) workspace_and_buildfile(ctx) - patch(ctx) + patch(ctx, auth = auth) - return update_attrs(ctx.attr, _http_archive_attrs.keys(), {"sha256": download_info.sha256}) + # We don't need to override the sha256 attribute if integrity is already specified. + sha256_override = {} if ctx.attr.integrity else {"sha256": download_info.sha256} + return update_attrs(ctx.attr, _http_archive_attrs.keys(), sha256_override) _HTTP_FILE_BUILD = """ package(default_visibility = ["//visibility:public"]) @@ -227,7 +230,15 @@ If all downloads fail, the rule will fail.""", This must match the SHA-256 of the file downloaded. _It is a security risk to omit the SHA-256 as remote files can change._ At best omitting this field will make your build non-hermetic. It is optional to make development -easier but should be set before shipping.""", +easier but either this attribute or `integrity` should be set before shipping.""", + ), + "integrity": attr.string( + doc = """Expected checksum in Subresource Integrity format of the file downloaded. + +This must match the checksum of the file downloaded. _It is a security risk +to omit the checksum as remote files can change._ At best omitting this +field will make your build non-hermetic. It is optional to make development +easier but either this attribute or `sha256` should be set before shipping.""", ), "netrc": attr.string( doc = "Location of the .netrc file to use for authentication", @@ -279,6 +290,19 @@ following: `"zip"`, `"jar"`, `"war"`, `"tar"`, `"tar.gz"`, `"tgz"`, "patch command line tool if `patch_tool` attribute is specified or there are " + "arguments other than `-p` in `patch_args` attribute.", ), + "remote_patches": attr.string_dict( + default = {}, + doc = + "A map of patch file URL to its integrity value, they are applied after extracting " + + "the archive and before applying patch files from the `patches` attribute. " + + "It uses the Bazel-native patch implementation, you can specify the patch strip " + + "number with `remote_patch_strip`", + ), + "remote_patch_strip": attr.int( + default = 0, + doc = + "The number of leading slashes to be stripped from the file name in the remote patches.", + ), "patch_tool": attr.string( default = "", doc = "The patch(1) utility to use. If this is specified, Bazel will use the specifed " + @@ -293,7 +317,7 @@ following: `"zip"`, `"jar"`, `"war"`, `"tar"`, `"tar.gz"`, `"tgz"`, "If arguments other than -p are specified, Bazel will fall back to use patch " + "command line tool instead of the Bazel-native patch implementation. When falling " + "back to patch command line tool and patch_tool attribute is not specified, " + - "`patch` will be used.", + "`patch` will be used. This only affects patch files in the `patches` attribute.", ), "patch_cmds": attr.string_list( default = [], diff --git a/tools/build_defs/repo/utils.bzl b/tools/build_defs/repo/utils.bzl index e13775507b0b95..7608a5e7ab9b6c 100644 --- a/tools/build_defs/repo/utils.bzl +++ b/tools/build_defs/repo/utils.bzl @@ -28,6 +28,9 @@ load( ``` """ +# Temporary directory for downloading remote patch files. +_REMOTE_PATCH_DIR = ".tmp_remote_patches" + def workspace_and_buildfile(ctx): """Utility function for writing WORKSPACE and, if requested, a BUILD file. @@ -69,7 +72,19 @@ def _use_native_patch(patch_args): return False return True -def patch(ctx, patches = None, patch_cmds = None, patch_cmds_win = None, patch_tool = None, patch_args = None): +def _download_patch(ctx, patch_url, integrity, auth): + name = patch_url.split("/")[-1] + patch_path = ctx.path(_REMOTE_PATCH_DIR).get_child(name) + ctx.download( + patch_url, + patch_path, + canonical_id = ctx.attr.canonical_id, + auth = auth, + integrity = integrity, + ) + return patch_path + +def patch(ctx, patches = None, patch_cmds = None, patch_cmds_win = None, patch_tool = None, patch_args = None, auth = None): """Implementation of patching an already extracted repository. This rule is intended to be used in the implementation function of @@ -90,15 +105,23 @@ def patch(ctx, patches = None, patch_cmds = None, patch_cmds_win = None, patch_t patch_tool: Path of the patch tool to execute for applying patches. String. patch_args: Arguments to pass to the patch tool. List of strings. + auth: An optional dict specifying authentication information for some of the URLs. """ bash_exe = ctx.os.environ["BAZEL_SH"] if "BAZEL_SH" in ctx.os.environ else "bash" powershell_exe = ctx.os.environ["BAZEL_POWERSHELL"] if "BAZEL_POWERSHELL" in ctx.os.environ else "powershell.exe" - if patches == None and hasattr(ctx.attr, "patches"): - patches = ctx.attr.patches if patches == None: patches = [] + if hasattr(ctx.attr, "patches") and ctx.attr.patches: + patches += ctx.attr.patches + + remote_patches = {} + remote_patch_strip = 0 + if hasattr(ctx.attr, "remote_patches") and ctx.attr.remote_patches: + if hasattr(ctx.attr, "remote_patch_strip"): + remote_patch_strip = ctx.attr.remote_patch_strip + remote_patches = ctx.attr.remote_patches if patch_cmds == None and hasattr(ctx.attr, "patch_cmds"): patch_cmds = ctx.attr.patch_cmds @@ -123,9 +146,18 @@ def patch(ctx, patches = None, patch_cmds = None, patch_cmds_win = None, patch_t if patch_args == None: patch_args = [] - if len(patches) > 0 or len(patch_cmds) > 0: + if len(remote_patches) > 0 or len(patches) > 0 or len(patch_cmds) > 0: ctx.report_progress("Patching repository") + # Apply remote patches + for patch_url in remote_patches: + integrity = remote_patches[patch_url] + patchfile = _download_patch(ctx, patch_url, integrity, auth) + ctx.patch(patchfile, remote_patch_strip) + ctx.delete(patchfile) + ctx.delete(ctx.path(_REMOTE_PATCH_DIR)) + + # Apply local patches if native_patch and _use_native_patch(patch_args): if patch_args: strip = int(patch_args[-1][2:])