Skip to content

Commit

Permalink
Use URLs as default canonical IDs in common repo rules
Browse files Browse the repository at this point in the history
RELNOTES[INC]: The `--experimental_repository_cache_urls_as_default_canonical_id` flag is no longer available. Instead, the `http_archive`, `http_file`, `http_jar`, `jvm_maven_import_external`, and `jvm_import_external` repository rules now use the URLs as the canonical ID if none is provided explicitly. If this behavior is not desired, it can be disabled via `--repo_env=BAZEL_HTTP_RULES_URLS_AS_DEFAULT_CANONICAL_ID=0`.

Fixes bazelbuild#19749

Closes bazelbuild#20047.

PiperOrigin-RevId: 580826938
Change-Id: I32bfb04084ce75b74f664794e1924d3188f58805
  • Loading branch information
fmeum committed Nov 9, 2023
1 parent f57618b commit 0ac10e0
Show file tree
Hide file tree
Showing 15 changed files with 145 additions and 65 deletions.
12 changes: 6 additions & 6 deletions MODULE.bazel.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions scripts/bootstrap/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,14 @@ fi

: ${JAVA_VERSION:="11"}

# TODO: remove `--repo_env=BAZEL_HTTP_RULES_URLS_AS_DEFAULT_CANONICAL_ID=0` once all dependencies are
# mirrored. See https://github.com/bazelbuild/bazel/pull/19549 for more context.
_BAZEL_ARGS="--spawn_strategy=standalone \
--nojava_header_compilation \
--strategy=Javac=worker --worker_quit_after_build --ignore_unsupported_sandboxing \
--compilation_mode=opt \
--repository_cache=derived/repository_cache \
--repo_env=BAZEL_HTTP_RULES_URLS_AS_DEFAULT_CANONICAL_ID=0 \
--extra_toolchains=//scripts/bootstrap:all \
--extra_toolchains=@bazel_tools//tools/python:autodetecting_toolchain \
--enable_bzlmod \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,6 @@ public void beforeCommand(CommandEnvironment env) throws AbruptExitException {
if (repoOptions.repositoryDownloaderRetries >= 0) {
downloadManager.setRetries(repoOptions.repositoryDownloaderRetries);
}
downloadManager.setUrlsAsDefaultCanonicalId(repoOptions.urlsAsDefaultCanonicalId);

repositoryCache.setHardlink(repoOptions.useHardlinks);
if (repoOptions.experimentalScaleTimeouts > 0.0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,15 +264,15 @@ public Converter() {

@Option(
name = "experimental_repository_cache_urls_as_default_canonical_id",
defaultValue = "false",
documentationCategory = OptionDocumentationCategory.BAZEL_CLIENT_OPTIONS,
effectTags = {OptionEffectTag.LOADING_AND_ANALYSIS},
metadataTags = {OptionMetadataTag.EXPERIMENTAL},
help =
"If true, use a string derived from the URLs of repository downloads as the canonical_id "
+ "if not specified. This causes a change in the URLs to result in a redownload even "
+ "if the cache contains a download with the same hash. This can be used to verify "
+ "that URL changes don't result in broken repositories being masked by the cache.")
defaultValue = "true",
documentationCategory = OptionDocumentationCategory.UNDOCUMENTED,
metadataTags = OptionMetadataTag.DEPRECATED,
effectTags = {OptionEffectTag.NO_OP},
deprecationWarning =
"This behavior is enabled by default for http_* and jvm_* rules and no "
+ "longer controlled by this flag. Use "
+ "--repo_env=BAZEL_HTTP_RULES_URLS_AS_DEFAULT_CANONICAL_ID=0 to disable it instead.",
help = "No-op.")
public boolean urlsAsDefaultCanonicalId;

@Option(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.stream.Collectors;
import javax.annotation.Nullable;

/**
Expand All @@ -59,7 +58,6 @@ public class DownloadManager {
private final Downloader downloader;
private boolean disableDownload = false;
private int retries = 0;
private boolean urlsAsDefaultCanonicalId;
@Nullable private Credentials netrcCreds;
private CredentialFactory credentialFactory = StaticCredentials::new;

Expand Down Expand Up @@ -90,10 +88,6 @@ public void setRetries(int retries) {
this.retries = retries;
}

public void setUrlsAsDefaultCanonicalId(boolean urlsAsDefaultCanonicalId) {
this.urlsAsDefaultCanonicalId = urlsAsDefaultCanonicalId;
}

public void setNetrcCreds(Credentials netrcCreds) {
this.netrcCreds = netrcCreds;
}
Expand Down Expand Up @@ -134,9 +128,6 @@ public Path download(
if (Thread.interrupted()) {
throw new InterruptedException();
}
if (Strings.isNullOrEmpty(canonicalId) && urlsAsDefaultCanonicalId) {
canonicalId = originalUrls.stream().map(URL::toExternalForm).collect(Collectors.joining(" "));
}

// TODO(andreisolo): This code path is inconsistent as the authHeaders are fetched from a
// .netrc only if it comes from a http_{archive,file,jar} - and it is handled directly
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ public void setup(BlackBoxTestContext context) throws IOException {
String sharedRepoCache = System.getenv("REPOSITORY_CACHE");
if (sharedRepoCache != null) {
lines.add("common --repository_cache=" + sharedRepoCache);
// TODO: Remove this flag once all dependencies are mirrored.
// See https://github.com/bazelbuild/bazel/pull/19549 for more context.
lines.add("common --repo_env=BAZEL_HTTP_RULES_URLS_AS_DEFAULT_CANONICAL_ID=0");
if (OS.getCurrent() == OS.DARWIN) {
// For reducing SSD usage on our physical Mac machines.
lines.add("common --experimental_repository_cache_hardlinks");
Expand Down
4 changes: 4 additions & 0 deletions src/test/py/bazel/bzlmod/bazel_fetch_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ def generatBuiltinModules(self):
self.ScratchFile('tools_mock/WORKSPACE')
self.ScratchFile('tools_mock/MODULE.bazel', ['module(name="bazel_tools")'])
self.ScratchFile('tools_mock/tools/build_defs/repo/BUILD')
self.CopyFile(
self.Rlocation('io_bazel/tools/build_defs/repo/cache.bzl'),
'tools_mock/tools/build_defs/repo/cache.bzl',
)
self.CopyFile(
self.Rlocation('io_bazel/tools/build_defs/repo/http.bzl'),
'tools_mock/tools/build_defs/repo/http.bzl',
Expand Down
6 changes: 6 additions & 0 deletions src/test/py/bazel/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,12 @@ def setUp(self):
shared_repo_cache = os.environ.get('REPOSITORY_CACHE')
if shared_repo_cache:
f.write('common --repository_cache={}\n'.format(shared_repo_cache))
# TODO(pcloudy): Remove this flag once all dependencies are mirrored.
# See https://github.com/bazelbuild/bazel/pull/19549 for more context.
f.write(
'common'
' --repo_env=BAZEL_HTTP_RULES_URLS_AS_DEFAULT_CANONICAL_ID=0\n'
)
if TestBase.IsDarwin():
# For reducing SSD usage on our physical Mac machines.
f.write('common --experimental_repository_cache_hardlinks\n')
Expand Down
41 changes: 26 additions & 15 deletions src/test/shell/bazel/bazel_repository_cache_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,9 @@ function test_fetch_value_with_existing_cache_and_no_network() {
cache_entry="$repo_cache_dir/content_addressable/sha256/$sha256"
mkdir -p "$cache_entry"
cp "$repo2_zip" "$cache_entry/file" # Artifacts are named uniformly as "file" in the cache
http_archive_url="http://localhost:$nc_port/bleh"
canonical_id_hash=$(printf "$http_archive_url" | sha256sum | cut -f 1 -d ' ')
touch "$cache_entry/id-$canonical_id_hash"

# Fetch without a server
shutdown_server
Expand Down Expand Up @@ -271,6 +274,7 @@ EOF
# to do without checksum. But we can safely do so, as the loopback device
# is reasonably safe against man-in-the-middle attacks.
bazel fetch --repository_cache="$repo_cache_dir" \
--repo_env=BAZEL_HTTP_RULES_URLS_AS_DEFAULT_CANONICAL_ID=0 \
//zoo:breeding-program >& $TEST_log \
|| fail "expected fetch to succeed"

Expand All @@ -283,6 +287,7 @@ EOF

# As we don't have a predicted cache, we expect fetching to fail now.
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
--repo_env=BAZEL_HTTP_RULES_URLS_AS_DEFAULT_CANONICAL_ID=0 \
&& fail "expected failure" || :

# However, if we add the hash, the value is taken from cache
Expand All @@ -298,6 +303,7 @@ http_archive(
)
EOF
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
--repo_env=BAZEL_HTTP_RULES_URLS_AS_DEFAULT_CANONICAL_ID=0 \
|| fail "expected fetch to succeed"
}

Expand Down Expand Up @@ -465,15 +471,19 @@ EOF
expect_log "Error downloading"
}

function test_break_url() {
function test_http_archive_no_default_canonical_id() {
setup_repository

bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
bazel fetch --repository_cache="$repo_cache_dir" \
--repo_env=BAZEL_HTTP_RULES_URLS_AS_DEFAULT_CANONICAL_ID=0 \
//zoo:breeding-program >& $TEST_log \
|| echo "Expected fetch to succeed"

shutdown_server

bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
bazel fetch --repository_cache="$repo_cache_dir" \
--repo_env=BAZEL_HTTP_RULES_URLS_AS_DEFAULT_CANONICAL_ID=0 \
//zoo:breeding-program >& $TEST_log \
|| echo "Expected fetch to succeed"

# Break url in WORKSPACE
Expand All @@ -489,24 +499,27 @@ http_archive(
)
EOF

# By default, cache entry will still match by sha256, even if url is changed.
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
# Without the default canonical id, cache entry will still match by sha256, even if url is
# changed.
bazel fetch --repository_cache="$repo_cache_dir" \
--repo_env=BAZEL_HTTP_RULES_URLS_AS_DEFAULT_CANONICAL_ID=0 \
//zoo:breeding-program >& $TEST_log \
|| echo "Expected fetch to succeed"
}

function test_experimental_repository_cache_urls_as_default_canonical_id() {

function test_http_archive_urls_as_default_canonical_id() {
# TODO: Remove when the integration test setup itself no longer relies on this.
add_to_bazelrc "common --repo_env=BAZEL_HTTP_RULES_URLS_AS_DEFAULT_CANONICAL_ID=1"

setup_repository

bazel fetch --repository_cache="$repo_cache_dir" \
--experimental_repository_cache_urls_as_default_canonical_id \
//zoo:breeding-program >& $TEST_log \
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
|| echo "Expected fetch to succeed"

shutdown_server

bazel fetch --repository_cache="$repo_cache_dir" \
--experimental_repository_cache_urls_as_default_canonical_id \
//zoo:breeding-program >& $TEST_log \
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
|| echo "Expected fetch to succeed"

# Break url in WORKSPACE
Expand All @@ -523,9 +536,7 @@ http_archive(
EOF

# As repository cache key should depend on urls, we expect fetching to fail now.
bazel fetch --repository_cache="$repo_cache_dir" \
--experimental_repository_cache_urls_as_default_canonical_id \
//zoo:breeding-program >& $TEST_log \
bazel fetch --repository_cache="$repo_cache_dir" //zoo:breeding-program >& $TEST_log \
&& fail "expected failure" || :
}

Expand Down
3 changes: 3 additions & 0 deletions src/test/shell/testenv.sh.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,9 @@ EOF
if [[ -n ${REPOSITORY_CACHE:-} ]]; then
echo "testenv.sh: Using repository cache at $REPOSITORY_CACHE."
echo "common --repository_cache=$REPOSITORY_CACHE" >> $TEST_TMPDIR/bazelrc
# TODO: Remove this flag once all dependencies are mirrored.
# See https://github.com/bazelbuild/bazel/pull/19549 for more context.
echo "common --repo_env=BAZEL_HTTP_RULES_URLS_AS_DEFAULT_CANONICAL_ID=0" >> $TEST_TMPDIR/bazelrc
if is_darwin; then
# For reducing SSD usage on our physical Mac machines.
echo "testenv.sh: Enabling --experimental_repository_cache_hardlinks"
Expand Down
8 changes: 4 additions & 4 deletions src/test/tools/bzlmod/MODULE.bazel.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions tools/build_defs/repo/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ filegroup(
filegroup(
name = "http_src",
srcs = [
"cache.bzl",
"http.bzl",
"utils.bzl",
],
Expand Down
50 changes: 50 additions & 0 deletions tools/build_defs/repo/cache.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright 2023 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# WARNING:
# https://github.com/bazelbuild/bazel/issues/17713
# .bzl files in this package (tools/build_defs/repo) are evaluated
# in a Starlark environment without "@_builtins" injection, and must not refer
# to symbols associated with build/workspace .bzl files

"""Returns the default canonical id to use for downloads."""

visibility("private")

DEFAULT_CANONICAL_ID_ENV = "BAZEL_HTTP_RULES_URLS_AS_DEFAULT_CANONICAL_ID"

CANONICAL_ID_DOC = """A canonical ID of the file downloaded.
If specified and non-empty, Bazel will not take the file from cache, unless it
was added to the cache by a request with the same canonical ID.
If unspecified or empty, Bazel by default uses the URLs of the file as the
canonical ID. This helps catch the common mistake of updating the URLs without
also updating the hash, resulting in builds that succeed locally but fail on
machines without the file in the cache. This behavior can be disabled with
--repo_env={env}=0.
""".format(env = DEFAULT_CANONICAL_ID_ENV)

def get_default_canonical_id(repository_ctx, urls):
"""Returns the default canonical id to use for downloads."""
if repository_ctx.os.environ.get(DEFAULT_CANONICAL_ID_ENV) == "0":
return ""

# Do not sort URLs to prevent the following scenario:
# 1. http_archive with urls = [B, A] created.
# 2. Successful fetch from B results in canonical ID "A B".
# 3. Order of urls is flipped to [A, B].
# 4. Fetch would reuse cache entry for "A B", even though A may be broken (it has never been
# fetched before).
return " ".join(urls)
Loading

0 comments on commit 0ac10e0

Please sign in to comment.