Don't add test jobs if test results are up-to-date in the cache.

Bazel lacks a method to verify if test results are current in the cache. Additional details can be found at: bazelbuild/bazel#3978. To address this limitation, let's change the test runner so that it doesn't run tests and exits with a non-zero code immediately if the $TEST_CHECK_UP_TO_DATE_FILE file exists. To check test results in the cache, we need to create the $TEST_CHECK_UP_TO_DATE_FILE and run tests. Successful test execution indicates that the results are up-to-date in the cache. Signed-off-by: Andrei Vagin <avagin@google.com>
google · May 10, 2024 · d46d343 · d46d343
1 parent e77a65a
commit d46d343
Show file tree

Hide file tree

Showing 5 changed files with 91 additions and 32 deletions.
diff --git a/.buildkite/hooks/pre-command b/.buildkite/hooks/pre-command
@@ -49,9 +49,11 @@ fi
 set -x
 
 # Setup for parallelization with PARTITION and TOTAL_PARTITIONS.
-export PARTITION=${BUILDKITE_PARALLEL_JOB:-0}
-PARTITION=$((${PARTITION}+1)) # 1-indexed, but PARALLEL_JOB is 0-indexed.
-export TOTAL_PARTITIONS=${BUILDKITE_PARALLEL_JOB_COUNT:-1}
+if [ -z "${PARTITION:-}" ]; then
+ export PARTITION=${BUILDKITE_PARALLEL_JOB:-0}
+ PARTITION=$((${PARTITION}+1)) # 1-indexed, but PARALLEL_JOB is 0-indexed.
+ export TOTAL_PARTITIONS=${BUILDKITE_PARALLEL_JOB_COUNT:-1}
+fi
 
 if [[ "${BUILDKITE_BRANCH}" =~ ^test/ ]]; then
  # STABLE_VERSION depends on the most recent tag, so let's set the same tag
@@ -150,3 +152,9 @@ if [[ -n "${BUILDKITE_PIPELINE_ID}" && "${BUILDKITE_BRANCH}" =~ ^test/ ]]; then
  pipeline_add_env SKIP_LOADING_IMAGES 1
  fi
 fi
+
+export TEST_CHECK_UP_TO_DATE_FILE=/tmp/.gvisor-bazel-test-up-to-date
+
+if [[ -f "$TEST_CHECK_UP_TO_DATE_FILE" ]]; then
+ rm -rf "$TEST_CHECK_UP_TO_DATE_FILE"
+fi
diff --git a/.buildkite/pipeline.yaml b/.buildkite/pipeline.yaml
@@ -502,44 +502,23 @@ steps:
  - <<: *common
  <<: *docker
  label: ":php: PHP runtime tests"
- command: make php8.1.1-runtime-tests RUNTIME_ARGS=--directfs
- parallelism: 10
- agents:
- <<: *platform_specific_agents
- arch: "amd64"
+ command: "./tools/buildkite-runtime-tests 'php8.1.1' 5 --directfs"
  - <<: *common
  <<: *docker
  label: ":java: Java runtime tests"
- command: make java17-runtime-tests RUNTIME_ARGS=--directfs
- parallelism: 40
- agents:
- <<: *platform_specific_agents
- arch: "amd64"
+ command: "./tools/buildkite-runtime-tests 'java17' 20 --directfs"
  - <<: *common
  <<: *docker
  label: ":golang: Go runtime tests"
- command: make go1.22-runtime-tests RUNTIME_ARGS=--directfs
- parallelism: 10
- agents:
- <<: *platform_specific_agents
- arch: "amd64"
+ command: "./tools/buildkite-runtime-tests 'go1.22' 5 --directfs"
  - <<: *common
  <<: *docker
  label: ":node: NodeJS runtime tests"
- command: make nodejs16.13.2-runtime-tests RUNTIME_ARGS=--directfs
- parallelism: 10
- agents:
- <<: *platform_specific_agents
- arch: "amd64"
+ command: "./tools/buildkite-runtime-tests 'nodejs16.13.2' 5 --directfs"
  - <<: *common
  <<: *docker
  label: ":python: Python runtime tests"
- command: make python3.10.2-runtime-tests RUNTIME_ARGS=--directfs
- parallelism: 10
- agents:
- <<: *platform_specific_agents
- arch: "amd64"
-
+ command: "./tools/buildkite-runtime-tests 'python3.10.2' 5 --directfs"
  # Runtime tests (goferfs). Continuous only.
  - <<: *common
  <<: *docker

diff --git a/Makefile b/Makefile
@@ -116,6 +116,7 @@ RUNTIME_LOG_DIR ?= $(RUNTIME_DIR)/logs
 RUNTIME_LOGS ?= $(RUNTIME_LOG_DIR)/runsc.log.%TEST%.%TIMESTAMP%.%COMMAND%
 RUNTIME_ARGS ?=
 DOCKER_RELOAD_COMMAND ?= sudo systemctl reload docker
+TEST_CHECK_UP_TO_DATE_FILE ?= /tmp/.gvisor-bazel-test-up-to-date
 
 SYSFS_GROUP_PATH := /sys/fs/cgroup
 ifeq ($(shell stat -f -c "%T" "$(SYSFS_GROUP_PATH)" 2>/dev/null),cgroup2fs)
@@ -166,7 +167,7 @@ install_runtime = $(call configure,$(1),$(2) --TESTONLY-test-name-env=RUNSC_TEST
 # Don't use cached results, otherwise multiple runs using different runtimes
 # may be skipped, if all other inputs are the same.
 test_runtime = $(call test,--test_env=RUNTIME=$(1) --nocache_test_results $(PARTITIONS) $(2))
-test_runtime_cached = $(call test,--test_env=RUNTIME=$(1) $(PARTITIONS) $(2))
+test_runtime_cached = $(call test,--test_env=RUNTIME=$(1) --test_env=TEST_CHECK_UP_TO_DATE_FILE=$(TEST_CHECK_UP_TO_DATE_FILE) $(PARTITIONS) $(2))
 
 refresh: $(RUNTIME_BIN) ## Updates the runtime binary.
 .PHONY: refresh
@@ -257,8 +258,9 @@ RUNTIME_TESTS_FLAKY_IS_ERROR ?= true
 RUNTIME_TESTS_FLAKY_SHORT_CIRCUIT ?= true
 
 %-runtime-tests: load-runtimes_% $(RUNTIME_BIN)
- @$(call install_runtime,$(RUNTIME),--watchdog-action=panic --platform=systrap)
- @IMAGE_TAG=$(call tag,runtimes_$*) && \
+ test -f $(TEST_CHECK_UP_TO_DATE_FILE) || \
+ { $(call install_runtime,$(RUNTIME),--watchdog-action=panic --platform=systrap); }
+ IMAGE_TAG=$(call tag,runtimes_$*) && \
  $(call test_runtime_cached,$(RUNTIME),--test_timeout=1800 --test_env=RUNTIME_TESTS_FILTER=$(RUNTIME_TESTS_FILTER) --test_env=RUNTIME_TESTS_PER_TEST_TIMEOUT=$(RUNTIME_TESTS_PER_TEST_TIMEOUT) --test_env=RUNTIME_TESTS_RUNS_PER_TEST=$(RUNTIME_TESTS_RUNS_PER_TEST) --test_env=RUNTIME_TESTS_FLAKY_IS_ERROR=$(RUNTIME_TESTS_FLAKY_IS_ERROR) --test_env=RUNTIME_TESTS_FLAKY_SHORT_CIRCUIT=$(RUNTIME_TESTS_FLAKY_SHORT_CIRCUIT) --test_env=IMAGE_TAG=$${IMAGE_TAG} //test/runtimes:$*)
 
 do-tests: $(RUNTIME_BIN)

diff --git a/test/runtimes/defs.bzl b/test/runtimes/defs.bzl
@@ -20,8 +20,13 @@ def _runtime_test_impl(ctx):
 
  # Build a runner.
  runner = ctx.actions.declare_file("%s-executer" % ctx.label.name)
+ # If the TEST_CHECK_UP_TO_DATE_FILE file exists, the goal is only to check
+ # whether test results are up-to-date in the cache, so the runner has to
+ # return a non-zero code immediately.
  runner_content = "\n".join([
  "#!/bin/bash",
+ 'test -n "$TEST_SHARD_STATUS_FILE" && touch "$TEST_SHARD_STATUS_FILE"',
+ 'test -n "$TEST_CHECK_UP_TO_DATE_FILE" && test -f "$TEST_CHECK_UP_TO_DATE_FILE" && { echo "Test results in the cache are outdated." 1>&2; exit 1; }',
  "%s %s $@\n" % (ctx.files._runner[0].short_path, " ".join(args)),
  ])
  ctx.actions.write(runner, runner_content, is_executable = True)

diff --git a/tools/buildkite-runtime-tests b/tools/buildkite-runtime-tests
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# Copyright 2024 The gVisor Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Add runtime test jobs only if test results in the cache are outdated.
+
+set -xeo pipefail
+
+exec 1>&2
+
+cmd="$1"
+shift
+export TOTAL_PARTITIONS="$1"
+shift
+
+if [[ -z "$TEST_CHECK_UP_TO_DATE_FILE" ]]; then
+ echo "TEST_CHECK_UP_TO_DATE_FILE isn't set"
+ exit 1
+fi
+
+function cleanup() {
+ if [[ -f "$TEST_CHECK_UP_TO_DATE_FILE" ]]; then
+ rm -rf $TEST_CHECK_UP_TO_DATE_FILE
+ fi
+}
+
+touch $TEST_CHECK_UP_TO_DATE_FILE
+trap cleanup EXIT
+
+for PARTITION in `seq "$TOTAL_PARTITIONS"`; do
+ export PARTITION
+ if make $cmd-runtime-tests; then
+ continue
+ fi
+ ( cat <<EOF
+ - label: "${BUILDKITE_LABEL} ($PARTITION/$TOTAL_PARTITIONS)"
+ timeout_in_minutes: 30
+ retry:
+ automatic:
+ - exit_status: -1
+ limit: 10
+ - exit_status: "*"
+ limit: 2
+ command: make $cmd-runtime-tests RUNTIME_ARGS="$@"
+ agents:
+ arch: "amd64"
+ env:
+ BUILDKITE_PIPELINE_INSTALL_RUNTIME: true
+ TOTAL_PARTITIONS: $TOTAL_PARTITIONS
+ PARTITION: $PARTITION
+EOF
+ ) | tee /proc/self/fd/2 | buildkite-agent pipeline upload
+done