Skip to content

Commit

Permalink
fix: 4epochs kind test et al (#9358)
Browse files Browse the repository at this point in the history
- Updated lock file paths to use /var/lock instead of home directories
(~/). Avoids issues arising from different home directories across CI
jobs and standardizes things
- Try to fix 4epochs test running, enable it in both dockerized and
non-dockerized workflows. Put 4epochs tests (along with transfer/smoke)
behind a `network-all` label, which controls when the 4epochs tests
are run.
- Some testing script adjustments
- Silence progress/noise output on AWS S3/minio cache uploads
- Try to be very explicit about pushing a new image in deploy_spartan.sh
  • Loading branch information
ludamad authored Oct 24, 2024
1 parent bd05d87 commit e480e6b
Show file tree
Hide file tree
Showing 19 changed files with 220 additions and 72 deletions.
6 changes: 4 additions & 2 deletions .github/ensure-builder/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ inputs:
run:
# command to run once builder/tester available
required: false
spot_strategy:
default: BestEffort
runs:
# define an action, runs in OS of caller
using: composite
Expand All @@ -21,7 +23,7 @@ runs:
run: |
TYPE=${{ inputs.runner_type }}
# Try to use spot for every runner type for now
echo "spot_strategy=BestEffort" >> $GITHUB_OUTPUT
echo "spot_strategy=${{ inputs.spot_strategy }}" >> $GITHUB_OUTPUT
echo "runner_label=$USERNAME-$runner_type" >> $GITHUB_OUTPUT
if [[ $TYPE == builder-x86 ]]; then
# 128-core x86 instance types, aws chooses for us based on capacity
Expand Down Expand Up @@ -86,7 +88,7 @@ runs:
if ! scripts/run_on_builder "[ -d ~/run-$RUN_ID ]" ; then
echo "Copying local repo to spot"
(tar czf - .git || true) | scripts/run_on_builder \
"flock ~/git.lock bash -c '$UNPACK'"
"flock /var/lock/git.lock bash -c '$UNPACK'"
fi
- name: Attach EBS Cache Disk
Expand Down
2 changes: 1 addition & 1 deletion .github/ensure-tester-with-images/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ runs:
for image in ${{ inputs.builder_images_to_copy }} ; do
if ! docker image ls --format '{{.Repository}}:{{.Tag}}' | grep "$image" ; then
export FORCE_COLOR=1
flock submodule.lock git submodule update --init --recursive --recommend-shallow
flock /var/lock/submodule.lock git submodule update --init --recursive --recommend-shallow
${{ inputs.builder_command }}
break
fi
Expand Down
59 changes: 52 additions & 7 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ jobs:
ref: "${{ github.event.pull_request.head.sha }}"
- uses: ./.github/ci-setup-action
with:
concurrency_key: build-x86
concurrency_key: bench-summary-x86
- name: "Build and upload bench aggregate file"
working-directory: ./yarn-project/scripts
run: |
Expand Down Expand Up @@ -618,18 +618,61 @@ jobs:
timeout-minutes: 40
run: earthly-ci --no-output ./yarn-project/+prover-client-test

network-test-fake-proofs:
# proving disabled
network-test:
needs: [build, configure]
runs-on: ${{ needs.configure.outputs.username }}-x86
strategy:
fail-fast: false
matrix:
test: [test-transfer.sh, test-4epochs.sh]
steps:
- uses: actions/checkout@v4
with: { ref: "${{ env.GIT_COMMIT }}" }
- uses: ./.github/ci-setup-action
with:
concurrency_key: network-test-fake-proofs-x86
- name: "Prover Client Tests"
concurrency_key: network-test-${{ matrix.test }}
- name: "Setup and Local Network Tests"
timeout-minutes: 40
run: earthly-ci --no-output ./yarn-project/+network-test-fake-proofs
# Only allow transfer test to run on every (non-network-all) PR
if: matrix.test == 'test-transfer.sh' || github.ref_name == 'master' || contains(github.event.pull_request.labels.*.name, 'network-all')
run: earthly-ci --no-output ./yarn-project/+network-test --test=./${{ matrix.test }}

# note: proving disabled
kind-network-test:
needs: [build, configure]
runs-on: ${{ needs.configure.outputs.username }}-x86
strategy:
fail-fast: false
matrix:
test: [smoke.test.ts, transfer.test.ts, 4epochs.test.ts]
steps:
- uses: actions/checkout@v4
with: { ref: "${{ env.GIT_COMMIT }}" }
- uses: ./.github/ci-setup-action
with:
concurrency_key: kind-network-test-${{ matrix.test }}
- name: Setup and KIND Network Test
timeout-minutes: 60
uses: ./.github/ensure-tester-with-images
if: matrix.test == 'smoke.test.ts' || github.ref_name == 'master' || contains(github.event.pull_request.labels.*.name, 'network-all')
env:
USERNAME: ${{ needs.configure.outputs.username }}
with:
runner_type: 8core-tester-x86
spot_strategy: None # use on-demand machines
builder_type: builder-x86
# these are copied to the tester and expected by the earthly command below
# if they fail to copy, it will try to build them on the tester and fail
builder_images_to_copy: aztecprotocol/aztec:${{ env.GIT_COMMIT }} aztecprotocol/end-to-end:${{ env.GIT_COMMIT }}
# command to produce the images in case they don't exist
builder_command: scripts/earthly-ci ./yarn-project+export-e2e-test-images
run: |
sudo shutdown -P 60 # extend ttl
cd yarn-project/end-to-end
echo ${{ secrets.DOCKERHUB_PASSWORD }} | docker login -u aztecprotocolci --password-stdin
test=${{ matrix.test }}
NAMESPACE="${test%.test.ts}" FRESH_INSTALL=true VALUES_FILE=${values_file:-default.yaml} ./scripts/network_test.sh ./src/spartan/$test
l1-contracts-test:
needs: [build, configure]
Expand Down Expand Up @@ -859,7 +902,8 @@ jobs:
- yarn-project-formatting
- yarn-project-test
- prover-client-test
- network-test-fake-proofs
- network-test
- kind-network-test
- l1-contracts-test
- docs-preview
# - bb-bench # non-blocking
Expand Down Expand Up @@ -918,7 +962,8 @@ jobs:
- yarn-project-formatting
- yarn-project-test
- prover-client-test
- network-test-fake-proofs
- network-test
- kind-network-test
- l1-contracts-test
- docs-preview
# - bb-bench # non-blocking
Expand Down
2 changes: 1 addition & 1 deletion build-system/s3-cache-scripts/cache-download.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ function on_exit() {
trap on_exit EXIT

# Attempt to download the cache file
aws ${S3_BUILD_CACHE_AWS_PARAMS:-} s3 cp "s3://aztec-ci-artifacts/build-cache/$TAR_FILE" "$TAR_FILE" --quiet --no-sign-request &>/dev/null || (echo "Cache download of $TAR_FILE failed." && exit 1)
aws ${S3_BUILD_CACHE_AWS_PARAMS:-} s3 cp "s3://aztec-ci-artifacts/build-cache/$TAR_FILE" "$TAR_FILE" --quiet --no-sign-request || (echo "Cache download of $TAR_FILE failed." && exit 1)

# Extract the cache file
mkdir -p "$OUT_DIR"
Expand Down
2 changes: 1 addition & 1 deletion build-system/s3-cache-scripts/cache-upload.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ trap on_exit EXIT
# Rest of args are our binary paths
tar -czf "$TAR_FILE" $@

aws ${S3_BUILD_CACHE_AWS_PARAMS:-} s3 cp "$TAR_FILE" "s3://aztec-ci-artifacts/build-cache/$NAME"
aws ${S3_BUILD_CACHE_AWS_PARAMS:-} s3 cp "$TAR_FILE" "s3://aztec-ci-artifacts/build-cache/$NAME" --quiet --no-progress
2 changes: 1 addition & 1 deletion scripts/earthly-ci
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ export FORCE_COLOR=1
export EARTHLY_CONFIG=$(git rev-parse --show-toplevel)/.github/earthly-ci-config.yml

function wipe_non_cache_docker_state {
flock -n "$HOME/wipe_docker_state.lock" bash -c '
flock -n "/var/lock/wipe_docker_state.lock" bash -c '
echo "Detected corrupted docker images. Wiping and trying again."
# Based on https://stackoverflow.com/a/75849307
# wipe everything but volumes where we have earthly cache
Expand Down
2 changes: 1 addition & 1 deletion scripts/run_interleaved.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ shift

function cleanup() {
# kill everything in our process group except our process
trap - SIGTERM && kill $(pgrep -g $$ | grep -v $$) 2>/dev/null || true
trap - SIGTERM && kill $(pgrep -g $$ | grep -v $$) $(jobs -p) &>/dev/null || true
}
trap cleanup SIGINT SIGTERM EXIT

Expand Down
2 changes: 1 addition & 1 deletion scripts/run_native_testnet.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ display_help() {
echo
echo "Options:"
echo " -h Display this help message"
echo " -t Specify the test file (default: $TEST_FILE)"
echo " -t Specify the test file (default: $TEST_SCRIPT)"
echo " -p Specify the prover command (default: $PROVER_SCRIPT)"
echo " -val Specify the number of validators (default: $NUM_VALIDATORS)"
echo " -v Set logging level to verbose"
Expand Down
74 changes: 61 additions & 13 deletions spartan/scripts/deploy_spartan.sh
Original file line number Diff line number Diff line change
@@ -1,23 +1,71 @@
#!/bin/bash
set -eu
set -eux
set -o pipefail

TAG=$1
VALUES=$2
NAMESPACE=${3:-spartan}
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

if [ -z "$TAG" ]; then
echo "Usage: $0 <tag> <values>"
echo "Example: $0 latest 48-validators"
echo "Usage: $0 <docker image tag> <values> (optional: <namespace>)"
echo "Example: $0 latest 48-validators devnet"
exit 1
fi

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
function cleanup() {
set +x
# kill everything in our process group except our process
trap - SIGTERM && kill $(pgrep -g $$ | grep -v $$) $(jobs -p) &>/dev/null || true
}
trap cleanup SIGINT SIGTERM EXIT

function show_status_until_pxe_ready() {
set +x
sleep 15 # let helm upgrade start
kubectl get pods -n $NAMESPACE
for i in {1..20} ; do
# Show once a minute x 20 minutes
kubectl get pods -n $NAMESPACE
sleep 60
done
}
show_status_until_pxe_ready &

helm upgrade --install spartan $SCRIPT_DIR/../aztec-network \
--namespace spartan \
--create-namespace \
--values $SCRIPT_DIR/../aztec-network/values/$VALUES.yaml \
--set images.aztec.image="aztecprotocol/aztec:$TAG" \
--set network.public=true \
--wait \
--wait-for-jobs=true \
--timeout=30m
# Tail all pod logs in the namespace into a per-namespace deploy log file.
# Runs in the background for the lifetime of the deploy (killed by cleanup trap).
function log_stern() {
  set +x  # keep the xtrace noise of the parent (set -eux) out of this loop
  # Capture BOTH stdout and stderr of stern into the log file.
  # NOTE: redirections are processed left to right — '> file 2>&1' is required;
  # the previous '2>&1 > file' sent stderr to the terminal, not the file.
  stern $NAMESPACE -n $NAMESPACE > "$SCRIPT_DIR/logs/$NAMESPACE-deploy.log" 2>&1
}
log_stern &

function upgrade() {
# pull and resolve the image just to be absolutely sure k8s gets the latest image in the tag we want
docker pull --platform linux/amd64 aztecprotocol/aztec:$TAG
IMAGE=$(docker inspect --format='{{index .RepoDigests 0}}' aztecprotocol/aztec:$TAG)
if ! [ -z "${PRINT_ONLY:-}" ] ; then
helm template $NAMESPACE $SCRIPT_DIR/../aztec-network \
--namespace $NAMESPACE \
--create-namespace \
--values $SCRIPT_DIR/../aztec-network/values/$VALUES.yaml \
--set images.aztec.image="$IMAGE" \
--set network.public=true
else
helm upgrade --install $NAMESPACE $SCRIPT_DIR/../aztec-network \
--namespace $NAMESPACE \
--create-namespace \
--values $SCRIPT_DIR/../aztec-network/values/$VALUES.yaml \
--set images.aztec.image="$IMAGE" \
--set network.public=true \
--wait \
--wait-for-jobs=true \
--timeout=30m 2>&1
fi
}

# running the helm upgrade, but will try again if the setup l2 contracts job complains about being immutable
if ! upgrade | tee "$SCRIPT_DIR/logs/$NAMESPACE-helm.log" ; then
if grep 'cannot patch "'$NAMESPACE'-aztec-network-setup-l2-contracts"' "$SCRIPT_DIR/logs/$NAMESPACE-helm.log" ; then
kubectl delete job $NAMESPACE-aztec-network-setup-l2-contracts -n $NAMESPACE
upgrade
fi
fi
1 change: 1 addition & 0 deletions spartan/scripts/logs/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.log
25 changes: 23 additions & 2 deletions spartan/scripts/setup_local_k8s.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,31 @@ if ! command -v helm &> /dev/null; then
rm get_helm.sh
fi

if kind get clusters | grep -q "^kind$"; then
if ! command -v stern &> /dev/null; then
# Download Stern
curl -Lo stern.tar.gz https://github.com/stern/stern/releases/download/v1.31.0/stern_1.31.0_linux_amd64.tar.gz

# Extract the binary
tar -xzf stern.tar.gz

# Move it to /usr/local/bin and set permissions
sudo mv stern /usr/local/bin/stern
sudo chmod +x /usr/local/bin/stern

# Verify installation
stern --version

# Clean up
rm stern.tar.gz
fi

if kubectl config get-clusters | grep -q "^kind-kind$"; then
echo "Cluster 'kind' already exists. Skipping creation."
else
# Sometimes, kubectl does not have our kind context yet kind registers it as existing
# Ensure our context exists in kubectl
kind delete cluster || true
kind create cluster
fi

kubectl config use-context kind-kind
kubectl config use-context kind-kind || true
30 changes: 30 additions & 0 deletions spartan/scripts/test_spartan.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
# Targets a running cluster and tests 5 slots worth of transfers against it.
# Usage: test_spartan.sh [namespace]   (namespace defaults to 'spartan')
set -eu
set -o pipefail

# Test a deployed cluster

# Default only when the argument is UNSET ('${1-spartan}'), so an explicitly
# empty argument ("") still reaches the usage check below.
# NOTE: the previous '${1:-spartan}' also replaced an empty string with the
# default, which made the '-z' usage check below unreachable dead code.
NAMESPACE=${1-spartan}
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

if [ -z "$NAMESPACE" ]; then
  echo "Usage: $0 (optional: <namespace>)"
  echo "Example: $0 devnet"
  exit 1
fi

echo "Note: Repo should be bootstrapped with ./bootstrap.sh fast."

# Fetch the service URLs based on the namespace for injection in the test-transfer.sh
export BOOTNODE_URL=http://$(kubectl get svc -n $NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='$NAMESPACE-aztec-network-boot-node-lb-tcp')].status.loadBalancer.ingress[0].hostname}"):8080
export PXE_URL=http://$(kubectl get svc -n $NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='$NAMESPACE-aztec-network-pxe-lb')].status.loadBalancer.ingress[0].hostname}"):8080
export ETHEREUM_HOST=http://$(kubectl get svc -n $NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='$NAMESPACE-aztec-network-ethereum-lb')].status.loadBalancer.ingress[0].hostname}"):8545

echo "BOOTNODE_URL: $BOOTNODE_URL"
echo "PXE_URL: $PXE_URL"
echo "ETHEREUM_HOST: $ETHEREUM_HOST"

# hack to ensure L2 contracts are considered deployed
touch $SCRIPT_DIR/../../yarn-project/end-to-end/scripts/native-network/state/l2-contracts.env
bash -x $SCRIPT_DIR/../../yarn-project/end-to-end/scripts/native-network/test-transfer.sh
7 changes: 4 additions & 3 deletions yarn-project/Earthfile
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,8 @@ prover-client-test:

# NOTE: This is not in the end-to-end Earthfile as that is entirely LOCALLY commands that will go away sometime.
# Running this inside the main builder as the point is not to run this through dockerization.
network-test-fake-proofs:
network-test:
ARG test=./test-transfer.sh
FROM +build
WORKDIR /usr/src/
# Bare minimum git setup to run 'git rev-parse --show-toplevel'
Expand All @@ -291,14 +292,14 @@ network-test-fake-proofs:
# All script arguments are in the end-to-end/scripts/native-network folder
ENV LOG_LEVEL=verbose
RUN INTERLEAVED=true end-to-end/scripts/native_network_test.sh \
./test-transfer.sh \
"$test" \
./deploy-l1-contracts.sh \
./deploy-l2-contracts.sh \
./boot-node.sh \
./ethereum.sh \
"./prover-node.sh 8078 false" \
./pxe.sh \
"./validator.sh 8081 40401"
"./validators.sh 3"

publish-npm:
FROM +build
Expand Down
10 changes: 9 additions & 1 deletion yarn-project/bot/src/bot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ const TRANSFER_AMOUNT = 1;
export class Bot {
private log = createDebugLogger('aztec:bot');

private attempts: number = 0;
private successes: number = 0;

protected constructor(
public readonly wallet: Wallet,
public readonly token: TokenContract | EasyPrivateTokenContract,
Expand All @@ -39,6 +42,7 @@ export class Bot {
}

public async run() {
this.attempts++;
const logCtx = { runId: Date.now() * 1000 + Math.floor(Math.random() * 1000) };
const { privateTransfersPerTx, publicTransfersPerTx, feePaymentMethod, followChain, txMinedWaitSeconds } =
this.config;
Expand Down Expand Up @@ -96,7 +100,11 @@ export class Bot {
provenTimeout: txMinedWaitSeconds,
proven: followChain === 'PROVEN',
});
this.log.info(`Tx ${receipt.txHash} mined in block ${receipt.blockNumber}`, logCtx);
this.log.info(
`Tx #${this.attempts} ${receipt.txHash} successfully mined in block ${receipt.blockNumber} (stats: ${this.successes}/${this.attempts} success)`,
logCtx,
);
this.successes++;
}

public async getBalances() {
Expand Down
Loading

0 comments on commit e480e6b

Please sign in to comment.