Skip to content

Commit

Permalink
feat: stable deployments for spartan (#9147)
Browse files Browse the repository at this point in the history
A set of fixes for spartan deployments that use the "default" and
"3-validators" values files.

The main change is that the 16- and 48-validator values files need the metrics
chart to be deployed, but the smaller ones don't.

This lets us run the KIND tests in CI without metrics (which may be unwise,
since if a test fails we lose all logs).

The main fix is to sleep before the boot node and validators start up. This
gives the k8s services time to assign them DNS names.

Also remove the resource limits on the nodes so they run faster.

---------

Co-authored-by: ludamad <adam.domurad@gmail.com>
  • Loading branch information
just-mitch and ludamad authored Oct 10, 2024
1 parent 25bd47b commit 3e1c02e
Show file tree
Hide file tree
Showing 12 changed files with 128 additions and 30 deletions.
3 changes: 2 additions & 1 deletion scripts/ci/get_e2e_jobs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ cd "$(dirname "$0")"/../..
BRANCH=$1
LABELS=$2

# Define the allow_list
# Define the jobs that will run on every PR
allow_list=(
"e2e-2-pxes"
"e2e-authwit"
Expand All @@ -26,6 +26,7 @@ allow_list=(
"e2e-cheat-codes"
"e2e-prover-fake-proofs"
"e2e-lending-contract"
"kind-network-smoke"
)

# Add labels from input to the allow_list
Expand Down
13 changes: 10 additions & 3 deletions spartan/aztec-network/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,20 @@ http://{{ include "aztec-network.fullname" . }}-metrics.{{ .Release.Namespace }}
{{- end -}}

{{- define "aztec-network.otelCollectorMetricsEndpoint" -}}
http://metrics-opentelemetry-collector.metrics:4318/v1/metrics
{{- if .Values.telemetry.enabled -}}
{{- if .Values.telemetry.otelCollectorEndpoint -}}
{{- .Values.telemetry.otelCollectorEndpoint -}}/v1/metrics
{{- end -}}
{{- end -}}
{{- end -}}

{{- define "aztec-network.otelCollectorTracesEndpoint" -}}
http://metrics-opentelemetry-collector.metrics:4318/v1/traces
{{- if .Values.telemetry.enabled -}}
{{- if .Values.telemetry.otelCollectorEndpoint -}}
{{- .Values.telemetry.otelCollectorEndpoint -}}/v1/traces
{{- end -}}
{{- end -}}
{{- end -}}



{{- define "helpers.flag" -}}
Expand Down
5 changes: 4 additions & 1 deletion spartan/aztec-network/templates/boot-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,13 @@ spec:
sleep 5
done
echo "Ethereum node is ready!"
{{- if .Values.telemetry.enabled }}
until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . }} > /dev/null; do
echo "Waiting for OpenTelemetry collector..."
sleep 5
done
echo "OpenTelemetry collector is ready!"
{{- end }}
- name: deploy-contracts
image: {{ .Values.images.aztec.image }}
command:
Expand All @@ -56,10 +58,11 @@ spec:
- name: boot-node
image: {{ .Values.images.aztec.image }}
command:
# sleep to allow dns name to be resolvable
[
"/bin/bash",
"-c",
"source /shared/contracts.env && env && node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --node --archiver --sequencer --pxe",
"sleep 10 && source /shared/contracts.env && env && node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --node --archiver --sequencer --pxe",
]
livenessProbe:
exec:
Expand Down
2 changes: 1 addition & 1 deletion spartan/aztec-network/templates/l2-contracts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ metadata:
data:
deploy-contracts.sh: |
#!/bin/sh
set -e
set -ex
# Run the deploy-l1-contracts command and capture the output
output=$(node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js deploy-l1-contracts --validators {{ join "," .Values.validator.validatorAddresses | quote }})
Expand Down
25 changes: 25 additions & 0 deletions spartan/aztec-network/templates/prover-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,30 @@ spec:
app: prover-node
spec:
initContainers:
- name: wait-for-boot-node
image: {{ .Values.images.curl.image }}
command:
- /bin/sh
- -c
- |
until curl -s -X POST -H 'Content-Type: application/json' \
-d '{"jsonrpc":"2.0","method":"web3_clientVersion","params":[],"id":67}' \
{{ include "aztec-network.ethereumHost" . }} | grep -q anvil; do
echo "Waiting for Ethereum node..."
sleep 5
done
echo "Ethereum node is ready!"
{{- if .Values.telemetry.enabled }}
until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . }} > /dev/null; do
echo "Waiting for OpenTelemetry collector..."
sleep 5
done
echo "OpenTelemetry collector is ready!"
{{- end }}
until curl --head --silent {{ include "aztec-network.bootNodeUrl" . }}/status; do
echo "Waiting for boot node..."
sleep 5
done
- name: configure-prover-env
image: "{{ .Values.images.aztec.image }}"
imagePullPolicy: {{ .Values.images.aztec.pullPolicy }}
Expand All @@ -33,6 +57,7 @@ spec:
env:
- name: ETHEREUM_HOST
value: {{ include "aztec-network.ethereumHost" . | quote }}

containers:
- name: prover-node
image: "{{ .Values.images.aztec.image }}"
Expand Down
11 changes: 11 additions & 0 deletions spartan/aztec-network/templates/pxe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,17 @@ spec:
{{- include "aztec-network.selectorLabels" . | nindent 8 }}
app: pxe
spec:
initContainers:
- name: wait-for-boot-node
image: {{ .Values.images.curl.image }}
command:
- /bin/sh
- -c
- |
until curl --head --silent {{ include "aztec-network.bootNodeUrl" . }}/status; do
echo "Waiting for boot node..."
sleep 5
done
containers:
- name: pxe
image: "{{ .Values.images.aztec.image }}"
Expand Down
27 changes: 26 additions & 1 deletion spartan/aztec-network/templates/validator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,31 @@ spec:
# We expect the validators to have already been added to the smart contract by this point - but this container still needs
# to be run in order to get the values
initContainers:
- name: wait-for-boot-node
image: {{ .Values.images.curl.image }}
command:
- /bin/sh
- -c
- |
until curl -s -X POST -H 'Content-Type: application/json' \
-d '{"jsonrpc":"2.0","method":"web3_clientVersion","params":[],"id":67}' \
{{ include "aztec-network.ethereumHost" . }} | grep -q anvil; do
echo "Waiting for Ethereum node..."
sleep 5
done
echo "Ethereum node is ready!"
{{- if .Values.telemetry.enabled }}
until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . }} > /dev/null; do
echo "Waiting for OpenTelemetry collector..."
sleep 5
done
echo "OpenTelemetry collector is ready!"
{{- end }}
until curl --head --silent {{ include "aztec-network.bootNodeUrl" . }}/status; do
echo "Waiting for boot node..."
sleep 5
done
- name: configure-validator-env
image: "{{ .Values.images.aztec.image }}"
imagePullPolicy: {{ .Values.images.aztec.pullPolicy }}
Expand Down Expand Up @@ -50,7 +75,7 @@ spec:
command:
- "/bin/bash"
- "-c"
- "source /shared/contracts.env && env && node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --node --archiver --sequencer"
- "sleep 10 && source /shared/contracts.env && env && node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --node --archiver --sequencer"
volumeMounts:
- name: shared-volume
mountPath: /shared
Expand Down
26 changes: 8 additions & 18 deletions spartan/aztec-network/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ network:
public: false
enableBots: true

telemetry:
enabled: false
otelCollectorEndpoint:

images:
aztec:
image: aztecprotocol/aztec
Expand Down Expand Up @@ -31,13 +35,14 @@ bootNode:
requests:
memory: "2Gi"
cpu: "200m"
limits:
memory: "4Gi"
cpu: "4"
storage: "8Gi"

validator:
replicas: 1
validatorKeys:
- 0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80
validatorAddresses:
- 0xf39Fd6e51aad88F6F4ce6aB8827279cffFb92266
service:
p2pPort: 40400
nodePort: 8080
Expand All @@ -54,9 +59,6 @@ validator:
requests:
memory: "2Gi"
cpu: "200m"
limits:
memory: "28Gi"
cpu: "7"
storage: "8Gi"

proverNode:
Expand All @@ -71,9 +73,6 @@ proverNode:
requests:
memory: "2Gi"
cpu: "200m"
limits:
memory: "120Gi"
cpu: "15"
storage: "8Gi"

pxe:
Expand All @@ -93,9 +92,6 @@ pxe:
requests:
memory: "2Gi"
cpu: "200m"
limits:
memory: "4Gi"
cpu: "1"

bot:
logLevel: "debug"
Expand Down Expand Up @@ -124,9 +120,6 @@ bot:
requests:
memory: "2Gi"
cpu: "200m"
limits:
memory: "4Gi"
cpu: "1"

ethereum:
replicas: 1
Expand All @@ -152,7 +145,4 @@ ethereum:
requests:
memory: "2Gi"
cpu: "200m"
limits:
memory: "4Gi"
cpu: "1"
storage: "8Gi"
10 changes: 10 additions & 0 deletions spartan/aztec-network/values/16-validators.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
##########
# BEWARE #
##########
# You need to deploy the metrics helm chart before using this values file.
# head to spartan/metrics and run `./install.sh`
# (then `./forward.sh` if you want to see it)
telemetry:
enabled: true
otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318

bootNode:
sequencer:
minTxsPerBlock: 4
Expand Down
10 changes: 10 additions & 0 deletions spartan/aztec-network/values/48-validators.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
##########
# BEWARE #
##########
# You need to deploy the metrics helm chart before using this values file.
# head to spartan/metrics and run `./install.sh`
# (then `./forward.sh` if you want to see it)
telemetry:
enabled: true
otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318

validator:
debug: "aztec:*,-aztec:avm_simulator:*,-aztec:libp2p_service"
replicas: 48
Expand Down
4 changes: 2 additions & 2 deletions yarn-project/end-to-end/Earthfile
Original file line number Diff line number Diff line change
Expand Up @@ -292,12 +292,12 @@ e2e-cli-wallet:
LOCALLY
RUN COMPOSE_FILE=scripts/docker-compose-wallet.yml ./scripts/e2e_compose_test.sh e2e_cli_wallet

network-smoke:
kind-network-smoke:
ARG values_file
LOCALLY
RUN NAMESPACE=smoke FRESH_INSTALL=true VALUES_FILE=${values_file:-default.yaml} ./scripts/network_test.sh ./src/spartan/smoke.test.ts

network-transfer:
kind-network-transfer:
ARG values_file
LOCALLY
RUN NAMESPACE=transfer FRESH_INSTALL=true VALUES_FILE=${values_file:-default.yaml} ./scripts/network_test.sh ./src/spartan/transfer.test.ts
22 changes: 19 additions & 3 deletions yarn-project/end-to-end/scripts/network_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Main positional parameter
TEST="$1"

# Resolve the repository root once so later paths can be built from it.
REPO=$(git rev-parse --show-toplevel)

# The local k8s setup script is only run on x86_64 Linux hosts; every other
# platform just gets a notice and carries on.
if [[ "$(uname)" != "Linux" || "$(uname -m)" != "x86_64" ]]; then
  echo "Not on x64 Linux, not installing k8s and helm."
else
  "${REPO}/spartan/scripts/setup_local_k8s.sh"
fi

# Default values for environment variables
VALUES_FILE="${VALUES_FILE:-default.yaml}"
CHAOS_VALUES="${CHAOS_VALUES:-}"
Expand Down Expand Up @@ -55,12 +62,13 @@ function show_status_until_pxe_ready() {
}

show_status_until_pxe_ready &
SHOW_STATUS_PID=$!

# Install the Helm chart
helm upgrade --install spartan "$(git rev-parse --show-toplevel)/spartan/aztec-network/" \
helm upgrade --install spartan "$REPO/spartan/aztec-network/" \
--namespace "$NAMESPACE" \
--create-namespace \
--values "$(git rev-parse --show-toplevel)/spartan/aztec-network/values/$VALUES_FILE" \
--values "$REPO/spartan/aztec-network/values/$VALUES_FILE" \
--set images.aztec.image="aztecprotocol/aztec:$AZTEC_DOCKER_TAG" \
--set ingress.enabled=true \
--wait \
Expand All @@ -71,8 +79,16 @@ kubectl wait pod -l app==pxe --for=condition=Ready -n "$NAMESPACE" --timeout=10m

# tunnel in to get access directly to our PXE service in k8s
(kubectl port-forward --namespace $NAMESPACE svc/spartan-aztec-network-pxe 9082:8080 2>/dev/null >/dev/null || true) &
PORT_FORWARD_PID=$!

#######################################
# Stop the background helpers started by this script: the kubectl
# port-forward tunnel and the status-display loop.
# Globals:   PORT_FORWARD_PID (read), SHOW_STATUS_PID (read)
# Arguments: none
# Outputs:   writes "Cleaning up..." to stdout
# Returns:   always 0, so it is safe to call from a trap (even repeatedly)
#######################################
cleanup() {
  echo "Cleaning up..."
  local pid
  # ${VAR:-} keeps this safe under `set -u` if a PID was never recorded.
  for pid in "${PORT_FORWARD_PID:-}" "${SHOW_STATUS_PID:-}"; do
    # Nothing to do for a helper that was never started.
    [ -n "$pid" ] || continue
    # Best effort: the helper may already have exited (or this handler may
    # already have run once), so a failed kill is expected and not reported.
    kill "$pid" 2>/dev/null || true
  done
  return 0
}

trap cleanup EXIT SIGINT SIGTERM

# run our test in the host network namespace (so we can access the above with localhost)
docker run --rm --network=host \
-e PXE_URL=http://localhost:9082 \
-e DEBUG="aztec:*" \
Expand Down

0 comments on commit 3e1c02e

Please sign in to comment.