From 8efe7ea9f79a72e15b47f8cbd388661fbe1203f8 Mon Sep 17 00:00:00 2001 From: Mitch Date: Thu, 14 Nov 2024 12:21:46 -0500 Subject: [PATCH] feat: gating_passive.test.ts This test: - updates the aztec network deployment, allowing validators to use each other as boot nodes - applies the "network-requirements" network shaping - permanently disables the boot node - runs 3 epochs during which it: - kills 25% of the validators - asserts that we miss less than 50% of slots Other work in this branch includes: - add `ignoreDroppedReceiptsFor` TX wait options - this allows sending a TX to one node, and awaiting it on another since we need time for p2p propagation - we need this since we have shifted the PXE to point at the top-level validator service, which load balances across individuals - this may help with #9613 - scalable loki deployment for prod - more visible logging for core sequencer operations - better error handling during the setup of l2 contracts - better error handling in the pxe - rename the network shaping charts to "aztec-chaos-scenarios" --- .github/workflows/ci.yml | 4 + spartan/aztec-chaos-scenarios/Chart.yaml | 6 + .../scripts/stop_experiments.sh | 0 .../templates/_helpers.tpl | 11 +- .../templates/boot-node-failure.yaml | 21 ++ .../templates/network-shaping.yaml} | 28 +-- .../templates/prover-failure.yaml | 21 ++ .../templates/validator-kill.yaml | 21 ++ .../values.yaml | 21 +- .../values/boot-node-failure.yaml | 6 + .../values/hard.yaml | 0 .../values/mild.yaml | 0 .../values/moderate.yaml | 2 +- .../values/network-requirements.yaml | 23 +++ .../values/prover-failure.yaml | 6 + .../values/rough.yaml | 0 .../values/validator-kill.yaml | 6 + spartan/aztec-network/templates/_helpers.tpl | 2 +- .../aztec-network/templates/prover-node.yaml | 2 +- spartan/aztec-network/templates/pxe.yaml | 2 +- .../templates/setup-l2-contracts.yaml | 2 +- .../templates/transaction-bot.yaml | 4 +- .../aztec-network/templates/validator.yaml | 60 ++++-- 
spartan/aztec-network/values.yaml | 11 +- .../values/4-validators-with-metrics.yaml | 28 +++ .../aztec-network/values/48-validators.yaml | 1 - .../metrics/{install.sh => install-kind.sh} | 3 +- spartan/metrics/install-prod.sh | 2 +- spartan/metrics/values.yaml | 37 +--- spartan/metrics/values/gke-autopilot.yaml | 4 + spartan/metrics/values/kind.yaml | 25 +++ spartan/metrics/values/prod.yaml | 46 +++++ spartan/network-shaping/Chart.yaml | 6 - .../network-shaping/values/kill-provers.yaml | 17 -- spartan/scripts/setup_local_k8s.sh | 2 +- .../aztec.js/src/contract/sent_tx.test.ts | 2 +- yarn-project/aztec.js/src/contract/sent_tx.ts | 16 +- .../cli/src/cmds/misc/setup_contracts.ts | 22 +- .../end-to-end/scripts/network_test.sh | 11 +- .../src/fixtures/snapshot_manager.ts | 5 + .../end-to-end/src/spartan/4epochs.test.ts | 2 +- .../src/spartan/gating-passive.test.ts | 116 +++++++++++ .../end-to-end/src/spartan/reorg.test.ts | 9 +- .../end-to-end/src/spartan/smoke.test.ts | 2 +- .../end-to-end/src/spartan/transfer.test.ts | 2 +- .../src/spartan/{k8_utils.ts => utils.ts} | 194 ++++++++++++++++-- .../pxe/src/pxe_service/pxe_service.ts | 122 ++++++----- .../src/sequencer/sequencer.test.ts | 8 +- .../src/sequencer/sequencer.ts | 82 +++++--- .../sequencer-client/src/sequencer/utils.ts | 5 +- 50 files changed, 786 insertions(+), 242 deletions(-) create mode 100644 spartan/aztec-chaos-scenarios/Chart.yaml rename spartan/{network-shaping => aztec-chaos-scenarios}/scripts/stop_experiments.sh (100%) rename spartan/{network-shaping => aztec-chaos-scenarios}/templates/_helpers.tpl (74%) create mode 100644 spartan/aztec-chaos-scenarios/templates/boot-node-failure.yaml rename spartan/{network-shaping/templates/network-chaos.yaml => aztec-chaos-scenarios/templates/network-shaping.yaml} (72%) create mode 100644 spartan/aztec-chaos-scenarios/templates/prover-failure.yaml create mode 100644 spartan/aztec-chaos-scenarios/templates/validator-kill.yaml rename spartan/{network-shaping => 
aztec-chaos-scenarios}/values.yaml (89%) create mode 100644 spartan/aztec-chaos-scenarios/values/boot-node-failure.yaml rename spartan/{network-shaping => aztec-chaos-scenarios}/values/hard.yaml (100%) rename spartan/{network-shaping => aztec-chaos-scenarios}/values/mild.yaml (100%) rename spartan/{network-shaping => aztec-chaos-scenarios}/values/moderate.yaml (95%) create mode 100644 spartan/aztec-chaos-scenarios/values/network-requirements.yaml create mode 100644 spartan/aztec-chaos-scenarios/values/prover-failure.yaml rename spartan/{network-shaping => aztec-chaos-scenarios}/values/rough.yaml (100%) create mode 100644 spartan/aztec-chaos-scenarios/values/validator-kill.yaml create mode 100644 spartan/aztec-network/values/4-validators-with-metrics.yaml rename spartan/metrics/{install.sh => install-kind.sh} (80%) create mode 100644 spartan/metrics/values/gke-autopilot.yaml create mode 100644 spartan/metrics/values/kind.yaml delete mode 100644 spartan/network-shaping/Chart.yaml delete mode 100644 spartan/network-shaping/values/kill-provers.yaml create mode 100644 yarn-project/end-to-end/src/spartan/gating-passive.test.ts rename yarn-project/end-to-end/src/spartan/{k8_utils.ts => utils.ts} (54%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 493eb9adfd0..9b8c8d28949 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -711,6 +711,10 @@ jobs: values: 16-validators runner_type: 16core-tester-x86 timeout: 60 + - test: gating-passive.test.ts + values: 16-validators + runner_type: 16core-tester-x86 + timeout: 60 steps: - uses: actions/checkout@v4 with: { ref: "${{ env.GIT_COMMIT }}" } diff --git a/spartan/aztec-chaos-scenarios/Chart.yaml b/spartan/aztec-chaos-scenarios/Chart.yaml new file mode 100644 index 00000000000..7bb3bfd6136 --- /dev/null +++ b/spartan/aztec-chaos-scenarios/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: aztec-chaos-scenarios +description: Chaos scenarios for spartan using chaos-mesh +type: application 
+version: 0.1.0 +appVersion: "1.0.0" diff --git a/spartan/network-shaping/scripts/stop_experiments.sh b/spartan/aztec-chaos-scenarios/scripts/stop_experiments.sh similarity index 100% rename from spartan/network-shaping/scripts/stop_experiments.sh rename to spartan/aztec-chaos-scenarios/scripts/stop_experiments.sh diff --git a/spartan/network-shaping/templates/_helpers.tpl b/spartan/aztec-chaos-scenarios/templates/_helpers.tpl similarity index 74% rename from spartan/network-shaping/templates/_helpers.tpl rename to spartan/aztec-chaos-scenarios/templates/_helpers.tpl index 2239bfc3a7e..76ae9b4d9b4 100644 --- a/spartan/network-shaping/templates/_helpers.tpl +++ b/spartan/aztec-chaos-scenarios/templates/_helpers.tpl @@ -1,7 +1,7 @@ {{/* Create a default fully qualified app name. */}} -{{- define "network-shaping.fullname" -}} +{{- define "aztec-chaos-scenarios.fullname" -}} {{- if .Values.fullnameOverride }} {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} {{- else }} @@ -14,17 +14,12 @@ Create a default fully qualified app name. {{- end }} {{- end }} -{{/* -Selector labels -*/}} -{{- define "chaos-mesh.selectorLabels" -}} -{{- end }} {{/* Common labels */}} -{{- define "network-shaping.labels" -}} -app.kubernetes.io/name: {{ include "network-shaping.fullname" . }} +{{- define "aztec-chaos-scenarios.labels" -}} +app.kubernetes.io/name: {{ include "aztec-chaos-scenarios.fullname" . 
}} app.kubernetes.io/instance: {{ .Release.Name }} {{- if .Chart.AppVersion }} app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} diff --git a/spartan/aztec-chaos-scenarios/templates/boot-node-failure.yaml b/spartan/aztec-chaos-scenarios/templates/boot-node-failure.yaml new file mode 100644 index 00000000000..b31fbf99dbc --- /dev/null +++ b/spartan/aztec-chaos-scenarios/templates/boot-node-failure.yaml @@ -0,0 +1,21 @@ +{{- if .Values.bootNodeFailure.enabled }} +--- +apiVersion: chaos-mesh.org/v1alpha1 +kind: PodChaos +metadata: + name: {{ .Values.global.targetNamespace }}-boot-node-failure + namespace: {{ .Values.global.chaosMeshNamespace }} + labels: + {{- include "aztec-chaos-scenarios.labels" . | nindent 4 }} + annotations: + "helm.sh/resource-policy": keep +spec: + action: pod-failure + mode: all + selector: + namespaces: + - {{ .Values.global.targetNamespace }} + labelSelectors: + app: boot-node + duration: {{ .Values.bootNodeFailure.duration }} +{{- end }} diff --git a/spartan/network-shaping/templates/network-chaos.yaml b/spartan/aztec-chaos-scenarios/templates/network-shaping.yaml similarity index 72% rename from spartan/network-shaping/templates/network-chaos.yaml rename to spartan/aztec-chaos-scenarios/templates/network-shaping.yaml index 21c954767cb..6e2a86de5f0 100644 --- a/spartan/network-shaping/templates/network-chaos.yaml +++ b/spartan/aztec-chaos-scenarios/templates/network-shaping.yaml @@ -7,7 +7,7 @@ metadata: name: {{ .Values.global.targetNamespace }}-latency namespace: {{ .Values.global.chaosMeshNamespace }} labels: - {{- include "network-shaping.labels" . | nindent 4 }} + {{- include "aztec-chaos-scenarios.labels" . | nindent 4 }} annotations: "helm.sh/hook": post-install,post-upgrade "helm.sh/hook-weight": "0" @@ -31,7 +31,7 @@ metadata: name: {{ .Values.global.targetNamespace }}-bandwidth namespace: {{ .Values.global.chaosMeshNamespace }} labels: - {{- include "network-shaping.labels" . 
| nindent 4 }} + {{- include "aztec-chaos-scenarios.labels" . | nindent 4 }} annotations: "helm.sh/hook": post-install,post-upgrade "helm.sh/hook-weight": "0" @@ -57,7 +57,7 @@ metadata: name: {{ .Values.global.targetNamespace }}-packet-loss namespace: {{ .Values.global.chaosMeshNamespace }} labels: - {{- include "network-shaping.labels" . | nindent 4 }} + {{- include "aztec-chaos-scenarios.labels" . | nindent 4 }} annotations: "helm.sh/resource-policy": keep spec: @@ -72,26 +72,4 @@ spec: duration: 8760h {{- end }} -{{- if .Values.networkShaping.conditions.killProvers.enabled }} ---- -apiVersion: chaos-mesh.org/v1alpha1 -kind: PodChaos -metadata: - name: {{ .Values.global.targetNamespace }}-kill-provers - namespace: {{ .Values.global.chaosMeshNamespace }} - labels: - {{- include "network-shaping.labels" . | nindent 4 }} - annotations: - "helm.sh/resource-policy": keep -spec: - action: pod-failure - mode: all - selector: - namespaces: - - {{ .Values.global.targetNamespace }} - labelSelectors: - app: prover-node - duration: {{ .Values.networkShaping.conditions.killProvers.duration }} {{- end }} - -{{- end }} \ No newline at end of file diff --git a/spartan/aztec-chaos-scenarios/templates/prover-failure.yaml b/spartan/aztec-chaos-scenarios/templates/prover-failure.yaml new file mode 100644 index 00000000000..ad02d238350 --- /dev/null +++ b/spartan/aztec-chaos-scenarios/templates/prover-failure.yaml @@ -0,0 +1,21 @@ +{{- if .Values.proverFailure.enabled }} +--- +apiVersion: chaos-mesh.org/v1alpha1 +kind: PodChaos +metadata: + name: {{ .Values.global.targetNamespace }}-prover-failure + namespace: {{ .Values.global.chaosMeshNamespace }} + labels: + {{- include "aztec-chaos-scenarios.labels" . 
| nindent 4 }} + annotations: + "helm.sh/resource-policy": keep +spec: + action: pod-failure + mode: all + selector: + namespaces: + - {{ .Values.global.targetNamespace }} + labelSelectors: + app: prover-node + duration: {{ .Values.proverFailure.duration }} +{{- end }} diff --git a/spartan/aztec-chaos-scenarios/templates/validator-kill.yaml b/spartan/aztec-chaos-scenarios/templates/validator-kill.yaml new file mode 100644 index 00000000000..11177c404f7 --- /dev/null +++ b/spartan/aztec-chaos-scenarios/templates/validator-kill.yaml @@ -0,0 +1,21 @@ +{{- if .Values.validatorKill.enabled }} +--- +apiVersion: chaos-mesh.org/v1alpha1 +kind: PodChaos +metadata: + name: {{ .Values.global.targetNamespace }}-validator-kill + namespace: {{ .Values.global.chaosMeshNamespace }} + labels: + {{- include "aztec-chaos-scenarios.labels" . | nindent 4 }} + annotations: + "helm.sh/resource-policy": keep +spec: + action: pod-kill + mode: fixed-percent + value: {{ .Values.validatorKill.percent | quote }} + selector: + namespaces: + - {{ .Values.global.targetNamespace }} + labelSelectors: + app: validator +{{- end }} diff --git a/spartan/network-shaping/values.yaml b/spartan/aztec-chaos-scenarios/values.yaml similarity index 89% rename from spartan/network-shaping/values.yaml rename to spartan/aztec-chaos-scenarios/values.yaml index 0b3ed0d698e..cb85d9e008f 100644 --- a/spartan/network-shaping/values.yaml +++ b/spartan/aztec-chaos-scenarios/values.yaml @@ -1,3 +1,6 @@ +nameOverride: null +fullnameOverride: null + global: # When deploying, override the namespace to where spartan will deploy to, this will apply all chaos experiments to all pods within that namespace # run deployment with --values global.namespace=your-namespace @@ -7,12 +10,12 @@ global: # Network shaping configuration networkShaping: # Master switch to enable network shaping - enabled: true + enabled: false # Default settings defaultSettings: mode: all - # Set duration to 1 year so the the experiment will run 
indefinitely unless overridden + # Set duration to 1 year so the experiment will run indefinitely unless overridden duration: 8760h # Network conditions to apply @@ -62,9 +65,17 @@ networkShaping: # Buffer = smoother bandwidth restriction but higher memory usage buffer: 1000 - killProvers: - enabled: false - duration: 13m +proverFailure: + enabled: false + duration: 13m + +validatorKill: + enabled: false + percent: 30 + +bootNodeFailure: + enabled: false + duration: 60m ## Here are some exciting example configurations created by claude: # Example use cases for different configurations: diff --git a/spartan/aztec-chaos-scenarios/values/boot-node-failure.yaml b/spartan/aztec-chaos-scenarios/values/boot-node-failure.yaml new file mode 100644 index 00000000000..5a965eb76f6 --- /dev/null +++ b/spartan/aztec-chaos-scenarios/values/boot-node-failure.yaml @@ -0,0 +1,6 @@ +global: + namespace: "smoke" + +bootNodeFailure: + enabled: true + duration: 60m diff --git a/spartan/network-shaping/values/hard.yaml b/spartan/aztec-chaos-scenarios/values/hard.yaml similarity index 100% rename from spartan/network-shaping/values/hard.yaml rename to spartan/aztec-chaos-scenarios/values/hard.yaml diff --git a/spartan/network-shaping/values/mild.yaml b/spartan/aztec-chaos-scenarios/values/mild.yaml similarity index 100% rename from spartan/network-shaping/values/mild.yaml rename to spartan/aztec-chaos-scenarios/values/mild.yaml diff --git a/spartan/network-shaping/values/moderate.yaml b/spartan/aztec-chaos-scenarios/values/moderate.yaml similarity index 95% rename from spartan/network-shaping/values/moderate.yaml rename to spartan/aztec-chaos-scenarios/values/moderate.yaml index a32d4a09ff5..5c91eded622 100644 --- a/spartan/network-shaping/values/moderate.yaml +++ b/spartan/aztec-chaos-scenarios/values/moderate.yaml @@ -22,4 +22,4 @@ networkShaping: packetLoss: enabled: true loss: "0.5" - correlation: "60" \ No newline at end of file + correlation: "60" diff --git 
a/spartan/aztec-chaos-scenarios/values/network-requirements.yaml b/spartan/aztec-chaos-scenarios/values/network-requirements.yaml new file mode 100644 index 00000000000..0f5e4c4d3c9 --- /dev/null +++ b/spartan/aztec-chaos-scenarios/values/network-requirements.yaml @@ -0,0 +1,23 @@ +# Imposes the network conditions that are stated as requirements for node operators +global: + namespace: "smoke" + +networkShaping: + enabled: true + conditions: + latency: + enabled: true + delay: + # Regional network latency (e.g., cross-country) + latency: 100ms + jitter: 20ms + correlation: "75" + bandwidth: + enabled: true + rate: 250mbps + limit: 125000000 + buffer: 25000 + packetLoss: + enabled: true + loss: "0.5" + correlation: "60" diff --git a/spartan/aztec-chaos-scenarios/values/prover-failure.yaml b/spartan/aztec-chaos-scenarios/values/prover-failure.yaml new file mode 100644 index 00000000000..16a52fb1d60 --- /dev/null +++ b/spartan/aztec-chaos-scenarios/values/prover-failure.yaml @@ -0,0 +1,6 @@ +global: + namespace: "smoke" + +proverFailure: + enabled: true + duration: 13m diff --git a/spartan/network-shaping/values/rough.yaml b/spartan/aztec-chaos-scenarios/values/rough.yaml similarity index 100% rename from spartan/network-shaping/values/rough.yaml rename to spartan/aztec-chaos-scenarios/values/rough.yaml diff --git a/spartan/aztec-chaos-scenarios/values/validator-kill.yaml b/spartan/aztec-chaos-scenarios/values/validator-kill.yaml new file mode 100644 index 00000000000..695e4a30d94 --- /dev/null +++ b/spartan/aztec-chaos-scenarios/values/validator-kill.yaml @@ -0,0 +1,6 @@ +global: + namespace: "smoke" + +validatorKill: + enabled: true + percent: 25 diff --git a/spartan/aztec-network/templates/_helpers.tpl b/spartan/aztec-network/templates/_helpers.tpl index f9be2d9ecaa..33f8dda0671 100644 --- a/spartan/aztec-network/templates/_helpers.tpl +++ b/spartan/aztec-network/templates/_helpers.tpl @@ -78,7 +78,7 @@ http://{{ include "aztec-network.fullname" . 
}}-boot-node-0.{{ include "aztec-ne {{- if .Values.validator.externalTcpHost -}} http://{{ .Values.validator.externalTcpHost }}:{{ .Values.validator.service.nodePort }} {{- else -}} -http://{{ include "aztec-network.fullname" . }}-validator-0.{{ include "aztec-network.fullname" . }}-validator.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.validator.service.nodePort }} +http://{{ include "aztec-network.fullname" . }}-validator.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.validator.service.nodePort }} {{- end -}} {{- end -}} diff --git a/spartan/aztec-network/templates/prover-node.yaml b/spartan/aztec-network/templates/prover-node.yaml index bd2f305710b..ed8c2d6fae1 100644 --- a/spartan/aztec-network/templates/prover-node.yaml +++ b/spartan/aztec-network/templates/prover-node.yaml @@ -18,7 +18,7 @@ spec: app: prover-node spec: initContainers: - - name: wait-for-boot-node + - name: wait-for-services image: {{ .Values.images.curl.image }} command: - /bin/sh diff --git a/spartan/aztec-network/templates/pxe.yaml b/spartan/aztec-network/templates/pxe.yaml index dbe03396d60..deb8de4a82e 100644 --- a/spartan/aztec-network/templates/pxe.yaml +++ b/spartan/aztec-network/templates/pxe.yaml @@ -49,7 +49,7 @@ spec: - name: ETHEREUM_HOST value: {{ include "aztec-network.ethereumHost" . | quote }} - name: AZTEC_NODE_URL - value: {{ include "aztec-network.bootNodeUrl" . | quote }} + value: {{ include "aztec-network.validatorUrl" . | quote }} - name: LOG_JSON value: "1" - name: LOG_LEVEL diff --git a/spartan/aztec-network/templates/setup-l2-contracts.yaml b/spartan/aztec-network/templates/setup-l2-contracts.yaml index 5f6a42d1806..df05ffd20cc 100644 --- a/spartan/aztec-network/templates/setup-l2-contracts.yaml +++ b/spartan/aztec-network/templates/setup-l2-contracts.yaml @@ -32,7 +32,7 @@ spec: echo "L2 contracts initialized" env: - name: PXE_URL - value: {{ include "aztec-network.pxeUrl" . | quote }} + value: {{ include "aztec-network.bootNodeUrl" . 
| quote }} - name: DEBUG value: "aztec:*" - name: LOG_LEVEL diff --git a/spartan/aztec-network/templates/transaction-bot.yaml b/spartan/aztec-network/templates/transaction-bot.yaml index 598262d3710..9f1239fcc2b 100644 --- a/spartan/aztec-network/templates/transaction-bot.yaml +++ b/spartan/aztec-network/templates/transaction-bot.yaml @@ -25,7 +25,7 @@ spec: {{- if .Values.bot.nodeUrl }} value: "{{ .Values.bot.nodeUrl }}" {{- else }} - value: {{ include "aztec-network.bootNodeUrl" . | quote }} + value: {{ include "aztec-network.validatorUrl" . | quote }} {{- end }} command: - /bin/sh @@ -47,7 +47,7 @@ spec: {{- if .Values.bot.nodeUrl }} value: "{{ .Values.bot.nodeUrl }}" {{- else }} - value: {{ include "aztec-network.bootNodeUrl" . | quote }} + value: {{ include "aztec-network.validatorUrl" . | quote }} {{- end }} - name: LOG_JSON value: "1" diff --git a/spartan/aztec-network/templates/validator.yaml b/spartan/aztec-network/templates/validator.yaml index ac6223b5dff..15909a4e54c 100644 --- a/spartan/aztec-network/templates/validator.yaml +++ b/spartan/aztec-network/templates/validator.yaml @@ -19,12 +19,13 @@ spec: app: validator spec: initContainers: - - name: wait-for-boot-node + - name: wait-for-services image: {{ .Values.images.curl.image }} command: - /bin/sh - -c - | + # First check ethereum node until curl -s -X POST -H 'Content-Type: application/json' \ -d '{"jsonrpc":"2.0","method":"web3_clientVersion","params":[],"id":67}' \ {{ include "aztec-network.ethereumHost" . }} | grep -q reth; do @@ -32,6 +33,7 @@ spec: sleep 5 done echo "Ethereum node is ready!" + {{- if .Values.telemetry.enabled }} until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . }} > /dev/null; do echo "Waiting for OpenTelemetry collector..." @@ -39,11 +41,40 @@ spec: done echo "OpenTelemetry collector is ready!" {{- end }} - until curl --head --silent {{ include "aztec-network.bootNodeUrl" . }}/status; do - echo "Waiting for boot node..." 
- sleep 5 - done - echo "Boot node is ready!" + + if [ "{{ .Values.validator.dynamicBootNode }}" = "true" ]; then + # Probe each validator replica by hostname until one responds + MAX_ATTEMPTS=3 + for i in $(seq 0 $(({{ .Values.validator.replicas }} - 1))); do + PEER_IP="{{ include "aztec-network.fullname" . }}-validator-${i}.{{ include "aztec-network.fullname" . }}-validator" + echo "Checking ${PEER_IP} for /status" + for attempt in $(seq 1 $MAX_ATTEMPTS); do + if curl --silent --head --fail "http://${PEER_IP}:{{ .Values.validator.service.nodePort }}/status" > /dev/null; then + echo "Found responsive peer at ${PEER_IP}" + # the PXE has its node set to the validator service. + # and that's all we need to know to bootstrap, + # since it will get a good node ENR from whatever node the PXE connects to. + echo "{{ include "aztec-network.pxeUrl" . }}" > /shared/pxe_url + break 2 + fi + sleep 2 + done + done + if [ ! -f /shared/pxe_url ]; then + echo "No responsive peers found after multiple attempts, exiting." + exit 1 + fi + else + until curl --silent --head --fail "{{ include "aztec-network.bootNodeUrl" . }}/status" > /dev/null; do + echo "Waiting for boot node..." + sleep 5 + done + echo "Boot node is ready!" + echo "{{ include "aztec-network.bootNodeUrl" . }}" > /shared/pxe_url + fi + volumeMounts: + - name: shared-volume + mountPath: /shared - name: configure-validator-env image: "{{ .Values.images.aztec.image }}" @@ -51,7 +82,10 @@ spec: command: - "/bin/sh" - "-c" - - "cp /scripts/configure-validator-env.sh /tmp/configure-validator-env.sh && chmod +x /tmp/configure-validator-env.sh && /tmp/configure-validator-env.sh {{ include "aztec-network.bootNodeUrl" .
}}" + - | + cp /scripts/configure-validator-env.sh /tmp/configure-validator-env.sh && \ + chmod +x /tmp/configure-validator-env.sh && \ + /tmp/configure-validator-env.sh "$(cat /shared/pxe_url)" volumeMounts: - name: shared-volume mountPath: /shared @@ -169,16 +203,8 @@ spec: - name: validator-keys configMap: name: {{ include "aztec-network.fullname" . }}-validator-keys - volumeClaimTemplates: - - metadata: - name: shared-volume - labels: - {{- include "aztec-network.labels" . | nindent 8 }} - spec: - accessModes: ["ReadWriteOnce"] - resources: - requests: - storage: {{ .Values.validator.storage }} + - name: shared-volume + emptyDir: {} --- apiVersion: v1 kind: ConfigMap diff --git a/spartan/aztec-network/values.yaml b/spartan/aztec-network/values.yaml index 4c1ff560df4..83e669542f3 100644 --- a/spartan/aztec-network/values.yaml +++ b/spartan/aztec-network/values.yaml @@ -67,6 +67,10 @@ bootNode: storage: "8Gi" validator: + # If true, the validator will use its peers to serve as the boot node. + # This cannot be used when the network first starts up. + # But it must be used if the boot node is killed, and the validator is restarted. 
+ dynamicBootNode: false externalTcpHost: "" externalUdpHost: "" replicas: 1 @@ -79,7 +83,7 @@ validator: p2pUdpPort: 40400 nodePort: 8080 logLevel: "debug" - debug: "aztec:*,-aztec:avm_simulator*,-aztec:libp2p_service*,-aztec:circuits:artifact_hash,-json-rpc*,-aztec:world-state:database,-aztec:l2_block_stream*" + debug: "aztec:*,-aztec:avm_simulator*,-aztec:libp2p_service*,-aztec:circuits:artifact_hash,-json-rpc*,-aztec:l2_block_stream,-aztec:world-state:database" sequencer: maxSecondsBetweenBlocks: 0 minTxsPerBlock: 1 @@ -98,7 +102,6 @@ validator: requests: memory: "2Gi" cpu: "200m" - storage: "8Gi" proverNode: externalTcpHost: "" @@ -120,10 +123,11 @@ proverNode: storage: "8Gi" pxe: + proverEnabled: false externalHost: "" logLevel: "debug" proverEnable: false - debug: "aztec:*,-aztec:avm_simulator*,-aztec:libp2p_service*,-aztec:circuits:artifact_hash,-json-rpc*,-aztec:l2_block_stream,-aztec:world-state:database" + debug: "aztec:*,-aztec:avm_simulator*,-aztec:libp2p_service*,-aztec:circuits:artifact_hash,-json-rpc*,-aztec:world-state:database,-aztec:l2_block_stream*" replicas: 1 service: port: 8080 @@ -142,6 +146,7 @@ pxe: bot: enabled: true + nodeUrl: "" logLevel: "debug" debug: "aztec:*,-aztec:avm_simulator*,-aztec:libp2p_service*,-aztec:circuits:artifact_hash,-json-rpc*,-aztec:l2_block_stream,-aztec:world-state:database" replicas: 1 diff --git a/spartan/aztec-network/values/4-validators-with-metrics.yaml b/spartan/aztec-network/values/4-validators-with-metrics.yaml new file mode 100644 index 00000000000..47387cd89c1 --- /dev/null +++ b/spartan/aztec-network/values/4-validators-with-metrics.yaml @@ -0,0 +1,28 @@ +########## +# BEWARE # +########## +# You need to deploy the metrics helm chart before using this values file. 
+# head to spartan/metrics and run `./install-kind.sh` (or `./install-prod.sh`) +# (then `./forward.sh` if you want to see it) +telemetry: + enabled: true + otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318 + +validator: + replicas: 4 + validatorKeys: + - 0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80 + - 0x59c6995e998f97a5a0044966f0945389dc9e86dae88c7a8412f4603b6b78690d + - 0x5de4111afa1a4b94908f83103eb1f1706367c2e68ca870fc3fb9a804cdab365a + - 0x7c852118294e51e653712a81e05800f419141751be58f605c371e15141b007a6 + validatorAddresses: + - 0xf39Fd6e51aad88F6F4ce6aB8827279cffFb92266 + - 0x70997970C51812dc3A010C7d01b50e0d17dc79C8 + - 0x3C44CdDdB6a900fa2b585dd299e03d12FA4293BC + - 0x90F79bf6EB2c4f870365E785982E1f101E93b906 + validator: + disabled: false + +bootNode: + validator: + disabled: true diff --git a/spartan/aztec-network/values/48-validators.yaml b/spartan/aztec-network/values/48-validators.yaml index dd7f399d805..31d48095681 100644 --- a/spartan/aztec-network/values/48-validators.yaml +++ b/spartan/aztec-network/values/48-validators.yaml @@ -9,7 +9,6 @@ telemetry: otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318 validator: - debug: "aztec:*,-aztec:avm_simulator:*,-aztec:libp2p_service" replicas: 48 validatorKeys: - 0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80 diff --git a/spartan/metrics/install.sh b/spartan/metrics/install-kind.sh similarity index 80% rename from spartan/metrics/install.sh rename to spartan/metrics/install-kind.sh index 46efddd62de..3a9ecfb4ccf 100755 --- a/spartan/metrics/install.sh +++ b/spartan/metrics/install-kind.sh @@ -14,5 +14,4 @@ helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm helm repo add grafana https://grafana.github.io/helm-charts helm repo add prometheus-community https://prometheus-community.github.io/helm-charts helm dependency update -# need to disable node exporter for GKE autopilot to be happy -helm upgrade metrics "$SCRIPT_DIR"
-n metrics --install --create-namespace --atomic --set prometheus.prometheus-node-exporter.enabled=false \ No newline at end of file +helm upgrade metrics "$SCRIPT_DIR" -n metrics --install --create-namespace --atomic --timeout 15m --values "$SCRIPT_DIR/values/kind.yaml" diff --git a/spartan/metrics/install-prod.sh b/spartan/metrics/install-prod.sh index 849dbcd1cc9..a61cc2b8ef4 100755 --- a/spartan/metrics/install-prod.sh +++ b/spartan/metrics/install-prod.sh @@ -3,4 +3,4 @@ set -eu cd "$(dirname "${BASH_SOURCE[0]}")" -helm upgrade metrics . -n metrics --values "./values/prod.yaml" --install --create-namespace --atomic $@ +helm upgrade metrics . -n metrics --values "./values/prod.yaml" --install --create-namespace $@ diff --git a/spartan/metrics/values.yaml b/spartan/metrics/values.yaml index f8407b25357..6e75301c12b 100644 --- a/spartan/metrics/values.yaml +++ b/spartan/metrics/values.yaml @@ -51,8 +51,8 @@ opentelemetry-collector: processors: resource: attributes: - - action: preserve - key: k8s.namespace.name + - action: preserve + key: k8s.namespace.name batch: {} receivers: otlp: @@ -92,31 +92,9 @@ opentelemetry-collector: # Enable and configure the Loki subchart # https://artifacthub.io/packages/helm/grafana/loki-simple-scalable -loki: - deploymentMode: SingleBinary - loki: - auth_enabled: false - commonConfig: - replication_factor: 1 - storage: - type: "filesystem" - schemaConfig: - configs: - - from: "2024-01-01" - store: tsdb - index: - prefix: loki_index_ - period: 24h - object_store: filesystem # we're storing on filesystem so there's no real persistence here. - schema: v13 - singleBinary: - replicas: 1 - read: - replicas: 0 - backend: - replicas: 0 - write: - replicas: 0 +# loki: +# Nothing set here, because we need to use values from the values directory; +# otherwise, things don't get overridden correctly. 
# Enable and configure the Tempo subchart # https://artifacthub.io/packages/helm/grafana/tempo @@ -159,9 +137,10 @@ prometheus: - job_name: aztec static_configs: - targets: ["metrics-opentelemetry-collector.metrics:8889"] - - job_name: 'kube-state-metrics' + - job_name: "kube-state-metrics" static_configs: - - targets: ['metrics-kube-state-metrics.metrics.svc.cluster.local:8080'] + - targets: + ["metrics-kube-state-metrics.metrics.svc.cluster.local:8080"] # Enable and configure Grafana # https://artifacthub.io/packages/helm/grafana/grafana diff --git a/spartan/metrics/values/gke-autopilot.yaml b/spartan/metrics/values/gke-autopilot.yaml new file mode 100644 index 00000000000..bbdc1b095fd --- /dev/null +++ b/spartan/metrics/values/gke-autopilot.yaml @@ -0,0 +1,4 @@ +# This file isn't used by default. It is here if you need to install metrics on GKE autopilot. +prometheus: + prometheus-node-exporter: + enabled: false diff --git a/spartan/metrics/values/kind.yaml b/spartan/metrics/values/kind.yaml new file mode 100644 index 00000000000..c8b8a970b25 --- /dev/null +++ b/spartan/metrics/values/kind.yaml @@ -0,0 +1,25 @@ +loki: + deploymentMode: SingleBinary + loki: + auth_enabled: false + commonConfig: + replication_factor: 1 + storage: + type: "filesystem" + schemaConfig: + configs: + - from: "2024-01-01" + store: tsdb + index: + prefix: loki_index_ + period: 24h + object_store: filesystem # we're storing on filesystem so there's no real persistence here. 
+ schema: v13 + singleBinary: + replicas: 1 + read: + replicas: 0 + backend: + replicas: 0 + write: + replicas: 0 diff --git a/spartan/metrics/values/prod.yaml b/spartan/metrics/values/prod.yaml index 347f87f73b9..2da726d4431 100644 --- a/spartan/metrics/values/prod.yaml +++ b/spartan/metrics/values/prod.yaml @@ -15,3 +15,49 @@ opentelemetry-collector: service: enabled: true type: LoadBalancer + +loki: + loki: + schemaConfig: + configs: + - from: "2024-04-01" + store: tsdb + object_store: s3 + schema: v13 + index: + prefix: loki_index_ + period: 24h + ingester: + chunk_encoding: snappy + querier: + max_concurrent: 4 + pattern_ingester: + enabled: true + limits_config: + allow_structured_metadata: true + volume_enabled: true + retention_period: 336h # 14 days + compactor: + retention_enabled: true + delete_request_store: s3 + auth_enabled: false + + deploymentMode: SimpleScalable + + singleBinary: + replicas: 0 + backend: + replicas: 2 + read: + replicas: 2 + write: + replicas: 3 + + minio: + enabled: true + persistence: + size: 64Gi + + gateway: + service: + type: LoadBalancer diff --git a/spartan/network-shaping/Chart.yaml b/spartan/network-shaping/Chart.yaml deleted file mode 100644 index 854dc4672e8..00000000000 --- a/spartan/network-shaping/Chart.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: v2 -name: network-shaping -description: Network shaping for spartan using chaos-mesh -type: application -version: 0.1.0 -appVersion: "1.0.0" \ No newline at end of file diff --git a/spartan/network-shaping/values/kill-provers.yaml b/spartan/network-shaping/values/kill-provers.yaml deleted file mode 100644 index 2128efca00a..00000000000 --- a/spartan/network-shaping/values/kill-provers.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# Simulates congested network conditions -# High latency, limited bandwidth, packet loss -global: - namespace: "smoke" - -networkShaping: - enabled: true - conditions: - latency: - enabled: false - bandwidth: - enabled: false - packetLoss: - enabled: 
false - killProvers: - enabled: true - duration: 13m diff --git a/spartan/scripts/setup_local_k8s.sh b/spartan/scripts/setup_local_k8s.sh index 0571aebfe9e..8068ce867ae 100755 --- a/spartan/scripts/setup_local_k8s.sh +++ b/spartan/scripts/setup_local_k8s.sh @@ -62,4 +62,4 @@ fi kubectl config use-context kind-kind || true "$SCRIPT_DIR"/../chaos-mesh/install.sh -"$SCRIPT_DIR"/../metrics/install.sh +"$SCRIPT_DIR"/../metrics/install-kind.sh diff --git a/yarn-project/aztec.js/src/contract/sent_tx.test.ts b/yarn-project/aztec.js/src/contract/sent_tx.test.ts index 13a2def98cd..c2bf65adb6a 100644 --- a/yarn-project/aztec.js/src/contract/sent_tx.test.ts +++ b/yarn-project/aztec.js/src/contract/sent_tx.test.ts @@ -39,7 +39,7 @@ describe('SentTx', () => { it('throws if tx is dropped', async () => { pxe.getTxReceipt.mockResolvedValue({ ...txReceipt, status: TxStatus.DROPPED } as TxReceipt); pxe.getSyncStatus.mockResolvedValue({ blocks: 19 }); - await expect(sentTx.wait({ timeout: 1, interval: 0.4 })).rejects.toThrow(/dropped/); + await expect(sentTx.wait({ timeout: 1, interval: 0.4, ignoreDroppedReceiptsFor: 0 })).rejects.toThrow(/dropped/); }); it('waits for the tx to be proven', async () => { diff --git a/yarn-project/aztec.js/src/contract/sent_tx.ts b/yarn-project/aztec.js/src/contract/sent_tx.ts index 7b85b3fd853..17d9cb9b7b6 100644 --- a/yarn-project/aztec.js/src/contract/sent_tx.ts +++ b/yarn-project/aztec.js/src/contract/sent_tx.ts @@ -4,6 +4,8 @@ import { type FieldsOf } from '@aztec/foundation/types'; /** Options related to waiting for a tx. */ export type WaitOpts = { + /** The amount of time to ignore TxStatus.DROPPED receipts (in seconds) due to the presumption that it is being propagated by the p2p network. Defaults to 5. */ + ignoreDroppedReceiptsFor?: number; /** The maximum time (in seconds) to wait for the transaction to be mined. Defaults to 60. */ timeout?: number; /** The maximum time (in seconds) to wait for the transaction to be proven. Defaults to 600. 
*/ @@ -24,6 +26,7 @@ export type WaitOpts = { }; export const DefaultWaitOpts: WaitOpts = { + ignoreDroppedReceiptsFor: 5, timeout: 60, provenTimeout: 600, interval: 1, @@ -101,6 +104,9 @@ export class SentTx { protected async waitForReceipt(opts?: WaitOpts): Promise { const txHash = await this.getTxHash(); + const startTime = Date.now(); + const ignoreDroppedReceiptsFor = opts?.ignoreDroppedReceiptsFor ?? DefaultWaitOpts.ignoreDroppedReceiptsFor; + return await retryUntil( async () => { const txReceipt = await this.pxe.getTxReceipt(txHash); @@ -108,9 +114,15 @@ export class SentTx { if (txReceipt.status === TxStatus.PENDING) { return undefined; } - // If the tx was dropped, return it + // If the tx was "dropped", either return it or ignore based on timing. + // We can ignore it at first because the transaction may have been sent to node 1, and now we're asking node 2 for the receipt. + // If we don't allow a short grace period, we could incorrectly return a TxReceipt with status DROPPED. if (txReceipt.status === TxStatus.DROPPED) { - return txReceipt; + const elapsedSeconds = (Date.now() - startTime) / 1000; + if (!ignoreDroppedReceiptsFor || elapsedSeconds > ignoreDroppedReceiptsFor) { + return txReceipt; + } + return undefined; } // If we don't care about waiting for notes to be synced, return the receipt const waitForNotesAvailable = opts?.waitForNotesAvailable ?? 
DefaultWaitOpts.waitForNotesAvailable; diff --git a/yarn-project/cli/src/cmds/misc/setup_contracts.ts b/yarn-project/cli/src/cmds/misc/setup_contracts.ts index e1317fe29d4..1a0b5472251 100644 --- a/yarn-project/cli/src/cmds/misc/setup_contracts.ts +++ b/yarn-project/cli/src/cmds/misc/setup_contracts.ts @@ -18,8 +18,22 @@ export async function setupCanonicalL2FeeJuice( const feeJuiceContract = await FeeJuiceContract.at(ProtocolContractAddress.FeeJuice, deployer); log('setupCanonicalL2FeeJuice: Calling initialize on fee juice contract...'); - await feeJuiceContract.methods - .initialize(feeJuicePortalAddress) - .send({ fee: { paymentMethod: new NoFeePaymentMethod(), gasSettings: GasSettings.teardownless() } }) - .wait(waitOpts); + + try { + const provenTx = await feeJuiceContract.methods + .initialize(feeJuicePortalAddress) + .prove({ fee: { paymentMethod: new NoFeePaymentMethod(), gasSettings: GasSettings.teardownless() } }); + + await provenTx.send().wait(waitOpts); + log('setupCanonicalL2FeeJuice: Fee juice contract initialized'); + } catch (e: any) { + // TODO: make this less brittle, e.g. using a 204 http code + // It's "okay" at the time of this writing because the only assertion made is `storage.portal_address.read_public().is_zero()` + if (e instanceof Error && (e.message.includes('Assertion failed') || e.message.includes('app_logic_reverted'))) { + log('setupCanonicalL2FeeJuice: Fee juice contract already initialized'); + } else { + log('setupCanonicalL2FeeJuice: Error initializing fee juice contract', e); + throw e; + } + } } diff --git a/yarn-project/end-to-end/scripts/network_test.sh b/yarn-project/end-to-end/scripts/network_test.sh index 5d3c2db374c..0c6e592cd00 100755 --- a/yarn-project/end-to-end/scripts/network_test.sh +++ b/yarn-project/end-to-end/scripts/network_test.sh @@ -86,18 +86,18 @@ handle_network_shaping() { fi fi - echo "Deploying network shaping configuration..." - if ! 
helm upgrade --install network-shaping "$REPO/spartan/network-shaping/" \ + echo "Deploying Aztec Chaos Scenarios..." + if ! helm upgrade --install aztec-chaos-scenarios "$REPO/spartan/aztec-chaos-scenarios/" \ --namespace chaos-mesh \ - --values "$REPO/spartan/network-shaping/values/$CHAOS_VALUES" \ + --values "$REPO/spartan/aztec-chaos-scenarios/values/$CHAOS_VALUES" \ --set global.targetNamespace="$NAMESPACE" \ --wait \ --timeout=5m; then - echo "Error: failed to deploy network shaping configuration!" + echo "Error: failed to deploy Aztec Chaos Scenarios!" return 1 fi - echo "Network shaping configuration applied successfully" + echo "Aztec Chaos Scenarios applied successfully" return 0 fi return 0 @@ -154,6 +154,7 @@ fi docker run --rm --network=host \ -v ~/.kube:/root/.kube \ -e K8S=true \ + -e INSTANCE_NAME="spartan" \ -e SPARTAN_DIR="/usr/src/spartan" \ -e NAMESPACE="$NAMESPACE" \ -e HOST_PXE_PORT=$PXE_PORT \ diff --git a/yarn-project/end-to-end/src/fixtures/snapshot_manager.ts b/yarn-project/end-to-end/src/fixtures/snapshot_manager.ts index ee16764f278..36504fac84d 100644 --- a/yarn-project/end-to-end/src/fixtures/snapshot_manager.ts +++ b/yarn-project/end-to-end/src/fixtures/snapshot_manager.ts @@ -541,6 +541,11 @@ export const addAccounts = }), ); + logger.verbose('Account deployment tx hashes:'); + for (const provenTx of provenTxs) { + logger.verbose(provenTx.getTxHash().to0xString()); + } + logger.verbose('Deploying accounts...'); const txs = await Promise.all(provenTxs.map(provenTx => provenTx.send())); await Promise.all(txs.map(tx => tx.wait({ interval: 0.1, proven: waitUntilProven }))); diff --git a/yarn-project/end-to-end/src/spartan/4epochs.test.ts b/yarn-project/end-to-end/src/spartan/4epochs.test.ts index 354da3335f0..29f24e42f56 100644 --- a/yarn-project/end-to-end/src/spartan/4epochs.test.ts +++ b/yarn-project/end-to-end/src/spartan/4epochs.test.ts @@ -6,8 +6,8 @@ import { TokenContract } from '@aztec/noir-contracts.js'; import { jest } from 
'@jest/globals'; import { RollupCheatCodes } from '../../../aztec.js/src/utils/cheat_codes.js'; -import { getConfig, isK8sConfig, startPortForward } from './k8_utils.js'; import { type TestWallets, setupTestWalletsWithTokens } from './setup_test_wallets.js'; +import { getConfig, isK8sConfig, startPortForward } from './utils.js'; const config = getConfig(process.env); diff --git a/yarn-project/end-to-end/src/spartan/gating-passive.test.ts b/yarn-project/end-to-end/src/spartan/gating-passive.test.ts new file mode 100644 index 00000000000..03726310982 --- /dev/null +++ b/yarn-project/end-to-end/src/spartan/gating-passive.test.ts @@ -0,0 +1,116 @@ +import { EthCheatCodes, createCompatibleClient, sleep } from '@aztec/aztec.js'; +import { createDebugLogger } from '@aztec/foundation/log'; + +import { expect, jest } from '@jest/globals'; + +import { RollupCheatCodes } from '../../../aztec.js/src/utils/cheat_codes.js'; +import { + applyBootNodeFailure, + applyNetworkShaping, + applyValidatorKill, + awaitL2BlockNumber, + enableValidatorDynamicBootNode, + getConfig, + isK8sConfig, + restartBot, + startPortForward, +} from './utils.js'; + +const config = getConfig(process.env); +if (!isK8sConfig(config)) { + throw new Error('This test must be run in a k8s environment'); +} +const { + NAMESPACE, + HOST_PXE_PORT, + HOST_ETHEREUM_PORT, + CONTAINER_PXE_PORT, + CONTAINER_ETHEREUM_PORT, + SPARTAN_DIR, + INSTANCE_NAME, +} = config; +const debugLogger = createDebugLogger('aztec:spartan-test:reorg'); + +describe('a test that passively observes the network in the presence of network chaos', () => { + jest.setTimeout(60 * 60 * 1000); // 60 minutes + + const ETHEREUM_HOST = `http://127.0.0.1:${HOST_ETHEREUM_PORT}`; + const PXE_URL = `http://127.0.0.1:${HOST_PXE_PORT}`; + // 50% is the max that we expect to miss + const MAX_MISSED_SLOT_PERCENT = 0.5; + + it('survives network chaos', async () => { + await startPortForward({ + resource: 'svc/spartan-aztec-network-pxe', + namespace: 
NAMESPACE, + containerPort: CONTAINER_PXE_PORT, + hostPort: HOST_PXE_PORT, + }); + await startPortForward({ + resource: 'svc/spartan-aztec-network-ethereum', + namespace: NAMESPACE, + containerPort: CONTAINER_ETHEREUM_PORT, + hostPort: HOST_ETHEREUM_PORT, + }); + const client = await createCompatibleClient(PXE_URL, debugLogger); + const ethCheatCodes = new EthCheatCodes(ETHEREUM_HOST); + const rollupCheatCodes = new RollupCheatCodes( + ethCheatCodes, + await client.getNodeInfo().then(n => n.l1ContractAddresses), + ); + const { epochDuration, slotDuration } = await rollupCheatCodes.getConfig(); + + // make it so the validator will use its peers to bootstrap + await enableValidatorDynamicBootNode(INSTANCE_NAME, NAMESPACE, SPARTAN_DIR, debugLogger); + + // restart the bot to ensure that it's not affected by the previous test + await restartBot(NAMESPACE, debugLogger); + + // wait for the chain to build at least 1 epoch's worth of blocks + // note, don't forget that normally an epoch doesn't need epochDuration worth of blocks, + // but here we do double duty: + // we want a handful of blocks, and we want to pass the epoch boundary + await awaitL2BlockNumber(rollupCheatCodes, epochDuration, 60 * 5, debugLogger); + + let deploymentOutput: string = ''; + deploymentOutput = await applyNetworkShaping({ + valuesFile: 'network-requirements.yaml', + namespace: NAMESPACE, + spartanDir: SPARTAN_DIR, + logger: debugLogger, + }); + debugLogger.info(deploymentOutput); + deploymentOutput = await applyBootNodeFailure({ + durationSeconds: 60 * 60 * 24, + namespace: NAMESPACE, + spartanDir: SPARTAN_DIR, + logger: debugLogger, + }); + debugLogger.info(deploymentOutput); + await restartBot(NAMESPACE, debugLogger); + + const rounds = 3; + for (let i = 0; i < rounds; i++) { + debugLogger.info(`Round ${i + 1}/${rounds}`); + deploymentOutput = await applyValidatorKill({ + namespace: NAMESPACE, + spartanDir: SPARTAN_DIR, + logger: debugLogger, + }); + debugLogger.info(deploymentOutput); + 
debugLogger.info(`Waiting for 1 epoch to pass`); + const controlTips = await rollupCheatCodes.getTips(); + await sleep(Number(epochDuration * slotDuration) * 1000); + const newTips = await rollupCheatCodes.getTips(); + + const expectedPending = + controlTips.pending + BigInt(Math.floor((1 - MAX_MISSED_SLOT_PERCENT) * Number(epochDuration))); + expect(newTips.pending).toBeGreaterThan(expectedPending); + // calculate the percentage of slots missed + const perfectPending = controlTips.pending + BigInt(Math.floor(Number(epochDuration))); + const missedSlots = Number(perfectPending) - Number(newTips.pending); + const missedSlotsPercentage = (missedSlots / Number(epochDuration)) * 100; + debugLogger.info(`Missed ${missedSlots} slots, ${missedSlotsPercentage.toFixed(2)}%`); + } + }); +}); diff --git a/yarn-project/end-to-end/src/spartan/reorg.test.ts b/yarn-project/end-to-end/src/spartan/reorg.test.ts index 046f51a1357..8421e7387c5 100644 --- a/yarn-project/end-to-end/src/spartan/reorg.test.ts +++ b/yarn-project/end-to-end/src/spartan/reorg.test.ts @@ -4,15 +4,15 @@ import { createDebugLogger } from '@aztec/foundation/log'; import { expect, jest } from '@jest/globals'; import { RollupCheatCodes } from '../../../aztec.js/src/utils/cheat_codes.js'; +import { type TestWallets, performTransfers, setupTestWalletsWithTokens } from './setup_test_wallets.js'; import { - applyKillProvers, + applyProverFailure, deleteResourceByLabel, getConfig, isK8sConfig, startPortForward, waitForResourceByLabel, -} from './k8_utils.js'; -import { type TestWallets, performTransfers, setupTestWalletsWithTokens } from './setup_test_wallets.js'; +} from './utils.js'; const config = getConfig(process.env); if (!isK8sConfig(config)) { @@ -78,10 +78,11 @@ describe('reorg test', () => { const { pending: preReorgPending, proven: preReorgProven } = await rollupCheatCodes.getTips(); // kill the provers - const stdout = await applyKillProvers({ + const stdout = await applyProverFailure({ namespace: 
NAMESPACE, spartanDir: SPARTAN_DIR, durationSeconds: Number(epochDuration * slotDuration) * 2, + logger: debugLogger, }); debugLogger.info(stdout); diff --git a/yarn-project/end-to-end/src/spartan/smoke.test.ts b/yarn-project/end-to-end/src/spartan/smoke.test.ts index 4eafdd9d4ef..43a67dc9d12 100644 --- a/yarn-project/end-to-end/src/spartan/smoke.test.ts +++ b/yarn-project/end-to-end/src/spartan/smoke.test.ts @@ -5,7 +5,7 @@ import { RollupAbi } from '@aztec/l1-artifacts'; import { createPublicClient, getAddress, getContract, http } from 'viem'; import { foundry } from 'viem/chains'; -import { getConfig, isK8sConfig, startPortForward } from './k8_utils.js'; +import { getConfig, isK8sConfig, startPortForward } from './utils.js'; const config = getConfig(process.env); diff --git a/yarn-project/end-to-end/src/spartan/transfer.test.ts b/yarn-project/end-to-end/src/spartan/transfer.test.ts index a073247cae3..e82f0518fb0 100644 --- a/yarn-project/end-to-end/src/spartan/transfer.test.ts +++ b/yarn-project/end-to-end/src/spartan/transfer.test.ts @@ -4,8 +4,8 @@ import { TokenContract } from '@aztec/noir-contracts.js'; import { jest } from '@jest/globals'; -import { getConfig, isK8sConfig, startPortForward } from './k8_utils.js'; import { type TestWallets, setupTestWalletsWithTokens } from './setup_test_wallets.js'; +import { getConfig, isK8sConfig, startPortForward } from './utils.js'; const config = getConfig(process.env); diff --git a/yarn-project/end-to-end/src/spartan/k8_utils.ts b/yarn-project/end-to-end/src/spartan/utils.ts similarity index 54% rename from yarn-project/end-to-end/src/spartan/k8_utils.ts rename to yarn-project/end-to-end/src/spartan/utils.ts index 9fd2b81a827..1e5fd64145b 100644 --- a/yarn-project/end-to-end/src/spartan/k8_utils.ts +++ b/yarn-project/end-to-end/src/spartan/utils.ts @@ -1,15 +1,19 @@ -import { createDebugLogger } from '@aztec/aztec.js'; +import { createDebugLogger, sleep } from '@aztec/aztec.js'; +import type { Logger } from 
'@aztec/foundation/log'; import { exec, spawn } from 'child_process'; import path from 'path'; import { promisify } from 'util'; import { z } from 'zod'; +import type { RollupCheatCodes } from '../../../aztec.js/src/utils/cheat_codes.js'; + const execAsync = promisify(exec); const logger = createDebugLogger('k8s-utils'); const k8sConfigSchema = z.object({ + INSTANCE_NAME: z.string().min(1, 'INSTANCE_NAME env variable must be set'), NAMESPACE: z.string().min(1, 'NAMESPACE env variable must be set'), HOST_PXE_PORT: z.coerce.number().min(1, 'HOST_PXE_PORT env variable must be set'), CONTAINER_PXE_PORT: z.coerce.number().default(8080), @@ -92,12 +96,16 @@ export async function deleteResourceByName({ resource, namespace, name, + force = false, }: { resource: string; namespace: string; name: string; + force?: boolean; }) { - const command = `kubectl delete ${resource} ${name} -n ${namespace} --ignore-not-found=true --wait=true`; + const command = `kubectl delete ${resource} ${name} -n ${namespace} --ignore-not-found=true --wait=true ${ + force ? '--force' : '' + }`; logger.info(`command: ${command}`); const { stdout } = await execAsync(command); return stdout; @@ -141,12 +149,43 @@ export function getChartDir(spartanDir: string, chartName: string) { return path.join(spartanDir.trim(), chartName); } -function valuesToArgs(values: Record) { +function valuesToArgs(values: Record) { return Object.entries(values) .map(([key, value]) => `--set ${key}=${value}`) .join(' '); } +function createHelmCommand({ + instanceName, + helmChartDir, + namespace, + valuesFile, + timeout, + values, + reuseValues = false, +}: { + instanceName: string; + helmChartDir: string; + namespace: string; + valuesFile: string | undefined; + timeout: string; + values: Record; + reuseValues?: boolean; +}) { + const valuesFileArgs = valuesFile ? `--values ${helmChartDir}/values/${valuesFile}` : ''; + const reuseValuesArgs = reuseValues ? 
'--reuse-values' : ''; + return `helm upgrade --install ${instanceName} ${helmChartDir} --namespace ${namespace} ${valuesFileArgs} ${reuseValuesArgs} --wait --timeout=${timeout} ${valuesToArgs( + values, + )}`; +} + +async function execHelmCommand(args: Parameters[0]) { + const helmCommand = createHelmCommand(args); + logger.info(`helm command: ${helmCommand}`); + const { stdout } = await execAsync(helmCommand); + return stdout; +} + /** * Installs a Helm chart with the given parameters. * @param instanceName - The name of the Helm chart instance. @@ -160,7 +199,7 @@ function valuesToArgs(values: Record) { * * Example usage: * ```typescript - * const stdout = await installChaosMeshChart({ instanceName: 'force-reorg', targetNamespace: 'smoke', valuesFile: 'kill-provers.yaml'}); + * const stdout = await installChaosMeshChart({ instanceName: 'force-reorg', targetNamespace: 'smoke', valuesFile: 'prover-failure.yaml'}); * console.log(stdout); * ``` */ @@ -173,6 +212,7 @@ export async function installChaosMeshChart({ timeout = '5m', clean = true, values = {}, + logger, }: { instanceName: string; targetNamespace: string; @@ -181,42 +221,164 @@ export async function installChaosMeshChart({ chaosMeshNamespace?: string; timeout?: string; clean?: boolean; - values?: Record; + values?: Record; + logger: Logger; }) { if (clean) { // uninstall the helm chart if it exists + logger.info(`Uninstalling helm chart ${instanceName}`); await execAsync(`helm uninstall ${instanceName} --namespace ${chaosMeshNamespace} --wait --ignore-not-found`); // and delete the podchaos resource - await deleteResourceByName({ + const deleteArgs = { resource: 'podchaos', namespace: chaosMeshNamespace, name: `${targetNamespace}-${instanceName}`, + }; + logger.info(`Deleting podchaos resource`); + await deleteResourceByName(deleteArgs).catch(e => { + logger.error(`Error deleting podchaos resource: ${e}`); + logger.info(`Force deleting podchaos resource`); + return deleteResourceByName({ ...deleteArgs, 
force: true }); }); } - const helmCommand = `helm upgrade --install ${instanceName} ${helmChartDir} --namespace ${chaosMeshNamespace} --values ${helmChartDir}/values/${valuesFile} --wait --timeout=${timeout} --set global.targetNamespace=${targetNamespace} ${valuesToArgs( - values, - )}`; - const { stdout } = await execAsync(helmCommand); - return stdout; + return execHelmCommand({ + instanceName, + helmChartDir, + namespace: chaosMeshNamespace, + valuesFile, + timeout, + values: { ...values, 'global.targetNamespace': targetNamespace }, + }); } -export function applyKillProvers({ +export function applyProverFailure({ namespace, spartanDir, durationSeconds, + logger, }: { namespace: string; spartanDir: string; durationSeconds: number; + logger: Logger; }) { return installChaosMeshChart({ - instanceName: 'kill-provers', + instanceName: 'prover-failure', targetNamespace: namespace, - valuesFile: 'kill-provers.yaml', - helmChartDir: getChartDir(spartanDir, 'network-shaping'), + valuesFile: 'prover-failure.yaml', + helmChartDir: getChartDir(spartanDir, 'aztec-chaos-scenarios'), values: { - 'networkShaping.conditions.killProvers.duration': `${durationSeconds}s`, + 'proverFailure.duration': `${durationSeconds}s`, }, + logger, }); } + +export function applyBootNodeFailure({ + namespace, + spartanDir, + durationSeconds, + logger, +}: { + namespace: string; + spartanDir: string; + durationSeconds: number; + logger: Logger; +}) { + return installChaosMeshChart({ + instanceName: 'boot-node-failure', + targetNamespace: namespace, + valuesFile: 'boot-node-failure.yaml', + helmChartDir: getChartDir(spartanDir, 'aztec-chaos-scenarios'), + values: { + 'bootNodeFailure.duration': `${durationSeconds}s`, + }, + logger, + }); +} + +export function applyValidatorKill({ + namespace, + spartanDir, + logger, +}: { + namespace: string; + spartanDir: string; + logger: Logger; +}) { + return installChaosMeshChart({ + instanceName: 'validator-kill', + targetNamespace: namespace, + valuesFile: 
'validator-kill.yaml', + helmChartDir: getChartDir(spartanDir, 'aztec-chaos-scenarios'), + logger, + }); +} + +export function applyNetworkShaping({ + valuesFile, + namespace, + spartanDir, + logger, +}: { + valuesFile: string; + namespace: string; + spartanDir: string; + logger: Logger; +}) { + return installChaosMeshChart({ + instanceName: 'network-shaping', + targetNamespace: namespace, + valuesFile, + helmChartDir: getChartDir(spartanDir, 'aztec-chaos-scenarios'), + logger, + }); +} + +export async function awaitL2BlockNumber( + rollupCheatCodes: RollupCheatCodes, + blockNumber: bigint, + timeoutSeconds: number, + logger: Logger, +) { + logger.info(`Waiting for L2 Block ${blockNumber}`); + let tips = await rollupCheatCodes.getTips(); + const endTime = Date.now() + timeoutSeconds * 1000; + while (tips.pending < blockNumber && Date.now() < endTime) { + logger.info(`At L2 Block ${tips.pending}`); + await sleep(1000); + tips = await rollupCheatCodes.getTips(); + } + logger.info(`Reached L2 Block ${tips.pending}`); +} + +export async function restartBot(namespace: string, logger: Logger) { + logger.info(`Restarting bot`); + await deleteResourceByLabel({ resource: 'pods', namespace, label: 'app=bot' }); + await sleep(10 * 1000); + await waitForResourceByLabel({ resource: 'pods', namespace, label: 'app=bot' }); + logger.info(`Bot restarted`); +} + +export async function enableValidatorDynamicBootNode( + instanceName: string, + namespace: string, + spartanDir: string, + logger: Logger, +) { + logger.info(`Enabling validator dynamic boot node`); + await execHelmCommand({ + instanceName, + namespace, + helmChartDir: getChartDir(spartanDir, 'aztec-network'), + values: { + 'validator.dynamicBootNode': 'true', + }, + valuesFile: undefined, + timeout: '10m', + reuseValues: true, + }); + + logger.info(`Validator dynamic boot node enabled`); +} diff --git a/yarn-project/pxe/src/pxe_service/pxe_service.ts b/yarn-project/pxe/src/pxe_service/pxe_service.ts index 
62e96081780..88592c1e783 100644 --- a/yarn-project/pxe/src/pxe_service/pxe_service.ts +++ b/yarn-project/pxe/src/pxe_service/pxe_service.ts @@ -496,10 +496,19 @@ export class PXEService implements PXE { txRequest: TxExecutionRequest, privateExecutionResult: PrivateExecutionResult, ): Promise { - return this.jobQueue.put(async () => { - const { publicInputs, clientIvcProof } = await this.#prove(txRequest, this.proofCreator, privateExecutionResult); - return new TxProvingResult(privateExecutionResult, publicInputs, clientIvcProof!); - }); + return this.jobQueue + .put(async () => { + const { publicInputs, clientIvcProof } = await this.#prove( + txRequest, + this.proofCreator, + privateExecutionResult, + ); + return new TxProvingResult(privateExecutionResult, publicInputs, clientIvcProof!); + }) + .catch(err => { + this.log.error(err); + throw err; + }); } // TODO(#7456) Prevent msgSender being defined here for the first call @@ -511,47 +520,52 @@ export class PXEService implements PXE { profile: boolean = false, scopes?: AztecAddress[], ): Promise { - return await this.jobQueue.put(async () => { - const privateExecutionResult = await this.#executePrivate(txRequest, msgSender, scopes); - - let publicInputs: PrivateKernelTailCircuitPublicInputs; - let profileResult; - if (profile) { - ({ publicInputs, profileResult } = await this.#profileKernelProver( - txRequest, - this.proofCreator, - privateExecutionResult, - )); - } else { - publicInputs = await this.#simulateKernels(txRequest, privateExecutionResult); - } + return await this.jobQueue + .put(async () => { + const privateExecutionResult = await this.#executePrivate(txRequest, msgSender, scopes); + + let publicInputs: PrivateKernelTailCircuitPublicInputs; + let profileResult; + if (profile) { + ({ publicInputs, profileResult } = await this.#profileKernelProver( + txRequest, + this.proofCreator, + privateExecutionResult, + )); + } else { + publicInputs = await this.#simulateKernels(txRequest, privateExecutionResult); 
+ } - const privateSimulationResult = new PrivateSimulationResult(privateExecutionResult, publicInputs); - const simulatedTx = privateSimulationResult.toSimulatedTx(); - let publicOutput: PublicSimulationOutput | undefined; - if (simulatePublic) { - publicOutput = await this.#simulatePublicCalls(simulatedTx); - } + const privateSimulationResult = new PrivateSimulationResult(privateExecutionResult, publicInputs); + const simulatedTx = privateSimulationResult.toSimulatedTx(); + let publicOutput: PublicSimulationOutput | undefined; + if (simulatePublic) { + publicOutput = await this.#simulatePublicCalls(simulatedTx); + } - if (!skipTxValidation) { - if (!(await this.node.isValidTx(simulatedTx, true))) { - throw new Error('The simulated transaction is unable to be added to state and is invalid.'); + if (!skipTxValidation) { + if (!(await this.node.isValidTx(simulatedTx, true))) { + throw new Error('The simulated transaction is unable to be added to state and is invalid.'); + } } - } - // We log only if the msgSender is undefined, as simulating with a different msgSender - // is unlikely to be a real transaction, and likely to be only used to read data. - // Meaning that it will not necessarily have produced a nullifier (and thus have no TxHash) - // If we log, the `getTxHash` function will throw. - if (!msgSender) { - this.log.info(`Executed local simulation for ${simulatedTx.getTxHash()}`); - } - return TxSimulationResult.fromPrivateSimulationResultAndPublicOutput( - privateSimulationResult, - publicOutput, - profileResult, - ); - }); + // We log only if the msgSender is undefined, as simulating with a different msgSender + // is unlikely to be a real transaction, and likely to be only used to read data. + // Meaning that it will not necessarily have produced a nullifier (and thus have no TxHash) + // If we log, the `getTxHash` function will throw. 
+ if (!msgSender) { + this.log.info(`Executed local simulation for ${simulatedTx.getTxHash()}`); + } + return TxSimulationResult.fromPrivateSimulationResultAndPublicOutput( + privateSimulationResult, + publicOutput, + profileResult, + ); + }) + .catch(err => { + this.log.error(err); + throw err; + }); } public async sendTx(tx: Tx): Promise { @@ -560,7 +574,10 @@ export class PXEService implements PXE { throw new Error(`A settled tx with equal hash ${txHash.toString()} exists.`); } this.log.info(`Sending transaction ${txHash}`); - await this.node.sendTx(tx); + await this.node.sendTx(tx).catch(err => { + this.log.error(err); + throw err; + }); this.log.info(`Sent transaction ${txHash}`); return txHash; } @@ -573,14 +590,19 @@ export class PXEService implements PXE { scopes?: AztecAddress[], ): Promise { // all simulations must be serialized w.r.t. the synchronizer - return await this.jobQueue.put(async () => { - // TODO - Should check if `from` has the permission to call the view function. - const functionCall = await this.#getFunctionCall(functionName, args, to); - const executionResult = await this.#simulateUnconstrained(functionCall, scopes); - - // TODO - Return typed result based on the function artifact. - return executionResult; - }); + return await this.jobQueue + .put(async () => { + // TODO - Should check if `from` has the permission to call the view function. + const functionCall = await this.#getFunctionCall(functionName, args, to); + const executionResult = await this.#simulateUnconstrained(functionCall, scopes); + + // TODO - Return typed result based on the function artifact. 
+ return executionResult; + }) + .catch(err => { + this.log.error(err); + throw err; + }); } public getTxReceipt(txHash: TxHash): Promise { diff --git a/yarn-project/sequencer-client/src/sequencer/sequencer.test.ts b/yarn-project/sequencer-client/src/sequencer/sequencer.test.ts index 59b65a3c875..e1b3f8bb71a 100644 --- a/yarn-project/sequencer-client/src/sequencer/sequencer.test.ts +++ b/yarn-project/sequencer-client/src/sequencer/sequencer.test.ts @@ -232,8 +232,10 @@ describe('sequencer', () => { }, // It would be nice to add the other states, but we would need to inject delays within the `work` loop ])('does not build a block if it does not have enough time left in the slot', async ({ delayedState }) => { - // trick the sequencer into thinking that we are just too far into the slot - sequencer.setL1GenesisTime(Math.floor(Date.now() / 1000) - (sequencer.getTimeTable()[delayedState] + 1)); + // trick the sequencer into thinking that we are just too far into slot 1 + sequencer.setL1GenesisTime( + Math.floor(Date.now() / 1000) - slotDuration * 1 - (sequencer.getTimeTable()[delayedState] + 1), + ); const tx = mockTxForRollup(); tx.data.constants.txContext.chainId = chainId; @@ -841,7 +843,7 @@ class TestSubject extends Sequencer { } public override doRealWork() { - this.setState(SequencerState.IDLE, true /** force */); + this.setState(SequencerState.IDLE, 0, true /** force */); return super.doRealWork(); } } diff --git a/yarn-project/sequencer-client/src/sequencer/sequencer.ts b/yarn-project/sequencer-client/src/sequencer/sequencer.ts index 340072b52da..d5c4b6874dd 100644 --- a/yarn-project/sequencer-client/src/sequencer/sequencer.ts +++ b/yarn-project/sequencer-client/src/sequencer/sequencer.ts @@ -167,11 +167,11 @@ export class Sequencer { [SequencerState.IDLE]: this.aztecSlotDuration, [SequencerState.SYNCHRONIZING]: this.aztecSlotDuration, [SequencerState.PROPOSER_CHECK]: this.aztecSlotDuration, // We always want to allow the full slot to check if we are the 
proposer - [SequencerState.WAITING_FOR_TXS]: 3, - [SequencerState.CREATING_BLOCK]: 5, - [SequencerState.PUBLISHING_BLOCK_TO_PEERS]: 5 + this.maxTxsPerBlock * 2, // if we take 5 seconds to create block, then 4 transactions at 2 seconds each - [SequencerState.WAITING_FOR_ATTESTATIONS]: 5 + this.maxTxsPerBlock * 2 + 3, // it shouldn't take 3 seconds to publish to peers - [SequencerState.PUBLISHING_BLOCK]: 5 + this.maxTxsPerBlock * 2 + 3 + 5, // wait 5 seconds for attestations + [SequencerState.WAITING_FOR_TXS]: 5, + [SequencerState.CREATING_BLOCK]: 7, + [SequencerState.PUBLISHING_BLOCK_TO_PEERS]: 7 + this.maxTxsPerBlock * 2, // if we take 5 seconds to create block, then 4 transactions at 2 seconds each + [SequencerState.WAITING_FOR_ATTESTATIONS]: 7 + this.maxTxsPerBlock * 2 + 3, // it shouldn't take 3 seconds to publish to peers + [SequencerState.PUBLISHING_BLOCK]: 7 + this.maxTxsPerBlock * 2 + 3 + 5, // wait 5 seconds for attestations }; if (this.enforceTimeTable && newTimeTable[SequencerState.PUBLISHING_BLOCK] > this.aztecSlotDuration) { throw new Error('Sequencer cannot publish block in less than a slot'); @@ -185,7 +185,7 @@ export class Sequencer { public start() { this.runningPromise = new RunningPromise(this.work.bind(this), this.pollingIntervalMs); this.runningPromise.start(); - this.setState(SequencerState.IDLE, true /** force */); + this.setState(SequencerState.IDLE, 0, true /** force */); this.log.info('Sequencer started'); return Promise.resolve(); } @@ -197,7 +197,7 @@ export class Sequencer { this.log.debug(`Stopping sequencer`); await this.runningPromise?.stop(); this.publisher.interrupt(); - this.setState(SequencerState.STOPPED, true /** force */); + this.setState(SequencerState.STOPPED, 0, true /** force */); this.log.info('Stopped sequencer'); } @@ -208,7 +208,7 @@ export class Sequencer { this.log.info('Restarting sequencer'); this.publisher.restart(); this.runningPromise!.start(); - this.setState(SequencerState.IDLE, true /** force */); + 
this.setState(SequencerState.IDLE, 0, true /** force */); } /** @@ -228,7 +228,7 @@ export class Sequencer { * - If our block for some reason is not included, revert the state */ protected async doRealWork() { - this.setState(SequencerState.SYNCHRONIZING); + this.setState(SequencerState.SYNCHRONIZING, 0); // Update state when the previous block has been synced const prevBlockSynced = await this.isBlockSynced(); // Do not go forward with new block if the previous one has not been mined and processed @@ -239,7 +239,7 @@ export class Sequencer { this.log.debug('Previous block has been mined and processed'); - this.setState(SequencerState.PROPOSER_CHECK); + this.setState(SequencerState.PROPOSER_CHECK, 0); const chainTip = await this.l2BlockSource.getBlock(-1); const historicalHeader = chainTip?.header; @@ -273,8 +273,9 @@ export class Sequencer { if (!this.shouldProposeBlock(historicalHeader, {})) { return; } + const secondsIntoSlot = getSecondsIntoSlot(this.l1GenesisTime, this.aztecSlotDuration, Number(slot)); - this.setState(SequencerState.WAITING_FOR_TXS); + this.setState(SequencerState.WAITING_FOR_TXS, secondsIntoSlot); // Get txs to build the new block. const pendingTxs = this.p2pClient.getTxs('pending'); @@ -319,7 +320,7 @@ export class Sequencer { } catch (err) { this.log.error(`Error assembling block`, (err as any).stack); } - this.setState(SequencerState.IDLE); + this.setState(SequencerState.IDLE, 0); } protected async work() { @@ -333,7 +334,7 @@ export class Sequencer { throw err; } } finally { - this.setState(SequencerState.IDLE); + this.setState(SequencerState.IDLE, 0); } } @@ -392,14 +393,13 @@ export class Sequencer { return true; } - setState(proposedState: SequencerState, force: boolean = false) { + setState(proposedState: SequencerState, secondsIntoSlot: number, force: boolean = false) { if (this.state === SequencerState.STOPPED && force !== true) { this.log.warn( `Cannot set sequencer from ${this.state} to ${proposedState} as it is stopped. 
Set force=true to override.`, ); return; } - const secondsIntoSlot = getSecondsIntoSlot(this.l1GenesisTime, this.aztecSlotDuration); if (!this.doIHaveEnoughTimeLeft(proposedState, secondsIntoSlot)) { throw new SequencerTooSlowError(this.state, proposedState, this.timeTable[proposedState], secondsIntoSlot); } @@ -496,7 +496,12 @@ export class Sequencer { this.metrics.recordNewBlock(newGlobalVariables.blockNumber.toNumber(), validTxs.length); const workTimer = new Timer(); - this.setState(SequencerState.CREATING_BLOCK); + const secondsIntoSlot = getSecondsIntoSlot( + this.l1GenesisTime, + this.aztecSlotDuration, + newGlobalVariables.slotNumber.toNumber(), + ); + this.setState(SequencerState.CREATING_BLOCK, secondsIntoSlot); this.log.info( `Building blockNumber=${newGlobalVariables.blockNumber.toNumber()} txCount=${ validTxs.length @@ -558,7 +563,7 @@ export class Sequencer { await this.publisher.validateBlockForSubmission(block.header); const workDuration = workTimer.ms(); - this.log.verbose( + this.log.info( `Assembled block ${block.number} (txEffectsHash: ${block.header.contentCommitment.txsEffectsHash.toString( 'hex', )})`, @@ -573,19 +578,19 @@ export class Sequencer { ); if (this.isFlushing) { - this.log.verbose(`Flushing completed`); + this.log.info(`Flushing completed`); } const txHashes = validTxs.map(tx => tx.getTxHash()); this.isFlushing = false; - this.log.verbose('Collecting attestations'); + this.log.info('Collecting attestations'); const attestations = await this.collectAttestations(block, txHashes); - this.log.verbose('Attestations collected'); + this.log.info('Attestations collected'); - this.log.verbose('Collecting proof quotes'); + this.log.info('Collecting proof quotes'); const proofQuote = await this.createProofClaimForPreviousEpoch(newGlobalVariables.slotNumber.toBigInt()); - this.log.verbose(proofQuote ? `Using proof quote ${inspect(proofQuote.payload)}` : 'No proof quote available'); + this.log.info(proofQuote ? 
`Using proof quote ${inspect(proofQuote.payload)}` : 'No proof quote available'); try { await this.publishL2Block(block, attestations, txHashes, proofQuote); @@ -633,16 +638,28 @@ export class Sequencer { const numberOfRequiredAttestations = Math.floor((committee.length * 2) / 3) + 1; - this.log.verbose('Creating block proposal'); + this.log.info('Creating block proposal'); const proposal = await this.validatorClient.createBlockProposal(block.header, block.archive.root, txHashes); - this.setState(SequencerState.PUBLISHING_BLOCK_TO_PEERS); - this.log.verbose('Broadcasting block proposal to validators'); + let secondsIntoSlot = getSecondsIntoSlot( + this.l1GenesisTime, + this.aztecSlotDuration, + block.header.globalVariables.slotNumber.toNumber(), + ); + + this.setState(SequencerState.PUBLISHING_BLOCK_TO_PEERS, secondsIntoSlot); + this.log.info('Broadcasting block proposal to validators'); this.validatorClient.broadcastBlockProposal(proposal); - this.setState(SequencerState.WAITING_FOR_ATTESTATIONS); + secondsIntoSlot = getSecondsIntoSlot( + this.l1GenesisTime, + this.aztecSlotDuration, + block.header.globalVariables.slotNumber.toNumber(), + ); + + this.setState(SequencerState.WAITING_FOR_ATTESTATIONS, secondsIntoSlot); const attestations = await this.validatorClient.collectAttestations(proposal, numberOfRequiredAttestations); - this.log.verbose(`Collected attestations from validators, number of attestations: ${attestations.length}`); + this.log.info(`Collected attestations from validators, number of attestations: ${attestations.length}`); // note: the smart contract requires that the signatures are provided in the order of the committee return orderAttestations(attestations, committee); @@ -659,7 +676,7 @@ export class Sequencer { // Get quotes for the epoch to be proven const quotes = await this.p2pClient.getEpochProofQuotes(epochToProve); - this.log.verbose(`Retrieved ${quotes.length} quotes, slot: ${slotNumber}, epoch to prove: ${epochToProve}`); + 
this.log.info(`Retrieved ${quotes.length} quotes, slot: ${slotNumber}, epoch to prove: ${epochToProve}`); for (const quote of quotes) { this.log.verbose(inspect(quote.payload)); } @@ -670,7 +687,7 @@ export class Sequencer { const validQuotes = (await validQuotesPromise).filter((q): q is EpochProofQuote => !!q); if (!validQuotes.length) { - this.log.verbose(`Failed to find any valid proof quotes`); + this.log.warn(`Failed to find any valid proof quotes`); return undefined; } // pick the quote with the lowest fee @@ -697,8 +714,13 @@ export class Sequencer { txHashes?: TxHash[], proofQuote?: EpochProofQuote, ) { + const secondsIntoSlot = getSecondsIntoSlot( + this.l1GenesisTime, + this.aztecSlotDuration, + block.header.globalVariables.slotNumber.toNumber(), + ); // Publishes new block to the network and awaits the tx to be mined - this.setState(SequencerState.PUBLISHING_BLOCK); + this.setState(SequencerState.PUBLISHING_BLOCK, secondsIntoSlot); const publishedL2Block = await this.publisher.proposeL2Block(block, attestations, txHashes, proofQuote); if (!publishedL2Block) { diff --git a/yarn-project/sequencer-client/src/sequencer/utils.ts b/yarn-project/sequencer-client/src/sequencer/utils.ts index c423c29ace4..4c16e8c8a9b 100644 --- a/yarn-project/sequencer-client/src/sequencer/utils.ts +++ b/yarn-project/sequencer-client/src/sequencer/utils.ts @@ -73,6 +73,7 @@ export function orderAttestations(attestations: BlockAttestation[], orderAddress return orderedAttestations; } -export function getSecondsIntoSlot(l1GenesisTime: number, aztecSlotDuration: number): number { - return (Date.now() / 1000 - l1GenesisTime) % aztecSlotDuration; +export function getSecondsIntoSlot(l1GenesisTime: number, aztecSlotDuration: number, slotNumber: number): number { + const slotStartTimestamp = l1GenesisTime + slotNumber * aztecSlotDuration; + return Date.now() / 1000 - slotStartTimestamp; }