diff --git a/spartan/aztec-network/templates/boot-node.yaml b/spartan/aztec-network/templates/boot-node.yaml index 59f99ba4a7f..abe934d44dd 100644 --- a/spartan/aztec-network/templates/boot-node.yaml +++ b/spartan/aztec-network/templates/boot-node.yaml @@ -69,12 +69,16 @@ spec: "-c", "sleep 30 && source /shared/contracts.env && env && node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --node --archiver --sequencer --pxe", ] + startupProbe: + httpGet: + path: /status + port: {{ .Values.bootNode.service.nodePort }} + periodSeconds: {{ .Values.bootNode.startupProbe.periodSeconds }} + failureThreshold: {{ .Values.bootNode.startupProbe.failureThreshold }} livenessProbe: - exec: - command: - - /bin/sh - - -c - - curl -fSs http://127.0.0.1:{{ .Values.bootNode.service.nodePort }}/status + httpGet: + path: /status + port: {{ .Values.bootNode.service.nodePort }} initialDelaySeconds: 30 periodSeconds: 5 timeoutSeconds: 30 @@ -89,16 +93,10 @@ spec: subPath: contracts.env {{- end }} env: - - name: POD_NAME + - name: POD_IP valueFrom: fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: POD_DNS_NAME - value: "$(POD_NAME).{{ include "aztec-network.fullname" . }}-boot-node.$(POD_NAMESPACE).svc.cluster.local" + fieldPath: status.podIP - name: PORT value: "{{ .Values.bootNode.service.nodePort }}" - name: LOG_LEVEL @@ -123,13 +121,13 @@ spec: {{- if .Values.bootNode.externalTcpHost }} value: "{{ .Values.bootNode.externalTcpHost }}:{{ .Values.bootNode.service.p2pTcpPort }}" {{- else }} - value: "$(POD_DNS_NAME):{{ .Values.bootNode.service.p2pTcpPort }}" + value: "$(POD_IP):{{ .Values.bootNode.service.p2pTcpPort }}" {{- end }} - name: P2P_UDP_ANNOUNCE_ADDR {{- if .Values.bootNode.externalUdpHost }} value: "{{ .Values.bootNode.externalUdpHost }}:{{ .Values.bootNode.service.p2pUdpPort }}" {{- else }} - value: "$(POD_DNS_NAME):{{ .Values.bootNode.service.p2pUdpPort }}" + value: "$(POD_IP):{{ .Values.bootNode.service.p2pUdpPort }}" {{- end }} - name: P2P_TCP_LISTEN_ADDR value: "0.0.0.0:{{ .Values.bootNode.service.p2pTcpPort }}" diff --git a/spartan/aztec-network/templates/prover-node.yaml b/spartan/aztec-network/templates/prover-node.yaml index 6ec388150d3..42d00ffa83e 100644 --- a/spartan/aztec-network/templates/prover-node.yaml +++ b/spartan/aztec-network/templates/prover-node.yaml @@ -72,16 +72,10 @@ spec: - name: shared-volume mountPath: /shared env: - - name: POD_NAME + - name: POD_IP valueFrom: fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: POD_DNS_NAME - value: "$(POD_NAME).{{ include "aztec-network.fullname" . }}-prover-node.$(POD_NAMESPACE).svc.cluster.local" + fieldPath: status.podIP - name: PORT value: "{{ .Values.proverNode.service.nodePort }}" - name: LOG_LEVEL @@ -102,7 +96,7 @@ spec: - name: PROVER_COORDINATION_NODE_URL value: {{ include "aztec-network.bootNodeUrl" . | quote }} - name: PROVER_JOB_SOURCE_URL - value: "http://$(POD_DNS_NAME):{{ .Values.proverNode.service.nodePort }}" + value: "http://$(POD_IP):{{ .Values.proverNode.service.nodePort }}" ports: - containerPort: {{ .Values.proverNode.service.nodePort }} resources: @@ -163,4 +157,4 @@ spec: - port: {{ .Values.proverNode.service.nodePort }} name: node {{ end }} -{{ end }} \ No newline at end of file +{{ end }} diff --git a/spartan/aztec-network/templates/validator.yaml b/spartan/aztec-network/templates/validator.yaml index 963e778d745..310bf2bcacf 100644 --- a/spartan/aztec-network/templates/validator.yaml +++ b/spartan/aztec-network/templates/validator.yaml @@ -78,20 +78,30 @@ spec: - "/bin/bash" - "-c" - "sleep 10 && source /shared/contracts.env && env && node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --node --archiver --sequencer" + startupProbe: + httpGet: + path: /status + port: {{ .Values.validator.service.nodePort }} + failureThreshold: {{ .Values.validator.startupProbe.failureThreshold }} + periodSeconds: {{ .Values.validator.startupProbe.periodSeconds }} + livenessProbe: + exec: + command: + - /bin/sh + - -c + - curl -fSs http://127.0.0.1:{{ .Values.validator.service.nodePort }}/status + initialDelaySeconds: 30 + periodSeconds: 5 + timeoutSeconds: 30 + failureThreshold: 3 volumeMounts: - name: shared-volume mountPath: /shared env: - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE + - name: POD_IP valueFrom: fieldRef: - fieldPath: metadata.namespace - - name: POD_DNS_NAME - value: "$(POD_NAME).{{ include "aztec-network.fullname" . }}-validator.$(POD_NAMESPACE).svc.cluster.local" + fieldPath: status.podIP - name: PORT value: "{{ .Values.validator.service.nodePort }}" - name: LOG_LEVEL @@ -114,13 +124,13 @@ spec: {{- if .Values.validator.externalTcpHost }} value: "{{ .Values.validator.externalTcpHost }}:{{ .Values.validator.service.p2pTcpPort }}" {{- else }} - value: "$(POD_DNS_NAME):{{ .Values.validator.service.p2pTcpPort }}" + value: "$(POD_IP):{{ .Values.validator.service.p2pTcpPort }}" {{- end }} - name: P2P_UDP_ANNOUNCE_ADDR {{- if .Values.validator.externalUdpHost }} value: "{{ .Values.validator.externalUdpHost }}:{{ .Values.validator.service.p2pUdpPort }}" {{- else }} - value: "$(POD_DNS_NAME):{{ .Values.validator.service.p2pUdpPort }}" + value: "$(POD_IP):{{ .Values.validator.service.p2pUdpPort }}" {{- end }} - name: P2P_TCP_LISTEN_ADDR value: "0.0.0.0:{{ .Values.validator.service.p2pTcpPort }}" diff --git a/spartan/aztec-network/values.yaml b/spartan/aztec-network/values.yaml index 8905fc155eb..3dc885cb56c 100644 --- a/spartan/aztec-network/values.yaml +++ b/spartan/aztec-network/values.yaml @@ -44,6 +44,11 @@ bootNode: memory: "2Gi" cpu: "200m" deployContracts: true # Set to false to use manual contract addresses + startupProbe: + periodSeconds: 10 + # Only if we fail for 20 minutes straight do we call it botched + # This gives enough time to sync + failureThreshold: 120 contracts: rollupAddress: "" registryAddress: "" @@ -75,6 +80,11 @@ validator: disabled: false p2p: enabled: "true" + startupProbe: + periodSeconds: 10 + # Only if we fail for 20 minutes straight do we call it botched + # This gives enough time to sync + failureThreshold: 120 resources: requests: memory: "2Gi" diff --git a/yarn-project/aztec/src/cli/cmds/start_node.ts b/yarn-project/aztec/src/cli/cmds/start_node.ts index 2a0bf875fc7..06bb311b3cb 100644 --- a/yarn-project/aztec/src/cli/cmds/start_node.ts +++ b/yarn-project/aztec/src/cli/cmds/start_node.ts @@ -18,8 +18,7 @@ export const startNode = async ( options: any, signalHandlers: (() => Promise)[], userLog: LogFn, - // ): Promise => { -) => { +): Promise => { // Services that will be started in a single multi-rpc server const services: ServerList = [];