Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(k8s): boot node long sync #9610

Merged
merged 44 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
e08989f
x
ludamad Oct 25, 2024
03ff27b
x
ludamad Oct 25, 2024
fc40b0d
small ci fix; get load balancer url helper function ; allow otel inje…
ludamad Oct 25, 2024
380713d
.
ludamad Oct 25, 2024
11b41b7
.
ludamad Oct 25, 2024
c6ddaf3
.
ludamad Oct 25, 2024
92d1e4b
-
ludamad Oct 25, 2024
1ebfe9b
lead
ludamad Oct 28, 2024
1960d6b
Merge remote-tracking branch 'origin/master'
ludamad Oct 28, 2024
0738e89
deploy_spartan.sh prod changes
ludamad Oct 28, 2024
35e2a98
validator.sh
ludamad Oct 28, 2024
f477cd3
telemetry for winston
ludamad Oct 28, 2024
f83ed70
negative patterns in core logger
ludamad Oct 28, 2024
739acc7
negative patterns in core logger
ludamad Oct 28, 2024
45be95d
yarn project working
ludamad Oct 28, 2024
137b324
yarn project working
ludamad Oct 28, 2024
8fe411c
metrics
ludamad Oct 28, 2024
50b9c7a
revert
ludamad Oct 29, 2024
48067cf
missing file
ludamad Oct 29, 2024
9b7215d
missing file
ludamad Oct 29, 2024
7efb469
metrics in native testnet script
ludamad Oct 29, 2024
5fb0973
telemetry for winston
ludamad Oct 29, 2024
6cb4b46
fix metrics
ludamad Oct 30, 2024
fc299f3
Update install.sh
ludamad Oct 30, 2024
587a58e
updates
ludamad Oct 30, 2024
8638eb7
Merge remote-tracking branch 'origin/ad/metrics-working' into ad/metr…
ludamad Oct 30, 2024
ef41a4d
rev
ludamad Oct 30, 2024
0b9baed
-
ludamad Oct 30, 2024
4c138ed
Merge branch 'master' into ad/metrics-working
ludamad Oct 30, 2024
734c205
formatting
ludamad Oct 30, 2024
ec0a3d6
Merge remote-tracking branch 'origin/ad/metrics-working' into ad/metr…
ludamad Oct 30, 2024
90bf69d
cleaner
ludamad Oct 30, 2024
c900476
Update logger.ts
ludamad Oct 30, 2024
cfad19f
fix
ludamad Oct 30, 2024
8fb3a0e
try startup probe
ludamad Oct 30, 2024
abf3d8e
Merge remote-tracking branch 'origin/master' into ad/boot-node-sync-w…
ludamad Oct 30, 2024
56612c1
revert
ludamad Oct 30, 2024
a77fca2
Update start_node.ts
ludamad Oct 31, 2024
a2606a9
Update boot-node.yaml
ludamad Oct 31, 2024
6b0a2f8
Update validator.yaml
ludamad Oct 31, 2024
76e879e
Update values.yaml
ludamad Oct 31, 2024
9b2cd6d
Update boot-node.yaml
ludamad Oct 31, 2024
7a31663
Update boot-node.yaml
ludamad Oct 31, 2024
531896c
try and fix startup probe issue with pod dns
ludamad Oct 31, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 13 additions & 15 deletions spartan/aztec-network/templates/boot-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,16 @@ spec:
"-c",
"sleep 30 && source /shared/contracts.env && env && node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --node --archiver --sequencer --pxe",
]
startupProbe:
httpGet:
path: /status
port: {{ .Values.bootNode.service.nodePort }}
periodSeconds: {{ .Values.bootNode.startupProbe.periodSeconds }}
failureThreshold: {{ .Values.bootNode.startupProbe.failureThreshold }}
livenessProbe:
exec:
command:
- /bin/sh
- -c
- curl -fSs http://127.0.0.1:{{ .Values.bootNode.service.nodePort }}/status
httpGet:
path: /status
port: {{ .Values.bootNode.service.nodePort }}
initialDelaySeconds: 30
periodSeconds: 5
timeoutSeconds: 30
Expand All @@ -89,16 +93,10 @@ spec:
subPath: contracts.env
{{- end }}
env:
- name: POD_NAME
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: POD_DNS_NAME
value: "$(POD_NAME).{{ include "aztec-network.fullname" . }}-boot-node.$(POD_NAMESPACE).svc.cluster.local"
fieldPath: status.podIP
- name: PORT
value: "{{ .Values.bootNode.service.nodePort }}"
- name: LOG_LEVEL
Expand All @@ -123,13 +121,13 @@ spec:
{{- if .Values.bootNode.externalTcpHost }}
value: "{{ .Values.bootNode.externalTcpHost }}:{{ .Values.bootNode.service.p2pTcpPort }}"
{{- else }}
value: "$(POD_DNS_NAME):{{ .Values.bootNode.service.p2pTcpPort }}"
value: "$(POD_IP):{{ .Values.bootNode.service.p2pTcpPort }}"
{{- end }}
- name: P2P_UDP_ANNOUNCE_ADDR
{{- if .Values.bootNode.externalUdpHost }}
value: "{{ .Values.bootNode.externalUdpHost }}:{{ .Values.bootNode.service.p2pUdpPort }}"
{{- else }}
value: "$(POD_DNS_NAME):{{ .Values.bootNode.service.p2pUdpPort }}"
value: "$(POD_IP):{{ .Values.bootNode.service.p2pUdpPort }}"
{{- end }}
- name: P2P_TCP_LISTEN_ADDR
value: "0.0.0.0:{{ .Values.bootNode.service.p2pTcpPort }}"
Expand Down
14 changes: 4 additions & 10 deletions spartan/aztec-network/templates/prover-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,10 @@ spec:
- name: shared-volume
mountPath: /shared
env:
- name: POD_NAME
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: POD_DNS_NAME
value: "$(POD_NAME).{{ include "aztec-network.fullname" . }}-prover-node.$(POD_NAMESPACE).svc.cluster.local"
fieldPath: status.podIP
- name: PORT
value: "{{ .Values.proverNode.service.nodePort }}"
- name: LOG_LEVEL
Expand All @@ -102,7 +96,7 @@ spec:
- name: PROVER_COORDINATION_NODE_URL
value: {{ include "aztec-network.bootNodeUrl" . | quote }}
- name: PROVER_JOB_SOURCE_URL
value: "http://$(POD_DNS_NAME):{{ .Values.proverNode.service.nodePort }}"
value: "http://$(POD_IP):{{ .Values.proverNode.service.nodePort }}"
ports:
- containerPort: {{ .Values.proverNode.service.nodePort }}
resources:
Expand Down Expand Up @@ -163,4 +157,4 @@ spec:
- port: {{ .Values.proverNode.service.nodePort }}
name: node
{{ end }}
{{ end }}
{{ end }}
30 changes: 20 additions & 10 deletions spartan/aztec-network/templates/validator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,20 +78,30 @@ spec:
- "/bin/bash"
- "-c"
- "sleep 10 && source /shared/contracts.env && env && node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --node --archiver --sequencer"
startupProbe:
httpGet:
path: /status
port: {{ .Values.validator.service.nodePort }}
failureThreshold: {{ .Values.validator.startupProbe.failureThreshold }}
periodSeconds: {{ .Values.validator.startupProbe.periodSeconds }}
livenessProbe:
exec:
command:
- /bin/sh
- -c
- curl -fSs http://127.0.0.1:{{ .Values.validator.service.nodePort }}/status
initialDelaySeconds: 30
periodSeconds: 5
timeoutSeconds: 30
failureThreshold: 3
volumeMounts:
- name: shared-volume
mountPath: /shared
env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: POD_DNS_NAME
value: "$(POD_NAME).{{ include "aztec-network.fullname" . }}-validator.$(POD_NAMESPACE).svc.cluster.local"
fieldPath: status.podIP
- name: PORT
value: "{{ .Values.validator.service.nodePort }}"
- name: LOG_LEVEL
Expand All @@ -114,13 +124,13 @@ spec:
{{- if .Values.validator.externalTcpHost }}
value: "{{ .Values.validator.externalTcpHost }}:{{ .Values.validator.service.p2pTcpPort }}"
{{- else }}
value: "$(POD_DNS_NAME):{{ .Values.validator.service.p2pTcpPort }}"
value: "$(POD_IP):{{ .Values.validator.service.p2pTcpPort }}"
{{- end }}
- name: P2P_UDP_ANNOUNCE_ADDR
{{- if .Values.validator.externalUdpHost }}
value: "{{ .Values.validator.externalUdpHost }}:{{ .Values.validator.service.p2pUdpPort }}"
{{- else }}
value: "$(POD_DNS_NAME):{{ .Values.validator.service.p2pUdpPort }}"
value: "$(POD_IP):{{ .Values.validator.service.p2pUdpPort }}"
{{- end }}
- name: P2P_TCP_LISTEN_ADDR
value: "0.0.0.0:{{ .Values.validator.service.p2pTcpPort }}"
Expand Down
10 changes: 10 additions & 0 deletions spartan/aztec-network/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ bootNode:
memory: "2Gi"
cpu: "200m"
deployContracts: true # Set to false to use manual contract addresses
startupProbe:
periodSeconds: 10
# Only treat startup as botched after 120 consecutive failed probes at 10s intervals (20 minutes).
# This gives the boot node enough time to sync.
failureThreshold: 120
contracts:
rollupAddress: ""
registryAddress: ""
Expand Down Expand Up @@ -75,6 +80,11 @@ validator:
disabled: false
p2p:
enabled: "true"
startupProbe:
periodSeconds: 10
# Only treat startup as botched after 120 consecutive failed probes at 10s intervals (20 minutes).
# This gives the validator enough time to sync.
failureThreshold: 120
resources:
requests:
memory: "2Gi"
Expand Down
3 changes: 1 addition & 2 deletions yarn-project/aztec/src/cli/cmds/start_node.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ export const startNode = async (
options: any,
signalHandlers: (() => Promise<void>)[],
userLog: LogFn,
// ): Promise<ServerList> => {
) => {
): Promise<ServerList> => {
// Services that will be started in a single multi-rpc server
const services: ServerList = [];

Expand Down
Loading