#!/bin/bash
# Utility script to deploy Penumbra testnet(s) to k8s,
# used as part of CI. At a high level, this script does the following:
#
# * reads env vars (e.g. from github actions) to set helm values
# * runs a container with `pd testnet generate` to create genesis
# * munges the generated data into valid (but internal) peer strings
# * deploys helm chart to kubernetes cluster, replacing running pods
# * waits a while, then fetches the public ip addresses
# * re-munges the generated data into publicly-routable peer strings
# * re-deploys the helm chart to overwrite the config
#
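# Example invocation (illustrative values; any of the env vars below may be
# overridden on the command line):
#
#   HELM_RELEASE=penumbra-devnet NVALS=2 NFULLNODES=2 PENUMBRA_VERSION=main ./ci.sh
#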
set -euo pipefail
# The following env vars can be used to override config vars
# for the helm chart. N.B. these env vars are also configured
# in GitHub Actions, so the values below may be out of date.
WORKDIR="${WORKDIR:-$(pwd)/charts/penumbra/pdcli}"
IMAGE="${IMAGE:-ghcr.io/penumbra-zone/penumbra}"
PENUMBRA_VERSION="${PENUMBRA_VERSION:-main}"
PENUMBRA_UID_GID="${PENUMBRA_UID_GID:-1000\:1000}"
TENDERMINT_VERSION="${TENDERMINT_VERSION:-v0.34.23}"
NVALS="${NVALS:-2}"
NFULLNODES="${NFULLNODES:-2}"
CONTAINERHOME="${CONTAINERHOME:-/root}"
# Default to bespoke devnet for deployments; less likely to break public testnets.
# Useful for running ad-hoc via CLI. The workflows override this for testnet/preview.
HELM_RELEASE="${HELM_RELEASE:-penumbra-devnet}"
# Check that the network we're trying to configure has a valid config.
if [[ "$HELM_RELEASE" =~ ^penumbra-testnet$ ]] ; then
HELM_VARS_FILE="networks/testnet/helm-values-for-${HELM_RELEASE}.yml"
elif [[ "$HELM_RELEASE" =~ ^penumbra-testnet-preview$ ]] ; then
HELM_VARS_FILE="networks/testnet-preview/helm-values-for-${HELM_RELEASE}.yml"
elif [[ "$HELM_RELEASE" =~ ^penumbra-devnet$ ]] ; then
HELM_VARS_FILE="networks/devnet/helm-values-for-${HELM_RELEASE}.yml"
else
>&2 echo "ERROR: helm release name '$HELM_RELEASE' not supported"
exit 1
fi
if [[ ! -e "$HELM_VARS_FILE" ]]; then
>&2 echo "ERROR: file not found: '$HELM_VARS_FILE'"
exit 2
fi
# Get CLI program for running containers. Prefers podman if available,
# defaults to docker otherwise. Helpful for running script on workstations.
function get_container_cli() {
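# 'hash' exits non-zero when the command is not found in PATH,
# so this checks for podman without actually running it.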
if hash podman > /dev/null 2>&1 ; then
echo "podman"
else
echo "docker"
fi
}
function create_genesis() {
# Use a fresh working directory. The helm chart reads the local files
# generated by "pd testnet generate" from this dirpath and pushes them
# into the cluster config.
test -d "$WORKDIR" && rm -r "$WORKDIR"
mkdir -p "$WORKDIR"
for i in $(seq "$NVALS"); do
I="$((i-1))"
NODEDIR="node${I}"
mkdir -p "${WORKDIR}/${NODEDIR}"
# This will be overwritten by pd testnet generate.
echo '{"identity_key": "penumbravalid1lr73zgd726gpk7rl45hvpg9f7r9wchgg8gpjhx2gqntx4md6gg9sser05u","consensus_key": "9OQ8HOy4YsryEPLbTtPKoKdmmjSqEJhzvS+x0WC8YoM=","name": "","website": "","description": "","enabled": false,"funding_streams": [{"address": "penumbrav2t1wz70yfqlgzfgwml5ne04vhnhahg8axmaupuv7x0gpuzesfhhz63y52cqffv93k7qvuuq6yqtgcj0z267v59qxpjuvc0hvfaynaaemgmqzyj38xhj8yjx7vcftnyq9q28exjrdj","rate_bps": 100}],"sequence_number": 0,"governance_key": "penumbragovern1lr73zgd726gpk7rl45hvpg9f7r9wchgg8gpjhx2gqntx4md6gg9sthagp6"}' > "${WORKDIR}/${NODEDIR}/val.json"
done
find "$WORKDIR" -name "val.json" -exec cat {} + | jq -s > "${WORKDIR}/vals.json"
# For the weekly testnets, we pass `--preserve-chain-id` when generating
# the config. For testnet-preview, we don't want that option: we want
# a unique chain id for every deploy.
if [[ "$HELM_RELEASE" =~ ^penumbra-testnet$ ]] ; then
preserve_chain_opt="--preserve-chain-id"
else
preserve_chain_opt=""
fi
echo "Generating new testnet files..."
container_cli="$(get_container_cli)"
# 'preserve_chain_opt' is intentionally unquoted so an empty value expands to
# nothing; silence the shellcheck word-splitting warning for it.
# shellcheck disable=SC2086
"$container_cli" run --user 0:0 \
--pull always \
-v "${WORKDIR}:${CONTAINERHOME}" --rm \
--entrypoint pd \
"${IMAGE}:${PENUMBRA_VERSION}" \
testnet generate \
$preserve_chain_opt \
--validators-input-file "${CONTAINERHOME}/vals.json" > /dev/null
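# Because ${WORKDIR} is mounted at ${CONTAINERHOME} in the container, the
# generated config lands under ${WORKDIR}/.penumbra/testnet_data/ on the host.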
# Compress the genesis file, otherwise the Helm manifest will exceed the 1MB
# secret limit. See GH1783.
gzip -c "${WORKDIR}/.penumbra/testnet_data/node0/tendermint/config/genesis.json" \
> "${WORKDIR}/genesis.json.gz"
# Clear out persistent peers. Will peer after services are bootstrapped.
# The Helm chart requires that these local flat files exist, but we cannot
# populate them with external IPs just yet. Make sure they're present,
# but empty, for now.
for i in $(seq 0 $((NVALS -1))); do
echo > "${WORKDIR}/external_address_val_${i}.txt"
echo > "${WORKDIR}/persistent_peers_val_${i}.txt"
done
for i in $(seq 0 $((NFULLNODES -1))); do
echo > "${WORKDIR}/external_address_fn_${i}.txt"
echo > "${WORKDIR}/persistent_peers_fn_${i}.txt"
done
}
# Remove the existing deployment and its associated storage. Intentionally skips
# removal of certain durable resources, such as the LoadBalancer and ManagedCertificate.
# We intentionally don't use "helm uninstall" because GCP takes a while
# to propagate ingress recreation, causing delays in endpoint availability.
function helm_uninstall() {
# Delete existing deployments.
kubectl delete deployments -l app.kubernetes.io/instance="$HELM_RELEASE" --wait=false > /dev/null 2>&1
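# --wait=false returns immediately rather than blocking on pod termination;
# the PVC deletion below uses kubectl's default behavior and blocks until
# the claims are removed.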
# Delete all existing PVCs so that fresh testnet is created.
kubectl delete pvc -l app.kubernetes.io/instance="$HELM_RELEASE" > /dev/null 2>&1
}
# We manually munge the ConfigMap for genesis data, rather than using Helm
# to generate the template, to avoid the large filesize from exceeding
# the 1MB Secret limit for Helm manifests.
function create_config_map() {
if [[ -z "$HELM_RELEASE" ]] ; then
>&2 echo "Error: HELM_RELEASE not defined, cannot generate ConfigMap for genesis"
return 1
fi
repo_root="$(git rev-parse --show-toplevel)"
config_map_name="${HELM_RELEASE}-genesis-config"
# The smelly sleeps ensure that the ConfigMap exists prior to creation of PVs which
# mount in the contents of the CM; otherwise, pods get stuck in init state.
kubectl delete configmap "$config_map_name" --ignore-not-found=true --wait
sleep 2
kubectl create configmap "$config_map_name" --from-file="${repo_root}/deployments/charts/penumbra/pdcli/genesis.json.gz"
sleep 3
}
# Apply the Helm configuration to the cluster. Will overwrite resources
# as necessary. Will *not* replace certain durable resources like
# the ManagedCertificate, which is annotated with helm.sh/resource-policy=keep.
function helm_install() {
create_config_map
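# 'helm upgrade --install' creates the release if it does not yet exist,
# and otherwise upgrades it in place.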
helm upgrade --install "$HELM_RELEASE" ./charts/penumbra \
--set "numValidators=$NVALS" \
--set "numFullNodes=$NFULLNODES" \
--set "penumbra.image=$IMAGE" \
--set "penumbra.version=$PENUMBRA_VERSION" \
--set "grafana.version=$PENUMBRA_VERSION" \
--set "penumbra.uidGid=$PENUMBRA_UID_GID" \
--set "tendermint.version=$TENDERMINT_VERSION" \
--values "$HELM_VARS_FILE"
}
# Block until the Services' ExternalIP attributes are populated.
function wait_for_external_ips() {
while true; do
echo "Waiting for load balancer external IPs to be provisioned..."
mapfile -t STATUSES < <(kubectl get svc -l app.kubernetes.io/instance="$HELM_RELEASE" --no-headers | grep p2p | awk '{print $4}')
FOUND_PENDING=false
for STATUS in "${STATUSES[@]}"; do
if [[ "$STATUS" == "<pending>" ]]; then
sleep 5
FOUND_PENDING=true
break
fi
done
if [[ "$FOUND_PENDING" == "false" ]]; then
break
fi
done
}
function wait_for_pods_to_be_running() {
echo "Waiting for pods to be running..."
kubectl wait --for=condition=ready pods --timeout=5m \
-l app.kubernetes.io/instance="$HELM_RELEASE"
}
function collect_local_config_values() {
for i in $(seq 0 $((NVALS - 1))); do
echo "Getting public peer string for validator $i"
NODE_ID="$(kubectl exec "$(kubectl get pods -l app.kubernetes.io/instance="$HELM_RELEASE" -o name | grep "penumbra.*val-${i}")" -c tm -- tendermint --home=/home/.tendermint show-node-id | tr -d '\r')"
IP="$(kubectl get svc "${HELM_RELEASE}-p2p-val-${i}" -o json | jq -r .status.loadBalancer.ingress[0].ip | tr -d '\r')"
EXTERNAL_ADDR="${IP}:26656"
NODE_ADDR="${NODE_ID}@${EXTERNAL_ADDR}"
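# NODE_ADDR has the Tendermint peer form <node-id>@<ip>:26656,
# e.g. (illustrative) <40-hex-char-node-id>@35.230.0.10:26656.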
echo "$EXTERNAL_ADDR" > "${WORKDIR}/external_address_val_${i}.txt"
echo "$NODE_ADDR" > "${WORKDIR}/node_address_val_${i}.txt"
done
for i in $(seq 0 $((NFULLNODES - 1))); do
echo "Getting public peer string for fullnode $i"
NODE_ID="$(kubectl exec "$(kubectl get pods -l app.kubernetes.io/instance="$HELM_RELEASE" -o name | grep "penumbra.*-fn-${i}")" -c tm -- tendermint --home=/home/.tendermint show-node-id | tr -d '\r')"
IP="$(kubectl get svc "${HELM_RELEASE}-p2p-fn-${i}" -o json | jq -r .status.loadBalancer.ingress[0].ip | tr -d '\r')"
EXTERNAL_ADDR="${IP}:26656"
NODE_ADDR="${NODE_ID}@${EXTERNAL_ADDR}"
echo "$EXTERNAL_ADDR" > "${WORKDIR}/external_address_fn_${i}.txt"
echo "$NODE_ADDR" > "${WORKDIR}/node_address_fn_${i}.txt"
done
# Now we have all the info we need in local flat files: node ids and external IPs,
# keyed by service/pod identity. Loop over those files and assemble the info
# into Tendermint config attributes for each service.
for i in $(seq 0 $((NVALS - 1))); do
find "$WORKDIR" -type f -iname 'node_address*' -and -not -iname "*val_${i}.txt" -exec cat {} + \
| perl -npE 's/\n/,/g' | perl -npE 's/,$//' \
> "${WORKDIR}/persistent_peers_val_${i}.txt"
done
for i in $(seq 0 $((NFULLNODES - 1))); do
find "$WORKDIR" -type f -iname 'node_address*' -and -not -iname "*fn_${i}.txt" -exec cat {} + \
| perl -npE 's/\n/,/g' | perl -npE 's/,$//' \
> "${WORKDIR}/persistent_peers_fn_${i}.txt"
done
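# Each persistent_peers_*.txt now contains a comma-separated list of every
# *other* node's peer string, e.g. (illustrative values, order unspecified):
#   <node_id_a>@35.230.0.11:26656,<node_id_b>@35.230.0.12:26656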
}
# Deploy a fresh testnet, destroying all prior chain state with new genesis.
function full_ci_rebuild() {
echo "Shutting down existing testnet if necessary..."
helm_uninstall
echo "Creating new genesis config..."
create_genesis
echo "Performining initial deploy of network, with private IPs..."
# Will deploy nodes, but will not be able to peer. Need to get IPs of services, then can peer
helm_install
wait_for_external_ips
wait_for_pods_to_be_running
echo "Collecting config values for each node..."
collect_local_config_values
echo "Applying fresh values so that nodes can peer and advertise external addresses."
# First, remove the old resources.
helm_uninstall
sleep 5
helm_install
# Report results
if wait_for_pods_to_be_running ; then
echo "Deploy complete!"
else
echo "ERROR: pods failed to enter running start. Deploy has failed."
return 1
fi
}
# Determine whether the version to be deployed constitutes a semver "patch" release,
# e.g. 0.1.2 -> 0.1.3.
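# Return codes: 0 = patch release (z > 0 in vX.Y.Z); 1 = not a semver tag
# (e.g. 'main'); 2 = minor or major release (z == 0). Only 0 triggers the
# in-place image bump in main() below.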
function is_patch_release() {
# Ensure version format is semver, otherwise fail.
if ! echo "$PENUMBRA_VERSION" | grep -qE '^v[0-9]+\.[0-9]+\.[0-9]+' ; then
return 1
fi
# Split on '.', inspect final field.
z="$(perl -F'\.' -lanE 'print $F[-1]' <<< "$PENUMBRA_VERSION")"
# If "z" in x.y.z is 0, then it's a minor release. (Or a major release,
# but we don't need to worry about that yet.)
if [[ $z = "0" ]] ; then
return 2
else
return 0
fi
}
# Bump the version of pd running for the deployment, across all
# fullnodes and validators. Allow the cluster to reconcile the changes
# by terminating and creating pods to match. Does *not* alter chain state.
# Allows us to handle "patch" versions.
function update_image_for_running_deployment() {
kubectl set image deployments \
-l "app.kubernetes.io/instance=${HELM_RELEASE}, app.kubernetes.io/component in (fullnode, validator)" \
"pd=${IMAGE}:${PENUMBRA_VERSION}"
# Wait for rollout to complete. Will block until pods are marked Ready.
kubectl rollout status deployment \
-l "app.kubernetes.io/instance=${HELM_RELEASE}, app.kubernetes.io/component in (fullnode, validator)"
}
function main() {
echo "Deploying network '${HELM_RELEASE}'..."
# TODO: to deploy older versions, e.g. v0.53.1, an override is necessary here
if is_patch_release ; then
echo "Release target '$PENUMBRA_VERSION' is a patch release; will preserve testnet while bumping version."
update_image_for_running_deployment
else
echo "Release target '$PENUMBRA_VERSION' requires a full re-deploy; will generate new testnet chain info."
full_ci_rebuild
fi
}
main