Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable 2 Cluster Deployment of Corfu/LR #266

Merged
merged 2 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 191 additions & 0 deletions .github/workflows/cloud-upgrade-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
# End-to-end cluster test: builds the Corfu server and test-client images,
# deploys two Helm releases into a k3d cluster, upgrades them, then runs the
# test and validation jobs.
name: Corfu Cluster Test

# Triggered by every push.
on: push

jobs:
upgrade-test:
runs-on: ubuntu-latest
# Hard cap for the whole job, in minutes.
timeout-minutes: 120

env:
# Both values are read from repository secrets.
PKG_USERNAME: ${{ secrets.pkg_username }}
PUBLISH_TOKEN: ${{ secrets.publish_token }}

steps:
# Check out CorfuDB/CorfuDB into the workspace root.
- uses: actions/checkout@v4
  with:
    repository: "CorfuDB/CorfuDB"

- name: Setup BuildX
  uses: docker/setup-buildx-action@v3

- name: Cancel Previous Runs
  uses: styfle/cancel-workflow-action@0.6.0
  with:
    access_token: ${{ github.token }}

# Cache ~/.m2/repository; the key changes whenever any pom.xml changes and
# falls back to the most recent cache for this OS otherwise.
# actions/cache v1/v2 have been retired by GitHub, hence v4.
- name: Cache local Maven repository
  uses: actions/cache@v4
  with:
    path: ~/.m2/repository
    key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
    restore-keys: |
      ${{ runner.os }}-maven-

# Second checkout of CorfuDB/CorfuDB (default branch), placed under ./enable-lr.
- name: Checkout Corfu Master
  uses: actions/checkout@v4
  with:
    repository: "CorfuDB/CorfuDB"
    path: enable-lr

# NOTE(review): no working-directory is set, so this build runs from the
# workspace root rather than from ./enable-lr - confirm which checkout is
# meant to produce the V2 image.
- name: Build V2 Image
  run: |
    .ci/infrastructure-docker-build.sh docker openjdk:8-jdk-bullseye

# Replace the workspace root with the corfu-cloud-0.3.2.3 ref.
- name: Checkout Corfu 3.2.3
  uses: actions/checkout@v4
  with:
    repository: "CorfuDB/CorfuDB"
    ref: corfu-cloud-0.3.2.3

- name: Build V1 Image
  run: |
    .ci/infrastructure-docker-build.sh docker openjdk:8-jdk-bullseye

# Check out the repository this workflow lives in (no `repository` override).
- name: Checkout code
  uses: actions/checkout@v4

- name: Build test client docker image
  working-directory: cloud/corfu/corfu-cloud-test
  run: |
    ./docker-build.sh

# Create a k3d cluster named "corfu" with three agents, a /tmp/k3dvol volume
# mapping and a 8082 -> 30080 port mapping on agent 0.
- name: Set up cluster
uses: AbsaOSS/k3d-action@v2
with:
cluster-name: "corfu"
args: >-
--volume /tmp/k3dvol:/tmp/k3dvol
-p "8082:30080@agent:0"
--agents 3

# Import the two corfu-server images and the test-client image into the
# "corfu" cluster.
- name: Import images
run: |
k3d image import corfudb/corfu-server:0.3.2-SNAPSHOT \
corfudb/corfu-server:0.4.0-SNAPSHOT \
corfudb/corfu-cloud-test:latest \
-c corfu

# Install Helm 3 and register the jetstack chart repository.
# The previous version of this step also downloaded a kubectl binary into the
# working directory, but never made it executable or put it on PATH, so later
# steps always used the runner's own kubectl; that dead download is removed.
- name: Set up Helm
  working-directory: ./cloud/corfu
  run: |
    curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
    chmod 700 get_helm.sh
    ./get_helm.sh
    helm repo add jetstack https://charts.jetstack.io
    helm repo update

# Install two releases of the local "corfu" chart on the 0.3.2-SNAPSHOT image:
# "corfu" with chart defaults and "corfu2" configured with cluster.type=sink.
- name: Initialize v1 cluster
  working-directory: ./cloud/corfu
  run: |
    helm install corfu corfu \
      --set tls.enabled=false \
      --set tls.certificate.enabled=false \
      --set global.replicas=3 \
      --set image.repository=corfudb/corfu-server \
      --set image.tag=0.3.2-SNAPSHOT

    # nameOverride was previously passed twice with the same value; once is enough.
    helm install corfu2 corfu \
      --set tls.enabled=false \
      --set tls.certificate.enabled=false \
      --set global.replicas=3 \
      --set image.repository=corfudb/corfu-server \
      --set image.tag=0.3.2-SNAPSHOT \
      --set lr.name="log-replication2" \
      --set nameOverride="corfu2" \
      --set serviceAccount.name="corfu2" \
      --set fullnameOverride="corfu2" \
      --set cluster.type="sink"

    # Fixed pause before the verification step starts polling logs.
    sleep 30

# Block until Corfu and the log-replication (LR) leader report ready in their logs.
- name: Cluster Verify V1
  working-directory: ./cloud/corfu
  run: |
    lr_version=V1

    # Wait for Corfu to be ready
    while ! kubectl logs corfu-0 -c corfu | grep -q "DATA"; do
      echo "Corfu is not ready yet..."
      sleep 15
    done
    echo "Corfu is Ready!!!!"

    # Get the leader of the log replication: the first LR pod whose log
    # contains "acquired". Sleep between sweeps instead of busy-looping.
    lr_leader=""
    while [ -z "$lr_leader" ]; do
      for pod in log-replication-0 log-replication-1 log-replication-2; do
        if kubectl logs "$pod" | grep -q "acquired"; then
          lr_leader="$pod"
          break
        fi
      done
      [ -n "$lr_leader" ] || sleep 5
    done

    echo "LR Leader is: $lr_leader"

    # The readiness marker in the LR log depends on the LR version.
    if [ "$lr_version" = "V2" ]; then
      lr_ready_str="Received leadership response from node"
    else
      lr_ready_str="Negotiation complete"
    fi

    # Wait for the log replication leader to be ready.
    # The pattern MUST be quoted: it contains spaces, and unquoted bash would
    # split it so grep treats the trailing words as file names, fails, and
    # this loop never terminates.
    while ! kubectl logs "$lr_leader" | grep -q "$lr_ready_str"; do
      echo "LR is not ready yet..."
      sleep 10
    done

    echo "Ready to Replicate!!!!"

# Upgrade both releases to the 0.4.0-SNAPSHOT image and wait until no pod
# description mentions the old tag any more.
- name: Upgrade cluster
  working-directory: ./cloud/corfu
  run: |
    helm upgrade corfu corfu \
      --set tls.enabled=false \
      --set tls.certificate.enabled=false \
      --set global.replicas=3 \
      --set image.repository=corfudb/corfu-server \
      --set image.tag=0.4.0-SNAPSHOT \
      --set version.new=true

    # nameOverride was previously passed twice with the same value; once is enough.
    helm upgrade corfu2 corfu \
      --set tls.enabled=false \
      --set tls.certificate.enabled=false \
      --set global.replicas=3 \
      --set image.repository=corfudb/corfu-server \
      --set image.tag=0.4.0-SNAPSHOT \
      --set lr.name="log-replication2" \
      --set nameOverride="corfu2" \
      --set serviceAccount.name="corfu2" \
      --set fullnameOverride="corfu2" \
      --set cluster.type="sink" \
      --set version.new=true

    while kubectl describe pods --all-namespaces | grep -q "0.3.2-SNAPSHOT"; do
      echo "Waiting for pods to be re-imaged..."
      sleep 10
    done

    echo "Cluster upgrade complete!!!"

# Run the test-client chart with jobs.job=1, wait for its pod to complete,
# then uninstall the release and wait for the pod to disappear.
- name: Test cluster
  working-directory: ./cloud/corfu
  run: |
    release=corfu-cloud-test

    echo "Writing Data To Source..."
    helm install "$release" corfu-cloud-test-helm --set tls.enabled=false --set jobs.job=1

    # Poll the pod list until the test pod shows "Completed".
    until kubectl get pods -o wide | grep "$release" | grep -q Completed; do
      echo "Waiting for test to finish..."
      sleep 5
    done

    helm uninstall "$release"

    # Poll until no pod of the release is listed any more.
    while kubectl get pods -o wide | grep -q "$release"; do
      echo "Removing test agent..."
      sleep 5
    done

    echo "Test Complete!!!"

# Run the test-client chart with jobs.job=2, wait for its pod to complete,
# then uninstall the release and wait for the pod to disappear.
- name: Validate test
  working-directory: ./cloud/corfu
  run: |
    release=corfu-cloud-test

    echo "Starting test validation!!!"
    helm install "$release" corfu-cloud-test-helm --set tls.enabled=false --set jobs.job=2

    # Poll the pod list until the validation pod shows "Completed".
    until kubectl get pods -o wide | grep "$release" | grep -q Completed; do
      echo "Waiting for validation to complete..."
      sleep 5
    done

    helm uninstall "$release"

    # Poll until no pod of the release is listed any more.
    while kubectl get pods -o wide | grep -q "$release"; do
      echo "Removing test agent..."
      sleep 5
    done

    echo "Validation Complete!!!"
Binary file added cloud/corfu/.swp
Binary file not shown.
142 changes: 142 additions & 0 deletions cloud/corfu/cluster_deploy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
#!/bin/zsh
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just some thoughts:
When a shell script gets this complex, you should switch to a better tool.
Python could be a better fit, or perhaps another automation tool such as Terraform.

Just to note, if you'd like to replace this sh with something else in the future, I'd be happy to support that activity


# Recreate the k3d cluster from scratch: delete any existing "corfu" cluster,
# remove the host volume directory, then create a fresh cluster with 4 agents.
cluster_setup() {
    local cluster=corfu
    local vol=/tmp/k3dvol

    k3d cluster delete "${cluster}"
    rm -rf "${vol}"

    k3d cluster create "${cluster}" --volume "${vol}:${vol}" -p "8082:30080@agent:0" --agents 4
}

# Import the two corfu-server images and the test-client image into the
# "corfu" k3d cluster, one `k3d image import` call per image.
image_imports() {
    local image
    for image in \
        corfudb/corfu-server:0.3.2-SNAPSHOT \
        corfudb/corfu-server:0.4.0-SNAPSHOT \
        corfudb/corfu-cloud-test:latest
    do
        k3d image import "${image}" -c corfu
    done
}

# Register the jetstack chart repository and refresh the local repo index.
helm_setup() {
    local repo_name=jetstack
    local repo_url=https://charts.jetstack.io

    helm repo add "${repo_name}" "${repo_url}"
    helm repo update
}

# Install two releases of the local "corfu" chart on the 0.3.2-SNAPSHOT image:
# "corfu" with chart defaults and "corfu2" configured with cluster.type=sink.
# Ends with a fixed 30-second pause.
init_v1_cluster() {
    # Flags shared by both releases.
    local -a common_flags
    common_flags=(
        --set tls.enabled=false
        --set tls.certificate.enabled=false
        --set global.replicas=3
        --set image.repository=corfudb/corfu-server
        --set image.tag=0.3.2-SNAPSHOT
    )

    helm install corfu corfu "${common_flags[@]}"

    # nameOverride was previously passed twice with the same value; once is enough.
    helm install corfu2 corfu "${common_flags[@]}" \
        --set lr.name="log-replication2" \
        --set nameOverride="corfu2" \
        --set serviceAccount.name="corfu2" \
        --set fullnameOverride="corfu2" \
        --set cluster.type="sink"

    sleep 30
}

# Install both releases directly on the 0.4.0-SNAPSHOT image with
# version.new=true. Not invoked by the call sequence at the bottom of this
# script, which reaches 0.4.0 through cluster_upgrade instead.
# Ends with a fixed 30-second pause.
init_v2_cluster() {
    # Flags shared by both releases.
    local -a common_flags
    common_flags=(
        --set tls.enabled=false
        --set tls.certificate.enabled=false
        --set global.replicas=3
        --set image.repository=corfudb/corfu-server
        --set image.tag=0.4.0-SNAPSHOT
        --set version.new=true
    )

    helm install corfu corfu "${common_flags[@]}"

    # nameOverride was previously passed twice with the same value; once is enough.
    helm install corfu2 corfu "${common_flags[@]}" \
        --set lr.name="log-replication2" \
        --set nameOverride="corfu2" \
        --set serviceAccount.name="corfu2" \
        --set fullnameOverride="corfu2" \
        --set cluster.type="sink"

    sleep 30
}

# Block until Corfu and the log-replication (LR) leader report ready in their logs.
#
# Arguments:
#   $1 - LR version; "V2" selects the V2 readiness marker, any other value
#        selects the V1 marker.
#
# Sets lr_leader and lr_ready_str, which are not declared local (as before).
cluster_verify() {
    local lr_version=$1
    local pod

    # Wait for Corfu to be ready
    while ! kubectl logs corfu-0 -c corfu | grep -q "DATA"; do
        echo "Corfu is not ready yet..."
        sleep 15
    done
    echo "Corfu is Ready!!!!"

    # Get the leader of the log replication: the first LR pod whose log
    # contains "acquired". Sleep between sweeps instead of busy-looping.
    lr_leader=""
    while [ -z "$lr_leader" ]; do
        for pod in log-replication-0 log-replication-1 log-replication-2; do
            if kubectl logs "$pod" | grep -q "acquired"; then
                lr_leader="$pod"
                break
            fi
        done
        [ -n "$lr_leader" ] || sleep 5
    done

    echo "LR Leader is: $lr_leader"

    # Quoted so that a missing argument compares as an empty string instead of
    # producing a `[` syntax error.
    if [ "$lr_version" = "V2" ]; then
        lr_ready_str="Received leadership response from node"
    else
        lr_ready_str="Negotiation complete"
    fi

    # Wait for the log replication leader to be ready.
    # The pattern contains spaces; quoting keeps it a single grep argument in
    # any POSIX-style shell, not only under zsh's no-word-splitting default.
    while ! kubectl logs "$lr_leader" | grep -q "$lr_ready_str"; do
        echo "LR is not ready yet..."
        sleep 10
    done

    echo "Ready to Replicate!!!!"
}

# Upgrade both releases to the 0.4.0-SNAPSHOT image with version.new=true, then
# wait until no pod description in any namespace mentions 0.3.2-SNAPSHOT.
cluster_upgrade() {
    # Flags shared by both releases.
    local -a common_flags
    common_flags=(
        --set tls.enabled=false
        --set tls.certificate.enabled=false
        --set global.replicas=3
        --set image.repository=corfudb/corfu-server
        --set image.tag=0.4.0-SNAPSHOT
        --set version.new=true
    )

    helm upgrade corfu corfu "${common_flags[@]}"

    # nameOverride was previously passed twice with the same value; once is enough.
    helm upgrade corfu2 corfu "${common_flags[@]}" \
        --set lr.name="log-replication2" \
        --set nameOverride="corfu2" \
        --set serviceAccount.name="corfu2" \
        --set fullnameOverride="corfu2" \
        --set cluster.type="sink"

    while kubectl describe pods --all-namespaces | grep -q "0.3.2-SNAPSHOT"; do
        echo "Waiting for pods to be re-imaged..."
        sleep 10
    done

    echo "Cluster upgrade complete!!!"
}


# Shared driver for cluster_test and cluster_test_validate.
# Installs the corfu-cloud-test chart with the given jobs.job value, polls until
# its pod shows "Completed", uninstalls the release and polls until the pod is
# no longer listed.
#
# Arguments:
#   $1 - value for jobs.job
#   $2 - progress message printed on every poll while the job is running
#
# NOTE(review): the first poll loop only ends on "Completed"; a job that never
# completes keeps this waiting indefinitely - consider a deadline.
_run_cloud_test_job() {
    local job_id=$1
    local waiting_msg=$2

    helm install corfu-cloud-test corfu-cloud-test-helm --set tls.enabled=false --set jobs.job="$job_id"

    while ! kubectl get pods -o wide | grep corfu-cloud-test | grep -q Completed; do
        echo "$waiting_msg"
        sleep 5
    done

    helm uninstall corfu-cloud-test
    while kubectl get pods -o wide | grep -q corfu-cloud-test; do
        echo "Removing test agent..."
        sleep 5
    done
}

# Run the test client with jobs.job=1.
cluster_test() {
    echo "Writing Data To Source..."
    _run_cloud_test_job 1 "Waiting for test to finish..."
    echo "Test Complete!!!"
}

# Run the test client with jobs.job=2.
cluster_test_validate() {
    echo "Starting test validation!!!"
    _run_cloud_test_job 2 "Waiting for validation to complete..."
    echo "Validation Complete!!!"
}

# --- Script entry point: the steps below run in order. ---

# Fresh k3d cluster, images imported, Helm repo registered.
cluster_setup
image_imports
helm_setup

# Deploy on the 0.3.2-SNAPSHOT image, wait for readiness, run the test job (jobs.job=1).
init_v1_cluster
cluster_verify V1
cluster_test

# Move both releases to the 0.4.0-SNAPSHOT image.
cluster_upgrade

# Wait for readiness using the V2 marker, then run the validation job (jobs.job=2).
cluster_verify V2
cluster_test_validate
3 changes: 3 additions & 0 deletions cloud/corfu/corfu-cloud-test-helm/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Chart metadata for the corfu-cloud-test Helm chart.
apiVersion: v2
name: corfu-cloud-test
version: 0.1.0
36 changes: 36 additions & 0 deletions cloud/corfu/corfu-cloud-test-helm/templates/job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Job that runs the Corfu cloud test client once against the configured endpoint.
apiVersion: batch/v1
kind: Job
metadata:
  name: corfu-cloud-test
spec:
  template:
    spec:
      restartPolicy: OnFailure
      containers:
        - name: corfu-client
          # Image and pull policy come from the chart's image.* values; with
          # the chart defaults this renders exactly what used to be hard-coded
          # here (corfudb/corfu-cloud-test:latest, Never).
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          command:
            - "sh"
            - "-c"
            # Arguments: endpoint, job selector and, when TLS is enabled, the
            # key store / trust store paths with their password files.
            - |
              java -cp *.jar org.corfudb.cloud.runtime.test.Main {{ .Values.corfuEndpoint }} {{ .Values.jobs.job }}
              {{- if .Values.tls.enabled }} \
              /certs/keystore.jks /password/password /certs/truststore.jks /password/password
              {{- end }}
          {{- if .Values.tls.enabled }}
          # Emitted only with TLS on, so a TLS-off render has no bare null key.
          volumeMounts:
            - name: certificate
              mountPath: /certs
            - name: password
              mountPath: /password
          {{- end }}
      {{- if .Values.tls.enabled }}
      volumes:
        - name: certificate
          secret:
            secretName: {{ .Values.tls.certificateName }}
        - name: password
          secret:
            secretName: {{ .Values.tls.passwordName }}
      {{- end }}
13 changes: 13 additions & 0 deletions cloud/corfu/corfu-cloud-test-helm/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Default values for the corfu-cloud-test chart.
image:
registry: "docker.io"
repository: "corfudb/corfu-cloud-test"
tag: "latest"
pullPolicy: Never
# Passed to the test client as its first argument.
corfuEndpoint: "corfu-0.corfu-headless.default.svc.cluster.local"
tls:
# When true, the job mounts the two secrets named below at /certs and
# /password and passes the key store / trust store paths to the client.
enabled: true
certificateName: corfu-certificate-tls
passwordName: corfu-password
jobs:
# 1: test, 2: validate, 3: test & validate
job: 3
7 changes: 7 additions & 0 deletions cloud/corfu/corfu-cloud-test/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Image for the corfu-cloud-test client: a JDK 8 base plus the built test jar.
FROM openjdk:8-jdk-alpine3.8

# COPY instead of ADD: the source is a plain local file, so none of ADD's
# extra behaviour (remote URLs, tar auto-extraction) is wanted.
COPY ./build/libs/corfu-cloud-test.jar /app/

WORKDIR /app

# NOTE(review): this is a shell-form CMD, which runs via `/bin/sh -c` with no
# positional parameters, so "$@" expands to nothing; arguments given to
# `docker run` replace this command rather than being appended to it.
CMD java -cp *.jar org.corfudb.cloud.runtime.test.Main "$@"
Loading
Loading