Commit

add support for adding dc to an existing cluster

Summary:
* Add k8ssandra.io/rebuild to the CassandraDatacenter when a rebuild is required
* Use the Initialized condition to check whether a DC is being added to an existing cluster
* Add RBAC annotations for CassandraTasks
* Add integration tests for adding a DC to an existing cluster
    * Add a new set of subtests that use an existing cluster as a test fixture
* Create a CassandraTask for the rebuild job (see the sketch after this list)
* Update logic for computing replication factor
* Add support for working with arbitrary number of kind clusters
* Update replication of system keyspaces
* Update replication of user keyspaces
    * Use k8ssandra.io/dc-replication annotation
* Update replication of stargate auth and reaper keyspaces
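
For reference, below is a minimal sketch of the rebuild task the operator creates. The
shape follows cass-operator's CassandraTask API (apiGroup control.k8ssandra.io, per the
RBAC rules in this commit); the names, namespace, and args shown here are illustrative
assumptions rather than values taken from this commit:

    apiVersion: control.k8ssandra.io/v1alpha1
    kind: CassandraTask
    metadata:
      name: dc2-rebuild                # illustrative name
      namespace: k8ssandra-operator    # illustrative namespace
    spec:
      datacenter:
        name: dc2
        namespace: k8ssandra-operator
      jobs:
        - name: rebuild-dc2
          command: rebuild
          args:
            source_datacenter: dc1     # stream data from the existing DC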

Details:
In Cassandra 4 you cannot declare a non-existent DC in the replication
strategy. If we create a K8ssandraCluster with two DCs, dc1 and dc2, for
example, we can only declare replicas for dc1 initially. Only after dc2 has
joined the C* cluster can we specify replicas for it.

The cassandra.system_distributed_replication_dc_names and
cassandra.system_distributed_replication_per_dc Java system properties provide
a backdoor, via the management-api, that lets us specify non-existent DCs for
system keyspaces, but only at initial cluster creation.
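
To illustrate, this is roughly how those properties would surface in the generated
server config at initial cluster creation (a sketch only; the jvm-server-options layout
follows cass-operator's config schema for Cassandra 4.x, and the DC names and
replication value are illustrative):

    # Excerpt of a CassandraDatacenter spec.config
    config:
      jvm-server-options:
        additional-jvm-opts:
          - "-Dcassandra.system_distributed_replication_dc_names=dc1,dc2"
          - "-Dcassandra.system_distributed_replication_per_dc=3"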

The GetDatacentersForReplication function is used for the system, Stargate,
Reaper, and user keyspaces to determine which DCs should be included in the
replication. If the cluster is already initialized, only the DCs that are
already part of the cluster are included.
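
Concretely, a DC that has already joined the cluster reports the Initialized condition
in its CassandraDatacenter status, which is what the operator checks (a sketch of the
relevant status fields, assuming cass-operator's condition conventions):

    status:
      conditions:
        - type: Initialized
          status: "True"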

When adding a new DC, replication for user keyspaces is specified via the
k8ssandra.io/dc-replication annotation. If the annotation is not set, no
replication changes are made for user keyspaces. If it is set, it must cover
all user keyspaces. If you don't want to replicate a particular keyspace,
specify a replication factor of zero.
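
For example, a K8ssandraCluster adding dc2 could carry the annotation as follows (a
sketch; the cluster name and keyspace names are illustrative, and the JSON shape
matches the DcReplicationAnnotation docs in constants.go below):

    apiVersion: k8ssandra.io/v1alpha1
    kind: K8ssandraCluster
    metadata:
      name: demo                # illustrative name
      annotations:
        # All user keyspaces must be listed; 0 means ks2 is not replicated to dc2.
        k8ssandra.io/dc-replication: '{"dc2": {"ks1": 3, "ks2": 0}}'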

Reconcile the Stargate auth and Reaper keyspaces after reconciling each DC. This
change is needed to handle the rebuild and decommission scenarios. See
k8ssandra#262 (comment)
for a detailed explanation of why the changes are necessary.
jsanda committed Jan 21, 2022
1 parent 59600b1 commit b89c81e
Showing 20 changed files with 1,526 additions and 133 deletions.
58 changes: 33 additions & 25 deletions Makefile
@@ -69,9 +69,15 @@ TEST_ARGS=

NS ?= k8ssandra-operator

CLUSTER_SCOPE = false
# DEPLOYMENT specifies a particular kustomization to use for configuring the operator
# in a particular way, cluster-scoped for example. See config/deployments/README.md for
# more info.
DEPLOYMENT =

# Indicates the number of kind clusters that are being used. Note that the clusters should
# be created with scripts/setup-kind-multicluster.sh.
NUM_CLUSTERS = 2

ifeq ($(DEPLOYMENT), )
DEPLOY_TARGET =
else
@@ -173,38 +179,36 @@ multi-up: cleanup build manifests kustomize docker-build create-kind-multicluste
kubectl config use-context kind-k8ssandra-0
$(KUSTOMIZE) build config/deployments/control-plane$(DEPLOY_TARGET) | kubectl apply --server-side --force-conflicts -f -
##install the data plane
kubectl config use-context kind-k8ssandra-1
$(KUSTOMIZE) build config/deployments/data-plane$(DEPLOY_TARGET) | kubectl apply --server-side --force-conflicts -f -
for ((i = 1; i < $(NUM_CLUSTERS); ++i)); do \
kubectl config use-context kind-k8ssandra-$$i; \
$(KUSTOMIZE) build config/deployments/data-plane$(DEPLOY_TARGET) | kubectl apply --server-side --force-conflicts -f -; \
done
## Create a client config
make create-client-config
make create-clientconfig
## Restart the control plane
kubectl config use-context kind-k8ssandra-0
kubectl -n $(NS) delete pod -l control-plane=k8ssandra-operator
kubectl -n $(NS) rollout status deployment k8ssandra-operator
ifeq ($(DEPLOYMENT), cass-operator-dev)
kubectl -n $(NS) delete pod -l name=cass-operator
kubectl -n $(NS) rollout status deployment cass-operator-controller-manager
endif

multi-reload: build manifests kustomize docker-build kind-load-image-multi cert-manager-multi
# Reload the operator on the control-plane
kubectl config use-context kind-k8ssandra-0
$(KUSTOMIZE) build config/deployments/control-plane$(DEPLOY_TARGET) | kubectl apply --server-side --force-conflicts -f -
kubectl -n $(NS) delete pod -l control-plane=k8ssandra-operator
kubectl -n $(NS) rollout status deployment k8ssandra-operator
ifeq ($(DEPLOYMENT), cass-operator-dev)
kubectl -n $(NS) delete pod -l name=cass-operator
kubectl -n $(NS) rollout status deployment cass-operator-controller-manager
endif
# Reload the operator on the data-plane
kubectl config use-context kind-k8ssandra-1
$(KUSTOMIZE) build config/deployments/data-plane$(DEPLOY_TARGET) | kubectl apply --server-side --force-conflicts -f -
kubectl -n $(NS) delete pod -l control-plane=k8ssandra-operator
kubectl -n $(NS) rollout status deployment k8ssandra-operator
ifeq ($(DEPLOYMENT), cass-operator-dev)
kubectl -n $(NS) delete pod -l name=cass-operator
kubectl -n $(NS) rollout status deployment cass-operator-controller-manager
endif
for ((i = 1; i < $(NUM_CLUSTERS); ++i)); do \
kubectl config use-context kind-k8ssandra-$$i; \
$(KUSTOMIZE) build config/deployments/data-plane$(DEPLOY_TARGET) | kubectl apply --server-side --force-conflicts -f -; \
kubectl -n $(NS) delete pod -l control-plane=k8ssandra-operator; \
kubectl -n $(NS) rollout status deployment k8ssandra-operator; \
kubectl -n $(NS) delete pod -l name=cass-operator; \
kubectl -n $(NS) rollout status deployment cass-operator-controller-manager; \
done

single-deploy:
kubectl config use-context kind-k8ssandra-0
@@ -225,8 +229,9 @@ create-kind-multicluster:
scripts/setup-kind-multicluster.sh --clusters 2 --kind-worker-nodes 4

kind-load-image-multi:
kind load docker-image --name k8ssandra-0 ${IMG}
kind load docker-image --name k8ssandra-1 ${IMG}
for ((i = 0; i < $(NUM_CLUSTERS); ++i)); do \
kind load docker-image --name k8ssandra-$$i ${IMG}; \
done

##@ Deployment

@@ -249,15 +254,18 @@ cert-manager: ## Install cert-manager to the cluster
kubectl rollout status deployment cert-manager-webhook -n cert-manager

cert-manager-multi: ## Install cert-manager to the clusters
kubectl config use-context kind-k8ssandra-0
make cert-manager
kubectl config use-context kind-k8ssandra-1
make cert-manager
for ((i = 0; i < $(NUM_CLUSTERS); ++i)); do \
kubectl config use-context kind-k8ssandra-$$i; \
make cert-manager; \
done

create-client-config:
create-clientconfig:
kubectl config use-context kind-k8ssandra-0
make install
scripts/create-clientconfig.sh --namespace $(NS) --src-kubeconfig build/kubeconfigs/k8ssandra-1.yaml --dest-kubeconfig build/kubeconfigs/k8ssandra-0.yaml --in-cluster-kubeconfig build/kubeconfigs/updated/k8ssandra-1.yaml --output-dir clientconfig
for ((i = 0; i < $(NUM_CLUSTERS); ++i)); do \
make install; \
scripts/create-clientconfig.sh --namespace $(NS) --src-kubeconfig build/kubeconfigs/k8ssandra-$$i.yaml --dest-kubeconfig build/kubeconfigs/k8ssandra-0.yaml --in-cluster-kubeconfig build/kubeconfigs/updated/k8ssandra-$$i.yaml --output-dir clientconfig; \
done


CONTROLLER_GEN = $(shell pwd)/bin/controller-gen
controller-gen: ## Download controller-gen locally if necessary.
13 changes: 13 additions & 0 deletions apis/k8ssandra/v1alpha1/constants.go
@@ -9,6 +9,19 @@ const (
// regardless of whether the replication of the system keyspaces changes.
SystemReplicationAnnotation = "k8ssandra.io/system-replication"

// DcReplicationAnnotation tells the operator the replication settings to apply to user
// keyspaces when adding a DC to an existing cluster. The value should be serialized
// JSON, e.g., {"dc2": {"ks1": 3, "ks2": 3}}. All user keyspaces must be specified;
otherwise, reconciliation will fail with a validation error. If you do not want to
// replicate a particular keyspace, specify a value of 0. Replication settings can be
// specified for multiple DCs; however, existing DCs won't be modified, and only the DC
// currently being added will be updated. Specifying multiple DCs can be useful though
// if you add multiple DCs to the cluster at once (Note that the CassandraDatacenters
// are still deployed serially).
DcReplicationAnnotation = "k8ssandra.io/dc-replication"

RebuildLabel = "k8ssandra.io/rebuild"

NameLabel = "app.kubernetes.io/name"
NameLabelValue = "k8ssandra-operator"

12 changes: 12 additions & 0 deletions config/rbac/role.yaml
@@ -59,6 +59,18 @@ rules:
- patch
- update
- watch
- apiGroups:
- control.k8ssandra.io
resources:
- cassandratasks
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
(17 more changed files not shown)
