From 4d3a13428bca55b9f3a2240484402ac6e29dc9c9 Mon Sep 17 00:00:00 2001 From: oviner Date: Mon, 7 Oct 2024 13:20:14 +0300 Subject: [PATCH 1/3] Adjust Node Label Conditions Based on Full Label Name The OCS operator searches for the topology.kubernetes.io/zone label key using a string containing logic. If the customer has a node with a label that includes the string zone. The OCS operator can read another label containing 'zone' instead of topology.kubernetes.io/zone. Signed-off-by: Oded Viner --- controllers/storagecluster/topology.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/controllers/storagecluster/topology.go b/controllers/storagecluster/topology.go index 773f2cf26b..08f3fe00ff 100644 --- a/controllers/storagecluster/topology.go +++ b/controllers/storagecluster/topology.go @@ -100,7 +100,7 @@ func setFailureDomain(sc *ocsv1.StorageCluster) { // If sufficient zones are available then we select zone as the failure domain topologyMap := sc.Status.NodeTopologies for label, labelValues := range topologyMap.Labels { - if strings.Contains(label, "zone") { + if label == corev1.LabelZoneFailureDomainStable || label == labelZoneFailureDomainWithoutBeta { if (len(labelValues) >= 2 && arbiterEnabled(sc)) || (len(labelValues) >= 3) { failureDomain = "zone" } @@ -135,7 +135,7 @@ func determinePlacementRack( targetAZ := "" for label, value := range node.Labels { for _, key := range validTopologyLabelKeys { - if strings.Contains(label, key) && strings.Contains(label, "zone") { + if strings.Contains(label, key) && (label == corev1.LabelZoneFailureDomainStable || label == labelZoneFailureDomainWithoutBeta) { targetAZ = value break } @@ -159,7 +159,7 @@ func determinePlacementRack( if n.Name == nodeName { for label, value := range n.Labels { for _, key := range validTopologyLabelKeys { - if strings.Contains(label, key) && strings.Contains(label, "zone") && value == targetAZ { + if strings.Contains(label, key) && (label == corev1.LabelZoneFailureDomainStable || label == labelZoneFailureDomainWithoutBeta) && value == targetAZ { validRack = true break } From ebce8fbaa54f9c537dc93c08ef5f139594e4555a Mon Sep 17 00:00:00 2001 From: Malay Kumar Parida Date: Wed, 9 Oct 2024 14:23:02 +0530 Subject: [PATCH 2/3] Include EiT state as part of the desired state hash sent to clients When Encryption in-transit(EiT) is enabled/disabled the kernel mount option for cephFS needs to be updated between prefer-crc/secure. So the desired state hash needs to include the EiT state, so that if the EiT state is changed the desired state hash will change and the client will reconcile to get the updated mount option. Signed-off-by: Malay Kumar Parida --- services/provider/server/server.go | 75 +++++++++++++++++++++---- services/provider/server/server_test.go | 9 +++ 2 files changed, 73 insertions(+), 11 deletions(-) diff --git a/services/provider/server/server.go b/services/provider/server/server.go index 9f127f80d5..8676854b0e 100644 --- a/services/provider/server/server.go +++ b/services/provider/server/server.go @@ -11,7 +11,6 @@ import ( "encoding/json" "encoding/pem" "fmt" - "k8s.io/utils/ptr" "math" "net" "slices" @@ -19,6 +18,8 @@ import ( "strings" "time" + "k8s.io/utils/ptr" + "github.com/blang/semver/v4" nbv1 "github.com/noobaa/noobaa-operator/v5/pkg/apis/noobaa/v1alpha1" quotav1 "github.com/openshift/api/quota/v1" @@ -193,7 +194,17 @@ func (s *OCSProviderServer) GetStorageConfig(ctx context.Context, req *pb.Storag if err != nil { return nil, status.Errorf(codes.Internal, "Failed to construct status response: %v", err) } - desiredClientConfigHash := getDesiredClientConfigHash(channelName, consumerObj) + + storageCluster, err := s.getStorageCluster(ctx) + if err != nil { + return nil, err + } + + desiredClientConfigHash := getDesiredClientConfigHash( + channelName, + consumerObj, + isEncryptionInTransitEnabled(storageCluster.Spec.Network), + ) klog.Infof("successfully returned the config details to the consumer.") return &pb.StorageConfigResponse{ @@ -751,15 +762,12 @@ func (s *OCSProviderServer) GetStorageClaimConfig(ctx context.Context, req *pb.S "csi.storage.k8s.io/controller-expand-secret-name": provisionerSecretName, } - storageClusters := &ocsv1.StorageClusterList{} - if err := s.client.List(ctx, storageClusters, client.InNamespace(s.namespace), client.Limit(2)); err != nil { - return nil, status.Errorf(codes.Internal, "failed to get storage cluster: %v", err) - } - if len(storageClusters.Items) != 1 { - return nil, status.Errorf(codes.Internal, "expecting one single storagecluster to exist") + storageCluster, err := s.getStorageCluster(ctx) + if err != nil { + return nil, err } var kernelMountOptions map[string]string - for _, option := range strings.Split(util.GetCephFSKernelMountOptions(&storageClusters.Items[0]), ",") { + for _, option := range strings.Split(util.GetCephFSKernelMountOptions(storageCluster), ",") { if kernelMountOptions == nil { kernelMountOptions = map[string]string{} } @@ -847,7 +855,16 @@ func (s *OCSProviderServer) ReportStatus(ctx context.Context, req *pb.ReportStat return nil, status.Errorf(codes.Internal, "Failed to construct status response: %v", err) } - desiredClientConfigHash := getDesiredClientConfigHash(channelName, storageConsumer) + storageCluster, err := s.getStorageCluster(ctx) + if err != nil { + return nil, err + } + + desiredClientConfigHash := getDesiredClientConfigHash( + channelName, + storageConsumer, + isEncryptionInTransitEnabled(storageCluster.Spec.Network), + ) return &pb.ReportStatusResponse{ DesiredClientOperatorChannel: channelName, @@ -855,10 +872,11 @@ func (s *OCSProviderServer) ReportStatus(ctx context.Context, req *pb.ReportStat }, nil } -func getDesiredClientConfigHash(channelName string, storageConsumer *ocsv1alpha1.StorageConsumer) string { +func getDesiredClientConfigHash(channelName string, storageConsumer *ocsv1alpha1.StorageConsumer, encryptionInTransit bool) string { var arr = []any{ channelName, storageConsumer.Spec.StorageQuotaInGiB, + encryptionInTransit, } return util.CalculateMD5Hash(arr) } @@ -878,6 +896,41 @@ func (s *OCSProviderServer) getOCSSubscriptionChannel(ctx context.Context) (stri return subscription.Spec.Channel, nil } +func (s *OCSProviderServer) getStorageCluster(ctx context.Context) (*ocsv1.StorageCluster, error) { + scList := &ocsv1.StorageClusterList{} + if err := s.client.List(ctx, scList, client.InNamespace(s.namespace)); err != nil { + return nil, status.Errorf(codes.Internal, "failed to list storage clusters: %v", err) + } + + var foundSc *ocsv1.StorageCluster + for i := range scList.Items { + sc := &scList.Items[i] + if sc.Status.Phase == util.PhaseIgnored { + continue // Skip Ignored storage cluster + } + if sc.Spec.AllowRemoteStorageConsumers { + if foundSc != nil { + // This means we have already found one storage cluster, so this is a second one + return nil, status.Errorf(codes.FailedPrecondition, "multiple provider storage clusters found") + } + foundSc = sc + } + } + + if foundSc == nil { + return nil, status.Errorf(codes.NotFound, "no provider storage cluster found") + } + + return foundSc, nil +} + +func isEncryptionInTransitEnabled(networkSpec *rookCephv1.NetworkSpec) bool { + return networkSpec != nil && + networkSpec.Connections != nil && + networkSpec.Connections.Encryption != nil && + networkSpec.Connections.Encryption.Enabled +} + func extractMonitorIps(data string) ([]string, error) { var ips []string mons := strings.Split(data, ",") diff --git a/services/provider/server/server_test.go b/services/provider/server/server_test.go index e23635362f..68b34ab224 100644 --- a/services/provider/server/server_test.go +++ b/services/provider/server/server_test.go @@ -275,6 +275,15 @@ func TestGetExternalResources(t *testing.T) { ocsSubscription.Spec = ocsSubscriptionSpec assert.NoError(t, client.Create(ctx, ocsSubscription)) + storageCluster := &ocsv1.StorageCluster{ + Spec: ocsv1.StorageClusterSpec{ + AllowRemoteStorageConsumers: true, + }, + } + storageCluster.Name = "test-storagecluster" + storageCluster.Namespace = serverNamespace + assert.NoError(t, client.Create(ctx, storageCluster)) + // When ocsv1alpha1.StorageConsumerStateReady req := pb.StorageConfigRequest{ StorageConsumerUUID: string(consumerResource.UID), From 9e83754ae64fbee860a553c6b240882853b33873 Mon Sep 17 00:00:00 2001 From: Nitin Goyal Date: Fri, 18 Oct 2024 11:54:42 +0530 Subject: [PATCH 3/3] hack: run continuous patch for ocs-client-operator-config Update the existing background patch process to run continuously, ensuring the ocs-client-operator-config ConfigMap remains patched. This prevents OLM from resetting the ConfigMap and causing deployment failures. Signed-off-by: Nitin Goyal --- hack/install-ocs-operator.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hack/install-ocs-operator.sh b/hack/install-ocs-operator.sh index bd8a182fe0..6eae6cd37d 100755 --- a/hack/install-ocs-operator.sh +++ b/hack/install-ocs-operator.sh @@ -37,13 +37,8 @@ patch_ocs_client_operator_config_configmap() { while true; do if oc get cm ocs-client-operator-config -n openshift-storage; then oc patch cm ocs-client-operator-config -n openshift-storage --type merge -p '{"data":{"DEPLOY_CSI":"false"}}' - sleep 10 - value=$(oc get cm ocs-client-operator-config -n openshift-storage -o custom-columns=:data.DEPLOY_CSI --no-headers) - if [[ "$value" == "false" ]]; then - break - fi + sleep 2 fi - sleep 10 done } @@ -56,6 +51,11 @@ patch_ocs_client_operator_config_configmap() { # configuration and deploying the CSI. This approach allows us to stop the CSI deployment by patching the ConfigMap as soon as it's created. # We cannot create the ConfigMap early in the process because OLM overwrites it with an empty one later in the cycle. patch_ocs_client_operator_config_configmap & +# Get the process ID (PID) of the background process +bg_pid=$! +# Trap to kill the process when the script exits +trap 'kill $bg_pid' EXIT + "$OPERATOR_SDK" run bundle "$OCS_CLIENT_BUNDLE_FULL_IMAGE_NAME" --timeout=10m --security-context-config restricted -n "$INSTALL_NAMESPACE" "$OPERATOR_SDK" run bundle "$NOOBAA_BUNDLE_FULL_IMAGE_NAME" --timeout=10m --security-context-config restricted -n "$INSTALL_NAMESPACE" "$OPERATOR_SDK" run bundle "$BUNDLE_FULL_IMAGE_NAME" --timeout=10m --security-context-config restricted -n "$INSTALL_NAMESPACE"