diff --git a/api/v1alpha2/mondooauditconfig_types.go b/api/v1alpha2/mondooauditconfig_types.go index cfafa457d..0cf93e607 100644 --- a/api/v1alpha2/mondooauditconfig_types.go +++ b/api/v1alpha2/mondooauditconfig_types.go @@ -199,6 +199,8 @@ const ( AdmissionDegraded MondooAuditConfigConditionType = "AdmissionDegraded" // Indicates weather Admission controller is Degraded because of the ScanAPI ScanAPIDegraded MondooAuditConfigConditionType = "ScanAPIDegraded" + // Indicates whether the operator itself is Degraded + MondooOperatorDegraded MondooAuditConfigConditionType = "MondooOperatorDegraded" // MondooIntegrationDegraded will hold the status for any issues encountered while trying to CheckIn() // on behalf of the Mondoo integration MRN MondooIntegrationDegraded MondooAuditConfigConditionType = "IntegrationDegraded" diff --git a/cmd/mondoo-operator/operator/cmd.go b/cmd/mondoo-operator/operator/cmd.go index accc813b2..f318ed2e3 100644 --- a/cmd/mondoo-operator/operator/cmd.go +++ b/cmd/mondoo-operator/operator/cmd.go @@ -135,6 +135,28 @@ func init() { return err } + // Check whether the mondoo-operator crashed because of OOMKilled + setupLog.Info("Checking whether mondoo-operator was terminated before") + + k8sConfig, err := ctrl.GetConfig() + if err != nil { + setupLog.Error(err, "unable to get k8s config") + return err + } + // use separate client to prevent errors due to cache + // "the cache is not started, can not read objects" + // https://sdk.operatorframework.io/docs/building-operators/golang/references/client/#non-default-client + nonCachingClient, err := client.New(k8sConfig, client.Options{Scheme: scheme}) + if err != nil { + setupLog.Error(err, "unable to create non-caching k8s client") + return err + } + err = checkForTerminatedState(ctx, nonCachingClient, v, setupLog) + if err != nil { + setupLog.Error(err, "unable to check for terminated state of mondoo-operator-controller") + return err + } + if err = resource_monitor.RegisterResourceMonitors(mgr, scanApiStore); err 
!= nil { setupLog.Error(err, "unable to register resource monitors", "controller", "resource_monitor") return err diff --git a/cmd/mondoo-operator/operator/operator_status.go b/cmd/mondoo-operator/operator/operator_status.go new file mode 100644 index 000000000..dab9118b4 --- /dev/null +++ b/cmd/mondoo-operator/operator/operator_status.go @@ -0,0 +1,133 @@ +// Copyright (c) Mondoo, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package operator + +import ( + "context" + + "github.com/go-logr/logr" + "k8s.io/apimachinery/pkg/api/errors" + + k8sv1alpha2 "go.mondoo.com/mondoo-operator/api/v1alpha2" + "go.mondoo.com/mondoo-operator/controllers" + "go.mondoo.com/mondoo-operator/controllers/status" + "go.mondoo.com/mondoo-operator/pkg/utils/k8s" + "go.mondoo.com/mondoo-operator/pkg/utils/mondoo" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" + k8sversion "k8s.io/apimachinery/pkg/version" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// checkForTerminatedState looks, for every MondooAuditConfig, at the newest mondoo-operator pod in +// that namespace and records in the operator-degraded condition whether its "manager" container was +// terminated before. The updated status is written to the MondooAuditConfig and reported upstream +// immediately. nonCacheClient must not rely on the manager cache, which is not started yet when this +// is called during setup. +func checkForTerminatedState(ctx context.Context, nonCacheClient client.Client, v *k8sversion.Info, logger logr.Logger) error { + statusReport := status.NewStatusReporter(nonCacheClient, controllers.MondooClientBuilder, v) + + var err error + config := &k8sv1alpha2.MondooOperatorConfig{} + if err = nonCacheClient.Get(ctx, types.NamespacedName{Name: k8sv1alpha2.MondooOperatorConfigName}, config); err != nil { + if errors.IsNotFound(err) { + logger.Info("MondooOperatorConfig not found, using defaults") + } else { + logger.Error(err, "Failed to check for MondooOperatorConfig") + return err + } + } + + mondooAuditConfigs := &k8sv1alpha2.MondooAuditConfigList{} + if err := nonCacheClient.List(ctx, mondooAuditConfigs); err != nil { + logger.Error(err, "error listing MondooAuditConfigs") + return err + } + + for _, mondooAuditConfig := range mondooAuditConfigs.Items { + mondooAuditConfigCopy := mondooAuditConfig.DeepCopy() + + podList := &corev1.PodList{} + listOpts := &client.ListOptions{ + 
Namespace: mondooAuditConfig.Namespace, + LabelSelector: labels.SelectorFromSet(map[string]string{ + "app.kubernetes.io/name": "mondoo-operator", + }), + } + if err := nonCacheClient.List(ctx, podList, listOpts); err != nil { + logger.Error(err, "failed to list pods", "Mondoo.Namespace", mondooAuditConfig.Namespace, "Mondoo.Name", mondooAuditConfig.Name) + return err + } + + currentPod := k8s.GetNewestPodFromList(podList) + for _, containerStatus := range currentPod.Status.ContainerStatuses { + if containerStatus.Name != "manager" { + continue + } + stateUpdate := false + if containerStatus.State.Terminated != nil || containerStatus.LastTerminationState.Terminated != nil { + logger.Info("mondoo-operator was terminated before") + // Update status + updateOperatorConditions(&mondooAuditConfig, true, currentPod) + stateUpdate = true + } else if containerStatus.RestartCount == 0 && containerStatus.State.Terminated == nil { + logger.Info("mondoo-operator is running or starting", "state", containerStatus.State) + updateOperatorConditions(&mondooAuditConfig, false, &corev1.Pod{}) + stateUpdate = true + } + if stateUpdate { + err := mondoo.UpdateMondooAuditStatus(ctx, nonCacheClient, mondooAuditConfigCopy, &mondooAuditConfig, logger) + if err != nil { + logger.Error(err, "failed to update status for MondooAuditConfig") + return err + } + // Report upstream before we get OOMkilled again + err = statusReport.Report(ctx, mondooAuditConfig, *config) + if err != nil { + logger.Error(err, "failed to report status upstream") + return err + } + break + } + } + } + return nil +} + +// updateOperatorConditions sets the operator-degraded condition on config. With degradedStatus the +// condition is True; if a container of pod exited with code 137 the message mentions OOM and the pod +// name plus that container's memory limit are attached to the condition. +func updateOperatorConditions(config *k8sv1alpha2.MondooAuditConfig, degradedStatus bool, pod *corev1.Pod) { + msg := "Mondoo Operator controller is available" + reason := "MondooOperatorAvailable" + status := corev1.ConditionFalse + updateCheck := mondoo.UpdateConditionIfReasonOrMessageChange + affectedPods := []string{} + memoryLimit := "" + if degradedStatus { + msg = "Mondoo Operator controller is 
unavailable" + for _, containerStatus := range pod.Status.ContainerStatuses { + if (containerStatus.LastTerminationState.Terminated != nil && containerStatus.LastTerminationState.Terminated.ExitCode == 137) || + (containerStatus.State.Terminated != nil && containerStatus.State.Terminated.ExitCode == 137) { + msg = "Mondoo Operator controller is unavailable due to OOM" + affectedPods = append(affectedPods, pod.Name) + // Look the container up by name: the order of ContainerStatuses is not + // guaranteed to match the order of Spec.Containers. + for j := range pod.Spec.Containers { + if pod.Spec.Containers[j].Name == containerStatus.Name { + memoryLimit = pod.Spec.Containers[j].Resources.Limits.Memory().String() + break + } + } + break + } + } + + reason = "MondooOperatorUnavailable" + status = corev1.ConditionTrue + } + + config.Status.Conditions = mondoo.SetMondooAuditCondition(config.Status.Conditions, k8sv1alpha2.MondooOperatorDegraded, status, reason, msg, updateCheck, affectedPods, memoryLimit) +} diff --git a/cmd/mondoo-operator/operator/operator_status_test.go b/cmd/mondoo-operator/operator/operator_status_test.go new file mode 100644 index 000000000..75b5c3900 --- /dev/null +++ b/cmd/mondoo-operator/operator/operator_status_test.go @@ -0,0 +1,160 @@ +// Copyright (c) Mondoo, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package operator + +import ( + "context" + "testing" + "time" + + "github.com/go-logr/zapr" + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/suite" + "go.uber.org/zap" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/version" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + mondoov1alpha2 "go.mondoo.com/mondoo-operator/api/v1alpha2" + scanapistoremock "go.mondoo.com/mondoo-operator/controllers/resource_monitor/scan_api_store/mock" + "go.mondoo.com/mondoo-operator/controllers/scanapi" + "go.mondoo.com/mondoo-operator/pkg/utils/mondoo" + fakeMondoo "go.mondoo.com/mondoo-operator/pkg/utils/mondoo/fake" + "go.mondoo.com/mondoo-operator/pkg/utils/test" + "go.mondoo.com/mondoo-operator/tests/framework/utils" +) + +type DeploymentHandlerSuite struct { + suite.Suite + ctx context.Context + scheme *runtime.Scheme + containerImageResolver mondoo.ContainerImageResolver + + auditConfig mondoov1alpha2.MondooAuditConfig + fakeClientBuilder *fake.ClientBuilder + mockCtrl *gomock.Controller + scanApiStoreMock *scanapistoremock.MockScanApiStore +} + +func (s *DeploymentHandlerSuite) SetupSuite() { + s.ctx = context.Background() + s.scheme = clientgoscheme.Scheme + s.Require().NoError(mondoov1alpha2.AddToScheme(s.scheme)) + s.containerImageResolver = fakeMondoo.NewNoOpContainerImageResolver() + s.mockCtrl = gomock.NewController(s.T()) + s.scanApiStoreMock = scanapistoremock.NewMockScanApiStore(s.mockCtrl) +} + +func (s *DeploymentHandlerSuite) BeforeTest(suiteName, testName string) { + s.auditConfig = utils.DefaultAuditConfig("mondoo-operator", true, false, false, false) + s.fakeClientBuilder = fake.NewClientBuilder().WithObjects(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: 
scanapi.TokenSecretName(s.auditConfig.Name), + Namespace: s.auditConfig.Namespace, + }, + Data: map[string][]byte{"token": []byte("token")}, + }, test.TestKubeSystemNamespace()) +} + +func (s *DeploymentHandlerSuite) AfterTest(suiteName, testName string) { + s.mockCtrl.Finish() +} + +func (s *DeploymentHandlerSuite) TestOOMDetect() { + mondooAuditConfig := &s.auditConfig + + oomPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mondoo-operator-123", + Namespace: s.auditConfig.Namespace, + Labels: map[string]string{"app.kubernetes.io/name": "mondoo-operator"}, + CreationTimestamp: metav1.Time{ + Time: time.Now(), + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "manager", + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceMemory: *resource.NewQuantity(1, resource.BinarySI), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "manager", + LastTerminationState: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + ExitCode: 137, + }, + }, + }, + }, + }, + } + + // This is needed because of https://github.com/kubernetes-sigs/controller-runtime/issues/2362 + objs := []client.Object{mondooAuditConfig, oomPod} + k8sClient := s.fakeClientBuilder.WithScheme(clientgoscheme.Scheme).WithStatusSubresource(objs...).WithObjects(objs...).Build() + + v := &version.Info{} + cfg := zap.NewDevelopmentConfig() + cfg.InitialFields = map[string]interface{}{ + "controller": "terminated-test", + } + zapLog, err := cfg.Build() + s.Require().NoError(err, "failed to set up logging for test cases") + testLogger := zapr.NewLogger(zapLog) + + err = checkForTerminatedState(s.ctx, k8sClient, v, testLogger) + s.NoError(err) + + mondooAuditConfigs := &mondoov1alpha2.MondooAuditConfigList{} + err = k8sClient.List(s.ctx, mondooAuditConfigs) + s.NoError(err) + s.Len(mondooAuditConfigs.Items, 1) + + condition := 
mondooAuditConfigs.Items[0].Status.Conditions[0] + s.Equal("Mondoo Operator controller is unavailable due to OOM", condition.Message) + s.Len(condition.AffectedPods, 1) + s.Contains(condition.AffectedPods, "mondoo-operator-123") + containerMemory := oomPod.Spec.Containers[0].Resources.Limits.Memory() + s.Equal(containerMemory.String(), condition.MemoryLimit) + s.Equal("MondooOperatorUnavailable", condition.Reason) + s.Equal(corev1.ConditionTrue, condition.Status) + + oomPod.Status.ContainerStatuses[0].LastTerminationState = corev1.ContainerState{} + oomPod.Status.ContainerStatuses[0].State.Running = &corev1.ContainerStateRunning{} + s.NoError(k8sClient.Status().Update(s.ctx, oomPod)) + + err = checkForTerminatedState(s.ctx, k8sClient, v, testLogger) + s.NoError(err) + + mondooAuditConfigs = &mondoov1alpha2.MondooAuditConfigList{} + err = k8sClient.List(s.ctx, mondooAuditConfigs) + s.NoError(err) + s.Len(mondooAuditConfigs.Items, 1) + + condition = mondooAuditConfigs.Items[0].Status.Conditions[0] + s.Equal("Mondoo Operator controller is available", condition.Message) + s.Len(condition.AffectedPods, 0) + s.Equal("", condition.MemoryLimit) + s.Equal("MondooOperatorAvailable", condition.Reason) + s.Equal(corev1.ConditionFalse, condition.Status) +} + +func TestOperatorSuite(t *testing.T) { + suite.Run(t, new(DeploymentHandlerSuite)) +} diff --git a/controllers/metrics/metrics.go b/controllers/metrics/metrics.go index 32a5c4e0a..8aad92a09 100644 --- a/controllers/metrics/metrics.go +++ b/controllers/metrics/metrics.go @@ -70,7 +70,6 @@ func (mr *MetricsReconciler) Start(ctx context.Context) error { } func (mr *MetricsReconciler) metricsLoop() { - mr.log.Info("Updating metrics") mondooAuditConfigs := &v1alpha2.MondooAuditConfigList{} if err := mr.Client.List(mr.ctx, mondooAuditConfigs); err != nil { mr.log.Error(err, "error listing MondooAuditConfigs") diff --git a/controllers/mondooauditconfig_controller_test.go b/controllers/mondooauditconfig_controller_test.go index 
8a0d1567b..979ca9434 100644 --- a/controllers/mondooauditconfig_controller_test.go +++ b/controllers/mondooauditconfig_controller_test.go @@ -260,6 +260,11 @@ func TestTokenRegistration(t *testing.T) { Identifier: status.ScanApiIdentifier, Status: mondooclient.MessageStatus_MESSAGE_INFO, }, + { + Message: "No status reported yet", + Identifier: status.MondooOperatorIdentifier, + Status: mondooclient.MessageStatus_MESSAGE_UNKNOWN, + }, }, }, LastState: status.OperatorCustomState{ diff --git a/controllers/status/operator_status.go b/controllers/status/operator_status.go index 8bd37b6c1..5762f5566 100644 --- a/controllers/status/operator_status.go +++ b/controllers/status/operator_status.go @@ -23,6 +23,7 @@ const ( AdmissionControllerIdentifier = "admission-controller" ScanApiIdentifier = "scan-api" NamespaceFilteringIdentifier = "namespace-filtering" + MondooOperatorIdentifier = "mondoo-operator" noStatusMessage = "No status reported yet" ) @@ -51,7 +52,7 @@ func ReportStatusRequestFromAuditConfig( nodeNames[i] = nodes[i].Name } - messages := make([]mondooclient.IntegrationMessage, 5) + messages := make([]mondooclient.IntegrationMessage, 6) // Kubernetes resources scanning status messages[0].Identifier = K8sResourcesScanningIdentifier @@ -65,7 +66,7 @@ func ReportStatusRequestFromAuditConfig( log.Error(err, "Failed to create extra information for Kubernetes Resource Scanning on OOM error") } if extraStruct != nil { - messages[4].Extra = extraStruct + messages[0].Extra = extraStruct } } else { messages[0].Status = mondooclient.MessageStatus_MESSAGE_INFO @@ -92,7 +93,7 @@ func ReportStatusRequestFromAuditConfig( log.Error(err, "Failed to create extra information for Kubernetes Container Image on OOM error") } if extraStruct != nil { - messages[4].Extra = extraStruct + messages[1].Extra = extraStruct } } else { messages[1].Status = mondooclient.MessageStatus_MESSAGE_INFO @@ -119,7 +120,7 @@ func ReportStatusRequestFromAuditConfig( log.Error(err, "Failed to create extra 
information for Node Scanning on OOM error") } if extraStruct != nil { - messages[4].Extra = extraStruct + messages[2].Extra = extraStruct } } else { messages[2].Status = mondooclient.MessageStatus_MESSAGE_INFO @@ -146,7 +147,7 @@ func ReportStatusRequestFromAuditConfig( log.Error(err, "Failed to create extra information for Admission Controller on OOM error") } if extraStruct != nil { - messages[4].Extra = extraStruct + messages[3].Extra = extraStruct } } else { messages[3].Status = mondooclient.MessageStatus_MESSAGE_INFO @@ -187,6 +188,27 @@ func ReportStatusRequestFromAuditConfig( messages[4].Message = "Scan API is disabled" } + messages[5].Identifier = MondooOperatorIdentifier + mondooOperator := mondoo.FindMondooAuditConditions(m.Status.Conditions, v1alpha2.MondooOperatorDegraded) + if mondooOperator != nil { + if mondooOperator.Status == v1.ConditionTrue { + messages[5].Status = mondooclient.MessageStatus_MESSAGE_ERROR + extraStruct, err := createOOMExtraInformation(mondooOperator.Message, mondooOperator.AffectedPods, mondooOperator.MemoryLimit) + if err != nil { + log.Error(err, "Failed to create extra information for Mondoo Operator on OOM error") + } + if extraStruct != nil { + messages[5].Extra = extraStruct + } + } else { + messages[5].Status = mondooclient.MessageStatus_MESSAGE_INFO + } + messages[5].Message = mondooOperator.Message + } else { + messages[5].Status = mondooclient.MessageStatus_MESSAGE_UNKNOWN + messages[5].Message = noStatusMessage + } + // If there were any error messages, the overall status is error status := mondooclient.Status_ACTIVE for _, m := range messages { diff --git a/controllers/status/operator_status_test.go b/controllers/status/operator_status_test.go index e6889bc4a..348ffaaa0 100644 --- a/controllers/status/operator_status_test.go +++ b/controllers/status/operator_status_test.go @@ -45,6 +45,7 @@ func TestReportStatusRequestFromAuditConfig_AllDisabled(t *testing.T) { {Identifier: NodeScanningIdentifier, Status: 
mondooclient.MessageStatus_MESSAGE_INFO, Message: "Node scanning is disabled"}, {Identifier: AdmissionControllerIdentifier, Status: mondooclient.MessageStatus_MESSAGE_INFO, Message: "Admission controller is disabled"}, {Identifier: ScanApiIdentifier, Status: mondooclient.MessageStatus_MESSAGE_INFO, Message: "Scan API is disabled"}, + {Identifier: MondooOperatorIdentifier, Status: mondooclient.MessageStatus_MESSAGE_UNKNOWN, Message: "No status reported yet"}, } assert.ElementsMatch(t, messages, reportStatus.Messages.Messages) } @@ -74,6 +75,7 @@ func TestReportStatusRequestFromAuditConfig_AllEnabled(t *testing.T) { {Message: "Node Scanning is available", Status: v1.ConditionFalse, Type: v1alpha2.NodeScanningDegraded}, {Message: "Admission controller is available", Status: v1.ConditionFalse, Type: v1alpha2.AdmissionDegraded}, {Message: "ScanAPI controller is available", Status: v1.ConditionFalse, Type: v1alpha2.ScanAPIDegraded}, + {Message: "Mondoo Operator controller is available", Status: v1.ConditionFalse, Type: v1alpha2.MondooOperatorDegraded}, } reportStatus := ReportStatusRequestFromAuditConfig(integrationMrn, m, nodes, v, logger) @@ -101,6 +103,7 @@ func TestReportStatusRequestFromAuditConfig_AllEnabled(t *testing.T) { {Identifier: NodeScanningIdentifier, Status: mondooclient.MessageStatus_MESSAGE_INFO, Message: m.Status.Conditions[2].Message}, {Identifier: AdmissionControllerIdentifier, Status: mondooclient.MessageStatus_MESSAGE_INFO, Message: m.Status.Conditions[3].Message}, {Identifier: ScanApiIdentifier, Status: mondooclient.MessageStatus_MESSAGE_INFO, Message: m.Status.Conditions[4].Message}, + {Identifier: MondooOperatorIdentifier, Status: mondooclient.MessageStatus_MESSAGE_INFO, Message: m.Status.Conditions[5].Message}, } assert.ElementsMatch(t, messages, reportStatus.Messages.Messages) } @@ -130,6 +133,7 @@ func TestReportStatusRequestFromAuditConfig_AllEnabled_DeprecatedFields(t *testi {Message: "Node Scanning is available", Status: v1.ConditionFalse, 
Type: v1alpha2.NodeScanningDegraded}, {Message: "Admission controller is available", Status: v1.ConditionFalse, Type: v1alpha2.AdmissionDegraded}, {Message: "ScanAPI controller is available", Status: v1.ConditionFalse, Type: v1alpha2.ScanAPIDegraded}, + {Message: "Mondoo Operator controller is available", Status: v1.ConditionFalse, Type: v1alpha2.MondooOperatorDegraded}, } reportStatus := ReportStatusRequestFromAuditConfig(integrationMrn, m, nodes, v, logger) @@ -157,6 +161,7 @@ func TestReportStatusRequestFromAuditConfig_AllEnabled_DeprecatedFields(t *testi {Identifier: NodeScanningIdentifier, Status: mondooclient.MessageStatus_MESSAGE_INFO, Message: m.Status.Conditions[2].Message}, {Identifier: AdmissionControllerIdentifier, Status: mondooclient.MessageStatus_MESSAGE_INFO, Message: m.Status.Conditions[3].Message}, {Identifier: ScanApiIdentifier, Status: mondooclient.MessageStatus_MESSAGE_INFO, Message: m.Status.Conditions[4].Message}, + {Identifier: MondooOperatorIdentifier, Status: mondooclient.MessageStatus_MESSAGE_INFO, Message: m.Status.Conditions[5].Message}, } assert.ElementsMatch(t, messages, reportStatus.Messages.Messages) } @@ -182,6 +187,7 @@ func TestReportStatusRequestFromAuditConfig_AllError(t *testing.T) { {Message: "Node Scanning error", Status: v1.ConditionTrue, Type: v1alpha2.NodeScanningDegraded}, {Message: "Admission controller error", Status: v1.ConditionTrue, Type: v1alpha2.AdmissionDegraded}, {Message: "ScanAPI controller error", Status: v1.ConditionTrue, Type: v1alpha2.ScanAPIDegraded}, + {Message: "Mondoo Operator controller is unavailable", Status: v1.ConditionTrue, Type: v1alpha2.MondooOperatorDegraded}, } reportStatus := ReportStatusRequestFromAuditConfig(integrationMrn, m, nodes, v, logger) @@ -204,6 +210,7 @@ func TestReportStatusRequestFromAuditConfig_AllError(t *testing.T) { {Identifier: NodeScanningIdentifier, Status: mondooclient.MessageStatus_MESSAGE_ERROR, Message: m.Status.Conditions[2].Message}, {Identifier: 
AdmissionControllerIdentifier, Status: mondooclient.MessageStatus_MESSAGE_ERROR, Message: m.Status.Conditions[3].Message}, {Identifier: ScanApiIdentifier, Status: mondooclient.MessageStatus_MESSAGE_ERROR, Message: m.Status.Conditions[4].Message}, + {Identifier: MondooOperatorIdentifier, Status: mondooclient.MessageStatus_MESSAGE_ERROR, Message: m.Status.Conditions[5].Message}, } assert.ElementsMatch(t, messages, reportStatus.Messages.Messages) } diff --git a/controllers/status/status_reporter_test.go b/controllers/status/status_reporter_test.go index 3ac9d88c8..fec093ea7 100644 --- a/controllers/status/status_reporter_test.go +++ b/controllers/status/status_reporter_test.go @@ -110,6 +110,11 @@ func (s *StatusReporterSuite) TestReport() { Identifier: ScanApiIdentifier, Status: mondooclient.MessageStatus_MESSAGE_INFO, }, + { + Message: "No status reported yet", + Identifier: MondooOperatorIdentifier, + Status: mondooclient.MessageStatus_MESSAGE_UNKNOWN, + }, }, }, LastState: OperatorCustomState{ @@ -179,6 +184,11 @@ func (s *StatusReporterSuite) TestReport_StatusChange() { Identifier: ScanApiIdentifier, Status: mondooclient.MessageStatus_MESSAGE_INFO, }, + { + Message: "No status reported yet", + Identifier: MondooOperatorIdentifier, + Status: mondooclient.MessageStatus_MESSAGE_UNKNOWN, + }, }, }, LastState: operatorState, diff --git a/pkg/utils/k8s/pods.go b/pkg/utils/k8s/pods.go new file mode 100644 index 000000000..886c566c6 --- /dev/null +++ b/pkg/utils/k8s/pods.go @@ -0,0 +1,26 @@ +// Copyright (c) Mondoo, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package k8s + +import ( + "time" + + corev1 "k8s.io/api/core/v1" +) + +// GetNewestPodFromList returns the most recent pod from a pod list +// This is determined by the creation timestamp of the pod +func GetNewestPodFromList(pods *corev1.PodList) *corev1.Pod { + podCreationtime := time.Unix(0, 0) + currentPod := &corev1.Pod{} + for i := range pods.Items { + pod := &pods.Items[i] + if pod.ObjectMeta.CreationTimestamp.Time.Before(podCreationtime) { + continue + } + podCreationtime = pod.ObjectMeta.CreationTimestamp.Time + currentPod = pod + } + return currentPod +} diff --git a/tests/integration/audit_config_base_suite.go b/tests/integration/audit_config_base_suite.go index 68f7c2c39..41acf3d5c 100644 --- a/tests/integration/audit_config_base_suite.go +++ b/tests/integration/audit_config_base_suite.go @@ -217,6 +217,84 @@ func (s *AuditConfigBaseSuite) testMondooAuditConfigKubernetesResources(auditCon s.Equal("ACTIVE", status) } +func (s *AuditConfigBaseSuite) testOOMMondooOperatorController(auditConfig mondoov2.MondooAuditConfig) { + s.auditConfig = auditConfig + + // Disable container image resolution to be able to run the k8s resources scan CronJob with a local image. 
+ cleanup := s.disableContainerImageResolution() + defer cleanup() + + zap.S().Info("Create an audit config that enables nothing.") + s.NoErrorf( + s.testCluster.K8sHelper.Clientset.Create(s.ctx, &auditConfig), + "Failed to create Mondoo audit config.") + + s.Require().True(s.testCluster.K8sHelper.WaitUntilMondooClientSecretExists(s.ctx, s.auditConfig.Namespace), "Mondoo SA not created") + + deployments := &appsv1.DeploymentList{} + listOpts := &client.ListOptions{ + Namespace: auditConfig.Namespace, + LabelSelector: labels.SelectorFromSet(map[string]string{ + "app.kubernetes.io/name": "mondoo-operator", + }), + } + s.NoError(s.testCluster.K8sHelper.Clientset.List(s.ctx, deployments, listOpts)) + s.Equalf(1, len(deployments.Items), "mondoo-operator deployment not found") + + operatorDeployment := deployments.Items[0] + operatorDeployment.Spec.Template.Spec.Containers[0].Resources.Requests = corev1.ResourceList{} + operatorDeployment.Spec.Template.Spec.Containers[0].Resources.Limits = corev1.ResourceList{ + corev1.ResourceMemory: resource.MustParse("15Mi"), // this should be low enough to trigger an OOMkilled + } + + zap.S().Info("Reducing memory limit to trigger OOM.") + s.NoError(s.testCluster.K8sHelper.Clientset.Update(s.ctx, &operatorDeployment)) + + // This will take some time, because: + // a new replicaset should be created + // the first Pod tries to start and gets killed + // on the 2nd start we should get an OOMkilled status update + err := s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.MondooOperatorDegraded, corev1.ConditionTrue) + s.NoError(err, "Failed to find degraded condition") + + foundMondooAuditConfig, err := s.testCluster.K8sHelper.GetMondooAuditConfigFromCluster(auditConfig.Name, auditConfig.Namespace) + s.NoError(err, "Failed to find MondooAuditConfig") + s.Contains(foundMondooAuditConfig.Status.Conditions[5].Message, "OOM", "Failed to find OOMKilled message in degraded condition") + 
s.Len(foundMondooAuditConfig.Status.Conditions[5].AffectedPods, 1, "Failed to find only one pod in degraded condition") + + // Give the integration a chance to update + time.Sleep(2 * time.Second) + + status, err := s.integration.GetStatus(s.ctx) + s.NoError(err, "Failed to get status") + s.Equal("ERROR", status) + + s.NoError(s.testCluster.K8sHelper.Clientset.List(s.ctx, deployments, listOpts)) + s.Equalf(1, len(deployments.Items), "mondoo-operator deployment not found") + + operatorDeployment = deployments.Items[0] + operatorDeployment.Spec.Template.Spec.Containers[0].Resources.Limits = corev1.ResourceList{ + corev1.ResourceMemory: resource.MustParse("100Mi"), // this should be enough to get the operator running again + } + + zap.S().Info("Increasing memory limit to get controller running again.") + s.NoError(s.testCluster.K8sHelper.Clientset.Update(s.ctx, &operatorDeployment)) + + err = s.testCluster.K8sHelper.CheckForDegradedCondition(&auditConfig, mondoov2.MondooOperatorDegraded, corev1.ConditionFalse) + s.NoError(err, "Failed to find degraded condition") + foundMondooAuditConfig, err = s.testCluster.K8sHelper.GetMondooAuditConfigFromCluster(auditConfig.Name, auditConfig.Namespace) + s.NoError(err, "Failed to find MondooAuditConfig") + s.NotContains(foundMondooAuditConfig.Status.Conditions[5].Message, "OOM", "Found OOMKilled message in condition") + s.Len(foundMondooAuditConfig.Status.Conditions[5].AffectedPods, 0, "Found a pod in condition") + + // Give the integration a chance to update + time.Sleep(2 * time.Second) + + status, err = s.integration.GetStatus(s.ctx) + s.NoError(err, "Failed to get status") + s.Equal("ACTIVE", status) +} + func (s *AuditConfigBaseSuite) testMondooAuditConfigContainers(auditConfig mondoov2.MondooAuditConfig) { nginxLabel := "app.kubernetes.io/name=nginx" _, err := s.testCluster.K8sHelper.Kubectl("run", "-n", "default", "nginx", "--image", "nginx", "-l", nginxLabel) diff --git a/tests/integration/audit_config_test.go 
b/tests/integration/audit_config_test.go index d4a0d0dc2..0291c291c 100644 --- a/tests/integration/audit_config_test.go +++ b/tests/integration/audit_config_test.go @@ -17,6 +17,11 @@ type AuditConfigSuite struct { AuditConfigBaseSuite } +func (s *AuditConfigSuite) TestOOMControllerReporting() { + auditConfig := utils.DefaultAuditConfigMinimal(s.testCluster.Settings.Namespace, false, false, false, false) + s.testOOMMondooOperatorController(auditConfig) +} + func (s *AuditConfigSuite) TestReconcile_AllDisabled() { auditConfig := utils.DefaultAuditConfigMinimal(s.testCluster.Settings.Namespace, false, false, false, false) s.testMondooAuditConfigAllDisabled(auditConfig)