From dcf9ac4f3bb59311af43ca504137fd00c78e6374 Mon Sep 17 00:00:00 2001 From: Justin Kulikauskas Date: Tue, 16 May 2023 16:36:36 -0400 Subject: [PATCH] Update API discovery cache even when incomplete Previously, if there were any APIs having issues on the cluster, no new API information could be used by the controller. Now, partial results can still be updated. Additionally, API discovery errors during startup will not prevent the controller from functioning at all. Refs: - https://issues.redhat.com/browse/ACM-5491 Signed-off-by: Justin Kulikauskas --- controllers/configurationpolicy_controller.go | 37 +++++--- test/e2e/case18_discovery_refresh_test.go | 84 +++++++++++++------ .../bad-apiservice.yaml | 13 +++ 3 files changed, 98 insertions(+), 36 deletions(-) create mode 100644 test/resources/case18_discovery_refresh/bad-apiservice.yaml diff --git a/controllers/configurationpolicy_controller.go b/controllers/configurationpolicy_controller.go index d6bf5288..2c0f9728 100644 --- a/controllers/configurationpolicy_controller.go +++ b/controllers/configurationpolicy_controller.go @@ -96,6 +96,7 @@ type discoveryInfo struct { apiResourceList []*metav1.APIResourceList apiGroups []*restmapper.APIGroupResources discoveryLastRefreshed time.Time + refreshError error } // ConfigurationPolicyReconciler reconciles a ConfigurationPolicy object @@ -288,6 +289,9 @@ func (r *ConfigurationPolicyReconciler) handlePolicyWorker( } } +// refreshDiscoveryInfo tries to discover all the available APIs on the cluster, and update the +// cached information. If it encounters an error, it may update the cache with partial results, +// if those seem "better" than what's in the current cache. func (r *ConfigurationPolicyReconciler) refreshDiscoveryInfo() error { log.V(2).Info("Refreshing the discovery info") r.lock.Lock() @@ -295,28 +299,37 @@ func (r *ConfigurationPolicyReconciler) refreshDiscoveryInfo() error { dd := r.TargetK8sClient.Discovery() - _, apiResourceList, err := dd.ServerGroupsAndResources() - if err != nil { - log.Error(err, "Could not get the API resource list") + _, apiResourceList, resourceErr := dd.ServerGroupsAndResources() + if resourceErr != nil { + r.refreshError = resourceErr + log.Error(resourceErr, "Could not get the full API resource list") + } - return err + if resourceErr == nil || (len(apiResourceList) > len(r.discoveryInfo.apiResourceList)) { + // update the list if it's complete, or if it's "better" than the old one + r.discoveryInfo.apiResourceList = apiResourceList } - r.apiResourceList = apiResourceList + apiGroups, groupErr := restmapper.GetAPIGroupResources(dd) + if groupErr != nil { + r.refreshError = groupErr + log.Error(groupErr, "Could not get the full API groups list") + } - apiGroups, err := restmapper.GetAPIGroupResources(dd) - if err != nil { - log.Error(err, "Could not get the API groups list") + if resourceErr == nil && groupErr == nil { + r.refreshError = nil + } - return err + if r.refreshError == nil || (len(apiGroups) > len(r.discoveryInfo.apiGroups)) { + // update the list if it's complete, or if it's "better" than the old one + r.discoveryInfo.apiGroups = apiGroups } - r.apiGroups = apiGroups // Reset the OpenAPI cache in case the CRDs were updated since the last fetch. r.openAPIParser = openapi.NewOpenAPIParser(dd) - r.discoveryLastRefreshed = time.Now().UTC() + r.discoveryInfo.discoveryLastRefreshed = time.Now().UTC() - return nil + return r.refreshError // can be nil } // shouldEvaluatePolicy will determine if the policy is ready for evaluation by examining the diff --git a/test/e2e/case18_discovery_refresh_test.go b/test/e2e/case18_discovery_refresh_test.go index 7b80c043..84bdec8b 100644 --- a/test/e2e/case18_discovery_refresh_test.go +++ b/test/e2e/case18_discovery_refresh_test.go @@ -11,39 +11,41 @@ import ( . "github.com/onsi/gomega" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" "open-cluster-management.io/config-policy-controller/test/utils" ) -const ( - case18PolicyName = "policy-c18" - case18Policy = "../resources/case18_discovery_refresh/policy.yaml" - case18PolicyTemplateName = "policy-c18-template" - case18PolicyTemplatePreReqs = "../resources/case18_discovery_refresh/prereqs-for-template-policy.yaml" - case18PolicyTemplate = "../resources/case18_discovery_refresh/policy-template.yaml" - case18ConfigMapName = "c18-configmap" -) +var _ = Describe("Test discovery info refresh", Ordered, func() { + const ( + policyName = "policy-c18" + policyYaml = "../resources/case18_discovery_refresh/policy.yaml" + policyTemplateName = "policy-c18-template" + policyTemplatePreReqs = "../resources/case18_discovery_refresh/prereqs-for-template-policy.yaml" + policyTemplateYaml = "../resources/case18_discovery_refresh/policy-template.yaml" + configMapName = "c18-configmap" + badAPIServiceYaml = "../resources/case18_discovery_refresh/bad-apiservice.yaml" + ) -var _ = Describe("Test discovery info refresh", func() { It("Verifies that the discovery info is refreshed after a CRD is installed", func() { - By("Creating " + case18PolicyName + " on managed") - utils.Kubectl("apply", "-f", case18Policy, "-n", testNamespace) + By("Creating " + policyName + " on managed") + utils.Kubectl("apply", "-f", policyYaml, "-n", testNamespace) policy := utils.GetWithTimeout( clientManagedDynamic, gvrConfigPolicy, - case18PolicyName, + policyName, testNamespace, true, defaultTimeoutSeconds, ) Expect(policy).NotTo(BeNil()) - By("Verifying " + case18PolicyName + " becomes compliant") + By("Verifying " + policyName + " becomes compliant") Eventually(func() interface{} { policy := utils.GetWithTimeout( clientManagedDynamic, gvrConfigPolicy, - case18PolicyName, + policyName, testNamespace, true, defaultTimeoutSeconds, @@ -54,12 +56,33 @@ var _ = Describe("Test discovery info refresh", func() { }) It("Verifies that the discovery info is refreshed when a template references a new object kind", func() { + By("Adding a non-functional API service") + utils.Kubectl("apply", "-f", badAPIServiceYaml) + + By("Checking that the API causes an error during API discovery") + Eventually( + func() interface{} { + cmd := exec.Command("kubectl", "api-resources") + + err := cmd.Start() + if err != nil { + return err + } + + err = cmd.Wait() + + return err + }, + defaultTimeoutSeconds, + 1, + ).ShouldNot(BeNil()) + By("Creating the prerequisites on managed") // This needs to be wrapped in an eventually since the object can't be created immediately after the CRD // is created. Eventually( func() interface{} { - cmd := exec.Command("kubectl", "apply", "-f", case18PolicyTemplatePreReqs) + cmd := exec.Command("kubectl", "apply", "-f", policyTemplatePreReqs) err := cmd.Start() if err != nil { @@ -74,24 +97,24 @@ var _ = Describe("Test discovery info refresh", func() { 1, ).Should(BeNil()) - By("Creating " + case18PolicyTemplateName + " on managed") - utils.Kubectl("apply", "-f", case18PolicyTemplate, "-n", testNamespace) + By("Creating " + policyTemplateName + " on managed") + utils.Kubectl("apply", "-f", policyTemplateYaml, "-n", testNamespace) policy := utils.GetWithTimeout( clientManagedDynamic, gvrConfigPolicy, - case18PolicyTemplateName, + policyTemplateName, testNamespace, true, defaultTimeoutSeconds, ) Expect(policy).NotTo(BeNil()) - By("Verifying " + case18PolicyTemplateName + " becomes compliant") + By("Verifying " + policyTemplateName + " becomes compliant") Eventually(func() interface{} { policy := utils.GetWithTimeout( clientManagedDynamic, gvrConfigPolicy, - case18PolicyTemplateName, + policyTemplateName, testNamespace, true, defaultTimeoutSeconds, @@ -101,9 +124,9 @@ var _ = Describe("Test discovery info refresh", func() { }, defaultTimeoutSeconds, 1).Should(Equal("Compliant")) }) - It("Cleans up", func() { + AfterAll(func() { err := clientManagedDynamic.Resource(gvrConfigPolicy).Namespace(testNamespace).Delete( - context.TODO(), case18PolicyName, metav1.DeleteOptions{}, + context.TODO(), policyName, metav1.DeleteOptions{}, ) if !k8serrors.IsNotFound(err) { Expect(err).ToNot(HaveOccurred()) @@ -117,7 +140,7 @@ var _ = Describe("Test discovery info refresh", func() { } err = clientManagedDynamic.Resource(gvrConfigPolicy).Namespace(testNamespace).Delete( - context.TODO(), case18PolicyTemplateName, metav1.DeleteOptions{}, + context.TODO(), policyTemplateName, metav1.DeleteOptions{}, ) if !k8serrors.IsNotFound(err) { Expect(err).ToNot(HaveOccurred()) @@ -131,7 +154,20 @@ var _ = Describe("Test discovery info refresh", func() { } err = clientManaged.CoreV1().ConfigMaps("default").Delete( - context.TODO(), case18ConfigMapName, metav1.DeleteOptions{}, + context.TODO(), configMapName, metav1.DeleteOptions{}, + ) + if !k8serrors.IsNotFound(err) { + Expect(err).ToNot(HaveOccurred()) + } + + gvrAPIService := schema.GroupVersionResource{ + Group: "apiregistration.k8s.io", + Version: "v1", + Resource: "apiservices", + } + + err = clientManagedDynamic.Resource(gvrAPIService).Delete( + context.TODO(), "v1beta1.pizza.example.com", metav1.DeleteOptions{}, ) if !k8serrors.IsNotFound(err) { Expect(err).ToNot(HaveOccurred()) diff --git a/test/resources/case18_discovery_refresh/bad-apiservice.yaml b/test/resources/case18_discovery_refresh/bad-apiservice.yaml new file mode 100644 index 00000000..a8a4a3cf --- /dev/null +++ b/test/resources/case18_discovery_refresh/bad-apiservice.yaml @@ -0,0 +1,13 @@ +apiVersion: apiregistration.k8s.io/v1 +kind: APIService +metadata: + name: v1beta1.pizza.example.com +spec: + group: pizza.example.com + groupPriorityMinimum: 100 + insecureSkipTLSVerify: true + service: + name: pizza-server + namespace: kube-system + version: v1beta1 + versionPriority: 100