diff --git a/.changelog/2571.txt b/.changelog/2571.txt new file mode 100644 index 0000000000..91b3f2943b --- /dev/null +++ b/.changelog/2571.txt @@ -0,0 +1,3 @@ +```release-note:bug +control-plane: fix bug in endpoints controller when deregistering services from consul when a node is deleted. +``` diff --git a/control-plane/connect-inject/controllers/endpoints/endpoints_controller.go b/control-plane/connect-inject/controllers/endpoints/endpoints_controller.go index 7b236792ab..cdf56b187f 100644 --- a/control-plane/connect-inject/controllers/endpoints/endpoints_controller.go +++ b/control-plane/connect-inject/controllers/endpoints/endpoints_controller.go @@ -159,7 +159,6 @@ func (r *Controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu } err = r.Client.Get(ctx, req.NamespacedName, &serviceEndpoints) - // endpointPods holds a set of all pods this endpoints object is currently pointing to. // We use this later when we reconcile ACL tokens to decide whether an ACL token in Consul // is for a pod that no longer exists. @@ -183,7 +182,7 @@ func (r *Controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu // It is possible that the endpoints object has never been registered, in which case deregistration is a no-op. if isLabeledIgnore(serviceEndpoints.Labels) { // We always deregister the service to handle the case where a user has registered the service, then added the label later. - r.Log.Info("Ignoring endpoint labeled with `consul.hashicorp.com/service-ignore: \"true\"`", "name", req.Name, "namespace", req.Namespace) + r.Log.Info("ignoring endpoint labeled with `consul.hashicorp.com/service-ignore: \"true\"`", "name", req.Name, "namespace", req.Namespace) err = r.deregisterService(apiClient, req.Name, req.Namespace, nil) return ctrl.Result{}, err } @@ -915,14 +914,14 @@ func getHealthCheckStatusReason(healthCheckStatus, podName, podNamespace string) // them only if they are not in endpointsAddressesMap. If the map is nil, it will deregister all instances. If the map // has addresses, it will only deregister instances not in the map. func (r *Controller) deregisterService(apiClient *api.Client, k8sSvcName, k8sSvcNamespace string, endpointsAddressesMap map[string]bool) error { - // Get services matching metadata. - nodesWithSvcs, err := r.serviceInstancesForK8sNodes(apiClient, k8sSvcName, k8sSvcNamespace) + // Get services matching metadata from Consul + nodesWithSvcs, err := r.serviceInstancesForNodes(apiClient, k8sSvcName, k8sSvcNamespace) if err != nil { r.Log.Error(err, "failed to get service instances", "name", k8sSvcName) return err } - // Deregister each service instance that matches the metadata. + var errs error for _, nodeSvcs := range nodesWithSvcs { for _, svc := range nodeSvcs.Services { // We need to get services matching "k8s-service-name" and "k8s-namespace" metadata. @@ -933,42 +932,48 @@ func (r *Controller) deregisterService(apiClient *api.Client, k8sSvcName, k8sSvc if _, ok := endpointsAddressesMap[svc.Address]; !ok { // If the service address is not in the Endpoints addresses, deregister it. r.Log.Info("deregistering service from consul", "svc", svc.ID) - _, err = apiClient.Catalog().Deregister(&api.CatalogDeregistration{ + _, err := apiClient.Catalog().Deregister(&api.CatalogDeregistration{ Node: nodeSvcs.Node.Node, ServiceID: svc.ID, Namespace: svc.Namespace, }, nil) if err != nil { + // Do not exit right away as there might be other services that need to be deregistered. r.Log.Error(err, "failed to deregister service instance", "id", svc.ID) - return err + errs = multierror.Append(errs, err) + } else { + serviceDeregistered = true } - serviceDeregistered = true } } else { r.Log.Info("deregistering service from consul", "svc", svc.ID) - if _, err = apiClient.Catalog().Deregister(&api.CatalogDeregistration{ + _, err := apiClient.Catalog().Deregister(&api.CatalogDeregistration{ Node: nodeSvcs.Node.Node, ServiceID: svc.ID, Namespace: svc.Namespace, - }, nil); err != nil { + }, nil) + if err != nil { + // Do not exit right away as there might be other services that need to be deregistered. r.Log.Error(err, "failed to deregister service instance", "id", svc.ID) - return err + errs = multierror.Append(errs, err) + } else { + serviceDeregistered = true } - serviceDeregistered = true } if r.AuthMethod != "" && serviceDeregistered { r.Log.Info("reconciling ACL tokens for service", "svc", svc.Service) - err = r.deleteACLTokensForServiceInstance(apiClient, svc, k8sSvcNamespace, svc.Meta[constants.MetaKeyPodName]) + err := r.deleteACLTokensForServiceInstance(apiClient, svc, k8sSvcNamespace, svc.Meta[constants.MetaKeyPodName]) if err != nil { r.Log.Error(err, "failed to reconcile ACL tokens for service", "svc", svc.Service) - return err + errs = multierror.Append(errs, err) } } } } - return nil + return errs + } // deleteACLTokensForServiceInstance finds the ACL tokens that belongs to the service instance and deletes it from Consul. @@ -1088,21 +1093,32 @@ func getTokenMetaFromDescription(description string) (map[string]string, error) return tokenMeta, nil } -func (r *Controller) serviceInstancesForK8sNodes(apiClient *api.Client, k8sServiceName, k8sServiceNamespace string) ([]*api.CatalogNodeServiceList, error) { +func (r *Controller) serviceInstancesForNodes(apiClient *api.Client, k8sServiceName, k8sServiceNamespace string) ([]*api.CatalogNodeServiceList, error) { var serviceList []*api.CatalogNodeServiceList - // Get a list of k8s nodes. - var nodeList corev1.NodeList - err := r.Client.List(r.Context, &nodeList) + + // The nodelist may have changed between this point and when the event was raised + // For example, if a pod is evicted because a node has been deleted, there is no guarantee that that node will show up here + // query consul catalog for a list of nodes supporting this service + // quite a lot of results as synthetic nodes are never deregistered. + var nodes []*api.Node + filter := fmt.Sprintf(`Meta[%q] == %q `, "synthetic-node", "true") + nodes, _, err := apiClient.Catalog().Nodes(&api.QueryOptions{Filter: filter, Namespace: namespaces.WildcardNamespace}) if err != nil { return nil, err } - for _, node := range nodeList.Items { + + var errs error + for _, node := range nodes { var nodeServices *api.CatalogNodeServiceList - nodeServices, err = r.serviceInstancesForK8SServiceNameAndNamespace(apiClient, k8sServiceName, k8sServiceNamespace, common.ConsulNodeNameFromK8sNode(node.Name)) - serviceList = append(serviceList, nodeServices) + nodeServices, err := r.serviceInstancesForK8SServiceNameAndNamespace(apiClient, k8sServiceName, k8sServiceNamespace, node.Node) + if err != nil { + errs = multierror.Append(errs, err) + } else { + serviceList = append(serviceList, nodeServices) + } } - return serviceList, err + return serviceList, errs } // serviceInstancesForK8SServiceNameAndNamespace calls Consul's ServicesWithFilter to get the list diff --git a/control-plane/connect-inject/controllers/endpoints/endpoints_controller_test.go b/control-plane/connect-inject/controllers/endpoints/endpoints_controller_test.go index ea1ce686d6..477be49e9f 100644 --- a/control-plane/connect-inject/controllers/endpoints/endpoints_controller_test.go +++ b/control-plane/connect-inject/controllers/endpoints/endpoints_controller_test.go @@ -893,6 +893,9 @@ func TestReconcileCreateEndpoint_MultiportService(t *testing.T) { catalogRegistration := &api.CatalogRegistration{ Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: svc, } _, err := consulClient.Catalog().Register(catalogRegistration, nil) @@ -2339,6 +2342,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-service-updated", Service: "service-updated", @@ -2358,6 +2364,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-service-updated-sidecar-proxy", @@ -2444,6 +2453,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-service-updated", Service: "service-updated", @@ -2463,6 +2475,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: "127.0.0.1", + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-service-updated-sidecar-proxy", @@ -2549,6 +2564,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-service-updated", Service: "service-updated", @@ -2566,6 +2584,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-service-updated-sidecar-proxy", @@ -2631,6 +2652,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-different-consul-svc-name", Service: "different-consul-svc-name", @@ -2648,6 +2672,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-different-consul-svc-name-sidecar-proxy", @@ -2721,6 +2748,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-service-updated", Service: "service-updated", @@ -2732,6 +2762,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-service-updated-sidecar-proxy", @@ -2835,6 +2868,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-service-updated", Service: "service-updated", @@ -2846,6 +2882,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-service-updated-sidecar-proxy", @@ -2862,6 +2901,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod2-service-updated", Service: "service-updated", @@ -2873,6 +2915,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod2-service-updated-sidecar-proxy", @@ -2932,6 +2977,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-different-consul-svc-name", Service: "different-consul-svc-name", @@ -2943,6 +2991,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-different-consul-svc-name-sidecar-proxy", @@ -2959,6 +3010,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod2-different-consul-svc-name", Service: "different-consul-svc-name", @@ -2970,6 +3024,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod2-different-consul-svc-name-sidecar-proxy", @@ -3015,6 +3072,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-service-updated", Service: "service-updated", @@ -3026,6 +3086,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-service-updated-sidecar-proxy", @@ -3042,6 +3105,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod2-service-updated", Service: "service-updated", @@ -3053,6 +3119,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod2-service-updated-sidecar-proxy", @@ -3088,6 +3157,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-different-consul-svc-name", Service: "different-consul-svc-name", @@ -3099,6 +3171,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-different-consul-svc-name-sidecar-proxy", @@ -3115,6 +3190,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod2-different-consul-svc-name", Service: "different-consul-svc-name", @@ -3126,6 +3204,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod2-different-consul-svc-name-sidecar-proxy", @@ -3174,6 +3255,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-service-updated", Service: "service-updated", @@ -3191,6 +3275,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-service-updated-sidecar-proxy", @@ -3270,6 +3357,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-service-updated", Service: "service-updated", @@ -3287,6 +3377,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-service-updated-sidecar-proxy", @@ -3309,6 +3402,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod2-service-updated", Service: "service-updated", @@ -3326,6 +3422,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod2-service-updated-sidecar-proxy", @@ -3409,6 +3508,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-service-updated", Service: "service-updated", @@ -3426,6 +3528,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-service-updated-sidecar-proxy", @@ -4116,6 +4221,9 @@ func TestReconcileDeleteEndpoint(t *testing.T) { serviceRegistration := &api.CatalogRegistration{ Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: svc, } _, err := consulClient.Catalog().Register(serviceRegistration, nil) @@ -4262,6 +4370,9 @@ func TestReconcileIgnoresServiceIgnoreLabel(t *testing.T) { serviceRegistration := &api.CatalogRegistration{ Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-" + svcName, Service: svcName, @@ -4385,6 +4496,9 @@ func TestReconcile_podSpecifiesExplicitService(t *testing.T) { _, err := consulClient.Catalog().Register(&api.CatalogRegistration{ Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-" + svcName, Service: svcName, @@ -4542,6 +4656,9 @@ func TestServiceInstancesForK8SServiceNameAndNamespace(t *testing.T) { catalogRegistration := &api.CatalogRegistration{ Node: consulNodeName, Address: "127.0.0.1", + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: svc, } _, err = consulClient.Catalog().Register(catalogRegistration, nil)