Skip to content

Commit

Permalink
remove health checks controller and use endpoints controller for heal…
Browse files Browse the repository at this point in the history
…th checks (#472)

remove health checks controller and use endpoints controller for health checks.
  • Loading branch information
kschoche authored Apr 8, 2021
1 parent e2eada2 commit 2f144ec
Show file tree
Hide file tree
Showing 18 changed files with 397 additions and 1,613 deletions.
4 changes: 2 additions & 2 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ executors:
- image: docker.mirror.hashicorp.services/circleci/golang:1.14
environment:
TEST_RESULTS: /tmp/test-results # path to where test results are saved
CONSUL_VERSION: 1.9.0-rc1 # Consul's OSS version to use in tests
CONSUL_ENT_VERSION: 1.9.0+ent-rc1 # Consul's enterprise version to use in tests
CONSUL_VERSION: 1.9.4 # Consul's OSS version to use in tests
CONSUL_ENT_VERSION: 1.9.4+ent # Consul's enterprise version to use in tests

jobs:
go-fmt-and-vet:
Expand Down
6 changes: 3 additions & 3 deletions connect-inject/cleanup_resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func (c *CleanupResource) reconcile() {
}

podList, err := c.KubernetesClient.CoreV1().Pods(corev1.NamespaceAll).List(c.Ctx,
metav1.ListOptions{LabelSelector: labelInject})
metav1.ListOptions{LabelSelector: annotationStatus})
if err != nil {
c.Log.Error("unable to get pods", "error", err)
return
Expand Down Expand Up @@ -223,11 +223,11 @@ func (c *CleanupResource) Informer() cache.SharedIndexInformer {
&cache.ListWatch{
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
return c.KubernetesClient.CoreV1().Pods(metav1.NamespaceAll).List(c.Ctx,
metav1.ListOptions{LabelSelector: labelInject})
metav1.ListOptions{LabelSelector: annotationStatus})
},
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
return c.KubernetesClient.CoreV1().Pods(metav1.NamespaceAll).Watch(c.Ctx,
metav1.ListOptions{LabelSelector: labelInject})
metav1.ListOptions{LabelSelector: annotationStatus})
},
},
&corev1.Pod{},
Expand Down
4 changes: 2 additions & 2 deletions connect-inject/cleanup_resource_ent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ var (
Name: "foo-abc123",
Namespace: "default",
Labels: map[string]string{
labelInject: injected,
annotationStatus: injected,
},
Annotations: map[string]string{
annotationStatus: injected,
Expand All @@ -309,7 +309,7 @@ var (
Name: "foo-abc123",
Namespace: "default",
Labels: map[string]string{
labelInject: injected,
annotationStatus: injected,
},
Annotations: map[string]string{
annotationStatus: injected,
Expand Down
2 changes: 1 addition & 1 deletion connect-inject/cleanup_resource_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ var (
Name: "foo-abc123",
Namespace: "default",
Labels: map[string]string{
labelInject: injected,
annotationStatus: injected,
},
Annotations: map[string]string{
annotationStatus: injected,
Expand Down
73 changes: 68 additions & 5 deletions connect-inject/endpoints_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"fmt"
"strings"

mapset "github.com/deckarep/golang-set"
"github.com/deckarep/golang-set"
"github.com/go-logr/logr"
"github.com/hashicorp/consul-k8s/consul"
"github.com/hashicorp/consul/api"
Expand All @@ -25,10 +25,12 @@ import (
)

const (
MetaKeyPodName = "pod-name"
MetaKeyKubeServiceName = "k8s-service-name"
MetaKeyKubeNS = "k8s-namespace"
envoyPrometheusBindAddr = "envoy_prometheus_bind_addr"
MetaKeyPodName = "pod-name"
MetaKeyKubeServiceName = "k8s-service-name"
MetaKeyKubeNS = "k8s-namespace"
kubernetesSuccessReasonMsg = "Kubernetes health checks passing"
podPendingReasonMsg = "Pod is pending"
envoyPrometheusBindAddr = "envoy_prometheus_bind_addr"
)

type EndpointsController struct {
Expand Down Expand Up @@ -135,6 +137,18 @@ func (r *EndpointsController) Reconcile(ctx context.Context, req ctrl.Request) (
r.Log.Error(err, "failed to register proxy service with Consul", "consul-proxy-service-name", proxyServiceRegistration.Name)
return ctrl.Result{}, err
}

// Update the TTL health check for the service.
// This is required because ServiceRegister() does not update the TTL if the service already exists.
r.Log.Info("updating ttl health check", "service", serviceRegistration.Name)
status, reason, err := getReadyStatusAndReason(pod)
if err != nil {
return ctrl.Result{}, err
}
err = client.Agent().UpdateTTL(getConsulHealthCheckID(pod, serviceRegistration.ID), reason, status)
if err != nil {
return ctrl.Result{}, err
}
}
}
}
Expand Down Expand Up @@ -212,13 +226,29 @@ func (r *EndpointsController) createServiceRegistrations(pod corev1.Pod, service
tags = append(tags, strings.Split(raw, ",")...)
}

// We do not set the Notes field with the 'reason' on creation because it does not set the Output field which
// gets read by Consul and you'll end up with both Notes and Output set.
// Notes (reason) will updated by UpdateTTL() as soon as this function returns.
status, _, err := getReadyStatusAndReason(pod)
if err != nil {
return nil, nil, err
}

service := &api.AgentServiceRegistration{
ID: serviceID,
Name: serviceName,
Port: servicePort,
Address: pod.Status.PodIP,
Meta: meta,
Namespace: "", // TODO: namespace support
Check: &api.AgentServiceCheck{
CheckID: getConsulHealthCheckID(pod, serviceID),
Name: "Kubernetes Health Check",
TTL: "100000h",
Status: status,
SuccessBeforePassing: 1,
FailuresBeforeCritical: 1,
},
}
if len(tags) > 0 {
service.Tags = tags
Expand Down Expand Up @@ -292,6 +322,39 @@ func (r *EndpointsController) createServiceRegistrations(pod corev1.Pod, service
return service, proxyService, nil
}

// getConsulHealthCheckID deterministically generates a health check ID that will be unique to the Agent
// where the health check is registered and deregistered.
func getConsulHealthCheckID(pod corev1.Pod, serviceID string) string {
return fmt.Sprintf("%s/%s/kubernetes-health-check", pod.Namespace, serviceID)
}

// getReadyStatusAndReason returns the formatted status string to pass to Consul based on the
// ready state of the pod along with the reason message which will be passed into the Notes
// field of the Consul health check.
func getReadyStatusAndReason(pod corev1.Pod) (string, string, error) {
// A pod might be pending if the init containers have run but the non-init
// containers haven't reached running state. In this case we set a failing health
// check so the pod doesn't receive traffic before it's ready.
if pod.Status.Phase == corev1.PodPending {
return api.HealthCritical, podPendingReasonMsg, nil
}

for _, cond := range pod.Status.Conditions {
var consulStatus, reason string
if cond.Type == corev1.PodReady {
if cond.Status != corev1.ConditionTrue {
consulStatus = api.HealthCritical
reason = cond.Message
} else {
consulStatus = api.HealthPassing
reason = kubernetesSuccessReasonMsg
}
return consulStatus, reason, nil
}
}
return "", "", fmt.Errorf("no ready status for pod: %s", pod.Name)
}

// deregisterServiceOnAllAgents queries all agents for service instances that have the metadata
// "k8s-service-name"=k8sSvcName and "k8s-namespace"=k8sSvcNamespace. The k8s service name may or may not match the
// consul service name, but the k8s service name will always match the metadata on the Consul service
Expand Down
Loading

0 comments on commit 2f144ec

Please sign in to comment.