From 274b08dd8b7df709e979848741906007deb08608 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Mon, 23 Oct 2023 17:00:03 +0200 Subject: [PATCH] Allow halting on certificate errors When certificate errors are encountered, the fix is usually to restart the affected pod. To allow this to happen automatically, add a configuration setting for the gateway agent; extend the mechanism to the route agent. The setting is disabled by default; it will be enabled by default by the operator. Signed-off-by: Stephen Kitt --- main.go | 9 +++++-- pkg/routeagent_driver/environment/env.go | 15 ++++++------ pkg/routeagent_driver/main.go | 31 +++++++++++++++++++++--- pkg/types/types.go | 1 + 4 files changed, 43 insertions(+), 13 deletions(-) diff --git a/main.go b/main.go index fb98167171..720d80230d 100644 --- a/main.go +++ b/main.go @@ -105,15 +105,20 @@ func main() { var err error + var logCertificateError = logger.Errorf + if submSpec.HaltOnCertificateError { + logCertificateError = logger.FatalfOnError + } + //nolint:reassign // We need to reassign ErrorHandlers to register our handler utilruntime.ErrorHandlers = append(utilruntime.ErrorHandlers, func(err error) { var unknownAuthorityError x509.UnknownAuthorityError if errors.As(err, &unknownAuthorityError) && lastBadCertificate.Swap(unknownAuthorityError.Cert) != unknownAuthorityError.Cert { - logger.Errorf(err, "Certificate error: %s", resource.ToJSON(err)) + logCertificateError(err, "Certificate error: %s", resource.ToJSON(err)) } var certificateInvalidError x509.CertificateInvalidError if errors.As(err, &certificateInvalidError) && lastBadCertificate.Swap(certificateInvalidError.Cert) != certificateInvalidError.Cert { - logger.Errorf(err, "Certificate error: %s", resource.ToJSON(err)) + logCertificateError(err, "Certificate error: %s", resource.ToJSON(err)) } // The generic handler has already logged the error, no need to repeat if we don't want extra detail }) diff --git a/pkg/routeagent_driver/environment/env.go b/pkg/routeagent_driver/environment/env.go index a84ff8a211..cac4dc01fe 100644 --- a/pkg/routeagent_driver/environment/env.go +++ b/pkg/routeagent_driver/environment/env.go @@ -19,11 +19,12 @@ limitations under the License. package environment type Specification struct { - ClusterID string - Namespace string - ClusterCidr []string - ServiceCidr []string - GlobalCidr []string - Uninstall bool - WaitForNode bool + ClusterID string + Namespace string + ClusterCidr []string + ServiceCidr []string + GlobalCidr []string + Uninstall bool + WaitForNode bool + HaltOnCertificateError bool } diff --git a/pkg/routeagent_driver/main.go b/pkg/routeagent_driver/main.go index 79582095b7..38214386c1 100644 --- a/pkg/routeagent_driver/main.go +++ b/pkg/routeagent_driver/main.go @@ -19,11 +19,13 @@ limitations under the License. package main import ( + "crypto/x509" "flag" "fmt" "io/fs" "os" "strconv" + "sync/atomic" "time" "github.com/kelseyhightower/envconfig" @@ -31,6 +33,7 @@ import ( "github.com/submariner-io/admiral/pkg/log" "github.com/submariner-io/admiral/pkg/log/kzerolog" "github.com/submariner-io/admiral/pkg/names" + "github.com/submariner-io/admiral/pkg/resource" admversion "github.com/submariner-io/admiral/pkg/version" "github.com/submariner-io/admiral/pkg/watcher" v1 "github.com/submariner-io/submariner/pkg/apis/submariner.io/v1" @@ -50,6 +53,7 @@ import ( "github.com/submariner-io/submariner/pkg/routeagent_driver/handlers/ovn" "github.com/submariner-io/submariner/pkg/versions" corev1 "k8s.io/api/core/v1" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" @@ -60,10 +64,11 @@ import ( ) var ( - masterURL string - kubeconfig string - logger = log.Logger{Logger: logf.Log.WithName("main")} - showVersion = false + masterURL string + kubeconfig string + logger = log.Logger{Logger: logf.Log.WithName("main")} + showVersion = false + lastBadCertificate atomic.Value ) func main() { @@ -152,6 +157,24 @@ func main() { return } + var logCertificateError = logger.Errorf + if env.HaltOnCertificateError { + logCertificateError = logger.FatalfOnError + } + + //nolint:reassign // We need to reassign ErrorHandlers to register our handler + utilruntime.ErrorHandlers = append(utilruntime.ErrorHandlers, func(err error) { + var unknownAuthorityError x509.UnknownAuthorityError + if errors.As(err, &unknownAuthorityError) && lastBadCertificate.Swap(unknownAuthorityError.Cert) != unknownAuthorityError.Cert { + logCertificateError(err, "Certificate error: %s", resource.ToJSON(err)) + } + var certificateInvalidError x509.CertificateInvalidError + if errors.As(err, &certificateInvalidError) && lastBadCertificate.Swap(certificateInvalidError.Cert) != certificateInvalidError.Cert { + logCertificateError(err, "Certificate error: %s", resource.ToJSON(err)) + } + // The generic handler has already logged the error, no need to repeat if we don't want extra detail + }) + if err = annotateNode(env.ClusterCidr, k8sClientSet); err != nil { logger.Errorf(err, "Error while annotating the node") } diff --git a/pkg/types/types.go b/pkg/types/types.go index dbd59bd301..fb402869b8 100644 --- a/pkg/types/types.go +++ b/pkg/types/types.go @@ -45,6 +45,7 @@ type SubmarinerSpecification struct { NATEnabled bool HealthCheckEnabled bool `default:"true"` Uninstall bool + HaltOnCertificateError bool HealthCheckInterval uint HealthCheckMaxPacketLossCount uint MetricsPort string `default:"32780"`