Skip to content

Commit

Permalink
Allow halting on certificate errors
Browse files Browse the repository at this point in the history
When certificate errors are encountered, the fix is usually to restart
the affected pod. To allow this to happen automatically, add a
configuration setting (HaltOnCertificateError) to the gateway agent that
makes it halt on such errors, and extend the certificate error handling,
along with the same setting, to the route agent.

The setting is disabled by default in the agents themselves; the
operator will enable it by default.

Signed-off-by: Stephen Kitt <skitt@redhat.com>
  • Loading branch information
skitt committed Oct 23, 2023
1 parent daf50e5 commit 274b08d
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 13 deletions.
9 changes: 7 additions & 2 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,15 +105,20 @@ func main() {

var err error

var logCertificateError = logger.Errorf
if submSpec.HaltOnCertificateError {
logCertificateError = logger.FatalfOnError
}

//nolint:reassign // We need to reassign ErrorHandlers to register our handler
utilruntime.ErrorHandlers = append(utilruntime.ErrorHandlers, func(err error) {
var unknownAuthorityError x509.UnknownAuthorityError
if errors.As(err, &unknownAuthorityError) && lastBadCertificate.Swap(unknownAuthorityError.Cert) != unknownAuthorityError.Cert {
logger.Errorf(err, "Certificate error: %s", resource.ToJSON(err))
logCertificateError(err, "Certificate error: %s", resource.ToJSON(err))
}
var certificateInvalidError x509.CertificateInvalidError
if errors.As(err, &certificateInvalidError) && lastBadCertificate.Swap(certificateInvalidError.Cert) != certificateInvalidError.Cert {
logger.Errorf(err, "Certificate error: %s", resource.ToJSON(err))
logCertificateError(err, "Certificate error: %s", resource.ToJSON(err))
}
// The generic handler has already logged the error, no need to repeat if we don't want extra detail
})
Expand Down
15 changes: 8 additions & 7 deletions pkg/routeagent_driver/environment/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@ limitations under the License.
package environment

type Specification struct {
ClusterID string
Namespace string
ClusterCidr []string
ServiceCidr []string
GlobalCidr []string
Uninstall bool
WaitForNode bool
ClusterID string
Namespace string
ClusterCidr []string
ServiceCidr []string
GlobalCidr []string
Uninstall bool
WaitForNode bool
HaltOnCertificateError bool
}
31 changes: 27 additions & 4 deletions pkg/routeagent_driver/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,21 @@ limitations under the License.
package main

import (
"crypto/x509"
"flag"
"fmt"
"io/fs"
"os"
"strconv"
"sync/atomic"
"time"

"github.com/kelseyhightower/envconfig"
"github.com/pkg/errors"
"github.com/submariner-io/admiral/pkg/log"
"github.com/submariner-io/admiral/pkg/log/kzerolog"
"github.com/submariner-io/admiral/pkg/names"
"github.com/submariner-io/admiral/pkg/resource"
admversion "github.com/submariner-io/admiral/pkg/version"
"github.com/submariner-io/admiral/pkg/watcher"
v1 "github.com/submariner-io/submariner/pkg/apis/submariner.io/v1"
Expand All @@ -50,6 +53,7 @@ import (
"github.com/submariner-io/submariner/pkg/routeagent_driver/handlers/ovn"
"github.com/submariner-io/submariner/pkg/versions"
corev1 "k8s.io/api/core/v1"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
Expand All @@ -60,10 +64,11 @@ import (
)

var (
masterURL string
kubeconfig string
logger = log.Logger{Logger: logf.Log.WithName("main")}
showVersion = false
masterURL string
kubeconfig string
logger = log.Logger{Logger: logf.Log.WithName("main")}
showVersion = false
lastBadCertificate atomic.Value
)

func main() {
Expand Down Expand Up @@ -152,6 +157,24 @@ func main() {
return
}

var logCertificateError = logger.Errorf
if env.HaltOnCertificateError {
logCertificateError = logger.FatalfOnError
}

//nolint:reassign // We need to reassign ErrorHandlers to register our handler
utilruntime.ErrorHandlers = append(utilruntime.ErrorHandlers, func(err error) {
var unknownAuthorityError x509.UnknownAuthorityError
if errors.As(err, &unknownAuthorityError) && lastBadCertificate.Swap(unknownAuthorityError.Cert) != unknownAuthorityError.Cert {
logCertificateError(err, "Certificate error: %s", resource.ToJSON(err))
}
var certificateInvalidError x509.CertificateInvalidError
if errors.As(err, &certificateInvalidError) && lastBadCertificate.Swap(certificateInvalidError.Cert) != certificateInvalidError.Cert {
logCertificateError(err, "Certificate error: %s", resource.ToJSON(err))
}
// The generic handler has already logged the error, no need to repeat if we don't want extra detail
})

if err = annotateNode(env.ClusterCidr, k8sClientSet); err != nil {
logger.Errorf(err, "Error while annotating the node")
}
Expand Down
1 change: 1 addition & 0 deletions pkg/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ type SubmarinerSpecification struct {
NATEnabled bool
HealthCheckEnabled bool `default:"true"`
Uninstall bool
HaltOnCertificateError bool
HealthCheckInterval uint
HealthCheckMaxPacketLossCount uint
MetricsPort string `default:"32780"`
Expand Down

0 comments on commit 274b08d

Please sign in to comment.