diff --git a/pkg/security/certificate_manager.go b/pkg/security/certificate_manager.go index 1453113b6a12..5d13dd6beecb 100644 --- a/pkg/security/certificate_manager.go +++ b/pkg/security/certificate_manager.go @@ -51,7 +51,7 @@ type CertificateManager struct { // The metrics struct is initialized at init time and metrics do their // own locking. - certMetrics Metrics + certMetrics *Metrics // Client cert expiration cache. clientCertExpirationCache *ClientCertExpirationCache @@ -94,12 +94,14 @@ func makeCertificateManager( fn(&o) } - return &CertificateManager{ + cm := &CertificateManager{ Locator: certnames.MakeLocator(certsDir), tenantIdentifier: o.tenantIdentifier, timeSource: o.timeSource, tlsSettings: tlsSettings, } + cm.certMetrics = createMetricsLocked(cm) + return cm } type cmOptions struct { @@ -161,7 +163,7 @@ func (cm *CertificateManager) IsForTenant() bool { } // Metrics returns the metrics struct. -func (cm *CertificateManager) Metrics() Metrics { +func (cm *CertificateManager) Metrics() *Metrics { return cm.certMetrics } @@ -405,7 +407,6 @@ func (cm *CertificateManager) LoadCertificates() error { cm.tenantCert = tenantCert cm.tenantSigningCert = tenantSigningCert - cm.certMetrics = cm.createMetricsLocked() return nil } diff --git a/pkg/security/certificate_metrics.go b/pkg/security/certificate_metrics.go index e11fb291469c..ea707d379cdd 100644 --- a/pkg/security/certificate_metrics.go +++ b/pkg/security/certificate_metrics.go @@ -165,8 +165,15 @@ var ( } ) -func expirationGauge(metadata metric.Metadata, ci *CertInfo) *metric.Gauge { +// certClosure defines a way to expose a certificate to the below metric types. +// Closures are used to expose certificates instead of direct references, +// because the references can become outdated if the system loads new +// certificates. +type certClosure func() *CertInfo + +func expirationGauge(metadata metric.Metadata, certFunc certClosure) *metric.Gauge { return metric.NewFunctionalGauge(metadata, func() int64 { + ci := certFunc() if ci != nil && ci.Error == nil { return ci.ExpirationTime.Unix() } else { @@ -174,8 +181,12 @@ func expirationGauge(metadata metric.Metadata, ci *CertInfo) *metric.Gauge { } }) } -func ttlGauge(metadata metric.Metadata, ci *CertInfo, ts timeutil.TimeSource) *metric.Gauge { + +func ttlGauge( + metadata metric.Metadata, certFunc certClosure, ts timeutil.TimeSource, +) *metric.Gauge { return metric.NewFunctionalGauge(metadata, func() int64 { + ci := certFunc() if ci != nil && ci.Error == nil { expiry := ci.ExpirationTime.Unix() sec := timeutil.Unix(expiry, 0).Sub(ts.Now()).Seconds() @@ -195,31 +206,31 @@ var defaultTimeSource = timeutil.DefaultTimeSource{} // If the corresponding certificate is missing or invalid (Error != nil), we reset the // metric to zero. // cm.mu must be held to protect the certificates. Metrics do their own atomicity. -func (cm *CertificateManager) createMetricsLocked() Metrics { +func createMetricsLocked(cm *CertificateManager) *Metrics { ts := cm.timeSource if ts == nil { ts = defaultTimeSource } b := aggmetric.MakeBuilder(SQLUserLabel) - return Metrics{ - CAExpiration: expirationGauge(metaCAExpiration, cm.caCert), - TenantExpiration: expirationGauge(metaTenantExpiration, cm.tenantCert), - TenantCAExpiration: expirationGauge(metaTenantCAExpiration, cm.tenantCACert), - UIExpiration: expirationGauge(metaUIExpiration, cm.uiCert), - UICAExpiration: expirationGauge(metaUICAExpiration, cm.uiCACert), + return &Metrics{ + CAExpiration: expirationGauge(metaCAExpiration, cm.CACert), + TenantExpiration: expirationGauge(metaTenantExpiration, func() *CertInfo { return cm.tenantCert }), + TenantCAExpiration: expirationGauge(metaTenantCAExpiration, func() *CertInfo { return cm.tenantCACert }), + UIExpiration: expirationGauge(metaUIExpiration, cm.UICert), + UICAExpiration: expirationGauge(metaUICAExpiration, cm.UICACert), ClientExpiration: b.Gauge(metaClientExpiration), - ClientCAExpiration: expirationGauge(metaClientCAExpiration, cm.clientCACert), - NodeExpiration: expirationGauge(metaNodeExpiration, cm.nodeCert), - NodeClientExpiration: expirationGauge(metaNodeClientExpiration, cm.nodeClientCert), - - CATTL: ttlGauge(metaCATTL, cm.caCert, ts), - TenantTTL: ttlGauge(metaTenantTTL, cm.tenantCert, ts), - TenantCATTL: ttlGauge(metaTenantCATTL, cm.tenantCACert, ts), - UITTL: ttlGauge(metaUITTL, cm.uiCert, ts), - UICATTL: ttlGauge(metaUICATTL, cm.uiCACert, ts), + ClientCAExpiration: expirationGauge(metaClientCAExpiration, cm.ClientCACert), + NodeExpiration: expirationGauge(metaNodeExpiration, cm.NodeCert), + NodeClientExpiration: expirationGauge(metaNodeClientExpiration, func() *CertInfo { return cm.nodeClientCert }), + + CATTL: ttlGauge(metaCATTL, cm.CACert, ts), + TenantTTL: ttlGauge(metaTenantTTL, func() *CertInfo { return cm.tenantCert }, ts), + TenantCATTL: ttlGauge(metaTenantCATTL, func() *CertInfo { return cm.tenantCACert }, ts), + UITTL: ttlGauge(metaUITTL, cm.UICert, ts), + UICATTL: ttlGauge(metaUICATTL, cm.UICACert, ts), ClientTTL: b.Gauge(metaClientTTL), - ClientCATTL: ttlGauge(metaClientCATTL, cm.clientCACert, ts), - NodeTTL: ttlGauge(metaNodeTTL, cm.nodeCert, ts), - NodeClientTTL: ttlGauge(metaNodeClientTTL, cm.nodeClientCert, ts), + ClientCATTL: ttlGauge(metaClientCATTL, cm.ClientCACert, ts), + NodeTTL: ttlGauge(metaNodeTTL, cm.NodeCert, ts), + NodeClientTTL: ttlGauge(metaNodeClientTTL, func() *CertInfo { return cm.nodeClientCert }, ts), } } diff --git a/pkg/security/certificate_metrics_test.go b/pkg/security/certificate_metrics_test.go index 21a6ff3c4e0e..6880db1fe20d 100644 --- a/pkg/security/certificate_metrics_test.go +++ b/pkg/security/certificate_metrics_test.go @@ -14,24 +14,11 @@ import ( "github.com/cockroachdb/cockroach/pkg/security/securityassets" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/stretchr/testify/require" ) -// TestMetricsValues asserts that with the appropriate certificates on disk, -// the correct expiration and ttl values are set on the metrics vars that are -// exposed to our collectors. It uses a single key pair to approximate the -// behavior across other key pairs. -func TestMetricsValues(t *testing.T) { - defer leaktest.AfterTest(t)() - - // required to read certs from disk in tests - securityassets.ResetLoader() - defer ResetTest() - - // now is unix 1, each expiration is +1 after that. - // this means an expiration is 1 + cert offset, and ttl is expiration - 1. - now := timeutil.Unix(1, 0) +func writeTestCerts(t *testing.T, certsDir string) { - certsDir := t.TempDir() for offset, certName := range []string{ "ca", "ca-client", @@ -55,6 +42,25 @@ func TestMetricsValues(t *testing.T) { t.Fatalf("#%d: could not write file %s: %v", offset, keyFile, err) } } +} + +// TestMetricsValues asserts that with the appropriate certificates on disk, +// the correct expiration and ttl values are set on the metrics vars that are +// exposed to our collectors. It uses a single key pair to approximate the +// behavior across other key pairs. +func TestMetricsValues(t *testing.T) { + defer leaktest.AfterTest(t)() + + // required to read certs from disk in tests + securityassets.ResetLoader() + defer ResetTest() + + // now is unix 1, each expiration is +1 after that. + // this means an expiration is 1 + cert offset, and ttl is expiration - 1. + now := timeutil.Unix(1, 0) + + certsDir := t.TempDir() + writeTestCerts(t, certsDir) cm, err := security.NewCertificateManager( certsDir, @@ -97,3 +103,57 @@ func TestMetricsValues(t *testing.T) { } } + +// TestCertificateReload verifies that when the certificate manager's +// underlying ceritificates change, the original metrics references (which are +// the ones registered) also reflect the TTLs on the new certificates. +func TestCertificateReload(t *testing.T) { + defer leaktest.AfterTest(t)() + + // required to read certs from disk in tests + securityassets.ResetLoader() + defer ResetTest() + + // now is unix 1, each expiration is +1 after that. + // this means an expiration is 1 + cert offset, and ttl is expiration - 1. + now := timeutil.Unix(1, 0) + + certsDir := t.TempDir() + // writeTestCerts writes the ca certificate with an expiration of 2. + writeTestCerts(t, certsDir) + + cm, err := security.NewCertificateManager( + certsDir, + security.CommandTLSSettings{}, + security.WithTimeSource(timeutil.NewManualTime(now)), + security.ForTenant(1), + ) + if err != nil { + t.Error(err) + } + + caCertExpiration := cm.Metrics().CAExpiration + caCertTTL := cm.Metrics().CATTL + + // Validate the starting values, with an expiration of 2 and a now of 1, + // expiration = 2, ttl = 1. + require.Equal(t, 2, int(caCertExpiration.Value())) + require.Equal(t, 1, int(caCertTTL.Value())) + + // overwrite the ca certificate with an expiration of 1000. + certFile := "ca.crt" + expiration := 1000 + _, certBytes := makeTestCert(t, "", 0, nil, timeutil.Unix(int64(expiration), 0)) + if err := os.WriteFile(filepath.Join(certsDir, certFile), certBytes, 0700); err != nil { + t.Fatalf("could not write file %s: %v", certFile, err) + } + + // reload certificates after new one is written. + err = cm.LoadCertificates() + require.NoError(t, err) + + // Validate the values after reload, with an expiration of 1000 and a now of 1, + // expiration = 1000, ttl = 999. + require.Equal(t, 1000, int(caCertExpiration.Value())) + require.Equal(t, 999, int(caCertTTL.Value())) +}