diff --git a/changelog/29091.txt b/changelog/29091.txt new file mode 100644 index 000000000000..409d8ddf4a67 --- /dev/null +++ b/changelog/29091.txt @@ -0,0 +1,3 @@ +```release-note:bug +core/metrics: Fix unlocked mounts read for usage reporting. +``` diff --git a/vault/core.go b/vault/core.go index bad9a45e96eb..d9e7ad62de50 100644 --- a/vault/core.go +++ b/vault/core.go @@ -2898,14 +2898,17 @@ func (c *Core) preSeal() error { if err := c.teardownAudits(); err != nil { result = multierror.Append(result, fmt.Errorf("error tearing down audits: %w", err)) } - if err := c.stopExpiration(); err != nil { - result = multierror.Append(result, fmt.Errorf("error stopping expiration: %w", err)) - } + // Ensure that the ActivityLog and CensusManager are both completely torn + // down before stopping the ExpirationManager. This ordering is critical, + // due to a tight coupling between the ActivityLog, CensusManager, and + // ExpirationManager for product usage reporting. c.stopActivityLog() - // Clean up census on seal if err := c.teardownCensusManager(); err != nil { result = multierror.Append(result, fmt.Errorf("error tearing down reporting agent: %w", err)) } + if err := c.stopExpiration(); err != nil { + result = multierror.Append(result, fmt.Errorf("error stopping expiration: %w", err)) + } if err := c.teardownCredentials(context.Background()); err != nil { result = multierror.Append(result, fmt.Errorf("error tearing down credentials: %w", err)) } diff --git a/vault/core_metrics.go b/vault/core_metrics.go index 0ca9a0e09432..60f0295d144f 100644 --- a/vault/core_metrics.go +++ b/vault/core_metrics.go @@ -540,22 +540,15 @@ func getMeanNamespaceSecrets(mapOfNamespacesToSecrets map[string]int) int { func (c *Core) GetSecretEngineUsageMetrics() map[string]int { mounts := make(map[string]int) - c.authLock.RLock() - defer c.authLock.RUnlock() - - // we don't grab the statelock, so this code might run during or after the seal process. - // Therefore, we need to check if c.auth is nil. If we do not, this will panic when - // run after seal. - if c.auth == nil { - return mounts - } + c.mountsLock.RLock() + defer c.mountsLock.RUnlock() for _, entry := range c.mounts.Entries { - authType := entry.Type - if _, ok := mounts[authType]; !ok { - mounts[authType] = 1 + mountType := entry.Type + if _, ok := mounts[mountType]; !ok { + mounts[mountType] = 1 } else { - mounts[authType] += 1 + mounts[mountType] += 1 } } return mounts @@ -568,13 +561,6 @@ func (c *Core) GetAuthMethodUsageMetrics() map[string]int { c.authLock.RLock() defer c.authLock.RUnlock() - // we don't grab the statelock, so this code might run during or after the seal process. - // Therefore, we need to check if c.auth is nil. If we do not, this will panic when - // run after seal. - if c.auth == nil { - return mounts - } - for _, entry := range c.auth.Entries { authType := entry.Type if _, ok := mounts[authType]; !ok { diff --git a/vault/expiration.go b/vault/expiration.go index c2f652020ce2..37cb26c559f3 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -435,8 +435,11 @@ func (c *Core) setupExpiration(e ExpireLeaseStrategy) error { return nil } -// stopExpiration is used to stop the expiration manager before -// sealing the Vault. +// stopExpiration is used to stop the expiration manager before sealing Vault. +// This *must* be called after shutting down the ActivityLog and +// CensusManager to prevent Core's expirationManager reference from +// changing while being accessed by product usage reporting. This is +// an unfortunate side-effect of tight coupling between ActivityLog and Core. func (c *Core) stopExpiration() error { if c.expiration != nil { if err := c.expiration.Stop(); err != nil { diff --git a/vault/expiration_test.go b/vault/expiration_test.go index 9e0da07bdb5e..2ef631562596 100644 --- a/vault/expiration_test.go +++ b/vault/expiration_test.go @@ -855,10 +855,7 @@ func TestExpiration_Restore(t *testing.T) { } // Stop everything - err = c.stopExpiration() - if err != nil { - t.Fatalf("err: %v", err) - } + stopExpiration(t, c) if exp.leaseCount != 0 { t.Fatalf("expected %v leases, got %v", 0, exp.leaseCount) @@ -3008,6 +3005,23 @@ func registerOneLease(t *testing.T, ctx context.Context, exp *ExpirationManager) return leaseID } +// stopExpiration is a test helper which allows us to safely teardown the +// expiration manager. This preserves the shutdown order of Core for these few +// outlier tests that (previously) directly called [Core].stopExpiration(). +func stopExpiration(t *testing.T, core *Core) { + t.Helper() + core.stopActivityLog() + err := core.teardownCensusManager() + if err != nil { + t.Fatalf("error stopping census manager: %v", err) + } + + err = core.stopExpiration() + if err != nil { + t.Fatalf("error stopping expiration manager: %v", err) + } +} + func TestExpiration_MarkIrrevocable(t *testing.T) { c, _, _ := TestCoreUnsealed(t) exp := c.expiration @@ -3060,10 +3074,7 @@ func TestExpiration_MarkIrrevocable(t *testing.T) { } // stop and restore to verify that irrevocable leases are properly loaded from storage - err = c.stopExpiration() - if err != nil { - t.Fatalf("error stopping expiration manager: %v", err) - } + stopExpiration(t, c) err = exp.Restore(nil) if err != nil { @@ -3153,10 +3164,7 @@ func TestExpiration_StopClearsIrrevocableCache(t *testing.T) { exp.markLeaseIrrevocable(ctx, le, fmt.Errorf("test irrevocable error")) exp.pendingLock.Unlock() - err = c.stopExpiration() - if err != nil { - t.Fatalf("error stopping expiration manager: %v", err) - } + stopExpiration(t, c) if _, ok := exp.irrevocable.Load(leaseID); ok { t.Error("expiration manager irrevocable cache should be cleared on stop") diff --git a/vault/token_store_test.go b/vault/token_store_test.go index 8223108e9876..6d58616966f9 100644 --- a/vault/token_store_test.go +++ b/vault/token_store_test.go @@ -1170,10 +1170,7 @@ func TestTokenStore_CreateLookup_ExpirationInRestoreMode(t *testing.T) { t.Fatalf("err: %v", err) } - err = c.stopExpiration() - if err != nil { - t.Fatal(err) - } + stopExpiration(t, c) // Reset expiration manager to restore mode ts.expiration.restoreModeLock.Lock()