Skip to content

Commit a67e062

Browse files
authored
Fix unlocked mounts read (#29091)
This PR fixes copy-paste error in the product usage code where we were taking out the authLock to access the mount table. While we're add it we can remove the existing lock grabbing in the product usage goroutine in favor of a serialized startup/teardown of censusManager and its core dependency which requires the lock. This requires some minor test edits, so created a test helper for that. By moving the censusManager teardown before expirationManager teardown, we can effectively ensure the goroutine is completely stopped outside of any expirationManager change. We are already guaranteed serial startup, so this should free us of any complex lock semantics.
1 parent 73bf3eb commit a67e062

File tree

6 files changed

+42
-42
lines changed

6 files changed

+42
-42
lines changed

changelog/29091.txt

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
```release-note:bug
2+
core/metrics: Fix unlocked mounts read for usage reporting.
3+
```

vault/core.go

+7-4
Original file line numberDiff line numberDiff line change
@@ -2898,14 +2898,17 @@ func (c *Core) preSeal() error {
28982898
if err := c.teardownAudits(); err != nil {
28992899
result = multierror.Append(result, fmt.Errorf("error tearing down audits: %w", err))
29002900
}
2901-
if err := c.stopExpiration(); err != nil {
2902-
result = multierror.Append(result, fmt.Errorf("error stopping expiration: %w", err))
2903-
}
2901+
// Ensure that the ActivityLog and CensusManager are both completely torn
2902+
// down before stopping the ExpirationManager. This ordering is critical,
2903+
// due to a tight coupling between the ActivityLog, CensusManager, and
2904+
// ExpirationManager for product usage reporting.
29042905
c.stopActivityLog()
2905-
// Clean up census on seal
29062906
if err := c.teardownCensusManager(); err != nil {
29072907
result = multierror.Append(result, fmt.Errorf("error tearing down reporting agent: %w", err))
29082908
}
2909+
if err := c.stopExpiration(); err != nil {
2910+
result = multierror.Append(result, fmt.Errorf("error stopping expiration: %w", err))
2911+
}
29092912
if err := c.teardownCredentials(context.Background()); err != nil {
29102913
result = multierror.Append(result, fmt.Errorf("error tearing down credentials: %w", err))
29112914
}

vault/core_metrics.go

+6-20
Original file line numberDiff line numberDiff line change
@@ -540,22 +540,15 @@ func getMeanNamespaceSecrets(mapOfNamespacesToSecrets map[string]int) int {
540540
func (c *Core) GetSecretEngineUsageMetrics() map[string]int {
541541
mounts := make(map[string]int)
542542

543-
c.authLock.RLock()
544-
defer c.authLock.RUnlock()
545-
546-
// we don't grab the statelock, so this code might run during or after the seal process.
547-
// Therefore, we need to check if c.auth is nil. If we do not, this will panic when
548-
// run after seal.
549-
if c.auth == nil {
550-
return mounts
551-
}
543+
c.mountsLock.RLock()
544+
defer c.mountsLock.RUnlock()
552545

553546
for _, entry := range c.mounts.Entries {
554-
authType := entry.Type
555-
if _, ok := mounts[authType]; !ok {
556-
mounts[authType] = 1
547+
mountType := entry.Type
548+
if _, ok := mounts[mountType]; !ok {
549+
mounts[mountType] = 1
557550
} else {
558-
mounts[authType] += 1
551+
mounts[mountType] += 1
559552
}
560553
}
561554
return mounts
@@ -568,13 +561,6 @@ func (c *Core) GetAuthMethodUsageMetrics() map[string]int {
568561
c.authLock.RLock()
569562
defer c.authLock.RUnlock()
570563

571-
// we don't grab the statelock, so this code might run during or after the seal process.
572-
// Therefore, we need to check if c.auth is nil. If we do not, this will panic when
573-
// run after seal.
574-
if c.auth == nil {
575-
return mounts
576-
}
577-
578564
for _, entry := range c.auth.Entries {
579565
authType := entry.Type
580566
if _, ok := mounts[authType]; !ok {

vault/expiration.go

+5-2
Original file line numberDiff line numberDiff line change
@@ -435,8 +435,11 @@ func (c *Core) setupExpiration(e ExpireLeaseStrategy) error {
435435
return nil
436436
}
437437

438-
// stopExpiration is used to stop the expiration manager before
439-
// sealing the Vault.
438+
// stopExpiration is used to stop the expiration manager before sealing Vault.
439+
// This *must* be called after shutting down the ActivityLog and
440+
// CensusManager to prevent Core's expirationManager reference from
441+
// changing while being accessed by product usage reporting. This is
442+
// an unfortunate side-effect of tight coupling between ActivityLog and Core.
440443
func (c *Core) stopExpiration() error {
441444
if c.expiration != nil {
442445
if err := c.expiration.Stop(); err != nil {

vault/expiration_test.go

+20-12
Original file line numberDiff line numberDiff line change
@@ -855,10 +855,7 @@ func TestExpiration_Restore(t *testing.T) {
855855
}
856856

857857
// Stop everything
858-
err = c.stopExpiration()
859-
if err != nil {
860-
t.Fatalf("err: %v", err)
861-
}
858+
stopExpiration(t, c)
862859

863860
if exp.leaseCount != 0 {
864861
t.Fatalf("expected %v leases, got %v", 0, exp.leaseCount)
@@ -3008,6 +3005,23 @@ func registerOneLease(t *testing.T, ctx context.Context, exp *ExpirationManager)
30083005
return leaseID
30093006
}
30103007

3008+
// stopExpiration is a test helper which allows us to safely teardown the
3009+
// expiration manager. This preserves the shutdown order of Core for these few
3010+
// outlier tests that (previously) directly called [Core].stopExpiration().
3011+
func stopExpiration(t *testing.T, core *Core) {
3012+
t.Helper()
3013+
core.stopActivityLog()
3014+
err := core.teardownCensusManager()
3015+
if err != nil {
3016+
t.Fatalf("error stopping census manager: %v", err)
3017+
}
3018+
3019+
err = core.stopExpiration()
3020+
if err != nil {
3021+
t.Fatalf("error stopping expiration manager: %v", err)
3022+
}
3023+
}
3024+
30113025
func TestExpiration_MarkIrrevocable(t *testing.T) {
30123026
c, _, _ := TestCoreUnsealed(t)
30133027
exp := c.expiration
@@ -3060,10 +3074,7 @@ func TestExpiration_MarkIrrevocable(t *testing.T) {
30603074
}
30613075

30623076
// stop and restore to verify that irrevocable leases are properly loaded from storage
3063-
err = c.stopExpiration()
3064-
if err != nil {
3065-
t.Fatalf("error stopping expiration manager: %v", err)
3066-
}
3077+
stopExpiration(t, c)
30673078

30683079
err = exp.Restore(nil)
30693080
if err != nil {
@@ -3153,10 +3164,7 @@ func TestExpiration_StopClearsIrrevocableCache(t *testing.T) {
31533164
exp.markLeaseIrrevocable(ctx, le, fmt.Errorf("test irrevocable error"))
31543165
exp.pendingLock.Unlock()
31553166

3156-
err = c.stopExpiration()
3157-
if err != nil {
3158-
t.Fatalf("error stopping expiration manager: %v", err)
3159-
}
3167+
stopExpiration(t, c)
31603168

31613169
if _, ok := exp.irrevocable.Load(leaseID); ok {
31623170
t.Error("expiration manager irrevocable cache should be cleared on stop")

vault/token_store_test.go

+1-4
Original file line numberDiff line numberDiff line change
@@ -1170,10 +1170,7 @@ func TestTokenStore_CreateLookup_ExpirationInRestoreMode(t *testing.T) {
11701170
t.Fatalf("err: %v", err)
11711171
}
11721172

1173-
err = c.stopExpiration()
1174-
if err != nil {
1175-
t.Fatal(err)
1176-
}
1173+
stopExpiration(t, c)
11771174

11781175
// Reset expiration manager to restore mode
11791176
ts.expiration.restoreModeLock.Lock()

0 commit comments

Comments
 (0)