Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Audit: always use derived context for audit logging, increase timeout to 10s #28286

Merged
merged 3 commits into from
Sep 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 23 additions & 71 deletions audit/broker.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import (

const (
// timeout is the duration which should be used for context related timeouts.
timeout = 5 * time.Second
timeout = 10 * time.Second
)

var (
Expand Down Expand Up @@ -277,29 +277,12 @@ func (b *Broker) LogRequest(ctx context.Context, in *logical.LogInput) (retErr e

e.Data = in

// Evaluate whether we need a new context for auditing.
var auditContext context.Context
if isContextViable(ctx) {
auditContext = ctx
} else {
// In cases where we are trying to audit the request, and the existing
// context is not viable due to a short deadline, we detach ourselves from
// the original context (keeping only the namespace).
// This is so that we get a fair run at writing audit entries if Vault
// has taken up a lot of time handling the request before audit (request)
// is triggered. Pipeline nodes and the eventlogger.Broker may check for a
// cancelled context and refuse to process the nodes further.
ns, err := nshelper.FromContext(ctx)
if err != nil {
return fmt.Errorf("namespace missing from context: %w", err)
}

tempContext, auditCancel := context.WithTimeout(context.Background(), timeout)
defer auditCancel()
auditContext = nshelper.ContextWithNamespace(tempContext, ns)

b.logger.Trace("log request requires a derived context (original context was not viable)", "namespace ID", ns.ID, "namespace path", ns.Path, "timeout", timeout)
// Get a context to use for auditing.
auditContext, auditCancel, err := getAuditContext(ctx)
if err != nil {
return err
}
defer auditCancel()

var status eventlogger.Status
if hasAuditPipelines(b.broker) {
Expand Down Expand Up @@ -361,29 +344,12 @@ func (b *Broker) LogResponse(ctx context.Context, in *logical.LogInput) (retErr

e.Data = in

// Evaluate whether we need a new context for auditing.
var auditContext context.Context
if isContextViable(ctx) {
auditContext = ctx
} else {
// In cases where we are trying to audit the response, and the existing
// context is not viable due to a short deadline, we detach ourselves from
// the original context (keeping only the namespace).
// This is so that we get a fair run at writing audit entries if Vault
// has taken up a lot of time handling the request before audit (response)
// is triggered. Pipeline nodes and the eventlogger.Broker may check for a
// cancelled context and refuse to process the nodes further.
ns, err := nshelper.FromContext(ctx)
if err != nil {
return fmt.Errorf("namespace missing from context: %w", err)
}

tempContext, auditCancel := context.WithTimeout(context.Background(), timeout)
defer auditCancel()
auditContext = nshelper.ContextWithNamespace(tempContext, ns)

b.logger.Trace("log response requires a derived context (original context was not viable)", "namespace ID", ns.ID, "namespace path", ns.Path, "timeout", timeout)
// Get a context to use for auditing.
auditContext, auditCancel, err := getAuditContext(ctx)
if err != nil {
return err
}
defer auditCancel()

var status eventlogger.Status
if hasAuditPipelines(b.broker) {
Expand Down Expand Up @@ -463,33 +429,19 @@ func (b *Broker) IsRegistered(name string) bool {
return b.isRegisteredByName(name)
}

// isContextViable examines the supplied context to see if its own deadline would
// occur later than a newly created context with a specific timeout.
// Additionally, whether the supplied context is already cancelled, thus making it
// unviable.
// If the existing context is viable it can be used 'as-is', if not, the caller
// should consider creating a new context with the relevant deadline and associated
// context values (e.g. namespace) in order to reduce the likelihood that the
// audit system believes there is a failure in audit (and updating its metrics)
// when the root cause is elsewhere.
func isContextViable(ctx context.Context) bool {
if ctx == nil {
return false
}

select {
case <-ctx.Done():
return false
default:
// getAuditContext extracts the namespace from the specified context and returns
// a new context and cancelation function, completely detached from the original
// with a timeout.
// NOTE: When error is nil, the context.CancelFunc returned from this function
// should be deferred immediately by the caller to prevent resource leaks.
func getAuditContext(ctx context.Context) (context.Context, context.CancelFunc, error) {
ns, err := nshelper.FromContext(ctx)
if err != nil {
return nil, nil, fmt.Errorf("namespace missing from context: %w", err)
}

deadline, hasDeadline := ctx.Deadline()

// If there's no deadline on the context then we don't need to worry about
// it being cancelled due to a timeout.
if !hasDeadline {
return true
}
tempContext := nshelper.ContextWithNamespace(context.Background(), ns)
auditContext, auditCancel := context.WithTimeout(tempContext, timeout)

return deadline.After(time.Now().Add(timeout))
return auditContext, auditCancel, nil
}
79 changes: 38 additions & 41 deletions audit/broker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,54 +159,51 @@ func BenchmarkAuditBroker_File_Request_DevNull(b *testing.B) {
})
}

// TestBroker_isContextViable_basics checks the expected result of isContextViable
// for basic inputs such as nil, cancelled context and a never-ending context.
func TestBroker_isContextViable_basics(t *testing.T) {
// TestBroker_getAuditContext_NoNamespace checks that we get the right error when
// trying to get an audit context with no namespace.
func TestBroker_getAuditContext_NoNamespace(t *testing.T) {
t.Parallel()

require.False(t, isContextViable(nil))
ctx, cancel := context.WithCancel(context.Background())
cancel()
require.False(t, isContextViable(ctx))
require.True(t, isContextViable(context.Background()))
_, _, err := getAuditContext(context.Background())
require.Error(t, err)
require.EqualError(t, err, "namespace missing from context: no namespace")
}

// TestBroker_isContextViable_timeouts checks the expected result of isContextViable
// for various timeout durations.
func TestBroker_isContextViable_timeouts(t *testing.T) {
// TestBroker_getAuditContext checks that we get a context back which isn't linked
// to the original context, and contains our namespace.
func TestBroker_getAuditContext(t *testing.T) {
t.Parallel()

tests := map[string]struct {
timeout time.Duration
expected bool
}{
"2s-smaller-deadline": {
timeout: timeout - 2*time.Second,
expected: false,
},
"same-deadline": {
timeout: timeout,
expected: false, // Expected as a near miss
},
"same-deadline-plus": {
timeout: timeout + 100*time.Millisecond,
expected: true,
},
"2x-longer-deadline": {
timeout: timeout * 2,
expected: true,
},
// context with namespace
ns := &nshelper.Namespace{
ID: "foo",
Path: "foo/",
}

for name, tc := range tests {
name := name
tc := tc
t.Run(name, func(t *testing.T) {
t.Parallel()
// Create a context with a namespace.
originalContext, originalCancel := context.WithCancel(context.Background())
t.Cleanup(originalCancel)
nsContext := nshelper.ContextWithNamespace(originalContext, ns)

ctx, cancel := context.WithTimeout(context.Background(), tc.timeout)
t.Cleanup(func() { cancel() })
require.Equal(t, tc.expected, isContextViable(ctx))
})
}
// Get the audit context
auditContext, auditCancel, err := getAuditContext(nsContext)
t.Cleanup(auditCancel)

require.NoError(t, err)
require.NotNil(t, auditContext)
require.NotNil(t, auditCancel)

// Ensure the namespace is there too.
val, err := nshelper.FromContext(auditContext)
require.NoError(t, err)
require.Equal(t, ns, val)

// Now cancel the original context and ensure it is done but audit context isn't.
originalCancel()
require.NotNil(t, originalContext.Err())
require.Nil(t, auditContext.Err())

// Now cancel the audit context and ensure that it is done.
auditCancel()
require.NotNil(t, auditContext.Err())
}
3 changes: 3 additions & 0 deletions changelog/28286.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
audit: Internal implementation changes to the audit subsystem which improve relability.
```
Loading