Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VA: Fix performRemoteValidation goroutine leak #7727

Merged
merged 5 commits into from
Sep 30, 2024
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 61 additions & 42 deletions va/va.go
Original file line number Diff line number Diff line change
Expand Up @@ -467,74 +467,93 @@ func (va *ValidationAuthorityImpl) performRemoteValidation(
err error
}

ctx, cancel := context.WithCancel(ctx)
defer cancel()
results := make(chan *rvaResult)

for _, i := range rand.Perm(len(va.remoteVAs)) {
remoteVA := va.remoteVAs[i]
go func(rva RemoteVA, out chan<- *rvaResult) {
res, err := rva.PerformValidation(ctx, req)
out <- &rvaResult{
select {
case out <- &rvaResult{
hostname: rva.Address,
response: res,
err: err,
}:
case <-ctx.Done():
// Context canceled, exit the goroutine.
return
beautifulentropy marked this conversation as resolved.
Show resolved Hide resolved
}
}(remoteVA, results)
}

required := len(va.remoteVAs) - va.maxRemoteFailures
good := 0
bad := 0
total := 0
var firstProb *probs.ProblemDetails

for res := range results {
var currProb *probs.ProblemDetails

if res.err != nil {
bad++

if canceled.Is(res.err) {
currProb = probs.ServerInternal("Remote PerformValidation RPC canceled")
for {
select {
case res := <-results:
total++

var currProb *probs.ProblemDetails

if res.err != nil {
bad++

if canceled.Is(res.err) {
currProb = probs.ServerInternal("Remote PerformValidation RPC canceled")
} else {
va.log.Errf("Remote VA %q.PerformValidation failed: %s", res.hostname, res.err)
currProb = probs.ServerInternal("Remote PerformValidation RPC failed")
}
} else if res.response.Problems != nil {
bad++

var err error
currProb, err = bgrpc.PBToProblemDetails(res.response.Problems)
if err != nil {
va.log.Errf("Remote VA %q.PerformValidation returned malformed problem: %s", res.hostname, err)
currProb = probs.ServerInternal("Remote PerformValidation RPC returned malformed result")
}
} else {
va.log.Errf("Remote VA %q.PerformValidation failed: %s", res.hostname, res.err)
currProb = probs.ServerInternal("Remote PerformValidation RPC failed")
good++
}
} else if res.response.Problems != nil {
bad++

var err error
currProb, err = bgrpc.PBToProblemDetails(res.response.Problems)
if err != nil {
va.log.Errf("Remote VA %q.PerformValidation returned malformed problem: %s", res.hostname, err)
currProb = probs.ServerInternal("Remote PerformValidation RPC returned malformed result")

if firstProb == nil && currProb != nil {
firstProb = currProb
}
} else {
good++
}

if firstProb == nil && currProb != nil {
firstProb = currProb
}
if good >= required {
// Enough validations have succeeded to consider the
// authorization valid. Stop any remaining goroutines.
cancel()
beautifulentropy marked this conversation as resolved.
Show resolved Hide resolved
return nil
}
if bad > va.maxRemoteFailures {
// Too many validations have failed to consider the
// authorization valid. Stop any remaining goroutines.
va.metrics.remoteValidationFailures.Inc()
firstProb.Detail = fmt.Sprintf("During secondary validation: %s", firstProb.Detail)
cancel()
return firstProb
}

// Return as soon as we have enough successes or failures for a definitive result.
if good >= required {
return nil
}
if bad > va.maxRemoteFailures {
va.metrics.remoteValidationFailures.Inc()
firstProb.Detail = fmt.Sprintf("During secondary validation: %s", firstProb.Detail)
return firstProb
}
if total >= len(va.remoteVAs) {
// This condition should not occur - it indicates the good/bad
// counts neither met the required threshold nor the
// maxRemoteFailures threshold.
cancel()
return probs.ServerInternal("Too few remote PerformValidation RPC results")
}

// If we somehow haven't returned early, we need to break the loop once all
// of the VAs have returned a result.
if good+bad >= len(va.remoteVAs) {
break
case <-ctx.Done():
return probs.ServerInternal("Context canceled before sufficient results received")
}
}

// This condition should not occur - it indicates the good/bad counts neither
// met the required threshold nor the maxRemoteFailures threshold.
return probs.ServerInternal("Too few remote PerformValidation RPC results")
}

// logRemoteResults is called by `processRemoteCAAResults` when the
Expand Down