Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Customize results cleaning (using smuggled interface) #3235

Merged
merged 8 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions pkg/detectors/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ func WithSkipIDs(skipIDs []string) func(*scanner) {
// Ensure the scanner satisfies the interface at compile time.
var _ detectors.Detector = (*scanner)(nil)
var _ detectors.MultiPartCredentialProvider = (*scanner)(nil)
var _ detectors.CustomResultsCleaner = (*scanner)(nil)

var (
defaultVerificationClient = common.SaneHttpClient()
Expand Down Expand Up @@ -245,7 +246,11 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result
}
}
}
return awsCustomCleanResults(results), nil
return results, nil
}

func (s scanner) ShouldCleanResultsIrrespectiveOfConfiguration() bool {
return true
}

func (s scanner) verifyMatch(ctx context.Context, resIDMatch, resSecretMatch string, retryOn403 bool) (bool, map[string]string, error) {
Expand Down Expand Up @@ -399,7 +404,7 @@ func (s scanner) verifyCanary(resIDMatch, resSecretMatch string) (bool, string,
}
}

func awsCustomCleanResults(results []detectors.Result) []detectors.Result {
func (s scanner) CleanResults(results []detectors.Result) []detectors.Result {
if len(results) == 0 {
return results
}
Expand Down
9 changes: 7 additions & 2 deletions pkg/detectors/awssessionkeys/awssessionkey.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ func WithSkipIDs(skipIDs []string) func(*scanner) {

// Ensure the scanner satisfies the interface at compile time.
var _ detectors.Detector = (*scanner)(nil)
var _ detectors.CustomResultsCleaner = (*scanner)(nil)

var (
defaultVerificationClient = common.SaneHttpClient()
Expand Down Expand Up @@ -167,7 +168,11 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result
}
}
}
return awsCustomCleanResults(results), nil
return results, nil
}

func (s scanner) ShouldCleanResultsIrrespectiveOfConfiguration() bool {
return true
}

func (s scanner) verifyMatch(ctx context.Context, resIDMatch, resSecretMatch string, resSessionMatch string, retryOn403 bool) (bool, map[string]string, error) {
Expand Down Expand Up @@ -283,7 +288,7 @@ func (s scanner) verifyMatch(ctx context.Context, resIDMatch, resSecretMatch str
}
}

func awsCustomCleanResults(results []detectors.Result) []detectors.Result {
func (s scanner) CleanResults(results []detectors.Result) []detectors.Result {
if len(results) == 0 {
return results
}
Expand Down
16 changes: 16 additions & 0 deletions pkg/detectors/detectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,22 @@ type Detector interface {
Type() detectorspb.DetectorType
}

// CustomResultsCleaner is an optional interface that a detector can implement to customize how its generated results
// are "cleaned," which is defined as removing superfluous results from those found in a given chunk. The default
// implementation of this logic removes all unverified results if there are any verified results, and all unverified
// results except for one otherwise, but this interface allows a detector to specify different logic. (This logic must
// be implemented outside results generation because there are circumstances under which the engine should not execute
// it.)
type CustomResultsCleaner interface {
// CleanResults removes "superfluous" results from a result set (where the definition of "superfluous" is detector-
// specific).
CleanResults(results []Result) []Result
// ShouldCleanResultsIrrespectiveOfConfiguration allows a custom cleaner to instruct the engine to ignore
// user-provided configuration that controls whether results are cleaned. (User-provided configuration is not the
// only factor that determines whether the engine runs cleaning logic.)
ShouldCleanResultsIrrespectiveOfConfiguration() bool
Comment on lines +39 to +42
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think this is something that would happen often?

Copy link
Collaborator Author

@rosecodym rosecodym Aug 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's needed for aws and awssessionkey today, but I doubt it would be used for anything else ever.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm.. what are your thoughts on special-casing aws and awssessionkey detectors instead of making this a required method for the interface?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am extremely opposed to special-casing any detectors in the code. My position is that if we've created a bad abstraction, we should own that, rather than splitting it into multiple parts with invisible coupling just so that we don't have to look at the mess we've made.

}

// Versioner is an optional interface that a detector can implement to
// differentiate instances of the same detector type.
type Versioner interface {
Expand Down
2 changes: 1 addition & 1 deletion pkg/detectors/opsgenie/opsgenie.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,5 +94,5 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
results = append(results, s1)
}

return detectors.CleanResults(results), nil
return results, nil
}
7 changes: 4 additions & 3 deletions pkg/detectors/razorpay/razorpay.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,17 @@ package razorpay
import (
"context"
"encoding/json"
regexp "github.com/wasilibs/go-re2"
"io"
"net/http"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

type Scanner struct{
type Scanner struct {
detectors.DefaultMultiPartCredentialProvider
}

Expand Down Expand Up @@ -77,7 +78,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result

}

return detectors.CleanResults(results), nil
return results, nil
}

func (s Scanner) Type() detectorspb.DetectorType {
Expand Down
2 changes: 1 addition & 1 deletion pkg/detectors/twilio/twilio.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
}
}

return detectors.CleanResults(results), nil
return results, nil
}

func (s Scanner) Type() detectorspb.DetectorType {
Expand Down
13 changes: 11 additions & 2 deletions pkg/engine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -1060,15 +1060,24 @@ func (e *Engine) filterResults(
detector *ahocorasick.DetectorMatch,
results []detectors.Result,
) []detectors.Result {
if e.filterUnverified {
results = detectors.CleanResults(results)
clean := detectors.CleanResults
ignoreConfig := false
if cleaner, ok := detector.Detector.(detectors.CustomResultsCleaner); ok {
clean = cleaner.CleanResults
ignoreConfig = cleaner.ShouldCleanResultsIrrespectiveOfConfiguration()
}
if e.filterUnverified || ignoreConfig {
results = clean(results)
}

if !e.retainFalsePositives {
results = detectors.FilterKnownFalsePositives(ctx, detector.Detector, results)
}

if e.filterEntropy != 0 {
results = detectors.FilterResultsWithEntropy(ctx, results, e.filterEntropy, e.retainFalsePositives)
}

return results
}

Expand Down
69 changes: 69 additions & 0 deletions pkg/engine/engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,75 @@ func TestLikelyDuplicate(t *testing.T) {
}
}

type customCleaner struct {
ignoreConfig bool
}

var _ detectors.CustomResultsCleaner = (*customCleaner)(nil)
var _ detectors.Detector = (*customCleaner)(nil)

func (c customCleaner) FromData(aCtx.Context, bool, []byte) ([]detectors.Result, error) {
return []detectors.Result{}, nil
}

func (c customCleaner) Keywords() []string { return []string{} }
func (c customCleaner) Type() detectorspb.DetectorType { return detectorspb.DetectorType(-1) }

func (c customCleaner) CleanResults([]detectors.Result) []detectors.Result {
return []detectors.Result{}
}
func (c customCleaner) ShouldCleanResultsIrrespectiveOfConfiguration() bool { return c.ignoreConfig }

func TestFilterResults_CustomCleaner(t *testing.T) {
testCases := []struct {
name string
cleaningConfigured bool
ignoreConfig bool
resultsToClean []detectors.Result
wantResults []detectors.Result
}{
{
name: "respect config to clean",
cleaningConfigured: true,
ignoreConfig: false,
resultsToClean: []detectors.Result{{}},
wantResults: []detectors.Result{},
},
{
name: "respect config to not clean",
cleaningConfigured: false,
ignoreConfig: false,
resultsToClean: []detectors.Result{{}},
wantResults: []detectors.Result{{}},
},
{
name: "clean irrespective of config",
cleaningConfigured: false,
ignoreConfig: true,
resultsToClean: []detectors.Result{{}},
wantResults: []detectors.Result{},
},
}

for _, tt := range testCases {
t.Run(tt.name, func(t *testing.T) {
match := ahocorasick.DetectorMatch{
Detector: customCleaner{
ignoreConfig: tt.ignoreConfig,
},
}
engine := Engine{
filterUnverified: tt.cleaningConfigured,
retainFalsePositives: true,
}

cleaned := engine.filterResults(context.Background(), &match, tt.resultsToClean)

assert.ElementsMatch(t, tt.wantResults, cleaned)
})
}
}

func BenchmarkPopulateMatchingDetectors(b *testing.B) {
allDetectors := DefaultDetectors()
ac := ahocorasick.NewAhoCorasickCore(allDetectors)
Expand Down
Loading