-
Notifications
You must be signed in to change notification settings - Fork 1.7k
/
detectors.go
290 lines (256 loc) · 10.2 KB
/
detectors.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
package detectors
import (
"context"
"crypto/rand"
"errors"
"math/big"
"net/url"
"strings"
"unicode"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)
// Detector defines an interface for scanning for and verifying secrets.
// These four methods are the required surface of a detector; additional, optional
// capabilities are expressed through separate interfaces such as Versioner.
type Detector interface {
// FromData will scan bytes for results, and optionally verify them.
// The verify flag selects whether verification is attempted. It returns the
// results found in data, or an error.
FromData(ctx context.Context, verify bool, data []byte) ([]Result, error)
// Keywords are used for efficiently pre-filtering chunks using substring operations.
// Use unique identifiers that are part of the secret if you can, or the provider name.
Keywords() []string
// Type returns the DetectorType number from detectors.proto for the given detector.
Type() detectorspb.DetectorType
// Description returns a description for the result being detected.
Description() string
}
// CustomResultsCleaner is an optional interface that a detector can implement to customize how its generated results
// are "cleaned," which is defined as removing superfluous results from those found in a given chunk. The default
// implementation of this logic removes all unverified results if there are any verified results, and all unverified
// results except for one otherwise, but this interface allows a detector to specify different logic. (This logic must
// be implemented outside results generation because there are circumstances under which the engine should not execute
// it.)
type CustomResultsCleaner interface {
// CleanResults removes "superfluous" results from a result set (where the definition of "superfluous" is detector-
// specific). It takes the full result set and returns the results that should be kept.
CleanResults(results []Result) []Result
// ShouldCleanResultsIrrespectiveOfConfiguration allows a custom cleaner to instruct the engine to ignore
// user-provided configuration that controls whether results are cleaned. (User-provided configuration is not the
// only factor that determines whether the engine runs cleaning logic.)
// NOTE(review): presumably a true return value means "clean regardless of user configuration" — confirm against
// the engine.
ShouldCleanResultsIrrespectiveOfConfiguration() bool
}
// Versioner is an optional interface that a detector can implement to
// differentiate instances of the same detector type.
type Versioner interface {
// Version returns the integer that tells this detector instance apart from
// other instances of the same detector type.
Version() int
}
// MaxSecretSizeProvider is an optional interface that a detector can implement to
// provide a custom max size for the secret it finds.
type MaxSecretSizeProvider interface {
// MaxSecretSize returns the custom maximum secret size for this detector.
// NOTE(review): the unit is not stated here; presumably bytes — confirm against the caller.
MaxSecretSize() int64
}
// StartOffsetProvider is an optional interface that a detector can implement to
// provide a custom start offset for the secret it finds.
type StartOffsetProvider interface {
// StartOffset returns the custom start offset for this detector.
// NOTE(review): the reference point and unit of the offset are not stated here — confirm against the caller.
StartOffset() int64
}
// MultiPartCredentialProvider is an optional interface that a detector can implement
// to indicate its compatibility with multi-part credentials and provide the maximum
// secret size for the credential it finds.
type MultiPartCredentialProvider interface {
// MaxCredentialSpan returns the maximum span or range of characters that the
// detector should consider when searching for a multi-part credential, that is,
// a credential made of several pieces that are matched together.
MaxCredentialSpan() int64
}
// EndpointCustomizer is an optional interface that a detector can implement to
// support verifying against user-supplied endpoints.
type EndpointCustomizer interface {
// SetConfiguredEndpoints takes zero or more endpoint strings and may reject them with an error.
// NOTE(review): presumably these are the user-supplied endpoints to verify against — confirm.
SetConfiguredEndpoints(...string) error
// SetCloudEndpoint takes a single endpoint string.
// NOTE(review): presumably the provider's default hosted endpoint (compare CloudProvider) — confirm.
SetCloudEndpoint(string)
// UseCloudEndpoint takes a boolean toggle.
// NOTE(review): presumably switches verification against the cloud endpoint on or off — confirm.
UseCloudEndpoint(bool)
// UseFoundEndpoints takes a boolean toggle.
// NOTE(review): presumably switches verification against endpoints found in scanned data on or off — confirm.
UseFoundEndpoints(bool)
}
// CloudProvider is implemented by types that can report a cloud endpoint.
// NOTE(review): presumably the default hosted endpoint of the service a detector verifies against, for use
// together with EndpointCustomizer.SetCloudEndpoint — confirm against the implementations.
type CloudProvider interface {
// CloudEndpoint returns the cloud endpoint as a string.
CloudEndpoint() string
}
// Result describes a single finding returned by a Detector's FromData method.
type Result struct {
// DetectorType is the type of Detector.
DetectorType detectorspb.DetectorType
// DetectorName is the name of the Detector. Used for custom detectors.
DetectorName string
// Verified marks the result as a verified secret. CleanResults keeps verified results in preference to
// unverified ones.
Verified bool
// VerificationFromCache indicates whether this result's verification result came from the verification cache rather
// than an actual remote request.
VerificationFromCache bool
// Raw contains the raw secret identifier data. Prefer IDs over secrets since it is used for deduping after hashing.
Raw []byte
// RawV2 contains the raw secret identifier that is a combination of both the ID and the secret.
// This is used for secrets that are multi part and could have the same ID. Ex: AWS credentials
RawV2 []byte
// Redacted contains the redacted version of the raw secret identification data for display purposes.
// A secret ID should be used if available. CleanResults also uses this value as its deduplication key for
// verified results.
Redacted string
// ExtraData holds additional string key/value pairs attached to the result.
// NOTE(review): presumably free-form, detector-specific details — confirm.
ExtraData map[string]string
// StructuredData holds additional data in the protobuf-defined StructuredData form.
// NOTE(review): its intended contents are defined in detectorspb, not here — confirm before relying on it.
StructuredData *detectorspb.StructuredData
// This field should only be populated if the verification process itself failed in a way that provides no
// information about the verification status of the candidate secret, such as if the verification request timed out.
// It is unexported; use SetVerificationError and VerificationError to write and read it.
verificationError error
// AnalysisInfo should be set with information required for credential
// analysis to run. The keys of the map are analyzer specific and
// should match what is expected in the corresponding analyzer.
AnalysisInfo map[string]string
}
// CopyVerificationInfo overwrites r's verification status and verification error with the values stored in from.
// It is used when verification info is loaded from a verification cache. A method is required because the
// verification error field is unexported, which prevents sensitive information from being stored in it by accident.
// No other field of r is touched.
func (r *Result) CopyVerificationInfo(from *Result) {
	r.Verified, r.verificationError = from.Verified, from.verificationError
}
// SetVerificationError is the only way to set a new verification error. Any sensitive values should be passed in as
// secrets so that they are redacted from the stored error message.
//
// A nil err is ignored: it neither sets nor clears a previously stored verification error.
func (r *Result) SetVerificationError(err error, secrets ...string) {
	if err == nil {
		return
	}
	r.verificationError = redactSecrets(err, secrets...)
}
// VerificationError returns the verification error stored on this result, or nil if none has been stored.
// It is the public accessor for the unexported verificationError field; in this file that field is written by
// SetVerificationError and CopyVerificationInfo.
func (r *Result) VerificationError() error {
return r.verificationError
}
// redactSecrets returns a new error whose message is the message of the last error in err's chain with every
// occurrence of each given secret replaced by [REDACTED].
//
// Only the text of the last error is kept (see unwrapToLast), so the returned error does not wrap err.
// Empty secrets are skipped: replacing the empty string would insert the marker between every character of the
// message instead of redacting anything. err must be non-nil.
func redactSecrets(err error, secrets ...string) error {
	msg := unwrapToLast(err).Error()
	for _, secret := range secrets {
		if secret == "" {
			continue
		}
		msg = strings.ReplaceAll(msg, secret, "[REDACTED]")
	}
	return errors.New(msg)
}

// unwrapToLast returns the last error in the chain of errors.
// This is added to exclude non-essential details (like URLs) for brevity and security.
// Also helps us optimize performance in redaction and enhance log clarity.
//
// The chain is followed with errors.Unwrap until it returns nil; an error that wraps nothing is returned as is.
func unwrapToLast(err error) error {
	for {
		inner := errors.Unwrap(err)
		if inner == nil {
			// We've reached the last error in the chain.
			return err
		}
		err = inner
	}
}
// ResultWithMetadata embeds a detector Result and adds information about where it was found.
// CopyMetadata fills the source-related fields (SourceMetadata, SourceID, JobID, SecretID, SourceType, SourceName)
// and Data from a sources.Chunk; the remaining fields are left for the caller to set.
type ResultWithMetadata struct {
// IsWordlistFalsePositive indicates whether this secret was flagged as a false positive based on a wordlist check
IsWordlistFalsePositive bool
// SourceMetadata contains source-specific contextual information.
SourceMetadata *source_metadatapb.MetaData
// SourceID is the ID of the source that the API uses to map secrets to specific sources.
SourceID sources.SourceID
// JobID is the ID of the job that the API uses to map secrets to specific jobs.
JobID sources.JobID
// SecretID is the ID of the secret, if it exists.
// Only secrets that are being reverified will have a SecretID.
SecretID int64
// SourceType is the type of Source.
SourceType sourcespb.SourceType
// SourceName is the name of the Source.
SourceName string
// Result is the embedded detector finding; its fields and methods are promoted onto ResultWithMetadata.
Result
// Data from the sources.Chunk which this result was emitted for
Data []byte
// DetectorDescription is the description of the Detector.
DetectorDescription string
// DecoderType is the type of decoder that was used to generate this result's data.
DecoderType detectorspb.DecoderType
}
// CopyMetadata wraps result in a ResultWithMetadata and copies the source-related fields of chunk onto it:
// SourceMetadata, SourceID, JobID, SecretID, SourceType, SourceName and Data. All other fields of the returned
// value keep their zero values. chunk must be non-nil.
func CopyMetadata(chunk *sources.Chunk, result Result) ResultWithMetadata {
	withMeta := ResultWithMetadata{Result: result}

	// Identify where the chunk came from.
	withMeta.SourceMetadata = chunk.SourceMetadata
	withMeta.SourceID = chunk.SourceID
	withMeta.JobID = chunk.JobID
	withMeta.SecretID = chunk.SecretID
	withMeta.SourceType = chunk.SourceType
	withMeta.SourceName = chunk.SourceName

	// Keep a reference to the chunk's data; the slice is shared with chunk, not copied.
	withMeta.Data = chunk.Data

	return withMeta
}
// CleanResults returns the verified secrets in results, and if there are no verified secrets, just the first
// unverified secret if there is any.
//
// Verified results are deduplicated by their Redacted value: when several verified results share one, only the
// last of them is kept. The kept results are returned in the order in which each Redacted value first appears in
// the input, so the output is the same on every call for the same input.
//
// The returned slice reuses the backing array of results; callers should use the return value in place of the
// slice they passed in.
func CleanResults(results []Result) []Result {
	if len(results) == 0 {
		return results
	}

	// position maps a Redacted value to the index its result occupies in kept.
	position := make(map[string]int)

	// Filter in place. Writes only ever land at or before the element currently being read, and the loop
	// variable is a copy, so no unread element is overwritten.
	kept := results[:0]
	for _, r := range results {
		if !r.Verified {
			continue
		}
		if i, seen := position[r.Redacted]; seen {
			// Same Redacted value seen before: the later result replaces the earlier one.
			kept[i] = r
			continue
		}
		position[r.Redacted] = len(kept)
		kept = append(kept, r)
	}

	if len(kept) == 0 {
		// Nothing verified: results is untouched, so report just its first element.
		return results[:1]
	}
	return kept
}
// PrefixRegex builds a regular-expression fragment that requires one of the given keywords to appear within
// 40 characters before whatever pattern (typically a capturing group) is appended after it.
// This can help prevent false positives.
//
// The fragment is a case-insensitive alternation of the keywords followed by a lazy gap of up to 40 characters,
// newlines and carriage returns included. Keywords are joined verbatim and are not regex-escaped.
func PrefixRegex(keywords []string) string {
	const (
		groupOpen = `(?i:`
		gapSuffix = `)(?:.|[\n\r]){0,40}?`
	)
	return groupOpen + strings.Join(keywords, "|") + gapSuffix
}
// KeyIsRandom is a low-cost check that a key contains at least one digit, used to reduce false positives.
// Any Unicode decimal digit counts, not only ASCII 0-9; an empty key yields false.
// Golang doesn't support regex lookaheads, so this must be done in a separate call.
// TODO improve checks. Shannon entropy did not work well.
func KeyIsRandom(key string) bool {
	hasDigit := false
	for _, r := range key {
		if hasDigit = unicode.IsDigit(r); hasDigit {
			break
		}
	}
	return hasDigit
}
// MustGetBenchmarkData returns randomly filled byte slices for use in benchmarks, keyed by a size label:
// "xsmall" (10 bytes), "small" (100 bytes), "medium" (1KB), "large" (10KB), "xlarge" (100KB) and "xxlarge" (1MB).
// The contents differ on every call. It panics if the random source returns an error.
func MustGetBenchmarkData() map[string][]byte {
	sizes := map[string]int{
		"xsmall":  10,          // 10 bytes
		"small":   100,         // 100 bytes
		"medium":  1024,        // 1KB
		"large":   10 * 1024,   // 10KB
		"xlarge":  100 * 1024,  // 100KB
		"xxlarge": 1024 * 1024, // 1MB
	}

	// Exclusive upper bound for one random byte. rand.Int only reads its bound, so a single value is shared
	// by every call instead of allocating a new one per generated byte.
	byteRange := big.NewInt(256)

	data := make(map[string][]byte, len(sizes))
	for key, size := range sizes {
		// Generating a byte slice of a specific size with random data.
		content := make([]byte, size)
		for i := range content {
			randomByte, err := rand.Int(rand.Reader, byteRange)
			if err != nil {
				panic(err)
			}
			content[i] = byte(randomByte.Int64())
		}
		data[key] = content
	}
	return data
}
// RedactURL returns the string form of u with its password replaced by eight asterisks; the username is kept.
//
// u is received by value, so the caller's URL is not modified. A password is always set on the copy, which means
// a URL without credentials is rendered with an empty username and the masked password. Because the URL encoder
// escapes "*" as "%2A", every "%2A" in the rendered string (not only in the credentials) is turned back into "*".
// Surrounding whitespace is trimmed from the result.
func RedactURL(u url.URL) string {
	u.User = url.UserPassword(u.User.Username(), "********")
	masked := strings.ReplaceAll(u.String(), "%2A", "*")
	return strings.TrimSpace(masked)
}
// ParseURLAndStripPathAndParams parses u and returns the resulting URL with its path and query string removed.
// Scheme, user info, host and fragment are left as parsed. If u cannot be parsed, it returns nil and the error
// from url.Parse.
func ParseURLAndStripPathAndParams(u string) (*url.URL, error) {
	parsed, err := url.Parse(u)
	if err != nil {
		return nil, err
	}

	// Clear both path representations so no stale encoded path is left behind.
	parsed.Path = ""
	parsed.RawPath = ""

	// Clear the query, including the marker that would otherwise keep a bare trailing "?" in the output.
	parsed.RawQuery = ""
	parsed.ForceQuery = false

	return parsed, nil
}