Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

standardize email pattern #3524

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/common/patterns.go
zricethezav marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"strings"
)

const EmailPattern = `\b(?:[a-z0-9!#$%&'*+/=?^_\x60{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_\x60{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])\b`
// EmailPattern is the source of a regular expression that matches an email
// address delimited by word boundaries. The entire address is wrapped in an
// outer capturing group, so it is reported as submatch 1 by
// FindAllStringSubmatch. The unquoted local part and the host-name labels are
// written with lowercase letter ranges only; the domain may alternatively be a
// bracketed address literal.
const EmailPattern = `\b((?:[a-z0-9!#$%&'*+/=?^_\x60{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_\x60{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\]))\b`
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The outer capture group is needed because our detectors rely on FindAllStringSubmatch and read the email from the first submatch (match[1]). Open to any ideas you may have!

// SubDomainPattern matches, between word boundaries, a run of 1 to 63
// characters that starts and ends with an ASCII letter or digit and may contain
// hyphens in between. The run is captured as group 1.
const SubDomainPattern = `\b([A-Za-z0-9](?:[A-Za-z0-9\-]{0,61}[A-Za-z0-9])?)\b`

// UUIDPattern matches and captures an 8-4-4-4-12 hyphenated identifier made of
// digits and the lowercase letters a-f.
const UUIDPattern = `\b([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\b`

// UUIDPatternUpperCase matches and captures the same 8-4-4-4-12 layout, but
// made of digits and the uppercase letters A-Z (not only A-F).
const UUIDPatternUpperCase = `\b([0-9A-Z]{8}-[0-9A-Z]{4}-[0-9A-Z]{4}-[0-9A-Z]{4}-[0-9A-Z]{12})\b`
Expand Down
44 changes: 43 additions & 1 deletion pkg/common/patterns_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
package common

import (
"github.com/stretchr/testify/assert"
"regexp"
"testing"

"github.com/stretchr/testify/assert"
)

const (
Expand All @@ -13,6 +14,47 @@ const (
passwordRegex = `(?im)(?:pass|password)\S{0,40}?[:=\s]{1,3}[ '"=]{0,1}([^:^<>;.*&|£\n\s]{4,40})`
)

// TestEmailRegexCheck compiles EmailPattern and checks that scanning a fixed
// block of text yields exactly the addresses listed under "positive cases",
// in order, and none of the strings listed under "negative cases".
func TestEmailRegexCheck(t *testing.T) {
	testEmails := `
// positive cases
standard email = john.doe@example.com
subdomain email = jane_doe123@sub.domain.co.us
organization email = alice.smith@test.org
test email = bob@test.name
with tag email = user.name+tag@domain.com
hyphen domain = info@my-site.net
service email = contact@web-service.io
underscore email = example_user@domain.info
departement email = first.last@department.company.edu
alphanumeric email = user1234@domain.co
local server email = admin@local-server.local
dot email = test.email@my-email-service.xyz
special char email = special@characters.com
support email = support@customer-service.org

// negative cases
not an email = abc.123@z
looks like email = test@user <- no domain
email but not = user12@service.COM <- capital letters not supported for domain
random text = here's some information about local-user@edu user
`

	expectedStr := []string{
		"john.doe@example.com", "jane_doe123@sub.domain.co.us",
		"alice.smith@test.org", "bob@test.name", "user.name+tag@domain.com",
		"info@my-site.net", "contact@web-service.io", "example_user@domain.info",
		"first.last@department.company.edu", "user1234@domain.co", "admin@local-server.local",
		"test.email@my-email-service.xyz", "special@characters.com", "support@customer-service.org",
	}

	emailRegex := regexp.MustCompile(EmailPattern)

	// FindAllString returns whole matches, so the capture groups inside the
	// pattern do not affect what is compared here.
	emailMatches := emailRegex.FindAllString(testEmails, -1)

	// testify expects (t, expected, actual); passing them in that order keeps
	// the labels in failure output correct.
	assert.Exactly(t, expectedStr, emailMatches)
}

func TestUsernameRegexCheck(t *testing.T) {
usernameRegexPat := UsernameRegexCheck(usernamePattern)

Expand Down
22 changes: 11 additions & 11 deletions pkg/detectors/checkvist/checkvist.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,18 @@ package checkvist

import (
"context"
regexp "github.com/wasilibs/go-re2"
"net/http"
"net/url"
"strings"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

type Scanner struct{
type Scanner struct {
detectors.DefaultMultiPartCredentialProvider
}

Expand All @@ -24,7 +25,7 @@ var (

// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"checkvist"}) + `\b([0-9a-zA-Z]{14})\b`)
emailPat = regexp.MustCompile(detectors.PrefixRegex([]string{"checkvist"}) + `\b([\w\.-]+@[\w-]+\.[\w\.-]{2,5})\b`)
emailPat = regexp.MustCompile(detectors.PrefixRegex([]string{"checkvist"}) + common.EmailPattern)
)

// Keywords are used for efficiently pre-filtering chunks.
Expand All @@ -38,14 +39,13 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
dataStr := string(data)

matches := keyPat.FindAllStringSubmatch(dataStr, -1)
emailMatches := emailPat.FindAllStringSubmatch(dataStr, -1)

for _, emailMatch := range emailMatches {
if len(emailMatch) != 2 {
continue
}
resEmailMatch := strings.TrimSpace(emailMatch[1])
uniqueEmailMatches := make(map[string]struct{})
for _, match := range emailPat.FindAllStringSubmatch(dataStr, -1) {
uniqueEmailMatches[strings.TrimSpace(match[1])] = struct{}{}
}

for emailMatch := range uniqueEmailMatches {
for _, match := range matches {
if len(match) != 2 {
continue
Expand All @@ -55,12 +55,12 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
s1 := detectors.Result{
DetectorType: detectorspb.DetectorType_Checkvist,
Raw: []byte(resMatch),
RawV2: []byte(resMatch + resEmailMatch),
RawV2: []byte(resMatch + emailMatch),
}

if verify {
payload := url.Values{}
payload.Add("username", resEmailMatch)
payload.Add("username", emailMatch)
payload.Add("remote_key", resMatch)

req, err := http.NewRequestWithContext(ctx, "GET", "https://checkvist.com/auth/login.json?version=2", strings.NewReader(payload.Encode()))
Expand Down
118 changes: 118 additions & 0 deletions pkg/detectors/checkvist/checkvist_integration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
//go:build detectors
// +build detectors

package checkvist

import (
"context"
"fmt"
"testing"
"time"

"github.com/kylelemons/godebug/pretty"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

// TestCheckvist_FromChunk runs Scanner.FromData with verification enabled
// against text built from secrets fetched via common.GetSecret, and compares
// the returned results with the expected detector type and verification state.
//
// The secret material itself is not compared: Raw must be non-empty, and both
// Raw and RawV2 are cleared before the comparison because the expected results
// set neither field.
func TestCheckvist_FromChunk(t *testing.T) {
	// The 5-second timeout applies only to fetching the test secrets.
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
	defer cancel()
	testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2")
	if err != nil {
		t.Fatalf("could not get test secrets from GCP: %s", err)
	}
	user := testSecrets.MustGetField("CHECKVIST_EMAIL")
	secret := testSecrets.MustGetField("CHECKVIST")
	inactiveSecret := testSecrets.MustGetField("CHECKVIST_INACTIVE")

	type args struct {
		ctx    context.Context
		data   []byte
		verify bool
	}
	tests := []struct {
		name    string
		s       Scanner
		args    args
		want    []detectors.Result
		wantErr bool
	}{
		{
			name: "found, verified",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte(fmt.Sprintf("You can find a checkvist user %s with checkvist secret %s within", user, secret)),
				verify: true,
			},
			want: []detectors.Result{
				{
					DetectorType: detectorspb.DetectorType_Checkvist,
					Verified:     true,
				},
			},
			wantErr: false,
		},
		{
			name: "found, unverified",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte(fmt.Sprintf("You can find a checkvist user %s with checkvist secret %s within but not valid", user, inactiveSecret)), // the secret would satisfy the regex but not pass validation
				verify: true,
			},
			want: []detectors.Result{
				{
					DetectorType: detectorspb.DetectorType_Checkvist,
					Verified:     false,
				},
			},
			wantErr: false,
		},
		{
			name: "not found",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte("You cannot find the secret within"),
				verify: true,
			},
			want:    nil,
			wantErr: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			s := Scanner{}
			got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
			if (err != nil) != tt.wantErr {
				t.Errorf("Checkvist.FromData() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			for i := range got {
				if len(got[i].Raw) == 0 {
					t.Fatalf("no raw secret present: \n %+v", got[i])
				}
				// Strip the secret-bearing fields so the comparison only covers
				// the detector type and verification state listed in want.
				got[i].Raw = nil
				got[i].RawV2 = nil
			}
			if diff := pretty.Compare(got, tt.want); diff != "" {
				t.Errorf("Checkvist.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
			}
		})
	}
}

// BenchmarkFromData times Scanner.FromData with verification disabled, running
// one sub-benchmark per named input returned by
// detectors.MustGetBenchmarkData. The benchmark aborts if FromData returns an
// error, so timings are never reported for a failing scanner.
func BenchmarkFromData(benchmark *testing.B) {
	ctx := context.Background()
	s := Scanner{}
	for name, data := range detectors.MustGetBenchmarkData() {
		benchmark.Run(name, func(b *testing.B) {
			b.ResetTimer()
			for n := 0; n < b.N; n++ {
				if _, err := s.FromData(ctx, false, data); err != nil {
					b.Fatal(err)
				}
			}
		})
	}
}
Loading
Loading