From 4970796056f995f732e9b7a9874642063b43a084 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 29 Aug 2024 10:32:02 -0400 Subject: [PATCH 01/10] add temporary targeting ability --- main.go | 2 ++ pkg/engine/github.go | 21 +++++++++++++++++++++ pkg/sources/sources.go | 2 ++ 3 files changed, 25 insertions(+) diff --git a/main.go b/main.go index 0dbf1f8d11e8..8fc7df8643ce 100644 --- a/main.go +++ b/main.go @@ -99,6 +99,7 @@ var ( githubScanIssueComments = githubScan.Flag("issue-comments", "Include issue descriptions and comments in scan.").Bool() githubScanPRComments = githubScan.Flag("pr-comments", "Include pull request descriptions and comments in scan.").Bool() githubScanGistComments = githubScan.Flag("gist-comments", "Include gist comments in scan.").Bool() + githubTarget = githubScan.Flag("target", "scan a target").Hidden().String() // GitHub Cross Fork Object Reference Experimental Feature githubExperimentalScan = cli.Command("github-experimental", "Run an experimental GitHub scan. Must specify at least one experimental sub-module to run: object-discovery.") @@ -627,6 +628,7 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics, IncludePullRequestComments: *githubScanPRComments, IncludeGistComments: *githubScanGistComments, Filter: filter, + Target: *githubTarget, } if err := eng.ScanGitHub(ctx, cfg); err != nil { return scanMetrics, fmt.Errorf("failed to scan Github: %v", err) diff --git a/pkg/engine/github.go b/pkg/engine/github.go index 9bf84072a7d0..17d0b09f038e 100644 --- a/pkg/engine/github.go +++ b/pkg/engine/github.go @@ -1,7 +1,10 @@ package engine import ( + "strings" + gogit "github.com/go-git/go-git/v5" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" @@ -58,6 +61,24 @@ func (e *Engine) ScanGitHub(ctx context.Context, c sources.GithubConfig) error { return err } githubSource.WithScanOptions(scanOptions) + + if c.Target != "" { + parts := strings.Split(c.Target, "@") + target := sources.ChunkingTarget{ + QueryCriteria: &source_metadatapb.MetaData{ + Data: &source_metadatapb.MetaData_Github{ + Github: &source_metadatapb.Github{ + Link: c.Repos[0], + File: parts[0], + Commit: parts[1], + }, + }, + }, + } + _, err = e.sourceManager.Run(ctx, sourceName, githubSource, target) + return err + } + _, err = e.sourceManager.Run(ctx, sourceName, githubSource) return err } diff --git a/pkg/sources/sources.go b/pkg/sources/sources.go index 39d280b2b0f2..b7427927377d 100644 --- a/pkg/sources/sources.go +++ b/pkg/sources/sources.go @@ -236,6 +236,8 @@ type GithubConfig struct { SkipBinaries bool // IncludeWikis indicates whether to include repository wikis in the scan. IncludeWikis bool + + Target string } // GitHubExperimentalConfig defines the optional configuration for an experimental GitHub source. From ea6f5b1b83f7f20b5f1f4b617759b7fc483e5d78 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 29 Aug 2024 10:43:52 -0400 Subject: [PATCH 02/10] add failing integration test --- pkg/sources/github/github_integration_test.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pkg/sources/github/github_integration_test.go b/pkg/sources/github/github_integration_test.go index cfc6fe01703b..6f1fb3e9a31d 100644 --- a/pkg/sources/github/github_integration_test.go +++ b/pkg/sources/github/github_integration_test.go @@ -758,6 +758,24 @@ func TestSource_Chunks_TargetedScan(t *testing.T) { }, wantChunks: 1, }, + { + name: "targeted scan, binary file", + init: init{ + name: "test source", + connection: &sourcespb.GitHub{Credential: &sourcespb.GitHub_Unauthenticated{}}, + queryCriteria: &source_metadatapb.MetaData{ + Data: &source_metadatapb.MetaData_Github{ + Github: &source_metadatapb.Github{ + Repository: "https://github.com/google/prompt-to-prompt.git", + Link: "https://github.com/google/prompt-to-prompt/blob/160965d6d4a54ce93eac183371625fe2f8446c10/null_text_w_ptp.ipynb", + Commit: "160965d6d4a54ce93eac183371625fe2f8446c10", + File: "null_text_w_ptp.ipynb", + }, + }, + }, + }, + wantChunks: 1, + }, { name: "no file in commit", init: init{ From 587dd47b3114ba5eea1c88478fce6df452ce13b3 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 29 Aug 2024 11:06:23 -0400 Subject: [PATCH 03/10] move existing path into fn --- pkg/sources/github/github.go | 25 +++++++------------------ pkg/sources/github/repo.go | 22 ++++++++++++++++++++-- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index a6eba956e7c7..44581dacbbc7 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -1398,24 +1398,13 @@ func (s *Source) scanTarget(ctx context.Context, target sources.ChunkingTarget, sha: meta.GetCommit(), filename: meta.GetFile(), } - res, err := s.getDiffForFileInCommit(ctx, qry) - if err != nil { - return err - } - chunk := &sources.Chunk{ - SourceType: s.Type(), - SourceName: s.name, - SourceID: s.SourceID(), - JobID: s.JobID(), - SecretID: target.SecretID, - Data: []byte(stripLeadingPlusMinus(res)), - SourceMetadata: &source_metadatapb.MetaData{ - Data: &source_metadatapb.MetaData_Github{Github: meta}, - }, - Verify: s.verify, - } - - return common.CancellableWrite(ctx, chunksChan, chunk) + _, err = s.getDiffForFileInCommit( + ctx, + qry, + sources.ChanReporter{Ch: chunksChan}, + target.SecretID, + &source_metadatapb.MetaData_Github{Github: meta}) + return err } // stripLeadingPlusMinus removes leading + and - characters from lines in a diff string. These characters exist in the diff --git a/pkg/sources/github/repo.go b/pkg/sources/github/repo.go index 67e7728816c7..04e035f17c1b 100644 --- a/pkg/sources/github/repo.go +++ b/pkg/sources/github/repo.go @@ -10,6 +10,7 @@ import ( gogit "github.com/go-git/go-git/v5" "github.com/google/go-github/v63/github" + "github.com/trufflesecurity/trufflehog/v3/pkg/sources" "github.com/trufflesecurity/trufflehog/v3/pkg/context" "github.com/trufflesecurity/trufflehog/v3/pkg/giturl" @@ -291,7 +292,13 @@ type commitQuery struct { // getDiffForFileInCommit retrieves the diff for a specified file in a commit. // If the file or its diff is not found, it returns an error. -func (s *Source) getDiffForFileInCommit(ctx context.Context, query commitQuery) (string, error) { +func (s *Source) getDiffForFileInCommit( + ctx context.Context, + query commitQuery, + reporter sources.ChunkReporter, + secretID int64, + meta *source_metadatapb.MetaData_Github, +) (string, error) { var ( commit *github.RepositoryCommit err error @@ -332,7 +339,18 @@ func (s *Source) getDiffForFileInCommit(ctx context.Context, query commitQuery) return "", fmt.Errorf("commit %s does not contain patch for file %s", query.sha, query.filename) } - return res.String(), nil + return "", reporter.ChunkOk(ctx, sources.Chunk{ + SourceType: s.Type(), + SourceName: s.name, + SourceID: s.SourceID(), + JobID: s.JobID(), + SecretID: secretID, + Data: []byte(stripLeadingPlusMinus(res.String())), + SourceMetadata: &source_metadatapb.MetaData{ + Data: meta, + }, + Verify: s.verify, + }) } func (s *Source) normalizeRepo(repo string) (string, error) { From 0b4d2b1854788abb98d4fa5e47f82dc84d975ed9 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 29 Aug 2024 11:12:38 -0400 Subject: [PATCH 04/10] Revert "move existing path into fn" This reverts commit 587dd47b3114ba5eea1c88478fce6df452ce13b3. --- pkg/sources/github/github.go | 25 ++++++++++++++++++------- pkg/sources/github/repo.go | 22 ++-------------------- 2 files changed, 20 insertions(+), 27 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 44581dacbbc7..a6eba956e7c7 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -1398,13 +1398,24 @@ func (s *Source) scanTarget(ctx context.Context, target sources.ChunkingTarget, sha: meta.GetCommit(), filename: meta.GetFile(), } - _, err = s.getDiffForFileInCommit( - ctx, - qry, - sources.ChanReporter{Ch: chunksChan}, - target.SecretID, - &source_metadatapb.MetaData_Github{Github: meta}) - return err + res, err := s.getDiffForFileInCommit(ctx, qry) + if err != nil { + return err + } + chunk := &sources.Chunk{ + SourceType: s.Type(), + SourceName: s.name, + SourceID: s.SourceID(), + JobID: s.JobID(), + SecretID: target.SecretID, + Data: []byte(stripLeadingPlusMinus(res)), + SourceMetadata: &source_metadatapb.MetaData{ + Data: &source_metadatapb.MetaData_Github{Github: meta}, + }, + Verify: s.verify, + } + + return common.CancellableWrite(ctx, chunksChan, chunk) } // stripLeadingPlusMinus removes leading + and - characters from lines in a diff string. These characters exist in the diff --git a/pkg/sources/github/repo.go b/pkg/sources/github/repo.go index 04e035f17c1b..67e7728816c7 100644 --- a/pkg/sources/github/repo.go +++ b/pkg/sources/github/repo.go @@ -10,7 +10,6 @@ import ( gogit "github.com/go-git/go-git/v5" "github.com/google/go-github/v63/github" - "github.com/trufflesecurity/trufflehog/v3/pkg/sources" "github.com/trufflesecurity/trufflehog/v3/pkg/context" "github.com/trufflesecurity/trufflehog/v3/pkg/giturl" @@ -292,13 +291,7 @@ type commitQuery struct { // getDiffForFileInCommit retrieves the diff for a specified file in a commit. // If the file or its diff is not found, it returns an error. -func (s *Source) getDiffForFileInCommit( - ctx context.Context, - query commitQuery, - reporter sources.ChunkReporter, - secretID int64, - meta *source_metadatapb.MetaData_Github, -) (string, error) { +func (s *Source) getDiffForFileInCommit(ctx context.Context, query commitQuery) (string, error) { var ( commit *github.RepositoryCommit err error @@ -339,18 +332,7 @@ func (s *Source) getDiffForFileInCommit( return "", fmt.Errorf("commit %s does not contain patch for file %s", query.sha, query.filename) } - return "", reporter.ChunkOk(ctx, sources.Chunk{ - SourceType: s.Type(), - SourceName: s.name, - SourceID: s.SourceID(), - JobID: s.JobID(), - SecretID: secretID, - Data: []byte(stripLeadingPlusMinus(res.String())), - SourceMetadata: &source_metadatapb.MetaData{ - Data: meta, - }, - Verify: s.verify, - }) + return res.String(), nil } func (s *Source) normalizeRepo(repo string) (string, error) { From 760f112b97a48f4ea0b9a1552fd4777e984dfde4 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 29 Aug 2024 11:34:18 -0400 Subject: [PATCH 05/10] download everything --- pkg/sources/github/github.go | 33 +++++++++++-------- pkg/sources/github/github_integration_test.go | 2 +- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index a6eba956e7c7..ff06a18753d5 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -16,6 +16,7 @@ import ( "github.com/go-logr/logr" "github.com/gobwas/glob" "github.com/google/go-github/v63/github" + "github.com/trufflesecurity/trufflehog/v3/pkg/handlers" "golang.org/x/exp/rand" "golang.org/x/sync/errgroup" "google.golang.org/protobuf/proto" @@ -1392,30 +1393,36 @@ func (s *Source) scanTarget(ctx context.Context, target sources.ChunkingTarget, return fmt.Errorf("invalid GitHub URL") } - qry := commitQuery{ - repo: segments[2], - owner: segments[1], - sha: meta.GetCommit(), - filename: meta.GetFile(), + readCloser, resp, err := s.connector.APIClient().Repositories.DownloadContents( + ctx, + segments[1], + segments[2], + meta.GetFile(), + &github.RepositoryContentGetOptions{Ref: meta.GetCommit()}) + // As of this writing, if the returned readCloser is not nil, it's just the Body of the returned github.Response, so + // there's no need to independently close it. + if resp != nil && resp.Body != nil { + defer resp.Body.Close() } - res, err := s.getDiffForFileInCommit(ctx, qry) if err != nil { - return err + return fmt.Errorf("could not download file for scan: %w", err) + } + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("unexpected HTTP response status when trying to download file for scan: %v", resp.Status) } - chunk := &sources.Chunk{ + + reporter := sources.ChanReporter{Ch: chunksChan} + chunkSkel := sources.Chunk{ SourceType: s.Type(), SourceName: s.name, SourceID: s.SourceID(), JobID: s.JobID(), SecretID: target.SecretID, - Data: []byte(stripLeadingPlusMinus(res)), SourceMetadata: &source_metadatapb.MetaData{ Data: &source_metadatapb.MetaData_Github{Github: meta}, }, - Verify: s.verify, - } - - return common.CancellableWrite(ctx, chunksChan, chunk) + Verify: s.verify} + return handlers.HandleFile(ctx, readCloser, &chunkSkel, reporter) } // stripLeadingPlusMinus removes leading + and - characters from lines in a diff string. These characters exist in the diff --git a/pkg/sources/github/github_integration_test.go b/pkg/sources/github/github_integration_test.go index 6f1fb3e9a31d..10855e80efde 100644 --- a/pkg/sources/github/github_integration_test.go +++ b/pkg/sources/github/github_integration_test.go @@ -774,7 +774,7 @@ func TestSource_Chunks_TargetedScan(t *testing.T) { }, }, }, - wantChunks: 1, + wantChunks: 607, }, { name: "no file in commit", From 2fd59d4eb0599ed6e72379bdc73ee9b85ad6724b Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 29 Aug 2024 11:36:28 -0400 Subject: [PATCH 06/10] remove unused fn --- pkg/sources/github/github.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index ff06a18753d5..e201bb5bcc6b 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -1424,10 +1424,3 @@ func (s *Source) scanTarget(ctx context.Context, target sources.ChunkingTarget, Verify: s.verify} return handlers.HandleFile(ctx, readCloser, &chunkSkel, reporter) } - -// stripLeadingPlusMinus removes leading + and - characters from lines in a diff string. These characters exist in the -// diffs returned when performing a targeted scan and need to be removed so that detectors are operating on the correct -// text. -func stripLeadingPlusMinus(diff string) string { - return strings.ReplaceAll(strings.ReplaceAll(diff, "\n+", "\n"), "\n-", "\n") -} From 76032c0637d86ae951b182ad532ddcfd34a42b3d Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 29 Aug 2024 11:36:59 -0400 Subject: [PATCH 07/10] Revert "add temporary targeting ability" This reverts commit 4970796056f995f732e9b7a9874642063b43a084. --- main.go | 2 -- pkg/engine/github.go | 21 --------------------- pkg/sources/sources.go | 2 -- 3 files changed, 25 deletions(-) diff --git a/main.go b/main.go index 8fc7df8643ce..0dbf1f8d11e8 100644 --- a/main.go +++ b/main.go @@ -99,7 +99,6 @@ var ( githubScanIssueComments = githubScan.Flag("issue-comments", "Include issue descriptions and comments in scan.").Bool() githubScanPRComments = githubScan.Flag("pr-comments", "Include pull request descriptions and comments in scan.").Bool() githubScanGistComments = githubScan.Flag("gist-comments", "Include gist comments in scan.").Bool() - githubTarget = githubScan.Flag("target", "scan a target").Hidden().String() // GitHub Cross Fork Object Reference Experimental Feature githubExperimentalScan = cli.Command("github-experimental", "Run an experimental GitHub scan. Must specify at least one experimental sub-module to run: object-discovery.") @@ -628,7 +627,6 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics, IncludePullRequestComments: *githubScanPRComments, IncludeGistComments: *githubScanGistComments, Filter: filter, - Target: *githubTarget, } if err := eng.ScanGitHub(ctx, cfg); err != nil { return scanMetrics, fmt.Errorf("failed to scan Github: %v", err) diff --git a/pkg/engine/github.go b/pkg/engine/github.go index 17d0b09f038e..9bf84072a7d0 100644 --- a/pkg/engine/github.go +++ b/pkg/engine/github.go @@ -1,10 +1,7 @@ package engine import ( - "strings" - gogit "github.com/go-git/go-git/v5" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" @@ -61,24 +58,6 @@ func (e *Engine) ScanGitHub(ctx context.Context, c sources.GithubConfig) error { return err } githubSource.WithScanOptions(scanOptions) - - if c.Target != "" { - parts := strings.Split(c.Target, "@") - target := sources.ChunkingTarget{ - QueryCriteria: &source_metadatapb.MetaData{ - Data: &source_metadatapb.MetaData_Github{ - Github: &source_metadatapb.Github{ - Link: c.Repos[0], - File: parts[0], - Commit: parts[1], - }, - }, - }, - } - _, err = e.sourceManager.Run(ctx, sourceName, githubSource, target) - return err - } - _, err = e.sourceManager.Run(ctx, sourceName, githubSource) return err } diff --git a/pkg/sources/sources.go b/pkg/sources/sources.go index b7427927377d..39d280b2b0f2 100644 --- a/pkg/sources/sources.go +++ b/pkg/sources/sources.go @@ -236,8 +236,6 @@ type GithubConfig struct { SkipBinaries bool // IncludeWikis indicates whether to include repository wikis in the scan. IncludeWikis bool - - Target string } // GitHubExperimentalConfig defines the optional configuration for an experimental GitHub source. From 95c5254033d4e002f652a9a7adcfbef9fc934596 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 29 Aug 2024 11:38:30 -0400 Subject: [PATCH 08/10] remove other unused fn --- pkg/sources/github/repo.go | 46 -------------------------------------- 1 file changed, 46 deletions(-) diff --git a/pkg/sources/github/repo.go b/pkg/sources/github/repo.go index 67e7728816c7..7e60a2079663 100644 --- a/pkg/sources/github/repo.go +++ b/pkg/sources/github/repo.go @@ -289,52 +289,6 @@ type commitQuery struct { filename string } -// getDiffForFileInCommit retrieves the diff for a specified file in a commit. -// If the file or its diff is not found, it returns an error. -func (s *Source) getDiffForFileInCommit(ctx context.Context, query commitQuery) (string, error) { - var ( - commit *github.RepositoryCommit - err error - ) - for { - commit, _, err = s.connector.APIClient().Repositories.GetCommit(ctx, query.owner, query.repo, query.sha, nil) - if s.handleRateLimit(err) { - continue - } - if err != nil { - return "", fmt.Errorf("error fetching commit %s: %w", query.sha, err) - } - break - } - - if len(commit.Files) == 0 { - return "", fmt.Errorf("commit %s does not contain any files", query.sha) - } - - res := new(strings.Builder) - // Only return the diff if the file is in the commit. - for _, file := range commit.Files { - if *file.Filename != query.filename { - continue - } - - if file.Patch == nil { - return "", fmt.Errorf("commit %s file %s does not have a diff", query.sha, query.filename) - } - - if _, err := res.WriteString(*file.Patch); err != nil { - return "", fmt.Errorf("buffer write error for commit %s file %s: %w", query.sha, query.filename, err) - } - res.WriteString("\n") - } - - if res.Len() == 0 { - return "", fmt.Errorf("commit %s does not contain patch for file %s", query.sha, query.filename) - } - - return res.String(), nil -} - func (s *Source) normalizeRepo(repo string) (string, error) { // If there's a '/', assume it's a URL and try to normalize it. if strings.ContainsRune(repo, '/') { From 0793eda7cfa49163b7c7e92800e6aa0f8e46a2a7 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 29 Aug 2024 12:43:11 -0400 Subject: [PATCH 09/10] remove unused struct --- pkg/sources/github/repo.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pkg/sources/github/repo.go b/pkg/sources/github/repo.go index 7e60a2079663..79be8653ffcb 100644 --- a/pkg/sources/github/repo.go +++ b/pkg/sources/github/repo.go @@ -281,14 +281,6 @@ func (s *Source) wikiIsReachable(ctx context.Context, repoURL string) bool { return wikiURL == res.Request.URL.String() } -// commitQuery represents the details required to fetch a commit. -type commitQuery struct { - repo string - owner string - sha string - filename string -} - func (s *Source) normalizeRepo(repo string) (string, error) { // If there's a '/', assume it's a URL and try to normalize it. if strings.ContainsRune(repo, '/') { From 8609a06050d0a6104648b7d0baa4d7777ae1a77d Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 29 Aug 2024 12:59:55 -0400 Subject: [PATCH 10/10] test against a repo we control --- pkg/sources/github/github_integration_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/sources/github/github_integration_test.go b/pkg/sources/github/github_integration_test.go index 10855e80efde..9fff037c321b 100644 --- a/pkg/sources/github/github_integration_test.go +++ b/pkg/sources/github/github_integration_test.go @@ -762,13 +762,13 @@ func TestSource_Chunks_TargetedScan(t *testing.T) { name: "targeted scan, binary file", init: init{ name: "test source", - connection: &sourcespb.GitHub{Credential: &sourcespb.GitHub_Unauthenticated{}}, + connection: &sourcespb.GitHub{Credential: &sourcespb.GitHub_Token{Token: githubToken}}, queryCriteria: &source_metadatapb.MetaData{ Data: &source_metadatapb.MetaData_Github{ Github: &source_metadatapb.Github{ - Repository: "https://github.com/google/prompt-to-prompt.git", - Link: "https://github.com/google/prompt-to-prompt/blob/160965d6d4a54ce93eac183371625fe2f8446c10/null_text_w_ptp.ipynb", - Commit: "160965d6d4a54ce93eac183371625fe2f8446c10", + Repository: "https://github.com/truffle-sandbox/test-secrets.git", + Link: "https://github.com/truffle-sandbox/test-secrets/blob/70bef8590f87257c0992eecc7db529827a12b801/null_text_w_ptp.ipynb", + Commit: "70bef8590f87257c0992eecc7db529827a12b801", File: "null_text_w_ptp.ipynb", }, },