Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Download files when reverifying #3252

Merged
merged 10 commits into from
Aug 29, 2024
Merged
40 changes: 20 additions & 20 deletions pkg/sources/github/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/go-logr/logr"
"github.com/gobwas/glob"
"github.com/google/go-github/v63/github"
"github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
"golang.org/x/exp/rand"
"golang.org/x/sync/errgroup"
"google.golang.org/protobuf/proto"
Expand Down Expand Up @@ -1392,35 +1393,34 @@ func (s *Source) scanTarget(ctx context.Context, target sources.ChunkingTarget,
return fmt.Errorf("invalid GitHub URL")
}

qry := commitQuery{
repo: segments[2],
owner: segments[1],
sha: meta.GetCommit(),
filename: meta.GetFile(),
readCloser, resp, err := s.connector.APIClient().Repositories.DownloadContents(
ctx,
segments[1],
segments[2],
meta.GetFile(),
&github.RepositoryContentGetOptions{Ref: meta.GetCommit()})
// As of this writing, if the returned readCloser is not nil, it's just the Body of the returned github.Response, so
// there's no need to independently close it.
if resp != nil && resp.Body != nil {
defer resp.Body.Close()
}
res, err := s.getDiffForFileInCommit(ctx, qry)
if err != nil {
return err
return fmt.Errorf("could not download file for scan: %w", err)
}
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("unexpected HTTP response status when trying to download file for scan: %v", resp.Status)
}
chunk := &sources.Chunk{

reporter := sources.ChanReporter{Ch: chunksChan}
chunkSkel := sources.Chunk{
SourceType: s.Type(),
SourceName: s.name,
SourceID: s.SourceID(),
JobID: s.JobID(),
SecretID: target.SecretID,
Data: []byte(stripLeadingPlusMinus(res)),
SourceMetadata: &source_metadatapb.MetaData{
Data: &source_metadatapb.MetaData_Github{Github: meta},
},
Verify: s.verify,
}

return common.CancellableWrite(ctx, chunksChan, chunk)
}

// stripLeadingPlusMinus removes leading + and - characters from lines in a diff string. These characters exist in the
// diffs returned when performing a targeted scan and need to be removed so that detectors are operating on the correct
// text.
func stripLeadingPlusMinus(diff string) string {
return strings.ReplaceAll(strings.ReplaceAll(diff, "\n+", "\n"), "\n-", "\n")
Verify: s.verify}
return handlers.HandleFile(ctx, readCloser, &chunkSkel, reporter)
}
18 changes: 18 additions & 0 deletions pkg/sources/github/github_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,24 @@ func TestSource_Chunks_TargetedScan(t *testing.T) {
},
wantChunks: 1,
},
{
name: "targeted scan, binary file",
init: init{
name: "test source",
connection: &sourcespb.GitHub{Credential: &sourcespb.GitHub_Token{Token: githubToken}},
queryCriteria: &source_metadatapb.MetaData{
Data: &source_metadatapb.MetaData_Github{
Github: &source_metadatapb.Github{
Repository: "https://github.com/truffle-sandbox/test-secrets.git",
Link: "https://github.com/truffle-sandbox/test-secrets/blob/70bef8590f87257c0992eecc7db529827a12b801/null_text_w_ptp.ipynb",
Commit: "70bef8590f87257c0992eecc7db529827a12b801",
File: "null_text_w_ptp.ipynb",
},
},
},
},
wantChunks: 607,
},
{
name: "no file in commit",
init: init{
Expand Down
54 changes: 0 additions & 54 deletions pkg/sources/github/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,60 +281,6 @@ func (s *Source) wikiIsReachable(ctx context.Context, repoURL string) bool {
return wikiURL == res.Request.URL.String()
}

// commitQuery represents the details required to fetch a commit.
type commitQuery struct {
repo string
owner string
sha string
filename string
}

// getDiffForFileInCommit retrieves the diff for a specified file in a commit.
// If the file or its diff is not found, it returns an error.
func (s *Source) getDiffForFileInCommit(ctx context.Context, query commitQuery) (string, error) {
var (
commit *github.RepositoryCommit
err error
)
for {
commit, _, err = s.connector.APIClient().Repositories.GetCommit(ctx, query.owner, query.repo, query.sha, nil)
if s.handleRateLimit(err) {
continue
}
if err != nil {
return "", fmt.Errorf("error fetching commit %s: %w", query.sha, err)
}
break
}

if len(commit.Files) == 0 {
return "", fmt.Errorf("commit %s does not contain any files", query.sha)
}

res := new(strings.Builder)
// Only return the diff if the file is in the commit.
for _, file := range commit.Files {
if *file.Filename != query.filename {
continue
}

if file.Patch == nil {
return "", fmt.Errorf("commit %s file %s does not have a diff", query.sha, query.filename)
}

if _, err := res.WriteString(*file.Patch); err != nil {
return "", fmt.Errorf("buffer write error for commit %s file %s: %w", query.sha, query.filename, err)
}
res.WriteString("\n")
}

if res.Len() == 0 {
return "", fmt.Errorf("commit %s does not contain patch for file %s", query.sha, query.filename)
}

return res.String(), nil
}

func (s *Source) normalizeRepo(repo string) (string, error) {
// If there's a '/', assume it's a URL and try to normalize it.
if strings.ContainsRune(repo, '/') {
Expand Down
Loading