Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move GitHub integration tests behind a build flag and add unit tests #527

Merged
merged 5 commits into from
May 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ require (
github.com/joho/godotenv v1.4.0
github.com/jpillora/overseer v1.1.6
github.com/kylelemons/godebug v1.1.0
github.com/mattn/go-colorable v0.1.12
github.com/paulbellamy/ratecounter v0.2.0
github.com/pkg/errors v0.9.1
github.com/razorpay/razorpay-go v0.0.0-20210728161131-0341409a6ab2
github.com/rs/zerolog v1.26.1
github.com/sergi/go-diff v1.2.0
github.com/sirupsen/logrus v1.8.1
github.com/stretchr/testify v1.7.0
github.com/tailscale/depaware v0.0.0-20210622194025-720c4b409502
github.com/xanzy/go-gitlab v0.64.0
github.com/zricethezav/gitleaks/v8 v8.5.2
Expand All @@ -48,6 +48,7 @@ require (
google.golang.org/genproto v0.0.0-20220405205423-9d709892a2bf
google.golang.org/protobuf v1.28.0
gopkg.in/alecthomas/kingpin.v2 v2.2.6
gopkg.in/h2non/gock.v1 v1.1.2
)

require (
Expand All @@ -71,6 +72,7 @@ require (
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.4 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.4 // indirect
github.com/aws/smithy-go v1.11.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dimchansky/utfbom v1.1.1 // indirect
github.com/emirpasic/gods v1.12.0 // indirect
github.com/go-git/gcfg v1.5.0 // indirect
Expand All @@ -84,18 +86,21 @@ require (
github.com/google/go-querystring v1.1.0 // indirect
github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect
github.com/googleapis/gax-go/v2 v2.2.0 // indirect
github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542 // indirect
github.com/hashicorp/go-cleanhttp v0.5.1 // indirect
github.com/imdario/mergo v0.3.12 // indirect
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/jpillora/s3 v1.1.4 // indirect
github.com/json-iterator/go v1.1.11 // indirect
github.com/kevinburke/ssh_config v0.0.0-20201106050909-4977a11b4351 // indirect
github.com/mattn/go-colorable v0.1.12 // indirect
github.com/mattn/go-isatty v0.0.14 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.1 // indirect
github.com/pkg/diff v0.0.0-20200914180035-5b29258ca4f7 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/xanzy/ssh-agent v0.3.0 // indirect
github.com/yusufpapurcu/wmi v1.2.2 // indirect
go.opencensus.io v0.23.0 // indirect
Expand All @@ -110,4 +115,5 @@ require (
google.golang.org/grpc v1.45.0 // indirect
gopkg.in/ini.v1 v1.66.2 // indirect
gopkg.in/warnings.v0 v0.1.2 // indirect
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
)
6 changes: 6 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,8 @@ github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/ad
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
github.com/h2non/filetype v1.1.3 h1:FKkx9QbD7HR/zjK1Ia5XiBsq9zdLi5Kf3zGyFTAFkGg=
github.com/h2non/filetype v1.1.3/go.mod h1:319b3zT68BvV+WRj7cwy856M2ehB3HqNOt6sy1HndBY=
github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542 h1:2VTzZjLZBgl62/EtslCrtky5vbi9dd7HrQPQIx6wqiw=
github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542/go.mod h1:Ow0tF8D4Kplbc8s8sSb3V2oUCygFHVp8gC3Dn6U4MNI=
github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBtguAZLlVdkD9Q=
github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
Expand Down Expand Up @@ -391,6 +393,8 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJ
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32 h1:W6apQkHrMkS0Muv8G/TipAy/FJl/rCYT0+EuS8+Z0z4=
github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32/go.mod h1:9wM+0iRr9ahx58uYLpLIr5fm8diHn0JbqRycJi6w0Ms=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk=
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
Expand Down Expand Up @@ -930,6 +934,8 @@ gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/h2non/gock.v1 v1.1.2 h1:jBbHXgGBK/AoPVfJh5x4r/WxIrElvbLel8TCZkkZJoY=
gopkg.in/h2non/gock.v1 v1.1.2/go.mod h1:n7UGz/ckNChHiK05rDoiC4MYSunEC/lyaUm2WWaDva0=
gopkg.in/ini.v1 v1.62.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/ini.v1 v1.66.2 h1:XfR1dOYubytKy4Shzc2LHrrGhU0lDCfDGG1yLPmpgsI=
gopkg.in/ini.v1 v1.66.2/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
Expand Down
155 changes: 106 additions & 49 deletions pkg/sources/github/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ import (
"strconv"
"strings"
"sync"
"sync/atomic"
"time"

"github.com/bradleyfalzon/ghinstallation/v2"
"github.com/go-errors/errors"
gogit "github.com/go-git/go-git/v5"
"github.com/google/go-github/v42/github"
"github.com/sirupsen/logrus"
log "github.com/sirupsen/logrus"
"golang.org/x/oauth2"
"golang.org/x/sync/semaphore"
Expand All @@ -37,8 +37,8 @@ import (

type Source struct {
name string
sourceId int64
jobId int64
sourceID int64
jobID int64
verify bool
repos []string
orgs []string
Expand All @@ -64,11 +64,11 @@ func (s *Source) Type() sourcespb.SourceType {
}

func (s *Source) SourceID() int64 {
return s.sourceId
return s.sourceID
}

func (s *Source) JobID() int64 {
return s.jobId
return s.jobID
}

func (s *Source) Token(ctx context.Context, installationClient *github.Client) (string, error) {
Expand All @@ -94,13 +94,13 @@ func (s *Source) Token(ctx context.Context, installationClient *github.Client) (
}

// Init returns an initialized GitHub source.
func (s *Source) Init(aCtx context.Context, name string, jobId, sourceId int64, verify bool, connection *anypb.Any, concurrency int) error {
func (s *Source) Init(aCtx context.Context, name string, jobID, sourceID int64, verify bool, connection *anypb.Any, concurrency int) error {
s.log = log.WithField("source", s.Type()).WithField("name", name)

s.aCtx = aCtx
s.name = name
s.sourceId = sourceId
s.jobId = jobId
s.sourceID = sourceID
s.jobID = jobID
s.verify = verify
s.jobSem = semaphore.NewWeighted(int64(concurrency))

Expand Down Expand Up @@ -333,16 +333,27 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk) err
}

func (s *Source) scan(ctx context.Context, installationClient *github.Client, chunksChan chan *sources.Chunk) error {
scanned := 0
var scanned uint64

log.Debugf("Found %v total repos to scan", len(s.repos))
wg := sync.WaitGroup{}
errs := []error{}
var errsMut sync.Mutex
errs := make(chan error, 1)
reportErr := func(err error) {
// save the error if there's room, otherwise log and drop it
select {
case errs <- err:
default:
log.WithError(err).Warn("dropping error")
}
}

Comment on lines +340 to +349
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't this only ever keep the first error, which I assume would be non fatal if this is called more than once?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I believe that was the previous behavior as well? Or rather, the previous behavior saved all errors in a slice and only returned the first if there was one.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm the previous behavior may not have been correct then. What do you think?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm.. the only advantage I see of returning all the errors is if we want to determine control flow from the specific errors or display them to the user. In its current state we log each error, so we still have the information in the logs.

This function is essentially returning whether 1 or more errors occurred, which I think is fine for the Chunks method.

for i, repoURL := range s.repos {
if err := s.jobSem.Acquire(ctx, 1); err != nil {
// Acquire blocks until it can acquire the semaphore or returns an
// error if the context is finished
log.WithError(err).Debug("could not acquire semaphore")
continue
reportErr(err)
break
}
wg.Add(1)
go func(ctx context.Context, repoURL string, i int) {
Expand All @@ -367,10 +378,7 @@ func (s *Source) scan(ctx context.Context, installationClient *github.Client, ch
var token string
token, err = s.Token(ctx, installationClient)
if err != nil {
// TODO: maybe we can use a channel here
errsMut.Lock()
errs = append(errs, err)
errsMut.Unlock()
reportErr(err)
return
}
path, repo, err = git.CloneRepoUsingToken(token, repoURL, "clone")
Expand All @@ -391,20 +399,20 @@ func (s *Source) scan(ctx context.Context, installationClient *github.Client, ch
if err != nil {
log.WithError(err).Errorf("unable to scan repo, continuing")
}
// TODO: use atomic library
scanned++
logrus.Debugf("scanned %d/%d repos", scanned, len(s.repos))
atomic.AddUint64(&scanned, 1)
log.Debugf("scanned %d/%d repos", scanned, len(s.repos))
}(ctx, repoURL, i)
}

wg.Wait()

// This only returns first error which is what we did prior to concurrency
if len(errs) > 0 {
return errs[0]
select {
case err := <-errs:
return err
default:
return nil
}

return nil
}

// handleRateLimit returns true if a rate limit was handled
Expand Down Expand Up @@ -443,7 +451,8 @@ func handleRateLimit(errIn error, res *github.Response) bool {
return true
}

func (s *Source) addReposByOrg(ctx context.Context, apiClient *github.Client, org string) error {
func (s *Source) getReposByOrg(ctx context.Context, apiClient *github.Client, org string) ([]string, error) {
repos := []string{}
opts := &github.RepositoryListByOrgOptions{
ListOptions: github.ListOptions{
PerPage: 100,
Expand All @@ -459,7 +468,7 @@ func (s *Source) addReposByOrg(ctx context.Context, apiClient *github.Client, or
continue
}
if err != nil {
return fmt.Errorf("could not list repos for org %s: %w", org, err)
return nil, fmt.Errorf("could not list repos for org %s: %w", org, err)
}
if len(someRepos) == 0 {
break
Expand All @@ -472,18 +481,31 @@ func (s *Source) addReposByOrg(ctx context.Context, apiClient *github.Client, or
continue
}
}
common.AddStringSliceItem(r.GetCloneURL(), &s.repos)
repos = append(repos, r.GetCloneURL())
}
if res.NextPage == 0 {
break
}
opts.Page = res.NextPage
}
log.WithField("org", org).Debugf("Found %d repos (%d forks)", numRepos, numForks)
return repos, nil
}

func (s *Source) addReposByOrg(ctx context.Context, apiClient *github.Client, org string) error {
repos, err := s.getReposByOrg(ctx, apiClient, org)
if err != nil {
return err
}
// add the repos to the set of repos
for _, repo := range repos {
common.AddStringSliceItem(repo, &s.repos)
}
return nil
}

func (s *Source) addReposByUser(ctx context.Context, apiClient *github.Client, user string) error {
func (s *Source) getReposByUser(ctx context.Context, apiClient *github.Client, user string) ([]string, error) {
repos := []string{}
opts := &github.RepositoryListOptions{
ListOptions: github.ListOptions{
PerPage: 50,
Expand All @@ -498,23 +520,36 @@ func (s *Source) addReposByUser(ctx context.Context, apiClient *github.Client, u
continue
}
if err != nil {
return fmt.Errorf("could not list repos for user %s: %w", user, err)
return nil, fmt.Errorf("could not list repos for user %s: %w", user, err)
}
for _, r := range someRepos {
if r.GetFork() && !s.conn.IncludeForks {
continue
}
common.AddStringSliceItem(r.GetCloneURL(), &s.repos)
repos = append(repos, r.GetCloneURL())
}
if res.NextPage == 0 {
break
}
opts.Page = res.NextPage
}
return repos, nil
}

func (s *Source) addReposByUser(ctx context.Context, apiClient *github.Client, user string) error {
repos, err := s.getReposByUser(ctx, apiClient, user)
if err != nil {
return err
}
// add the repos to the set of repos
for _, repo := range repos {
common.AddStringSliceItem(repo, &s.repos)
}
return nil
}

func (s *Source) addGistsByUser(ctx context.Context, apiClient *github.Client, user string) {
func (s *Source) getGistsByUser(ctx context.Context, apiClient *github.Client, user string) ([]string, error) {
gistURLs := []string{}
gistOpts := &github.GistListOptions{}
for {
gists, resp, err := apiClient.Gists.List(ctx, user, gistOpts)
Expand All @@ -525,21 +560,33 @@ func (s *Source) addGistsByUser(ctx context.Context, apiClient *github.Client, u
continue
}
if err != nil {
log.WithError(err).Warnf("Could not get gists for user %s", user)
log.WithError(err).Warnf("could not list repos for user %s", user)
return nil, fmt.Errorf("could not list repos for user %s: %w", user, err)
}
for _, gist := range gists {
common.AddStringSliceItem(gist.GetGitPullURL(), &s.repos)
gistURLs = append(gistURLs, gist.GetGitPullURL())
}
if resp == nil || resp.NextPage == 0 {
break
}
gistOpts.Page = resp.NextPage
}
return
return gistURLs, nil
}

func (s *Source) addMembersByApp(ctx context.Context, installationClient *github.Client, apiClient *github.Client) error {
func (s *Source) addGistsByUser(ctx context.Context, apiClient *github.Client, user string) error {
gists, err := s.getGistsByUser(ctx, apiClient, user)
if err != nil {
return err
}
// add the gists to the set of repos
for _, gist := range gists {
common.AddStringSliceItem(gist, &s.repos)
}
return nil
}

func (s *Source) addMembersByApp(ctx context.Context, installationClient *github.Client, apiClient *github.Client) error {
opts := &github.ListOptions{
PerPage: 500,
}
Expand Down Expand Up @@ -650,24 +697,34 @@ func (s *Source) addOrgsByUser(ctx context.Context, apiClient *github.Client, us

func (s *Source) normalizeRepos(ctx context.Context, apiClient *github.Client) {
// TODO: Add check/fix for repos that are missing scheme
repoIter := make([]string, len(s.repos))
copy(repoIter, s.repos)
for _, repo := range repoIter {
if parts := strings.Split(repo, "/"); len(parts) == 1 {
origSources := len(s.repos)
s.addGistsByUser(ctx, apiClient, repo)
if err := s.addReposByUser(ctx, apiClient, repo); err != nil {
log.WithError(err).Error("error fetching repos by user")
}
if origSources != len(s.repos) {
common.RemoveStringSliceItem(repo, &s.repos)
normalizedRepos := map[string]struct{}{}
for _, repo := range s.repos {
// if there's a '/', assume it's a URL and try to normalize it
if strings.ContainsRune(repo, '/') {
repoNormalized, err := giturl.NormalizeGithubRepo(repo)
if err != nil {
log.WithError(err).Warnf("Repo not in expected format: %s", repo)
continue
}
normalizedRepos[repoNormalized] = struct{}{}
continue
}
repoNormalized, err := giturl.NormalizeGithubRepo(repo)
if err != nil {
log.WithError(err).Warnf("Repo not in expected format: %s", repo)
// otherwise, assume it's a user and enumerate repositories and gists
if repos, err := s.getReposByUser(ctx, apiClient, repo); err == nil {
for _, repo := range repos {
normalizedRepos[repo] = struct{}{}
}
}
if gists, err := s.getGistsByUser(ctx, apiClient, repo); err == nil {
for _, gist := range gists {
normalizedRepos[gist] = struct{}{}
}
}
common.AddStringSliceItem(repoNormalized, &s.repos)
}

// replace s.repos
s.repos = s.repos[:0]
for key := range normalizedRepos {
s.repos = append(s.repos, key)
}
}
Loading