Skip to content

Commit

Permalink
Move GitHub integration tests behind a build flag and add unit tests (#…
Browse files Browse the repository at this point in the history
…527)

* Add unit tests and refactor some logic

* Move integration tests to a separate file behind a build flag

* Fix bugs in normalizeRepos

* Address lint errors

* Sort slices before comparing because order doesn't matter
  • Loading branch information
mcastorina authored May 9, 2022
1 parent 0179258 commit edaf1e1
Show file tree
Hide file tree
Showing 5 changed files with 995 additions and 584 deletions.
8 changes: 7 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ require (
github.com/joho/godotenv v1.4.0
github.com/jpillora/overseer v1.1.6
github.com/kylelemons/godebug v1.1.0
github.com/mattn/go-colorable v0.1.12
github.com/paulbellamy/ratecounter v0.2.0
github.com/pkg/errors v0.9.1
github.com/razorpay/razorpay-go v0.0.0-20210728161131-0341409a6ab2
github.com/rs/zerolog v1.26.1
github.com/sergi/go-diff v1.2.0
github.com/sirupsen/logrus v1.8.1
github.com/stretchr/testify v1.7.0
github.com/tailscale/depaware v0.0.0-20210622194025-720c4b409502
github.com/xanzy/go-gitlab v0.64.0
github.com/zricethezav/gitleaks/v8 v8.5.2
Expand All @@ -48,6 +48,7 @@ require (
google.golang.org/genproto v0.0.0-20220405205423-9d709892a2bf
google.golang.org/protobuf v1.28.0
gopkg.in/alecthomas/kingpin.v2 v2.2.6
gopkg.in/h2non/gock.v1 v1.1.2
)

require (
Expand All @@ -71,6 +72,7 @@ require (
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.4 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.4 // indirect
github.com/aws/smithy-go v1.11.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dimchansky/utfbom v1.1.1 // indirect
github.com/emirpasic/gods v1.12.0 // indirect
github.com/go-git/gcfg v1.5.0 // indirect
Expand All @@ -84,18 +86,21 @@ require (
github.com/google/go-querystring v1.1.0 // indirect
github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect
github.com/googleapis/gax-go/v2 v2.2.0 // indirect
github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542 // indirect
github.com/hashicorp/go-cleanhttp v0.5.1 // indirect
github.com/imdario/mergo v0.3.12 // indirect
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/jpillora/s3 v1.1.4 // indirect
github.com/json-iterator/go v1.1.11 // indirect
github.com/kevinburke/ssh_config v0.0.0-20201106050909-4977a11b4351 // indirect
github.com/mattn/go-colorable v0.1.12 // indirect
github.com/mattn/go-isatty v0.0.14 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.1 // indirect
github.com/pkg/diff v0.0.0-20200914180035-5b29258ca4f7 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/xanzy/ssh-agent v0.3.0 // indirect
github.com/yusufpapurcu/wmi v1.2.2 // indirect
go.opencensus.io v0.23.0 // indirect
Expand All @@ -110,4 +115,5 @@ require (
google.golang.org/grpc v1.45.0 // indirect
gopkg.in/ini.v1 v1.66.2 // indirect
gopkg.in/warnings.v0 v0.1.2 // indirect
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
)
6 changes: 6 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,8 @@ github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/ad
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
github.com/h2non/filetype v1.1.3 h1:FKkx9QbD7HR/zjK1Ia5XiBsq9zdLi5Kf3zGyFTAFkGg=
github.com/h2non/filetype v1.1.3/go.mod h1:319b3zT68BvV+WRj7cwy856M2ehB3HqNOt6sy1HndBY=
github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542 h1:2VTzZjLZBgl62/EtslCrtky5vbi9dd7HrQPQIx6wqiw=
github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542/go.mod h1:Ow0tF8D4Kplbc8s8sSb3V2oUCygFHVp8gC3Dn6U4MNI=
github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBtguAZLlVdkD9Q=
github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
Expand Down Expand Up @@ -391,6 +393,8 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJ
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32 h1:W6apQkHrMkS0Muv8G/TipAy/FJl/rCYT0+EuS8+Z0z4=
github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32/go.mod h1:9wM+0iRr9ahx58uYLpLIr5fm8diHn0JbqRycJi6w0Ms=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk=
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
Expand Down Expand Up @@ -930,6 +934,8 @@ gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/h2non/gock.v1 v1.1.2 h1:jBbHXgGBK/AoPVfJh5x4r/WxIrElvbLel8TCZkkZJoY=
gopkg.in/h2non/gock.v1 v1.1.2/go.mod h1:n7UGz/ckNChHiK05rDoiC4MYSunEC/lyaUm2WWaDva0=
gopkg.in/ini.v1 v1.62.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/ini.v1 v1.66.2 h1:XfR1dOYubytKy4Shzc2LHrrGhU0lDCfDGG1yLPmpgsI=
gopkg.in/ini.v1 v1.66.2/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
Expand Down
155 changes: 106 additions & 49 deletions pkg/sources/github/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ import (
"strconv"
"strings"
"sync"
"sync/atomic"
"time"

"github.com/bradleyfalzon/ghinstallation/v2"
"github.com/go-errors/errors"
gogit "github.com/go-git/go-git/v5"
"github.com/google/go-github/v42/github"
"github.com/sirupsen/logrus"
log "github.com/sirupsen/logrus"
"golang.org/x/oauth2"
"golang.org/x/sync/semaphore"
Expand All @@ -37,8 +37,8 @@ import (

type Source struct {
name string
sourceId int64
jobId int64
sourceID int64
jobID int64
verify bool
repos []string
orgs []string
Expand All @@ -64,11 +64,11 @@ func (s *Source) Type() sourcespb.SourceType {
}

func (s *Source) SourceID() int64 {
return s.sourceId
return s.sourceID
}

func (s *Source) JobID() int64 {
return s.jobId
return s.jobID
}

func (s *Source) Token(ctx context.Context, installationClient *github.Client) (string, error) {
Expand All @@ -94,13 +94,13 @@ func (s *Source) Token(ctx context.Context, installationClient *github.Client) (
}

// Init returns an initialized GitHub source.
func (s *Source) Init(aCtx context.Context, name string, jobId, sourceId int64, verify bool, connection *anypb.Any, concurrency int) error {
func (s *Source) Init(aCtx context.Context, name string, jobID, sourceID int64, verify bool, connection *anypb.Any, concurrency int) error {
s.log = log.WithField("source", s.Type()).WithField("name", name)

s.aCtx = aCtx
s.name = name
s.sourceId = sourceId
s.jobId = jobId
s.sourceID = sourceID
s.jobID = jobID
s.verify = verify
s.jobSem = semaphore.NewWeighted(int64(concurrency))

Expand Down Expand Up @@ -333,16 +333,27 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk) err
}

func (s *Source) scan(ctx context.Context, installationClient *github.Client, chunksChan chan *sources.Chunk) error {
scanned := 0
var scanned uint64

log.Debugf("Found %v total repos to scan", len(s.repos))
wg := sync.WaitGroup{}
errs := []error{}
var errsMut sync.Mutex
errs := make(chan error, 1)
reportErr := func(err error) {
// save the error if there's room, otherwise log and drop it
select {
case errs <- err:
default:
log.WithError(err).Warn("dropping error")
}
}

for i, repoURL := range s.repos {
if err := s.jobSem.Acquire(ctx, 1); err != nil {
// Acquire blocks until it can acquire the semaphore or returns an
// error if the context is finished
log.WithError(err).Debug("could not acquire semaphore")
continue
reportErr(err)
break
}
wg.Add(1)
go func(ctx context.Context, repoURL string, i int) {
Expand All @@ -367,10 +378,7 @@ func (s *Source) scan(ctx context.Context, installationClient *github.Client, ch
var token string
token, err = s.Token(ctx, installationClient)
if err != nil {
// TODO: maybe we can use a channel here
errsMut.Lock()
errs = append(errs, err)
errsMut.Unlock()
reportErr(err)
return
}
path, repo, err = git.CloneRepoUsingToken(token, repoURL, "clone")
Expand All @@ -391,20 +399,20 @@ func (s *Source) scan(ctx context.Context, installationClient *github.Client, ch
if err != nil {
log.WithError(err).Errorf("unable to scan repo, continuing")
}
// TODO: use atomic library
scanned++
logrus.Debugf("scanned %d/%d repos", scanned, len(s.repos))
atomic.AddUint64(&scanned, 1)
log.Debugf("scanned %d/%d repos", scanned, len(s.repos))
}(ctx, repoURL, i)
}

wg.Wait()

// This only returns first error which is what we did prior to concurrency
if len(errs) > 0 {
return errs[0]
select {
case err := <-errs:
return err
default:
return nil
}

return nil
}

// handleRateLimit returns true if a rate limit was handled
Expand Down Expand Up @@ -443,7 +451,8 @@ func handleRateLimit(errIn error, res *github.Response) bool {
return true
}

func (s *Source) addReposByOrg(ctx context.Context, apiClient *github.Client, org string) error {
func (s *Source) getReposByOrg(ctx context.Context, apiClient *github.Client, org string) ([]string, error) {
repos := []string{}
opts := &github.RepositoryListByOrgOptions{
ListOptions: github.ListOptions{
PerPage: 100,
Expand All @@ -459,7 +468,7 @@ func (s *Source) addReposByOrg(ctx context.Context, apiClient *github.Client, or
continue
}
if err != nil {
return fmt.Errorf("could not list repos for org %s: %w", org, err)
return nil, fmt.Errorf("could not list repos for org %s: %w", org, err)
}
if len(someRepos) == 0 {
break
Expand All @@ -472,18 +481,31 @@ func (s *Source) addReposByOrg(ctx context.Context, apiClient *github.Client, or
continue
}
}
common.AddStringSliceItem(r.GetCloneURL(), &s.repos)
repos = append(repos, r.GetCloneURL())
}
if res.NextPage == 0 {
break
}
opts.Page = res.NextPage
}
log.WithField("org", org).Debugf("Found %d repos (%d forks)", numRepos, numForks)
return repos, nil
}

func (s *Source) addReposByOrg(ctx context.Context, apiClient *github.Client, org string) error {
repos, err := s.getReposByOrg(ctx, apiClient, org)
if err != nil {
return err
}
// add the repos to the set of repos
for _, repo := range repos {
common.AddStringSliceItem(repo, &s.repos)
}
return nil
}

func (s *Source) addReposByUser(ctx context.Context, apiClient *github.Client, user string) error {
func (s *Source) getReposByUser(ctx context.Context, apiClient *github.Client, user string) ([]string, error) {
repos := []string{}
opts := &github.RepositoryListOptions{
ListOptions: github.ListOptions{
PerPage: 50,
Expand All @@ -498,23 +520,36 @@ func (s *Source) addReposByUser(ctx context.Context, apiClient *github.Client, u
continue
}
if err != nil {
return fmt.Errorf("could not list repos for user %s: %w", user, err)
return nil, fmt.Errorf("could not list repos for user %s: %w", user, err)
}
for _, r := range someRepos {
if r.GetFork() && !s.conn.IncludeForks {
continue
}
common.AddStringSliceItem(r.GetCloneURL(), &s.repos)
repos = append(repos, r.GetCloneURL())
}
if res.NextPage == 0 {
break
}
opts.Page = res.NextPage
}
return repos, nil
}

func (s *Source) addReposByUser(ctx context.Context, apiClient *github.Client, user string) error {
repos, err := s.getReposByUser(ctx, apiClient, user)
if err != nil {
return err
}
// add the repos to the set of repos
for _, repo := range repos {
common.AddStringSliceItem(repo, &s.repos)
}
return nil
}

func (s *Source) addGistsByUser(ctx context.Context, apiClient *github.Client, user string) {
func (s *Source) getGistsByUser(ctx context.Context, apiClient *github.Client, user string) ([]string, error) {
gistURLs := []string{}
gistOpts := &github.GistListOptions{}
for {
gists, resp, err := apiClient.Gists.List(ctx, user, gistOpts)
Expand All @@ -525,21 +560,33 @@ func (s *Source) addGistsByUser(ctx context.Context, apiClient *github.Client, u
continue
}
if err != nil {
log.WithError(err).Warnf("Could not get gists for user %s", user)
log.WithError(err).Warnf("could not list repos for user %s", user)
return nil, fmt.Errorf("could not list repos for user %s: %w", user, err)
}
for _, gist := range gists {
common.AddStringSliceItem(gist.GetGitPullURL(), &s.repos)
gistURLs = append(gistURLs, gist.GetGitPullURL())
}
if resp == nil || resp.NextPage == 0 {
break
}
gistOpts.Page = resp.NextPage
}
return
return gistURLs, nil
}

func (s *Source) addMembersByApp(ctx context.Context, installationClient *github.Client, apiClient *github.Client) error {
func (s *Source) addGistsByUser(ctx context.Context, apiClient *github.Client, user string) error {
gists, err := s.getGistsByUser(ctx, apiClient, user)
if err != nil {
return err
}
// add the gists to the set of repos
for _, gist := range gists {
common.AddStringSliceItem(gist, &s.repos)
}
return nil
}

func (s *Source) addMembersByApp(ctx context.Context, installationClient *github.Client, apiClient *github.Client) error {
opts := &github.ListOptions{
PerPage: 500,
}
Expand Down Expand Up @@ -650,24 +697,34 @@ func (s *Source) addOrgsByUser(ctx context.Context, apiClient *github.Client, us

func (s *Source) normalizeRepos(ctx context.Context, apiClient *github.Client) {
// TODO: Add check/fix for repos that are missing scheme
repoIter := make([]string, len(s.repos))
copy(repoIter, s.repos)
for _, repo := range repoIter {
if parts := strings.Split(repo, "/"); len(parts) == 1 {
origSources := len(s.repos)
s.addGistsByUser(ctx, apiClient, repo)
if err := s.addReposByUser(ctx, apiClient, repo); err != nil {
log.WithError(err).Error("error fetching repos by user")
}
if origSources != len(s.repos) {
common.RemoveStringSliceItem(repo, &s.repos)
normalizedRepos := map[string]struct{}{}
for _, repo := range s.repos {
// if there's a '/', assume it's a URL and try to normalize it
if strings.ContainsRune(repo, '/') {
repoNormalized, err := giturl.NormalizeGithubRepo(repo)
if err != nil {
log.WithError(err).Warnf("Repo not in expected format: %s", repo)
continue
}
normalizedRepos[repoNormalized] = struct{}{}
continue
}
repoNormalized, err := giturl.NormalizeGithubRepo(repo)
if err != nil {
log.WithError(err).Warnf("Repo not in expected format: %s", repo)
// otherwise, assume it's a user and enumerate repositories and gists
if repos, err := s.getReposByUser(ctx, apiClient, repo); err == nil {
for _, repo := range repos {
normalizedRepos[repo] = struct{}{}
}
}
if gists, err := s.getGistsByUser(ctx, apiClient, repo); err == nil {
for _, gist := range gists {
normalizedRepos[gist] = struct{}{}
}
}
common.AddStringSliceItem(repoNormalized, &s.repos)
}

// replace s.repos
s.repos = s.repos[:0]
for key := range normalizedRepos {
s.repos = append(s.repos, key)
}
}
Loading

0 comments on commit edaf1e1

Please sign in to comment.