From a024ba54f77ba4abc74e5cc4ecfebb14ec2100e9 Mon Sep 17 00:00:00 2001 From: James Telfer <792299+jamestelfer@users.noreply.github.com> Date: Thu, 26 Oct 2023 00:07:39 +1100 Subject: [PATCH] feat: add code-search repository selection flag (GitHub only) Introduces the `--code-search` flag that allows the provision of GitHub Code Search terms (legacy) to define the selection of repositories to be visited. The repository list will be the unique set of repositories referenced in the code search results. --- cmd/platform.go | 7 +++- internal/scm/github/github.go | 77 +++++++++++++++++++++++++++++++++++ internal/scm/github/util.go | 14 +++++++ 3 files changed, 96 insertions(+), 2 deletions(-) diff --git a/cmd/platform.go b/cmd/platform.go index 724bf717..8ee1e743 100644 --- a/cmd/platform.go +++ b/cmd/platform.go @@ -24,6 +24,7 @@ func configurePlatform(cmd *cobra.Command) { flags.StringP("username", "u", "", "The Bitbucket server username.") flags.StringP("token", "T", "", "The personal access token for the targeting platform. Can also be set using the GITHUB_TOKEN/GITLAB_TOKEN/GITEA_TOKEN/BITBUCKET_SERVER_TOKEN environment variable.") + flags.StringP("code-search", "", "", "Use a code search to find a set of repositories to target. Repeated results from a given repository will be ignored.") flags.StringSliceP("org", "O", nil, "The name of a GitHub organization. All repositories in that organization will be used.") flags.StringSliceP("group", "G", nil, "The name of a GitLab organization. All repositories in that group will be used.") flags.StringSliceP("user", "U", nil, "The name of a user. All repositories owned by that user will be used.") @@ -120,6 +121,7 @@ func getVersionController(flag *flag.FlagSet, verifyFlags bool, readOnly bool) ( func createGithubClient(flag *flag.FlagSet, verifyFlags bool, readOnly bool) (multigitter.VersionController, error) { gitBaseURL, _ := flag.GetString("base-url") + codeSearch, _ := flag.GetString("code-search") orgs, _ := flag.GetStringSlice("org") users, _ := flag.GetStringSlice("user") repos, _ := flag.GetStringSlice("repo") @@ -130,8 +132,8 @@ func createGithubClient(flag *flag.FlagSet, verifyFlags bool, readOnly bool) (mu sshAuth, _ := flag.GetBool("ssh-auth") skipForks, _ := flag.GetBool("skip-forks") - if verifyFlags && len(orgs) == 0 && len(users) == 0 && len(repos) == 0 && repoSearch == "" { - return nil, errors.New("no organization, user, repo or repo-search set") + if verifyFlags && len(orgs) == 0 && len(users) == 0 && len(repos) == 0 && repoSearch == "" && codeSearch == "" { + return nil, errors.New("no organization, user, repo, repo-search or code-search set") } token, err := getToken(flag) @@ -166,6 +168,7 @@ func createGithubClient(flag *flag.FlagSet, verifyFlags bool, readOnly bool) (mu BaseURL: gitBaseURL, TransportMiddleware: http.NewLoggingRoundTripper, RepoListing: github.RepositoryListing{ + CodeSearch: codeSearch, Organizations: orgs, Users: users, Repositories: repoRefs, diff --git a/internal/scm/github/github.go b/internal/scm/github/github.go index 12edd677..cd2511f1 100755 --- a/internal/scm/github/github.go +++ b/internal/scm/github/github.go @@ -105,6 +105,7 @@ type Github struct { // RepositoryListing contains information about which repositories that should be fetched type RepositoryListing struct { + CodeSearch string Organizations []string Users []string Repositories []RepositoryReference @@ -222,6 +223,14 @@ func (g *Github) getRepositories(ctx context.Context) ([]*github.Repository, err allRepos = append(allRepos, repos...) } + if len(g.CodeSearch) > 0 { + repos, err := g.getCodeSearchRepositories(ctx, g.CodeSearch) + if err != nil { + return nil, errors.Wrapf(err, "could not get code search results for '%s'", g.CodeSearch) + } + allRepos = append(allRepos, repos...) + } + // Remove duplicate repos repoMap := map[string]*github.Repository{} for _, repo := range allRepos { @@ -333,6 +342,74 @@ func (g *Github) getSearchRepositories(ctx context.Context, search string) ([]*g return repos, nil } +func (g *Github) getCodeSearchRepositories(ctx context.Context, search string) ([]*github.Repository, error) { + resultRepos := make(map[string]RepositoryReference) + + i := 1 + for { + rr, _, err := retry(ctx, func() ([]*github.CodeResult, *github.Response, error) { + // Include forks in the search, same as repository searches + query := "fork:true " + search + rr, resp, err := g.ghClient.Search.Code(ctx, query, &github.SearchOptions{ + ListOptions: github.ListOptions{ + Page: i, + PerPage: 100, + }, + }) + + if err != nil { + return nil, nil, err + } + + if rr.IncompleteResults != nil && *rr.IncompleteResults { + // can occur when search times out on the server: for now, fail instead + // of handling the issue + return nil, nil, fmt.Errorf("search results incomplete") + } + + return rr.CodeResults, resp, nil + }) + + if err != nil { + return nil, err + } + + for _, r := range rr { + repo := r.Repository + + resultRepos[repo.GetFullName()] = RepositoryReference{ + OwnerName: repo.GetOwner().GetLogin(), + Name: repo.GetName(), + } + } + + if len(rr) != 100 { + break + } + i++ + } + + // Code search does not return full details (like permissions). So for each + // repo discovered, we have to query it again. + + repoNames := mapValues(resultRepos) + return g.getAllRepositories(ctx, repoNames) +} + +func (g *Github) getAllRepositories(ctx context.Context, repoRefs []RepositoryReference) ([]*github.Repository, error) { + var repos []*github.Repository + + for _, ref := range repoRefs { + r, err := g.getRepository(ctx, ref) + if err != nil { + return nil, err + } + repos = append(repos, r) + } + + return repos, nil +} + func (g *Github) getRepository(ctx context.Context, repoRef RepositoryReference) (*github.Repository, error) { repo, _, err := retry(ctx, func() (*github.Repository, *github.Response, error) { return g.ghClient.Repositories.Get(ctx, repoRef.OwnerName, repoRef.Name) diff --git a/internal/scm/github/util.go b/internal/scm/github/util.go index 2ef96b11..110c5402 100644 --- a/internal/scm/github/util.go +++ b/internal/scm/github/util.go @@ -44,3 +44,17 @@ func chunkSlice[T any](stack []T, chunkSize int) [][]T { return append(chunks, stack) } + +// mapValues returns a new array containing all the values of the supplied map, +// in iteration (i.e. non-deterministic) order. +func mapValues[K comparable, V any](source map[K]V) []V { + values := make([]V, len(source)) + + i := 0 + for _, v := range source { + values[i] = v + i++ + } + + return values +}