diff --git a/cmd/platform.go b/cmd/platform.go index 724bf717..8ee1e743 100644 --- a/cmd/platform.go +++ b/cmd/platform.go @@ -24,6 +24,7 @@ func configurePlatform(cmd *cobra.Command) { flags.StringP("username", "u", "", "The Bitbucket server username.") flags.StringP("token", "T", "", "The personal access token for the targeting platform. Can also be set using the GITHUB_TOKEN/GITLAB_TOKEN/GITEA_TOKEN/BITBUCKET_SERVER_TOKEN environment variable.") + flags.StringP("code-search", "", "", "Use a code search to find a set of repositories to target. Repeated results from a given repository will be ignored.") flags.StringSliceP("org", "O", nil, "The name of a GitHub organization. All repositories in that organization will be used.") flags.StringSliceP("group", "G", nil, "The name of a GitLab organization. All repositories in that group will be used.") flags.StringSliceP("user", "U", nil, "The name of a user. All repositories owned by that user will be used.") @@ -120,6 +121,7 @@ func getVersionController(flag *flag.FlagSet, verifyFlags bool, readOnly bool) ( func createGithubClient(flag *flag.FlagSet, verifyFlags bool, readOnly bool) (multigitter.VersionController, error) { gitBaseURL, _ := flag.GetString("base-url") + codeSearch, _ := flag.GetString("code-search") orgs, _ := flag.GetStringSlice("org") users, _ := flag.GetStringSlice("user") repos, _ := flag.GetStringSlice("repo") @@ -130,8 +132,8 @@ func createGithubClient(flag *flag.FlagSet, verifyFlags bool, readOnly bool) (mu sshAuth, _ := flag.GetBool("ssh-auth") skipForks, _ := flag.GetBool("skip-forks") - if verifyFlags && len(orgs) == 0 && len(users) == 0 && len(repos) == 0 && repoSearch == "" { - return nil, errors.New("no organization, user, repo or repo-search set") + if verifyFlags && len(orgs) == 0 && len(users) == 0 && len(repos) == 0 && repoSearch == "" && codeSearch == "" { + return nil, errors.New("no organization, user, repo, repo-search or code-search set") } token, err := getToken(flag) @@ -166,6 
+168,7 @@ func createGithubClient(flag *flag.FlagSet, verifyFlags bool, readOnly bool) (mu BaseURL: gitBaseURL, TransportMiddleware: http.NewLoggingRoundTripper, RepoListing: github.RepositoryListing{ + CodeSearch: codeSearch, Organizations: orgs, Users: users, Repositories: repoRefs, diff --git a/internal/scm/github/github.go b/internal/scm/github/github.go index 12edd677..cd2511f1 100755 --- a/internal/scm/github/github.go +++ b/internal/scm/github/github.go @@ -105,6 +105,7 @@ type Github struct { // RepositoryListing contains information about which repositories that should be fetched type RepositoryListing struct { + CodeSearch string Organizations []string Users []string Repositories []RepositoryReference @@ -222,6 +223,14 @@ func (g *Github) getRepositories(ctx context.Context) ([]*github.Repository, err allRepos = append(allRepos, repos...) } + if len(g.CodeSearch) > 0 { + repos, err := g.getCodeSearchRepositories(ctx, g.CodeSearch) + if err != nil { + return nil, errors.Wrapf(err, "could not get code search results for '%s'", g.CodeSearch) + } + allRepos = append(allRepos, repos...) 
+	}
+
 	// Remove duplicate repos
 	repoMap := map[string]*github.Repository{}
 	for _, repo := range allRepos {
@@ -333,6 +342,95 @@ func (g *Github) getSearchRepositories(ctx context.Context, search string) ([]*g
 	return repos, nil
 }
 
+// getCodeSearchRepositories runs a GitHub code search and returns the
+// repositories that produced at least one hit. Forks are included, and a
+// repository is returned once however many of its files matched.
+func (g *Github) getCodeSearchRepositories(ctx context.Context, search string) ([]*github.Repository, error) {
+	const perPage = 100
+
+	// Include forks in the search, same as repository searches
+	query := "fork:true " + search
+
+	// Keyed by full name so repeated hits in one repository collapse to one entry.
+	resultRepos := make(map[string]RepositoryReference)
+
+	page := 1
+	for {
+		// Filled in from the response's pagination data on a successful request;
+		// 0 means there is no further page.
+		nextPage := 0
+
+		results, _, err := retry(ctx, func() ([]*github.CodeResult, *github.Response, error) {
+			found, resp, err := g.ghClient.Search.Code(ctx, query, &github.SearchOptions{
+				ListOptions: github.ListOptions{
+					Page:    page,
+					PerPage: perPage,
+				},
+			})
+			if err != nil {
+				return nil, nil, err
+			}
+
+			if found.IncompleteResults != nil && *found.IncompleteResults {
+				// can occur when search times out on the server: for now, fail instead
+				// of handling the issue
+				return nil, nil, fmt.Errorf("search results incomplete")
+			}
+
+			if resp != nil {
+				nextPage = resp.NextPage
+			}
+			return found.CodeResults, resp, nil
+		})
+		if err != nil {
+			return nil, err
+		}
+
+		for _, r := range results {
+			repo := r.Repository
+
+			fullName := repo.GetFullName()
+			if fullName == "" {
+				// Without a name there is nothing to look up later.
+				continue
+			}
+
+			resultRepos[fullName] = RepositoryReference{
+				OwnerName: repo.GetOwner().GetLogin(),
+				Name:      repo.GetName(),
+			}
+		}
+
+		// Stop when the API reports no next page. Comparing the page length with
+		// perPage instead would request one page too many whenever the last page
+		// happens to be full.
+		if nextPage == 0 {
+			break
+		}
+		page = nextPage
+	}
+
+	// Code search does not return full details (like permissions). So for each
+	// repo discovered, we have to query it again.
+	return g.getAllRepositories(ctx, mapValues(resultRepos))
+}
+
+// getAllRepositories fetches every referenced repository with getRepository, in
+// the order given. The first failure aborts the loop and is returned as is.
+func (g *Github) getAllRepositories(ctx context.Context, repoRefs []RepositoryReference) ([]*github.Repository, error) {
+	repos := make([]*github.Repository, 0, len(repoRefs))
+
+	for _, ref := range repoRefs {
+		r, err := g.getRepository(ctx, ref)
+		if err != nil {
+			return nil, err
+		}
+		repos = append(repos, r)
+	}
+
+	return repos, nil
+}
+
 func (g *Github) getRepository(ctx context.Context, repoRef RepositoryReference) (*github.Repository, error) {
 	repo, _, err := retry(ctx, func() (*github.Repository, *github.Response, error) {
 		return g.ghClient.Repositories.Get(ctx, repoRef.OwnerName, repoRef.Name)
diff --git a/internal/scm/github/util.go b/internal/scm/github/util.go
index 2ef96b11..110c5402 100644
--- a/internal/scm/github/util.go
+++ b/internal/scm/github/util.go
@@ -44,3 +44,14 @@ func chunkSlice[T any](stack []T, chunkSize int) [][]T {
 
 	return append(chunks, stack)
 }
+
+// mapValues returns a new slice containing all the values of the supplied map,
+// in iteration (i.e. non-deterministic) order.
+func mapValues[K comparable, V any](source map[K]V) []V {
+	values := make([]V, 0, len(source))
+	for _, v := range source {
+		values = append(values, v)
+	}
+
+	return values
+}