diff --git a/checker/raw_result.go b/checker/raw_result.go index 5ec49e47998..dba45660c2a 100644 --- a/checker/raw_result.go +++ b/checker/raw_result.go @@ -308,6 +308,8 @@ const ( DangerousWorkflowScriptInjection DangerousWorkflowType = "scriptInjection" // DangerousWorkflowUntrustedCheckout represents an untrusted checkout. DangerousWorkflowUntrustedCheckout DangerousWorkflowType = "untrustedCheckout" + // DangerousWorkflowImposterReference represents an untrusted imposter reference. + DangerousWorkflowImposterReference DangerousWorkflowType = "imposterReference" ) // DangerousWorkflowData contains raw results diff --git a/checks/dangerous_workflow.go b/checks/dangerous_workflow.go index 5b12f488df4..69c35a62ff6 100644 --- a/checks/dangerous_workflow.go +++ b/checks/dangerous_workflow.go @@ -26,11 +26,10 @@ const CheckDangerousWorkflow = "Dangerous-Workflow" //nolint:gochecknoinits func init() { - supportedRequestTypes := []checker.RequestType{ - checker.FileBased, - checker.CommitBased, - } - if err := registerCheck(CheckDangerousWorkflow, DangerousWorkflow, supportedRequestTypes); err != nil { + // NOTE: supportedRequestTypes is nil, because some checks need to make API + // calls in order to verify remote state on GitHub. We may want to look into + // breaking these up into separate sub-checks with their own supportedRequestTypes. + if err := registerCheck(CheckDangerousWorkflow, DangerousWorkflow, nil); err != nil { // this should never happen panic(err) } diff --git a/checks/evaluation/dangerous_workflow.go b/checks/evaluation/dangerous_workflow.go index be37c6cdee5..63488b97f87 100644 --- a/checks/evaluation/dangerous_workflow.go +++ b/checks/evaluation/dangerous_workflow.go @@ -37,6 +37,8 @@ func DangerousWorkflow(name string, dl checker.DetailLogger, text = fmt.Sprintf("untrusted code checkout '%v'", e.File.Snippet) case checker.DangerousWorkflowScriptInjection: text = fmt.Sprintf("script injection with untrusted input '%v'", e.File.Snippet) + case checker.DangerousWorkflowImposterReference: + text = fmt.Sprintf("untrusted reference does not belong to repo '%v'", e.File.Snippet) default: err := sce.WithMessage(sce.ErrScorecardInternal, "invalid type") return checker.CreateRuntimeErrorResult(name, err) diff --git a/checks/raw/dangerous_workflow.go b/checks/raw/dangerous_workflow.go index 7db760e715c..8aba8329046 100644 --- a/checks/raw/dangerous_workflow.go +++ b/checks/raw/dangerous_workflow.go @@ -15,6 +15,7 @@ package raw import ( + "context" "fmt" "regexp" "strings" @@ -69,17 +70,24 @@ var ( func DangerousWorkflow(c clients.RepoClient) (checker.DangerousWorkflowData, error) { // data is shared across all GitHub workflows. var data checker.DangerousWorkflowData + + v := &validateGitHubActionWorkflowPatterns{ + client: c, + } + err := fileparser.OnMatchingFileContentDo(c, fileparser.PathMatcher{ Pattern: ".github/workflows/*", CaseSensitive: false, - }, validateGitHubActionWorkflowPatterns, &data) + }, v.Validate, &data) return data, err } -// Check file content. -var validateGitHubActionWorkflowPatterns fileparser.DoWhileTrueOnFileContent = func(path string, - content []byte, +type validateGitHubActionWorkflowPatterns struct { + client clients.RepoClient +} + +func (v *validateGitHubActionWorkflowPatterns) Validate(path string, content []byte, args ...interface{}, ) (bool, error) { if !fileparser.IsWorkflowFile(path) { @@ -117,6 +125,11 @@ var validateGitHubActionWorkflowPatterns fileparser.DoWhileTrueOnFileContent = f return false, err } + // 3. Check for imposter commit references from forks + if err := validateImposterCommits(v.client, workflow, path, pdata); err != nil { + return false, err + } + // TODO: Check other dangerous patterns. return true, nil } @@ -269,3 +282,102 @@ func checkVariablesInScript(script string, pos *actionlint.Pos, } return nil } + +func validateImposterCommits(client clients.RepoClient, workflow *actionlint.Workflow, path string, + pdata *checker.DangerousWorkflowData, +) error { + ctx := context.TODO() + cache := &containsCache{ + client: client, + cache: make(map[commitKey]bool), + } + for _, job := range workflow.Jobs { + for _, step := range job.Steps { + e, ok := step.Exec.(*actionlint.ExecAction) + if !ok { + continue + } + + // Parse out repo / SHA. + ref := e.Uses.Value + trimmedRef := strings.TrimPrefix(ref, "actions://") + s := strings.Split(trimmedRef, "@") + if len(s) != 2 { + return sce.WithMessage(sce.ErrorCheckRuntime, fmt.Sprintf("unexpected reference: %s", trimmedRef)) + } + // Repo references can include paths (e.g. github/codeql-action/init) - Trim first n + repoSplit := strings.SplitN(s[0], "/", 3) + if len(repoSplit) < 2 { + return sce.WithMessage(sce.ErrorCheckRuntime, fmt.Sprintf("unexpected repo reference: %s", s[0])) + } + repo := strings.Join(repoSplit[:2], "/") + sha := s[1] + + // Check if repo contains SHA - we use a cache to reduce duplicate calls to GitHub, + // since reachability queries can be expensive. + ok, err := cache.Contains(ctx, repo, sha) + if err != nil { + return err + } + if !ok { + pdata.Workflows = append(pdata.Workflows, + checker.DangerousWorkflow{ + File: checker.File{ + Path: path, + Type: finding.FileTypeSource, + Offset: fileparser.GetLineNumber(step.Pos), + Snippet: trimmedRef, + }, + Job: createJob(job), + Type: checker.DangerousWorkflowImposterReference, + }, + ) + } + } + } + + return nil +} + +type commitKey struct { + repo, sha string +} + +// containsCache caches response values for whether a commit is contained in a given repo. +// This allows us to deduplicate work if we've already checked this commit. +type containsCache struct { + client clients.RepoClient + cache map[commitKey]bool +} + +func (c *containsCache) Contains(ctx context.Context, repo, sha string) (bool, error) { + key := commitKey{ + repo: repo, + sha: sha, + } + + // See if we've already seen (repo, sha). + v, ok := c.cache[key] + if ok { + return v, nil + } + + // If not, query subrepo for commit reachability. + // Make new client for referenced repo. + subclient, err := c.client.NewClient(repo, "", 0) + if err != nil { + return false, sce.WithMessage(sce.ErrorCheckRuntime, err.Error()) + } + + out, err := checkImposterCommit(subclient, sha) + c.cache[key] = out + return out, err +} + +func checkImposterCommit(c clients.RepoClient, target string) (bool, error) { + ok, err := c.ContainsRevision(clients.HeadSHA, target) + if err != nil { + return false, sce.WithMessage(sce.ErrorCheckRuntime, err.Error()) + } + return ok, nil +} diff --git a/checks/raw/dangerous_workflow_test.go b/checks/raw/dangerous_workflow_test.go index 3e0e43a65ce..ae0db35be2e 100644 --- a/checks/raw/dangerous_workflow_test.go +++ b/checks/raw/dangerous_workflow_test.go @@ -148,6 +148,11 @@ func TestGithubDangerousWorkflow(t *testing.T) { filename: ".github/workflows/github-workflow-dangerous-pattern-untrusted-script-injection-wildcard.yml", expected: ret{nb: 1}, }, + { + name: "imposter commit", + filename: ".github/workflows/github-workflow-imposter-commit.yaml", + expected: ret{nb: 1}, + }, } for _, tt := range tests { tt := tt // Re-initializing variable so it is not changed while executing the closure below @@ -157,7 +162,16 @@ func TestGithubDangerousWorkflow(t *testing.T) { ctrl := gomock.NewController(t) mockRepoClient := mockrepo.NewMockRepoClient(ctrl) mockRepoClient.EXPECT().ListFiles(gomock.Any()).Return([]string{tt.filename}, nil) + mockRepoClient.EXPECT().NewClient(gomock.Any(), gomock.Any(), gomock.Any()).Return(mockRepoClient, nil).AnyTimes() + mockRepoClient.EXPECT().ContainsRevision(gomock.Any(), gomock.Any()).DoAndReturn( + func(base string, target string) (bool, error) { + if target == "imposter" { + return false, nil + } + return true, nil + }).AnyTimes() mockRepoClient.EXPECT().GetFileContent(gomock.Any()).DoAndReturn(func(file string) ([]byte, error) { + t.Log("mock: ", file) // This will read the file and return the content content, err := os.ReadFile("../testdata/" + file) if err != nil { diff --git a/checks/testdata/.github/workflows/github-workflow-imposter-commit.yaml b/checks/testdata/.github/workflows/github-workflow-imposter-commit.yaml new file mode 100644 index 00000000000..5cdb95d6031 --- /dev/null +++ b/checks/testdata/.github/workflows/github-workflow-imposter-commit.yaml @@ -0,0 +1,19 @@ +# Copyright 2021 OpenSSF Scorecard Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Docker +jobs: + push: + steps: + - uses: actions/checkout/foo@imposter # imposter commit diff --git a/clients/githubrepo/branches.go b/clients/githubrepo/branches.go index 027ca56be19..93bd2fc84b8 100644 --- a/clients/githubrepo/branches.go +++ b/clients/githubrepo/branches.go @@ -17,6 +17,7 @@ package githubrepo import ( "context" "fmt" + "net/http" "strings" "sync" @@ -259,3 +260,20 @@ func getBranchRefFrom(data *branch) *clients.BranchRef { return branchRef } + +func (handler *branchesHandler) containsRevision(base, target string) (bool, error) { + url := handler.repourl + diff, resp, err := handler.ghClient.Repositories.CompareCommits(handler.ctx, url.owner, url.repo, base, target, + &github.ListOptions{PerPage: 1}) + if err != nil { + if resp.StatusCode == http.StatusNotFound { + // NotFound can be returned for some divergent cases: "404 No common ancestor between ..." + return false, nil + } + return false, fmt.Errorf("error during branchesHandler.containsRevision: %w", err) + } + + // Target should be behind the base ref if it is considered contained. + ok := strings.EqualFold(diff.GetStatus(), "behind") || strings.EqualFold(diff.GetStatus(), "identical") + return ok, nil +} diff --git a/clients/githubrepo/client.go b/clients/githubrepo/client.go index e6680efd8e2..5b614fbd437 100644 --- a/clients/githubrepo/client.go +++ b/clients/githubrepo/client.go @@ -124,6 +124,20 @@ func (client *Client) InitRepo(inputRepo clients.Repo, commitSHA string, commitD return nil } +// NewClient implements RepoClient.NewClient. +func (client *Client) NewClient(inputRepo, commitSHA string, commitDepth int) (clients.RepoClient, error) { + // Client has some state - extract out the stateless clients to create a new Client. + newClient := newFromClients(client.ctx, client.repoClient, client.graphClient.client, client.tarball.httpClient) + repo, err := MakeGithubRepo(inputRepo) + if err != nil { + return nil, err + } + if err := newClient.InitRepo(repo, commitSHA, commitDepth); err != nil { + return nil, err + } + return newClient, nil +} + // URI implements RepoClient.URI. func (client *Client) URI() string { return fmt.Sprintf("github.com/%s/%s", client.repourl.owner, client.repourl.repo) @@ -254,6 +268,11 @@ func (client *Client) Close() error { return client.tarball.cleanup() } +// ContainsRevision implements RepoClient.ContainsRevision. +func (client *Client) ContainsRevision(base, target string) (bool, error) { + return client.branches.containsRevision(base, target) +} + // CreateGithubRepoClientWithTransport returns a Client which implements RepoClient interface. func CreateGithubRepoClientWithTransport(ctx context.Context, rt http.RoundTripper) clients.RepoClient { httpClient := &http.Client{ @@ -262,6 +281,10 @@ func CreateGithubRepoClientWithTransport(ctx context.Context, rt http.RoundTripp client := github.NewClient(httpClient) graphClient := githubv4.NewClient(httpClient) + return newFromClients(ctx, client, graphClient, httpClient) +} + +func newFromClients(ctx context.Context, client *github.Client, graphClient *githubv4.Client, httpClient *http.Client) clients.RepoClient { return &Client{ ctx: ctx, repoClient: client, diff --git a/clients/gitlabrepo/client.go b/clients/gitlabrepo/client.go index f42436dec33..7db42b39e22 100644 --- a/clients/gitlabrepo/client.go +++ b/clients/gitlabrepo/client.go @@ -135,6 +135,22 @@ func (client *Client) InitRepo(inputRepo clients.Repo, commitSHA string, commitD return nil } +// NewClient implements RepoClient.NewClient. +func (client *Client) NewClient(inputRepo, commitSHA string, commitDepth int) (clients.RepoClient, error) { + newClient, err := newFromClient(client.ctx, client.glClient) + if err != nil { + return nil, err + } + repo, err := MakeGitlabRepo(inputRepo) + if err != nil { + return nil, err + } + if err := newClient.InitRepo(repo, commitSHA, commitDepth); err != nil { + return nil, err + } + return newClient, nil +} + func (client *Client) URI() string { return fmt.Sprintf("%s/%s/%s", client.repourl.host, client.repourl.owner, client.repourl.project) } @@ -228,12 +244,20 @@ func (client *Client) Close() error { return nil } +func (client *Client) ContainsRevision(base, target string) (bool, error) { + return false, fmt.Errorf("ContainsRevision: %w", clients.ErrUnsupportedFeature) +} + func CreateGitlabClientWithToken(ctx context.Context, token string, repo clients.Repo) (clients.RepoClient, error) { client, err := gitlab.NewClient(token, gitlab.WithBaseURL(repo.Host())) if err != nil { return nil, fmt.Errorf("could not create gitlab client with error: %w", err) } + return newFromClient(ctx, client) +} + +func newFromClient(ctx context.Context, client *gitlab.Client) (clients.RepoClient, error) { return &Client{ ctx: ctx, glClient: client, diff --git a/clients/localdir/client.go b/clients/localdir/client.go index 45cf09924ae..256ee27295c 100644 --- a/clients/localdir/client.go +++ b/clients/localdir/client.go @@ -243,6 +243,10 @@ func (client *localDirClient) Close() error { return nil } +func (client *localDirClient) ContainsRevision(base, target string) (bool, error) { + return false, fmt.Errorf("ContainsRevision: %w", clients.ErrUnsupportedFeature) +} + // ListProgrammingLanguages implements RepoClient.ListProgrammingLanguages. // TODO: add ListProgrammingLanguages support for local directories. func (client *localDirClient) ListProgrammingLanguages() ([]clients.Language, error) { @@ -263,6 +267,23 @@ func (client *localDirClient) GetOrgRepoClient(ctx context.Context) (clients.Rep return nil, fmt.Errorf("GetOrgRepoClient: %w", clients.ErrUnsupportedFeature) } +func (client *localDirClient) NewClient(repo string, commitSHA string, + commitDepth int, +) (clients.RepoClient, error) { + newClient := &localDirClient{ + ctx: client.ctx, + logger: client.logger, + } + r, err := MakeLocalDirRepo(repo) + if err != nil { + return nil, err + } + if err := newClient.InitRepo(r, commitSHA, commitDepth); err != nil { + return nil, err + } + return newClient, nil +} + // CreateLocalDirClient returns a client which implements RepoClient interface. func CreateLocalDirClient(ctx context.Context, logger *log.Logger) clients.RepoClient { return &localDirClient{ diff --git a/clients/mockclients/repo_client.go b/clients/mockclients/repo_client.go index ced212b6390..20c6837f9d7 100644 --- a/clients/mockclients/repo_client.go +++ b/clients/mockclients/repo_client.go @@ -20,7 +20,7 @@ package mockrepo import ( - "context" + context "context" reflect "reflect" time "time" @@ -65,6 +65,21 @@ func (mr *MockRepoClientMockRecorder) Close() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Close", reflect.TypeOf((*MockRepoClient)(nil).Close)) } +// ContainsRevision mocks base method. +func (m *MockRepoClient) ContainsRevision(base, target string) (bool, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ContainsRevision", base, target) + ret0, _ := ret[0].(bool) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ContainsRevision indicates an expected call of ContainsRevision. +func (mr *MockRepoClientMockRecorder) ContainsRevision(base, target interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ContainsRevision", reflect.TypeOf((*MockRepoClient)(nil).ContainsRevision), base, target) +} + // GetBranch mocks base method. func (m *MockRepoClient) GetBranch(branch string) (*clients.BranchRef, error) { m.ctrl.T.Helper() @@ -141,14 +156,20 @@ func (mr *MockRepoClientMockRecorder) GetFileContent(filename interface{}) *gomo } // GetOrgRepoClient mocks base method. -func (m *MockRepoClient) GetOrgRepoClient(ctx context.Context) (clients.RepoClient, error) { +func (m *MockRepoClient) GetOrgRepoClient(arg0 context.Context) (clients.RepoClient, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetOrgRepoClient") + ret := m.ctrl.Call(m, "GetOrgRepoClient", arg0) ret0, _ := ret[0].(clients.RepoClient) ret1, _ := ret[1].(error) return ret0, ret1 } +// GetOrgRepoClient indicates an expected call of GetOrgRepoClient. +func (mr *MockRepoClientMockRecorder) GetOrgRepoClient(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetOrgRepoClient", reflect.TypeOf((*MockRepoClient)(nil).GetOrgRepoClient), arg0) +} + // InitRepo mocks base method. func (m *MockRepoClient) InitRepo(repo clients.Repo, commitSHA string, commitDepth int) error { m.ctrl.T.Helper() @@ -358,6 +379,21 @@ func (mr *MockRepoClientMockRecorder) LocalPath() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LocalPath", reflect.TypeOf((*MockRepoClient)(nil).LocalPath)) } +// NewClient mocks base method. +func (m *MockRepoClient) NewClient(repo, commitSHA string, commitDepth int) (clients.RepoClient, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "NewClient", repo, commitSHA, commitDepth) + ret0, _ := ret[0].(clients.RepoClient) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// NewClient indicates an expected call of NewClient. +func (mr *MockRepoClientMockRecorder) NewClient(repo, commitSHA, commitDepth interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "NewClient", reflect.TypeOf((*MockRepoClient)(nil).NewClient), repo, commitSHA, commitDepth) +} + // Search mocks base method. func (m *MockRepoClient) Search(request clients.SearchRequest) (clients.SearchResponse, error) { m.ctrl.T.Helper() diff --git a/clients/ossfuzz/client.go b/clients/ossfuzz/client.go index 02799fe5d50..4a6fed04611 100644 --- a/clients/ossfuzz/client.go +++ b/clients/ossfuzz/client.go @@ -159,6 +159,11 @@ func (c *client) InitRepo(inputRepo clients.Repo, commitSHA string, commitDepth return fmt.Errorf("InitRepo: %w", clients.ErrUnsupportedFeature) } +// InitRepo implements RepoClient.InitRepo. +func (c *client) NewClient(inputRepo, commitSHA string, commitDepth int) (clients.RepoClient, error) { + return nil, fmt.Errorf("NewClient: %w", clients.ErrUnsupportedFeature) +} + // IsArchived implements RepoClient.IsArchived. func (c *client) IsArchived() (bool, error) { return false, fmt.Errorf("IsArchived: %w", clients.ErrUnsupportedFeature) @@ -263,3 +268,8 @@ func (c *client) ListLicenses() ([]clients.License, error) { func (c *client) GetCreatedAt() (time.Time, error) { return time.Time{}, fmt.Errorf("GetCreatedAt: %w", clients.ErrUnsupportedFeature) } + +// ContainsRevision implements RepoClient.ContainsRevision. +func (c *client) ContainsRevision(base, target string) (bool, error) { + return false, fmt.Errorf("ContainsRevision: %w", clients.ErrUnsupportedFeature) +} diff --git a/clients/repo_client.go b/clients/repo_client.go index a5804b3f7d7..bf28b660b1c 100644 --- a/clients/repo_client.go +++ b/clients/repo_client.go @@ -30,6 +30,7 @@ const HeadSHA = "HEAD" // RepoClient interface is used by Scorecard checks to access a repo. type RepoClient interface { InitRepo(repo Repo, commitSHA string, commitDepth int) error + NewClient(repo string, commitSHA string, commitDepth int) (RepoClient, error) URI() string IsArchived() (bool, error) ListFiles(predicate func(string) (bool, error)) ([]string, error) @@ -52,6 +53,7 @@ type RepoClient interface { ListStatuses(ref string) ([]Status, error) ListWebhooks() ([]Webhook, error) ListProgrammingLanguages() ([]Language, error) + ContainsRevision(base, target string) (bool, error) Search(request SearchRequest) (SearchResponse, error) SearchCommits(request SearchCommitsOptions) ([]Commit, error) Close() error diff --git a/e2e/dangerous_workflow_test.go b/e2e/dangerous_workflow_test.go index ecdb21e0727..2e773951f0b 100644 --- a/e2e/dangerous_workflow_test.go +++ b/e2e/dangerous_workflow_test.go @@ -86,10 +86,15 @@ var _ = Describe("E2E TEST:"+checks.CheckTokenPermissions, func() { Expect(err).Should(BeNil()) defer os.RemoveAll(tmpDir) - _, e := git.PlainClone(tmpDir, false, &git.CloneOptions{ - URL: "http://github.com/ossf-tests/scorecard-check-dangerous-workflow-e2e", - }) - Expect(e).Should(BeNil()) + for _, repo := range []string{ + "http://github.com/actions/checkout", + "http://github.com/ossf-tests/scorecard-check-dangerous-workflow-e2e", + } { + _, e := git.PlainClone(tmpDir, false, &git.CloneOptions{ + URL: repo, + }) + Expect(e).Should(BeNil()) + } repo, err := localdir.MakeLocalDirRepo(tmpDir) Expect(err).Should(BeNil())