Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ Add DangerousWorkflow check for imposter commits. #2789

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions checker/raw_result.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,8 @@ const (
DangerousWorkflowScriptInjection DangerousWorkflowType = "scriptInjection"
// DangerousWorkflowUntrustedCheckout represents an untrusted checkout.
DangerousWorkflowUntrustedCheckout DangerousWorkflowType = "untrustedCheckout"
// DangerousWorkflowImposterReference represents an untrusted imposter reference.
DangerousWorkflowImposterReference DangerousWorkflowType = "imposterReference"
)

// DangerousWorkflowData contains raw results
Expand Down
9 changes: 4 additions & 5 deletions checks/dangerous_workflow.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,10 @@ const CheckDangerousWorkflow = "Dangerous-Workflow"

//nolint:gochecknoinits
func init() {
supportedRequestTypes := []checker.RequestType{
checker.FileBased,
checker.CommitBased,
}
if err := registerCheck(CheckDangerousWorkflow, DangerousWorkflow, supportedRequestTypes); err != nil {
Comment on lines -29 to -33
Copy link
Member

@spencerschrock spencerschrock Apr 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The check is still checker.CommitBased in my opinion

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

// NOTE: supportedRequestTypes is nil, because some checks need to make API
// calls in order to verify remote state on GitHub. We may want to look into
// breaking these up into separate sub-checks with their own supportedRequestTypes.
if err := registerCheck(CheckDangerousWorkflow, DangerousWorkflow, nil); err != nil {
// this should never happen
panic(err)
}
Expand Down
2 changes: 2 additions & 0 deletions checks/evaluation/dangerous_workflow.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ func DangerousWorkflow(name string, dl checker.DetailLogger,
text = fmt.Sprintf("untrusted code checkout '%v'", e.File.Snippet)
case checker.DangerousWorkflowScriptInjection:
text = fmt.Sprintf("script injection with untrusted input '%v'", e.File.Snippet)
case checker.DangerousWorkflowImposterReference:
text = fmt.Sprintf("untrusted reference does not belong to repo '%v'", e.File.Snippet)
default:
err := sce.WithMessage(sce.ErrScorecardInternal, "invalid type")
return checker.CreateRuntimeErrorResult(name, err)
Expand Down
120 changes: 116 additions & 4 deletions checks/raw/dangerous_workflow.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package raw

import (
"context"
"fmt"
"regexp"
"strings"
Expand Down Expand Up @@ -69,17 +70,24 @@ var (
func DangerousWorkflow(c clients.RepoClient) (checker.DangerousWorkflowData, error) {
// data is shared across all GitHub workflows.
var data checker.DangerousWorkflowData

v := &validateGitHubActionWorkflowPatterns{
client: c,
}

err := fileparser.OnMatchingFileContentDo(c, fileparser.PathMatcher{
Pattern: ".github/workflows/*",
CaseSensitive: false,
}, validateGitHubActionWorkflowPatterns, &data)
}, v.Validate, &data)

return data, err
}

// Check file content.
var validateGitHubActionWorkflowPatterns fileparser.DoWhileTrueOnFileContent = func(path string,
content []byte,
type validateGitHubActionWorkflowPatterns struct {
client clients.RepoClient
}

func (v *validateGitHubActionWorkflowPatterns) Validate(path string, content []byte,
args ...interface{},
) (bool, error) {
if !fileparser.IsWorkflowFile(path) {
Expand Down Expand Up @@ -117,6 +125,11 @@ var validateGitHubActionWorkflowPatterns fileparser.DoWhileTrueOnFileContent = f
return false, err
}

// 3. Check for imposter commit references from forks
if err := validateImposterCommits(v.client, workflow, path, pdata); err != nil {
return false, err
}

// TODO: Check other dangerous patterns.
return true, nil
}
Expand Down Expand Up @@ -269,3 +282,102 @@ func checkVariablesInScript(script string, pos *actionlint.Pos,
}
return nil
}

func validateImposterCommits(client clients.RepoClient, workflow *actionlint.Workflow, path string,
pdata *checker.DangerousWorkflowData,
) error {
ctx := context.TODO()
cache := &containsCache{
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from the perspective of the cron, it would be nice for the cron if the cache persisted between calls. I'm curious how much overlap there would be.

Not sure what the best approach would be

cc @azeemshaikh38

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for reference, running just the Dangerous-Workflow check on ossf/scorecard consumes 93 core REST quota.

39 for urllib3/urllib3
73 for tensorflow/tensorflow

It's going to be very repo-dependent, based on their CI.

client: client,
cache: make(map[commitKey]bool),
}
for _, job := range workflow.Jobs {
for _, step := range job.Steps {
Comment on lines +294 to +295
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't cover re-usable workflows ( i assume they are also vulnerable). Would it make sense to loop over Uses.Values from both jobs (job.WorkflowCall.Uses.Value) and steps to get them into a slice? And then doing the same analysis over everything in the slice?

https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_iduses

I was trying to check how the code handled this job and it wasn't being checked:

provenance:
needs: [goreleaser]
permissions:
actions: read # To read the workflow path.
id-token: write # To sign the provenance.
contents: write # To add assets to a release.
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v1.4.0

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reusable workflows can't be hash pinned. Would be interesting to check whether the imposter commit holds true for reusable workflows too.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Taken from the docs:

{ref} can be a SHA, a release tag, or a branch name.

And an experiment I did last year for a different reason
https://github.com/spencerschrock/reusable-workflow-caller/blob/aa0ccd0b1d5255d79a7ba32fd729a1db93d2f124/.github/workflows/scorecard.yml#L30

e, ok := step.Exec.(*actionlint.ExecAction)
if !ok {
continue
}

// Parse out repo / SHA.
ref := e.Uses.Value
trimmedRef := strings.TrimPrefix(ref, "actions://")
s := strings.Split(trimmedRef, "@")
if len(s) != 2 {
return sce.WithMessage(sce.ErrorCheckRuntime, fmt.Sprintf("unexpected reference: %s", trimmedRef))
}
// Repo references can include paths (e.g. github/codeql-action/init) - Trim first n
repoSplit := strings.SplitN(s[0], "/", 3)
if len(repoSplit) < 2 {
return sce.WithMessage(sce.ErrorCheckRuntime, fmt.Sprintf("unexpected repo reference: %s", s[0]))
}
repo := strings.Join(repoSplit[:2], "/")
sha := s[1]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we do a check here for if the SHA is actually a sha? Because this could be a tag too? which wouldn't need the imposter commit verification.

The branch protection check uses something like this to check for it:

// as a package level variable
var reCommitSHA = regexp.MustCompile("^[a-f0-9]{40}$")

...
// when testing
if !reCommitSHA.MatchString(foo) {
	continue
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would also need to force sha to lowercase, or account for it in the regex


// Check if repo contains SHA - we use a cache to reduce duplicate calls to GitHub,
// since reachability queries can be expensive.
ok, err := cache.Contains(ctx, repo, sha)
if err != nil {
return err
}
if !ok {
pdata.Workflows = append(pdata.Workflows,
checker.DangerousWorkflow{
File: checker.File{
Path: path,
Type: finding.FileTypeSource,
Offset: fileparser.GetLineNumber(step.Pos),
Snippet: trimmedRef,
},
Job: createJob(job),
Type: checker.DangerousWorkflowImposterReference,
},
)
}
}
}

return nil
}

type commitKey struct {
repo, sha string
}

// containsCache caches response values for whether a commit is contained in a given repo.
// This allows us to deduplicate work if we've already checked this commit.
type containsCache struct {
client clients.RepoClient
cache map[commitKey]bool
}

func (c *containsCache) Contains(ctx context.Context, repo, sha string) (bool, error) {
key := commitKey{
repo: repo,
sha: sha,
}

// See if we've already seen (repo, sha).
v, ok := c.cache[key]
if ok {
return v, nil
}

// If not, query subrepo for commit reachability.
// Make new client for referenced repo.
subclient, err := c.client.NewClient(repo, "", 0)
wlynch marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

commitSHA probably shouldn't be "" here, perhaps clients.HeadSHA?

if err != nil {
return false, sce.WithMessage(sce.ErrorCheckRuntime, err.Error())
}

out, err := checkImposterCommit(subclient, sha)
c.cache[key] = out
return out, err
}

func checkImposterCommit(c clients.RepoClient, target string) (bool, error) {
ok, err := c.ContainsRevision(clients.HeadSHA, target)
if err != nil {
return false, sce.WithMessage(sce.ErrorCheckRuntime, err.Error())
}
return ok, nil
}
14 changes: 14 additions & 0 deletions checks/raw/dangerous_workflow_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,11 @@ func TestGithubDangerousWorkflow(t *testing.T) {
filename: ".github/workflows/github-workflow-dangerous-pattern-untrusted-script-injection-wildcard.yml",
expected: ret{nb: 1},
},
{
name: "imposter commit",
filename: ".github/workflows/github-workflow-imposter-commit.yaml",
expected: ret{nb: 1},
},
}
for _, tt := range tests {
tt := tt // Re-initializing variable so it is not changed while executing the closure below
Expand All @@ -157,7 +162,16 @@ func TestGithubDangerousWorkflow(t *testing.T) {
ctrl := gomock.NewController(t)
mockRepoClient := mockrepo.NewMockRepoClient(ctrl)
mockRepoClient.EXPECT().ListFiles(gomock.Any()).Return([]string{tt.filename}, nil)
mockRepoClient.EXPECT().NewClient(gomock.Any(), gomock.Any(), gomock.Any()).Return(mockRepoClient, nil).AnyTimes()
mockRepoClient.EXPECT().ContainsRevision(gomock.Any(), gomock.Any()).DoAndReturn(
func(base string, target string) (bool, error) {
if target == "imposter" {
return false, nil
}
return true, nil
}).AnyTimes()
mockRepoClient.EXPECT().GetFileContent(gomock.Any()).DoAndReturn(func(file string) ([]byte, error) {
t.Log("mock: ", file)
// This will read the file and return the content
content, err := os.ReadFile("../testdata/" + file)
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copyright 2021 OpenSSF Scorecard Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Docker
jobs:
push:
steps:
- uses: actions/checkout/foo@imposter # imposter commit
18 changes: 18 additions & 0 deletions clients/githubrepo/branches.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package githubrepo
import (
"context"
"fmt"
"net/http"
"strings"
"sync"

Expand Down Expand Up @@ -259,3 +260,20 @@ func getBranchRefFrom(data *branch) *clients.BranchRef {

return branchRef
}

func (handler *branchesHandler) containsRevision(base, target string) (bool, error) {
url := handler.repourl
diff, resp, err := handler.ghClient.Repositories.CompareCommits(handler.ctx, url.owner, url.repo, base, target,
&github.ListOptions{PerPage: 1})
if err != nil {
if resp.StatusCode == http.StatusNotFound {
// NotFound can be returned for some divergent cases: "404 No common ancestor between ..."
return false, nil
}
return false, fmt.Errorf("error during branchesHandler.containsRevision: %w", err)
}

// Target should be behind the base ref if it is considered contained.
ok := strings.EqualFold(diff.GetStatus(), "behind") || strings.EqualFold(diff.GetStatus(), "identical")
return ok, nil
}
23 changes: 23 additions & 0 deletions clients/githubrepo/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,20 @@ func (client *Client) InitRepo(inputRepo clients.Repo, commitSHA string, commitD
return nil
}

// NewClient implements RepoClient.NewClient.
func (client *Client) NewClient(inputRepo, commitSHA string, commitDepth int) (clients.RepoClient, error) {
// Client has some state - extract out the stateless clients to create a new Client.
newClient := newFromClients(client.ctx, client.repoClient, client.graphClient.client, client.tarball.httpClient)
repo, err := MakeGithubRepo(inputRepo)
if err != nil {
return nil, err
}
if err := newClient.InitRepo(repo, commitSHA, commitDepth); err != nil {
return nil, err
}
return newClient, nil
}

// URI implements RepoClient.URI.
func (client *Client) URI() string {
return fmt.Sprintf("github.com/%s/%s", client.repourl.owner, client.repourl.repo)
Expand Down Expand Up @@ -254,6 +268,11 @@ func (client *Client) Close() error {
return client.tarball.cleanup()
}

// ContainsRevision implements RepoClient.ContainsRevision.
func (client *Client) ContainsRevision(base, target string) (bool, error) {
return client.branches.containsRevision(base, target)
}

// CreateGithubRepoClientWithTransport returns a Client which implements RepoClient interface.
func CreateGithubRepoClientWithTransport(ctx context.Context, rt http.RoundTripper) clients.RepoClient {
httpClient := &http.Client{
Expand All @@ -262,6 +281,10 @@ func CreateGithubRepoClientWithTransport(ctx context.Context, rt http.RoundTripp
client := github.NewClient(httpClient)
graphClient := githubv4.NewClient(httpClient)

return newFromClients(ctx, client, graphClient, httpClient)
}

func newFromClients(ctx context.Context, client *github.Client, graphClient *githubv4.Client, httpClient *http.Client) clients.RepoClient {
return &Client{
ctx: ctx,
repoClient: client,
Expand Down
24 changes: 24 additions & 0 deletions clients/gitlabrepo/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,22 @@ func (client *Client) InitRepo(inputRepo clients.Repo, commitSHA string, commitD
return nil
}

// NewClient implements RepoClient.NewClient.
func (client *Client) NewClient(inputRepo, commitSHA string, commitDepth int) (clients.RepoClient, error) {
newClient, err := newFromClient(client.ctx, client.glClient)
if err != nil {
return nil, err
}
repo, err := MakeGitlabRepo(inputRepo)
if err != nil {
return nil, err
}
if err := newClient.InitRepo(repo, commitSHA, commitDepth); err != nil {
wlynch marked this conversation as resolved.
Show resolved Hide resolved
return nil, err
}
return newClient, nil
}

func (client *Client) URI() string {
return fmt.Sprintf("%s/%s/%s", client.repourl.host, client.repourl.owner, client.repourl.project)
}
Expand Down Expand Up @@ -228,12 +244,20 @@ func (client *Client) Close() error {
return nil
}

func (client *Client) ContainsRevision(base, target string) (bool, error) {
return false, fmt.Errorf("ContainsRevision: %w", clients.ErrUnsupportedFeature)
}

func CreateGitlabClientWithToken(ctx context.Context, token string, repo clients.Repo) (clients.RepoClient, error) {
client, err := gitlab.NewClient(token, gitlab.WithBaseURL(repo.Host()))
if err != nil {
return nil, fmt.Errorf("could not create gitlab client with error: %w", err)
}

return newFromClient(ctx, client)
}

func newFromClient(ctx context.Context, client *gitlab.Client) (clients.RepoClient, error) {
return &Client{
ctx: ctx,
glClient: client,
Expand Down
21 changes: 21 additions & 0 deletions clients/localdir/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,10 @@ func (client *localDirClient) Close() error {
return nil
}

func (client *localDirClient) ContainsRevision(base, target string) (bool, error) {
return false, fmt.Errorf("ContainsRevision: %w", clients.ErrUnsupportedFeature)
}

// ListProgrammingLanguages implements RepoClient.ListProgrammingLanguages.
// TODO: add ListProgrammingLanguages support for local directories.
func (client *localDirClient) ListProgrammingLanguages() ([]clients.Language, error) {
Expand All @@ -263,6 +267,23 @@ func (client *localDirClient) GetOrgRepoClient(ctx context.Context) (clients.Rep
return nil, fmt.Errorf("GetOrgRepoClient: %w", clients.ErrUnsupportedFeature)
}

func (client *localDirClient) NewClient(repo string, commitSHA string,
commitDepth int,
) (clients.RepoClient, error) {
newClient := &localDirClient{
ctx: client.ctx,
logger: client.logger,
}
r, err := MakeLocalDirRepo(repo)
if err != nil {
return nil, err
}
if err := newClient.InitRepo(r, commitSHA, commitDepth); err != nil {
return nil, err
}
return newClient, nil
}

// CreateLocalDirClient returns a client which implements RepoClient interface.
func CreateLocalDirClient(ctx context.Context, logger *log.Logger) clients.RepoClient {
return &localDirClient{
Expand Down
Loading