Skip to content

Commit

Permalink
use release and tag APIs to enhance imposter commit verification
Browse files Browse the repository at this point in the history
The old verification process assumed a commit was always in the default
branch, or came from a select number of hardcoded release branches. This
was error prone whenever new releases branches were used by some of the
actions we check.

The commit verification workflow now uses dynamic data to determine
which branches to check. The steps are now:
1. Check if the commit is one of the latest 100 tags, which should be
   the most common case.
2. Check the default branch (unchanged from before).
3. Check branches associated with the most recent releases. This removes
   the hardcoded checks and should require fewer updates in the future.

Signed-off-by: Spencer Schrock <sschrock@google.com>
  • Loading branch information
spencerschrock committed Sep 24, 2024
1 parent 2f11571 commit c67d706
Show file tree
Hide file tree
Showing 7 changed files with 227 additions and 48 deletions.
223 changes: 194 additions & 29 deletions app/server/github_verifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,54 +16,72 @@ package server

import (
"context"
"errors"
"fmt"
"net/http"
"strconv"
"strings"

"github.com/google/go-github/v65/github"
"golang.org/x/mod/semver"
)

var errInvalidCodeQLVersion = errors.New("codeql version invalid")

type githubVerifier struct {
ctx context.Context
client *github.Client
ctx context.Context
client *github.Client
cachedCommits map[commit]bool
codeqlActionMajor string
}

// contains makes two "core" API calls: one for the default branch, and one to compare the target hash to a branch
// if the repo is "github/codeql-action", also check releases/v1 before failing.
// returns a new githubVerifier, with an instantiated map.
// most uses should use this constructor.
func newGitHubVerifier(ctx context.Context, client *github.Client) *githubVerifier {
verifier := githubVerifier{
ctx: ctx,
client: client,
}
verifier.cachedCommits = map[commit]bool{}
return &verifier
}

// contains may make several "core" API calls:
// - one to get repository tags (we expect most cases to only need this call)
// - two to get the default branch name and check it
// - up to 10 requests when checking previous release branches
func (g *githubVerifier) contains(c commit) (bool, error) {
owner, repo, hash := c.owner, c.repo, c.hash
defaultBranch, err := g.defaultBranch(owner, repo)
// return the cached answer if we've seen this commit before
if contains, ok := g.cachedCommits[c]; ok {
return contains, nil
}

// fetch 100 most recent tags first, as this should handle the most common scenario
if err := g.getTags(c.owner, c.repo); err != nil {
return false, err
}

// check cache again now that it's populated with tags
if contains, ok := g.cachedCommits[c]; ok {
return contains, nil
}

// check default branch
defaultBranch, err := g.defaultBranch(c.owner, c.repo)
if err != nil {
return false, err
}
contains, err := g.branchContains(defaultBranch, owner, repo, hash)
contains, err := g.branchContains(defaultBranch, c.owner, c.repo, c.hash)
if err != nil {
return false, err
}
if contains {
return true, nil
}

switch {
// github/codeql-action has commits from their release branches that don't show up in the default branch
// this isn't the best approach for now, but theres no universal "does this commit belong to this repo" call
case owner == "github" && repo == "codeql-action":
releaseBranches := []string{"releases/v3", "releases/v2", "releases/v1"}
for _, branch := range releaseBranches {
contains, err = g.branchContains(branch, owner, repo, hash)
if err != nil {
return false, err
}
if contains {
return true, nil
}
}

// add fallback lookup for actions/upload-artifact v3/node20 branch
// https://github.com/actions/starter-workflows/pull/2348#discussion_r1536228344
case owner == "actions" && repo == "upload-artifact":
contains, err = g.branchContains("v3/node20", owner, repo, hash)
}
return contains, err
// finally, check the most recent 10 release branches. This limit is arbitrary and can be adjusted in the future.
const lookback = 10
return g.checkReleaseBranches(c.owner, c.repo, c.hash, defaultBranch, lookback)
}

func (g *githubVerifier) defaultBranch(owner, repo string) (string, error) {
Expand All @@ -89,5 +107,152 @@ func (g *githubVerifier) branchContains(branch, owner, repo, hash string) (bool,
}

// Target should be behind or at the base ref if it is considered contained.
return diff.GetStatus() == "behind" || diff.GetStatus() == "identical", nil
contains := diff.GetStatus() == "behind" || diff.GetStatus() == "identical"
if contains {
g.markContains(owner, repo, hash)
}
return contains, nil
}

func (g *githubVerifier) getTags(owner, repo string) error {
// 100 releases is the maximum supported by /repos/{owner}/{repo}/tags endpoint
opts := github.ListOptions{PerPage: 100}
tags, _, err := g.client.Repositories.ListTags(g.ctx, owner, repo, &opts)
if err != nil {
return fmt.Errorf("fetch tags: %w", err)
}
isCodeQL := owner == "github" && repo == "codeql-action"
for _, t := range tags {
if t.Commit != nil && t.Commit.SHA != nil {
g.markContains(owner, repo, *t.Commit.SHA)
}
// store the highest major version for github/codeql-action
// this helps check release branches later, due to their release process.
if isCodeQL {
version := t.GetName()
if semver.IsValid(version) && semver.Compare(version, g.codeqlActionMajor) == 1 {
g.codeqlActionMajor = semver.Major(version)
}
}
}
return nil
}

func (g *githubVerifier) markContains(owner, repo, sha string) {
commit := commit{
owner: owner,
repo: repo,
hash: strings.ToLower(sha),
}
g.cachedCommits[commit] = true
}

// check the most recent release branches, ignoring the default branch which was already checked.
func (g *githubVerifier) checkReleaseBranches(owner, repo, hash, defaultBranch string, limit int) (bool, error) {
var (
analyzedBranches int
branches []string
err error
)

switch {
// special case: github/codeql-action releases all come from "main", even though the tags are on different branches
case owner == "github" && repo == "codeql-action":
branches, err = g.getCodeQLBranches()
if err != nil {
return false, err
}
default:
branches, err = g.getBranches(owner, repo)
if err != nil {
return false, err
}
// we may have discovered more commit SHAs from the release process
c := commit{
owner: owner,
repo: repo,
hash: hash,
}
if contains, ok := g.cachedCommits[c]; ok {
return contains, nil
}
}

for _, branch := range branches {
if analyzedBranches >= limit {
break
}
if branch == defaultBranch {
continue
}
analyzedBranches++
contains, err := g.branchContains(branch, owner, repo, hash)
if err != nil {
return false, err
}
if contains {
return true, nil
}
}

return false, nil
}

// returns the integer version from the expected format: "v1", "v2", "v3" ..
func parseCodeQLVersion(version string) (int, error) {
if !strings.HasPrefix(version, "v") {
return 0, fmt.Errorf("%w: %s", errInvalidCodeQLVersion, version)
}
major, err := strconv.Atoi(version[1:])
if major < 1 || err != nil {
return 0, fmt.Errorf("%w: %s", errInvalidCodeQLVersion, version)
}
return major, nil
}

// these branches follow the releases/v3 pattern, so we can make assumptions about what they're called.
// this should be called after g.getTags(), because it requires g.codeqlActionMajor to be set.
func (g *githubVerifier) getCodeQLBranches() ([]string, error) {
if g.codeqlActionMajor == "" {
return nil, nil
}
version, err := parseCodeQLVersion(g.codeqlActionMajor)
if err != nil {
return nil, err
}
branches := make([]string, version)
// descending order (e..g releases/v5, releases/v4, ... releases/v1)
for i := 0; i < version; i++ {
branches[i] = "releases/v" + strconv.Itoa(version-i)
}
return branches, nil
}

func (g *githubVerifier) getBranches(owner, repo string) ([]string, error) {
var branches []string
seen := map[string]struct{}{}

// 100 releases is the maximum supported by /repos/{owner}/{repo}/releases endpoint
opts := github.ListOptions{PerPage: 100}
releases, _, err := g.client.Repositories.ListReleases(g.ctx, owner, repo, &opts)
if err != nil {
return nil, fmt.Errorf("fetch releases: %w", err)
}

for _, r := range releases {
if r.TargetCommitish != nil {
if isCommitHash(*r.TargetCommitish) {
// if a commit, we know it's in the repo
g.markContains(owner, repo, *r.TargetCommitish)
} else {
// otherwise we have a release branch to check
if _, ok := seen[*r.TargetCommitish]; !ok {
seen[*r.TargetCommitish] = struct{}{}
branches = append(branches, *r.TargetCommitish)
}
}
}
}

return branches, nil
}
16 changes: 6 additions & 10 deletions app/server/github_verifier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ func Test_githubVerifier_contains_codeql_v1(t *testing.T) {
httpClient := http.Client{
Transport: suffixStubTripper{
responsePaths: map[string]string{
"tags": "./testdata/api/github/codeqlV3Tags.json", // start lookback from v3
"codeql-action": "./testdata/api/github/repository.json", // api call which finds the default branch
"main...somehash": "./testdata/api/github/divergent.json", // doesnt belong to default branch
"v3...somehash": "./testdata/api/github/divergent.json", // doesnt belong to releases/v3 branch
Expand All @@ -36,11 +37,8 @@ func Test_githubVerifier_contains_codeql_v1(t *testing.T) {
},
}
client := github.NewClient(&httpClient)
gv := githubVerifier{
ctx: context.Background(),
client: client,
}
got, err := gv.contains(commit{"github", "codeql-action", "somehash"})
gv := newGitHubVerifier(context.Background(), client)
got, err := gv.contains(commit{owner: "github", repo: "codeql-action", hash: "somehash"})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
Expand All @@ -54,6 +52,7 @@ func Test_githubVerifier_contains_codeql_v2(t *testing.T) {
httpClient := http.Client{
Transport: suffixStubTripper{
responsePaths: map[string]string{
"tags": "./testdata/api/github/codeqlV3Tags.json", // start lookback from v3
"codeql-action": "./testdata/api/github/repository.json", // api call which finds the default branch
"main...somehash": "./testdata/api/github/divergent.json", // doesnt belong to default branch
"v3...somehash": "./testdata/api/github/divergent.json", // doesnt belong to releases/v3 branch either
Expand All @@ -62,11 +61,8 @@ func Test_githubVerifier_contains_codeql_v2(t *testing.T) {
},
}
client := github.NewClient(&httpClient)
gv := githubVerifier{
ctx: context.Background(),
client: client,
}
got, err := gv.contains(commit{"github", "codeql-action", "somehash"})
gv := newGitHubVerifier(context.Background(), client)
got, err := gv.contains(commit{owner: "github", repo: "codeql-action", hash: "somehash"})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
Expand Down
5 changes: 1 addition & 4 deletions app/server/post_results.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,10 +226,7 @@ func getAndVerifyWorkflowContent(ctx context.Context,
return fmt.Errorf("error decoding workflow contents: %w", err)
}

verifier := &githubVerifier{
ctx: ctx,
client: client,
}
verifier := newGitHubVerifier(ctx, client)
// Verify scorecard workflow.
return verifyScorecardWorkflow(workflowContent, verifier)
}
Expand Down
16 changes: 11 additions & 5 deletions app/server/post_results_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,22 +116,20 @@ var _ = Describe("E2E Test: getAndVerifyWorkflowContent", func() {
})

// helper function to setup a github verifier with an appropriately set token.
func getGithubVerifier() githubVerifier {
func getGithubVerifier() *githubVerifier {
httpClient := http.DefaultClient
token, _ := readGitHubTokens()
if token != "" {
httpClient.Transport = githubTransport{
token: token,
}
}
return githubVerifier{
ctx: context.Background(),
client: github.NewClient(httpClient),
}
return newGitHubVerifier(context.Background(), github.NewClient(httpClient))
}

var _ = Describe("E2E Test: githubVerifier_contains", func() {
Context("E2E Test: Validate known good commits", func() {
// https://github.com/actions/starter-workflows/pull/2348#discussion_r1536228344
It("can detect actions/upload-artifact v3-node20 commits", func() {
gv := getGithubVerifier()
c, err := gv.contains(commit{"actions", "upload-artifact", "97a0fba1372883ab732affbe8f94b823f91727db"})
Expand All @@ -145,5 +143,13 @@ var _ = Describe("E2E Test: githubVerifier_contains", func() {
Expect(err).Should(BeNil())
Expect(c).To(BeTrue())
})

// https://github.com/ossf/scorecard-action/issues/1367#issuecomment-2333175627
It("can detect release branch commits", func() {
gv := getGithubVerifier()
c, err := gv.contains(commit{"actions", "upload-artifact", "ff15f0306b3f739f7b6fd43fb5d26cd321bd4de5"})
Expect(err).Should(BeNil())
Expect(c).To(BeTrue())
})
})
})
12 changes: 12 additions & 0 deletions app/server/testdata/api/github/codeqlV3Tags.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[
{
"name": "v3.26.9",
"zipball_url": "https://api.github.com/repos/github/codeql-action/zipball/refs/tags/v3.26.9",
"tarball_url": "https://api.github.com/repos/github/codeql-action/tarball/refs/tags/v3.26.9",
"commit": {
"sha": "461ef6c76dfe95d5c364de2f431ddbd31a417628",
"url": "https://api.github.com/repos/github/codeql-action/commits/461ef6c76dfe95d5c364de2f431ddbd31a417628"
},
"node_id": "MDM6UmVmMjU5NDQ1ODc4OnJlZnMvdGFncy92My4yNi45"
}
]
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ require (
github.com/spf13/pflag v1.0.5
github.com/transparency-dev/merkle v0.0.2
gocloud.dev v0.37.0
golang.org/x/mod v0.20.0
golang.org/x/net v0.29.0
)

Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,8 @@ golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0=
golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
Expand Down

0 comments on commit c67d706

Please sign in to comment.