Skip to content

Commit

Permalink
GitGetter: Re-allow ref=COMMIT_ID
Browse files Browse the repository at this point in the history
Earlier versions of this code allowed "ref" to take any value that would
be accepted by "git checkout" as a valid target of a symbolic ref. We
inadvertently made a breaking change that broke that as part of
introducing a shallow clone optimization, because shallow clone requires
selecting a single branch.

To restore the previous capabilities while retaining the "depth" argument,
here we accept a compromise where "ref" has the stronger requirement of
being a valid named ref in the remote repository if and only if "depth"
is set to a value greater than zero. If depth isn't set or is less than
one, we will do the old behavior of just cloning all of the refs in the
remote repository in full and then switching to refer to the selected
branch, tag, or naked commit ID as a separate step.

This includes a heuristic to generate an additional error message hint if
we get an error from "git clone" and it looks like the user might've been
trying to use "depth" and "ref=COMMIT" together. We can't recognize that
error accurately because it's only reported as human-oriented git command
output, but this heuristic should hopefully minimize situations where we
show it inappropriately.
  • Loading branch information
apparentlymart committed Dec 22, 2021
1 parent 64f1c03 commit 23702d0
Show file tree
Hide file tree
Showing 2 changed files with 139 additions and 3 deletions.
39 changes: 36 additions & 3 deletions get_git.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func (g *GitGetter) Get(dst string, u *url.URL) error {

// Extract some query parameters we use
var ref, sshKey string
var depth int
depth := 0 // 0 means "don't use shallow clone"
q := u.Query()
if len(q) > 0 {
ref = q.Get("ref")
Expand Down Expand Up @@ -167,20 +167,53 @@ func (g *GitGetter) checkout(dst string, ref string) error {
return getRunCommand(cmd)
}

// gitCommitIDRegex is a pattern intended to match strings that seem
// "likely to be" git commit IDs, rather than named refs. This cannot be
// an exact decision because it's valid to name a branch or tag after a series
// of hexadecimal digits too.
//
// We require at least 7 digits here because that's the smallest size git
// itself will typically generate, and so it'll reduce the risk of false
// positives on short branch names that happen to also be "hex words".
var gitCommitIDRegex = regexp.MustCompile("^[0-9a-fA-F]{7,40}$")

func (g *GitGetter) clone(ctx context.Context, dst, sshKeyFile string, u *url.URL, ref string, depth int) error {
args := []string{"clone"}

originalRef := ref // we handle an unspecified ref differently than explicitly selecting the default branch below
if ref == "" {
ref = findRemoteDefaultBranch(u)
}
if depth > 0 {
args = append(args, "--depth", strconv.Itoa(depth))
args = append(args, "--branch", ref)
}
args = append(args, u.String(), dst)

args = append(args, "--branch", ref, u.String(), dst)
cmd := exec.CommandContext(ctx, "git", args...)
setupGitEnv(cmd, sshKeyFile)
return getRunCommand(cmd)
err := getRunCommand(cmd)
if err != nil {
if depth > 0 && originalRef != "" {
// If we're creating a shallow clone then the given ref must be
// a named ref (branch or tag) rather than a commit directly.
// We can't accurately recognize the resulting error here without
// hard-coding assumptions about git's human-readable output, but
// we can at least try a heuristic.
if gitCommitIDRegex.MatchString(originalRef) {
return fmt.Errorf("%w (note that setting 'depth' requires 'ref' to be a branch or tag name)", err)
}
}
return err
}

if depth < 1 && originalRef != "" {
// If we didn't add --depth and --branch above then we will now be
// on the remote repository's default branch, rather than the selected
// ref, so we'll need to fix that before we return.
return g.checkout(dst, originalRef)
}
return nil
}

func (g *GitGetter) update(ctx context.Context, dst, sshKeyFile, ref string, depth int) error {
Expand Down
103 changes: 103 additions & 0 deletions get_git_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,58 @@ func TestGitGetter_branch(t *testing.T) {
}
}

func TestGitGetter_commitID(t *testing.T) {
if !testHasGit {
t.Skip("git not found, skipping")
}

g := new(GitGetter)
dst := tempDir(t)

// We're going to create different content on the main branch vs.
// another branch here, so that below we can recognize if we
// correctly cloned the commit actually requested (from the
// "other branch"), not the one at HEAD.
repo := testGitRepo(t, "commit_id")
repo.git("checkout", "-b", "main-branch")
repo.commitFile("wrong.txt", "Nope")
repo.git("checkout", "-b", "other-branch")
repo.commitFile("hello.txt", "Yep")
commitID, err := repo.latestCommit()
if err != nil {
t.Fatal(err)
}
// Return to the main branch so that HEAD of this repository
// will be that, rather than "test-branch".
repo.git("checkout", "main-branch")

q := repo.url.Query()
q.Add("ref", commitID)
repo.url.RawQuery = q.Encode()

t.Logf("Getting %s", repo.url)
if err := g.Get(dst, repo.url); err != nil {
t.Fatalf("err: %s", err)
}

// Verify the main file exists
mainPath := filepath.Join(dst, "hello.txt")
if _, err := os.Stat(mainPath); err != nil {
t.Fatalf("err: %s", err)
}

// Get again should work
if err := g.Get(dst, repo.url); err != nil {
t.Fatalf("err: %s", err)
}

// Verify the main file exists
mainPath = filepath.Join(dst, "hello.txt")
if _, err := os.Stat(mainPath); err != nil {
t.Fatalf("err: %s", err)
}
}

func TestGitGetter_remoteWithoutMaster(t *testing.T) {
if !testHasGit {
t.Log("git not found, skipping")
Expand Down Expand Up @@ -212,6 +264,44 @@ func TestGitGetter_shallowCloneWithTag(t *testing.T) {
}
}

func TestGitGetter_shallowCloneWithCommitID(t *testing.T) {
if !testHasGit {
t.Log("git not found, skipping")
t.Skip()
}

g := new(GitGetter)
dst := tempDir(t)

repo := testGitRepo(t, "upstream")
repo.commitFile("v1.0.txt", "0")
repo.git("tag", "v1.0")
repo.commitFile("v1.1.txt", "1")

commitID, err := repo.latestCommit()
if err != nil {
t.Fatal(err)
}

// Specify a clone depth of 1 with a naked commit ID
// This is intentionally invalid: shallow clone always requires a named ref.
q := repo.url.Query()
q.Add("ref", commitID[:8])
q.Add("depth", "1")
repo.url.RawQuery = q.Encode()

t.Logf("Getting %s", repo.url)
err = g.Get(dst, repo.url)
if err == nil {
t.Fatalf("success; want error")
}
// We use a heuristic to generate an extra hint in the error message if
// it looks like the user was trying to combine ref=COMMIT with depth.
if got, want := err.Error(), "(note that setting 'depth' requires 'ref' to be a branch or tag name)"; !strings.Contains(got, want) {
t.Errorf("missing error message hint\ngot: %s\nwant substring: %s", got, want)
}
}

func TestGitGetter_branchUpdate(t *testing.T) {
if !testHasGit {
t.Skip("git not found, skipping")
Expand Down Expand Up @@ -661,6 +751,19 @@ func (r *gitRepo) commitFile(file, content string) {
r.git("commit", "-m", "Adding "+file)
}

// latestCommit returns the full commit id of the latest commit on the current
// branch.
func (r *gitRepo) latestCommit() (string, error) {
cmd := exec.Command("git", "rev-parse", "HEAD")
cmd.Dir = r.dir
rawOut, err := cmd.Output()
if err != nil {
return "", err
}
rawOut = bytes.TrimSpace(rawOut)
return string(rawOut), nil
}

// This is a read-only deploy key for an empty test repository.
// Note: This is split over multiple lines to avoid being disabled by key
// scanners automatically.
Expand Down

1 comment on commit 23702d0

@Boobie84
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return getRunCommand(cmd)

Please sign in to comment.