Skip to content

Commit

Permalink
libgit2: optimise checkout branch clones
Browse files Browse the repository at this point in the history
No-op reconciliations are very inefficient, as they carry out
a full clone operation of the target repository even when
no changes have taken place.

This change will execute a remote-ls operation, and cancel
the clone operation if the remote tip commit is still the same
as the one observed on the last reconcilation. In such cases,
an git.NoChangesError is returned.

Signed-off-by: Paulo Gomes <paulo.gomes@weave.works>
  • Loading branch information
Paulo Gomes committed May 11, 2022
1 parent 5b4750b commit 860d705
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 13 deletions.
22 changes: 20 additions & 2 deletions controllers/gitrepository_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,9 @@ func (r *GitRepositoryReconciler) notify(oldObj, newObj *sourcev1.GitRepository,
// reconcileStorage ensures the current state of the storage matches the
// desired and previously observed state.
//
// All Artifacts for the object except for the current one in the Status are
// garbage collected from the Storage.
// The garbage collection is executed based on the flag based settings and
// may remove files that are beyond their TTL or the maximum number of files
// to survive a collection cycle.
// If the Artifact in the Status of the object disappeared from the Storage,
// it is removed from the object.
// If the object does not have an Artifact in its Status, a Reconciling
Expand Down Expand Up @@ -411,6 +412,11 @@ func (r *GitRepositoryReconciler) reconcileSource(ctx context.Context,
checkoutOpts.Tag = ref.Tag
checkoutOpts.SemVer = ref.SemVer
}

if artifact := obj.GetArtifact(); artifact != nil {
checkoutOpts.LastRevision = artifact.Revision
}

checkoutStrategy, err := strategy.CheckoutStrategyForImplementation(ctx,
git.Implementation(obj.Spec.GitImplementation), checkoutOpts)
if err != nil {
Expand Down Expand Up @@ -455,6 +461,11 @@ func (r *GitRepositoryReconciler) reconcileSource(ctx context.Context,
defer cancel()
c, err := checkoutStrategy.Checkout(gitCtx, dir, repositoryURL, authOpts)
if err != nil {
var v git.NoChangesError
if errors.As(err, &v) {
return sreconcile.ResultSuccess, nil
}

e := &serror.Event{
Err: fmt.Errorf("failed to checkout and determine revision: %w", err),
Reason: sourcev1.GitOperationFailedReason,
Expand Down Expand Up @@ -495,6 +506,13 @@ func (r *GitRepositoryReconciler) reconcileSource(ctx context.Context,
// object are set, and the symlink in the Storage is updated to its path.
func (r *GitRepositoryReconciler) reconcileArtifact(ctx context.Context,
obj *sourcev1.GitRepository, commit *git.Commit, includes *artifactSet, dir string) (sreconcile.Result, error) {
// If reconciliation resulted in git.NoChangesError,
// avoid reconciling artifact, as this was already done
// on a previous reconciliation.
if commit == nil || commit.Hash.String() == "" {
return sreconcile.ResultSuccess, nil
}

// Create potential new artifact with current available metadata
artifact := r.Storage.NewArtifactFor(obj.Kind, obj.GetObjectMeta(), commit.String(), fmt.Sprintf("%s.tar.gz", commit.Hash.String()))

Expand Down
12 changes: 12 additions & 0 deletions pkg/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,15 @@ func (c *Commit) ShortMessage() string {
type CheckoutStrategy interface {
Checkout(ctx context.Context, path, url string, config *AuthOptions) (*Commit, error)
}

// NoChangesError represents the case in which a Git clone operation
// is attempted, but cancelled as the revision is still the same as
// the one observed on the last successful reconciliation.
type NoChangesError struct {
Message string
ObservedRevision string
}

func (e NoChangesError) Error() string {
return fmt.Sprintf("%s: observed revision '%s'", e.Message, e.ObservedRevision)
}
85 changes: 75 additions & 10 deletions pkg/git/libgit2/checkout.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,40 +52,105 @@ func CheckoutStrategyForOptions(ctx context.Context, opt git.CheckoutOptions) gi
if branch == "" {
branch = git.DefaultBranch
}
return &CheckoutBranch{Branch: branch}
return &CheckoutBranch{
Branch: branch,
LastRevision: opt.LastRevision,
}
}
}

type CheckoutBranch struct {
Branch string
Branch string
LastRevision string
}

func (c *CheckoutBranch) Checkout(ctx context.Context, path, url string, opts *git.AuthOptions) (*git.Commit, error) {
repo, err := safeClone(url, path, &git2go.CloneOptions{
FetchOptions: git2go.FetchOptions{
repo, err := git2go.InitRepository(path, false)
if err != nil {
return nil, fmt.Errorf("unable to init repository for '%s': %w", managed.EffectiveURL(url), gitutil.LibGit2Error(err))
}
defer repo.Free()

remote, err := repo.Remotes.Create("origin", url)
if err != nil {
return nil, fmt.Errorf("unable to create remote for '%s': %w", managed.EffectiveURL(url), gitutil.LibGit2Error(err))
}
defer remote.Free()

callBacks := RemoteCallbacks(ctx, opts)
err = remote.ConnectFetch(&callBacks, &git2go.ProxyOptions{Type: git2go.ProxyTypeAuto}, nil)
if err != nil {
return nil, fmt.Errorf("unable to fetch-connect to remote '%s': %w", managed.EffectiveURL(url), gitutil.LibGit2Error(err))
}
defer remote.Disconnect()

// When the last observed revision is set, check whether it is still
// the same at the remote branch. If so, short-circuit the clone operation here.
if c.LastRevision != "" {
heads, err := remote.Ls(c.Branch)
if err != nil {
return nil, fmt.Errorf("unable to remote ls for '%s': %w", managed.EffectiveURL(url), gitutil.LibGit2Error(err))
}
if len(heads) > 0 {
currentRevision := fmt.Sprintf("%s/%s", c.Branch, heads[0].Id.String())
if currentRevision == c.LastRevision {
return nil, git.NoChangesError{
Message: "no changes since last reconcilation",
ObservedRevision: currentRevision,
}
}
}
}

// Limit the fetch operation to the specific branch, to decrease network usage.
err = remote.Fetch([]string{c.Branch},
&git2go.FetchOptions{
DownloadTags: git2go.DownloadTagsNone,
RemoteCallbacks: RemoteCallbacks(ctx, opts),
ProxyOptions: git2go.ProxyOptions{Type: git2go.ProxyTypeAuto},
},
CheckoutOptions: git2go.CheckoutOptions{
Strategy: git2go.CheckoutForce,
},
CheckoutBranch: c.Branch,
"")
if err != nil {
return nil, fmt.Errorf("unable to fetch remote '%s': %w",
managed.EffectiveURL(url), gitutil.LibGit2Error(err))
}

branch, err := repo.References.Lookup(fmt.Sprintf("refs/remotes/origin/%s", c.Branch))
if err != nil {
return nil, fmt.Errorf("unable to lookup branch '%s' for '%s': %w",
c.Branch, managed.EffectiveURL(url), gitutil.LibGit2Error(err))
}
defer branch.Free()

upstreamCommit, err := repo.LookupCommit(branch.Target())
if err != nil {
return nil, fmt.Errorf("unable to lookup commit '%s' for '%s': %w",
c.Branch, managed.EffectiveURL(url), gitutil.LibGit2Error(err))
}
defer upstreamCommit.Free()

// Once the index has been updated with Fetch, and we know the tip commit,
// a hard reset can be used to align the local worktree with the remote branch's.
err = repo.ResetToCommit(upstreamCommit, git2go.ResetHard, &git2go.CheckoutOptions{
Strategy: git2go.CheckoutForce,
})
if err != nil {
return nil, fmt.Errorf("unable to clone '%s': %w", managed.EffectiveURL(url), gitutil.LibGit2Error(err))
return nil, fmt.Errorf("unable to hard reset to commit for '%s': %w", managed.EffectiveURL(url), gitutil.LibGit2Error(err))
}
defer repo.Free()

// Use the current worktree's head as reference for the commit to be returned.
head, err := repo.Head()
if err != nil {
return nil, fmt.Errorf("git resolve HEAD error: %w", err)
}
defer head.Free()

cc, err := repo.LookupCommit(head.Target())
if err != nil {
return nil, fmt.Errorf("failed to lookup HEAD commit '%s' for branch '%s': %w", head.Target(), c.Branch, err)
}
defer cc.Free()

return buildCommit(cc, "refs/heads/"+c.Branch), nil
}

Expand Down
19 changes: 18 additions & 1 deletion pkg/git/libgit2/checkout_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ func TestCheckoutBranch_Checkout(t *testing.T) {
filesCreated map[string]string
expectedCommit string
expectedErr string
lastRevision string
}{
{
name: "Default branch",
Expand All @@ -95,14 +96,30 @@ func TestCheckoutBranch_Checkout(t *testing.T) {
branch: "invalid",
expectedErr: "reference 'refs/remotes/origin/invalid' not found",
},
{
name: "skip clone - lastRevision hasn't changed",
branch: defaultBranch,
filesCreated: map[string]string{"branch": "second"},
expectedCommit: secondCommit.String(),
lastRevision: fmt.Sprintf("%s/%s", defaultBranch, secondCommit.String()),
expectedErr: fmt.Sprintf("no changes since last reconcilation: observed revision '%s/%s'", defaultBranch, secondCommit.String()),
},
{
name: "lastRevision is different",
branch: defaultBranch,
filesCreated: map[string]string{"branch": "second"},
expectedCommit: secondCommit.String(),
lastRevision: fmt.Sprintf("%s/%s", defaultBranch, firstCommit.String()),
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)

branch := CheckoutBranch{
Branch: tt.branch,
Branch: tt.branch,
LastRevision: tt.lastRevision,
}
tmpDir := t.TempDir()

Expand Down
5 changes: 5 additions & 0 deletions pkg/git/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ type CheckoutOptions struct {
// RecurseSubmodules defines if submodules should be checked out,
// not supported by all Implementations.
RecurseSubmodules bool

// LastRevision holds the revision observed on the last successful
// reconciliation.
// It is used to skip clone operations when no changes were detected.
LastRevision string
}

type TransportType string
Expand Down

0 comments on commit 860d705

Please sign in to comment.