From 7a809be09f95c2fbd99222ad80a61071c46bc338 Mon Sep 17 00:00:00 2001 From: Nick Date: Fri, 19 Aug 2022 02:49:18 -0400 Subject: [PATCH] git-annex tests (#13) Fixes https://github.com/neuropoly/gitea/issues/11 Tests: * `git annex init` * `git annex copy --from origin` * `git annex copy --to origin` over: * ssh for: * the owner * a collaborator * a read-only collaborator * a stranger in a * public repo * private repo And then confirms: * Deletion of the remote repo (to ensure lockdown isn't messing with us: https://git-annex.branchable.com/internals/lockdown/#comment-0cc5225dc5abe8eddeb843bfd2fdc382) ------ To support all this: * Add util.FileCmp() * Patch withKeyFile() so it can be nested in other copies of itself ------- Many thanks to Mathieu for giving style tips and catching several bugs, including a subtle one in util.filecmp() which neutered it. Co-authored-by: Mathieu Guay-Paquet --- .github/workflows/pull-db-tests.yml | 4 + Makefile | 2 +- modules/util/filecmp.go | 87 ++ .../api_helper_for_declarative_test.go | 25 + tests/integration/git_annex_test.go | 759 ++++++++++++++++++ .../git_helper_for_declarative_test.go | 22 + 6 files changed, 898 insertions(+), 1 deletion(-) create mode 100644 modules/util/filecmp.go create mode 100644 tests/integration/git_annex_test.go diff --git a/.github/workflows/pull-db-tests.yml b/.github/workflows/pull-db-tests.yml index 97446e6cd3b2f..c44e82f82eadd 100644 --- a/.github/workflows/pull-db-tests.yml +++ b/.github/workflows/pull-db-tests.yml @@ -46,6 +46,7 @@ jobs: - name: Add hosts to /etc/hosts run: '[ -e "/.dockerenv" ] || [ -e "/run/.containerenv" ] || echo "127.0.0.1 pgsql ldap minio" | sudo tee -a /etc/hosts' - run: make deps-backend + - run: sudo apt update && sudo DEBIAN_FRONTEND=noninteractive apt install -y git-annex - run: make backend env: TAGS: bindata @@ -69,6 +70,7 @@ jobs: go-version-file: go.mod check-latest: true - run: make deps-backend + - run: sudo apt update && sudo DEBIAN_FRONTEND=noninteractive apt 
install -y git-annex - run: make backend env: TAGS: bindata gogit sqlite sqlite_unlock_notify @@ -172,6 +174,7 @@ jobs: - name: Add hosts to /etc/hosts run: '[ -e "/.dockerenv" ] || [ -e "/run/.containerenv" ] || echo "127.0.0.1 mysql elasticsearch smtpimap" | sudo tee -a /etc/hosts' - run: make deps-backend + - run: sudo apt update && sudo DEBIAN_FRONTEND=noninteractive apt install -y git-annex - run: make backend env: TAGS: bindata @@ -205,6 +208,7 @@ jobs: - name: Add hosts to /etc/hosts run: '[ -e "/.dockerenv" ] || [ -e "/run/.containerenv" ] || echo "127.0.0.1 mssql" | sudo tee -a /etc/hosts' - run: make deps-backend + - run: sudo apt update && sudo DEBIAN_FRONTEND=noninteractive apt install -y git-annex - run: make backend env: TAGS: bindata diff --git a/Makefile b/Makefile index c38e88a8e0ad9..0cb48db5d2988 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ self := $(location) @tmpdir=`mktemp --tmpdir -d` ; \ echo Using temporary directory $$tmpdir for test repositories ; \ USE_REPO_TEST_DIR= $(MAKE) -f $(self) --no-print-directory REPO_TEST_DIR=$$tmpdir/ $@ ; \ - STATUS=$$? ; rm -r "$$tmpdir" ; exit $$STATUS + STATUS=$$? ; chmod -R +w "$$tmpdir" && rm -r "$$tmpdir" ; exit $$STATUS else diff --git a/modules/util/filecmp.go b/modules/util/filecmp.go new file mode 100644 index 0000000000000..76e7705cc1b56 --- /dev/null +++ b/modules/util/filecmp.go @@ -0,0 +1,87 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package util + +import ( + "bytes" + "io" + "os" +) + +// Decide if two files have the same contents or not. +// chunkSize is the size of the blocks to scan by; pass 0 to get a sensible default. +// *Follows* symlinks. +// +// May return an error if something else goes wrong; in this case, you should ignore the value of 'same'. 
+// +// derived from https://stackoverflow.com/a/30038571 +// under CC-BY-SA-4.0 by several contributors +func FileCmp(file1, file2 string, chunkSize int) (same bool, err error) { + if chunkSize == 0 { + chunkSize = 4 * 1024 + } + + // shortcuts: check file metadata + stat1, err := os.Stat(file1) + if err != nil { + return false, err + } + + stat2, err := os.Stat(file2) + if err != nil { + return false, err + } + + // are the inputs literally the same file? + if os.SameFile(stat1, stat2) { + return true, nil + } + + // do inputs at least have the same size? + if stat1.Size() != stat2.Size() { + return false, nil + } + + // long way: compare contents + f1, err := os.Open(file1) + if err != nil { + return false, err + } + defer f1.Close() + + f2, err := os.Open(file2) + if err != nil { + return false, err + } + defer f2.Close() + + b1 := make([]byte, chunkSize) + b2 := make([]byte, chunkSize) + for { + n1, err1 := io.ReadFull(f1, b1) + n2, err2 := io.ReadFull(f2, b2) + + // https://pkg.go.dev/io#Reader + // > Callers should always process the n > 0 bytes returned + // > before considering the error err. Doing so correctly + // > handles I/O errors that happen after reading some bytes + // > and also both of the allowed EOF behaviors.
+ + if !bytes.Equal(b1[:n1], b2[:n2]) { + return false, nil + } + + if (err1 == io.EOF && err2 == io.EOF) || (err1 == io.ErrUnexpectedEOF && err2 == io.ErrUnexpectedEOF) { + return true, nil + } + + // some other error, like a dropped network connection or a bad transfer + if err1 != nil { + return false, err1 + } + if err2 != nil { + return false, err2 + } + } +} diff --git a/tests/integration/api_helper_for_declarative_test.go b/tests/integration/api_helper_for_declarative_test.go index 3524ce9834add..58f4d63f30bac 100644 --- a/tests/integration/api_helper_for_declarative_test.go +++ b/tests/integration/api_helper_for_declarative_test.go @@ -21,6 +21,7 @@ import ( api "code.gitea.io/gitea/modules/structs" "code.gitea.io/gitea/services/forms" + "github.com/google/uuid" "github.com/stretchr/testify/assert" ) @@ -462,3 +463,27 @@ func doAPIAddRepoToOrganizationTeam(ctx APITestContext, teamID int64, orgName, r ctx.Session.MakeRequest(t, req, http.StatusNoContent) } } + +// generate and activate an ssh key for the user attached to the APITestContext +// TODO: pick a better name; golang doesn't do function overloading.
+func withCtxKeyFile(t *testing.T, ctx APITestContext, callback func()) { + // we need to have write:public_key to do this step + // the easiest way is to create a throwaway ctx that is identical but only has that permission + ctxKeyWriter := ctx + ctxKeyWriter.Token = getTokenForLoggedInUser(t, ctx.Session, auth.AccessTokenScopeWriteUser) + + keyName := "One of " + ctx.Username + "'s keys: #" + uuid.New().String() + withKeyFile(t, keyName, func(keyFile string) { + var key api.PublicKey + + doAPICreateUserKey(ctxKeyWriter, keyName, keyFile, + func(t *testing.T, _key api.PublicKey) { + // save the key ID so we can delete it at the end + key = _key + })(t) + + defer doAPIDeleteUserKey(ctxKeyWriter, key.ID)(t) + + callback() + }) +} diff --git a/tests/integration/git_annex_test.go b/tests/integration/git_annex_test.go new file mode 100644 index 0000000000000..ee7d4c17205b6 --- /dev/null +++ b/tests/integration/git_annex_test.go @@ -0,0 +1,759 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ +package integration + +import ( + "errors" + "fmt" + "math/rand" + "net/url" + "os" + "path" + "regexp" + "strings" + "testing" + + auth_model "code.gitea.io/gitea/models/auth" + "code.gitea.io/gitea/models/db" + "code.gitea.io/gitea/models/perm" + repo_model "code.gitea.io/gitea/models/repo" + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/setting" + api "code.gitea.io/gitea/modules/structs" + "code.gitea.io/gitea/modules/util" + "code.gitea.io/gitea/tests" + + "github.com/stretchr/testify/require" +) + +// Some guidelines: +// +// * an APITestContext is an awkward union of session credential + username + target repo +// which is assumed to be owned by that username; if you want to target a different +// repo, you need to edit its .Reponame or just ignore it and write "username/reponame.git" + +func doCreateRemoteAnnexRepository(t *testing.T, u *url.URL, ctx APITestContext, private bool) (err error) { + // creating a repo counts as editing the user's profile (it is done by POSTing + // to /api/v1/user/repos/) -- which means it needs a User-scoped token; both + // creating and editing also need a Repo-scoped token because they edit repositories. + rescopedCtx := ctx + rescopedCtx.Token = getTokenForLoggedInUser(t, ctx.Session, auth_model.AccessTokenScopeWriteUser, auth_model.AccessTokenScopeWriteRepository) + doAPICreateRepository(rescopedCtx, false)(t) + doAPIEditRepository(rescopedCtx, &api.EditRepoOption{Private: &private})(t) + + repoURL := createSSHUrl(ctx.GitPath(), u) + + // Fill in fixture data + withAnnexCtxKeyFile(t, ctx, func() { + err = doInitRemoteAnnexRepository(t, repoURL) + }) + if err != nil { + return fmt.Errorf("Unable to initialize remote repo with git-annex fixture: %w", err) + } + return nil +} + +/* +Test that permissions are enforced on git-annex-shell commands. + + Along the way, test that uploading, downloading, and deleting all work.
+*/ +func TestGitAnnexPermissions(t *testing.T) { + /* + // TODO: look into how LFS did this + if !setting.Annex.Enabled { + t.Skip() + } + */ + + // Each case below is split so that 'clone' is done as + // the repo owner, but 'copy' as the user under test. + // + // Otherwise, in cases where permissions block the + // initial 'clone', the test would simply end there + // and never verify if permissions apply properly to + // 'annex copy' -- potentially leaving a security gap. + + onGiteaRun(t, func(t *testing.T, u *url.URL) { + t.Run("Public", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + ownerCtx := NewAPITestContext(t, "user2", "annex-public", auth_model.AccessTokenScopeWriteRepository) + + // create a public repo + require.NoError(t, doCreateRemoteAnnexRepository(t, u, ownerCtx, false)) + + // double-check it's public + repo, err := repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, ownerCtx.Username, ownerCtx.Reponame) + require.NoError(t, err) + require.False(t, repo.IsPrivate) + + // Remote addresses of the repo + repoURL := createSSHUrl(ownerCtx.GitPath(), u) // remote git URL + remoteRepoPath := path.Join(setting.RepoRootPath, ownerCtx.GitPath()) // path on disk -- which can be examined directly because we're testing from localhost + + // Different sessions, so we can test different permissions. 
+ // We leave Reponame blank here; it can be filled in later, case by case, if needed + // + // NB: these usernames need to match appropriate entries in models/fixtures/user.yml + writerCtx := NewAPITestContext(t, "user5", "", auth_model.AccessTokenScopeWriteRepository) + readerCtx := NewAPITestContext(t, "user4", "", auth_model.AccessTokenScopeReadRepository) + outsiderCtx := NewAPITestContext(t, "user8", "", auth_model.AccessTokenScopeReadRepository) // a user with no specific access + + // set up collaborators + doAPIAddCollaborator(ownerCtx, readerCtx.Username, perm.AccessModeRead)(t) + doAPIAddCollaborator(ownerCtx, writerCtx.Username, perm.AccessModeWrite)(t) + + // tests + t.Run("Owner", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + t.Run("SSH", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + repoPath := path.Join(t.TempDir(), ownerCtx.Reponame) + defer util.RemoveAll(repoPath) // cleans out git-annex lockdown permissions + + withAnnexCtxKeyFile(t, ownerCtx, func() { + doGitClone(repoPath, repoURL)(t) + }) + + withAnnexCtxKeyFile(t, ownerCtx, func() { + t.Run("Init", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.NoError(t, doAnnexInitTest(remoteRepoPath, repoPath)) + }) + + t.Run("Download", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.NoError(t, doAnnexDownloadTest(remoteRepoPath, repoPath)) + }) + + t.Run("Upload", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.NoError(t, doAnnexUploadTest(remoteRepoPath, repoPath)) + }) + }) + }) + }) + + t.Run("Writer", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + t.Run("SSH", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + repoPath := path.Join(t.TempDir(), ownerCtx.Reponame) + defer util.RemoveAll(repoPath) // cleans out git-annex lockdown permissions + + withAnnexCtxKeyFile(t, ownerCtx, func() { + doGitClone(repoPath, repoURL)(t) + }) + + withAnnexCtxKeyFile(t, writerCtx,
func() { + t.Run("Init", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.NoError(t, doAnnexInitTest(remoteRepoPath, repoPath)) + }) + + t.Run("Download", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.NoError(t, doAnnexDownloadTest(remoteRepoPath, repoPath)) + }) + + t.Run("Upload", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.NoError(t, doAnnexUploadTest(remoteRepoPath, repoPath)) + }) + }) + }) + }) + + t.Run("Reader", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + t.Run("SSH", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + repoPath := path.Join(t.TempDir(), ownerCtx.Reponame) + defer util.RemoveAll(repoPath) // cleans out git-annex lockdown permissions + + withAnnexCtxKeyFile(t, ownerCtx, func() { + doGitClone(repoPath, repoURL)(t) + }) + + withAnnexCtxKeyFile(t, readerCtx, func() { + t.Run("Init", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.NoError(t, doAnnexInitTest(remoteRepoPath, repoPath)) + }) + + t.Run("Download", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.NoError(t, doAnnexDownloadTest(remoteRepoPath, repoPath)) + }) + + t.Run("Upload", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.Error(t, doAnnexUploadTest(remoteRepoPath, repoPath), "Uploading should fail due to permissions") + }) + }) + }) + }) + + t.Run("Outsider", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + t.Run("SSH", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + repoPath := path.Join(t.TempDir(), ownerCtx.Reponame) + defer util.RemoveAll(repoPath) // cleans out git-annex lockdown permissions + + withAnnexCtxKeyFile(t, ownerCtx, func() { + doGitClone(repoPath, repoURL)(t) + }) + + withAnnexCtxKeyFile(t, outsiderCtx, func() { + t.Run("Init", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.NoError(t, doAnnexInitTest(remoteRepoPath, repoPath)) + }) + + t.Run("Download", func(t 
*testing.T) { + defer tests.PrintCurrentTest(t)() + + require.NoError(t, doAnnexDownloadTest(remoteRepoPath, repoPath)) + }) + + t.Run("Upload", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.Error(t, doAnnexUploadTest(remoteRepoPath, repoPath), "Uploading should fail due to permissions") + }) + }) + }) + }) + + t.Run("Delete", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + // Delete the repo, make sure it's fully gone + doAPIDeleteRepository(ownerCtx)(t) + _, statErr := os.Stat(remoteRepoPath) + require.True(t, os.IsNotExist(statErr), "Remote annex repo should be removed from disk") + }) + }) + + t.Run("Private", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + ownerCtx := NewAPITestContext(t, "user2", "annex-private", auth_model.AccessTokenScopeWriteRepository) + + // create a private repo + require.NoError(t, doCreateRemoteAnnexRepository(t, u, ownerCtx, true)) + + // double-check it's private + repo, err := repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, ownerCtx.Username, ownerCtx.Reponame) + require.NoError(t, err) + require.True(t, repo.IsPrivate) + + // Remote addresses of the repo + repoURL := createSSHUrl(ownerCtx.GitPath(), u) // remote git URL + remoteRepoPath := path.Join(setting.RepoRootPath, ownerCtx.GitPath()) // path on disk -- which can be examined directly because we're testing from localhost + + // Different sessions, so we can test different permissions. 
+ // We leave Reponame blank here; it can be filled in later, case by case, if needed + // + // NB: these usernames need to match appropriate entries in models/fixtures/user.yml + writerCtx := NewAPITestContext(t, "user5", "", auth_model.AccessTokenScopeWriteRepository) + readerCtx := NewAPITestContext(t, "user4", "", auth_model.AccessTokenScopeReadRepository) + outsiderCtx := NewAPITestContext(t, "user8", "", auth_model.AccessTokenScopeReadRepository) // a user with no specific access + // Note: there's also full anonymous access, which is only available for public HTTP repos; + // it should behave the same as 'outsider' but we (will) test it separately below anyway + + // set up collaborators + doAPIAddCollaborator(ownerCtx, readerCtx.Username, perm.AccessModeRead)(t) + doAPIAddCollaborator(ownerCtx, writerCtx.Username, perm.AccessModeWrite)(t) + + // tests + t.Run("Owner", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + t.Run("SSH", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + repoPath := path.Join(t.TempDir(), ownerCtx.Reponame) + defer util.RemoveAll(repoPath) // cleans out git-annex lockdown permissions + + withAnnexCtxKeyFile(t, ownerCtx, func() { + doGitClone(repoPath, repoURL)(t) + }) + + withAnnexCtxKeyFile(t, ownerCtx, func() { + t.Run("Init", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.NoError(t, doAnnexInitTest(remoteRepoPath, repoPath)) + }) + + t.Run("Download", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.NoError(t, doAnnexDownloadTest(remoteRepoPath, repoPath)) + }) + + t.Run("Upload", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.NoError(t, doAnnexUploadTest(remoteRepoPath, repoPath)) + }) + }) + }) + }) + + t.Run("Writer", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + t.Run("SSH", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + repoPath := path.Join(t.TempDir(), ownerCtx.Reponame) + defer
util.RemoveAll(repoPath) // cleans out git-annex lockdown permissions + + withAnnexCtxKeyFile(t, ownerCtx, func() { + doGitClone(repoPath, repoURL)(t) + }) + + withAnnexCtxKeyFile(t, writerCtx, func() { + t.Run("Init", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.NoError(t, doAnnexInitTest(remoteRepoPath, repoPath)) + }) + + t.Run("Download", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.NoError(t, doAnnexDownloadTest(remoteRepoPath, repoPath)) + }) + + t.Run("Upload", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.NoError(t, doAnnexUploadTest(remoteRepoPath, repoPath)) + }) + }) + }) + }) + + t.Run("Reader", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + t.Run("SSH", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + repoPath := path.Join(t.TempDir(), ownerCtx.Reponame) + defer util.RemoveAll(repoPath) // cleans out git-annex lockdown permissions + + withAnnexCtxKeyFile(t, ownerCtx, func() { + doGitClone(repoPath, repoURL)(t) + }) + + withAnnexCtxKeyFile(t, readerCtx, func() { + t.Run("Init", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.NoError(t, doAnnexInitTest(remoteRepoPath, repoPath)) + }) + + t.Run("Download", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.NoError(t, doAnnexDownloadTest(remoteRepoPath, repoPath)) + }) + + t.Run("Upload", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.Error(t, doAnnexUploadTest(remoteRepoPath, repoPath), "Uploading should fail due to permissions") + }) + }) + }) + }) + + t.Run("Outsider", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + t.Run("SSH", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + repoPath := path.Join(t.TempDir(), ownerCtx.Reponame) + defer util.RemoveAll(repoPath) // cleans out git-annex lockdown permissions + + withAnnexCtxKeyFile(t, ownerCtx, func() { + doGitClone(repoPath, repoURL)(t) + }) + + withAnnexCtxKeyFile(t, 
outsiderCtx, func() { + t.Run("Init", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.Error(t, doAnnexInitTest(remoteRepoPath, repoPath), "annex init should fail due to permissions") + }) + + t.Run("Download", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.Error(t, doAnnexDownloadTest(remoteRepoPath, repoPath), "annex copy --from should fail due to permissions") + }) + + t.Run("Upload", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + require.Error(t, doAnnexUploadTest(remoteRepoPath, repoPath), "annex copy --to should fail due to permissions") + }) + }) + }) + }) + + t.Run("Delete", func(t *testing.T) { + defer tests.PrintCurrentTest(t)() + + // Delete the repo, make sure it's fully gone + doAPIDeleteRepository(ownerCtx)(t) + _, statErr := os.Stat(remoteRepoPath) + require.True(t, os.IsNotExist(statErr), "Remote annex repo should be removed from disk") + }) + }) + }) +} + +/* +Test that 'git annex init' works. + + precondition: repoPath contains a pre-cloned repo set up by doInitAnnexRepository(). +*/ +func doAnnexInitTest(remoteRepoPath, repoPath string) (err error) { + _, _, err = git.NewCommand(git.DefaultContext, "annex", "init", "cloned-repo").RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return fmt.Errorf("Couldn't `git annex init`: %w", err) + } + + // - method 0: 'git config remote.origin.annex-uuid'. + // Demonstrates that 'git annex init' successfully contacted + // the remote git-annex and was able to learn its ID number. 
+ readAnnexUUID, _, err := git.NewCommand(git.DefaultContext, "config", "remote.origin.annex-uuid").RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return fmt.Errorf("Couldn't read remote `git config remote.origin.annex-uuid`: %w", err) + } + readAnnexUUID = strings.TrimSpace(readAnnexUUID) + + match := regexp.MustCompile("^[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}$").MatchString(readAnnexUUID) + if !match { + return fmt.Errorf("'git config remote.origin.annex-uuid' should have been able to download the remote's uuid; but instead read '%s'", readAnnexUUID) + } + + remoteAnnexUUID, _, err := git.NewCommand(git.DefaultContext, "config", "annex.uuid").RunStdString(&git.RunOpts{Dir: remoteRepoPath}) + if err != nil { + return fmt.Errorf("Couldn't read local `git config annex.uuid`: %w", err) + } + + remoteAnnexUUID = strings.TrimSpace(remoteAnnexUUID) + match = regexp.MustCompile("^[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}$").MatchString(remoteAnnexUUID) + if !match { + return fmt.Errorf("'git annex init' should have been able to download the remote's uuid; but instead read '%s'", remoteAnnexUUID) + } + + if readAnnexUUID != remoteAnnexUUID { + return fmt.Errorf("'git annex init' should have read the expected annex UUID '%s', but instead got '%s'", remoteAnnexUUID, readAnnexUUID) + } + + // - method 1: 'git annex whereis'. + // Demonstrates that git-annex understands the annexed file can be found in the remote annex. + annexWhereis, _, err := git.NewCommand(git.DefaultContext, "annex", "whereis", "large.bin").RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return fmt.Errorf("Couldn't `git annex whereis large.bin`: %w", err) + } + // Note: this regex is unanchored because 'whereis' outputs multiple lines containing + // headers and 1+ remotes and we just want to find one of them. 
+ match = regexp.MustCompile(regexp.QuoteMeta(remoteAnnexUUID) + " -- .* \\[origin\\]\n").MatchString(annexWhereis) + if !match { + return errors.New("'git annex whereis' should report large.bin is known to be in [origin]") + } + + return nil +} + +func doAnnexDownloadTest(remoteRepoPath, repoPath string) (err error) { + // NB: this test does something slightly different if run separately from "doAnnexInitTest()": + // "git annex copy" will notice and run "git annex init", silently. + // This shouldn't change any results, but be aware in case it does. + + _, _, err = git.NewCommand(git.DefaultContext, "annex", "copy", "--from", "origin").RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return err + } + + // verify the file was downloaded + localObjectPath, err := annexObjectPath(repoPath, "large.bin") + if err != nil { + return err + } + // localObjectPath := path.Join(repoPath, "large.bin") // or, just compare against the checked-out file + + remoteObjectPath, err := annexObjectPath(remoteRepoPath, "large.bin") + if err != nil { + return err + } + + match, err := util.FileCmp(localObjectPath, remoteObjectPath, 0) + if err != nil { + return err + } + if !match { + return errors.New("Annexed files should be the same") + } + + return nil +} + +func doAnnexUploadTest(remoteRepoPath, repoPath string) (err error) { + // NB: this test does something slightly different if run separately from "Init": + // it first runs "git annex init" silently in the background. + // This shouldn't change any results, but be aware in case it does. 
+ + err = generateRandomFile(1024*1024/4, path.Join(repoPath, "contribution.bin")) + if err != nil { + return err + } + + err = git.AddChanges(repoPath, false, ".") + if err != nil { + return err + } + + err = git.CommitChanges(repoPath, git.CommitChangesOptions{Message: "Annex another file"}) + if err != nil { + return err + } + + _, _, err = git.NewCommand(git.DefaultContext, "annex", "copy", "--to", "origin").RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return err + } + + _, _, err = git.NewCommand(git.DefaultContext, "annex", "sync", "--no-content").RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return err + } + + // verify the file was uploaded + localObjectPath, err := annexObjectPath(repoPath, "contribution.bin") + if err != nil { + return err + } + // localObjectPath := path.Join(repoPath, "contribution.bin") // or, just compare against the checked-out file + + remoteObjectPath, err := annexObjectPath(remoteRepoPath, "contribution.bin") + if err != nil { + return err + } + + match, err := util.FileCmp(localObjectPath, remoteObjectPath, 0) + if err != nil { + return err + } + if !match { + return errors.New("Annexed files should be the same") + } + + return nil +} + +// ---- Helpers ---- + +func generateRandomFile(size int, path string) (err error) { + // Generate random file + + // XXX TODO: maybe this should not be random, but instead a predictable pattern, so that the test is deterministic + bufSize := 4 * 1024 + if bufSize > size { + bufSize = size + } + + buffer := make([]byte, bufSize) + + f, err := os.Create(path) + if err != nil { + return err + } + defer f.Close() + + written := 0 + for written < size { + n := size - written + if n > bufSize { + n = bufSize + } + _, err := rand.Read(buffer[:n]) + if err != nil { + return err + } + n, err = f.Write(buffer[:n]) + if err != nil { + return err + } + written += n + } + if err != nil { + return err + } + + return nil +} + +// ---- Annex-specific helpers ---- + +/* 
+Initialize a repo with some baseline annexed and non-annexed files. + + TODO: perhaps this generator could be replaced with a fixture (see + integrations/gitea-repositories-meta/ and models/fixtures/repository.yml). + However we reuse this template for -different- repos, so maybe not. +*/ +func doInitAnnexRepository(repoPath string) error { + // set up what files should be annexed + // in this case, all *.bin files will be annexed + // without this, git-annex's default config annexes every file larger than some number of megabytes + f, err := os.Create(path.Join(repoPath, ".gitattributes")) + if err != nil { + return err + } + defer f.Close() + + // set up git-annex to store certain filetypes via *annex* pointers + // (https://git-annex.branchable.com/internals/pointer_file/). + // but only when run via 'git add' (see git-annex-smudge(1)) + _, err = f.WriteString("* annex.largefiles=anything\n") + if err != nil { + return err + } + _, err = f.WriteString("*.bin filter=annex\n") + if err != nil { + return err + } + f.Close() + + err = git.AddChanges(repoPath, false, ".") + if err != nil { + return err + } + err = git.CommitChanges(repoPath, git.CommitChangesOptions{Message: "Configure git-annex settings"}) + if err != nil { + return err + } + + // 'git annex init' + err = git.NewCommand(git.DefaultContext, "annex", "init", "test-repo").Run(&git.RunOpts{Dir: repoPath}) + if err != nil { + return err + } + + // add a file to the annex + err = generateRandomFile(1024*1024/4, path.Join(repoPath, "large.bin")) + if err != nil { + return err + } + err = git.AddChanges(repoPath, false, ".") + if err != nil { + return err + } + err = git.CommitChanges(repoPath, git.CommitChangesOptions{Message: "Annex a file"}) + if err != nil { + return err + } + + return nil +} + +/* +Initialize a remote repo with some baseline annexed and non-annexed files. 
+*/ +func doInitRemoteAnnexRepository(t *testing.T, repoURL *url.URL) error { + repoPath := path.Join(t.TempDir(), path.Base(repoURL.Path)) + // This clone is immediately thrown away, which + // helps force the tests to be end-to-end. + defer util.RemoveAll(repoPath) + + doGitClone(repoPath, repoURL)(t) // TODO: this call is the only reason for the testing.T; can it be removed? + + err := doInitAnnexRepository(repoPath) + if err != nil { + return err + } + + _, _, err = git.NewCommand(git.DefaultContext, "annex", "sync", "--content").RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return err + } + + return nil +} + +/* +Find the path in .git/annex/objects/ of the contents for a given annexed file. + + repoPath: the git repository to examine + file: the path (in the repo's current HEAD) of the annex pointer + + TODO: pass a parameter to allow examining non-HEAD branches +*/ +func annexObjectPath(repoPath, file string) (string, error) { + // NB: `git annex lookupkey` is more reliable, but doesn't work in bare repos. 
+ annexKey, _, err := git.NewCommandContextNoGlobals(git.DefaultContext, "show").AddDynamicArguments("HEAD:" + file).RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return "", fmt.Errorf("in %s: %w", repoPath, err) // the error from git prints the filename but not repo + } + + // An annexed file pointer can take one of two formats: + // * a symlink to .git/annex/objects/$HASHDIR/$ANNEX_KEY/$ANNEX_KEY - used by files created with 'git annex add' + // * a text file containing /annex/objects/$ANNEX_KEY - used by files for which 'git add' was configured to run git-annex-smudge + // This recovers $ANNEX_KEY from either case: + annexKey = path.Base(strings.TrimSpace(annexKey)) + + contentPath, _, err := git.NewCommandContextNoGlobals(git.DefaultContext, "annex", "contentlocation").AddDynamicArguments(annexKey).RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + return "", fmt.Errorf("in %s: %s does not seem to be annexed: %w", repoPath, file, err) + } + contentPath = strings.TrimSpace(contentPath) + + return path.Join(repoPath, contentPath), nil +} + +/* like withKeyFile(), but automatically sets it up for the account given in ctx, for use by git-annex */ +func withAnnexCtxKeyFile(t *testing.T, ctx APITestContext, callback func()) { + _gitAnnexUseGitSSH, gitAnnexUseGitSSHExists := os.LookupEnv("GIT_ANNEX_USE_GIT_SSH") + defer func() { + // reset + if gitAnnexUseGitSSHExists { + os.Setenv("GIT_ANNEX_USE_GIT_SSH", _gitAnnexUseGitSSH) + } + }() + + os.Setenv("GIT_ANNEX_USE_GIT_SSH", "1") // withKeyFile works by setting GIT_SSH_COMMAND, but git-annex only respects that if this is set + + withCtxKeyFile(t, ctx, callback) +} diff --git a/tests/integration/git_helper_for_declarative_test.go b/tests/integration/git_helper_for_declarative_test.go index 10cf79b9fd8b2..e959e2e06cfa2 100644 --- a/tests/integration/git_helper_for_declarative_test.go +++ b/tests/integration/git_helper_for_declarative_test.go @@ -39,6 +39,28 @@ func withKeyFile(t *testing.T, keyname
string, callback func(string)) { "ssh -o \"UserKnownHostsFile=/dev/null\" -o \"StrictHostKeyChecking=no\" -o \"IdentitiesOnly=yes\" -i \""+keyFile+"\" \"$@\""), 0o700) assert.NoError(t, err) + // reset ssh wrapper afterwards + _gitSSH, gitSSHExists := os.LookupEnv("GIT_SSH") + defer func() { + if gitSSHExists { + os.Setenv("GIT_SSH", _gitSSH) + } + }() + + _gitSSHCommand, gitSSHCommandExists := os.LookupEnv("GIT_SSH_COMMAND") + defer func() { + if gitSSHCommandExists { + os.Setenv("GIT_SSH_COMMAND", _gitSSHCommand) + } + }() + + _gitSSHVariant, gitSSHVariantExists := os.LookupEnv("GIT_SSH_VARIANT") + defer func() { + if gitSSHVariantExists { + os.Setenv("GIT_SSH_VARIANT", _gitSSHVariant) + } + }() + // Setup ssh wrapper os.Setenv("GIT_SSH", path.Join(tmpDir, "ssh")) os.Setenv("GIT_SSH_COMMAND",