From d466eb46b83192feda898cc698719a328a0a1a7a Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Tue, 1 Jun 2021 13:44:33 +0300 Subject: [PATCH 01/24] find active commits --- pkg/retention/active_commits.go | 57 ++++++++++++++++++++++++++++ pkg/retention/active_commits_test.go | 19 ++++++++++ 2 files changed, 76 insertions(+) create mode 100644 pkg/retention/active_commits.go create mode 100644 pkg/retention/active_commits_test.go diff --git a/pkg/retention/active_commits.go b/pkg/retention/active_commits.go new file mode 100644 index 00000000000..296fc5e7659 --- /dev/null +++ b/pkg/retention/active_commits.go @@ -0,0 +1,57 @@ +package retention + +import ( + "context" + "time" + + "github.com/treeverse/lakefs/pkg/graveler" +) + +type ActiveCommitFinder struct { + refManager graveler.RefManager +} + +func (a *ActiveCommitFinder) FindActiveCommits(ctx context.Context, repositoryId graveler.RepositoryID) ([]graveler.CommitID, error) { + activeCommitsToThreshold := make(map[graveler.CommitID]time.Time) + branchIterator, err := a.refManager.ListBranches(ctx, repositoryId) + if err != nil { + return nil, err + } + for branchIterator.Next() { + branchRecord := branchIterator.Value() + commit, err := a.refManager.GetCommit(ctx, repositoryId, branchRecord.CommitID) + if err != nil { + return nil, err + } + branchExpirationThreshold := getExpirationThresholdForCommit(commit) + if !activeCommitsToThreshold[branchRecord.CommitID].After(branchExpirationThreshold) { + // was already here with earlier expiration date + continue + } + activeCommitsToThreshold[branchRecord.CommitID] = branchExpirationThreshold + for len(commit.Parents) > 0 && commit.CreationDate.After(branchExpirationThreshold) { + commitID := commit.Parents[0] + if !activeCommitsToThreshold[commitID].After(branchExpirationThreshold) { + // was already here with earlier expiration date + break + } + commit, err = a.refManager.GetCommit(ctx, repositoryId, commitID) + if err != nil { + return nil, err + } + activeCommitsToThreshold[commitID] = branchExpirationThreshold + } + } + if branchIterator.Err() != nil { + return nil, branchIterator.Err() + } + res := make([]graveler.CommitID, 0, len(activeCommitsToThreshold)) + for commitID := range activeCommitsToThreshold { + res = append(res, commitID) + } + return res, nil +} + +func getExpirationThresholdForCommit(c *graveler.Commit) time.Time { + return time.Now().AddDate(0, 0, -28) +} diff --git a/pkg/retention/active_commits_test.go b/pkg/retention/active_commits_test.go new file mode 100644 index 00000000000..2e4b69269ef --- /dev/null +++ b/pkg/retention/active_commits_test.go @@ -0,0 +1,19 @@ +package retention + +import ( + "testing" + + "github.com/treeverse/lakefs/pkg/graveler" +) + +func TestBasic(t *testing.T) { + a := &graveler.Commit{Message: "a", Parents: []graveler.CommitID{}} + b := &graveler.Commit{Message: "b", Parents: []graveler.CommitID{"a"}} + c := &graveler.Commit{Message: "c", Parents: []graveler.CommitID{"a"}} + d := &graveler.Commit{Message: "d", Parents: []graveler.CommitID{"c1"}} + e := &graveler.Commit{Message: "e", Parents: []graveler.CommitID{"c2"}} + f := &graveler.Commit{Message: "c5", Parents: []graveler.CommitID{"c3"}} +} + +func verifyResult(t *testing.T, base *graveler.Commit, expected []string) { +} From 0abc5c6fa38f1eed297d75349d5f9c70d2d8da19 Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Fri, 4 Jun 2021 10:31:14 +0300 Subject: [PATCH 02/24] wip --- Makefile | 2 + pkg/catalog/fake_graveler_test.go | 41 ----- pkg/graveler/graveler.go | 2 + pkg/graveler/testutil/fakes.go | 41 +++++ pkg/retention/active_commits.go | 57 ------- pkg/retention/active_commits_test.go | 19 --- pkg/retention/expired_commits.go | 86 +++++++++++ pkg/retention/expired_commits_test.go | 212 ++++++++++++++++++++++++++ 8 files changed, 343 insertions(+), 117 deletions(-) delete mode 100644 pkg/retention/active_commits.go delete mode 100644 pkg/retention/active_commits_test.go create mode 100644 pkg/retention/expired_commits.go create mode 100644 pkg/retention/expired_commits_test.go diff --git a/Makefile b/Makefile index 8510e45c67b..e73eb733727 100644 --- a/Makefile +++ b/Makefile @@ -67,6 +67,7 @@ clean: pkg/graveler/sstable/mock \ pkg/webui \ pkg/graveler/committed/mock + pkg/graveler/mock check-licenses: check-licenses-go-mod check-licenses-npm @@ -135,6 +136,7 @@ gen-api: go-install ## Run the swagger code generator gen-mockgen: go-install ## Run the generator for inline commands $(GOGENERATE) ./pkg/graveler/sstable $(GOGENERATE) ./pkg/graveler/committed + $(GOGENERATE) ./pkg/graveler $(GOGENERATE) ./pkg/pyramid $(GOGENERATE) ./pkg/onboard $(GOGENERATE) ./pkg/actions diff --git a/pkg/catalog/fake_graveler_test.go b/pkg/catalog/fake_graveler_test.go index bf5cca35f44..72714a71e4a 100644 --- a/pkg/catalog/fake_graveler_test.go +++ b/pkg/catalog/fake_graveler_test.go @@ -373,47 +373,6 @@ func (m *FakeRepositoryIterator) Err() error { func (m *FakeRepositoryIterator) Close() {} -type FakeBranchIterator struct { - Data []*graveler.BranchRecord - Index int -} - -func NewFakeBranchIterator(data []*graveler.BranchRecord) *FakeBranchIterator { - return &FakeBranchIterator{Data: data, Index: -1} -} - -func NewFakeBranchIteratorFactory(data []*graveler.BranchRecord) func() graveler.BranchIterator { - return func() graveler.BranchIterator { return NewFakeBranchIterator(data) } -} - -func (m *FakeBranchIterator) Next() bool { - if m.Index >= len(m.Data) { - return false - } - m.Index++ - return m.Index < len(m.Data) -} - -func (m *FakeBranchIterator) SeekGE(id graveler.BranchID) { - m.Index = len(m.Data) - for i, item := range m.Data { - if item.BranchID >= id { - m.Index = i - 1 - return - } - } -} - -func (m *FakeBranchIterator) Value() *graveler.BranchRecord { - return m.Data[m.Index] -} - -func (m *FakeBranchIterator) Err() error { - return nil -} - -func (m *FakeBranchIterator) Close() {} - type FakeTagIterator struct { Data []*graveler.TagRecord Index int diff --git a/pkg/graveler/graveler.go b/pkg/graveler/graveler.go index a278504d9fa..1eaddd4de75 100644 --- a/pkg/graveler/graveler.go +++ b/pkg/graveler/graveler.go @@ -14,6 +14,8 @@ import ( "google.golang.org/protobuf/types/known/timestamppb" ) +//go:generate mockgen -source=graveler.go -destination=mock/graveler.go -package=mock + // Basic Types // DiffType represents the type of the change diff --git a/pkg/graveler/testutil/fakes.go b/pkg/graveler/testutil/fakes.go index 2a48796166a..ed9289bc899 100644 --- a/pkg/graveler/testutil/fakes.go +++ b/pkg/graveler/testutil/fakes.go @@ -682,3 +682,44 @@ func (i *FakeDiffIterator) Err() error { func (i *FakeDiffIterator) Close() { i.closed = true } + +type FakeBranchIterator struct { + Data []*graveler.BranchRecord + Index int +} + +func NewFakeBranchIterator(data []*graveler.BranchRecord) *FakeBranchIterator { + return &FakeBranchIterator{Data: data, Index: -1} +} + +func NewFakeBranchIteratorFactory(data []*graveler.BranchRecord) func() graveler.BranchIterator { + return func() graveler.BranchIterator { return NewFakeBranchIterator(data) } +} + +func (m *FakeBranchIterator) Next() bool { + if m.Index >= len(m.Data) { + return false + } + m.Index++ + return m.Index < len(m.Data) +} + +func (m *FakeBranchIterator) SeekGE(id graveler.BranchID) { + m.Index = len(m.Data) + for i, item := range m.Data { + if item.BranchID >= id { + m.Index = i - 1 + return + } + } +} + +func (m *FakeBranchIterator) Value() *graveler.BranchRecord { + return m.Data[m.Index] +} + +func (m *FakeBranchIterator) Err() error { + return nil +} + +func (m *FakeBranchIterator) Close() {} diff --git a/pkg/retention/active_commits.go b/pkg/retention/active_commits.go deleted file mode 100644 index 296fc5e7659..00000000000 --- a/pkg/retention/active_commits.go +++ /dev/null @@ -1,57 +0,0 @@ -package retention - -import ( - "context" - "time" - - "github.com/treeverse/lakefs/pkg/graveler" -) - -type ActiveCommitFinder struct { - refManager graveler.RefManager -} - -func (a *ActiveCommitFinder) FindActiveCommits(ctx context.Context, repositoryId graveler.RepositoryID) ([]graveler.CommitID, error) { - activeCommitsToThreshold := make(map[graveler.CommitID]time.Time) - branchIterator, err := a.refManager.ListBranches(ctx, repositoryId) - if err != nil { - return nil, err - } - for branchIterator.Next() { - branchRecord := branchIterator.Value() - commit, err := a.refManager.GetCommit(ctx, repositoryId, branchRecord.CommitID) - if err != nil { - return nil, err - } - branchExpirationThreshold := getExpirationThresholdForCommit(commit) - if !activeCommitsToThreshold[branchRecord.CommitID].After(branchExpirationThreshold) { - // was already here with earlier expiration date - continue - } - activeCommitsToThreshold[branchRecord.CommitID] = branchExpirationThreshold - for len(commit.Parents) > 0 && commit.CreationDate.After(branchExpirationThreshold) { - commitID := commit.Parents[0] - if !activeCommitsToThreshold[commitID].After(branchExpirationThreshold) { - // was already here with earlier expiration date - break - } - commit, err = a.refManager.GetCommit(ctx, repositoryId, commitID) - if err != nil { - return nil, err - } - activeCommitsToThreshold[commitID] = branchExpirationThreshold - } - } - if branchIterator.Err() != nil { - return nil, branchIterator.Err() - } - res := make([]graveler.CommitID, 0, len(activeCommitsToThreshold)) - for commitID := range activeCommitsToThreshold { - res = append(res, commitID) - } - return res, nil -} - -func getExpirationThresholdForCommit(c *graveler.Commit) time.Time { - return time.Now().AddDate(0, 0, -28) -} diff --git a/pkg/retention/active_commits_test.go b/pkg/retention/active_commits_test.go deleted file mode 100644 index 2e4b69269ef..00000000000 --- a/pkg/retention/active_commits_test.go +++ /dev/null @@ -1,19 +0,0 @@ -package retention - -import ( - "testing" - - "github.com/treeverse/lakefs/pkg/graveler" -) - -func TestBasic(t *testing.T) { - a := &graveler.Commit{Message: "a", Parents: []graveler.CommitID{}} - b := &graveler.Commit{Message: "b", Parents: []graveler.CommitID{"a"}} - c := &graveler.Commit{Message: "c", Parents: []graveler.CommitID{"a"}} - d := &graveler.Commit{Message: "d", Parents: []graveler.CommitID{"c1"}} - e := &graveler.Commit{Message: "e", Parents: []graveler.CommitID{"c2"}} - f := &graveler.Commit{Message: "c5", Parents: []graveler.CommitID{"c3"}} -} - -func verifyResult(t *testing.T, base *graveler.Commit, expected []string) { -} diff --git a/pkg/retention/expired_commits.go b/pkg/retention/expired_commits.go new file mode 100644 index 00000000000..4a645700f43 --- /dev/null +++ b/pkg/retention/expired_commits.go @@ -0,0 +1,86 @@ +package retention + +import ( + "context" + "time" + + "github.com/treeverse/lakefs/pkg/graveler" +) + +type ExpirationDateGetter interface { + Get(c *graveler.CommitRecord) time.Time +} + +type ExpiredCommitFinder struct { + refManager graveler.RefManager + expirationDateGetter ExpirationDateGetter +} + +type CommitSet map[graveler.CommitID]bool + +type Commits struct { + Expired CommitSet + Active CommitSet +} + +func (a *ExpiredCommitFinder) Find(ctx context.Context, repositoryId graveler.RepositoryID, previouslyExpiredCommits CommitSet) (*Commits, error) { + processed := make(map[graveler.CommitID]time.Time) + res := &Commits{ + Active: make(map[graveler.CommitID]bool, 0), + Expired: make(map[graveler.CommitID]bool, 0), + } + branchIterator, err := a.refManager.ListBranches(ctx, repositoryId) + if err != nil { + return nil, err + } + for branchIterator.Next() { + branchRecord := branchIterator.Value() + commitID := branchRecord.CommitID + previousCommit, err := a.refManager.GetCommit(ctx, repositoryId, commitID) + if err != nil { + return nil, err + } + var branchExpirationThreshold time.Time + if a.expirationDateGetter == nil { + branchExpirationThreshold = getExpirationThresholdForCommit(previousCommit) + } else { + branchExpirationThreshold = a.expirationDateGetter.Get(&graveler.CommitRecord{CommitID: commitID, Commit: previousCommit}) + } + if previousThreshold, ok := processed[commitID]; ok && !previousThreshold.After(branchExpirationThreshold) { + // was already here with earlier expiration date + continue + } + processed[commitID] = branchExpirationThreshold + res.Active[commitID] = true + for len(previousCommit.Parents) > 0 { + commitID = previousCommit.Parents[0] + if _, ok := previouslyExpiredCommits[commitID]; ok { + // commit was already expired in a previous run + break + } + if previousThreshold, ok := processed[commitID]; ok && !previousThreshold.After(branchExpirationThreshold) { + // was already here with earlier expiration date + break + } + if previousCommit.CreationDate.After(branchExpirationThreshold) { + res.Active[commitID] = true + delete(res.Expired, commitID) + } else if active, ok := res.Active[commitID]; !ok || !active { + res.Expired[commitID] = true + } + previousCommit, err = a.refManager.GetCommit(ctx, repositoryId, commitID) + if err != nil { + return nil, err + } + processed[commitID] = branchExpirationThreshold + } + } + if branchIterator.Err() != nil { + return nil, branchIterator.Err() + } + return res, nil +} + +func getExpirationThresholdForCommit(_ *graveler.Commit) time.Time { + return time.Now().AddDate(0, 0, -28) +} diff --git a/pkg/retention/expired_commits_test.go b/pkg/retention/expired_commits_test.go new file mode 100644 index 00000000000..1bd2c267161 --- /dev/null +++ b/pkg/retention/expired_commits_test.go @@ -0,0 +1,212 @@ +package retention + +import ( + "context" + "sort" + "testing" + "time" + + "github.com/go-test/deep" + "github.com/golang/mock/gomock" + "github.com/treeverse/lakefs/pkg/graveler" + "github.com/treeverse/lakefs/pkg/graveler/mock" + gtestutil "github.com/treeverse/lakefs/pkg/graveler/testutil" + "github.com/treeverse/lakefs/pkg/testutil" +) + +type testExpirationDateGetter struct { + expirationDates map[string]time.Time +} + +func (t *testExpirationDateGetter) Get(c *graveler.CommitRecord) time.Time { + return t.expirationDates[string(c.CommitID)] +} + +type testCommit struct { + daysPassed int + parents []graveler.CommitID +} + +func newTestCommit(daysPassed int, parents ...graveler.CommitID) testCommit { + return testCommit{ + daysPassed: daysPassed, + parents: parents, + } +} + +func newCommitSet(commitIDs []string) CommitSet { + res := make(map[graveler.CommitID]bool, 0) + for _, commitID := range commitIDs { + res[graveler.CommitID(commitID)] = true + } + return res +} + +func TestBasic(t *testing.T) { + tests := map[string]struct { + commits map[string]testCommit + headsRetentionDays map[string]int + previouslyExpired []string + expectedActiveIDs []string + expectedExpiredIDs []string + }{ + "two_branches": { + commits: map[string]testCommit{ + "a": newTestCommit(15), + "b": newTestCommit(10, "a"), + "c": newTestCommit(10, "a"), + "d": newTestCommit(5, "c"), + "e": newTestCommit(5, "b"), + "f": newTestCommit(1, "e"), + }, + headsRetentionDays: map[string]int{"f": 7, "d": 3}, + expectedActiveIDs: []string{"b", "d", "e", "f"}, + expectedExpiredIDs: []string{"a", "c"}, + }, + "old_heads": { + commits: map[string]testCommit{ + "a": newTestCommit(15), + "b": newTestCommit(20, "a"), + "c": newTestCommit(20, "a"), + "d": newTestCommit(20, "a"), + }, + headsRetentionDays: map[string]int{"b": 7, "c": 7, "d": 7}, + expectedActiveIDs: []string{"b", "c", "d"}, + expectedExpiredIDs: []string{"a"}, + }, + "all_commits_active": { + commits: map[string]testCommit{ + "a": newTestCommit(5), + "b": newTestCommit(4, "a"), + "c": newTestCommit(3, "b"), + "d": newTestCommit(2, "b"), + "e": newTestCommit(1, "b"), + }, + headsRetentionDays: map[string]int{"d": 15, "e": 7, "c": 2}, + expectedActiveIDs: []string{"a", "b", "c", "d", "e"}, + expectedExpiredIDs: []string{}, + }, + "merge": { + commits: map[string]testCommit{ + "a": newTestCommit(7), + "b": newTestCommit(6, "a"), + "c": newTestCommit(7), + "d": newTestCommit(6, "c", "a"), + }, + headsRetentionDays: map[string]int{"b": 3, "d": 10}, + expectedActiveIDs: []string{"b", "c", "d"}, + expectedExpiredIDs: []string{"a"}, + }, + "two_branches_with_previously_expired": { + commits: map[string]testCommit{ + "a": newTestCommit(15), + "b": newTestCommit(10, "a"), + "c": newTestCommit(10, "a"), + "d": newTestCommit(5, "c"), + "e": newTestCommit(5, "b"), + "f": newTestCommit(1, "e"), + }, + headsRetentionDays: map[string]int{"f": 7, "d": 3}, + previouslyExpired: []string{"a"}, + expectedActiveIDs: []string{"b", "d", "e", "f"}, + expectedExpiredIDs: []string{"c"}, + }, + "many_previously_expired": { + commits: map[string]testCommit{ + "e7": newTestCommit(6), + "e6": newTestCommit(6, "e7"), + "e5": newTestCommit(6, "e6"), + "e4": newTestCommit(6, "e5"), + "e3": newTestCommit(6, "e4"), + "e2": newTestCommit(6, "e3"), + "e1": newTestCommit(6, "e2"), + "a": newTestCommit(6, "e1"), + "b": newTestCommit(5, "a"), + "c": newTestCommit(5, "a"), + }, + headsRetentionDays: map[string]int{"c": 7, "b": 7}, + previouslyExpired: []string{"e1", "e2", "e3", "e4", "e5", "e6", "e7"}, + expectedActiveIDs: []string{"a", "b", "c"}, + expectedExpiredIDs: []string{}, + }, + "merge_in_history": { + // graph taken from git core tests + // E---D---C---B---A + // \"-_ \ \ + // \ `---------G \ + // \ \ + // F----------------H + commits: map[string]testCommit{ + "e": newTestCommit(21), + "d": newTestCommit(20, "e"), + "f": newTestCommit(19, "e"), + "c": newTestCommit(18, "e"), + "b": newTestCommit(17, "d"), + "a": newTestCommit(4, "c"), + "g": newTestCommit(4, "b", "e"), + "h": newTestCommit(3, "a", "f"), + }, + headsRetentionDays: map[string]int{"h": 14, "g": 7, "f": 7}, + previouslyExpired: []string{}, + expectedActiveIDs: []string{"h", "a", "b", "c", "f", "g"}, + expectedExpiredIDs: []string{"e", "d"}, + }, + } + now := time.Now() + for name, tst := range tests { + t.Run(name, func(t *testing.T) { + branchRecords := make([]*graveler.BranchRecord, 0, len(tst.headsRetentionDays)) + expirationDates := make(map[string]time.Time) + ctrl := gomock.NewController(t) + refManagerMock := mock.NewMockRefManager(ctrl) + ctx := context.Background() + for head, retentionDays := range tst.headsRetentionDays { + branchRecords = append(branchRecords, &graveler.BranchRecord{ + Branch: &graveler.Branch{CommitID: graveler.CommitID(head)}, + }) + expirationDates[head] = now.AddDate(0, 0, -retentionDays) + } + sort.Slice(branchRecords, func(i, j int) bool { + return expirationDates[string(branchRecords[i].CommitID)].Before(expirationDates[string(branchRecords[j].CommitID)]) + }) + branchIterator := gtestutil.NewFakeBranchIterator(branchRecords) + refManagerMock.EXPECT().ListBranches(ctx, graveler.RepositoryID("test")).Return(branchIterator, nil) + commitMap := make(map[graveler.CommitID]*graveler.Commit) + previouslyExpired := newCommitSet(tst.previouslyExpired) + for commitID, testCommit := range tst.commits { + id := graveler.CommitID(commitID) + commitMap[id] = &graveler.Commit{Message: commitID, Parents: testCommit.parents, CreationDate: now.AddDate(0, 0, -testCommit.daysPassed)} + if !previouslyExpired[id] { + refManagerMock.EXPECT().GetCommit(ctx, graveler.RepositoryID("test"), id).Return(commitMap[id], nil) + } + } + finder := ExpiredCommitFinder{ + refManager: refManagerMock, + expirationDateGetter: &testExpirationDateGetter{ + expirationDates: expirationDates, + }, + } + retentionCommits, err := finder.Find(ctx, "test", previouslyExpired) + testutil.MustDo(t, "find active commits", err) + activeCommitIDs := make([]string, 0, len(retentionCommits.Active)) + for commitID := range retentionCommits.Active { + activeCommitIDs = append(activeCommitIDs, string(commitID.Ref())) + } + sort.Strings(tst.expectedActiveIDs) + sort.Strings(activeCommitIDs) + if diff := deep.Equal(tst.expectedActiveIDs, activeCommitIDs); diff != nil { + t.Errorf("active commits ids diff=%s", diff) + } + expiredCommitIDs := make([]string, 0, len(retentionCommits.Expired)) + + for commitID := range retentionCommits.Expired { + expiredCommitIDs = append(expiredCommitIDs, string(commitID.Ref())) + } + sort.Strings(tst.expectedExpiredIDs) + sort.Strings(expiredCommitIDs) + if diff := deep.Equal(tst.expectedExpiredIDs, expiredCommitIDs); diff != nil { + t.Errorf("expired commits ids diff=%s", diff) + } + }) + } +} From 431c20773bdf498cde2d659c035d1c15f70f604c Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Sun, 6 Jun 2021 14:56:54 +0300 Subject: [PATCH 03/24] rename test --- pkg/retention/expired_commits_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/retention/expired_commits_test.go b/pkg/retention/expired_commits_test.go index 1bd2c267161..068695bac99 100644 --- a/pkg/retention/expired_commits_test.go +++ b/pkg/retention/expired_commits_test.go @@ -42,7 +42,7 @@ func newCommitSet(commitIDs []string) CommitSet { return res } -func TestBasic(t *testing.T) { +func TestExpiredCommits(t *testing.T) { tests := map[string]struct { commits map[string]testCommit headsRetentionDays map[string]int From 3763a54df6472719e4dcb8a3dd00b85870084e8c Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Sun, 6 Jun 2021 14:58:01 +0300 Subject: [PATCH 04/24] linter fixes --- pkg/retention/expired_commits.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/retention/expired_commits.go b/pkg/retention/expired_commits.go index 4a645700f43..26b48f70f84 100644 --- a/pkg/retention/expired_commits.go +++ b/pkg/retention/expired_commits.go @@ -23,20 +23,20 @@ type Commits struct { Active CommitSet } -func (a *ExpiredCommitFinder) Find(ctx context.Context, repositoryId graveler.RepositoryID, previouslyExpiredCommits CommitSet) (*Commits, error) { +func (a *ExpiredCommitFinder) Find(ctx context.Context, repositoryID graveler.RepositoryID, previouslyExpiredCommits CommitSet) (*Commits, error) { processed := make(map[graveler.CommitID]time.Time) res := &Commits{ - Active: make(map[graveler.CommitID]bool, 0), - Expired: make(map[graveler.CommitID]bool, 0), + Active: make(map[graveler.CommitID]bool), + Expired: make(map[graveler.CommitID]bool), } - branchIterator, err := a.refManager.ListBranches(ctx, repositoryId) + branchIterator, err := a.refManager.ListBranches(ctx, repositoryID) if err != nil { return nil, err } for branchIterator.Next() { branchRecord := branchIterator.Value() commitID := branchRecord.CommitID - previousCommit, err := a.refManager.GetCommit(ctx, repositoryId, commitID) + previousCommit, err := a.refManager.GetCommit(ctx, repositoryID, commitID) if err != nil { return nil, err } @@ -68,7 +68,7 @@ func (a *ExpiredCommitFinder) Find(ctx context.Context, repositoryId graveler.Re } else if active, ok := res.Active[commitID]; !ok || !active { res.Expired[commitID] = true } - previousCommit, err = a.refManager.GetCommit(ctx, repositoryId, commitID) + previousCommit, err = a.refManager.GetCommit(ctx, repositoryID, commitID) if err != nil { return nil, err } From 3876effebfeb56cc1013202c86c4a5caa2c92771 Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Sun, 6 Jun 2021 16:12:47 +0300 Subject: [PATCH 05/24] fix test --- pkg/catalog/catalog_test.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/catalog/catalog_test.go b/pkg/catalog/catalog_test.go index 58e959938df..775e834b7fc 100644 --- a/pkg/catalog/catalog_test.go +++ b/pkg/catalog/catalog_test.go @@ -5,6 +5,8 @@ import ( "testing" "time" + "github.com/treeverse/lakefs/pkg/graveler/testutil" + "github.com/go-test/deep" "github.com/treeverse/lakefs/pkg/graveler" "google.golang.org/protobuf/types/known/timestamppb" @@ -125,7 +127,7 @@ func TestCatalog_BranchExists(t *testing.T) { t.Run(tt.Branch, func(t *testing.T) { // setup Catalog gravelerMock := &FakeGraveler{ - BranchIteratorFactory: NewFakeBranchIteratorFactory(gravelerData), + BranchIteratorFactory: testutil.NewFakeBranchIteratorFactory(gravelerData), } c := &Catalog{ Store: gravelerMock, @@ -227,7 +229,7 @@ func TestCatalog_ListBranches(t *testing.T) { t.Run(tt.name, func(t *testing.T) { // setup Catalog gravelerMock := &FakeGraveler{ - BranchIteratorFactory: NewFakeBranchIteratorFactory(gravelerData), + BranchIteratorFactory: testutil.NewFakeBranchIteratorFactory(gravelerData), } c := &Catalog{ Store: gravelerMock, From 2ddf880e304963a330079837ef551fd4d733ac7d Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Tue, 8 Jun 2021 13:52:33 +0300 Subject: [PATCH 06/24] wip --- api/swagger.yml | 31 +++++++++++++++- clients/spark/build.sbt | 2 +- pkg/api/controller.go | 12 ++++++ pkg/retention/commit_set_writer.go | 49 +++++++++++++++++++++++++ pkg/retention/commit_set_writer_test.go | 28 ++++++++++++++ pkg/retention/expired_commits.go | 4 ++ pkg/retention/manager.go | 19 ++++++++++ 7 files changed, 143 insertions(+), 2 deletions(-) create mode 100644 pkg/retention/commit_set_writer.go create mode 100644 pkg/retention/commit_set_writer_test.go create mode 100644 pkg/retention/manager.go diff --git a/api/swagger.yml b/api/swagger.yml index cd17eb339b9..142a76115b3 100644 --- a/api/swagger.yml +++ b/api/swagger.yml @@ -2808,7 +2808,36 @@ paths: $ref: "#/components/responses/NotFound" default: $ref: "#/components/responses/ServerError" - + /repositories/{repository}/gc/prepare: + parameters: + - in: path + name: repository + required: true + schema: + type: string + post: + tags: + - retention + operationId: prepareRetentionCommits + summary: save lists of active and expired commits for garbage collection + responses: + 201: + description: paths to commit sets + content: + application/json: + schema: + type: object + properties: + path_to_expired: + type: string + path_to_active: + type: string + 401: + $ref: "#/components/responses/Unauthorized" + 404: + $ref: "#/components/responses/NotFound" + default: + $ref: "#/components/responses/ServerError" /healthcheck: get: operationId: healthCheck diff --git a/clients/spark/build.sbt b/clients/spark/build.sbt index 5e272f64973..9938f3da626 100644 --- a/clients/spark/build.sbt +++ b/clients/spark/build.sbt @@ -57,7 +57,7 @@ def generateExamplesProject(buildType: BuildType) = sharedSettings, settingsToCompileIn("examples"), scalaVersion := buildType.scalaVersion, - libraryDependencies ++= Seq("org.apache.spark" %% "spark-sql" % buildType.sparkVersion % "provided", + libraryDependencies ++= Seq("org. apache.spark" %% "spark-sql" % buildType.sparkVersion % "provided", "software.amazon.awssdk" % "bom" % "2.15.15", "software.amazon.awssdk" % "s3" % "2.15.15", "com.amazonaws" % "aws-java-sdk" % "1.7.4", // should match hadoop-aws version(!) diff --git a/pkg/api/controller.go b/pkg/api/controller.go index 389457b8790..4d2a36c4761 100644 --- a/pkg/api/controller.go +++ b/pkg/api/controller.go @@ -1888,6 +1888,18 @@ func (c *Controller) GetCommit(w http.ResponseWriter, r *http.Request, repositor writeResponse(w, http.StatusOK, response) } +func (c *Controller) PrepareRetentionCommits(w http.ResponseWriter, r *http.Request, repository string) { + if !c.authorize(w, r, []permissions.Permission{ + { + Action: permissions.ListObjectsAction, + Resource: permissions.RepoArn(repository), + }, + }) { + return + } + +} + func (c *Controller) GetMetaRange(w http.ResponseWriter, r *http.Request, repository string, metaRange string) { if !c.authorize(w, r, []permissions.Permission{ { diff --git a/pkg/retention/commit_set_writer.go b/pkg/retention/commit_set_writer.go new file mode 100644 index 00000000000..f016089cda5 --- /dev/null +++ b/pkg/retention/commit_set_writer.go @@ -0,0 +1,49 @@ +package retention + +import ( + "context" + "encoding/csv" + "io" + "strings" + + "github.com/treeverse/lakefs/pkg/block" + + "github.com/treeverse/lakefs/pkg/graveler" +) + +type CommitSetWriter struct { + ctx context.Context + block block.Adapter +} + +func NewCommitSetWriter(block block.Adapter) *CommitSetWriter { + return &CommitSetWriter{block: block} +} + +func write(commitIDs map[graveler.CommitID]bool, writer *io.PipeWriter, isExpired bool) error { + csvExpiredWriter := csv.NewWriter(writer) + for commitID := range commitIDs { + err := csvExpiredWriter.Write([]string{string(commitID), isExpired}) + if err != nil { + return err + } + } + csvExpiredWriter.Flush() + return writer.Close() +} + +func (c *CommitSetWriter) Write(commits *Commits) error { + b := &strings.Builder{} + csv.NewWriter(b) + + c.block.UploadPart().Put(c.ctx, &block.ObjectPointer{ + StorageNamespace: "", + Identifier: "", + IdentifierType: block.IdentifierTypeFull, + }) + err := write(commits.Expired, c.expiredWriter) + if err != nil { + return err + } + return write(commits.Active, c.activeWriter) +} diff --git a/pkg/retention/commit_set_writer_test.go b/pkg/retention/commit_set_writer_test.go new file mode 100644 index 00000000000..bb2a37e56f3 --- /dev/null +++ b/pkg/retention/commit_set_writer_test.go @@ -0,0 +1,28 @@ +package retention + +import ( + "testing" + + "github.com/go-openapi/swag" + + "github.com/treeverse/lakefs/pkg/graveler" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/session" + + "github.com/aws/aws-sdk-go/service/s3" +) + +func Test(t *testing.T) { + awsConfig := &aws.Config{Region: swag.String("us-east-1")} + sess, err := session.NewSession(awsConfig) + if err != nil { + panic(err) + } + client := s3.New(sess, awsConfig) + w := NewCommitSetWriter("yoni-test3", "retention_test/1", client) + w.Write(&Commits{ + Expired: map[graveler.CommitID]bool{"a": true, "b": true}, + Active: map[graveler.CommitID]bool{"c": true, "d": true}, + }) +} diff --git a/pkg/retention/expired_commits.go b/pkg/retention/expired_commits.go index 26b48f70f84..a9a85ae156c 100644 --- a/pkg/retention/expired_commits.go +++ b/pkg/retention/expired_commits.go @@ -16,6 +16,10 @@ type ExpiredCommitFinder struct { expirationDateGetter ExpirationDateGetter } +func NewExpiredCommitFinder(refManager graveler.RefManager) *ExpiredCommitFinder { + return &ExpiredCommitFinder{refManager: refManager} +} + type CommitSet map[graveler.CommitID]bool type Commits struct { diff --git a/pkg/retention/manager.go b/pkg/retention/manager.go new file mode 100644 index 00000000000..8ab6760f928 --- /dev/null +++ b/pkg/retention/manager.go @@ -0,0 +1,19 @@ +package retention + +import ( + "context" + + "github.com/treeverse/lakefs/pkg/block" + "github.com/treeverse/lakefs/pkg/graveler" +) + +type Manager struct { + graveler graveler.Graveler + adapter block.Adapter +} + +func (m *Manager) Prepare(ctx context.Context, repositoryID graveler.RepositoryID) { + expiredCommitFinder := NewExpiredCommitFinder(m.graveler.RefManager) + commits, err := expiredCommitFinder.Find(ctx, repositoryID, nil) + +} From d37ec3aa06b93d07a716927ed015b78989c1ff2a Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Thu, 10 Jun 2021 15:24:11 +0300 Subject: [PATCH 07/24] wip --- clients/java/README.md | 2 + clients/java/api/openapi.yaml | 50 +++++ clients/java/docs/InlineResponse201.md | 14 ++ clients/java/docs/RetentionApi.md | 88 +++++++++ .../io/lakefs/clients/api/RetentionApi.java | 182 ++++++++++++++++++ .../clients/api/model/InlineResponse201.java | 127 ++++++++++++ .../lakefs/clients/api/RetentionApiTest.java | 52 +++++ .../api/model/InlineResponse201Test.java | 59 ++++++ clients/python/.openapi-generator/FILES | 6 + clients/python/README.md | 2 + clients/python/docs/InlineResponse201.md | 12 ++ clients/python/docs/RetentionApi.md | 100 ++++++++++ .../python/lakefs_client/api/retention_api.py | 159 +++++++++++++++ clients/python/lakefs_client/apis/__init__.py | 1 + .../lakefs_client/model/inline_response201.py | 170 ++++++++++++++++ .../python/lakefs_client/models/__init__.py | 1 + .../python/test/test_inline_response201.py | 36 ++++ clients/python/test/test_retention_api.py | 36 ++++ pkg/api/controller.go | 2 +- pkg/retention/commit_set_writer.go | 37 ++-- pkg/retention/commit_set_writer_test.go | 23 +-- 21 files changed, 1119 insertions(+), 40 deletions(-) create mode 100644 clients/java/docs/InlineResponse201.md create mode 100644 clients/java/docs/RetentionApi.md create mode 100644 clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java create mode 100644 clients/java/src/main/java/io/lakefs/clients/api/model/InlineResponse201.java create mode 100644 clients/java/src/test/java/io/lakefs/clients/api/RetentionApiTest.java create mode 100644 clients/java/src/test/java/io/lakefs/clients/api/model/InlineResponse201Test.java create mode 100644 clients/python/docs/InlineResponse201.md create mode 100644 clients/python/docs/RetentionApi.md create mode 100644 clients/python/lakefs_client/api/retention_api.py create mode 100644 clients/python/lakefs_client/model/inline_response201.py create mode 100644 clients/python/test/test_inline_response201.py create mode 100644 clients/python/test/test_retention_api.py diff --git a/clients/java/README.md b/clients/java/README.md index fc940ec0f43..ba975e88f22 100644 --- a/clients/java/README.md +++ b/clients/java/README.md @@ -189,6 +189,7 @@ Class | Method | HTTP request | Description *RepositoriesApi* | [**deleteRepository**](docs/RepositoriesApi.md#deleteRepository) | **DELETE** /repositories/{repository} | delete repository *RepositoriesApi* | [**getRepository**](docs/RepositoriesApi.md#getRepository) | **GET** /repositories/{repository} | get repository *RepositoriesApi* | [**listRepositories**](docs/RepositoriesApi.md#listRepositories) | **GET** /repositories | list repositories +*RetentionApi* | [**prepareRetentionCommits**](docs/RetentionApi.md#prepareRetentionCommits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection *StagingApi* | [**getPhysicalAddress**](docs/StagingApi.md#getPhysicalAddress) | **GET** /repositories/{repository}/branches/{branch}/staging/backing | get a physical address and a return token to write object to underlying storage *StagingApi* | [**linkPhysicalAddress**](docs/StagingApi.md#linkPhysicalAddress) | **PUT** /repositories/{repository}/branches/{branch}/staging/backing | associate staging on this physical address with a path *TagsApi* | [**createTag**](docs/TagsApi.md#createTag) | **POST** /repositories/{repository}/tags | create tag @@ -219,6 +220,7 @@ Class | Method | HTTP request | Description - [GroupList](docs/GroupList.md) - [HookRun](docs/HookRun.md) - [HookRunList](docs/HookRunList.md) + - [InlineResponse201](docs/InlineResponse201.md) - [LoginInformation](docs/LoginInformation.md) - [Merge](docs/Merge.md) - [MergeResult](docs/MergeResult.md) diff --git a/clients/java/api/openapi.yaml b/clients/java/api/openapi.yaml index 236d354a656..463618fecc6 100644 --- a/clients/java/api/openapi.yaml +++ b/clients/java/api/openapi.yaml @@ -3738,6 +3738,46 @@ paths: tags: - metadata x-accepts: application/json + /repositories/{repository}/gc/prepare: + post: + operationId: prepareRetentionCommits + parameters: + - explode: false + in: path + name: repository + required: true + schema: + type: string + style: simple + responses: + "201": + content: + application/json: + schema: + $ref: '#/components/schemas/inline_response_201' + description: paths to commit sets + "401": + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + description: Unauthorized + "404": + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + description: Resource Not Found + default: + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + description: Internal Server Error + summary: save lists of active and expired commits for garbage collection + tags: + - retention + x-accepts: application/json /healthcheck: get: description: check that the API server is up and running @@ -4890,6 +4930,16 @@ components: format: binary type: string type: object + inline_response_201: + example: + path_to_expired: path_to_expired + path_to_active: path_to_active + properties: + path_to_expired: + type: string + path_to_active: + type: string + type: object MergeResult_summary: example: removed: 6 diff --git a/clients/java/docs/InlineResponse201.md b/clients/java/docs/InlineResponse201.md new file mode 100644 index 00000000000..1c92483accc --- /dev/null +++ b/clients/java/docs/InlineResponse201.md @@ -0,0 +1,14 @@ + + +# InlineResponse201 + + +## Properties + +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**pathToExpired** | **String** | | [optional] +**pathToActive** | **String** | | [optional] + + + diff --git a/clients/java/docs/RetentionApi.md b/clients/java/docs/RetentionApi.md new file mode 100644 index 00000000000..d2db2d33e09 --- /dev/null +++ b/clients/java/docs/RetentionApi.md @@ -0,0 +1,88 @@ +# RetentionApi + +All URIs are relative to *http://localhost/api/v1* + +Method | HTTP request | Description +------------- | ------------- | ------------- +[**prepareRetentionCommits**](RetentionApi.md#prepareRetentionCommits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection + + + +# **prepareRetentionCommits** +> InlineResponse201 prepareRetentionCommits(repository) + +save lists of active and expired commits for garbage collection + +### Example +```java +// Import classes: +import io.lakefs.clients.api.ApiClient; +import io.lakefs.clients.api.ApiException; +import io.lakefs.clients.api.Configuration; +import io.lakefs.clients.api.auth.*; +import io.lakefs.clients.api.models.*; +import io.lakefs.clients.api.RetentionApi; + +public class Example { + public static void main(String[] args) { + ApiClient defaultClient = Configuration.getDefaultApiClient(); + defaultClient.setBasePath("http://localhost/api/v1"); + + // Configure HTTP basic authorization: basic_auth + HttpBasicAuth basic_auth = (HttpBasicAuth) defaultClient.getAuthentication("basic_auth"); + basic_auth.setUsername("YOUR USERNAME"); + basic_auth.setPassword("YOUR PASSWORD"); + + // Configure API key authorization: cookie_auth + ApiKeyAuth cookie_auth = (ApiKeyAuth) defaultClient.getAuthentication("cookie_auth"); + cookie_auth.setApiKey("YOUR API KEY"); + // Uncomment the following line to set a prefix for the API key, e.g. "Token" (defaults to null) + //cookie_auth.setApiKeyPrefix("Token"); + + // Configure HTTP bearer authorization: jwt_token + HttpBearerAuth jwt_token = (HttpBearerAuth) defaultClient.getAuthentication("jwt_token"); + jwt_token.setBearerToken("BEARER TOKEN"); + + RetentionApi apiInstance = new RetentionApi(defaultClient); + String repository = "repository_example"; // String | + try { + InlineResponse201 result = apiInstance.prepareRetentionCommits(repository); + System.out.println(result); + } catch (ApiException e) { + System.err.println("Exception when calling RetentionApi#prepareRetentionCommits"); + System.err.println("Status code: " + e.getCode()); + System.err.println("Reason: " + e.getResponseBody()); + System.err.println("Response headers: " + e.getResponseHeaders()); + e.printStackTrace(); + } + } +} +``` + +### Parameters + +Name | Type | Description | Notes +------------- | ------------- | ------------- | ------------- + **repository** | **String**| | + +### Return type + +[**InlineResponse201**](InlineResponse201.md) + +### Authorization + +[basic_auth](../README.md#basic_auth), [cookie_auth](../README.md#cookie_auth), [jwt_token](../README.md#jwt_token) + +### HTTP request headers + + - **Content-Type**: Not defined + - **Accept**: application/json + +### HTTP response details +| Status code | Description | Response headers | +|-------------|-------------|------------------| +**201** | paths to commit sets | - | +**401** | Unauthorized | - | +**404** | Resource Not Found | - | +**0** | Internal Server Error | - | + diff --git a/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java b/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java new file mode 100644 index 00000000000..5f44ce26b7a --- /dev/null +++ b/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java @@ -0,0 +1,182 @@ +/* + * lakeFS API + * lakeFS HTTP API + * + * The version of the OpenAPI document: 0.1.0 + * + * + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. + */ + + +package io.lakefs.clients.api; + +import io.lakefs.clients.api.ApiCallback; +import io.lakefs.clients.api.ApiClient; +import io.lakefs.clients.api.ApiException; +import io.lakefs.clients.api.ApiResponse; +import io.lakefs.clients.api.Configuration; +import io.lakefs.clients.api.Pair; +import io.lakefs.clients.api.ProgressRequestBody; +import io.lakefs.clients.api.ProgressResponseBody; + +import com.google.gson.reflect.TypeToken; + +import java.io.IOException; + + +import io.lakefs.clients.api.model.Error; +import io.lakefs.clients.api.model.InlineResponse201; + +import java.lang.reflect.Type; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class RetentionApi { + private ApiClient localVarApiClient; + + public RetentionApi() { + this(Configuration.getDefaultApiClient()); + } + + public RetentionApi(ApiClient apiClient) { + this.localVarApiClient = apiClient; + } + + public ApiClient getApiClient() { + return localVarApiClient; + } + + public void setApiClient(ApiClient apiClient) { + this.localVarApiClient = apiClient; + } + + /** + * Build call for prepareRetentionCommits + * @param repository (required) + * @param _callback Callback for upload/download progress + * @return Call to execute + * @throws ApiException If fail to serialize the request body object + * @http.response.details + + + + + + +
Status Code Description Response Headers
201 paths to commit sets -
401 Unauthorized -
404 Resource Not Found -
0 Internal Server Error -
+ */ + public okhttp3.Call prepareRetentionCommitsCall(String repository, final ApiCallback _callback) throws ApiException { + Object localVarPostBody = null; + + // create path and map variables + String localVarPath = "/repositories/{repository}/gc/prepare" + .replaceAll("\\{" + "repository" + "\\}", localVarApiClient.escapeString(repository.toString())); + + List localVarQueryParams = new ArrayList(); + List localVarCollectionQueryParams = new ArrayList(); + Map localVarHeaderParams = new HashMap(); + Map localVarCookieParams = new HashMap(); + Map localVarFormParams = new HashMap(); + + final String[] localVarAccepts = { + "application/json" + }; + final String localVarAccept = localVarApiClient.selectHeaderAccept(localVarAccepts); + if (localVarAccept != null) { + localVarHeaderParams.put("Accept", localVarAccept); + } + + final String[] localVarContentTypes = { + + }; + final String localVarContentType = localVarApiClient.selectHeaderContentType(localVarContentTypes); + localVarHeaderParams.put("Content-Type", localVarContentType); + + String[] localVarAuthNames = new String[] { "basic_auth", "cookie_auth", "jwt_token" }; + return localVarApiClient.buildCall(localVarPath, "POST", localVarQueryParams, localVarCollectionQueryParams, localVarPostBody, localVarHeaderParams, localVarCookieParams, localVarFormParams, localVarAuthNames, _callback); + } + + @SuppressWarnings("rawtypes") + private okhttp3.Call prepareRetentionCommitsValidateBeforeCall(String repository, final ApiCallback _callback) throws ApiException { + + // verify the required parameter 'repository' is set + if (repository == null) { + throw new ApiException("Missing the required parameter 'repository' when calling prepareRetentionCommits(Async)"); + } + + + okhttp3.Call localVarCall = prepareRetentionCommitsCall(repository, _callback); + return localVarCall; + + } + + /** + * save lists of active and expired commits for garbage collection + * + * @param repository (required) + * @return InlineResponse201 + * @throws ApiException If fail to call the API, e.g. server error or cannot deserialize the response body + * @http.response.details + + + + + + +
Status Code Description Response Headers
201 paths to commit sets -
401 Unauthorized -
404 Resource Not Found -
0 Internal Server Error -
+ */ + public InlineResponse201 prepareRetentionCommits(String repository) throws ApiException { + ApiResponse localVarResp = prepareRetentionCommitsWithHttpInfo(repository); + return localVarResp.getData(); + } + + /** + * save lists of active and expired commits for garbage collection + * + * @param repository (required) + * @return ApiResponse<InlineResponse201> + * @throws ApiException If fail to call the API, e.g. server error or cannot deserialize the response body + * @http.response.details + + + + + + +
Status Code Description Response Headers
201 paths to commit sets -
401 Unauthorized -
404 Resource Not Found -
0 Internal Server Error -
+ */ + public ApiResponse prepareRetentionCommitsWithHttpInfo(String repository) throws ApiException { + okhttp3.Call localVarCall = prepareRetentionCommitsValidateBeforeCall(repository, null); + Type localVarReturnType = new TypeToken(){}.getType(); + return localVarApiClient.execute(localVarCall, localVarReturnType); + } + + /** + * save lists of active and expired commits for garbage collection (asynchronously) + * + * @param repository (required) + * @param _callback The callback to be executed when the API call finishes + * @return The request call + * @throws ApiException If fail to process the API call, e.g. serializing the request body object + * @http.response.details + + + + + + +
Status Code Description Response Headers
201 paths to commit sets -
401 Unauthorized -
404 Resource Not Found -
0 Internal Server Error -
+ */ + public okhttp3.Call prepareRetentionCommitsAsync(String repository, final ApiCallback _callback) throws ApiException { + + okhttp3.Call localVarCall = prepareRetentionCommitsValidateBeforeCall(repository, _callback); + Type localVarReturnType = new TypeToken(){}.getType(); + localVarApiClient.executeAsync(localVarCall, localVarReturnType, _callback); + return localVarCall; + } +} diff --git a/clients/java/src/main/java/io/lakefs/clients/api/model/InlineResponse201.java b/clients/java/src/main/java/io/lakefs/clients/api/model/InlineResponse201.java new file mode 100644 index 00000000000..7ea84db1257 --- /dev/null +++ b/clients/java/src/main/java/io/lakefs/clients/api/model/InlineResponse201.java @@ -0,0 +1,127 @@ +/* + * lakeFS API + * lakeFS HTTP API + * + * The version of the OpenAPI document: 0.1.0 + * + * + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. + */ + + +package io.lakefs.clients.api.model; + +import java.util.Objects; +import java.util.Arrays; +import com.google.gson.TypeAdapter; +import com.google.gson.annotations.JsonAdapter; +import com.google.gson.annotations.SerializedName; +import com.google.gson.stream.JsonReader; +import com.google.gson.stream.JsonWriter; +import io.swagger.annotations.ApiModel; +import io.swagger.annotations.ApiModelProperty; +import java.io.IOException; + +/** + * InlineResponse201 + */ +@javax.annotation.Generated(value = "org.openapitools.codegen.languages.JavaClientCodegen") +public class InlineResponse201 { + public static final String SERIALIZED_NAME_PATH_TO_EXPIRED = "path_to_expired"; + @SerializedName(SERIALIZED_NAME_PATH_TO_EXPIRED) + private String pathToExpired; + + public static final String SERIALIZED_NAME_PATH_TO_ACTIVE = "path_to_active"; + @SerializedName(SERIALIZED_NAME_PATH_TO_ACTIVE) + private String pathToActive; + + + public InlineResponse201 pathToExpired(String pathToExpired) { + + this.pathToExpired = pathToExpired; + return this; + } + + /** + * Get pathToExpired + * @return pathToExpired + **/ + @javax.annotation.Nullable + @ApiModelProperty(value = "") + + public String getPathToExpired() { + return pathToExpired; + } + + + public void setPathToExpired(String pathToExpired) { + this.pathToExpired = pathToExpired; + } + + + public InlineResponse201 pathToActive(String pathToActive) { + + this.pathToActive = pathToActive; + return this; + } + + /** + * Get pathToActive + * @return pathToActive + **/ + @javax.annotation.Nullable + @ApiModelProperty(value = "") + + public String getPathToActive() { + return pathToActive; + } + + + public void setPathToActive(String pathToActive) { + this.pathToActive = pathToActive; + } + + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + InlineResponse201 inlineResponse201 = (InlineResponse201) o; + return Objects.equals(this.pathToExpired, inlineResponse201.pathToExpired) && + Objects.equals(this.pathToActive, inlineResponse201.pathToActive); + } + + @Override + public int hashCode() { + return Objects.hash(pathToExpired, pathToActive); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("class InlineResponse201 {\n"); + sb.append(" pathToExpired: ").append(toIndentedString(pathToExpired)).append("\n"); + sb.append(" pathToActive: ").append(toIndentedString(pathToActive)).append("\n"); + sb.append("}"); + return sb.toString(); + } + + /** + * Convert the given object to string with each line indented by 4 spaces + * (except the first line). + */ + private String toIndentedString(Object o) { + if (o == null) { + return "null"; + } + return o.toString().replace("\n", "\n "); + } + +} + diff --git a/clients/java/src/test/java/io/lakefs/clients/api/RetentionApiTest.java b/clients/java/src/test/java/io/lakefs/clients/api/RetentionApiTest.java new file mode 100644 index 00000000000..21acfa36f37 --- /dev/null +++ b/clients/java/src/test/java/io/lakefs/clients/api/RetentionApiTest.java @@ -0,0 +1,52 @@ +/* + * lakeFS API + * lakeFS HTTP API + * + * The version of the OpenAPI document: 0.1.0 + * + * + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. + */ + + +package io.lakefs.clients.api; + +import io.lakefs.clients.api.ApiException; +import io.lakefs.clients.api.model.Error; +import io.lakefs.clients.api.model.InlineResponse201; +import org.junit.Test; +import org.junit.Ignore; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * API tests for RetentionApi + */ +@Ignore +public class RetentionApiTest { + + private final RetentionApi api = new RetentionApi(); + + + /** + * save lists of active and expired commits for garbage collection + * + * + * + * @throws ApiException + * if the Api call fails + */ + @Test + public void prepareRetentionCommitsTest() throws ApiException { + String repository = null; + InlineResponse201 response = api.prepareRetentionCommits(repository); + + // TODO: test validations + } + +} diff --git a/clients/java/src/test/java/io/lakefs/clients/api/model/InlineResponse201Test.java b/clients/java/src/test/java/io/lakefs/clients/api/model/InlineResponse201Test.java new file mode 100644 index 00000000000..147f382bd11 --- /dev/null +++ b/clients/java/src/test/java/io/lakefs/clients/api/model/InlineResponse201Test.java @@ -0,0 +1,59 @@ +/* + * lakeFS API + * lakeFS HTTP API + * + * The version of the OpenAPI document: 0.1.0 + * + * + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. + */ + + +package io.lakefs.clients.api.model; + +import com.google.gson.TypeAdapter; +import com.google.gson.annotations.JsonAdapter; +import com.google.gson.annotations.SerializedName; +import com.google.gson.stream.JsonReader; +import com.google.gson.stream.JsonWriter; +import io.swagger.annotations.ApiModel; +import io.swagger.annotations.ApiModelProperty; +import java.io.IOException; +import org.junit.Assert; +import org.junit.Ignore; +import org.junit.Test; + + +/** + * Model tests for InlineResponse201 + */ +public class InlineResponse201Test { + private final InlineResponse201 model = new InlineResponse201(); + + /** + * Model tests for InlineResponse201 + */ + @Test + public void testInlineResponse201() { + // TODO: test InlineResponse201 + } + + /** + * Test the property 'pathToExpired' + */ + @Test + public void pathToExpiredTest() { + // TODO: test pathToExpired + } + + /** + * Test the property 'pathToActive' + */ + @Test + public void pathToActiveTest() { + // TODO: test pathToActive + } + +} diff --git a/clients/python/.openapi-generator/FILES b/clients/python/.openapi-generator/FILES index 947f87aa22e..25b44e3c1f3 100644 --- a/clients/python/.openapi-generator/FILES +++ b/clients/python/.openapi-generator/FILES @@ -27,6 +27,7 @@ docs/GroupList.md docs/HealthCheckApi.md docs/HookRun.md docs/HookRunList.md +docs/InlineResponse201.md docs/LoginInformation.md docs/Merge.md docs/MergeResult.md @@ -48,6 +49,7 @@ docs/Repository.md docs/RepositoryCreation.md docs/RepositoryList.md docs/ResetCreation.md +docs/RetentionApi.md docs/RevertCreation.md docs/Setup.md docs/StagingApi.md @@ -74,6 +76,7 @@ lakefs_client/api/metadata_api.py lakefs_client/api/objects_api.py lakefs_client/api/refs_api.py lakefs_client/api/repositories_api.py +lakefs_client/api/retention_api.py lakefs_client/api/staging_api.py lakefs_client/api/tags_api.py lakefs_client/api_client.py @@ -101,6 +104,7 @@ lakefs_client/model/group_creation.py lakefs_client/model/group_list.py lakefs_client/model/hook_run.py lakefs_client/model/hook_run_list.py +lakefs_client/model/inline_response201.py lakefs_client/model/login_information.py lakefs_client/model/merge.py lakefs_client/model/merge_result.py @@ -139,4 +143,6 @@ setup.cfg setup.py test-requirements.txt test/__init__.py +test/test_inline_response201.py +test/test_retention_api.py tox.ini diff --git a/clients/python/README.md b/clients/python/README.md index c828972eb83..136c949dd8a 100644 --- a/clients/python/README.md +++ b/clients/python/README.md @@ -170,6 +170,7 @@ Class | Method | HTTP request | Description *RepositoriesApi* | [**delete_repository**](docs/RepositoriesApi.md#delete_repository) | **DELETE** /repositories/{repository} | delete repository *RepositoriesApi* | [**get_repository**](docs/RepositoriesApi.md#get_repository) | **GET** /repositories/{repository} | get repository *RepositoriesApi* | [**list_repositories**](docs/RepositoriesApi.md#list_repositories) | **GET** /repositories | list repositories +*RetentionApi* | [**prepare_retention_commits**](docs/RetentionApi.md#prepare_retention_commits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection *StagingApi* | [**get_physical_address**](docs/StagingApi.md#get_physical_address) | **GET** /repositories/{repository}/branches/{branch}/staging/backing | get a physical address and a return token to write object to underlying storage *StagingApi* | [**link_physical_address**](docs/StagingApi.md#link_physical_address) | **PUT** /repositories/{repository}/branches/{branch}/staging/backing | associate staging on this physical address with a path *TagsApi* | [**create_tag**](docs/TagsApi.md#create_tag) | **POST** /repositories/{repository}/tags | create tag @@ -200,6 +201,7 @@ Class | Method | HTTP request | Description - [GroupList](docs/GroupList.md) - [HookRun](docs/HookRun.md) - [HookRunList](docs/HookRunList.md) + - [InlineResponse201](docs/InlineResponse201.md) - [LoginInformation](docs/LoginInformation.md) - [Merge](docs/Merge.md) - [MergeResult](docs/MergeResult.md) diff --git a/clients/python/docs/InlineResponse201.md b/clients/python/docs/InlineResponse201.md new file mode 100644 index 00000000000..d68520c46f9 --- /dev/null +++ b/clients/python/docs/InlineResponse201.md @@ -0,0 +1,12 @@ +# InlineResponse201 + + +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**path_to_expired** | **str** | | [optional] +**path_to_active** | **str** | | [optional] + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/clients/python/docs/RetentionApi.md b/clients/python/docs/RetentionApi.md new file mode 100644 index 00000000000..37826b8bb93 --- /dev/null +++ b/clients/python/docs/RetentionApi.md @@ -0,0 +1,100 @@ +# lakefs_client.RetentionApi + +All URIs are relative to *http://localhost/api/v1* + +Method | HTTP request | Description +------------- | ------------- | ------------- +[**prepare_retention_commits**](RetentionApi.md#prepare_retention_commits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection + + +# **prepare_retention_commits** +> InlineResponse201 prepare_retention_commits(repository) + +save lists of active and expired commits for garbage collection + +### Example + +* Basic Authentication (basic_auth): +* Api Key Authentication (cookie_auth): +* Bearer (JWT) Authentication (jwt_token): +```python +import time +import lakefs_client +from lakefs_client.api import retention_api +from lakefs_client.model.inline_response201 import InlineResponse201 +from lakefs_client.model.error import Error +from pprint import pprint +# Defining the host is optional and defaults to http://localhost/api/v1 +# See configuration.py for a list of all supported configuration parameters. +configuration = lakefs_client.Configuration( + host = "http://localhost/api/v1" +) + +# The client must configure the authentication and authorization parameters +# in accordance with the API server security policy. +# Examples for each auth method are provided below, use the example that +# satisfies your auth use case. + +# Configure HTTP basic authorization: basic_auth +configuration = lakefs_client.Configuration( + username = 'YOUR_USERNAME', + password = 'YOUR_PASSWORD' +) + +# Configure API key authorization: cookie_auth +configuration.api_key['cookie_auth'] = 'YOUR_API_KEY' + +# Uncomment below to setup prefix (e.g. Bearer) for API key, if needed +# configuration.api_key_prefix['cookie_auth'] = 'Bearer' + +# Configure Bearer authorization (JWT): jwt_token +configuration = lakefs_client.Configuration( + access_token = 'YOUR_BEARER_TOKEN' +) + +# Enter a context with an instance of the API client +with lakefs_client.ApiClient(configuration) as api_client: + # Create an instance of the API class + api_instance = retention_api.RetentionApi(api_client) + repository = "repository_example" # str | + + # example passing only required values which don't have defaults set + try: + # save lists of active and expired commits for garbage collection + api_response = api_instance.prepare_retention_commits(repository) + pprint(api_response) + except lakefs_client.ApiException as e: + print("Exception when calling RetentionApi->prepare_retention_commits: %s\n" % e) +``` + + +### Parameters + +Name | Type | Description | Notes +------------- | ------------- | ------------- | ------------- + **repository** | **str**| | + +### Return type + +[**InlineResponse201**](InlineResponse201.md) + +### Authorization + +[basic_auth](../README.md#basic_auth), [cookie_auth](../README.md#cookie_auth), [jwt_token](../README.md#jwt_token) + +### HTTP request headers + + - **Content-Type**: Not defined + - **Accept**: application/json + + +### HTTP response details +| Status code | Description | Response headers | +|-------------|-------------|------------------| +**201** | paths to commit sets | - | +**401** | Unauthorized | - | +**404** | Resource Not Found | - | +**0** | Internal Server Error | - | + +[[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) + diff --git a/clients/python/lakefs_client/api/retention_api.py b/clients/python/lakefs_client/api/retention_api.py new file mode 100644 index 00000000000..c0a1dc72f3b --- /dev/null +++ b/clients/python/lakefs_client/api/retention_api.py @@ -0,0 +1,159 @@ +""" + lakeFS API + + lakeFS HTTP API # noqa: E501 + + The version of the OpenAPI document: 0.1.0 + Contact: services@treeverse.io + Generated by: https://openapi-generator.tech +""" + + +import re # noqa: F401 +import sys # noqa: F401 + +from lakefs_client.api_client import ApiClient, Endpoint as _Endpoint +from lakefs_client.model_utils import ( # noqa: F401 + check_allowed_values, + check_validations, + date, + datetime, + file_type, + none_type, + validate_and_convert_types +) +from lakefs_client.model.error import Error +from lakefs_client.model.inline_response201 import InlineResponse201 + + +class RetentionApi(object): + """NOTE: This class is auto generated by OpenAPI Generator + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + def __init__(self, api_client=None): + if api_client is None: + api_client = ApiClient() + self.api_client = api_client + + def __prepare_retention_commits( + self, + repository, + **kwargs + ): + """save lists of active and expired commits for garbage collection # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + + >>> thread = api.prepare_retention_commits(repository, async_req=True) + >>> result = thread.get() + + Args: + repository (str): + + Keyword Args: + _return_http_data_only (bool): response data without head status + code and headers. Default is True. + _preload_content (bool): if False, the urllib3.HTTPResponse object + will be returned without reading/decoding response data. + Default is True. + _request_timeout (float/tuple): timeout setting for this request. If one + number provided, it will be total request timeout. It can also + be a pair (tuple) of (connection, read) timeouts. + Default is None. + _check_input_type (bool): specifies if type checking + should be done one the data sent to the server. + Default is True. + _check_return_type (bool): specifies if type checking + should be done one the data received from the server. + Default is True. + _host_index (int/None): specifies the index of the server + that we want to use. + Default is read from the configuration. + async_req (bool): execute request asynchronously + + Returns: + InlineResponse201 + If the method is called asynchronously, returns the request + thread. + """ + kwargs['async_req'] = kwargs.get( + 'async_req', False + ) + kwargs['_return_http_data_only'] = kwargs.get( + '_return_http_data_only', True + ) + kwargs['_preload_content'] = kwargs.get( + '_preload_content', True + ) + kwargs['_request_timeout'] = kwargs.get( + '_request_timeout', None + ) + kwargs['_check_input_type'] = kwargs.get( + '_check_input_type', True + ) + kwargs['_check_return_type'] = kwargs.get( + '_check_return_type', True + ) + kwargs['_host_index'] = kwargs.get('_host_index') + kwargs['repository'] = \ + repository + return self.call_with_http_info(**kwargs) + + self.prepare_retention_commits = _Endpoint( + settings={ + 'response_type': (InlineResponse201,), + 'auth': [ + 'basic_auth', + 'cookie_auth', + 'jwt_token' + ], + 'endpoint_path': '/repositories/{repository}/gc/prepare', + 'operation_id': 'prepare_retention_commits', + 'http_method': 'POST', + 'servers': None, + }, + params_map={ + 'all': [ + 'repository', + ], + 'required': [ + 'repository', + ], + 'nullable': [ + ], + 'enum': [ + ], + 'validation': [ + ] + }, + root_map={ + 'validations': { + }, + 'allowed_values': { + }, + 'openapi_types': { + 'repository': + (str,), + }, + 'attribute_map': { + 'repository': 'repository', + }, + 'location_map': { + 'repository': 'path', + }, + 'collection_format_map': { + } + }, + headers_map={ + 'accept': [ + 'application/json' + ], + 'content_type': [], + }, + api_client=api_client, + callable=__prepare_retention_commits + ) diff --git a/clients/python/lakefs_client/apis/__init__.py b/clients/python/lakefs_client/apis/__init__.py index c1f1338d601..84b1782e7d8 100644 --- a/clients/python/lakefs_client/apis/__init__.py +++ b/clients/python/lakefs_client/apis/__init__.py @@ -24,5 +24,6 @@ from lakefs_client.api.objects_api import ObjectsApi from lakefs_client.api.refs_api import RefsApi from lakefs_client.api.repositories_api import RepositoriesApi +from lakefs_client.api.retention_api import RetentionApi from lakefs_client.api.staging_api import StagingApi from lakefs_client.api.tags_api import TagsApi diff --git a/clients/python/lakefs_client/model/inline_response201.py b/clients/python/lakefs_client/model/inline_response201.py new file mode 100644 index 00000000000..bbd062521e0 --- /dev/null +++ b/clients/python/lakefs_client/model/inline_response201.py @@ -0,0 +1,170 @@ +""" + lakeFS API + + lakeFS HTTP API # noqa: E501 + + The version of the OpenAPI document: 0.1.0 + Contact: services@treeverse.io + Generated by: https://openapi-generator.tech +""" + + +import re # noqa: F401 +import sys # noqa: F401 + +from lakefs_client.model_utils import ( # noqa: F401 + ApiTypeError, + ModelComposed, + ModelNormal, + ModelSimple, + cached_property, + change_keys_js_to_python, + convert_js_args_to_python_args, + date, + datetime, + file_type, + none_type, + validate_get_composed_info, +) + + +class InlineResponse201(ModelNormal): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + + Attributes: + allowed_values (dict): The key is the tuple path to the attribute + and the for var_name this is (var_name,). The value is a dict + with a capitalized key describing the allowed value and an allowed + value. These dicts store the allowed enum values. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + discriminator_value_class_map (dict): A dict to go from the discriminator + variable value to the discriminator class name. + validations (dict): The key is the tuple path to the attribute + and the for var_name this is (var_name,). The value is a dict + that stores validations for max_length, min_length, max_items, + min_items, exclusive_maximum, inclusive_maximum, exclusive_minimum, + inclusive_minimum, and regex. + additional_properties_type (tuple): A tuple of classes accepted + as additional properties values. + """ + + allowed_values = { + } + + validations = { + } + + additional_properties_type = None + + _nullable = False + + @cached_property + def openapi_types(): + """ + This must be a method because a model may have properties that are + of type self, this must run after the class is loaded + + Returns + openapi_types (dict): The key is attribute name + and the value is attribute type. + """ + return { + 'path_to_expired': (str,), # noqa: E501 + 'path_to_active': (str,), # noqa: E501 + } + + @cached_property + def discriminator(): + return None + + + attribute_map = { + 'path_to_expired': 'path_to_expired', # noqa: E501 + 'path_to_active': 'path_to_active', # noqa: E501 + } + + _composed_schemas = {} + + required_properties = set([ + '_data_store', + '_check_type', + '_spec_property_naming', + '_path_to_item', + '_configuration', + '_visited_composed_classes', + ]) + + @convert_js_args_to_python_args + def __init__(self, *args, **kwargs): # noqa: E501 + """InlineResponse201 - a model defined in OpenAPI + + Keyword Args: + _check_type (bool): if True, values for parameters in openapi_types + will be type checked and a TypeError will be + raised if the wrong type is input. + Defaults to True + _path_to_item (tuple/list): This is a list of keys or values to + drill down to the model in received_data + when deserializing a response + _spec_property_naming (bool): True if the variable names in the input data + are serialized names, as specified in the OpenAPI document. + False if the variable names in the input data + are pythonic names, e.g. snake case (default) + _configuration (Configuration): the instance to use when + deserializing a file_type parameter. + If passed, type conversion is attempted + If omitted no type conversion is done. + _visited_composed_classes (tuple): This stores a tuple of + classes that we have traveled through so that + if we see that class again we will not use its + discriminator again. + When traveling through a discriminator, the + composed schema that is + is traveled through is added to this set. + For example if Animal has a discriminator + petType and we pass in "Dog", and the class Dog + allOf includes Animal, we move through Animal + once using the discriminator, and pick Dog. + Then in Dog, we will make an instance of the + Animal class but this time we won't travel + through its discriminator because we passed in + _visited_composed_classes = (Animal,) + path_to_expired (str): [optional] # noqa: E501 + path_to_active (str): [optional] # noqa: E501 + """ + + _check_type = kwargs.pop('_check_type', True) + _spec_property_naming = kwargs.pop('_spec_property_naming', False) + _path_to_item = kwargs.pop('_path_to_item', ()) + _configuration = kwargs.pop('_configuration', None) + _visited_composed_classes = kwargs.pop('_visited_composed_classes', ()) + + if args: + raise ApiTypeError( + "Invalid positional arguments=%s passed to %s. Remove those invalid positional arguments." % ( + args, + self.__class__.__name__, + ), + path_to_item=_path_to_item, + valid_classes=(self.__class__,), + ) + + self._data_store = {} + self._check_type = _check_type + self._spec_property_naming = _spec_property_naming + self._path_to_item = _path_to_item + self._configuration = _configuration + self._visited_composed_classes = _visited_composed_classes + (self.__class__,) + + for var_name, var_value in kwargs.items(): + if var_name not in self.attribute_map and \ + self._configuration is not None and \ + self._configuration.discard_unknown_keys and \ + self.additional_properties_type is None: + # discard variable. + continue + setattr(self, var_name, var_value) diff --git a/clients/python/lakefs_client/models/__init__.py b/clients/python/lakefs_client/models/__init__.py index 31c89feb1b1..c0daef9614c 100644 --- a/clients/python/lakefs_client/models/__init__.py +++ b/clients/python/lakefs_client/models/__init__.py @@ -29,6 +29,7 @@ from lakefs_client.model.group_list import GroupList from lakefs_client.model.hook_run import HookRun from lakefs_client.model.hook_run_list import HookRunList +from lakefs_client.model.inline_response201 import InlineResponse201 from lakefs_client.model.login_information import LoginInformation from lakefs_client.model.merge import Merge from lakefs_client.model.merge_result import MergeResult diff --git a/clients/python/test/test_inline_response201.py b/clients/python/test/test_inline_response201.py new file mode 100644 index 00000000000..95b4c256149 --- /dev/null +++ b/clients/python/test/test_inline_response201.py @@ -0,0 +1,36 @@ +""" + lakeFS API + + lakeFS HTTP API # noqa: E501 + + The version of the OpenAPI document: 0.1.0 + Contact: services@treeverse.io + Generated by: https://openapi-generator.tech +""" + + +import sys +import unittest + +import lakefs_client +from lakefs_client.model.inline_response201 import InlineResponse201 + + +class TestInlineResponse201(unittest.TestCase): + """InlineResponse201 unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def testInlineResponse201(self): + """Test InlineResponse201""" + # FIXME: construct object with mandatory attributes with example values + # model = InlineResponse201() # noqa: E501 + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/clients/python/test/test_retention_api.py b/clients/python/test/test_retention_api.py new file mode 100644 index 00000000000..548b7a342be --- /dev/null +++ b/clients/python/test/test_retention_api.py @@ -0,0 +1,36 @@ +""" + lakeFS API + + lakeFS HTTP API # noqa: E501 + + The version of the OpenAPI document: 0.1.0 + Contact: services@treeverse.io + Generated by: https://openapi-generator.tech +""" + + +import unittest + +import lakefs_client +from lakefs_client.api.retention_api import RetentionApi # noqa: E501 + + +class TestRetentionApi(unittest.TestCase): + """RetentionApi unit test stubs""" + + def setUp(self): + self.api = RetentionApi() # noqa: E501 + + def tearDown(self): + pass + + def test_prepare_retention_commits(self): + """Test case for prepare_retention_commits + + save lists of active and expired commits for garbage collection # noqa: E501 + """ + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/pkg/api/controller.go b/pkg/api/controller.go index 25410847a25..efd069df5e9 100644 --- a/pkg/api/controller.go +++ b/pkg/api/controller.go @@ -1920,7 +1920,7 @@ func (c *Controller) PrepareRetentionCommits(w http.ResponseWriter, r *http.Requ }) { return } - + c.Logger } func (c *Controller) GetMetaRange(w http.ResponseWriter, r *http.Request, repository string, metaRange string) { diff --git a/pkg/retention/commit_set_writer.go b/pkg/retention/commit_set_writer.go index f016089cda5..8112551f655 100644 --- a/pkg/retention/commit_set_writer.go +++ b/pkg/retention/commit_set_writer.go @@ -3,12 +3,10 @@ package retention import ( "context" "encoding/csv" - "io" + "strconv" "strings" "github.com/treeverse/lakefs/pkg/block" - - "github.com/treeverse/lakefs/pkg/graveler" ) type CommitSetWriter struct { @@ -20,30 +18,21 @@ func NewCommitSetWriter(block block.Adapter) *CommitSetWriter { return &CommitSetWriter{block: block} } -func write(commitIDs map[graveler.CommitID]bool, writer *io.PipeWriter, isExpired bool) error { - csvExpiredWriter := csv.NewWriter(writer) - for commitID := range commitIDs { - err := csvExpiredWriter.Write([]string{string(commitID), isExpired}) +func (c *CommitSetWriter) Write(commits *Commits, pointer *block.ObjectPointer) error { + b := &strings.Builder{} + csvWriter := csv.NewWriter(b) + for commitID := range commits.Expired { + err := csvWriter.Write([]string{string(commitID), strconv.FormatBool(true)}) if err != nil { return err } } - csvExpiredWriter.Flush() - return writer.Close() -} - -func (c *CommitSetWriter) Write(commits *Commits) error { - b := &strings.Builder{} - csv.NewWriter(b) - - c.block.UploadPart().Put(c.ctx, &block.ObjectPointer{ - StorageNamespace: "", - Identifier: "", - IdentifierType: block.IdentifierTypeFull, - }) - err := write(commits.Expired, c.expiredWriter) - if err != nil { - return err + for commitID := range commits.Active { + err := csvWriter.Write([]string{string(commitID), strconv.FormatBool(false)}) + if err != nil { + return err + } } - return write(commits.Active, c.activeWriter) + commitsStr := b.String() + return c.block.Put(c.ctx, *pointer, int64(len(commitsStr)), strings.NewReader(commitsStr), block.PutOpts{}) } diff --git a/pkg/retention/commit_set_writer_test.go b/pkg/retention/commit_set_writer_test.go index bb2a37e56f3..0e18213b4ab 100644 --- a/pkg/retention/commit_set_writer_test.go +++ b/pkg/retention/commit_set_writer_test.go @@ -3,26 +3,19 @@ package retention import ( "testing" - "github.com/go-openapi/swag" - "github.com/treeverse/lakefs/pkg/graveler" - - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/aws/session" - - "github.com/aws/aws-sdk-go/service/s3" ) func Test(t *testing.T) { - awsConfig := &aws.Config{Region: swag.String("us-east-1")} - sess, err := session.NewSession(awsConfig) - if err != nil { - panic(err) - } - client := s3.New(sess, awsConfig) - w := NewCommitSetWriter("yoni-test3", "retention_test/1", client) + //awsConfig := &aws.Config{Region: swag.String("us-east-1")} + //sess, err := session.NewSession(awsConfig) + //if err != nil { + // panic(err) + //} + //client := s3.New(sess, awsConfig) + w := NewCommitSetWriter(nil) w.Write(&Commits{ Expired: map[graveler.CommitID]bool{"a": true, "b": true}, Active: map[graveler.CommitID]bool{"c": true, "d": true}, - }) + }, nil) } From bb5823b0e9c5b4dfc2bcba2a4d1be6f05b8efc57 Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Mon, 14 Jun 2021 10:32:07 +0300 Subject: [PATCH 08/24] wip --- Makefile | 2 +- api/swagger.yml | 18 +- clients/java/README.md | 4 +- clients/java/api/openapi.yaml | 24 ++- ...onse201.md => GarbageCollectionCommits.md} | 5 +- clients/java/docs/RetentionApi.md | 16 +- .../io/lakefs/clients/api/RetentionApi.java | 40 ++--- .../api/model/GarbageCollectionCommits.java | 98 ++++++++++ .../clients/api/model/InlineResponse201.java | 127 ------------- .../model/GarbageCollectionCommitsTest.java | 51 ++++++ clients/python/.openapi-generator/FILES | 6 +- clients/python/README.md | 4 +- ...onse201.md => GarbageCollectionCommits.md} | 5 +- clients/python/docs/RetentionApi.md | 16 +- .../python/lakefs_client/api/retention_api.py | 16 +- .../model/garbage_collection_commits.py | 167 ++++++++++++++++++ .../python/lakefs_client/models/__init__.py | 2 +- .../test/test_garbage_collection_commits.py | 36 ++++ pkg/api/controller.go | 7 +- pkg/catalog/catalog.go | 65 +++++++ pkg/catalog/commit_set_writer.go | 39 ++++ pkg/catalog/interface.go | 1 + pkg/graveler/graveler.go | 21 ++- pkg/graveler/ref/expired_commits.go | 86 +++++++++ .../ref}/expired_commits_test.go | 46 +++-- pkg/graveler/ref/manager.go | 4 + pkg/graveler/ref/merge_base_finder.go | 6 +- pkg/retention/commit_set_writer.go | 38 ---- pkg/retention/commit_set_writer_test.go | 21 --- pkg/retention/expired_commits.go | 90 ---------- pkg/retention/manager.go | 19 -- 31 files changed, 678 insertions(+), 402 deletions(-) rename clients/java/docs/{InlineResponse201.md => GarbageCollectionCommits.md} (50%) create mode 100644 clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionCommits.java delete mode 100644 clients/java/src/main/java/io/lakefs/clients/api/model/InlineResponse201.java create mode 100644 clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionCommitsTest.java rename clients/python/docs/{InlineResponse201.md => GarbageCollectionCommits.md} (70%) create mode 100644 clients/python/lakefs_client/model/garbage_collection_commits.py create mode 100644 clients/python/test/test_garbage_collection_commits.py create mode 100644 pkg/catalog/commit_set_writer.go create mode 100644 pkg/graveler/ref/expired_commits.go rename pkg/{retention => graveler/ref}/expired_commits_test.go (83%) delete mode 100644 pkg/retention/commit_set_writer.go delete mode 100644 pkg/retention/commit_set_writer_test.go delete mode 100644 pkg/retention/expired_commits.go delete mode 100644 pkg/retention/manager.go diff --git a/Makefile b/Makefile index 28e085f0a33..2cffe77c08b 100644 --- a/Makefile +++ b/Makefile @@ -68,7 +68,7 @@ clean: pkg/ddl/statik.go \ pkg/graveler/sstable/mock \ pkg/webui \ - pkg/graveler/committed/mock + pkg/graveler/committed/mock \ pkg/graveler/mock check-licenses: check-licenses-go-mod check-licenses-npm diff --git a/api/swagger.yml b/api/swagger.yml index 4a2ef7885a7..c2740ad64c7 100644 --- a/api/swagger.yml +++ b/api/swagger.yml @@ -811,6 +811,13 @@ components: - checksum - size_bytes + GarbageCollectionCommits: + type: object + properties: + path: + type: string + description: path to a dataset of commits + paths: /setup_lakefs: post: @@ -2833,20 +2840,15 @@ paths: post: tags: - retention - operationId: prepareRetentionCommits + operationId: prepareGarbageCollectionCommits summary: save lists of active and expired commits for garbage collection responses: 201: - description: paths to commit sets + description: paths to commit dataset content: application/json: schema: - type: object - properties: - path_to_expired: - type: string - path_to_active: - type: string + $ref: "#/components/schemas/GarbageCollectionCommits" 401: $ref: "#/components/responses/Unauthorized" 404: diff --git a/clients/java/README.md b/clients/java/README.md index ba975e88f22..9d88a062963 100644 --- a/clients/java/README.md +++ b/clients/java/README.md @@ -189,7 +189,7 @@ Class | Method | HTTP request | Description *RepositoriesApi* | [**deleteRepository**](docs/RepositoriesApi.md#deleteRepository) | **DELETE** /repositories/{repository} | delete repository *RepositoriesApi* | [**getRepository**](docs/RepositoriesApi.md#getRepository) | **GET** /repositories/{repository} | get repository *RepositoriesApi* | [**listRepositories**](docs/RepositoriesApi.md#listRepositories) | **GET** /repositories | list repositories -*RetentionApi* | [**prepareRetentionCommits**](docs/RetentionApi.md#prepareRetentionCommits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection +*RetentionApi* | [**prepareGarbageCollectionCommits**](docs/RetentionApi.md#prepareGarbageCollectionCommits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection *StagingApi* | [**getPhysicalAddress**](docs/StagingApi.md#getPhysicalAddress) | **GET** /repositories/{repository}/branches/{branch}/staging/backing | get a physical address and a return token to write object to underlying storage *StagingApi* | [**linkPhysicalAddress**](docs/StagingApi.md#linkPhysicalAddress) | **PUT** /repositories/{repository}/branches/{branch}/staging/backing | associate staging on this physical address with a path *TagsApi* | [**createTag**](docs/TagsApi.md#createTag) | **POST** /repositories/{repository}/tags | create tag @@ -215,12 +215,12 @@ Class | Method | HTTP request | Description - [Diff](docs/Diff.md) - [DiffList](docs/DiffList.md) - [Error](docs/Error.md) + - [GarbageCollectionCommits](docs/GarbageCollectionCommits.md) - [Group](docs/Group.md) - [GroupCreation](docs/GroupCreation.md) - [GroupList](docs/GroupList.md) - [HookRun](docs/HookRun.md) - [HookRunList](docs/HookRunList.md) - - [InlineResponse201](docs/InlineResponse201.md) - [LoginInformation](docs/LoginInformation.md) - [Merge](docs/Merge.md) - [MergeResult](docs/MergeResult.md) diff --git a/clients/java/api/openapi.yaml b/clients/java/api/openapi.yaml index 463618fecc6..6ca4602aa9a 100644 --- a/clients/java/api/openapi.yaml +++ b/clients/java/api/openapi.yaml @@ -3740,7 +3740,7 @@ paths: x-accepts: application/json /repositories/{repository}/gc/prepare: post: - operationId: prepareRetentionCommits + operationId: prepareGarbageCollectionCommits parameters: - explode: false in: path @@ -3754,8 +3754,8 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/inline_response_201' - description: paths to commit sets + $ref: '#/components/schemas/GarbageCollectionCommits' + description: paths to commit dataset "401": content: application/json: @@ -4923,6 +4923,14 @@ components: - size_bytes - staging type: object + GarbageCollectionCommits: + example: + path: path + properties: + path: + description: path to a dataset of commits + type: string + type: object inline_object: properties: content: @@ -4930,16 +4938,6 @@ components: format: binary type: string type: object - inline_response_201: - example: - path_to_expired: path_to_expired - path_to_active: path_to_active - properties: - path_to_expired: - type: string - path_to_active: - type: string - type: object MergeResult_summary: example: removed: 6 diff --git a/clients/java/docs/InlineResponse201.md b/clients/java/docs/GarbageCollectionCommits.md similarity index 50% rename from clients/java/docs/InlineResponse201.md rename to clients/java/docs/GarbageCollectionCommits.md index 1c92483accc..3d67e31ca91 100644 --- a/clients/java/docs/InlineResponse201.md +++ b/clients/java/docs/GarbageCollectionCommits.md @@ -1,14 +1,13 @@ -# InlineResponse201 +# GarbageCollectionCommits ## Properties Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- -**pathToExpired** | **String** | | [optional] -**pathToActive** | **String** | | [optional] +**path** | **String** | path to a dataset of commits | [optional] diff --git a/clients/java/docs/RetentionApi.md b/clients/java/docs/RetentionApi.md index d2db2d33e09..e1f70ef499d 100644 --- a/clients/java/docs/RetentionApi.md +++ b/clients/java/docs/RetentionApi.md @@ -4,12 +4,12 @@ All URIs are relative to *http://localhost/api/v1* Method | HTTP request | Description ------------- | ------------- | ------------- -[**prepareRetentionCommits**](RetentionApi.md#prepareRetentionCommits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection +[**prepareGarbageCollectionCommits**](RetentionApi.md#prepareGarbageCollectionCommits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection - -# **prepareRetentionCommits** -> InlineResponse201 prepareRetentionCommits(repository) + +# **prepareGarbageCollectionCommits** +> GarbageCollectionCommits prepareGarbageCollectionCommits(repository) save lists of active and expired commits for garbage collection @@ -46,10 +46,10 @@ public class Example { RetentionApi apiInstance = new RetentionApi(defaultClient); String repository = "repository_example"; // String | try { - InlineResponse201 result = apiInstance.prepareRetentionCommits(repository); + GarbageCollectionCommits result = apiInstance.prepareGarbageCollectionCommits(repository); System.out.println(result); } catch (ApiException e) { - System.err.println("Exception when calling RetentionApi#prepareRetentionCommits"); + System.err.println("Exception when calling RetentionApi#prepareGarbageCollectionCommits"); System.err.println("Status code: " + e.getCode()); System.err.println("Reason: " + e.getResponseBody()); System.err.println("Response headers: " + e.getResponseHeaders()); @@ -67,7 +67,7 @@ Name | Type | Description | Notes ### Return type -[**InlineResponse201**](InlineResponse201.md) +[**GarbageCollectionCommits**](GarbageCollectionCommits.md) ### Authorization @@ -81,7 +81,7 @@ Name | Type | Description | Notes ### HTTP response details | Status code | Description | Response headers | |-------------|-------------|------------------| -**201** | paths to commit sets | - | +**201** | paths to commit dataset | - | **401** | Unauthorized | - | **404** | Resource Not Found | - | **0** | Internal Server Error | - | diff --git a/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java b/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java index 5f44ce26b7a..c16491c960c 100644 --- a/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java +++ b/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java @@ -28,7 +28,7 @@ import io.lakefs.clients.api.model.Error; -import io.lakefs.clients.api.model.InlineResponse201; +import io.lakefs.clients.api.model.GarbageCollectionCommits; import java.lang.reflect.Type; import java.util.ArrayList; @@ -56,7 +56,7 @@ public void setApiClient(ApiClient apiClient) { } /** - * Build call for prepareRetentionCommits + * Build call for prepareGarbageCollectionCommits * @param repository (required) * @param _callback Callback for upload/download progress * @return Call to execute @@ -64,13 +64,13 @@ public void setApiClient(ApiClient apiClient) { * @http.response.details - +
Status Code Description Response Headers
201 paths to commit sets -
201 paths to commit dataset -
401 Unauthorized -
404 Resource Not Found -
0 Internal Server Error -
*/ - public okhttp3.Call prepareRetentionCommitsCall(String repository, final ApiCallback _callback) throws ApiException { + public okhttp3.Call prepareGarbageCollectionCommitsCall(String repository, final ApiCallback _callback) throws ApiException { Object localVarPostBody = null; // create path and map variables @@ -102,15 +102,15 @@ public okhttp3.Call prepareRetentionCommitsCall(String repository, final ApiCall } @SuppressWarnings("rawtypes") - private okhttp3.Call prepareRetentionCommitsValidateBeforeCall(String repository, final ApiCallback _callback) throws ApiException { + private okhttp3.Call prepareGarbageCollectionCommitsValidateBeforeCall(String repository, final ApiCallback _callback) throws ApiException { // verify the required parameter 'repository' is set if (repository == null) { - throw new ApiException("Missing the required parameter 'repository' when calling prepareRetentionCommits(Async)"); + throw new ApiException("Missing the required parameter 'repository' when calling prepareGarbageCollectionCommits(Async)"); } - okhttp3.Call localVarCall = prepareRetentionCommitsCall(repository, _callback); + okhttp3.Call localVarCall = prepareGarbageCollectionCommitsCall(repository, _callback); return localVarCall; } @@ -119,19 +119,19 @@ private okhttp3.Call prepareRetentionCommitsValidateBeforeCall(String repository * save lists of active and expired commits for garbage collection * * @param repository (required) - * @return InlineResponse201 + * @return GarbageCollectionCommits * @throws ApiException If fail to call the API, e.g. server error or cannot deserialize the response body * @http.response.details - +
Status Code Description Response Headers
201 paths to commit sets -
201 paths to commit dataset -
401 Unauthorized -
404 Resource Not Found -
0 Internal Server Error -
*/ - public InlineResponse201 prepareRetentionCommits(String repository) throws ApiException { - ApiResponse localVarResp = prepareRetentionCommitsWithHttpInfo(repository); + public GarbageCollectionCommits prepareGarbageCollectionCommits(String repository) throws ApiException { + ApiResponse localVarResp = prepareGarbageCollectionCommitsWithHttpInfo(repository); return localVarResp.getData(); } @@ -139,20 +139,20 @@ public InlineResponse201 prepareRetentionCommits(String repository) throws ApiEx * save lists of active and expired commits for garbage collection * * @param repository (required) - * @return ApiResponse<InlineResponse201> + * @return ApiResponse<GarbageCollectionCommits> * @throws ApiException If fail to call the API, e.g. server error or cannot deserialize the response body * @http.response.details - +
Status Code Description Response Headers
201 paths to commit sets -
201 paths to commit dataset -
401 Unauthorized -
404 Resource Not Found -
0 Internal Server Error -
*/ - public ApiResponse prepareRetentionCommitsWithHttpInfo(String repository) throws ApiException { - okhttp3.Call localVarCall = prepareRetentionCommitsValidateBeforeCall(repository, null); - Type localVarReturnType = new TypeToken(){}.getType(); + public ApiResponse prepareGarbageCollectionCommitsWithHttpInfo(String repository) throws ApiException { + okhttp3.Call localVarCall = prepareGarbageCollectionCommitsValidateBeforeCall(repository, null); + Type localVarReturnType = new TypeToken(){}.getType(); return localVarApiClient.execute(localVarCall, localVarReturnType); } @@ -166,16 +166,16 @@ public ApiResponse prepareRetentionCommitsWithHttpInfo(String * @http.response.details - +
Status Code Description Response Headers
201 paths to commit sets -
201 paths to commit dataset -
401 Unauthorized -
404 Resource Not Found -
0 Internal Server Error -
*/ - public okhttp3.Call prepareRetentionCommitsAsync(String repository, final ApiCallback _callback) throws ApiException { + public okhttp3.Call prepareGarbageCollectionCommitsAsync(String repository, final ApiCallback _callback) throws ApiException { - okhttp3.Call localVarCall = prepareRetentionCommitsValidateBeforeCall(repository, _callback); - Type localVarReturnType = new TypeToken(){}.getType(); + okhttp3.Call localVarCall = prepareGarbageCollectionCommitsValidateBeforeCall(repository, _callback); + Type localVarReturnType = new TypeToken(){}.getType(); localVarApiClient.executeAsync(localVarCall, localVarReturnType, _callback); return localVarCall; } diff --git a/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionCommits.java b/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionCommits.java new file mode 100644 index 00000000000..239b1398eb7 --- /dev/null +++ b/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionCommits.java @@ -0,0 +1,98 @@ +/* + * lakeFS API + * lakeFS HTTP API + * + * The version of the OpenAPI document: 0.1.0 + * + * + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. + */ + + +package io.lakefs.clients.api.model; + +import java.util.Objects; +import java.util.Arrays; +import com.google.gson.TypeAdapter; +import com.google.gson.annotations.JsonAdapter; +import com.google.gson.annotations.SerializedName; +import com.google.gson.stream.JsonReader; +import com.google.gson.stream.JsonWriter; +import io.swagger.annotations.ApiModel; +import io.swagger.annotations.ApiModelProperty; +import java.io.IOException; + +/** + * GarbageCollectionCommits + */ +@javax.annotation.Generated(value = "org.openapitools.codegen.languages.JavaClientCodegen") +public class GarbageCollectionCommits { + public static final String SERIALIZED_NAME_PATH = "path"; + @SerializedName(SERIALIZED_NAME_PATH) + private String path; + + + public GarbageCollectionCommits path(String path) { + + this.path = path; + return this; + } + + /** + * path to a dataset of commits + * @return path + **/ + @javax.annotation.Nullable + @ApiModelProperty(value = "path to a dataset of commits") + + public String getPath() { + return path; + } + + + public void setPath(String path) { + this.path = path; + } + + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + GarbageCollectionCommits garbageCollectionCommits = (GarbageCollectionCommits) o; + return Objects.equals(this.path, garbageCollectionCommits.path); + } + + @Override + public int hashCode() { + return Objects.hash(path); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("class GarbageCollectionCommits {\n"); + sb.append(" path: ").append(toIndentedString(path)).append("\n"); + sb.append("}"); + return sb.toString(); + } + + /** + * Convert the given object to string with each line indented by 4 spaces + * (except the first line). + */ + private String toIndentedString(Object o) { + if (o == null) { + return "null"; + } + return o.toString().replace("\n", "\n "); + } + +} + diff --git a/clients/java/src/main/java/io/lakefs/clients/api/model/InlineResponse201.java b/clients/java/src/main/java/io/lakefs/clients/api/model/InlineResponse201.java deleted file mode 100644 index 7ea84db1257..00000000000 --- a/clients/java/src/main/java/io/lakefs/clients/api/model/InlineResponse201.java +++ /dev/null @@ -1,127 +0,0 @@ -/* - * lakeFS API - * lakeFS HTTP API - * - * The version of the OpenAPI document: 0.1.0 - * - * - * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). - * https://openapi-generator.tech - * Do not edit the class manually. - */ - - -package io.lakefs.clients.api.model; - -import java.util.Objects; -import java.util.Arrays; -import com.google.gson.TypeAdapter; -import com.google.gson.annotations.JsonAdapter; -import com.google.gson.annotations.SerializedName; -import com.google.gson.stream.JsonReader; -import com.google.gson.stream.JsonWriter; -import io.swagger.annotations.ApiModel; -import io.swagger.annotations.ApiModelProperty; -import java.io.IOException; - -/** - * InlineResponse201 - */ -@javax.annotation.Generated(value = "org.openapitools.codegen.languages.JavaClientCodegen") -public class InlineResponse201 { - public static final String SERIALIZED_NAME_PATH_TO_EXPIRED = "path_to_expired"; - @SerializedName(SERIALIZED_NAME_PATH_TO_EXPIRED) - private String pathToExpired; - - public static final String SERIALIZED_NAME_PATH_TO_ACTIVE = "path_to_active"; - @SerializedName(SERIALIZED_NAME_PATH_TO_ACTIVE) - private String pathToActive; - - - public InlineResponse201 pathToExpired(String pathToExpired) { - - this.pathToExpired = pathToExpired; - return this; - } - - /** - * Get pathToExpired - * @return pathToExpired - **/ - @javax.annotation.Nullable - @ApiModelProperty(value = "") - - public String getPathToExpired() { - return pathToExpired; - } - - - public void setPathToExpired(String pathToExpired) { - this.pathToExpired = pathToExpired; - } - - - public InlineResponse201 pathToActive(String pathToActive) { - - this.pathToActive = pathToActive; - return this; - } - - /** - * Get pathToActive - * @return pathToActive - **/ - @javax.annotation.Nullable - @ApiModelProperty(value = "") - - public String getPathToActive() { - return pathToActive; - } - - - public void setPathToActive(String pathToActive) { - this.pathToActive = pathToActive; - } - - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - InlineResponse201 inlineResponse201 = (InlineResponse201) o; - return Objects.equals(this.pathToExpired, inlineResponse201.pathToExpired) && - Objects.equals(this.pathToActive, inlineResponse201.pathToActive); - } - - @Override - public int hashCode() { - return Objects.hash(pathToExpired, pathToActive); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("class InlineResponse201 {\n"); - sb.append(" pathToExpired: ").append(toIndentedString(pathToExpired)).append("\n"); - sb.append(" pathToActive: ").append(toIndentedString(pathToActive)).append("\n"); - sb.append("}"); - return sb.toString(); - } - - /** - * Convert the given object to string with each line indented by 4 spaces - * (except the first line). - */ - private String toIndentedString(Object o) { - if (o == null) { - return "null"; - } - return o.toString().replace("\n", "\n "); - } - -} - diff --git a/clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionCommitsTest.java b/clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionCommitsTest.java new file mode 100644 index 00000000000..a1514a7fbd1 --- /dev/null +++ b/clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionCommitsTest.java @@ -0,0 +1,51 @@ +/* + * lakeFS API + * lakeFS HTTP API + * + * The version of the OpenAPI document: 0.1.0 + * + * + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. + */ + + +package io.lakefs.clients.api.model; + +import com.google.gson.TypeAdapter; +import com.google.gson.annotations.JsonAdapter; +import com.google.gson.annotations.SerializedName; +import com.google.gson.stream.JsonReader; +import com.google.gson.stream.JsonWriter; +import io.swagger.annotations.ApiModel; +import io.swagger.annotations.ApiModelProperty; +import java.io.IOException; +import org.junit.Assert; +import org.junit.Ignore; +import org.junit.Test; + + +/** + * Model tests for GarbageCollectionCommits + */ +public class GarbageCollectionCommitsTest { + private final GarbageCollectionCommits model = new GarbageCollectionCommits(); + + /** + * Model tests for GarbageCollectionCommits + */ + @Test + public void testGarbageCollectionCommits() { + // TODO: test GarbageCollectionCommits + } + + /** + * Test the property 'path' + */ + @Test + public void pathTest() { + // TODO: test path + } + +} diff --git a/clients/python/.openapi-generator/FILES b/clients/python/.openapi-generator/FILES index 25b44e3c1f3..65668cb614f 100644 --- a/clients/python/.openapi-generator/FILES +++ b/clients/python/.openapi-generator/FILES @@ -21,13 +21,13 @@ docs/CurrentUser.md docs/Diff.md docs/DiffList.md docs/Error.md +docs/GarbageCollectionCommits.md docs/Group.md docs/GroupCreation.md docs/GroupList.md docs/HealthCheckApi.md docs/HookRun.md docs/HookRunList.md -docs/InlineResponse201.md docs/LoginInformation.md docs/Merge.md docs/MergeResult.md @@ -99,12 +99,12 @@ lakefs_client/model/current_user.py lakefs_client/model/diff.py lakefs_client/model/diff_list.py lakefs_client/model/error.py +lakefs_client/model/garbage_collection_commits.py lakefs_client/model/group.py lakefs_client/model/group_creation.py lakefs_client/model/group_list.py lakefs_client/model/hook_run.py lakefs_client/model/hook_run_list.py -lakefs_client/model/inline_response201.py lakefs_client/model/login_information.py lakefs_client/model/merge.py lakefs_client/model/merge_result.py @@ -143,6 +143,4 @@ setup.cfg setup.py test-requirements.txt test/__init__.py -test/test_inline_response201.py -test/test_retention_api.py tox.ini diff --git a/clients/python/README.md b/clients/python/README.md index 136c949dd8a..7c0b0f0dbea 100644 --- a/clients/python/README.md +++ b/clients/python/README.md @@ -170,7 +170,7 @@ Class | Method | HTTP request | Description *RepositoriesApi* | [**delete_repository**](docs/RepositoriesApi.md#delete_repository) | **DELETE** /repositories/{repository} | delete repository *RepositoriesApi* | [**get_repository**](docs/RepositoriesApi.md#get_repository) | **GET** /repositories/{repository} | get repository *RepositoriesApi* | [**list_repositories**](docs/RepositoriesApi.md#list_repositories) | **GET** /repositories | list repositories -*RetentionApi* | [**prepare_retention_commits**](docs/RetentionApi.md#prepare_retention_commits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection +*RetentionApi* | [**prepare_garbage_collection_commits**](docs/RetentionApi.md#prepare_garbage_collection_commits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection *StagingApi* | [**get_physical_address**](docs/StagingApi.md#get_physical_address) | **GET** /repositories/{repository}/branches/{branch}/staging/backing | get a physical address and a return token to write object to underlying storage *StagingApi* | [**link_physical_address**](docs/StagingApi.md#link_physical_address) | **PUT** /repositories/{repository}/branches/{branch}/staging/backing | associate staging on this physical address with a path *TagsApi* | [**create_tag**](docs/TagsApi.md#create_tag) | **POST** /repositories/{repository}/tags | create tag @@ -196,12 +196,12 @@ Class | Method | HTTP request | Description - [Diff](docs/Diff.md) - [DiffList](docs/DiffList.md) - [Error](docs/Error.md) + - [GarbageCollectionCommits](docs/GarbageCollectionCommits.md) - [Group](docs/Group.md) - [GroupCreation](docs/GroupCreation.md) - [GroupList](docs/GroupList.md) - [HookRun](docs/HookRun.md) - [HookRunList](docs/HookRunList.md) - - [InlineResponse201](docs/InlineResponse201.md) - [LoginInformation](docs/LoginInformation.md) - [Merge](docs/Merge.md) - [MergeResult](docs/MergeResult.md) diff --git a/clients/python/docs/InlineResponse201.md b/clients/python/docs/GarbageCollectionCommits.md similarity index 70% rename from clients/python/docs/InlineResponse201.md rename to clients/python/docs/GarbageCollectionCommits.md index d68520c46f9..d511fc642f4 100644 --- a/clients/python/docs/InlineResponse201.md +++ b/clients/python/docs/GarbageCollectionCommits.md @@ -1,11 +1,10 @@ -# InlineResponse201 +# GarbageCollectionCommits ## Properties Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- -**path_to_expired** | **str** | | [optional] -**path_to_active** | **str** | | [optional] +**path** | **str** | path to a dataset of commits | [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/clients/python/docs/RetentionApi.md b/clients/python/docs/RetentionApi.md index 37826b8bb93..838009d12fd 100644 --- a/clients/python/docs/RetentionApi.md +++ b/clients/python/docs/RetentionApi.md @@ -4,11 +4,11 @@ All URIs are relative to *http://localhost/api/v1* Method | HTTP request | Description ------------- | ------------- | ------------- -[**prepare_retention_commits**](RetentionApi.md#prepare_retention_commits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection +[**prepare_garbage_collection_commits**](RetentionApi.md#prepare_garbage_collection_commits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection -# **prepare_retention_commits** -> InlineResponse201 prepare_retention_commits(repository) +# **prepare_garbage_collection_commits** +> GarbageCollectionCommits prepare_garbage_collection_commits(repository) save lists of active and expired commits for garbage collection @@ -21,7 +21,7 @@ save lists of active and expired commits for garbage collection import time import lakefs_client from lakefs_client.api import retention_api -from lakefs_client.model.inline_response201 import InlineResponse201 +from lakefs_client.model.garbage_collection_commits import GarbageCollectionCommits from lakefs_client.model.error import Error from pprint import pprint # Defining the host is optional and defaults to http://localhost/api/v1 @@ -61,10 +61,10 @@ with lakefs_client.ApiClient(configuration) as api_client: # example passing only required values which don't have defaults set try: # save lists of active and expired commits for garbage collection - api_response = api_instance.prepare_retention_commits(repository) + api_response = api_instance.prepare_garbage_collection_commits(repository) pprint(api_response) except lakefs_client.ApiException as e: - print("Exception when calling RetentionApi->prepare_retention_commits: %s\n" % e) + print("Exception when calling RetentionApi->prepare_garbage_collection_commits: %s\n" % e) ``` @@ -76,7 +76,7 @@ Name | Type | Description | Notes ### Return type -[**InlineResponse201**](InlineResponse201.md) +[**GarbageCollectionCommits**](GarbageCollectionCommits.md) ### Authorization @@ -91,7 +91,7 @@ Name | Type | Description | Notes ### HTTP response details | Status code | Description | Response headers | |-------------|-------------|------------------| -**201** | paths to commit sets | - | +**201** | paths to commit dataset | - | **401** | Unauthorized | - | **404** | Resource Not Found | - | **0** | Internal Server Error | - | diff --git a/clients/python/lakefs_client/api/retention_api.py b/clients/python/lakefs_client/api/retention_api.py index c0a1dc72f3b..19d53fe6392 100644 --- a/clients/python/lakefs_client/api/retention_api.py +++ b/clients/python/lakefs_client/api/retention_api.py @@ -23,7 +23,7 @@ validate_and_convert_types ) from lakefs_client.model.error import Error -from lakefs_client.model.inline_response201 import InlineResponse201 +from lakefs_client.model.garbage_collection_commits import GarbageCollectionCommits class RetentionApi(object): @@ -38,7 +38,7 @@ def __init__(self, api_client=None): api_client = ApiClient() self.api_client = api_client - def __prepare_retention_commits( + def __prepare_garbage_collection_commits( self, repository, **kwargs @@ -48,7 +48,7 @@ def __prepare_retention_commits( This method makes a synchronous HTTP request by default. To make an asynchronous HTTP request, please pass async_req=True - >>> thread = api.prepare_retention_commits(repository, async_req=True) + >>> thread = api.prepare_garbage_collection_commits(repository, async_req=True) >>> result = thread.get() Args: @@ -76,7 +76,7 @@ def __prepare_retention_commits( async_req (bool): execute request asynchronously Returns: - InlineResponse201 + GarbageCollectionCommits If the method is called asynchronously, returns the request thread. """ @@ -103,16 +103,16 @@ def __prepare_retention_commits( repository return self.call_with_http_info(**kwargs) - self.prepare_retention_commits = _Endpoint( + self.prepare_garbage_collection_commits = _Endpoint( settings={ - 'response_type': (InlineResponse201,), + 'response_type': (GarbageCollectionCommits,), 'auth': [ 'basic_auth', 'cookie_auth', 'jwt_token' ], 'endpoint_path': '/repositories/{repository}/gc/prepare', - 'operation_id': 'prepare_retention_commits', + 'operation_id': 'prepare_garbage_collection_commits', 'http_method': 'POST', 'servers': None, }, @@ -155,5 +155,5 @@ def __prepare_retention_commits( 'content_type': [], }, api_client=api_client, - callable=__prepare_retention_commits + callable=__prepare_garbage_collection_commits ) diff --git a/clients/python/lakefs_client/model/garbage_collection_commits.py b/clients/python/lakefs_client/model/garbage_collection_commits.py new file mode 100644 index 00000000000..82ea29a5785 --- /dev/null +++ b/clients/python/lakefs_client/model/garbage_collection_commits.py @@ -0,0 +1,167 @@ +""" + lakeFS API + + lakeFS HTTP API # noqa: E501 + + The version of the OpenAPI document: 0.1.0 + Contact: services@treeverse.io + Generated by: https://openapi-generator.tech +""" + + +import re # noqa: F401 +import sys # noqa: F401 + +from lakefs_client.model_utils import ( # noqa: F401 + ApiTypeError, + ModelComposed, + ModelNormal, + ModelSimple, + cached_property, + change_keys_js_to_python, + convert_js_args_to_python_args, + date, + datetime, + file_type, + none_type, + validate_get_composed_info, +) + + +class GarbageCollectionCommits(ModelNormal): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + + Attributes: + allowed_values (dict): The key is the tuple path to the attribute + and the for var_name this is (var_name,). The value is a dict + with a capitalized key describing the allowed value and an allowed + value. These dicts store the allowed enum values. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + discriminator_value_class_map (dict): A dict to go from the discriminator + variable value to the discriminator class name. + validations (dict): The key is the tuple path to the attribute + and the for var_name this is (var_name,). The value is a dict + that stores validations for max_length, min_length, max_items, + min_items, exclusive_maximum, inclusive_maximum, exclusive_minimum, + inclusive_minimum, and regex. + additional_properties_type (tuple): A tuple of classes accepted + as additional properties values. + """ + + allowed_values = { + } + + validations = { + } + + additional_properties_type = None + + _nullable = False + + @cached_property + def openapi_types(): + """ + This must be a method because a model may have properties that are + of type self, this must run after the class is loaded + + Returns + openapi_types (dict): The key is attribute name + and the value is attribute type. + """ + return { + 'path': (str,), # noqa: E501 + } + + @cached_property + def discriminator(): + return None + + + attribute_map = { + 'path': 'path', # noqa: E501 + } + + _composed_schemas = {} + + required_properties = set([ + '_data_store', + '_check_type', + '_spec_property_naming', + '_path_to_item', + '_configuration', + '_visited_composed_classes', + ]) + + @convert_js_args_to_python_args + def __init__(self, *args, **kwargs): # noqa: E501 + """GarbageCollectionCommits - a model defined in OpenAPI + + Keyword Args: + _check_type (bool): if True, values for parameters in openapi_types + will be type checked and a TypeError will be + raised if the wrong type is input. + Defaults to True + _path_to_item (tuple/list): This is a list of keys or values to + drill down to the model in received_data + when deserializing a response + _spec_property_naming (bool): True if the variable names in the input data + are serialized names, as specified in the OpenAPI document. + False if the variable names in the input data + are pythonic names, e.g. snake case (default) + _configuration (Configuration): the instance to use when + deserializing a file_type parameter. + If passed, type conversion is attempted + If omitted no type conversion is done. + _visited_composed_classes (tuple): This stores a tuple of + classes that we have traveled through so that + if we see that class again we will not use its + discriminator again. + When traveling through a discriminator, the + composed schema that is + is traveled through is added to this set. + For example if Animal has a discriminator + petType and we pass in "Dog", and the class Dog + allOf includes Animal, we move through Animal + once using the discriminator, and pick Dog. + Then in Dog, we will make an instance of the + Animal class but this time we won't travel + through its discriminator because we passed in + _visited_composed_classes = (Animal,) + path (str): path to a dataset of commits. [optional] # noqa: E501 + """ + + _check_type = kwargs.pop('_check_type', True) + _spec_property_naming = kwargs.pop('_spec_property_naming', False) + _path_to_item = kwargs.pop('_path_to_item', ()) + _configuration = kwargs.pop('_configuration', None) + _visited_composed_classes = kwargs.pop('_visited_composed_classes', ()) + + if args: + raise ApiTypeError( + "Invalid positional arguments=%s passed to %s. Remove those invalid positional arguments." % ( + args, + self.__class__.__name__, + ), + path_to_item=_path_to_item, + valid_classes=(self.__class__,), + ) + + self._data_store = {} + self._check_type = _check_type + self._spec_property_naming = _spec_property_naming + self._path_to_item = _path_to_item + self._configuration = _configuration + self._visited_composed_classes = _visited_composed_classes + (self.__class__,) + + for var_name, var_value in kwargs.items(): + if var_name not in self.attribute_map and \ + self._configuration is not None and \ + self._configuration.discard_unknown_keys and \ + self.additional_properties_type is None: + # discard variable. + continue + setattr(self, var_name, var_value) diff --git a/clients/python/lakefs_client/models/__init__.py b/clients/python/lakefs_client/models/__init__.py index c0daef9614c..10c52628f43 100644 --- a/clients/python/lakefs_client/models/__init__.py +++ b/clients/python/lakefs_client/models/__init__.py @@ -24,12 +24,12 @@ from lakefs_client.model.diff import Diff from lakefs_client.model.diff_list import DiffList from lakefs_client.model.error import Error + from lakefs_client.model.garbage_collection_commits import GarbageCollectionCommits from lakefs_client.model.group import Group from lakefs_client.model.group_creation import GroupCreation from lakefs_client.model.group_list import GroupList from lakefs_client.model.hook_run import HookRun from lakefs_client.model.hook_run_list import HookRunList -from lakefs_client.model.inline_response201 import InlineResponse201 from lakefs_client.model.login_information import LoginInformation from lakefs_client.model.merge import Merge from lakefs_client.model.merge_result import MergeResult diff --git a/clients/python/test/test_garbage_collection_commits.py b/clients/python/test/test_garbage_collection_commits.py new file mode 100644 index 00000000000..9ea30e5e33b --- /dev/null +++ b/clients/python/test/test_garbage_collection_commits.py @@ -0,0 +1,36 @@ +""" + lakeFS API + + lakeFS HTTP API # noqa: E501 + + The version of the OpenAPI document: 0.1.0 + Contact: services@treeverse.io + Generated by: https://openapi-generator.tech +""" + + +import sys +import unittest + +import lakefs_client +from lakefs_client.model.garbage_collection_commits import GarbageCollectionCommits + + +class TestGarbageCollectionCommits(unittest.TestCase): + """GarbageCollectionCommits unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def testGarbageCollectionCommits(self): + """Test GarbageCollectionCommits""" + # FIXME: construct object with mandatory attributes with example values + # model = GarbageCollectionCommits() # noqa: E501 + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/pkg/api/controller.go b/pkg/api/controller.go index bedf32aad4a..373f090fcc6 100644 --- a/pkg/api/controller.go +++ b/pkg/api/controller.go @@ -15,6 +15,8 @@ import ( "strings" "time" + "github.com/go-openapi/swag" + "github.com/aws/aws-sdk-go/aws" nanoid "github.com/matoous/go-nanoid/v2" "github.com/treeverse/lakefs/pkg/actions" @@ -1911,7 +1913,7 @@ func (c *Controller) GetCommit(w http.ResponseWriter, r *http.Request, repositor writeResponse(w, http.StatusOK, response) } -func (c *Controller) PrepareRetentionCommits(w http.ResponseWriter, r *http.Request, repository string) { +func (c *Controller) PrepareGarbageCollectionCommits(w http.ResponseWriter, r *http.Request, repository string) { if !c.authorize(w, r, []permissions.Permission{ { Action: permissions.ListObjectsAction, @@ -1920,7 +1922,8 @@ func (c *Controller) PrepareRetentionCommits(w http.ResponseWriter, r *http.Requ }) { return } - c.Logger + pth := "" + writeResponse(w, http.StatusCreated, GarbageCollectionCommits{Path: swag.String(pth)}) } func (c *Controller) GetMetaRange(w http.ResponseWriter, r *http.Request, repository string, metaRange string) { diff --git a/pkg/catalog/catalog.go b/pkg/catalog/catalog.go index a24345bd00b..cc75dc9361b 100644 --- a/pkg/catalog/catalog.go +++ b/pkg/catalog/catalog.go @@ -4,12 +4,15 @@ import ( "context" "crypto" _ "crypto/sha256" + "encoding/csv" "errors" "fmt" "io" + "strconv" "strings" "github.com/cockroachdb/pebble" + "github.com/google/uuid" "github.com/hashicorp/go-multierror" "github.com/treeverse/lakefs/pkg/batch" "github.com/treeverse/lakefs/pkg/block" @@ -1175,6 +1178,68 @@ func (c *Catalog) GetRange(ctx context.Context, repositoryID, rangeID string) (g return c.Store.GetRange(ctx, graveler.RepositoryID(repositoryID), graveler.RangeID(rangeID)) } +func (c *Catalog) PrepareExpiredCommits(ctx context.Context, repository string, previousResultPath string) (string, error) { + repositoryID := graveler.RepositoryID(repository) + if err := Validate([]ValidateArg{ + {"repositoryID", repositoryID, ValidateRepositoryID}, + }); err != nil { + return "", err + } + repo, err := c.Store.GetRepository(ctx, repositoryID) + if err != nil { + return "", err + } + previousRunReader, err := c.BlockAdapter.Get(ctx, block.ObjectPointer{ + StorageNamespace: string(repo.StorageNamespace), + Identifier: fmt.Sprintf(previousResultPath), + IdentifierType: block.IdentifierTypeRelative, + }, -1) + if err != nil { + return "", err + } + previouslyExpiredCommits := make([]graveler.CommitID, 0) + if previousResultPath != "" { + csvReader := csv.NewReader(previousRunReader) + previousCommits, err := csvReader.ReadAll() + if err != nil { + return "", err + } + for _, commitRow := range previousCommits { + previouslyExpiredCommits = append(previouslyExpiredCommits, graveler.CommitID(commitRow[1])) + } + } + activeCommits, expiredCommits, err := c.Store.GetExpiredCommits(ctx, repositoryID, previouslyExpiredCommits) + if err != nil { + return "", fmt.Errorf("preparing expired commits: %v", err) + } + b := &strings.Builder{} + csvWriter := csv.NewWriter(b) + for _, commitID := range expiredCommits { + err = csvWriter.Write([]string{string(commitID), strconv.FormatBool(true)}) + if err != nil { + return "", err + } + } + for _, commitID := range activeCommits { + err = csvWriter.Write([]string{string(commitID), strconv.FormatBool(false)}) + if err != nil { + return "", err + } + } + commitsStr := b.String() + runId := uuid.New().String() + path := fmt.Sprintf("_lakefs/retention/commits/run_id=%s/commits.csv", runId) + err = c.BlockAdapter.Put(ctx, block.ObjectPointer{ + StorageNamespace: string(repo.StorageNamespace), + Identifier: path, + IdentifierType: block.IdentifierTypeRelative, + }, int64(len(commitsStr)), strings.NewReader(commitsStr), block.PutOpts{}) + if err != nil { + return "", err + } + return fmt.Sprintf("%s/%s", repo.StorageNamespace, path), nil +} + func (c *Catalog) Close() error { var errs error for _, manager := range c.managers { diff --git a/pkg/catalog/commit_set_writer.go b/pkg/catalog/commit_set_writer.go new file mode 100644 index 00000000000..40a092d0ab4 --- /dev/null +++ b/pkg/catalog/commit_set_writer.go @@ -0,0 +1,39 @@ +package catalog + +// +//import ( +// "context" +// "encoding/csv" +// "strconv" +// "strings" +// +// "github.com/treeverse/lakefs/pkg/block" +//) +// +//type CommitSetWriter struct { +// ctx context.Context +// block block.Adapter +//} +// +//func NewCommitSetWriter(block block.Adapter) *CommitSetWriter { +// return &CommitSetWriter{block: block} +//} +// +//func (c *CommitSetWriter) Write(commits *retention.Commits, pointer *block.ObjectPointer) error { +// b := &strings.Builder{} +// csvWriter := csv.NewWriter(b) +// for commitID := range commits.Expired { +// err := csvWriter.Write([]string{string(commitID), strconv.FormatBool(true)}) +// if err != nil { +// return err +// } +// } +// for commitID := range commits.Active { +// err := csvWriter.Write([]string{string(commitID), strconv.FormatBool(false)}) +// if err != nil { +// return err +// } +// } +// commitsStr := b.String() +// return c.block.Put(c.ctx, *pointer, int64(len(commitsStr)), strings.NewReader(commitsStr), block.PutOpts{}) +//} diff --git a/pkg/catalog/interface.go b/pkg/catalog/interface.go index 150711ca67d..2cbe18ed11c 100644 --- a/pkg/catalog/interface.go +++ b/pkg/catalog/interface.go @@ -94,6 +94,7 @@ type Interface interface { Commit(ctx context.Context, repository, branch string, message string, committer string, metadata Metadata) (*CommitLog, error) GetCommit(ctx context.Context, repository, reference string) (*CommitLog, error) ListCommits(ctx context.Context, repository, branch string, fromReference string, limit int) ([]*CommitLog, bool, error) + PrepareExpiredCommits(ctx context.Context, repository string, previouslyExpiredCommits []string) (string, error) // Revert creates a reverse patch to the given commit, and applies it as a new commit on the given branch. Revert(ctx context.Context, repository, branch string, params RevertParams) error diff --git a/pkg/graveler/graveler.go b/pkg/graveler/graveler.go index 1eaddd4de75..1147ff7afe1 100644 --- a/pkg/graveler/graveler.go +++ b/pkg/graveler/graveler.go @@ -370,6 +370,10 @@ type VersionController interface { // GetStagingToken returns the token identifying current staging for branchID of // repositoryID. GetStagingToken(ctx context.Context, repositoryID RepositoryID, branchID BranchID) (*StagingToken, error) + + // GetExpiredCommits returns the sets of active and expired commits, according to the branch rules for garbage collection. + // The commits in the given set previouslyExpiredCommits will not be scanned. + GetExpiredCommits(ctx context.Context, repositoryID RepositoryID, previouslyExpiredCommits []CommitID) (expired []CommitID, active []CommitID, err error) } // Plumbing includes commands for fiddling more directly with graveler implementation @@ -468,6 +472,14 @@ type CommitIterator interface { Close() } +type CommitGetter interface { + GetCommit(ctx context.Context, repositoryID RepositoryID, commitID CommitID) (*Commit, error) +} + +type BranchLister interface { + ListBranches(ctx context.Context, repositoryID RepositoryID) (BranchIterator, error) +} + // These are the more complex internal components that compose the functionality of the Graveler // RefManager handles references: branches, commits, probably tags in the future @@ -535,6 +547,10 @@ type RefManager interface { // FillGenerations computes and updates the generation field for all commits in a repository. // It should be used for restoring commits from a commit-dump which was performed before the field was introduced. FillGenerations(ctx context.Context, repositoryID RepositoryID) error + + // GetExpiredCommits returns the sets of active and expired commits, according to the branch rules for garbage collection. + // The commits in the given set previouslyExpiredCommits will not be scanned. + GetExpiredCommits(ctx context.Context, repositoryID RepositoryID, previouslyExpiredCommits []CommitID) (expired []CommitID, active []CommitID, err error) } // CommittedManager reads and applies committed snapshots @@ -631,7 +647,6 @@ func (id Key) Copy() Key { copy(keyCopy, id) return keyCopy } - func (id Key) String() string { return string(id) } @@ -858,6 +873,10 @@ func (g *Graveler) GetStagingToken(ctx context.Context, repositoryID RepositoryI return &branch.StagingToken, nil } +func (g *Graveler) GetExpiredCommits(ctx context.Context, repositoryID RepositoryID, previouslyExpiredCommits []CommitID) (expired []CommitID, active []CommitID, err error) { + panic("implement me") +} + func (g *Graveler) Get(ctx context.Context, repositoryID RepositoryID, ref Ref, key Key) (*Value, error) { repo, err := g.RefManager.GetRepository(ctx, repositoryID) if err != nil { diff --git a/pkg/graveler/ref/expired_commits.go b/pkg/graveler/ref/expired_commits.go new file mode 100644 index 00000000000..88b64af39d8 --- /dev/null +++ b/pkg/graveler/ref/expired_commits.go @@ -0,0 +1,86 @@ +package ref + +import ( + "context" + "time" + + "github.com/treeverse/lakefs/pkg/graveler" +) + +type ExpirationDateGetter interface { + Get(c *graveler.CommitRecord) time.Time +} + +type ExpiredCommitsFinder struct { + branchLister graveler.BranchLister + commitGetter graveler.CommitGetter + expirationDateGetter ExpirationDateGetter +} + +func (e *ExpiredCommitsFinder) GetExpiredCommits(ctx context.Context, repositoryID graveler.RepositoryID, previouslyExpiredCommits []graveler.CommitID) (expired []graveler.CommitID, active []graveler.CommitID, err error) { + processed := make(map[graveler.CommitID]time.Time) + branchIterator, err := e.branchLister.ListBranches(ctx, repositoryID) + if err != nil { + return nil, nil, err + } + previouslyExpiredMap := make(map[graveler.CommitID]bool) + for _, commitID := range previouslyExpiredCommits { + previouslyExpiredMap[commitID] = true + } + activeMap := make(map[graveler.CommitID]bool) + expiredMap := make(map[graveler.CommitID]bool) + for branchIterator.Next() { + branchRecord := branchIterator.Value() + commitID := branchRecord.CommitID + previousCommit, err := e.commitGetter.GetCommit(ctx, repositoryID, commitID) + if err != nil { + return nil, nil, err + } + var branchExpirationThreshold time.Time + if e.expirationDateGetter == nil { + //branchExpirationThreshold = getExpirationThresholdForCommit(previousCommit) + } else { + branchExpirationThreshold = e.expirationDateGetter.Get(&graveler.CommitRecord{CommitID: commitID, Commit: previousCommit}) + } + if previousThreshold, ok := processed[commitID]; ok && !previousThreshold.After(branchExpirationThreshold) { + // was already here with earlier expiration date + continue + } + processed[commitID] = branchExpirationThreshold + activeMap[commitID] = true + for len(previousCommit.Parents) > 0 { + commitID = previousCommit.Parents[0] + if _, ok := previouslyExpiredMap[commitID]; ok { + // commit was already expired in a previous run + break + } + if previousThreshold, ok := processed[commitID]; ok && !previousThreshold.After(branchExpirationThreshold) { + // was already here with earlier expiration date + break + } + if previousCommit.CreationDate.After(branchExpirationThreshold) { + activeMap[commitID] = true + delete(expiredMap, commitID) + } else if active, ok := activeMap[commitID]; !ok || !active { + expiredMap[commitID] = true + } + previousCommit, err = e.commitGetter.GetCommit(ctx, repositoryID, commitID) + if err != nil { + return nil, nil, err + } + processed[commitID] = branchExpirationThreshold + } + } + if branchIterator.Err() != nil { + return nil, nil, branchIterator.Err() + } + return toArray(activeMap), toArray(expiredMap), nil +} + +func toArray(commitMap map[graveler.CommitID]bool) []graveler.CommitID { + res := make([]graveler.CommitID, 0, len(commitMap)) + for commitID := range commitMap { + res = append(res, commitID) + } + return res +} diff --git a/pkg/retention/expired_commits_test.go b/pkg/graveler/ref/expired_commits_test.go similarity index 83% rename from pkg/retention/expired_commits_test.go rename to pkg/graveler/ref/expired_commits_test.go index 068695bac99..6a4f2bbddb9 100644 --- a/pkg/retention/expired_commits_test.go +++ b/pkg/graveler/ref/expired_commits_test.go @@ -1,4 +1,4 @@ -package retention +package ref import ( "context" @@ -11,7 +11,6 @@ import ( "github.com/treeverse/lakefs/pkg/graveler" "github.com/treeverse/lakefs/pkg/graveler/mock" gtestutil "github.com/treeverse/lakefs/pkg/graveler/testutil" - "github.com/treeverse/lakefs/pkg/testutil" ) type testExpirationDateGetter struct { @@ -34,7 +33,7 @@ func newTestCommit(daysPassed int, parents ...graveler.CommitID) testCommit { } } -func newCommitSet(commitIDs []string) CommitSet { +func newCommitSet(commitIDs []string) map[graveler.CommitID]bool { res := make(map[graveler.CommitID]bool, 0) for _, commitID := range commitIDs { res[graveler.CommitID(commitID)] = true @@ -180,33 +179,44 @@ func TestExpiredCommits(t *testing.T) { refManagerMock.EXPECT().GetCommit(ctx, graveler.RepositoryID("test"), id).Return(commitMap[id], nil) } } - finder := ExpiredCommitFinder{ - refManager: refManagerMock, + finder := ExpiredCommitsFinder{ + commitGetter: refManagerMock, + branchLister: refManagerMock, expirationDateGetter: &testExpirationDateGetter{ expirationDates: expirationDates, }, } - retentionCommits, err := finder.Find(ctx, "test", previouslyExpired) - testutil.MustDo(t, "find active commits", err) - activeCommitIDs := make([]string, 0, len(retentionCommits.Active)) - for commitID := range retentionCommits.Active { - activeCommitIDs = append(activeCommitIDs, string(commitID.Ref())) + previouslyExpiredCommitIDs := make([]graveler.CommitID, len(tst.previouslyExpired)) + for i := range tst.previouslyExpired { + previouslyExpiredCommitIDs[i] = graveler.CommitID(tst.previouslyExpired[i]) + } + activeCommits, expiredCommits, err := finder.GetExpiredCommits(ctx, "test", previouslyExpiredCommitIDs) + if err != nil { + t.Fatalf("failed to find expired commits: %v", err) } sort.Strings(tst.expectedActiveIDs) - sort.Strings(activeCommitIDs) - if diff := deep.Equal(tst.expectedActiveIDs, activeCommitIDs); diff != nil { + sort.Slice(activeCommits, func(i, j int) bool { + return activeCommits[i].Ref() < activeCommits[j].Ref() + }) + if diff := deep.Equal(tst.expectedActiveIDs, testToStringArray(activeCommits)); diff != nil { t.Errorf("active commits ids diff=%s", diff) } - expiredCommitIDs := make([]string, 0, len(retentionCommits.Expired)) - for commitID := range retentionCommits.Expired { - expiredCommitIDs = append(expiredCommitIDs, string(commitID.Ref())) - } sort.Strings(tst.expectedExpiredIDs) - sort.Strings(expiredCommitIDs) - if diff := deep.Equal(tst.expectedExpiredIDs, expiredCommitIDs); diff != nil { + sort.Slice(expiredCommits, func(i, j int) bool { + return expiredCommits[i].Ref() < expiredCommits[j].Ref() + }) + if diff := deep.Equal(tst.expectedExpiredIDs, testToStringArray(expiredCommits)); diff != nil { t.Errorf("expired commits ids diff=%s", diff) } }) } } + +func testToStringArray(commitIDs []graveler.CommitID) []string { + res := make([]string, len(commitIDs)) + for i := range commitIDs { + res[i] = string(commitIDs[i]) + } + return res +} diff --git a/pkg/graveler/ref/manager.go b/pkg/graveler/ref/manager.go index 24f0c27f166..e600dd6dac6 100644 --- a/pkg/graveler/ref/manager.go +++ b/pkg/graveler/ref/manager.go @@ -410,3 +410,7 @@ func (m *Manager) FillGenerations(ctx context.Context, repositoryID graveler.Rep }) return err } + +func (m *Manager) GetExpiredCommits(ctx context.Context, repositoryID graveler.RepositoryID, previouslyExpiredCommits []graveler.CommitID) (expired []graveler.CommitID, active []graveler.CommitID, err error) { + panic("implement me") +} diff --git a/pkg/graveler/ref/merge_base_finder.go b/pkg/graveler/ref/merge_base_finder.go index 2970e3178be..dfaafeada22 100644 --- a/pkg/graveler/ref/merge_base_finder.go +++ b/pkg/graveler/ref/merge_base_finder.go @@ -7,10 +7,6 @@ import ( "github.com/treeverse/lakefs/pkg/graveler" ) -type CommitGetter interface { - GetCommit(ctx context.Context, repositoryID graveler.RepositoryID, commitID graveler.CommitID) (*graveler.Commit, error) -} - type reachedFlags uint8 const ( @@ -20,7 +16,7 @@ const ( // FindMergeBase finds the best common ancestor according to the definition in the git-merge-base documentation: https://git-scm.com/docs/git-merge-base // One common ancestor is better than another common ancestor if the latter is an ancestor of the former. -func FindMergeBase(ctx context.Context, getter CommitGetter, repositoryID graveler.RepositoryID, leftID, rightID graveler.CommitID) (*graveler.Commit, error) { +func FindMergeBase(ctx context.Context, getter graveler.CommitGetter, repositoryID graveler.RepositoryID, leftID, rightID graveler.CommitID) (*graveler.Commit, error) { var commitRecord *graveler.CommitRecord queue := NewCommitsGenerationPriorityQueue() reached := make(map[graveler.CommitID]reachedFlags) diff --git a/pkg/retention/commit_set_writer.go b/pkg/retention/commit_set_writer.go deleted file mode 100644 index 8112551f655..00000000000 --- a/pkg/retention/commit_set_writer.go +++ /dev/null @@ -1,38 +0,0 @@ -package retention - -import ( - "context" - "encoding/csv" - "strconv" - "strings" - - "github.com/treeverse/lakefs/pkg/block" -) - -type CommitSetWriter struct { - ctx context.Context - block block.Adapter -} - -func NewCommitSetWriter(block block.Adapter) *CommitSetWriter { - return &CommitSetWriter{block: block} -} - -func (c *CommitSetWriter) Write(commits *Commits, pointer *block.ObjectPointer) error { - b := &strings.Builder{} - csvWriter := csv.NewWriter(b) - for commitID := range commits.Expired { - err := csvWriter.Write([]string{string(commitID), strconv.FormatBool(true)}) - if err != nil { - return err - } - } - for commitID := range commits.Active { - err := csvWriter.Write([]string{string(commitID), strconv.FormatBool(false)}) - if err != nil { - return err - } - } - commitsStr := b.String() - return c.block.Put(c.ctx, *pointer, int64(len(commitsStr)), strings.NewReader(commitsStr), block.PutOpts{}) -} diff --git a/pkg/retention/commit_set_writer_test.go b/pkg/retention/commit_set_writer_test.go deleted file mode 100644 index 0e18213b4ab..00000000000 --- a/pkg/retention/commit_set_writer_test.go +++ /dev/null @@ -1,21 +0,0 @@ -package retention - -import ( - "testing" - - "github.com/treeverse/lakefs/pkg/graveler" -) - -func Test(t *testing.T) { - //awsConfig := &aws.Config{Region: swag.String("us-east-1")} - //sess, err := session.NewSession(awsConfig) - //if err != nil { - // panic(err) - //} - //client := s3.New(sess, awsConfig) - w := NewCommitSetWriter(nil) - w.Write(&Commits{ - Expired: map[graveler.CommitID]bool{"a": true, "b": true}, - Active: map[graveler.CommitID]bool{"c": true, "d": true}, - }, nil) -} diff --git a/pkg/retention/expired_commits.go b/pkg/retention/expired_commits.go deleted file mode 100644 index a9a85ae156c..00000000000 --- a/pkg/retention/expired_commits.go +++ /dev/null @@ -1,90 +0,0 @@ -package retention - -import ( - "context" - "time" - - "github.com/treeverse/lakefs/pkg/graveler" -) - -type ExpirationDateGetter interface { - Get(c *graveler.CommitRecord) time.Time -} - -type ExpiredCommitFinder struct { - refManager graveler.RefManager - expirationDateGetter ExpirationDateGetter -} - -func NewExpiredCommitFinder(refManager graveler.RefManager) *ExpiredCommitFinder { - return &ExpiredCommitFinder{refManager: refManager} -} - -type CommitSet map[graveler.CommitID]bool - -type Commits struct { - Expired CommitSet - Active CommitSet -} - -func (a *ExpiredCommitFinder) Find(ctx context.Context, repositoryID graveler.RepositoryID, previouslyExpiredCommits CommitSet) (*Commits, error) { - processed := make(map[graveler.CommitID]time.Time) - res := &Commits{ - Active: make(map[graveler.CommitID]bool), - Expired: make(map[graveler.CommitID]bool), - } - branchIterator, err := a.refManager.ListBranches(ctx, repositoryID) - if err != nil { - return nil, err - } - for branchIterator.Next() { - branchRecord := branchIterator.Value() - commitID := branchRecord.CommitID - previousCommit, err := a.refManager.GetCommit(ctx, repositoryID, commitID) - if err != nil { - return nil, err - } - var branchExpirationThreshold time.Time - if a.expirationDateGetter == nil { - branchExpirationThreshold = getExpirationThresholdForCommit(previousCommit) - } else { - branchExpirationThreshold = a.expirationDateGetter.Get(&graveler.CommitRecord{CommitID: commitID, Commit: previousCommit}) - } - if previousThreshold, ok := processed[commitID]; ok && !previousThreshold.After(branchExpirationThreshold) { - // was already here with earlier expiration date - continue - } - processed[commitID] = branchExpirationThreshold - res.Active[commitID] = true - for len(previousCommit.Parents) > 0 { - commitID = previousCommit.Parents[0] - if _, ok := previouslyExpiredCommits[commitID]; ok { - // commit was already expired in a previous run - break - } - if previousThreshold, ok := processed[commitID]; ok && !previousThreshold.After(branchExpirationThreshold) { - // was already here with earlier expiration date - break - } - if previousCommit.CreationDate.After(branchExpirationThreshold) { - res.Active[commitID] = true - delete(res.Expired, commitID) - } else if active, ok := res.Active[commitID]; !ok || !active { - res.Expired[commitID] = true - } - previousCommit, err = a.refManager.GetCommit(ctx, repositoryID, commitID) - if err != nil { - return nil, err - } - processed[commitID] = branchExpirationThreshold - } - } - if branchIterator.Err() != nil { - return nil, branchIterator.Err() - } - return res, nil -} - -func getExpirationThresholdForCommit(_ *graveler.Commit) time.Time { - return time.Now().AddDate(0, 0, -28) -} diff --git a/pkg/retention/manager.go b/pkg/retention/manager.go deleted file mode 100644 index 8ab6760f928..00000000000 --- a/pkg/retention/manager.go +++ /dev/null @@ -1,19 +0,0 @@ -package retention - -import ( - "context" - - "github.com/treeverse/lakefs/pkg/block" - "github.com/treeverse/lakefs/pkg/graveler" -) - -type Manager struct { - graveler graveler.Graveler - adapter block.Adapter -} - -func (m *Manager) Prepare(ctx context.Context, repositoryID graveler.RepositoryID) { - expiredCommitFinder := NewExpiredCommitFinder(m.graveler.RefManager) - commits, err := expiredCommitFinder.Find(ctx, repositoryID, nil) - -} From 494239d816118e92d90bd323759117b5ab40e2d4 Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Mon, 14 Jun 2021 16:39:17 +0300 Subject: [PATCH 09/24] put everything in graveler --- .../python/lakefs_client/models/__init__.py | 2 +- pkg/catalog/catalog.go | 4 +- pkg/graveler/graveler.go | 50 +++++++++++++------ pkg/graveler/ref/expired_commits.go | 22 ++++---- pkg/graveler/ref/expired_commits_test.go | 20 +++----- pkg/graveler/ref/manager.go | 5 +- pkg/graveler/retention/garbage_collection.go | 48 ++++++++++++++++++ 7 files changed, 109 insertions(+), 42 deletions(-) create mode 100644 pkg/graveler/retention/garbage_collection.go diff --git a/clients/python/lakefs_client/models/__init__.py b/clients/python/lakefs_client/models/__init__.py index 72697fe4fc1..a96c699dccb 100644 --- a/clients/python/lakefs_client/models/__init__.py +++ b/clients/python/lakefs_client/models/__init__.py @@ -24,7 +24,7 @@ from lakefs_client.model.diff import Diff from lakefs_client.model.diff_list import DiffList from lakefs_client.model.error import Error - from lakefs_client.model.garbage_collection_commits import GarbageCollectionCommits +from lakefs_client.model.garbage_collection_commits import GarbageCollectionCommits from lakefs_client.model.group import Group from lakefs_client.model.group_creation import GroupCreation from lakefs_client.model.group_list import GroupList diff --git a/pkg/catalog/catalog.go b/pkg/catalog/catalog.go index cc75dc9361b..e4a2d8040db 100644 --- a/pkg/catalog/catalog.go +++ b/pkg/catalog/catalog.go @@ -21,6 +21,7 @@ import ( "github.com/treeverse/lakefs/pkg/graveler" "github.com/treeverse/lakefs/pkg/graveler/committed" "github.com/treeverse/lakefs/pkg/graveler/ref" + "github.com/treeverse/lakefs/pkg/graveler/retention" "github.com/treeverse/lakefs/pkg/graveler/sstable" "github.com/treeverse/lakefs/pkg/graveler/staging" "github.com/treeverse/lakefs/pkg/ident" @@ -184,7 +185,8 @@ func New(ctx context.Context, cfg Config) (*Catalog, error) { refManager := ref.NewPGRefManager(executor, cfg.DB, ident.NewHexAddressProvider()) branchLocker := ref.NewBranchLocker(cfg.LockDB) - store := graveler.NewGraveler(branchLocker, committedManager, stagingManager, refManager) + retentionRuleManager := retention.NewRuleManager(tierFSParams.Adapter) + store := graveler.NewGraveler(branchLocker, committedManager, stagingManager, refManager, retentionRuleManager) return &Catalog{ BlockAdapter: tierFSParams.Adapter, diff --git a/pkg/graveler/graveler.go b/pkg/graveler/graveler.go index 1147ff7afe1..3dd36ffdb24 100644 --- a/pkg/graveler/graveler.go +++ b/pkg/graveler/graveler.go @@ -550,7 +550,7 @@ type RefManager interface { // GetExpiredCommits returns the sets of active and expired commits, according to the branch rules for garbage collection. // The commits in the given set previouslyExpiredCommits will not be scanned. - GetExpiredCommits(ctx context.Context, repositoryID RepositoryID, previouslyExpiredCommits []CommitID) (expired []CommitID, active []CommitID, err error) + GetExpiredCommits(ctx context.Context, repositoryID RepositoryID, previouslyExpiredCommits []CommitID, rules *RetentionRules) (expired []CommitID, active []CommitID, err error) } // CommittedManager reads and applies committed snapshots @@ -664,22 +664,24 @@ func (id TagID) String() string { } type Graveler struct { - CommittedManager CommittedManager - StagingManager StagingManager - RefManager RefManager - branchLocker BranchLocker - hooks HooksHandler - log logging.Logger + CommittedManager CommittedManager + StagingManager StagingManager + RefManager RefManager + branchLocker BranchLocker + hooks HooksHandler + retentionRuleManager RetentionRuleManager + log logging.Logger } -func NewGraveler(branchLocker BranchLocker, committedManager CommittedManager, stagingManager StagingManager, refManager RefManager) *Graveler { +func NewGraveler(branchLocker BranchLocker, committedManager CommittedManager, stagingManager StagingManager, refManager RefManager, retentionRuleManager RetentionRuleManager) *Graveler { return &Graveler{ - CommittedManager: committedManager, - StagingManager: stagingManager, - RefManager: refManager, - branchLocker: branchLocker, - hooks: &HooksNoOp{}, - log: logging.Default().WithField("service_name", "graveler_graveler"), + CommittedManager: committedManager, + StagingManager: stagingManager, + RefManager: refManager, + branchLocker: branchLocker, + hooks: &HooksNoOp{}, + retentionRuleManager: retentionRuleManager, + log: logging.Default().WithField("service_name", "graveler_graveler"), } } @@ -874,7 +876,16 @@ func (g *Graveler) GetStagingToken(ctx context.Context, repositoryID RepositoryI } func (g *Graveler) GetExpiredCommits(ctx context.Context, repositoryID RepositoryID, previouslyExpiredCommits []CommitID) (expired []CommitID, active []CommitID, err error) { - panic("implement me") + repo, err := g.RefManager.GetRepository(ctx, repositoryID) + if err != nil { + return nil, nil, err + } + // TODO use "_lakefs" from configuration + rules, err := g.retentionRuleManager.GetRules(ctx, fmt.Sprintf("%s/_lakefs/retention/rules/config.json", repo.StorageNamespace)) + if err != nil { + return nil, nil, fmt.Errorf("get retention rules: %v", err) + } + return g.RefManager.GetExpiredCommits(ctx, repositoryID, previouslyExpiredCommits, rules) } func (g *Graveler) Get(ctx context.Context, repositoryID RepositoryID, ref Ref, key Key) (*Value, error) { @@ -2000,3 +2011,12 @@ func (c *commitValueIterator) Err() error { func (c *commitValueIterator) Close() { c.src.Close() } + +type RetentionRuleManager interface { + GetRules(ctx context.Context, rulesConfigurationPath string) (*RetentionRules, error) +} + +type RetentionRules struct { + DefaultRetentionDays int `json:"default"` + BranchRetentionDays map[BranchID]int `json:"branches"` +} diff --git a/pkg/graveler/ref/expired_commits.go b/pkg/graveler/ref/expired_commits.go index 88b64af39d8..c1810f02bf6 100644 --- a/pkg/graveler/ref/expired_commits.go +++ b/pkg/graveler/ref/expired_commits.go @@ -12,13 +12,19 @@ type ExpirationDateGetter interface { } type ExpiredCommitsFinder struct { - branchLister graveler.BranchLister - commitGetter graveler.CommitGetter - expirationDateGetter ExpirationDateGetter + branchLister graveler.BranchLister + commitGetter graveler.CommitGetter + rules *graveler.RetentionRules +} + +func NewExpiredCommitsFinder(branchLister graveler.BranchLister, commitGetter graveler.CommitGetter, rules *graveler.RetentionRules) *ExpiredCommitsFinder { + return &ExpiredCommitsFinder{branchLister: branchLister, commitGetter: commitGetter, rules: rules} } func (e *ExpiredCommitsFinder) GetExpiredCommits(ctx context.Context, repositoryID graveler.RepositoryID, previouslyExpiredCommits []graveler.CommitID) (expired []graveler.CommitID, active []graveler.CommitID, err error) { + now := time.Now() processed := make(map[graveler.CommitID]time.Time) + branchIterator, err := e.branchLister.ListBranches(ctx, repositoryID) if err != nil { return nil, nil, err @@ -31,17 +37,15 @@ func (e *ExpiredCommitsFinder) GetExpiredCommits(ctx context.Context, repository expiredMap := make(map[graveler.CommitID]bool) for branchIterator.Next() { branchRecord := branchIterator.Value() + branchExpirationThreshold := now.AddDate(0, 0, -e.rules.DefaultRetentionDays) + if branchExpirationPeriod, ok := e.rules.BranchRetentionDays[branchRecord.BranchID]; ok { + branchExpirationThreshold = now.AddDate(0, 0, -branchExpirationPeriod) + } commitID := branchRecord.CommitID previousCommit, err := e.commitGetter.GetCommit(ctx, repositoryID, commitID) if err != nil { return nil, nil, err } - var branchExpirationThreshold time.Time - if e.expirationDateGetter == nil { - //branchExpirationThreshold = getExpirationThresholdForCommit(previousCommit) - } else { - branchExpirationThreshold = e.expirationDateGetter.Get(&graveler.CommitRecord{CommitID: commitID, Commit: previousCommit}) - } if previousThreshold, ok := processed[commitID]; ok && !previousThreshold.After(branchExpirationThreshold) { // was already here with earlier expiration date continue diff --git a/pkg/graveler/ref/expired_commits_test.go b/pkg/graveler/ref/expired_commits_test.go index 6a4f2bbddb9..1d28e5edcb2 100644 --- a/pkg/graveler/ref/expired_commits_test.go +++ b/pkg/graveler/ref/expired_commits_test.go @@ -13,14 +13,6 @@ import ( gtestutil "github.com/treeverse/lakefs/pkg/graveler/testutil" ) -type testExpirationDateGetter struct { - expirationDates map[string]time.Time -} - -func (t *testExpirationDateGetter) Get(c *graveler.CommitRecord) time.Time { - return t.expirationDates[string(c.CommitID)] -} - type testCommit struct { daysPassed int parents []graveler.CommitID @@ -151,19 +143,21 @@ func TestExpiredCommits(t *testing.T) { expectedExpiredIDs: []string{"e", "d"}, }, } - now := time.Now() for name, tst := range tests { t.Run(name, func(t *testing.T) { + now := time.Now() branchRecords := make([]*graveler.BranchRecord, 0, len(tst.headsRetentionDays)) expirationDates := make(map[string]time.Time) ctrl := gomock.NewController(t) refManagerMock := mock.NewMockRefManager(ctrl) ctx := context.Background() + retentionRules := &graveler.RetentionRules{DefaultRetentionDays: 0, BranchRetentionDays: make(map[graveler.BranchID]int)} for head, retentionDays := range tst.headsRetentionDays { branchRecords = append(branchRecords, &graveler.BranchRecord{ - Branch: &graveler.Branch{CommitID: graveler.CommitID(head)}, + BranchID: graveler.BranchID(head), + Branch: &graveler.Branch{CommitID: graveler.CommitID(head)}, }) - expirationDates[head] = now.AddDate(0, 0, -retentionDays) + retentionRules.BranchRetentionDays[graveler.BranchID(head)] = retentionDays } sort.Slice(branchRecords, func(i, j int) bool { return expirationDates[string(branchRecords[i].CommitID)].Before(expirationDates[string(branchRecords[j].CommitID)]) @@ -182,9 +176,7 @@ func TestExpiredCommits(t *testing.T) { finder := ExpiredCommitsFinder{ commitGetter: refManagerMock, branchLister: refManagerMock, - expirationDateGetter: &testExpirationDateGetter{ - expirationDates: expirationDates, - }, + rules: retentionRules, } previouslyExpiredCommitIDs := make([]graveler.CommitID, len(tst.previouslyExpired)) for i := range tst.previouslyExpired { diff --git a/pkg/graveler/ref/manager.go b/pkg/graveler/ref/manager.go index e600dd6dac6..ec1ec976d0c 100644 --- a/pkg/graveler/ref/manager.go +++ b/pkg/graveler/ref/manager.go @@ -411,6 +411,7 @@ func (m *Manager) FillGenerations(ctx context.Context, repositoryID graveler.Rep return err } -func (m *Manager) GetExpiredCommits(ctx context.Context, repositoryID graveler.RepositoryID, previouslyExpiredCommits []graveler.CommitID) (expired []graveler.CommitID, active []graveler.CommitID, err error) { - panic("implement me") +func (m *Manager) GetExpiredCommits(ctx context.Context, repositoryID graveler.RepositoryID, previouslyExpiredCommits []graveler.CommitID, rules *graveler.RetentionRules) (expired []graveler.CommitID, active []graveler.CommitID, err error) { + finder := NewExpiredCommitsFinder(m, m, rules) + return finder.GetExpiredCommits(ctx, repositoryID, previouslyExpiredCommits) } diff --git a/pkg/graveler/retention/garbage_collection.go b/pkg/graveler/retention/garbage_collection.go new file mode 100644 index 00000000000..14476baa16f --- /dev/null +++ b/pkg/graveler/retention/garbage_collection.go @@ -0,0 +1,48 @@ +package retention + +import ( + "bytes" + "context" + "encoding/json" + + "github.com/treeverse/lakefs/pkg/block" + "github.com/treeverse/lakefs/pkg/graveler" +) + +type RuleManager struct { + blockAdapter block.Adapter +} + +func NewRuleManager(blockAdapter block.Adapter) *RuleManager { + return &RuleManager{blockAdapter: blockAdapter} +} + +func (m *RuleManager) GetRules(ctx context.Context, rulesConfigurationPath string) (*graveler.RetentionRules, error) { + reader, err := m.blockAdapter.Get(ctx, block.ObjectPointer{ + Identifier: rulesConfigurationPath, + IdentifierType: block.IdentifierTypeFull, + }, -1) + defer func() { + _ = reader.Close() + }() + if err != nil { + return nil, err + } + var rules graveler.RetentionRules + err = json.NewDecoder(reader).Decode(&rules) + if err != nil { + return nil, err + } + return &rules, nil +} + +func (m *RuleManager) SaveRules(ctx context.Context, rulesConfigurationPath string, rules *graveler.RetentionRules) error { + rulesBytes, err := json.Marshal(rules) + if err != nil { + return err + } + return m.blockAdapter.Put(ctx, block.ObjectPointer{ + Identifier: rulesConfigurationPath, + IdentifierType: block.IdentifierTypeFull, + }, int64(len(rulesBytes)), bytes.NewReader(rulesBytes), block.PutOpts{}) +} From 8cb41dd51cf1643e9900979192e846e63b429209 Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Mon, 14 Jun 2021 16:44:40 +0300 Subject: [PATCH 10/24] linting --- pkg/catalog/catalog.go | 8 +++---- pkg/catalog/commit_set_writer.go | 39 -------------------------------- pkg/catalog/interface.go | 2 +- pkg/graveler/graveler.go | 2 +- 4 files changed, 6 insertions(+), 45 deletions(-) delete mode 100644 pkg/catalog/commit_set_writer.go diff --git a/pkg/catalog/catalog.go b/pkg/catalog/catalog.go index e4a2d8040db..28d9f47b108 100644 --- a/pkg/catalog/catalog.go +++ b/pkg/catalog/catalog.go @@ -1193,7 +1193,7 @@ func (c *Catalog) PrepareExpiredCommits(ctx context.Context, repository string, } previousRunReader, err := c.BlockAdapter.Get(ctx, block.ObjectPointer{ StorageNamespace: string(repo.StorageNamespace), - Identifier: fmt.Sprintf(previousResultPath), + Identifier: previousResultPath, IdentifierType: block.IdentifierTypeRelative, }, -1) if err != nil { @@ -1212,7 +1212,7 @@ func (c *Catalog) PrepareExpiredCommits(ctx context.Context, repository string, } activeCommits, expiredCommits, err := c.Store.GetExpiredCommits(ctx, repositoryID, previouslyExpiredCommits) if err != nil { - return "", fmt.Errorf("preparing expired commits: %v", err) + return "", fmt.Errorf("preparing expired commits: %w", err) } b := &strings.Builder{} csvWriter := csv.NewWriter(b) @@ -1229,8 +1229,8 @@ func (c *Catalog) PrepareExpiredCommits(ctx context.Context, repository string, } } commitsStr := b.String() - runId := uuid.New().String() - path := fmt.Sprintf("_lakefs/retention/commits/run_id=%s/commits.csv", runId) + runID := uuid.New().String() + path := fmt.Sprintf("_lakefs/retention/commits/run_id=%s/commits.csv", runID) err = c.BlockAdapter.Put(ctx, block.ObjectPointer{ StorageNamespace: string(repo.StorageNamespace), Identifier: path, diff --git a/pkg/catalog/commit_set_writer.go b/pkg/catalog/commit_set_writer.go deleted file mode 100644 index 40a092d0ab4..00000000000 --- a/pkg/catalog/commit_set_writer.go +++ /dev/null @@ -1,39 +0,0 @@ -package catalog - -// -//import ( -// "context" -// "encoding/csv" -// "strconv" -// "strings" -// -// "github.com/treeverse/lakefs/pkg/block" -//) -// -//type CommitSetWriter struct { -// ctx context.Context -// block block.Adapter -//} -// -//func NewCommitSetWriter(block block.Adapter) *CommitSetWriter { -// return &CommitSetWriter{block: block} -//} -// -//func (c *CommitSetWriter) Write(commits *retention.Commits, pointer *block.ObjectPointer) error { -// b := &strings.Builder{} -// csvWriter := csv.NewWriter(b) -// for commitID := range commits.Expired { -// err := csvWriter.Write([]string{string(commitID), strconv.FormatBool(true)}) -// if err != nil { -// return err -// } -// } -// for commitID := range commits.Active { -// err := csvWriter.Write([]string{string(commitID), strconv.FormatBool(false)}) -// if err != nil { -// return err -// } -// } -// commitsStr := b.String() -// return c.block.Put(c.ctx, *pointer, int64(len(commitsStr)), strings.NewReader(commitsStr), block.PutOpts{}) -//} diff --git a/pkg/catalog/interface.go b/pkg/catalog/interface.go index 2cbe18ed11c..6e88b5814b2 100644 --- a/pkg/catalog/interface.go +++ b/pkg/catalog/interface.go @@ -94,7 +94,7 @@ type Interface interface { Commit(ctx context.Context, repository, branch string, message string, committer string, metadata Metadata) (*CommitLog, error) GetCommit(ctx context.Context, repository, reference string) (*CommitLog, error) ListCommits(ctx context.Context, repository, branch string, fromReference string, limit int) ([]*CommitLog, bool, error) - PrepareExpiredCommits(ctx context.Context, repository string, previouslyExpiredCommits []string) (string, error) + PrepareExpiredCommits(ctx context.Context, repository string, previousResultPath string) (string, error) // Revert creates a reverse patch to the given commit, and applies it as a new commit on the given branch. Revert(ctx context.Context, repository, branch string, params RevertParams) error diff --git a/pkg/graveler/graveler.go b/pkg/graveler/graveler.go index 3dd36ffdb24..a25ba9b95ee 100644 --- a/pkg/graveler/graveler.go +++ b/pkg/graveler/graveler.go @@ -883,7 +883,7 @@ func (g *Graveler) GetExpiredCommits(ctx context.Context, repositoryID Repositor // TODO use "_lakefs" from configuration rules, err := g.retentionRuleManager.GetRules(ctx, fmt.Sprintf("%s/_lakefs/retention/rules/config.json", repo.StorageNamespace)) if err != nil { - return nil, nil, fmt.Errorf("get retention rules: %v", err) + return nil, nil, fmt.Errorf("get retention rules: %w", err) } return g.RefManager.GetExpiredCommits(ctx, repositoryID, previouslyExpiredCommits, rules) } From 196d94624e33c3ec5654fe0723461989c73815cd Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Mon, 14 Jun 2021 16:46:31 +0300 Subject: [PATCH 11/24] revert typo --- clients/spark/build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/spark/build.sbt b/clients/spark/build.sbt index 9d14a416d3a..228c4e306ea 100644 --- a/clients/spark/build.sbt +++ b/clients/spark/build.sbt @@ -57,7 +57,7 @@ def generateExamplesProject(buildType: BuildType) = sharedSettings, settingsToCompileIn("examples"), scalaVersion := buildType.scalaVersion, - libraryDependencies ++= Seq("org. apache.spark" %% "spark-sql" % buildType.sparkVersion % "provided", + libraryDependencies ++= Seq("org.apache.spark" %% "spark-sql" % buildType.sparkVersion % "provided", "software.amazon.awssdk" % "bom" % "2.15.15", "software.amazon.awssdk" % "s3" % "2.15.15", "com.amazonaws" % "aws-java-sdk" % "1.7.4", // should match hadoop-aws version(!) From edff49ce05d9e57eb7265e7f82fd182b7a776b7b Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Mon, 14 Jun 2021 17:36:29 +0300 Subject: [PATCH 12/24] finish api wiring --- api/swagger.yml | 11 ++++ clients/java/README.md | 1 + clients/java/api/openapi.yaml | 14 ++++++ clients/java/docs/RetentionApi.md | 8 +-- .../io/lakefs/clients/api/RetentionApi.java | 27 ++++++---- clients/python/.openapi-generator/FILES | 2 + clients/python/README.md | 1 + clients/python/docs/RetentionApi.md | 16 +++++- .../python/lakefs_client/api/retention_api.py | 10 +++- .../python/lakefs_client/models/__init__.py | 1 + pkg/api/controller.go | 13 ++++- pkg/catalog/catalog.go | 16 +++--- pkg/catalog/catalog_test.go | 3 +- pkg/catalog/fake_graveler_test.go | 4 ++ pkg/graveler/graveler_test.go | 50 +++++++++---------- pkg/graveler/testutil/fakes.go | 4 ++ 16 files changed, 128 insertions(+), 53 deletions(-) diff --git a/api/swagger.yml b/api/swagger.yml index 90b1a14bcbb..60f9600311d 100644 --- a/api/swagger.yml +++ b/api/swagger.yml @@ -817,6 +817,12 @@ components: - checksum - size_bytes + GarbageCollectionPrepareRequest: + type: object + properties: + previous_result_path: + type: string + description: path to the result of a previous successful GC job GarbageCollectionCommits: type: object properties: @@ -2844,6 +2850,11 @@ paths: schema: type: string post: + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/GarbageCollectionPrepareRequest" tags: - retention operationId: prepareGarbageCollectionCommits diff --git a/clients/java/README.md b/clients/java/README.md index 5752e9b74eb..b51a2a6a20f 100644 --- a/clients/java/README.md +++ b/clients/java/README.md @@ -217,6 +217,7 @@ Class | Method | HTTP request | Description - [DiffList](docs/DiffList.md) - [Error](docs/Error.md) - [GarbageCollectionCommits](docs/GarbageCollectionCommits.md) + - [GarbageCollectionPrepareRequest](docs/GarbageCollectionPrepareRequest.md) - [Group](docs/Group.md) - [GroupCreation](docs/GroupCreation.md) - [GroupList](docs/GroupList.md) diff --git a/clients/java/api/openapi.yaml b/clients/java/api/openapi.yaml index d64461cbb19..29480f1d5ec 100644 --- a/clients/java/api/openapi.yaml +++ b/clients/java/api/openapi.yaml @@ -3749,6 +3749,11 @@ paths: schema: type: string style: simple + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/GarbageCollectionPrepareRequest' responses: "201": content: @@ -3777,6 +3782,7 @@ paths: summary: save lists of active and expired commits for garbage collection tags: - retention + x-contentType: application/json x-accepts: application/json /healthcheck: get: @@ -4950,6 +4956,14 @@ components: - size_bytes - staging type: object + GarbageCollectionPrepareRequest: + example: + previous_result_path: previous_result_path + properties: + previous_result_path: + description: path to the result of a previous successful GC job + type: string + type: object GarbageCollectionCommits: example: path: path diff --git a/clients/java/docs/RetentionApi.md b/clients/java/docs/RetentionApi.md index e1f70ef499d..cdc8efe9f25 100644 --- a/clients/java/docs/RetentionApi.md +++ b/clients/java/docs/RetentionApi.md @@ -9,7 +9,7 @@ Method | HTTP request | Description # **prepareGarbageCollectionCommits** -> GarbageCollectionCommits prepareGarbageCollectionCommits(repository) +> GarbageCollectionCommits prepareGarbageCollectionCommits(repository, garbageCollectionPrepareRequest) save lists of active and expired commits for garbage collection @@ -45,8 +45,9 @@ public class Example { RetentionApi apiInstance = new RetentionApi(defaultClient); String repository = "repository_example"; // String | + GarbageCollectionPrepareRequest garbageCollectionPrepareRequest = new GarbageCollectionPrepareRequest(); // GarbageCollectionPrepareRequest | try { - GarbageCollectionCommits result = apiInstance.prepareGarbageCollectionCommits(repository); + GarbageCollectionCommits result = apiInstance.prepareGarbageCollectionCommits(repository, garbageCollectionPrepareRequest); System.out.println(result); } catch (ApiException e) { System.err.println("Exception when calling RetentionApi#prepareGarbageCollectionCommits"); @@ -64,6 +65,7 @@ public class Example { Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- **repository** | **String**| | + **garbageCollectionPrepareRequest** | [**GarbageCollectionPrepareRequest**](GarbageCollectionPrepareRequest.md)| | [optional] ### Return type @@ -75,7 +77,7 @@ Name | Type | Description | Notes ### HTTP request headers - - **Content-Type**: Not defined + - **Content-Type**: application/json - **Accept**: application/json ### HTTP response details diff --git a/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java b/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java index c16491c960c..03c39c8f58c 100644 --- a/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java +++ b/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java @@ -29,6 +29,7 @@ import io.lakefs.clients.api.model.Error; import io.lakefs.clients.api.model.GarbageCollectionCommits; +import io.lakefs.clients.api.model.GarbageCollectionPrepareRequest; import java.lang.reflect.Type; import java.util.ArrayList; @@ -58,6 +59,7 @@ public void setApiClient(ApiClient apiClient) { /** * Build call for prepareGarbageCollectionCommits * @param repository (required) + * @param garbageCollectionPrepareRequest (optional) * @param _callback Callback for upload/download progress * @return Call to execute * @throws ApiException If fail to serialize the request body object @@ -70,8 +72,8 @@ public void setApiClient(ApiClient apiClient) { 0 Internal Server Error - */ - public okhttp3.Call prepareGarbageCollectionCommitsCall(String repository, final ApiCallback _callback) throws ApiException { - Object localVarPostBody = null; + public okhttp3.Call prepareGarbageCollectionCommitsCall(String repository, GarbageCollectionPrepareRequest garbageCollectionPrepareRequest, final ApiCallback _callback) throws ApiException { + Object localVarPostBody = garbageCollectionPrepareRequest; // create path and map variables String localVarPath = "/repositories/{repository}/gc/prepare" @@ -92,7 +94,7 @@ public okhttp3.Call prepareGarbageCollectionCommitsCall(String repository, final } final String[] localVarContentTypes = { - + "application/json" }; final String localVarContentType = localVarApiClient.selectHeaderContentType(localVarContentTypes); localVarHeaderParams.put("Content-Type", localVarContentType); @@ -102,7 +104,7 @@ public okhttp3.Call prepareGarbageCollectionCommitsCall(String repository, final } @SuppressWarnings("rawtypes") - private okhttp3.Call prepareGarbageCollectionCommitsValidateBeforeCall(String repository, final ApiCallback _callback) throws ApiException { + private okhttp3.Call prepareGarbageCollectionCommitsValidateBeforeCall(String repository, GarbageCollectionPrepareRequest garbageCollectionPrepareRequest, final ApiCallback _callback) throws ApiException { // verify the required parameter 'repository' is set if (repository == null) { @@ -110,7 +112,7 @@ private okhttp3.Call prepareGarbageCollectionCommitsValidateBeforeCall(String re } - okhttp3.Call localVarCall = prepareGarbageCollectionCommitsCall(repository, _callback); + okhttp3.Call localVarCall = prepareGarbageCollectionCommitsCall(repository, garbageCollectionPrepareRequest, _callback); return localVarCall; } @@ -119,6 +121,7 @@ private okhttp3.Call prepareGarbageCollectionCommitsValidateBeforeCall(String re * save lists of active and expired commits for garbage collection * * @param repository (required) + * @param garbageCollectionPrepareRequest (optional) * @return GarbageCollectionCommits * @throws ApiException If fail to call the API, e.g. server error or cannot deserialize the response body * @http.response.details @@ -130,8 +133,8 @@ private okhttp3.Call prepareGarbageCollectionCommitsValidateBeforeCall(String re 0 Internal Server Error - */ - public GarbageCollectionCommits prepareGarbageCollectionCommits(String repository) throws ApiException { - ApiResponse localVarResp = prepareGarbageCollectionCommitsWithHttpInfo(repository); + public GarbageCollectionCommits prepareGarbageCollectionCommits(String repository, GarbageCollectionPrepareRequest garbageCollectionPrepareRequest) throws ApiException { + ApiResponse localVarResp = prepareGarbageCollectionCommitsWithHttpInfo(repository, garbageCollectionPrepareRequest); return localVarResp.getData(); } @@ -139,6 +142,7 @@ public GarbageCollectionCommits prepareGarbageCollectionCommits(String repositor * save lists of active and expired commits for garbage collection * * @param repository (required) + * @param garbageCollectionPrepareRequest (optional) * @return ApiResponse<GarbageCollectionCommits> * @throws ApiException If fail to call the API, e.g. server error or cannot deserialize the response body * @http.response.details @@ -150,8 +154,8 @@ public GarbageCollectionCommits prepareGarbageCollectionCommits(String repositor 0 Internal Server Error - */ - public ApiResponse prepareGarbageCollectionCommitsWithHttpInfo(String repository) throws ApiException { - okhttp3.Call localVarCall = prepareGarbageCollectionCommitsValidateBeforeCall(repository, null); + public ApiResponse prepareGarbageCollectionCommitsWithHttpInfo(String repository, GarbageCollectionPrepareRequest garbageCollectionPrepareRequest) throws ApiException { + okhttp3.Call localVarCall = prepareGarbageCollectionCommitsValidateBeforeCall(repository, garbageCollectionPrepareRequest, null); Type localVarReturnType = new TypeToken(){}.getType(); return localVarApiClient.execute(localVarCall, localVarReturnType); } @@ -160,6 +164,7 @@ public ApiResponse prepareGarbageCollectionCommitsWith * save lists of active and expired commits for garbage collection (asynchronously) * * @param repository (required) + * @param garbageCollectionPrepareRequest (optional) * @param _callback The callback to be executed when the API call finishes * @return The request call * @throws ApiException If fail to process the API call, e.g. serializing the request body object @@ -172,9 +177,9 @@ public ApiResponse prepareGarbageCollectionCommitsWith 0 Internal Server Error - */ - public okhttp3.Call prepareGarbageCollectionCommitsAsync(String repository, final ApiCallback _callback) throws ApiException { + public okhttp3.Call prepareGarbageCollectionCommitsAsync(String repository, GarbageCollectionPrepareRequest garbageCollectionPrepareRequest, final ApiCallback _callback) throws ApiException { - okhttp3.Call localVarCall = prepareGarbageCollectionCommitsValidateBeforeCall(repository, _callback); + okhttp3.Call localVarCall = prepareGarbageCollectionCommitsValidateBeforeCall(repository, garbageCollectionPrepareRequest, _callback); Type localVarReturnType = new TypeToken(){}.getType(); localVarApiClient.executeAsync(localVarCall, localVarReturnType, _callback); return localVarCall; diff --git a/clients/python/.openapi-generator/FILES b/clients/python/.openapi-generator/FILES index 209c48e3170..714611f4f63 100644 --- a/clients/python/.openapi-generator/FILES +++ b/clients/python/.openapi-generator/FILES @@ -22,6 +22,7 @@ docs/Diff.md docs/DiffList.md docs/Error.md docs/GarbageCollectionCommits.md +docs/GarbageCollectionPrepareRequest.md docs/Group.md docs/GroupCreation.md docs/GroupList.md @@ -101,6 +102,7 @@ lakefs_client/model/diff.py lakefs_client/model/diff_list.py lakefs_client/model/error.py lakefs_client/model/garbage_collection_commits.py +lakefs_client/model/garbage_collection_prepare_request.py lakefs_client/model/group.py lakefs_client/model/group_creation.py lakefs_client/model/group_list.py diff --git a/clients/python/README.md b/clients/python/README.md index 390a7e4de17..c2f166f32c2 100644 --- a/clients/python/README.md +++ b/clients/python/README.md @@ -198,6 +198,7 @@ Class | Method | HTTP request | Description - [DiffList](docs/DiffList.md) - [Error](docs/Error.md) - [GarbageCollectionCommits](docs/GarbageCollectionCommits.md) + - [GarbageCollectionPrepareRequest](docs/GarbageCollectionPrepareRequest.md) - [Group](docs/Group.md) - [GroupCreation](docs/GroupCreation.md) - [GroupList](docs/GroupList.md) diff --git a/clients/python/docs/RetentionApi.md b/clients/python/docs/RetentionApi.md index 838009d12fd..fbf928de618 100644 --- a/clients/python/docs/RetentionApi.md +++ b/clients/python/docs/RetentionApi.md @@ -23,6 +23,7 @@ import lakefs_client from lakefs_client.api import retention_api from lakefs_client.model.garbage_collection_commits import GarbageCollectionCommits from lakefs_client.model.error import Error +from lakefs_client.model.garbage_collection_prepare_request import GarbageCollectionPrepareRequest from pprint import pprint # Defining the host is optional and defaults to http://localhost/api/v1 # See configuration.py for a list of all supported configuration parameters. @@ -57,6 +58,9 @@ with lakefs_client.ApiClient(configuration) as api_client: # Create an instance of the API class api_instance = retention_api.RetentionApi(api_client) repository = "repository_example" # str | + garbage_collection_prepare_request = GarbageCollectionPrepareRequest( + previous_result_path="previous_result_path_example", + ) # GarbageCollectionPrepareRequest | (optional) # example passing only required values which don't have defaults set try: @@ -65,6 +69,15 @@ with lakefs_client.ApiClient(configuration) as api_client: pprint(api_response) except lakefs_client.ApiException as e: print("Exception when calling RetentionApi->prepare_garbage_collection_commits: %s\n" % e) + + # example passing only required values which don't have defaults set + # and optional values + try: + # save lists of active and expired commits for garbage collection + api_response = api_instance.prepare_garbage_collection_commits(repository, garbage_collection_prepare_request=garbage_collection_prepare_request) + pprint(api_response) + except lakefs_client.ApiException as e: + print("Exception when calling RetentionApi->prepare_garbage_collection_commits: %s\n" % e) ``` @@ -73,6 +86,7 @@ with lakefs_client.ApiClient(configuration) as api_client: Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- **repository** | **str**| | + **garbage_collection_prepare_request** | [**GarbageCollectionPrepareRequest**](GarbageCollectionPrepareRequest.md)| | [optional] ### Return type @@ -84,7 +98,7 @@ Name | Type | Description | Notes ### HTTP request headers - - **Content-Type**: Not defined + - **Content-Type**: application/json - **Accept**: application/json diff --git a/clients/python/lakefs_client/api/retention_api.py b/clients/python/lakefs_client/api/retention_api.py index 19d53fe6392..46ee2879933 100644 --- a/clients/python/lakefs_client/api/retention_api.py +++ b/clients/python/lakefs_client/api/retention_api.py @@ -24,6 +24,7 @@ ) from lakefs_client.model.error import Error from lakefs_client.model.garbage_collection_commits import GarbageCollectionCommits +from lakefs_client.model.garbage_collection_prepare_request import GarbageCollectionPrepareRequest class RetentionApi(object): @@ -55,6 +56,7 @@ def __prepare_garbage_collection_commits( repository (str): Keyword Args: + garbage_collection_prepare_request (GarbageCollectionPrepareRequest): [optional] _return_http_data_only (bool): response data without head status code and headers. Default is True. _preload_content (bool): if False, the urllib3.HTTPResponse object @@ -119,6 +121,7 @@ def __prepare_garbage_collection_commits( params_map={ 'all': [ 'repository', + 'garbage_collection_prepare_request', ], 'required': [ 'repository', @@ -138,12 +141,15 @@ def __prepare_garbage_collection_commits( 'openapi_types': { 'repository': (str,), + 'garbage_collection_prepare_request': + (GarbageCollectionPrepareRequest,), }, 'attribute_map': { 'repository': 'repository', }, 'location_map': { 'repository': 'path', + 'garbage_collection_prepare_request': 'body', }, 'collection_format_map': { } @@ -152,7 +158,9 @@ def __prepare_garbage_collection_commits( 'accept': [ 'application/json' ], - 'content_type': [], + 'content_type': [ + 'application/json' + ] }, api_client=api_client, callable=__prepare_garbage_collection_commits diff --git a/clients/python/lakefs_client/models/__init__.py b/clients/python/lakefs_client/models/__init__.py index a96c699dccb..b414c34ad80 100644 --- a/clients/python/lakefs_client/models/__init__.py +++ b/clients/python/lakefs_client/models/__init__.py @@ -25,6 +25,7 @@ from lakefs_client.model.diff_list import DiffList from lakefs_client.model.error import Error from lakefs_client.model.garbage_collection_commits import GarbageCollectionCommits +from lakefs_client.model.garbage_collection_prepare_request import GarbageCollectionPrepareRequest from lakefs_client.model.group import Group from lakefs_client.model.group_creation import GroupCreation from lakefs_client.model.group_list import GroupList diff --git a/pkg/api/controller.go b/pkg/api/controller.go index 7e8a1b4b3af..784f1d9ecfe 100644 --- a/pkg/api/controller.go +++ b/pkg/api/controller.go @@ -1914,7 +1914,7 @@ func (c *Controller) GetCommit(w http.ResponseWriter, r *http.Request, repositor writeResponse(w, http.StatusOK, response) } -func (c *Controller) PrepareGarbageCollectionCommits(w http.ResponseWriter, r *http.Request, repository string) { +func (c *Controller) PrepareGarbageCollectionCommits(w http.ResponseWriter, r *http.Request, body PrepareGarbageCollectionCommitsJSONRequestBody, repository string) { if !c.authorize(w, r, []permissions.Permission{ { Action: permissions.ListObjectsAction, @@ -1923,7 +1923,16 @@ func (c *Controller) PrepareGarbageCollectionCommits(w http.ResponseWriter, r *h }) { return } - pth := "" + ctx := r.Context() + c.LogAction(ctx, "prepare_garbage_collection_commits") + previousResultPath := "" + if body.PreviousResultPath != nil { + previousResultPath = *body.PreviousResultPath + } + pth, err := c.Catalog.PrepareExpiredCommits(ctx, repository, previousResultPath) + if handleAPIError(w, err) { + return + } writeResponse(w, http.StatusCreated, GarbageCollectionCommits{Path: swag.String(pth)}) } diff --git a/pkg/catalog/catalog.go b/pkg/catalog/catalog.go index 28d9f47b108..2e025f45706 100644 --- a/pkg/catalog/catalog.go +++ b/pkg/catalog/catalog.go @@ -1191,16 +1191,16 @@ func (c *Catalog) PrepareExpiredCommits(ctx context.Context, repository string, if err != nil { return "", err } - previousRunReader, err := c.BlockAdapter.Get(ctx, block.ObjectPointer{ - StorageNamespace: string(repo.StorageNamespace), - Identifier: previousResultPath, - IdentifierType: block.IdentifierTypeRelative, - }, -1) - if err != nil { - return "", err - } previouslyExpiredCommits := make([]graveler.CommitID, 0) if previousResultPath != "" { + previousRunReader, err := c.BlockAdapter.Get(ctx, block.ObjectPointer{ + StorageNamespace: string(repo.StorageNamespace), + Identifier: previousResultPath, + IdentifierType: block.IdentifierTypeRelative, + }, -1) + if err != nil { + return "", err + } csvReader := csv.NewReader(previousRunReader) previousCommits, err := csvReader.ReadAll() if err != nil { diff --git a/pkg/catalog/catalog_test.go b/pkg/catalog/catalog_test.go index cf9cbf7acf8..8ab9596d4ab 100644 --- a/pkg/catalog/catalog_test.go +++ b/pkg/catalog/catalog_test.go @@ -5,10 +5,9 @@ import ( "testing" "time" - "github.com/treeverse/lakefs/pkg/graveler/testutil" - "github.com/go-test/deep" "github.com/treeverse/lakefs/pkg/graveler" + "github.com/treeverse/lakefs/pkg/graveler/testutil" "google.golang.org/protobuf/types/known/timestamppb" ) diff --git a/pkg/catalog/fake_graveler_test.go b/pkg/catalog/fake_graveler_test.go index 72714a71e4a..9a726e6e1b4 100644 --- a/pkg/catalog/fake_graveler_test.go +++ b/pkg/catalog/fake_graveler_test.go @@ -19,6 +19,10 @@ type FakeGraveler struct { hooks graveler.HooksHandler } +func (g *FakeGraveler) GetExpiredCommits(ctx context.Context, repositoryID graveler.RepositoryID, previouslyExpiredCommits []graveler.CommitID) (expired []graveler.CommitID, active []graveler.CommitID, err error) { + panic("implement me") +} + func (g *FakeGraveler) CreateBareRepository(ctx context.Context, repositoryID graveler.RepositoryID, storageNamespace graveler.StorageNamespace, branchID graveler.BranchID) (*graveler.Repository, error) { panic("implement me") } diff --git a/pkg/graveler/graveler_test.go b/pkg/graveler/graveler_test.go index 9e8baf389d2..04d1f4333cc 100644 --- a/pkg/graveler/graveler_test.go +++ b/pkg/graveler/graveler_test.go @@ -77,7 +77,7 @@ func TestGraveler_List(t *testing.T) { name: "one committed one staged no paths", r: graveler.NewGraveler(branchLocker, &testutil.CommittedFake{ValueIterator: testutil.NewValueIteratorFake([]graveler.ValueRecord{{Key: graveler.Key("foo"), Value: &graveler.Value{}}})}, &testutil.StagingFake{ValueIterator: testutil.NewValueIteratorFake([]graveler.ValueRecord{{Key: graveler.Key("bar"), Value: &graveler.Value{}}})}, - &testutil.RefsFake{RefType: graveler.ReferenceTypeBranch, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, + &testutil.RefsFake{RefType: graveler.ReferenceTypeBranch, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, nil, ), expected: []*graveler.ValueRecord{{Key: graveler.Key("bar"), Value: &graveler.Value{}}, {Key: graveler.Key("foo"), Value: &graveler.Value{}}}, }, @@ -85,7 +85,7 @@ func TestGraveler_List(t *testing.T) { name: "same path different file", r: graveler.NewGraveler(branchLocker, &testutil.CommittedFake{ValueIterator: testutil.NewValueIteratorFake([]graveler.ValueRecord{{Key: graveler.Key("foo"), Value: &graveler.Value{Identity: []byte("original")}}})}, &testutil.StagingFake{ValueIterator: testutil.NewValueIteratorFake([]graveler.ValueRecord{{Key: graveler.Key("foo"), Value: &graveler.Value{Identity: []byte("other")}}})}, - &testutil.RefsFake{RefType: graveler.ReferenceTypeBranch, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, + &testutil.RefsFake{RefType: graveler.ReferenceTypeBranch, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, nil, ), expected: []*graveler.ValueRecord{{Key: graveler.Key("foo"), Value: &graveler.Value{Identity: []byte("other")}}}, }, @@ -93,7 +93,7 @@ func TestGraveler_List(t *testing.T) { name: "one committed one staged no paths - with prefix", r: graveler.NewGraveler(branchLocker, &testutil.CommittedFake{ValueIterator: testutil.NewValueIteratorFake([]graveler.ValueRecord{{Key: graveler.Key("prefix/foo"), Value: &graveler.Value{}}})}, &testutil.StagingFake{ValueIterator: testutil.NewValueIteratorFake([]graveler.ValueRecord{{Key: graveler.Key("prefix/bar"), Value: &graveler.Value{}}})}, - &testutil.RefsFake{RefType: graveler.ReferenceTypeBranch, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, + &testutil.RefsFake{RefType: graveler.ReferenceTypeBranch, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, nil, ), expected: []*graveler.ValueRecord{{Key: graveler.Key("prefix/bar"), Value: &graveler.Value{}}, {Key: graveler.Key("prefix/foo"), Value: &graveler.Value{}}}, }, @@ -133,54 +133,54 @@ func TestGraveler_Get(t *testing.T) { { name: "commit - exists", r: graveler.NewGraveler(branchLocker, &testutil.CommittedFake{ValuesByKey: map[string]*graveler.Value{"key": {Identity: []byte("committed")}}}, nil, - &testutil.RefsFake{RefType: graveler.ReferenceTypeCommit, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, + &testutil.RefsFake{RefType: graveler.ReferenceTypeCommit, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, nil, ), expectedValueResult: graveler.Value{Identity: []byte("committed")}, }, { name: "commit - not found", r: graveler.NewGraveler(branchLocker, &testutil.CommittedFake{Err: graveler.ErrNotFound}, nil, - &testutil.RefsFake{RefType: graveler.ReferenceTypeCommit, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, + &testutil.RefsFake{RefType: graveler.ReferenceTypeCommit, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, nil, ), expectedErr: graveler.ErrNotFound, }, { name: "commit - error", r: graveler.NewGraveler(branchLocker, &testutil.CommittedFake{Err: errTest}, nil, - &testutil.RefsFake{RefType: graveler.ReferenceTypeCommit, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, + &testutil.RefsFake{RefType: graveler.ReferenceTypeCommit, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, nil, ), expectedErr: errTest, }, { name: "branch - only staged", r: graveler.NewGraveler(branchLocker, &testutil.CommittedFake{Err: graveler.ErrNotFound}, &testutil.StagingFake{Value: &graveler.Value{Identity: []byte("staged")}}, - &testutil.RefsFake{RefType: graveler.ReferenceTypeBranch, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, + &testutil.RefsFake{RefType: graveler.ReferenceTypeBranch, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, nil, ), expectedValueResult: graveler.Value{Identity: []byte("staged")}, }, { name: "branch - committed and staged", r: graveler.NewGraveler(branchLocker, &testutil.CommittedFake{ValuesByKey: map[string]*graveler.Value{"key": {Identity: []byte("committed")}}}, &testutil.StagingFake{Value: &graveler.Value{Identity: []byte("staged")}}, - &testutil.RefsFake{RefType: graveler.ReferenceTypeBranch, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, + &testutil.RefsFake{RefType: graveler.ReferenceTypeBranch, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, nil, ), expectedValueResult: graveler.Value{Identity: []byte("staged")}, }, { name: "branch - only committed", r: graveler.NewGraveler(branchLocker, &testutil.CommittedFake{ValuesByKey: map[string]*graveler.Value{"key": {Identity: []byte("committed")}}}, &testutil.StagingFake{Err: graveler.ErrNotFound}, - &testutil.RefsFake{RefType: graveler.ReferenceTypeBranch, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, + &testutil.RefsFake{RefType: graveler.ReferenceTypeBranch, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, nil, ), expectedValueResult: graveler.Value{Identity: []byte("committed")}, }, { name: "branch - tombstone", r: graveler.NewGraveler(branchLocker, &testutil.CommittedFake{ValuesByKey: map[string]*graveler.Value{"key": {Identity: []byte("committed")}}}, &testutil.StagingFake{Value: nil}, - &testutil.RefsFake{RefType: graveler.ReferenceTypeBranch, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, + &testutil.RefsFake{RefType: graveler.ReferenceTypeBranch, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, nil, ), expectedErr: graveler.ErrNotFound, }, { name: "branch - staged return error", r: graveler.NewGraveler(branchLocker, &testutil.CommittedFake{}, &testutil.StagingFake{Err: errTest}, - &testutil.RefsFake{RefType: graveler.ReferenceTypeBranch, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, + &testutil.RefsFake{RefType: graveler.ReferenceTypeBranch, Commits: map[graveler.CommitID]*graveler.Commit{"": {}}}, nil, ), expectedErr: errTest, }, @@ -216,7 +216,7 @@ func TestGraveler_DiffUncommitted(t *testing.T) { name: "no changes", r: graveler.NewGraveler(branchLocker, &testutil.CommittedFake{ValueIterator: testutil.NewValueIteratorFake([]graveler.ValueRecord{{Key: graveler.Key("foo/one"), Value: &graveler.Value{}}})}, &testutil.StagingFake{ValueIterator: testutil.NewValueIteratorFake([]graveler.ValueRecord{})}, - &testutil.RefsFake{Branch: &graveler.Branch{CommitID: "c1"}, Commits: map[graveler.CommitID]*graveler.Commit{"c1": {MetaRangeID: "mri1"}}}, + &testutil.RefsFake{Branch: &graveler.Branch{CommitID: "c1"}, Commits: map[graveler.CommitID]*graveler.Commit{"c1": {MetaRangeID: "mri1"}}}, nil, ), amount: 10, expectedDiff: testutil.NewDiffIter([]graveler.Diff{}), @@ -225,7 +225,7 @@ func TestGraveler_DiffUncommitted(t *testing.T) { name: "added one", r: graveler.NewGraveler(branchLocker, &testutil.CommittedFake{ValueIterator: testutil.NewValueIteratorFake([]graveler.ValueRecord{})}, &testutil.StagingFake{ValueIterator: testutil.NewValueIteratorFake([]graveler.ValueRecord{{Key: graveler.Key("foo/one"), Value: &graveler.Value{}}})}, - &testutil.RefsFake{Branch: &graveler.Branch{CommitID: "c1"}, Commits: map[graveler.CommitID]*graveler.Commit{"c1": {MetaRangeID: "mri1"}}}, + &testutil.RefsFake{Branch: &graveler.Branch{CommitID: "c1"}, Commits: map[graveler.CommitID]*graveler.Commit{"c1": {MetaRangeID: "mri1"}}}, nil, ), amount: 10, expectedDiff: testutil.NewDiffIter([]graveler.Diff{{ @@ -238,7 +238,7 @@ func TestGraveler_DiffUncommitted(t *testing.T) { name: "changed one", r: graveler.NewGraveler(branchLocker, &testutil.CommittedFake{ValueIterator: testutil.NewValueIteratorFake([]graveler.ValueRecord{{Key: graveler.Key("foo/one"), Value: &graveler.Value{Identity: []byte("one")}}}), ValuesByKey: map[string]*graveler.Value{"foo/one": {Identity: []byte("one")}}}, &testutil.StagingFake{ValueIterator: testutil.NewValueIteratorFake([]graveler.ValueRecord{{Key: graveler.Key("foo/one"), Value: &graveler.Value{Identity: []byte("one_changed")}}})}, - &testutil.RefsFake{Branch: &graveler.Branch{CommitID: "c1"}, Commits: map[graveler.CommitID]*graveler.Commit{"c1": {MetaRangeID: "mri1"}}}, + &testutil.RefsFake{Branch: &graveler.Branch{CommitID: "c1"}, Commits: map[graveler.CommitID]*graveler.Commit{"c1": {MetaRangeID: "mri1"}}}, nil, ), amount: 10, expectedDiff: testutil.NewDiffIter([]graveler.Diff{{ @@ -251,7 +251,7 @@ func TestGraveler_DiffUncommitted(t *testing.T) { name: "removed one", r: graveler.NewGraveler(branchLocker, &testutil.CommittedFake{ValueIterator: testutil.NewValueIteratorFake([]graveler.ValueRecord{{Key: graveler.Key("foo/one"), Value: &graveler.Value{}}})}, &testutil.StagingFake{ValueIterator: testutil.NewValueIteratorFake([]graveler.ValueRecord{{Key: graveler.Key("foo/one"), Value: nil}})}, - &testutil.RefsFake{Branch: &graveler.Branch{CommitID: "c1"}, Commits: map[graveler.CommitID]*graveler.Commit{"c1": {MetaRangeID: "mri1"}}}, + &testutil.RefsFake{Branch: &graveler.Branch{CommitID: "c1"}, Commits: map[graveler.CommitID]*graveler.Commit{"c1": {MetaRangeID: "mri1"}}}, nil, ), amount: 10, expectedDiff: testutil.NewDiffIter([]graveler.Diff{{ @@ -296,7 +296,7 @@ func TestGraveler_CreateBranch(t *testing.T) { &testutil.RefsFake{ Err: graveler.ErrNotFound, CommitID: "8888888798e3aeface8e62d1c7072a965314b4", - }, + }, nil, ) _, err := gravel.CreateBranch(context.Background(), "", "", "") if err != nil { @@ -307,7 +307,7 @@ func TestGraveler_CreateBranch(t *testing.T) { nil, &testutil.RefsFake{ Branch: &graveler.Branch{}, - }, + }, nil, ) _, err = gravel.CreateBranch(context.Background(), "", "", "") if !errors.Is(err, graveler.ErrBranchExists) { @@ -320,7 +320,7 @@ func TestGraveler_UpdateBranch(t *testing.T) { branchLocker := ref.NewBranchLocker(conn) gravel := graveler.NewGraveler(branchLocker, nil, &testutil.StagingFake{ValueIterator: testutil.NewValueIteratorFake([]graveler.ValueRecord{{Key: graveler.Key("foo/one"), Value: &graveler.Value{}}})}, - &testutil.RefsFake{Branch: &graveler.Branch{}}, + &testutil.RefsFake{Branch: &graveler.Branch{}}, nil, ) _, err := gravel.UpdateBranch(context.Background(), "", "", "") if !errors.Is(err, graveler.ErrConflictFound) { @@ -328,7 +328,7 @@ func TestGraveler_UpdateBranch(t *testing.T) { } gravel = graveler.NewGraveler(branchLocker, nil, &testutil.StagingFake{ValueIterator: testutil.NewValueIteratorFake([]graveler.ValueRecord{})}, - &testutil.RefsFake{Branch: &graveler.Branch{}}, + &testutil.RefsFake{Branch: &graveler.Branch{}}, nil, ) _, err = gravel.UpdateBranch(context.Background(), "", "", "") if err != nil { @@ -469,7 +469,7 @@ func TestGraveler_Commit(t *testing.T) { expectedCommitID := graveler.CommitID("expectedCommitId") expectedRangeID := graveler.MetaRangeID("expectedRangeID") values := testutil.NewValueIteratorFake([]graveler.ValueRecord{{Key: nil, Value: nil}}) - g := graveler.NewGraveler(branchLocker, tt.fields.CommittedManager, tt.fields.StagingManager, tt.fields.RefManager) + g := graveler.NewGraveler(branchLocker, tt.fields.CommittedManager, tt.fields.StagingManager, tt.fields.RefManager, nil) got, err := g.Commit(context.Background(), "", "", graveler.CommitParams{ Committer: tt.args.committer, @@ -554,7 +554,7 @@ func TestGraveler_PreCommitHook(t *testing.T) { t.Run(tt.name, func(t *testing.T) { // setup ctx := context.Background() - g := graveler.NewGraveler(branchLocker, committedManager, stagingManager, refManager) + g := graveler.NewGraveler(branchLocker, committedManager, stagingManager, refManager, nil) h := &Hooks{Err: tt.err} if tt.hook { g.SetHooksHandler(h) @@ -647,7 +647,7 @@ func TestGraveler_PreMergeHook(t *testing.T) { t.Run(tt.name, func(t *testing.T) { // setup ctx := context.Background() - g := graveler.NewGraveler(branchLocker, committedManager, stagingManager, refManager) + g := graveler.NewGraveler(branchLocker, committedManager, stagingManager, refManager, nil) h := &Hooks{Err: tt.err} if tt.hook { g.SetHooksHandler(h) @@ -821,7 +821,7 @@ func TestGraveler_AddCommitToBranchHead(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := graveler.NewGraveler(branchLocker, tt.fields.CommittedManager, tt.fields.StagingManager, tt.fields.RefManager) + g := graveler.NewGraveler(branchLocker, tt.fields.CommittedManager, tt.fields.StagingManager, tt.fields.RefManager, nil) got, err := g.AddCommitToBranchHead(context.Background(), expectedRepositoryID, expectedBranchID, graveler.Commit{ Committer: tt.args.committer, Message: tt.args.message, @@ -969,7 +969,7 @@ func TestGraveler_AddCommit(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := graveler.NewGraveler(branchLocker, tt.fields.CommittedManager, tt.fields.StagingManager, tt.fields.RefManager) + g := graveler.NewGraveler(branchLocker, tt.fields.CommittedManager, tt.fields.StagingManager, tt.fields.RefManager, nil) commit := graveler.Commit{ Committer: tt.args.committer, Message: tt.args.message, @@ -1149,7 +1149,7 @@ func TestGraveler_Delete(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := context.Background() - g := graveler.NewGraveler(branchLocker, tt.fields.CommittedManager, tt.fields.StagingManager, tt.fields.RefManager) + g := graveler.NewGraveler(branchLocker, tt.fields.CommittedManager, tt.fields.StagingManager, tt.fields.RefManager, nil) if err := g.Delete(ctx, tt.args.repositoryID, tt.args.branchID, tt.args.key); !errors.Is(err, tt.expectedErr) { t.Errorf("Delete() returned unexpected error. got = %v, expected %v", err, tt.expectedErr) } diff --git a/pkg/graveler/testutil/fakes.go b/pkg/graveler/testutil/fakes.go index ed9289bc899..b6a73ebeb9b 100644 --- a/pkg/graveler/testutil/fakes.go +++ b/pkg/graveler/testutil/fakes.go @@ -204,6 +204,10 @@ type RefsFake struct { Commits map[graveler.CommitID]*graveler.Commit } +func (m *RefsFake) GetExpiredCommits(ctx context.Context, repositoryID graveler.RepositoryID, previouslyExpiredCommits []graveler.CommitID, rules *graveler.RetentionRules) (expired []graveler.CommitID, active []graveler.CommitID, err error) { + panic("implement me") +} + func (m *RefsFake) FillGenerations(ctx context.Context, repositoryID graveler.RepositoryID) error { panic("implement me") } From 0d1db93ee7595aa1e0df75705ff0f6fb5d243d5f Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Mon, 14 Jun 2021 17:47:46 +0300 Subject: [PATCH 13/24] fix sorting in test --- pkg/graveler/ref/expired_commits_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/graveler/ref/expired_commits_test.go b/pkg/graveler/ref/expired_commits_test.go index 1d28e5edcb2..0de373d126e 100644 --- a/pkg/graveler/ref/expired_commits_test.go +++ b/pkg/graveler/ref/expired_commits_test.go @@ -147,7 +147,6 @@ func TestExpiredCommits(t *testing.T) { t.Run(name, func(t *testing.T) { now := time.Now() branchRecords := make([]*graveler.BranchRecord, 0, len(tst.headsRetentionDays)) - expirationDates := make(map[string]time.Time) ctrl := gomock.NewController(t) refManagerMock := mock.NewMockRefManager(ctrl) ctx := context.Background() @@ -160,7 +159,8 @@ func TestExpiredCommits(t *testing.T) { retentionRules.BranchRetentionDays[graveler.BranchID(head)] = retentionDays } sort.Slice(branchRecords, func(i, j int) bool { - return expirationDates[string(branchRecords[i].CommitID)].Before(expirationDates[string(branchRecords[j].CommitID)]) + // start with the branch with the strictest retention rules + return retentionRules.BranchRetentionDays[branchRecords[i].BranchID] > retentionRules.BranchRetentionDays[branchRecords[j].BranchID] }) branchIterator := gtestutil.NewFakeBranchIterator(branchRecords) refManagerMock.EXPECT().ListBranches(ctx, graveler.RepositoryID("test")).Return(branchIterator, nil) @@ -170,7 +170,7 @@ func TestExpiredCommits(t *testing.T) { id := graveler.CommitID(commitID) commitMap[id] = &graveler.Commit{Message: commitID, Parents: testCommit.parents, CreationDate: now.AddDate(0, 0, -testCommit.daysPassed)} if !previouslyExpired[id] { - refManagerMock.EXPECT().GetCommit(ctx, graveler.RepositoryID("test"), id).Return(commitMap[id], nil) + refManagerMock.EXPECT().GetCommit(ctx, graveler.RepositoryID("test"), id).Return(commitMap[id], nil).Times(1) } } finder := ExpiredCommitsFinder{ From 8d1b96f45b98d66163b4bfa2558e26bfc551c981 Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Mon, 14 Jun 2021 17:54:20 +0300 Subject: [PATCH 14/24] gen files --- .../docs/GarbageCollectionPrepareRequest.md | 13 ++ .../GarbageCollectionPrepareRequest.java | 98 ++++++++++ .../GarbageCollectionPrepareRequestTest.java | 51 ++++++ .../docs/GarbageCollectionPrepareRequest.md | 11 ++ .../garbage_collection_prepare_request.py | 167 ++++++++++++++++++ ...test_garbage_collection_prepare_request.py | 36 ++++ 6 files changed, 376 insertions(+) create mode 100644 clients/java/docs/GarbageCollectionPrepareRequest.md create mode 100644 clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareRequest.java create mode 100644 clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionPrepareRequestTest.java create mode 100644 clients/python/docs/GarbageCollectionPrepareRequest.md create mode 100644 clients/python/lakefs_client/model/garbage_collection_prepare_request.py create mode 100644 clients/python/test/test_garbage_collection_prepare_request.py diff --git a/clients/java/docs/GarbageCollectionPrepareRequest.md b/clients/java/docs/GarbageCollectionPrepareRequest.md new file mode 100644 index 00000000000..b2e0cbd9ea7 --- /dev/null +++ b/clients/java/docs/GarbageCollectionPrepareRequest.md @@ -0,0 +1,13 @@ + + +# GarbageCollectionPrepareRequest + + +## Properties + +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**previousResultPath** | **String** | path to the result of a previous successful GC job | [optional] + + + diff --git a/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareRequest.java b/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareRequest.java new file mode 100644 index 00000000000..4dfe2e54455 --- /dev/null +++ b/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareRequest.java @@ -0,0 +1,98 @@ +/* + * lakeFS API + * lakeFS HTTP API + * + * The version of the OpenAPI document: 0.1.0 + * + * + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. + */ + + +package io.lakefs.clients.api.model; + +import java.util.Objects; +import java.util.Arrays; +import com.google.gson.TypeAdapter; +import com.google.gson.annotations.JsonAdapter; +import com.google.gson.annotations.SerializedName; +import com.google.gson.stream.JsonReader; +import com.google.gson.stream.JsonWriter; +import io.swagger.annotations.ApiModel; +import io.swagger.annotations.ApiModelProperty; +import java.io.IOException; + +/** + * GarbageCollectionPrepareRequest + */ +@javax.annotation.Generated(value = "org.openapitools.codegen.languages.JavaClientCodegen") +public class GarbageCollectionPrepareRequest { + public static final String SERIALIZED_NAME_PREVIOUS_RESULT_PATH = "previous_result_path"; + @SerializedName(SERIALIZED_NAME_PREVIOUS_RESULT_PATH) + private String previousResultPath; + + + public GarbageCollectionPrepareRequest previousResultPath(String previousResultPath) { + + this.previousResultPath = previousResultPath; + return this; + } + + /** + * path to the result of a previous successful GC job + * @return previousResultPath + **/ + @javax.annotation.Nullable + @ApiModelProperty(value = "path to the result of a previous successful GC job") + + public String getPreviousResultPath() { + return previousResultPath; + } + + + public void setPreviousResultPath(String previousResultPath) { + this.previousResultPath = previousResultPath; + } + + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + GarbageCollectionPrepareRequest garbageCollectionPrepareRequest = (GarbageCollectionPrepareRequest) o; + return Objects.equals(this.previousResultPath, garbageCollectionPrepareRequest.previousResultPath); + } + + @Override + public int hashCode() { + return Objects.hash(previousResultPath); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("class GarbageCollectionPrepareRequest {\n"); + sb.append(" previousResultPath: ").append(toIndentedString(previousResultPath)).append("\n"); + sb.append("}"); + return sb.toString(); + } + + /** + * Convert the given object to string with each line indented by 4 spaces + * (except the first line). + */ + private String toIndentedString(Object o) { + if (o == null) { + return "null"; + } + return o.toString().replace("\n", "\n "); + } + +} + diff --git a/clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionPrepareRequestTest.java b/clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionPrepareRequestTest.java new file mode 100644 index 00000000000..eafe8384424 --- /dev/null +++ b/clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionPrepareRequestTest.java @@ -0,0 +1,51 @@ +/* + * lakeFS API + * lakeFS HTTP API + * + * The version of the OpenAPI document: 0.1.0 + * + * + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. + */ + + +package io.lakefs.clients.api.model; + +import com.google.gson.TypeAdapter; +import com.google.gson.annotations.JsonAdapter; +import com.google.gson.annotations.SerializedName; +import com.google.gson.stream.JsonReader; +import com.google.gson.stream.JsonWriter; +import io.swagger.annotations.ApiModel; +import io.swagger.annotations.ApiModelProperty; +import java.io.IOException; +import org.junit.Assert; +import org.junit.Ignore; +import org.junit.Test; + + +/** + * Model tests for GarbageCollectionPrepareRequest + */ +public class GarbageCollectionPrepareRequestTest { + private final GarbageCollectionPrepareRequest model = new GarbageCollectionPrepareRequest(); + + /** + * Model tests for GarbageCollectionPrepareRequest + */ + @Test + public void testGarbageCollectionPrepareRequest() { + // TODO: test GarbageCollectionPrepareRequest + } + + /** + * Test the property 'previousResultPath' + */ + @Test + public void previousResultPathTest() { + // TODO: test previousResultPath + } + +} diff --git a/clients/python/docs/GarbageCollectionPrepareRequest.md b/clients/python/docs/GarbageCollectionPrepareRequest.md new file mode 100644 index 00000000000..e1af9eace17 --- /dev/null +++ b/clients/python/docs/GarbageCollectionPrepareRequest.md @@ -0,0 +1,11 @@ +# GarbageCollectionPrepareRequest + + +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**previous_result_path** | **str** | path to the result of a previous successful GC job | [optional] + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/clients/python/lakefs_client/model/garbage_collection_prepare_request.py b/clients/python/lakefs_client/model/garbage_collection_prepare_request.py new file mode 100644 index 00000000000..a3a425c606b --- /dev/null +++ b/clients/python/lakefs_client/model/garbage_collection_prepare_request.py @@ -0,0 +1,167 @@ +""" + lakeFS API + + lakeFS HTTP API # noqa: E501 + + The version of the OpenAPI document: 0.1.0 + Contact: services@treeverse.io + Generated by: https://openapi-generator.tech +""" + + +import re # noqa: F401 +import sys # noqa: F401 + +from lakefs_client.model_utils import ( # noqa: F401 + ApiTypeError, + ModelComposed, + ModelNormal, + ModelSimple, + cached_property, + change_keys_js_to_python, + convert_js_args_to_python_args, + date, + datetime, + file_type, + none_type, + validate_get_composed_info, +) + + +class GarbageCollectionPrepareRequest(ModelNormal): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + + Attributes: + allowed_values (dict): The key is the tuple path to the attribute + and the for var_name this is (var_name,). The value is a dict + with a capitalized key describing the allowed value and an allowed + value. These dicts store the allowed enum values. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + discriminator_value_class_map (dict): A dict to go from the discriminator + variable value to the discriminator class name. + validations (dict): The key is the tuple path to the attribute + and the for var_name this is (var_name,). The value is a dict + that stores validations for max_length, min_length, max_items, + min_items, exclusive_maximum, inclusive_maximum, exclusive_minimum, + inclusive_minimum, and regex. + additional_properties_type (tuple): A tuple of classes accepted + as additional properties values. + """ + + allowed_values = { + } + + validations = { + } + + additional_properties_type = None + + _nullable = False + + @cached_property + def openapi_types(): + """ + This must be a method because a model may have properties that are + of type self, this must run after the class is loaded + + Returns + openapi_types (dict): The key is attribute name + and the value is attribute type. + """ + return { + 'previous_result_path': (str,), # noqa: E501 + } + + @cached_property + def discriminator(): + return None + + + attribute_map = { + 'previous_result_path': 'previous_result_path', # noqa: E501 + } + + _composed_schemas = {} + + required_properties = set([ + '_data_store', + '_check_type', + '_spec_property_naming', + '_path_to_item', + '_configuration', + '_visited_composed_classes', + ]) + + @convert_js_args_to_python_args + def __init__(self, *args, **kwargs): # noqa: E501 + """GarbageCollectionPrepareRequest - a model defined in OpenAPI + + Keyword Args: + _check_type (bool): if True, values for parameters in openapi_types + will be type checked and a TypeError will be + raised if the wrong type is input. + Defaults to True + _path_to_item (tuple/list): This is a list of keys or values to + drill down to the model in received_data + when deserializing a response + _spec_property_naming (bool): True if the variable names in the input data + are serialized names, as specified in the OpenAPI document. + False if the variable names in the input data + are pythonic names, e.g. snake case (default) + _configuration (Configuration): the instance to use when + deserializing a file_type parameter. + If passed, type conversion is attempted + If omitted no type conversion is done. + _visited_composed_classes (tuple): This stores a tuple of + classes that we have traveled through so that + if we see that class again we will not use its + discriminator again. + When traveling through a discriminator, the + composed schema that is + is traveled through is added to this set. + For example if Animal has a discriminator + petType and we pass in "Dog", and the class Dog + allOf includes Animal, we move through Animal + once using the discriminator, and pick Dog. + Then in Dog, we will make an instance of the + Animal class but this time we won't travel + through its discriminator because we passed in + _visited_composed_classes = (Animal,) + previous_result_path (str): path to the result of a previous successful GC job. [optional] # noqa: E501 + """ + + _check_type = kwargs.pop('_check_type', True) + _spec_property_naming = kwargs.pop('_spec_property_naming', False) + _path_to_item = kwargs.pop('_path_to_item', ()) + _configuration = kwargs.pop('_configuration', None) + _visited_composed_classes = kwargs.pop('_visited_composed_classes', ()) + + if args: + raise ApiTypeError( + "Invalid positional arguments=%s passed to %s. Remove those invalid positional arguments." % ( + args, + self.__class__.__name__, + ), + path_to_item=_path_to_item, + valid_classes=(self.__class__,), + ) + + self._data_store = {} + self._check_type = _check_type + self._spec_property_naming = _spec_property_naming + self._path_to_item = _path_to_item + self._configuration = _configuration + self._visited_composed_classes = _visited_composed_classes + (self.__class__,) + + for var_name, var_value in kwargs.items(): + if var_name not in self.attribute_map and \ + self._configuration is not None and \ + self._configuration.discard_unknown_keys and \ + self.additional_properties_type is None: + # discard variable. + continue + setattr(self, var_name, var_value) diff --git a/clients/python/test/test_garbage_collection_prepare_request.py b/clients/python/test/test_garbage_collection_prepare_request.py new file mode 100644 index 00000000000..f8d6443e44b --- /dev/null +++ b/clients/python/test/test_garbage_collection_prepare_request.py @@ -0,0 +1,36 @@ +""" + lakeFS API + + lakeFS HTTP API # noqa: E501 + + The version of the OpenAPI document: 0.1.0 + Contact: services@treeverse.io + Generated by: https://openapi-generator.tech +""" + + +import sys +import unittest + +import lakefs_client +from lakefs_client.model.garbage_collection_prepare_request import GarbageCollectionPrepareRequest + + +class TestGarbageCollectionPrepareRequest(unittest.TestCase): + """GarbageCollectionPrepareRequest unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def testGarbageCollectionPrepareRequest(self): + """Test GarbageCollectionPrepareRequest""" + # FIXME: construct object with mandatory attributes with example values + # model = GarbageCollectionPrepareRequest() # noqa: E501 + pass + + +if __name__ == '__main__': + unittest.main() From 88edd93079c11ece5f3cd28de3f52984bb013357 Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Tue, 15 Jun 2021 15:44:59 +0300 Subject: [PATCH 15/24] rules api --- api/swagger.yml | 64 ++++++++++++++++++++ pkg/api/controller.go | 46 ++++++++++++++ pkg/catalog/catalog.go | 9 +++ pkg/catalog/interface.go | 5 +- pkg/ddl/000029_retention_auth.down.sql | 15 +++++ pkg/ddl/000029_retention_auth.up.sql | 11 ++++ pkg/graveler/graveler.go | 24 +++++++- pkg/graveler/retention/garbage_collection.go | 6 +- pkg/permissions/actions.go | 3 + 9 files changed, 176 insertions(+), 7 deletions(-) create mode 100644 pkg/ddl/000029_retention_auth.down.sql create mode 100644 pkg/ddl/000029_retention_auth.up.sql diff --git a/api/swagger.yml b/api/swagger.yml index 60f9600311d..9bd04840bb9 100644 --- a/api/swagger.yml +++ b/api/swagger.yml @@ -823,6 +823,7 @@ components: previous_result_path: type: string description: path to the result of a previous successful GC job + GarbageCollectionCommits: type: object properties: @@ -830,6 +831,30 @@ components: type: string description: path to a dataset of commits + GarbageCollectionRule: + type: object + properties: + branch_id: + type: string + retention_days: + type: integer + required: + - branch_id + - retention_days + + GarbageCollectionRules: + type: object + properties: + default_retention_days: + type: integer + branches: + type: array + items: + $ref: '#/components/schemas/GarbageCollectionRule' + required: + - default_retention_days + - branches + paths: /setup_lakefs: post: @@ -2842,6 +2867,45 @@ paths: $ref: "#/components/responses/NotFound" default: $ref: "#/components/responses/ServerError" + /repositories/{repository}/gc/rules: + parameters: + - in: path + name: repository + required: true + schema: + type: string + get: + tags: + - retention + operationId: getGarbageCollectionRules + responses: + 200: + description: gc rule list + content: + application/json: + schema: + $ref: "#/components/schemas/GarbageCollectionRules" + 401: + $ref: "#/components/responses/Unauthorized" + default: + $ref: "#/components/responses/ServerError" + post: + tags: + - retention + operationId: set garbage collection rules + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/GarbageCollectionRules" + responses: + 204: + description: set garbage collection rules successfully + default: + $ref: "#/components/responses/ServerError" + + /repositories/{repository}/gc/prepare: parameters: - in: path diff --git a/pkg/api/controller.go b/pkg/api/controller.go index 784f1d9ecfe..606c4435be0 100644 --- a/pkg/api/controller.go +++ b/pkg/api/controller.go @@ -1914,6 +1914,52 @@ func (c *Controller) GetCommit(w http.ResponseWriter, r *http.Request, repositor writeResponse(w, http.StatusOK, response) } +func (c *Controller) GetGarbageCollectionRules(w http.ResponseWriter, r *http.Request, repository string) { + if !c.authorize(w, r, []permissions.Permission{ + { + Action: permissions.GetGarbageCollectionRules, + Resource: permissions.RepoArn(repository), + }, + }) { + return + } + ctx := r.Context() + rules, err := c.Catalog.GetRetentionRules(ctx, repository) + if handleAPIError(w, err) { + return + } + resp := GarbageCollectionRules{} + resp.DefaultRetentionDays = rules.DefaultRetentionDays + for branchID, retentionDays := range rules.BranchRetentionDays { + resp.Branches = append(resp.Branches, GarbageCollectionRule{BranchId: branchID.String(), RetentionDays: retentionDays}) + } + writeResponse(w, http.StatusOK, resp) +} + +func (c *Controller) SetGarbageCollectionRules(w http.ResponseWriter, r *http.Request, body SetGarbageCollectionRulesJSONRequestBody, repository string) { + if !c.authorize(w, r, []permissions.Permission{ + { + Action: permissions.GetGarbageCollectionRules, + Resource: permissions.RepoArn(repository), + }, + }) { + return + } + ctx := r.Context() + rules := &graveler.RetentionRules{ + DefaultRetentionDays: body.DefaultRetentionDays, + BranchRetentionDays: make(map[graveler.BranchID]int), + } + for _, rule := range body.Branches { + rules.BranchRetentionDays[graveler.BranchID(rule.BranchId)] = rule.RetentionDays + } + err := c.Catalog.SetRetentionRules(ctx, repository, rules) + if handleAPIError(w, err) { + return + } + writeResponse(w, http.StatusNoContent, nil) +} + func (c *Controller) PrepareGarbageCollectionCommits(w http.ResponseWriter, r *http.Request, body PrepareGarbageCollectionCommitsJSONRequestBody, repository string) { if !c.authorize(w, r, []permissions.Permission{ { diff --git a/pkg/catalog/catalog.go b/pkg/catalog/catalog.go index 2e025f45706..75c0044326b 100644 --- a/pkg/catalog/catalog.go +++ b/pkg/catalog/catalog.go @@ -1180,6 +1180,14 @@ func (c *Catalog) GetRange(ctx context.Context, repositoryID, rangeID string) (g return c.Store.GetRange(ctx, graveler.RepositoryID(repositoryID), graveler.RangeID(rangeID)) } +func (c *Catalog) GetRetentionRules(ctx context.Context, repositoryID string) (*graveler.RetentionRules, error) { + return c.Store.GetRetentionRules(ctx, graveler.RepositoryID(repositoryID)) +} + +func (c *Catalog) SetRetentionRules(ctx context.Context, repositoryID string, rules *graveler.RetentionRules) error { + return c.Store.SetRetentionRules(ctx, graveler.RepositoryID(repositoryID), rules) +} + func (c *Catalog) PrepareExpiredCommits(ctx context.Context, repository string, previousResultPath string) (string, error) { repositoryID := graveler.RepositoryID(repository) if err := Validate([]ValidateArg{ @@ -1228,6 +1236,7 @@ func (c *Catalog) PrepareExpiredCommits(ctx context.Context, repository string, return "", err } } + csvWriter.Flush() commitsStr := b.String() runID := uuid.New().String() path := fmt.Sprintf("_lakefs/retention/commits/run_id=%s/commits.csv", runID) diff --git a/pkg/catalog/interface.go b/pkg/catalog/interface.go index 6e88b5814b2..e9f9428615d 100644 --- a/pkg/catalog/interface.go +++ b/pkg/catalog/interface.go @@ -94,7 +94,6 @@ type Interface interface { Commit(ctx context.Context, repository, branch string, message string, committer string, metadata Metadata) (*CommitLog, error) GetCommit(ctx context.Context, repository, reference string) (*CommitLog, error) ListCommits(ctx context.Context, repository, branch string, fromReference string, limit int) ([]*CommitLog, bool, error) - PrepareExpiredCommits(ctx context.Context, repository string, previousResultPath string) (string, error) // Revert creates a reverse patch to the given commit, and applies it as a new commit on the given branch. Revert(ctx context.Context, repository, branch string, params RevertParams) error @@ -117,5 +116,9 @@ type Interface interface { GetMetaRange(ctx context.Context, repositoryID, metaRangeID string) (graveler.MetaRangeInfo, error) GetRange(ctx context.Context, repositoryID, rangeID string) (graveler.RangeInfo, error) + GetRetentionRules(ctx context.Context, repositoryID string) (*graveler.RetentionRules, error) + SetRetentionRules(ctx context.Context, repositoryID string, rules *graveler.RetentionRules) error + PrepareExpiredCommits(ctx context.Context, repositoryID string, previousResultPath string) (string, error) + io.Closer } diff --git a/pkg/ddl/000029_retention_auth.down.sql b/pkg/ddl/000029_retention_auth.down.sql new file mode 100644 index 00000000000..fa10fbb2a00 --- /dev/null +++ b/pkg/ddl/000029_retention_auth.down.sql @@ -0,0 +1,15 @@ +BEGIN; + +UPDATE auth_policies +SET statement = statement - (SELECT ordinality-1 AS index + FROM auth_policies CROSS JOIN jsonb_array_elements(statement) WITH ORDINALITY + WHERE display_name = 'RepoManagementReadAll' AND value = '{"Action": ["retention:Get*"], "Effect": "allow", "Resource": "*"}')::int +WHERE display_name = 'RepoManagementReadAll' AND statement @> '[{"Action": ["retention:Get*"], "Effect": "allow", "Resource": "*"}]'::jsonb; + +UPDATE auth_policies +SET statement = statement - (SELECT ordinality-1 AS index + FROM auth_policies CROSS JOIN jsonb_array_elements(statement) WITH ORDINALITY + WHERE display_name = 'RepoManagementFullAccess' AND value = '{"Action": ["retention:*"], "Effect": "allow", "Resource": "*"}')::int +WHERE display_name = 'RepoManagementFullAccess' AND statement @> '[{"Action": ["retention:*"], "Effect": "allow", "Resource": "*"}]'::jsonb; + +COMMIT; diff --git a/pkg/ddl/000029_retention_auth.up.sql b/pkg/ddl/000029_retention_auth.up.sql new file mode 100644 index 00000000000..5a10e2abf8f --- /dev/null +++ b/pkg/ddl/000029_retention_auth.up.sql @@ -0,0 +1,11 @@ +BEGIN; + +UPDATE auth_policies +SET statement = statement || '[{"Action": ["retention:Get*"], "Effect": "allow", "Resource": "*"}]'::jsonb +WHERE display_name = 'RepoManagementReadAll' AND NOT statement @> '[{"Action": ["retention:Get*"], "Effect": "allow", "Resource": "*"}]'::jsonb; + +UPDATE auth_policies +SET statement = statement || '[{"Action": ["retention:*"], "Effect": "allow", "Resource": "*"}]'::jsonb +WHERE display_name = 'RepoManagementFullAccess' AND NOT statement @> '[{"Action": ["retention:*"], "Effect": "allow", "Resource": "*"}]'::jsonb; + +COMMIT; diff --git a/pkg/graveler/graveler.go b/pkg/graveler/graveler.go index a25ba9b95ee..a6a1febdbc5 100644 --- a/pkg/graveler/graveler.go +++ b/pkg/graveler/graveler.go @@ -371,6 +371,10 @@ type VersionController interface { // repositoryID. GetStagingToken(ctx context.Context, repositoryID RepositoryID, branchID BranchID) (*StagingToken, error) + GetRetentionRules(ctx context.Context, repositoryID RepositoryID) (*RetentionRules, error) + + SetRetentionRules(ctx context.Context, repositoryID RepositoryID, rules *RetentionRules) error + // GetExpiredCommits returns the sets of active and expired commits, according to the branch rules for garbage collection. // The commits in the given set previouslyExpiredCommits will not be scanned. GetExpiredCommits(ctx context.Context, repositoryID RepositoryID, previouslyExpiredCommits []CommitID) (expired []CommitID, active []CommitID, err error) @@ -875,13 +879,26 @@ func (g *Graveler) GetStagingToken(ctx context.Context, repositoryID RepositoryI return &branch.StagingToken, nil } -func (g *Graveler) GetExpiredCommits(ctx context.Context, repositoryID RepositoryID, previouslyExpiredCommits []CommitID) (expired []CommitID, active []CommitID, err error) { +func (g *Graveler) GetRetentionRules(ctx context.Context, repositoryID RepositoryID) (*RetentionRules, error) { + // TODO use "_lakefs" from configuration repo, err := g.RefManager.GetRepository(ctx, repositoryID) if err != nil { - return nil, nil, err + return nil, err } + return g.retentionRuleManager.GetRules(ctx, fmt.Sprintf("%s/_lakefs/retention/rules/config.json", repo.StorageNamespace)) +} + +func (g *Graveler) SetRetentionRules(ctx context.Context, repositoryID RepositoryID, rules *RetentionRules) error { // TODO use "_lakefs" from configuration - rules, err := g.retentionRuleManager.GetRules(ctx, fmt.Sprintf("%s/_lakefs/retention/rules/config.json", repo.StorageNamespace)) + repo, err := g.RefManager.GetRepository(ctx, repositoryID) + if err != nil { + return err + } + return g.retentionRuleManager.SaveRules(ctx, fmt.Sprintf("%s/_lakefs/retention/rules/config.json", repo.StorageNamespace), rules) +} + +func (g *Graveler) GetExpiredCommits(ctx context.Context, repositoryID RepositoryID, previouslyExpiredCommits []CommitID) (expired []CommitID, active []CommitID, err error) { + rules, err := g.GetRetentionRules(ctx, repositoryID) if err != nil { return nil, nil, fmt.Errorf("get retention rules: %w", err) } @@ -2014,6 +2031,7 @@ func (c *commitValueIterator) Close() { type RetentionRuleManager interface { GetRules(ctx context.Context, rulesConfigurationPath string) (*RetentionRules, error) + SaveRules(ctx context.Context, rulesConfigurationPath string, rules *RetentionRules) error } type RetentionRules struct { diff --git a/pkg/graveler/retention/garbage_collection.go b/pkg/graveler/retention/garbage_collection.go index 14476baa16f..a65dba20556 100644 --- a/pkg/graveler/retention/garbage_collection.go +++ b/pkg/graveler/retention/garbage_collection.go @@ -22,12 +22,12 @@ func (m *RuleManager) GetRules(ctx context.Context, rulesConfigurationPath strin Identifier: rulesConfigurationPath, IdentifierType: block.IdentifierTypeFull, }, -1) - defer func() { - _ = reader.Close() - }() if err != nil { return nil, err } + defer func() { + _ = reader.Close() + }() var rules graveler.RetentionRules err = json.NewDecoder(reader).Decode(&rules) if err != nil { diff --git a/pkg/permissions/actions.go b/pkg/permissions/actions.go index 104208c2690..ac439962c5a 100644 --- a/pkg/permissions/actions.go +++ b/pkg/permissions/actions.go @@ -57,6 +57,9 @@ const ( ListCredentialsAction = "auth:ListCredentials" ReadActionsAction = "ci:ReadAction" + + GetGarbageCollectionRules = "retention:GetGarbageCollectionRules" + SetGarbageCollectionRules = "retention:SetGarbageCollectionRules" ) var serviceSet = map[string]struct{}{ From c8de6a5964401ebf8664bb35e42c95a4ca938f13 Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Tue, 15 Jun 2021 15:56:51 +0300 Subject: [PATCH 16/24] gen --- clients/java/README.md | 4 + clients/java/api/openapi.yaml | 94 +++++++ clients/java/docs/GarbageCollectionRule.md | 14 + clients/java/docs/GarbageCollectionRules.md | 14 + clients/java/docs/RetentionApi.md | 158 +++++++++++ .../io/lakefs/clients/api/RetentionApi.java | 242 +++++++++++++++++ .../api/model/GarbageCollectionRule.java | 125 +++++++++ .../api/model/GarbageCollectionRules.java | 133 +++++++++ .../api/model/GarbageCollectionRuleTest.java | 59 ++++ .../api/model/GarbageCollectionRulesTest.java | 62 +++++ clients/python/.openapi-generator/FILES | 6 + clients/python/README.md | 4 + clients/python/docs/GarbageCollectionRule.md | 12 + clients/python/docs/GarbageCollectionRules.md | 12 + clients/python/docs/RetentionApi.md | 188 +++++++++++++ .../python/lakefs_client/api/retention_api.py | 252 ++++++++++++++++++ .../model/garbage_collection_rule.py | 174 ++++++++++++ .../model/garbage_collection_rules.py | 179 +++++++++++++ .../python/lakefs_client/models/__init__.py | 2 + .../test/test_garbage_collection_rule.py | 36 +++ .../test/test_garbage_collection_rules.py | 38 +++ 21 files changed, 1808 insertions(+) create mode 100644 clients/java/docs/GarbageCollectionRule.md create mode 100644 clients/java/docs/GarbageCollectionRules.md create mode 100644 clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionRule.java create mode 100644 clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionRules.java create mode 100644 clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionRuleTest.java create mode 100644 clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionRulesTest.java create mode 100644 clients/python/docs/GarbageCollectionRule.md create mode 100644 clients/python/docs/GarbageCollectionRules.md create mode 100644 clients/python/lakefs_client/model/garbage_collection_rule.py create mode 100644 clients/python/lakefs_client/model/garbage_collection_rules.py create mode 100644 clients/python/test/test_garbage_collection_rule.py create mode 100644 clients/python/test/test_garbage_collection_rules.py diff --git a/clients/java/README.md b/clients/java/README.md index b51a2a6a20f..05e1da90138 100644 --- a/clients/java/README.md +++ b/clients/java/README.md @@ -190,7 +190,9 @@ Class | Method | HTTP request | Description *RepositoriesApi* | [**deleteRepository**](docs/RepositoriesApi.md#deleteRepository) | **DELETE** /repositories/{repository} | delete repository *RepositoriesApi* | [**getRepository**](docs/RepositoriesApi.md#getRepository) | **GET** /repositories/{repository} | get repository *RepositoriesApi* | [**listRepositories**](docs/RepositoriesApi.md#listRepositories) | **GET** /repositories | list repositories +*RetentionApi* | [**getGarbageCollectionRules**](docs/RetentionApi.md#getGarbageCollectionRules) | **GET** /repositories/{repository}/gc/rules | *RetentionApi* | [**prepareGarbageCollectionCommits**](docs/RetentionApi.md#prepareGarbageCollectionCommits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection +*RetentionApi* | [**setGarbageCollectionRules**](docs/RetentionApi.md#setGarbageCollectionRules) | **POST** /repositories/{repository}/gc/rules | *StagingApi* | [**getPhysicalAddress**](docs/StagingApi.md#getPhysicalAddress) | **GET** /repositories/{repository}/branches/{branch}/staging/backing | get a physical address and a return token to write object to underlying storage *StagingApi* | [**linkPhysicalAddress**](docs/StagingApi.md#linkPhysicalAddress) | **PUT** /repositories/{repository}/branches/{branch}/staging/backing | associate staging on this physical address with a path *TagsApi* | [**createTag**](docs/TagsApi.md#createTag) | **POST** /repositories/{repository}/tags | create tag @@ -218,6 +220,8 @@ Class | Method | HTTP request | Description - [Error](docs/Error.md) - [GarbageCollectionCommits](docs/GarbageCollectionCommits.md) - [GarbageCollectionPrepareRequest](docs/GarbageCollectionPrepareRequest.md) + - [GarbageCollectionRule](docs/GarbageCollectionRule.md) + - [GarbageCollectionRules](docs/GarbageCollectionRules.md) - [Group](docs/Group.md) - [GroupCreation](docs/GroupCreation.md) - [GroupList](docs/GroupList.md) diff --git a/clients/java/api/openapi.yaml b/clients/java/api/openapi.yaml index 29480f1d5ec..f9e9266b5df 100644 --- a/clients/java/api/openapi.yaml +++ b/clients/java/api/openapi.yaml @@ -3738,6 +3738,68 @@ paths: tags: - metadata x-accepts: application/json + /repositories/{repository}/gc/rules: + get: + operationId: getGarbageCollectionRules + parameters: + - explode: false + in: path + name: repository + required: true + schema: + type: string + style: simple + responses: + "200": + content: + application/json: + schema: + $ref: '#/components/schemas/GarbageCollectionRules' + description: gc rule list + "401": + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + description: Unauthorized + default: + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + description: Internal Server Error + tags: + - retention + x-accepts: application/json + post: + operationId: set garbage collection rules + parameters: + - explode: false + in: path + name: repository + required: true + schema: + type: string + style: simple + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/GarbageCollectionRules' + required: true + responses: + "204": + description: set garbage collection rules successfully + default: + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + description: Internal Server Error + tags: + - retention + x-contentType: application/json + x-accepts: application/json /repositories/{repository}/gc/prepare: post: operationId: prepareGarbageCollectionCommits @@ -4972,6 +5034,38 @@ components: description: path to a dataset of commits type: string type: object + GarbageCollectionRule: + example: + branch_id: branch_id + retention_days: 6 + properties: + branch_id: + type: string + retention_days: + type: integer + required: + - branch_id + - retention_days + type: object + GarbageCollectionRules: + example: + branches: + - branch_id: branch_id + retention_days: 6 + - branch_id: branch_id + retention_days: 6 + default_retention_days: 0 + properties: + default_retention_days: + type: integer + branches: + items: + $ref: '#/components/schemas/GarbageCollectionRule' + type: array + required: + - branches + - default_retention_days + type: object inline_object: properties: content: diff --git a/clients/java/docs/GarbageCollectionRule.md b/clients/java/docs/GarbageCollectionRule.md new file mode 100644 index 00000000000..8d71ee5936b --- /dev/null +++ b/clients/java/docs/GarbageCollectionRule.md @@ -0,0 +1,14 @@ + + +# GarbageCollectionRule + + +## Properties + +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**branchId** | **String** | | +**retentionDays** | **Integer** | | + + + diff --git a/clients/java/docs/GarbageCollectionRules.md b/clients/java/docs/GarbageCollectionRules.md new file mode 100644 index 00000000000..47241dd588f --- /dev/null +++ b/clients/java/docs/GarbageCollectionRules.md @@ -0,0 +1,14 @@ + + +# GarbageCollectionRules + + +## Properties + +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**defaultRetentionDays** | **Integer** | | +**branches** | [**List<GarbageCollectionRule>**](GarbageCollectionRule.md) | | + + + diff --git a/clients/java/docs/RetentionApi.md b/clients/java/docs/RetentionApi.md index cdc8efe9f25..c9b956ba3a1 100644 --- a/clients/java/docs/RetentionApi.md +++ b/clients/java/docs/RetentionApi.md @@ -4,9 +4,89 @@ All URIs are relative to *http://localhost/api/v1* Method | HTTP request | Description ------------- | ------------- | ------------- +[**getGarbageCollectionRules**](RetentionApi.md#getGarbageCollectionRules) | **GET** /repositories/{repository}/gc/rules | [**prepareGarbageCollectionCommits**](RetentionApi.md#prepareGarbageCollectionCommits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection +[**setGarbageCollectionRules**](RetentionApi.md#setGarbageCollectionRules) | **POST** /repositories/{repository}/gc/rules | + +# **getGarbageCollectionRules** +> GarbageCollectionRules getGarbageCollectionRules(repository) + + + +### Example +```java +// Import classes: +import io.lakefs.clients.api.ApiClient; +import io.lakefs.clients.api.ApiException; +import io.lakefs.clients.api.Configuration; +import io.lakefs.clients.api.auth.*; +import io.lakefs.clients.api.models.*; +import io.lakefs.clients.api.RetentionApi; + +public class Example { + public static void main(String[] args) { + ApiClient defaultClient = Configuration.getDefaultApiClient(); + defaultClient.setBasePath("http://localhost/api/v1"); + + // Configure HTTP basic authorization: basic_auth + HttpBasicAuth basic_auth = (HttpBasicAuth) defaultClient.getAuthentication("basic_auth"); + basic_auth.setUsername("YOUR USERNAME"); + basic_auth.setPassword("YOUR PASSWORD"); + + // Configure API key authorization: cookie_auth + ApiKeyAuth cookie_auth = (ApiKeyAuth) defaultClient.getAuthentication("cookie_auth"); + cookie_auth.setApiKey("YOUR API KEY"); + // Uncomment the following line to set a prefix for the API key, e.g. "Token" (defaults to null) + //cookie_auth.setApiKeyPrefix("Token"); + + // Configure HTTP bearer authorization: jwt_token + HttpBearerAuth jwt_token = (HttpBearerAuth) defaultClient.getAuthentication("jwt_token"); + jwt_token.setBearerToken("BEARER TOKEN"); + + RetentionApi apiInstance = new RetentionApi(defaultClient); + String repository = "repository_example"; // String | + try { + GarbageCollectionRules result = apiInstance.getGarbageCollectionRules(repository); + System.out.println(result); + } catch (ApiException e) { + System.err.println("Exception when calling RetentionApi#getGarbageCollectionRules"); + System.err.println("Status code: " + e.getCode()); + System.err.println("Reason: " + e.getResponseBody()); + System.err.println("Response headers: " + e.getResponseHeaders()); + e.printStackTrace(); + } + } +} +``` + +### Parameters + +Name | Type | Description | Notes +------------- | ------------- | ------------- | ------------- + **repository** | **String**| | + +### Return type + +[**GarbageCollectionRules**](GarbageCollectionRules.md) + +### Authorization + +[basic_auth](../README.md#basic_auth), [cookie_auth](../README.md#cookie_auth), [jwt_token](../README.md#jwt_token) + +### HTTP request headers + + - **Content-Type**: Not defined + - **Accept**: application/json + +### HTTP response details +| Status code | Description | Response headers | +|-------------|-------------|------------------| +**200** | gc rule list | - | +**401** | Unauthorized | - | +**0** | Internal Server Error | - | + # **prepareGarbageCollectionCommits** > GarbageCollectionCommits prepareGarbageCollectionCommits(repository, garbageCollectionPrepareRequest) @@ -88,3 +168,81 @@ Name | Type | Description | Notes **404** | Resource Not Found | - | **0** | Internal Server Error | - | + +# **setGarbageCollectionRules** +> setGarbageCollectionRules(repository, garbageCollectionRules) + + + +### Example +```java +// Import classes: +import io.lakefs.clients.api.ApiClient; +import io.lakefs.clients.api.ApiException; +import io.lakefs.clients.api.Configuration; +import io.lakefs.clients.api.auth.*; +import io.lakefs.clients.api.models.*; +import io.lakefs.clients.api.RetentionApi; + +public class Example { + public static void main(String[] args) { + ApiClient defaultClient = Configuration.getDefaultApiClient(); + defaultClient.setBasePath("http://localhost/api/v1"); + + // Configure HTTP basic authorization: basic_auth + HttpBasicAuth basic_auth = (HttpBasicAuth) defaultClient.getAuthentication("basic_auth"); + basic_auth.setUsername("YOUR USERNAME"); + basic_auth.setPassword("YOUR PASSWORD"); + + // Configure API key authorization: cookie_auth + ApiKeyAuth cookie_auth = (ApiKeyAuth) defaultClient.getAuthentication("cookie_auth"); + cookie_auth.setApiKey("YOUR API KEY"); + // Uncomment the following line to set a prefix for the API key, e.g. "Token" (defaults to null) + //cookie_auth.setApiKeyPrefix("Token"); + + // Configure HTTP bearer authorization: jwt_token + HttpBearerAuth jwt_token = (HttpBearerAuth) defaultClient.getAuthentication("jwt_token"); + jwt_token.setBearerToken("BEARER TOKEN"); + + RetentionApi apiInstance = new RetentionApi(defaultClient); + String repository = "repository_example"; // String | + GarbageCollectionRules garbageCollectionRules = new GarbageCollectionRules(); // GarbageCollectionRules | + try { + apiInstance.setGarbageCollectionRules(repository, garbageCollectionRules); + } catch (ApiException e) { + System.err.println("Exception when calling RetentionApi#setGarbageCollectionRules"); + System.err.println("Status code: " + e.getCode()); + System.err.println("Reason: " + e.getResponseBody()); + System.err.println("Response headers: " + e.getResponseHeaders()); + e.printStackTrace(); + } + } +} +``` + +### Parameters + +Name | Type | Description | Notes +------------- | ------------- | ------------- | ------------- + **repository** | **String**| | + **garbageCollectionRules** | [**GarbageCollectionRules**](GarbageCollectionRules.md)| | + +### Return type + +null (empty response body) + +### Authorization + +[basic_auth](../README.md#basic_auth), [cookie_auth](../README.md#cookie_auth), [jwt_token](../README.md#jwt_token) + +### HTTP request headers + + - **Content-Type**: application/json + - **Accept**: application/json + +### HTTP response details +| Status code | Description | Response headers | +|-------------|-------------|------------------| +**204** | set garbage collection rules successfully | - | +**0** | Internal Server Error | - | + diff --git a/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java b/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java index 03c39c8f58c..829436837cc 100644 --- a/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java +++ b/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java @@ -30,6 +30,7 @@ import io.lakefs.clients.api.model.Error; import io.lakefs.clients.api.model.GarbageCollectionCommits; import io.lakefs.clients.api.model.GarbageCollectionPrepareRequest; +import io.lakefs.clients.api.model.GarbageCollectionRules; import java.lang.reflect.Type; import java.util.ArrayList; @@ -56,6 +57,126 @@ public void setApiClient(ApiClient apiClient) { this.localVarApiClient = apiClient; } + /** + * Build call for getGarbageCollectionRules + * @param repository (required) + * @param _callback Callback for upload/download progress + * @return Call to execute + * @throws ApiException If fail to serialize the request body object + * @http.response.details + + + + + +
Status Code Description Response Headers
200 gc rule list -
401 Unauthorized -
0 Internal Server Error -
+ */ + public okhttp3.Call getGarbageCollectionRulesCall(String repository, final ApiCallback _callback) throws ApiException { + Object localVarPostBody = null; + + // create path and map variables + String localVarPath = "/repositories/{repository}/gc/rules" + .replaceAll("\\{" + "repository" + "\\}", localVarApiClient.escapeString(repository.toString())); + + List localVarQueryParams = new ArrayList(); + List localVarCollectionQueryParams = new ArrayList(); + Map localVarHeaderParams = new HashMap(); + Map localVarCookieParams = new HashMap(); + Map localVarFormParams = new HashMap(); + + final String[] localVarAccepts = { + "application/json" + }; + final String localVarAccept = localVarApiClient.selectHeaderAccept(localVarAccepts); + if (localVarAccept != null) { + localVarHeaderParams.put("Accept", localVarAccept); + } + + final String[] localVarContentTypes = { + + }; + final String localVarContentType = localVarApiClient.selectHeaderContentType(localVarContentTypes); + localVarHeaderParams.put("Content-Type", localVarContentType); + + String[] localVarAuthNames = new String[] { "basic_auth", "cookie_auth", "jwt_token" }; + return localVarApiClient.buildCall(localVarPath, "GET", localVarQueryParams, localVarCollectionQueryParams, localVarPostBody, localVarHeaderParams, localVarCookieParams, localVarFormParams, localVarAuthNames, _callback); + } + + @SuppressWarnings("rawtypes") + private okhttp3.Call getGarbageCollectionRulesValidateBeforeCall(String repository, final ApiCallback _callback) throws ApiException { + + // verify the required parameter 'repository' is set + if (repository == null) { + throw new ApiException("Missing the required parameter 'repository' when calling getGarbageCollectionRules(Async)"); + } + + + okhttp3.Call localVarCall = getGarbageCollectionRulesCall(repository, _callback); + return localVarCall; + + } + + /** + * + * + * @param repository (required) + * @return GarbageCollectionRules + * @throws ApiException If fail to call the API, e.g. server error or cannot deserialize the response body + * @http.response.details + + + + + +
Status Code Description Response Headers
200 gc rule list -
401 Unauthorized -
0 Internal Server Error -
+ */ + public GarbageCollectionRules getGarbageCollectionRules(String repository) throws ApiException { + ApiResponse localVarResp = getGarbageCollectionRulesWithHttpInfo(repository); + return localVarResp.getData(); + } + + /** + * + * + * @param repository (required) + * @return ApiResponse<GarbageCollectionRules> + * @throws ApiException If fail to call the API, e.g. server error or cannot deserialize the response body + * @http.response.details + + + + + +
Status Code Description Response Headers
200 gc rule list -
401 Unauthorized -
0 Internal Server Error -
+ */ + public ApiResponse getGarbageCollectionRulesWithHttpInfo(String repository) throws ApiException { + okhttp3.Call localVarCall = getGarbageCollectionRulesValidateBeforeCall(repository, null); + Type localVarReturnType = new TypeToken(){}.getType(); + return localVarApiClient.execute(localVarCall, localVarReturnType); + } + + /** + * (asynchronously) + * + * @param repository (required) + * @param _callback The callback to be executed when the API call finishes + * @return The request call + * @throws ApiException If fail to process the API call, e.g. serializing the request body object + * @http.response.details + + + + + +
Status Code Description Response Headers
200 gc rule list -
401 Unauthorized -
0 Internal Server Error -
+ */ + public okhttp3.Call getGarbageCollectionRulesAsync(String repository, final ApiCallback _callback) throws ApiException { + + okhttp3.Call localVarCall = getGarbageCollectionRulesValidateBeforeCall(repository, _callback); + Type localVarReturnType = new TypeToken(){}.getType(); + localVarApiClient.executeAsync(localVarCall, localVarReturnType, _callback); + return localVarCall; + } /** * Build call for prepareGarbageCollectionCommits * @param repository (required) @@ -184,4 +305,125 @@ public okhttp3.Call prepareGarbageCollectionCommitsAsync(String repository, Garb localVarApiClient.executeAsync(localVarCall, localVarReturnType, _callback); return localVarCall; } + /** + * Build call for setGarbageCollectionRules + * @param repository (required) + * @param garbageCollectionRules (required) + * @param _callback Callback for upload/download progress + * @return Call to execute + * @throws ApiException If fail to serialize the request body object + * @http.response.details + + + + +
Status Code Description Response Headers
204 set garbage collection rules successfully -
0 Internal Server Error -
+ */ + public okhttp3.Call setGarbageCollectionRulesCall(String repository, GarbageCollectionRules garbageCollectionRules, final ApiCallback _callback) throws ApiException { + Object localVarPostBody = garbageCollectionRules; + + // create path and map variables + String localVarPath = "/repositories/{repository}/gc/rules" + .replaceAll("\\{" + "repository" + "\\}", localVarApiClient.escapeString(repository.toString())); + + List localVarQueryParams = new ArrayList(); + List localVarCollectionQueryParams = new ArrayList(); + Map localVarHeaderParams = new HashMap(); + Map localVarCookieParams = new HashMap(); + Map localVarFormParams = new HashMap(); + + final String[] localVarAccepts = { + "application/json" + }; + final String localVarAccept = localVarApiClient.selectHeaderAccept(localVarAccepts); + if (localVarAccept != null) { + localVarHeaderParams.put("Accept", localVarAccept); + } + + final String[] localVarContentTypes = { + "application/json" + }; + final String localVarContentType = localVarApiClient.selectHeaderContentType(localVarContentTypes); + localVarHeaderParams.put("Content-Type", localVarContentType); + + String[] localVarAuthNames = new String[] { "basic_auth", "cookie_auth", "jwt_token" }; + return localVarApiClient.buildCall(localVarPath, "POST", localVarQueryParams, localVarCollectionQueryParams, localVarPostBody, localVarHeaderParams, localVarCookieParams, localVarFormParams, localVarAuthNames, _callback); + } + + @SuppressWarnings("rawtypes") + private okhttp3.Call setGarbageCollectionRulesValidateBeforeCall(String repository, GarbageCollectionRules garbageCollectionRules, final ApiCallback _callback) throws ApiException { + + // verify the required parameter 'repository' is set + if (repository == null) { + throw new ApiException("Missing the required parameter 'repository' when calling setGarbageCollectionRules(Async)"); + } + + // verify the required parameter 'garbageCollectionRules' is set + if (garbageCollectionRules == null) { + throw new ApiException("Missing the required parameter 'garbageCollectionRules' when calling setGarbageCollectionRules(Async)"); + } + + + okhttp3.Call localVarCall = setGarbageCollectionRulesCall(repository, garbageCollectionRules, _callback); + return localVarCall; + + } + + /** + * + * + * @param repository (required) + * @param garbageCollectionRules (required) + * @throws ApiException If fail to call the API, e.g. server error or cannot deserialize the response body + * @http.response.details + + + + +
Status Code Description Response Headers
204 set garbage collection rules successfully -
0 Internal Server Error -
+ */ + public void setGarbageCollectionRules(String repository, GarbageCollectionRules garbageCollectionRules) throws ApiException { + setGarbageCollectionRulesWithHttpInfo(repository, garbageCollectionRules); + } + + /** + * + * + * @param repository (required) + * @param garbageCollectionRules (required) + * @return ApiResponse<Void> + * @throws ApiException If fail to call the API, e.g. server error or cannot deserialize the response body + * @http.response.details + + + + +
Status Code Description Response Headers
204 set garbage collection rules successfully -
0 Internal Server Error -
+ */ + public ApiResponse setGarbageCollectionRulesWithHttpInfo(String repository, GarbageCollectionRules garbageCollectionRules) throws ApiException { + okhttp3.Call localVarCall = setGarbageCollectionRulesValidateBeforeCall(repository, garbageCollectionRules, null); + return localVarApiClient.execute(localVarCall); + } + + /** + * (asynchronously) + * + * @param repository (required) + * @param garbageCollectionRules (required) + * @param _callback The callback to be executed when the API call finishes + * @return The request call + * @throws ApiException If fail to process the API call, e.g. serializing the request body object + * @http.response.details + + + + +
Status Code Description Response Headers
204 set garbage collection rules successfully -
0 Internal Server Error -
+ */ + public okhttp3.Call setGarbageCollectionRulesAsync(String repository, GarbageCollectionRules garbageCollectionRules, final ApiCallback _callback) throws ApiException { + + okhttp3.Call localVarCall = setGarbageCollectionRulesValidateBeforeCall(repository, garbageCollectionRules, _callback); + localVarApiClient.executeAsync(localVarCall, _callback); + return localVarCall; + } } diff --git a/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionRule.java b/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionRule.java new file mode 100644 index 00000000000..2cf99070c11 --- /dev/null +++ b/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionRule.java @@ -0,0 +1,125 @@ +/* + * lakeFS API + * lakeFS HTTP API + * + * The version of the OpenAPI document: 0.1.0 + * + * + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. + */ + + +package io.lakefs.clients.api.model; + +import java.util.Objects; +import java.util.Arrays; +import com.google.gson.TypeAdapter; +import com.google.gson.annotations.JsonAdapter; +import com.google.gson.annotations.SerializedName; +import com.google.gson.stream.JsonReader; +import com.google.gson.stream.JsonWriter; +import io.swagger.annotations.ApiModel; +import io.swagger.annotations.ApiModelProperty; +import java.io.IOException; + +/** + * GarbageCollectionRule + */ +@javax.annotation.Generated(value = "org.openapitools.codegen.languages.JavaClientCodegen") +public class GarbageCollectionRule { + public static final String SERIALIZED_NAME_BRANCH_ID = "branch_id"; + @SerializedName(SERIALIZED_NAME_BRANCH_ID) + private String branchId; + + public static final String SERIALIZED_NAME_RETENTION_DAYS = "retention_days"; + @SerializedName(SERIALIZED_NAME_RETENTION_DAYS) + private Integer retentionDays; + + + public GarbageCollectionRule branchId(String branchId) { + + this.branchId = branchId; + return this; + } + + /** + * Get branchId + * @return branchId + **/ + @ApiModelProperty(required = true, value = "") + + public String getBranchId() { + return branchId; + } + + + public void setBranchId(String branchId) { + this.branchId = branchId; + } + + + public GarbageCollectionRule retentionDays(Integer retentionDays) { + + this.retentionDays = retentionDays; + return this; + } + + /** + * Get retentionDays + * @return retentionDays + **/ + @ApiModelProperty(required = true, value = "") + + public Integer getRetentionDays() { + return retentionDays; + } + + + public void setRetentionDays(Integer retentionDays) { + this.retentionDays = retentionDays; + } + + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + GarbageCollectionRule garbageCollectionRule = (GarbageCollectionRule) o; + return Objects.equals(this.branchId, garbageCollectionRule.branchId) && + Objects.equals(this.retentionDays, garbageCollectionRule.retentionDays); + } + + @Override + public int hashCode() { + return Objects.hash(branchId, retentionDays); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("class GarbageCollectionRule {\n"); + sb.append(" branchId: ").append(toIndentedString(branchId)).append("\n"); + sb.append(" retentionDays: ").append(toIndentedString(retentionDays)).append("\n"); + sb.append("}"); + return sb.toString(); + } + + /** + * Convert the given object to string with each line indented by 4 spaces + * (except the first line). + */ + private String toIndentedString(Object o) { + if (o == null) { + return "null"; + } + return o.toString().replace("\n", "\n "); + } + +} + diff --git a/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionRules.java b/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionRules.java new file mode 100644 index 00000000000..a6ef66d96b1 --- /dev/null +++ b/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionRules.java @@ -0,0 +1,133 @@ +/* + * lakeFS API + * lakeFS HTTP API + * + * The version of the OpenAPI document: 0.1.0 + * + * + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. + */ + + +package io.lakefs.clients.api.model; + +import java.util.Objects; +import java.util.Arrays; +import com.google.gson.TypeAdapter; +import com.google.gson.annotations.JsonAdapter; +import com.google.gson.annotations.SerializedName; +import com.google.gson.stream.JsonReader; +import com.google.gson.stream.JsonWriter; +import io.lakefs.clients.api.model.GarbageCollectionRule; +import io.swagger.annotations.ApiModel; +import io.swagger.annotations.ApiModelProperty; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * GarbageCollectionRules + */ +@javax.annotation.Generated(value = "org.openapitools.codegen.languages.JavaClientCodegen") +public class GarbageCollectionRules { + public static final String SERIALIZED_NAME_DEFAULT_RETENTION_DAYS = "default_retention_days"; + @SerializedName(SERIALIZED_NAME_DEFAULT_RETENTION_DAYS) + private Integer defaultRetentionDays; + + public static final String SERIALIZED_NAME_BRANCHES = "branches"; + @SerializedName(SERIALIZED_NAME_BRANCHES) + private List branches = new ArrayList(); + + + public GarbageCollectionRules defaultRetentionDays(Integer defaultRetentionDays) { + + this.defaultRetentionDays = defaultRetentionDays; + return this; + } + + /** + * Get defaultRetentionDays + * @return defaultRetentionDays + **/ + @ApiModelProperty(required = true, value = "") + + public Integer getDefaultRetentionDays() { + return defaultRetentionDays; + } + + + public void setDefaultRetentionDays(Integer defaultRetentionDays) { + this.defaultRetentionDays = defaultRetentionDays; + } + + + public GarbageCollectionRules branches(List branches) { + + this.branches = branches; + return this; + } + + public GarbageCollectionRules addBranchesItem(GarbageCollectionRule branchesItem) { + this.branches.add(branchesItem); + return this; + } + + /** + * Get branches + * @return branches + **/ + @ApiModelProperty(required = true, value = "") + + public List getBranches() { + return branches; + } + + + public void setBranches(List branches) { + this.branches = branches; + } + + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + GarbageCollectionRules garbageCollectionRules = (GarbageCollectionRules) o; + return Objects.equals(this.defaultRetentionDays, garbageCollectionRules.defaultRetentionDays) && + Objects.equals(this.branches, garbageCollectionRules.branches); + } + + @Override + public int hashCode() { + return Objects.hash(defaultRetentionDays, branches); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("class GarbageCollectionRules {\n"); + sb.append(" defaultRetentionDays: ").append(toIndentedString(defaultRetentionDays)).append("\n"); + sb.append(" branches: ").append(toIndentedString(branches)).append("\n"); + sb.append("}"); + return sb.toString(); + } + + /** + * Convert the given object to string with each line indented by 4 spaces + * (except the first line). + */ + private String toIndentedString(Object o) { + if (o == null) { + return "null"; + } + return o.toString().replace("\n", "\n "); + } + +} + diff --git a/clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionRuleTest.java b/clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionRuleTest.java new file mode 100644 index 00000000000..68c0ff0ba45 --- /dev/null +++ b/clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionRuleTest.java @@ -0,0 +1,59 @@ +/* + * lakeFS API + * lakeFS HTTP API + * + * The version of the OpenAPI document: 0.1.0 + * + * + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. + */ + + +package io.lakefs.clients.api.model; + +import com.google.gson.TypeAdapter; +import com.google.gson.annotations.JsonAdapter; +import com.google.gson.annotations.SerializedName; +import com.google.gson.stream.JsonReader; +import com.google.gson.stream.JsonWriter; +import io.swagger.annotations.ApiModel; +import io.swagger.annotations.ApiModelProperty; +import java.io.IOException; +import org.junit.Assert; +import org.junit.Ignore; +import org.junit.Test; + + +/** + * Model tests for GarbageCollectionRule + */ +public class GarbageCollectionRuleTest { + private final GarbageCollectionRule model = new GarbageCollectionRule(); + + /** + * Model tests for GarbageCollectionRule + */ + @Test + public void testGarbageCollectionRule() { + // TODO: test GarbageCollectionRule + } + + /** + * Test the property 'branchId' + */ + @Test + public void branchIdTest() { + // TODO: test branchId + } + + /** + * Test the property 'retentionDays' + */ + @Test + public void retentionDaysTest() { + // TODO: test retentionDays + } + +} diff --git a/clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionRulesTest.java b/clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionRulesTest.java new file mode 100644 index 00000000000..47690ce85b6 --- /dev/null +++ b/clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionRulesTest.java @@ -0,0 +1,62 @@ +/* + * lakeFS API + * lakeFS HTTP API + * + * The version of the OpenAPI document: 0.1.0 + * + * + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. + */ + + +package io.lakefs.clients.api.model; + +import com.google.gson.TypeAdapter; +import com.google.gson.annotations.JsonAdapter; +import com.google.gson.annotations.SerializedName; +import com.google.gson.stream.JsonReader; +import com.google.gson.stream.JsonWriter; +import io.lakefs.clients.api.model.GarbageCollectionRule; +import io.swagger.annotations.ApiModel; +import io.swagger.annotations.ApiModelProperty; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.junit.Assert; +import org.junit.Ignore; +import org.junit.Test; + + +/** + * Model tests for GarbageCollectionRules + */ +public class GarbageCollectionRulesTest { + private final GarbageCollectionRules model = new GarbageCollectionRules(); + + /** + * Model tests for GarbageCollectionRules + */ + @Test + public void testGarbageCollectionRules() { + // TODO: test GarbageCollectionRules + } + + /** + * Test the property 'defaultRetentionDays' + */ + @Test + public void defaultRetentionDaysTest() { + // TODO: test defaultRetentionDays + } + + /** + * Test the property 'branches' + */ + @Test + public void branchesTest() { + // TODO: test branches + } + +} diff --git a/clients/python/.openapi-generator/FILES b/clients/python/.openapi-generator/FILES index 714611f4f63..4714a7b2a47 100644 --- a/clients/python/.openapi-generator/FILES +++ b/clients/python/.openapi-generator/FILES @@ -23,6 +23,8 @@ docs/DiffList.md docs/Error.md docs/GarbageCollectionCommits.md docs/GarbageCollectionPrepareRequest.md +docs/GarbageCollectionRule.md +docs/GarbageCollectionRules.md docs/Group.md docs/GroupCreation.md docs/GroupList.md @@ -103,6 +105,8 @@ lakefs_client/model/diff_list.py lakefs_client/model/error.py lakefs_client/model/garbage_collection_commits.py lakefs_client/model/garbage_collection_prepare_request.py +lakefs_client/model/garbage_collection_rule.py +lakefs_client/model/garbage_collection_rules.py lakefs_client/model/group.py lakefs_client/model/group_creation.py lakefs_client/model/group_list.py @@ -147,4 +151,6 @@ setup.cfg setup.py test-requirements.txt test/__init__.py +test/test_garbage_collection_rule.py +test/test_garbage_collection_rules.py tox.ini diff --git a/clients/python/README.md b/clients/python/README.md index c2f166f32c2..c3b448c956b 100644 --- a/clients/python/README.md +++ b/clients/python/README.md @@ -171,7 +171,9 @@ Class | Method | HTTP request | Description *RepositoriesApi* | [**delete_repository**](docs/RepositoriesApi.md#delete_repository) | **DELETE** /repositories/{repository} | delete repository *RepositoriesApi* | [**get_repository**](docs/RepositoriesApi.md#get_repository) | **GET** /repositories/{repository} | get repository *RepositoriesApi* | [**list_repositories**](docs/RepositoriesApi.md#list_repositories) | **GET** /repositories | list repositories +*RetentionApi* | [**get_garbage_collection_rules**](docs/RetentionApi.md#get_garbage_collection_rules) | **GET** /repositories/{repository}/gc/rules | *RetentionApi* | [**prepare_garbage_collection_commits**](docs/RetentionApi.md#prepare_garbage_collection_commits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection +*RetentionApi* | [**set_garbage_collection_rules**](docs/RetentionApi.md#set_garbage_collection_rules) | **POST** /repositories/{repository}/gc/rules | *StagingApi* | [**get_physical_address**](docs/StagingApi.md#get_physical_address) | **GET** /repositories/{repository}/branches/{branch}/staging/backing | get a physical address and a return token to write object to underlying storage *StagingApi* | [**link_physical_address**](docs/StagingApi.md#link_physical_address) | **PUT** /repositories/{repository}/branches/{branch}/staging/backing | associate staging on this physical address with a path *TagsApi* | [**create_tag**](docs/TagsApi.md#create_tag) | **POST** /repositories/{repository}/tags | create tag @@ -199,6 +201,8 @@ Class | Method | HTTP request | Description - [Error](docs/Error.md) - [GarbageCollectionCommits](docs/GarbageCollectionCommits.md) - [GarbageCollectionPrepareRequest](docs/GarbageCollectionPrepareRequest.md) + - [GarbageCollectionRule](docs/GarbageCollectionRule.md) + - [GarbageCollectionRules](docs/GarbageCollectionRules.md) - [Group](docs/Group.md) - [GroupCreation](docs/GroupCreation.md) - [GroupList](docs/GroupList.md) diff --git a/clients/python/docs/GarbageCollectionRule.md b/clients/python/docs/GarbageCollectionRule.md new file mode 100644 index 00000000000..d49eb708c0a --- /dev/null +++ b/clients/python/docs/GarbageCollectionRule.md @@ -0,0 +1,12 @@ +# GarbageCollectionRule + + +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**branch_id** | **str** | | +**retention_days** | **int** | | + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/clients/python/docs/GarbageCollectionRules.md b/clients/python/docs/GarbageCollectionRules.md new file mode 100644 index 00000000000..935e7e322bc --- /dev/null +++ b/clients/python/docs/GarbageCollectionRules.md @@ -0,0 +1,12 @@ +# GarbageCollectionRules + + +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**default_retention_days** | **int** | | +**branches** | [**[GarbageCollectionRule]**](GarbageCollectionRule.md) | | + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/clients/python/docs/RetentionApi.md b/clients/python/docs/RetentionApi.md index fbf928de618..612dd7d3820 100644 --- a/clients/python/docs/RetentionApi.md +++ b/clients/python/docs/RetentionApi.md @@ -4,9 +4,100 @@ All URIs are relative to *http://localhost/api/v1* Method | HTTP request | Description ------------- | ------------- | ------------- +[**get_garbage_collection_rules**](RetentionApi.md#get_garbage_collection_rules) | **GET** /repositories/{repository}/gc/rules | [**prepare_garbage_collection_commits**](RetentionApi.md#prepare_garbage_collection_commits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection +[**set_garbage_collection_rules**](RetentionApi.md#set_garbage_collection_rules) | **POST** /repositories/{repository}/gc/rules | +# **get_garbage_collection_rules** +> GarbageCollectionRules get_garbage_collection_rules(repository) + + + +### Example + +* Basic Authentication (basic_auth): +* Api Key Authentication (cookie_auth): +* Bearer (JWT) Authentication (jwt_token): +```python +import time +import lakefs_client +from lakefs_client.api import retention_api +from lakefs_client.model.garbage_collection_rules import GarbageCollectionRules +from lakefs_client.model.error import Error +from pprint import pprint +# Defining the host is optional and defaults to http://localhost/api/v1 +# See configuration.py for a list of all supported configuration parameters. +configuration = lakefs_client.Configuration( + host = "http://localhost/api/v1" +) + +# The client must configure the authentication and authorization parameters +# in accordance with the API server security policy. +# Examples for each auth method are provided below, use the example that +# satisfies your auth use case. + +# Configure HTTP basic authorization: basic_auth +configuration = lakefs_client.Configuration( + username = 'YOUR_USERNAME', + password = 'YOUR_PASSWORD' +) + +# Configure API key authorization: cookie_auth +configuration.api_key['cookie_auth'] = 'YOUR_API_KEY' + +# Uncomment below to setup prefix (e.g. Bearer) for API key, if needed +# configuration.api_key_prefix['cookie_auth'] = 'Bearer' + +# Configure Bearer authorization (JWT): jwt_token +configuration = lakefs_client.Configuration( + access_token = 'YOUR_BEARER_TOKEN' +) + +# Enter a context with an instance of the API client +with lakefs_client.ApiClient(configuration) as api_client: + # Create an instance of the API class + api_instance = retention_api.RetentionApi(api_client) + repository = "repository_example" # str | + + # example passing only required values which don't have defaults set + try: + api_response = api_instance.get_garbage_collection_rules(repository) + pprint(api_response) + except lakefs_client.ApiException as e: + print("Exception when calling RetentionApi->get_garbage_collection_rules: %s\n" % e) +``` + + +### Parameters + +Name | Type | Description | Notes +------------- | ------------- | ------------- | ------------- + **repository** | **str**| | + +### Return type + +[**GarbageCollectionRules**](GarbageCollectionRules.md) + +### Authorization + +[basic_auth](../README.md#basic_auth), [cookie_auth](../README.md#cookie_auth), [jwt_token](../README.md#jwt_token) + +### HTTP request headers + + - **Content-Type**: Not defined + - **Accept**: application/json + + +### HTTP response details +| Status code | Description | Response headers | +|-------------|-------------|------------------| +**200** | gc rule list | - | +**401** | Unauthorized | - | +**0** | Internal Server Error | - | + +[[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) + # **prepare_garbage_collection_commits** > GarbageCollectionCommits prepare_garbage_collection_commits(repository) @@ -112,3 +203,100 @@ Name | Type | Description | Notes [[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) +# **set_garbage_collection_rules** +> set_garbage_collection_rules(repository, garbage_collection_rules) + + + +### Example + +* Basic Authentication (basic_auth): +* Api Key Authentication (cookie_auth): +* Bearer (JWT) Authentication (jwt_token): +```python +import time +import lakefs_client +from lakefs_client.api import retention_api +from lakefs_client.model.garbage_collection_rules import GarbageCollectionRules +from lakefs_client.model.error import Error +from pprint import pprint +# Defining the host is optional and defaults to http://localhost/api/v1 +# See configuration.py for a list of all supported configuration parameters. +configuration = lakefs_client.Configuration( + host = "http://localhost/api/v1" +) + +# The client must configure the authentication and authorization parameters +# in accordance with the API server security policy. +# Examples for each auth method are provided below, use the example that +# satisfies your auth use case. + +# Configure HTTP basic authorization: basic_auth +configuration = lakefs_client.Configuration( + username = 'YOUR_USERNAME', + password = 'YOUR_PASSWORD' +) + +# Configure API key authorization: cookie_auth +configuration.api_key['cookie_auth'] = 'YOUR_API_KEY' + +# Uncomment below to setup prefix (e.g. Bearer) for API key, if needed +# configuration.api_key_prefix['cookie_auth'] = 'Bearer' + +# Configure Bearer authorization (JWT): jwt_token +configuration = lakefs_client.Configuration( + access_token = 'YOUR_BEARER_TOKEN' +) + +# Enter a context with an instance of the API client +with lakefs_client.ApiClient(configuration) as api_client: + # Create an instance of the API class + api_instance = retention_api.RetentionApi(api_client) + repository = "repository_example" # str | + garbage_collection_rules = GarbageCollectionRules( + default_retention_days=1, + branches=[ + GarbageCollectionRule( + branch_id="branch_id_example", + retention_days=1, + ), + ], + ) # GarbageCollectionRules | + + # example passing only required values which don't have defaults set + try: + api_instance.set_garbage_collection_rules(repository, garbage_collection_rules) + except lakefs_client.ApiException as e: + print("Exception when calling RetentionApi->set_garbage_collection_rules: %s\n" % e) +``` + + +### Parameters + +Name | Type | Description | Notes +------------- | ------------- | ------------- | ------------- + **repository** | **str**| | + **garbage_collection_rules** | [**GarbageCollectionRules**](GarbageCollectionRules.md)| | + +### Return type + +void (empty response body) + +### Authorization + +[basic_auth](../README.md#basic_auth), [cookie_auth](../README.md#cookie_auth), [jwt_token](../README.md#jwt_token) + +### HTTP request headers + + - **Content-Type**: application/json + - **Accept**: application/json + + +### HTTP response details +| Status code | Description | Response headers | +|-------------|-------------|------------------| +**204** | set garbage collection rules successfully | - | +**0** | Internal Server Error | - | + +[[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) + diff --git a/clients/python/lakefs_client/api/retention_api.py b/clients/python/lakefs_client/api/retention_api.py index 46ee2879933..d966e0cf2f5 100644 --- a/clients/python/lakefs_client/api/retention_api.py +++ b/clients/python/lakefs_client/api/retention_api.py @@ -25,6 +25,7 @@ from lakefs_client.model.error import Error from lakefs_client.model.garbage_collection_commits import GarbageCollectionCommits from lakefs_client.model.garbage_collection_prepare_request import GarbageCollectionPrepareRequest +from lakefs_client.model.garbage_collection_rules import GarbageCollectionRules class RetentionApi(object): @@ -39,6 +40,126 @@ def __init__(self, api_client=None): api_client = ApiClient() self.api_client = api_client + def __get_garbage_collection_rules( + self, + repository, + **kwargs + ): + """get_garbage_collection_rules # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + + >>> thread = api.get_garbage_collection_rules(repository, async_req=True) + >>> result = thread.get() + + Args: + repository (str): + + Keyword Args: + _return_http_data_only (bool): response data without head status + code and headers. Default is True. + _preload_content (bool): if False, the urllib3.HTTPResponse object + will be returned without reading/decoding response data. + Default is True. + _request_timeout (float/tuple): timeout setting for this request. If one + number provided, it will be total request timeout. It can also + be a pair (tuple) of (connection, read) timeouts. + Default is None. + _check_input_type (bool): specifies if type checking + should be done one the data sent to the server. + Default is True. + _check_return_type (bool): specifies if type checking + should be done one the data received from the server. + Default is True. + _host_index (int/None): specifies the index of the server + that we want to use. + Default is read from the configuration. + async_req (bool): execute request asynchronously + + Returns: + GarbageCollectionRules + If the method is called asynchronously, returns the request + thread. + """ + kwargs['async_req'] = kwargs.get( + 'async_req', False + ) + kwargs['_return_http_data_only'] = kwargs.get( + '_return_http_data_only', True + ) + kwargs['_preload_content'] = kwargs.get( + '_preload_content', True + ) + kwargs['_request_timeout'] = kwargs.get( + '_request_timeout', None + ) + kwargs['_check_input_type'] = kwargs.get( + '_check_input_type', True + ) + kwargs['_check_return_type'] = kwargs.get( + '_check_return_type', True + ) + kwargs['_host_index'] = kwargs.get('_host_index') + kwargs['repository'] = \ + repository + return self.call_with_http_info(**kwargs) + + self.get_garbage_collection_rules = _Endpoint( + settings={ + 'response_type': (GarbageCollectionRules,), + 'auth': [ + 'basic_auth', + 'cookie_auth', + 'jwt_token' + ], + 'endpoint_path': '/repositories/{repository}/gc/rules', + 'operation_id': 'get_garbage_collection_rules', + 'http_method': 'GET', + 'servers': None, + }, + params_map={ + 'all': [ + 'repository', + ], + 'required': [ + 'repository', + ], + 'nullable': [ + ], + 'enum': [ + ], + 'validation': [ + ] + }, + root_map={ + 'validations': { + }, + 'allowed_values': { + }, + 'openapi_types': { + 'repository': + (str,), + }, + 'attribute_map': { + 'repository': 'repository', + }, + 'location_map': { + 'repository': 'path', + }, + 'collection_format_map': { + } + }, + headers_map={ + 'accept': [ + 'application/json' + ], + 'content_type': [], + }, + api_client=api_client, + callable=__get_garbage_collection_rules + ) + def __prepare_garbage_collection_commits( self, repository, @@ -165,3 +286,134 @@ def __prepare_garbage_collection_commits( api_client=api_client, callable=__prepare_garbage_collection_commits ) + + def __set_garbage_collection_rules( + self, + repository, + garbage_collection_rules, + **kwargs + ): + """set_garbage_collection_rules # noqa: E501 + + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + + >>> thread = api.set_garbage_collection_rules(repository, garbage_collection_rules, async_req=True) + >>> result = thread.get() + + Args: + repository (str): + garbage_collection_rules (GarbageCollectionRules): + + Keyword Args: + _return_http_data_only (bool): response data without head status + code and headers. Default is True. + _preload_content (bool): if False, the urllib3.HTTPResponse object + will be returned without reading/decoding response data. + Default is True. + _request_timeout (float/tuple): timeout setting for this request. If one + number provided, it will be total request timeout. It can also + be a pair (tuple) of (connection, read) timeouts. + Default is None. + _check_input_type (bool): specifies if type checking + should be done one the data sent to the server. + Default is True. + _check_return_type (bool): specifies if type checking + should be done one the data received from the server. + Default is True. + _host_index (int/None): specifies the index of the server + that we want to use. + Default is read from the configuration. + async_req (bool): execute request asynchronously + + Returns: + None + If the method is called asynchronously, returns the request + thread. + """ + kwargs['async_req'] = kwargs.get( + 'async_req', False + ) + kwargs['_return_http_data_only'] = kwargs.get( + '_return_http_data_only', True + ) + kwargs['_preload_content'] = kwargs.get( + '_preload_content', True + ) + kwargs['_request_timeout'] = kwargs.get( + '_request_timeout', None + ) + kwargs['_check_input_type'] = kwargs.get( + '_check_input_type', True + ) + kwargs['_check_return_type'] = kwargs.get( + '_check_return_type', True + ) + kwargs['_host_index'] = kwargs.get('_host_index') + kwargs['repository'] = \ + repository + kwargs['garbage_collection_rules'] = \ + garbage_collection_rules + return self.call_with_http_info(**kwargs) + + self.set_garbage_collection_rules = _Endpoint( + settings={ + 'response_type': None, + 'auth': [ + 'basic_auth', + 'cookie_auth', + 'jwt_token' + ], + 'endpoint_path': '/repositories/{repository}/gc/rules', + 'operation_id': 'set_garbage_collection_rules', + 'http_method': 'POST', + 'servers': None, + }, + params_map={ + 'all': [ + 'repository', + 'garbage_collection_rules', + ], + 'required': [ + 'repository', + 'garbage_collection_rules', + ], + 'nullable': [ + ], + 'enum': [ + ], + 'validation': [ + ] + }, + root_map={ + 'validations': { + }, + 'allowed_values': { + }, + 'openapi_types': { + 'repository': + (str,), + 'garbage_collection_rules': + (GarbageCollectionRules,), + }, + 'attribute_map': { + 'repository': 'repository', + }, + 'location_map': { + 'repository': 'path', + 'garbage_collection_rules': 'body', + }, + 'collection_format_map': { + } + }, + headers_map={ + 'accept': [ + 'application/json' + ], + 'content_type': [ + 'application/json' + ] + }, + api_client=api_client, + callable=__set_garbage_collection_rules + ) diff --git a/clients/python/lakefs_client/model/garbage_collection_rule.py b/clients/python/lakefs_client/model/garbage_collection_rule.py new file mode 100644 index 00000000000..b491c1148c6 --- /dev/null +++ b/clients/python/lakefs_client/model/garbage_collection_rule.py @@ -0,0 +1,174 @@ +""" + lakeFS API + + lakeFS HTTP API # noqa: E501 + + The version of the OpenAPI document: 0.1.0 + Contact: services@treeverse.io + Generated by: https://openapi-generator.tech +""" + + +import re # noqa: F401 +import sys # noqa: F401 + +from lakefs_client.model_utils import ( # noqa: F401 + ApiTypeError, + ModelComposed, + ModelNormal, + ModelSimple, + cached_property, + change_keys_js_to_python, + convert_js_args_to_python_args, + date, + datetime, + file_type, + none_type, + validate_get_composed_info, +) + + +class GarbageCollectionRule(ModelNormal): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + + Attributes: + allowed_values (dict): The key is the tuple path to the attribute + and the for var_name this is (var_name,). The value is a dict + with a capitalized key describing the allowed value and an allowed + value. These dicts store the allowed enum values. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + discriminator_value_class_map (dict): A dict to go from the discriminator + variable value to the discriminator class name. + validations (dict): The key is the tuple path to the attribute + and the for var_name this is (var_name,). The value is a dict + that stores validations for max_length, min_length, max_items, + min_items, exclusive_maximum, inclusive_maximum, exclusive_minimum, + inclusive_minimum, and regex. + additional_properties_type (tuple): A tuple of classes accepted + as additional properties values. + """ + + allowed_values = { + } + + validations = { + } + + additional_properties_type = None + + _nullable = False + + @cached_property + def openapi_types(): + """ + This must be a method because a model may have properties that are + of type self, this must run after the class is loaded + + Returns + openapi_types (dict): The key is attribute name + and the value is attribute type. + """ + return { + 'branch_id': (str,), # noqa: E501 + 'retention_days': (int,), # noqa: E501 + } + + @cached_property + def discriminator(): + return None + + + attribute_map = { + 'branch_id': 'branch_id', # noqa: E501 + 'retention_days': 'retention_days', # noqa: E501 + } + + _composed_schemas = {} + + required_properties = set([ + '_data_store', + '_check_type', + '_spec_property_naming', + '_path_to_item', + '_configuration', + '_visited_composed_classes', + ]) + + @convert_js_args_to_python_args + def __init__(self, branch_id, retention_days, *args, **kwargs): # noqa: E501 + """GarbageCollectionRule - a model defined in OpenAPI + + Args: + branch_id (str): + retention_days (int): + + Keyword Args: + _check_type (bool): if True, values for parameters in openapi_types + will be type checked and a TypeError will be + raised if the wrong type is input. + Defaults to True + _path_to_item (tuple/list): This is a list of keys or values to + drill down to the model in received_data + when deserializing a response + _spec_property_naming (bool): True if the variable names in the input data + are serialized names, as specified in the OpenAPI document. + False if the variable names in the input data + are pythonic names, e.g. snake case (default) + _configuration (Configuration): the instance to use when + deserializing a file_type parameter. + If passed, type conversion is attempted + If omitted no type conversion is done. + _visited_composed_classes (tuple): This stores a tuple of + classes that we have traveled through so that + if we see that class again we will not use its + discriminator again. + When traveling through a discriminator, the + composed schema that is + is traveled through is added to this set. + For example if Animal has a discriminator + petType and we pass in "Dog", and the class Dog + allOf includes Animal, we move through Animal + once using the discriminator, and pick Dog. + Then in Dog, we will make an instance of the + Animal class but this time we won't travel + through its discriminator because we passed in + _visited_composed_classes = (Animal,) + """ + + _check_type = kwargs.pop('_check_type', True) + _spec_property_naming = kwargs.pop('_spec_property_naming', False) + _path_to_item = kwargs.pop('_path_to_item', ()) + _configuration = kwargs.pop('_configuration', None) + _visited_composed_classes = kwargs.pop('_visited_composed_classes', ()) + + if args: + raise ApiTypeError( + "Invalid positional arguments=%s passed to %s. Remove those invalid positional arguments." % ( + args, + self.__class__.__name__, + ), + path_to_item=_path_to_item, + valid_classes=(self.__class__,), + ) + + self._data_store = {} + self._check_type = _check_type + self._spec_property_naming = _spec_property_naming + self._path_to_item = _path_to_item + self._configuration = _configuration + self._visited_composed_classes = _visited_composed_classes + (self.__class__,) + + self.branch_id = branch_id + self.retention_days = retention_days + for var_name, var_value in kwargs.items(): + if var_name not in self.attribute_map and \ + self._configuration is not None and \ + self._configuration.discard_unknown_keys and \ + self.additional_properties_type is None: + # discard variable. + continue + setattr(self, var_name, var_value) diff --git a/clients/python/lakefs_client/model/garbage_collection_rules.py b/clients/python/lakefs_client/model/garbage_collection_rules.py new file mode 100644 index 00000000000..e4a8d0345e8 --- /dev/null +++ b/clients/python/lakefs_client/model/garbage_collection_rules.py @@ -0,0 +1,179 @@ +""" + lakeFS API + + lakeFS HTTP API # noqa: E501 + + The version of the OpenAPI document: 0.1.0 + Contact: services@treeverse.io + Generated by: https://openapi-generator.tech +""" + + +import re # noqa: F401 +import sys # noqa: F401 + +from lakefs_client.model_utils import ( # noqa: F401 + ApiTypeError, + ModelComposed, + ModelNormal, + ModelSimple, + cached_property, + change_keys_js_to_python, + convert_js_args_to_python_args, + date, + datetime, + file_type, + none_type, + validate_get_composed_info, +) + +def lazy_import(): + from lakefs_client.model.garbage_collection_rule import GarbageCollectionRule + globals()['GarbageCollectionRule'] = GarbageCollectionRule + + +class GarbageCollectionRules(ModelNormal): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + + Attributes: + allowed_values (dict): The key is the tuple path to the attribute + and the for var_name this is (var_name,). The value is a dict + with a capitalized key describing the allowed value and an allowed + value. These dicts store the allowed enum values. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + discriminator_value_class_map (dict): A dict to go from the discriminator + variable value to the discriminator class name. + validations (dict): The key is the tuple path to the attribute + and the for var_name this is (var_name,). The value is a dict + that stores validations for max_length, min_length, max_items, + min_items, exclusive_maximum, inclusive_maximum, exclusive_minimum, + inclusive_minimum, and regex. + additional_properties_type (tuple): A tuple of classes accepted + as additional properties values. + """ + + allowed_values = { + } + + validations = { + } + + additional_properties_type = None + + _nullable = False + + @cached_property + def openapi_types(): + """ + This must be a method because a model may have properties that are + of type self, this must run after the class is loaded + + Returns + openapi_types (dict): The key is attribute name + and the value is attribute type. + """ + lazy_import() + return { + 'default_retention_days': (int,), # noqa: E501 + 'branches': ([GarbageCollectionRule],), # noqa: E501 + } + + @cached_property + def discriminator(): + return None + + + attribute_map = { + 'default_retention_days': 'default_retention_days', # noqa: E501 + 'branches': 'branches', # noqa: E501 + } + + _composed_schemas = {} + + required_properties = set([ + '_data_store', + '_check_type', + '_spec_property_naming', + '_path_to_item', + '_configuration', + '_visited_composed_classes', + ]) + + @convert_js_args_to_python_args + def __init__(self, default_retention_days, branches, *args, **kwargs): # noqa: E501 + """GarbageCollectionRules - a model defined in OpenAPI + + Args: + default_retention_days (int): + branches ([GarbageCollectionRule]): + + Keyword Args: + _check_type (bool): if True, values for parameters in openapi_types + will be type checked and a TypeError will be + raised if the wrong type is input. + Defaults to True + _path_to_item (tuple/list): This is a list of keys or values to + drill down to the model in received_data + when deserializing a response + _spec_property_naming (bool): True if the variable names in the input data + are serialized names, as specified in the OpenAPI document. + False if the variable names in the input data + are pythonic names, e.g. snake case (default) + _configuration (Configuration): the instance to use when + deserializing a file_type parameter. + If passed, type conversion is attempted + If omitted no type conversion is done. + _visited_composed_classes (tuple): This stores a tuple of + classes that we have traveled through so that + if we see that class again we will not use its + discriminator again. + When traveling through a discriminator, the + composed schema that is + is traveled through is added to this set. + For example if Animal has a discriminator + petType and we pass in "Dog", and the class Dog + allOf includes Animal, we move through Animal + once using the discriminator, and pick Dog. + Then in Dog, we will make an instance of the + Animal class but this time we won't travel + through its discriminator because we passed in + _visited_composed_classes = (Animal,) + """ + + _check_type = kwargs.pop('_check_type', True) + _spec_property_naming = kwargs.pop('_spec_property_naming', False) + _path_to_item = kwargs.pop('_path_to_item', ()) + _configuration = kwargs.pop('_configuration', None) + _visited_composed_classes = kwargs.pop('_visited_composed_classes', ()) + + if args: + raise ApiTypeError( + "Invalid positional arguments=%s passed to %s. Remove those invalid positional arguments." % ( + args, + self.__class__.__name__, + ), + path_to_item=_path_to_item, + valid_classes=(self.__class__,), + ) + + self._data_store = {} + self._check_type = _check_type + self._spec_property_naming = _spec_property_naming + self._path_to_item = _path_to_item + self._configuration = _configuration + self._visited_composed_classes = _visited_composed_classes + (self.__class__,) + + self.default_retention_days = default_retention_days + self.branches = branches + for var_name, var_value in kwargs.items(): + if var_name not in self.attribute_map and \ + self._configuration is not None and \ + self._configuration.discard_unknown_keys and \ + self.additional_properties_type is None: + # discard variable. + continue + setattr(self, var_name, var_value) diff --git a/clients/python/lakefs_client/models/__init__.py b/clients/python/lakefs_client/models/__init__.py index b414c34ad80..e56835fd07d 100644 --- a/clients/python/lakefs_client/models/__init__.py +++ b/clients/python/lakefs_client/models/__init__.py @@ -26,6 +26,8 @@ from lakefs_client.model.error import Error from lakefs_client.model.garbage_collection_commits import GarbageCollectionCommits from lakefs_client.model.garbage_collection_prepare_request import GarbageCollectionPrepareRequest +from lakefs_client.model.garbage_collection_rule import GarbageCollectionRule +from lakefs_client.model.garbage_collection_rules import GarbageCollectionRules from lakefs_client.model.group import Group from lakefs_client.model.group_creation import GroupCreation from lakefs_client.model.group_list import GroupList diff --git a/clients/python/test/test_garbage_collection_rule.py b/clients/python/test/test_garbage_collection_rule.py new file mode 100644 index 00000000000..da4b70b86ca --- /dev/null +++ b/clients/python/test/test_garbage_collection_rule.py @@ -0,0 +1,36 @@ +""" + lakeFS API + + lakeFS HTTP API # noqa: E501 + + The version of the OpenAPI document: 0.1.0 + Contact: services@treeverse.io + Generated by: https://openapi-generator.tech +""" + + +import sys +import unittest + +import lakefs_client +from lakefs_client.model.garbage_collection_rule import GarbageCollectionRule + + +class TestGarbageCollectionRule(unittest.TestCase): + """GarbageCollectionRule unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def testGarbageCollectionRule(self): + """Test GarbageCollectionRule""" + # FIXME: construct object with mandatory attributes with example values + # model = GarbageCollectionRule() # noqa: E501 + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/clients/python/test/test_garbage_collection_rules.py b/clients/python/test/test_garbage_collection_rules.py new file mode 100644 index 00000000000..45eba4a7ad7 --- /dev/null +++ b/clients/python/test/test_garbage_collection_rules.py @@ -0,0 +1,38 @@ +""" + lakeFS API + + lakeFS HTTP API # noqa: E501 + + The version of the OpenAPI document: 0.1.0 + Contact: services@treeverse.io + Generated by: https://openapi-generator.tech +""" + + +import sys +import unittest + +import lakefs_client +from lakefs_client.model.garbage_collection_rule import GarbageCollectionRule +globals()['GarbageCollectionRule'] = GarbageCollectionRule +from lakefs_client.model.garbage_collection_rules import GarbageCollectionRules + + +class TestGarbageCollectionRules(unittest.TestCase): + """GarbageCollectionRules unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def testGarbageCollectionRules(self): + """Test GarbageCollectionRules""" + # FIXME: construct object with mandatory attributes with example values + # model = GarbageCollectionRules() # noqa: E501 + pass + + +if __name__ == '__main__': + unittest.main() From 759f5fe2336438aadfe75a0b9f9f53116026a52f Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Tue, 15 Jun 2021 16:05:25 +0300 Subject: [PATCH 17/24] gen --- clients/python/.openapi-generator/FILES | 2 -- 1 file changed, 2 deletions(-) diff --git a/clients/python/.openapi-generator/FILES b/clients/python/.openapi-generator/FILES index 4714a7b2a47..8be69338551 100644 --- a/clients/python/.openapi-generator/FILES +++ b/clients/python/.openapi-generator/FILES @@ -151,6 +151,4 @@ setup.cfg setup.py test-requirements.txt test/__init__.py -test/test_garbage_collection_rule.py -test/test_garbage_collection_rules.py tox.ini From b18aca6a7a7a8993cf1fa3c2110d7bd529f7a3ac Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Tue, 15 Jun 2021 17:38:27 +0300 Subject: [PATCH 18/24] use config --- pkg/catalog/catalog.go | 2 +- pkg/catalog/fake_graveler_test.go | 8 ++++++++ pkg/config/config.go | 4 ++++ pkg/graveler/graveler.go | 8 ++++---- pkg/graveler/retention/garbage_collection.go | 16 +++++++++------- 5 files changed, 26 insertions(+), 12 deletions(-) diff --git a/pkg/catalog/catalog.go b/pkg/catalog/catalog.go index 75c0044326b..b1eda1c53df 100644 --- a/pkg/catalog/catalog.go +++ b/pkg/catalog/catalog.go @@ -185,7 +185,7 @@ func New(ctx context.Context, cfg Config) (*Catalog, error) { refManager := ref.NewPGRefManager(executor, cfg.DB, ident.NewHexAddressProvider()) branchLocker := ref.NewBranchLocker(cfg.LockDB) - retentionRuleManager := retention.NewRuleManager(tierFSParams.Adapter) + retentionRuleManager := retention.NewRuleManager(tierFSParams.Adapter, cfg.Config.GetCommittedBlockStoragePrefix()) store := graveler.NewGraveler(branchLocker, committedManager, stagingManager, refManager, retentionRuleManager) return &Catalog{ diff --git a/pkg/catalog/fake_graveler_test.go b/pkg/catalog/fake_graveler_test.go index 9a726e6e1b4..9faeb186d7e 100644 --- a/pkg/catalog/fake_graveler_test.go +++ b/pkg/catalog/fake_graveler_test.go @@ -19,6 +19,14 @@ type FakeGraveler struct { hooks graveler.HooksHandler } +func (g *FakeGraveler) GetRetentionRules(ctx context.Context, repositoryID graveler.RepositoryID) (*graveler.RetentionRules, error) { + panic("implement me") +} + +func (g *FakeGraveler) SetRetentionRules(ctx context.Context, repositoryID graveler.RepositoryID, rules *graveler.RetentionRules) error { + panic("implement me") +} + func (g *FakeGraveler) GetExpiredCommits(ctx context.Context, repositoryID graveler.RepositoryID, previouslyExpiredCommits []graveler.CommitID) (expired []graveler.CommitID, active []graveler.CommitID, err error) { panic("implement me") } diff --git a/pkg/config/config.go b/pkg/config/config.go index 91064f2b9d0..7d6401fc45c 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -402,3 +402,7 @@ func (c *Config) GetCommittedParams() *committed.Params { func (c *Config) GetFixedInstallationID() string { return c.values.Installation.FixedID } + +func (c *Config) GetCommittedBlockStoragePrefix() string { + return c.values.Committed.BlockStoragePrefix +} diff --git a/pkg/graveler/graveler.go b/pkg/graveler/graveler.go index a6a1febdbc5..e17bf51e0e0 100644 --- a/pkg/graveler/graveler.go +++ b/pkg/graveler/graveler.go @@ -885,7 +885,7 @@ func (g *Graveler) GetRetentionRules(ctx context.Context, repositoryID Repositor if err != nil { return nil, err } - return g.retentionRuleManager.GetRules(ctx, fmt.Sprintf("%s/_lakefs/retention/rules/config.json", repo.StorageNamespace)) + return g.retentionRuleManager.GetRules(ctx, string(repo.StorageNamespace)) } func (g *Graveler) SetRetentionRules(ctx context.Context, repositoryID RepositoryID, rules *RetentionRules) error { @@ -894,7 +894,7 @@ func (g *Graveler) SetRetentionRules(ctx context.Context, repositoryID Repositor if err != nil { return err } - return g.retentionRuleManager.SaveRules(ctx, fmt.Sprintf("%s/_lakefs/retention/rules/config.json", repo.StorageNamespace), rules) + return g.retentionRuleManager.SaveRules(ctx, string(repo.StorageNamespace), rules) } func (g *Graveler) GetExpiredCommits(ctx context.Context, repositoryID RepositoryID, previouslyExpiredCommits []CommitID) (expired []CommitID, active []CommitID, err error) { @@ -2030,8 +2030,8 @@ func (c *commitValueIterator) Close() { } type RetentionRuleManager interface { - GetRules(ctx context.Context, rulesConfigurationPath string) (*RetentionRules, error) - SaveRules(ctx context.Context, rulesConfigurationPath string, rules *RetentionRules) error + GetRules(ctx context.Context, configurationFilePrefix string) (*RetentionRules, error) + SaveRules(ctx context.Context, configurationFilePrefix string, rules *RetentionRules) error } type RetentionRules struct { diff --git a/pkg/graveler/retention/garbage_collection.go b/pkg/graveler/retention/garbage_collection.go index a65dba20556..87d46d3a4a6 100644 --- a/pkg/graveler/retention/garbage_collection.go +++ b/pkg/graveler/retention/garbage_collection.go @@ -4,22 +4,24 @@ import ( "bytes" "context" "encoding/json" + "fmt" "github.com/treeverse/lakefs/pkg/block" "github.com/treeverse/lakefs/pkg/graveler" ) type RuleManager struct { - blockAdapter block.Adapter + configurationFileSuffix string + blockAdapter block.Adapter } -func NewRuleManager(blockAdapter block.Adapter) *RuleManager { - return &RuleManager{blockAdapter: blockAdapter} +func NewRuleManager(blockAdapter block.Adapter, blockStoragePrefix string) *RuleManager { + return &RuleManager{blockAdapter: blockAdapter, configurationFileSuffix: fmt.Sprintf("/%s/retention/rules/config.json", blockStoragePrefix)} } -func (m *RuleManager) GetRules(ctx context.Context, rulesConfigurationPath string) (*graveler.RetentionRules, error) { +func (m *RuleManager) GetRules(ctx context.Context, configurationFilePrefix string) (*graveler.RetentionRules, error) { reader, err := m.blockAdapter.Get(ctx, block.ObjectPointer{ - Identifier: rulesConfigurationPath, + Identifier: configurationFilePrefix + m.configurationFileSuffix, IdentifierType: block.IdentifierTypeFull, }, -1) if err != nil { @@ -36,13 +38,13 @@ func (m *RuleManager) GetRules(ctx context.Context, rulesConfigurationPath strin return &rules, nil } -func (m *RuleManager) SaveRules(ctx context.Context, rulesConfigurationPath string, rules *graveler.RetentionRules) error { +func (m *RuleManager) SaveRules(ctx context.Context, configurationFilePrefix string, rules *graveler.RetentionRules) error { rulesBytes, err := json.Marshal(rules) if err != nil { return err } return m.blockAdapter.Put(ctx, block.ObjectPointer{ - Identifier: rulesConfigurationPath, + Identifier: configurationFilePrefix + m.configurationFileSuffix, IdentifierType: block.IdentifierTypeFull, }, int64(len(rulesBytes)), bytes.NewReader(rulesBytes), block.PutOpts{}) } From 3acef2f6930b27f35d065db360412bed9d8a9351 Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Wed, 16 Jun 2021 17:18:08 +0300 Subject: [PATCH 19/24] CR fixes: fix abstraction leaks, naming changes --- api/swagger.yml | 15 +- clients/java/README.md | 2 +- clients/java/api/openapi.yaml | 18 +- .../docs/GarbageCollectionPrepareRequest.md | 2 +- .../docs/GarbageCollectionPrepareResponse.md | 13 ++ clients/java/docs/RetentionApi.md | 6 +- .../io/lakefs/clients/api/RetentionApi.java | 18 +- .../GarbageCollectionPrepareRequest.java | 30 ++-- .../GarbageCollectionPrepareResponse.java | 97 ++++++++++ .../GarbageCollectionPrepareResponseTest.java | 51 ++++++ clients/python/.openapi-generator/FILES | 4 +- clients/python/README.md | 2 +- .../docs/GarbageCollectionPrepareRequest.md | 2 +- .../docs/GarbageCollectionPrepareResponse.md | 11 ++ clients/python/docs/RetentionApi.md | 8 +- .../python/lakefs_client/api/retention_api.py | 6 +- .../garbage_collection_prepare_request.py | 6 +- .../garbage_collection_prepare_response.py | 170 ++++++++++++++++++ .../python/lakefs_client/models/__init__.py | 2 +- ...est_garbage_collection_prepare_response.py | 36 ++++ pkg/api/controller.go | 14 +- pkg/catalog/catalog.go | 72 +------- pkg/catalog/fake_graveler_test.go | 6 +- pkg/catalog/interface.go | 6 +- pkg/graveler/graveler.go | 82 +++++---- pkg/graveler/ref/manager.go | 5 - .../{ref => retention}/expired_commits.go | 17 +- .../expired_commits_test.go | 13 +- pkg/graveler/retention/garbage_collection.go | 91 ++++++++-- pkg/graveler/testutil/fakes.go | 4 - 30 files changed, 596 insertions(+), 213 deletions(-) create mode 100644 clients/java/docs/GarbageCollectionPrepareResponse.md create mode 100644 clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareResponse.java create mode 100644 clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionPrepareResponseTest.java create mode 100644 clients/python/docs/GarbageCollectionPrepareResponse.md create mode 100644 clients/python/lakefs_client/model/garbage_collection_prepare_response.py create mode 100644 clients/python/test/test_garbage_collection_prepare_response.py rename pkg/graveler/{ref => retention}/expired_commits.go (80%) rename pkg/graveler/{ref => retention}/expired_commits_test.go (92%) diff --git a/api/swagger.yml b/api/swagger.yml index 9bd04840bb9..e02d265e412 100644 --- a/api/swagger.yml +++ b/api/swagger.yml @@ -820,17 +820,18 @@ components: GarbageCollectionPrepareRequest: type: object properties: - previous_result_path: + previous_run_id: type: string - description: path to the result of a previous successful GC job + description: run id of a previous successful GC job - GarbageCollectionCommits: + GarbageCollectionPrepareResponse: type: object properties: - path: + run_id: type: string - description: path to a dataset of commits - + description: a unique identifier generated for this GC job + required: + - run_id GarbageCollectionRule: type: object properties: @@ -2929,7 +2930,7 @@ paths: content: application/json: schema: - $ref: "#/components/schemas/GarbageCollectionCommits" + $ref: "#/components/schemas/GarbageCollectionPrepareResponse" 401: $ref: "#/components/responses/Unauthorized" 404: diff --git a/clients/java/README.md b/clients/java/README.md index 05e1da90138..880b62eddeb 100644 --- a/clients/java/README.md +++ b/clients/java/README.md @@ -218,8 +218,8 @@ Class | Method | HTTP request | Description - [Diff](docs/Diff.md) - [DiffList](docs/DiffList.md) - [Error](docs/Error.md) - - [GarbageCollectionCommits](docs/GarbageCollectionCommits.md) - [GarbageCollectionPrepareRequest](docs/GarbageCollectionPrepareRequest.md) + - [GarbageCollectionPrepareResponse](docs/GarbageCollectionPrepareResponse.md) - [GarbageCollectionRule](docs/GarbageCollectionRule.md) - [GarbageCollectionRules](docs/GarbageCollectionRules.md) - [Group](docs/Group.md) diff --git a/clients/java/api/openapi.yaml b/clients/java/api/openapi.yaml index f9e9266b5df..d6edce2749f 100644 --- a/clients/java/api/openapi.yaml +++ b/clients/java/api/openapi.yaml @@ -3821,7 +3821,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/GarbageCollectionCommits' + $ref: '#/components/schemas/GarbageCollectionPrepareResponse' description: paths to commit dataset "401": content: @@ -5020,19 +5020,21 @@ components: type: object GarbageCollectionPrepareRequest: example: - previous_result_path: previous_result_path + previous_run_id: previous_run_id properties: - previous_result_path: - description: path to the result of a previous successful GC job + previous_run_id: + description: run id of a previous successful GC job type: string type: object - GarbageCollectionCommits: + GarbageCollectionPrepareResponse: example: - path: path + run_id: run_id properties: - path: - description: path to a dataset of commits + run_id: + description: a unique identifier generated for this GC job type: string + required: + - run_id type: object GarbageCollectionRule: example: diff --git a/clients/java/docs/GarbageCollectionPrepareRequest.md b/clients/java/docs/GarbageCollectionPrepareRequest.md index b2e0cbd9ea7..2b1d232528f 100644 --- a/clients/java/docs/GarbageCollectionPrepareRequest.md +++ b/clients/java/docs/GarbageCollectionPrepareRequest.md @@ -7,7 +7,7 @@ Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- -**previousResultPath** | **String** | path to the result of a previous successful GC job | [optional] +**previousRunId** | **String** | run id of a previous successful GC job | [optional] diff --git a/clients/java/docs/GarbageCollectionPrepareResponse.md b/clients/java/docs/GarbageCollectionPrepareResponse.md new file mode 100644 index 00000000000..4f10ed59b7e --- /dev/null +++ b/clients/java/docs/GarbageCollectionPrepareResponse.md @@ -0,0 +1,13 @@ + + +# GarbageCollectionPrepareResponse + + +## Properties + +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**runId** | **String** | a unique identifier generated for this GC job | + + + diff --git a/clients/java/docs/RetentionApi.md b/clients/java/docs/RetentionApi.md index c9b956ba3a1..8459d12e2d0 100644 --- a/clients/java/docs/RetentionApi.md +++ b/clients/java/docs/RetentionApi.md @@ -89,7 +89,7 @@ Name | Type | Description | Notes # **prepareGarbageCollectionCommits** -> GarbageCollectionCommits prepareGarbageCollectionCommits(repository, garbageCollectionPrepareRequest) +> GarbageCollectionPrepareResponse prepareGarbageCollectionCommits(repository, garbageCollectionPrepareRequest) save lists of active and expired commits for garbage collection @@ -127,7 +127,7 @@ public class Example { String repository = "repository_example"; // String | GarbageCollectionPrepareRequest garbageCollectionPrepareRequest = new GarbageCollectionPrepareRequest(); // GarbageCollectionPrepareRequest | try { - GarbageCollectionCommits result = apiInstance.prepareGarbageCollectionCommits(repository, garbageCollectionPrepareRequest); + GarbageCollectionPrepareResponse result = apiInstance.prepareGarbageCollectionCommits(repository, garbageCollectionPrepareRequest); System.out.println(result); } catch (ApiException e) { System.err.println("Exception when calling RetentionApi#prepareGarbageCollectionCommits"); @@ -149,7 +149,7 @@ Name | Type | Description | Notes ### Return type -[**GarbageCollectionCommits**](GarbageCollectionCommits.md) +[**GarbageCollectionPrepareResponse**](GarbageCollectionPrepareResponse.md) ### Authorization diff --git a/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java b/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java index 829436837cc..97e8113df2c 100644 --- a/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java +++ b/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java @@ -28,8 +28,8 @@ import io.lakefs.clients.api.model.Error; -import io.lakefs.clients.api.model.GarbageCollectionCommits; import io.lakefs.clients.api.model.GarbageCollectionPrepareRequest; +import io.lakefs.clients.api.model.GarbageCollectionPrepareResponse; import io.lakefs.clients.api.model.GarbageCollectionRules; import java.lang.reflect.Type; @@ -243,7 +243,7 @@ private okhttp3.Call prepareGarbageCollectionCommitsValidateBeforeCall(String re * * @param repository (required) * @param garbageCollectionPrepareRequest (optional) - * @return GarbageCollectionCommits + * @return GarbageCollectionPrepareResponse * @throws ApiException If fail to call the API, e.g. server error or cannot deserialize the response body * @http.response.details @@ -254,8 +254,8 @@ private okhttp3.Call prepareGarbageCollectionCommitsValidateBeforeCall(String re
0 Internal Server Error -
*/ - public GarbageCollectionCommits prepareGarbageCollectionCommits(String repository, GarbageCollectionPrepareRequest garbageCollectionPrepareRequest) throws ApiException { - ApiResponse localVarResp = prepareGarbageCollectionCommitsWithHttpInfo(repository, garbageCollectionPrepareRequest); + public GarbageCollectionPrepareResponse prepareGarbageCollectionCommits(String repository, GarbageCollectionPrepareRequest garbageCollectionPrepareRequest) throws ApiException { + ApiResponse localVarResp = prepareGarbageCollectionCommitsWithHttpInfo(repository, garbageCollectionPrepareRequest); return localVarResp.getData(); } @@ -264,7 +264,7 @@ public GarbageCollectionCommits prepareGarbageCollectionCommits(String repositor * * @param repository (required) * @param garbageCollectionPrepareRequest (optional) - * @return ApiResponse<GarbageCollectionCommits> + * @return ApiResponse<GarbageCollectionPrepareResponse> * @throws ApiException If fail to call the API, e.g. server error or cannot deserialize the response body * @http.response.details @@ -275,9 +275,9 @@ public GarbageCollectionCommits prepareGarbageCollectionCommits(String repositor
0 Internal Server Error -
*/ - public ApiResponse prepareGarbageCollectionCommitsWithHttpInfo(String repository, GarbageCollectionPrepareRequest garbageCollectionPrepareRequest) throws ApiException { + public ApiResponse prepareGarbageCollectionCommitsWithHttpInfo(String repository, GarbageCollectionPrepareRequest garbageCollectionPrepareRequest) throws ApiException { okhttp3.Call localVarCall = prepareGarbageCollectionCommitsValidateBeforeCall(repository, garbageCollectionPrepareRequest, null); - Type localVarReturnType = new TypeToken(){}.getType(); + Type localVarReturnType = new TypeToken(){}.getType(); return localVarApiClient.execute(localVarCall, localVarReturnType); } @@ -298,10 +298,10 @@ public ApiResponse prepareGarbageCollectionCommitsWith 0 Internal Server Error - */ - public okhttp3.Call prepareGarbageCollectionCommitsAsync(String repository, GarbageCollectionPrepareRequest garbageCollectionPrepareRequest, final ApiCallback _callback) throws ApiException { + public okhttp3.Call prepareGarbageCollectionCommitsAsync(String repository, GarbageCollectionPrepareRequest garbageCollectionPrepareRequest, final ApiCallback _callback) throws ApiException { okhttp3.Call localVarCall = prepareGarbageCollectionCommitsValidateBeforeCall(repository, garbageCollectionPrepareRequest, _callback); - Type localVarReturnType = new TypeToken(){}.getType(); + Type localVarReturnType = new TypeToken(){}.getType(); localVarApiClient.executeAsync(localVarCall, localVarReturnType, _callback); return localVarCall; } diff --git a/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareRequest.java b/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareRequest.java index 4dfe2e54455..a56fd16f7cb 100644 --- a/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareRequest.java +++ b/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareRequest.java @@ -29,31 +29,31 @@ */ @javax.annotation.Generated(value = "org.openapitools.codegen.languages.JavaClientCodegen") public class GarbageCollectionPrepareRequest { - public static final String SERIALIZED_NAME_PREVIOUS_RESULT_PATH = "previous_result_path"; - @SerializedName(SERIALIZED_NAME_PREVIOUS_RESULT_PATH) - private String previousResultPath; + public static final String SERIALIZED_NAME_PREVIOUS_RUN_ID = "previous_run_id"; + @SerializedName(SERIALIZED_NAME_PREVIOUS_RUN_ID) + private String previousRunId; - public GarbageCollectionPrepareRequest previousResultPath(String previousResultPath) { + public GarbageCollectionPrepareRequest previousRunId(String previousRunId) { - this.previousResultPath = previousResultPath; + this.previousRunId = previousRunId; return this; } /** - * path to the result of a previous successful GC job - * @return previousResultPath + * run id of a previous successful GC job + * @return previousRunId **/ @javax.annotation.Nullable - @ApiModelProperty(value = "path to the result of a previous successful GC job") + @ApiModelProperty(value = "run id of a previous successful GC job") - public String getPreviousResultPath() { - return previousResultPath; + public String getPreviousRunId() { + return previousRunId; } - public void setPreviousResultPath(String previousResultPath) { - this.previousResultPath = previousResultPath; + public void setPreviousRunId(String previousRunId) { + this.previousRunId = previousRunId; } @@ -66,19 +66,19 @@ public boolean equals(Object o) { return false; } GarbageCollectionPrepareRequest garbageCollectionPrepareRequest = (GarbageCollectionPrepareRequest) o; - return Objects.equals(this.previousResultPath, garbageCollectionPrepareRequest.previousResultPath); + return Objects.equals(this.previousRunId, garbageCollectionPrepareRequest.previousRunId); } @Override public int hashCode() { - return Objects.hash(previousResultPath); + return Objects.hash(previousRunId); } @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append("class GarbageCollectionPrepareRequest {\n"); - sb.append(" previousResultPath: ").append(toIndentedString(previousResultPath)).append("\n"); + sb.append(" previousRunId: ").append(toIndentedString(previousRunId)).append("\n"); sb.append("}"); return sb.toString(); } diff --git a/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareResponse.java b/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareResponse.java new file mode 100644 index 00000000000..45418cf838d --- /dev/null +++ b/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareResponse.java @@ -0,0 +1,97 @@ +/* + * lakeFS API + * lakeFS HTTP API + * + * The version of the OpenAPI document: 0.1.0 + * + * + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. + */ + + +package io.lakefs.clients.api.model; + +import java.util.Objects; +import java.util.Arrays; +import com.google.gson.TypeAdapter; +import com.google.gson.annotations.JsonAdapter; +import com.google.gson.annotations.SerializedName; +import com.google.gson.stream.JsonReader; +import com.google.gson.stream.JsonWriter; +import io.swagger.annotations.ApiModel; +import io.swagger.annotations.ApiModelProperty; +import java.io.IOException; + +/** + * GarbageCollectionPrepareResponse + */ +@javax.annotation.Generated(value = "org.openapitools.codegen.languages.JavaClientCodegen") +public class GarbageCollectionPrepareResponse { + public static final String SERIALIZED_NAME_RUN_ID = "run_id"; + @SerializedName(SERIALIZED_NAME_RUN_ID) + private String runId; + + + public GarbageCollectionPrepareResponse runId(String runId) { + + this.runId = runId; + return this; + } + + /** + * a unique identifier generated for this GC job + * @return runId + **/ + @ApiModelProperty(required = true, value = "a unique identifier generated for this GC job") + + public String getRunId() { + return runId; + } + + + public void setRunId(String runId) { + this.runId = runId; + } + + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + GarbageCollectionPrepareResponse garbageCollectionPrepareResponse = (GarbageCollectionPrepareResponse) o; + return Objects.equals(this.runId, garbageCollectionPrepareResponse.runId); + } + + @Override + public int hashCode() { + return Objects.hash(runId); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("class GarbageCollectionPrepareResponse {\n"); + sb.append(" runId: ").append(toIndentedString(runId)).append("\n"); + sb.append("}"); + return sb.toString(); + } + + /** + * Convert the given object to string with each line indented by 4 spaces + * (except the first line). + */ + private String toIndentedString(Object o) { + if (o == null) { + return "null"; + } + return o.toString().replace("\n", "\n "); + } + +} + diff --git a/clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionPrepareResponseTest.java b/clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionPrepareResponseTest.java new file mode 100644 index 00000000000..89ea8c31a5c --- /dev/null +++ b/clients/java/src/test/java/io/lakefs/clients/api/model/GarbageCollectionPrepareResponseTest.java @@ -0,0 +1,51 @@ +/* + * lakeFS API + * lakeFS HTTP API + * + * The version of the OpenAPI document: 0.1.0 + * + * + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. + */ + + +package io.lakefs.clients.api.model; + +import com.google.gson.TypeAdapter; +import com.google.gson.annotations.JsonAdapter; +import com.google.gson.annotations.SerializedName; +import com.google.gson.stream.JsonReader; +import com.google.gson.stream.JsonWriter; +import io.swagger.annotations.ApiModel; +import io.swagger.annotations.ApiModelProperty; +import java.io.IOException; +import org.junit.Assert; +import org.junit.Ignore; +import org.junit.Test; + + +/** + * Model tests for GarbageCollectionPrepareResponse + */ +public class GarbageCollectionPrepareResponseTest { + private final GarbageCollectionPrepareResponse model = new GarbageCollectionPrepareResponse(); + + /** + * Model tests for GarbageCollectionPrepareResponse + */ + @Test + public void testGarbageCollectionPrepareResponse() { + // TODO: test GarbageCollectionPrepareResponse + } + + /** + * Test the property 'runId' + */ + @Test + public void runIdTest() { + // TODO: test runId + } + +} diff --git a/clients/python/.openapi-generator/FILES b/clients/python/.openapi-generator/FILES index 8be69338551..4c860f2148e 100644 --- a/clients/python/.openapi-generator/FILES +++ b/clients/python/.openapi-generator/FILES @@ -21,8 +21,8 @@ docs/CurrentUser.md docs/Diff.md docs/DiffList.md docs/Error.md -docs/GarbageCollectionCommits.md docs/GarbageCollectionPrepareRequest.md +docs/GarbageCollectionPrepareResponse.md docs/GarbageCollectionRule.md docs/GarbageCollectionRules.md docs/Group.md @@ -103,8 +103,8 @@ lakefs_client/model/current_user.py lakefs_client/model/diff.py lakefs_client/model/diff_list.py lakefs_client/model/error.py -lakefs_client/model/garbage_collection_commits.py lakefs_client/model/garbage_collection_prepare_request.py +lakefs_client/model/garbage_collection_prepare_response.py lakefs_client/model/garbage_collection_rule.py lakefs_client/model/garbage_collection_rules.py lakefs_client/model/group.py diff --git a/clients/python/README.md b/clients/python/README.md index c3b448c956b..48f234df38d 100644 --- a/clients/python/README.md +++ b/clients/python/README.md @@ -199,8 +199,8 @@ Class | Method | HTTP request | Description - [Diff](docs/Diff.md) - [DiffList](docs/DiffList.md) - [Error](docs/Error.md) - - [GarbageCollectionCommits](docs/GarbageCollectionCommits.md) - [GarbageCollectionPrepareRequest](docs/GarbageCollectionPrepareRequest.md) + - [GarbageCollectionPrepareResponse](docs/GarbageCollectionPrepareResponse.md) - [GarbageCollectionRule](docs/GarbageCollectionRule.md) - [GarbageCollectionRules](docs/GarbageCollectionRules.md) - [Group](docs/Group.md) diff --git a/clients/python/docs/GarbageCollectionPrepareRequest.md b/clients/python/docs/GarbageCollectionPrepareRequest.md index e1af9eace17..b6655591525 100644 --- a/clients/python/docs/GarbageCollectionPrepareRequest.md +++ b/clients/python/docs/GarbageCollectionPrepareRequest.md @@ -4,7 +4,7 @@ ## Properties Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- -**previous_result_path** | **str** | path to the result of a previous successful GC job | [optional] +**previous_run_id** | **str** | run id of a previous successful GC job | [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/clients/python/docs/GarbageCollectionPrepareResponse.md b/clients/python/docs/GarbageCollectionPrepareResponse.md new file mode 100644 index 00000000000..4192853e6ca --- /dev/null +++ b/clients/python/docs/GarbageCollectionPrepareResponse.md @@ -0,0 +1,11 @@ +# GarbageCollectionPrepareResponse + + +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**run_id** | **str** | a unique identifier generated for this GC job | + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/clients/python/docs/RetentionApi.md b/clients/python/docs/RetentionApi.md index 612dd7d3820..2d2d4c98bf2 100644 --- a/clients/python/docs/RetentionApi.md +++ b/clients/python/docs/RetentionApi.md @@ -99,7 +99,7 @@ Name | Type | Description | Notes [[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) # **prepare_garbage_collection_commits** -> GarbageCollectionCommits prepare_garbage_collection_commits(repository) +> GarbageCollectionPrepareResponse prepare_garbage_collection_commits(repository) save lists of active and expired commits for garbage collection @@ -112,8 +112,8 @@ save lists of active and expired commits for garbage collection import time import lakefs_client from lakefs_client.api import retention_api -from lakefs_client.model.garbage_collection_commits import GarbageCollectionCommits from lakefs_client.model.error import Error +from lakefs_client.model.garbage_collection_prepare_response import GarbageCollectionPrepareResponse from lakefs_client.model.garbage_collection_prepare_request import GarbageCollectionPrepareRequest from pprint import pprint # Defining the host is optional and defaults to http://localhost/api/v1 @@ -150,7 +150,7 @@ with lakefs_client.ApiClient(configuration) as api_client: api_instance = retention_api.RetentionApi(api_client) repository = "repository_example" # str | garbage_collection_prepare_request = GarbageCollectionPrepareRequest( - previous_result_path="previous_result_path_example", + previous_run_id="previous_run_id_example", ) # GarbageCollectionPrepareRequest | (optional) # example passing only required values which don't have defaults set @@ -181,7 +181,7 @@ Name | Type | Description | Notes ### Return type -[**GarbageCollectionCommits**](GarbageCollectionCommits.md) +[**GarbageCollectionPrepareResponse**](GarbageCollectionPrepareResponse.md) ### Authorization diff --git a/clients/python/lakefs_client/api/retention_api.py b/clients/python/lakefs_client/api/retention_api.py index d966e0cf2f5..6e619b36f0d 100644 --- a/clients/python/lakefs_client/api/retention_api.py +++ b/clients/python/lakefs_client/api/retention_api.py @@ -23,8 +23,8 @@ validate_and_convert_types ) from lakefs_client.model.error import Error -from lakefs_client.model.garbage_collection_commits import GarbageCollectionCommits from lakefs_client.model.garbage_collection_prepare_request import GarbageCollectionPrepareRequest +from lakefs_client.model.garbage_collection_prepare_response import GarbageCollectionPrepareResponse from lakefs_client.model.garbage_collection_rules import GarbageCollectionRules @@ -199,7 +199,7 @@ def __prepare_garbage_collection_commits( async_req (bool): execute request asynchronously Returns: - GarbageCollectionCommits + GarbageCollectionPrepareResponse If the method is called asynchronously, returns the request thread. """ @@ -228,7 +228,7 @@ def __prepare_garbage_collection_commits( self.prepare_garbage_collection_commits = _Endpoint( settings={ - 'response_type': (GarbageCollectionCommits,), + 'response_type': (GarbageCollectionPrepareResponse,), 'auth': [ 'basic_auth', 'cookie_auth', diff --git a/clients/python/lakefs_client/model/garbage_collection_prepare_request.py b/clients/python/lakefs_client/model/garbage_collection_prepare_request.py index a3a425c606b..9824a9a2755 100644 --- a/clients/python/lakefs_client/model/garbage_collection_prepare_request.py +++ b/clients/python/lakefs_client/model/garbage_collection_prepare_request.py @@ -73,7 +73,7 @@ def openapi_types(): and the value is attribute type. """ return { - 'previous_result_path': (str,), # noqa: E501 + 'previous_run_id': (str,), # noqa: E501 } @cached_property @@ -82,7 +82,7 @@ def discriminator(): attribute_map = { - 'previous_result_path': 'previous_result_path', # noqa: E501 + 'previous_run_id': 'previous_run_id', # noqa: E501 } _composed_schemas = {} @@ -131,7 +131,7 @@ def __init__(self, *args, **kwargs): # noqa: E501 Animal class but this time we won't travel through its discriminator because we passed in _visited_composed_classes = (Animal,) - previous_result_path (str): path to the result of a previous successful GC job. [optional] # noqa: E501 + previous_run_id (str): run id of a previous successful GC job. [optional] # noqa: E501 """ _check_type = kwargs.pop('_check_type', True) diff --git a/clients/python/lakefs_client/model/garbage_collection_prepare_response.py b/clients/python/lakefs_client/model/garbage_collection_prepare_response.py new file mode 100644 index 00000000000..5d0674cad32 --- /dev/null +++ b/clients/python/lakefs_client/model/garbage_collection_prepare_response.py @@ -0,0 +1,170 @@ +""" + lakeFS API + + lakeFS HTTP API # noqa: E501 + + The version of the OpenAPI document: 0.1.0 + Contact: services@treeverse.io + Generated by: https://openapi-generator.tech +""" + + +import re # noqa: F401 +import sys # noqa: F401 + +from lakefs_client.model_utils import ( # noqa: F401 + ApiTypeError, + ModelComposed, + ModelNormal, + ModelSimple, + cached_property, + change_keys_js_to_python, + convert_js_args_to_python_args, + date, + datetime, + file_type, + none_type, + validate_get_composed_info, +) + + +class GarbageCollectionPrepareResponse(ModelNormal): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + + Attributes: + allowed_values (dict): The key is the tuple path to the attribute + and the for var_name this is (var_name,). The value is a dict + with a capitalized key describing the allowed value and an allowed + value. These dicts store the allowed enum values. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + discriminator_value_class_map (dict): A dict to go from the discriminator + variable value to the discriminator class name. + validations (dict): The key is the tuple path to the attribute + and the for var_name this is (var_name,). The value is a dict + that stores validations for max_length, min_length, max_items, + min_items, exclusive_maximum, inclusive_maximum, exclusive_minimum, + inclusive_minimum, and regex. + additional_properties_type (tuple): A tuple of classes accepted + as additional properties values. + """ + + allowed_values = { + } + + validations = { + } + + additional_properties_type = None + + _nullable = False + + @cached_property + def openapi_types(): + """ + This must be a method because a model may have properties that are + of type self, this must run after the class is loaded + + Returns + openapi_types (dict): The key is attribute name + and the value is attribute type. + """ + return { + 'run_id': (str,), # noqa: E501 + } + + @cached_property + def discriminator(): + return None + + + attribute_map = { + 'run_id': 'run_id', # noqa: E501 + } + + _composed_schemas = {} + + required_properties = set([ + '_data_store', + '_check_type', + '_spec_property_naming', + '_path_to_item', + '_configuration', + '_visited_composed_classes', + ]) + + @convert_js_args_to_python_args + def __init__(self, run_id, *args, **kwargs): # noqa: E501 + """GarbageCollectionPrepareResponse - a model defined in OpenAPI + + Args: + run_id (str): a unique identifier generated for this GC job + + Keyword Args: + _check_type (bool): if True, values for parameters in openapi_types + will be type checked and a TypeError will be + raised if the wrong type is input. + Defaults to True + _path_to_item (tuple/list): This is a list of keys or values to + drill down to the model in received_data + when deserializing a response + _spec_property_naming (bool): True if the variable names in the input data + are serialized names, as specified in the OpenAPI document. + False if the variable names in the input data + are pythonic names, e.g. snake case (default) + _configuration (Configuration): the instance to use when + deserializing a file_type parameter. + If passed, type conversion is attempted + If omitted no type conversion is done. + _visited_composed_classes (tuple): This stores a tuple of + classes that we have traveled through so that + if we see that class again we will not use its + discriminator again. + When traveling through a discriminator, the + composed schema that is + is traveled through is added to this set. + For example if Animal has a discriminator + petType and we pass in "Dog", and the class Dog + allOf includes Animal, we move through Animal + once using the discriminator, and pick Dog. + Then in Dog, we will make an instance of the + Animal class but this time we won't travel + through its discriminator because we passed in + _visited_composed_classes = (Animal,) + """ + + _check_type = kwargs.pop('_check_type', True) + _spec_property_naming = kwargs.pop('_spec_property_naming', False) + _path_to_item = kwargs.pop('_path_to_item', ()) + _configuration = kwargs.pop('_configuration', None) + _visited_composed_classes = kwargs.pop('_visited_composed_classes', ()) + + if args: + raise ApiTypeError( + "Invalid positional arguments=%s passed to %s. Remove those invalid positional arguments." % ( + args, + self.__class__.__name__, + ), + path_to_item=_path_to_item, + valid_classes=(self.__class__,), + ) + + self._data_store = {} + self._check_type = _check_type + self._spec_property_naming = _spec_property_naming + self._path_to_item = _path_to_item + self._configuration = _configuration + self._visited_composed_classes = _visited_composed_classes + (self.__class__,) + + self.run_id = run_id + for var_name, var_value in kwargs.items(): + if var_name not in self.attribute_map and \ + self._configuration is not None and \ + self._configuration.discard_unknown_keys and \ + self.additional_properties_type is None: + # discard variable. + continue + setattr(self, var_name, var_value) diff --git a/clients/python/lakefs_client/models/__init__.py b/clients/python/lakefs_client/models/__init__.py index e56835fd07d..9b62b7150d9 100644 --- a/clients/python/lakefs_client/models/__init__.py +++ b/clients/python/lakefs_client/models/__init__.py @@ -24,8 +24,8 @@ from lakefs_client.model.diff import Diff from lakefs_client.model.diff_list import DiffList from lakefs_client.model.error import Error -from lakefs_client.model.garbage_collection_commits import GarbageCollectionCommits from lakefs_client.model.garbage_collection_prepare_request import GarbageCollectionPrepareRequest +from lakefs_client.model.garbage_collection_prepare_response import GarbageCollectionPrepareResponse from lakefs_client.model.garbage_collection_rule import GarbageCollectionRule from lakefs_client.model.garbage_collection_rules import GarbageCollectionRules from lakefs_client.model.group import Group diff --git a/clients/python/test/test_garbage_collection_prepare_response.py b/clients/python/test/test_garbage_collection_prepare_response.py new file mode 100644 index 00000000000..1755a0c993f --- /dev/null +++ b/clients/python/test/test_garbage_collection_prepare_response.py @@ -0,0 +1,36 @@ +""" + lakeFS API + + lakeFS HTTP API # noqa: E501 + + The version of the OpenAPI document: 0.1.0 + Contact: services@treeverse.io + Generated by: https://openapi-generator.tech +""" + + +import sys +import unittest + +import lakefs_client +from lakefs_client.model.garbage_collection_prepare_response import GarbageCollectionPrepareResponse + + +class TestGarbageCollectionPrepareResponse(unittest.TestCase): + """GarbageCollectionPrepareResponse unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def testGarbageCollectionPrepareResponse(self): + """Test GarbageCollectionPrepareResponse""" + # FIXME: construct object with mandatory attributes with example values + # model = GarbageCollectionPrepareResponse() # noqa: E501 + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/pkg/api/controller.go b/pkg/api/controller.go index 606c4435be0..34ad1df7b6b 100644 --- a/pkg/api/controller.go +++ b/pkg/api/controller.go @@ -1924,7 +1924,7 @@ func (c *Controller) GetGarbageCollectionRules(w http.ResponseWriter, r *http.Re return } ctx := r.Context() - rules, err := c.Catalog.GetRetentionRules(ctx, repository) + rules, err := c.Catalog.GetGarbageCollectionRules(ctx, repository) if handleAPIError(w, err) { return } @@ -1946,14 +1946,14 @@ func (c *Controller) SetGarbageCollectionRules(w http.ResponseWriter, r *http.Re return } ctx := r.Context() - rules := &graveler.RetentionRules{ + rules := &graveler.GarbageCollectionRules{ DefaultRetentionDays: body.DefaultRetentionDays, BranchRetentionDays: make(map[graveler.BranchID]int), } for _, rule := range body.Branches { rules.BranchRetentionDays[graveler.BranchID(rule.BranchId)] = rule.RetentionDays } - err := c.Catalog.SetRetentionRules(ctx, repository, rules) + err := c.Catalog.SetGarbageCollectionRules(ctx, repository, rules) if handleAPIError(w, err) { return } @@ -1971,15 +1971,11 @@ func (c *Controller) PrepareGarbageCollectionCommits(w http.ResponseWriter, r *h } ctx := r.Context() c.LogAction(ctx, "prepare_garbage_collection_commits") - previousResultPath := "" - if body.PreviousResultPath != nil { - previousResultPath = *body.PreviousResultPath - } - pth, err := c.Catalog.PrepareExpiredCommits(ctx, repository, previousResultPath) + runID, err := c.Catalog.PrepareExpiredCommits(ctx, repository, swag.StringValue(body.PreviousRunId)) if handleAPIError(w, err) { return } - writeResponse(w, http.StatusCreated, GarbageCollectionCommits{Path: swag.String(pth)}) + writeResponse(w, http.StatusCreated, GarbageCollectionPrepareResponse{RunId: runID}) } func (c *Controller) GetMetaRange(w http.ResponseWriter, r *http.Request, repository string, metaRange string) { diff --git a/pkg/catalog/catalog.go b/pkg/catalog/catalog.go index b1eda1c53df..22133ff87ba 100644 --- a/pkg/catalog/catalog.go +++ b/pkg/catalog/catalog.go @@ -4,15 +4,12 @@ import ( "context" "crypto" _ "crypto/sha256" - "encoding/csv" "errors" "fmt" "io" - "strconv" "strings" "github.com/cockroachdb/pebble" - "github.com/google/uuid" "github.com/hashicorp/go-multierror" "github.com/treeverse/lakefs/pkg/batch" "github.com/treeverse/lakefs/pkg/block" @@ -185,8 +182,8 @@ func New(ctx context.Context, cfg Config) (*Catalog, error) { refManager := ref.NewPGRefManager(executor, cfg.DB, ident.NewHexAddressProvider()) branchLocker := ref.NewBranchLocker(cfg.LockDB) - retentionRuleManager := retention.NewRuleManager(tierFSParams.Adapter, cfg.Config.GetCommittedBlockStoragePrefix()) - store := graveler.NewGraveler(branchLocker, committedManager, stagingManager, refManager, retentionRuleManager) + gcManager := retention.NewGarbageCollectionManager(tierFSParams.Adapter, refManager, refManager, cfg.Config.GetCommittedBlockStoragePrefix()) + store := graveler.NewGraveler(branchLocker, committedManager, stagingManager, refManager, gcManager) return &Catalog{ BlockAdapter: tierFSParams.Adapter, @@ -1180,75 +1177,22 @@ func (c *Catalog) GetRange(ctx context.Context, repositoryID, rangeID string) (g return c.Store.GetRange(ctx, graveler.RepositoryID(repositoryID), graveler.RangeID(rangeID)) } -func (c *Catalog) GetRetentionRules(ctx context.Context, repositoryID string) (*graveler.RetentionRules, error) { - return c.Store.GetRetentionRules(ctx, graveler.RepositoryID(repositoryID)) +func (c *Catalog) GetGarbageCollectionRules(ctx context.Context, repositoryID string) (*graveler.GarbageCollectionRules, error) { + return c.Store.GetGarbageCollectionRules(ctx, graveler.RepositoryID(repositoryID)) } -func (c *Catalog) SetRetentionRules(ctx context.Context, repositoryID string, rules *graveler.RetentionRules) error { - return c.Store.SetRetentionRules(ctx, graveler.RepositoryID(repositoryID), rules) +func (c *Catalog) SetGarbageCollectionRules(ctx context.Context, repositoryID string, rules *graveler.GarbageCollectionRules) error { + return c.Store.SetGarbageCollectionRules(ctx, graveler.RepositoryID(repositoryID), rules) } -func (c *Catalog) PrepareExpiredCommits(ctx context.Context, repository string, previousResultPath string) (string, error) { +func (c *Catalog) PrepareExpiredCommits(ctx context.Context, repository string, previousRunID string) (string, error) { repositoryID := graveler.RepositoryID(repository) if err := Validate([]ValidateArg{ {"repositoryID", repositoryID, ValidateRepositoryID}, }); err != nil { return "", err } - repo, err := c.Store.GetRepository(ctx, repositoryID) - if err != nil { - return "", err - } - previouslyExpiredCommits := make([]graveler.CommitID, 0) - if previousResultPath != "" { - previousRunReader, err := c.BlockAdapter.Get(ctx, block.ObjectPointer{ - StorageNamespace: string(repo.StorageNamespace), - Identifier: previousResultPath, - IdentifierType: block.IdentifierTypeRelative, - }, -1) - if err != nil { - return "", err - } - csvReader := csv.NewReader(previousRunReader) - previousCommits, err := csvReader.ReadAll() - if err != nil { - return "", err - } - for _, commitRow := range previousCommits { - previouslyExpiredCommits = append(previouslyExpiredCommits, graveler.CommitID(commitRow[1])) - } - } - activeCommits, expiredCommits, err := c.Store.GetExpiredCommits(ctx, repositoryID, previouslyExpiredCommits) - if err != nil { - return "", fmt.Errorf("preparing expired commits: %w", err) - } - b := &strings.Builder{} - csvWriter := csv.NewWriter(b) - for _, commitID := range expiredCommits { - err = csvWriter.Write([]string{string(commitID), strconv.FormatBool(true)}) - if err != nil { - return "", err - } - } - for _, commitID := range activeCommits { - err = csvWriter.Write([]string{string(commitID), strconv.FormatBool(false)}) - if err != nil { - return "", err - } - } - csvWriter.Flush() - commitsStr := b.String() - runID := uuid.New().String() - path := fmt.Sprintf("_lakefs/retention/commits/run_id=%s/commits.csv", runID) - err = c.BlockAdapter.Put(ctx, block.ObjectPointer{ - StorageNamespace: string(repo.StorageNamespace), - Identifier: path, - IdentifierType: block.IdentifierTypeRelative, - }, int64(len(commitsStr)), strings.NewReader(commitsStr), block.PutOpts{}) - if err != nil { - return "", err - } - return fmt.Sprintf("%s/%s", repo.StorageNamespace, path), nil + return c.Store.SaveGarbageCollectionCommits(ctx, repositoryID, previousRunID) } func (c *Catalog) Close() error { diff --git a/pkg/catalog/fake_graveler_test.go b/pkg/catalog/fake_graveler_test.go index 9faeb186d7e..91f93a8df8f 100644 --- a/pkg/catalog/fake_graveler_test.go +++ b/pkg/catalog/fake_graveler_test.go @@ -19,15 +19,15 @@ type FakeGraveler struct { hooks graveler.HooksHandler } -func (g *FakeGraveler) GetRetentionRules(ctx context.Context, repositoryID graveler.RepositoryID) (*graveler.RetentionRules, error) { +func (g *FakeGraveler) SaveGarbageCollectionCommits(ctx context.Context, repositoryID graveler.RepositoryID, previousRunID string) (runID string, err error) { panic("implement me") } -func (g *FakeGraveler) SetRetentionRules(ctx context.Context, repositoryID graveler.RepositoryID, rules *graveler.RetentionRules) error { +func (g *FakeGraveler) GetGarbageCollectionRules(ctx context.Context, repositoryID graveler.RepositoryID) (*graveler.GarbageCollectionRules, error) { panic("implement me") } -func (g *FakeGraveler) GetExpiredCommits(ctx context.Context, repositoryID graveler.RepositoryID, previouslyExpiredCommits []graveler.CommitID) (expired []graveler.CommitID, active []graveler.CommitID, err error) { +func (g *FakeGraveler) SetGarbageCollectionRules(ctx context.Context, repositoryID graveler.RepositoryID, rules *graveler.GarbageCollectionRules) error { panic("implement me") } diff --git a/pkg/catalog/interface.go b/pkg/catalog/interface.go index e9f9428615d..653916cdadb 100644 --- a/pkg/catalog/interface.go +++ b/pkg/catalog/interface.go @@ -116,9 +116,9 @@ type Interface interface { GetMetaRange(ctx context.Context, repositoryID, metaRangeID string) (graveler.MetaRangeInfo, error) GetRange(ctx context.Context, repositoryID, rangeID string) (graveler.RangeInfo, error) - GetRetentionRules(ctx context.Context, repositoryID string) (*graveler.RetentionRules, error) - SetRetentionRules(ctx context.Context, repositoryID string, rules *graveler.RetentionRules) error - PrepareExpiredCommits(ctx context.Context, repositoryID string, previousResultPath string) (string, error) + GetGarbageCollectionRules(ctx context.Context, repositoryID string) (*graveler.GarbageCollectionRules, error) + SetGarbageCollectionRules(ctx context.Context, repositoryID string, rules *graveler.GarbageCollectionRules) error + PrepareExpiredCommits(ctx context.Context, repositoryID string, previousRunID string) (string, error) io.Closer } diff --git a/pkg/graveler/graveler.go b/pkg/graveler/graveler.go index e17bf51e0e0..a750552c53b 100644 --- a/pkg/graveler/graveler.go +++ b/pkg/graveler/graveler.go @@ -371,13 +371,14 @@ type VersionController interface { // repositoryID. GetStagingToken(ctx context.Context, repositoryID RepositoryID, branchID BranchID) (*StagingToken, error) - GetRetentionRules(ctx context.Context, repositoryID RepositoryID) (*RetentionRules, error) + GetGarbageCollectionRules(ctx context.Context, repositoryID RepositoryID) (*GarbageCollectionRules, error) - SetRetentionRules(ctx context.Context, repositoryID RepositoryID, rules *RetentionRules) error + SetGarbageCollectionRules(ctx context.Context, repositoryID RepositoryID, rules *GarbageCollectionRules) error - // GetExpiredCommits returns the sets of active and expired commits, according to the branch rules for garbage collection. - // The commits in the given set previouslyExpiredCommits will not be scanned. - GetExpiredCommits(ctx context.Context, repositoryID RepositoryID, previouslyExpiredCommits []CommitID) (expired []CommitID, active []CommitID, err error) + // SaveGarbageCollectionCommits saves the sets of active and expired commits, according to the branch rules for garbage collection. + // Returns a run id which can later be used to retrieve the set of commits. + // If a previousRunID is specified, commits that were already expired in that run will not be scanned. + SaveGarbageCollectionCommits(ctx context.Context, repositoryID RepositoryID, previousRunID string) (runID string, err error) } // Plumbing includes commands for fiddling more directly with graveler implementation @@ -551,10 +552,6 @@ type RefManager interface { // FillGenerations computes and updates the generation field for all commits in a repository. // It should be used for restoring commits from a commit-dump which was performed before the field was introduced. FillGenerations(ctx context.Context, repositoryID RepositoryID) error - - // GetExpiredCommits returns the sets of active and expired commits, according to the branch rules for garbage collection. - // The commits in the given set previouslyExpiredCommits will not be scanned. - GetExpiredCommits(ctx context.Context, repositoryID RepositoryID, previouslyExpiredCommits []CommitID, rules *RetentionRules) (expired []CommitID, active []CommitID, err error) } // CommittedManager reads and applies committed snapshots @@ -668,24 +665,24 @@ func (id TagID) String() string { } type Graveler struct { - CommittedManager CommittedManager - StagingManager StagingManager - RefManager RefManager - branchLocker BranchLocker - hooks HooksHandler - retentionRuleManager RetentionRuleManager - log logging.Logger + CommittedManager CommittedManager + StagingManager StagingManager + RefManager RefManager + branchLocker BranchLocker + hooks HooksHandler + garbageCollectionManager GarbageCollectionManager + log logging.Logger } -func NewGraveler(branchLocker BranchLocker, committedManager CommittedManager, stagingManager StagingManager, refManager RefManager, retentionRuleManager RetentionRuleManager) *Graveler { +func NewGraveler(branchLocker BranchLocker, committedManager CommittedManager, stagingManager StagingManager, refManager RefManager, gcManager GarbageCollectionManager) *Graveler { return &Graveler{ - CommittedManager: committedManager, - StagingManager: stagingManager, - RefManager: refManager, - branchLocker: branchLocker, - hooks: &HooksNoOp{}, - retentionRuleManager: retentionRuleManager, - log: logging.Default().WithField("service_name", "graveler_graveler"), + CommittedManager: committedManager, + StagingManager: stagingManager, + RefManager: refManager, + branchLocker: branchLocker, + hooks: &HooksNoOp{}, + garbageCollectionManager: gcManager, + log: logging.Default().WithField("service_name", "graveler_graveler"), } } @@ -879,30 +876,36 @@ func (g *Graveler) GetStagingToken(ctx context.Context, repositoryID RepositoryI return &branch.StagingToken, nil } -func (g *Graveler) GetRetentionRules(ctx context.Context, repositoryID RepositoryID) (*RetentionRules, error) { - // TODO use "_lakefs" from configuration +func (g *Graveler) GetGarbageCollectionRules(ctx context.Context, repositoryID RepositoryID) (*GarbageCollectionRules, error) { repo, err := g.RefManager.GetRepository(ctx, repositoryID) if err != nil { return nil, err } - return g.retentionRuleManager.GetRules(ctx, string(repo.StorageNamespace)) + return g.garbageCollectionManager.GetRules(ctx, repo.StorageNamespace) } -func (g *Graveler) SetRetentionRules(ctx context.Context, repositoryID RepositoryID, rules *RetentionRules) error { - // TODO use "_lakefs" from configuration +func (g *Graveler) SetGarbageCollectionRules(ctx context.Context, repositoryID RepositoryID, rules *GarbageCollectionRules) error { repo, err := g.RefManager.GetRepository(ctx, repositoryID) if err != nil { return err } - return g.retentionRuleManager.SaveRules(ctx, string(repo.StorageNamespace), rules) + return g.garbageCollectionManager.SaveRules(ctx, repo.StorageNamespace, rules) } -func (g *Graveler) GetExpiredCommits(ctx context.Context, repositoryID RepositoryID, previouslyExpiredCommits []CommitID) (expired []CommitID, active []CommitID, err error) { - rules, err := g.GetRetentionRules(ctx, repositoryID) +func (g *Graveler) SaveGarbageCollectionCommits(ctx context.Context, repositoryID RepositoryID, previousRunID string) (runID string, err error) { + rules, err := g.GetGarbageCollectionRules(ctx, repositoryID) + if err != nil { + return "", fmt.Errorf("get gc rules: %w", err) + } + repo, err := g.RefManager.GetRepository(ctx, repositoryID) if err != nil { - return nil, nil, fmt.Errorf("get retention rules: %w", err) + return "", fmt.Errorf("get repository: %w", err) } - return g.RefManager.GetExpiredCommits(ctx, repositoryID, previouslyExpiredCommits, rules) + previouslyExpiredCommits, err := g.garbageCollectionManager.GetRunExpiredCommits(ctx, repo.StorageNamespace, previousRunID) + if err != nil { + return "", fmt.Errorf("get expired commits from previous run: %w", err) + } + return g.garbageCollectionManager.SaveGarbageCollectionCommits(ctx, repo.StorageNamespace, repositoryID, rules, previouslyExpiredCommits) } func (g *Graveler) Get(ctx context.Context, repositoryID RepositoryID, ref Ref, key Key) (*Value, error) { @@ -2029,12 +2032,15 @@ func (c *commitValueIterator) Close() { c.src.Close() } -type RetentionRuleManager interface { - GetRules(ctx context.Context, configurationFilePrefix string) (*RetentionRules, error) - SaveRules(ctx context.Context, configurationFilePrefix string, rules *RetentionRules) error +type GarbageCollectionManager interface { + GetRules(ctx context.Context, storageNamespace StorageNamespace) (*GarbageCollectionRules, error) + SaveRules(ctx context.Context, storageNamespace StorageNamespace, rules *GarbageCollectionRules) error + + SaveGarbageCollectionCommits(ctx context.Context, storageNamespace StorageNamespace, repositoryID RepositoryID, rules *GarbageCollectionRules, previouslyExpiredCommits []CommitID) (string, error) + GetRunExpiredCommits(ctx context.Context, storageNamespace StorageNamespace, runID string) ([]CommitID, error) } -type RetentionRules struct { +type GarbageCollectionRules struct { DefaultRetentionDays int `json:"default"` BranchRetentionDays map[BranchID]int `json:"branches"` } diff --git a/pkg/graveler/ref/manager.go b/pkg/graveler/ref/manager.go index ec1ec976d0c..24f0c27f166 100644 --- a/pkg/graveler/ref/manager.go +++ b/pkg/graveler/ref/manager.go @@ -410,8 +410,3 @@ func (m *Manager) FillGenerations(ctx context.Context, repositoryID graveler.Rep }) return err } - -func (m *Manager) GetExpiredCommits(ctx context.Context, repositoryID graveler.RepositoryID, previouslyExpiredCommits []graveler.CommitID, rules *graveler.RetentionRules) (expired []graveler.CommitID, active []graveler.CommitID, err error) { - finder := NewExpiredCommitsFinder(m, m, rules) - return finder.GetExpiredCommits(ctx, repositoryID, previouslyExpiredCommits) -} diff --git a/pkg/graveler/ref/expired_commits.go b/pkg/graveler/retention/expired_commits.go similarity index 80% rename from pkg/graveler/ref/expired_commits.go rename to pkg/graveler/retention/expired_commits.go index c1810f02bf6..87b3e700e94 100644 --- a/pkg/graveler/ref/expired_commits.go +++ b/pkg/graveler/retention/expired_commits.go @@ -1,4 +1,4 @@ -package ref +package retention import ( "context" @@ -7,21 +7,16 @@ import ( "github.com/treeverse/lakefs/pkg/graveler" ) -type ExpirationDateGetter interface { - Get(c *graveler.CommitRecord) time.Time -} - type ExpiredCommitsFinder struct { branchLister graveler.BranchLister commitGetter graveler.CommitGetter - rules *graveler.RetentionRules } -func NewExpiredCommitsFinder(branchLister graveler.BranchLister, commitGetter graveler.CommitGetter, rules *graveler.RetentionRules) *ExpiredCommitsFinder { - return &ExpiredCommitsFinder{branchLister: branchLister, commitGetter: commitGetter, rules: rules} +func NewExpiredCommitsFinder(branchLister graveler.BranchLister, commitGetter graveler.CommitGetter) *ExpiredCommitsFinder { + return &ExpiredCommitsFinder{branchLister: branchLister, commitGetter: commitGetter} } -func (e *ExpiredCommitsFinder) GetExpiredCommits(ctx context.Context, repositoryID graveler.RepositoryID, previouslyExpiredCommits []graveler.CommitID) (expired []graveler.CommitID, active []graveler.CommitID, err error) { +func (e *ExpiredCommitsFinder) GetExpiredCommits(ctx context.Context, repositoryID graveler.RepositoryID, rules *graveler.GarbageCollectionRules, previouslyExpiredCommits []graveler.CommitID) (expired []graveler.CommitID, active []graveler.CommitID, err error) { now := time.Now() processed := make(map[graveler.CommitID]time.Time) @@ -37,8 +32,8 @@ func (e *ExpiredCommitsFinder) GetExpiredCommits(ctx context.Context, repository expiredMap := make(map[graveler.CommitID]bool) for branchIterator.Next() { branchRecord := branchIterator.Value() - branchExpirationThreshold := now.AddDate(0, 0, -e.rules.DefaultRetentionDays) - if branchExpirationPeriod, ok := e.rules.BranchRetentionDays[branchRecord.BranchID]; ok { + branchExpirationThreshold := now.AddDate(0, 0, -rules.DefaultRetentionDays) + if branchExpirationPeriod, ok := rules.BranchRetentionDays[branchRecord.BranchID]; ok { branchExpirationThreshold = now.AddDate(0, 0, -branchExpirationPeriod) } commitID := branchRecord.CommitID diff --git a/pkg/graveler/ref/expired_commits_test.go b/pkg/graveler/retention/expired_commits_test.go similarity index 92% rename from pkg/graveler/ref/expired_commits_test.go rename to pkg/graveler/retention/expired_commits_test.go index 0de373d126e..a452677555a 100644 --- a/pkg/graveler/ref/expired_commits_test.go +++ b/pkg/graveler/retention/expired_commits_test.go @@ -1,4 +1,4 @@ -package ref +package retention import ( "context" @@ -150,17 +150,17 @@ func TestExpiredCommits(t *testing.T) { ctrl := gomock.NewController(t) refManagerMock := mock.NewMockRefManager(ctrl) ctx := context.Background() - retentionRules := &graveler.RetentionRules{DefaultRetentionDays: 0, BranchRetentionDays: make(map[graveler.BranchID]int)} + garbageCollectionRules := &graveler.GarbageCollectionRules{DefaultRetentionDays: 0, BranchRetentionDays: make(map[graveler.BranchID]int)} for head, retentionDays := range tst.headsRetentionDays { branchRecords = append(branchRecords, &graveler.BranchRecord{ BranchID: graveler.BranchID(head), Branch: &graveler.Branch{CommitID: graveler.CommitID(head)}, }) - retentionRules.BranchRetentionDays[graveler.BranchID(head)] = retentionDays + garbageCollectionRules.BranchRetentionDays[graveler.BranchID(head)] = retentionDays } sort.Slice(branchRecords, func(i, j int) bool { - // start with the branch with the strictest retention rules - return retentionRules.BranchRetentionDays[branchRecords[i].BranchID] > retentionRules.BranchRetentionDays[branchRecords[j].BranchID] + // start with the branch with the strictest gc rules + return garbageCollectionRules.BranchRetentionDays[branchRecords[i].BranchID] > garbageCollectionRules.BranchRetentionDays[branchRecords[j].BranchID] }) branchIterator := gtestutil.NewFakeBranchIterator(branchRecords) refManagerMock.EXPECT().ListBranches(ctx, graveler.RepositoryID("test")).Return(branchIterator, nil) @@ -176,13 +176,12 @@ func TestExpiredCommits(t *testing.T) { finder := ExpiredCommitsFinder{ commitGetter: refManagerMock, branchLister: refManagerMock, - rules: retentionRules, } previouslyExpiredCommitIDs := make([]graveler.CommitID, len(tst.previouslyExpired)) for i := range tst.previouslyExpired { previouslyExpiredCommitIDs[i] = graveler.CommitID(tst.previouslyExpired[i]) } - activeCommits, expiredCommits, err := finder.GetExpiredCommits(ctx, "test", previouslyExpiredCommitIDs) + activeCommits, expiredCommits, err := finder.GetExpiredCommits(ctx, "test", garbageCollectionRules, previouslyExpiredCommitIDs) if err != nil { t.Fatalf("failed to find expired commits: %v", err) } diff --git a/pkg/graveler/retention/garbage_collection.go b/pkg/graveler/retention/garbage_collection.go index 87d46d3a4a6..b99718e61fa 100644 --- a/pkg/graveler/retention/garbage_collection.go +++ b/pkg/graveler/retention/garbage_collection.go @@ -3,25 +3,39 @@ package retention import ( "bytes" "context" + "encoding/csv" "encoding/json" "fmt" + "strconv" + "strings" + "github.com/google/uuid" "github.com/treeverse/lakefs/pkg/block" "github.com/treeverse/lakefs/pkg/graveler" ) -type RuleManager struct { - configurationFileSuffix string - blockAdapter block.Adapter +const ( + configFileSuffixTemplate = "/%s/retention/gc/rules/config.json" + commitsFileSuffixTemplate = "/%s/retention/gc/commits/run_id=%s/commits.csv" +) + +type GarbageCollectionManager struct { + blockAdapter block.Adapter + expiredCommitsFinder *ExpiredCommitsFinder + committedBlockStoragePrefix string } -func NewRuleManager(blockAdapter block.Adapter, blockStoragePrefix string) *RuleManager { - return &RuleManager{blockAdapter: blockAdapter, configurationFileSuffix: fmt.Sprintf("/%s/retention/rules/config.json", blockStoragePrefix)} +func NewGarbageCollectionManager(blockAdapter block.Adapter, commitGetter graveler.CommitGetter, branchLister graveler.BranchLister, committedBlockStoragePrefix string) *GarbageCollectionManager { + return &GarbageCollectionManager{ + blockAdapter: blockAdapter, + expiredCommitsFinder: NewExpiredCommitsFinder(branchLister, commitGetter), + committedBlockStoragePrefix: committedBlockStoragePrefix, + } } -func (m *RuleManager) GetRules(ctx context.Context, configurationFilePrefix string) (*graveler.RetentionRules, error) { +func (m *GarbageCollectionManager) GetRules(ctx context.Context, storageNamespace graveler.StorageNamespace) (*graveler.GarbageCollectionRules, error) { reader, err := m.blockAdapter.Get(ctx, block.ObjectPointer{ - Identifier: configurationFilePrefix + m.configurationFileSuffix, + Identifier: string(storageNamespace) + fmt.Sprintf(configFileSuffixTemplate, m.committedBlockStoragePrefix), IdentifierType: block.IdentifierTypeFull, }, -1) if err != nil { @@ -30,7 +44,7 @@ func (m *RuleManager) GetRules(ctx context.Context, configurationFilePrefix stri defer func() { _ = reader.Close() }() - var rules graveler.RetentionRules + var rules graveler.GarbageCollectionRules err = json.NewDecoder(reader).Decode(&rules) if err != nil { return nil, err @@ -38,13 +52,70 @@ func (m *RuleManager) GetRules(ctx context.Context, configurationFilePrefix stri return &rules, nil } -func (m *RuleManager) SaveRules(ctx context.Context, configurationFilePrefix string, rules *graveler.RetentionRules) error { +func (m *GarbageCollectionManager) SaveRules(ctx context.Context, storageNamespace graveler.StorageNamespace, rules *graveler.GarbageCollectionRules) error { rulesBytes, err := json.Marshal(rules) if err != nil { return err } return m.blockAdapter.Put(ctx, block.ObjectPointer{ - Identifier: configurationFilePrefix + m.configurationFileSuffix, + Identifier: string(storageNamespace) + fmt.Sprintf(configFileSuffixTemplate, m.committedBlockStoragePrefix), IdentifierType: block.IdentifierTypeFull, }, int64(len(rulesBytes)), bytes.NewReader(rulesBytes), block.PutOpts{}) } + +func (m *GarbageCollectionManager) GetRunExpiredCommits(ctx context.Context, storageNamespace graveler.StorageNamespace, runID string) ([]graveler.CommitID, error) { + if runID == "" { + return nil, nil + } + previousRunReader, err := m.blockAdapter.Get(ctx, block.ObjectPointer{ + Identifier: string(storageNamespace) + fmt.Sprintf(commitsFileSuffixTemplate, m.committedBlockStoragePrefix, runID), + IdentifierType: block.IdentifierTypeFull, + }, -1) + if err != nil { + return nil, err + } + csvReader := csv.NewReader(previousRunReader) + previousCommits, err := csvReader.ReadAll() + if err != nil { + return nil, err + } + res := make([]graveler.CommitID, 0) + for _, commitRow := range previousCommits { + if commitRow[1] == "true" { + res = append(res, graveler.CommitID(commitRow[0])) + } + } + return res, nil +} + +func (m *GarbageCollectionManager) SaveGarbageCollectionCommits(ctx context.Context, storageNamespace graveler.StorageNamespace, repositoryID graveler.RepositoryID, rules *graveler.GarbageCollectionRules, previouslyExpiredCommits []graveler.CommitID) (string, error) { + active, expired, err := m.expiredCommitsFinder.GetExpiredCommits(ctx, repositoryID, rules, previouslyExpiredCommits) + if err != nil { + return "", fmt.Errorf("find expired commits: %w", err) + } + b := &strings.Builder{} + csvWriter := csv.NewWriter(b) + for _, commitID := range expired { + err := csvWriter.Write([]string{string(commitID), strconv.FormatBool(true)}) + if err != nil { + return "", err + } + } + for _, commitID := range active { + err := csvWriter.Write([]string{string(commitID), strconv.FormatBool(false)}) + if err != nil { + return "", err + } + } + csvWriter.Flush() + commitsStr := b.String() + runID := uuid.New().String() + err = m.blockAdapter.Put(ctx, block.ObjectPointer{ + Identifier: string(storageNamespace) + fmt.Sprintf(commitsFileSuffixTemplate, m.committedBlockStoragePrefix, runID), + IdentifierType: block.IdentifierTypeFull, + }, int64(len(commitsStr)), strings.NewReader(commitsStr), block.PutOpts{}) + if err != nil { + return "", err + } + return runID, nil +} diff --git a/pkg/graveler/testutil/fakes.go b/pkg/graveler/testutil/fakes.go index b6a73ebeb9b..ed9289bc899 100644 --- a/pkg/graveler/testutil/fakes.go +++ b/pkg/graveler/testutil/fakes.go @@ -204,10 +204,6 @@ type RefsFake struct { Commits map[graveler.CommitID]*graveler.Commit } -func (m *RefsFake) GetExpiredCommits(ctx context.Context, repositoryID graveler.RepositoryID, previouslyExpiredCommits []graveler.CommitID, rules *graveler.RetentionRules) (expired []graveler.CommitID, active []graveler.CommitID, err error) { - panic("implement me") -} - func (m *RefsFake) FillGenerations(ctx context.Context, repositoryID graveler.RepositoryID) error { panic("implement me") } From d02f0c736ea9ac27007236869f70d858c28f582d Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Sun, 20 Jun 2021 10:22:10 +0300 Subject: [PATCH 20/24] CR fixes --- api/swagger.yml | 9 +- clients/java/README.md | 2 +- clients/java/api/openapi.yaml | 20 ++- clients/java/docs/RetentionApi.md | 5 +- .../io/lakefs/clients/api/RetentionApi.java | 14 +- clients/python/README.md | 2 +- clients/python/docs/RetentionApi.md | 5 +- .../python/lakefs_client/api/retention_api.py | 2 +- pkg/api/controller.go | 16 +-- pkg/graveler/graveler.go | 8 +- pkg/graveler/graveler.pb.go | 123 +++++++++++++++--- pkg/graveler/graveler.proto | 5 + pkg/graveler/retention/expired_commits.go | 47 ++++--- .../retention/expired_commits_test.go | 38 +++--- pkg/graveler/retention/garbage_collection.go | 22 ++-- pkg/permissions/actions.go | 5 +- 16 files changed, 233 insertions(+), 90 deletions(-) diff --git a/api/swagger.yml b/api/swagger.yml index e02d265e412..ba549da785f 100644 --- a/api/swagger.yml +++ b/api/swagger.yml @@ -2888,6 +2888,8 @@ paths: $ref: "#/components/schemas/GarbageCollectionRules" 401: $ref: "#/components/responses/Unauthorized" + 404: + $ref: "#/components/responses/NotFound" default: $ref: "#/components/responses/ServerError" post: @@ -2903,11 +2905,14 @@ paths: responses: 204: description: set garbage collection rules successfully + 401: + $ref: "#/components/responses/Unauthorized" + 404: + $ref: "#/components/responses/NotFound" default: $ref: "#/components/responses/ServerError" - - /repositories/{repository}/gc/prepare: + /repositories/{repository}/gc/prepare_commits: parameters: - in: path name: repository diff --git a/clients/java/README.md b/clients/java/README.md index 880b62eddeb..f782225780e 100644 --- a/clients/java/README.md +++ b/clients/java/README.md @@ -191,7 +191,7 @@ Class | Method | HTTP request | Description *RepositoriesApi* | [**getRepository**](docs/RepositoriesApi.md#getRepository) | **GET** /repositories/{repository} | get repository *RepositoriesApi* | [**listRepositories**](docs/RepositoriesApi.md#listRepositories) | **GET** /repositories | list repositories *RetentionApi* | [**getGarbageCollectionRules**](docs/RetentionApi.md#getGarbageCollectionRules) | **GET** /repositories/{repository}/gc/rules | -*RetentionApi* | [**prepareGarbageCollectionCommits**](docs/RetentionApi.md#prepareGarbageCollectionCommits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection +*RetentionApi* | [**prepareGarbageCollectionCommits**](docs/RetentionApi.md#prepareGarbageCollectionCommits) | **POST** /repositories/{repository}/gc/prepare_commits | save lists of active and expired commits for garbage collection *RetentionApi* | [**setGarbageCollectionRules**](docs/RetentionApi.md#setGarbageCollectionRules) | **POST** /repositories/{repository}/gc/rules | *StagingApi* | [**getPhysicalAddress**](docs/StagingApi.md#getPhysicalAddress) | **GET** /repositories/{repository}/branches/{branch}/staging/backing | get a physical address and a return token to write object to underlying storage *StagingApi* | [**linkPhysicalAddress**](docs/StagingApi.md#linkPhysicalAddress) | **PUT** /repositories/{repository}/branches/{branch}/staging/backing | associate staging on this physical address with a path diff --git a/clients/java/api/openapi.yaml b/clients/java/api/openapi.yaml index d6edce2749f..e0271e394a6 100644 --- a/clients/java/api/openapi.yaml +++ b/clients/java/api/openapi.yaml @@ -3762,6 +3762,12 @@ paths: schema: $ref: '#/components/schemas/Error' description: Unauthorized + "404": + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + description: Resource Not Found default: content: application/json: @@ -3790,6 +3796,18 @@ paths: responses: "204": description: set garbage collection rules successfully + "401": + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + description: Unauthorized + "404": + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + description: Resource Not Found default: content: application/json: @@ -3800,7 +3818,7 @@ paths: - retention x-contentType: application/json x-accepts: application/json - /repositories/{repository}/gc/prepare: + /repositories/{repository}/gc/prepare_commits: post: operationId: prepareGarbageCollectionCommits parameters: diff --git a/clients/java/docs/RetentionApi.md b/clients/java/docs/RetentionApi.md index 8459d12e2d0..e8c4df78b1b 100644 --- a/clients/java/docs/RetentionApi.md +++ b/clients/java/docs/RetentionApi.md @@ -5,7 +5,7 @@ All URIs are relative to *http://localhost/api/v1* Method | HTTP request | Description ------------- | ------------- | ------------- [**getGarbageCollectionRules**](RetentionApi.md#getGarbageCollectionRules) | **GET** /repositories/{repository}/gc/rules | -[**prepareGarbageCollectionCommits**](RetentionApi.md#prepareGarbageCollectionCommits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection +[**prepareGarbageCollectionCommits**](RetentionApi.md#prepareGarbageCollectionCommits) | **POST** /repositories/{repository}/gc/prepare_commits | save lists of active and expired commits for garbage collection [**setGarbageCollectionRules**](RetentionApi.md#setGarbageCollectionRules) | **POST** /repositories/{repository}/gc/rules | @@ -85,6 +85,7 @@ Name | Type | Description | Notes |-------------|-------------|------------------| **200** | gc rule list | - | **401** | Unauthorized | - | +**404** | Resource Not Found | - | **0** | Internal Server Error | - | @@ -244,5 +245,7 @@ null (empty response body) | Status code | Description | Response headers | |-------------|-------------|------------------| **204** | set garbage collection rules successfully | - | +**401** | Unauthorized | - | +**404** | Resource Not Found | - | **0** | Internal Server Error | - | diff --git a/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java b/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java index 97e8113df2c..ddd86ad5de4 100644 --- a/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java +++ b/clients/java/src/main/java/io/lakefs/clients/api/RetentionApi.java @@ -68,6 +68,7 @@ public void setApiClient(ApiClient apiClient) { Status Code Description Response Headers 200 gc rule list - 401 Unauthorized - + 404 Resource Not Found - 0 Internal Server Error - */ @@ -127,6 +128,7 @@ private okhttp3.Call getGarbageCollectionRulesValidateBeforeCall(String reposito Status Code Description Response Headers 200 gc rule list - 401 Unauthorized - + 404 Resource Not Found - 0 Internal Server Error - */ @@ -146,6 +148,7 @@ public GarbageCollectionRules getGarbageCollectionRules(String repository) throw Status Code Description Response Headers 200 gc rule list - 401 Unauthorized - + 404 Resource Not Found - 0 Internal Server Error - */ @@ -167,6 +170,7 @@ public ApiResponse getGarbageCollectionRulesWithHttpInfo Status Code Description Response Headers 200 gc rule list - 401 Unauthorized - + 404 Resource Not Found - 0 Internal Server Error - */ @@ -197,7 +201,7 @@ public okhttp3.Call prepareGarbageCollectionCommitsCall(String repository, Garba Object localVarPostBody = garbageCollectionPrepareRequest; // create path and map variables - String localVarPath = "/repositories/{repository}/gc/prepare" + String localVarPath = "/repositories/{repository}/gc/prepare_commits" .replaceAll("\\{" + "repository" + "\\}", localVarApiClient.escapeString(repository.toString())); List localVarQueryParams = new ArrayList(); @@ -316,6 +320,8 @@ public okhttp3.Call prepareGarbageCollectionCommitsAsync(String repository, Garb + +
Status Code Description Response Headers
204 set garbage collection rules successfully -
401 Unauthorized -
404 Resource Not Found -
0 Internal Server Error -
*/ @@ -379,6 +385,8 @@ private okhttp3.Call setGarbageCollectionRulesValidateBeforeCall(String reposito + +
Status Code Description Response Headers
204 set garbage collection rules successfully -
401 Unauthorized -
404 Resource Not Found -
0 Internal Server Error -
*/ @@ -397,6 +405,8 @@ public void setGarbageCollectionRules(String repository, GarbageCollectionRules + +
Status Code Description Response Headers
204 set garbage collection rules successfully -
401 Unauthorized -
404 Resource Not Found -
0 Internal Server Error -
*/ @@ -417,6 +427,8 @@ public ApiResponse setGarbageCollectionRulesWithHttpInfo(String repository + +
Status Code Description Response Headers
204 set garbage collection rules successfully -
401 Unauthorized -
404 Resource Not Found -
0 Internal Server Error -
*/ diff --git a/clients/python/README.md b/clients/python/README.md index 48f234df38d..f22b1689e5a 100644 --- a/clients/python/README.md +++ b/clients/python/README.md @@ -172,7 +172,7 @@ Class | Method | HTTP request | Description *RepositoriesApi* | [**get_repository**](docs/RepositoriesApi.md#get_repository) | **GET** /repositories/{repository} | get repository *RepositoriesApi* | [**list_repositories**](docs/RepositoriesApi.md#list_repositories) | **GET** /repositories | list repositories *RetentionApi* | [**get_garbage_collection_rules**](docs/RetentionApi.md#get_garbage_collection_rules) | **GET** /repositories/{repository}/gc/rules | -*RetentionApi* | [**prepare_garbage_collection_commits**](docs/RetentionApi.md#prepare_garbage_collection_commits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection +*RetentionApi* | [**prepare_garbage_collection_commits**](docs/RetentionApi.md#prepare_garbage_collection_commits) | **POST** /repositories/{repository}/gc/prepare_commits | save lists of active and expired commits for garbage collection *RetentionApi* | [**set_garbage_collection_rules**](docs/RetentionApi.md#set_garbage_collection_rules) | **POST** /repositories/{repository}/gc/rules | *StagingApi* | [**get_physical_address**](docs/StagingApi.md#get_physical_address) | **GET** /repositories/{repository}/branches/{branch}/staging/backing | get a physical address and a return token to write object to underlying storage *StagingApi* | [**link_physical_address**](docs/StagingApi.md#link_physical_address) | **PUT** /repositories/{repository}/branches/{branch}/staging/backing | associate staging on this physical address with a path diff --git a/clients/python/docs/RetentionApi.md b/clients/python/docs/RetentionApi.md index 2d2d4c98bf2..f8b70af9b22 100644 --- a/clients/python/docs/RetentionApi.md +++ b/clients/python/docs/RetentionApi.md @@ -5,7 +5,7 @@ All URIs are relative to *http://localhost/api/v1* Method | HTTP request | Description ------------- | ------------- | ------------- [**get_garbage_collection_rules**](RetentionApi.md#get_garbage_collection_rules) | **GET** /repositories/{repository}/gc/rules | -[**prepare_garbage_collection_commits**](RetentionApi.md#prepare_garbage_collection_commits) | **POST** /repositories/{repository}/gc/prepare | save lists of active and expired commits for garbage collection +[**prepare_garbage_collection_commits**](RetentionApi.md#prepare_garbage_collection_commits) | **POST** /repositories/{repository}/gc/prepare_commits | save lists of active and expired commits for garbage collection [**set_garbage_collection_rules**](RetentionApi.md#set_garbage_collection_rules) | **POST** /repositories/{repository}/gc/rules | @@ -94,6 +94,7 @@ Name | Type | Description | Notes |-------------|-------------|------------------| **200** | gc rule list | - | **401** | Unauthorized | - | +**404** | Resource Not Found | - | **0** | Internal Server Error | - | [[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) @@ -296,6 +297,8 @@ void (empty response body) | Status code | Description | Response headers | |-------------|-------------|------------------| **204** | set garbage collection rules successfully | - | +**401** | Unauthorized | - | +**404** | Resource Not Found | - | **0** | Internal Server Error | - | [[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) diff --git a/clients/python/lakefs_client/api/retention_api.py b/clients/python/lakefs_client/api/retention_api.py index 6e619b36f0d..f5e76f2129f 100644 --- a/clients/python/lakefs_client/api/retention_api.py +++ b/clients/python/lakefs_client/api/retention_api.py @@ -234,7 +234,7 @@ def __prepare_garbage_collection_commits( 'cookie_auth', 'jwt_token' ], - 'endpoint_path': '/repositories/{repository}/gc/prepare', + 'endpoint_path': '/repositories/{repository}/gc/prepare_commits', 'operation_id': 'prepare_garbage_collection_commits', 'http_method': 'POST', 'servers': None, diff --git a/pkg/api/controller.go b/pkg/api/controller.go index 34ad1df7b6b..bb346d2f9ab 100644 --- a/pkg/api/controller.go +++ b/pkg/api/controller.go @@ -1917,7 +1917,7 @@ func (c *Controller) GetCommit(w http.ResponseWriter, r *http.Request, repositor func (c *Controller) GetGarbageCollectionRules(w http.ResponseWriter, r *http.Request, repository string) { if !c.authorize(w, r, []permissions.Permission{ { - Action: permissions.GetGarbageCollectionRules, + Action: permissions.GetGarbageCollectionRulesAction, Resource: permissions.RepoArn(repository), }, }) { @@ -1929,9 +1929,9 @@ func (c *Controller) GetGarbageCollectionRules(w http.ResponseWriter, r *http.Re return } resp := GarbageCollectionRules{} - resp.DefaultRetentionDays = rules.DefaultRetentionDays + resp.DefaultRetentionDays = int(rules.DefaultRetentionDays) for branchID, retentionDays := range rules.BranchRetentionDays { - resp.Branches = append(resp.Branches, GarbageCollectionRule{BranchId: branchID.String(), RetentionDays: retentionDays}) + resp.Branches = append(resp.Branches, GarbageCollectionRule{BranchId: branchID, RetentionDays: int(retentionDays)}) } writeResponse(w, http.StatusOK, resp) } @@ -1939,7 +1939,7 @@ func (c *Controller) GetGarbageCollectionRules(w http.ResponseWriter, r *http.Re func (c *Controller) SetGarbageCollectionRules(w http.ResponseWriter, r *http.Request, body SetGarbageCollectionRulesJSONRequestBody, repository string) { if !c.authorize(w, r, []permissions.Permission{ { - Action: permissions.GetGarbageCollectionRules, + Action: permissions.SetGarbageCollectionRulesAction, Resource: permissions.RepoArn(repository), }, }) { @@ -1947,11 +1947,11 @@ func (c *Controller) SetGarbageCollectionRules(w http.ResponseWriter, r *http.Re } ctx := r.Context() rules := &graveler.GarbageCollectionRules{ - DefaultRetentionDays: body.DefaultRetentionDays, - BranchRetentionDays: make(map[graveler.BranchID]int), + DefaultRetentionDays: int32(body.DefaultRetentionDays), + BranchRetentionDays: make(map[string]int32), } for _, rule := range body.Branches { - rules.BranchRetentionDays[graveler.BranchID(rule.BranchId)] = rule.RetentionDays + rules.BranchRetentionDays[rule.BranchId] = int32(rule.RetentionDays) } err := c.Catalog.SetGarbageCollectionRules(ctx, repository, rules) if handleAPIError(w, err) { @@ -1963,7 +1963,7 @@ func (c *Controller) SetGarbageCollectionRules(w http.ResponseWriter, r *http.Re func (c *Controller) PrepareGarbageCollectionCommits(w http.ResponseWriter, r *http.Request, body PrepareGarbageCollectionCommitsJSONRequestBody, repository string) { if !c.authorize(w, r, []permissions.Permission{ { - Action: permissions.ListObjectsAction, + Action: permissions.PrepareGarbageCollectionCommitsAction, Resource: permissions.RepoArn(repository), }, }) { diff --git a/pkg/graveler/graveler.go b/pkg/graveler/graveler.go index a750552c53b..932e2cb3b03 100644 --- a/pkg/graveler/graveler.go +++ b/pkg/graveler/graveler.go @@ -377,7 +377,8 @@ type VersionController interface { // SaveGarbageCollectionCommits saves the sets of active and expired commits, according to the branch rules for garbage collection. // Returns a run id which can later be used to retrieve the set of commits. - // If a previousRunID is specified, commits that were already expired in that run will not be scanned. + // If a previousRunID is specified, commits that were already expired and their ancestors will not be considered as expired/active. + // Note: Ancestors of previously expired commits may still be considered if they can be reached from a non-expired commit. SaveGarbageCollectionCommits(ctx context.Context, repositoryID RepositoryID, previousRunID string) (runID string, err error) } @@ -2039,8 +2040,3 @@ type GarbageCollectionManager interface { SaveGarbageCollectionCommits(ctx context.Context, storageNamespace StorageNamespace, repositoryID RepositoryID, rules *GarbageCollectionRules, previouslyExpiredCommits []CommitID) (string, error) GetRunExpiredCommits(ctx context.Context, storageNamespace StorageNamespace, runID string) ([]CommitID, error) } - -type GarbageCollectionRules struct { - DefaultRetentionDays int `json:"default"` - BranchRetentionDays map[BranchID]int `json:"branches"` -} diff --git a/pkg/graveler/graveler.pb.go b/pkg/graveler/graveler.pb.go index 31373872b5e..c98f1d93940 100644 --- a/pkg/graveler/graveler.pb.go +++ b/pkg/graveler/graveler.pb.go @@ -247,6 +247,61 @@ func (x *CommitData) GetGeneration() int32 { return 0 } +type GarbageCollectionRules struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + DefaultRetentionDays int32 `protobuf:"varint,1,opt,name=default_retention_days,json=defaultRetentionDays,proto3" json:"default_retention_days,omitempty"` + BranchRetentionDays map[string]int32 `protobuf:"bytes,2,rep,name=branch_retention_days,json=branchRetentionDays,proto3" json:"branch_retention_days,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"varint,2,opt,name=value,proto3"` +} + +func (x *GarbageCollectionRules) Reset() { + *x = GarbageCollectionRules{} + if protoimpl.UnsafeEnabled { + mi := &file_graveler_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GarbageCollectionRules) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GarbageCollectionRules) ProtoMessage() {} + +func (x *GarbageCollectionRules) ProtoReflect() protoreflect.Message { + mi := &file_graveler_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GarbageCollectionRules.ProtoReflect.Descriptor instead. +func (*GarbageCollectionRules) Descriptor() ([]byte, []int) { + return file_graveler_proto_rawDescGZIP(), []int{3} +} + +func (x *GarbageCollectionRules) GetDefaultRetentionDays() int32 { + if x != nil { + return x.DefaultRetentionDays + } + return 0 +} + +func (x *GarbageCollectionRules) GetBranchRetentionDays() map[string]int32 { + if x != nil { + return x.BranchRetentionDays + } + return nil +} + var File_graveler_proto protoreflect.FileDescriptor var file_graveler_proto_rawDesc = []byte{ @@ -288,10 +343,27 @@ var file_graveler_proto_rawDesc = []byte{ 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, - 0x02, 0x38, 0x01, 0x42, 0x26, 0x5a, 0x24, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, - 0x6d, 0x2f, 0x74, 0x72, 0x65, 0x65, 0x76, 0x65, 0x72, 0x73, 0x65, 0x2f, 0x6c, 0x61, 0x6b, 0x65, - 0x66, 0x73, 0x2f, 0x67, 0x72, 0x61, 0x76, 0x65, 0x6c, 0x65, 0x72, 0x62, 0x06, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x33, + 0x02, 0x38, 0x01, 0x22, 0x9a, 0x02, 0x0a, 0x16, 0x47, 0x61, 0x72, 0x62, 0x61, 0x67, 0x65, 0x43, + 0x6f, 0x6c, 0x6c, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x75, 0x6c, 0x65, 0x73, 0x12, 0x34, + 0x0a, 0x16, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x5f, 0x72, 0x65, 0x74, 0x65, 0x6e, 0x74, + 0x69, 0x6f, 0x6e, 0x5f, 0x64, 0x61, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x14, + 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x52, 0x65, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x6f, 0x6e, + 0x44, 0x61, 0x79, 0x73, 0x12, 0x81, 0x01, 0x0a, 0x15, 0x62, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x5f, + 0x72, 0x65, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x64, 0x61, 0x79, 0x73, 0x18, 0x02, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x4d, 0x2e, 0x69, 0x6f, 0x2e, 0x74, 0x72, 0x65, 0x65, 0x76, 0x65, + 0x72, 0x73, 0x65, 0x2e, 0x6c, 0x61, 0x6b, 0x65, 0x66, 0x73, 0x2e, 0x67, 0x72, 0x61, 0x76, 0x65, + 0x6c, 0x65, 0x72, 0x2e, 0x47, 0x61, 0x72, 0x62, 0x61, 0x67, 0x65, 0x43, 0x6f, 0x6c, 0x6c, 0x65, + 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x75, 0x6c, 0x65, 0x73, 0x2e, 0x42, 0x72, 0x61, 0x6e, 0x63, + 0x68, 0x52, 0x65, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x6f, 0x6e, 0x44, 0x61, 0x79, 0x73, 0x45, 0x6e, + 0x74, 0x72, 0x79, 0x52, 0x13, 0x62, 0x72, 0x61, 0x6e, 0x63, 0x68, 0x52, 0x65, 0x74, 0x65, 0x6e, + 0x74, 0x69, 0x6f, 0x6e, 0x44, 0x61, 0x79, 0x73, 0x1a, 0x46, 0x0a, 0x18, 0x42, 0x72, 0x61, 0x6e, + 0x63, 0x68, 0x52, 0x65, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x6f, 0x6e, 0x44, 0x61, 0x79, 0x73, 0x45, + 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, + 0x42, 0x26, 0x5a, 0x24, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x74, + 0x72, 0x65, 0x65, 0x76, 0x65, 0x72, 0x73, 0x65, 0x2f, 0x6c, 0x61, 0x6b, 0x65, 0x66, 0x73, 0x2f, + 0x67, 0x72, 0x61, 0x76, 0x65, 0x6c, 0x65, 0x72, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -306,22 +378,25 @@ func file_graveler_proto_rawDescGZIP() []byte { return file_graveler_proto_rawDescData } -var file_graveler_proto_msgTypes = make([]protoimpl.MessageInfo, 4) +var file_graveler_proto_msgTypes = make([]protoimpl.MessageInfo, 6) var file_graveler_proto_goTypes = []interface{}{ - (*BranchData)(nil), // 0: io.treeverse.lakefs.graveler.BranchData - (*TagData)(nil), // 1: io.treeverse.lakefs.graveler.TagData - (*CommitData)(nil), // 2: io.treeverse.lakefs.graveler.CommitData - nil, // 3: io.treeverse.lakefs.graveler.CommitData.MetadataEntry - (*timestamppb.Timestamp)(nil), // 4: google.protobuf.Timestamp + (*BranchData)(nil), // 0: io.treeverse.lakefs.graveler.BranchData + (*TagData)(nil), // 1: io.treeverse.lakefs.graveler.TagData + (*CommitData)(nil), // 2: io.treeverse.lakefs.graveler.CommitData + (*GarbageCollectionRules)(nil), // 3: io.treeverse.lakefs.graveler.GarbageCollectionRules + nil, // 4: io.treeverse.lakefs.graveler.CommitData.MetadataEntry + nil, // 5: io.treeverse.lakefs.graveler.GarbageCollectionRules.BranchRetentionDaysEntry + (*timestamppb.Timestamp)(nil), // 6: google.protobuf.Timestamp } var file_graveler_proto_depIdxs = []int32{ - 4, // 0: io.treeverse.lakefs.graveler.CommitData.creation_date:type_name -> google.protobuf.Timestamp - 3, // 1: io.treeverse.lakefs.graveler.CommitData.metadata:type_name -> io.treeverse.lakefs.graveler.CommitData.MetadataEntry - 2, // [2:2] is the sub-list for method output_type - 2, // [2:2] is the sub-list for method input_type - 2, // [2:2] is the sub-list for extension type_name - 2, // [2:2] is the sub-list for extension extendee - 0, // [0:2] is the sub-list for field type_name + 6, // 0: io.treeverse.lakefs.graveler.CommitData.creation_date:type_name -> google.protobuf.Timestamp + 4, // 1: io.treeverse.lakefs.graveler.CommitData.metadata:type_name -> io.treeverse.lakefs.graveler.CommitData.MetadataEntry + 5, // 2: io.treeverse.lakefs.graveler.GarbageCollectionRules.branch_retention_days:type_name -> io.treeverse.lakefs.graveler.GarbageCollectionRules.BranchRetentionDaysEntry + 3, // [3:3] is the sub-list for method output_type + 3, // [3:3] is the sub-list for method input_type + 3, // [3:3] is the sub-list for extension type_name + 3, // [3:3] is the sub-list for extension extendee + 0, // [0:3] is the sub-list for field type_name } func init() { file_graveler_proto_init() } @@ -366,6 +441,18 @@ func file_graveler_proto_init() { return nil } } + file_graveler_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GarbageCollectionRules); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } } type x struct{} out := protoimpl.TypeBuilder{ @@ -373,7 +460,7 @@ func file_graveler_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_graveler_proto_rawDesc, NumEnums: 0, - NumMessages: 4, + NumMessages: 6, NumExtensions: 0, NumServices: 0, }, diff --git a/pkg/graveler/graveler.proto b/pkg/graveler/graveler.proto index eb846b1db5b..156239d6564 100644 --- a/pkg/graveler/graveler.proto +++ b/pkg/graveler/graveler.proto @@ -26,3 +26,8 @@ message CommitData { int32 version = 8; int32 generation = 9; } + +message GarbageCollectionRules { + int32 default_retention_days = 1; + map branch_retention_days = 2; +} diff --git a/pkg/graveler/retention/expired_commits.go b/pkg/graveler/retention/expired_commits.go index 87b3e700e94..cae231dfe4f 100644 --- a/pkg/graveler/retention/expired_commits.go +++ b/pkg/graveler/retention/expired_commits.go @@ -7,46 +7,53 @@ import ( "github.com/treeverse/lakefs/pkg/graveler" ) -type ExpiredCommitsFinder struct { +var empty struct{} + +type GarbageCollectionCommitsFinder struct { branchLister graveler.BranchLister commitGetter graveler.CommitGetter } -func NewExpiredCommitsFinder(branchLister graveler.BranchLister, commitGetter graveler.CommitGetter) *ExpiredCommitsFinder { - return &ExpiredCommitsFinder{branchLister: branchLister, commitGetter: commitGetter} +type GarbageCollectionCommits struct { + expired []graveler.CommitID + active []graveler.CommitID +} + +func NewGarbageCollectionCommitsFinder(branchLister graveler.BranchLister, commitGetter graveler.CommitGetter) *GarbageCollectionCommitsFinder { + return &GarbageCollectionCommitsFinder{branchLister: branchLister, commitGetter: commitGetter} } -func (e *ExpiredCommitsFinder) GetExpiredCommits(ctx context.Context, repositoryID graveler.RepositoryID, rules *graveler.GarbageCollectionRules, previouslyExpiredCommits []graveler.CommitID) (expired []graveler.CommitID, active []graveler.CommitID, err error) { +func (e *GarbageCollectionCommitsFinder) GetGarbageCollectionCommits(ctx context.Context, repositoryID graveler.RepositoryID, rules *graveler.GarbageCollectionRules, previouslyExpired []graveler.CommitID) (*GarbageCollectionCommits, error) { now := time.Now() processed := make(map[graveler.CommitID]time.Time) branchIterator, err := e.branchLister.ListBranches(ctx, repositoryID) if err != nil { - return nil, nil, err + return nil, err } previouslyExpiredMap := make(map[graveler.CommitID]bool) - for _, commitID := range previouslyExpiredCommits { + for _, commitID := range previouslyExpired { previouslyExpiredMap[commitID] = true } - activeMap := make(map[graveler.CommitID]bool) - expiredMap := make(map[graveler.CommitID]bool) + activeMap := make(map[graveler.CommitID]struct{}) + expiredMap := make(map[graveler.CommitID]struct{}) for branchIterator.Next() { branchRecord := branchIterator.Value() - branchExpirationThreshold := now.AddDate(0, 0, -rules.DefaultRetentionDays) - if branchExpirationPeriod, ok := rules.BranchRetentionDays[branchRecord.BranchID]; ok { - branchExpirationThreshold = now.AddDate(0, 0, -branchExpirationPeriod) + branchExpirationThreshold := now.AddDate(0, 0, int(-rules.DefaultRetentionDays)) + if branchExpirationPeriod, ok := rules.BranchRetentionDays[string(branchRecord.BranchID)]; ok { + branchExpirationThreshold = now.AddDate(0, 0, int(-branchExpirationPeriod)) } commitID := branchRecord.CommitID previousCommit, err := e.commitGetter.GetCommit(ctx, repositoryID, commitID) if err != nil { - return nil, nil, err + return nil, err } if previousThreshold, ok := processed[commitID]; ok && !previousThreshold.After(branchExpirationThreshold) { // was already here with earlier expiration date continue } processed[commitID] = branchExpirationThreshold - activeMap[commitID] = true + activeMap[commitID] = empty for len(previousCommit.Parents) > 0 { commitID = previousCommit.Parents[0] if _, ok := previouslyExpiredMap[commitID]; ok { @@ -58,25 +65,25 @@ func (e *ExpiredCommitsFinder) GetExpiredCommits(ctx context.Context, repository break } if previousCommit.CreationDate.After(branchExpirationThreshold) { - activeMap[commitID] = true + activeMap[commitID] = empty delete(expiredMap, commitID) - } else if active, ok := activeMap[commitID]; !ok || !active { - expiredMap[commitID] = true + } else if _, ok := activeMap[commitID]; !ok { + expiredMap[commitID] = empty } previousCommit, err = e.commitGetter.GetCommit(ctx, repositoryID, commitID) if err != nil { - return nil, nil, err + return nil, err } processed[commitID] = branchExpirationThreshold } } if branchIterator.Err() != nil { - return nil, nil, branchIterator.Err() + return nil, branchIterator.Err() } - return toArray(activeMap), toArray(expiredMap), nil + return &GarbageCollectionCommits{active: commitSetToArray(activeMap), expired: commitSetToArray(expiredMap)}, nil } -func toArray(commitMap map[graveler.CommitID]bool) []graveler.CommitID { +func commitSetToArray(commitMap map[graveler.CommitID]struct{}) []graveler.CommitID { res := make([]graveler.CommitID, 0, len(commitMap)) for commitID := range commitMap { res = append(res, commitID) diff --git a/pkg/graveler/retention/expired_commits_test.go b/pkg/graveler/retention/expired_commits_test.go index a452677555a..d6dfb277145 100644 --- a/pkg/graveler/retention/expired_commits_test.go +++ b/pkg/graveler/retention/expired_commits_test.go @@ -36,7 +36,7 @@ func newCommitSet(commitIDs []string) map[graveler.CommitID]bool { func TestExpiredCommits(t *testing.T) { tests := map[string]struct { commits map[string]testCommit - headsRetentionDays map[string]int + headsRetentionDays map[string]int32 previouslyExpired []string expectedActiveIDs []string expectedExpiredIDs []string @@ -50,7 +50,7 @@ func TestExpiredCommits(t *testing.T) { "e": newTestCommit(5, "b"), "f": newTestCommit(1, "e"), }, - headsRetentionDays: map[string]int{"f": 7, "d": 3}, + headsRetentionDays: map[string]int32{"f": 7, "d": 3}, expectedActiveIDs: []string{"b", "d", "e", "f"}, expectedExpiredIDs: []string{"a", "c"}, }, @@ -61,7 +61,7 @@ func TestExpiredCommits(t *testing.T) { "c": newTestCommit(20, "a"), "d": newTestCommit(20, "a"), }, - headsRetentionDays: map[string]int{"b": 7, "c": 7, "d": 7}, + headsRetentionDays: map[string]int32{"b": 7, "c": 7, "d": 7}, expectedActiveIDs: []string{"b", "c", "d"}, expectedExpiredIDs: []string{"a"}, }, @@ -73,7 +73,7 @@ func TestExpiredCommits(t *testing.T) { "d": newTestCommit(2, "b"), "e": newTestCommit(1, "b"), }, - headsRetentionDays: map[string]int{"d": 15, "e": 7, "c": 2}, + headsRetentionDays: map[string]int32{"d": 15, "e": 7, "c": 2}, expectedActiveIDs: []string{"a", "b", "c", "d", "e"}, expectedExpiredIDs: []string{}, }, @@ -84,7 +84,7 @@ func TestExpiredCommits(t *testing.T) { "c": newTestCommit(7), "d": newTestCommit(6, "c", "a"), }, - headsRetentionDays: map[string]int{"b": 3, "d": 10}, + headsRetentionDays: map[string]int32{"b": 3, "d": 10}, expectedActiveIDs: []string{"b", "c", "d"}, expectedExpiredIDs: []string{"a"}, }, @@ -97,7 +97,7 @@ func TestExpiredCommits(t *testing.T) { "e": newTestCommit(5, "b"), "f": newTestCommit(1, "e"), }, - headsRetentionDays: map[string]int{"f": 7, "d": 3}, + headsRetentionDays: map[string]int32{"f": 7, "d": 3}, previouslyExpired: []string{"a"}, expectedActiveIDs: []string{"b", "d", "e", "f"}, expectedExpiredIDs: []string{"c"}, @@ -115,7 +115,7 @@ func TestExpiredCommits(t *testing.T) { "b": newTestCommit(5, "a"), "c": newTestCommit(5, "a"), }, - headsRetentionDays: map[string]int{"c": 7, "b": 7}, + headsRetentionDays: map[string]int32{"c": 7, "b": 7}, previouslyExpired: []string{"e1", "e2", "e3", "e4", "e5", "e6", "e7"}, expectedActiveIDs: []string{"a", "b", "c"}, expectedExpiredIDs: []string{}, @@ -137,7 +137,7 @@ func TestExpiredCommits(t *testing.T) { "g": newTestCommit(4, "b", "e"), "h": newTestCommit(3, "a", "f"), }, - headsRetentionDays: map[string]int{"h": 14, "g": 7, "f": 7}, + headsRetentionDays: map[string]int32{"h": 14, "g": 7, "f": 7}, previouslyExpired: []string{}, expectedActiveIDs: []string{"h", "a", "b", "c", "f", "g"}, expectedExpiredIDs: []string{"e", "d"}, @@ -150,17 +150,17 @@ func TestExpiredCommits(t *testing.T) { ctrl := gomock.NewController(t) refManagerMock := mock.NewMockRefManager(ctrl) ctx := context.Background() - garbageCollectionRules := &graveler.GarbageCollectionRules{DefaultRetentionDays: 0, BranchRetentionDays: make(map[graveler.BranchID]int)} + garbageCollectionRules := &graveler.GarbageCollectionRules{DefaultRetentionDays: 0, BranchRetentionDays: make(map[string]int32)} for head, retentionDays := range tst.headsRetentionDays { branchRecords = append(branchRecords, &graveler.BranchRecord{ BranchID: graveler.BranchID(head), Branch: &graveler.Branch{CommitID: graveler.CommitID(head)}, }) - garbageCollectionRules.BranchRetentionDays[graveler.BranchID(head)] = retentionDays + garbageCollectionRules.BranchRetentionDays[head] = retentionDays } sort.Slice(branchRecords, func(i, j int) bool { // start with the branch with the strictest gc rules - return garbageCollectionRules.BranchRetentionDays[branchRecords[i].BranchID] > garbageCollectionRules.BranchRetentionDays[branchRecords[j].BranchID] + return garbageCollectionRules.BranchRetentionDays[string(branchRecords[i].BranchID)] > garbageCollectionRules.BranchRetentionDays[string(branchRecords[j].BranchID)] }) branchIterator := gtestutil.NewFakeBranchIterator(branchRecords) refManagerMock.EXPECT().ListBranches(ctx, graveler.RepositoryID("test")).Return(branchIterator, nil) @@ -173,7 +173,7 @@ func TestExpiredCommits(t *testing.T) { refManagerMock.EXPECT().GetCommit(ctx, graveler.RepositoryID("test"), id).Return(commitMap[id], nil).Times(1) } } - finder := ExpiredCommitsFinder{ + finder := GarbageCollectionCommitsFinder{ commitGetter: refManagerMock, branchLister: refManagerMock, } @@ -181,23 +181,23 @@ func TestExpiredCommits(t *testing.T) { for i := range tst.previouslyExpired { previouslyExpiredCommitIDs[i] = graveler.CommitID(tst.previouslyExpired[i]) } - activeCommits, expiredCommits, err := finder.GetExpiredCommits(ctx, "test", garbageCollectionRules, previouslyExpiredCommitIDs) + gcCommits, err := finder.GetGarbageCollectionCommits(ctx, "test", garbageCollectionRules, previouslyExpiredCommitIDs) if err != nil { t.Fatalf("failed to find expired commits: %v", err) } sort.Strings(tst.expectedActiveIDs) - sort.Slice(activeCommits, func(i, j int) bool { - return activeCommits[i].Ref() < activeCommits[j].Ref() + sort.Slice(gcCommits.active, func(i, j int) bool { + return gcCommits.active[i].Ref() < gcCommits.active[j].Ref() }) - if diff := deep.Equal(tst.expectedActiveIDs, testToStringArray(activeCommits)); diff != nil { + if diff := deep.Equal(tst.expectedActiveIDs, testToStringArray(gcCommits.active)); diff != nil { t.Errorf("active commits ids diff=%s", diff) } sort.Strings(tst.expectedExpiredIDs) - sort.Slice(expiredCommits, func(i, j int) bool { - return expiredCommits[i].Ref() < expiredCommits[j].Ref() + sort.Slice(gcCommits.expired, func(i, j int) bool { + return gcCommits.expired[i].Ref() < gcCommits.expired[j].Ref() }) - if diff := deep.Equal(tst.expectedExpiredIDs, testToStringArray(expiredCommits)); diff != nil { + if diff := deep.Equal(tst.expectedExpiredIDs, testToStringArray(gcCommits.expired)); diff != nil { t.Errorf("expired commits ids diff=%s", diff) } }) diff --git a/pkg/graveler/retention/garbage_collection.go b/pkg/graveler/retention/garbage_collection.go index b99718e61fa..cc6f9219c53 100644 --- a/pkg/graveler/retention/garbage_collection.go +++ b/pkg/graveler/retention/garbage_collection.go @@ -4,11 +4,13 @@ import ( "bytes" "context" "encoding/csv" - "encoding/json" "fmt" + "io" "strconv" "strings" + "google.golang.org/protobuf/proto" + "github.com/google/uuid" "github.com/treeverse/lakefs/pkg/block" "github.com/treeverse/lakefs/pkg/graveler" @@ -21,14 +23,14 @@ const ( type GarbageCollectionManager struct { blockAdapter block.Adapter - expiredCommitsFinder *ExpiredCommitsFinder + expiredCommitsFinder *GarbageCollectionCommitsFinder committedBlockStoragePrefix string } func NewGarbageCollectionManager(blockAdapter block.Adapter, commitGetter graveler.CommitGetter, branchLister graveler.BranchLister, committedBlockStoragePrefix string) *GarbageCollectionManager { return &GarbageCollectionManager{ blockAdapter: blockAdapter, - expiredCommitsFinder: NewExpiredCommitsFinder(branchLister, commitGetter), + expiredCommitsFinder: NewGarbageCollectionCommitsFinder(branchLister, commitGetter), committedBlockStoragePrefix: committedBlockStoragePrefix, } } @@ -45,7 +47,11 @@ func (m *GarbageCollectionManager) GetRules(ctx context.Context, storageNamespac _ = reader.Close() }() var rules graveler.GarbageCollectionRules - err = json.NewDecoder(reader).Decode(&rules) + rulesBytes, err := io.ReadAll(reader) + if err != nil { + return nil, err + } + err = proto.Unmarshal(rulesBytes, &rules) if err != nil { return nil, err } @@ -53,7 +59,7 @@ func (m *GarbageCollectionManager) GetRules(ctx context.Context, storageNamespac } func (m *GarbageCollectionManager) SaveRules(ctx context.Context, storageNamespace graveler.StorageNamespace, rules *graveler.GarbageCollectionRules) error { - rulesBytes, err := json.Marshal(rules) + rulesBytes, err := proto.Marshal(rules) if err != nil { return err } @@ -89,19 +95,19 @@ func (m *GarbageCollectionManager) GetRunExpiredCommits(ctx context.Context, sto } func (m *GarbageCollectionManager) SaveGarbageCollectionCommits(ctx context.Context, storageNamespace graveler.StorageNamespace, repositoryID graveler.RepositoryID, rules *graveler.GarbageCollectionRules, previouslyExpiredCommits []graveler.CommitID) (string, error) { - active, expired, err := m.expiredCommitsFinder.GetExpiredCommits(ctx, repositoryID, rules, previouslyExpiredCommits) + gcCommits, err := m.expiredCommitsFinder.GetGarbageCollectionCommits(ctx, repositoryID, rules, previouslyExpiredCommits) if err != nil { return "", fmt.Errorf("find expired commits: %w", err) } b := &strings.Builder{} csvWriter := csv.NewWriter(b) - for _, commitID := range expired { + for _, commitID := range gcCommits.expired { err := csvWriter.Write([]string{string(commitID), strconv.FormatBool(true)}) if err != nil { return "", err } } - for _, commitID := range active { + for _, commitID := range gcCommits.active { err := csvWriter.Write([]string{string(commitID), strconv.FormatBool(false)}) if err != nil { return "", err diff --git a/pkg/permissions/actions.go b/pkg/permissions/actions.go index ac439962c5a..4cb423806a2 100644 --- a/pkg/permissions/actions.go +++ b/pkg/permissions/actions.go @@ -58,8 +58,9 @@ const ( ReadActionsAction = "ci:ReadAction" - GetGarbageCollectionRules = "retention:GetGarbageCollectionRules" - SetGarbageCollectionRules = "retention:SetGarbageCollectionRules" + PrepareGarbageCollectionCommitsAction = "retention:PrepareGarbageCollectionCommits" + GetGarbageCollectionRulesAction = "retention:GetGarbageCollectionRules" + SetGarbageCollectionRulesAction = "retention:SetGarbageCollectionRules" ) var serviceSet = map[string]struct{}{ From 7ce29bc616da1ed53da8f85f42c3584552ab9c2b Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Sun, 20 Jun 2021 10:28:04 +0300 Subject: [PATCH 21/24] CR fixes --- api/swagger.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/api/swagger.yml b/api/swagger.yml index ba549da785f..7230a543102 100644 --- a/api/swagger.yml +++ b/api/swagger.yml @@ -823,6 +823,7 @@ components: previous_run_id: type: string description: run id of a previous successful GC job + example: 64eaa103-d726-4a33-bcb8-7c0b4abfe09e GarbageCollectionPrepareResponse: type: object @@ -830,8 +831,10 @@ components: run_id: type: string description: a unique identifier generated for this GC job + example: 64eaa103-d726-4a33-bcb8-7c0b4abfe09e required: - run_id + GarbageCollectionRule: type: object properties: From a7c306a86e51196aa62d8f35af8bfe66f649c834 Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Sun, 20 Jun 2021 10:29:21 +0300 Subject: [PATCH 22/24] gen clients --- clients/java/api/openapi.yaml | 6 ++++-- .../clients/api/model/GarbageCollectionPrepareRequest.java | 2 +- .../clients/api/model/GarbageCollectionPrepareResponse.java | 2 +- clients/python/docs/RetentionApi.md | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/clients/java/api/openapi.yaml b/clients/java/api/openapi.yaml index e0271e394a6..14963c81415 100644 --- a/clients/java/api/openapi.yaml +++ b/clients/java/api/openapi.yaml @@ -5038,18 +5038,20 @@ components: type: object GarbageCollectionPrepareRequest: example: - previous_run_id: previous_run_id + previous_run_id: 64eaa103-d726-4a33-bcb8-7c0b4abfe09e properties: previous_run_id: description: run id of a previous successful GC job + example: 64eaa103-d726-4a33-bcb8-7c0b4abfe09e type: string type: object GarbageCollectionPrepareResponse: example: - run_id: run_id + run_id: 64eaa103-d726-4a33-bcb8-7c0b4abfe09e properties: run_id: description: a unique identifier generated for this GC job + example: 64eaa103-d726-4a33-bcb8-7c0b4abfe09e type: string required: - run_id diff --git a/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareRequest.java b/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareRequest.java index a56fd16f7cb..d959ea81f11 100644 --- a/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareRequest.java +++ b/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareRequest.java @@ -45,7 +45,7 @@ public GarbageCollectionPrepareRequest previousRunId(String previousRunId) { * @return previousRunId **/ @javax.annotation.Nullable - @ApiModelProperty(value = "run id of a previous successful GC job") + @ApiModelProperty(example = "64eaa103-d726-4a33-bcb8-7c0b4abfe09e", value = "run id of a previous successful GC job") public String getPreviousRunId() { return previousRunId; diff --git a/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareResponse.java b/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareResponse.java index 45418cf838d..835a9c4234a 100644 --- a/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareResponse.java +++ b/clients/java/src/main/java/io/lakefs/clients/api/model/GarbageCollectionPrepareResponse.java @@ -44,7 +44,7 @@ public GarbageCollectionPrepareResponse runId(String runId) { * a unique identifier generated for this GC job * @return runId **/ - @ApiModelProperty(required = true, value = "a unique identifier generated for this GC job") + @ApiModelProperty(example = "64eaa103-d726-4a33-bcb8-7c0b4abfe09e", required = true, value = "a unique identifier generated for this GC job") public String getRunId() { return runId; diff --git a/clients/python/docs/RetentionApi.md b/clients/python/docs/RetentionApi.md index f8b70af9b22..bf0a9785b0c 100644 --- a/clients/python/docs/RetentionApi.md +++ b/clients/python/docs/RetentionApi.md @@ -151,7 +151,7 @@ with lakefs_client.ApiClient(configuration) as api_client: api_instance = retention_api.RetentionApi(api_client) repository = "repository_example" # str | garbage_collection_prepare_request = GarbageCollectionPrepareRequest( - previous_run_id="previous_run_id_example", + previous_run_id="64eaa103-d726-4a33-bcb8-7c0b4abfe09e", ) # GarbageCollectionPrepareRequest | (optional) # example passing only required values which don't have defaults set From b5e6447f24e745a8733b439f34b6f63e5c25be83 Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Sun, 20 Jun 2021 12:04:16 +0300 Subject: [PATCH 23/24] cr fixes --- pkg/catalog/catalog.go | 2 +- pkg/graveler/graveler.go | 8 --- pkg/graveler/ref/merge_base_finder.go | 6 +- pkg/graveler/retention/expired_commits.go | 49 ++++++--------- .../retention/expired_commits_test.go | 10 ++- ...ction.go => garbage_collection_manager.go} | 63 +++++++++++++------ 6 files changed, 73 insertions(+), 65 deletions(-) rename pkg/graveler/retention/{garbage_collection.go => garbage_collection_manager.go} (66%) diff --git a/pkg/catalog/catalog.go b/pkg/catalog/catalog.go index 22133ff87ba..9a37c50b617 100644 --- a/pkg/catalog/catalog.go +++ b/pkg/catalog/catalog.go @@ -182,7 +182,7 @@ func New(ctx context.Context, cfg Config) (*Catalog, error) { refManager := ref.NewPGRefManager(executor, cfg.DB, ident.NewHexAddressProvider()) branchLocker := ref.NewBranchLocker(cfg.LockDB) - gcManager := retention.NewGarbageCollectionManager(tierFSParams.Adapter, refManager, refManager, cfg.Config.GetCommittedBlockStoragePrefix()) + gcManager := retention.NewGarbageCollectionManager(tierFSParams.Adapter, refManager, cfg.Config.GetCommittedBlockStoragePrefix()) store := graveler.NewGraveler(branchLocker, committedManager, stagingManager, refManager, gcManager) return &Catalog{ diff --git a/pkg/graveler/graveler.go b/pkg/graveler/graveler.go index 932e2cb3b03..25928a5e141 100644 --- a/pkg/graveler/graveler.go +++ b/pkg/graveler/graveler.go @@ -478,14 +478,6 @@ type CommitIterator interface { Close() } -type CommitGetter interface { - GetCommit(ctx context.Context, repositoryID RepositoryID, commitID CommitID) (*Commit, error) -} - -type BranchLister interface { - ListBranches(ctx context.Context, repositoryID RepositoryID) (BranchIterator, error) -} - // These are the more complex internal components that compose the functionality of the Graveler // RefManager handles references: branches, commits, probably tags in the future diff --git a/pkg/graveler/ref/merge_base_finder.go b/pkg/graveler/ref/merge_base_finder.go index dfaafeada22..2970e3178be 100644 --- a/pkg/graveler/ref/merge_base_finder.go +++ b/pkg/graveler/ref/merge_base_finder.go @@ -7,6 +7,10 @@ import ( "github.com/treeverse/lakefs/pkg/graveler" ) +type CommitGetter interface { + GetCommit(ctx context.Context, repositoryID graveler.RepositoryID, commitID graveler.CommitID) (*graveler.Commit, error) +} + type reachedFlags uint8 const ( @@ -16,7 +20,7 @@ const ( // FindMergeBase finds the best common ancestor according to the definition in the git-merge-base documentation: https://git-scm.com/docs/git-merge-base // One common ancestor is better than another common ancestor if the latter is an ancestor of the former. -func FindMergeBase(ctx context.Context, getter graveler.CommitGetter, repositoryID graveler.RepositoryID, leftID, rightID graveler.CommitID) (*graveler.Commit, error) { +func FindMergeBase(ctx context.Context, getter CommitGetter, repositoryID graveler.RepositoryID, leftID, rightID graveler.CommitID) (*graveler.Commit, error) { var commitRecord *graveler.CommitRecord queue := NewCommitsGenerationPriorityQueue() reached := make(map[graveler.CommitID]reachedFlags) diff --git a/pkg/graveler/retention/expired_commits.go b/pkg/graveler/retention/expired_commits.go index cae231dfe4f..acfe3a21e1c 100644 --- a/pkg/graveler/retention/expired_commits.go +++ b/pkg/graveler/retention/expired_commits.go @@ -9,28 +9,15 @@ import ( var empty struct{} -type GarbageCollectionCommitsFinder struct { - branchLister graveler.BranchLister - commitGetter graveler.CommitGetter -} - type GarbageCollectionCommits struct { expired []graveler.CommitID active []graveler.CommitID } -func NewGarbageCollectionCommitsFinder(branchLister graveler.BranchLister, commitGetter graveler.CommitGetter) *GarbageCollectionCommitsFinder { - return &GarbageCollectionCommitsFinder{branchLister: branchLister, commitGetter: commitGetter} -} - -func (e *GarbageCollectionCommitsFinder) GetGarbageCollectionCommits(ctx context.Context, repositoryID graveler.RepositoryID, rules *graveler.GarbageCollectionRules, previouslyExpired []graveler.CommitID) (*GarbageCollectionCommits, error) { +// GetGarbageCollectionCommits returns the sets of expired and active commits, according to the repository's garbage collection rules. +func GetGarbageCollectionCommits(ctx context.Context, branchIterator graveler.BranchIterator, commitGetter *RepositoryCommitGetter, rules *graveler.GarbageCollectionRules, previouslyExpired []graveler.CommitID) (*GarbageCollectionCommits, error) { now := time.Now() processed := make(map[graveler.CommitID]time.Time) - - branchIterator, err := e.branchLister.ListBranches(ctx, repositoryID) - if err != nil { - return nil, err - } previouslyExpiredMap := make(map[graveler.CommitID]bool) for _, commitID := range previouslyExpired { previouslyExpiredMap[commitID] = true @@ -39,12 +26,13 @@ func (e *GarbageCollectionCommitsFinder) GetGarbageCollectionCommits(ctx context expiredMap := make(map[graveler.CommitID]struct{}) for branchIterator.Next() { branchRecord := branchIterator.Value() - branchExpirationThreshold := now.AddDate(0, 0, int(-rules.DefaultRetentionDays)) - if branchExpirationPeriod, ok := rules.BranchRetentionDays[string(branchRecord.BranchID)]; ok { - branchExpirationThreshold = now.AddDate(0, 0, int(-branchExpirationPeriod)) + retentionDays := int(rules.DefaultRetentionDays) + if branchRetentionDays, ok := rules.BranchRetentionDays[string(branchRecord.BranchID)]; ok { + retentionDays = int(branchRetentionDays) } + branchExpirationThreshold := now.AddDate(0, 0, -retentionDays) commitID := branchRecord.CommitID - previousCommit, err := e.commitGetter.GetCommit(ctx, repositoryID, commitID) + commit, err := commitGetter.GetCommit(ctx, commitID) if err != nil { return nil, err } @@ -54,27 +42,28 @@ func (e *GarbageCollectionCommitsFinder) GetGarbageCollectionCommits(ctx context } processed[commitID] = branchExpirationThreshold activeMap[commitID] = empty - for len(previousCommit.Parents) > 0 { - commitID = previousCommit.Parents[0] - if _, ok := previouslyExpiredMap[commitID]; ok { + for len(commit.Parents) > 0 { + // every branch retains only its main ancestry, acquired by recursively taking the first parent: + nextCommitID := commit.Parents[0] + if _, ok := previouslyExpiredMap[nextCommitID]; ok { // commit was already expired in a previous run break } - if previousThreshold, ok := processed[commitID]; ok && !previousThreshold.After(branchExpirationThreshold) { + if previousThreshold, ok := processed[nextCommitID]; ok && !previousThreshold.After(branchExpirationThreshold) { // was already here with earlier expiration date break } - if previousCommit.CreationDate.After(branchExpirationThreshold) { - activeMap[commitID] = empty - delete(expiredMap, commitID) - } else if _, ok := activeMap[commitID]; !ok { - expiredMap[commitID] = empty + if commit.CreationDate.After(branchExpirationThreshold) { + activeMap[nextCommitID] = empty + delete(expiredMap, nextCommitID) + } else if _, ok := activeMap[nextCommitID]; !ok { + expiredMap[nextCommitID] = empty } - previousCommit, err = e.commitGetter.GetCommit(ctx, repositoryID, commitID) + commit, err = commitGetter.GetCommit(ctx, nextCommitID) if err != nil { return nil, err } - processed[commitID] = branchExpirationThreshold + processed[nextCommitID] = branchExpirationThreshold } } if branchIterator.Err() != nil { diff --git a/pkg/graveler/retention/expired_commits_test.go b/pkg/graveler/retention/expired_commits_test.go index d6dfb277145..03335cd0a8a 100644 --- a/pkg/graveler/retention/expired_commits_test.go +++ b/pkg/graveler/retention/expired_commits_test.go @@ -163,7 +163,6 @@ func TestExpiredCommits(t *testing.T) { return garbageCollectionRules.BranchRetentionDays[string(branchRecords[i].BranchID)] > garbageCollectionRules.BranchRetentionDays[string(branchRecords[j].BranchID)] }) branchIterator := gtestutil.NewFakeBranchIterator(branchRecords) - refManagerMock.EXPECT().ListBranches(ctx, graveler.RepositoryID("test")).Return(branchIterator, nil) commitMap := make(map[graveler.CommitID]*graveler.Commit) previouslyExpired := newCommitSet(tst.previouslyExpired) for commitID, testCommit := range tst.commits { @@ -173,15 +172,14 @@ func TestExpiredCommits(t *testing.T) { refManagerMock.EXPECT().GetCommit(ctx, graveler.RepositoryID("test"), id).Return(commitMap[id], nil).Times(1) } } - finder := GarbageCollectionCommitsFinder{ - commitGetter: refManagerMock, - branchLister: refManagerMock, - } previouslyExpiredCommitIDs := make([]graveler.CommitID, len(tst.previouslyExpired)) for i := range tst.previouslyExpired { previouslyExpiredCommitIDs[i] = graveler.CommitID(tst.previouslyExpired[i]) } - gcCommits, err := finder.GetGarbageCollectionCommits(ctx, "test", garbageCollectionRules, previouslyExpiredCommitIDs) + gcCommits, err := GetGarbageCollectionCommits(ctx, branchIterator, &RepositoryCommitGetter{ + refManager: refManagerMock, + repositoryID: "test", + }, garbageCollectionRules, previouslyExpiredCommitIDs) if err != nil { t.Fatalf("failed to find expired commits: %v", err) } diff --git a/pkg/graveler/retention/garbage_collection.go b/pkg/graveler/retention/garbage_collection_manager.go similarity index 66% rename from pkg/graveler/retention/garbage_collection.go rename to pkg/graveler/retention/garbage_collection_manager.go index cc6f9219c53..1925d7386a2 100644 --- a/pkg/graveler/retention/garbage_collection.go +++ b/pkg/graveler/retention/garbage_collection_manager.go @@ -6,14 +6,12 @@ import ( "encoding/csv" "fmt" "io" - "strconv" "strings" - "google.golang.org/protobuf/proto" - "github.com/google/uuid" "github.com/treeverse/lakefs/pkg/block" "github.com/treeverse/lakefs/pkg/graveler" + "google.golang.org/protobuf/proto" ) const ( @@ -23,22 +21,32 @@ const ( type GarbageCollectionManager struct { blockAdapter block.Adapter - expiredCommitsFinder *GarbageCollectionCommitsFinder + refManager graveler.RefManager committedBlockStoragePrefix string } -func NewGarbageCollectionManager(blockAdapter block.Adapter, commitGetter graveler.CommitGetter, branchLister graveler.BranchLister, committedBlockStoragePrefix string) *GarbageCollectionManager { +type RepositoryCommitGetter struct { + refManager graveler.RefManager + repositoryID graveler.RepositoryID +} + +func (r *RepositoryCommitGetter) GetCommit(ctx context.Context, commitID graveler.CommitID) (*graveler.Commit, error) { + return r.refManager.GetCommit(ctx, r.repositoryID, commitID) +} + +func NewGarbageCollectionManager(blockAdapter block.Adapter, refManager graveler.RefManager, committedBlockStoragePrefix string) *GarbageCollectionManager { return &GarbageCollectionManager{ blockAdapter: blockAdapter, - expiredCommitsFinder: NewGarbageCollectionCommitsFinder(branchLister, commitGetter), + refManager: refManager, committedBlockStoragePrefix: committedBlockStoragePrefix, } } func (m *GarbageCollectionManager) GetRules(ctx context.Context, storageNamespace graveler.StorageNamespace) (*graveler.GarbageCollectionRules, error) { reader, err := m.blockAdapter.Get(ctx, block.ObjectPointer{ - Identifier: string(storageNamespace) + fmt.Sprintf(configFileSuffixTemplate, m.committedBlockStoragePrefix), - IdentifierType: block.IdentifierTypeFull, + StorageNamespace: string(storageNamespace), + Identifier: fmt.Sprintf(configFileSuffixTemplate, m.committedBlockStoragePrefix), + IdentifierType: block.IdentifierTypeRelative, }, -1) if err != nil { return nil, err @@ -64,8 +72,9 @@ func (m *GarbageCollectionManager) SaveRules(ctx context.Context, storageNamespa return err } return m.blockAdapter.Put(ctx, block.ObjectPointer{ - Identifier: string(storageNamespace) + fmt.Sprintf(configFileSuffixTemplate, m.committedBlockStoragePrefix), - IdentifierType: block.IdentifierTypeFull, + StorageNamespace: string(storageNamespace), + Identifier: fmt.Sprintf(configFileSuffixTemplate, m.committedBlockStoragePrefix), + IdentifierType: block.IdentifierTypeRelative, }, int64(len(rulesBytes)), bytes.NewReader(rulesBytes), block.PutOpts{}) } @@ -81,12 +90,16 @@ func (m *GarbageCollectionManager) GetRunExpiredCommits(ctx context.Context, sto return nil, err } csvReader := csv.NewReader(previousRunReader) - previousCommits, err := csvReader.ReadAll() - if err != nil { - return nil, err - } - res := make([]graveler.CommitID, 0) - for _, commitRow := range previousCommits { + csvReader.ReuseRecord = true + var res []graveler.CommitID + for { + commitRow, err := csvReader.Read() + if err == io.EOF { + break + } + if err != nil { + return nil, err + } if commitRow[1] == "true" { res = append(res, graveler.CommitID(commitRow[0])) } @@ -95,25 +108,37 @@ func (m *GarbageCollectionManager) GetRunExpiredCommits(ctx context.Context, sto } func (m *GarbageCollectionManager) SaveGarbageCollectionCommits(ctx context.Context, storageNamespace graveler.StorageNamespace, repositoryID graveler.RepositoryID, rules *graveler.GarbageCollectionRules, previouslyExpiredCommits []graveler.CommitID) (string, error) { - gcCommits, err := m.expiredCommitsFinder.GetGarbageCollectionCommits(ctx, repositoryID, rules, previouslyExpiredCommits) + branchIterator, err := m.refManager.ListBranches(ctx, repositoryID) + if err != nil { + return "", fmt.Errorf("list repository branches: %w", err) + } + commitGetter := &RepositoryCommitGetter{ + refManager: m.refManager, + repositoryID: repositoryID, + } + gcCommits, err := GetGarbageCollectionCommits(ctx, branchIterator, commitGetter, rules, previouslyExpiredCommits) if err != nil { return "", fmt.Errorf("find expired commits: %w", err) } b := &strings.Builder{} csvWriter := csv.NewWriter(b) for _, commitID := range gcCommits.expired { - err := csvWriter.Write([]string{string(commitID), strconv.FormatBool(true)}) + err := csvWriter.Write([]string{string(commitID), "true"}) if err != nil { return "", err } } for _, commitID := range gcCommits.active { - err := csvWriter.Write([]string{string(commitID), strconv.FormatBool(false)}) + err := csvWriter.Write([]string{string(commitID), "false"}) if err != nil { return "", err } } csvWriter.Flush() + err = csvWriter.Error() + if err != nil { + return "", err + } commitsStr := b.String() runID := uuid.New().String() err = m.blockAdapter.Put(ctx, block.ObjectPointer{ From ea995297fd9d931be7ee312f548874f4000ded75 Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Sun, 20 Jun 2021 15:51:51 +0300 Subject: [PATCH 24/24] cr fix --- pkg/graveler/retention/expired_commits.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pkg/graveler/retention/expired_commits.go b/pkg/graveler/retention/expired_commits.go index acfe3a21e1c..76ec97f6741 100644 --- a/pkg/graveler/retention/expired_commits.go +++ b/pkg/graveler/retention/expired_commits.go @@ -7,8 +7,6 @@ import ( "github.com/treeverse/lakefs/pkg/graveler" ) -var empty struct{} - type GarbageCollectionCommits struct { expired []graveler.CommitID active []graveler.CommitID @@ -41,7 +39,7 @@ func GetGarbageCollectionCommits(ctx context.Context, branchIterator graveler.Br continue } processed[commitID] = branchExpirationThreshold - activeMap[commitID] = empty + activeMap[commitID] = struct{}{} for len(commit.Parents) > 0 { // every branch retains only its main ancestry, acquired by recursively taking the first parent: nextCommitID := commit.Parents[0] @@ -54,10 +52,10 @@ func GetGarbageCollectionCommits(ctx context.Context, branchIterator graveler.Br break } if commit.CreationDate.After(branchExpirationThreshold) { - activeMap[nextCommitID] = empty + activeMap[nextCommitID] = struct{}{} delete(expiredMap, nextCommitID) } else if _, ok := activeMap[nextCommitID]; !ok { - expiredMap[nextCommitID] = empty + expiredMap[nextCommitID] = struct{}{} } commit, err = commitGetter.GetCommit(ctx, nextCommitID) if err != nil {