Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rocks catalog interface #959

Merged
merged 16 commits into from
Nov 29, 2020
241 changes: 170 additions & 71 deletions catalog/rocks/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,53 +7,17 @@ import (

// Basic Types

// Repository represents repository metadata
type Repository struct {
StorageNamespace StorageNamespace
CreationDate time.Time
DefaultBranch BranchID
}

// Entry represents metadata of a given object (modified date, physical address, etc)
type Entry struct {
LastModified time.Time
Address string
Metadata map[string]string
ETag string
}

// Commit represents commit metadata (author, time, tree ID)
type Commit struct {
Committer string
Message string
TreeID TreeID
CreationDate time.Time
Parents []CommitID
Metadata map[string]string
}

// Branch is a pointer to a commit.
type Branch struct {
CommitID CommitID
// nolint: structcheck, unused
stagingToken StagingToken
}

// Diff represents a changed state for a given entry (added, removed, changed, conflict)
// DiffType represents a changed state for a given entry (added, removed, changed, conflict)
// It is a uint8 whose values are enumerated by the DiffTypeXxx constants declared right after it
type DiffType uint8

//goland:noinspection GoUnusedConst
const (
// DiffTypeAdded is the first value of the iota sequence (0) and marks the 'added' state
DiffTypeAdded DiffType = iota
// DiffTypeRemoved (1) marks the 'removed' state
DiffTypeRemoved
// DiffTypeChanged (2) marks the 'changed' state
DiffTypeChanged
// DiffTypeConflict (3) marks the 'conflict' state
DiffTypeConflict
)

type Diff struct {
Path Path
Type DiffType
}

// function/methods receiving the following basic types could assume they passed validation
type (
// StorageNamespace is the URI to the storage location
Expand Down Expand Up @@ -85,73 +49,202 @@ type (

// CommonPrefix represents a path prefixing one or more Entry objects
CommonPrefix string

// Metadata is a set of key/value strings holding metadata information on an entry or a commit
Metadata map[string]string
)

// Repository represents repository metadata
type Repository struct {
StorageNamespace StorageNamespace
CreationDate time.Time
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should decide if we want to call it a "date". I prefer CreatedAt or CreationTime; preferably the former because it does not repeat the type.

DefaultBranchID BranchID
}

// RepositoryRecord holds RepositoryID with the associated Repository data
type RepositoryRecord struct {
// RepositoryID identifies the repository described by this record
RepositoryID RepositoryID
// Repository is embedded by pointer, so its fields are promoted onto the record
// and are only reachable while the pointer is non-nil
*Repository
}

// Entry represents metadata of a given object (modified date, physical address, etc)
type Entry struct {
LastModified time.Time
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
LastModified time.Time
CreationDate time.Time

Can we ever change an entry?

Address string
Metadata Metadata
ETag string
}

// IsTombstone reports whether the entry is a tombstone, which is represented by a nil *Entry
// It can be called on a nil receiver: the method only compares the receiver to nil and never dereferences it
func (e *Entry) IsTombstone() bool {
return e == nil
}

// EntryRecord holds Path with the associated Entry information
type EntryRecord struct {
// Path is the path the entry is associated with
Path Path
// Entry is embedded by pointer, so its fields and methods (e.g. IsTombstone) are promoted
// NOTE(review): presumably a nil Entry marks a tombstone for Path - confirm against the iterator implementations
*Entry
}

// Commit represents commit metadata (author, time, tree ID)
type Commit struct {
// Committer is the author of the commit
Committer string
// Message is the commit message
Message string
// TreeID is the ID of the tree this commit refers to
TreeID TreeID
// CreationDate is the time of the commit
CreationDate time.Time
// Parents holds the IDs of the parent commits
Parents []CommitID
// Metadata holds key/value strings attached to the commit
// NOTE(review): Entry.Metadata and Catalog.Commit use the Metadata type while this field
// is still a plain map[string]string - consider aligning them
Metadata map[string]string
}

// CommitRecord holds CommitID with the associated Commit data
type CommitRecord struct {
// CommitID identifies the commit described by this record
CommitID CommitID
// Commit is embedded by pointer, so its fields are promoted onto the record
// and are only reachable while the pointer is non-nil
*Commit
}

// Branch is a pointer to a commit
type Branch struct {
// CommitID is the commit the branch points to
CommitID CommitID
// stagingToken is unexported and not referenced anywhere in the visible code, which is
// why the linter directive on the next line silences the structcheck / unused checks
// NOTE(review): presumably identifies the branch's staging area - confirm against StagingManager
// nolint: structcheck, unused
stagingToken StagingToken
}

// BranchRecord holds BranchID with the associated Branch data
type BranchRecord struct {
// BranchID identifies the branch described by this record
BranchID BranchID
// Branch is embedded by pointer, so its fields are promoted onto the record
// and are only reachable while the pointer is non-nil
*Branch
}

// Listing represents either an entry or a CommonPrefix
type Listing struct {
// CommonPrefix is embedded by value; it is a string type
// NOTE(review): presumably left empty when the listing is an entry - confirm with ListEntries implementations
CommonPrefix
// Entry is embedded by pointer, so its fields and methods are promoted onto the listing
// NOTE(review): presumably nil when the listing is a common prefix - confirm with ListEntries implementations
*Entry
}

// Diff represents a change in path
type Diff struct {
// Path is the path that changed
Path Path
// Type is the kind of change, one of the DiffType constants
Type DiffType
}

// Interfaces
type Catalog interface {
// entries
// GetEntry returns entry from repository / reference by path, nil entry is a valid value for tombstone
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Tombstone" still here.

// returns error if entry does not exist
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's specify which error (we had weirdness on this in the MVCC implementation).

GetEntry(ctx context.Context, repositoryID RepositoryID, ref Ref, path Path) (*Entry, error)

// SetEntry stores entry on repository / branch by path. nil entry is a valid value for tombstone
SetEntry(ctx context.Context, repositoryID RepositoryID, branchID BranchID, path Path, entry Entry) error

// DeleteEntry deletes entry on repository / branch by path
DeleteEntry(ctx context.Context, repositoryID RepositoryID, branchID BranchID, path Path) error

// ListEntries lists entries on repository / ref, filtered by 'prefix' and starting from path 'from'.
// When 'delimiter' is set, the listing will include common prefixes based on the delimiter.
// The 'amount' specifies the maximum number of listings per call that the API will return (no more than ListEntriesMaxAmount, -1 will use the server default).
// Returns the list of entries and a boolean specifying whether there are more results, which will require another call with 'from' set to the last path from the previous call.
ListEntries(ctx context.Context, repositoryID RepositoryID, ref Ref, prefix, from, delimiter string, amount int) ([]Listing, bool, error)

// refs
// CreateBranch creates branch on repository pointing to ref
CreateBranch(ctx context.Context, repositoryID RepositoryID, branchID BranchID, ref Ref) (Branch, error)

// UpdateBranch updates branch on repository pointing to ref
UpdateBranch(ctx context.Context, repositoryID RepositoryID, branchID BranchID, ref Ref) (Branch, error)

// GetBranch gets branch information by branch / repository id
GetBranch(ctx context.Context, repositoryID RepositoryID, branchID BranchID) (Branch, error)

// Dereference translates ref to commit id
Dereference(ctx context.Context, repositoryID RepositoryID, ref Ref) (CommitID, error)
Log(ctx context.Context, repositoryID RepositoryID, commitID CommitID, amount int) ([]Commit, bool, error)

// Log lists commits in repository
// The 'from' is used to get all commits after the specified commit id
// The 'amount' specifies the maximum number of commits the call will return
// Returns commits, has more boolean and an error
Log(ctx context.Context, repositoryID RepositoryID, from CommitID, amount int) ([]Commit, bool, error)

// ListBranches lists branches on repositories
// The 'from' is used to get all branches after this branch id
// The 'amount' specifies the maximum number of branches the call will return
// Returns branches, has more boolean and an error
ListBranches(ctx context.Context, repositoryID RepositoryID, from BranchID, amount int) ([]Branch, bool, error)

// DeleteBranch deletes branch from repository
DeleteBranch(ctx context.Context, repositoryID RepositoryID, branchID BranchID) error

// commits
Commit(ctx context.Context, repositoryID RepositoryID, branchID BranchID, commit Commit) (CommitID, error)
// Commit commits the staged data and returns a commit ID that references that change
// Returns ErrNothingToCommit in case there is no staged data
Commit(ctx context.Context, repositoryID RepositoryID, branchID BranchID, committer string, message string, metadata Metadata) (CommitID, error)

// Reset throws away all staged data on the repository / branch
Reset(ctx context.Context, repositoryID RepositoryID, branchID BranchID) error
Revert(ctx context.Context, repositoryID RepositoryID, branchID BranchID, ref Ref) error

// diffs and merges
// Revert commits a change that will revert all the changes made from the specified 'ref'
Revert(ctx context.Context, repositoryID RepositoryID, branchID BranchID, ref Ref) (CommitID, error)

// Merge merges 'from' with 'to' branches under repository, and returns the new commit id on the 'to' branch
Merge(ctx context.Context, repositoryID RepositoryID, from Ref, to BranchID) (CommitID, error)

// DiffUncommitted returns the changes as 'Diff' slice on a repository / branch
// List the differences 'from' path, with 'amount' of result.
// Returns differences found, true (boolean) in case there are more differences - use 'from' with last path from previous call to get the next differences
DiffUncommitted(ctx context.Context, repositoryID RepositoryID, branchID BranchID, from Path, amount int) ([]Diff, bool, error)

// Diff returns the changes between 'left' and 'right' ref, list changes 'from' path with no more than 'amount' per call.
// Returns the list of changes, true (boolean) in case there are more differences - use last path as 'from' in the next call to continue getting differences
Diff(ctx context.Context, repositoryID RepositoryID, left, right Ref, from Path, amount int) ([]Diff, bool, error)
}

// internal structures used by Catalog
// Internal structures used by Catalog
// xxxIterator is used as follows:
// ```
// it := NewXXXIterator(data)
// for it.Next() {
// data := it.Value()
// process(data)
// }
// if it.Err() != nil {
// return fmt.Errorf("stopped because of an error %w", it.Err())
// }
// ```
// SeekGE() behaves like Next(): when it returns true we can process 'Value()', and when it returns false we check Err()
// When Next() or SeekGE() returns false (regardless of whether it is because of an error) calling Value() should return nil

type RepositoryIterator interface {
First() (RepositoryID, Repository)
Next() (RepositoryID, Repository)
SeekGE(BranchID) (RepositoryID, Repository)
Next() bool
SeekGE(id RepositoryID) bool
Value() *RepositoryRecord
Err() error
Close()
}

type EntryIterator interface {
First() (*Path, *Entry)
SeekGE(Path) (*Path, *Entry)
Next() (*Path, *Entry)
Next() bool
SeekGE(id Path) bool
Value() *EntryRecord
Err() error
Close()
}

type DiffIterator interface {
First() (*Path, *DiffType)
SeekGE(Path) (*Path, *DiffType)
Next() (*Path, *DiffType)
Next() bool
SeekGE(id Path) bool
Value() *Diff
Err() error
Close()
}

type BranchIterator interface {
First() (*BranchID, *Branch)
Next() (*BranchID, *Branch)
SeekGE(BranchID) (*BranchID, *Branch)
Next() bool
SeekGE(id BranchID) bool
Value() *BranchRecord
Err() error
Close()
}

type CommitIterator interface {
First() (*CommitID, *Commit)
Next() (*CommitID, *Commit)
Next() bool
SeekGE(id CommitID) bool
Value() *CommitRecord
Err() error
Close()
}

Expand Down Expand Up @@ -184,15 +277,15 @@ type RefManager interface {
// DeleteBranch deletes the branch
DeleteBranch(ctx context.Context, repositoryID RepositoryID, branchID BranchID) error

// ListBranches lists branches
ListBranches(ctx context.Context, repositoryID RepositoryID, from BranchID) (BranchIterator, error)

// GetCommit returns the Commit metadata object for the given CommitID
GetCommit(ctx context.Context, repositoryID RepositoryID, commitID CommitID) (*Commit, error)

// AddCommit stores the Commit object, returning its ID
AddCommit(ctx context.Context, repositoryID RepositoryID, commit Commit) (CommitID, error)

// ListBranches lists branches
ListBranches(ctx context.Context, repositoryID RepositoryID, from BranchID) (BranchIterator, error)

// FindMergeBase returns the merge-base for the given CommitIDs
// see: https://git-scm.com/docs/git-merge-base
// and internally: https://github.com/treeverse/lakeFS/blob/09954804baeb36ada74fa17d8fdc13a38552394e/index/dag/commits.go
Expand Down Expand Up @@ -229,16 +322,22 @@ type CommittedManager interface {
// StagingManager handles changes to a branch that aren't yet committed
// provides basic CRUD abilities, with deletes being written as tombstones (null entry)
type StagingManager interface {
// GetEntry returns the provided path, if exists, for the given StagingToken
GetEntry(ctx context.Context, st StagingToken, from Path) (*Entry, error)
// GetEntry returns the provided path (or nil entry to represent a tombstone)
// Returns ErrNotFound if no entry found on path
GetEntry(ctx context.Context, repositoryID RepositoryID, branchID BranchID, st StagingToken, from Path) (*Entry, error)

// SetEntry writes an entry (or nil entry to represent a tombstone)
SetEntry(ctx context.Context, repositoryID RepositoryID, branchID BranchID, path Path, entry *Entry) error

// DeleteEntry deletes an entry by path
DeleteEntry(ctx context.Context, repositoryID RepositoryID, branchID BranchID, path Path) error

// ListEntries takes a given BranchID and returns an EntryIterator seeked to >= "from" path
ListEntries(ctx context.Context, st StagingToken, from Path) (EntryIterator, error)
ListEntries(ctx context.Context, repositoryID RepositoryID, branchID BranchID, st StagingToken, from Path) (EntryIterator, error)

// SetEntry writes an entry (or null entry to represent a tombstone)
SetEntry(ctx context.Context, st StagingToken, path Path, entry *Entry) error
// Snapshot returns a new snapshot and returns its ID
Snapshot(ctx context.Context, repositoryID RepositoryID, branchID BranchID, st StagingToken) (StagingToken, error)

// DropStaging deletes all entries and tombstones for a given StagingToken
// This is useful in a `lakefs reset` operation, and potentially as a last step of a commit
DropStaging(ctx context.Context, st StagingToken) error
// ListSnapshot returns an iterator to scan the snapshot entries
ListSnapshot(ctx context.Context, repositoryID RepositoryID, branchID BranchID, st StagingToken, from Path) (EntryIterator, error)
}