Skip to content

Commit

Permalink
refactor(save): overhaul save path InitID preparation
Browse files Browse the repository at this point in the history
Merge pull request #1439 from qri-io/refactor_save_initID
  • Loading branch information
b5 authored Jul 14, 2020
2 parents c444288 + 18d48ac commit f948cdf
Show file tree
Hide file tree
Showing 14 changed files with 274 additions and 334 deletions.
2 changes: 1 addition & 1 deletion api/dataset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ func TestSaveWithInferredNewName(t *testing.T) {
h.SaveHandler(w, req)
bodyText = resultText(w)
// Name is guaranteed to be unique
expectText = `"name":"data_1"`
expectText = `"name":"data_2"`
if !strings.Contains(bodyText, expectText) {
t.Errorf("expected, body response to contain %q, not found. got %q", expectText, bodyText)
}
Expand Down
144 changes: 99 additions & 45 deletions base/dataset_prepare.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package base
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"path/filepath"
Expand All @@ -12,66 +13,119 @@ import (
"github.com/qri-io/dataset/detect"
"github.com/qri-io/dataset/validate"
"github.com/qri-io/qfs"
"github.com/qri-io/qri/base/dsfs"
"github.com/qri-io/qri/dsref"
"github.com/qri-io/qri/repo"
qerr "github.com/qri-io/qri/errors"
"github.com/qri-io/qri/logbook"
"github.com/qri-io/qri/repo/profile"
reporef "github.com/qri-io/qri/repo/ref"
)

// PrepareHeadDatasetVersion prepares to save by loading the head commit, opening the body file,
// and constructing a mutable version that has no transform or commit.
func PrepareHeadDatasetVersion(ctx context.Context, r repo.Repo, peername, name string) (curr, mutable *dataset.Dataset, currPath string, err error) {
// TODO(dustmop): We should not be calling CanonicalizeDatasetRef here. It's already been
// called up in lib, which means that we've thrown information away. Furthermore, we
// should be relying on stable identifiers this low down the stack. Instead, load the dataset
// head (if it exists) by using the initID, and pass it into base.Save.

// Determine if the save is creating a new dataset or updating an existing dataset by
// seeing if the name can canonicalize to a repo that we know about
lookup := &reporef.DatasetRef{Name: name, Peername: peername}
if err = repo.CanonicalizeDatasetRef(r, lookup); err == repo.ErrNotFound || lookup.Path == "" {
return &dataset.Dataset{}, &dataset.Dataset{}, "", nil
// PrepareSaveRef works out a dataset reference for saving a dataset version.
// When a dataset exists, resolve the initID & path. When no dataset with that
// name exists, ensure a locally-unique dataset name and create a new logbook
// history & InitID to write to. PrepareSaveRef returns a true boolean value
// if an initID was created
// successful calls to PrepareSaveRef always have an InitID, and will have the
// Path of the current version, if one exists
func PrepareSaveRef(
ctx context.Context,
pro *profile.Profile,
book *logbook.Book,
resolver dsref.Resolver,
refStr string,
bodyPathNameHint string,
wantNewName bool,
) (dsref.Ref, bool, error) {

var badCaseErr error

ref, err := dsref.ParseHumanFriendly(refStr)
if errors.Is(err, dsref.ErrBadCaseName) {
// save bad case error for later, will fail if dataset is new
badCaseErr = err
} else if errors.Is(err, dsref.ErrEmptyRef) {
// User is calling save but didn't provide a dataset reference. Try to infer a usable name.
if bodyPathNameHint == "" {
bodyPathNameHint = "dataset"
}
basename := filepath.Base(bodyPathNameHint)
basename = strings.TrimSuffix(basename, filepath.Ext(bodyPathNameHint))
basename = strings.TrimSuffix(basename, ".")
ref.Name = dsref.GenerateName(basename, "dataset_")

// need to use profile username b/c resolver.ResolveRef can't handle "me"
// shorthand
check := &dsref.Ref{Username: pro.Peername, Name: ref.Name}
if _, resolveErr := resolver.ResolveRef(ctx, check); resolveErr == nil {
if !wantNewName {
// Name was inferred, and has previous version. Unclear if the user meant to create
// a brand new dataset or if they wanted to add a new version to the existing dataset.
// Raise an error recommending one of these course of actions.
return ref, false, fmt.Errorf(`inferred dataset name already exists. To add a new commit to this dataset, run save again with the dataset reference %q. To create a new dataset, use --new flag`, check.Human())
}
ref.Name = GenerateAvailableName(ctx, pro, resolver, ref.Name)
}
} else if errors.Is(err, dsref.ErrNotHumanFriendly) {
return ref, false, err
} else if err != nil {
// If some other parse error happened, describe a valid dataset name.
return ref, false, dsref.ErrDescribeValidName
}

currPath = lookup.Path
log.Debugf("loading currPath: %s. lookup result: %v", currPath, lookup)

if curr, err = dsfs.LoadDataset(ctx, r.Store(), currPath); err != nil {
return
// Validate that username is our own, it's not valid to try to save a dataset with someone
// else's username. Without this check, base will replace the username with our own regardless,
// it's better to have an error to display, rather than silently ignore it.
if ref.Username != "" && ref.Username != "me" && ref.Username != pro.Peername {
return ref, false, fmt.Errorf("cannot save using a different username than %q", pro.Peername)
}
if curr.BodyPath != "" {
var body qfs.File
body, err = dsfs.LoadBody(ctx, r.Store(), curr)
if err != nil {
return
ref.Username = pro.Peername

// attempt to resolve the reference
if _, resolveErr := resolver.ResolveRef(ctx, &ref); resolveErr != nil {
if !errors.Is(resolveErr, dsref.ErrRefNotFound) {
return ref, false, resolveErr
}
} else if resolveErr == nil {
if wantNewName {
// Name was explicitly given, with the --new flag, but the name is already in use.
// This is an error.
return ref, false, qerr.New(ErrNameTaken, "dataset name has a previous version, cannot make new dataset")
}
curr.SetBodyFile(body)

if badCaseErr != nil {
// name already exists but is a bad case, log a warning and then continue.
log.Error(badCaseErr)
}

// we have a valid previous reference & an initID, return!
return ref, false, nil
}

// Load a mutable copy of the dataset because most of the save path assuming we are doing
// a patch update to the current head, and not a full replacement.
if mutable, err = dsfs.LoadDataset(ctx, r.Store(), currPath); err != nil {
return
// at this point we're attempting to create a new dataset.
// If dataset name is using bad-case characters, and is not yet in use, fail with error.
if badCaseErr != nil {
return ref, true, badCaseErr
}
if !dsref.IsValidName(ref.Name) {
return ref, true, fmt.Errorf("invalid dataset name: %s", ref.Name)
}

// TODO(dustmop): Stop removing the transform once we move to apply, and untangle the
// save command from applying a transform.
// remove the Transform & commit
// transform & commit must be created from scratch with each new version
mutable.Transform = nil
mutable.Commit = nil
return
ref.InitID, err = book.WriteDatasetInit(ctx, ref.Name)
return ref, true, err
}

// MaybeInferName infer a name for the dataset if none is set
func MaybeInferName(ds *dataset.Dataset) string {
if ds.Name == "" {
filename := ds.BodyFile().FileName()
basename := strings.TrimSuffix(filename, filepath.Ext(filename))
return dsref.GenerateName(basename, "dataset_")
// GenerateAvailableName creates a name for the dataset that is not currently in
// use. Generated names start with _2, implying the "_1" file is the original
// no-suffix name.
func GenerateAvailableName(ctx context.Context, pro *profile.Profile, resolver dsref.Resolver, prefix string) string {
	lookup := &dsref.Ref{Username: pro.Peername, Name: prefix}
	counter := 1
	// loop until a suffixed name fails to resolve; ErrRefNotFound means the
	// candidate name is free. Terminates via the return inside the loop.
	for {
		counter++
		lookup.Name = fmt.Sprintf("%s_%d", prefix, counter)
		if _, err := resolver.ResolveRef(ctx, lookup); errors.Is(err, dsref.ErrRefNotFound) {
			return lookup.Name
		}
	}
}

// InferValues populates any missing fields that must exist to create a snapshot
Expand Down
121 changes: 73 additions & 48 deletions base/dataset_prepare_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,57 +3,95 @@ package base
import (
"context"
"encoding/json"
"fmt"
"testing"
"time"

"github.com/google/go-cmp/cmp"
"github.com/qri-io/dataset"
"github.com/qri-io/qfs"
"github.com/qri-io/qri/dsref"
"github.com/qri-io/qri/logbook"
)

func TestPrepareDatasetSave(t *testing.T) {
ctx := context.Background()
func TestPrepareSaveRef(t *testing.T) {
logbook.NewTimestamp = func() int64 { return 0 }
defer func() {
logbook.NewTimestamp = func() int64 { return time.Now().Unix() }
}()

r := newTestRepo(t)
ref := addCitiesDataset(t, r)

prev, mutable, prevPath, err := PrepareHeadDatasetVersion(ctx, r, ref.Peername, ref.Name)
pro, err := r.Profile()
if err != nil {
t.Errorf("case cities dataset error: %s ", err.Error())
}
if prev.IsEmpty() {
t.Errorf("case cites dataset: previous should not be empty")
}
if mutable.IsEmpty() {
t.Errorf("case cities dataset: mutable should not be empty")
}
if mutable.Transform != nil {
t.Errorf("case cities dataset: mutable.Transform should be nil")
}
if mutable.Commit != nil {
t.Errorf("case cities dataset: mutable.Commit should be nil")
}
if prev.BodyFile() == nil {
t.Errorf("case cities dataset: previous body should not be nil")
}
if prevPath == "" {
t.Errorf("case cities dataset: previous path should not be empty")
t.Fatal(err)
}
ctx := context.Background()
book := r.Logbook()

prev, mutable, prevPath, err = PrepareHeadDatasetVersion(ctx, r, "me", "non-existent")
if err != nil {
t.Errorf("case non-existant previous dataset error: %s ", err.Error())
}
if !prev.IsEmpty() {
t.Errorf("case non-existant previous dataset: previous should be empty, got non-empty dataset")
book.WriteDatasetInit(ctx, "cities")
book.WriteDatasetInit(ctx, "Bad_Case")

bad := []struct {
refStr, filepath string
newName bool
expect dsref.Ref
expectIsNew bool
err string
}{
{"me/invalid name", "", false, dsref.Ref{Username: "me", Name: "invalid"}, false, dsref.ErrDescribeValidName.Error()},
{"me/cities", "", true, dsref.Ref{Username: "peer", Name: "cities", InitID: "r7kr6djpgu2hm5fprxalfcsgehacoxomqse4c7nubu5mw6qcz57q"}, false, "name already in use"},
{"me/cities@/ipfs/foo", "", true, dsref.Ref{Username: "me", Name: "cities", InitID: ""}, false, dsref.ErrNotHumanFriendly.Error()},
{"alice/not_this_user", "", true, dsref.Ref{Username: "alice", Name: "not_this_user", InitID: ""}, false, "cannot save using a different username than \"peer\""},
{"me/New_Bad_Case", "", false, dsref.Ref{Username: "peer", Name: "New_Bad_Case", InitID: ""}, true, dsref.ErrBadCaseName.Error()},
}
if !mutable.IsEmpty() {
t.Errorf("case non-existant previous dataset: mutable should be empty, got non-empty dataset")

for _, c := range bad {
t.Run(fmt.Sprintf("bad_%s", c.refStr), func(t *testing.T) {
ref, isNew, err := PrepareSaveRef(ctx, pro, book, book, c.refStr, c.filepath, c.newName)
if !c.expect.Equals(ref) {
t.Errorf("resulting ref mismatch. want:\n%#v\ngot:\n%#v", c.expect, ref)
}
if c.expectIsNew != isNew {
t.Errorf("isNew result mismatch. want %t got %t", c.expectIsNew, isNew)
}
if err == nil {
t.Fatal("expected error, got none")
}
if c.err != err.Error() {
t.Errorf("error mismatch. want %q got %q", c.err, err.Error())
}
})
}
if prev.BodyFile() != nil {
t.Errorf("case non-existant previous dataset: previous body should be nil, got non-nil body")

good := []struct {
refStr, filepath string
newName bool
expect dsref.Ref
expectIsNew bool
}{
{"", "", false, dsref.Ref{Username: "peer", Name: "dataset", InitID: "2fxdc6hvi5gdraujcru5vnaluuuf57x345eirtwwtwitmjhr54ca"}, true},
{"me/cities", "", false, dsref.Ref{Username: "peer", Name: "cities", InitID: "r7kr6djpgu2hm5fprxalfcsgehacoxomqse4c7nubu5mw6qcz57q"}, false},
{"", "/path/to/data/apples.csv", false, dsref.Ref{Username: "peer", Name: "apples", InitID: "bj2srktro6zxsvork6stjzecq4f4kaii2xg2q2n6b4gwk2robf2q"}, true},
{"", "/path/to/data/apples.csv", true, dsref.Ref{Username: "peer", Name: "apples_2", InitID: "tbrfupxauhuc6rwamyejr35w4nw2icgcxvm4f6fnftyaoyeo7ida"}, true},
{"me/Bad_Case", "", false, dsref.Ref{Username: "peer", Name: "Bad_Case", InitID: "setbycsqt5gwyg3fmcm4ty37dzk5ohhq4oxk2hif64fkdhi6naca"}, false},
}
if prevPath != "" {
t.Errorf("case non-existant previous dataset: previous path should be empty, got non-empty path")

for _, c := range good {
t.Run(fmt.Sprintf("good_%s", c.refStr), func(t *testing.T) {
ref, isNew, err := PrepareSaveRef(ctx, pro, book, book, c.refStr, c.filepath, c.newName)
if err != nil {
t.Fatalf("unexpected error: %q", err)
}
if !c.expect.Equals(ref) {
t.Errorf("resulting ref mismatch. want:\n%#v\ngot:\n%#v", c.expect, ref)
}
if c.expectIsNew != isNew {
t.Errorf("isNew result mismatch. want %t got %t", c.expectIsNew, isNew)
}
})
}

}

func TestInferValues(t *testing.T) {
Expand All @@ -72,19 +110,6 @@ func TestInferValues(t *testing.T) {
}
}

// TestMaybeInferName verifies that a usable dataset name is derived from the
// body file's filename when the dataset has no name set.
func TestMaybeInferName(t *testing.T) {
	ds := &dataset.Dataset{}
	ds.SetBodyFile(qfs.NewMemfileBytes("gabba gabba hey.csv", []byte("a,b,c,c,s,v")))

	got := MaybeInferName(ds)
	if got == "" {
		t.Errorf("expected name to be inferred")
	}

	want := "gabba_gabba_hey"
	if got != want {
		t.Errorf("inferred name mismatch. expected: '%s', got: '%s'", want, got)
	}
}

func TestInferValuesStructure(t *testing.T) {
r := newTestRepo(t)
pro, err := r.Profile()
Expand Down
Loading

0 comments on commit f948cdf

Please sign in to comment.