Skip to content

Commit

Permalink
refactor(save): overhaul save path InitID preparation
Browse files Browse the repository at this point in the history
Merge pull request #1439 from qri-io/refactor_save_initID
  • Loading branch information
b5 authored Jul 14, 2020
2 parents c444288 + 18d48ac commit f948cdf
Show file tree
Hide file tree
Showing 14 changed files with 274 additions and 334 deletions.
2 changes: 1 addition & 1 deletion api/dataset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ func TestSaveWithInferredNewName(t *testing.T) {
h.SaveHandler(w, req)
bodyText = resultText(w)
// Name is guaranteed to be unique
expectText = `"name":"data_1"`
expectText = `"name":"data_2"`
if !strings.Contains(bodyText, expectText) {
t.Errorf("expected, body response to contain %q, not found. got %q", expectText, bodyText)
}
Expand Down
144 changes: 99 additions & 45 deletions base/dataset_prepare.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package base
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"path/filepath"
Expand All @@ -12,66 +13,119 @@ import (
"github.com/qri-io/dataset/detect"
"github.com/qri-io/dataset/validate"
"github.com/qri-io/qfs"
"github.com/qri-io/qri/base/dsfs"
"github.com/qri-io/qri/dsref"
"github.com/qri-io/qri/repo"
qerr "github.com/qri-io/qri/errors"
"github.com/qri-io/qri/logbook"
"github.com/qri-io/qri/repo/profile"
reporef "github.com/qri-io/qri/repo/ref"
)

// PrepareHeadDatasetVersion prepares to save by loading the head commit, opening the body file,
// and constructing a mutable version that has no transform or commit.
func PrepareHeadDatasetVersion(ctx context.Context, r repo.Repo, peername, name string) (curr, mutable *dataset.Dataset, currPath string, err error) {
// TODO(dustmop): We should not be calling CanonicalizeDatasetRef here. It's already been
// called up in lib, which means that we've thrown information away. Furthermore, we
// should be relying on stable identifiers this low down the stack. Instead, load the dataset
// head (if it exists) by using the initID, and pass it into base.Save.

// Determine if the save is creating a new dataset or updating an existing dataset by
// seeing if the name can canonicalize to a repo that we know about
lookup := &reporef.DatasetRef{Name: name, Peername: peername}
if err = repo.CanonicalizeDatasetRef(r, lookup); err == repo.ErrNotFound || lookup.Path == "" {
return &dataset.Dataset{}, &dataset.Dataset{}, "", nil
// PrepareSaveRef works out a dataset reference for saving a dataset version.
// When a dataset exists, resolve the initID & path. When no dataset with that
// name exists, ensure a locally-unique dataset name and create a new logbook
// history & InitID to write to. PrepareSaveRef returns a true boolean value
// if an initID was created
// successful calls to PrepareSaveRef always have an InitID, and will have the
// Path of the current version, if one exists
func PrepareSaveRef(
ctx context.Context,
pro *profile.Profile,
book *logbook.Book,
resolver dsref.Resolver,
refStr string,
bodyPathNameHint string,
wantNewName bool,
) (dsref.Ref, bool, error) {

var badCaseErr error

ref, err := dsref.ParseHumanFriendly(refStr)
if errors.Is(err, dsref.ErrBadCaseName) {
// save bad case error for later, will fail if dataset is new
badCaseErr = err
} else if errors.Is(err, dsref.ErrEmptyRef) {
// User is calling save but didn't provide a dataset reference. Try to infer a usable name.
if bodyPathNameHint == "" {
bodyPathNameHint = "dataset"
}
basename := filepath.Base(bodyPathNameHint)
basename = strings.TrimSuffix(basename, filepath.Ext(bodyPathNameHint))
basename = strings.TrimSuffix(basename, ".")
ref.Name = dsref.GenerateName(basename, "dataset_")

// need to use profile username b/c resolver.ResolveRef can't handle "me"
// shorthand
check := &dsref.Ref{Username: pro.Peername, Name: ref.Name}
if _, resolveErr := resolver.ResolveRef(ctx, check); resolveErr == nil {
if !wantNewName {
// Name was inferred, and has previous version. Unclear if the user meant to create
// a brand new dataset or if they wanted to add a new version to the existing dataset.
// Raise an error recommending one of these course of actions.
return ref, false, fmt.Errorf(`inferred dataset name already exists. To add a new commit to this dataset, run save again with the dataset reference %q. To create a new dataset, use --new flag`, check.Human())
}
ref.Name = GenerateAvailableName(ctx, pro, resolver, ref.Name)
}
} else if errors.Is(err, dsref.ErrNotHumanFriendly) {
return ref, false, err
} else if err != nil {
// If some other parse error happened, describe a valid dataset name.
return ref, false, dsref.ErrDescribeValidName
}

currPath = lookup.Path
log.Debugf("loading currPath: %s. lookup result: %v", currPath, lookup)

if curr, err = dsfs.LoadDataset(ctx, r.Store(), currPath); err != nil {
return
// Validate that username is our own, it's not valid to try to save a dataset with someone
// else's username. Without this check, base will replace the username with our own regardless,
// it's better to have an error to display, rather than silently ignore it.
if ref.Username != "" && ref.Username != "me" && ref.Username != pro.Peername {
return ref, false, fmt.Errorf("cannot save using a different username than %q", pro.Peername)
}
if curr.BodyPath != "" {
var body qfs.File
body, err = dsfs.LoadBody(ctx, r.Store(), curr)
if err != nil {
return
ref.Username = pro.Peername

// attempt to resolve the reference
if _, resolveErr := resolver.ResolveRef(ctx, &ref); resolveErr != nil {
if !errors.Is(resolveErr, dsref.ErrRefNotFound) {
return ref, false, resolveErr
}
} else if resolveErr == nil {
if wantNewName {
// Name was explicitly given, with the --new flag, but the name is already in use.
// This is an error.
return ref, false, qerr.New(ErrNameTaken, "dataset name has a previous version, cannot make new dataset")
}
curr.SetBodyFile(body)

if badCaseErr != nil {
// name already exists but is a bad case, log a warning and then continue.
log.Error(badCaseErr)
}

// we have a valid previous reference & an initID, return!
return ref, false, nil
}

// Load a mutable copy of the dataset because most of the save path assuming we are doing
// a patch update to the current head, and not a full replacement.
if mutable, err = dsfs.LoadDataset(ctx, r.Store(), currPath); err != nil {
return
// at this point we're attempting to create a new dataset.
// If dataset name is using bad-case characters, and is not yet in use, fail with error.
if badCaseErr != nil {
return ref, true, badCaseErr
}
if !dsref.IsValidName(ref.Name) {
return ref, true, fmt.Errorf("invalid dataset name: %s", ref.Name)
}

// TODO(dustmop): Stop removing the transform once we move to apply, and untangle the
// save command from applying a transform.
// remove the Transform & commit
// transform & commit must be created from scratch with each new version
mutable.Transform = nil
mutable.Commit = nil
return
ref.InitID, err = book.WriteDatasetInit(ctx, ref.Name)
return ref, true, err
}

// MaybeInferName infer a name for the dataset if none is set
func MaybeInferName(ds *dataset.Dataset) string {
if ds.Name == "" {
filename := ds.BodyFile().FileName()
basename := strings.TrimSuffix(filename, filepath.Ext(filename))
return dsref.GenerateName(basename, "dataset_")
// GenerateAvailableName creates a name for the dataset that is not currently in
// use. Generated names start with _2, implying the "_1" file is the original
// no-suffix name.
func GenerateAvailableName(ctx context.Context, pro *profile.Profile, resolver dsref.Resolver, prefix string) string {
	lookup := &dsref.Ref{Username: pro.Peername, Name: prefix}
	counter := 1
	// loop until a suffixed name fails to resolve; ErrRefNotFound means the
	// candidate name is free. Terminates via the return inside the loop.
	for {
		counter++
		lookup.Name = fmt.Sprintf("%s_%d", prefix, counter)
		if _, err := resolver.ResolveRef(ctx, lookup); errors.Is(err, dsref.ErrRefNotFound) {
			return lookup.Name
		}
	}
}

// InferValues populates any missing fields that must exist to create a snapshot
Expand Down
121 changes: 73 additions & 48 deletions base/dataset_prepare_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,57 +3,95 @@ package base
import (
"context"
"encoding/json"
"fmt"
"testing"
"time"

"github.com/google/go-cmp/cmp"
"github.com/qri-io/dataset"
"github.com/qri-io/qfs"
"github.com/qri-io/qri/dsref"
"github.com/qri-io/qri/logbook"
)

func TestPrepareDatasetSave(t *testing.T) {
ctx := context.Background()
func TestPrepareSaveRef(t *testing.T) {
logbook.NewTimestamp = func() int64 { return 0 }
defer func() {
logbook.NewTimestamp = func() int64 { return time.Now().Unix() }
}()

r := newTestRepo(t)
ref := addCitiesDataset(t, r)

prev, mutable, prevPath, err := PrepareHeadDatasetVersion(ctx, r, ref.Peername, ref.Name)
pro, err := r.Profile()
if err != nil {
t.Errorf("case cities dataset error: %s ", err.Error())
}
if prev.IsEmpty() {
t.Errorf("case cites dataset: previous should not be empty")
}
if mutable.IsEmpty() {
t.Errorf("case cities dataset: mutable should not be empty")
}
if mutable.Transform != nil {
t.Errorf("case cities dataset: mutable.Transform should be nil")
}
if mutable.Commit != nil {
t.Errorf("case cities dataset: mutable.Commit should be nil")
}
if prev.BodyFile() == nil {
t.Errorf("case cities dataset: previous body should not be nil")
}
if prevPath == "" {
t.Errorf("case cities dataset: previous path should not be empty")
t.Fatal(err)
}
ctx := context.Background()
book := r.Logbook()

prev, mutable, prevPath, err = PrepareHeadDatasetVersion(ctx, r, "me", "non-existent")
if err != nil {
t.Errorf("case non-existant previous dataset error: %s ", err.Error())
}
if !prev.IsEmpty() {
t.Errorf("case non-existant previous dataset: previous should be empty, got non-empty dataset")
book.WriteDatasetInit(ctx, "cities")
book.WriteDatasetInit(ctx, "Bad_Case")

bad := []struct {
refStr, filepath string
newName bool
expect dsref.Ref
expectIsNew bool
err string
}{
{"me/invalid name", "", false, dsref.Ref{Username: "me", Name: "invalid"}, false, dsref.ErrDescribeValidName.Error()},
{"me/cities", "", true, dsref.Ref{Username: "peer", Name: "cities", InitID: "r7kr6djpgu2hm5fprxalfcsgehacoxomqse4c7nubu5mw6qcz57q"}, false, "name already in use"},
{"me/cities@/ipfs/foo", "", true, dsref.Ref{Username: "me", Name: "cities", InitID: ""}, false, dsref.ErrNotHumanFriendly.Error()},
{"alice/not_this_user", "", true, dsref.Ref{Username: "alice", Name: "not_this_user", InitID: ""}, false, "cannot save using a different username than \"peer\""},
{"me/New_Bad_Case", "", false, dsref.Ref{Username: "peer", Name: "New_Bad_Case", InitID: ""}, true, dsref.ErrBadCaseName.Error()},
}
if !mutable.IsEmpty() {
t.Errorf("case non-existant previous dataset: mutable should be empty, got non-empty dataset")

for _, c := range bad {
t.Run(fmt.Sprintf("bad_%s", c.refStr), func(t *testing.T) {
ref, isNew, err := PrepareSaveRef(ctx, pro, book, book, c.refStr, c.filepath, c.newName)
if !c.expect.Equals(ref) {
t.Errorf("resulting ref mismatch. want:\n%#v\ngot:\n%#v", c.expect, ref)
}
if c.expectIsNew != isNew {
t.Errorf("isNew result mismatch. want %t got %t", c.expectIsNew, isNew)
}
if err == nil {
t.Fatal("expected error, got none")
}
if c.err != err.Error() {
t.Errorf("error mismatch. want %q got %q", c.err, err.Error())
}
})
}
if prev.BodyFile() != nil {
t.Errorf("case non-existant previous dataset: previous body should be nil, got non-nil body")

good := []struct {
refStr, filepath string
newName bool
expect dsref.Ref
expectIsNew bool
}{
{"", "", false, dsref.Ref{Username: "peer", Name: "dataset", InitID: "2fxdc6hvi5gdraujcru5vnaluuuf57x345eirtwwtwitmjhr54ca"}, true},
{"me/cities", "", false, dsref.Ref{Username: "peer", Name: "cities", InitID: "r7kr6djpgu2hm5fprxalfcsgehacoxomqse4c7nubu5mw6qcz57q"}, false},
{"", "/path/to/data/apples.csv", false, dsref.Ref{Username: "peer", Name: "apples", InitID: "bj2srktro6zxsvork6stjzecq4f4kaii2xg2q2n6b4gwk2robf2q"}, true},
{"", "/path/to/data/apples.csv", true, dsref.Ref{Username: "peer", Name: "apples_2", InitID: "tbrfupxauhuc6rwamyejr35w4nw2icgcxvm4f6fnftyaoyeo7ida"}, true},
{"me/Bad_Case", "", false, dsref.Ref{Username: "peer", Name: "Bad_Case", InitID: "setbycsqt5gwyg3fmcm4ty37dzk5ohhq4oxk2hif64fkdhi6naca"}, false},
}
if prevPath != "" {
t.Errorf("case non-existant previous dataset: previous path should be empty, got non-empty path")

for _, c := range good {
t.Run(fmt.Sprintf("good_%s", c.refStr), func(t *testing.T) {
ref, isNew, err := PrepareSaveRef(ctx, pro, book, book, c.refStr, c.filepath, c.newName)
if err != nil {
t.Fatalf("unexpected error: %q", err)
}
if !c.expect.Equals(ref) {
t.Errorf("resulting ref mismatch. want:\n%#v\ngot:\n%#v", c.expect, ref)
}
if c.expectIsNew != isNew {
t.Errorf("isNew result mismatch. want %t got %t", c.expectIsNew, isNew)
}
})
}

}

func TestInferValues(t *testing.T) {
Expand All @@ -72,19 +110,6 @@ func TestInferValues(t *testing.T) {
}
}

// TestMaybeInferName verifies that a usable dataset name is derived from the
// body file's filename when the dataset has no name set.
func TestMaybeInferName(t *testing.T) {
	ds := &dataset.Dataset{}
	ds.SetBodyFile(qfs.NewMemfileBytes("gabba gabba hey.csv", []byte("a,b,c,c,s,v")))

	got := MaybeInferName(ds)
	if got == "" {
		t.Errorf("expected name to be inferred")
	}

	want := "gabba_gabba_hey"
	if got != want {
		t.Errorf("inferred name mismatch. expected: '%s', got: '%s'", want, got)
	}
}

func TestInferValuesStructure(t *testing.T) {
r := newTestRepo(t)
pro, err := r.Profile()
Expand Down
Loading

0 comments on commit f948cdf

Please sign in to comment.