diff --git a/api/testdata/api.snapshot b/api/testdata/api.snapshot index e1bd9acae..a668a0690 100755 Binary files a/api/testdata/api.snapshot and b/api/testdata/api.snapshot differ diff --git a/cmd/cmd.go b/cmd/cmd.go index 8050d4dd8..1a0209566 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -12,14 +12,11 @@ import ( "io" "os" "path/filepath" - "strings" golog "github.com/ipfs/go-log" "github.com/qri-io/ioes" "github.com/qri-io/qri/lib" - "github.com/qri-io/qri/repo" "github.com/qri-io/qri/repo/gen" - reporef "github.com/qri-io/qri/repo/ref" ) var log = golog.Logger("cmd") @@ -112,11 +109,3 @@ func parseSecrets(secrets ...string) (map[string]string, error) { } return s, nil } - -// parseCmdLineDatasetRef parses DatasetRefs, assuming peer "me" if none given. -func parseCmdLineDatasetRef(ref string) (reporef.DatasetRef, error) { - if !strings.ContainsAny(ref, "@/") { - ref = "me/" + ref - } - return repo.ParseDatasetRef(ref) -} diff --git a/cmd/fsi_integration_test.go b/cmd/fsi_integration_test.go index 8bfebe8d8..757e0d9f8 100644 --- a/cmd/fsi_integration_test.go +++ b/cmd/fsi_integration_test.go @@ -126,7 +126,7 @@ func TestInitBadName(t *testing.T) { if err == nil { t.Fatal("expected error trying to init, did not get an error") } - expect := `dataset name must start with a letter, and only contain letters, numbers, and underscore` + expect := `dataset name must start with a letter, and only contain letters, numbers, and underscore. Maximum length is 144 characters` if err.Error() != expect { t.Errorf("error mismatch, expect: %s, got: %s", expect, err.Error()) } diff --git a/cmd/save.go b/cmd/save.go index 472871aa1..6b06802e2 100644 --- a/cmd/save.go +++ b/cmd/save.go @@ -148,14 +148,8 @@ func (o *SaveOptions) Run() (err error) { o.StartSpinner() defer o.StopSpinner() - // TODO (b5): cmd should never need to parse a dataset reference - ref, err := parseCmdLineDatasetRef(o.Refs.Ref()) - if err != nil && len(o.FilePaths) == 0 { - return lib.NewError(lib.ErrBadArgs, "error parsing dataset reference '"+o.Refs.Ref()+"'") - } - p := &lib.SaveParams{ - Ref: ref.AliasString(), + Ref: o.Refs.Ref(), BodyPath: o.BodyPath, Title: o.Title, Message: o.Message, diff --git a/cmd/save_test.go b/cmd/save_test.go index f1fe255fe..afa772446 100644 --- a/cmd/save_test.go +++ b/cmd/save_test.go @@ -10,7 +10,9 @@ import ( "time" "github.com/google/go-cmp/cmp" + "github.com/qri-io/dataset" "github.com/qri-io/ioes" + "github.com/qri-io/qfs" "github.com/qri-io/qfs/localfs" "github.com/qri-io/qri/base/dsfs" "github.com/qri-io/qri/dscache" @@ -286,6 +288,43 @@ func TestSaveBasicCommands(t *testing.T) { t.Errorf("%s: result mismatch (-want +got):%s\n", c.description, diff) } } + + badCases := []struct { + description string + command string + expectErr string + }{ + { + "dataset file other username", + "qri save --file dataset.yaml other/my_dataset", + "cannot save using a different username than \"test_peer\"", + }, + { + "dataset file explicit version", + "qri save --file dataset.yaml me/my_dataset@/ipfs/QmVersion", + "ref can only have username/name", + }, + { + "body file other username", + "qri save --body body_ten.csv other/my_dataset", + "cannot save using a different username than \"test_peer\"", + }, + } + for _, c := range badCases { + run := NewTestRunner(t, "test_peer", "qri_test_save_basic") + defer run.Delete() + + err := run.ExecCommand(c.command) + if err == nil { + output := run.GetCommandOutput() + t.Errorf("%s: expected an error, did not get one, output: %s\n", c.description, output) + continue + } + if err.Error() != c.expectErr { + t.Errorf("%s: mismatch, expect: %s, got: %s\n", c.description, c.expectErr, err.Error()) + continue + } + } } func TestSaveInferName(t *testing.T) { @@ -492,6 +531,45 @@ func TestSaveDscacheExistingDataset(t *testing.T) { } } +func TestSaveBadCaseCantBeUsedForNewDatasets(t *testing.T) { + run := NewTestRunner(t, "test_peer", "qri_save_bad_case") + defer run.Delete() + + prevTimestampFunc := logbook.NewTimestamp + logbook.NewTimestamp = func() int64 { + return 1000 + } + defer func() { + logbook.NewTimestamp = prevTimestampFunc + }() + + // Try to save a new dataset, but its name has upper-case characters. + err := run.ExecCommand("qri save --body testdata/movies/body_two.json test_peer/a_New_Dataset") + if err == nil { + t.Fatal("expected error trying to save, did not get an error") + } + expect := `dataset name may not contain any upper-case letters` + if err.Error() != expect { + t.Errorf("error mismatch, expect: %s, got: %s", expect, err.Error()) + } + + // Construct a dataset in order to have an existing version in the repo. + ds := dataset.Dataset{ + Structure: &dataset.Structure{ + Format: "json", + Schema: dataset.BaseSchemaArray, + }, + } + ds.SetBodyFile(qfs.NewMemfileBytes("body.json", []byte("[[\"one\",2],[\"three\",4]]"))) + + // Add the dataset to the repo directly, which avoids the name validation check. + ctx := context.Background() + run.AddDatasetToRefstore(ctx, t, "test_peer/a_New_Dataset", &ds) + + // Save the dataset, which will work now that a version already exists. + run.MustExec(t, "qri save --body testdata/movies/body_two.json test_peer/a_New_Dataset") +} + func parseDatasetRefFromOutput(text string) string { pos := strings.Index(text, "dataset saved:") if pos == -1 { diff --git a/cmd/test_runner_test.go b/cmd/test_runner_test.go index b8f07d276..09d808d4d 100644 --- a/cmd/test_runner_test.go +++ b/cmd/test_runner_test.go @@ -10,8 +10,11 @@ import ( "testing" "time" + "github.com/qri-io/dataset" "github.com/qri-io/ioes" + "github.com/qri-io/qri/base" "github.com/qri-io/qri/base/dsfs" + "github.com/qri-io/qri/dsref" "github.com/qri-io/qri/lib" "github.com/qri-io/qri/registry" "github.com/qri-io/qri/registry/regserver" @@ -246,3 +249,49 @@ func executeCommandC(root *cobra.Command, args ...string) (err error) { _, err = root.ExecuteC() return err } + +// AddDatasetToRefstore adds a dataset to the test runner's refstore. It ignores the upper-levels +// of our stack, namely cmd/ and lib/, which means it can be used to add a dataset with a name +// that is using upper-case characters. +func (run *TestRunner) AddDatasetToRefstore(ctx context.Context, t *testing.T, refStr string, ds *dataset.Dataset) { + ref, err := dsref.ParseHumanFriendly(refStr) + if err != nil && err != dsref.ErrBadCaseName { + t.Fatal(err) + } + + ds.Peername = ref.Username + ds.Name = ref.Name + + inst, err := lib.NewInstance(ctx, run.RepoRoot.QriPath) + // NOTE(dustmop): There's a bug with TestRepo that I don't understand completely. The commands + // are run using a different refstore than the refstore returned by accessing the fields of the + // TestRepo directly. The command runner constructs a repo and then refstore which has a path + // similar to "/var/folders/tmpDir/T/qri_save_bad_case1234" with "qri" and "ipfs" directories + // within. However, trying to directly access the Repo object from TestRepo will return a + // refstore with the path "/var/folders/tmpDir/T/qri_save_bad_case1234" as the *qri repository*. + // + // So doing: + // run.RepoRoot.Repo() + // gives a refstore that saves to: + // "/var/folders/tmpDir/T/qri_save_bad_case1234/refs.fbs" + // While the commandRunner is using: + // "/var/folders/tmpDir/T/qri_save_bad_case1234/qri/refs.fbs" + // + // We work around this by constructing a lib.Instance, which uses the PathFactory to get the + // qri subfolder and correctly use the refstore at: + // "/var/folders/tmpDir/T/qri_save_bad_case1234/qri/refs.fbs" + // + // This is probably the same bug that is handled in repo/buildrepo/build.go with a hack that + // appends "/qri" to the repoPath. + r := inst.Repo() + + str := ioes.NewStdIOStreams() + secrets := make(map[string]string) + scriptOut := &bytes.Buffer{} + sw := base.SaveDatasetSwitches{} + + _, err = base.SaveDataset(ctx, r, str, ds, secrets, scriptOut, sw) + if err != nil { + t.Fatal(err) + } +} diff --git a/cmd/whatchanged.go b/cmd/whatchanged.go index f98efe1bf..c2bc66ac3 100644 --- a/cmd/whatchanged.go +++ b/cmd/whatchanged.go @@ -12,9 +12,9 @@ import ( func NewWhatChangedCommand(f Factory, ioStreams ioes.IOStreams) *cobra.Command { o := &WhatChangedOptions{IOStreams: ioStreams} cmd := &cobra.Command{ - Use: "whatchanged DATASET", + Use: "whatchanged DATASET", Hidden: true, - Short: "shows what changed at a particular commit", + Short: "shows what changed at a particular commit", Long: `Shows what changed for components at a particular commit, that is, which were added, modified or removed. This is analagous to the status command, except only available for dataset versions in history.`, diff --git a/dsref/parse.go b/dsref/parse.go index 58c75ba11..4a471c77a 100644 --- a/dsref/parse.go +++ b/dsref/parse.go @@ -3,6 +3,7 @@ package dsref import ( "fmt" "regexp" + "unicode" ) // These functions parse a string to create a dsref. We refer to a "human-friendly reference" @@ -41,12 +42,18 @@ var ( concreteRef = regexp.MustCompile(`^@(` + b58Id + `)?\/(` + alphaNumeric + `)\/(` + b58Id + `)`) b58StrictCheck = regexp.MustCompile(`^Qm[1-9A-HJ-NP-Za-km-z]*$`) + // ErrEmptyRef is an error for when a reference is empty + ErrEmptyRef = fmt.Errorf("empty reference") // ErrParseError is an error returned when parsing fails ErrParseError = fmt.Errorf("could not parse ref") // ErrNotHumanFriendly is an error returned when a reference is not human-friendly ErrNotHumanFriendly = fmt.Errorf("ref can only have username/name") + // ErrBadCaseName is the error when a bad case is used in the dataset name + ErrBadCaseName = fmt.Errorf("dataset name may not contain any upper-case letters") + // ErrBadCaseShouldRename is the error when a dataset should be renamed to not use upper case letters + ErrBadCaseShouldRename = fmt.Errorf("dataset name should not contain any upper-case letters, rename it to only use lower-case letters, numbers, and underscores") // ErrDescribeValidName is an error describing a valid dataset name - ErrDescribeValidName = fmt.Errorf("dataset name must start with a letter, and only contain letters, numbers, and underscore") + ErrDescribeValidName = fmt.Errorf("dataset name must start with a letter, and only contain letters, numbers, and underscore. Maximum length is 144 characters") ) // Parse a reference from a string @@ -54,7 +61,7 @@ func Parse(text string) (Ref, error) { var r Ref origLength := len(text) if origLength == 0 { - return r, fmt.Errorf("empty reference") + return r, ErrEmptyRef } remain, partial, err := parseHumanFriendly(text) @@ -80,6 +87,14 @@ func Parse(text string) (Ref, error) { return r, fmt.Errorf("parsing ref, unexpected character at position %d: '%c'", pos, text[0]) } + // Dataset names are not supposed to contain upper-case characters. For now, return an error + // but also the reference; callers should display a warning, but continue to work for now. + for _, rune := range r.Name { + if unicode.IsUpper(rune) { + return r, ErrBadCaseName + } + } + return r, nil } @@ -88,7 +103,7 @@ func ParseHumanFriendly(text string) (Ref, error) { var r Ref origLength := len(text) if origLength == 0 { - return r, fmt.Errorf("empty reference") + return r, ErrEmptyRef } remain, partial, err := parseHumanFriendly(text) @@ -108,13 +123,21 @@ func ParseHumanFriendly(text string) (Ref, error) { return r, fmt.Errorf("parsing ref, unexpected character at position %d: '%c'", pos, text[0]) } + // Dataset names are not supposed to contain upper-case characters. For now, return an error + // but also the reference; callers should display a warning, but continue to work for now. + for _, rune := range r.Name { + if unicode.IsUpper(rune) { + return r, ErrBadCaseName + } + } + return r, nil } // IsRefString returns whether the string parses as a valid reference func IsRefString(text string) bool { _, err := Parse(text) - return err == nil + return err == nil || err == ErrBadCaseName } // IsValidName returns whether the dataset name is valid diff --git a/dsref/parse_test.go b/dsref/parse_test.go index 88454eaaa..558f36df7 100644 --- a/dsref/parse_test.go +++ b/dsref/parse_test.go @@ -51,6 +51,17 @@ func TestParse(t *testing.T) { } } +func TestParseBadCase(t *testing.T) { + ref, err := Parse("test_peer/a_New_Dataset") + if err != ErrBadCaseName { + t.Errorf("expected to get error %s, but got %s", ErrBadCaseName, err) + } + expect := Ref{Username: "test_peer", Name: "a_New_Dataset"} + if !ref.Equals(expect) { + t.Errorf("mismatch: expect %s, got %s", expect, ref) + } +} + func TestParseHumanFriendly(t *testing.T) { goodCases := []struct { description string diff --git a/lib/datasets.go b/lib/datasets.go index 8b11e78aa..27a6398a9 100644 --- a/lib/datasets.go +++ b/lib/datasets.go @@ -235,6 +235,11 @@ func (r *DatasetRequests) Get(p *GetParams, res *GetResult) (err error) { } ctx := context.TODO() + // Check if the dataset ref uses bad-case characters, show a warning. + if _, err := dsref.Parse(p.Path); err == dsref.ErrBadCaseName { + log.Error(dsref.ErrBadCaseShouldRename) + } + ref, err := base.ToDatasetRef(p.Path, r.node.Repo, p.UseFSI) if err != nil { log.Debugf("Get dataset, base.ToDatasetRef %q failed, error: %s", p.Path, err) @@ -433,7 +438,6 @@ func (p *SaveParams) AbsolutizePaths() error { } // Save adds a history entry, updating a dataset -// TODO - need to make sure users aren't forking by referencing commits other than tip func (r *DatasetRequests) Save(p *SaveParams, res *reporef.DatasetRef) (err error) { if r.cli != nil { p.ScriptOutput = nil @@ -445,27 +449,50 @@ func (r *DatasetRequests) Save(p *SaveParams, res *reporef.DatasetRef) (err erro return fmt.Errorf("option to make dataset private not yet implemented, refer to https://github.com/qri-io/qri/issues/291 for updates") } - // From cmd/, an empty reference becomes "me/", but from api/, it becomes "" (empty string). - // We must allow empty references for the case when the --new flag is being used, since it - // can be used to generate a name from a bodypath. - // TODO(dlong): Fix me! Check for these cases, return reasonable errors, test at lib/ level. - ref, err := repo.ParseDatasetRef(p.Ref) - if err != nil && err != repo.ErrEmptyRef { + ref, err := dsref.ParseHumanFriendly(p.Ref) + if err == dsref.ErrBadCaseName { + // If dataset name is using bad-case characters, and is not yet in use, fail with error. + if !r.nameIsInUse(ref) { + return err + } + // If dataset name already exists, just log a warning and then continue. + log.Error(dsref.ErrBadCaseShouldRename) + } else if err == dsref.ErrEmptyRef { + // Okay if reference is empty. Later code will try to infer the name from other parameters. + } else if err != nil { + // If some other error happened, return that error. return err } + // Validate that username is our own, it's not valid to try to save a dataset with someone + // else's username. Without this check, base will replace the username with our own regardless, + // it's better to have an error to display, rather than silently ignore it. + pro, err := r.node.Repo.Profile() + if err != nil { + return err + } + if ref.Username != "" && ref.Username != "me" && ref.Username != pro.Peername { + return fmt.Errorf("cannot save using a different username than \"%s\"", pro.Peername) + } + + // Parsed human-friendly dsref can only have username and name. + datasetRef := reporef.DatasetRef{ + Peername: ref.Username, + Name: ref.Name, + } + ds := &dataset.Dataset{} if p.ReadFSI { - err = repo.CanonicalizeDatasetRef(r.node.Repo, &ref) + err = repo.CanonicalizeDatasetRef(r.node.Repo, &datasetRef) if err != nil && err != repo.ErrNoHistory { return err } - if ref.FSIPath == "" { + if datasetRef.FSIPath == "" { return fsi.ErrNoLink } - ds, err = fsi.ReadDir(ref.FSIPath) + ds, err = fsi.ReadDir(datasetRef.FSIPath) if err != nil { return } @@ -473,8 +500,8 @@ func (r *DatasetRequests) Save(p *SaveParams, res *reporef.DatasetRef) (err erro // add param-supplied changes ds.Assign(&dataset.Dataset{ - Name: ref.Name, - Peername: ref.Peername, + Name: datasetRef.Name, + Peername: datasetRef.Peername, BodyPath: p.BodyPath, Commit: &dataset.Commit{ Title: p.Title, @@ -488,7 +515,7 @@ func (r *DatasetRequests) Save(p *SaveParams, res *reporef.DatasetRef) (err erro } if p.Recall != "" { - ref := reporef.DatasetRef{ + datasetRef := reporef.DatasetRef{ Peername: ds.Peername, Name: ds.Name, // TODO - fix, but really this should be fine for a while because @@ -496,7 +523,7 @@ func (r *DatasetRequests) Save(p *SaveParams, res *reporef.DatasetRef) (err erro // ProfileID: ds.ProfileID, Path: ds.Path, } - recall, err := base.Recall(ctx, r.node.Repo, p.Recall, ref) + recall, err := base.Recall(ctx, r.node.Repo, p.Recall, datasetRef) if err != nil { return err } @@ -541,7 +568,7 @@ func (r *DatasetRequests) Save(p *SaveParams, res *reporef.DatasetRef) (err erro } // TODO (b5) - this should be integrated into base.SaveDataset - fsiPath := ref.FSIPath + fsiPath := datasetRef.FSIPath switches := base.SaveDatasetSwitches{ Replace: p.Replace, @@ -552,7 +579,7 @@ func (r *DatasetRequests) Save(p *SaveParams, res *reporef.DatasetRef) (err erro ShouldRender: p.ShouldRender, NewName: p.NewName, } - ref, err = base.SaveDataset(ctx, r.node.Repo, r.node.LocalStreams, ds, p.Secrets, p.ScriptOutput, switches) + datasetRef, err = base.SaveDataset(ctx, r.node.Repo, r.node.LocalStreams, ds, p.Secrets, p.ScriptOutput, switches) if err != nil { log.Debugf("create ds error: %s\n", err.Error()) return err @@ -560,14 +587,14 @@ func (r *DatasetRequests) Save(p *SaveParams, res *reporef.DatasetRef) (err erro // TODO (b5) - this should be integrated into base.SaveDataset if fsiPath != "" { - ref.FSIPath = fsiPath - if err = r.node.Repo.PutRef(ref); err != nil { + datasetRef.FSIPath = fsiPath + if err = r.node.Repo.PutRef(datasetRef); err != nil { return err } } if p.ReturnBody { - if err = base.InlineJSONBody(ref.Dataset); err != nil { + if err = base.InlineJSONBody(datasetRef.Dataset); err != nil { return err } } @@ -575,7 +602,7 @@ func (r *DatasetRequests) Save(p *SaveParams, res *reporef.DatasetRef) (err erro if p.Publish { var publishedRef reporef.DatasetRef err = r.SetPublishStatus(&SetPublishStatusParams{ - Ref: ref.String(), + Ref: datasetRef.String(), PublishStatus: true, // UpdateRegistry: true, // UpdateRegistryPin: true, @@ -586,16 +613,38 @@ func (r *DatasetRequests) Save(p *SaveParams, res *reporef.DatasetRef) (err erro } } - *res = ref + *res = datasetRef if p.WriteFSI { // Need to pass filesystem here so that we can read the README component and write it // properly back to disk. - fsi.WriteComponents(res.Dataset, ref.FSIPath, r.inst.node.Repo.Filesystem()) + fsi.WriteComponents(res.Dataset, datasetRef.FSIPath, r.inst.node.Repo.Filesystem()) } return nil } +// This is somewhat of a hack, we shouldn't need to lookup anything about the dataset reference +// before running Save. However, we need to check for now until we solve the problem of +// dataset names existing with bad-case characters. +// See this issue: https://github.com/qri-io/qri/issues/1132 +func (r *DatasetRequests) nameIsInUse(ref dsref.Ref) bool { + param := GetParams{ + Path: ref.Alias(), + } + res := GetResult{} + err := r.Get(¶m, &res) + if err == repo.ErrNotFound { + return false + } + if err != nil { + // TODO(dustmop): Unsure if this is correct. If `Get` hits some other error, we aren't + // sure if the dataset name is in use. Log the error and assume the dataset does in fact + // exist. + log.Error(err) + } + return true +} + // SetPublishStatusParams encapsulates parameters for setting the publication status of a dataset type SetPublishStatusParams struct { Ref string diff --git a/lib/datasets_test.go b/lib/datasets_test.go index 5d77ffaae..fdbfae8f6 100644 --- a/lib/datasets_test.go +++ b/lib/datasets_test.go @@ -626,7 +626,7 @@ func TestDatasetRequestsRename(t *testing.T) { err string }{ {&RenameParams{}, "current name is required to rename a dataset"}, - {&RenameParams{Current: dsref.Ref{Username: "peer", Name: "movies"}, Next: dsref.Ref{Username: "peer", Name: "new movies"}}, "dataset name must start with a letter, and only contain letters, numbers, and underscore"}, + {&RenameParams{Current: dsref.Ref{Username: "peer", Name: "movies"}, Next: dsref.Ref{Username: "peer", Name: "new movies"}}, "dataset name must start with a letter, and only contain letters, numbers, and underscore. Maximum length is 144 characters"}, {&RenameParams{Current: dsref.Ref{Username: "peer", Name: "cities"}, Next: dsref.Ref{Username: "peer", Name: "sitemap"}}, "dataset 'peer/sitemap' already exists"}, }