Skip to content

Commit

Permalink
refactor(load): Simplify Loader more, various cleanups
Browse files Browse the repository at this point in the history
Loader now only provides LoadDataset. LoadResolved has been removed. For base, clean up the simpler implementation it has, and use that for the transform and SQL tests.

Change handling of dataset names so that it is now an error to use upper-case letters, not just a warning. Rename can still be used to fix bad names.

fsi loading is handled by the loader via a useFSI flag, which scope sets for the few commands that require it.

Fix the bug with listWarning that prevented `list` from working if the user's refstore contained bad username / profileID combinations

Remove some TODOs that are no longer applicable.

TODO: lib/load_test is not working due to changes in the semantics of LoadDataset. Need to figure out how to fix it before merging.
  • Loading branch information
dustmop committed Apr 16, 2021
1 parent 6d2052f commit b8a70d2
Show file tree
Hide file tree
Showing 24 changed files with 199 additions and 12,804 deletions.
12,500 changes: 0 additions & 12,500 deletions api/testdata/api.snapshot

Large diffs are not rendered by default.

32 changes: 0 additions & 32 deletions base/dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,38 +18,6 @@ import (
// these problems are non-fatal
var ErrUnlistableReferences = errors.New("Warning: Some datasets could not be listed, because of invalid state. These datasets still exist in your repository, but have references that cannot be resolved. This will be fixed in a future version. You can see all datasets by using `qri list --raw`")

// NewLocalDatasetLoader creates a dsfs.Loader that operates on a filestore
func NewLocalDatasetLoader(fs qfs.Filesystem) dsref.Loader {
return loader{fs}
}

type loader struct {
fs qfs.Filesystem
}

// LoadDataset is not implemented for base
func (l loader) LoadDataset(ctx context.Context, refstr string) (*dataset.Dataset, error) {
return nil, fmt.Errorf("not implemented")
}

// LoadResolved loads a dataset from a resolved ref
func (l loader) LoadResolved(ctx context.Context, ref dsref.Ref, location string) (*dataset.Dataset, error) {
if ref.Path == "" {
return nil, dsref.ErrRefNotResolved
}
if location != "" {
return nil, errors.New("can only load datasets from local store")
}

ds, err := dsfs.LoadDataset(ctx, l.fs, ref.Path)
if err != nil {
return nil, err
}

err = OpenDataset(ctx, l.fs, ds)
return ds, err
}

// OpenDataset prepares a dataset for use, checking each component
// for populated Path or Byte suffixed fields, consuming those fields to
// set File handlers that are ready for reading
Expand Down
2 changes: 1 addition & 1 deletion base/render_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ func TestRender(t *testing.T) {
r := newTestRepo(t)
ref := addCitiesDataset(t, r)

ds, err := NewLocalDatasetLoader(r.Filesystem()).LoadResolved(ctx, ref, "")
ds, err := NewTestDatasetLoader(r.Filesystem(), r).LoadDataset(ctx, ref.String())
if err != nil {
t.Fatal(err)
}
Expand Down
45 changes: 45 additions & 0 deletions base/test_loader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package base

import (
"context"

"github.com/qri-io/dataset"
"github.com/qri-io/qfs"
"github.com/qri-io/qri/base/dsfs"
"github.com/qri-io/qri/dsref"
)

// loader is a minimal dsref.Loader backed by a filesystem and a single
// resolver. It is intended for tests (see NewTestDatasetLoader), which
// only need a simplified form of resolution and loading.
type loader struct {
	fs       qfs.Filesystem
	resolver dsref.Resolver
}

// NewTestDatasetLoader returns a dsref.Loader suitable for tests, which
// only need simplified reference resolution and local dataset loading.
func NewTestDatasetLoader(fs qfs.Filesystem, resolver dsref.Resolver) dsref.Loader {
	return loader{fs: fs, resolver: resolver}
}

// LoadDataset parses refstr into a reference, resolves it with the
// loader's resolver, then loads the dataset at the resolved path from
// the local filesystem and opens its component files for reading.
func (l loader) LoadDataset(ctx context.Context, refstr string) (*dataset.Dataset, error) {
	ref, err := dsref.Parse(refstr)
	if err != nil {
		return nil, err
	}

	if _, err := l.resolver.ResolveRef(ctx, &ref); err != nil {
		return nil, err
	}

	ds, err := dsfs.LoadDataset(ctx, l.fs, ref.Path)
	if err != nil {
		return nil, err
	}

	// open component files so the returned dataset is ready for use
	return ds, OpenDataset(ctx, l.fs, ds)
}
20 changes: 11 additions & 9 deletions changes/changes.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ type EmptyObject map[string]interface{}

// Service generates a change report between two datasets
type Service interface {
Report(ctx context.Context, leftRef, rightRef dsref.Ref, loadLocation string) (*ChangeReportResponse, error)
Report(ctx context.Context, leftRef, rightRef string) (*ChangeReportResponse, error)
}

// Service can generate a change report between two datasets
Expand Down Expand Up @@ -385,19 +385,21 @@ func (svc *service) columnStatsDelta(left, right interface{}, lCol, rCol *tabula

// Report computes the change report of two sources
// This takes some assumptions - we work only with tabular data, with header rows and functional structure.json
func (svc *service) Report(ctx context.Context, leftRef, rightRef dsref.Ref, loadLocation string) (*ChangeReportResponse, error) {
rightDs, err := svc.loader.LoadResolved(ctx, rightRef, loadLocation)
func (svc *service) Report(ctx context.Context, leftRef, rightRef string) (*ChangeReportResponse, error) {
rightDs, err := svc.loader.LoadDataset(ctx, rightRef)
if err != nil {
return nil, err
}
if leftRef.Path == "" {
if rightDs.PreviousPath == "" {
return nil, fmt.Errorf("dataset has only one version")
}
leftRef.Path = rightDs.PreviousPath

// If only right side is given, then the left side becomes the previous
// version of that reference.
if leftRef == "" {
ref, _ := dsref.Parse(rightRef)
ref.Path = rightDs.PreviousPath
leftRef = ref.String()
}

leftDs, err := svc.loader.LoadResolved(ctx, leftRef, loadLocation)
leftDs, err := svc.loader.LoadDataset(ctx, leftRef)
if err != nil {
return nil, err
}
Expand Down
11 changes: 5 additions & 6 deletions changes/changes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -673,16 +673,15 @@ func TestReport(t *testing.T) {
svc := run.Service

cities1 := dsref.MustParse("peer/cities")
if _, err := run.Repo.ResolveRef(ctx, &cities1); err != nil {
t.Fatal(err)
}

cities2 := run.updateCitiesDataset(t)

_, err := svc.Report(ctx, cities1, cities2, "")
_, err := svc.Report(ctx, cities1.String(), cities2.String())
if err != nil {
t.Fatal(err)
}

// TODO(dustmop): Inspect the return value to verify that the report is
// actually generating something useful
}

type testRunner struct {
Expand All @@ -699,7 +698,7 @@ func newTestRunner(t *testing.T) (run *testRunner) {
}

statsSvc := stats.New(nil)
loader := base.NewLocalDatasetLoader(r.Filesystem())
loader := base.NewTestDatasetLoader(r.Filesystem(), r)

return &testRunner{
Repo: r,
Expand Down
16 changes: 10 additions & 6 deletions cmd/cmd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -640,9 +640,9 @@ func TestListFormatJson(t *testing.T) {
}
}

// Test that a dataset name with bad upper-case characters, if it already exists, produces a
// warning but not an error when you try to Get it
func TestBadCaseIsJustWarning(t *testing.T) {
// Test that a dataset name with bad upper-case characters, even if it already exists,
// produces an error and needs to be renamed
func TestBadCaseIsAnError(t *testing.T) {
run := NewTestRunner(t, "test_peer_qri_get_bad_case", "qri_get_bad_case")
defer run.Delete()

Expand All @@ -658,10 +658,14 @@ func TestBadCaseIsJustWarning(t *testing.T) {
// Add the dataset to the repo directly, which avoids the name validation check.
run.AddDatasetToRefstore(t, "test_peer_qri_get_bad_case/a_New_Dataset", &ds)

// Save the dataset, which will work now that a version already exists.
// Save the dataset, get an error because it needs to be renamed
err := run.ExecCommand("qri get test_peer_qri_get_bad_case/a_New_Dataset")
if err != nil {
t.Errorf("expect no error (just a warning), got %q", err)
if err == nil {
t.Fatalf("expected an error, didn't get one")
}
expectErr := `"test_peer_qri_get_bad_case/a_New_Dataset" is not a valid dataset reference: dataset name may not contain any upper-case letters`
if expectErr != err.Error() {
t.Errorf("error mismatch, expect: %q, got: %q", expectErr, err)
}
}

Expand Down
6 changes: 0 additions & 6 deletions dsref/load.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,9 @@ import (
// ErrNoHistory indicates a resolved reference has no HEAD path
var ErrNoHistory = fmt.Errorf("no history")

// ErrRefNotResolved is the error when LoadResolved is called on a non-resolved ref
var ErrRefNotResolved = fmt.Errorf("ref has not been resolved, cannot load")

// Loader loads datasets
type Loader interface {
// LoadDataset will parse the ref string, resolve it, and load the dataset
// from whatever store contains it
LoadDataset(ctx context.Context, refstr string) (*dataset.Dataset, error)
// LoadResolved loads a dataset that has already been resolved by finding it
// at the given location. Will fail if the Ref has no Path with ErrRefNotResolved
LoadResolved(ctx context.Context, ref Ref, location string) (*dataset.Dataset, error)
}
7 changes: 3 additions & 4 deletions dsref/spec/load.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,18 +44,17 @@ func AssertLoaderSpec(t *testing.T, r dsref.Loader, putFunc PutDatasetFunc) {
t.Fatalf("putting dataset: %s", err)
}

ref := dsref.Ref{}
_, err = r.LoadResolved(ctx, ref, "")
_, err = r.LoadDataset(ctx, "")
if err == nil {
t.Errorf("expected loading without a reference Path value to fail, got nil")
}

ref = dsref.Ref{
ref := dsref.Ref{
Username: username,
Name: name,
Path: path,
}
got, err := r.LoadResolved(ctx, ref, "")
got, err := r.LoadDataset(ctx, ref.String())
if err != nil {
t.Fatal(err)
}
Expand Down
5 changes: 3 additions & 2 deletions lib/automation.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ func (automationImpl) Apply(scope scope, p *ApplyParams) (*ApplyResult, error) {
var err error
ref := dsref.Ref{}
if p.Ref != "" {
ref, _, err = scp.ParseAndResolveRefWithWorkingDir(ctx, p.Ref)
scope.EnableWorkingDir(true)
ref, _, err = scope.ParseAndResolveRef(ctx, p.Ref)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -109,7 +110,7 @@ func (automationImpl) Apply(scope scope, p *ApplyParams) (*ApplyResult, error) {
}, runID)

scriptOut := p.ScriptOutput
transformer := transform.NewTransformer(scp.AppContext(), scp.Loader(), scp.Bus())
transformer := transform.NewTransformer(scope.AppContext(), scope.Loader(), scope.Bus())
if err = transformer.Apply(ctx, ds, runID, p.Wait, scriptOut, p.Secrets); err != nil {
return nil, err
}
Expand Down
19 changes: 1 addition & 18 deletions lib/changes.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"context"

"github.com/qri-io/qri/changes"
"github.com/qri-io/qri/dsref"
)

// ChangeReportParams defines parameters for diffing two sources
Expand All @@ -27,22 +26,6 @@ func (m DiffMethods) Changes(ctx context.Context, p *ChangeReportParams) (*Chang

// Changes generates report of changes between two datasets
func (diffImpl) Changes(scope scope, p *ChangeReportParams) (*ChangeReport, error) {
ctx := scope.Context()

right, location, err := scope.ParseAndResolveRef(ctx, p.RightRef)
if err != nil {
return nil, err
}

var left dsref.Ref
if p.LeftRef != "" {
if left, _, err = scope.ParseAndResolveRef(ctx, p.LeftRef); err != nil {
return nil, err
}
} else {
left = right.Copy()
}

svc := changes.New(scope.Loader(), scope.Stats())
return svc.Report(ctx, left, right, location)
return svc.Report(scope.Context(), p.LeftRef, p.RightRef)
}
8 changes: 7 additions & 1 deletion lib/collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ func (collectionImpl) List(scope scope, p *ListParams) ([]dsref.VersionInfo, err
} else if listProfile.Peername == "" || reqProfile.Peername == listProfile.Peername {
infos, err = base.ListDatasets(scope.Context(), scope.Repo(), p.Term, restrictPid, p.Offset, p.Limit, p.Public, p.ShowNumVersions)
if errors.Is(err, ErrListWarning) {
// This warning can happen when there's conflicts between usernames and
// profileIDs. This type of conflict should not break listing functionality.
// Instead, store the warning and treat it as non-fatal.
listWarning = err
err = nil
}
Expand Down Expand Up @@ -162,7 +165,10 @@ func (collectionImpl) List(scope scope, p *ListParams) ([]dsref.VersionInfo, err
}

if listWarning != nil {
return nil, listWarning
// If there was a warning listing the datasets, we should still return the list
// itself. The caller should handle this warning by simply printing it, but this
// shouldn't break the `list` functionality.
return infos, listWarning
}

return infos, nil
Expand Down
30 changes: 13 additions & 17 deletions lib/datasets.go
Original file line number Diff line number Diff line change
Expand Up @@ -633,22 +633,20 @@ type datasetImpl struct{}
func (datasetImpl) Get(scope scope, p *GetParams) (*GetResult, error) {
res := &GetResult{}

var ds *dataset.Dataset
ref, location, err := scope.ParseAndResolveRefWithWorkingDir(scope.Context(), p.Ref)
if err != nil {
return nil, err
}
ds, err = scope.Loader().LoadResolved(scope.Context(), ref, location)
scope.EnableWorkingDir(true)
ds, err := scope.Loader().LoadDataset(scope.Context(), p.Ref)
if err != nil {
return nil, err
}
ref := dsref.ConvertDatasetToVersionInfo(ds).SimpleRef()

res.Ref = &ref
res.Dataset = ds

if fsi.IsFSIPath(ref.Path) {
res.FSIPath = fsi.FilesystemPathToLocal(ref.Path)
ds.Path = ""
}

// TODO (b5) - Published field is longer set as part of Reference Resolution
// getting publication status should be delegated to a new function

Expand Down Expand Up @@ -1085,12 +1083,13 @@ func (datasetImpl) Remove(scope scope, p *RemoveParams) (*RemoveResponse, error)
return nil, fmt.Errorf("can only remove whole dataset versions, not individual components")
}
if scope.SourceName() != "local" {
return nil, fmt.Errorf("can only remove from local storage")
return nil, fmt.Errorf("remove requires the 'local' source")
}

ref, _, err := scope.ParseAndResolveRefWithWorkingDir(scope.Context(), p.Ref)
scope.EnableWorkingDir(true)
ref, _, err := scope.ParseAndResolveRef(scope.Context(), p.Ref)
if err != nil {
log.Debugw("Remove, repo.ParseAndResolveRefWithWorkingDir failed", "err", err)
log.Debugw("Remove, repo.ParseAndResolveRef failed", "err", err)
// TODO(b5): this "logbook.ErrNotFound" is needed to get cmd.TestRemoveEvenIfLogbookGone
// to pass. Relying on dataset resolution returning an error defined in logbook is incorrect
// This should really be checking for some sort of "can't fully resolve" error
Expand Down Expand Up @@ -1196,7 +1195,7 @@ func (datasetImpl) Remove(scope scope, p *RemoveParams) (*RemoveResponse, error)
} else if len(history) > 0 {
if fsiPath != "" {
// if we're operating on an fsi-linked directory, we need to re-resolve to
// get the path on qfs. This could be avoided if we refactored ParseAndResolveRefWithWorkingDir
// get the path on qfs. This could be avoided if we refactored ParseAndResolveRef
// to return an extra fsiPath value
qfsRef := ref.Copy()
qfsRef.Path = ""
Expand Down Expand Up @@ -1252,7 +1251,7 @@ func (datasetImpl) Pull(scope scope, p *PullParams) (*dataset.Dataset, error) {
res := &dataset.Dataset{}

if scope.SourceName() != "network" {
return nil, fmt.Errorf("can only pull from network")
return nil, fmt.Errorf("pull requires the 'network' source")
}

ref, location, err := scope.ParseAndResolveRef(scope.Context(), p.Ref)
Expand Down Expand Up @@ -1324,8 +1323,8 @@ func (datasetImpl) Validate(scope scope, p *ValidateParams) (*ValidateResponse,
// if there is both a bodyfilename and a schema/structure
// we don't need to resolve any references
if p.BodyFilename == "" || schemaFlagType == "" {
// TODO (ramfox): we need consts in `dsref` for "local", "network", "p2p"
ref, _, err = scope.ParseAndResolveRefWithWorkingDir(scope.Context(), p.Ref)
scope.EnableWorkingDir(true)
ref, _, err = scope.ParseAndResolveRef(scope.Context(), p.Ref)
if err != nil {
return nil, err
}
Expand All @@ -1337,9 +1336,6 @@ func (datasetImpl) Validate(scope scope, p *ValidateParams) (*ValidateResponse,

var ds *dataset.Dataset

// TODO(dlong): This pattern has shown up many places, such as lib.Get.
// Should probably combine into a utility function.

if p.Ref != "" {
if fsiPath != "" {
if ds, err = fsi.ReadDir(fsiPath); err != nil {
Expand Down
4 changes: 2 additions & 2 deletions lib/datasets_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -736,8 +736,8 @@ func TestDatasetRequestsRemove(t *testing.T) {
params RemoveParams
}{
{`"" is not a valid dataset reference: empty reference`, RemoveParams{Ref: "", Revision: allRevs}},
{"reference not found", RemoveParams{Ref: "abc/ABC", Revision: allRevs}},
{"can only remove whole dataset versions, not individual components", RemoveParams{Ref: "abc/ABC", Revision: &dsref.Rev{Field: "st", Gen: -1}}},
{"reference not found", RemoveParams{Ref: "abc/not_found", Revision: allRevs}},
{"can only remove whole dataset versions, not individual components", RemoveParams{Ref: "abc/not_found", Revision: &dsref.Rev{Field: "st", Gen: -1}}},
{"invalid number of revisions to delete: 0", RemoveParams{Ref: "peer/movies", Revision: &dsref.Rev{Field: "ds", Gen: 0}}},
{"dataset is not linked to filesystem, cannot use keep-files", RemoveParams{Ref: "peer/movies", Revision: allRevs, KeepFiles: true}},
}
Expand Down
Loading

0 comments on commit b8a70d2

Please sign in to comment.