From 4b1e562a4bd30ded619f7454c47c505105db2254 Mon Sep 17 00:00:00 2001 From: Kasey Date: Tue, 11 Jun 2019 16:44:21 -0400 Subject: [PATCH] fix(add): add specific versions of a dataset using the full reference This commit contains a few things: 1) removes checking for is a dataset is local before attempting to add. Right now we don't have a reliable way of determining if a file is local or coming off the d-web. 2) add checks to see if there was a previous version of the dataset in the repo before adding. If the previous version is actually a more recent version, do not add the older version to the repo, just fetch and pin the data 3) tests this new `ReplaceRefIfMoreRecent` function 4) fixes an error in base that was sending the wrong path to the fetcher (conflict probably from upgrading to ipfs 0.4.21) --- actions/dataset.go | 58 ++++++++++++-- actions/dataset_ref.go | 7 ++ actions/dataset_test.go | 164 ++++++++++++++++++++++++++++++++++++++++ base/dataset.go | 2 +- 4 files changed, 225 insertions(+), 6 deletions(-) diff --git a/actions/dataset.go b/actions/dataset.go index e40ba8d02..282547dd9 100644 --- a/actions/dataset.go +++ b/actions/dataset.go @@ -3,8 +3,10 @@ package actions import ( "fmt" "io" + "time" "github.com/qri-io/dataset" + "github.com/qri-io/dataset/dsfs" "github.com/qri-io/dataset/validate" "github.com/qri-io/qfs" "github.com/qri-io/qfs/cafs" @@ -119,10 +121,12 @@ func UpdateRemoteDataset(node *p2p.QriNode, ref *repo.DatasetRef, pin bool) (res // AddDataset fetches & pins a dataset to the store, adding it to the list of stored refs func AddDataset(node *p2p.QriNode, ref *repo.DatasetRef) (err error) { if !ref.Complete() { - if local, err := ResolveDatasetRef(node, ref); err != nil { + // TODO (ramfox): we should check to see if the dataset already exists locally + // unfortunately, because of the nature of the ipfs filesystem commands, we don't + // know if files we fetch are local only or possibly coming from the network. + // instead, for now, let's just always try to add + if _, err := ResolveDatasetRef(node, ref); err != nil { return err - } else if local { - return fmt.Errorf("error: dataset %s already exists in repo", ref) } } @@ -207,11 +211,55 @@ func AddDataset(node *p2p.QriNode, ref *repo.DatasetRef) (err error) { return fmt.Errorf("add failed: %s", err.Error()) } - if err = node.Repo.PutRef(*ref); err != nil { + prevRef, err := node.Repo.GetRef(repo.DatasetRef{Peername: ref.Peername, Name: ref.Name}) + if err != nil && err == repo.ErrNotFound { + if err = node.Repo.PutRef(*ref); err != nil { + log.Debug(err.Error()) + return fmt.Errorf("error putting dataset in repo: %s", err.Error()) + } + return nil + } + if err != nil { + return err + } + + prevRef.Dataset, err = dsfs.LoadDataset(node.Repo.Store(), prevRef.Path) + if err != nil { + log.Debug(err.Error()) + return fmt.Errorf("error loading repo dataset: %s", prevRef.Path) + } + + ref.Dataset, err = dsfs.LoadDataset(node.Repo.Store(), ref.Path) + if err != nil { log.Debug(err.Error()) - return fmt.Errorf("error putting dataset name in repo: %s", err.Error()) + return fmt.Errorf("error loading added dataset: %s", ref.Path) + } + + return ReplaceRefIfMoreRecent(node, &prevRef, ref) +} + +// ReplaceRefIfMoreRecent replaces the given ref in the ref store, if +// it is more recent then the ref currently in the refstore +func ReplaceRefIfMoreRecent(node *p2p.QriNode, prev, curr *repo.DatasetRef) error { + var ( + prevTime time.Time + currTime time.Time + ) + if curr == nil || curr.Dataset == nil || curr.Dataset.Commit == nil { + return fmt.Errorf("added dataset ref is not fully dereferenced") + } + currTime = curr.Dataset.Commit.Timestamp + if prev == nil || prev.Dataset == nil || prev.Dataset.Commit == nil { + return fmt.Errorf("previous dataset ref is not fully derefernced") } + prevTime = prev.Dataset.Commit.Timestamp + if prevTime.Before(currTime) || prevTime.Equal(currTime) { + if err := node.Repo.PutRef(*curr); err != nil { + log.Debug(err.Error()) + return fmt.Errorf("error putting dataset name in repo: %s", err.Error()) + } + } return nil } diff --git a/actions/dataset_ref.go b/actions/dataset_ref.go index 601fc7a33..bbb757977 100644 --- a/actions/dataset_ref.go +++ b/actions/dataset_ref.go @@ -16,6 +16,13 @@ import ( // falling back to a network call if one isn't found // TODO - this looks small now, but in the future we may consider // reinforcing p2p network with registry lookups +// TODO (ramfox) - Canonicalizing a Dataset with no errors is not a good enough tell to see +// if a dataset is local or not, we have to actually attempt to load it. +// however, if we are connected to a network, we cannot fully reason if a file +// is local or from the network. We need to build tools that allow us better +// control over local only and network actions. Once we have those, we can attempt +// to load the dataset locally, if it error with DatasetNotFound, or something similar +// we will know that the dataset does not exist locally func ResolveDatasetRef(node *p2p.QriNode, ref *repo.DatasetRef) (local bool, err error) { if err := repo.CanonicalizeDatasetRef(node.Repo, ref); err == nil && ref.Path != "" { return true, nil diff --git a/actions/dataset_test.go b/actions/dataset_test.go index f5d5e0605..402dac2e4 100644 --- a/actions/dataset_test.go +++ b/actions/dataset_test.go @@ -3,6 +3,7 @@ package actions import ( "context" "testing" + "time" "github.com/qri-io/dataset" "github.com/qri-io/qfs" @@ -416,3 +417,166 @@ func testEventsLog(t *testing.T, rmf RepoMakerFunc) { } } } + +func TestReplaceRefIfMoreRecent(t *testing.T) { + node := newTestNode(t) + older := time.Date(2019, 1, 1, 12, 0, 0, 0, time.UTC) + newer := older.AddDate(1, 0, 0) + cases := []struct { + description string + a, b repo.DatasetRef + path string + }{ + { + "first dataset is older then the second", + repo.DatasetRef{ + Peername: "woo", + Name: "first_older", + Path: "/map/first", + ProfileID: "id", + Dataset: &dataset.Dataset{ + Commit: &dataset.Commit{ + Timestamp: older, + }, + }, + }, + repo.DatasetRef{ + Peername: "woo", + Name: "first_older", + Path: "/map/second", + ProfileID: "id", + Dataset: &dataset.Dataset{ + Commit: &dataset.Commit{ + Timestamp: newer, + }, + }, + }, + "/map/second", + }, + { + "first dataset is newer then the second", + repo.DatasetRef{ + Peername: "woo", + Name: "first_newer", + Path: "/map/first", + ProfileID: "id", + Dataset: &dataset.Dataset{ + Commit: &dataset.Commit{ + Timestamp: newer, + }, + }, + }, + repo.DatasetRef{ + Peername: "woo", + Name: "first_newer", + Path: "/map/second", + ProfileID: "id", + Dataset: &dataset.Dataset{ + Commit: &dataset.Commit{ + Timestamp: older, + }, + }, + }, + "/map/first", + }, + { + "first dataset is same time as the the second", + repo.DatasetRef{ + Peername: "woo", + Name: "first_same", + Path: "/map/first", + ProfileID: "id", + Dataset: &dataset.Dataset{ + Commit: &dataset.Commit{ + Timestamp: newer, + }, + }, + }, + repo.DatasetRef{ + Peername: "woo", + Name: "first_same", + Path: "/map/second", + ProfileID: "id", + Dataset: &dataset.Dataset{ + Commit: &dataset.Commit{ + Timestamp: newer, + }, + }, + }, + "/map/second", + }, + } + + for _, c := range cases { + if err := node.Repo.PutRef(c.a); err != nil { + t.Fatal(err) + } + if err := ReplaceRefIfMoreRecent(node, &c.a, &c.b); err != nil { + t.Fatal(err) + } + ref, err := node.Repo.GetRef(repo.DatasetRef{Peername: c.a.Peername, Name: c.a.Name}) + if err != nil { + t.Fatal(err) + } + if ref.Path != c.path { + t.Errorf("case '%s', ref path error, expected: '%s', got: '%s'", c.description, c.path, ref.Path) + } + } + + casesError := []struct { + description string + a, b repo.DatasetRef + err string + }{ + { + "original ref has no timestamp & should error", + repo.DatasetRef{ + Peername: "woo", + Name: "err", + Path: "/map/first", + ProfileID: "id", + }, + repo.DatasetRef{ + Peername: "woo", + Name: "err", + Path: "/map/second", + ProfileID: "id", + Dataset: &dataset.Dataset{ + Commit: &dataset.Commit{ + Timestamp: newer, + }, + }, + }, + "previous dataset ref is not fully derefernced", + }, + { + "added ref has no timestamp & should error", + repo.DatasetRef{ + Peername: "woo", + Name: "err", + Path: "/map/first", + ProfileID: "id", + Dataset: &dataset.Dataset{ + Commit: &dataset.Commit{ + Timestamp: newer, + }, + }, + }, + repo.DatasetRef{}, + "added dataset ref is not fully dereferenced", + }, + } + + for _, c := range casesError { + if err := node.Repo.PutRef(c.a); err != nil { + t.Fatal(err) + } + err := ReplaceRefIfMoreRecent(node, &c.a, &c.b) + if err == nil { + t.Errorf("case '%s' did not error", c.description) + } + if err.Error() != c.err { + t.Errorf("case '%s', error mismatch. expected: '%s', got: '%s'", c.description, c.err, err.Error()) + } + } +} diff --git a/base/dataset.go b/base/dataset.go index 78474df3c..c6c4ae06b 100644 --- a/base/dataset.go +++ b/base/dataset.go @@ -228,7 +228,7 @@ func FetchDataset(r repo.Repo, ref *repo.DatasetRef, pin, load bool) (err error) // TODO: This is asserting that the target is Fetch-able, but inside dsfs.LoadDataset, // only Get is called. Clean up the semantics of Fetch and Get to get this expection // more correctly in line with what's actually required. - _, err = fetcher.Fetch(cafs.SourceAny, key) + _, err = fetcher.Fetch(cafs.SourceAny, path) if err != nil { return fmt.Errorf("error fetching file: %s", err.Error()) }