Skip to content

Commit

Permalink
fix(add): add specific versions of a dataset using the full reference
Browse files Browse the repository at this point in the history
This commit contains a few things:

1) removes checking for is a dataset is local before attempting to add. Right now we don't have a reliable way of determining if a file is local or coming off the d-web.
2) add checks to see if there was a previous version of the dataset in the repo before adding. If the previous version is actually a more recent version, do not add the older version to the repo, just fetch and pin the data
3) tests this new `ReplaceRefIfMoreRecent` function
4) fixes an error in base that was sending the wrong path to the fetcher (conflict probably from upgrading to ipfs 0.4.21)
  • Loading branch information
ramfox committed Jun 11, 2019
1 parent 36a4b9a commit 4b1e562
Show file tree
Hide file tree
Showing 4 changed files with 225 additions and 6 deletions.
58 changes: 53 additions & 5 deletions actions/dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ package actions
import (
"fmt"
"io"
"time"

"github.com/qri-io/dataset"
"github.com/qri-io/dataset/dsfs"
"github.com/qri-io/dataset/validate"
"github.com/qri-io/qfs"
"github.com/qri-io/qfs/cafs"
Expand Down Expand Up @@ -119,10 +121,12 @@ func UpdateRemoteDataset(node *p2p.QriNode, ref *repo.DatasetRef, pin bool) (res
// AddDataset fetches & pins a dataset to the store, adding it to the list of stored refs
func AddDataset(node *p2p.QriNode, ref *repo.DatasetRef) (err error) {
if !ref.Complete() {
if local, err := ResolveDatasetRef(node, ref); err != nil {
// TODO (ramfox): we should check to see if the dataset already exists locally
// unfortunately, because of the nature of the ipfs filesystem commands, we don't
// know if files we fetch are local only or possibly coming from the network.
// instead, for now, let's just always try to add
if _, err := ResolveDatasetRef(node, ref); err != nil {
return err
} else if local {
return fmt.Errorf("error: dataset %s already exists in repo", ref)
}
}

Expand Down Expand Up @@ -207,11 +211,55 @@ func AddDataset(node *p2p.QriNode, ref *repo.DatasetRef) (err error) {
return fmt.Errorf("add failed: %s", err.Error())
}

if err = node.Repo.PutRef(*ref); err != nil {
prevRef, err := node.Repo.GetRef(repo.DatasetRef{Peername: ref.Peername, Name: ref.Name})
if err != nil && err == repo.ErrNotFound {
if err = node.Repo.PutRef(*ref); err != nil {
log.Debug(err.Error())
return fmt.Errorf("error putting dataset in repo: %s", err.Error())
}
return nil
}
if err != nil {
return err
}

prevRef.Dataset, err = dsfs.LoadDataset(node.Repo.Store(), prevRef.Path)
if err != nil {
log.Debug(err.Error())
return fmt.Errorf("error loading repo dataset: %s", prevRef.Path)
}

ref.Dataset, err = dsfs.LoadDataset(node.Repo.Store(), ref.Path)
if err != nil {
log.Debug(err.Error())
return fmt.Errorf("error putting dataset name in repo: %s", err.Error())
return fmt.Errorf("error loading added dataset: %s", ref.Path)
}

return ReplaceRefIfMoreRecent(node, &prevRef, ref)
}

// ReplaceRefIfMoreRecent replaces the given ref in the ref store, if
// it is more recent then the ref currently in the refstore
func ReplaceRefIfMoreRecent(node *p2p.QriNode, prev, curr *repo.DatasetRef) error {
var (
prevTime time.Time
currTime time.Time
)
if curr == nil || curr.Dataset == nil || curr.Dataset.Commit == nil {
return fmt.Errorf("added dataset ref is not fully dereferenced")
}
currTime = curr.Dataset.Commit.Timestamp
if prev == nil || prev.Dataset == nil || prev.Dataset.Commit == nil {
return fmt.Errorf("previous dataset ref is not fully derefernced")
}
prevTime = prev.Dataset.Commit.Timestamp

if prevTime.Before(currTime) || prevTime.Equal(currTime) {
if err := node.Repo.PutRef(*curr); err != nil {
log.Debug(err.Error())
return fmt.Errorf("error putting dataset name in repo: %s", err.Error())
}
}
return nil
}

Expand Down
7 changes: 7 additions & 0 deletions actions/dataset_ref.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ import (
// falling back to a network call if one isn't found
// TODO - this looks small now, but in the future we may consider
// reinforcing p2p network with registry lookups
// TODO (ramfox) - Canonicalizing a Dataset with no errors is not a good enough tell to see
// if a dataset is local or not, we have to actually attempt to load it.
// however, if we are connected to a network, we cannot fully reason if a file
// is local or from the network. We need to build tools that allow us better
// control over local only and network actions. Once we have those, we can attempt
// to load the dataset locally, if it error with DatasetNotFound, or something similar
// we will know that the dataset does not exist locally
func ResolveDatasetRef(node *p2p.QriNode, ref *repo.DatasetRef) (local bool, err error) {
if err := repo.CanonicalizeDatasetRef(node.Repo, ref); err == nil && ref.Path != "" {
return true, nil
Expand Down
164 changes: 164 additions & 0 deletions actions/dataset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package actions
import (
"context"
"testing"
"time"

"github.com/qri-io/dataset"
"github.com/qri-io/qfs"
Expand Down Expand Up @@ -416,3 +417,166 @@ func testEventsLog(t *testing.T, rmf RepoMakerFunc) {
}
}
}

func TestReplaceRefIfMoreRecent(t *testing.T) {
node := newTestNode(t)
older := time.Date(2019, 1, 1, 12, 0, 0, 0, time.UTC)
newer := older.AddDate(1, 0, 0)
cases := []struct {
description string
a, b repo.DatasetRef
path string
}{
{
"first dataset is older then the second",
repo.DatasetRef{
Peername: "woo",
Name: "first_older",
Path: "/map/first",
ProfileID: "id",
Dataset: &dataset.Dataset{
Commit: &dataset.Commit{
Timestamp: older,
},
},
},
repo.DatasetRef{
Peername: "woo",
Name: "first_older",
Path: "/map/second",
ProfileID: "id",
Dataset: &dataset.Dataset{
Commit: &dataset.Commit{
Timestamp: newer,
},
},
},
"/map/second",
},
{
"first dataset is newer then the second",
repo.DatasetRef{
Peername: "woo",
Name: "first_newer",
Path: "/map/first",
ProfileID: "id",
Dataset: &dataset.Dataset{
Commit: &dataset.Commit{
Timestamp: newer,
},
},
},
repo.DatasetRef{
Peername: "woo",
Name: "first_newer",
Path: "/map/second",
ProfileID: "id",
Dataset: &dataset.Dataset{
Commit: &dataset.Commit{
Timestamp: older,
},
},
},
"/map/first",
},
{
"first dataset is same time as the the second",
repo.DatasetRef{
Peername: "woo",
Name: "first_same",
Path: "/map/first",
ProfileID: "id",
Dataset: &dataset.Dataset{
Commit: &dataset.Commit{
Timestamp: newer,
},
},
},
repo.DatasetRef{
Peername: "woo",
Name: "first_same",
Path: "/map/second",
ProfileID: "id",
Dataset: &dataset.Dataset{
Commit: &dataset.Commit{
Timestamp: newer,
},
},
},
"/map/second",
},
}

for _, c := range cases {
if err := node.Repo.PutRef(c.a); err != nil {
t.Fatal(err)
}
if err := ReplaceRefIfMoreRecent(node, &c.a, &c.b); err != nil {
t.Fatal(err)
}
ref, err := node.Repo.GetRef(repo.DatasetRef{Peername: c.a.Peername, Name: c.a.Name})
if err != nil {
t.Fatal(err)
}
if ref.Path != c.path {
t.Errorf("case '%s', ref path error, expected: '%s', got: '%s'", c.description, c.path, ref.Path)
}
}

casesError := []struct {
description string
a, b repo.DatasetRef
err string
}{
{
"original ref has no timestamp & should error",
repo.DatasetRef{
Peername: "woo",
Name: "err",
Path: "/map/first",
ProfileID: "id",
},
repo.DatasetRef{
Peername: "woo",
Name: "err",
Path: "/map/second",
ProfileID: "id",
Dataset: &dataset.Dataset{
Commit: &dataset.Commit{
Timestamp: newer,
},
},
},
"previous dataset ref is not fully derefernced",
},
{
"added ref has no timestamp & should error",
repo.DatasetRef{
Peername: "woo",
Name: "err",
Path: "/map/first",
ProfileID: "id",
Dataset: &dataset.Dataset{
Commit: &dataset.Commit{
Timestamp: newer,
},
},
},
repo.DatasetRef{},
"added dataset ref is not fully dereferenced",
},
}

for _, c := range casesError {
if err := node.Repo.PutRef(c.a); err != nil {
t.Fatal(err)
}
err := ReplaceRefIfMoreRecent(node, &c.a, &c.b)
if err == nil {
t.Errorf("case '%s' did not error", c.description)
}
if err.Error() != c.err {
t.Errorf("case '%s', error mismatch. expected: '%s', got: '%s'", c.description, c.err, err.Error())
}
}
}
2 changes: 1 addition & 1 deletion base/dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ func FetchDataset(r repo.Repo, ref *repo.DatasetRef, pin, load bool) (err error)
// TODO: This is asserting that the target is Fetch-able, but inside dsfs.LoadDataset,
// only Get is called. Clean up the semantics of Fetch and Get to get this expection
// more correctly in line with what's actually required.
_, err = fetcher.Fetch(cafs.SourceAny, key)
_, err = fetcher.Fetch(cafs.SourceAny, path)
if err != nil {
return fmt.Errorf("error fetching file: %s", err.Error())
}
Expand Down

0 comments on commit 4b1e562

Please sign in to comment.