Skip to content

Commit

Permalink
fix(core.Add): major cleanup to make add work properly
Browse files Browse the repository at this point in the history
now that we have a clean story around CodingDataset, it's been much easier
to clean up the add command & get it working over RPC. There were/are a *bunch*
of lingering issues, as adding a remote dataset touches every major feature of
our infrastructure.

While fixing add, the issue of "who owns a dataset" has come up in a big way, as
two lists are now in contention: the list of datasets a profile has *created*,
and the list of datasets a peer is *storing*, which can be different. As an example,
if my profile is `b5` and I run `qri add edgi_webmon/epaGovSitemap`, I'm saying
"give me that dataset made by edgi_webmon". Based on that, running `qri list` with
no arguments now returns the _full list of stored references_, which in this case
will include `edgi_webmon/epaGovSitemap` in the list of output datasets. To see the
datasets I have created, I now have to run `qri list me` or `qri list b5`. I'm not
totally sure how I feel about this, but I think this is a usable solution for now.

All of this needs more thought & tests. In the near future we should definitely look
to add tests that simulate at least two qri nodes running core methods that require
interaction with each other. I'm thinking this'll make for a nice core package
integration test.

The other thing I'd like to see is adding datasets & histories to registries,
sewing in a fallback during add that checks a registry during DsRef canonicalization.
As histories accumulate we're going to see bugs and confusion resolving which version
is tip, as the references themselves contain no mechanism for determining tip without
inspecting histories.

There's so, so much work to do here, but I'm very happy to at least see this thing
working
  • Loading branch information
b5 committed Apr 25, 2018
1 parent 499c149 commit e7951d4
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 60 deletions.
6 changes: 3 additions & 3 deletions cmd/add.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,12 @@ changes to qri.`,
PreRun: func(cmd *cobra.Command, args []string) {
loadConfig()
},
Args: cobra.MinimumNArgs(1),
Run: func(cmd *cobra.Command, args []string) {

ingest := (addDsFilepath != "" || addDsMetaFilepath != "" || addDsStructureFilepath != "" || addDsURL != "")

if len(args) == 0 {
ErrExit(fmt.Errorf("specify the location of a dataset to add"))
} else if ingest && len(args) != 1 {
if ingest && len(args) != 1 {
ErrExit(fmt.Errorf("adding datasets with --structure, --meta, or --data requires exactly 1 argument for the new dataset name"))
}

Expand All @@ -83,6 +82,7 @@ changes to qri.`,
res := repo.DatasetRef{}
err = req.Add(&ref, &res)
ExitIfErr(err)
printDatasetRefInfo(1, res)
printInfo("Successfully added dataset %s", ref)
}
},
Expand Down
2 changes: 1 addition & 1 deletion cmd/print.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ func printDatasetRefInfo(i int, ref repo.DatasetRef) {
blue := color.New(color.FgBlue).SprintFunc()
ds := ref.Dataset

fmt.Printf("%s %s\n", cyan(i), white(ref.Name))
fmt.Printf("%s %s\n", cyan(i), white(ref.AliasString()))
fmt.Printf(" %s\n", blue(ref.Path))
if ds != nil && ds.Meta != nil {
if ds.Meta.Title != "" {
Expand Down
45 changes: 1 addition & 44 deletions core/datasets.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (

"github.com/ipfs/go-datastore"
"github.com/qri-io/cafs"
ipfs "github.com/qri-io/cafs/ipfs"
"github.com/qri-io/dataset"
"github.com/qri-io/dataset/detect"
"github.com/qri-io/dataset/dsfs"
Expand Down Expand Up @@ -709,49 +708,7 @@ func (r *DatasetRequests) Add(ref *repo.DatasetRef, res *repo.DatasetRef) (err e
}
}

fs, ok := r.repo.Store().(*ipfs.Filestore)
if !ok {
return fmt.Errorf("can only add datasets when running an IPFS filestore")
}

key := datastore.NewKey(strings.TrimSuffix(ref.Path, "/"+dsfs.PackageFileDataset.String()))

_, err = fs.Fetch(cafs.SourceAny, key)
if err != nil {
return fmt.Errorf("error fetching file: %s", err.Error())
}

err = fs.Pin(key, true)
if err != nil {
log.Debug(err.Error())
return fmt.Errorf("error pinning root key: %s", err.Error())
}

path := datastore.NewKey(key.String() + "/" + dsfs.PackageFileDataset.String())

profile, err := r.repo.Profile()
if err != nil {
log.Debug(err.Error())
return fmt.Errorf("error getting profile: %s", err)
}

ref.Peername = profile.Peername
ref.ProfileID = profile.ID

err = r.repo.PutRef(*ref)
if err != nil {
log.Debug(err.Error())
return fmt.Errorf("error putting dataset name in repo: %s", err.Error())
}

ds, err := dsfs.LoadDataset(fs, path)
if err != nil {
log.Debug(err.Error())
return fmt.Errorf("error loading newly saved dataset path: %s", path.String())
}

ref.Dataset = ds.Encode()

err = r.repo.AddDataset(ref)
*res = *ref
return
}
Expand Down
2 changes: 1 addition & 1 deletion core/datasets_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ func TestDatasetRequestsAdd(t *testing.T) {
res *repo.DatasetRef
err string
}{
{&repo.DatasetRef{Name: "abc", Path: "hash###"}, nil, "can only add datasets when running an IPFS filestore"},
{&repo.DatasetRef{Name: "abc", Path: "hash###"}, nil, "this store cannot fetch from remote sources"},
}

mr, err := testrepo.NewTestRepo()
Expand Down
11 changes: 6 additions & 5 deletions p2p/dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ const MtDatasetInfo = MsgType("dataset_info")
func (n *QriNode) RequestDataset(ref *repo.DatasetRef) (err error) {
log.Debugf("%s RequestDataset %s", n.ID, ref)

if ref.Path == "" {
return fmt.Errorf("path is required")
}
// if ref.Path == "" {
// return fmt.Errorf("path is required")
// }

act := actions.Dataset{n.Repo}

Expand Down Expand Up @@ -54,8 +54,9 @@ func (n *QriNode) RequestDataset(ref *repo.DatasetRef) (err error) {
for _, pid := range pids {

if err := n.SendMessage(req, replies, pid); err != nil {
log.Debug(err.Error())
return err
log.Debugf("%s err: %s", pid, err.Error())
continue
// return err
}

res := <-replies
Expand Down
12 changes: 7 additions & 5 deletions p2p/peers.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@ func (n *QriNode) ClosestConnectedPeers(id profile.ID, max int) (pid []peer.ID)
}
}

for _, conn := range n.Host.Network().Conns() {
pid = append(pid, conn.RemotePeer())
added++
if added == max {
break
if len(pid) == 0 {
for _, conn := range n.Host.Network().Conns() {
pid = append(pid, conn.RemotePeer())
added++
if added == max {
break
}
}
}

Expand Down
1 change: 0 additions & 1 deletion p2p/profile.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ func (n *QriNode) RequestProfile(pid peer.ID) (*profile.Profile, error) {
}

res := <-replies
log.Debugf("profile response for message: %s", res.ID)

cp := &profile.CodingProfile{}
if err := json.Unmarshal(res.Body, cp); err != nil {
Expand Down
40 changes: 40 additions & 0 deletions repo/actions/dataset.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
package actions

import (
"fmt"
"github.com/ipfs/go-datastore"
"github.com/qri-io/cafs"
"github.com/qri-io/dataset"
"github.com/qri-io/dataset/dsfs"
"github.com/qri-io/qri/repo"
"github.com/qri-io/qri/repo/profile"
"strings"
)

// Dataset wraps a repo.Repo, adding actions related to working
Expand All @@ -17,6 +19,7 @@ type Dataset struct {

// CreateDataset initializes a dataset from a dataset pointer and data file
func (act Dataset) CreateDataset(name string, ds *dataset.Dataset, data cafs.File, pin bool) (ref repo.DatasetRef, err error) {
log.Debug("CreateDataset: %s", name)
var (
path datastore.Key
pro *profile.Profile
Expand Down Expand Up @@ -67,6 +70,43 @@ func (act Dataset) CreateDataset(name string, ds *dataset.Dataset, data cafs.Fil
return
}

// AddDataset fetches & pins a dataset to the store, adding it to the list of stored refs.
// On success the passed-in ref is mutated: ref.Dataset is populated with the encoded
// dataset loaded from the store.
// TODO - this needs tests, first we need an implementation of the fetcher interface that isn't cafs/ipfs
func (act Dataset) AddDataset(ref *repo.DatasetRef) (err error) {
	log.Debugf("AddDataset: %s", ref)

	// fetching from remote sources is optional store behavior; bail early when the
	// backing store can't do it (e.g. non-IPFS test stores)
	fetcher, ok := act.Store().(cafs.Fetcher)
	if !ok {
		err = fmt.Errorf("this store cannot fetch from remote sources")
		return
	}

	// ref.Path points at the dataset file inside the package; the root key is the
	// enclosing package directory, which is what gets fetched & pinned
	key := datastore.NewKey(strings.TrimSuffix(ref.Path, "/"+dsfs.PackageFileDataset.String()))
	if _, err = fetcher.Fetch(cafs.SourceAny, key); err != nil {
		return fmt.Errorf("error fetching file: %s", err.Error())
	}

	if err = act.PinDataset(*ref); err != nil {
		log.Debug(err.Error())
		return fmt.Errorf("error pinning root key: %s", err.Error())
	}

	if err = act.PutRef(*ref); err != nil {
		log.Debug(err.Error())
		return fmt.Errorf("error putting dataset name in repo: %s", err.Error())
	}

	path := datastore.NewKey(key.String() + "/" + dsfs.PackageFileDataset.String())
	ds, err := dsfs.LoadDataset(act.Store(), path)
	if err != nil {
		log.Debug(err.Error())
		// include the underlying cause — it was previously dropped from the
		// returned message, leaving only the path with no explanation
		return fmt.Errorf("error loading newly saved dataset path: %s: %s", path.String(), err.Error())
	}

	ref.Dataset = ds.Encode()
	return
}

// ReadDataset grabs a dataset from the store
func (act Dataset) ReadDataset(ref *repo.DatasetRef) (err error) {
if act.Repo.Store() != nil {
Expand Down

0 comments on commit e7951d4

Please sign in to comment.