From bc09dcfbcecfd34d08cf9e471eae00e59b0e5af7 Mon Sep 17 00:00:00 2001 From: Brendan O'Brien Date: Tue, 14 Nov 2017 14:03:34 -0500 Subject: [PATCH] fix: lots of nitty-gritty fixes in time for demo. this commit adds in some work-in-progress stuff on repo graphing, but it's mainly just a bunch of cleanup in time for a demo. --- api/handlers/datasets.go | 2 +- core/datasets.go | 7 + repo/graph.go | 128 ++++++++++++++++++ repo/graph_test.go | 67 +++++++++ repo/repo.go | 2 + vendor/github.com/qri-io/dataset/dataset.go | 12 ++ .../github.com/qri-io/dataset/dsfs/dataset.go | 39 ++++-- .../qri-io/dataset/dsfs/dataset_test.go | 28 ++-- vendor/github.com/qri-io/dataset/metadata.go | 68 ++++++++++ .../qri-io/dataset/metadata_test.go | 36 +++++ vendor/vendor.json | 12 +- 11 files changed, 367 insertions(+), 34 deletions(-) create mode 100644 repo/graph.go create mode 100644 repo/graph_test.go diff --git a/api/handlers/datasets.go b/api/handlers/datasets.go index c5f844680..9aa674430 100644 --- a/api/handlers/datasets.go +++ b/api/handlers/datasets.go @@ -135,7 +135,7 @@ func (h *DatasetHandlers) getDatasetHandler(w http.ResponseWriter, r *http.Reque util.WriteErrResponse(w, http.StatusInternalServerError, err) return } - util.WriteResponse(w, res.Dataset) + util.WriteResponse(w, res) } func (h *DatasetHandlers) initDatasetHandler(w http.ResponseWriter, r *http.Request) { diff --git a/core/datasets.go b/core/datasets.go index 5b7988de3..0ba7b3721 100644 --- a/core/datasets.go +++ b/core/datasets.go @@ -165,6 +165,13 @@ func (r *DatasetRequests) InitDataset(p *InitDatasetParams, res *repo.DatasetRef } ds := &dataset.Dataset{} + if p.Url != "" { + ds.DownloadUrl = p.Url + // if we're adding from a dataset url, set a default accrual periodicity of once a week + // this'll set us up to re-check urls over time + // TODO - make this configurable via a param + ds.AccrualPeriodicity = "R/P1W" + } if p.Metadata != nil { if err := json.NewDecoder(p.Metadata).Decode(ds); err != nil { return fmt.Errorf("error parsing metadata json: %s", err.Error()) diff --git a/repo/graph.go b/repo/graph.go new file mode 100644 index 000000000..569495f25 --- /dev/null +++ b/repo/graph.go @@ -0,0 +1,128 @@ +package repo + +// import ( +// "fmt" +// "github.com/qri-io/dataset/dsfs" +// "github.com/qri-io/dataset/dsgraph" +// ) + +// var walkParallelism = 4 + +// func RepoGraph(r Repo) (*dsgraph.Node, error) { +// root := &dsgraph.Node{Type: dsgraph.NtNamespace, Path: "root"} +// err := WalkRepoDatasets(r, func(prev *dsgraph.Node) func(int, *DatasetRef, error) (bool, error) { +// return func(depth int, ref *DatasetRef, e error) (kontinue bool, err error) { +// if e != nil { +// return false, e +// } + +// ds := NodesFromDatasetRef(ref) +// if depth == 0 { +// prev.AddLinks(dsgraph.Link{Type: dsgraph.LtNamespaceTip, From: prev, To: ds}) +// } else { +// prev.AddLinks(dsgraph.Link{Type: dsgraph.LtPrevious, From: prev, To: ds}) +// } +// prev = ds +// return true, nil +// } +// }(root)) +// return root, err +// } + +// func NodesFromDatasetRef(ref *DatasetRef) *dsgraph.Node { +// root := &dsgraph.Node{Type: dsgraph.NtDataset, Path: ref.Path.String()} +// ds := ref.Dataset +// if ds == nil { +// return root +// } + +// data := &dsgraph.Node{Type: dsgraph.NtData, Path: ds.Data.Path().String()} +// prev := &dsgraph.Node{Type: dsgraph.NtDataset, Path: ds.Previous.Path().String()} +// root.AddLinks( +// dsgraph.Link{Type: dsgraph.LtDsData, From: root, To: data}, +// dsgraph.Link{Type: dsgraph.LtPrevious, From: root, To: prev}, +// ) +// // if ds.Commit.Path().String() != "" { +// // commit := &dsgraph.Node{Type: dsgraph.NtCommit, Path: ds.Commit.Path()} +// // root.AddLinks(dsgraph.Link{Type: dsgraph.LtDsData, From: root, To: data}) +// // } +// if ds.AbstractStructure != nil && ds.AbstractStructure.Path().String() != "" { +// abst := &dsgraph.Node{Type: dsgraph.NtAbstStructure, Path: ds.AbstractStructure.Path().String()} +// root.AddLinks(dsgraph.Link{Type: dsgraph.LtAbstStructure, From: root, To: abst}) +// } +// if ds.Query != nil && ds.Query.Path().String() != "" { +// query := &dsgraph.Node{Type: dsgraph.NtQuery, Path: ds.Query.Path().String()} +// root.AddLinks(dsgraph.Link{Type: dsgraph.LtQuery, From: root, To: query}) +// } + +// return root +// } + +// // WalkDatasets visits every dataset in the history of a user's namespace +// // Yes, this potentially a very expensive function to call, use sparingly. +// func WalkRepoDatasets(r Repo, visit func(logdepth int, ref *DatasetRef, err error) (bool, error)) error { +// store := r.Store() +// count, err := r.NameCount() +// if err != nil { +// return err +// } else if count == 0 { +// return ErrRepoEmpty +// } + +// if count < walkParallelism { +// walkParallelism = count +// } + +// doSection := func(idx, pageSize int, done chan error) { +// refs, err := r.Namespace(pageSize, idx*pageSize) +// if err != nil { +// done <- err +// return +// } + +// for _, ref := range refs { +// fmt.Println(ref.Path.String()) +// ref.Dataset, err = dsfs.LoadDatasetRefs(store, ref.Path) +// kontinue, err := visit(0, ref, err) +// if err != nil { +// fmt.Println("top", err.Error()) +// done <- err +// return +// } +// if !kontinue { +// break +// } + +// depth := 1 +// for ref.Dataset != nil && ref.Dataset.Previous.String() != "" && ref.Dataset.Previous.String() != "/" { +// ref.Path = ref.Dataset.Previous +// ref.Dataset, err = dsfs.LoadDatasetRefs(store, ref.Path) +// kontinue, err = visit(depth, ref, err) +// if err != nil { +// fmt.Println("prev", err.Error()) +// done <- err +// return +// } +// if !kontinue { +// break +// } +// depth++ +// } +// } +// } + +// pageSize := count / walkParallelism +// done := make(chan error, 0) +// for i := 0; i < walkParallelism; i++ { +// go doSection(i, pageSize, done) +// } + +// for i := 0; i < walkParallelism; i++ { +// err := <-done +// if err != nil { +// return err +// } +// } + +// return nil +// } diff --git a/repo/graph_test.go b/repo/graph_test.go new file mode 100644 index 000000000..036d34495 --- /dev/null +++ b/repo/graph_test.go @@ -0,0 +1,67 @@ +package repo + +// import ( +// "encoding/json" +// "fmt" +// "testing" + +// "github.com/ipfs/go-datastore" +// "github.com/qri-io/cafs/memfs" +// "github.com/qri-io/dataset" +// "github.com/qri-io/qri/repo/profile" +// ) + +// var ( +// ds1 = &dataset.Dataset{ +// Previous: datastore.NewKey(""), +// } +// ds2 = &dataset.Dataset{ +// Previous: datastore.NewKey(""), +// } +// ) + +// func TestRepoGraph(t *testing.T) { +// store := memfs.NewMapstore() +// p := &profile.Profile{} + +// r, err := NewMemRepo(p, store, nil, nil) +// if err != nil { +// t.Errorf("error creating test repo: %s", err.Error()) +// return +// } + +// data1p, _ := store.Put(memfs.NewMemfileBytes("data1", []byte("dataset_1")), true) +// ds1.Data = data1p +// ds1j, _ := ds1.MarshalJSON() +// ds1p, err := store.Put(memfs.NewMemfileBytes("ds1", ds1j), true) +// if err != nil { +// t.Errorf("error putting dataset: %s", err.Error()) +// return +// } +// r.PutDataset(ds1p, ds1) +// r.PutName("ds1", ds1p) + +// data2p, _ := store.Put(memfs.NewMemfileBytes("data1", []byte("dataset_2")), true) +// ds2.Data = data2p +// ds2j, _ := ds1.MarshalJSON() +// ds2p, err := store.Put(memfs.NewMemfileBytes("ds2", ds2j), true) +// if err != nil { +// t.Errorf("error putting dataset: %s", err.Error()) +// return +// } +// r.PutDataset(ds2p, ds2) +// r.PutName("ds1", ds2p) + +// node, err := RepoGraph(r) +// if err != nil { +// t.Errorf("error generating repo graph: %s", err.Error()) +// return +// } + +// data, err := json.Marshal(node) +// if err != nil { +// t.Errorf("json marshal error: %s", err.Error()) +// return +// } +// fmt.Println(data) +// } diff --git a/repo/repo.go b/repo/repo.go index f141ab887..9333a298b 100644 --- a/repo/repo.go +++ b/repo/repo.go @@ -21,6 +21,8 @@ var ( ErrNotFound = fmt.Errorf("repo: not found") // when a Namestore name is already taken ErrNameTaken = fmt.Errorf("repo: name already in use") + // when the repo has no datasets + ErrRepoEmpty = fmt.Errorf("repo: this repo contains no datasets") ) // Repo is the interface for working with a qri repository diff --git a/vendor/github.com/qri-io/dataset/dataset.go b/vendor/github.com/qri-io/dataset/dataset.go index a80cf7ec6..b8ab93e0b 100644 --- a/vendor/github.com/qri-io/dataset/dataset.go +++ b/vendor/github.com/qri-io/dataset/dataset.go @@ -47,6 +47,8 @@ type Dataset struct { AccessUrl string `json:"accessUrl,omitempty"` // Url that should / must lead directly to the data itself DownloadUrl string `json:"downloadUrl,omitempty"` + // The frequency with which dataset changes. Must be an ISO 8601 repeating duration + AccrualPeriodicity string `json:"accrualPeriodicity,omitempty"` // path to readme Readme datastore.Key `json:"readme,omitempty"` // Author @@ -155,6 +157,9 @@ func (d *Dataset) Assign(datasets ...*Dataset) { if ds.Author != nil { d.Author = ds.Author } + if ds.AccrualPeriodicity != "" { + d.AccrualPeriodicity = ds.AccrualPeriodicity + } if ds.Citations != nil { d.Citations = ds.Citations } @@ -308,6 +313,9 @@ func (d *Dataset) MarshalJSON() ([]byte, error) { } data["timestamp"] = d.Timestamp data["title"] = d.Title + if d.AccrualPeriodicity != "" { + data["accrualPeriodicity"] = d.AccrualPeriodicity + } if d.Version != VersionNumber("") { data["version"] = d.Version } @@ -341,6 +349,7 @@ func (d *Dataset) UnmarshalJSON(data []byte) error { for _, f := range []string{ "abstractStructure", "accessUrl", + "accrualPeriodicity", "author", "citations", "commit", @@ -439,6 +448,9 @@ func CompareDatasets(a, b *Dataset) error { if a.DownloadUrl != b.DownloadUrl { return fmt.Errorf("DownloadUrl mismatch: %s != %s", a.DownloadUrl, b.DownloadUrl) } + if a.AccrualPeriodicity != b.AccrualPeriodicity { + return fmt.Errorf("AccrualPeriodicity mismatch: %s != %s", a.AccrualPeriodicity, b.AccrualPeriodicity) + } // if err := CompareLicense(a.License, b.License); err != nil { // return err // } diff --git a/vendor/github.com/qri-io/dataset/dsfs/dataset.go b/vendor/github.com/qri-io/dataset/dsfs/dataset.go index f773505f2..ad05f0d68 100644 --- a/vendor/github.com/qri-io/dataset/dsfs/dataset.go +++ b/vendor/github.com/qri-io/dataset/dsfs/dataset.go @@ -11,8 +11,31 @@ import ( "github.com/qri-io/dataset" ) -// Load a dataset from a cafs +// LoadDataset reads a dataset from a cafs and dereferences structure, query, and commitMsg if they exist, +// returning a fully-hydrated dataset func LoadDataset(store cafs.Filestore, path datastore.Key) (*dataset.Dataset, error) { + ds, err := LoadDatasetRefs(store, path) + if err != nil { + return nil, err + } + + if err := DerefDatasetStructure(store, ds); err != nil { + return nil, fmt.Errorf("error dereferencing %s file: %s", PackageFileStructure, err.Error()) + } + + if err := DerefDatasetQuery(store, ds); err != nil { + return nil, fmt.Errorf("error dereferencing %s file: %s", PackageFileQuery, err.Error()) + } + + if err := DerefDatasetCommitMsg(store, ds); err != nil { + return nil, fmt.Errorf("error dereferencing %s file: %s", PackageFileQuery, err.Error()) + } + + return ds, nil +} + +// LoadDatasetRefs reads a dataset from a content addressed filesystem +func LoadDatasetRefs(store cafs.Filestore, path datastore.Key) (*dataset.Dataset, error) { ds := &dataset.Dataset{} data, err := fileBytes(store.Get(path)) @@ -34,18 +57,6 @@ func LoadDataset(store cafs.Filestore, path datastore.Key) (*dataset.Dataset, er return nil, fmt.Errorf("error unmarshaling %s file: %s", PackageFileDataset.String(), err.Error()) } - if err := DerefDatasetStructure(store, ds); err != nil { - return nil, fmt.Errorf("error dereferencing %s file: %s", PackageFileStructure, err.Error()) - } - - if err := DerefDatasetQuery(store, ds); err != nil { - return nil, fmt.Errorf("error dereferencing %s file: %s", PackageFileQuery, err.Error()) - } - - if err := DerefDatasetCommitMsg(store, ds); err != nil { - return nil, fmt.Errorf("error dereferencing %s file: %s", PackageFileQuery, err.Error()) - } - return ds, nil } @@ -88,6 +99,8 @@ func DerefDatasetCommitMsg(store cafs.Filestore, ds *dataset.Dataset) error { return nil } +// SaveDataset writes a dataset to a cafs, replacing subcomponents of a dataset with hash references +// during the write process. Directory structure is according to PackageFile nameing conventions func SaveDataset(store cafs.Filestore, ds *dataset.Dataset, pin bool) (datastore.Key, error) { if ds == nil { return datastore.NewKey(""), nil diff --git a/vendor/github.com/qri-io/dataset/dsfs/dataset_test.go b/vendor/github.com/qri-io/dataset/dsfs/dataset_test.go index 0154f6d95..5a7e25a71 100644 --- a/vendor/github.com/qri-io/dataset/dsfs/dataset_test.go +++ b/vendor/github.com/qri-io/dataset/dsfs/dataset_test.go @@ -6,6 +6,20 @@ import ( "testing" ) +func TestLoadDataset(t *testing.T) { + store := memfs.NewMapstore() + apath, err := SaveDataset(store, AirportCodes, true) + if err != nil { + t.Errorf(err.Error()) + return + } + + _, err = LoadDataset(store, apath) + if err != nil { + t.Errorf(err.Error()) + } +} + func TestDatasetSave(t *testing.T) { store := memfs.NewMapstore() @@ -35,17 +49,3 @@ func TestDatasetSave(t *testing.T) { } // fmt.Println(string(store.(memfs.MapStore)[datastore.NewKey("/mem/Qmdv5WeDGw1f6pw4DSYQdsugNDFUqHw9FuFU8Gu7T4PUqF")].([]byte))) } - -func TestLoadDataset(t *testing.T) { - store := memfs.NewMapstore() - apath, err := SaveDataset(store, AirportCodes, true) - if err != nil { - t.Errorf(err.Error()) - return - } - - _, err = LoadDataset(store, apath) - if err != nil { - t.Errorf(err.Error()) - } -} diff --git a/vendor/github.com/qri-io/dataset/metadata.go b/vendor/github.com/qri-io/dataset/metadata.go index 9d371c76f..065100ee6 100644 --- a/vendor/github.com/qri-io/dataset/metadata.go +++ b/vendor/github.com/qri-io/dataset/metadata.go @@ -3,6 +3,7 @@ package dataset import ( "encoding/json" "fmt" + "time" ) // Current version of the specification @@ -70,3 +71,70 @@ type Theme struct { Name string `json:"name,omitempty"` Title string `json:"title,omitempty"` } + +// takes an ISO 8601 periodicity measure & returns a time.Duration. +// invalid periodicities return time.Duration(0) +func AccuralDuration(p string) time.Duration { + switch p { + // Decennial + case "R/P10Y": + return time.Duration(time.Hour * 24 * 365 * 10) + // Quadrennial + case "R/P4Y": + return time.Duration(time.Hour * 24 * 365 * 4) + // Annual + case "R/P1Y": + return time.Duration(time.Hour * 24 * 365) + // Bimonthly + case "R/P2M": + return time.Duration(time.Hour * 24 * 30 * 10) + // Semiweekly + case "R/P3.5D": + // TODO - inaccurate + return time.Duration(time.Hour * 24 * 4) + // Daily + case "R/P1D": + return time.Duration(time.Hour * 24) + // Biweekly + case "R/P2W": + return time.Duration(time.Hour * 24 * 14) + // Semiannual + case "R/P6M": + return time.Duration(time.Hour * 24 * 30 * 6) + // Biennial + case "R/P2Y": + return time.Duration(time.Hour * 24 * 365 * 2) + // Triennial + case "R/P3Y": + return time.Duration(time.Hour * 24 * 365 * 3) + // Three times a week + case "R/P0.33W": + return time.Duration((time.Hour * 24 * 7) / 3) + // Three times a month + case "R/P0.33M": + return time.Duration((time.Hour * 24 * 30) / 3) + // Continuously updated + case "R/PT1S": + return time.Second + // Monthly + case "R/P1M": + return time.Duration(time.Hour * 24 * 30) + // Quarterly + case "R/P3M": + return time.Duration((time.Hour * 24 * 365) / 7) + // Semimonthly + case "R/P0.5M": + return time.Duration(time.Hour * 24 * 15) + // Three times a year + case "R/P4M": + return time.Duration((time.Hour * 24 * 365) / 4) + // Weekly + case "R/P1W": + return time.Duration(time.Hour * 24 * 7) + // Hourly + case "R/PT1H": + return time.Hour + default: + return time.Duration(0) + } +} diff --git a/vendor/github.com/qri-io/dataset/metadata_test.go b/vendor/github.com/qri-io/dataset/metadata_test.go index 98f72b8c0..0b1e2330f 100644 --- a/vendor/github.com/qri-io/dataset/metadata_test.go +++ b/vendor/github.com/qri-io/dataset/metadata_test.go @@ -3,6 +3,7 @@ package dataset import ( "fmt" "testing" + "time" ) func TestLicense(t *testing.T) { @@ -22,3 +23,38 @@ func CompareLicense(a, b *License) error { return nil } + +func TestAccrualDuration(t *testing.T) { + cases := []struct { + in string + expect time.Duration + }{ + {"", time.Duration(0)}, + {"R/P10Y", time.Duration(315360000000000000)}, + {"R/P4Y", time.Duration(126144000000000000)}, + {"R/P1Y", time.Duration(31536000000000000)}, + {"R/P2M", time.Duration(25920000000000000)}, + {"R/P3.5D", time.Duration(345600000000000)}, + {"R/P1D", time.Duration(86400000000000)}, + {"R/P2W", time.Duration(1209600000000000)}, + {"R/P6M", time.Duration(15552000000000000)}, + {"R/P2Y", time.Duration(63072000000000000)}, + {"R/P3Y", time.Duration(94608000000000000)}, + {"R/P0.33W", time.Duration(201600000000000)}, + {"R/P0.33M", time.Duration(864000000000000)}, + {"R/PT1S", time.Duration(1000000000)}, + {"R/P1M", time.Duration(2592000000000000)}, + {"R/P3M", time.Duration(4505142857142857)}, + {"R/P0.5M", time.Duration(1296000000000000)}, + {"R/P4M", time.Duration(7884000000000000)}, + {"R/P1W", time.Duration(604800000000000)}, + {"R/PT1H", time.Duration(3600000000000)}, + } + + for i, c := range cases { + got := AccuralDuration(c.in) + if got != c.expect { + t.Errorf("case %d error. expected: %d, got: %d", i, c.expect, got) + } + } +} diff --git a/vendor/vendor.json b/vendor/vendor.json index 59b34ac60..224bd40db 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -831,10 +831,10 @@ "revisionTime": "2017-11-05T13:19:53Z" }, { - "checksumSHA1": "btLghKYAy1PpC5S2YzzOwShVTVc=", + "checksumSHA1": "wpnXvucWvEsuTdsHRpFKtI9sjPU=", "path": "github.com/qri-io/dataset", - "revision": "9dc49ce8d79aa67a5181e7158a881fd305928ed5", - "revisionTime": "2017-11-14T11:33:02Z" + "revision": "44f7728f7a46a59f3c9c0ccf08c0d328f0dd4550", + "revisionTime": "2017-11-14T17:43:48Z" }, { "checksumSHA1": "sPVjpo1FHsa+nZhWvgyC5us7UDs=", @@ -855,10 +855,10 @@ "revisionTime": "2017-11-08T20:07:50Z" }, { - "checksumSHA1": "cCA0il3uWQUU0WhasOiGYvmJc0E=", + "checksumSHA1": "CIZtemovKLhwTXHEB9LpNI25aQw=", "path": "github.com/qri-io/dataset/dsfs", - "revision": "9dc49ce8d79aa67a5181e7158a881fd305928ed5", - "revisionTime": "2017-11-14T11:33:02Z" + "revision": "89373c98b861a3bb5185e6a70fb11b71e2a4aef3", + "revisionTime": "2017-11-14T16:31:57Z" }, { "checksumSHA1": "TNpJqOXK8DsFCCb7eouHTgiQRzw=",