Skip to content

Commit

Permalink
feat(repo.Graph): initial support for repo graphs
Browse files Browse the repository at this point in the history
  • Loading branch information
b5 committed Nov 16, 2017
1 parent bc1f377 commit 1a5c9f9
Show file tree
Hide file tree
Showing 12 changed files with 407 additions and 40 deletions.
3 changes: 3 additions & 0 deletions api/handlers/datasets.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,9 @@ func (h *DatasetHandlers) addDatasetHandler(w http.ResponseWriter, r *http.Reque
}
// TODO - clean this up
p.Hash = r.URL.Path[len("/add/"):]
if p.Name == "" && r.FormValue("name") != "" {
p.Name = r.FormValue("name")
}
} else {
p = &core.AddParams{
Name: r.URL.Query().Get("name"),
Expand Down
8 changes: 8 additions & 0 deletions core/datasets.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,14 @@ func (r *DatasetRequests) InitDataset(p *InitDatasetParams, res *repo.DatasetRef
return fmt.Errorf("error putting data file in store: %s", err.Error())
}

dataexists, err := repo.HasPath(r.repo, datakey)
if err != nil {
return fmt.Errorf("error checking repo for already-existing data: %s", err.Error())
}
if dataexists {
return fmt.Errorf("this data already exists")
}

name := p.Name
if name == "" && filename != "" {
name = detect.Camelize(filename)
Expand Down
6 changes: 5 additions & 1 deletion core/datasets_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
func TestDatasetRequestsInit(t *testing.T) {
badDataFile := testrepo.BadDataFile
jobsByAutomationFile := testrepo.JobsByAutomationFile
jobsByAutomationFile2 := testrepo.JobsByAutomationFile2
badDataFormatFile := testrepo.BadDataFormatFile
badStructureFile := testrepo.BadStructureFile

Expand All @@ -24,13 +25,16 @@ func TestDatasetRequestsInit(t *testing.T) {
{&InitDatasetParams{}, nil, "either a file or a url is required to create a dataset"},
{&InitDatasetParams{Data: badDataFile}, nil, "error detecting format extension: no file extension provided"},
{&InitDatasetParams{DataFilename: badDataFile.FileName(), Data: badDataFile}, nil, "invalid data format: error reading first row of csv: EOF"},
{&InitDatasetParams{DataFilename: jobsByAutomationFile.FileName(), Data: jobsByAutomationFile}, nil, ""},
// Ensure that DataFormat validation is being called
{&InitDatasetParams{DataFilename: badDataFormatFile.FileName(),
Data: badDataFormatFile}, nil, "invalid data format: error: inconsistent column length on line 2 of length 3 (rather than 4). ensure all csv columns same length"},
// Ensure that structure validation is being called
{&InitDatasetParams{DataFilename: badStructureFile.FileName(),
Data: badStructureFile}, nil, "invalid structure: error: cannot use the same name, 'colb' more than once"},
// this should work
{&InitDatasetParams{DataFilename: jobsByAutomationFile.FileName(), Data: jobsByAutomationFile}, nil, ""},
// Ensure that we can't double-add data
{&InitDatasetParams{DataFilename: jobsByAutomationFile2.FileName(), Data: jobsByAutomationFile2}, nil, "this data already exists"},
}

mr, err := testrepo.NewTestRepo()
Expand Down
35 changes: 19 additions & 16 deletions core/queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ type RunParams struct {
}

func (r *QueryRequests) Run(p *RunParams, res *repo.DatasetRef) error {
fmt.Println("running query: %s", p.Dataset.QueryString)
var (
store = r.repo.Store()
structure *dataset.Structure
Expand All @@ -80,7 +81,9 @@ func (r *QueryRequests) Run(p *RunParams, res *repo.DatasetRef) error {
q := ds.Query
if q == nil {
q = &dataset.Query{
Syntax: "sql",
Abstract: &dataset.AbstractQuery{
Syntax: "sql",
Statement: ds.QueryString,
},
}
Expand Down Expand Up @@ -116,23 +119,23 @@ func (r *QueryRequests) Run(p *RunParams, res *repo.DatasetRef) error {
}
}

// TODO - restore query hash discovery
// fmt.Printf("query hash: %s\n", dshash)
// dspath := datastore.NewKey("/ipfs/" + dshash)

// TODO - restore query results graph
// rgraph, err := r.repo.QueryResults()
// if err != nil {
// return err
// }
qpath, err := dsfs.SaveQuery(store, q, false)
if err != nil {
return fmt.Errorf("error calculating query hash: %s", err.Error())
}

// cache := rgraph[qpath]
// if len(cache) > 0 {
// resource, err = core.GetStructure(store, cache[0])
// if err != nil {
// results, err = core.GetStructuredData(store, resource.Path)
// }
// }
if dsp, err := repo.DatasetForQuery(r.repo, qpath); err != nil && err != repo.ErrNotFound {
return fmt.Errorf("error checking for existing query: %s", err.Error())
} else if err != repo.ErrNotFound {
if ds, err := dsfs.LoadDataset(store, dsp); err == nil {
ref := &repo.DatasetRef{Name: p.SaveName, Path: dsp, Dataset: ds}
if err := r.repo.LogQuery(ref); err != nil {
return fmt.Errorf("error logging query to repo: %s", err.Error())
}
*res = *ref
return nil
}
}

// TODO - detect data format from passed-in results structure
structure, results, err = sql.Exec(store, q, func(o *sql.ExecOpt) {
Expand Down
32 changes: 18 additions & 14 deletions repo/fs/namestore.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"io/ioutil"
"os"
"sort"

"github.com/ipfs/go-datastore"
"github.com/qri-io/cafs"
Expand Down Expand Up @@ -108,26 +109,29 @@ func (n Namestore) Namespace(limit, offset int) ([]*repo.DatasetRef, error) {
return nil, err
}

i := -1
added := 0
res := make([]*repo.DatasetRef, limit)
// TODO -- horrible hack. Fix.
namesl := make([]*repo.DatasetRef, len(names))
idx := 0
for name, path := range names {
i++
namesl[idx] = &repo.DatasetRef{
Name: name,
Path: path,
}
idx++
}
sort.Slice(namesl, func(i, j int) bool { return namesl[i].Name < namesl[j].Name })

res := make([]*repo.DatasetRef, limit)
for i, ref := range namesl {
if i < offset {
continue
}
if limit > 0 && added == limit {
break
}

res[added] = &repo.DatasetRef{
Name: name,
Path: path,
if i-offset == limit {
return res, nil
}
added++
res[i-offset] = ref
}
res = res[:added]
return res, nil
return res[:len(namesl)-offset], nil
}

func (n Namestore) NameCount() (int, error) {
Expand Down
41 changes: 38 additions & 3 deletions repo/graph.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package repo

import (
"fmt"
"sync"

"github.com/ipfs/go-datastore"
Expand All @@ -10,6 +11,40 @@ import (

var walkParallelism = 4

// HasPath returns true if this repo already has a reference to
// the given path in its graph.
func HasPath(r Repo, path datastore.Key) (bool, error) {
	nodes, err := r.Graph()
	if err != nil {
		return false, fmt.Errorf("error getting repo graph: %s", err.Error())
	}
	// Graph() returns a map keyed by path string, so membership is a
	// single map lookup rather than a linear scan over every node.
	_, ok := nodes[path.String()]
	return ok, nil
}

// DatasetForQuery returns the key of the dataset generated by running
// the query stored at qpath, or ErrNotFound if the repo graph contains
// no dataset for that query.
func DatasetForQuery(r Repo, qpath datastore.Key) (datastore.Key, error) {
	nodes, err := r.Graph()
	if err != nil {
		return datastore.NewKey(""), fmt.Errorf("error getting repo graph: %s", err.Error())
	}
	// QueriesMap indexes query path -> dataset path, so a direct lookup
	// replaces iterating every entry. Debug prints removed — they leaked
	// internal state to stdout on every call.
	if dsp, ok := QueriesMap(nodes)[qpath.String()]; ok {
		return dsp, nil
	}
	return datastore.NewKey(""), ErrNotFound
}

// RepoGraph generates a map of all paths on this repository pointing
// to dsgraph.Node structs with all links configured. This is potentially
// expensive to calculate. Best to do some caching.
Expand Down Expand Up @@ -84,10 +119,10 @@ func (nl NodeList) nodesFromDatasetRef(r Repo, ref *DatasetRef) *dsgraph.Node {
To: nl.node(dsgraph.NtData, ds.Data.String()),
})

if ds.Previous.Path().String() != "/" {
if ds.Previous.String() != "/" {
root.AddLinks(dsgraph.Link{
From: root,
To: nl.node(dsgraph.NtDataset, ds.Previous.Path().String()),
To: nl.node(dsgraph.NtDataset, ds.Previous.String()),
})
}
// if ds.Commit.Path().String() != "" {
Expand All @@ -103,7 +138,7 @@ func (nl NodeList) nodesFromDatasetRef(r Repo, ref *DatasetRef) *dsgraph.Node {
if ds.Query != nil && ds.Query.Path().String() != "" {
if q, err := dsfs.LoadQuery(r.Store(), ds.Query.Path()); err == nil {
query := nl.node(dsgraph.NtQuery, ds.Query.Path().String())
if q.Abstract.Path().String() != "" {
if q.Abstract != nil && q.Abstract.Path().String() != "" {
query.AddLinks(dsgraph.Link{
From: query,
To: nl.node(dsgraph.NtAbstQuery, q.Abstract.Path().String()),
Expand Down
35 changes: 35 additions & 0 deletions repo/test/test_repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,38 @@ var JobsByAutomationFile = memfs.NewMemfileBytes("jobs_ranked_by_automation_prob
674,"0.98","53-3031","Driver/Sales Workers"
673,"0.98","27-4013","Radio Operators"
`))

// TODO - refactor to just give the raw data and a convenience method to create files
// As these stand they can only be used once
var JobsByAutomationFile2 = memfs.NewMemfileBytes("jobs_ranked_by_automation_probability.csv", []byte(`rank,probability_of_automation,soc_code,job_title
702,"0.99","41-9041","Telemarketers"
701,"0.99","23-2093","Title Examiners, Abstractors, and Searchers"
700,"0.99","51-6051","Sewers, Hand"
699,"0.99","15-2091","Mathematical Technicians"
698,"0.99","13-2053","Insurance Underwriters"
697,"0.99","49-9064","Watch Repairers"
696,"0.99","43-5011","Cargo and Freight Agents"
695,"0.99","13-2082","Tax Preparers"
694,"0.99","51-9151","Photographic Process Workers and Processing Machine Operators"
693,"0.99","43-4141","New Accounts Clerks"
692,"0.99","25-4031","Library Technicians"
691,"0.99","43-9021","Data Entry Keyers"
690,"0.98","51-2093","Timing Device Assemblers and Adjusters"
689,"0.98","43-9041","Insurance Claims and Policy Processing Clerks"
688,"0.98","43-4011","Brokerage Clerks"
687,"0.98","43-4151","Order Clerks"
686,"0.98","13-2072","Loan Officers"
685,"0.98","13-1032","Insurance Appraisers, Auto Damage"
684,"0.98","27-2023","Umpires, Referees, and Other Sports Officials"
683,"0.98","43-3071","Tellers"
682,"0.98","51-9194","Etchers and Engravers"
681,"0.98","51-9111","Packaging and Filling Machine Operators and Tenders"
680,"0.98","43-3061","Procurement Clerks"
679,"0.98","43-5071","Shipping, Receiving, and Traffic Clerks"
678,"0.98","51-4035","Milling and Planing Machine Setters, Operators, and Tenders, Metal and Plastic"
677,"0.98","13-2041","Credit Analysts"
676,"0.98","41-2022","Parts Salespersons"
675,"0.98","13-1031","Claims Adjusters, Examiners, and Investigators"
674,"0.98","53-3031","Driver/Sales Workers"
673,"0.98","27-4013","Radio Operators"
`))
28 changes: 28 additions & 0 deletions vendor/github.com/qri-io/dataset/dsfs/query.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

62 changes: 62 additions & 0 deletions vendor/github.com/qri-io/dataset/dsfs/query_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 1a5c9f9

Please sign in to comment.