Skip to content

Commit

Permalink
feat: added new dataset request method Diff with minimal test
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomas Osterbind committed Jan 30, 2018
1 parent 44f896b commit 4e7a033
Show file tree
Hide file tree
Showing 2 changed files with 200 additions and 8 deletions.
76 changes: 68 additions & 8 deletions core/datasets.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@ import (
"github.com/qri-io/dataset/dsio"
"github.com/qri-io/dataset/validate"
"github.com/qri-io/dataset/vals"
"github.com/qri-io/datasetDiffer"
"github.com/qri-io/jsonschema"
"github.com/qri-io/qri/p2p"
"github.com/qri-io/qri/repo"
"github.com/qri-io/varName"
diff "github.com/yudai/gojsondiff"
)

// DatasetRequests encapsulates business logic for this node's
Expand Down Expand Up @@ -721,19 +723,77 @@ func (r *DatasetRequests) Validate(p *ValidateDatasetParams, errors *[]jsonschem
type DiffParams struct {
	// DsLeft points to the left-hand dataset of the comparison.
	DsLeft *dataset.Dataset
	// DsRight points to the right-hand dataset of the comparison.
	DsRight *dataset.Dataset
	// Format names the output format as a string. Supported values at
	// present: "struct", "ascii_full", and "ascii_delta".
	Format string
	// Color turns on color text modifiers in the output where applicable.
	Color bool
	// DiffAll is an override flag: diff the full dataset without having to
	// list each component individually.
	DiffAll bool
	// DiffComponents selects which components of a dataset to diff when
	// DiffAll is false. Supported keys at present: "structure", "data",
	// "meta", "transform", and "visConfig".
	DiffComponents map[string]bool
}

// Diff computes the diff of two datasets
func (r *DatasetRequests) Diff(p *DiffParams, diffs *datasetDiffer.DiffList) (err error) {
// TODO
return fmt.Errorf("Not yet implemented")
// Diff computes the diff of two datasets and stores the result in the map
// that diffs points to.
//
// When p.DiffAll is set, both datasets are handed to
// datasetDiffer.DiffDatasets and its result is stored as-is. Otherwise only
// the components flagged true in p.DiffComponents are compared, the result is
// keyed by component name, and a component is skipped when it is missing from
// either dataset. Recognized component names are "structure", "data",
// "transform", "meta", and "visConfig"; any other key in p.DiffComponents is
// ignored.
//
// An error is returned when p or diffs is nil, when a per-component diff is
// requested without both datasets, or when the underlying differ fails. On
// error the map behind diffs is left untouched.
func (r *DatasetRequests) Diff(p *DiffParams, diffs *map[string]diff.Diff) (err error) {
	if p == nil {
		return fmt.Errorf("error diffing datasets: params are required")
	}
	if diffs == nil {
		return fmt.Errorf("error diffing datasets: result destination is required")
	}

	if p.DiffAll {
		all, diffErr := datasetDiffer.DiffDatasets(p.DsLeft, p.DsRight)
		if diffErr != nil {
			return fmt.Errorf("error diffing datasets: %s", diffErr.Error())
		}
		// TODO: remove this temporary hack. When all["data"] is nil or has no
		// deltas the plan is to:
		//   - dereference data paths
		//   - marshal json to []byte
		//   - call `datasetDiffer.DiffJSON(a, b)`

		// Write through the pointer: assigning to diffs itself would only
		// rebind the local parameter and the caller would never see the result.
		*diffs = all
		return nil
	}

	result := map[string]diff.Diff{}
	// Walk the supported components in a fixed order so that, when several
	// components fail, the reported error does not depend on map iteration
	// order.
	for _, name := range []string{"structure", "data", "transform", "meta", "visConfig"} {
		if !p.DiffComponents[name] {
			continue
		}
		// Every branch below dereferences both datasets.
		if p.DsLeft == nil || p.DsRight == nil {
			return fmt.Errorf("error diffing %s: both datasets are required", name)
		}

		switch name {
		case "structure":
			if p.DsLeft.Structure != nil && p.DsRight.Structure != nil {
				structureDiffs, diffErr := datasetDiffer.DiffStructure(p.DsLeft.Structure, p.DsRight.Structure)
				if diffErr != nil {
					return fmt.Errorf("error diffing structure: %s", diffErr.Error())
				}
				result[name] = structureDiffs
			}
		case "data":
			// TODO
			if p.DsLeft.DataPath != "" && p.DsRight.DataPath != "" {
				dataDiffs, diffErr := datasetDiffer.DiffData(p.DsLeft, p.DsRight)
				if diffErr != nil {
					return fmt.Errorf("error diffing data: %s", diffErr.Error())
				}
				result[name] = dataDiffs
			}
		case "transform":
			if p.DsLeft.Transform != nil && p.DsRight.Transform != nil {
				transformDiffs, diffErr := datasetDiffer.DiffTransform(p.DsLeft.Transform, p.DsRight.Transform)
				if diffErr != nil {
					return fmt.Errorf("error diffing transform: %s", diffErr.Error())
				}
				result[name] = transformDiffs
			}
		case "meta":
			if p.DsLeft.Meta != nil && p.DsRight.Meta != nil {
				metaDiffs, diffErr := datasetDiffer.DiffMeta(p.DsLeft.Meta, p.DsRight.Meta)
				if diffErr != nil {
					return fmt.Errorf("error diffing meta: %s", diffErr.Error())
				}
				result[name] = metaDiffs
			}
		case "visConfig":
			if p.DsLeft.VisConfig != nil && p.DsRight.VisConfig != nil {
				visConfigDiffs, diffErr := datasetDiffer.DiffVisConfig(p.DsLeft.VisConfig, p.DsRight.VisConfig)
				if diffErr != nil {
					return fmt.Errorf("error diffing visConfig: %s", diffErr.Error())
				}
				result[name] = visConfigDiffs
			}
		}
	}

	*diffs = result
	return nil
}
132 changes: 132 additions & 0 deletions core/datasets_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@ import (
"encoding/json"
"testing"

"github.com/ipfs/go-datastore"
"github.com/qri-io/cafs/memfs"
"github.com/qri-io/dataset"
"github.com/qri-io/dataset/dsfs"
"github.com/qri-io/datasetDiffer"
"github.com/qri-io/qri/repo"
testrepo "github.com/qri-io/qri/repo/test"
)
Expand Down Expand Up @@ -373,3 +376,132 @@ func TestDatasetRequestsAdd(t *testing.T) {
}
}
}

// TestDataRequestsDiff initializes two datasets from the in-memory CSV
// fixtures jobsByAutomationFile and jobsByAutomationFile2, diffs them, and
// compares the string summary of the diff against the expected text.
//
// NOTE(review): despite its name, this test calls
// datasetDiffer.DiffDatasets directly rather than DatasetRequests.Diff.
func TestDataRequestsDiff(t *testing.T) {
	mr, err := testrepo.NewTestRepo()
	if err != nil {
		t.Fatalf("error allocating test repo: %s", err.Error())
	}
	req := NewDatasetRequests(mr, nil)

	// Setup failures are fatal: every later step depends on the datasets
	// loaded here, so continuing would only produce misleading follow-on
	// failures.

	// File 1
	dsRef1 := &repo.DatasetRef{}
	initParams := &InitParams{
		DataFilename: jobsByAutomationFile.FileName(),
		Data:         jobsByAutomationFile,
		// MetadataFilename: jobsMeta.FileName(),
		// Metadata:         jobsMeta,
	}
	if err = req.Init(initParams, dsRef1); err != nil {
		t.Fatalf("couldn't load file 1: %s", err.Error())
	}
	dsBase, err := dsfs.LoadDataset(mr.Store(), dsRef1.Path)
	if err != nil {
		t.Fatalf("error loading dataset 1: %s", err.Error())
	}

	// File 2
	dsRef2 := &repo.DatasetRef{}
	initParams = &InitParams{
		DataFilename: jobsByAutomationFile2.FileName(),
		Data:         jobsByAutomationFile2,
	}
	if err = req.Init(initParams, dsRef2); err != nil {
		t.Fatalf("couldn't load second file: %s", err.Error())
	}
	dsNewStructure, err := dsfs.LoadDataset(mr.Store(), dsRef2.Path)
	if err != nil {
		t.Fatalf("error loading dataset: %s", err.Error())
	}

	// test cases
	cases := []struct {
		dsLeft, dsRight *dataset.Dataset
		expected        string
		err             string
	}{
		{dsBase, dsNewStructure, "Structure Changed. (3 changes)", ""},
	}

	// execute
	for i, c := range cases {
		got, err := datasetDiffer.DiffDatasets(c.dsLeft, c.dsRight)
		if err != nil {
			if err.Error() != c.err {
				t.Errorf("case %d error mismatch: expected '%s', got '%s'", i, c.err, err.Error())
			}
			continue
		}
		// A case that expects an error must not pass silently when the diff
		// succeeds.
		if c.err != "" {
			t.Errorf("case %d error mismatch: expected '%s', got no error", i, c.err)
			continue
		}
		stringDiffs := datasetDiffer.MapDiffsToString(got)
		if stringDiffs != c.expected {
			t.Errorf("case %d response mismatch: expected '%s', got '%s'", i, c.expected, stringDiffs)
		}
	}
}

// jobsByAutomationFile is an in-memory CSV fixture named
// "jobs_ranked_by_automation_probability.csv". Its header row is
// rank,probability_of_automation,soc_code,job_title and it holds 30 data rows
// (ranks 702 down to 673). TestDataRequestsDiff passes it to Init as the data
// of the first dataset.
var jobsByAutomationFile = memfs.NewMemfileBytes("jobs_ranked_by_automation_probability.csv", []byte(`rank,probability_of_automation,soc_code,job_title
702,"0.99","41-9041","Telemarketers"
701,"0.99","23-2093","Title Examiners, Abstractors, and Searchers"
700,"0.99","51-6051","Sewers, Hand"
699,"0.99","15-2091","Mathematical Technicians"
698,"0.99","13-2053","Insurance Underwriters"
697,"0.99","49-9064","Watch Repairers"
696,"0.99","43-5011","Cargo and Freight Agents"
695,"0.99","13-2082","Tax Preparers"
694,"0.99","51-9151","Photographic Process Workers and Processing Machine Operators"
693,"0.99","43-4141","New Accounts Clerks"
692,"0.99","25-4031","Library Technicians"
691,"0.99","43-9021","Data Entry Keyers"
690,"0.98","51-2093","Timing Device Assemblers and Adjusters"
689,"0.98","43-9041","Insurance Claims and Policy Processing Clerks"
688,"0.98","43-4011","Brokerage Clerks"
687,"0.98","43-4151","Order Clerks"
686,"0.98","13-2072","Loan Officers"
685,"0.98","13-1032","Insurance Appraisers, Auto Damage"
684,"0.98","27-2023","Umpires, Referees, and Other Sports Officials"
683,"0.98","43-3071","Tellers"
682,"0.98","51-9194","Etchers and Engravers"
681,"0.98","51-9111","Packaging and Filling Machine Operators and Tenders"
680,"0.98","43-3061","Procurement Clerks"
679,"0.98","43-5071","Shipping, Receiving, and Traffic Clerks"
678,"0.98","51-4035","Milling and Planing Machine Setters, Operators, and Tenders, Metal and Plastic"
677,"0.98","13-2041","Credit Analysts"
676,"0.98","41-2022","Parts Salespersons"
675,"0.98","13-1031","Claims Adjusters, Examiners, and Investigators"
674,"0.98","53-3031","Driver/Sales Workers"
673,"0.98","27-4013","Radio Operators"
`))

// jobsByAutomationFile2 is a second in-memory CSV fixture, named
// "jobs_ranked_by_automation_prob.csv". Compared with jobsByAutomationFile,
// its last two header columns are industry_code and job_name (instead of
// soc_code and job_title), and the row with rank 698 carries probability
// "0.88" instead of "0.99". TestDataRequestsDiff passes it to Init as the
// data of the second dataset.
var jobsByAutomationFile2 = memfs.NewMemfileBytes("jobs_ranked_by_automation_prob.csv", []byte(`rank,probability_of_automation,industry_code,job_name
702,"0.99","41-9041","Telemarketers"
701,"0.99","23-2093","Title Examiners, Abstractors, and Searchers"
700,"0.99","51-6051","Sewers, Hand"
699,"0.99","15-2091","Mathematical Technicians"
698,"0.88","13-2053","Insurance Underwriters"
697,"0.99","49-9064","Watch Repairers"
696,"0.99","43-5011","Cargo and Freight Agents"
695,"0.99","13-2082","Tax Preparers"
694,"0.99","51-9151","Photographic Process Workers and Processing Machine Operators"
693,"0.99","43-4141","New Accounts Clerks"
692,"0.99","25-4031","Library Technicians"
691,"0.99","43-9021","Data Entry Keyers"
690,"0.98","51-2093","Timing Device Assemblers and Adjusters"
689,"0.98","43-9041","Insurance Claims and Policy Processing Clerks"
688,"0.98","43-4011","Brokerage Clerks"
687,"0.98","43-4151","Order Clerks"
686,"0.98","13-2072","Loan Officers"
685,"0.98","13-1032","Insurance Appraisers, Auto Damage"
684,"0.98","27-2023","Umpires, Referees, and Other Sports Officials"
683,"0.98","43-3071","Tellers"
682,"0.98","51-9194","Etchers and Engravers"
681,"0.98","51-9111","Packaging and Filling Machine Operators and Tenders"
680,"0.98","43-3061","Procurement Clerks"
679,"0.98","43-5071","Shipping, Receiving, and Traffic Clerks"
678,"0.98","51-4035","Milling and Planing Machine Setters, Operators, and Tenders, Metal and Plastic"
677,"0.98","13-2041","Credit Analysts"
676,"0.98","41-2022","Parts Salespersons"
675,"0.98","13-1031","Claims Adjusters, Examiners, and Investigators"
674,"0.98","53-3031","Driver/Sales Workers"
673,"0.98","27-4013","Radio Operators"
`))

0 comments on commit 4e7a033

Please sign in to comment.