Skip to content

Commit

Permalink
fix(validate): Improve validate so it works with FSI
Browse files Browse the repository at this point in the history
FSI broke validate because we switched from schema.json back to structure.json. The old functionality was looking specifically for schema.json (in the cmd package, no less) and did not survive this change. Instead, we should use the FSI package which correctly finds relevant component files.

Cleanup the lower levels of how validate works. Before, it was converting things back and forth, between cmd -> lib -> base -> dataset -> jsonschema. No need to do all of that unnecessary work.
  • Loading branch information
dustmop committed Jan 13, 2020
1 parent 455a4b3 commit 07cde33
Show file tree
Hide file tree
Showing 9 changed files with 142 additions and 195 deletions.
114 changes: 22 additions & 92 deletions base/validate.go
Original file line number Diff line number Diff line change
@@ -1,116 +1,46 @@
package base

import (
"bytes"
"context"
"encoding/json"
"fmt"
"io/ioutil"

"github.com/qri-io/dataset"
"github.com/qri-io/dataset/detect"
"github.com/qri-io/dataset/dsio"
"github.com/qri-io/dataset/validate"
"github.com/qri-io/jsonschema"
"github.com/qri-io/qfs"
"github.com/qri-io/qri/base/dsfs"
"github.com/qri-io/qri/repo"
)

// Validate checks a dataset body for errors based on a schema
func Validate(ctx context.Context, r repo.Repo, ref repo.DatasetRef, body, schema qfs.File) (errors []jsonschema.ValError, err error) {
if !ref.IsEmpty() {
err = repo.CanonicalizeDatasetRef(r, &ref)
if err != nil && err != repo.ErrNotFound {
log.Debug(err.Error())
err = fmt.Errorf("error with new reference: %s", err.Error())
return
}
// Validate checks a dataset body for errors based on the structure's schema
func Validate(ctx context.Context, r repo.Repo, body qfs.File, st *dataset.Structure) ([]jsonschema.ValError, error) {
if body == nil {
return nil, fmt.Errorf("body passed to Validate must not be nil")
}

var (
st = &dataset.Structure{}
data []byte
)

// if a dataset is specified, load it
if ref.Path != "" {
if err = ReadDataset(ctx, r, &ref); err != nil {
log.Debug(err.Error())
return
}

ds := ref.Dataset
st = ds.Structure
} else if body == nil {
err = fmt.Errorf("cannot find dataset: %s", ref)
return
if st == nil {
return nil, fmt.Errorf("st passed to Validate must not be nil")
}

if body != nil {
data, err = ioutil.ReadAll(body)
if err != nil {
log.Debug(err.Error())
err = fmt.Errorf("error reading data: %s", err.Error())
return
}

// if no schema, detect one
if st.Schema == nil {
var df dataset.DataFormat
df, err = detect.ExtensionDataFormat(body.FileName())
if err != nil {
err = fmt.Errorf("detecting data format: %s", err.Error())
return
}
str, _, e := detect.FromReader(df, bytes.NewBuffer(data))
if e != nil {
err = fmt.Errorf("error detecting from reader: %s", e)
return
}
st = str
// jsonschema assumes body is json, convert the format if necessary
if st.Format != "json" {
convert := dataset.Structure{
Format: "json",
Schema: st.Schema,
}
}

// if a schema is specified, override with it
if schema != nil {
stbytes, e := ioutil.ReadAll(schema)
if e != nil {
log.Debug(e.Error())
err = e
return
}
sch := map[string]interface{}{}
if e := json.Unmarshal(stbytes, &sch); e != nil {
err = fmt.Errorf("error reading schema: %s", e.Error())
return
}
st.Schema = sch
}

if data == nil && ref.Dataset != nil {
ds := ref.Dataset

f, e := dsfs.LoadBody(ctx, r.Store(), ds)
if e != nil {
log.Debug(e.Error())
err = fmt.Errorf("error loading dataset data: %s", e.Error())
return
}
data, err = ioutil.ReadAll(f)
file, err := ConvertBodyFormat(body, st, &convert)
if err != nil {
log.Debug(err.Error())
err = fmt.Errorf("error loading dataset data: %s", err.Error())
return
return nil, err
}
body = file
}

er, err := dsio.NewEntryReader(st, bytes.NewBuffer(data))
// jsonschema does not handle data streams, have to read the whole body
data, err := ioutil.ReadAll(body)
if err != nil {
log.Debug(err.Error())
err = fmt.Errorf("error reading data: %s", err.Error())
return
return nil, err
}

return validate.EntryReader(er)
jsch, err := st.JSONSchema()
if err != nil {
return nil, err
}
return jsch.ValidateBytes(data)
}
14 changes: 13 additions & 1 deletion base/validate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,26 @@ package base
import (
"context"
"testing"

"github.com/qri-io/qri/base/dsfs"
)

func TestValidate(t *testing.T) {
ctx := context.Background()
r := newTestRepo(t)
cities := addCitiesDataset(t, r)

errs, err := Validate(ctx, r, cities, nil, nil)
ds, err := dsfs.LoadDataset(ctx, r.Store(), cities.Path)
if err != nil {
t.Fatal(err)
}

if err = OpenDataset(ctx, r.Filesystem(), ds); err != nil {
t.Fatal(err)
}
body := ds.BodyFile()

errs, err := Validate(ctx, r, body, ds.Structure)
if err != nil {
t.Error(err.Error())
}
Expand Down
11 changes: 0 additions & 11 deletions cmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@ import (
"fmt"
"io"
"os"
"path"
"path/filepath"
"runtime"
"strings"

golog "github.com/ipfs/go-log"
Expand Down Expand Up @@ -121,12 +119,3 @@ func parseCmdLineDatasetRef(ref string) (repo.DatasetRef, error) {
}
return repo.ParseDatasetRef(ref)
}

// currentPath is used for test purposes to get the path from which qri is executing
func currentPath() (string, bool) {
_, filename, _, ok := runtime.Caller(1)
if !ok {
return "", ok
}
return path.Dir(filename), true
}
6 changes: 0 additions & 6 deletions cmd/save_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,6 @@ func TestSaveRun(t *testing.T) {
return
}

_, ok := currentPath()
if !ok {
t.Errorf("error getting path to current folder")
return
}

cases := []struct {
description string
ref string
Expand Down
53 changes: 3 additions & 50 deletions cmd/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,9 @@ package cmd

import (
"fmt"
"os"
"path/filepath"

"github.com/qri-io/ioes"
"github.com/qri-io/jsonschema"
"github.com/qri-io/qfs"
"github.com/qri-io/qri/lib"
"github.com/qri-io/qri/repo"
"github.com/spf13/cobra"
Expand Down Expand Up @@ -108,63 +105,19 @@ func (o *ValidateOptions) Complete(f Factory, args []string) (err error) {

// Run executes the run command
func (o *ValidateOptions) Run() (err error) {
var (
bodyFile, schemaFile *os.File
)

printRefSelect(o.Out, o.Refs)

o.StartSpinner()
defer o.StopSpinner()

if o.Refs.IsLinked() {
if o.BodyFilepath == "" {
// TODO(dlong): FSI should determine the filename by looking for each known file
// extension.
if _, err := os.Stat("body.json"); !os.IsNotExist(err) {
o.BodyFilepath = "body.json"
}
if _, err := os.Stat("body.csv"); !os.IsNotExist(err) {
o.BodyFilepath = "body.csv"
}
if err = qfs.AbsPath(&o.BodyFilepath); err != nil {
return err
}
}
if o.SchemaFilepath == "" {
o.SchemaFilepath = "schema.json"
if err = qfs.AbsPath(&o.SchemaFilepath); err != nil {
return err
}
}
}

if o.BodyFilepath != "" {
if bodyFile, err = loadFileIfPath(o.BodyFilepath); err != nil {
return lib.NewError(err, fmt.Sprintf("error opening body file: could not %s", err))
}
}
if o.SchemaFilepath != "" {
if schemaFile, err = loadFileIfPath(o.SchemaFilepath); err != nil {
return lib.NewError(err, fmt.Sprintf("error opening schema file: could not %s", err))
}
}

ref := o.Refs.Ref()
p := &lib.ValidateDatasetParams{
Ref: ref,
// TODO: restore
// URL: addDsURL,
BodyFilename: filepath.Base(o.BodyFilepath),
}

// this is because passing nil to interfaces is bad
// see: https://golang.org/doc/faq#nil_error
if bodyFile != nil {
p.Body = bodyFile
}
if schemaFile != nil {
p.Schema = schemaFile
BodyFilename: o.BodyFilepath,
SchemaFilename: o.SchemaFilepath,
UseFSI: o.Refs.IsLinked(),
}

res := []jsonschema.ValError{}
Expand Down
10 changes: 2 additions & 8 deletions cmd/validate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,6 @@ func TestValidateRun(t *testing.T) {
return
}

path, ok := currentPath()
if !ok {
t.Errorf("error getting path to current folder")
return
}

cases := []struct {
description string
ref string
Expand All @@ -91,8 +85,8 @@ func TestValidateRun(t *testing.T) {
// {"", "", "", "url", "", "bad arguments provided", "if you are validating data from a url, please include a dataset name or supply the --schema flag with a file path that Qri can validate against"},
{"movie problems", "peer/movies", "", "", "", movieOutput, "", ""},
{"dataset not found", "peer/bad_dataset", "", "", "", "", "cannot find dataset: peer/bad_dataset", ""},
{"body file not found", "", "bad/filepath", "testdata/days_of_week_schema.json", "", "", "open " + path + "/bad/filepath: no such file or directory", "error opening body file: could not open " + path + "/bad/filepath: no such file or directory"},
{"schema file not found", "", "testdata/days_of_week.csv", "bad/schema_filepath", "", "", "open " + path + "/bad/schema_filepath: no such file or directory", "error opening schema file: could not open " + path + "/bad/schema_filepath: no such file or directory"},
{"body file not found", "", "bad/filepath", "testdata/days_of_week_schema.json", "", "", "error opening body file: bad/filepath", ""},
{"schema file not found", "", "testdata/days_of_week.csv", "bad/schema_filepath", "", "", "error opening schema file: bad/schema_filepath", ""},
{"validate successfully", "", "testdata/days_of_week.csv", "testdata/days_of_week_schema.json", "", "✔ All good!\n", "", ""},
// TODO: pull from url
}
Expand Down
Loading

0 comments on commit 07cde33

Please sign in to comment.