feat(core.Init): initialize a dataset from a url
This feature expands dataset initialization to allow adding a dataset from a url that points to the data to add. Initialization is accessible both through the API, via a "url" form value, and through the CLI, by setting the --url flag.
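Both entry points converge on the same core request: the CLI's --url flag and the API's "url" form value each populate InitDatasetParams.Url before calling InitDataset. A minimal sketch of that shared path, assuming the value returned by core.NewDatasetRequests is a *core.DatasetRequests (the concrete type is not shown in this diff) and that the request object has been constructed the way cmd/init.go does it:

package example

import (
	"fmt"

	"github.com/qri-io/qri/core"
	"github.com/qri-io/qri/repo"
)

// initFromURL initializes a dataset from a remote url. Both the CLI --url
// flag and the api "url" form value feed this same request shape.
func initFromURL(req *core.DatasetRequests, name, dataURL string) (*repo.DatasetRef, error) {
	p := &core.InitDatasetParams{
		Name: name,
		Url:  dataURL,
	}
	ref := &repo.DatasetRef{}
	if err := req.InitDataset(p, ref); err != nil {
		return nil, fmt.Errorf("error initializing dataset: %s", err.Error())
	}
	return ref, nil
}

On the CLI this corresponds to something like qri init --url https://example.com/data.csv --name example_dataset.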
b5 committed Nov 8, 2017
1 parent 8540c49 commit 7858ba7
Showing 5 changed files with 116 additions and 142 deletions.
26 changes: 15 additions & 11 deletions api/handlers/datasets.go
@@ -84,7 +84,7 @@ func (h *DatasetHandlers) AddDatasetHandler(w http.ResponseWriter, r *http.Reque
}

func (h *DatasetHandlers) ZipDatasetHandler(w http.ResponseWriter, r *http.Request) {
res := &dataset.Dataset{}
res := &repo.DatasetRef{}
args := &core.GetDatasetParams{
Path: datastore.NewKey(r.URL.Path[len("/download/"):]),
Hash: r.FormValue("hash"),
@@ -98,7 +98,7 @@ func (h *DatasetHandlers) ZipDatasetHandler(w http.ResponseWriter, r *http.Reque

w.Header().Set("Content-Type", "application/zip")
w.Header().Set("Content-Disposition", fmt.Sprintf("filename=\"%s.zip\"", "dataset"))
dsutil.WriteZipArchive(h.store, res, w)
dsutil.WriteZipArchive(h.store, res.Dataset, w)
}

func (h *DatasetHandlers) listDatasetsHandler(w http.ResponseWriter, r *http.Request) {
@@ -116,7 +116,7 @@ func (h *DatasetHandlers) listDatasetsHandler(w http.ResponseWriter, r *http.Req
}

func (h *DatasetHandlers) getDatasetHandler(w http.ResponseWriter, r *http.Request) {
res := &dataset.Dataset{}
res := &repo.DatasetRef{}
args := &core.GetDatasetParams{
Path: datastore.NewKey(r.URL.Path[len("/datasets/"):]),
Hash: r.FormValue("hash"),
@@ -126,7 +126,7 @@ func (h *DatasetHandlers) getDatasetHandler(w http.ResponseWriter, r *http.Reque
util.WriteErrResponse(w, http.StatusInternalServerError, err)
return
}
util.WriteResponse(w, res)
util.WriteResponse(w, res.Dataset)
}

func (h *DatasetHandlers) saveDatasetHandler(w http.ResponseWriter, r *http.Request) {
@@ -179,23 +179,27 @@ func (h *DatasetHandlers) saveStructureHandler(w http.ResponseWriter, r *http.Re
}

func (h *DatasetHandlers) initDatasetFileHandler(w http.ResponseWriter, r *http.Request) {
var f cafs.File
infile, header, err := r.FormFile("file")
if err != nil {
if err != nil && err != http.ErrMissingFile {
util.WriteErrResponse(w, http.StatusBadRequest, err)
return
} else {
f = memfs.NewMemfileReader(header.Filename, infile)
}

p := &core.InitDatasetParams{
Url: r.FormValue("url"),
Name: r.FormValue("name"),
Data: memfs.NewMemfileReader(header.Filename, infile),
Data: f,
}
res := &dataset.Dataset{}
res := &repo.DatasetRef{}
if err := h.InitDataset(p, res); err != nil {
h.log.Infof("error initializing dataset: %s", err.Error())
util.WriteErrResponse(w, http.StatusInternalServerError, err)
return
}
util.WriteResponse(w, res)
util.WriteResponse(w, res.Dataset)
}

func (h *DatasetHandlers) deleteDatasetHandler(w http.ResponseWriter, r *http.Request) {
@@ -204,8 +208,8 @@ func (h *DatasetHandlers) deleteDatasetHandler(w http.ResponseWriter, r *http.Re
Path: datastore.NewKey(r.URL.Path[len("/datasets"):]),
}

ds := &dataset.Dataset{}
if err := h.Get(&core.GetDatasetParams{Name: p.Name, Path: p.Path}, ds); err != nil {
ref := &repo.DatasetRef{}
if err := h.Get(&core.GetDatasetParams{Name: p.Name, Path: p.Path}, ref); err != nil {
return
}

@@ -216,7 +220,7 @@ func (h *DatasetHandlers) deleteDatasetHandler(w http.ResponseWriter, r *http.Re
return
}

util.WriteResponse(w, ds)
util.WriteResponse(w, ref.Dataset)
}

func (h *DatasetHandlers) getStructuredDataHandler(w http.ResponseWriter, r *http.Request) {
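Exercising the updated initDatasetFileHandler over HTTP might look like the sketch below. The form is sent as multipart so that a missing "file" part surfaces as http.ErrMissingFile, which the handler now tolerates when a url is supplied; the local address and /init route are assumptions, since the diff does not show where the handler is mounted.

package main

import (
	"bytes"
	"fmt"
	"mime/multipart"
	"net/http"
)

func main() {
	// Build a multipart form carrying only "url" and "name" values, no file part.
	body := &bytes.Buffer{}
	w := multipart.NewWriter(body)
	w.WriteField("url", "https://example.com/data.csv") // remote data to add
	w.WriteField("name", "example_dataset")
	w.Close()

	// Hypothetical local address and route for the init handler.
	req, err := http.NewRequest("POST", "http://localhost:2503/init", body)
	if err != nil {
		fmt.Println("error building request:", err)
		return
	}
	req.Header.Set("Content-Type", w.FormDataContentType())

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()
	fmt.Println("status:", res.Status)
}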
152 changes: 43 additions & 109 deletions cmd/init.go
@@ -17,25 +17,20 @@ package cmd
import (
"flag"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"time"

"github.com/ipfs/go-datastore"
"github.com/qri-io/dataset"
"github.com/qri-io/dataset/detect"
"github.com/qri-io/dataset/dsfs"
"github.com/qri-io/qri/core"
"github.com/qri-io/qri/repo"
"github.com/spf13/cobra"
)

var (
initFile string
initMetaFile string
initName string
initPassive bool
initRescursive bool
initFile string
initMetaFile string
initName string
initUrl string
initPassive bool
)

// initCmd represents the init command
@@ -44,124 +39,63 @@ var initCmd = &cobra.Command{
Short: "Initialize a dataset, adding it to your local collection of datasets",
Long: ``,
Run: func(cmd *cobra.Command, args []string) {
if initFile == "" {
ErrExit(fmt.Errorf("please provide a file argument"))
}
var dataFile, metaFile *os.File

path, err := filepath.Abs(initFile)
ExitIfErr(err)
if initFile == "" && initUrl == "" {
ErrExit(fmt.Errorf("please provide either a file or a url argument"))
}

r := GetRepo(false)
// ns := LoadNamespaceGraph()
ds, err := GetIpfsFilestore(false)
ExitIfErr(err)
if initName == "" {
ErrExit(fmt.Errorf("please provide a --name"))
}

if initRescursive {
files, err := ioutil.ReadDir(path)
if initFile != "" {
filepath, err := filepath.Abs(initFile)
ExitIfErr(err)
foundFiles := map[string]datastore.Key{}
for _, fi := range files {
if fi.IsDir() {
continue
} else {
initName = fi.Name()
st, err := detect.FromFile(initName)
ExitIfErr(err)
// Add to the namespace as the filename
// TODO - require this be a proper, no-space alphanumeric type thing

datahash, err := ds.AddPath(filepath.Join(path, fi.Name()), true)
ExitIfErr(err)
datakey := datastore.NewKey("/ipfs/" + datahash)

// rkey, dskey, err := datasets.AddFileStructure(ds, filepath.Join(path, fi.Name()), rsc)
d := &dataset.Dataset{
Timestamp: time.Now().In(time.UTC),
Structure: st,
Data: datakey,
}

dspath, err := dsfs.SaveDataset(ds, d, true)
ExitIfErr(err)

foundFiles[initName] = dspath
r.PutName(initName, dspath)
}
}
} else {
file, err := os.Stat(path)
dataFile, err = os.Open(filepath)
ExitIfErr(err)
}

// TODO - extract a default name from the file name
// TODO - require this be a proper, no-space alphanumeric type thing
if !initPassive && initName == "" {
initName = InputText(fmt.Sprintf("choose a variable name for %s", file.Name()), file.Name())
if err != nil {
return
}
} else if initName == "" {
initName = repo.CoerceDatasetName(file.Name())
}

if !repo.ValidDatasetName(initName) {
ErrExit(fmt.Errorf("invalid dataset name: %s", initName))
}

st, err := detect.FromFile(path)
if initMetaFile != "" {
filepath, err := filepath.Abs(initMetaFile)
ExitIfErr(err)

datahash, err := ds.AddPath(path, true)
metaFile, err = os.Open(filepath)
ExitIfErr(err)
datakey := datastore.NewKey("/ipfs/" + datahash)

d := &dataset.Dataset{}

// parse any provided metadata
if initMetaFile != "" {
mdata, err := ioutil.ReadFile(initMetaFile)
if err != nil {
ErrExit(fmt.Errorf("error opening metadata file: %s", err.Error()))
}
if err := d.UnmarshalJSON(mdata); err != nil {
ErrExit(fmt.Errorf("error parsing metadata file: %s", err.Error()))
}
}

if d.Structure == nil {
d.Structure = &dataset.Structure{}
}

// structure may have been set by the metadata file above
// by calling assign on ourselves with inferred structure in
// the middle, any user-contributed schema metadata will overwrite
// inferred metadata, but inferred schema properties will populate
// empty fields
d.Structure.Assign(st, d.Structure)
d.Timestamp = time.Now().In(time.UTC)
d.Data = datakey
d.Length = int(file.Size())
}

dspath, err := dsfs.SaveDataset(ds, d, true)
ExitIfErr(err)
r := GetRepo(false)
store, err := GetIpfsFilestore(false)
ExitIfErr(err)
req := core.NewDatasetRequests(store, r)

// Add to the namespace as the filename
// TODO - require this be a proper, no-space alphanumeric type thing
// ns[initName] = dspath
err = r.PutName(initName, dspath)
ExitIfErr(err)
p := &core.InitDatasetParams{
Name: initName,
Url: initUrl,
DataFilename: filepath.Base(initFile),
}

PrintSuccess("initialized dataset %s: %s", initName, dspath)
// PrintDatasetDetailedInfo(ds)
// this is because passing nil to interfaces is bad: https://golang.org/doc/faq#nil_error
if dataFile != nil {
p.Data = dataFile
}
if metaFile != nil {
p.Metadata = metaFile
}

ref := &repo.DatasetRef{}
err = req.InitDataset(p, ref)
ExitIfErr(err)
// req.Get(&core.GetDatasetParams{ Name: p.Name }, res)
PrintSuccess("initialized dataset %s: %s", ref.Name, ref.Path.String())
},
}

func init() {
flag.Parse()
RootCmd.AddCommand(initCmd)
initCmd.Flags().StringVarP(&initUrl, "url", "u", "", "url to file to initialize from")
initCmd.Flags().StringVarP(&initFile, "file", "f", "", "data file to initialize from")
initCmd.Flags().StringVarP(&initName, "name", "n", "", "name to give dataset")
initCmd.Flags().StringVarP(&initMetaFile, "meta", "m", "", "dataset metadata")
initCmd.Flags().BoolVarP(&initRescursive, "recursive", "r", false, "recursive add from a directory")
initCmd.Flags().BoolVarP(&initPassive, "passive", "p", false, "disable interactive init")
}
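The dataFile != nil and metaFile != nil guards above relate to the nil-interface comment in the diff: assigning a nil *os.File to an interface-typed field yields an interface value that is not itself nil. A standalone illustration of the gotcha (not part of this commit):

package main

import (
	"fmt"
	"io"
	"os"
)

func main() {
	var f *os.File // nil concrete pointer

	var r io.Reader = f   // interface now holds (type=*os.File, value=nil)
	fmt.Println(f == nil) // true
	fmt.Println(r == nil) // false: the interface itself is non-nil

	// This is why cmd/init.go assigns p.Data and p.Metadata only when the
	// files were actually opened; see https://golang.org/doc/faq#nil_error
}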
7 changes: 6 additions & 1 deletion cmd/repo.go
@@ -19,7 +19,12 @@ func GetRepo(online bool) repo.Repo {
fs, err := GetIpfsFilestore(online)
ExitIfErr(err)

r, err := fs_repo.NewRepo(fs, viper.GetString(QriRepoPath), fs.Node().PeerHost.ID().Pretty())
id := ""
if fs.Node().PeerHost != nil {
id = fs.Node().PeerHost.ID().Pretty()
}

r, err := fs_repo.NewRepo(fs, viper.GetString(QriRepoPath), id)
ExitIfErr(err)
return r
}