From 898a8f32527a8a2726ea9f9e803c519b153a052d Mon Sep 17 00:00:00 2001 From: Balaji Jinnah Date: Fri, 7 Aug 2020 12:44:15 +0530 Subject: [PATCH] feat(dgraph): Add suport for RDF query. (#6038) --- edgraph/server.go | 2 + go.mod | 2 +- go.sum | 4 +- query/common_test.go | 14 +++ query/outputrdf.go | 236 +++++++++++++++++++++++++++++++++++++++ query/rdf_result_test.go | 205 ++++++++++++++++++++++++++++++++++ 6 files changed, 460 insertions(+), 3 deletions(-) create mode 100644 query/outputrdf.go create mode 100644 query/rdf_result_test.go diff --git a/edgraph/server.go b/edgraph/server.go index b46494c89bc..f683cb861fb 100644 --- a/edgraph/server.go +++ b/edgraph/server.go @@ -1074,6 +1074,8 @@ func processQuery(ctx context.Context, qc *queryContext) (*api.Response, error) respMap["types"] = formatTypes(er.Types) } resp.Json, err = json.Marshal(respMap) + } else if qc.req.RespFormat == api.Request_RDF { + resp.Rdf, err = query.ToRDF(qc.latency, er.Subgraphs) } else { resp.Json, err = query.ToJson(qc.latency, er.Subgraphs) } diff --git a/go.mod b/go.mod index a3d0573f9f6..ea3143419af 100644 --- a/go.mod +++ b/go.mod @@ -17,7 +17,7 @@ require ( github.com/blevesearch/snowballstem v0.0.0-20180110192139-26b06a2c243d // indirect github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd github.com/dgraph-io/badger/v2 v2.0.1-rc1.0.20200718033852-37ee16d8ad1c - github.com/dgraph-io/dgo/v200 v200.0.0-20200401175452-e463f9234453 + github.com/dgraph-io/dgo/v200 v200.0.0-20200805103119-a3544c464dd6 github.com/dgraph-io/graphql-transport-ws v0.0.0-20200715131837-c0460019ead2 github.com/dgraph-io/ristretto v0.0.3-0.20200630154024-f66de99634de github.com/dgrijalva/jwt-go v3.2.0+incompatible diff --git a/go.sum b/go.sum index 41a2b47a67f..572c96d067d 100644 --- a/go.sum +++ b/go.sum @@ -80,8 +80,8 @@ github.com/dgraph-io/badger v1.6.0 h1:DshxFxZWXUcO0xX476VJC07Xsr6ZCBVRHKZ93Oh7Ev github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4= github.com/dgraph-io/badger/v2 v2.0.1-rc1.0.20200718033852-37ee16d8ad1c h1:LoEZfU53r3H1et4WY9M0h1c3fuCljBnn3pk/7TB5eWY= github.com/dgraph-io/badger/v2 v2.0.1-rc1.0.20200718033852-37ee16d8ad1c/go.mod h1:26P/7fbL4kUZVEVKLAKXkBXKOydDmM2p1e+NhhnBCAE= -github.com/dgraph-io/dgo/v200 v200.0.0-20200401175452-e463f9234453 h1:DTgOrw91nMIukDm/WEvdobPLl0LgeDd/JE66+24jBks= -github.com/dgraph-io/dgo/v200 v200.0.0-20200401175452-e463f9234453/go.mod h1:Co+FwJrnndSrPORO8Gdn20dR7FPTfmXr0W/su0Ve/Ig= +github.com/dgraph-io/dgo/v200 v200.0.0-20200805103119-a3544c464dd6 h1:toHzMCdCUgYsjM0cW9+wafnKFXfp1HizIJUyzihN+vk= +github.com/dgraph-io/dgo/v200 v200.0.0-20200805103119-a3544c464dd6/go.mod h1:rHa+h3kI4M8ASOirxyIyNeXBfHFgeskVUum2OrDMN3U= github.com/dgraph-io/graphql-transport-ws v0.0.0-20200715131837-c0460019ead2 h1:NSl3XXyON9bgmBJSAvr5FPrgILAovtoTs7FwdtaZZq0= github.com/dgraph-io/graphql-transport-ws v0.0.0-20200715131837-c0460019ead2/go.mod h1:7z3c/5w0sMYYZF5bHsrh8IH4fKwG5O5Y70cPH1ZLLRQ= github.com/dgraph-io/ristretto v0.0.3-0.20200630154024-f66de99634de h1:t0UHb5vdojIDUqktM6+xJAfScFBsVpXZmqC9dsgJmeA= diff --git a/query/common_test.go b/query/common_test.go index dbcffd0eeae..65ab6b00081 100644 --- a/query/common_test.go +++ b/query/common_test.go @@ -69,6 +69,20 @@ func processQuery(ctx context.Context, t *testing.T, query string) (string, erro return string(jsonResponse), err } +func processQueryRDF(ctx context.Context, t *testing.T, query string) (string, error) { + txn := client.NewTxn() + defer txn.Discard(ctx) + + res, err := txn.Do(ctx, &api.Request{ + Query: query, + RespFormat: api.Request_RDF, + }) + if err != nil { + return "", err + } + return string(res.Rdf), err +} + func processQueryNoErr(t *testing.T, query string) string { res, err := processQuery(context.Background(), t, query) require.NoError(t, err) diff --git a/query/outputrdf.go b/query/outputrdf.go new file mode 100644 index 00000000000..6fc364aaeec --- /dev/null +++ b/query/outputrdf.go @@ -0,0 +1,236 @@ +/* + * Copyright 2017-2020 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package query + +import ( + "bytes" + "fmt" + "strconv" + + "github.com/dgraph-io/dgraph/algo" + "github.com/dgraph-io/dgraph/protos/pb" + "github.com/dgraph-io/dgraph/types" + "github.com/dgraph-io/dgraph/x" + "github.com/pkg/errors" +) + +// rdfBuilder is used to generate RDF from subgraph. +type rdfBuilder struct { + buf *bytes.Buffer +} + +// ToRDF converts the given subgraph list into rdf format. +func ToRDF(l *Latency, sgl []*SubGraph) ([]byte, error) { + b := &rdfBuilder{ + buf: &bytes.Buffer{}, + } + for _, sg := range sgl { + if err := validateSubGraphForRDF(sg); err != nil { + return nil, err + } + // Skip parent graph. we don't want parent values. + for _, child := range sg.Children { + if err := b.castToRDF(child); err != nil { + return nil, err + } + } + } + return b.buf.Bytes(), nil +} + +// castToRDF converts the given subgraph to RDF and appends to the +// output string. +func (b *rdfBuilder) castToRDF(sg *SubGraph) error { + if err := validateSubGraphForRDF(sg); err != nil { + return err + } + if sg.SrcUIDs != nil { + // Get RDF for the given subgraph. + if err := b.rdfForSubgraph(sg); err != nil { + return err + } + } + // Recursively cnvert RDF for the children graph. + for _, child := range sg.Children { + if err := b.castToRDF(child); err != nil { + return err + } + } + return nil +} + +// rdfForSubgraph generates RDF and appends to the output parameter. +func (b *rdfBuilder) rdfForSubgraph(sg *SubGraph) error { + for i, uid := range sg.SrcUIDs.Uids { + if sg.Params.IgnoreResult { + // Skip ignored values. + continue + } + if sg.IsInternal() { + if sg.Params.Expand != "" { + continue + } + // Check if we have val for the given uid. If you got uid then populate + // the rdf. + val, ok := sg.Params.UidToVal[uid] + if !ok && val.Value == nil { + continue + } + outputval, err := getObjectVal(val) + if err != nil { + continue + } + b.writeRDF(uid, []byte(sg.aggWithVarFieldName()), outputval) + continue + } + switch { + case len(sg.counts) > 0: + // Add count rdf. + b.rdfForCount(uid, sg.counts[i], sg) + case i < len(sg.uidMatrix) && len(sg.uidMatrix[i].Uids) != 0: + // Add posting list relation. + b.rdfForUIDList(uid, sg.uidMatrix[i], sg) + case i < len(sg.valueMatrix): + b.rdfForValueList(uid, sg.valueMatrix[i], sg.fieldName()) + } + } + return nil +} + +func (b *rdfBuilder) writeRDF(subject uint64, predicate []byte, object []byte) { + // add subject + b.writeTriple([]byte(fmt.Sprintf("%#x", subject))) + x.Check(b.buf.WriteByte(' ')) + // add predicate + b.writeTriple(predicate) + x.Check(b.buf.WriteByte(' ')) + // add object + x.Check2(b.buf.Write(object)) + x.Check(b.buf.WriteByte(' ')) + x.Check(b.buf.WriteByte('.')) + x.Check(b.buf.WriteByte('\n')) +} + +func (b *rdfBuilder) writeTriple(val []byte) { + x.Check(b.buf.WriteByte('<')) + x.Check2(b.buf.Write(val)) + x.Check(b.buf.WriteByte('>')) +} + +// rdfForCount returns rdf for count fucntion. +func (b *rdfBuilder) rdfForCount(subject uint64, count uint32, sg *SubGraph) { + fieldName := sg.Params.Alias + if fieldName == "" { + fieldName = fmt.Sprintf("count(%s)", sg.Attr) + } + b.writeRDF(subject, []byte(fieldName), + quotedNumber([]byte(strconv.FormatUint(uint64(count), 10)))) +} + +// rdfForUIDList returns rdf for uid list. +func (b *rdfBuilder) rdfForUIDList(subject uint64, list *pb.List, sg *SubGraph) { + for _, destUID := range list.Uids { + if algo.IndexOf(sg.DestUIDs, destUID) < 0 { + // This uid is filtered. + continue + } + // Build object. + b.writeRDF( + subject, + []byte(sg.fieldName()), + buildTriple([]byte(fmt.Sprintf("%#x", destUID)))) + } +} + +// rdfForValueList returns rdf for the value list. +func (b *rdfBuilder) rdfForValueList(subject uint64, valueList *pb.ValueList, attr string) { + if attr == "uid" { + b.writeRDF(subject, + []byte(attr), + buildTriple([]byte(fmt.Sprintf("%#x", subject)))) + return + } + for _, destValue := range valueList.Values { + val, err := convertWithBestEffort(destValue, attr) + if err != nil { + continue + } + outputval, err := getObjectVal(val) + if err != nil { + continue + } + b.writeRDF(subject, []byte(attr), outputval) + } +} + +func getObjectVal(v types.Val) ([]byte, error) { + outputval, err := valToBytes(v) + if err != nil { + return nil, err + } + switch v.Tid { + case types.UidID: + return buildTriple(outputval), nil + case types.IntID: + return quotedNumber(outputval), nil + case types.FloatID: + return quotedNumber(outputval), nil + case types.GeoID: + return nil, errors.New("Geo id is not supported in rdf output") + default: + return outputval, nil + } +} + +func buildTriple(val []byte) []byte { + buf := make([]byte, 0, 2+len(val)) + buf = append(buf, '<') + buf = append(buf, val...) + buf = append(buf, '>') + return buf +} + +func validateSubGraphForRDF(sg *SubGraph) error { + if sg.IsGroupBy() { + return errors.New("groupby is not supported in rdf output format") + } + uidCount := sg.Attr == "uid" && sg.Params.DoCount && sg.IsInternal() + if uidCount { + return errors.New("uid count is not supported in the rdf output format") + } + if sg.Params.Normalize { + return errors.New("normalize directive is not supported in the rdf output format") + } + if sg.Params.IgnoreReflex { + return errors.New("ignorereflex directive is not supported in the rdf output format") + } + if sg.SrcFunc != nil && sg.SrcFunc.Name == "checkpwd" { + return errors.New("chkpwd function is not supported in the rdf output format") + } + if sg.Params.Facet != nil && !sg.Params.ExpandAll { + return errors.New("facets are not supported in the rdf output format") + } + return nil +} + +func quotedNumber(val []byte) []byte { + tmpVal := make([]byte, 0, len(val)+2) + tmpVal = append(tmpVal, '"') + tmpVal = append(tmpVal, val...) + tmpVal = append(tmpVal, '"') + return tmpVal +} diff --git a/query/rdf_result_test.go b/query/rdf_result_test.go new file mode 100644 index 00000000000..e91b5effff6 --- /dev/null +++ b/query/rdf_result_test.go @@ -0,0 +1,205 @@ +/* + * Copyright 2020 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package query + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestRDFResult(t *testing.T) { + query := `{ + friends_15_and_19(func: uid(1)) { + name + friend @filter(ge(age, 15) AND lt(age, 19)) { + name + age + } + } + }` + + rdf, err := processQueryRDF(context.Background(), t, query) + require.NoError(t, err) + require.Equal(t, rdf, `<0x1> "Michonne" . +<0x1> <0x17> . +<0x1> <0x18> . +<0x1> <0x19> . +<0x17> "Rick Grimes" . +<0x18> "Glenn Rhee" . +<0x19> "Daryl Dixon" . +<0x17> "15" . +<0x18> "15" . +<0x19> "17" . +`) +} + +func TestRDFNormalize(t *testing.T) { + query := ` + { + me(func: uid(0x01)) @normalize { + mn: name + gender + friend { + n: name + d: dob + friend { + fn : name + } + } + son { + sn: name + } + } + }` + _, err := processQueryRDF(context.Background(), t, query) + require.Error(t, err, "normalize directive is not supported in the rdf output format") +} + +func TestRDFGroupBy(t *testing.T) { + query := ` + { + me(func: uid(1, 23, 24, 25, 31)) @groupby(age) { + count(uid) + } + }` + _, err := processQueryRDF(context.Background(), t, query) + require.Contains(t, err.Error(), "groupby is not supported in rdf output format") +} + +func TestRDFUidCount(t *testing.T) { + query := ` + { + me(func: gt(count(friend), 0)) { + count(uid) + } + }` + _, err := processQueryRDF(context.Background(), t, query) + require.Contains(t, err.Error(), "uid count is not supported in the rdf output format") +} + +func TestRDFIngoreReflex(t *testing.T) { + query := ` + { + me(func:anyofterms(name, "Michonne Rick Daryl")) @ignoreReflex { + name + friend { + name + friend { + name + } + } + } + }` + _, err := processQueryRDF(context.Background(), t, query) + require.Contains(t, err.Error(), + "ignorereflex directive is not supported in the rdf output format") +} + +func TestRDFCheckPwd(t *testing.T) { + query := ` + { + me(func: uid(0x01)) { + expand(_all_) + checkpwd(password, "12345") + } + } + ` + _, err := processQueryRDF(context.Background(), t, query) + require.Contains(t, err.Error(), + "chkpwd function is not supported in the rdf output format") +} + +func TestRDFPredicateCount(t *testing.T) { + query := ` + { + me(func:anyofterms(name, "Michonne Rick Daryl")) { + name + count(friend) + friend { + name + } + } + } + ` + + rdf, err := processQueryRDF(context.Background(), t, query) + require.NoError(t, err) + require.Equal(t, `<0x1> "Michonne" . +<0x17> "Rick Grimes" . +<0x19> "Daryl Dixon" . +<0x1> "5" . +<0x17> "1" . +<0x19> "0" . +<0x1> <0x17> . +<0x1> <0x18> . +<0x1> <0x19> . +<0x1> <0x1f> . +<0x1> <0x65> . +<0x17> <0x1> . +<0x1> "Michonne" . +<0x17> "Rick Grimes" . +<0x18> "Glenn Rhee" . +<0x19> "Daryl Dixon" . +<0x1f> "Andrea" . +`, rdf) +} + +func TestRDFFacets(t *testing.T) { + query := ` + { + shortest(from: 1, to:1001, numpaths: 4) { + path @facets(weight) + } + }` + _, err := processQueryRDF(context.Background(), t, query) + require.Contains(t, err.Error(), + "facets are not supported in the rdf output format") +} + +func TestDateRDF(t *testing.T) { + query := ` + { + me(func: uid(0x01)) { + name + gender + friend(orderdesc: film.film.initial_release_date) { + name + film.film.initial_release_date + } + } + } + ` + rdf, err := processQueryRDF(context.Background(), t, query) + require.NoError(t, err) + require.Equal(t, rdf, `<0x1> "Michonne" . +<0x1> "female" . +<0x1> <0x19> . +<0x1> <0x18> . +<0x1> <0x17> . +<0x1> <0x1f> . +<0x17> "Rick Grimes" . +<0x18> "Glenn Rhee" . +<0x19> "Daryl Dixon" . +<0x1f> "Andrea" . +<0x17> "1900-01-02T00:00:00Z" . +<0x18> "1909-05-05T00:00:00Z" . +<0x19> "1929-01-10T00:00:00Z" . +<0x1f> "1801-01-15T00:00:00Z" . +`) +}