From e47cd3a22e468a26aa8b58e75d6fb13f6cfcca75 Mon Sep 17 00:00:00 2001 From: Arijit Das Date: Mon, 11 Nov 2019 16:01:42 +0530 Subject: [PATCH 1/6] Sort alphabets of languages for non indexed fields. --- posting/list.go | 32 ++++++------ query/common_test.go | 9 +++- query/query2_test.go | 32 ++++++++++++ types/collate/collate.go | 75 ++++++++++++++++++++++++++++ types/conversion.go | 3 +- types/conversion_test.go | 102 +++++++++++++++++++-------------------- types/geo_test.go | 6 +-- types/scalar_types.go | 21 ++++---- types/sort.go | 6 +++ types/sort_test.go | 2 +- worker/task.go | 2 +- 11 files changed, 207 insertions(+), 83 deletions(-) create mode 100644 types/collate/collate.go diff --git a/posting/list.go b/posting/list.go index 64b505f7428..7bf84f98a7b 100644 --- a/posting/list.go +++ b/posting/list.go @@ -1014,21 +1014,22 @@ func (l *List) Value(readTs uint64) (rval types.Val, rerr error) { func (l *List) ValueFor(readTs uint64, langs []string) (rval types.Val, rerr error) { l.RLock() // All public methods should acquire locks, while private ones should assert them. defer l.RUnlock() - p, err := l.postingFor(readTs, langs) + p, lang, err := l.postingFor(readTs, langs) if err != nil { return rval, err } - return valueToTypesVal(p), nil + return valueToTypesVal(p, lang), nil } // PostingFor returns the posting according to the preferred language list. -func (l *List) PostingFor(readTs uint64, langs []string) (p *pb.Posting, rerr error) { +// It also returns the first available language. +func (l *List) PostingFor(readTs uint64, langs []string) (p *pb.Posting, lang string, rerr error) { l.RLock() defer l.RUnlock() return l.postingFor(readTs, langs) } -func (l *List) postingFor(readTs uint64, langs []string) (p *pb.Posting, rerr error) { +func (l *List) postingFor(readTs uint64, langs []string) (p *pb.Posting, lang string, rerr error) { l.AssertRLock() // Avoid recursive locking by asserting a lock here. return l.postingForLangs(readTs, langs) } @@ -1041,18 +1042,19 @@ func (l *List) ValueForTag(readTs uint64, tag string) (rval types.Val, rerr erro if err != nil { return rval, err } - return valueToTypesVal(p), nil + return valueToTypesVal(p, tag), nil } -func valueToTypesVal(p *pb.Posting) (rval types.Val) { +func valueToTypesVal(p *pb.Posting, lang string) (rval types.Val) { // This is ok because we dont modify the value of a posting. We create a newPosting // and add it to the PostingList to do a set. rval.Value = p.Value rval.Tid = types.TypeID(p.ValType) + rval.Lang = lang return } -func (l *List) postingForLangs(readTs uint64, langs []string) (pos *pb.Posting, rerr error) { +func (l *List) postingForLangs(readTs uint64, langs []string) (pos *pb.Posting, lang string, rerr error) { l.AssertRLock() any := false @@ -1064,16 +1066,16 @@ func (l *List) postingForLangs(readTs uint64, langs []string) (pos *pb.Posting, } pos, rerr = l.postingForTag(readTs, lang) if rerr == nil { - return pos, nil + return pos, lang, nil } } // look for value without language if any || len(langs) == 0 { if found, pos, err := l.findPosting(readTs, math.MaxUint64); err != nil { - return nil, err + return nil, "", err } else if found { - return pos, nil + return pos, "", nil } } @@ -1089,15 +1091,15 @@ func (l *List) postingForLangs(readTs uint64, langs []string) (pos *pb.Posting, return nil }) if err != nil { - return nil, err + return nil, "", err } } if found { - return pos, nil + return pos, "", nil } - return pos, ErrNoValue + return pos, "", ErrNoValue } func (l *List) postingForTag(readTs uint64, tag string) (p *pb.Posting, rerr error) { @@ -1121,7 +1123,7 @@ func (l *List) findValue(readTs, uid uint64) (rval types.Val, found bool, err er return rval, found, err } - return valueToTypesVal(p), true, nil + return valueToTypesVal(p, ""), true, nil } func (l *List) findPosting(readTs uint64, uid uint64) (found bool, pos *pb.Posting, err error) { @@ -1142,7 +1144,7 @@ func (l *List) Facets(readTs uint64, param *pb.FacetParams, langs []string) (fs ferr error) { l.RLock() defer l.RUnlock() - p, err := l.postingFor(readTs, langs) + p, _, err := l.postingFor(readTs, langs) if err != nil { return nil, err } diff --git a/query/common_test.go b/query/common_test.go index ed289ec36f4..a29a1d37839 100644 --- a/query/common_test.go +++ b/query/common_test.go @@ -224,6 +224,8 @@ type Node { } name : string @index(term, exact, trigram) @count @lang . +name_lang : string @lang . +lang_type : string @index(exact) . alt_name : [string] @index(term, exact, trigram) @count . alias : string @index(exact, term, fulltext) . abbr : string . @@ -348,7 +350,12 @@ func populateCluster() { <10005> "Bob" . <10006> "Colin" . <10007> "Elizabeth" . - + <10101> "zon"@sv . + <10101> "öffnen"@de . + <10101> "Test" . + <10102> "öppna0"@sv . + <10102> "zumachen"@de . + <10102> "Test" . <11000> "Baz Luhrmann"@en . <11001> "Strictly Ballroom"@en . <11002> "Puccini: La boheme (Sydney Opera)"@en . diff --git a/query/query2_test.go b/query/query2_test.go index d9c09911d86..c46eee21cfb 100644 --- a/query/query2_test.go +++ b/query/query2_test.go @@ -855,6 +855,38 @@ func TestToFastJSONOrderDesc2(t *testing.T) { js) } +func TestLanguageOrderNonIndexed1(t *testing.T) { + query := ` + { + q(func:eq(lang_type, "Test"), orderasc: name_lang@de) { + name_lang@de + name_lang@sv + } + } + ` + + js := processQueryNoErr(t, query) + require.JSONEq(t, + `{ "data": { "q": [ { "name_lang@de": "öffnen", "name_lang@sv": "zon" }, { "name_lang@de": "zumachen", "name_lang@sv": "öppna0" } ] } }`, + js) +} + +func TestLanguageOrderNonIndexed2(t *testing.T) { + query := ` + { + q(func:eq(lang_type, "Test"), orderasc: name_lang@sv) { + name_lang@de + name_lang@sv + } + } + ` + + js := processQueryNoErr(t, query) + require.JSONEq(t, + `{ "data": { "q": [ { "name_lang@de": "öffnen", "name_lang@sv": "zon" }, { "name_lang@de": "zumachen", "name_lang@sv": "öppna0" } ] } }`, + js) +} + // Test sorting / ordering by dob. func TestToFastJSONOrderDesc_pawan(t *testing.T) { diff --git a/types/collate/collate.go b/types/collate/collate.go new file mode 100644 index 00000000000..bae8b8ee1b6 --- /dev/null +++ b/types/collate/collate.go @@ -0,0 +1,75 @@ +/* + * Copyright 2019 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package collate + +import ( + "sync" + + "github.com/golang/glog" + "golang.org/x/text/collate" + "golang.org/x/text/language" +) + +const enBase = "en" + +// collateCache stores mapping of lang -> Collator. +var collateCache struct { + sync.Mutex + m map[string]*collate.Collator +} + +// Collator returns the collator object used to compare and sort strings according to a particular collation order. +// If the confidence of the matching is better than none, we return that particular collator object. +// Otherwise, we return the default collator object of "en" (English). +func Collator(lang string) *collate.Collator { + collateCache.Lock() + defer collateCache.Unlock() + + if lang == "" { + return collateCache.m[enBase] + } + + // Check for collator object in cache. + if cl, found := collateCache.m[lang]; found { + return cl + } + + // Parse will return the best guess for a language tag. + // It will return undefined, or 'language.Und', if it gives up. That means the language + // tag is either new (to the standard) or simply invalid. + if tag, err := language.Parse(lang); err != nil { + glog.Errorf("While trying to parse lang %q. Error: %v", lang, err) + } else if tag != language.Und { + if _, conf := tag.Base(); conf > language.No { + // Caching the collator object for future use. + cl := collate.New(tag) + collateCache.m[lang] = cl + return cl + } + } + + glog.Warningf("Unable to find lang %q. Reverting to English.", lang) + return collateCache.m[enBase] +} + +func init() { + // Initalize the cache and add the default Collator. + collateCache.m = make(map[string]*collate.Collator) + defaultTag, _ := language.Parse(enBase) + defaultCollater := collate.New(defaultTag) + collateCache.m[enBase] = defaultCollater +} diff --git a/types/conversion.go b/types/conversion.go index 918296bbaa1..093ab53d1d0 100644 --- a/types/conversion.go +++ b/types/conversion.go @@ -115,6 +115,7 @@ func Convert(from Val, toID TypeID) (Val, error) { } *res = val case StringID, DefaultID: + to.Lang = from.Lang *res = vc case BoolID: val, err := strconv.ParseBool(vc) @@ -508,7 +509,7 @@ func ObjectValue(id TypeID, value interface{}) (*api.Value, error) { func toBinary(id TypeID, b interface{}) ([]byte, error) { p1 := ValueForType(BinaryID) - if err := Marshal(Val{id, b}, &p1); err != nil { + if err := Marshal(Val{id, b, ""}, &p1); err != nil { return nil, err } return p1.Value.([]byte), nil diff --git a/types/conversion_test.go b/types/conversion_test.go index ccaadd9c891..2df8517c18e 100644 --- a/types/conversion_test.go +++ b/types/conversion_test.go @@ -190,14 +190,14 @@ func TestConvertToDefault(t *testing.T) { in Val out string }{ - {in: Val{StringID, []byte("a")}, out: "a"}, - {in: Val{StringID, []byte("")}, out: ""}, - {in: Val{DefaultID, []byte("abc")}, out: "abc"}, - {in: Val{BinaryID, []byte("2016")}, out: "2016"}, - {in: Val{IntID, bs(int64(3))}, out: "3"}, - {in: Val{FloatID, bs(float64(-3.5))}, out: "-3.5"}, - {in: Val{DateTimeID, bs(time.Date(2006, 01, 02, 15, 04, 05, 0, time.UTC))}, out: "2006-01-02T15:04:05Z"}, - {in: Val{DateTimeID, bs(time.Time{})}, out: "0001-01-01T00:00:00Z"}, + {in: Val{StringID, []byte("a"), ""}, out: "a"}, + {in: Val{StringID, []byte(""), ""}, out: ""}, + {in: Val{DefaultID, []byte("abc"), ""}, out: "abc"}, + {in: Val{BinaryID, []byte("2016"), ""}, out: "2016"}, + {in: Val{IntID, bs(int64(3)), ""}, out: "3"}, + {in: Val{FloatID, bs(float64(-3.5)), ""}, out: "-3.5"}, + {in: Val{DateTimeID, bs(time.Date(2006, 01, 02, 15, 04, 05, 0, time.UTC)), ""}, out: "2006-01-02T15:04:05Z"}, + {in: Val{DateTimeID, bs(time.Time{}), ""}, out: "0001-01-01T00:00:00Z"}, } for _, tc := range tests { @@ -212,18 +212,18 @@ func TestConvertFromDefault(t *testing.T) { in string out Val }{ - {in: "1", out: Val{IntID, int64(1)}}, - {in: "1.3", out: Val{FloatID, float64(1.3)}}, - {in: "true", out: Val{BoolID, true}}, - {in: "2016", out: Val{BinaryID, []byte("2016")}}, - {in: "", out: Val{BinaryID, []byte("")}}, - {in: "hello", out: Val{StringID, "hello"}}, - {in: "", out: Val{StringID, ""}}, - {in: "2016", out: Val{DateTimeID, time.Date(2016, 1, 1, 0, 0, 0, 0, time.UTC)}}, + {in: "1", out: Val{IntID, int64(1), ""}}, + {in: "1.3", out: Val{FloatID, float64(1.3), ""}}, + {in: "true", out: Val{BoolID, true, ""}}, + {in: "2016", out: Val{BinaryID, []byte("2016"), ""}}, + {in: "", out: Val{BinaryID, []byte(""), ""}}, + {in: "hello", out: Val{StringID, "hello", ""}}, + {in: "", out: Val{StringID, "", ""}}, + {in: "2016", out: Val{DateTimeID, time.Date(2016, 1, 1, 0, 0, 0, 0, time.UTC), ""}}, } for _, tc := range tests { - out, err := Convert(Val{DefaultID, []byte(tc.in)}, tc.out.Tid) + out, err := Convert(Val{DefaultID, []byte(tc.in), ""}, tc.out.Tid) require.NoError(t, err) require.EqualValues(t, tc.out, out) } @@ -234,13 +234,13 @@ func TestConvertToBinary(t *testing.T) { in Val out []byte }{ - {in: Val{StringID, []byte("a")}, out: []byte("a")}, - {in: Val{StringID, []byte("")}, out: []byte("")}, - {in: Val{DefaultID, []byte("abc")}, out: []byte("abc")}, - {in: Val{BinaryID, []byte("2016")}, out: []byte("2016")}, - {in: Val{IntID, bs(int64(3))}, out: bs(int64(3))}, - {in: Val{FloatID, bs(float64(-3.5))}, out: bs(float64(-3.5))}, - {in: Val{DateTimeID, bs(time.Date(2006, 01, 02, 15, 04, 05, 0, time.UTC))}, out: bs(time.Date(2006, 01, 02, 15, 04, 05, 0, time.UTC))}, + {in: Val{StringID, []byte("a"), ""}, out: []byte("a")}, + {in: Val{StringID, []byte(""), ""}, out: []byte("")}, + {in: Val{DefaultID, []byte("abc"), ""}, out: []byte("abc")}, + {in: Val{BinaryID, []byte("2016"), ""}, out: []byte("2016")}, + {in: Val{IntID, bs(int64(3)), ""}, out: bs(int64(3))}, + {in: Val{FloatID, bs(float64(-3.5)), ""}, out: bs(float64(-3.5))}, + {in: Val{DateTimeID, bs(time.Date(2006, 01, 02, 15, 04, 05, 0, time.UTC)), ""}, out: bs(time.Date(2006, 01, 02, 15, 04, 05, 0, time.UTC))}, } for _, tc := range tests { @@ -255,22 +255,22 @@ func TestConvertFromBinary(t *testing.T) { in []byte out Val }{ - {in: bs(true), out: Val{BoolID, true}}, - {in: bs(false), out: Val{BoolID, false}}, - {in: []byte(""), out: Val{BoolID, false}}, - {in: nil, out: Val{BoolID, false}}, - {in: bs(int64(1)), out: Val{IntID, int64(1)}}, - {in: bs(float64(1.3)), out: Val{FloatID, float64(1.3)}}, - {in: []byte("2016"), out: Val{BinaryID, []byte("2016")}}, - {in: []byte(""), out: Val{BinaryID, []byte("")}}, - {in: []byte("hello"), out: Val{StringID, "hello"}}, - {in: []byte(""), out: Val{StringID, ""}}, - {in: bs(time.Date(2016, 1, 1, 0, 0, 0, 0, time.UTC)), out: Val{DateTimeID, time.Date(2016, 1, 1, 0, 0, 0, 0, time.UTC)}}, - {in: bs(time.Time{}), out: Val{DateTimeID, time.Time{}}}, + {in: bs(true), out: Val{BoolID, true, ""}}, + {in: bs(false), out: Val{BoolID, false, ""}}, + {in: []byte(""), out: Val{BoolID, false, ""}}, + {in: nil, out: Val{BoolID, false, ""}}, + {in: bs(int64(1)), out: Val{IntID, int64(1), ""}}, + {in: bs(float64(1.3)), out: Val{FloatID, float64(1.3), ""}}, + {in: []byte("2016"), out: Val{BinaryID, []byte("2016"), ""}}, + {in: []byte(""), out: Val{BinaryID, []byte(""), ""}}, + {in: []byte("hello"), out: Val{StringID, "hello", ""}}, + {in: []byte(""), out: Val{StringID, "", ""}}, + {in: bs(time.Date(2016, 1, 1, 0, 0, 0, 0, time.UTC)), out: Val{DateTimeID, time.Date(2016, 1, 1, 0, 0, 0, 0, time.UTC), ""}}, + {in: bs(time.Time{}), out: Val{DateTimeID, time.Time{}, ""}}, } for _, tc := range tests { - out, err := Convert(Val{BinaryID, tc.in}, tc.out.Tid) + out, err := Convert(Val{BinaryID, tc.in, ""}, tc.out.Tid) require.NoError(t, err) require.EqualValues(t, tc.out, out) } @@ -317,24 +317,24 @@ func TestConvertFromPassword(t *testing.T) { out Val failure string }{ - {in: "", out: Val{StringID, ""}}, - {in: "password", out: Val{PasswordID, "password"}}, - {in: "password", out: Val{StringID, "password"}}, - {in: "password", out: Val{BinaryID, []byte("password")}}, + {in: "", out: Val{StringID, "", ""}}, + {in: "password", out: Val{PasswordID, "password", ""}}, + {in: "password", out: Val{StringID, "password", ""}}, + {in: "password", out: Val{BinaryID, []byte("password"), ""}}, { in: "password", failure: `Cannot convert password to type default`, }, { - in: "password", out: Val{IntID, bs(int64(1))}, + in: "password", out: Val{IntID, bs(int64(1)), ""}, failure: `Cannot convert password to type int`, }, { - in: "password", out: Val{FloatID, bs(float64(1.0))}, + in: "password", out: Val{FloatID, bs(float64(1.0)), ""}, failure: `Cannot convert password to type float`, }, { - in: "password", out: Val{BoolID, bs(true)}, + in: "password", out: Val{BoolID, bs(true), ""}, failure: `Cannot convert password to type bool`, }, } @@ -357,23 +357,23 @@ func TestConvertToPassword(t *testing.T) { out string failure string }{ - {in: Val{StringID, []byte("testing")}, out: "$2a$10$"}, - {in: Val{PasswordID, []byte("testing")}, out: "testing"}, - {in: Val{DefaultID, []byte("testing")}, out: "$2a$10$"}, + {in: Val{StringID, []byte("testing"), ""}, out: "$2a$10$"}, + {in: Val{PasswordID, []byte("testing"), ""}, out: "testing"}, + {in: Val{DefaultID, []byte("testing"), ""}, out: "$2a$10$"}, { - in: Val{StringID, []byte("")}, + in: Val{StringID, []byte(""), ""}, failure: `Password too short, i.e. should have at least 6 chars`, }, { - in: Val{IntID, bs(int64(1))}, + in: Val{IntID, bs(int64(1)), ""}, failure: `Cannot convert int to type password`, }, { - in: Val{FloatID, bs(float64(1.0))}, + in: Val{FloatID, bs(float64(1.0)), ""}, failure: `Cannot convert float to type password`, }, { - in: Val{BoolID, bs(true)}, + in: Val{BoolID, bs(true), ""}, failure: `Cannot convert bool to type password`, }, } diff --git a/types/geo_test.go b/types/geo_test.go index da4848fabf4..8ecfb5e24e9 100644 --- a/types/geo_test.go +++ b/types/geo_test.go @@ -27,7 +27,7 @@ func TestParse(t *testing.T) { `{'type':'MultiLineString','coordinates':[[[1,2,3],[4,5,6],[7,8,9],[1,2,3]]]}`, } for _, v := range array { - src := Val{StringID, []byte(v)} + src := Val{StringID, []byte(v), ""} if g, err := Convert(src, GeoID); err != nil { t.Errorf("Error parsing %s: %v", v, err) @@ -45,7 +45,7 @@ func TestParse(t *testing.T) { t.Errorf("Error marshaling to WKB: %v", err) } - src := Val{GeoID, []byte(wkb.Value.([]byte))} + src := Val{GeoID, []byte(wkb.Value.([]byte)), ""} if bg, err := Convert(src, GeoID); err != nil { t.Errorf("Error unmarshaling WKB: %v", err) @@ -64,7 +64,7 @@ func TestParseGeoJsonErrors(t *testing.T) { `thisisntjson`, } for _, v := range array { - src := Val{StringID, []byte(v)} + src := Val{StringID, []byte(v), ""} if _, err := Convert(src, GeoID); err == nil { t.Errorf("Expected error parsing %s: %v", v, err) } diff --git a/types/scalar_types.go b/types/scalar_types.go index 5d66413dc83..a3edd2884a1 100644 --- a/types/scalar_types.go +++ b/types/scalar_types.go @@ -106,6 +106,7 @@ func (t TypeID) Name() string { type Val struct { Tid TypeID Value interface{} + Lang string } // Safe ensures that Val's Value is not nil. This is useful when doing type @@ -144,43 +145,43 @@ func ValueForType(id TypeID) Val { switch id { case BinaryID: var b []byte - return Val{BinaryID, &b} + return Val{BinaryID, &b, ""} case IntID: var i int64 - return Val{IntID, &i} + return Val{IntID, &i, ""} case FloatID: var f float64 - return Val{FloatID, &f} + return Val{FloatID, &f, ""} case BoolID: var b bool - return Val{BoolID, &b} + return Val{BoolID, &b, ""} case DateTimeID: var t time.Time - return Val{DateTimeID, &t} + return Val{DateTimeID, &t, ""} case StringID: var s string - return Val{StringID, s} + return Val{StringID, s, ""} case DefaultID: var s string - return Val{DefaultID, s} + return Val{DefaultID, s, ""} case GeoID: var g geom.T - return Val{GeoID, &g} + return Val{GeoID, &g, ""} case UidID: var i uint64 - return Val{UidID, &i} + return Val{UidID, &i, ""} case PasswordID: var p string - return Val{PasswordID, p} + return Val{PasswordID, p, ""} default: return Val{} diff --git a/types/sort.go b/types/sort.go index bfff2e477bc..f58b5e7b82e 100644 --- a/types/sort.go +++ b/types/sort.go @@ -21,6 +21,7 @@ import ( "time" "github.com/dgraph-io/dgraph/protos/pb" + "github.com/dgraph-io/dgraph/types/collate" "github.com/dgraph-io/dgraph/x" "github.com/pkg/errors" ) @@ -134,6 +135,11 @@ func less(a, b Val) bool { case UidID: return (a.Value.(uint64) < b.Value.(uint64)) case StringID, DefaultID: + // Use language comparator. + if a.Lang != "" { + cl := collate.Collator(a.Lang) + return cl.CompareString(a.Safe().(string), b.Safe().(string)) < 0 + } return (a.Safe().(string)) < (b.Safe().(string)) } return false diff --git a/types/sort_test.go b/types/sort_test.go index 2c64550dbbc..e71aab28895 100644 --- a/types/sort_test.go +++ b/types/sort_test.go @@ -36,7 +36,7 @@ func toString(t *testing.T, values [][]Val, vID TypeID) []string { func getInput(t *testing.T, tid TypeID, in []string) [][]Val { list := make([][]Val, len(in)) for i, s := range in { - va := Val{StringID, []byte(s)} + va := Val{StringID, []byte(s), ""} v, err := Convert(va, tid) require.NoError(t, err) list[i] = []Val{v} diff --git a/worker/task.go b/worker/task.go index baf11f25b7f..1b83b91b61f 100644 --- a/worker/task.go +++ b/worker/task.go @@ -534,7 +534,7 @@ func retrieveValuesAndFacets(args funcArgs, pl *posting.List, listType bool) ( } // Retrieve the posting that matches the language preferences. - langMatch, err := pl.PostingFor(q.ReadTs, q.Langs) + langMatch, _, err := pl.PostingFor(q.ReadTs, q.Langs) if err != nil && err != posting.ErrNoValue { return nil, nil, err } From 63e659b0ef3067d1c12d0a688b0996df8fcd6054 Mon Sep 17 00:00:00 2001 From: Arijit Das Date: Mon, 18 Nov 2019 18:21:03 +0530 Subject: [PATCH 2/6] Remove the collate cache and addressed review comments. --- gql/parser.go | 8 +++ posting/list.go | 32 ++++++------ query/common_test.go | 4 +- query/query.go | 4 +- types/collate/collate.go | 75 ---------------------------- types/conversion.go | 3 +- types/conversion_test.go | 102 +++++++++++++++++++-------------------- types/geo_test.go | 6 +-- types/scalar_types.go | 21 ++++---- types/sort.go | 31 +++++++----- types/sort_test.go | 16 +++--- worker/sort.go | 12 ++++- worker/task.go | 2 +- 13 files changed, 130 insertions(+), 186 deletions(-) delete mode 100644 types/collate/collate.go diff --git a/gql/parser.go b/gql/parser.go index 437416f0314..17409fea122 100644 --- a/gql/parser.go +++ b/gql/parser.go @@ -2599,6 +2599,10 @@ func getRoot(it *lex.ItemIterator) (gq *GraphQuery, rerr error) { return nil, it.Errorf("Sorting by an attribute: [%s] can only be done once", val) } attr, langs := attrAndLang(val) + if len(langs) > 1 { + return nil, it.Errorf("Sorting by an attribute: [%s] "+ + "can only be done on one language", val) + } gq.Order = append(gq.Order, &pb.Order{Attr: attr, Desc: key == "orderdesc", Langs: langs}) order[val] = true @@ -3011,6 +3015,10 @@ func godeep(it *lex.ItemIterator, gq *GraphQuery) error { "can only be done once", p.Val) } attr, langs := attrAndLang(p.Val) + if len(langs) > 1 { + return it.Errorf("Sorting by an attribute: [%s] "+ + "can only be done on one language", p.Val) + } curp.Order = append(curp.Order, &pb.Order{Attr: attr, Desc: p.Key == "orderdesc", Langs: langs}) order[p.Val] = true diff --git a/posting/list.go b/posting/list.go index 7bf84f98a7b..64b505f7428 100644 --- a/posting/list.go +++ b/posting/list.go @@ -1014,22 +1014,21 @@ func (l *List) Value(readTs uint64) (rval types.Val, rerr error) { func (l *List) ValueFor(readTs uint64, langs []string) (rval types.Val, rerr error) { l.RLock() // All public methods should acquire locks, while private ones should assert them. defer l.RUnlock() - p, lang, err := l.postingFor(readTs, langs) + p, err := l.postingFor(readTs, langs) if err != nil { return rval, err } - return valueToTypesVal(p, lang), nil + return valueToTypesVal(p), nil } // PostingFor returns the posting according to the preferred language list. -// It also returns the first available language. -func (l *List) PostingFor(readTs uint64, langs []string) (p *pb.Posting, lang string, rerr error) { +func (l *List) PostingFor(readTs uint64, langs []string) (p *pb.Posting, rerr error) { l.RLock() defer l.RUnlock() return l.postingFor(readTs, langs) } -func (l *List) postingFor(readTs uint64, langs []string) (p *pb.Posting, lang string, rerr error) { +func (l *List) postingFor(readTs uint64, langs []string) (p *pb.Posting, rerr error) { l.AssertRLock() // Avoid recursive locking by asserting a lock here. return l.postingForLangs(readTs, langs) } @@ -1042,19 +1041,18 @@ func (l *List) ValueForTag(readTs uint64, tag string) (rval types.Val, rerr erro if err != nil { return rval, err } - return valueToTypesVal(p, tag), nil + return valueToTypesVal(p), nil } -func valueToTypesVal(p *pb.Posting, lang string) (rval types.Val) { +func valueToTypesVal(p *pb.Posting) (rval types.Val) { // This is ok because we dont modify the value of a posting. We create a newPosting // and add it to the PostingList to do a set. rval.Value = p.Value rval.Tid = types.TypeID(p.ValType) - rval.Lang = lang return } -func (l *List) postingForLangs(readTs uint64, langs []string) (pos *pb.Posting, lang string, rerr error) { +func (l *List) postingForLangs(readTs uint64, langs []string) (pos *pb.Posting, rerr error) { l.AssertRLock() any := false @@ -1066,16 +1064,16 @@ func (l *List) postingForLangs(readTs uint64, langs []string) (pos *pb.Posting, } pos, rerr = l.postingForTag(readTs, lang) if rerr == nil { - return pos, lang, nil + return pos, nil } } // look for value without language if any || len(langs) == 0 { if found, pos, err := l.findPosting(readTs, math.MaxUint64); err != nil { - return nil, "", err + return nil, err } else if found { - return pos, "", nil + return pos, nil } } @@ -1091,15 +1089,15 @@ func (l *List) postingForLangs(readTs uint64, langs []string) (pos *pb.Posting, return nil }) if err != nil { - return nil, "", err + return nil, err } } if found { - return pos, "", nil + return pos, nil } - return pos, "", ErrNoValue + return pos, ErrNoValue } func (l *List) postingForTag(readTs uint64, tag string) (p *pb.Posting, rerr error) { @@ -1123,7 +1121,7 @@ func (l *List) findValue(readTs, uid uint64) (rval types.Val, found bool, err er return rval, found, err } - return valueToTypesVal(p, ""), true, nil + return valueToTypesVal(p), true, nil } func (l *List) findPosting(readTs uint64, uid uint64) (found bool, pos *pb.Posting, err error) { @@ -1144,7 +1142,7 @@ func (l *List) Facets(readTs uint64, param *pb.FacetParams, langs []string) (fs ferr error) { l.RLock() defer l.RUnlock() - p, _, err := l.postingFor(readTs, langs) + p, err := l.postingFor(readTs, langs) if err != nil { return nil, err } diff --git a/query/common_test.go b/query/common_test.go index a29a1d37839..1d401761b0b 100644 --- a/query/common_test.go +++ b/query/common_test.go @@ -224,7 +224,7 @@ type Node { } name : string @index(term, exact, trigram) @count @lang . -name_lang : string @lang . +name_lang : string @lang . lang_type : string @index(exact) . alt_name : [string] @index(term, exact, trigram) @count . alias : string @index(exact, term, fulltext) . @@ -354,7 +354,7 @@ func populateCluster() { <10101> "öffnen"@de . <10101> "Test" . <10102> "öppna0"@sv . - <10102> "zumachen"@de . + <10102> "zumachen"@de . <10102> "Test" . <11000> "Baz Luhrmann"@en . <11001> "Strictly Ballroom"@en . diff --git a/query/query.go b/query/query.go index 6d1090839cf..65d85e7f391 100644 --- a/query/query.go +++ b/query/query.go @@ -2325,7 +2325,7 @@ func (sg *SubGraph) sortAndPaginateUsingFacet(ctx context.Context) error { continue } if err := types.SortWithFacet(values, &pb.List{Uids: uids}, - facetList, []bool{sg.Params.FacetOrderDesc}); err != nil { + facetList, []bool{sg.Params.FacetOrderDesc}, ""); err != nil { return err } sg.uidMatrix[i].Uids = uids @@ -2372,7 +2372,7 @@ func (sg *SubGraph) sortAndPaginateUsingVar(ctx context.Context) error { if len(values) == 0 { continue } - if err := types.Sort(values, &pb.List{Uids: uids}, []bool{sg.Params.Order[0].Desc}); err != nil { + if err := types.Sort(values, &pb.List{Uids: uids}, []bool{sg.Params.Order[0].Desc}, ""); err != nil { return err } sg.uidMatrix[i].Uids = uids diff --git a/types/collate/collate.go b/types/collate/collate.go deleted file mode 100644 index bae8b8ee1b6..00000000000 --- a/types/collate/collate.go +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2019 Dgraph Labs, Inc. and Contributors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package collate - -import ( - "sync" - - "github.com/golang/glog" - "golang.org/x/text/collate" - "golang.org/x/text/language" -) - -const enBase = "en" - -// collateCache stores mapping of lang -> Collator. -var collateCache struct { - sync.Mutex - m map[string]*collate.Collator -} - -// Collator returns the collator object used to compare and sort strings according to a particular collation order. -// If the confidence of the matching is better than none, we return that particular collator object. -// Otherwise, we return the default collator object of "en" (English). -func Collator(lang string) *collate.Collator { - collateCache.Lock() - defer collateCache.Unlock() - - if lang == "" { - return collateCache.m[enBase] - } - - // Check for collator object in cache. - if cl, found := collateCache.m[lang]; found { - return cl - } - - // Parse will return the best guess for a language tag. - // It will return undefined, or 'language.Und', if it gives up. That means the language - // tag is either new (to the standard) or simply invalid. - if tag, err := language.Parse(lang); err != nil { - glog.Errorf("While trying to parse lang %q. Error: %v", lang, err) - } else if tag != language.Und { - if _, conf := tag.Base(); conf > language.No { - // Caching the collator object for future use. - cl := collate.New(tag) - collateCache.m[lang] = cl - return cl - } - } - - glog.Warningf("Unable to find lang %q. Reverting to English.", lang) - return collateCache.m[enBase] -} - -func init() { - // Initalize the cache and add the default Collator. - collateCache.m = make(map[string]*collate.Collator) - defaultTag, _ := language.Parse(enBase) - defaultCollater := collate.New(defaultTag) - collateCache.m[enBase] = defaultCollater -} diff --git a/types/conversion.go b/types/conversion.go index 093ab53d1d0..918296bbaa1 100644 --- a/types/conversion.go +++ b/types/conversion.go @@ -115,7 +115,6 @@ func Convert(from Val, toID TypeID) (Val, error) { } *res = val case StringID, DefaultID: - to.Lang = from.Lang *res = vc case BoolID: val, err := strconv.ParseBool(vc) @@ -509,7 +508,7 @@ func ObjectValue(id TypeID, value interface{}) (*api.Value, error) { func toBinary(id TypeID, b interface{}) ([]byte, error) { p1 := ValueForType(BinaryID) - if err := Marshal(Val{id, b, ""}, &p1); err != nil { + if err := Marshal(Val{id, b}, &p1); err != nil { return nil, err } return p1.Value.([]byte), nil diff --git a/types/conversion_test.go b/types/conversion_test.go index 2df8517c18e..ccaadd9c891 100644 --- a/types/conversion_test.go +++ b/types/conversion_test.go @@ -190,14 +190,14 @@ func TestConvertToDefault(t *testing.T) { in Val out string }{ - {in: Val{StringID, []byte("a"), ""}, out: "a"}, - {in: Val{StringID, []byte(""), ""}, out: ""}, - {in: Val{DefaultID, []byte("abc"), ""}, out: "abc"}, - {in: Val{BinaryID, []byte("2016"), ""}, out: "2016"}, - {in: Val{IntID, bs(int64(3)), ""}, out: "3"}, - {in: Val{FloatID, bs(float64(-3.5)), ""}, out: "-3.5"}, - {in: Val{DateTimeID, bs(time.Date(2006, 01, 02, 15, 04, 05, 0, time.UTC)), ""}, out: "2006-01-02T15:04:05Z"}, - {in: Val{DateTimeID, bs(time.Time{}), ""}, out: "0001-01-01T00:00:00Z"}, + {in: Val{StringID, []byte("a")}, out: "a"}, + {in: Val{StringID, []byte("")}, out: ""}, + {in: Val{DefaultID, []byte("abc")}, out: "abc"}, + {in: Val{BinaryID, []byte("2016")}, out: "2016"}, + {in: Val{IntID, bs(int64(3))}, out: "3"}, + {in: Val{FloatID, bs(float64(-3.5))}, out: "-3.5"}, + {in: Val{DateTimeID, bs(time.Date(2006, 01, 02, 15, 04, 05, 0, time.UTC))}, out: "2006-01-02T15:04:05Z"}, + {in: Val{DateTimeID, bs(time.Time{})}, out: "0001-01-01T00:00:00Z"}, } for _, tc := range tests { @@ -212,18 +212,18 @@ func TestConvertFromDefault(t *testing.T) { in string out Val }{ - {in: "1", out: Val{IntID, int64(1), ""}}, - {in: "1.3", out: Val{FloatID, float64(1.3), ""}}, - {in: "true", out: Val{BoolID, true, ""}}, - {in: "2016", out: Val{BinaryID, []byte("2016"), ""}}, - {in: "", out: Val{BinaryID, []byte(""), ""}}, - {in: "hello", out: Val{StringID, "hello", ""}}, - {in: "", out: Val{StringID, "", ""}}, - {in: "2016", out: Val{DateTimeID, time.Date(2016, 1, 1, 0, 0, 0, 0, time.UTC), ""}}, + {in: "1", out: Val{IntID, int64(1)}}, + {in: "1.3", out: Val{FloatID, float64(1.3)}}, + {in: "true", out: Val{BoolID, true}}, + {in: "2016", out: Val{BinaryID, []byte("2016")}}, + {in: "", out: Val{BinaryID, []byte("")}}, + {in: "hello", out: Val{StringID, "hello"}}, + {in: "", out: Val{StringID, ""}}, + {in: "2016", out: Val{DateTimeID, time.Date(2016, 1, 1, 0, 0, 0, 0, time.UTC)}}, } for _, tc := range tests { - out, err := Convert(Val{DefaultID, []byte(tc.in), ""}, tc.out.Tid) + out, err := Convert(Val{DefaultID, []byte(tc.in)}, tc.out.Tid) require.NoError(t, err) require.EqualValues(t, tc.out, out) } @@ -234,13 +234,13 @@ func TestConvertToBinary(t *testing.T) { in Val out []byte }{ - {in: Val{StringID, []byte("a"), ""}, out: []byte("a")}, - {in: Val{StringID, []byte(""), ""}, out: []byte("")}, - {in: Val{DefaultID, []byte("abc"), ""}, out: []byte("abc")}, - {in: Val{BinaryID, []byte("2016"), ""}, out: []byte("2016")}, - {in: Val{IntID, bs(int64(3)), ""}, out: bs(int64(3))}, - {in: Val{FloatID, bs(float64(-3.5)), ""}, out: bs(float64(-3.5))}, - {in: Val{DateTimeID, bs(time.Date(2006, 01, 02, 15, 04, 05, 0, time.UTC)), ""}, out: bs(time.Date(2006, 01, 02, 15, 04, 05, 0, time.UTC))}, + {in: Val{StringID, []byte("a")}, out: []byte("a")}, + {in: Val{StringID, []byte("")}, out: []byte("")}, + {in: Val{DefaultID, []byte("abc")}, out: []byte("abc")}, + {in: Val{BinaryID, []byte("2016")}, out: []byte("2016")}, + {in: Val{IntID, bs(int64(3))}, out: bs(int64(3))}, + {in: Val{FloatID, bs(float64(-3.5))}, out: bs(float64(-3.5))}, + {in: Val{DateTimeID, bs(time.Date(2006, 01, 02, 15, 04, 05, 0, time.UTC))}, out: bs(time.Date(2006, 01, 02, 15, 04, 05, 0, time.UTC))}, } for _, tc := range tests { @@ -255,22 +255,22 @@ func TestConvertFromBinary(t *testing.T) { in []byte out Val }{ - {in: bs(true), out: Val{BoolID, true, ""}}, - {in: bs(false), out: Val{BoolID, false, ""}}, - {in: []byte(""), out: Val{BoolID, false, ""}}, - {in: nil, out: Val{BoolID, false, ""}}, - {in: bs(int64(1)), out: Val{IntID, int64(1), ""}}, - {in: bs(float64(1.3)), out: Val{FloatID, float64(1.3), ""}}, - {in: []byte("2016"), out: Val{BinaryID, []byte("2016"), ""}}, - {in: []byte(""), out: Val{BinaryID, []byte(""), ""}}, - {in: []byte("hello"), out: Val{StringID, "hello", ""}}, - {in: []byte(""), out: Val{StringID, "", ""}}, - {in: bs(time.Date(2016, 1, 1, 0, 0, 0, 0, time.UTC)), out: Val{DateTimeID, time.Date(2016, 1, 1, 0, 0, 0, 0, time.UTC), ""}}, - {in: bs(time.Time{}), out: Val{DateTimeID, time.Time{}, ""}}, + {in: bs(true), out: Val{BoolID, true}}, + {in: bs(false), out: Val{BoolID, false}}, + {in: []byte(""), out: Val{BoolID, false}}, + {in: nil, out: Val{BoolID, false}}, + {in: bs(int64(1)), out: Val{IntID, int64(1)}}, + {in: bs(float64(1.3)), out: Val{FloatID, float64(1.3)}}, + {in: []byte("2016"), out: Val{BinaryID, []byte("2016")}}, + {in: []byte(""), out: Val{BinaryID, []byte("")}}, + {in: []byte("hello"), out: Val{StringID, "hello"}}, + {in: []byte(""), out: Val{StringID, ""}}, + {in: bs(time.Date(2016, 1, 1, 0, 0, 0, 0, time.UTC)), out: Val{DateTimeID, time.Date(2016, 1, 1, 0, 0, 0, 0, time.UTC)}}, + {in: bs(time.Time{}), out: Val{DateTimeID, time.Time{}}}, } for _, tc := range tests { - out, err := Convert(Val{BinaryID, tc.in, ""}, tc.out.Tid) + out, err := Convert(Val{BinaryID, tc.in}, tc.out.Tid) require.NoError(t, err) require.EqualValues(t, tc.out, out) } @@ -317,24 +317,24 @@ func TestConvertFromPassword(t *testing.T) { out Val failure string }{ - {in: "", out: Val{StringID, "", ""}}, - {in: "password", out: Val{PasswordID, "password", ""}}, - {in: "password", out: Val{StringID, "password", ""}}, - {in: "password", out: Val{BinaryID, []byte("password"), ""}}, + {in: "", out: Val{StringID, ""}}, + {in: "password", out: Val{PasswordID, "password"}}, + {in: "password", out: Val{StringID, "password"}}, + {in: "password", out: Val{BinaryID, []byte("password")}}, { in: "password", failure: `Cannot convert password to type default`, }, { - in: "password", out: Val{IntID, bs(int64(1)), ""}, + in: "password", out: Val{IntID, bs(int64(1))}, failure: `Cannot convert password to type int`, }, { - in: "password", out: Val{FloatID, bs(float64(1.0)), ""}, + in: "password", out: Val{FloatID, bs(float64(1.0))}, failure: `Cannot convert password to type float`, }, { - in: "password", out: Val{BoolID, bs(true), ""}, + in: "password", out: Val{BoolID, bs(true)}, failure: `Cannot convert password to type bool`, }, } @@ -357,23 +357,23 @@ func TestConvertToPassword(t *testing.T) { out string failure string }{ - {in: Val{StringID, []byte("testing"), ""}, out: "$2a$10$"}, - {in: Val{PasswordID, []byte("testing"), ""}, out: "testing"}, - {in: Val{DefaultID, []byte("testing"), ""}, out: "$2a$10$"}, + {in: Val{StringID, []byte("testing")}, out: "$2a$10$"}, + {in: Val{PasswordID, []byte("testing")}, out: "testing"}, + {in: Val{DefaultID, []byte("testing")}, out: "$2a$10$"}, { - in: Val{StringID, []byte(""), ""}, + in: Val{StringID, []byte("")}, failure: `Password too short, i.e. should have at least 6 chars`, }, { - in: Val{IntID, bs(int64(1)), ""}, + in: Val{IntID, bs(int64(1))}, failure: `Cannot convert int to type password`, }, { - in: Val{FloatID, bs(float64(1.0)), ""}, + in: Val{FloatID, bs(float64(1.0))}, failure: `Cannot convert float to type password`, }, { - in: Val{BoolID, bs(true), ""}, + in: Val{BoolID, bs(true)}, failure: `Cannot convert bool to type password`, }, } diff --git a/types/geo_test.go b/types/geo_test.go index 8ecfb5e24e9..da4848fabf4 100644 --- a/types/geo_test.go +++ b/types/geo_test.go @@ -27,7 +27,7 @@ func TestParse(t *testing.T) { `{'type':'MultiLineString','coordinates':[[[1,2,3],[4,5,6],[7,8,9],[1,2,3]]]}`, } for _, v := range array { - src := Val{StringID, []byte(v), ""} + src := Val{StringID, []byte(v)} if g, err := Convert(src, GeoID); err != nil { t.Errorf("Error parsing %s: %v", v, err) @@ -45,7 +45,7 @@ func TestParse(t *testing.T) { t.Errorf("Error marshaling to WKB: %v", err) } - src := Val{GeoID, []byte(wkb.Value.([]byte)), ""} + src := Val{GeoID, []byte(wkb.Value.([]byte))} if bg, err := Convert(src, GeoID); err != nil { t.Errorf("Error unmarshaling WKB: %v", err) @@ -64,7 +64,7 @@ func TestParseGeoJsonErrors(t *testing.T) { `thisisntjson`, } for _, v := range array { - src := Val{StringID, []byte(v), ""} + src := Val{StringID, []byte(v)} if _, err := Convert(src, GeoID); err == nil { t.Errorf("Expected error parsing %s: %v", v, err) } diff --git a/types/scalar_types.go b/types/scalar_types.go index a3edd2884a1..5d66413dc83 100644 --- a/types/scalar_types.go +++ b/types/scalar_types.go @@ -106,7 +106,6 @@ func (t TypeID) Name() string { type Val struct { Tid TypeID Value interface{} - Lang string } // Safe ensures that Val's Value is not nil. This is useful when doing type @@ -145,43 +144,43 @@ func ValueForType(id TypeID) Val { switch id { case BinaryID: var b []byte - return Val{BinaryID, &b, ""} + return Val{BinaryID, &b} case IntID: var i int64 - return Val{IntID, &i, ""} + return Val{IntID, &i} case FloatID: var f float64 - return Val{FloatID, &f, ""} + return Val{FloatID, &f} case BoolID: var b bool - return Val{BoolID, &b, ""} + return Val{BoolID, &b} case DateTimeID: var t time.Time - return Val{DateTimeID, &t, ""} + return Val{DateTimeID, &t} case StringID: var s string - return Val{StringID, s, ""} + return Val{StringID, s} case DefaultID: var s string - return Val{DefaultID, s, ""} + return Val{DefaultID, s} case GeoID: var g geom.T - return Val{GeoID, &g, ""} + return Val{GeoID, &g} case UidID: var i uint64 - return Val{UidID, &i, ""} + return Val{UidID, &i} case PasswordID: var p string - return Val{PasswordID, p, ""} + return Val{PasswordID, p} default: return Val{} diff --git a/types/sort.go b/types/sort.go index f58b5e7b82e..c45566bd1c5 100644 --- a/types/sort.go +++ b/types/sort.go @@ -21,9 +21,10 @@ import ( "time" "github.com/dgraph-io/dgraph/protos/pb" - "github.com/dgraph-io/dgraph/types/collate" "github.com/dgraph-io/dgraph/x" "github.com/pkg/errors" + "golang.org/x/text/collate" + "golang.org/x/text/language" ) type sortBase struct { @@ -31,6 +32,7 @@ type sortBase struct { desc []bool // Sort orders for different values. ul *pb.List o []*pb.Facets + cl *collate.Collator } // Len returns size of vector. @@ -71,7 +73,7 @@ func (s byValue) Less(i, j int) bool { } // Its either less or greater. - less := less(first[vidx], second[vidx]) + less := less(first[vidx], second[vidx], s.cl) if s.desc[vidx] { return !less } @@ -82,7 +84,7 @@ func (s byValue) Less(i, j int) bool { // SortWithFacet sorts the given array in-place and considers the given facets to calculate // the proper ordering. -func SortWithFacet(v [][]Val, ul *pb.List, l []*pb.Facets, desc []bool) error { +func SortWithFacet(v [][]Val, ul *pb.List, l []*pb.Facets, desc []bool, lang string) error { if len(v) == 0 || len(v[0]) == 0 { return nil } @@ -94,16 +96,22 @@ func SortWithFacet(v [][]Val, ul *pb.List, l []*pb.Facets, desc []bool) error { default: return errors.Errorf("Value of type: %s isn't sortable", typ.Name()) } - var toBeSorted sort.Interface - b := sortBase{v, desc, ul, l} - toBeSorted = byValue{b} + + var cl *collate.Collator + if lang != "" { + langTag, _ := language.Parse(lang) + cl = collate.New(langTag) + } + + b := sortBase{v, desc, ul, l, cl} + toBeSorted := byValue{b} sort.Sort(toBeSorted) return nil } // Sort sorts the given array in-place. -func Sort(v [][]Val, ul *pb.List, desc []bool) error { - return SortWithFacet(v, ul, nil, desc) +func Sort(v [][]Val, ul *pb.List, desc []bool, lang string) error { + return SortWithFacet(v, ul, nil, desc, lang) } // Less returns true if a is strictly less than b. @@ -118,10 +126,10 @@ func Less(a, b Val) (bool, error) { default: return false, errors.Errorf("Compare not supported for type: %v", a.Tid) } - return less(a, b), nil + return less(a, b, nil), nil } -func less(a, b Val) bool { +func less(a, b Val, cl *collate.Collator) bool { if a.Tid != b.Tid { return mismatchedLess(a, b) } @@ -136,8 +144,7 @@ func less(a, b Val) bool { return (a.Value.(uint64) < b.Value.(uint64)) case StringID, DefaultID: // Use language comparator. - if a.Lang != "" { - cl := collate.Collator(a.Lang) + if cl != nil { return cl.CompareString(a.Safe().(string), b.Safe().(string)) < 0 } return (a.Safe().(string)) < (b.Safe().(string)) diff --git a/types/sort_test.go b/types/sort_test.go index e71aab28895..60dba69dc90 100644 --- a/types/sort_test.go +++ b/types/sort_test.go @@ -36,7 +36,7 @@ func toString(t *testing.T, values [][]Val, vID TypeID) []string { func getInput(t *testing.T, tid TypeID, in []string) [][]Val { list := make([][]Val, len(in)) for i, s := range in { - va := Val{StringID, []byte(s), ""} + va := Val{StringID, []byte(s)} v, err := Convert(va, tid) require.NoError(t, err) list[i] = []Val{v} @@ -55,7 +55,7 @@ func getUIDList(n int) *pb.List { func TestSortStrings(t *testing.T) { list := getInput(t, StringID, []string{"bb", "aaa", "aa", "bab"}) ul := getUIDList(4) - require.NoError(t, Sort(list, ul, []bool{false})) + require.NoError(t, Sort(list, ul, []bool{false}, "")) require.EqualValues(t, []uint64{300, 200, 400, 100}, ul.Uids) require.EqualValues(t, []string{"aa", "aaa", "bab", "bb"}, toString(t, list, StringID)) @@ -64,7 +64,7 @@ func TestSortStrings(t *testing.T) { func TestSortInts(t *testing.T) { list := getInput(t, IntID, []string{"22", "111", "11", "212"}) ul := getUIDList(4) - require.NoError(t, Sort(list, ul, []bool{false})) + require.NoError(t, Sort(list, ul, []bool{false}, "")) require.EqualValues(t, []uint64{300, 100, 200, 400}, ul.Uids) require.EqualValues(t, []string{"11", "22", "111", "212"}, toString(t, list, IntID)) @@ -73,7 +73,7 @@ func TestSortInts(t *testing.T) { func TestSortFloats(t *testing.T) { list := getInput(t, FloatID, []string{"22.2", "11.2", "11.5", "2.12"}) ul := getUIDList(4) - require.NoError(t, Sort(list, ul, []bool{false})) + require.NoError(t, Sort(list, ul, []bool{false}, "")) require.EqualValues(t, []uint64{400, 200, 300, 100}, ul.Uids) require.EqualValues(t, []string{"2.12", "11.2", "11.5", "22.2"}, @@ -83,7 +83,7 @@ func TestSortFloats(t *testing.T) { func TestSortFloatsDesc(t *testing.T) { list := getInput(t, FloatID, []string{"22.2", "11.2", "11.5", "2.12"}) ul := getUIDList(4) - require.NoError(t, Sort(list, ul, []bool{true})) + require.NoError(t, Sort(list, ul, []bool{true}, "")) require.EqualValues(t, []uint64{100, 300, 200, 400}, ul.Uids) require.EqualValues(t, []string{"22.2", "11.5", "11.2", "2.12"}, @@ -99,7 +99,7 @@ func TestSortDateTimes(t *testing.T) { } list := getInput(t, DateTimeID, in) ul := getUIDList(4) - require.NoError(t, Sort(list, ul, []bool{false})) + require.NoError(t, Sort(list, ul, []bool{false}, "")) require.EqualValues(t, []uint64{400, 200, 300, 100}, ul.Uids) require.EqualValues(t, []string{"2006-01-02T15:04:01Z", "2006-01-02T15:04:05Z", @@ -114,7 +114,7 @@ func TestSortIntAndFloat(t *testing.T) { {{Tid: IntID, Value: int64(100)}}, } ul := getUIDList(3) - require.NoError(t, Sort(list, ul, []bool{false})) + require.NoError(t, Sort(list, ul, []bool{false}, "")) require.EqualValues(t, []uint64{200, 100, 300}, ul.Uids) require.EqualValues(t, []string{"21.5", "55", "100"}, @@ -169,7 +169,7 @@ func TestSortMismatchedTypes(t *testing.T) { {{Tid: FloatID, Value: 33.33}}, } ul := getUIDList(7) - require.NoError(t, Sort(list, ul, []bool{false})) + require.NoError(t, Sort(list, ul, []bool{false}, "")) // Don't care about relative ordering between types. However, like types // should be sorted with each other. diff --git a/worker/sort.go b/worker/sort.go index b6deb083ba2..9132ec99d58 100644 --- a/worker/sort.go +++ b/worker/sort.go @@ -390,7 +390,7 @@ func multiSort(ctx context.Context, r *sortresult, ts *pb.SortMessage) error { x.AssertTrue(idx >= 0) vals[j] = sortVals[idx] } - if err := types.Sort(vals, ul, desc); err != nil { + if err := types.Sort(vals, ul, desc, ""); err != nil { return err } // Paginate @@ -686,6 +686,14 @@ func sortByValue(ctx context.Context, ts *pb.SortMessage, ul *pb.List, values := make([][]types.Val, 0, lenList) multiSortVals := make([]types.Val, 0, lenList) order := ts.Order[0] + + var lang string + if langLen := len(order.Langs); langLen == 1 { + lang = order.Langs[0] + } else if langLen > 1 { + panic("Sorting on multiple language is not supported.") + } + for i := 0; i < lenList; i++ { select { case <-ctx.Done(): @@ -703,7 +711,7 @@ func sortByValue(ctx context.Context, ts *pb.SortMessage, ul *pb.List, values = append(values, []types.Val{val}) } } - err := types.Sort(values, &pb.List{Uids: uids}, []bool{order.Desc}) + err := types.Sort(values, &pb.List{Uids: uids}, []bool{order.Desc}, lang) ul.Uids = uids if len(ts.Order) > 1 { for _, v := range values { diff --git a/worker/task.go b/worker/task.go index 1b83b91b61f..baf11f25b7f 100644 --- a/worker/task.go +++ b/worker/task.go @@ -534,7 +534,7 @@ func retrieveValuesAndFacets(args funcArgs, pl *posting.List, listType bool) ( } // Retrieve the posting that matches the language preferences. - langMatch, _, err := pl.PostingFor(q.ReadTs, q.Langs) + langMatch, err := pl.PostingFor(q.ReadTs, q.Langs) if err != nil && err != posting.ErrNoValue { return nil, nil, err } From a1195a391cea7351a953febf84c8dceca20169b0 Mon Sep 17 00:00:00 2001 From: Arijit Das Date: Tue, 19 Nov 2019 14:05:47 +0530 Subject: [PATCH 3/6] Fixes test related to languages. --- gql/parser_test.go | 8 ++++---- query/query.go | 3 ++- query/query1_test.go | 2 +- types/sort.go | 2 +- worker/sort.go | 4 ++-- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/gql/parser_test.go b/gql/parser_test.go index 5903dd1dc1f..d6b5f66aed3 100644 --- a/gql/parser_test.go +++ b/gql/parser_test.go @@ -3753,7 +3753,7 @@ func TestParseQueryWithAttrLang(t *testing.T) { { me(func: uid(0x1)) { name - friend(first:5, orderasc: name@en:fr) { + friend(first:5, orderasc: name@en) { name@en } } @@ -3764,7 +3764,7 @@ func TestParseQueryWithAttrLang(t *testing.T) { require.NotNil(t, res.Query) require.Equal(t, 1, len(res.Query)) require.Equal(t, "name", res.Query[0].Children[1].Order[0].Attr) - require.Equal(t, []string{"en", "fr"}, res.Query[0].Children[1].Order[0].Langs) + require.Equal(t, []string{"en"}, res.Query[0].Children[1].Order[0].Langs) } func TestParseQueryWithAttrLang2(t *testing.T) { @@ -4479,7 +4479,7 @@ func TestInvalidValUsage(t *testing.T) { func TestOrderWithLang(t *testing.T) { query := ` { - me(func: uid(0x1), orderasc: name@en:fr:., orderdesc: lastname@ci, orderasc: salary) { + me(func: uid(0x1), orderasc: name@en, orderdesc: lastname@ci, orderasc: salary) { name } } @@ -4490,7 +4490,7 @@ func TestOrderWithLang(t *testing.T) { require.Equal(t, 1, len(res.Query)) orders := res.Query[0].Order require.Equal(t, "name", orders[0].Attr) - require.Equal(t, []string{"en", "fr", "."}, orders[0].Langs) + require.Equal(t, []string{"en"}, orders[0].Langs) require.Equal(t, "lastname", orders[1].Attr) require.Equal(t, []string{"ci"}, orders[1].Langs) require.Equal(t, "salary", orders[2].Attr) diff --git a/query/query.go b/query/query.go index 65d85e7f391..40fccd8cbab 100644 --- a/query/query.go +++ b/query/query.go @@ -2372,7 +2372,8 @@ func (sg *SubGraph) sortAndPaginateUsingVar(ctx context.Context) error { if len(values) == 0 { continue } - if err := types.Sort(values, &pb.List{Uids: uids}, []bool{sg.Params.Order[0].Desc}, ""); err != nil { + if err := types.Sort(values, &pb.List{Uids: uids}, + []bool{sg.Params.Order[0].Desc}, ""); err != nil { return err } sg.uidMatrix[i].Uids = uids diff --git a/query/query1_test.go b/query/query1_test.go index 2ef9d560912..26cc11f0fa7 100644 --- a/query/query1_test.go +++ b/query/query1_test.go @@ -204,7 +204,7 @@ func TestToFastJSONOrderLang(t *testing.T) { query := ` { me(func: uid(0x01)) { - friend(first:2, orderdesc: alias@en:de:.) { + friend(first:2, orderdesc: alias@en) { alias } } diff --git a/types/sort.go b/types/sort.go index c45566bd1c5..a67075c6959 100644 --- a/types/sort.go +++ b/types/sort.go @@ -32,7 +32,7 @@ type sortBase struct { desc []bool // Sort orders for different values. ul *pb.List o []*pb.Facets - cl *collate.Collator + cl *collate.Collator // Compares Unicode strings according to the given collation order. } // Len returns size of vector. diff --git a/worker/sort.go b/worker/sort.go index 9132ec99d58..c8a2d45000d 100644 --- a/worker/sort.go +++ b/worker/sort.go @@ -688,9 +688,9 @@ func sortByValue(ctx context.Context, ts *pb.SortMessage, ul *pb.List, order := ts.Order[0] var lang string - if langLen := len(order.Langs); langLen == 1 { + if langCount := len(order.Langs); langCount == 1 { lang = order.Langs[0] - } else if langLen > 1 { + } else if langCount > 1 { panic("Sorting on multiple language is not supported.") } From d8886ee761994a35a05bcce9e0c69373e982401e Mon Sep 17 00:00:00 2001 From: Arijit Das Date: Tue, 19 Nov 2019 15:36:13 +0530 Subject: [PATCH 4/6] Added test related to lang. --- gql/parser_test.go | 13 +++++++++++++ types/sort_test.go | 19 +++++++++++++++++++ worker/sort.go | 5 ++--- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/gql/parser_test.go b/gql/parser_test.go index d6b5f66aed3..dad96ababf2 100644 --- a/gql/parser_test.go +++ b/gql/parser_test.go @@ -4476,6 +4476,19 @@ func TestInvalidValUsage(t *testing.T) { require.Contains(t, err.Error(), "Query syntax invalid.") } +func TestOrderWithMultipleLangFail(t *testing.T) { + query := ` + { + me(func: uid(0x1), orderasc: name@en:fr, orderdesc: lastname@ci, orderasc: salary) { + name + } + } + ` + _, err := Parse(Request{Str: query}) + require.Error(t, err) + require.Contains(t, err.Error(), "Sorting by an attribute: [name@en:fr] can only be done on one language") +} + func TestOrderWithLang(t *testing.T) { query := ` { diff --git a/types/sort_test.go b/types/sort_test.go index 60dba69dc90..6f2594a07ea 100644 --- a/types/sort_test.go +++ b/types/sort_test.go @@ -61,6 +61,25 @@ func TestSortStrings(t *testing.T) { toString(t, list, StringID)) } +func TestSortLanguage(t *testing.T) { + // Sorting strings of german language. + list := getInput(t, StringID, []string{"öffnen", "zumachen"}) + ul := getUIDList(2) + + require.NoError(t, Sort(list, ul, []bool{false}, "de")) + require.EqualValues(t, []uint64{100, 200}, ul.Uids) + require.EqualValues(t, []string{"öffnen", "zumachen"}, + toString(t, list, StringID)) + + // Sorting strings of swedish language. + list = getInput(t, StringID, []string{"öppna", "zon"}) + + require.NoError(t, Sort(list, ul, []bool{false}, "sv")) + require.EqualValues(t, []uint64{200, 100}, ul.Uids) + require.EqualValues(t, []string{"zon", "öppna"}, + toString(t, list, StringID)) +} + func TestSortInts(t *testing.T) { list := getInput(t, IntID, []string{"22", "111", "11", "212"}) ul := getUIDList(4) diff --git a/worker/sort.go b/worker/sort.go index c8a2d45000d..2cf68049f8d 100644 --- a/worker/sort.go +++ b/worker/sort.go @@ -688,10 +688,9 @@ func sortByValue(ctx context.Context, ts *pb.SortMessage, ul *pb.List, order := ts.Order[0] var lang string - if langCount := len(order.Langs); langCount == 1 { + if langCount := len(order.Langs); langCount > 0 { + x.AssertTruef(langCount == 1, "Sorting on multiple language is not supported.") lang = order.Langs[0] - } else if langCount > 1 { - panic("Sorting on multiple language is not supported.") } for i := 0; i < lenList; i++ { From a6bf730c930b4e8690fda4eb45c36756e6abb2bd Mon Sep 17 00:00:00 2001 From: Arijit Das Date: Wed, 20 Nov 2019 17:39:29 +0530 Subject: [PATCH 5/6] Address review comments. --- query/common_test.go | 2 +- query/query2_test.go | 24 ++++++++++++++++++++++-- types/sort.go | 7 +++++-- worker/sort.go | 5 +++-- 4 files changed, 31 insertions(+), 7 deletions(-) diff --git a/query/common_test.go b/query/common_test.go index 1d401761b0b..aff5d737406 100644 --- a/query/common_test.go +++ b/query/common_test.go @@ -353,7 +353,7 @@ func populateCluster() { <10101> "zon"@sv . <10101> "öffnen"@de . <10101> "Test" . - <10102> "öppna0"@sv . + <10102> "öppna"@sv . <10102> "zumachen"@de . <10102> "Test" . <11000> "Baz Luhrmann"@en . diff --git a/query/query2_test.go b/query/query2_test.go index c46eee21cfb..5cca8071b8b 100644 --- a/query/query2_test.go +++ b/query/query2_test.go @@ -867,7 +867,17 @@ func TestLanguageOrderNonIndexed1(t *testing.T) { js := processQueryNoErr(t, query) require.JSONEq(t, - `{ "data": { "q": [ { "name_lang@de": "öffnen", "name_lang@sv": "zon" }, { "name_lang@de": "zumachen", "name_lang@sv": "öppna0" } ] } }`, + `{ + "data": { + "q": [{ + "name_lang@de": "öffnen", + "name_lang@sv": "zon" + }, { + "name_lang@de": "zumachen", + "name_lang@sv": "öppna" + }] + } + }`, js) } @@ -883,7 +893,17 @@ func TestLanguageOrderNonIndexed2(t *testing.T) { js := processQueryNoErr(t, query) require.JSONEq(t, - `{ "data": { "q": [ { "name_lang@de": "öffnen", "name_lang@sv": "zon" }, { "name_lang@de": "zumachen", "name_lang@sv": "öppna0" } ] } }`, + `{ + "data": { + "q": [{ + "name_lang@de": "öffnen", + "name_lang@sv": "zon" + }, { + "name_lang@de": "zumachen", + "name_lang@sv": "öppna" + }] + } + }`, js) } diff --git a/types/sort.go b/types/sort.go index a67075c6959..f98c39b5be0 100644 --- a/types/sort.go +++ b/types/sort.go @@ -99,8 +99,11 @@ func SortWithFacet(v [][]Val, ul *pb.List, l []*pb.Facets, desc []bool, lang str var cl *collate.Collator if lang != "" { - langTag, _ := language.Parse(lang) - cl = collate.New(langTag) + // Collator is nil if we are unable to parse the language. + // We default to bytewise comparison in that case. + if langTag, err := language.Parse(lang); err == nil { + cl = collate.New(langTag) + } } b := sortBase{v, desc, ul, l, cl} diff --git a/worker/sort.go b/worker/sort.go index 2cf68049f8d..48dd730a18f 100644 --- a/worker/sort.go +++ b/worker/sort.go @@ -688,9 +688,10 @@ func sortByValue(ctx context.Context, ts *pb.SortMessage, ul *pb.List, order := ts.Order[0] var lang string - if langCount := len(order.Langs); langCount > 0 { - x.AssertTruef(langCount == 1, "Sorting on multiple language is not supported.") + if langCount := len(order.Langs); langCount == 1 { lang = order.Langs[0] + } else if langCount > 1 { + return nil, errors.Errorf("Sorting on multiple language is not supported.") } for i := 0; i < lenList; i++ { From eb124f386c61388a4ddeea691403ab2b42b4c966 Mon Sep 17 00:00:00 2001 From: Arijit Das Date: Thu, 21 Nov 2019 17:22:26 +0530 Subject: [PATCH 6/6] Fix failing systest. --- systest/21million/queries/query-016 | 12 +++--- systest/21million/queries/query-044 | 64 ++++++++++++++--------------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/systest/21million/queries/query-016 b/systest/21million/queries/query-016 index f4ed0fda77d..9dbff9b56b4 100644 --- a/systest/21million/queries/query-016 +++ b/systest/21million/queries/query-016 @@ -18,26 +18,26 @@ ] }, { - "name@en": "Short Film", + "name@en": "Drama", "~genre": [ { - "name@en": "\"Crossover \"- NYU Student Film" + "name@en": "-30-" } ] }, { - "name@en": "Drama", + "name@en": "Comedy", "~genre": [ { - "name@en": "#Stuck" + "name@en": "¡ El Presidente !" } ] }, { - "name@en": "Comedy", + "name@en": "Short Film", "~genre": [ { - "name@en": "\"FF.SS.\" – Cioè: \"...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?\"" + "name@en": ":08 Min. Core Workouts" } ] } diff --git a/systest/21million/queries/query-044 b/systest/21million/queries/query-044 index fb89cc026b4..d9097cac777 100644 --- a/systest/21million/queries/query-044 +++ b/systest/21million/queries/query-044 @@ -27,176 +27,176 @@ { "movies": [ { - "name@en": "Documentary film", "director.film": [ { - "name@en": "!Women Art Revolution", + "name@en": "¡Ay Carmela!", "starring": [ { "performance.actor": [ { - "name@en": "B. Ruby Rich" + "name@en": "Alfonso Guirao" } ] }, { "performance.actor": [ { - "name@en": "Barbara Kruger" + "name@en": "Andrés Pajares" } ] } ] }, { - "name@en": "$100 and a T-Shirt: A Documentary About Zines in the Northwest", + "name@en": "-30-", "starring": [ { "performance.actor": [ { - "name@en": "Calvin Johnson" + "name@en": "Jack Webb" } ] }, { "performance.actor": [ { - "name@en": "Donna Cossy" + "name@en": "Whitney Blake" } ] } ] } - ] + ], + "name@en": "Drama" }, { - "name@en": "Short Film", "director.film": [ { - "name@en": "#PostModem", + "name@en": "!Women Art Revolution", "starring": [ { "performance.actor": [ { - "name@en": "Amy Seimetz" + "name@en": "Barbara Kruger" } ] }, { "performance.actor": [ { - "name@en": "Arly Montes" + "name@en": "B. Ruby Rich" } ] } ] }, { - "name@en": "#twitterkills", + "name@en": "...But Film is My Mistress", "starring": [ { "performance.actor": [ { - "name@en": "Amanda Hendy" + "name@en": "Lars von Trier" } ] }, { "performance.actor": [ { - "name@en": "Annie Hendy" + "name@en": "Bernardo Bertolucci" } ] } ] } - ] + ], + "name@en": "Documentary film" }, { - "name@en": "Drama", "director.film": [ { - "name@en": "$100,000 for Ringo", + "name@en": "¡Aquí están los aguilares!", "starring": [ { "performance.actor": [ { - "name@en": "Eleonora Bianchi" + "name@en": "Antonio Aguilar" } ] }, { "performance.actor": [ { - "name@en": "Fernando Sancho" + "name@en": "Luis Aguilar" } ] } ] }, { - "name@en": "$9.99", + "name@en": "¡Mis abuelitas... nomás!", "starring": [ { "performance.actor": [ { - "name@en": "Anthony LaPaglia" + "name@en": "Antonio Espino" } ] }, { "performance.actor": [ { - "name@en": "Barry Otto" + "name@en": "Alejandro Parodi" } ] } ] } - ] + ], + "name@en": "Comedy" }, { - "name@en": "Comedy", "director.film": [ { - "name@en": "\"FF.SS.\" – Cioè: \"...che mi hai portato a fare sopra a Posillipo se non mi vuoi più bene?\"", + "name@en": "?E?anx (The Cave)", "starring": [ { "performance.actor": [ { - "name@en": "Alfredo Cerruti" + "name@en": "Elaina William" } ] }, { "performance.actor": [ { - "name@en": "Bobby Solo" + "name@en": "Edmond Lulua" } ] } ] }, { - "name@en": "#1 Cheerleader Camp", + "name@en": "¡Destápalo!", "starring": [ { "performance.actor": [ { - "name@en": "Adam Wright" + "name@en": "Eitaro Tanimoto" } ] }, { "performance.actor": [ { - "name@en": "Alex Kozitsky" + "name@en": "Erika de la Llave" } ] } ] } - ] + ], + "name@en": "Short Film" } ] }