Improve hash index #2887

Merged 25 commits on Jan 16, 2019
Changes from 8 commits

Commits (25)
d05f253 saving state (Jan 5, 2019)
0043d6c added new fingerprint func using BLAKE2b (Jan 8, 2019)
437ed96 renamed function to Hash256 for clarity. (Jan 8, 2019)
5617e04 replaced 64 fingerprint hash with Hash256 (Jan 8, 2019)
89eeabb pickTokenizer use hash tokenizer when list is lossy. (Jan 10, 2019)
4750b64 added tokenizer identifier list for enforcing tokenizer. (Jan 10, 2019)
8433ccc compare func using hash index if available and eq won't compare values (Jan 10, 2019)
7682c55 fixed minor comment glitches (Jan 10, 2019)
47fee98 use tokenizer identifier consts, change hash to non-lossy. (Jan 10, 2019)
4cf914b using non-lossy hash so no need for extra logic in handleCompareFunction (Jan 10, 2019)
990c9bc simplify pickTokenizer and (Jan 10, 2019)
2af6293 simplify pickTokenizer (Jan 10, 2019)
c9fa41a using tokenizer id (Jan 10, 2019)
c46b520 added id value for custom tokenizers, IdentCustom (Jan 10, 2019)
a6d461c using tokenizer ids when possible (Jan 10, 2019)
be0064a Merge branch 'master' of github.com:/dgraph-io/dgraph into srfrog/iss… (Jan 11, 2019)
2d51a3d added hash index tests (Jan 11, 2019)
16e8a9f Manish's review. Fixed a new bug introduced by this PR during IdentCu… (manishrjain, Jan 14, 2019)
0f164a2 Remove Long term for exact index warning. (manishrjain, Jan 14, 2019)
3db5ba5 fixed logic (Jan 14, 2019)
5658ded pickTokenizer return error when comparison func doesn't have non-loss… (Jan 14, 2019)
8b914de added warning for eq comparison without non-lossy tokenizer (Jan 14, 2019)
41a3d4d re-fixed this slippery lil bug (Jan 15, 2019)
3bf1b92 removed extra glog (Jan 16, 2019)
91f24de Merge branch 'master' of github.com:/dgraph-io/dgraph into srfrog/iss… (Jan 16, 2019)
2 changes: 1 addition & 1 deletion posting/lists.go
@@ -159,7 +159,7 @@ func Cleanup() {
// to lru cache and returns it.
//
// plist := Get(key, group)
// ... // Use plist
// ... Use plist
// TODO: This should take a node id and index. And just append all indices to a list.
// When doing a commit, it should update all the sync index watermarks.
// worker pkg would push the indices to the watermarks held by lists.
30 changes: 26 additions & 4 deletions tok/tok.go
@@ -21,14 +21,34 @@ import (
"plugin"
"time"

farm "github.com/dgryski/go-farm"
"github.com/golang/glog"
geom "github.com/twpayne/go-geom"

"github.com/dgraph-io/dgraph/types"
"github.com/dgraph-io/dgraph/x"
)

// Tokenizer identifiers are unique and can't be reused.
// The range 0x00 - 0x79 is system reserved.
// The range 0x80 - 0xff is for custom tokenizers.
// TODO: use these everywhere where we must ensure a system tokenizer.
const (
IdentNone = 0x0
IdentTerm = 0x1
IdentExact = 0x2
IdentYear = 0x4
IdentGeo = 0x5
IdentInt = 0x6
IdentFloat = 0x7
IdentFullText = 0x8
IdentBool = 0x9
IdentTrigram = 0xA
IdentHash = 0xB
IdentMonth = 0x41
IdentDay = 0x42
IdentHour = 0x43
)

// Tokenizer defines what a tokenizer must provide.
type Tokenizer interface {

@@ -344,9 +364,11 @@ func (t HashTokenizer) Tokens(v interface{}) ([]string, error) {
if !ok {
return nil, x.Errorf("Hash tokenizer only supported for string types")
}
var hash [8]byte
binary.BigEndian.PutUint64(hash[:], farm.Hash64([]byte(term)))
return []string{string(hash[:])}, nil
hash := x.Hash256([]byte(term))
if len(hash) == 0 {
return nil, x.Errorf("Hash tokenizer failed to create hash")
}
return []string{string(hash)}, nil
}
func (t HashTokenizer) Identifier() byte { return 0xB }
func (t HashTokenizer) IsSortable() bool { return false }
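
For context on the change above: the old Tokens implementation packed a farm.Hash64 fingerprint into 8 bytes, while the new one emits the full 32-byte BLAKE2b-256 digest from x.Hash256. A minimal standalone sketch of the two encodings (not PR code; it only assumes the go-farm and x/crypto/blake2b packages already referenced in this diff):

package main

import (
	"encoding/binary"
	"fmt"

	farm "github.com/dgryski/go-farm"
	"golang.org/x/crypto/blake2b"
)

// oldHashToken sketches the previous behaviour: an 8-byte farmhash fingerprint,
// lossy enough that equality matches must re-check the stored values.
func oldHashToken(term string) string {
	var buf [8]byte
	binary.BigEndian.PutUint64(buf[:], farm.Hash64([]byte(term)))
	return string(buf[:])
}

// newHashToken sketches the PR's behaviour: a 32-byte BLAKE2b-256 digest,
// wide enough for the later commits to treat the hash index as non-lossy.
func newHashToken(term string) string {
	h := blake2b.Sum256([]byte(term))
	return string(h[:])
}

func main() {
	fmt.Printf("old token: %d bytes, new token: %d bytes\n",
		len(oldHashToken("Alice")), len(newHashToken("Alice")))
}
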
44 changes: 38 additions & 6 deletions worker/task.go
@@ -964,19 +964,39 @@ func (qs *queryState) handleRegexFunction(ctx context.Context, arg funcArgs) err
}

func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) error {
span := otrace.FromContext(ctx)
stop := x.SpanTimer(span, "handleCompareFunction")
defer stop()
if span != nil {
span.Annotatef(nil, "Number of uids: %d. args.srcFn: %+v", arg.srcFn.n, arg.srcFn)
}

attr := arg.q.Attr
span.Annotatef(nil, "Attr: %s. Fname: %s", attr, arg.srcFn.fname)
tokenizer, err := pickTokenizer(attr, arg.srcFn.fname)
// We should already have checked this in getInequalityTokens.
x.Check(err)
// Only if the tokenizer that we used IsLossy, then we need to fetch
// and compare the actual values.
span.Annotatef(nil, "Tokenizer: %s, Lossy: %t", tokenizer.Name(), tokenizer.IsLossy())
if tokenizer.IsLossy() {
// Need to evaluate inequality for entries in the first bucket.
typ, err := schema.State().TypeOf(attr)
if err != nil || !typ.IsScalar() {
return x.Errorf("Attribute not scalar: %s %v", attr, typ)
}

var keyFn func(int, uint64) []byte
if tokenizer.Identifier() == tok.IdentHash {
keyFn = func(row int, _ uint64) []byte {
return x.IndexKey(attr, arg.srcFn.tokens[row])
}
} else {
keyFn = func(_ int, uid uint64) []byte {
return x.DataKey(attr, uid)
}
}

x.AssertTrue(len(arg.out.UidMatrix) > 0)
rowsToFilter := 0
if arg.srcFn.fname == eq {
@@ -1000,8 +1020,9 @@ func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) e
algo.ApplyFilter(arg.out.UidMatrix[row], func(uid uint64, i int) bool {
switch lang {
case "":
// TODO: use hash index in list
if isList {
pl, err := posting.GetNoStore(x.DataKey(attr, uid))
pl, err := posting.GetNoStore(keyFn(row, uid))
if err != nil {
filterErr = err
return false
@@ -1023,11 +1044,15 @@ func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) e
return false
}

pl, err := posting.GetNoStore(x.DataKey(attr, uid))
pl, err := posting.GetNoStore(keyFn(row, uid))
if err != nil {
filterErr = err
return false
}
if arg.q.SrcFunc.Name == "eq" {
span.Annotate(nil, fmt.Sprintf("--- eq token: %d:%s", row, arg.srcFn.eqTokens[row].Value))
return true
}
sv, err := pl.Value(arg.q.ReadTs)
if err != nil {
if err != posting.ErrNoValue {
@@ -1039,7 +1064,7 @@ func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) e
return err == nil &&
types.CompareVals(arg.q.SrcFunc.Name, dst, arg.srcFn.eqTokens[row])
case ".":
pl, err := posting.GetNoStore(x.DataKey(attr, uid))
pl, err := posting.GetNoStore(keyFn(row, uid))
if err != nil {
filterErr = err
return false
@@ -1058,17 +1083,24 @@ func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) e
}
return false
default:
sv, err := fetchValue(uid, attr, arg.q.Langs, typ, arg.q.ReadTs)
pl, err := posting.GetNoStore(keyFn(row, uid))
if err != nil {
if err != posting.ErrNoValue {
filterErr = err
}
return false
}
if sv.Value == nil {
src, err := pl.ValueFor(arg.q.ReadTs, arg.q.Langs)
if err != nil {
filterErr = err
return false
}
dst, err := types.Convert(src, typ)
if err != nil {
filterErr = err
return false
}
return types.CompareVals(arg.q.SrcFunc.Name, sv, arg.srcFn.eqTokens[row])
return types.CompareVals(arg.q.SrcFunc.Name, dst, arg.srcFn.eqTokens[row])
}
})
if filterErr != nil {
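
The heart of the task.go change is the keyFn selection: when the picked tokenizer is the hash tokenizer, the comparison can read the posting list stored under the index key built from the row's token, instead of fetching each UID's value under its data key. A rough standalone sketch of that dispatch (indexKey and dataKey below are hypothetical stand-ins for x.IndexKey and x.DataKey, and the surrounding funcArgs plumbing is omitted):

package main

import "fmt"

// indexKey and dataKey are illustrative stand-ins, not the real x.IndexKey/x.DataKey.
func indexKey(attr, token string) []byte { return []byte("idx:" + attr + ":" + token) }
func dataKey(attr string, uid uint64) []byte {
	return []byte(fmt.Sprintf("data:%s:%d", attr, uid))
}

// pickKeyFn mirrors the dispatch in handleCompareFunction: with a hash index the
// lookup key comes from the row's token, otherwise from the UID being filtered.
func pickKeyFn(usingHashIndex bool, attr string, tokens []string) func(row int, uid uint64) []byte {
	if usingHashIndex {
		return func(row int, _ uint64) []byte { return indexKey(attr, tokens[row]) }
	}
	return func(_ int, uid uint64) []byte { return dataKey(attr, uid) }
}

func main() {
	keyFn := pickKeyFn(true, "name", []string{"(32-byte hash token)"})
	fmt.Printf("%s\n", keyFn(0, 42))
}
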
24 changes: 15 additions & 9 deletions worker/tokens.go
@@ -87,18 +87,22 @@ func pickTokenizer(attr string, f string) (tok.Tokenizer, error) {

tokenizers := schema.State().Tokenizer(attr)

var tokenizer tok.Tokenizer
for _, t := range tokenizers {
tokIdx := -1
for i, t := range tokenizers {
if !t.IsLossy() {
tokenizer = t
tokIdx = i
break
}
// prefer hash over other lossy tokenizers.
if t.Identifier() == tok.IdentHash {
tokIdx = i
}
}

// If the function is eq and we found a non-lossy or hash tokenizer, let's
// return it to avoid the second lookup.
if f == "eq" && tokenizer != nil {
return tokenizer, nil
if f == "eq" && tokIdx != -1 {
return tokenizers[tokIdx], nil
}

// Lets try to find a sortable tokenizer.
@@ -178,21 +182,23 @@ func getInequalityTokens(readTs uint64, attr, f string,
continue
}
// if its lossy then we handle inequality comparison later
// on in handleCompareAttr
// on in handleCompareFunction
if tokenizer.IsLossy() {
out = append(out, k.Term)
} else {
// for non Lossy lets compare for inequality (gt & lt)
// to see if key needs to be included
if f == "gt" {
switch {
case f == "gt":
if bytes.Compare([]byte(k.Term), ineqTokenInBytes) > 0 {
out = append(out, k.Term)
}
} else if f == "lt" {
case f == "lt":
if bytes.Compare([]byte(k.Term), ineqTokenInBytes) < 0 {
out = append(out, k.Term)
}
} else { //for le or ge or any other fn consider the key
default:
// for le or ge or any other fn consider the key
out = append(out, k.Term)
}
}
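
The tokens.go change reduces to a preference order inside pickTokenizer: the first non-lossy tokenizer wins outright, otherwise the hash tokenizer is remembered ahead of other lossy ones, and for eq the remembered tokenizer is returned to skip the second lookup. A standalone sketch of that selection (the tokenizer struct here is a reduced stand-in for the tok.Tokenizer interface, not the real type):

package main

import "fmt"

// tokenizer is a reduced stand-in carrying only the properties this selection uses.
type tokenizer struct {
	name  string
	lossy bool
	ident byte
}

const identHash = 0xB // matches tok.IdentHash introduced in this PR

// pickForEq mirrors the new loop: the first non-lossy tokenizer wins, otherwise
// the hash tokenizer is preferred over the other lossy tokenizers.
func pickForEq(tokenizers []tokenizer) (tokenizer, bool) {
	idx := -1
	for i, t := range tokenizers {
		if !t.lossy {
			idx = i
			break
		}
		if t.ident == identHash {
			idx = i
		}
	}
	if idx < 0 {
		return tokenizer{}, false
	}
	return tokenizers[idx], true
}

func main() {
	ts := []tokenizer{
		{name: "term", lossy: true, ident: 0x1},
		{name: "hash", lossy: true, ident: identHash},
	}
	if t, ok := pickForEq(ts); ok {
		fmt.Println("picked for eq:", t.name) // picked for eq: hash
	}
}
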
24 changes: 24 additions & 0 deletions x/hash.go
@@ -0,0 +1,24 @@
/*
* Copyright 2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package x

import "golang.org/x/crypto/blake2b"

// Hash256 returns the 32-byte BLAKE2b-256 digest of data.
func Hash256(data []byte) []byte {
h := blake2b.Sum256(data)
return h[:]
}
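
For reference, a small usage sketch of the new helper (it assumes only the golang.org/x/crypto/blake2b dependency added above; the digest is always 32 bytes regardless of input length):

package main

import (
	"encoding/hex"
	"fmt"

	"golang.org/x/crypto/blake2b"
)

// hash256 reproduces x.Hash256 locally: the BLAKE2b-256 digest of data as a slice.
func hash256(data []byte) []byte {
	h := blake2b.Sum256(data)
	return h[:]
}

func main() {
	d := hash256([]byte("dgraph"))
	fmt.Println(len(d), hex.EncodeToString(d)) // prints 32 and the hex digest
}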