From 6cfbadb3ffd43f57a1d0542deb8f92d32a56ee3f Mon Sep 17 00:00:00 2001
From: Ahsan Barkati <ahsanbarkati@gmail.com>
Date: Thu, 14 Jan 2021 01:03:17 +0530
Subject: [PATCH] fix(sort): Make sort consistent for indexed and without
 indexed predicates (#7241)

Make the result of sort consistent for predicate with and without index.
After this change, the predicates with null values will appear at the
last of the sort result for both ascending and descending sort, for both
index/no-index case. Before this change the result for predicate with index
didn't contain the null predicates and the one without index treated nulls as
infinite value - they appeared at the beginning for descending while at the
end for the ascending sort case.

NOTE: This is a breaking change for the response of sort.

Co-authored-by: Rajas Vanjape <rajas@dgraph.io>
(cherry picked from commit 85278f8aeb826a5e83c1de18310ab615572a62bf)
---
 query/common_test.go     |  30 +++++++-
 query/query1_test.go     | 153 +++++++++++++++++++++++++++++++++++++++
 query/query2_test.go     |  16 +++-
 query/rdf_result_test.go |   6 +-
 worker/sort.go           |  61 ++++++++++++++--
 5 files changed, 253 insertions(+), 13 deletions(-)
diff --git a/query/common_test.go b/query/common_test.go
index 703ca796330..f5e0c6c9af1 100644
--- a/query/common_test.go
+++ b/query/common_test.go
@@ -324,7 +324,10 @@ noindex_salary                 : float .
 language                       : [string] .
 score                          : [int] @index(int) .
 average                        : [float] @index(float) .
-gender						   : string .
+gender                         : string .
+indexpred                      : string @index(exact) .
+pred                           : string .
+pname                          : string .
 `
 
 func populateCluster() {
@@ -758,6 +761,31 @@ func populateCluster() {
 		<20001> <average> "49.33" .
 		<20001> <pet_name> "mahi" .
 		<20001> <pet_name> "ms" .
+
+		# data for testing consistency of sort
+		<61> <pred> "A" .
+		<62> <pred> "B" .
+		<63> <pred> "C" .
+		<64> <pred> "D" .
+		<65> <pred> "E" .
+
+		<61> <indexpred> "A" .
+		<62> <indexpred> "B" .
+		<63> <indexpred> "C" .
+		<64> <indexpred> "D" .
+		<65> <indexpred> "E" .
+
+		<61> <pname> "nameA" .
+		<62> <pname> "nameB" .
+		<63> <pname> "nameC" .
+		<64> <pname> "nameD" .
+		<65> <pname> "nameE" .
+		<66> <pname> "nameF" .
+		<67> <pname> "nameG" .
+		<68> <pname> "nameH" .
+		<69> <pname> "nameI" .
+		<70> <pname> "nameJ" .
+
 	`)
 	if err != nil {
 		panic(fmt.Sprintf("Could not able add triple to the cluster. Got error %v", err.Error()))
diff --git a/query/query1_test.go b/query/query1_test.go
index 80cf6ad044e..0ab4888a6c8 100644
--- a/query/query1_test.go
+++ b/query/query1_test.go
@@ -19,6 +19,7 @@ package query
 import (
 	"context"
 	"encoding/json"
+	"fmt"
 	"io/ioutil"
 	"strings"
 	"testing"
@@ -1915,6 +1916,158 @@ func TestMultiSort7Paginate(t *testing.T) {
 	require.JSONEq(t, `{"data": {"me":[{"name":"Alice","age":25},{"name":"Alice","age":75},{"name":"Alice","age":75},{"name":"Bob","age":25},{"name":"Bob","age":75},{"name":"Colin","age":25},{"name":"Elizabeth","age":25}]}}`, js)
 }
 
+func TestSortWithNulls(t *testing.T) {
+	tests := []struct {
+		index  int32
+		offset int32
+		first  int32
+		desc   bool
+		result string
+	}{
+		{0, -1, -1, false, `{"data": {"me":[
+			{"pname":"nameA","pred":"A"},
+			{"pname":"nameB","pred":"B"},
+			{"pname":"nameC","pred":"C"},
+			{"pname":"nameD","pred":"D"},
+			{"pname":"nameE","pred":"E"},
+			{"pname":"nameF"},
+			{"pname":"nameG"},
+			{"pname":"nameH"},
+			{"pname":"nameI"},
+			{"pname":"nameJ"}]}}`,
+		},
+		{1, -1, -1, true, `{"data": {"me":[
+			{"pname":"nameE","pred":"E"},
+			{"pname":"nameD","pred":"D"},
+			{"pname":"nameC","pred":"C"},
+			{"pname":"nameB","pred":"B"},
+			{"pname":"nameA","pred":"A"},
+			{"pname":"nameF"},
+			{"pname":"nameG"},
+			{"pname":"nameH"},
+			{"pname":"nameI"},
+			{"pname":"nameJ"}]}}`,
+		},
+		{2, -1, 2, false, `{"data": {"me":[
+			{"pname":"nameA", "pred": "A"},
+			{"pname":"nameB","pred":"B"}]}}`,
+		},
+		{4, -1, 2, true, `{"data": {"me":[
+			{"pname":"nameE", "pred":"E"},
+			{"pname":"nameD", "pred": "D"}]}}`,
+		},
+		{5, -1, 7, false, `{"data": {"me":[
+			{"pname":"nameA","pred":"A"},
+			{"pname":"nameB","pred":"B"},
+			{"pname":"nameC","pred":"C"},
+			{"pname":"nameD","pred":"D"},
+			{"pname":"nameE","pred":"E"},
+			{"pname":"nameF"},
+			{"pname":"nameG"}]}}`,
+		},
+		{6, -1, 7, true, `{"data": {"me":[
+			{"pname":"nameE","pred":"E"},
+			{"pname":"nameD","pred":"D"},
+			{"pname":"nameC","pred":"C"},
+			{"pname":"nameB","pred":"B"},
+			{"pname":"nameA","pred":"A"},
+			{"pname":"nameF"},
+			{"pname":"nameG"}]}}`,
+		},
+		{7, 2, 7, false, `{"data": {"me":[
+			{"pname":"nameC","pred":"C"},
+			{"pname":"nameD","pred":"D"},
+			{"pname":"nameE","pred":"E"},
+			{"pname":"nameF"},
+			{"pname":"nameG"},
+			{"pname":"nameH"},
+			{"pname":"nameI"}]}}`,
+		},
+		{8, 2, 7, true, `{"data": {"me":[
+			{"pname":"nameC","pred":"C"},
+			{"pname":"nameB","pred":"B"},
+			{"pname":"nameA","pred":"A"},
+			{"pname":"nameF"},
+			{"pname":"nameG"},
+			{"pname":"nameH"},
+			{"pname":"nameI"}]}}`,
+		},
+		{9, 2, 100, false, `{"data": {"me":[
+			{"pname":"nameC","pred":"C"},
+			{"pname":"nameD","pred":"D"},
+			{"pname":"nameE","pred":"E"},
+			{"pname":"nameF"},
+			{"pname":"nameG"},
+			{"pname":"nameH"},
+			{"pname":"nameI"},
+			{"pname":"nameJ"}]}}`,
+		},
+		{10, 2, 100, true, `{"data": {"me":[
+			{"pname":"nameC","pred":"C"},
+			{"pname":"nameB","pred":"B"},
+			{"pname":"nameA","pred":"A"},
+			{"pname":"nameF"},
+			{"pname":"nameG"},
+			{"pname":"nameH"},
+			{"pname":"nameI"},
+			{"pname":"nameJ"}]}}`,
+		},
+		{11, 5, 5, false, `{"data": {"me":[
+			{"pname":"nameF"},
+			{"pname":"nameG"},
+			{"pname":"nameH"},
+			{"pname":"nameI"},
+			{"pname":"nameJ"}]}}`,
+		},
+		{12, 5, 5, true, `{"data": {"me":[
+			{"pname":"nameF"},
+			{"pname":"nameG"},
+			{"pname":"nameH"},
+			{"pname":"nameI"},
+			{"pname":"nameJ"}]}}`,
+		},
+		{13, 9, 5, false, `{"data": {"me":[
+			{"pname":"nameJ"}]}}`,
+		},
+		{14, 9, 5, true, `{"data": {"me":[
+			{"pname":"nameJ"}]}}`,
+		},
+		{15, 12, 5, false, `{"data": {"me":[]}}`},
+		{16, 12, 5, true, `{"data": {"me":[]}}`},
+	}
+
+	makeQuery := func(offset, first int32, desc, index bool) string {
+		pred := "pred"
+		if index {
+			pred = "indexpred"
+		}
+		order := "orderasc: " + pred
+		if desc {
+			order = "orderdesc: " + pred
+		}
+		qfunc := "me(func: uid(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), "
+		qfunc += order
+		if offset != -1 {
+			qfunc += fmt.Sprintf(", offset: %d", offset)
+		}
+		if first != -1 {
+			qfunc += fmt.Sprintf(", first: %d", first)
+		}
+		query := "{" + qfunc + ") { pname pred:" + pred + " } }"
+		return processQueryNoErr(t, query)
+	}
+
+	for _, tc := range tests {
+		// Case of sort with Index.
+		actual := makeQuery(tc.offset, tc.first, tc.desc, true)
+		require.JSONEqf(t, tc.result, actual, "Failed on index-testcase: %d\n", tc.index)
+
+		// Case of sort without index
+		actual = makeQuery(tc.offset, tc.first, tc.desc, false)
+		require.JSONEqf(t, tc.result, actual, "Failed on testcase: %d\n", tc.index)
+	}
+}
+
 func TestMultiSortPaginateWithOffset(t *testing.T) {
 	t.Parallel()
 	tests := []struct {
diff --git a/query/query2_test.go b/query/query2_test.go
index 9f7f6eaa7e9..3c05914643e 100644
--- a/query/query2_test.go
+++ b/query/query2_test.go
@@ -973,7 +973,13 @@ func TestLanguageOrderIndexed3(t *testing.T) {
 	require.JSONEq(t,
 		`{
 			"data": {
-				"q": []
+				"q": [{
+					"name_lang_index@de": "öffnen",
+					"name_lang_index@sv": "zon"
+				}, {
+					"name_lang_index@de": "zumachen",
+					"name_lang_index@sv": "öppna"
+				}]
 			}
 		}`,
 		js)
@@ -993,7 +999,13 @@ func TestLanguageOrderIndexed4(t *testing.T) {
 	require.JSONEq(t,
 		`{
 			"data": {
-				"q": []
+				"q": [{
+					"name_lang_index@de": "öffnen",
+					"name_lang_index@sv": "zon"
+				}, {
+					"name_lang_index@de": "zumachen",
+					"name_lang_index@sv": "öppna"
+				}]
 			}
 		}`,
 		js)
diff --git a/query/rdf_result_test.go b/query/rdf_result_test.go
index e91b5effff6..2ee50ee201e 100644
--- a/query/rdf_result_test.go
+++ b/query/rdf_result_test.go
@@ -187,12 +187,13 @@ func TestDateRDF(t *testing.T) {
 	`
 	rdf, err := processQueryRDF(context.Background(), t, query)
 	require.NoError(t, err)
-	require.Equal(t, rdf, `<0x1> <name> "Michonne" .
+	expected := `<0x1> <name> "Michonne" .
 <0x1> <gender> "female" .
 <0x1> <friend> <0x19> .
 <0x1> <friend> <0x18> .
 <0x1> <friend> <0x17> .
 <0x1> <friend> <0x1f> .
+<0x1> <friend> <0x65> .
 <0x17> <name> "Rick Grimes" .
 <0x18> <name> "Glenn Rhee" .
 <0x19> <name> "Daryl Dixon" .
@@ -201,5 +202,6 @@ func TestDateRDF(t *testing.T) {
 <0x18> <film.film.initial_release_date> "1909-05-05T00:00:00Z" .
 <0x19> <film.film.initial_release_date> "1929-01-10T00:00:00Z" .
 <0x1f> <film.film.initial_release_date> "1801-01-15T00:00:00Z" .
-`)
+`
+	require.Equal(t, expected, rdf)
 }
diff --git a/worker/sort.go b/worker/sort.go
index 024207a32f3..614b9557347 100644
--- a/worker/sort.go
+++ b/worker/sort.go
@@ -189,8 +189,8 @@ func sortWithIndex(ctx context.Context, ts *pb.SortMessage) *sortresult {
 		// offsets[i] is the offset for i-th posting list. It gets decremented as we
 		// iterate over buckets.
 		out[i].offset = int(ts.Offset)
-		var emptyList pb.List
-		out[i].ulist = &emptyList
+		out[i].ulist = &pb.List{}
+		out[i].skippedUids = &pb.List{}
 		out[i].uset = map[uint64]struct{}{}
 	}
 
@@ -303,6 +303,39 @@ BUCKETS:
 		}
 	}
 
+	for i, ul := range ts.UidMatrix {
+		// nullNodes is list of UIDs for which the value of the sort predicate is null.
+		var nullNodes []uint64
+		// present is a map[uid]->bool to keep track of the UIDs containing the sort predicate.
+		present := make(map[uint64]bool)
+
+		// Add the UIDs to the map, which are in the resultant intersected list and the UIDs which
+		// have been skipped because of offset while intersection.
+		for _, uid := range out[i].ulist.Uids {
+			present[uid] = true
+		}
+		for _, uid := range out[i].skippedUids.Uids {
+			present[uid] = true
+		}
+
+		// nullPreds is a list of UIDs which doesn't contain the sort predicate.
+		for _, uid := range ul.Uids {
+			if _, ok := present[uid]; !ok {
+				nullNodes = append(nullNodes, uid)
+			}
+		}
+
+		// Apply the offset on null nodes, if the nodes with value were not enough.
+		if out[i].offset < len(nullNodes) {
+			nullNodes = nullNodes[out[i].offset:]
+		} else {
+			nullNodes = nullNodes[:0]
+		}
+		remainingCount := int(ts.Count) - len(r.UidMatrix[i].Uids)
+		canAppend := x.Min(uint64(remainingCount), uint64(len(nullNodes)))
+		r.UidMatrix[i].Uids = append(r.UidMatrix[i].Uids, nullNodes[:canAppend]...)
+	}
+
 	select {
 	case <-ctx.Done():
 		return resultWithError(ctx.Err())
@@ -532,6 +565,7 @@ func fetchValues(ctx context.Context, in *pb.Query, idx int, or chan orderResult
 type intersectedList struct {
 	offset          int
 	ulist           *pb.List
+	skippedUids     *pb.List
 	values          []types.Val
 	uset            map[uint64]struct{}
 	multiSortOffset int32
@@ -585,8 +619,10 @@ func intersectBucket(ctx context.Context, ts *pb.SortMessage, token string,
 		n := len(result.Uids)
 		if il.offset >= n {
 			// We are going to skip the whole intersection. No need to do actual
-			// sorting. Just update offsets[i]. We now offset less.
+			// sorting. Just update offsets[i]. We now offset less. Also, keep track of the UIDs
+			// that have been skipped for the offset.
 			il.offset -= n
+			il.skippedUids.Uids = append(il.skippedUids.Uids, result.Uids...)
 			continue
 		}
 
@@ -604,6 +640,9 @@ func intersectBucket(ctx context.Context, ts *pb.SortMessage, token string,
 		if il.offset > 0 {
 			// Apply the offset.
 			if len(ts.Order) == 1 {
+				// Keep track of UIDs which had sort predicate but have been skipped because of
+				// the offset.
+				il.skippedUids.Uids = append(il.skippedUids.Uids, result.Uids[:il.offset]...)
 				result.Uids = result.Uids[il.offset:n]
 			} else {
 				// In case of multi sort we can't apply the offset yet, as the order might change
@@ -716,25 +755,31 @@ func sortByValue(ctx context.Context, ts *pb.SortMessage, ul *pb.List,
 		return nil, errors.Errorf("Sorting on multiple language is not supported.")
 	}
 
+	// nullsList is the list of UIDs for which value doesn't exist.
+	var nullsList []uint64
+	var nullVals [][]types.Val
 	for i := 0; i < lenList; i++ {
 		select {
 		case <-ctx.Done():
 			return multiSortVals, ctx.Err()
 		default:
 			uid := ul.Uids[i]
-			uids = append(uids, uid)
 			val, err := fetchValue(uid, order.Attr, order.Langs, typ, ts.ReadTs)
 			if err != nil {
-				// Value couldn't be found or couldn't be converted to the sort
-				// type.  By using a nil Value, it will appear at the
-				// end (start) for orderasc (orderdesc).
+				// Value couldn't be found or couldn't be converted to the sort type.
+				// It will be appended to the end of the result based on the pagination.
 				val.Value = nil
+				nullsList = append(nullsList, uid)
+				nullVals = append(nullVals, []types.Val{val})
+				continue
 			}
+			uids = append(uids, uid)
 			values = append(values, []types.Val{val})
 		}
 	}
 	err := types.Sort(values, &uids, []bool{order.Desc}, lang)
-	ul.Uids = uids
+	ul.Uids = append(uids, nullsList...)
+	values = append(values, nullVals...)
 	if len(ts.Order) > 1 {
 		for _, v := range values {
 			multiSortVals = append(multiSortVals, v[0])