Skip to content

Commit

Permalink
opt(query): Use sroar in pb.List (#7864)
Browse files Browse the repository at this point in the history
Use roaring bitmaps in pb.List by default. We fall back to using sorted_uids 
when we need the list to be ordered. The APIs defined in the codec acts as
an abstraction layer that decides if sorted_uids or bitmap should be used. 
sorted_uids are used if it is present, otherwise, the bitmap is used.

Co-authored-by: Manish R Jain <manish@dgraph.io>
  • Loading branch information
ahsanbarkati and manishrjain authored Jun 16, 2021
1 parent 314091a commit 3407eb1
Show file tree
Hide file tree
Showing 23 changed files with 809 additions and 709 deletions.
29 changes: 22 additions & 7 deletions algo/uidlist.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,42 @@ package algo
import (
"sort"

"github.com/dgraph-io/dgraph/codec"
"github.com/dgraph-io/dgraph/protos/pb"
"github.com/dgraph-io/sroar"
)

const jump = 32 // Jump size in InsersectWithJump.

// ApplyFilter applies a filter to our UIDList.
// TODO: ApplyFilter in this way should only happen for sorted uids. For normal
// filter, it should use Bitmap FastAnd or And.
func ApplyFilter(u *pb.List, f func(uint64, int) bool) {
out := u.Uids[:0]
for i, uid := range u.Uids {
if f(uid, i) {
out = append(out, uid)
uids := codec.GetUids(u)
var out []uint64
for i, x := range uids {
if f(x, i) {
out = append(out, x)
}
}
u.Uids = out

if len(u.SortedUids) > 0 {
u.SortedUids = out
} else {
b := sroar.NewBitmap()
b.SetMany(out)
u.Bitmap = codec.ToBytes(b)
}
}

// IndexOf performs a binary search on the uids slice and returns the index at
// which it finds the uid, else returns -1
func IndexOf(u *pb.List, uid uint64) int {
i := sort.Search(len(u.Uids), func(i int) bool { return u.Uids[i] >= uid })
if i < len(u.Uids) && u.Uids[i] == uid {
bm := codec.FromList(u)
// TODO(Ahsan): We might want bm.Rank()
uids := bm.ToArray()
i := sort.Search(len(uids), func(i int) bool { return uids[i] >= uid })
if i < len(uids) && uids[i] == uid {
return i
}
return -1
Expand Down
5 changes: 3 additions & 2 deletions algo/uidlist_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,18 @@ package algo
import (
"testing"

"github.com/dgraph-io/dgraph/codec"
"github.com/dgraph-io/dgraph/protos/pb"
"github.com/stretchr/testify/require"
)

func newList(data []uint64) *pb.List {
return &pb.List{Uids: data}
return &pb.List{SortedUids: data}
}

func TestApplyFilterUint(t *testing.T) {
l := []uint64{1, 2, 3, 4, 5}
u := newList(l)
ApplyFilter(u, func(a uint64, idx int) bool { return (l[idx] % 2) == 1 })
require.Equal(t, []uint64{1, 3, 5}, u.Uids)
require.Equal(t, []uint64{1, 3, 5}, codec.GetUids(u))
}
58 changes: 49 additions & 9 deletions codec/codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,51 @@ func ApproxLen(bitmap []byte) int {

func ToList(rm *sroar.Bitmap) *pb.List {
return &pb.List{
Uids: rm.ToArray(),
// Bitmap: ToBytes(rm),
Bitmap: ToBytes(rm),
}
}

func ToSortedList(rm *sroar.Bitmap) *pb.List {
return &pb.List{
SortedUids: rm.ToArray(),
}
}

func ListCardinality(l *pb.List) uint64 {
if l == nil {
return 0
}
if len(l.SortedUids) > 0 {
return uint64(len(l.SortedUids))
}
b := FromList(l)
return uint64(b.GetCardinality())
}

func OneUid(uid uint64) *pb.List {
bm := sroar.NewBitmap()
bm.Set(uid)
return ToList(bm)
}

func GetUids(l *pb.List) []uint64 {
if l == nil {
return []uint64{}
}
if len(l.SortedUids) > 0 {
return l.SortedUids
}
return FromList(l).ToArray()
}

func BitmapToSorted(l *pb.List) {
if l == nil {
return
}
l.SortedUids = FromList(l).ToArray()
l.Bitmap = nil
}

func And(rm *sroar.Bitmap, l *pb.List) {
rl := FromList(l)
rm.And(rl)
Expand Down Expand Up @@ -94,21 +134,21 @@ func ToBytes(bm *sroar.Bitmap) []byte {
if bm.IsEmpty() {
return nil
}
return bm.ToBuffer()
// TODO: We should not use ToBufferWithCopy always.
return bm.ToBufferWithCopy()
}

func FromList(l *pb.List) *sroar.Bitmap {
iw := sroar.NewBitmap()
if l == nil {
return iw
}

if len(l.BitmapDoNotUse) > 0 {
// Only one of Uids or Bitmap should be defined.
iw = sroar.FromBuffer(l.BitmapDoNotUse)
if len(l.SortedUids) > 0 {
iw.SetMany(l.SortedUids)
}
if len(l.Uids) > 0 {
iw.SetMany(l.Uids)
if len(l.Bitmap) > 0 {
// TODO: We should not use FromBufferWithCopy always.
iw = sroar.FromBufferWithCopy(l.Bitmap)
}
return iw
}
Expand Down
4 changes: 2 additions & 2 deletions edgraph/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1557,8 +1557,8 @@ func processQuery(ctx context.Context, qc *queryContext) (*api.Response, error)
// If the list of UIDs is empty but the map of values is not,
// we need to get the UIDs from the keys in the map.
var uidList []uint64
if v.OrderedUIDs != nil && len(v.OrderedUIDs.Uids) > 0 {
uidList = v.OrderedUIDs.Uids
if v.OrderedUIDs != nil && len(v.OrderedUIDs.SortedUids) > 0 {
uidList = v.OrderedUIDs.SortedUids
} else if !v.UidMap.IsEmpty() {
uidList = v.UidMap.ToArray()
} else {
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.12

// replace github.com/dgraph-io/badger/v3 => /home/mrjn/go/src/github.com/dgraph-io/badger
// replace github.com/dgraph-io/ristretto => /home/mrjn/go/src/github.com/dgraph-io/ristretto
// replace github.com/dgraph-io/roaring => /home/mrjn/go/src/github.com/dgraph-io/roaring
// replace github.com/dgraph-io/sroar => /home/ash/go/src/github.com/dgraph-io/sroar

require (
cloud.google.com/go/storage v1.15.0
Expand All @@ -26,7 +26,7 @@ require (
github.com/dgraph-io/graphql-transport-ws v0.0.0-20210511143556-2cef522f1f15
github.com/dgraph-io/ristretto v0.0.4-0.20210504190834-0bf2acd73aa3
github.com/dgraph-io/simdjson-go v0.3.0
github.com/dgraph-io/sroar v0.0.0-20210522085927-7150620bb343
github.com/dgraph-io/sroar v0.0.0-20210604145002-865050cb7465
github.com/dgrijalva/jwt-go v3.2.0+incompatible
github.com/dgrijalva/jwt-go/v4 v4.0.0-preview1
github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,8 @@ github.com/dgraph-io/ristretto v0.0.4-0.20210504190834-0bf2acd73aa3 h1:jU/wpYsEL
github.com/dgraph-io/ristretto v0.0.4-0.20210504190834-0bf2acd73aa3/go.mod h1:fux0lOrBhrVCJd3lcTHsIJhq1T2rokOu6v9Vcb3Q9ug=
github.com/dgraph-io/simdjson-go v0.3.0 h1:h71LO7vR4LHMPUhuoGN8bqGm1VNfGOlAG8BI6iDUKw0=
github.com/dgraph-io/simdjson-go v0.3.0/go.mod h1:Otpysdjaxj9OGaJusn4pgQV7OFh2bELuHANq0I78uvY=
github.com/dgraph-io/sroar v0.0.0-20210522085927-7150620bb343 h1:sm/HpWavHElAxuO/fqs+uNxqu/1arpmQodwMwc6joFQ=
github.com/dgraph-io/sroar v0.0.0-20210522085927-7150620bb343/go.mod h1:bdNPtQmcxoIQVkZEWZvX0n0/IDlHFab397xdBlP4OoE=
github.com/dgraph-io/sroar v0.0.0-20210604145002-865050cb7465 h1:wj9EEeLJyJnopcmtJlw5TzOcEgudIYLWGDE3cBpYUBQ=
github.com/dgraph-io/sroar v0.0.0-20210604145002-865050cb7465/go.mod h1:bdNPtQmcxoIQVkZEWZvX0n0/IDlHFab397xdBlP4OoE=
github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/dgrijalva/jwt-go/v4 v4.0.0-preview1 h1:CaO/zOnF8VvUfEbhRatPcwKVWamvbYd8tQGRWacE9kU=
Expand Down
33 changes: 18 additions & 15 deletions posting/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -1274,27 +1274,30 @@ func (l *List) Uids(opt ListOptions) (*pb.List, error) {
// Before this, we were only picking math.Int32 number of uids.
// Now we're picking everything.
if opt.First == 0 {
out.Uids = bm.ToArray()
out.Bitmap = codec.ToBytes(bm)
// TODO: Not yet ready to use Bitmap for data transfer. We'd have to deal with all the
// places where List.Uids is being called.
// out.Bitmap = codec.ToBytes(bm)
return out, nil
}

var itr *sroar.Iterator
if opt.First > 0 {
itr = bm.NewIterator()
} else {
itr = bm.NewReverseIterator()
}
num := abs(opt.First)
for len(out.Uids) < num && itr.HasNext() {
out.Uids = append(out.Uids, itr.Next())
num := uint64(abs(opt.First))
sz := uint64(bm.GetCardinality())
if num < sz {
if opt.First > 0 {
x, err := bm.Select(num)
if err != nil {
return nil, errors.Wrap(err, "While selecting Uids")
}
codec.RemoveRange(bm, x, math.MaxUint64)
} else {
x, err := bm.Select(sz - num)
if err != nil {
return nil, errors.Wrap(err, "While selecting Uids")
}
codec.RemoveRange(bm, 0, x)
}
}
return out, nil

// errors.Wrapf(err, "cannot retrieve UIDs from list with key %s",
// hex.EncodeToString(l.key))
return codec.ToList(bm), nil
}

// Postings calls postFn with the postings that are common with
Expand Down
39 changes: 23 additions & 16 deletions posting/list_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -502,8 +502,8 @@ func TestAddMutation_mrjn2(t *testing.T) {
opt := ListOptions{ReadTs: uint64(i)}
list, err := ol.Uids(opt)
require.NoError(t, err)
require.EqualValues(t, 1, len(list.Uids))
require.EqualValues(t, uint64(i), list.Uids[0])
require.EqualValues(t, 1, codec.ListCardinality(list))
require.EqualValues(t, uint64(i), codec.GetUids(list)[0])
}
require.EqualValues(t, 0, ol.Length(readTs, 0))
require.NoError(t, ol.commitMutation(1, 0))
Expand Down Expand Up @@ -557,8 +557,9 @@ func TestAddMutation_mrjn2(t *testing.T) {
opts := ListOptions{ReadTs: 15}
list, err := ol.Uids(opts)
require.NoError(t, err)
require.EqualValues(t, 7, list.Uids[0])
require.EqualValues(t, 9, list.Uids[1])
uids := codec.GetUids(list)
require.EqualValues(t, 7, uids[0])
require.EqualValues(t, 9, uids[1])
}
}

Expand Down Expand Up @@ -1077,8 +1078,9 @@ func TestMultiPartListBasic(t *testing.T) {
opt := ListOptions{ReadTs: uint64(size) + 1}
l, err := ol.Uids(opt)
require.NoError(t, err)
require.Equal(t, commits, len(l.Uids), "List of Uids received: %+v", l.Uids)
for i, uid := range l.Uids {
uids := codec.GetUids(l)
require.Equal(t, commits, len(uids), "List of Uids received: %+v", uids)
for i, uid := range uids {
require.Equal(t, uint64(i+1), uid)
}
}
Expand Down Expand Up @@ -1289,10 +1291,12 @@ func TestMultiPartListWriteToDisk(t *testing.T) {
require.NoError(t, err)
newUids, err := newList.Uids(opt)
require.NoError(t, err)
require.Equal(t, commits, len(originalUids.Uids))
require.Equal(t, len(originalUids.Uids), len(newUids.Uids))
for i := range originalUids.Uids {
require.Equal(t, originalUids.Uids[i], newUids.Uids[i])
origUids := codec.GetUids(originalUids)
newIds := codec.GetUids(newUids)
require.Equal(t, commits, len(origUids))
require.Equal(t, len(origUids), len(newIds))
for i := range origUids {
require.Equal(t, origUids[i], newIds[i])
}
}

Expand Down Expand Up @@ -1355,8 +1359,9 @@ func TestMultiPartListDeleteAndAdd(t *testing.T) {
opt := ListOptions{ReadTs: math.MaxUint64}
l, err := ol.Uids(opt)
require.NoError(t, err)
require.Equal(t, size, len(l.Uids), "List of Uids received: %+v", l.Uids)
for i, uid := range l.Uids {
uids := codec.GetUids(l)
require.Equal(t, size, len(uids), "List of Uids received: %+v", uids)
for i, uid := range uids {
require.Equal(t, uint64(i+1), uid)
}

Expand Down Expand Up @@ -1391,8 +1396,9 @@ func TestMultiPartListDeleteAndAdd(t *testing.T) {
opt = ListOptions{ReadTs: math.MaxUint64}
l, err = ol.Uids(opt)
require.NoError(t, err)
require.Equal(t, size/2, len(l.Uids), "List of Uids received: %+v", l.Uids)
for i, uid := range l.Uids {
uids = codec.GetUids(l)
require.Equal(t, size/2, len(uids), "List of Uids received: %+v", uids)
for i, uid := range uids {
require.Equal(t, uint64(size/2)+uint64(i+1), uid)
}

Expand Down Expand Up @@ -1426,8 +1432,9 @@ func TestMultiPartListDeleteAndAdd(t *testing.T) {
opt = ListOptions{ReadTs: math.MaxUint64}
l, err = ol.Uids(opt)
require.NoError(t, err)
require.Equal(t, size, len(l.Uids), "List of Uids received: %+v", l.Uids)
for i, uid := range l.Uids {
uids = codec.GetUids(l)
require.Equal(t, size, len(uids), "List of Uids received: %+v", uids)
for i, uid := range uids {
require.Equal(t, uint64(i+1), uid)
}
}
Expand Down
7 changes: 4 additions & 3 deletions posting/mvcc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"math"
"testing"

"github.com/dgraph-io/dgraph/codec"
"github.com/dgraph-io/dgraph/protos/pb"
"github.com/dgraph-io/dgraph/x"
"github.com/stretchr/testify/require"
Expand All @@ -38,7 +39,7 @@ func TestRollupTimestamp(t *testing.T) {

uidList, err := l.Uids(ListOptions{ReadTs: 7})
require.NoError(t, err)
require.Equal(t, 3, len(uidList.Uids))
require.Equal(t, uint64(3), codec.ListCardinality(uidList))

edge := &pb.DirectedEdge{
Entity: 1,
Expand All @@ -53,7 +54,7 @@ func TestRollupTimestamp(t *testing.T) {

uidList, err = nl.Uids(ListOptions{ReadTs: 11})
require.NoError(t, err)
require.Equal(t, 0, len(uidList.Uids))
require.Equal(t, uint64(0), codec.ListCardinality(uidList))

// Now check that we don't lost the highest version during a rollup operation, despite the STAR
// delete marker being the most recent update.
Expand All @@ -71,7 +72,7 @@ func TestPostingListRead(t *testing.T) {
require.NoError(t, err)
uidList, err := nl.Uids(ListOptions{ReadTs: uint64(readTs)})
require.NoError(t, err)
require.Equal(t, sz, len(uidList.Uids))
require.Equal(t, uint64(sz), codec.ListCardinality(uidList))
}

addEdgeToUID(t, attr, 1, 2, 1, 2)
Expand Down
5 changes: 3 additions & 2 deletions protos/pb.proto
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ import "github.com/gogo/protobuf/gogoproto/gogo.proto";
/* option (gogoproto.goproto_getters_all) = true; */

message List {
repeated fixed64 uids = 1;
bytes bitmap_do_not_use = 2;
/* repeated fixed64 uids = 1; *1/ */
bytes bitmap = 2;
repeated fixed64 sortedUids = 3;
}

message TaskValue {
Expand Down
Loading

0 comments on commit 3407eb1

Please sign in to comment.