From 63aba948d5052676a9bccab5fbc610652cf8f499 Mon Sep 17 00:00:00 2001 From: Samyak S Sarnayak Date: Wed, 7 Apr 2021 11:11:18 +0530 Subject: [PATCH 1/3] add "random" keyword in GQL for selecting random nodes in graph --- gql/parser.go | 4 ++-- query/query.go | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/gql/parser.go b/gql/parser.go index a1e11cdd5ef..04468a54606 100644 --- a/gql/parser.go +++ b/gql/parser.go @@ -2595,7 +2595,7 @@ func parseLanguageList(it *lex.ItemIterator) ([]string, error) { func validKeyAtRoot(k string) bool { switch k { - case "func", "orderasc", "orderdesc", "first", "offset", "after": + case "func", "orderasc", "orderdesc", "first", "offset", "after", "random": return true case "from", "to", "numpaths", "minweight", "maxweight": // Specific to shortest path @@ -2609,7 +2609,7 @@ func validKeyAtRoot(k string) bool { // Check for validity of key at non-root nodes. func validKey(k string) bool { switch k { - case "orderasc", "orderdesc", "first", "offset", "after": + case "orderasc", "orderdesc", "first", "offset", "after", "random": return true } return false diff --git a/query/query.go b/query/query.go index 9368da565a6..1232b414f3b 100644 --- a/query/query.go +++ b/query/query.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "math" + "math/rand" "sort" "strconv" "strings" @@ -114,6 +115,8 @@ type params struct { Count int // Offset is the value of the "offset" parameter. Offset int + // Random is the value of the "random" parameter + Random int // AfterUID is the value of the "after" parameter. AfterUID uint64 // DoCount is true if the count of the predicate is requested instead of its value. @@ -745,6 +748,15 @@ func (args *params) fill(gq *gql.GraphQuery) error { } args.Count = int(first) } + + if v, ok := gq.Args["random"]; ok { + random_, err := strconv.ParseInt(v, 0, 32) + if err != nil { + return err + } + args.Random = int(random_) + } + return nil } @@ -2298,6 +2310,13 @@ func ProcessGraph(ctx context.Context, sg, parent *SubGraph, rch chan error) { } } + if sg.Params.Random > 0 { + if err = sg.applyRandom(ctx); err != nil { + rch <- err + return + } + } + // Here we consider handling count with filtering. We do this after // pagination because otherwise, we need to do the count with pagination // taken into account. For example, a PL might have only 50 entries but the @@ -2395,6 +2414,34 @@ func ProcessGraph(ctx context.Context, sg, parent *SubGraph, rch chan error) { rch <- childErr } +// applies "random" to lists inside uidMatrix +// Params.Random number of nodes are selected +// duplicates are not removed i.e., random selection by replacement is done +// TODO: error handling here +func (sg *SubGraph) applyRandom(ctx context.Context) error { + sg.updateUidMatrix() + + // store row id -> new uid list mapping + newUids := make(map[int][]uint64) + for i := 0; i < sg.Params.Random; i++ { + randomIdx := rand.Intn(len(sg.uidMatrix)) + randomIdy := rand.Intn(len(sg.uidMatrix[randomIdx].Uids)) + + if newUids[randomIdx] == nil { + newUids[randomIdx] = make([]uint64, 0) + } + + newUids[randomIdx] = append(newUids[randomIdx], sg.uidMatrix[randomIdx].Uids[randomIdy]) + } + + for idx, uids := range newUids { + sg.uidMatrix[idx].Uids = uids + } + + sg.DestMap = codec.Merge(sg.uidMatrix) + return nil +} + // applyPagination applies count and offset to lists inside uidMatrix. func (sg *SubGraph) applyPagination(ctx context.Context) error { if sg.Params.Count == 0 && sg.Params.Offset == 0 { // No pagination. @@ -2638,7 +2685,7 @@ func (sg *SubGraph) sortAndPaginateUsingVar(ctx context.Context) error { func isValidArg(a string) bool { switch a { case "numpaths", "from", "to", "orderasc", "orderdesc", "first", "offset", "after", "depth", - "minweight", "maxweight": + "minweight", "maxweight", "random": return true } return false From 6252918891d85756775d5f1dbf01d9724a8bed3f Mon Sep 17 00:00:00 2001 From: Samyak S Sarnayak Date: Tue, 13 Apr 2021 19:27:31 +0530 Subject: [PATCH 2/3] random query (#7693) - remove duplicates in result and rename random_ To remove duplicates, I had to: - introduce a new struct `UidKey` to store the index of a UID in the matrix - calculate total number of nodes and check if required number of nodes is less than that. If it's not, return all the nodes. - one caveat is that the order is not randomized when the requested number is equal to total number of nodes. --- query/query.go | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/query/query.go b/query/query.go index 1232b414f3b..d43a7fafeb5 100644 --- a/query/query.go +++ b/query/query.go @@ -750,11 +750,11 @@ func (args *params) fill(gq *gql.GraphQuery) error { } if v, ok := gq.Args["random"]; ok { - random_, err := strconv.ParseInt(v, 0, 32) + random, err := strconv.ParseInt(v, 0, 32) if err != nil { return err } - args.Random = int(random_) + args.Random = int(random) } return nil @@ -2414,6 +2414,13 @@ func ProcessGraph(ctx context.Context, sg, parent *SubGraph, rch chan error) { rch <- childErr } +// stores index of a uid as the index in the uidMatrix (x) +// and index in the corresponding list of the uidMatrix (y) +type UidKey struct { + x int + y int +} + // applies "random" to lists inside uidMatrix // Params.Random number of nodes are selected // duplicates are not removed i.e., random selection by replacement is done @@ -2421,12 +2428,38 @@ func ProcessGraph(ctx context.Context, sg, parent *SubGraph, rch chan error) { func (sg *SubGraph) applyRandom(ctx context.Context) error { sg.updateUidMatrix() + // calculate total number of nodes + totalNodes := 0 + for i := range sg.uidMatrix { + totalNodes += len(sg.uidMatrix[i].Uids) + } + // and if the required number of nodes is more (or equal) to that + // then just return all the nodes + // (this can cause an infinite loop if not checked) + if sg.Params.Random >= totalNodes { + return nil + } + // store row id -> new uid list mapping newUids := make(map[int][]uint64) - for i := 0; i < sg.Params.Random; i++ { + + // to keep track of UIDs already selected + selected := make(map[UidKey]bool) + + // keep track of number of nodes selected + numSelected := 0 + + for numSelected < sg.Params.Random { randomIdx := rand.Intn(len(sg.uidMatrix)) randomIdy := rand.Intn(len(sg.uidMatrix[randomIdx].Uids)) + if present := selected[UidKey{randomIdx, randomIdy}]; present { + continue + } else { + selected[UidKey{randomIdx, randomIdy}] = true + numSelected += 1 + } + if newUids[randomIdx] == nil { newUids[randomIdx] = make([]uint64, 0) } From 558ea5d951989c5b50c5765fc48c406bdf0be666 Mon Sep 17 00:00:00 2001 From: Samyak S Sarnayak Date: Thu, 22 Apr 2021 16:41:19 +0530 Subject: [PATCH 3/3] random query (#7693) - simpler random selection the random uids are now selected from each uid list instead of the uid matrix. This simplifies the process. --- query/query.go | 54 ++++++++++++++------------------------------------ 1 file changed, 15 insertions(+), 39 deletions(-) diff --git a/query/query.go b/query/query.go index d43a7fafeb5..b7b423f723f 100644 --- a/query/query.go +++ b/query/query.go @@ -2422,53 +2422,29 @@ type UidKey struct { } // applies "random" to lists inside uidMatrix -// Params.Random number of nodes are selected -// duplicates are not removed i.e., random selection by replacement is done -// TODO: error handling here +// sg.Params.Random number of nodes are selected in each uid list +// duplicates are avoided (random selection without replacement) +// if sg.Params.Random is more than the number of available nodes +// all nodes are returned func (sg *SubGraph) applyRandom(ctx context.Context) error { sg.updateUidMatrix() - // calculate total number of nodes - totalNodes := 0 - for i := range sg.uidMatrix { - totalNodes += len(sg.uidMatrix[i].Uids) - } - // and if the required number of nodes is more (or equal) to that - // then just return all the nodes - // (this can cause an infinite loop if not checked) - if sg.Params.Random >= totalNodes { - return nil - } - - // store row id -> new uid list mapping - newUids := make(map[int][]uint64) - - // to keep track of UIDs already selected - selected := make(map[UidKey]bool) - - // keep track of number of nodes selected - numSelected := 0 + for i := 0; i < len(sg.uidMatrix); i++ { + // shuffle the uid list and select the + // first sg.Params.Random uids - for numSelected < sg.Params.Random { - randomIdx := rand.Intn(len(sg.uidMatrix)) - randomIdy := rand.Intn(len(sg.uidMatrix[randomIdx].Uids)) + uidList := sg.uidMatrix[i].Uids - if present := selected[UidKey{randomIdx, randomIdy}]; present { - continue - } else { - selected[UidKey{randomIdx, randomIdy}] = true - numSelected += 1 - } + rand.Shuffle(len(uidList), func(i, j int) { + uidList[i], uidList[j] = uidList[j], uidList[i] + }) - if newUids[randomIdx] == nil { - newUids[randomIdx] = make([]uint64, 0) + numRandom := sg.Params.Random + if sg.Params.Random > len(uidList) { + numRandom = len(uidList) } - newUids[randomIdx] = append(newUids[randomIdx], sg.uidMatrix[randomIdx].Uids[randomIdy]) - } - - for idx, uids := range newUids { - sg.uidMatrix[idx].Uids = uids + sg.uidMatrix[i].Uids = uidList[:numRandom] } sg.DestMap = codec.Merge(sg.uidMatrix)