From 4168eaac216f03c6bf09c824ea44049b113a0bd5 Mon Sep 17 00:00:00 2001 From: Aman Mangal Date: Fri, 27 Sep 2019 17:00:17 +0530 Subject: [PATCH] Fix data race in regular expression processing --- query/common_test.go | 14 +++++++++++ query/query3_test.go | 58 ++++++++++++++++++++++++++++++++++++++++++++ worker/task.go | 21 +++++++++++----- 3 files changed, 87 insertions(+), 6 deletions(-) diff --git a/query/common_test.go b/query/common_test.go index 7c1577a0da8..51e2562293d 100644 --- a/query/common_test.go +++ b/query/common_test.go @@ -264,6 +264,8 @@ previous_model : uid @reverse . created_at : datetime @index(hour) . updated_at : datetime @index(year) . number : int @index(int) . +firstName : string . +lastName : string . ` func populateCluster() { @@ -548,6 +550,18 @@ func populateCluster() { <202> "Prius" . <202> "プリウス"@jp . <202> "CarModel" . + + # data for regexp testing + _:luke "Luke" . + _:luke "Skywalker" . + _:leia "Princess" . + _:leia "Leia" . + _:han "Han" . + _:han "Solo" . + _:har "Harrison" . + _:har "Ford" . + _:ss "Steven" . + _:ss "Spielberg" . `) addGeoPointToCluster(1, "loc", []float64{1.1, 2.0}) diff --git a/query/query3_test.go b/query/query3_test.go index f5e9ed9b16a..a7b15ed3895 100644 --- a/query/query3_test.go +++ b/query/query3_test.go @@ -2137,3 +2137,61 @@ func TestQueryMultipleTypes(t *testing.T) { {"name":"Person", "fields":[{"name":"name", "type": "default"}, {"name":"pet", "type":"default"}]}]}}`, js) } + +func TestRegexInFilterNoDataOnRoot(t *testing.T) { + query := ` + { + q(func: has(nonExistent)) @filter(regexp(make, /.*han/i)) { + uid + firstName + lastName + } + } + ` + res := processQueryNoErr(t, query) + require.JSONEq(t, `{"data":{"q":[]}}`, res) +} + +func TestRegexInFilterIndexedPredOnRoot(t *testing.T) { + query := ` + { + q(func: regexp(name, /.*nonExistent/i)) { + uid + firstName + lastName + } + } + ` + res := processQueryNoErr(t, query) + require.JSONEq(t, `{"data":{"q":[]}}`, res) +} + +func TestMultiRegexInFilter(t *testing.T) { + query := ` + { + q(func: has(full_name)) @filter(regexp(full_name, /.*michonne/i) OR regexp(name, /.*michonne/i)) { + expand(_all_) + } + } + ` + res := processQueryNoErr(t, query) + require.JSONEq(t, `{"data": {"q": [{"name": "Michonne"}]}}`, res) +} + +func TestMultiRegexInFilter2(t *testing.T) { + query := ` + { + q(func: has(firstName)) @filter(regexp(firstName, /.*han/i) OR regexp(lastName, /.*han/i)) { + firstName + lastName + } + } + ` + + // run 20 times ensure that there is no data race + // https://github.com/dgraph-io/dgraph/issues/4030 + for i := 0; i < 20; i++ { + res := processQueryNoErr(t, query) + require.JSONEq(t, `{"data": {"q": [{"firstName": "Han", "lastName":"Solo"}]}}`, res) + } +} diff --git a/worker/task.go b/worker/task.go index 7a245550b17..63e565576a2 100644 --- a/worker/task.go +++ b/worker/task.go @@ -772,8 +772,9 @@ type queryState struct { cache *posting.LocalCache } -func (qs *queryState) helpProcessTask( - ctx context.Context, q *pb.Query, gid uint32) (*pb.Result, error) { +func (qs *queryState) helpProcessTask(ctx context.Context, q *pb.Query, gid uint32) ( + *pb.Result, error) { + span := otrace.FromContext(ctx) out := new(pb.Result) attr := q.Attr @@ -854,8 +855,6 @@ func (qs *queryState) helpProcessTask( } if srcFn.fnType == regexFn { - // Go through the indexkeys for the predicate and match them with - // the regex matcher. span.Annotate(nil, "handleRegexFunction") if err := qs.handleRegexFunction(ctx, funcArgs{q, gid, srcFn, out}); err != nil { return nil, err @@ -961,8 +960,18 @@ func (qs *queryState) handleRegexFunction(ctx context.Context, arg funcArgs) err // Here we determine the list of uids to match. switch { // If this is a filter eval, use the given uid list (good) - case arg.q.UidList != nil && len(arg.q.UidList.Uids) != 0: - uids = arg.q.UidList + case arg.q.UidList != nil: + // These UIDs are copied into arg.out.UidMatrix which is later updated while + // processing the query. The below trick makes a copy of the list to avoid the + // race conditions later. I (Aman) did a race condition tests to ensure that we + // do not have more race condition in similar code in the rest of the file. + // The race condition was found only here because in filter condition, even when + // predicates do not have indexes, we allow regexp queries (for example, we do + // not support eq/gt/lt/le in @filter, see #4077), and this was new code that + // was added just to support the aforementioned case, the race condition is only + // in this part of the code. + uids = &pb.List{} + uids.Uids = append(arg.q.UidList.Uids[:0:0], arg.q.UidList.Uids...) // Prefer to use an index (fast) case useIndex: