Skip to content

Commit d9d9387

Browse files
alonre24DvirDukhan
authored andcommitted
Merge pull request #575 from RedisAI/Valgrind_cleaning
Valgrind cleaning
1 parent dea14d8 commit d9d9387

File tree

12 files changed

+141
-51
lines changed

12 files changed

+141
-51
lines changed

.circleci/config.yml

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,63 @@ jobs:
201201
- run:
202202
name: Test with valgrind
203203
command: |
204-
make -C opt test VALGRIND=1
204+
make -C opt test VALGRIND=1 CLUSTER=0 AOF=0
205+
no_output_timeout: 120m
206+
207+
valgrind-cluster:
208+
docker:
209+
- image: redisfab/rmbuilder:6.0.5-x64-buster
210+
steps:
211+
- checkout
212+
- run:
213+
name: Submodule checkout
214+
command: git submodule update --init --recursive
215+
- restore_cache:
216+
keys:
217+
- build-dependencies-{{ checksum "get_deps.sh" }}
218+
# If no exact match is found will get dependencies from source
219+
- setup-automation
220+
- run:
221+
name: Install dependencies
222+
command: |
223+
./opt/readies/bin/getredis -v 6 --valgrind --force
224+
./get_deps.sh cpu
225+
- run:
226+
name: Build for valgrind with cluster
227+
command: |
228+
make -C opt all VALGRIND=1 SHOW=1
229+
- run:
230+
name: Test with valgrind and cluster
231+
command: |
232+
make -C opt test VALGRIND=1 GEN=0 AOF=0
233+
no_output_timeout: 120m
234+
235+
valgrind-AOF:
236+
docker:
237+
- image: redisfab/rmbuilder:6.0.5-x64-buster
238+
steps:
239+
- checkout
240+
- run:
241+
name: Submodule checkout
242+
command: git submodule update --init --recursive
243+
- restore_cache:
244+
keys:
245+
- build-dependencies-{{ checksum "get_deps.sh" }}
246+
# If no exact match is found will get dependencies from source
247+
- setup-automation
248+
- run:
249+
name: Install dependencies
250+
command: |
251+
./opt/readies/bin/getredis -v 6 --valgrind --force
252+
./get_deps.sh cpu
253+
- run:
254+
name: Build for valgrind with AOF
255+
command: |
256+
make -C opt all VALGRIND=1 SHOW=1
257+
- run:
258+
name: Test with valgrind and AOF
259+
command: |
260+
make -C opt test VALGRIND=1 GEN=0 CLUSTER=0
205261
no_output_timeout: 120m
206262

207263
build-macos:
@@ -406,7 +462,13 @@ workflows:
406462
<<: *on-any-branch
407463
<<: *after-linter
408464
- valgrind:
409-
<<: *on-master
465+
<<: *on-any-branch
466+
<<: *after-linter
467+
- valgrind-cluster:
468+
<<: *on-integ-branch
469+
<<: *after-linter
470+
- valgrind-AOF:
471+
<<: *on-integ-branch
410472
<<: *after-linter
411473
- build-and-test-gpu:
412474
<<: *on-any-branch

opt/redis_valgrind.sup

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,13 @@
2626
obj:*/libtorch.so.*
2727
}
2828

29+
{
30+
ignore_unversioned_libs
31+
Memcheck:Overlap
32+
...
33+
obj:*/libtorch_cpu.so*
34+
}
35+
2936
{
3037
ignore_unversioned_libs
3138
Memcheck:Leak

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ file (GLOB BACKEND_COMMON_SRC
1616
err.c
1717
util/dict.c
1818
tensor.c
19+
util/string_utils.c
1920
serialization/ai_datatypes.c)
2021

2122
ADD_LIBRARY(redisai_obj OBJECT

src/DAG/dag.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,7 @@ static int _StoreTensorInKeySpace(RedisModuleCtx *ctx, RAI_Tensor *tensor,
122122
RedisModule_ReplyWithError(ctx, "ERR could not save tensor");
123123
goto clean_up;
124124
}
125-
if (RedisModule_ModuleTypeSetValue(key, RedisAI_TensorType, RAI_TensorGetShallowCopy(tensor)) !=
126-
REDISMODULE_OK) {
125+
if (RedisModule_ModuleTypeSetValue(key, RedisAI_TensorType, tensor) != REDISMODULE_OK) {
127126
RedisModule_ReplyWithError(ctx, "ERR could not save tensor");
128127
RedisModule_CloseKey(key);
129128
goto clean_up;
@@ -152,6 +151,7 @@ static void _DAG_PersistTensors(RedisModuleCtx *ctx, RedisAI_RunInfo *rinfo) {
152151
persist_entry = AI_dictNext(persist_iter);
153152
continue;
154153
}
154+
tensor = RAI_TensorGetShallowCopy(tensor);
155155
if (_StoreTensorInKeySpace(ctx, tensor, persist_key_name, true) == REDISMODULE_ERR) {
156156
*rinfo->dagError = 1;
157157
RedisModule_Log(ctx, "warning",

src/backends/tensorflow.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ RAI_Model *RAI_ModelCreateTF(RAI_Backend backend, const char *devicestr, RAI_Mod
247247

248248
for (size_t i = 0; i < ninputs; ++i) {
249249
TF_Operation *oper = TF_GraphOperationByName(model, inputs[i]);
250-
if (oper == NULL) {
250+
if (oper == NULL || strcmp(TF_OperationOpType(oper), "Placeholder") != 0) {
251251
size_t len = strlen(inputs[i]);
252252
char *msg = RedisModule_Calloc(60 + len, sizeof(*msg));
253253
sprintf(msg, "ERR Input node named \"%s\" not found in TF graph.", inputs[i]);

src/libtorch_c/torch_extensions/torch_redis.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ torch::IValue redisExecute(std::string fn_name, std::vector<std::string> args )
5656
RedisModuleCallReply *reply = RedisModule_Call(ctx, fn_name.c_str(), "!v", arguments, len);
5757
RedisModule_ThreadSafeContextUnlock(ctx);
5858
torch::IValue value = IValueFromRedisReply(reply);
59-
RedisModule_FreeThreadSafeContext(ctx);
6059
RedisModule_FreeCallReply(reply);
60+
RedisModule_FreeThreadSafeContext(ctx);
6161
for(int i= 0; i < len; i++){
6262
RedisModule_FreeString(NULL, arguments[i]);
6363
}

src/tensor.c

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "rmutil/alloc.h"
1616
#include "tensor_struct.h"
1717
#include "util/dict.h"
18+
#include "util/string_utils.h"
1819
#include <assert.h>
1920
#include <pthread.h>
2021
#include <stddef.h>
@@ -96,7 +97,8 @@ int Tensor_DataTypeStr(DLDataType dtype, char *dtypestr) {
9697

9798
RAI_Tensor *RAI_TensorCreateWithDLDataType(DLDataType dtype, long long *dims, int ndims,
9899
int tensorAllocMode) {
99-
const size_t dtypeSize = Tensor_DataTypeSize(dtype);
100+
101+
size_t dtypeSize = Tensor_DataTypeSize(dtype);
100102
if (dtypeSize == 0) {
101103
return NULL;
102104
}
@@ -132,11 +134,6 @@ RAI_Tensor *RAI_TensorCreateWithDLDataType(DLDataType dtype, long long *dims, in
132134
break;
133135
}
134136

135-
if (tensorAllocMode != TENSORALLOC_NONE && data == NULL) {
136-
RedisModule_Free(ret);
137-
return NULL;
138-
}
139-
140137
ret->tensor = (DLManagedTensor){.dl_tensor = (DLTensor){.ctx = ctx,
141138
.data = data,
142139
.ndim = ndims,
@@ -162,18 +159,14 @@ void RAI_RStringDataTensorDeleter(DLManagedTensor *arg) {
162159
RedisModuleString *rstr = (RedisModuleString *)arg->manager_ctx;
163160
RedisModule_FreeString(NULL, rstr);
164161
}
165-
162+
RedisModule_Free(arg->dl_tensor.data);
166163
RedisModule_Free(arg);
167164
}
168165

169-
RAI_Tensor *RAI_TensorCreateWithDLDataTypeAndRString(DLDataType dtype, long long *dims, int ndims,
170-
RedisModuleString *rstr) {
171-
const size_t dtypeSize = Tensor_DataTypeSize(dtype);
172-
if (dtypeSize == 0) {
173-
return NULL;
174-
}
166+
RAI_Tensor *_TensorCreateWithDLDataTypeAndRString(DLDataType dtype, size_t dtypeSize,
167+
long long *dims, int ndims,
168+
RedisModuleString *rstr, RAI_Error *err) {
175169

176-
RAI_Tensor *ret = RedisModule_Alloc(sizeof(*ret));
177170
int64_t *shape = RedisModule_Alloc(ndims * sizeof(*shape));
178171
int64_t *strides = RedisModule_Alloc(ndims * sizeof(*strides));
179172

@@ -188,9 +181,21 @@ RAI_Tensor *RAI_TensorCreateWithDLDataTypeAndRString(DLDataType dtype, long long
188181
}
189182

190183
DLContext ctx = (DLContext){.device_type = kDLCPU, .device_id = 0};
184+
size_t nbytes = len * dtypeSize;
185+
186+
size_t blob_len;
187+
const char *blob = RedisModule_StringPtrLen(rstr, &blob_len);
188+
if (blob_len != nbytes) {
189+
RedisModule_Free(shape);
190+
RedisModule_Free(strides);
191+
RAI_SetError(err, RAI_ETENSORSET, "ERR data length does not match tensor shape and type");
192+
return NULL;
193+
}
194+
char *data = RedisModule_Alloc(nbytes);
195+
memcpy(data, blob, nbytes);
196+
RAI_HoldString(NULL, rstr);
191197

192-
char *data = (char *)RedisModule_StringPtrLen(rstr, NULL);
193-
198+
RAI_Tensor *ret = RedisModule_Alloc(sizeof(*ret));
194199
ret->tensor = (DLManagedTensor){.dl_tensor = (DLTensor){.ctx = ctx,
195200
.data = data,
196201
.ndim = ndims,
@@ -637,13 +642,16 @@ int RAI_parseTensorSetArgs(RedisModuleString **argv, int argc, RAI_Tensor **t, i
637642
RAI_SetError(error, RAI_ETENSORSET, "wrong number of arguments for 'AI.TENSORSET' command");
638643
return -1;
639644
}
645+
640646
// get the tensor datatype
641647
const char *typestr = RedisModule_StringPtrLen(argv[2], NULL);
642-
size_t datasize = RAI_TensorDataSizeFromString(typestr);
643-
if (!datasize) {
648+
DLDataType datatype = RAI_TensorDataTypeFromString(typestr);
649+
size_t datasize = Tensor_DataTypeSize(datatype);
650+
if (datasize == 0) {
644651
RAI_SetError(error, RAI_ETENSORSET, "ERR invalid data type");
645652
return -1;
646653
}
654+
647655
const char *fmtstr;
648656
int datafmt = TENSOR_NONE;
649657
int tensorAllocMode = TENSORALLOC_CALLOC;
@@ -699,23 +707,17 @@ int RAI_parseTensorSetArgs(RedisModuleString **argv, int argc, RAI_Tensor **t, i
699707
}
700708
}
701709

702-
const long long nbytes = len * datasize;
703-
size_t datalen;
704-
const char *data;
705-
DLDataType datatype = RAI_TensorDataTypeFromString(typestr);
706710
if (datafmt == TENSOR_BLOB) {
707711
RedisModuleString *rstr = argv[argpos];
708-
RedisModule_RetainString(NULL, rstr);
709-
*t = RAI_TensorCreateWithDLDataTypeAndRString(datatype, dims, ndims, rstr);
712+
*t = _TensorCreateWithDLDataTypeAndRString(datatype, datasize, dims, ndims, rstr, error);
710713
} else {
711714
*t = RAI_TensorCreateWithDLDataType(datatype, dims, ndims, tensorAllocMode);
712715
}
713-
714-
if (!t) {
716+
if (!(*t)) {
715717
array_free(dims);
716-
RAI_SetError(error, RAI_ETENSORSET, "ERR could not create tensor");
717718
return -1;
718719
}
720+
719721
long i = 0;
720722
if (datafmt == TENSOR_VALUES) {
721723
for (; (argpos <= argc - 1) && (i < len); argpos++) {

tests/flow/tests.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ valgrind_config() {
7272

7373
RLTEST_ARGS+="\
7474
--use-valgrind \
75-
--vg-no-fail-on-errors \
7675
--vg-suppressions $VALGRIND_SUPRESSIONS"
7776
}
7877

tests/flow/tests_common.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def test_common_tensorset_error_replies(env):
5050
try:
5151
con.execute_command('SET','non-tensor','value')
5252
con.execute_command('AI.TENSORSET', 'non-tensor', 'INT32', 2, 'unsupported', 2, 3)
53+
env.assertFalse(True)
5354
except Exception as e:
5455
exception = e
5556
env.assertEqual(type(exception), redis.exceptions.ResponseError)
@@ -58,6 +59,7 @@ def test_common_tensorset_error_replies(env):
5859
# ERR invalid data type
5960
try:
6061
con.execute_command('AI.TENSORSET', 'z', 'INT128', 2, 'VALUES', 2, 3)
62+
env.assertFalse(True)
6163
except Exception as e:
6264
exception = e
6365
env.assertEqual(type(exception), redis.exceptions.ResponseError)
@@ -66,6 +68,7 @@ def test_common_tensorset_error_replies(env):
6668
# ERR invalid or negative value found in tensor shape
6769
try:
6870
con.execute_command('AI.TENSORSET', 'z', 'INT32', -1, 'VALUES', 2, 3)
71+
env.assertFalse(True)
6972
except Exception as e:
7073
exception = e
7174
env.assertEqual(type(exception), redis.exceptions.ResponseError)
@@ -74,6 +77,7 @@ def test_common_tensorset_error_replies(env):
7477
# ERR invalid argument found in tensor shape
7578
try:
7679
con.execute_command('AI.TENSORSET', 'z', 'INT32', 2, 'unsupported', 2, 3)
80+
env.assertFalse(True)
7781
except Exception as e:
7882
exception = e
7983
env.assertEqual(type(exception), redis.exceptions.ResponseError)
@@ -82,6 +86,7 @@ def test_common_tensorset_error_replies(env):
8286
# ERR invalid value
8387
try:
8488
con.execute_command('AI.TENSORSET', 'z', 'FLOAT', 2, 'VALUES', 2, 'A')
89+
env.assertFalse(True)
8590
except Exception as e:
8691
exception = e
8792
env.assertEqual(type(exception), redis.exceptions.ResponseError)
@@ -90,56 +95,58 @@ def test_common_tensorset_error_replies(env):
9095
# ERR invalid value
9196
try:
9297
con.execute_command('AI.TENSORSET', 'z', 'INT32', 2, 'VALUES', 2, 'A')
98+
env.assertFalse(True)
9399
except Exception as e:
94100
exception = e
95101
env.assertEqual(type(exception), redis.exceptions.ResponseError)
96102
env.assertEqual(exception.__str__(), "invalid value")
97103

98104
try:
99105
con.execute_command('AI.TENSORSET', 1)
106+
env.assertFalse(True)
100107
except Exception as e:
101108
exception = e
102109
env.assertEqual(type(exception), redis.exceptions.ResponseError)
103110

104111
try:
105112
con.execute_command('AI.TENSORSET', 'y', 'FLOAT')
106-
except Exception as e:
107-
exception = e
108-
env.assertEqual(type(exception), redis.exceptions.ResponseError)
109-
110-
try:
111-
con.execute_command('AI.TENSORSET', 'y', 'FLOAT', '2')
113+
env.assertFalse(True)
112114
except Exception as e:
113115
exception = e
114116
env.assertEqual(type(exception), redis.exceptions.ResponseError)
115117

116118
try:
117119
con.execute_command('AI.TENSORSET', 'y', 'FLOAT', 2, 'VALUES')
120+
env.assertFalse(True)
118121
except Exception as e:
119122
exception = e
120123
env.assertEqual(type(exception), redis.exceptions.ResponseError)
121124

122125
try:
123126
con.execute_command('AI.TENSORSET', 'y', 'FLOAT', 2, 'VALUES', 1)
127+
env.assertFalse(True)
124128
except Exception as e:
125129
exception = e
126130
env.assertEqual(type(exception), redis.exceptions.ResponseError)
127131

128132
try:
129133
con.execute_command('AI.TENSORSET', 'y', 'FLOAT', 2, 'VALUES', '1')
134+
env.assertFalse(True)
130135
except Exception as e:
131136
exception = e
132137
env.assertEqual(type(exception), redis.exceptions.ResponseError)
133138

134139
try:
135140
con.execute_command('AI.TENSORSET', 'blob_tensor_moreargs', 'FLOAT', 2, 'BLOB', '\x00', 'extra-argument')
141+
env.assertFalse(True)
136142
except Exception as e:
137143
exception = e
138144
env.assertEqual(type(exception), redis.exceptions.ResponseError)
139145
env.assertEqual("wrong number of arguments for 'AI.TENSORSET' command", exception.__str__())
140146

141147
try:
142148
con.execute_command('AI.TENSORSET', 'blob_tensor_lessargs', 'FLOAT', 2, 'BLOB')
149+
env.assertFalse(True)
143150
except Exception as e:
144151
exception = e
145152
env.assertEqual(type(exception), redis.exceptions.ResponseError)
@@ -148,6 +155,7 @@ def test_common_tensorset_error_replies(env):
148155
# ERR data length does not match tensor shape and type
149156
try:
150157
con.execute_command('AI.TENSORSET', 'sample_raw_wrong_blob_for_dim', 'FLOAT', 1, 1, 28, 280, 'BLOB', sample_raw)
158+
env.assertFalse(True)
151159
except Exception as e:
152160
exception = e
153161
env.assertEqual(type(exception), redis.exceptions.ResponseError)

tests/flow/tests_dag.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ def test_dagrun_ro_modelrun_scriptrun_resnet(env):
230230
if(VALGRIND):
231231
env.debugPrint("skipping {} since it's hanging CI".format(sys._getframe().f_code.co_name), force=True)
232232
env.skip()
233+
233234
con = env.getConnection()
234235
model_name = 'imagenet_model{{1}}'
235236
script_name = 'imagenet_script{{1}}'

0 commit comments

Comments
 (0)