8
8
9
9
10
10
def batched_boarders_and_data(
    data_min_size=5,
    data_max_size=10,
    examples_min_number=1,
    examples_max_number=4,
    example_min_size=1,
    example_max_size=3,
    dtype=np.float32,
    elements=None,
):
    """Build a Hypothesis strategy yielding a ``(boarders, data)`` pair.

    Three integers are drawn first: the data size, the number of examples,
    and the size of each example. The returned strategy then produces a
    tuple of two arrays built with ``hu.arrays``:

    * an ``np.int32`` array requested with dimensions
      ``[examples, example_size, 2]`` whose elements are integers drawn
      from ``[0, data_size]``;
    * an array requested with dimensions ``[data_size]`` using ``dtype``
      and ``elements``.

    Args:
        data_min_size: Lower bound (inclusive) for the data size.
        data_max_size: Upper bound (inclusive) for the data size.
        examples_min_number: Lower bound (inclusive) for the example count.
        examples_max_number: Upper bound (inclusive) for the example count.
        example_min_size: Lower bound (inclusive) for the per-example size.
        example_max_size: Upper bound (inclusive) for the per-example size.
        dtype: Passed through to ``hu.arrays`` for the data array.
        elements: Passed through to ``hu.arrays`` for the data array.
            NOTE(review): ``None`` presumably selects the helper's default
            element strategy -- confirm against ``hu.arrays``.

    Returns:
        A strategy obtained by ``flatmap``-ing the drawn dimensions into
        ``st.tuples(boarders_array, data_array)``.
    """
    dims_ = st.tuples(
        st.integers(min_value=data_min_size, max_value=data_max_size),
        st.integers(min_value=examples_min_number, max_value=examples_max_number),
        st.integers(min_value=example_min_size, max_value=example_max_size),
    )
    return dims_.flatmap(
        # dims is (data_size, examples, example_size); the boarder values
        # are bounded by data_size so they stay within the data array.
        lambda dims: st.tuples(
            hu.arrays(
                [dims[1], dims[2], 2],
                dtype=np.int32,
                elements=st.integers(min_value=0, max_value=dims[0]),
            ),
            hu.arrays([dims[0]], dtype, elements),
        )
    )
31
35
32
36
33
37
@st .composite
@@ -45,17 +49,19 @@ def _tensor_splits(draw):
45
49
ranges [pair [0 ]][pair [1 ]] = (offset , lengths [pair [1 ]])
46
50
offset += lengths [pair [1 ]]
47
51
48
- data = draw (st . lists (
49
- st .floats ( min_value = - 1.0 , max_value = 1.0 ),
50
- min_size = offset ,
51
- max_size = offset
52
- ))
52
+ data = draw (
53
+ st .lists (
54
+ st . floats ( min_value = - 1.0 , max_value = 1.0 ), min_size = offset , max_size = offset
55
+ )
56
+ )
53
57
54
58
key = draw (st .permutations (range (offset )))
55
59
56
60
return (
57
- np .array (data ).astype (np .float32 ), np .array (ranges ),
58
- np .array (lengths ), np .array (key ).astype (np .int64 )
61
+ np .array (data ).astype (np .float32 ),
62
+ np .array (ranges ),
63
+ np .array (lengths ),
64
+ np .array (key ).astype (np .int64 ),
59
65
)
60
66
61
67
@@ -107,7 +113,7 @@ def gather_ranges(data, ranges):
107
113
length = 0
108
114
for range in example_ranges :
109
115
assert len (range ) == 2
110
- output .extend (data [range [0 ]: range [0 ] + range [1 ]])
116
+ output .extend (data [range [0 ] : range [0 ] + range [1 ]])
111
117
length += range [1 ]
112
118
lengths .append (length )
113
119
return output , lengths
@@ -128,7 +134,7 @@ def gather_ranges_to_dense(data, ranges, lengths):
128
134
out .append ([0 ] * lengths [i ])
129
135
else :
130
136
assert length == lengths [i ]
131
- out .append (data [start : start + length ])
137
+ out .append (data [start : start + length ])
132
138
outputs .append (np .array (out ))
133
139
return outputs
134
140
@@ -149,8 +155,8 @@ def gather_ranges_to_dense_with_key(data, ranges, key, lengths):
149
155
else :
150
156
assert length == lengths [i ]
151
157
key_data_list = zip (
152
- key [start : start + length ],
153
- data [ start : start + length ] )
158
+ key [start : start + length ], data [ start : start + length ]
159
+ )
154
160
sorted_key_data_list = sorted (key_data_list , key = lambda x : x [0 ])
155
161
sorted_data = [d for (k , d ) in sorted_key_data_list ]
156
162
out .append (sorted_data )
@@ -159,8 +165,7 @@ def gather_ranges_to_dense_with_key(data, ranges, key, lengths):
159
165
160
166
161
167
class TestGatherRanges (serial .SerializedTestCase ):
162
- @serial .given (
163
- boarders_and_data = batched_boarders_and_data (), ** hu .gcs_cpu_only )
168
+ @serial .given (boarders_and_data = batched_boarders_and_data (), ** hu .gcs_cpu_only )
164
169
def test_gather_ranges (self , boarders_and_data , gc , dc ):
165
170
boarders , data = boarders_and_data
166
171
@@ -173,9 +178,9 @@ def boarders_to_range(boarders):
173
178
174
179
self .assertReferenceChecks (
175
180
device_option = gc ,
176
- op = core .CreateOperator ("GatherRanges" ,
177
- ["data" , "ranges" ],
178
- [ "output" , "lengths" ] ),
181
+ op = core .CreateOperator (
182
+ "GatherRanges" , ["data" , "ranges" ], [ "output" , "lengths" ]
183
+ ),
179
184
inputs = [data , ranges ],
180
185
reference = gather_ranges ,
181
186
)
@@ -188,12 +193,12 @@ def test_gather_ranges_split(self, tensor_splits, gc, dc):
188
193
device_option = gc ,
189
194
op = core .CreateOperator (
190
195
"GatherRangesToDense" ,
191
- [' data' , ' ranges' ],
192
- [' X_{}' .format (i ) for i in range (len (lengths ))],
193
- lengths = lengths
196
+ [" data" , " ranges" ],
197
+ [" X_{}" .format (i ) for i in range (len (lengths ))],
198
+ lengths = lengths ,
194
199
),
195
200
inputs = [data , ranges , lengths ],
196
- reference = gather_ranges_to_dense
201
+ reference = gather_ranges_to_dense ,
197
202
)
198
203
199
204
@given (tensor_splits = _tensor_splits (), ** hu .gcs_cpu_only )
@@ -204,24 +209,20 @@ def test_gather_ranges_with_key_split(self, tensor_splits, gc, dc):
204
209
device_option = gc ,
205
210
op = core .CreateOperator (
206
211
"GatherRangesToDense" ,
207
- [' data' , ' ranges' , ' key' ],
208
- [' X_{}' .format (i ) for i in range (len (lengths ))],
209
- lengths = lengths
212
+ [" data" , " ranges" , " key" ],
213
+ [" X_{}" .format (i ) for i in range (len (lengths ))],
214
+ lengths = lengths ,
210
215
),
211
216
inputs = [data , ranges , key , lengths ],
212
- reference = gather_ranges_to_dense_with_key
217
+ reference = gather_ranges_to_dense_with_key ,
213
218
)
214
219
215
220
def test_shape_and_type_inference (self ):
216
221
with hu .temp_workspace ("shape_type_inf_int32" ):
217
- net = core .Net ('test_net' )
218
- net .ConstantFill (
219
- [], "ranges" , shape = [3 , 5 , 2 ], dtype = core .DataType .INT32 ,
220
- )
221
- net .ConstantFill (
222
- [], "values" , shape = [64 ], dtype = core .DataType .INT64 ,
223
- )
224
- net .GatherRanges (['values' , 'ranges' ], ['values_output' , 'lengths_output' ])
222
+ net = core .Net ("test_net" )
223
+ net .ConstantFill ([], "ranges" , shape = [3 , 5 , 2 ], dtype = core .DataType .INT32 )
224
+ net .ConstantFill ([], "values" , shape = [64 ], dtype = core .DataType .INT64 )
225
+ net .GatherRanges (["values" , "ranges" ], ["values_output" , "lengths_output" ])
225
226
(shapes , types ) = workspace .InferShapesAndTypes ([net ], {})
226
227
227
228
self .assertEqual (shapes ["values_output" ], [64 ])
@@ -238,7 +239,10 @@ def test_empty_range_check(self, tensor_splits, gc, dc):
238
239
workspace .FeedBlob ("key" , key )
239
240
240
241
def getOpWithThreshold (
241
- min_observation = 2 , max_empty_ratio = 0.3 , max_mismatched_ratio = 0.6
242
+ min_observation = 2 ,
243
+ max_empty_ratio = 0.3 ,
244
+ max_mismatched_ratio = 0.6 ,
245
+ log_every_n = 1 ,
242
246
):
243
247
return core .CreateOperator (
244
248
"GatherRangesToDense" ,
@@ -248,13 +252,19 @@ def getOpWithThreshold(
248
252
min_observation = min_observation ,
249
253
max_empty_ratio = max_empty_ratio ,
250
254
max_mismatched_ratio = max_mismatched_ratio ,
255
+ log_every_n = log_every_n ,
251
256
)
252
257
253
258
workspace .RunOperatorOnce (getOpWithThreshold ())
254
259
255
- # A critical log should be triggered by this setting.
260
+ # An error log should be triggered by each feature in this setting.
256
261
workspace .RunOperatorOnce (getOpWithThreshold (max_empty_ratio = 0.2 ))
257
262
263
+ # Error logs should be triggered only half the time.
264
+ workspace .RunOperatorOnce (
265
+ getOpWithThreshold (max_empty_ratio = 0.2 , log_every_n = 2 )
266
+ )
267
+
258
268
workspace .RunOperatorOnce (
259
269
getOpWithThreshold (
260
270
max_empty_ratio = 0.2 , max_mismatched_ratio = 0.4 , min_observation = 5
0 commit comments