Skip to content

Commit f5ea2ca

Browse files
Xinyi Zhangfacebook-github-bot
Xinyi Zhang
authored andcommitted
Reduce logging frequency for empty range tolarence
Summary: Pull Request resolved: pytorch#28704 Reviewed By: xianjiec Differential Revision: D18138828 fbshipit-source-id: 4f3c376502cb6e30b931217702c4ca537c9eb644
1 parent 7ed9a3e commit f5ea2ca

File tree

4 files changed

+84
-58
lines changed

4 files changed

+84
-58
lines changed

c10/util/Logging.h

+7
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,13 @@ C10_DECLARE_bool(caffe2_use_fatal_for_enforce);
4747
#define C10_LOG_FIRST_N(severity, n) LOG(severity)
4848
#endif
4949

50+
// Same for LOG_EVERY_N
51+
#ifdef LOG_EVERY_N
52+
#define C10_LOG_EVERY_N(severity, n) LOG_EVERY_N(severity, n)
53+
#else
54+
#define C10_LOG_EVERY_N(severity, n) LOG(severity)
55+
#endif
56+
5057
namespace c10 {
5158

5259
using std::string;

caffe2/operators/gather_ranges_to_dense_op.cc

+4-1
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,13 @@ are sorted by the corresponding KEY.
7474
"empty ranges or mismatched ranges is alarming.")
7575
.Arg(
7676
"max_empty_ratio",
77-
"An crital log is triggered when ratio of empty ranges exceeds this.")
77+
"An error log is triggered when ratio of empty ranges exceeds this.")
7878
.Arg(
7979
"max_mismatched_ratio",
8080
"An error is raised when ratio of mismatched ranges exceeds this.")
81+
.Arg(
82+
"log_every_n",
83+
"A log is recorded only after an error is triggered every n times.")
8184
.TensorInferenceFunction([](const OperatorDef& def,
8285
const vector<TensorShape>& in) {
8386
ArgumentHelper helper(def);

caffe2/operators/gather_ranges_to_dense_op.h

+15-9
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,12 @@ class GatherRangesToDenseOp final : public Operator<Context> {
3030
this->template GetSingleArgument<float>("max_empty_ratio", 0.9)),
3131
maxMismatchedRatio_(this->template GetSingleArgument<float>(
3232
"max_mismatched_ratio",
33-
0.01)) {
33+
0.01)),
34+
// This number of log_every_n is intentionally set to a prime number
35+
// so that the log will be trigger on all features eventually if
36+
// multiple features are corrupt.
37+
logEveryN_(
38+
this->template GetSingleArgument<int64_t>("log_every_n", 4999)) {
3439
CAFFE_ENFORCE_GT(lengths_.size(), 0, "There has to be at least one length");
3540
for (auto length : lengths_) {
3641
CAFFE_ENFORCE_GT(length, 0, "Each length should be positive");
@@ -45,7 +50,7 @@ class GatherRangesToDenseOp final : public Operator<Context> {
4550
}
4651

4752
~GatherRangesToDenseOp() noexcept override {
48-
if (totalRanges_ > 0) {
53+
if (totalRanges_ > minObservation_) {
4954
LOG(INFO) << "In GatherRangesToDenseOp:\n"
5055
<< " Lifetime empty ranges for each feature is "
5156
<< emptyRanges_ << ".\n"
@@ -173,13 +178,13 @@ class GatherRangesToDenseOp final : public Operator<Context> {
173178
") which exceeds ",
174179
maxMismatchedRatio_);
175180
if (totalRanges_ * maxEmptyRatio_ <= emptyRanges_[j]) {
176-
LOG(ERROR) << "Ratio of empty range for feature at index " << j
177-
<< " is "
178-
<< (static_cast<double>(emptyRanges_[j]) /
179-
static_cast<double>(totalRanges_))
180-
<< " (" << emptyRanges_[j] << "/" << totalRanges_
181-
<< ") which exceeds " << maxEmptyRatio_ << "\n"
182-
<< this->getErrorMsg();
181+
C10_LOG_EVERY_N(ERROR, logEveryN_)
182+
<< "Ratio of empty range for feature at index " << j << " is "
183+
<< (static_cast<double>(emptyRanges_[j]) /
184+
static_cast<double>(totalRanges_))
185+
<< " (" << emptyRanges_[j] << "/" << totalRanges_
186+
<< ") which exceeds " << maxEmptyRatio_ << "\n"
187+
<< this->getErrorMsg();
183188
}
184189
}
185190
}
@@ -200,6 +205,7 @@ class GatherRangesToDenseOp final : public Operator<Context> {
200205
int64_t minObservation_ = 0;
201206
float maxEmptyRatio_ = 0;
202207
float maxMismatchedRatio_ = 0;
208+
int64_t logEveryN_ = 0;
203209
};
204210

205211
} // namespace caffe2

caffe2/python/operator_test/gather_ranges_op_test.py

+58-48
Original file line numberDiff line numberDiff line change
@@ -8,26 +8,30 @@
88

99

1010
def batched_boarders_and_data(
11-
data_min_size=5, data_max_size=10,
12-
examples_min_number=1, examples_max_number=4,
13-
example_min_size=1, example_max_size=3,
14-
dtype=np.float32, elements=None):
11+
data_min_size=5,
12+
data_max_size=10,
13+
examples_min_number=1,
14+
examples_max_number=4,
15+
example_min_size=1,
16+
example_max_size=3,
17+
dtype=np.float32,
18+
elements=None,
19+
):
1520
dims_ = st.tuples(
16-
st.integers(min_value=data_min_size,
17-
max_value=data_max_size),
18-
st.integers(min_value=examples_min_number,
19-
max_value=examples_max_number),
20-
st.integers(min_value=example_min_size,
21-
max_value=example_max_size),
21+
st.integers(min_value=data_min_size, max_value=data_max_size),
22+
st.integers(min_value=examples_min_number, max_value=examples_max_number),
23+
st.integers(min_value=example_min_size, max_value=example_max_size),
2224
)
2325
return dims_.flatmap(
2426
lambda dims: st.tuples(
2527
hu.arrays(
26-
[dims[1], dims[2], 2], dtype=np.int32,
27-
elements=st.integers(min_value=0, max_value=dims[0])
28+
[dims[1], dims[2], 2],
29+
dtype=np.int32,
30+
elements=st.integers(min_value=0, max_value=dims[0]),
2831
),
29-
hu.arrays([dims[0]], dtype, elements)
30-
))
32+
hu.arrays([dims[0]], dtype, elements),
33+
)
34+
)
3135

3236

3337
@st.composite
@@ -45,17 +49,19 @@ def _tensor_splits(draw):
4549
ranges[pair[0]][pair[1]] = (offset, lengths[pair[1]])
4650
offset += lengths[pair[1]]
4751

48-
data = draw(st.lists(
49-
st.floats(min_value=-1.0, max_value=1.0),
50-
min_size=offset,
51-
max_size=offset
52-
))
52+
data = draw(
53+
st.lists(
54+
st.floats(min_value=-1.0, max_value=1.0), min_size=offset, max_size=offset
55+
)
56+
)
5357

5458
key = draw(st.permutations(range(offset)))
5559

5660
return (
57-
np.array(data).astype(np.float32), np.array(ranges),
58-
np.array(lengths), np.array(key).astype(np.int64)
61+
np.array(data).astype(np.float32),
62+
np.array(ranges),
63+
np.array(lengths),
64+
np.array(key).astype(np.int64),
5965
)
6066

6167

@@ -107,7 +113,7 @@ def gather_ranges(data, ranges):
107113
length = 0
108114
for range in example_ranges:
109115
assert len(range) == 2
110-
output.extend(data[range[0]:range[0] + range[1]])
116+
output.extend(data[range[0] : range[0] + range[1]])
111117
length += range[1]
112118
lengths.append(length)
113119
return output, lengths
@@ -128,7 +134,7 @@ def gather_ranges_to_dense(data, ranges, lengths):
128134
out.append([0] * lengths[i])
129135
else:
130136
assert length == lengths[i]
131-
out.append(data[start:start + length])
137+
out.append(data[start : start + length])
132138
outputs.append(np.array(out))
133139
return outputs
134140

@@ -149,8 +155,8 @@ def gather_ranges_to_dense_with_key(data, ranges, key, lengths):
149155
else:
150156
assert length == lengths[i]
151157
key_data_list = zip(
152-
key[start:start + length],
153-
data[start:start + length])
158+
key[start : start + length], data[start : start + length]
159+
)
154160
sorted_key_data_list = sorted(key_data_list, key=lambda x: x[0])
155161
sorted_data = [d for (k, d) in sorted_key_data_list]
156162
out.append(sorted_data)
@@ -159,8 +165,7 @@ def gather_ranges_to_dense_with_key(data, ranges, key, lengths):
159165

160166

161167
class TestGatherRanges(serial.SerializedTestCase):
162-
@serial.given(
163-
boarders_and_data=batched_boarders_and_data(), **hu.gcs_cpu_only)
168+
@serial.given(boarders_and_data=batched_boarders_and_data(), **hu.gcs_cpu_only)
164169
def test_gather_ranges(self, boarders_and_data, gc, dc):
165170
boarders, data = boarders_and_data
166171

@@ -173,9 +178,9 @@ def boarders_to_range(boarders):
173178

174179
self.assertReferenceChecks(
175180
device_option=gc,
176-
op=core.CreateOperator("GatherRanges",
177-
["data", "ranges"],
178-
["output", "lengths"]),
181+
op=core.CreateOperator(
182+
"GatherRanges", ["data", "ranges"], ["output", "lengths"]
183+
),
179184
inputs=[data, ranges],
180185
reference=gather_ranges,
181186
)
@@ -188,12 +193,12 @@ def test_gather_ranges_split(self, tensor_splits, gc, dc):
188193
device_option=gc,
189194
op=core.CreateOperator(
190195
"GatherRangesToDense",
191-
['data', 'ranges'],
192-
['X_{}'.format(i) for i in range(len(lengths))],
193-
lengths=lengths
196+
["data", "ranges"],
197+
["X_{}".format(i) for i in range(len(lengths))],
198+
lengths=lengths,
194199
),
195200
inputs=[data, ranges, lengths],
196-
reference=gather_ranges_to_dense
201+
reference=gather_ranges_to_dense,
197202
)
198203

199204
@given(tensor_splits=_tensor_splits(), **hu.gcs_cpu_only)
@@ -204,24 +209,20 @@ def test_gather_ranges_with_key_split(self, tensor_splits, gc, dc):
204209
device_option=gc,
205210
op=core.CreateOperator(
206211
"GatherRangesToDense",
207-
['data', 'ranges', 'key'],
208-
['X_{}'.format(i) for i in range(len(lengths))],
209-
lengths=lengths
212+
["data", "ranges", "key"],
213+
["X_{}".format(i) for i in range(len(lengths))],
214+
lengths=lengths,
210215
),
211216
inputs=[data, ranges, key, lengths],
212-
reference=gather_ranges_to_dense_with_key
217+
reference=gather_ranges_to_dense_with_key,
213218
)
214219

215220
def test_shape_and_type_inference(self):
216221
with hu.temp_workspace("shape_type_inf_int32"):
217-
net = core.Net('test_net')
218-
net.ConstantFill(
219-
[], "ranges", shape=[3, 5, 2], dtype=core.DataType.INT32,
220-
)
221-
net.ConstantFill(
222-
[], "values", shape=[64], dtype=core.DataType.INT64,
223-
)
224-
net.GatherRanges(['values', 'ranges'], ['values_output', 'lengths_output'])
222+
net = core.Net("test_net")
223+
net.ConstantFill([], "ranges", shape=[3, 5, 2], dtype=core.DataType.INT32)
224+
net.ConstantFill([], "values", shape=[64], dtype=core.DataType.INT64)
225+
net.GatherRanges(["values", "ranges"], ["values_output", "lengths_output"])
225226
(shapes, types) = workspace.InferShapesAndTypes([net], {})
226227

227228
self.assertEqual(shapes["values_output"], [64])
@@ -238,7 +239,10 @@ def test_empty_range_check(self, tensor_splits, gc, dc):
238239
workspace.FeedBlob("key", key)
239240

240241
def getOpWithThreshold(
241-
min_observation=2, max_empty_ratio=0.3, max_mismatched_ratio=0.6
242+
min_observation=2,
243+
max_empty_ratio=0.3,
244+
max_mismatched_ratio=0.6,
245+
log_every_n=1,
242246
):
243247
return core.CreateOperator(
244248
"GatherRangesToDense",
@@ -248,13 +252,19 @@ def getOpWithThreshold(
248252
min_observation=min_observation,
249253
max_empty_ratio=max_empty_ratio,
250254
max_mismatched_ratio=max_mismatched_ratio,
255+
log_every_n=log_every_n,
251256
)
252257

253258
workspace.RunOperatorOnce(getOpWithThreshold())
254259

255-
# A critical log should be triggered by this setting.
260+
# An error log should be triggered by each feature in this setting.
256261
workspace.RunOperatorOnce(getOpWithThreshold(max_empty_ratio=0.2))
257262

263+
# Error logs should be triggered only half the time.
264+
workspace.RunOperatorOnce(
265+
getOpWithThreshold(max_empty_ratio=0.2, log_every_n=2)
266+
)
267+
258268
workspace.RunOperatorOnce(
259269
getOpWithThreshold(
260270
max_empty_ratio=0.2, max_mismatched_ratio=0.4, min_observation=5

0 commit comments

Comments
 (0)