Skip to content

Commit cec1311

Browse files
authored
histogram: remove backwards buckets in v1 histogram migration (#5404)
* remove empty buckets on both ends to avoid having `backwards` buckets
* fix typo
* use data min/max as bucket left/right hand limit
* grammar fix
* improve readability
  - improve docs
  - test extremal histogram values
  - fix tag names in test
* small change
1 parent 97b6f6f commit cec1311

File tree

2 files changed

+121
-5
lines changed

2 files changed

+121
-5
lines changed

tensorboard/data_compat.py

Lines changed: 31 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -81,13 +81,39 @@ def make_summary(tag, metadata, data):
8181

8282

8383
def _migrate_histogram_value(value):
84+
"""Convert `old-style` histogram value to `new-style`.
85+
86+
The "old-style" format can have outermost bucket limits of -DBL_MAX and
87+
DBL_MAX, which are problematic for visualization. We replace those here
88+
with the actual min and max values seen in the input data, but then in
89+
order to avoid introducing "backwards" buckets (where left edge > right
90+
edge), we first must drop all empty buckets on the left and right ends.
91+
"""
8492
histogram_value = value.histo
85-
bucket_lefts = [histogram_value.min] + histogram_value.bucket_limit[:-1]
86-
bucket_rights = histogram_value.bucket_limit[:-1] + [histogram_value.max]
8793
bucket_counts = histogram_value.bucket
88-
buckets = np.array(
89-
[bucket_lefts, bucket_rights, bucket_counts], dtype=np.float32
90-
).transpose()
94+
# Find the indices of the leftmost and rightmost non-empty buckets.
95+
n = len(bucket_counts)
96+
start = next((i for i in range(n) if bucket_counts[i] > 0), n)
97+
end = next((i for i in reversed(range(n)) if bucket_counts[i] > 0), -1)
98+
if start > end:
99+
# If all input buckets were empty, treat it as a zero-bucket
100+
# new-style histogram.
101+
buckets = np.zeros([0, 3], dtype=np.float32)
102+
else:
103+
# Discard empty buckets on both ends, and keep only the "inner"
104+
# edges from the remaining buckets. Note that bucket indices range
105+
# from `start` to `end` inclusive, but bucket_limit indices are
106+
# exclusive of `end` - this is because bucket_limit[i] is the
107+
# right-hand edge for bucket[i].
108+
bucket_counts = bucket_counts[start : end + 1]
109+
inner_edges = histogram_value.bucket_limit[start:end]
110+
# Use min as the left-hand limit for the first non-empty bucket.
111+
bucket_lefts = [histogram_value.min] + inner_edges
112+
# Use max as the right-hand limit for the last non-empty bucket.
113+
bucket_rights = inner_edges + [histogram_value.max]
114+
buckets = np.array(
115+
[bucket_lefts, bucket_rights, bucket_counts], dtype=np.float32
116+
).transpose()
91117

92118
summary_metadata = histogram_metadata.create_summary_metadata(
93119
display_name=value.metadata.display_name or value.tag,

tensorboard/data_compat_test.py

Lines changed: 90 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -201,10 +201,100 @@ def test_histogram(self):
201201
self.assertEqual(expected_metadata, new_value.metadata)
202202
self.assertTrue(new_value.HasField("tensor"))
203203
buckets = tensor_util.make_ndarray(new_value.tensor)
204+
for bucket in buckets:
205+
# No `backwards` buckets.
206+
self.assertLessEqual(bucket[0], bucket[1])
204207
self.assertEqual(old_value.histo.min, buckets[0][0])
205208
self.assertEqual(old_value.histo.max, buckets[-1][1])
206209
self.assertEqual(23 * 45, buckets[:, 2].astype(int).sum())
207210

211+
def test_empty_histogram(self):
212+
with tf.compat.v1.Graph().as_default():
213+
old_op = tf.compat.v1.summary.histogram(
214+
"empty_yet_important", tf.constant([])
215+
)
216+
old_value = self._value_from_op(old_op)
217+
assert old_value.HasField("histo"), old_value
218+
new_value = data_compat.migrate_value(old_value)
219+
220+
self.assertEqual("empty_yet_important", new_value.tag)
221+
expected_metadata = histogram_metadata.create_summary_metadata(
222+
display_name="empty_yet_important", description=""
223+
)
224+
self.assertEqual(expected_metadata, new_value.metadata)
225+
self.assertTrue(new_value.HasField("tensor"))
226+
buckets = tensor_util.make_ndarray(new_value.tensor)
227+
self.assertEmpty(buckets)
228+
229+
def test_single_value_histogram(self):
230+
with tf.compat.v1.Graph().as_default():
231+
old_op = tf.compat.v1.summary.histogram(
232+
"single_value_data", tf.constant([1] * 1024)
233+
)
234+
old_value = self._value_from_op(old_op)
235+
assert old_value.HasField("histo"), old_value
236+
new_value = data_compat.migrate_value(old_value)
237+
238+
self.assertEqual("single_value_data", new_value.tag)
239+
expected_metadata = histogram_metadata.create_summary_metadata(
240+
display_name="single_value_data", description=""
241+
)
242+
self.assertEqual(expected_metadata, new_value.metadata)
243+
self.assertTrue(new_value.HasField("tensor"))
244+
buckets = tensor_util.make_ndarray(new_value.tensor)
245+
# Only one bucket is kept.
246+
self.assertEqual((1, 3), buckets.shape)
247+
self.assertEqual(1, buckets[0][0])
248+
self.assertEqual(1, buckets[-1][1])
249+
self.assertEqual(1024, buckets[0][2])
250+
251+
def test_histogram_with_empty_buckets_on_both_ends(self):
252+
with tf.compat.v1.Graph().as_default():
253+
old_op = tf.compat.v1.summary.histogram(
254+
"data_with_empty_buckets_on_both_ends",
255+
tf.constant([1, 1, 1, 2, 2, 3, 3, 3, 3]),
256+
)
257+
old_value = self._value_from_op(old_op)
258+
assert old_value.HasField("histo"), old_value
259+
new_value = data_compat.migrate_value(old_value)
260+
261+
self.assertEqual("data_with_empty_buckets_on_both_ends", new_value.tag)
262+
expected_metadata = histogram_metadata.create_summary_metadata(
263+
display_name="data_with_empty_buckets_on_both_ends", description=""
264+
)
265+
self.assertEqual(expected_metadata, new_value.metadata)
266+
self.assertTrue(new_value.HasField("tensor"))
267+
buckets = tensor_util.make_ndarray(new_value.tensor)
268+
for bucket in buckets:
269+
# No `backwards` buckets.
270+
self.assertLessEqual(bucket[0], bucket[1])
271+
self.assertEqual(1, buckets[0][0])
272+
self.assertEqual(3, buckets[-1][1])
273+
self.assertEqual(9, buckets[:, 2].astype(int).sum())
274+
275+
def test_histogram_with_extremal_values(self):
276+
with tf.compat.v1.Graph().as_default():
277+
old_op = tf.compat.v1.summary.histogram(
278+
"extremal_values", tf.constant([-1e20, 1e20])
279+
)
280+
old_value = self._value_from_op(old_op)
281+
assert old_value.HasField("histo"), old_value
282+
new_value = data_compat.migrate_value(old_value)
283+
284+
self.assertEqual("extremal_values", new_value.tag)
285+
expected_metadata = histogram_metadata.create_summary_metadata(
286+
display_name="extremal_values", description=""
287+
)
288+
self.assertEqual(expected_metadata, new_value.metadata)
289+
self.assertTrue(new_value.HasField("tensor"))
290+
buckets = tensor_util.make_ndarray(new_value.tensor)
291+
for bucket in buckets:
292+
# No `backwards` buckets.
293+
self.assertLessEqual(bucket[0], bucket[1])
294+
self.assertEqual(old_value.histo.min, buckets[0][0])
295+
self.assertEqual(old_value.histo.max, buckets[-1][1])
296+
self.assertEqual(2, buckets[:, 2].astype(int).sum())
297+
208298
def test_new_style_histogram(self):
209299
with tf.compat.v1.Graph().as_default():
210300
op = histogram_summary.op(

0 commit comments

Comments
 (0)