From 6dc689b6fcd04f7a497fd885b1caa08ec9595c1f Mon Sep 17 00:00:00 2001 From: Yating Jing Date: Fri, 5 Nov 2021 19:38:48 +0000 Subject: [PATCH 1/6] remove empty buckets on both ends to avoid having `backwards` buckets --- tensorboard/data_compat.py | 18 ++++++++- tensorboard/data_compat_test.py | 68 ++++++++++++++++++++++++++++++++- 2 files changed, 83 insertions(+), 3 deletions(-) diff --git a/tensorboard/data_compat.py b/tensorboard/data_compat.py index d36aebbbdd..c59bcb1929 100644 --- a/tensorboard/data_compat.py +++ b/tensorboard/data_compat.py @@ -81,12 +81,28 @@ def make_summary(tag, metadata, data): def _migrate_histogram_value(value): + """Convert `old-stype` histogram to `new-style`. + + Since by default min value is DBL_MAX and max value is -DBL_MAX, empty + buckets on the left and right ends are removed to prevent the case of + `backwards` (left edge > right edge) buckets. + """ histogram_value = value.histo bucket_lefts = [histogram_value.min] + histogram_value.bucket_limit[:-1] bucket_rights = histogram_value.bucket_limit[:-1] + [histogram_value.max] bucket_counts = histogram_value.bucket + # Find the indices of the first leftmost and rightmost buckets with + # nonzero counts. + n = len(bucket_counts) + start = next((i for i in range(n) if bucket_counts[i] > 0), n) + end = next((i for i in range(n - 1, -1, -1) if bucket_counts[i] > 0), -1) buckets = np.array( - [bucket_lefts, bucket_rights, bucket_counts], dtype=np.float32 + [ + bucket_lefts[start : end + 1], + bucket_rights[start : end + 1], + bucket_counts[start : end + 1], + ], + dtype=np.float32, ).transpose() summary_metadata = histogram_metadata.create_summary_metadata( diff --git a/tensorboard/data_compat_test.py b/tensorboard/data_compat_test.py index f0d8e7dcdb..572fe92977 100644 --- a/tensorboard/data_compat_test.py +++ b/tensorboard/data_compat_test.py @@ -201,10 +201,74 @@ def test_histogram(self): self.assertEqual(expected_metadata, new_value.metadata) self.assertTrue(new_value.HasField("tensor")) buckets = tensor_util.make_ndarray(new_value.tensor) - self.assertEqual(old_value.histo.min, buckets[0][0]) - self.assertEqual(old_value.histo.max, buckets[-1][1]) + for bucket in buckets: + # No `backwards` buckets. + self.assertLess(bucket[0], bucket[1]) self.assertEqual(23 * 45, buckets[:, 2].astype(int).sum()) + def test_empty_histogram(self): + with tf.compat.v1.Graph().as_default(): + old_op = tf.compat.v1.summary.histogram( + "empty_yet_important", tf.constant([]) + ) + old_value = self._value_from_op(old_op) + assert old_value.HasField("histo"), old_value + new_value = data_compat.migrate_value(old_value) + + self.assertEqual("empty_yet_important", new_value.tag) + expected_metadata = histogram_metadata.create_summary_metadata( + display_name="empty_yet_important", description="" + ) + self.assertEqual(expected_metadata, new_value.metadata) + self.assertTrue(new_value.HasField("tensor")) + buckets = tensor_util.make_ndarray(new_value.tensor) + self.assertEmpty(buckets) + + def test_single_value_histogram(self): + with tf.compat.v1.Graph().as_default(): + old_op = tf.compat.v1.summary.histogram( + "single_value_data", tf.constant([1] * 1024) + ) + old_value = self._value_from_op(old_op) + assert old_value.HasField("histo"), old_value + new_value = data_compat.migrate_value(old_value) + + self.assertEqual("single_value_data", new_value.tag) + expected_metadata = histogram_metadata.create_summary_metadata( + display_name="single_value_data", description="" + ) + self.assertEqual(expected_metadata, new_value.metadata) + self.assertTrue(new_value.HasField("tensor")) + buckets = tensor_util.make_ndarray(new_value.tensor) + # Only one bucket is kept. + self.assertEqual((1, 3), buckets.shape) + # Default bucket boundaries exponentially distribute around, starting + # at 1e-12 and increasing by a factor of 1.1 up to 1e20. + self.assertAlmostEqual(0.9172464, buckets[0][0]) + self.assertAlmostEqual(1.008971, buckets[0][1]) + self.assertEqual(1024, buckets[0][2]) + + def test_histogram_with_empty_buckets_on_both_ends(self): + with tf.compat.v1.Graph().as_default(): + old_op = tf.compat.v1.summary.histogram( + "single_value_data", tf.constant([1, 1, 1, 2, 2, 3, 3, 3, 3]) + ) + old_value = self._value_from_op(old_op) + assert old_value.HasField("histo"), old_value + new_value = data_compat.migrate_value(old_value) + + self.assertEqual("single_value_data", new_value.tag) + expected_metadata = histogram_metadata.create_summary_metadata( + display_name="single_value_data", description="" + ) + self.assertEqual(expected_metadata, new_value.metadata) + self.assertTrue(new_value.HasField("tensor")) + buckets = tensor_util.make_ndarray(new_value.tensor) + for bucket in buckets: + # No `backwards` buckets. + self.assertLess(bucket[0], bucket[1]) + self.assertEqual(9, buckets[:, 2].astype(int).sum()) + def test_new_style_histogram(self): with tf.compat.v1.Graph().as_default(): op = histogram_summary.op( From 44ee7f4e9aff8dc847913dcb1cf5988fa58bdc2a Mon Sep 17 00:00:00 2001 From: Yating Jing Date: Fri, 5 Nov 2021 19:44:44 +0000 Subject: [PATCH 2/6] fix typo --- tensorboard/data_compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorboard/data_compat.py b/tensorboard/data_compat.py index c59bcb1929..53cb3bd927 100644 --- a/tensorboard/data_compat.py +++ b/tensorboard/data_compat.py @@ -81,7 +81,7 @@ def make_summary(tag, metadata, data): def _migrate_histogram_value(value): - """Convert `old-stype` histogram to `new-style`. + """Convert `old-style` histogram value to `new-style`. Since by default min value is DBL_MAX and max value is -DBL_MAX, empty buckets on the left and right ends are removed to prevent the case of From 011b6565b94a47813981421e913df6d50ed30e52 Mon Sep 17 00:00:00 2001 From: Yating Jing Date: Mon, 8 Nov 2021 16:04:22 +0000 Subject: [PATCH 3/6] use data min/max as bucket left/right hand limit --- tensorboard/data_compat.py | 18 ++++++++++-------- tensorboard/data_compat_test.py | 14 ++++++++------ 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/tensorboard/data_compat.py b/tensorboard/data_compat.py index 53cb3bd927..41162f2d9e 100644 --- a/tensorboard/data_compat.py +++ b/tensorboard/data_compat.py @@ -88,21 +88,23 @@ def _migrate_histogram_value(value): `backwards` (left edge > right edge) buckets. """ histogram_value = value.histo - bucket_lefts = [histogram_value.min] + histogram_value.bucket_limit[:-1] - bucket_rights = histogram_value.bucket_limit[:-1] + [histogram_value.max] bucket_counts = histogram_value.bucket # Find the indices of the first leftmost and rightmost buckets with # nonzero counts. n = len(bucket_counts) start = next((i for i in range(n) if bucket_counts[i] > 0), n) end = next((i for i in range(n - 1, -1, -1) if bucket_counts[i] > 0), -1) + # Discard empty buckets on both ends. + bucket_lefts = histogram_value.bucket_limit[start:end] + bucket_rights = histogram_value.bucket_limit[start:end] + bucket_counts = bucket_counts[start : end + 1] + if start <= end: + # Use min as the left-hand limit for the first non-empty bucket. + bucket_lefts = [histogram_value.min] + bucket_lefts + # Use max as the right-hand limit for the last non-empty bucket. + bucket_rights = bucket_rights + [histogram_value.max] buckets = np.array( - [ - bucket_lefts[start : end + 1], - bucket_rights[start : end + 1], - bucket_counts[start : end + 1], - ], - dtype=np.float32, + [bucket_lefts, bucket_rights, bucket_counts], dtype=np.float32 ).transpose() summary_metadata = histogram_metadata.create_summary_metadata( diff --git a/tensorboard/data_compat_test.py b/tensorboard/data_compat_test.py index 572fe92977..53f1492621 100644 --- a/tensorboard/data_compat_test.py +++ b/tensorboard/data_compat_test.py @@ -203,7 +203,9 @@ def test_histogram(self): buckets = tensor_util.make_ndarray(new_value.tensor) for bucket in buckets: # No `backwards` buckets. - self.assertLess(bucket[0], bucket[1]) + self.assertLessEqual(bucket[0], bucket[1]) + self.assertEqual(old_value.histo.min, buckets[0][0]) + self.assertEqual(old_value.histo.max, buckets[-1][1]) self.assertEqual(23 * 45, buckets[:, 2].astype(int).sum()) def test_empty_histogram(self): @@ -242,10 +244,8 @@ def test_single_value_histogram(self): buckets = tensor_util.make_ndarray(new_value.tensor) # Only one bucket is kept. self.assertEqual((1, 3), buckets.shape) - # Default bucket boundaries exponentially distribute around, starting - # at 1e-12 and increasing by a factor of 1.1 up to 1e20. - self.assertAlmostEqual(0.9172464, buckets[0][0]) - self.assertAlmostEqual(1.008971, buckets[0][1]) + self.assertEqual(1, buckets[0][0]) + self.assertEqual(1, buckets[-1][1]) self.assertEqual(1024, buckets[0][2]) def test_histogram_with_empty_buckets_on_both_ends(self): @@ -266,7 +266,9 @@ def test_histogram_with_empty_buckets_on_both_ends(self): buckets = tensor_util.make_ndarray(new_value.tensor) for bucket in buckets: # No `backwards` buckets. - self.assertLess(bucket[0], bucket[1]) + self.assertLessEqual(bucket[0], bucket[1]) + self.assertEqual(1, buckets[0][0]) + self.assertEqual(3, buckets[-1][1]) self.assertEqual(9, buckets[:, 2].astype(int).sum()) def test_new_style_histogram(self): From 21ff50e7ac3cf42a80d6250539c1f884400d613d Mon Sep 17 00:00:00 2001 From: Yating Jing Date: Mon, 8 Nov 2021 16:06:37 +0000 Subject: [PATCH 4/6] grammar fix --- tensorboard/data_compat.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorboard/data_compat.py b/tensorboard/data_compat.py index 41162f2d9e..dec3b35887 100644 --- a/tensorboard/data_compat.py +++ b/tensorboard/data_compat.py @@ -89,8 +89,7 @@ def _migrate_histogram_value(value): """ histogram_value = value.histo bucket_counts = histogram_value.bucket - # Find the indices of the first leftmost and rightmost buckets with - # nonzero counts. + # Find the indices of the leftmost and rightmost non-empty buckets. n = len(bucket_counts) start = next((i for i in range(n) if bucket_counts[i] > 0), n) end = next((i for i in range(n - 1, -1, -1) if bucket_counts[i] > 0), -1) From 7cebaaf1b70379b486fe4aa7eb91aa4d58b699c3 Mon Sep 17 00:00:00 2001 From: Yating Jing Date: Tue, 9 Nov 2021 18:40:29 +0000 Subject: [PATCH 5/6] improve readability - improve docs - test extremal histogram values - fix tag names in test --- tensorboard/data_compat.py | 43 ++++++++++++++++++++------------- tensorboard/data_compat_test.py | 30 ++++++++++++++++++++--- 2 files changed, 53 insertions(+), 20 deletions(-) diff --git a/tensorboard/data_compat.py b/tensorboard/data_compat.py index dec3b35887..88de79e344 100644 --- a/tensorboard/data_compat.py +++ b/tensorboard/data_compat.py @@ -83,34 +83,43 @@ def make_summary(tag, metadata, data): def _migrate_histogram_value(value): """Convert `old-style` histogram value to `new-style`. - Since by default min value is DBL_MAX and max value is -DBL_MAX, empty - buckets on the left and right ends are removed to prevent the case of - `backwards` (left edge > right edge) buckets. + The "old-style" format can have outermost bucket limits of -DBL_MAX and + DBL_MAX, which are problematic for visualization. We replace those here + with the actual min and max values seen in the input data, but then in + order to avoid introducing "backwards" buckets (where left edge > right + edge), we first must drop all empty buckets on the left and right ends. """ + summary_metadata = histogram_metadata.create_summary_metadata( + display_name=value.metadata.display_name or value.tag, + description=value.metadata.summary_description, + ) + histogram_value = value.histo bucket_counts = histogram_value.bucket # Find the indices of the leftmost and rightmost non-empty buckets. n = len(bucket_counts) start = next((i for i in range(n) if bucket_counts[i] > 0), n) - end = next((i for i in range(n - 1, -1, -1) if bucket_counts[i] > 0), -1) - # Discard empty buckets on both ends. - bucket_lefts = histogram_value.bucket_limit[start:end] - bucket_rights = histogram_value.bucket_limit[start:end] + end = next((i for i in reversed(range(n)) if bucket_counts[i] > 0), -1) + if start > end: + # If all input buckets were empty, treat it as a zero-bucket + # new-style histogram. + return make_summary( + value.tag, summary_metadata, np.zeros([0, 3], dtype=np.float32) + ) + # Discard empty buckets on both ends, and keep only the "inner" edges + # from the remaining buckets. Note that bucket indices range from `start` + # to `end` inclusive, but bucket_limit indices are exclusive of `end` - + # this is because bucket_limit[i] is the right-hand edge for bucket[i]. bucket_counts = bucket_counts[start : end + 1] - if start <= end: - # Use min as the left-hand limit for the first non-empty bucket. - bucket_lefts = [histogram_value.min] + bucket_lefts - # Use max as the right-hand limit for the last non-empty bucket. - bucket_rights = bucket_rights + [histogram_value.max] + inner_edges = histogram_value.bucket_limit[start:end] + # Use min as the left-hand limit for the first non-empty bucket. + bucket_lefts = [histogram_value.min] + inner_edges + # Use max as the right-hand limit for the last non-empty bucket. + bucket_rights = inner_edges + [histogram_value.max] buckets = np.array( [bucket_lefts, bucket_rights, bucket_counts], dtype=np.float32 ).transpose() - summary_metadata = histogram_metadata.create_summary_metadata( - display_name=value.metadata.display_name or value.tag, - description=value.metadata.summary_description, - ) - return make_summary(value.tag, summary_metadata, buckets) diff --git a/tensorboard/data_compat_test.py b/tensorboard/data_compat_test.py index 53f1492621..9c35397dde 100644 --- a/tensorboard/data_compat_test.py +++ b/tensorboard/data_compat_test.py @@ -251,15 +251,16 @@ def test_single_value_histogram(self): def test_histogram_with_empty_buckets_on_both_ends(self): with tf.compat.v1.Graph().as_default(): old_op = tf.compat.v1.summary.histogram( - "single_value_data", tf.constant([1, 1, 1, 2, 2, 3, 3, 3, 3]) + "data_with_empty_buckets_on_both_ends", + tf.constant([1, 1, 1, 2, 2, 3, 3, 3, 3]), ) old_value = self._value_from_op(old_op) assert old_value.HasField("histo"), old_value new_value = data_compat.migrate_value(old_value) - self.assertEqual("single_value_data", new_value.tag) + self.assertEqual("data_with_empty_buckets_on_both_ends", new_value.tag) expected_metadata = histogram_metadata.create_summary_metadata( - display_name="single_value_data", description="" + display_name="data_with_empty_buckets_on_both_ends", description="" ) self.assertEqual(expected_metadata, new_value.metadata) self.assertTrue(new_value.HasField("tensor")) @@ -271,6 +272,29 @@ def test_histogram_with_empty_buckets_on_both_ends(self): self.assertEqual(3, buckets[-1][1]) self.assertEqual(9, buckets[:, 2].astype(int).sum()) + def test_histogram_with_extremal_values(self): + with tf.compat.v1.Graph().as_default(): + old_op = tf.compat.v1.summary.histogram( + "extremal_values", tf.constant([-1e20, 1e20]) + ) + old_value = self._value_from_op(old_op) + assert old_value.HasField("histo"), old_value + new_value = data_compat.migrate_value(old_value) + + self.assertEqual("extremal_values", new_value.tag) + expected_metadata = histogram_metadata.create_summary_metadata( + display_name="extremal_values", description="" + ) + self.assertEqual(expected_metadata, new_value.metadata) + self.assertTrue(new_value.HasField("tensor")) + buckets = tensor_util.make_ndarray(new_value.tensor) + for bucket in buckets: + # No `backwards` buckets. + self.assertLessEqual(bucket[0], bucket[1]) + self.assertEqual(old_value.histo.min, buckets[0][0]) + self.assertEqual(old_value.histo.max, buckets[-1][1]) + self.assertEqual(2, buckets[:, 2].astype(int).sum()) + def test_new_style_histogram(self): with tf.compat.v1.Graph().as_default(): op = histogram_summary.op( From ff907e91c1b7925cf1333ed773d0973cf136750a Mon Sep 17 00:00:00 2001 From: Yating Jing Date: Wed, 10 Nov 2021 01:31:56 +0000 Subject: [PATCH 6/6] small change --- tensorboard/data_compat.py | 42 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/tensorboard/data_compat.py b/tensorboard/data_compat.py index 88de79e344..3a53ead3f2 100644 --- a/tensorboard/data_compat.py +++ b/tensorboard/data_compat.py @@ -89,11 +89,6 @@ def _migrate_histogram_value(value): order to avoid introducing "backwards" buckets (where left edge > right edge), we first must drop all empty buckets on the left and right ends. """ - summary_metadata = histogram_metadata.create_summary_metadata( - display_name=value.metadata.display_name or value.tag, - description=value.metadata.summary_description, - ) - histogram_value = value.histo bucket_counts = histogram_value.bucket # Find the indices of the leftmost and rightmost non-empty buckets. @@ -103,22 +98,27 @@ def _migrate_histogram_value(value): if start > end: # If all input buckets were empty, treat it as a zero-bucket # new-style histogram. - return make_summary( - value.tag, summary_metadata, np.zeros([0, 3], dtype=np.float32) - ) - # Discard empty buckets on both ends, and keep only the "inner" edges - # from the remaining buckets. Note that bucket indices range from `start` - # to `end` inclusive, but bucket_limit indices are exclusive of `end` - - # this is because bucket_limit[i] is the right-hand edge for bucket[i]. - bucket_counts = bucket_counts[start : end + 1] - inner_edges = histogram_value.bucket_limit[start:end] - # Use min as the left-hand limit for the first non-empty bucket. - bucket_lefts = [histogram_value.min] + inner_edges - # Use max as the right-hand limit for the last non-empty bucket. - bucket_rights = inner_edges + [histogram_value.max] - buckets = np.array( - [bucket_lefts, bucket_rights, bucket_counts], dtype=np.float32 - ).transpose() + buckets = np.zeros([0, 3], dtype=np.float32) + else: + # Discard empty buckets on both ends, and keep only the "inner" + # edges from the remaining buckets. Note that bucket indices range + # from `start` to `end` inclusive, but bucket_limit indices are + # exclusive of `end` - this is because bucket_limit[i] is the + # right-hand edge for bucket[i]. + bucket_counts = bucket_counts[start : end + 1] + inner_edges = histogram_value.bucket_limit[start:end] + # Use min as the left-hand limit for the first non-empty bucket. + bucket_lefts = [histogram_value.min] + inner_edges + # Use max as the right-hand limit for the last non-empty bucket. + bucket_rights = inner_edges + [histogram_value.max] + buckets = np.array( + [bucket_lefts, bucket_rights, bucket_counts], dtype=np.float32 + ).transpose() + + summary_metadata = histogram_metadata.create_summary_metadata( + display_name=value.metadata.display_name or value.tag, + description=value.metadata.summary_description, + ) return make_summary(value.tag, summary_metadata, buckets)