Skip to content

Commit 54bf18f

Browse files
authored
dataclass_compat: track initial tag metadata (#3512)
Summary: The `dataclass_compat` transform dispatches on plugin name, which is stored in the summary metadata. But the semantics of summary metadata is that it need only be set in the first event of each time series. Thus, until now, events past the first step of a time series have not been modified by `dataclass_compat`. This has been fine because all transformations have been either (a) only metadata transforms (which don’t matter after the first step anyway) or (b) tensor transforms to hparams data (which only ever has one step anyway). But an upcoming change will need to transform every step of each audio time series, so we need to identify which summaries have audio data even when they don’t have plugin name set on the same event. To do so, we let `dataclass_compat` update a stateful map from tag name to the first `SummaryMetadata` seen for that time series (scoped to a particular run). Test Plan: Some unit tests included now; the tests for migration of audio summaries will be more complete because they have more interesting things to test. wchargin-branch: dataclass-compat-initial-metadata
1 parent e7bfcbd commit 54bf18f

File tree

5 files changed

+95
-17
lines changed

5 files changed

+95
-17
lines changed

tensorboard/backend/event_processing/event_file_loader.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,10 +173,27 @@ class EventFileLoader(LegacyEventFileLoader):
173173
Specifically, this includes `data_compat` and `dataclass_compat`.
174174
"""
175175

176+
def __init__(self, file_path):
177+
super(EventFileLoader, self).__init__(file_path)
178+
# Track initial metadata for each tag, for `dataclass_compat`.
179+
# This is meant to be tracked per run, not per event file, so
180+
# there is a potential failure case when the second event file
181+
# in a single run has no summary metadata. This only occurs when
182+
# all of the following hold: (a) the events were written with
183+
# the TensorFlow 1.x (not 2.x) writer, (b) the summaries were
184+
# created by `tensorboard.summary.v1` ops and so do not undergo
185+
# `data_compat` transformation, and (c) the file writer was
186+
# reopened by calling `.reopen()` on it, which creates a new
187+
# file but does not clear the tag cache. This is considered
188+
# sufficiently improbable that we don't take extra mitigations.
189+
self._initial_metadata = {} # from tag name to `SummaryMetadata`
190+
176191
def Load(self):
177192
for event in super(EventFileLoader, self).Load():
178193
event = data_compat.migrate_event(event)
179-
events = dataclass_compat.migrate_event(event)
194+
events = dataclass_compat.migrate_event(
195+
event, self._initial_metadata
196+
)
180197
for event in events:
181198
yield event
182199

tensorboard/backend/event_processing/plugin_event_accumulator_test.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,15 @@ def __init__(self, testcase, zero_out_timestamps=False):
5959
self._testcase = testcase
6060
self.items = []
6161
self.zero_out_timestamps = zero_out_timestamps
62+
self._initial_metadata = {}
6263

6364
def Load(self):
6465
while self.items:
6566
event = self.items.pop(0)
6667
event = data_compat.migrate_event(event)
67-
events = dataclass_compat.migrate_event(event)
68+
events = dataclass_compat.migrate_event(
69+
event, self._initial_metadata
70+
)
6871
for event in events:
6972
yield event
7073

tensorboard/dataclass_compat.py

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,21 +37,26 @@
3737
from tensorboard.util import tensor_util
3838

3939

40-
def migrate_event(event):
40+
def migrate_event(event, initial_metadata):
4141
"""Migrate an event to a sequence of events.
4242
4343
Args:
4444
event: An `event_pb2.Event`. The caller transfers ownership of the
4545
event to this method; the event may be mutated, and may or may
4646
not appear in the returned sequence.
47+
initial_metadata: Map from tag name (string) to `SummaryMetadata`
48+
proto for the initial occurrence of the given tag within the
49+
enclosing run. While loading a given run, the caller should
50+
always pass the same dictionary here, initially `{}`; this
51+
function will mutate it and reuse it for future calls.
4752
4853
Returns:
4954
A sequence of `event_pb2.Event`s to use instead of `event`.
5055
"""
5156
if event.HasField("graph_def"):
5257
return _migrate_graph_event(event)
5358
if event.HasField("summary"):
54-
return _migrate_summary_event(event)
59+
return _migrate_summary_event(event, initial_metadata)
5560
return (event,)
5661

5762

@@ -70,9 +75,11 @@ def _migrate_graph_event(old_event):
7075
return (old_event, result)
7176

7277

73-
def _migrate_summary_event(event):
78+
def _migrate_summary_event(event, initial_metadata):
7479
values = event.summary.value
75-
new_values = [new for old in values for new in _migrate_value(old)]
80+
new_values = [
81+
new for old in values for new in _migrate_value(old, initial_metadata)
82+
]
7683
# Optimization: Don't create a new event if there were no shallow
7784
# changes (there may still have been in-place changes).
7885
if len(values) == len(new_values) and all(
@@ -84,11 +91,21 @@ def _migrate_summary_event(event):
8491
return (event,)
8592

8693

87-
def _migrate_value(value):
94+
def _migrate_value(value, initial_metadata):
8895
"""Convert an old value to a stream of new values. May mutate."""
89-
if value.metadata.data_class != summary_pb2.DATA_CLASS_UNKNOWN:
96+
metadata = initial_metadata.get(value.tag)
97+
initial = False
98+
if metadata is None:
99+
initial = True
100+
# Retain a copy of the initial metadata, so that even after we
101+
# update its data class we know whether to also transform later
102+
# events in this time series.
103+
metadata = summary_pb2.SummaryMetadata()
104+
metadata.CopyFrom(value.metadata)
105+
initial_metadata[value.tag] = metadata
106+
if metadata.data_class != summary_pb2.DATA_CLASS_UNKNOWN:
90107
return (value,)
91-
plugin_name = value.metadata.plugin_data.plugin_name
108+
plugin_name = metadata.plugin_data.plugin_name
92109
if plugin_name == histograms_metadata.PLUGIN_NAME:
93110
return _migrate_histogram_value(value)
94111
if plugin_name == images_metadata.PLUGIN_NAME:
@@ -103,27 +120,32 @@ def _migrate_value(value):
103120

104121

105122
def _migrate_scalar_value(value):
106-
value.metadata.data_class = summary_pb2.DATA_CLASS_SCALAR
123+
if value.HasField("metadata"):
124+
value.metadata.data_class = summary_pb2.DATA_CLASS_SCALAR
107125
return (value,)
108126

109127

110128
def _migrate_histogram_value(value):
111-
value.metadata.data_class = summary_pb2.DATA_CLASS_TENSOR
129+
if value.HasField("metadata"):
130+
value.metadata.data_class = summary_pb2.DATA_CLASS_TENSOR
112131
return (value,)
113132

114133

115134
def _migrate_image_value(value):
116-
value.metadata.data_class = summary_pb2.DATA_CLASS_BLOB_SEQUENCE
135+
if value.HasField("metadata"):
136+
value.metadata.data_class = summary_pb2.DATA_CLASS_BLOB_SEQUENCE
117137
return (value,)
118138

119139

120140
def _migrate_text_value(value):
121-
value.metadata.data_class = summary_pb2.DATA_CLASS_TENSOR
141+
if value.HasField("metadata"):
142+
value.metadata.data_class = summary_pb2.DATA_CLASS_TENSOR
122143
return (value,)
123144

124145

125146
def _migrate_hparams_value(value):
126-
value.metadata.data_class = summary_pb2.DATA_CLASS_TENSOR
147+
if value.HasField("metadata"):
148+
value.metadata.data_class = summary_pb2.DATA_CLASS_TENSOR
127149
if not value.HasField("tensor"):
128150
value.tensor.CopyFrom(hparams_metadata.NULL_TENSOR)
129151
return (value,)

tensorboard/dataclass_compat_test.py

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,15 @@
5151
class MigrateEventTest(tf.test.TestCase):
5252
"""Tests for `migrate_event`."""
5353

54-
def _migrate_event(self, old_event):
54+
def _migrate_event(self, old_event, initial_metadata=None):
5555
"""Like `migrate_event`, but performs some sanity checks."""
56+
if initial_metadata is None:
57+
initial_metadata = {}
5658
old_event_copy = event_pb2.Event()
5759
old_event_copy.CopyFrom(old_event)
58-
new_events = dataclass_compat.migrate_event(old_event)
60+
new_events = dataclass_compat.migrate_event(
61+
old_event, initial_metadata=initial_metadata
62+
)
5963
for event in new_events: # ensure that wall time and step are preserved
6064
self.assertEqual(event.wall_time, old_event.wall_time)
6165
self.assertEqual(event.step, old_event.step)
@@ -95,6 +99,35 @@ def test_already_newstyle_summary_passes_through(self):
9599
self.assertLen(new_events, 1)
96100
self.assertIs(new_events[0], old_event)
97101

102+
def test_doesnt_add_metadata_to_later_steps(self):
103+
old_events = []
104+
for step in range(3):
105+
e = event_pb2.Event()
106+
e.step = step
107+
summary = scalar_summary.pb("foo", 0.125)
108+
if step > 0:
109+
for v in summary.value:
110+
v.ClearField("metadata")
111+
e.summary.ParseFromString(summary.SerializeToString())
112+
old_events.append(e)
113+
114+
initial_metadata = {}
115+
new_events = []
116+
for e in old_events:
117+
migrated = self._migrate_event(e, initial_metadata=initial_metadata)
118+
new_events.extend(migrated)
119+
120+
self.assertLen(new_events, len(old_events))
121+
self.assertEqual(
122+
{
123+
e.step
124+
for e in new_events
125+
for v in e.summary.value
126+
if v.HasField("metadata")
127+
},
128+
{0},
129+
)
130+
98131
def test_scalar(self):
99132
old_event = event_pb2.Event()
100133
old_event.step = 123

tensorboard/uploader/uploader_test.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1278,9 +1278,12 @@ def _clear_wall_times(request):
12781278

12791279

12801280
def _apply_compat(events):
1281+
initial_metadata = {}
12811282
for event in events:
12821283
event = data_compat.migrate_event(event)
1283-
events = dataclass_compat.migrate_event(event)
1284+
events = dataclass_compat.migrate_event(
1285+
event, initial_metadata=initial_metadata
1286+
)
12841287
for event in events:
12851288
yield event
12861289

0 commit comments

Comments
 (0)