diff --git a/tensorboard/plugins/debugger_v2/BUILD b/tensorboard/plugins/debugger_v2/BUILD index b081b72324..1a703cb53d 100644 --- a/tensorboard/plugins/debugger_v2/BUILD +++ b/tensorboard/plugins/debugger_v2/BUILD @@ -7,11 +7,33 @@ package(default_visibility = ["//tensorboard:internal"]) licenses(["notice"]) +py_library( + name = "debug_data_multiplexer", + srcs = ["debug_data_multiplexer.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorboard:expect_tensorflow_installed", + ], +) + +py_library( + name = "debug_data_provider", + srcs = ["debug_data_provider.py"], + srcs_version = "PY2AND3", + deps = [ + ":debug_data_multiplexer", + "//tensorboard/data:provider", + ], +) + py_library( name = "debugger_v2_plugin", srcs = ["debugger_v2_plugin.py"], srcs_version = "PY2AND3", deps = [ + ":debug_data_provider", + "//tensorboard:plugin_util", + "//tensorboard/backend:http_util", "//tensorboard/plugins:base_plugin", "@org_pocoo_werkzeug", ], @@ -25,6 +47,8 @@ py_test( deps = [ ":debugger_v2_plugin", "//tensorboard:expect_tensorflow_installed", + "//tensorboard/backend:application", "//tensorboard/plugins:base_plugin", + "//tensorboard/util:test_util", ], ) diff --git a/tensorboard/plugins/debugger_v2/debug_data_multiplexer.py b/tensorboard/plugins/debugger_v2/debug_data_multiplexer.py new file mode 100644 index 0000000000..73476454cf --- /dev/null +++ b/tensorboard/plugins/debugger_v2/debug_data_multiplexer.py @@ -0,0 +1,117 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""A wrapper around DebugDataReader used for retrieving tfdbg v2 data.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dummy run name for the debugger. +# Currently, the `DebuggerV2EventMultiplexer` class is tied to a single +# logdir, which holds at most one DebugEvent file set in the tfdbg v2 (tfdbg2 +# for short) format. +# TODO(cais): When tfdbg2 allows there to be multiple DebugEvent file sets in +# the same logdir, replace this magic string with actual run names. +DEFAULT_DEBUGGER_RUN_NAME = "__default_debugger_run__" + + +class DebuggerV2EventMultiplexer(object): + """A class used for accessing tfdbg v2 DebugEvent data on local filesystem. + + This class is a short-term hack, mirroring the EventMultiplexer for the main + TensorBoard plugins (e.g., scalar, histogram and graphs.) As such, it only + implements the methods relevant to the Debugger V2 plugin. + + TODO(cais): Integrate it with EventMultiplexer and use the integrated class + from MultiplexerDataProvider for a single path of accessing debugger and + non-debugger data. + """ + + def __init__(self, logdir): + """Constructor for the `DebuggerV2EventMultiplexer`. + + Args: + logdir: Path to the directory to load the tfdbg v2 data from. + """ + self._logdir = logdir + # TODO(cais): Start off a reading thread here. + + def FirstEventTimestamp(self, run): + """Return the timestamp of the first DebugEvent of the given run. + + This may perform I/O if no events have been loaded yet for the run. + + Args: + run: A string name of the run for which the timestamp is retrieved. 
+ This currently must be hardcoded as `DEFAULT_DEBUGGER_RUN_NAME`, + as each logdir contains at most one DebugEvent file set (i.e., a + run of a tfdbg2-instrumented TensorFlow program.) + + Returns: + The wall_time of the first event of the run, which will be in seconds + since the epoch as a `float`. + """ + if run != DEFAULT_DEBUGGER_RUN_NAME: + raise ValueError( + "Expected run name to be %s, but got %s" + % (DEFAULT_DEBUGGER_RUN_NAME, run) + ) + from tensorflow.python.debug.lib import debug_events_reader + + with debug_events_reader.DebugEventsReader(self._logdir) as reader: + metadata_iterator, _ = reader.metadata_iterator() + return next(metadata_iterator).wall_time + + def PluginRunToTagToContent(self, plugin_name): + raise NotImplementedError( + "DebugDataMultiplexer.PluginRunToTagToContent() has not been " + "implemented yet." + ) + + def Runs(self): + """Return all the run names in the `DebuggerV2EventMultiplexer`. + + The `Runs()` method of this class is specialized for the tfdbg2-format + DebugEvent files. It only returns the hard-coded default run. + + Returns: + If tfdbg2-format data exists in the `logdir` of this object, returns: + ``` + {runName: { "debugger-v2": [tag1, tag2, tag3] } } + ``` + where `runName` is the hard-coded `DEFAULT_DEBUGGER_RUN_NAME` + string. This is related to the fact that tfdbg2 currently contains + at most one DebugEvent file set per directory. + If no tfdbg2-format data exists in the `logdir`, an empty `dict`. + """ + reader = None + from tensorflow.python.debug.lib import debug_events_reader + + try: + reader = debug_events_reader.DebugDataReader(self._logdir) + # NOTE(cais): Currently each logdir is enforced to have only one + # DebugEvent file set. So we add hard-coded default run name. + except ValueError as error: + # When no DebugEvent file set is found in the logdir, a `ValueError` + # is raised. 
+ return {} + with reader: + return { + DEFAULT_DEBUGGER_RUN_NAME: { + # TODO(cais): Add the semantically meaningful tag names such as + # 'execution_digests_book', 'alerts_book' + "debugger-v2": [] + } + } diff --git a/tensorboard/plugins/debugger_v2/debug_data_provider.py b/tensorboard/plugins/debugger_v2/debug_data_provider.py new file mode 100644 index 0000000000..b1df1cc384 --- /dev/null +++ b/tensorboard/plugins/debugger_v2/debug_data_provider.py @@ -0,0 +1,105 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""An implementation of DataProvider that serves tfdbg v2 data. + +This implementation: + 1. Reads data from a DebugEvent file set on the local filesystem. + 2. Implements only the relevant methods for the debugger v2 plugin, including + - list_runs() + - read_blob_sequences() (not implemented yet) + - read_blob() (not implemented yet) + +This class is a short-term hack. To be used in production, it awaits integration +with a more complete implementation of DataProvider such as +MultiplexerDataProvider. +""" + +from tensorboard.data import provider + +from tensorboard.plugins.debugger_v2 import debug_data_multiplexer + + +class LocalDebuggerV2DataProvider(provider.DataProvider): + """A DataProvider implementation for tfdbg v2 data on local filesystem. 
+ + In this implementation, `experiment_id` is ignored; the logdir that + contains the DebugEvent file set is passed to the constructor instead. + """ + + def __init__(self, logdir): + """Constructor of LocalDebuggerV2DataProvider. + + Args: + logdir: Path to the directory from which the tfdbg v2 data will be + loaded. + """ + super(LocalDebuggerV2DataProvider, self).__init__() + self._multiplexer = debug_data_multiplexer.DebuggerV2EventMultiplexer( + logdir + ) + + def list_runs(self, experiment_id): + """List runs available. + + Args: + experiment_id: currently unused, because the backing + DebuggerV2EventMultiplexer does not accommodate multiple experiments. + + Returns: + A list of `provider.Run` objects, with run names used as run IDs. + """ + return [ + provider.Run( + run_id=run, # use names as IDs + run_name=run, + start_time=self._get_first_event_timestamp(run), + ) + for run in self._multiplexer.Runs() + ] + + def _get_first_event_timestamp(self, run_name): + try: + return self._multiplexer.FirstEventTimestamp(run_name) + except ValueError as e: + return None + + def list_scalars(self, experiment_id, plugin_name, run_tag_filter=None): + del experiment_id, plugin_name, run_tag_filter # Unused. + raise TypeError("Debugger V2 DataProvider doesn't support scalars.") + + def read_scalars( + self, experiment_id, plugin_name, downsample=None, run_tag_filter=None + ): + del experiment_id, plugin_name, downsample, run_tag_filter + raise TypeError("Debugger V2 DataProvider doesn't support scalars.") + + def list_blob_sequences( + self, experiment_id, plugin_name, run_tag_filter=None + ): + del experiment_id, plugin_name, run_tag_filter # Unused currently. + # TODO(cais): Implement this. + raise NotImplementedError() + + def read_blob_sequences( + self, experiment_id, plugin_name, downsample=None, run_tag_filter=None + ): + del experiment_id, plugin_name, downsample, run_tag_filter # Unused. + # TODO(cais): Implement this. + raise NotImplementedError() + + def read_blob(self, blob_key): + del blob_key # Unused currently. 
+ # TODO(cais): Implement this. + raise NotImplementedError() diff --git a/tensorboard/plugins/debugger_v2/debugger_v2_plugin.py b/tensorboard/plugins/debugger_v2/debugger_v2_plugin.py index cf651c9701..9572a46afc 100644 --- a/tensorboard/plugins/debugger_v2/debugger_v2_plugin.py +++ b/tensorboard/plugins/debugger_v2/debugger_v2_plugin.py @@ -18,7 +18,12 @@ from __future__ import division from __future__ import print_function +from werkzeug import wrappers + +from tensorboard import plugin_util from tensorboard.plugins import base_plugin +from tensorboard.plugins.debugger_v2 import debug_data_provider +from tensorboard.backend import http_util class DebuggerV2Plugin(base_plugin.TBPlugin): @@ -33,10 +38,18 @@ def __init__(self, context): context: A base_plugin.TBContext instance. """ super(DebuggerV2Plugin, self).__init__(context) + self._logdir = context.logdir + # TODO(cais): Implement factory for DataProvider that takes into account + # the settings. + self._data_provider = debug_data_provider.LocalDebuggerV2DataProvider( + self._logdir + ) def get_plugin_apps(self): # TODO(cais): Add routes as they are implemented. - return {} + return { + "/runs": self.serve_runs, + } def is_active(self): """Check whether the Debugger V2 Plugin is always active. 
@@ -55,3 +68,11 @@ def frontend_metadata(self): return base_plugin.FrontendMetadata( is_ng_component=True, tab_name="Debugger V2", disable_reload=True ) + + @wrappers.Request.application + def serve_runs(self, request): + experiment = plugin_util.experiment_id(request.environ) + runs = self._data_provider.list_runs(experiment) + return http_util.Respond( + request, [run.run_id for run in runs], "application/json" + ) diff --git a/tensorboard/plugins/debugger_v2/debugger_v2_plugin_test.py b/tensorboard/plugins/debugger_v2/debugger_v2_plugin_test.py index 7686881401..dfd91daea2 100644 --- a/tensorboard/plugins/debugger_v2/debugger_v2_plugin_test.py +++ b/tensorboard/plugins/debugger_v2/debugger_v2_plugin_test.py @@ -18,26 +18,99 @@ from __future__ import division from __future__ import print_function -import tempfile +import json import tensorflow as tf +from werkzeug import test as werkzeug_test # pylint: disable=wrong-import-order +from werkzeug import wrappers +from tensorboard.backend import application from tensorboard.plugins import base_plugin from tensorboard.plugins.debugger_v2 import debugger_v2_plugin +from tensorboard.util import test_util +def _generate_tfdbg_v2_data(logdir): + """Generate a simple dump of tfdbg v2 data by running a TF2 program. + + The run is instrumented by the enable_dump_debug_info() API. + + The instrumented program is intentionally diverse in: + - Execution paradigm: eager + tf.function + - Control flow (TF while loop) + - dtype and shape + in order to facilitate testing. + + Args: + logdir: Logdir to write the debugger data to. 
+ """ + writer = tf.debugging.experimental.enable_dump_debug_info( + logdir, circular_buffer_size=-1 + ) + try: + + @tf.function + def unstack_and_sum(x): + elements = tf.unstack(x) + return elements[0] + elements[1] + elements[2] + elements[3] + + @tf.function + def repeated_add(x, times): + sum = tf.constant(0, dtype=x.dtype) + i = tf.constant(0, dtype=tf.int32) + while tf.less(i, times): + sum += x + i += 1 + return sum + + @tf.function + def my_function(x): + times = tf.constant(3, dtype=tf.int32) + return repeated_add(unstack_and_sum(x), times) + + x = tf.constant([1, 3, 3, 7], dtype=tf.float32) + for i in range(3): + assert my_function(x).numpy() == 42.0 + writer.FlushNonExecutionFiles() + writer.FlushExecutionFiles() + finally: + tf.debugging.experimental.disable_dump_debug_info() + + +_ROUTE_PREFIX = "/data/plugin/debugger-v2" + + +@test_util.run_v2_only("tfdbg2 is not available in r1.") class DebuggerV2PluginTest(tf.test.TestCase): - def testInstantiatePlugin(self): - dummy_logdir = tempfile.mkdtemp() - context = base_plugin.TBContext(logdir=dummy_logdir) - plugin = debugger_v2_plugin.DebuggerV2Plugin(context) - self.assertTrue(plugin) + def setUp(self): + super(DebuggerV2PluginTest, self).setUp() + self.logdir = self.get_temp_dir() + context = base_plugin.TBContext(logdir=self.logdir) + self.plugin = debugger_v2_plugin.DebuggerV2Plugin(context) + wsgi_app = application.TensorBoardWSGI([self.plugin]) + self.server = werkzeug_test.Client(wsgi_app, wrappers.BaseResponse) def testPluginIsNotActiveByDefault(self): - dummy_logdir = tempfile.mkdtemp() - context = base_plugin.TBContext(logdir=dummy_logdir) - plugin = debugger_v2_plugin.DebuggerV2Plugin(context) - self.assertFalse(plugin.is_active()) + self.assertFalse(self.plugin.is_active()) + + def testServeRunsWithoutExistingRuns(self): + response = self.server.get(_ROUTE_PREFIX + "/runs") + self.assertEqual(200, response.status_code) + self.assertEqual( + "application/json", 
response.headers.get("content-type") + ) + self.assertEqual(json.loads(response.get_data()), []) + + def testServeRunsWithExistingRuns(self): + _generate_tfdbg_v2_data(self.logdir) + response = self.server.get(_ROUTE_PREFIX + "/runs") + self.assertEqual(200, response.status_code) + self.assertEqual( + "application/json", response.headers.get("content-type") + ) + self.assertEqual( + json.loads(response.get_data()), ["__default_debugger_run__"] + ) if __name__ == "__main__":