diff --git a/tensorboard/uploader/BUILD b/tensorboard/uploader/BUILD
index d3a8b4096e..c5f0d64da0 100644
--- a/tensorboard/uploader/BUILD
+++ b/tensorboard/uploader/BUILD
@@ -31,6 +31,7 @@ py_test(
     deps = [
         ":exporter_lib",
         ":test_util",
+        ":util",
         "//tensorboard:expect_grpc_installed",
         "//tensorboard:expect_grpc_testing_installed",
         "//tensorboard:test",
diff --git a/tensorboard/uploader/exporter.py b/tensorboard/uploader/exporter.py
index 646641bbe9..c6ad84d827 100644
--- a/tensorboard/uploader/exporter.py
+++ b/tensorboard/uploader/exporter.py
@@ -45,6 +45,9 @@
 # Maximum value of a signed 64-bit integer.
 _MAX_INT64 = 2 ** 63 - 1
 
+# Output filename for experiment metadata (creation time, description,
+# etc.) within an experiment directory.
+_FILENAME_METADATA = "metadata.json"
 # Output filename for scalar data within an experiment directory.
 _FILENAME_SCALARS = "scalars.json"
 
@@ -118,11 +121,32 @@ def export(self, read_time=None):
         """
         if read_time is None:
             read_time = time.time()
-        for experiment in list_experiments(self._api, read_time=read_time):
+        experiment_metadata_mask = experiment_pb2.ExperimentMask(
+            create_time=True, update_time=True, name=True, description=True,
+        )
+        experiments = list_experiments(
+            self._api, fieldmask=experiment_metadata_mask, read_time=read_time
+        )
+        for experiment in experiments:
             experiment_id = experiment.experiment_id
+            experiment_metadata = {
+                "name": experiment.name,
+                "description": experiment.description,
+                "create_time": util.format_time_absolute(
+                    experiment.create_time
+                ),
+                "update_time": util.format_time_absolute(
+                    experiment.update_time
+                ),
+            }
             experiment_dir = _experiment_directory(self._outdir, experiment_id)
             os.mkdir(experiment_dir)
 
+            metadata_filepath = os.path.join(experiment_dir, _FILENAME_METADATA)
+            with _open_excl(metadata_filepath) as outfile:
+                json.dump(experiment_metadata, outfile, sort_keys=True)
+                outfile.write("\n")
+
             scalars_filepath = os.path.join(experiment_dir, _FILENAME_SCALARS)
             try:
                 with _open_excl(scalars_filepath) as outfile:
diff --git a/tensorboard/uploader/exporter_test.py b/tensorboard/uploader/exporter_test.py
index 2ecdc9be36..60b10cd585 100644
--- a/tensorboard/uploader/exporter_test.py
+++ b/tensorboard/uploader/exporter_test.py
@@ -38,6 +38,7 @@
 from tensorboard.uploader.proto import export_service_pb2_grpc
 from tensorboard.uploader import exporter as exporter_lib
 from tensorboard.uploader import test_util
+from tensorboard.uploader import util
 from tensorboard.util import grpc_util
 from tensorboard import test as tb_test
 from tensorboard.compat.proto import summary_pb2
@@ -64,8 +65,20 @@ def test_e2e_success_case(self):
         def stream_experiments(request, **kwargs):
             del request  # unused
             self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())
-            yield _make_experiments_response(["123", "456"])
-            yield _make_experiments_response(["789"])
+
+            response = export_service_pb2.StreamExperimentsResponse()
+            response.experiments.add(experiment_id="123")
+            response.experiments.add(experiment_id="456")
+            yield response
+
+            response = export_service_pb2.StreamExperimentsResponse()
+            experiment = response.experiments.add()
+            experiment.experiment_id = "789"
+            experiment.name = "bert"
+            experiment.description = "ernie"
+            util.set_timestamp(experiment.create_time, 981173106)
+            util.set_timestamp(experiment.update_time, 1015218367)
+            yield response
 
         def stream_experiment_data(request, **kwargs):
             self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())
@@ -115,12 +128,17 @@ def outdir_files():
         # The first iteration should request the list of experiments and
         # data for one of them.
         self.assertEqual(next(generator), "123")
+        expected_files.append(os.path.join("experiment_123", "metadata.json"))
         expected_files.append(os.path.join("experiment_123", "scalars.json"))
         self.assertCountEqual(expected_files, outdir_files())
 
         expected_eids_request = export_service_pb2.StreamExperimentsRequest()
         expected_eids_request.read_timestamp.CopyFrom(start_time_pb)
         expected_eids_request.limit = 2 ** 63 - 1
+        expected_eids_request.experiments_mask.create_time = True
+        expected_eids_request.experiments_mask.update_time = True
+        expected_eids_request.experiments_mask.name = True
+        expected_eids_request.experiments_mask.description = True
         mock_api_client.StreamExperiments.assert_called_once_with(
             expected_eids_request, metadata=grpc_util.version_metadata()
         )
@@ -137,6 +155,7 @@ def outdir_files():
         mock_api_client.StreamExperimentData.reset_mock()
         self.assertEqual(next(generator), "456")
 
+        expected_files.append(os.path.join("experiment_456", "metadata.json"))
         expected_files.append(os.path.join("experiment_456", "scalars.json"))
         self.assertCountEqual(expected_files, outdir_files())
         mock_api_client.StreamExperiments.assert_not_called()
@@ -147,6 +166,7 @@ def outdir_files():
 
         # Again, request data for the next experiment; this experiment ID
         # was in the second response batch in the list of IDs.
+        expected_files.append(os.path.join("experiment_789", "metadata.json"))
         expected_files.append(os.path.join("experiment_789", "scalars.json"))
         mock_api_client.StreamExperiments.reset_mock()
         mock_api_client.StreamExperimentData.reset_mock()
@@ -192,6 +212,21 @@ def outdir_files():
         self.assertEqual(points, {})
         self.assertEqual(datum, {})
 
+        # Spot-check one of the metadata files.
+        with open(
+            os.path.join(outdir, "experiment_789", "metadata.json")
+        ) as infile:
+            metadata = json.load(infile)
+        self.assertEqual(
+            metadata,
+            {
+                "name": "bert",
+                "description": "ernie",
+                "create_time": "2001-02-03T04:05:06Z",
+                "update_time": "2002-03-04T05:06:07Z",
+            },
+        )
+
     def test_rejects_dangerous_experiment_ids(self):
         mock_api_client = self._create_mock_api_client()
 
diff --git a/tensorboard/uploader/util.py b/tensorboard/uploader/util.py
index cc0534baab..39697d7ec6 100644
--- a/tensorboard/uploader/util.py
+++ b/tensorboard/uploader/util.py
@@ -123,7 +123,9 @@ def format_time(timestamp_pb, now=None):
     """Converts a `timestamp_pb2.Timestamp` to human-readable string.
 
     This always includes the absolute date and time, and for recent dates
-    may include a relative time like "(just now)" or "(2 hours ago)".
+    may include a relative time like "(just now)" or "(2 hours ago)". It
+    should thus be used for ephemeral values. Use `format_time_absolute`
+    if the output will be persisted.
 
     Args:
       timestamp_pb: A `google.protobuf.timestamp_pb2.Timestamp` value to
@@ -163,5 +165,21 @@ def ago_text(n, singular, plural):
     return str(dt) + relative_part
 
 
+def format_time_absolute(timestamp_pb):
+    """Converts a `timestamp_pb2.Timestamp` to UTC time string.
+
+    This will always be of the form "2001-02-03T04:05:06Z".
+
+    Args:
+      timestamp_pb: A `google.protobuf.timestamp_pb2.Timestamp` value to
+        convert to string. The input will not be modified.
+
+    Returns:
+      An RFC 3339 date-time string.
+    """
+    dt = datetime.datetime.utcfromtimestamp(timestamp_pb.seconds)
+    return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+
 def _ngettext(n, singular, plural):
     return "%d %s ago" % (n, singular if n == 1 else plural)
diff --git a/tensorboard/uploader/util_test.py b/tensorboard/uploader/util_test.py
index d6dd451808..caba38fda8 100644
--- a/tensorboard/uploader/util_test.py
+++ b/tensorboard/uploader/util_test.py
@@ -253,5 +253,28 @@ def test_long_ago(self):
         self.assertEqual(actual, "2019-01-02 03:04:05")
 
 
+class FormatTimeAbsoluteTest(tb_test.TestCase):
+    def _run(self, t=None, tz=None):
+        timestamp_pb = timestamp_pb2.Timestamp()
+        util.set_timestamp(timestamp_pb, t)
+        try:
+            with mock.patch.dict(os.environ, {"TZ": tz}):
+                time.tzset()
+                return util.format_time_absolute(timestamp_pb)
+        finally:
+            time.tzset()
+
+    def test_in_tz_utc(self):
+        t = 981173106
+        actual = self._run(t, tz="UTC")
+        self.assertEqual(actual, "2001-02-03T04:05:06Z")
+
+    def test_in_tz_nonutc(self):
+        # Shouldn't be affected by timezone.
+        t = 981173106
+        actual = self._run(t, tz="America/Los_Angeles")
+        self.assertEqual(actual, "2001-02-03T04:05:06Z")
+
+
 if __name__ == "__main__":
     tb_test.main()