From 3f66658759dc62757aad56e450b41e94dd6bf811 Mon Sep 17 00:00:00 2001 From: William Chargin Date: Fri, 28 Feb 2020 16:26:15 -0800 Subject: [PATCH 1/2] uploader: export experiments to subdirectories MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: We now emit an experiments scalar data to `experiment_123/scalars.json` rather than `scalars_123.json`. This generalizes more cleanly to writing multiple files under an experiment’s directory. Test Plan: Unit tests updated. One previous test case can no longer be hit and has thus been removed. wchargin-branch: uploader-export-subdirs wchargin-source: baa128470c061d7d2c0153f6491a1cdf8dd416c5 --- tensorboard/uploader/exporter.py | 14 ++++--- tensorboard/uploader/exporter_test.py | 54 +++++++++++---------------- 2 files changed, 30 insertions(+), 38 deletions(-) diff --git a/tensorboard/uploader/exporter.py b/tensorboard/uploader/exporter.py index 3eb74aaffc..78833e42e4 100644 --- a/tensorboard/uploader/exporter.py +++ b/tensorboard/uploader/exporter.py @@ -44,6 +44,8 @@ # Maximum value of a signed 64-bit integer. _MAX_INT64 = 2 ** 63 - 1 +# Output filename for scalar data within an experiment directory. +_FILENAME_SCALARS = "scalars.json" class TensorBoardExporter(object): """Exports all of the user's experiment data from TensorBoard.dev. @@ -113,9 +115,12 @@ def export(self, read_time=None): if read_time is None: read_time = time.time() for experiment_id in self._request_experiment_ids(read_time): - filepath = _scalars_filepath(self._outdir, experiment_id) + experiment_dir = _experiment_directory(self._outdir, experiment_id) + os.mkdir(experiment_dir) + + scalars_filepath = os.path.join(experiment_dir, _FILENAME_SCALARS) try: - with _open_excl(filepath) as outfile: + with _open_excl(scalars_filepath) as outfile: data = self._request_scalar_data(experiment_id, read_time) for block in data: json.dump(block, outfile, sort_keys=True) @@ -221,8 +226,7 @@ def __init__(self, experiment_id): self.experiment_id = experiment_id -def _scalars_filepath(base_dir, experiment_id): - """Gets file path in which to store scalars for the given experiment.""" +def _experiment_directory(base_dir, experiment_id): # Experiment IDs from the server should be filename-safe; verify # this before creating any files. bad_chars = frozenset(experiment_id) - _FILENAME_SAFE_CHARS @@ -232,7 +236,7 @@ def _scalars_filepath(base_dir, experiment_id): bad_chars=sorted(bad_chars), eid=experiment_id ) ) - return os.path.join(base_dir, "scalars_%s.json" % experiment_id) + return os.path.join(base_dir, "experiment_%s" % experiment_id) def _mkdir_p(path): diff --git a/tensorboard/uploader/exporter_test.py b/tensorboard/uploader/exporter_test.py index 84caaaa7c6..a50bc714e2 100644 --- a/tensorboard/uploader/exporter_test.py +++ b/tensorboard/uploader/exporter_test.py @@ -99,18 +99,27 @@ def stream_experiment_data(request, **kwargs): start_time = 1571084846.25 start_time_pb = test_util.timestamp_pb(1571084846250000000) + def outdir_files(): + # Recursively list `outdir`. + result = [] + for (dirpath, dirnames, filenames) in os.walk(outdir): + for filename in filenames: + fullpath = os.path.join(dirpath, filename) + result.append(os.path.relpath(fullpath, outdir)) + return result + generator = exporter.export(read_time=start_time) expected_files = [] self.assertTrue(os.path.isdir(outdir)) - self.assertCountEqual(expected_files, os.listdir(outdir)) + self.assertCountEqual(expected_files, outdir_files()) mock_api_client.StreamExperiments.assert_not_called() mock_api_client.StreamExperimentData.assert_not_called() # The first iteration should request the list of experiments and # data for one of them. self.assertEqual(next(generator), "123") - expected_files.append("scalars_123.json") - self.assertCountEqual(expected_files, os.listdir(outdir)) + expected_files.append(os.path.join("experiment_123", "scalars.json")) + self.assertCountEqual(expected_files, outdir_files()) expected_eids_request = export_service_pb2.StreamExperimentsRequest() expected_eids_request.read_timestamp.CopyFrom(start_time_pb) @@ -131,8 +140,8 @@ def stream_experiment_data(request, **kwargs): mock_api_client.StreamExperimentData.reset_mock() self.assertEqual(next(generator), "456") - expected_files.append("scalars_456.json") - self.assertCountEqual(expected_files, os.listdir(outdir)) + expected_files.append(os.path.join("experiment_456", "scalars.json")) + self.assertCountEqual(expected_files, outdir_files()) mock_api_client.StreamExperiments.assert_not_called() expected_data_request.experiment_id = "456" mock_api_client.StreamExperimentData.assert_called_once_with( @@ -141,12 +150,12 @@ def stream_experiment_data(request, **kwargs): # Again, request data for the next experiment; this experiment ID # was in the second response batch in the list of IDs. - expected_files.append("scalars_789.json") + expected_files.append(os.path.join("experiment_789", "scalars.json")) mock_api_client.StreamExperiments.reset_mock() mock_api_client.StreamExperimentData.reset_mock() self.assertEqual(next(generator), "789") - self.assertCountEqual(expected_files, os.listdir(outdir)) + self.assertCountEqual(expected_files, outdir_files()) mock_api_client.StreamExperiments.assert_not_called() expected_data_request.experiment_id = "789" mock_api_client.StreamExperimentData.assert_called_once_with( @@ -158,12 +167,14 @@ def stream_experiment_data(request, **kwargs): mock_api_client.StreamExperimentData.reset_mock() self.assertEqual(list(generator), []) - self.assertCountEqual(expected_files, os.listdir(outdir)) + self.assertCountEqual(expected_files, outdir_files()) mock_api_client.StreamExperiments.assert_not_called() mock_api_client.StreamExperimentData.assert_not_called() - # Spot-check one of the files. - with open(os.path.join(outdir, "scalars_456.json")) as infile: + # Spot-check one of the scalar data files. + with open( + os.path.join(outdir, "experiment_456", "scalars.json") + ) as infile: jsons = [json.loads(line) for line in infile] self.assertLen(jsons, 4) datum = jsons[2] @@ -309,29 +320,6 @@ def test_rejects_existing_directory(self): mock_api_client.StreamExperiments.assert_not_called() mock_api_client.StreamExperimentData.assert_not_called() - def test_rejects_existing_file(self): - mock_api_client = self._create_mock_api_client() - - def stream_experiments(request, **kwargs): - del request # unused - yield export_service_pb2.StreamExperimentsResponse( - experiment_ids=["123"] - ) - - mock_api_client.StreamExperiments = stream_experiments - - outdir = os.path.join(self.get_temp_dir(), "outdir") - exporter = exporter_lib.TensorBoardExporter(mock_api_client, outdir) - generator = exporter.export() - - with open(os.path.join(outdir, "scalars_123.json"), "w"): - pass - - with self.assertRaises(exporter_lib.OutputFileExistsError): - next(generator) - - mock_api_client.StreamExperimentData.assert_not_called() - def test_propagates_mkdir_errors(self): mock_api_client = self._create_mock_api_client() outdir = os.path.join(self.get_temp_dir(), "some_file", "outdir") From c19d9fa27da923c012128aed49059d9fadcb7a69 Mon Sep 17 00:00:00 2001 From: William Chargin Date: Fri, 28 Feb 2020 16:45:57 -0800 Subject: [PATCH 2/2] [update patch] wchargin-branch: uploader-export-subdirs wchargin-source: f5009e29cf96dd119b769ad553c5666438f4df99 --- tensorboard/uploader/exporter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorboard/uploader/exporter.py b/tensorboard/uploader/exporter.py index 78833e42e4..a6160a2202 100644 --- a/tensorboard/uploader/exporter.py +++ b/tensorboard/uploader/exporter.py @@ -47,6 +47,7 @@ # Output filename for scalar data within an experiment directory. _FILENAME_SCALARS = "scalars.json" + class TensorBoardExporter(object): """Exports all of the user's experiment data from TensorBoard.dev.