From 27409c514f255eec266b631cbe95c3c745d336db Mon Sep 17 00:00:00 2001 From: James Date: Mon, 23 Oct 2023 11:22:38 -0700 Subject: [PATCH 1/8] HParams: Load spinner in data table and do not reload table (#6658) ## Motivation for features / changes Issue number 5 in #6651. It is also just a better experience to keep previously loaded data intact while fetching new data. ## Technical description of changes In order to put the spinner below the headers I added the spinner to the data-table. It seems like a general feature that should be in that widget anyway. ## Screenshots of UI changes (or N/A) ![2023-10-20_15-13-34 (1)](https://github.com/tensorflow/tensorboard/assets/8672809/23bfa0ed-077c-45fb-9df5-a0c3db1db67d) --- .../views/runs_table/runs_data_table.ng.html | 5 +---- .../views/runs_table/runs_data_table.scss | 10 --------- .../data_table/data_table_component.ng.html | 3 +++ .../data_table/data_table_component.scss | 10 +++++++++ .../data_table/data_table_component.ts | 1 + .../widgets/data_table/data_table_module.ts | 2 ++ .../widgets/data_table/data_table_test.ts | 21 +++++++++++++++++++ 7 files changed, 38 insertions(+), 14 deletions(-) diff --git a/tensorboard/webapp/runs/views/runs_table/runs_data_table.ng.html b/tensorboard/webapp/runs/views/runs_table/runs_data_table.ng.html index 0097393986..4848283c26 100644 --- a/tensorboard/webapp/runs/views/runs_table/runs_data_table.ng.html +++ b/tensorboard/webapp/runs/views/runs_table/runs_data_table.ng.html @@ -23,17 +23,14 @@ placeholder="Filter runs (regex)" > -
diff --git a/tensorboard/webapp/widgets/data_table/data_table_component.scss b/tensorboard/webapp/widgets/data_table/data_table_component.scss index 02a53f75fa..125e8781a4 100644 --- a/tensorboard/webapp/widgets/data_table/data_table_component.scss +++ b/tensorboard/webapp/widgets/data_table/data_table_component.scss @@ -45,6 +45,16 @@ $_accent: map-get(mat.get-color-config($tb-theme), accent); } } +.loading { + align-items: center; + border: 0; + @include tb-theme-foreground-prop(border-bottom, border, 1px solid); + display: flex; + height: 48px; + padding: 0 24px; + justify-content: center; +} + .add-button-cell { display: table-cell; width: 40px; diff --git a/tensorboard/webapp/widgets/data_table/data_table_component.ts b/tensorboard/webapp/widgets/data_table/data_table_component.ts index 336d1e676d..95993ae8cd 100644 --- a/tensorboard/webapp/widgets/data_table/data_table_component.ts +++ b/tensorboard/webapp/widgets/data_table/data_table_component.ts @@ -65,6 +65,7 @@ export class DataTableComponent implements OnDestroy, AfterContentInit { @Input() columnCustomizationEnabled!: boolean; @Input() selectableColumns?: ColumnHeader[]; @Input() columnFilters!: Map; + @Input() loading: boolean = false; @ContentChildren(HeaderCellComponent) headerCells!: QueryList; diff --git a/tensorboard/webapp/widgets/data_table/data_table_module.ts b/tensorboard/webapp/widgets/data_table/data_table_module.ts index 62ced1d9f5..2220aef24d 100644 --- a/tensorboard/webapp/widgets/data_table/data_table_module.ts +++ b/tensorboard/webapp/widgets/data_table/data_table_module.ts @@ -17,6 +17,7 @@ import {CommonModule} from '@angular/common'; import {NgModule} from '@angular/core'; import {MatIconModule} from '@angular/material/icon'; import {MatButtonModule} from '@angular/material/button'; +import {MatProgressSpinnerModule} from '@angular/material/progress-spinner'; import {DataTableComponent} from './data_table_component'; import {HeaderCellComponent} from './header_cell_component'; import {DataTableHeaderModule} from './data_table_header_module'; @@ -43,6 +44,7 @@ import {FilterDialogModule} from './filter_dialog_module'; CommonModule, MatIconModule, MatButtonModule, + MatProgressSpinnerModule, DataTableHeaderModule, CustomModalModule, ColumnSelectorModule, diff --git a/tensorboard/webapp/widgets/data_table/data_table_test.ts b/tensorboard/webapp/widgets/data_table/data_table_test.ts index ec9a2ab96c..3a575be9c1 100644 --- a/tensorboard/webapp/widgets/data_table/data_table_test.ts +++ b/tensorboard/webapp/widgets/data_table/data_table_test.ts @@ -46,6 +46,7 @@ import {FilterDialog} from './filter_dialog_component'; [sortingInfo]="sortingInfo" [selectableColumns]="selectableColumns" [columnFilters]="columnFilters" + [loading]="loading" (sortDataBy)="sortDataBy($event)" (orderColumns)="orderColumns($event)" (addColumn)="addColumn.emit($event)" @@ -88,6 +89,7 @@ class TestableComponent { @Input() orderColumns!: (newOrder: ColumnHeaderType[]) => void; @Input() selectableColumns!: ColumnHeader[]; @Input() columnFilters!: Map; + @Input() loading!: boolean; @Output() addColumn = new EventEmitter<{ header: ColumnHeader; @@ -123,6 +125,7 @@ describe('data table', () => { data?: TableData[]; potentialColumns?: ColumnHeader[]; columnFilters?: Map; + loading?: boolean; }): ComponentFixture { const fixture = TestBed.createComponent(TestableComponent); @@ -140,6 +143,10 @@ describe('data table', () => { fixture.componentInstance.selectableColumns = input.potentialColumns; } + if (input.loading !== undefined) { + 
fixture.componentInstance.loading = input.loading; + } + fixture.componentInstance.columnFilters = input.columnFilters || new Map(); sortDataBySpy = jasmine.createSpy(); @@ -159,6 +166,20 @@ describe('data table', () => { expect(dataTable).toBeTruthy(); }); + it('renders spinner when loading', () => { + const fixture = createComponent({loading: true}); + fixture.detectChanges(); + const spinner = fixture.debugElement.query(By.css('.loading')); + expect(spinner).toBeTruthy(); + }); + + it('does not renders spinner when not loading', () => { + const fixture = createComponent({loading: false}); + fixture.detectChanges(); + const spinner = fixture.debugElement.query(By.css('.loading')); + expect(spinner).toBeFalsy(); + }); + it('emits sortDataBy event when header emits headerClicked event', () => { const fixture = createComponent({ headers: [ From e6355e87b5296796401f4f33b57ca6d63241e8f1 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 23 Oct 2023 16:33:12 -0700 Subject: [PATCH 2/8] MDCMigration: Fix prev and next buttons for dark mode (#6663) ## Motivation for features / changes The new mat-button uses different colors in it's styling. This caused a readability problem with our Next and Prev buttons when enabled. This fixes the buttons to be readable and look nice. We decided the transparent background looked nice in dark mode but not in light mode. This was brought to our attention in #6651. ## Screenshots of UI changes (or N/A) Before: Screenshot 2023-10-20 at 11 22 09 AM Screenshot 2023-10-20 at 11 22 17 AM After: Screenshot 2023-10-20 at 11 00 38 AM Screenshot 2023-10-20 at 11 13 09 AM ## Alternate designs / implementations considered (or N/A) Also considered having a transparent background in light mode. --- .../webapp/metrics/views/main_view/card_grid_component.scss | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tensorboard/webapp/metrics/views/main_view/card_grid_component.scss b/tensorboard/webapp/metrics/views/main_view/card_grid_component.scss index c78c16ba04..c12d11cf3b 100644 --- a/tensorboard/webapp/metrics/views/main_view/card_grid_component.scss +++ b/tensorboard/webapp/metrics/views/main_view/card_grid_component.scss @@ -92,10 +92,8 @@ card-view { } .pagination-button { - @include tb-theme-foreground-prop(color, secondary-text); background-color: $metrics-button-background-color-on-gray; - - &:disabled { - @include tb-theme-foreground-prop(color, disabled-text); + @include tb-dark-theme { + background-color: transparent; } } From e16abe5828cd7c54d65f45304d8822b865b58a95 Mon Sep 17 00:00:00 2001 From: Brian Dubois Date: Tue, 24 Oct 2023 07:20:08 -0400 Subject: [PATCH 3/8] tbdev turndown: Remove dataframe api. (#6644) Remove the experimental API that generates pandas data frames from TensorBoard.dev data. We are turning down TensorBoard.dev. 
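For reference, the API being removed is the `ExperimentFromDev` class and its `get_scalars()` method, whose sources are deleted below. A minimal sketch of the retired usage pattern, based on the deleted `experiment_from_dev.py` and `test_binary.py` (the experiment ID is the placeholder default from `test_binary.py`, and this call path stops working once TensorBoard.dev is turned down):

```python
from tensorboard.data.experimental import experiment_from_dev

# Fetch all scalar time series of a tensorboard.dev experiment as a pandas
# DataFrame; pivot=True spreads the tags into columns of the DataFrame.
experiment = experiment_from_dev.ExperimentFromDev("AdYd1TgeTlaLWXx6I8JUbA")
dataframe = experiment.get_scalars(pivot=True, include_wall_time=False)
print(dataframe)
```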
--- tensorboard/BUILD | 1 - tensorboard/__init__.py | 7 - tensorboard/data/BUILD | 10 - tensorboard/data/__init__.py | 3 - tensorboard/data/experimental/BUILD | 75 +----- tensorboard/data/experimental/__init__.py | 19 -- .../data/experimental/base_experiment.py | 76 ------ .../data/experimental/experiment_from_dev.py | 159 ----------- .../experimental/experiment_from_dev_test.py | 250 ------------------ tensorboard/data/experimental/test_binary.py | 62 ----- tensorboard/data/experimental/utils.py | 64 ----- tensorboard/pip_package/BUILD | 1 - 12 files changed, 2 insertions(+), 725 deletions(-) delete mode 100644 tensorboard/data/experimental/__init__.py delete mode 100644 tensorboard/data/experimental/base_experiment.py delete mode 100644 tensorboard/data/experimental/experiment_from_dev.py delete mode 100644 tensorboard/data/experimental/experiment_from_dev_test.py delete mode 100644 tensorboard/data/experimental/test_binary.py delete mode 100644 tensorboard/data/experimental/utils.py diff --git a/tensorboard/BUILD b/tensorboard/BUILD index d03a9e1452..7c8c1252df 100644 --- a/tensorboard/BUILD +++ b/tensorboard/BUILD @@ -89,7 +89,6 @@ py_library( ":lib_init_only", ":notebook", ":program", - "//tensorboard/data:lib_init_only", "//tensorboard/summary", "//tensorboard/summary:summary_v1", "//tensorboard/summary:summary_v2", diff --git a/tensorboard/__init__.py b/tensorboard/__init__.py index 0275d36555..95bc174e64 100644 --- a/tensorboard/__init__.py +++ b/tensorboard/__init__.py @@ -71,13 +71,6 @@ # additional discussion. -@_lazy.lazy_load("tensorboard.data") -def data(): - import importlib - - return importlib.import_module("tensorboard.data") - - @_lazy.lazy_load("tensorboard.errors") def errors(): import importlib diff --git a/tensorboard/data/BUILD b/tensorboard/data/BUILD index 22154fd938..0a3337af1a 100644 --- a/tensorboard/data/BUILD +++ b/tensorboard/data/BUILD @@ -8,16 +8,6 @@ package(default_visibility = ["//tensorboard:internal"]) licenses(["notice"]) -py_library( - name = "lib_init_only", - srcs = ["__init__.py"], - srcs_version = "PY3", - visibility = ["//tensorboard:internal"], - deps = [ - "//tensorboard/data/experimental:lib_init_only", - ], -) - py_library( name = "provider", srcs = ["provider.py"], diff --git a/tensorboard/data/__init__.py b/tensorboard/data/__init__.py index dfcbc38f92..931c2ef11d 100644 --- a/tensorboard/data/__init__.py +++ b/tensorboard/data/__init__.py @@ -12,6 +12,3 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - - -from tensorboard.data import experimental # noqa: F401 diff --git a/tensorboard/data/experimental/BUILD b/tensorboard/data/experimental/BUILD index 8d49db14e5..da1d82e6a2 100644 --- a/tensorboard/data/experimental/BUILD +++ b/tensorboard/data/experimental/BUILD @@ -1,78 +1,7 @@ -# Description: -# Experiment Data Access API. -load("@rules_python//python:py_binary.bzl", "py_binary") -load("@rules_python//python:py_library.bzl", "py_library") -load("@rules_python//python:py_test.bzl", "py_test") - +# This is a stub BUILD file that remains after the deletion of the experimental +# data frame API. We keep it (temporarily) to allow copybara imports to succeed. 
package(default_visibility = ["//tensorboard:internal"]) licenses(["notice"]) exports_files(["LICENSE"]) - -py_library( - name = "base_experiment", - srcs = ["base_experiment.py"], - srcs_version = "PY3", -) - -py_library( - name = "experiment_from_dev", - srcs = ["experiment_from_dev.py"], - srcs_version = "PY3", - deps = [ - ":base_experiment", - ":utils", - "//tensorboard:expect_grpc_installed", - "//tensorboard:expect_pandas_installed", - "//tensorboard/uploader:auth", - "//tensorboard/uploader:server_info", - "//tensorboard/uploader:util", - "//tensorboard/uploader/proto:protos_all_py_pb2", - "//tensorboard/uploader/proto:protos_all_py_pb2_grpc", - "//tensorboard/util:grpc_util", - ], -) - -py_test( - name = "experiment_from_dev_test", - srcs = ["experiment_from_dev_test.py"], - srcs_version = "PY3", - deps = [ - ":experiment_from_dev", - "//tensorboard:expect_numpy_installed", - "//tensorboard:expect_pandas_installed", - "//tensorboard:test", - "//tensorboard/compat/proto:protos_all_py_pb2", - "//tensorboard/uploader:test_util", - "//tensorboard/uploader/proto:protos_all_py_pb2", - "//tensorboard/util:grpc_util", - ], -) - -py_library( - name = "lib_init_only", - srcs = ["__init__.py"], - srcs_version = "PY3", - visibility = ["//tensorboard:internal"], - deps = [ - ":experiment_from_dev", - ], -) - -py_binary( - name = "test_binary", - srcs = ["test_binary.py"], - srcs_version = "PY3", - deps = ["//tensorboard/data/experimental:experiment_from_dev"], -) - -py_library( - name = "utils", - srcs = ["utils.py"], - srcs_version = "PY3", - visibility = ["//tensorboard:internal"], - deps = [ - "//tensorboard:expect_numpy_installed", - ], -) diff --git a/tensorboard/data/experimental/__init__.py b/tensorboard/data/experimental/__init__.py deleted file mode 100644 index 3257ac3e28..0000000000 --- a/tensorboard/data/experimental/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - -from tensorboard.data.experimental.experiment_from_dev import ( # noqa: F401 - ExperimentFromDev, -) diff --git a/tensorboard/data/experimental/base_experiment.py b/tensorboard/data/experimental/base_experiment.py deleted file mode 100644 index eb6399673f..0000000000 --- a/tensorboard/data/experimental/base_experiment.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Base Class of Experiment Data Access API.""" - - -import abc - - -class BaseExperiment(metaclass=abc.ABCMeta): - """Base class for experiment data access.""" - - # TODO(cais): Add list_scalar_runs(). - # TODO(cais): Add list_scalar_tags(). - - @abc.abstractmethod - def get_scalars( - self, - runs_filter=None, - tags_filter=None, - pivot=False, - include_wall_time=False, - ): - """Export scalar data as a pandas.DataFrame. - - Args: - runs_filter: A regex filter for runs (e.g., r'run_[2-4]'). Operates in - logical AND relation with `tags_filter`. - tags_filter: A regex filter for tags (e.g., r'.*loss.*'). Operates in - logical AND related with `runs_filter`. - pivot: Whether to returned DataFrame will be pivoted (via pandas’ - `pivot_data()` method to a “wide” format wherein the tags of a - given run and a given step are all collected in a single row. - Setting `pivot` to `True` stipulates that the sets of step values - are identical among all tags in every run of the experiment (after - any run and tag filtering), so that the pivoting operation will not - introduce missing values in the resultant DataFrame. Failing to meet - this condition will cause `pivot=True` to raise a `ValueError`. - If not provided, defaults to `False`. - include_wall_time: Include wall_time (timestamps in nanoseconds since - the epoch in float64) as a column in the returned DataFrame. - If not provided, defaults to `False`. - - Returns: - If `pivot` (default): - A pivoted DataFrame with the indexing columns of - - run - - step - And value columns that correspond to the tags. - Duplicate entries for each run-step combination will be aggregated - with `numpy.stack`. This format is more friendly to manipulation and - plotting and hence io chosen as the default. When certain rows have - missing values, a warning message will be displayed and advise the - user to use the `pivot=False` if steps have different meanings in - the experiment. - If `not pivot`: - A DataFrame with the following columns. - - run: (non-null object) - - tag: (non-null object) - - steps: (non-null int64) - - wall_time: (non-null object) - - value: (non-null float32) - """ - # TODO(cais): Add description about sorting order. - pass diff --git a/tensorboard/data/experimental/experiment_from_dev.py b/tensorboard/data/experimental/experiment_from_dev.py deleted file mode 100644 index 70bd98efdf..0000000000 --- a/tensorboard/data/experimental/experiment_from_dev.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Experiment Data Access API for tensorboard.dev.""" - - -import sys -import time - -import grpc - -from tensorboard.data.experimental import base_experiment -from tensorboard.data.experimental import utils as experimental_utils -from tensorboard.uploader import auth -from tensorboard.uploader import util -from tensorboard.uploader import server_info as server_info_lib -from tensorboard.uploader.proto import export_service_pb2 -from tensorboard.uploader.proto import export_service_pb2_grpc -from tensorboard.uploader.proto import server_info_pb2 -from tensorboard.util import grpc_util - - -DEFAULT_ORIGIN = "https://tensorboard.dev" - - -def import_pandas(): - """Import pandas, guarded by a user-friendly error message on failure.""" - try: - import pandas - except ImportError: - raise ImportError( - "The get_scalars() feature requires the pandas package, " - "which does not seem to be available in your Python " - "environment. You can install it with command:\n\n" - " pip install pandas\n" - ) - return pandas - - -class ExperimentFromDev(base_experiment.BaseExperiment): - """Implementation of BaseExperiment, specialized for tensorboard.dev.""" - - def __init__(self, experiment_id, api_endpoint=None): - """Constructor of ExperimentFromDev. - - Args: - experiment_id: String ID of the experiment on tensorboard.dev (e.g., - "AdYd1TgeTlaLWXx6I8JUbA"). - api_endpoint: Optional override value for API endpoint. Used for - development only. - """ - super().__init__() - self._experiment_id = experiment_id - self._api_client = get_api_client(api_endpoint=api_endpoint) - - def get_scalars( - self, - runs_filter=None, - tags_filter=None, - pivot=False, - include_wall_time=False, - ): - # NOTE(#3650): Import pandas early in this method, so if the - # Python environment does not have pandas installed, an error can be - # raised early, before any rpc call is made. - pandas = import_pandas() - if runs_filter is not None: - raise NotImplementedError( - "runs_filter support for get_scalars() is not implemented yet." - ) - if tags_filter is not None: - raise NotImplementedError( - "tags_filter support for get_scalars() is not implemented yet." - ) - - request = export_service_pb2.StreamExperimentDataRequest() - request.experiment_id = self._experiment_id - read_time = time.time() - util.set_timestamp(request.read_timestamp, read_time) - # TODO(cais, wchargin): Use another rpc to check for staleness and avoid - # a new StreamExperimentData rpc request if data is not stale. - stream = self._api_client.StreamExperimentData( - request, metadata=grpc_util.version_metadata() - ) - - runs = [] - tags = [] - steps = [] - wall_times = [] - values = [] - for response in stream: - # TODO(cais, wchargin): Display progress bar during data loading. 
- num_values = len(response.points.values) - runs.extend([response.run_name] * num_values) - tags.extend([response.tag_name] * num_values) - steps.extend(list(response.points.steps)) - wall_times.extend( - [t.ToNanoseconds() / 1e9 for t in response.points.wall_times] - ) - values.extend(list(response.points.values)) - - data = { - "run": runs, - "tag": tags, - "step": steps, - "value": values, - } - if include_wall_time: - data["wall_time"] = wall_times - dataframe = pandas.DataFrame(data) - if pivot: - dataframe = experimental_utils.pivot_dataframe(dataframe) - return dataframe - - -def get_api_client(api_endpoint=None): - server_info = _get_server_info(api_endpoint=api_endpoint) - _handle_server_info(server_info) - channel_creds = grpc.ssl_channel_credentials() - credentials = auth.CredentialsStore().read_credentials() - if credentials: - channel_creds = grpc.composite_channel_credentials( - channel_creds, auth.id_token_call_credentials(credentials) - ) - channel = grpc.secure_channel( - server_info.api_server.endpoint, channel_creds - ) - return export_service_pb2_grpc.TensorBoardExporterServiceStub(channel) - - -def _get_server_info(api_endpoint=None): - # TODO(cais): Add more plugins to the list when more plugin/data types - # are supported - plugins = ["scalars"] - if api_endpoint: - return server_info_lib.create_server_info( - DEFAULT_ORIGIN, api_endpoint, plugins - ) - return server_info_lib.fetch_server_info(DEFAULT_ORIGIN, plugins) - - -def _handle_server_info(info): - compat = info.compatibility - if compat.verdict == server_info_pb2.VERDICT_WARN: - sys.stderr.write("Warning [from server]: %s\n" % compat.details) - sys.stderr.flush() - elif compat.verdict == server_info_pb2.VERDICT_ERROR: - raise ValueError("Error [from server]: %s" % compat.details) diff --git a/tensorboard/data/experimental/experiment_from_dev_test.py b/tensorboard/data/experimental/experiment_from_dev_test.py deleted file mode 100644 index 13f5ce2de9..0000000000 --- a/tensorboard/data/experimental/experiment_from_dev_test.py +++ /dev/null @@ -1,250 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for tensorboard.uploader.exporter.""" - - -from unittest import mock - -import numpy as np -import pandas - -from tensorboard import test as tb_test -from tensorboard.data.experimental import experiment_from_dev -from tensorboard.uploader import test_util -from tensorboard.uploader.proto import export_service_pb2 -from tensorboard.util import grpc_util - - -class ExperimentFromDevTest(tb_test.TestCase): - def test_get_scalars_works(self): - mock_api_client = mock.Mock() - - def stream_experiment_data(request, **kwargs): - self.assertEqual(request.experiment_id, "789") - self.assertEqual(kwargs["metadata"], grpc_util.version_metadata()) - for run in ("train", "test"): - for tag in ("accuracy", "loss"): - response = export_service_pb2.StreamExperimentDataResponse() - response.run_name = run - response.tag_name = tag - display_name = "%s:%s" % (request.experiment_id, tag) - response.tag_metadata.CopyFrom( - test_util.scalar_metadata(display_name) - ) - for step in range(10): - response.points.steps.append(step) - if tag == "loss": - if run == "train": - value = 1.0 / (step + 1) - seconds = step - else: - value = -1.0 / (step + 1) - seconds = 600 + step - else: # "accuracy" - if run == "train": - value = 1.0 / (10 - step) - seconds = step * 2 - else: - value = -1.0 / (10 - step) - seconds = 600 + step * 2 - response.points.values.append(value) - response.points.wall_times.add(seconds=seconds, nanos=0) - yield response - - mock_api_client.StreamExperimentData = mock.Mock( - wraps=stream_experiment_data - ) - - with mock.patch.object( - experiment_from_dev, - "get_api_client", - lambda api_endpoint: mock_api_client, - ): - experiment = experiment_from_dev.ExperimentFromDev("789") - for pivot in (False, True): - for include_wall_time in (False, True): - with self.subTest( - "pivot=%s; include_wall_time=%s" - % (pivot, include_wall_time) - ): - dataframe = experiment.get_scalars( - pivot=pivot, include_wall_time=include_wall_time - ) - - if pivot: - run_key = ( - ("run", "") if include_wall_time else "run" - ) - step_key = ( - ("step", "") if include_wall_time else "step" - ) - accuracy_value_key = ( - ("value", "accuracy") - if include_wall_time - else "accuracy" - ) - loss_value_key = ( - ("value", "loss") - if include_wall_time - else "loss" - ) - data = { - run_key: ["test"] * 10 + ["train"] * 10, - step_key: np.concatenate( - [np.arange(0, 10), np.arange(0, 10)] - ), - accuracy_value_key: np.concatenate( - [ - -1.0 / (10.0 - np.arange(0, 10)), - 1.0 / (10.0 - np.arange(0, 10)), - ], - ), - loss_value_key: np.concatenate( - [ - -1.0 / (1.0 + np.arange(0, 10)), - 1.0 / (1.0 + np.arange(0, 10)), - ], - ), - } - if include_wall_time: - data[ - ("wall_time", "accuracy") - ] = np.concatenate( - [ - 600.0 + 2.0 * np.arange(0, 10), - 2.0 * np.arange(0, 10), - ] - ) - data[("wall_time", "loss")] = np.concatenate( - [ - 600.0 + np.arange(0, 10), - 1.0 * np.arange(0, 10), - ] - ) - expected = pandas.DataFrame(data) - else: # No pivot_table. 
- data = { - "run": ["train"] * 20 + ["test"] * 20, - "tag": (["accuracy"] * 10 + ["loss"] * 10) * 2, - "step": list(np.arange(0, 10)) * 4, - "value": np.concatenate( - [ - 1.0 / (10.0 - np.arange(0, 10)), - 1.0 / (1.0 + np.arange(0, 10)), - -1.0 / (10.0 - np.arange(0, 10)), - -1.0 / (1.0 + np.arange(0, 10)), - ] - ), - } - if include_wall_time: - data["wall_time"] = np.concatenate( - [ - 2.0 * np.arange(0, 10), - 1.0 * np.arange(0, 10), - 600.0 + 2.0 * np.arange(0, 10), - 600.0 + np.arange(0, 10), - ] - ) - expected = pandas.DataFrame(data) - - pandas.testing.assert_frame_equal( - dataframe, - expected, - check_names=True, - ) - - def test_get_scalars_with_pivot_table_with_missing_value(self): - mock_api_client = mock.Mock() - - def stream_experiment_data(request, **kwargs): - self.assertEqual(request.experiment_id, "789") - self.assertEqual(kwargs["metadata"], grpc_util.version_metadata()) - response = export_service_pb2.StreamExperimentDataResponse() - response.run_name = "train" - response.tag_name = "batch_loss" - response.points.steps.append(0) - response.points.values.append(0.5) - response.points.wall_times.add(seconds=0, nanos=0) - response.points.steps.append(1) - response.points.values.append(0.25) - response.points.wall_times.add(seconds=1, nanos=0) - yield response - response = export_service_pb2.StreamExperimentDataResponse() - response.run_name = "train" - response.tag_name = "epoch_loss" - response.points.steps.append(0) - response.points.values.append(0.375) - response.points.wall_times.add(seconds=2, nanos=0) - yield response - - mock_api_client.StreamExperimentData = mock.Mock( - wraps=stream_experiment_data - ) - - with mock.patch.object( - experiment_from_dev, - "get_api_client", - lambda api_endpoint: mock_api_client, - ): - experiment = experiment_from_dev.ExperimentFromDev("789") - with self.assertRaisesRegex( - ValueError, - r"contains missing value\(s\).*different sets of " - r"steps.*pivot=False", - ): - experiment.get_scalars(pivot=True) - - def test_get_scalars_with_actual_inf_and_nan(self): - """Test for get_scalars() call that involve inf and nan in user data.""" - mock_api_client = mock.Mock() - - def stream_experiment_data(request, **kwargs): - self.assertEqual(request.experiment_id, "789") - self.assertEqual(kwargs["metadata"], grpc_util.version_metadata()) - response = export_service_pb2.StreamExperimentDataResponse() - response.run_name = "train" - response.tag_name = "batch_loss" - response.points.steps.append(0) - response.points.values.append(np.nan) - response.points.wall_times.add(seconds=0, nanos=0) - response.points.steps.append(1) - response.points.values.append(np.inf) - response.points.wall_times.add(seconds=10, nanos=0) - yield response - - mock_api_client.StreamExperimentData = mock.Mock( - wraps=stream_experiment_data - ) - - with mock.patch.object( - experiment_from_dev, - "get_api_client", - lambda api_endpoint: mock_api_client, - ): - experiment = experiment_from_dev.ExperimentFromDev("789") - dataframe = experiment.get_scalars(pivot=True) - - expected = pandas.DataFrame( - { - "run": ["train"] * 2, - "step": [0, 1], - "batch_loss": [np.nan, np.inf], - } - ) - pandas.testing.assert_frame_equal(dataframe, expected, check_names=True) - - -if __name__ == "__main__": - tb_test.main() diff --git a/tensorboard/data/experimental/test_binary.py b/tensorboard/data/experimental/test_binary.py deleted file mode 100644 index 2ec9e57cc2..0000000000 --- a/tensorboard/data/experimental/test_binary.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright 2020 The 
TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""A test binary that can be used to test ExperimentFromDev features.""" - - -import argparse - -from tensorboard.data.experimental import experiment_from_dev - - -def parse_args(): - parser = argparse.ArgumentParser("Test run of ExperimentFromDev") - parser.add_argument( - "--experiment_id", - type=str, - default="AdYd1TgeTlaLWXx6I8JUbA", - help="Experiment ID", - ) - parser.add_argument( - "--api_endpoint", - type=str, - default=None, - help="Optional API endpoint used to override the default", - ) - parser.add_argument( - "--pivot", - action="store_true", - help="Pivot the DataFrame, so that the tags become columns " - "of the DataFrame.", - ) - parser.add_argument( - "--include_wall_time", - action="store_true", - help="Include wall_time column(s) in the DataFrame", - ) - return parser.parse_args() - - -def main(args): - experiment = experiment_from_dev.ExperimentFromDev( - args.experiment_id, api_endpoint=args.api_endpoint - ) - dataframe = experiment.get_scalars( - pivot=args.pivot, include_wall_time=args.include_wall_time - ) - print(dataframe) - - -if __name__ == "__main__": - main(parse_args()) diff --git a/tensorboard/data/experimental/utils.py b/tensorboard/data/experimental/utils.py deleted file mode 100644 index a093e17ad6..0000000000 --- a/tensorboard/data/experimental/utils.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utility methods for working with the Experiment Data Access API.""" - -import numpy as np - - -def pivot_dataframe(dataframe): - """Gets a pivoted wide-form pandas dataframe. - - The wide-form DataFrame has all its tags included as columns of the - DataFrame, which is more convenient to work. If the condition of having - uniform sets of step values across all tags in all runs is not met, - this will error. - - Args: - dataframe: pandas dataframe to pivot. - - Returns: - Pivoted wide-form pandas dataframe. - Raises: - ValueError if step values across all tags are not uniform. 
- """ - num_missing_0 = np.count_nonzero(dataframe.isnull().values) - dataframe = dataframe.pivot_table( - values=( - ["value", "wall_time"] - if "wall_time" in dataframe.columns - else "value" - ), - index=["run", "step"], - columns="tag", - dropna=False, - ) - num_missing_1 = np.count_nonzero(dataframe.isnull().values) - if num_missing_1 > num_missing_0: - raise ValueError( - "pivoted DataFrame contains missing value(s). " - "This is likely due to two timeseries having different " - "sets of steps in your experiment. " - "You can avoid this error by calling `get_scalars()` with " - "`pivot=False` to disable the DataFrame pivoting." - ) - # `reset_index()` removes the MultiIndex structure of the pivoted - # DataFrame. Before the call, the DataFrame consits of two levels - # of index: "run" and "step". After the call, the index become a - # single range index (e.g,. `dataframe[:2]` works). - dataframe = dataframe.reset_index() - # Remove the columns name "tag". - dataframe.columns.name = None - dataframe.columns.names = [None for name in dataframe.columns.names] - return dataframe diff --git a/tensorboard/pip_package/BUILD b/tensorboard/pip_package/BUILD index 8baa65b0fe..9e71cad7ef 100644 --- a/tensorboard/pip_package/BUILD +++ b/tensorboard/pip_package/BUILD @@ -42,7 +42,6 @@ sh_binary( "//tensorboard", # Main tensorboard binary and everything it uses "//tensorboard:lib", # User-facing overall TensorBoard API "//tensorboard:version", # Version module (read by setup.py) - "//tensorboard/data/experimental:experiment_from_dev", "//tensorboard/plugins/hparams", # User-facing hparams API "//tensorboard/plugins/mesh", # User-facing mesh API "//tensorboard/plugins/projector", # User-facing projector API From b63c5b0605a34de592b6a3cfbc9b4604ef3da4f1 Mon Sep 17 00:00:00 2001 From: Brian Dubois Date: Tue, 24 Oct 2023 07:50:50 -0400 Subject: [PATCH 4/8] tbdev turndown: No longer allow usage of `tensorboard dev upload`. (#6638) In this step of the TensorBoard.dev turndown, we remove the ability to upload via the `tensorboard dev upload` command. Instead, when a user invokes `tensorboard dev upload`, we print the following message, which is similar to the message we currently return in the server info from TensorBoard.dev backends: ``` **************************************************************** **************************************************************** **************************************************************** Uploading TensorBoard logs to https://tensorboard.dev/ is no longer supported. TensorBoard.dev is shutting down. Please export your experiments by Dec 31, 2023. See the FAQ at https://tensorboard.dev. **************************************************************** **************************************************************** **************************************************************** ``` We also remove a bunch of upload-specific code. 
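In rough terms, the upload code path now reduces to printing that notice and returning instead of constructing an uploader. A hypothetical sketch of that short-circuit, assuming illustrative names (these are not the identifiers used in `uploader_subcommand.py`, whose diff follows later in this patch):

```python
# Illustrative only: the names below are assumptions, not the actual patch.
_TURNDOWN_MESSAGE = (
    "Uploading TensorBoard logs to https://tensorboard.dev/ is no longer "
    "supported.\n\nTensorBoard.dev is shutting down.\n\n"
    "Please export your experiments by Dec 31, 2023.\n"
    "See the FAQ at https://tensorboard.dev."
)


def run_upload_command():
    """Stands in for the `tensorboard dev upload` code path after this change."""
    banner = "*" * 64  # banner width is an assumption
    print("\n".join([banner] * 3))
    print(_TURNDOWN_MESSAGE)
    print("\n".join([banner] * 3))
```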
Testing: I tested the changes by trying some version of the following commands: `bazel run tensorboard -- dev auth revoke` `bazel run tensorboard -- dev upload` `bazel run tensorboard -- dev list` `bazel run tensorboard -- dev update-metadata` `bazel run tensorboard -- dev delete` `bazel run tensorboard -- dev export` --- tensorboard/uploader/BUILD | 48 +- tensorboard/uploader/dry_run_stubs.py | 53 - tensorboard/uploader/dry_run_stubs_test.py | 52 - tensorboard/uploader/uploader.py | 1110 ---------- tensorboard/uploader/uploader_subcommand.py | 162 +- .../uploader/uploader_subcommand_test.py | 177 +- tensorboard/uploader/uploader_test.py | 1851 ----------------- tensorboard/uploader/util.py | 22 - tensorboard/uploader/util_test.py | 25 - 9 files changed, 43 insertions(+), 3457 deletions(-) delete mode 100644 tensorboard/uploader/dry_run_stubs.py delete mode 100644 tensorboard/uploader/dry_run_stubs_test.py diff --git a/tensorboard/uploader/BUILD b/tensorboard/uploader/BUILD index 12a365df15..a038c6a814 100644 --- a/tensorboard/uploader/BUILD +++ b/tensorboard/uploader/BUILD @@ -68,7 +68,6 @@ py_library( visibility = ["//tensorboard:internal"], deps = [ ":auth", - ":dry_run_stubs", ":exporter", ":flags_parser", ":formatters", @@ -79,7 +78,6 @@ py_library( "//tensorboard:expect_absl_logging_installed", "//tensorboard:expect_grpc_installed", "//tensorboard:program", - "//tensorboard/compat:tensorflow", "//tensorboard/plugins:base_plugin", "//tensorboard/uploader/proto:protos_all_py_pb2_grpc", ], @@ -90,7 +88,6 @@ py_test( srcs = ["uploader_subcommand_test.py"], srcs_version = "PY3", deps = [ - ":dry_run_stubs", ":server_info", ":uploader", ":uploader_subcommand", @@ -104,21 +101,11 @@ py_library( srcs = ["uploader.py"], srcs_version = "PY3", deps = [ - ":logdir_loader", - ":upload_tracker", ":util", "//tensorboard:expect_grpc_installed", - "//tensorboard:expect_protobuf_installed", - "//tensorboard/backend:process_graph", - "//tensorboard/backend/event_processing:directory_loader", - "//tensorboard/backend/event_processing:event_file_loader", - "//tensorboard/backend/event_processing:io_wrapper", - "//tensorboard/compat/proto:protos_all_py_pb2", - "//tensorboard/plugins/graph:metadata", "//tensorboard/uploader/proto:protos_all_py_pb2", "//tensorboard/util:grpc_util", "//tensorboard/util:tb_logging", - "//tensorboard/util:tensor_util", ], ) @@ -133,28 +120,15 @@ py_test( srcs = ["uploader_test.py"], srcs_version = "PY3", deps = [ - ":dry_run_stubs", ":server_info", ":test_util", - ":upload_tracker", ":uploader", - ":util", - "//tensorboard:data_compat", - "//tensorboard:dataclass_compat", "//tensorboard:expect_grpc_installed", "//tensorboard:expect_grpc_testing_installed", - "//tensorboard:expect_protobuf_installed", "//tensorboard:expect_tensorflow_installed", "//tensorboard/compat:no_tensorflow", - "//tensorboard/compat/proto:protos_all_py_pb2", - "//tensorboard/plugins/graph:metadata", - "//tensorboard/plugins/histogram:summary_v2", - "//tensorboard/plugins/scalar:metadata", - "//tensorboard/plugins/scalar:summary_v2", - "//tensorboard/summary:summary_v1", "//tensorboard/uploader/proto:protos_all_py_pb2", "//tensorboard/uploader/proto:protos_all_py_pb2_grpc", - "//tensorboard/util:test_util", ], ) @@ -168,26 +142,6 @@ py_test( ], ) -py_library( - name = "dry_run_stubs", - srcs = ["dry_run_stubs.py"], - srcs_version = "PY3", - deps = [ - "//tensorboard/uploader/proto:protos_all_py_pb2", - ], -) - -py_test( - name = "dry_run_stubs_test", - srcs = ["dry_run_stubs_test.py"], - srcs_version = 
"PY3", - deps = [ - ":dry_run_stubs", - "//tensorboard:test", - "//tensorboard/uploader/proto:protos_all_py_pb2", - ], -) - py_library( name = "auth", srcs = ["auth.py"], @@ -257,8 +211,8 @@ py_test( name = "util_test", srcs = ["util_test.py"], deps = [ - ":test_util", ":util", + "//tensorboard:expect_grpc_installed", "//tensorboard:expect_protobuf_installed", "//tensorboard:test", ], diff --git a/tensorboard/uploader/dry_run_stubs.py b/tensorboard/uploader/dry_run_stubs.py deleted file mode 100644 index 7303cfc34d..0000000000 --- a/tensorboard/uploader/dry_run_stubs.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Dry-run stubs for various rpc services.""" - - -from tensorboard.uploader.proto import write_service_pb2 - - -class DryRunTensorBoardWriterStub: - """A dry-run TensorBoardWriter gRPC Server. - - Only the methods used by the `tensorboard dev upload` are - mocked out in this class. - - When additional methods start to be used by the command, - their mocks should be added to this class. - """ - - def CreateExperiment(self, request, **kwargs): - """Create a new experiment and remember it has been created.""" - del request, kwargs # Unused. - return write_service_pb2.CreateExperimentResponse() - - def WriteScalar(self, request, **kwargs): - del request, kwargs # Unused. - return write_service_pb2.WriteScalarResponse() - - def WriteTensor(self, request, **kwargs): - del request, kwargs # Unused. - return write_service_pb2.WriteTensorResponse() - - def GetOrCreateBlobSequence(self, request, **kwargs): - del request, kwargs # Unused. - return write_service_pb2.GetOrCreateBlobSequenceResponse( - blob_sequence_id="dummy_blob_sequence_id" - ) - - def WriteBlob(self, request, **kwargs): - del kwargs # Unused. - for item in request: - yield write_service_pb2.WriteBlobResponse() diff --git a/tensorboard/uploader/dry_run_stubs_test.py b/tensorboard/uploader/dry_run_stubs_test.py deleted file mode 100644 index 50e7f4d2c0..0000000000 --- a/tensorboard/uploader/dry_run_stubs_test.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for dry-run rpc servicers.""" - - -from tensorboard import test as tb_test -from tensorboard.uploader import dry_run_stubs -from tensorboard.uploader.proto import write_service_pb2 - - -class DryRunTensorBoardWriterServicerTest(tb_test.TestCase): - def setUp(self): - super().setUp() - self._stub = dry_run_stubs.DryRunTensorBoardWriterStub() - - def testCreateExperiment(self): - self._stub.CreateExperiment(write_service_pb2.CreateExperimentRequest()) - - def testWriteScalar(self): - self._stub.WriteScalar(write_service_pb2.WriteScalarRequest()) - - def testWriteTensor(self): - self._stub.WriteTensor(write_service_pb2.WriteTensorRequest()) - - def testGetOrCreateBlobSequence(self): - self._stub.GetOrCreateBlobSequence( - write_service_pb2.GetOrCreateBlobSequenceRequest() - ) - - def testWriteBlob(self): - def dummy_iterator(): - yield write_service_pb2.WriteBlobRequest() - yield write_service_pb2.WriteBlobRequest() - - for response in self._stub.WriteBlob(dummy_iterator()): - self.assertTrue(response) - - -if __name__ == "__main__": - tb_test.main() diff --git a/tensorboard/uploader/uploader.py b/tensorboard/uploader/uploader.py index 3654524a9e..79a689e7d1 100644 --- a/tensorboard/uploader/uploader.py +++ b/tensorboard/uploader/uploader.py @@ -15,28 +15,11 @@ """Uploads a TensorBoard logdir to TensorBoard.dev.""" -import contextlib -import functools -import time - import grpc -from google.protobuf import message -from tensorboard.compat.proto import graph_pb2 -from tensorboard.compat.proto import summary_pb2 -from tensorboard.compat.proto import types_pb2 from tensorboard.uploader.proto import write_service_pb2 -from tensorboard.uploader import logdir_loader -from tensorboard.uploader import upload_tracker -from tensorboard.uploader import util -from tensorboard.backend import process_graph -from tensorboard.backend.event_processing import directory_loader -from tensorboard.backend.event_processing import event_file_loader -from tensorboard.backend.event_processing import io_wrapper -from tensorboard.plugins.graph import metadata as graphs_metadata from tensorboard.util import grpc_util from tensorboard.util import tb_logging -from tensorboard.util import tensor_util # Minimum length of a logdir polling cycle in seconds. Shorter cycles will # sleep to avoid spinning over the logdir, which isn't great for disks and can @@ -55,179 +38,6 @@ logger = tb_logging.get_logger() -class TensorBoardUploader: - """Uploads a TensorBoard logdir to TensorBoard.dev.""" - - def __init__( - self, - writer_client, - logdir, - allowed_plugins, - upload_limits, - logdir_poll_rate_limiter=None, - rpc_rate_limiter=None, - tensor_rpc_rate_limiter=None, - blob_rpc_rate_limiter=None, - name=None, - description=None, - verbosity=None, - one_shot=None, - ): - """Constructs a TensorBoardUploader. - - Args: - writer_client: a TensorBoardWriterService stub instance - logdir: path of the log directory to upload - allowed_plugins: collection of string plugin names; events will only - be uploaded if their time series's metadata specifies one of these - plugin names - upload_limits: instance of tensorboard.service.UploadLimits proto. - logdir_poll_rate_limiter: a `RateLimiter` to use to limit logdir - polling frequency, to avoid thrashing disks, especially on networked - file systems - rpc_rate_limiter: a `RateLimiter` to use to limit write RPC frequency. 
- Note this limit applies at the level of single RPCs in the Scalar - and Tensor case, but at the level of an entire blob upload in the - Blob case-- which may require a few preparatory RPCs and a stream - of chunks. Note the chunk stream is internally rate-limited by - backpressure from the server, so it is not a concern that we do not - explicitly rate-limit within the stream here. - name: String name to assign to the experiment. - description: String description to assign to the experiment. - verbosity: Level of verbosity, an integer. Supported value: - 0 - No upload statistics is printed. - 1 - Print upload statistics while uploading data (default). - one_shot: Once uploading starts, upload only the existing data in - the logdir and then return immediately, instead of the default - behavior of continuing to listen for new data in the logdir and - upload them when it appears. - """ - self._api = writer_client - self._logdir = logdir - self._allowed_plugins = frozenset(allowed_plugins) - self._upload_limits = upload_limits - - self._name = name - self._description = description - self._verbosity = 1 if verbosity is None else verbosity - self._one_shot = False if one_shot is None else one_shot - self._request_sender = None - self._experiment_id = None - if logdir_poll_rate_limiter is None: - self._logdir_poll_rate_limiter = util.RateLimiter( - _MIN_LOGDIR_POLL_INTERVAL_SECS - ) - else: - self._logdir_poll_rate_limiter = logdir_poll_rate_limiter - - if rpc_rate_limiter is None: - self._rpc_rate_limiter = util.RateLimiter( - self._upload_limits.min_scalar_request_interval / 1000 - ) - else: - self._rpc_rate_limiter = rpc_rate_limiter - - if tensor_rpc_rate_limiter is None: - self._tensor_rpc_rate_limiter = util.RateLimiter( - self._upload_limits.min_tensor_request_interval / 1000 - ) - else: - self._tensor_rpc_rate_limiter = tensor_rpc_rate_limiter - - if blob_rpc_rate_limiter is None: - self._blob_rpc_rate_limiter = util.RateLimiter( - self._upload_limits.min_blob_request_interval / 1000 - ) - else: - self._blob_rpc_rate_limiter = blob_rpc_rate_limiter - - active_filter = ( - lambda secs: secs + _EVENT_FILE_INACTIVE_SECS >= time.time() - ) - directory_loader_factory = functools.partial( - directory_loader.DirectoryLoader, - loader_factory=event_file_loader.TimestampedEventFileLoader, - path_filter=io_wrapper.IsTensorFlowEventsFile, - active_filter=active_filter, - ) - self._logdir_loader = logdir_loader.LogdirLoader( - self._logdir, directory_loader_factory - ) - self._tracker = upload_tracker.UploadTracker( - verbosity=self._verbosity, one_shot=self._one_shot - ) - - def has_data(self) -> bool: - """Returns this object's upload tracker.""" - return self._tracker.has_data() - - @property - def experiment_id(self) -> str: - """Returns the experiment_id associated with this uploader. - - May be none if no experiment is set, for instance, if - `create_experiment` has not been called. 
- """ - return self._experiment_id - - def create_experiment(self): - """Creates an Experiment for this upload session and returns the ID.""" - logger.info("Creating experiment") - request = write_service_pb2.CreateExperimentRequest( - name=self._name, description=self._description - ) - response = grpc_util.call_with_retries( - self._api.CreateExperiment, request - ) - self._request_sender = _BatchedRequestSender( - response.experiment_id, - self._api, - allowed_plugins=self._allowed_plugins, - upload_limits=self._upload_limits, - rpc_rate_limiter=self._rpc_rate_limiter, - tensor_rpc_rate_limiter=self._tensor_rpc_rate_limiter, - blob_rpc_rate_limiter=self._blob_rpc_rate_limiter, - tracker=self._tracker, - ) - self._experiment_id = response.experiment_id - return response.experiment_id - - def start_uploading(self): - """Uploads data from the logdir. - - This will continuously scan the logdir, uploading as data is added - unless the uploader was built with the _one_shot option, in which - case it will terminate after the first scan. - - Raises: - RuntimeError: If `create_experiment` has not yet been called. - ExperimentNotFoundError: If the experiment is deleted during the - course of the upload. - """ - if self._request_sender is None: - raise RuntimeError( - "Must call create_experiment() before start_uploading()" - ) - while True: - self._logdir_poll_rate_limiter.tick() - self._upload_once() - if self._one_shot: - break - - def _upload_once(self): - """Runs one upload cycle, sending zero or more RPCs.""" - logger.info("Starting an upload cycle") - - sync_start_time = time.time() - self._logdir_loader.synchronize_runs() - sync_duration_secs = time.time() - sync_start_time - logger.info("Logdir sync took %.3f seconds", sync_duration_secs) - - run_to_events = self._logdir_loader.get_run_events() - with self._tracker.send_tracker(): - self._request_sender.send_requests(run_to_events) - - def update_experiment_metadata( writer_client, experiment_id, name=None, description=None ): @@ -308,923 +118,3 @@ class ExperimentNotFoundError(RuntimeError): class PermissionDeniedError(RuntimeError): pass - - -class _OutOfSpaceError(Exception): - """Action could not proceed without overflowing request budget. - - This is a signaling exception (like `StopIteration`) used internally - by `_*RequestSender`; it does not mean that anything has gone wrong. - """ - - pass - - -class _BatchedRequestSender: - """Helper class for building requests that fit under a size limit. - - This class maintains stateful request builders for each of the possible - request types (scalars, tensors, and blobs). These accumulate batches - independently, each maintaining its own byte budget and emitting a request - when the batch becomes full. As a consequence, events of different types - will likely be sent to the backend out of order. E.g., in the extreme case, - a single tensor-flavored request may be sent only when the event stream is - exhausted, even though many more recent scalar events were sent earlier. - - This class is not threadsafe. Use external synchronization if - calling its methods concurrently. - """ - - def __init__( - self, - experiment_id, - api, - allowed_plugins, - upload_limits, - rpc_rate_limiter, - tensor_rpc_rate_limiter, - blob_rpc_rate_limiter, - tracker, - ): - # Map from `(run_name, tag_name)` to `SummaryMetadata` if the time - # series is a scalar time series, else to `_NON_SCALAR_TIME_SERIES`. 
- self._tag_metadata = {} - self._allowed_plugins = frozenset(allowed_plugins) - self._tracker = tracker - self._scalar_request_sender = _ScalarBatchedRequestSender( - experiment_id, - api, - rpc_rate_limiter, - upload_limits.max_scalar_request_size, - tracker=self._tracker, - ) - self._tensor_request_sender = _TensorBatchedRequestSender( - experiment_id, - api, - tensor_rpc_rate_limiter, - upload_limits.max_tensor_request_size, - upload_limits.max_tensor_point_size, - tracker=self._tracker, - ) - self._blob_request_sender = _BlobRequestSender( - experiment_id, - api, - blob_rpc_rate_limiter, - upload_limits.max_blob_request_size, - upload_limits.max_blob_size, - tracker=self._tracker, - ) - self._tracker = tracker - - def send_requests(self, run_to_events): - """Accepts a stream of TF events and sends batched write RPCs. - - Each sent request will be batched, the size of each batch depending on - the type of data (Scalar vs Tensor vs Blob) being sent. - - Args: - run_to_events: Mapping from run name to generator of `tf.Event` - values, as returned by `LogdirLoader.get_run_events`. - - Raises: - RuntimeError: If no progress can be made because even a single - point is too large (say, due to a gigabyte-long tag name). - """ - - for (run_name, event, value) in self._run_values(run_to_events): - time_series_key = (run_name, value.tag) - - # The metadata for a time series is memorized on the first event. - # If later events arrive with a mismatching plugin_name, they are - # ignored with a warning. - metadata = self._tag_metadata.get(time_series_key) - first_in_time_series = False - if metadata is None: - first_in_time_series = True - metadata = value.metadata - self._tag_metadata[time_series_key] = metadata - - plugin_name = metadata.plugin_data.plugin_name - # TODO(cais): Call self._tracker.add_plugin_name() to track the - # data for what plugins have been uploaded. - if value.HasField("metadata") and ( - plugin_name != value.metadata.plugin_data.plugin_name - ): - logger.warning( - "Mismatching plugin names for %s. Expected %s, found %s.", - time_series_key, - metadata.plugin_data.plugin_name, - value.metadata.plugin_data.plugin_name, - ) - continue - if plugin_name not in self._allowed_plugins: - if first_in_time_series: - logger.info( - "Skipping time series %r with unsupported plugin name %r", - time_series_key, - plugin_name, - ) - continue - - if metadata.data_class == summary_pb2.DATA_CLASS_SCALAR: - self._scalar_request_sender.add_event( - run_name, event, value, metadata - ) - elif metadata.data_class == summary_pb2.DATA_CLASS_TENSOR: - self._tensor_request_sender.add_event( - run_name, event, value, metadata - ) - elif metadata.data_class == summary_pb2.DATA_CLASS_BLOB_SEQUENCE: - self._blob_request_sender.add_event( - run_name, event, value, metadata - ) - - self._scalar_request_sender.flush() - self._tensor_request_sender.flush() - self._blob_request_sender.flush() - - def _run_values(self, run_to_events): - """Helper generator to create a single stream of work items. - - Note that `dataclass_compat` may emit multiple variants of - the same event, for backwards compatibility. Thus this stream should - be filtered to obtain the desired version of each event. Here, we - ignore any event that does not have a `summary` field. - - Furthermore, the events emitted here could contain values that do not - have `metadata.data_class` set; these too should be ignored. 
In - `_send_summary_value(...)` above, we switch on `metadata.data_class` - and drop any values with an unknown (i.e., absent or unrecognized) - `data_class`. - """ - # Note that this join in principle has deletion anomalies: if the input - # stream contains runs with no events, or events with no values, we'll - # lose that information. This is not a problem: we would need to prune - # such data from the request anyway. - for (run_name, events) in run_to_events.items(): - for event in events: - _filter_graph_defs(event) - for value in event.summary.value: - yield (run_name, event, value) - - -class _ScalarBatchedRequestSender: - """Helper class for building requests that fit under a size limit. - - This class accumulates a current request. `add_event(...)` may or may not - send the request (and start a new one). After all `add_event(...)` calls - are complete, a final call to `flush()` is needed to send the final request. - - This class is not threadsafe. Use external synchronization if calling its - methods concurrently. - """ - - def __init__( - self, - experiment_id, - api, - rpc_rate_limiter, - max_request_size, - tracker, - ): - if experiment_id is None: - raise ValueError("experiment_id cannot be None") - self._experiment_id = experiment_id - self._api = api - self._rpc_rate_limiter = rpc_rate_limiter - self._byte_budget_manager = _ByteBudgetManager(max_request_size) - self._tracker = tracker - - self._runs = {} # cache: map from run name to `Run` proto in request - self._tags = ( - {} - ) # cache: map from `(run, tag)` to `Tag` proto in run in request - self._new_request() - - def _new_request(self): - """Allocates a new request and refreshes the budget.""" - self._request = write_service_pb2.WriteScalarRequest() - self._runs.clear() - self._tags.clear() - self._num_values = 0 - self._request.experiment_id = self._experiment_id - self._byte_budget_manager.reset(self._request) - - def add_event(self, run_name, event, value, metadata): - """Attempts to add the given event to the current request. - - If the event cannot be added to the current request because the byte - budget is exhausted, the request is flushed, and the event is added - to the next request. - """ - try: - self._add_event_internal(run_name, event, value, metadata) - except _OutOfSpaceError: - self.flush() - # Try again. This attempt should never produce OutOfSpaceError - # because we just flushed. - try: - self._add_event_internal(run_name, event, value, metadata) - except _OutOfSpaceError: - raise RuntimeError("add_event failed despite flush") - - def _add_event_internal(self, run_name, event, value, metadata): - run_proto = self._runs.get(run_name) - if run_proto is None: - run_proto = self._create_run(run_name) - self._runs[run_name] = run_proto - tag_proto = self._tags.get((run_name, value.tag)) - if tag_proto is None: - tag_proto = self._create_tag(run_proto, value.tag, metadata) - self._tags[(run_name, value.tag)] = tag_proto - self._create_point(tag_proto, event, value) - self._num_values += 1 - - def flush(self): - """Sends the active request after removing empty runs and tags. - - Starts a new, empty active request. - """ - request = self._request - _prune_empty_tags_and_runs(request) - if not request.runs: - return - - self._rpc_rate_limiter.tick() - - with _request_logger( - request, request.runs - ), self._tracker.scalars_tracker(self._num_values): - try: - # TODO(@nfelt): execute this RPC asynchronously. 
- grpc_util.call_with_retries(self._api.WriteScalar, request) - except grpc.RpcError as e: - if e.code() == grpc.StatusCode.NOT_FOUND: - raise ExperimentNotFoundError() - logger.error("Upload call failed with error %s", e) - - self._new_request() - - def _create_run(self, run_name): - """Adds a run to the live request, if there's space. - - Args: - run_name: String name of the run to add. - - Returns: - The `WriteScalarRequest.Run` that was added to `request.runs`. - - Raises: - _OutOfSpaceError: If adding the run would exceed the remaining - request budget. - """ - run_proto = self._request.runs.add(name=run_name) - self._byte_budget_manager.add_run(run_proto) - return run_proto - - def _create_tag(self, run_proto, tag_name, metadata): - """Adds a tag for the given value, if there's space. - - Args: - run_proto: `WriteScalarRequest.Run` proto to which to add a tag. - tag_name: String name of the tag to add (as `value.tag`). - metadata: TensorBoard `SummaryMetadata` proto from the first - occurrence of this time series. - - Returns: - The `WriteScalarRequest.Tag` that was added to `run_proto.tags`. - - Raises: - _OutOfSpaceError: If adding the tag would exceed the remaining - request budget. - """ - tag_proto = run_proto.tags.add(name=tag_name) - tag_proto.metadata.CopyFrom(metadata) - self._byte_budget_manager.add_tag(tag_proto) - return tag_proto - - def _create_point(self, tag_proto, event, value): - """Adds a scalar point to the given tag, if there's space. - - Args: - tag_proto: `WriteScalarRequest.Tag` proto to which to add a point. - event: Enclosing `Event` proto with the step and wall time data. - value: Scalar `Summary.Value` proto with the actual scalar data. - - Raises: - _OutOfSpaceError: If adding the point would exceed the remaining - request budget. - """ - point = tag_proto.points.add() - point.step = event.step - # TODO(@nfelt): skip tensor roundtrip for Value with simple_value set - point.value = tensor_util.make_ndarray(value.tensor).item() - util.set_timestamp(point.wall_time, event.wall_time) - try: - self._byte_budget_manager.add_point(point) - except _OutOfSpaceError: - tag_proto.points.pop() - raise - - -class _TensorBatchedRequestSender: - """Helper class for building WriteTensor() requests that fit under a size limit. - - This class accumulates a current request. `add_event(...)` may or may not - send the request (and start a new one). After all `add_event(...)` calls - are complete, a final call to `flush()` is needed to send the final request. - - This class is not threadsafe. Use external synchronization if calling its - methods concurrently. 
- """ - - def __init__( - self, - experiment_id, - api, - rpc_rate_limiter, - max_request_size, - max_tensor_point_size, - tracker, - ): - if experiment_id is None: - raise ValueError("experiment_id cannot be None") - self._experiment_id = experiment_id - self._api = api - self._rpc_rate_limiter = rpc_rate_limiter - self._byte_budget_manager = _ByteBudgetManager(max_request_size) - self._max_tensor_point_size = max_tensor_point_size - self._tracker = tracker - - self._runs = {} # cache: map from run name to `Run` proto in request - self._tags = ( - {} - ) # cache: map from `(run, tag)` to `Tag` proto in run in request - self._new_request() - - def _new_request(self): - """Allocates a new request and refreshes the budget.""" - - self._request = write_service_pb2.WriteTensorRequest() - self._runs.clear() - self._tags.clear() - self._request.experiment_id = self._experiment_id - self._byte_budget_manager.reset(self._request) - self._num_values = 0 - self._num_values_skipped = 0 - self._tensor_bytes = 0 - self._tensor_bytes_skipped = 0 - - def add_event(self, run_name, event, value, metadata): - """Attempts to add the given event to the current request. - - If the event cannot be added to the current request because the byte - budget is exhausted, the request is flushed, and the event is added - to the next request. - """ - try: - self._add_event_internal(run_name, event, value, metadata) - except _OutOfSpaceError: - self.flush() - # Try again. This attempt should never produce OutOfSpaceError - # because we just flushed. - try: - self._add_event_internal(run_name, event, value, metadata) - except _OutOfSpaceError: - raise RuntimeError("add_event failed despite flush") - - def _add_event_internal(self, run_name, event, value, metadata): - run_proto = self._runs.get(run_name) - if run_proto is None: - run_proto = self._create_run(run_name) - self._runs[run_name] = run_proto - tag_proto = self._tags.get((run_name, value.tag)) - if tag_proto is None: - tag_proto = self._create_tag(run_proto, value.tag, metadata) - self._tags[(run_name, value.tag)] = tag_proto - self._create_point(tag_proto, event, value, run_name) - self._num_values += 1 - - def flush(self): - """Sends the active request after removing empty runs and tags. - - Starts a new, empty active request. - """ - request = self._request - _prune_empty_tags_and_runs(request) - if not request.runs: - return - - self._rpc_rate_limiter.tick() - - with _request_logger(request, request.runs): - with self._tracker.tensors_tracker( - self._num_values, - self._num_values_skipped, - self._tensor_bytes, - self._tensor_bytes_skipped, - ): - try: - grpc_util.call_with_retries(self._api.WriteTensor, request) - except grpc.RpcError as e: - if e.code() == grpc.StatusCode.NOT_FOUND: - raise ExperimentNotFoundError() - logger.error("Upload call failed with error %s", e) - - self._new_request() - - def _create_run(self, run_name): - """Adds a run to the live request, if there's space. - - Args: - run_name: String name of the run to add. - - Returns: - The `WriteTensorRequest.Run` that was added to `request.runs`. - - Raises: - _OutOfSpaceError: If adding the run would exceed the remaining - request budget. - """ - run_proto = self._request.runs.add(name=run_name) - self._byte_budget_manager.add_run(run_proto) - return run_proto - - def _create_tag(self, run_proto, tag_name, metadata): - """Adds a tag for the given value, if there's space. - - Args: - run_proto: `WriteTensorRequest.Run` proto to which to add a tag. 
- tag_name: String name of the tag to add (as `value.tag`). - metadata: TensorBoard `SummaryMetadata` proto from the first - occurrence of this time series. - - Returns: - The `WriteTensorRequest.Tag` that was added to `run_proto.tags`. - - Raises: - _OutOfSpaceError: If adding the tag would exceed the remaining - request budget. - """ - tag_proto = run_proto.tags.add(name=tag_name) - tag_proto.metadata.CopyFrom(metadata) - self._byte_budget_manager.add_tag(tag_proto) - return tag_proto - - def _create_point(self, tag_proto, event, value, run_name): - """Adds a tensor point to the given tag, if there's space. - - Args: - tag_proto: `WriteTensorRequest.Tag` proto to which to add a point. - event: Enclosing `Event` proto with the step and wall time data. - value: Tensor `Summary.Value` proto with the actual tensor data. - run_name: Name of the wrong, only used for error reporting. - - Raises: - _OutOfSpaceError: If adding the point would exceed the remaining - request budget. - """ - point = tag_proto.points.add() - point.step = event.step - point.value.CopyFrom(value.tensor) - util.set_timestamp(point.wall_time, event.wall_time) - - self._tensor_bytes += point.value.ByteSize() - if point.value.ByteSize() > self._max_tensor_point_size: - logger.warning( - "Tensor (run:%s, tag:%s, step: %d) too large; skipping. " - "Size %d exceeds limit of %d bytes.", - run_name, - tag_proto.name, - event.step, - point.value.ByteSize(), - self._max_tensor_point_size, - ) - tag_proto.points.pop() - self._num_values_skipped += 1 - self._tensor_bytes_skipped += point.value.ByteSize() - return - - self._validate_tensor_value( - value.tensor, value.tag, event.step, event.wall_time - ) - - try: - self._byte_budget_manager.add_point(point) - except _OutOfSpaceError: - tag_proto.points.pop() - raise - - def _validate_tensor_value(self, tensor_proto, tag, step, wall_time): - """Validate a TensorProto by attempting to parse it.""" - try: - tensor_util.make_ndarray(tensor_proto) - except ValueError as error: - raise ValueError( - "The uploader failed to upload a tensor. This seems to be " - "due to a malformation in the tensor, which may be caused by " - "a bug in the process that wrote the tensor.\n\n" - "The tensor has tag '%s' and is at step %d and wall_time %.6f.\n\n" - "Original error:\n%s" % (tag, step, wall_time, error) - ) - - -class _ByteBudgetManager: - """Helper class for managing the request byte budget for certain RPCs. - - This should be used for RPCs that organize data by Runs, Tags, and Points, - specifically WriteScalar and WriteTensor. - - Any call to add_run(), add_tag(), or add_point() may raise an - _OutOfSpaceError, which is non-fatal. It signals to the caller that they - should flush the current request and begin a new one. - - For more information on the protocol buffer encoding and how byte cost - can be calculated, visit: - - https://developers.google.com/protocol-buffers/docs/encoding - """ - - def __init__(self, max_bytes): - # The remaining number of bytes that we may yet add to the request. - self._byte_budget = None # type: int - self._max_bytes = max_bytes - - def reset(self, base_request): - """Resets the byte budget and calculates the cost of the base request. - - Args: - base_request: Base request. - - Raises: - _OutOfSpaceError: If the size of the request exceeds the entire - request byte budget. 
- """ - self._byte_budget = self._max_bytes - self._byte_budget -= base_request.ByteSize() - if self._byte_budget < 0: - raise RuntimeError("Byte budget too small for base request") - - def add_run(self, run_proto): - """Integrates the cost of a run proto into the byte budget. - - Args: - run_proto: The proto representing a run. - - Raises: - _OutOfSpaceError: If adding the run would exceed the remaining request - budget. - """ - cost = ( - # The size of the run proto without any tag fields set. - run_proto.ByteSize() - # The size of the varint that describes the length of the run - # proto. We can't yet know the final size of the run proto -- we - # haven't yet set any tag or point values -- so we can't know the - # final size of this length varint. We conservatively assume it is - # maximum size. - + _MAX_VARINT64_LENGTH_BYTES - # The size of the proto key. - + 1 - ) - if cost > self._byte_budget: - raise _OutOfSpaceError() - self._byte_budget -= cost - - def add_tag(self, tag_proto): - """Integrates the cost of a tag proto into the byte budget. - - Args: - tag_proto: The proto representing a tag. - - Raises: - _OutOfSpaceError: If adding the tag would exceed the remaining request - budget. - """ - cost = ( - # The size of the tag proto without any tag fields set. - tag_proto.ByteSize() - # The size of the varint that describes the length of the tag - # proto. We can't yet know the final size of the tag proto -- we - # haven't yet set any point values -- so we can't know the final - # size of this length varint. We conservatively assume it is maximum - # size. - + _MAX_VARINT64_LENGTH_BYTES - # The size of the proto key. - + 1 - ) - if cost > self._byte_budget: - raise _OutOfSpaceError() - self._byte_budget -= cost - - def add_point(self, point_proto): - """Integrates the cost of a point proto into the byte budget. - - Args: - point_proto: The proto representing a point. - - Raises: - _OutOfSpaceError: If adding the point would exceed the remaining request - budget. - """ - submessage_cost = point_proto.ByteSize() - cost = ( - # The size of the point proto. - submessage_cost - # The size of the varint that describes the length of the point - # proto. - + _varint_cost(submessage_cost) - # The size of the proto key. - + 1 - ) - if cost > self._byte_budget: - raise _OutOfSpaceError() - self._byte_budget -= cost - - -class _BlobRequestSender: - """Uploader for blob-type event data. - - Unlike the other types, this class does not accumulate events in batches; - every blob is sent individually and immediately. Nonetheless we retain - the `add_event()`/`flush()` structure for symmetry. - - This class is not threadsafe. Use external synchronization if calling its - methods concurrently. - """ - - def __init__( - self, - experiment_id, - api, - rpc_rate_limiter, - max_blob_request_size, - max_blob_size, - tracker, - ): - if experiment_id is None: - raise ValueError("experiment_id cannot be None") - self._experiment_id = experiment_id - self._api = api - self._rpc_rate_limiter = rpc_rate_limiter - self._max_blob_request_size = max_blob_request_size - self._max_blob_size = max_blob_size - self._tracker = tracker - - # Start in the empty state, just like self._new_request(). 
- self._run_name = None - self._event = None - self._value = None - self._metadata = None - - def _new_request(self): - """Declares the previous event complete.""" - self._run_name = None - self._event = None - self._value = None - self._metadata = None - - def add_event( - self, - run_name, - event, - value, - metadata, - ): - """Attempts to add the given event to the current request. - - If the event cannot be added to the current request because the byte - budget is exhausted, the request is flushed, and the event is added - to the next request. - """ - if self._value: - raise RuntimeError("Tried to send blob while another is pending") - self._run_name = run_name - self._event = event # provides step and possibly plugin_name - self._value = value - # TODO(soergel): should we really unpack the tensor here, or ship - # it wholesale and unpack server side, or something else? - # TODO(soergel): can we extract the proto fields directly instead? - self._blobs = tensor_util.make_ndarray(self._value.tensor) - if self._blobs.ndim == 1: - self._metadata = metadata - self.flush() - else: - logger.warning( - "A blob sequence must be represented as a rank-1 Tensor. " - "Provided data has rank %d, for run %s, tag %s, step %s ('%s' plugin) .", - self._blobs.ndim, - run_name, - self._value.tag, - self._event.step, - metadata.plugin_data.plugin_name, - ) - # Skip this upload. - self._new_request() - - def flush(self): - """Sends the current blob sequence fully, and clears it to make way for the next.""" - if self._value: - blob_sequence_id = self._get_or_create_blob_sequence() - logger.info( - "Sending %d blobs for sequence id: %s", - len(self._blobs), - blob_sequence_id, - ) - - sent_blobs = 0 - for seq_index, blob in enumerate(self._blobs): - # Note the _send_blob() stream is internally flow-controlled. - # This rate limit applies to *starting* the stream. - self._rpc_rate_limiter.tick() - with self._tracker.blob_tracker(len(blob)) as blob_tracker: - sent_blobs += self._send_blob( - blob_sequence_id, seq_index, blob - ) - blob_tracker.mark_uploaded(bool(sent_blobs)) - - logger.info( - "Sent %d of %d blobs for sequence id: %s", - sent_blobs, - len(self._blobs), - blob_sequence_id, - ) - - self._new_request() - - def _get_or_create_blob_sequence(self): - request = write_service_pb2.GetOrCreateBlobSequenceRequest( - experiment_id=self._experiment_id, - run=self._run_name, - tag=self._value.tag, - step=self._event.step, - final_sequence_length=len(self._blobs), - metadata=self._metadata, - ) - util.set_timestamp(request.wall_time, self._event.wall_time) - with _request_logger(request): - try: - # TODO(@nfelt): execute this RPC asynchronously. - response = grpc_util.call_with_retries( - self._api.GetOrCreateBlobSequence, request - ) - blob_sequence_id = response.blob_sequence_id - except grpc.RpcError as e: - if e.code() == grpc.StatusCode.NOT_FOUND: - raise ExperimentNotFoundError() - logger.error("Upload call failed with error %s", e) - # TODO(soergel): clean up - raise - - return blob_sequence_id - - def _send_blob(self, blob_sequence_id, seq_index, blob): - """Tries to send a single blob for a given index within a blob sequence. - - The blob will not be sent if it was sent already, or if it is too large. - - Returns: - The number of blobs successfully sent (i.e., 1 or 0). - """ - # TODO(soergel): retry and resume logic - - if len(blob) > self._max_blob_size: - logger.warning( - "Blob too large; skipping. 
Size %d exceeds limit of %d bytes.", - len(blob), - self._max_blob_size, - ) - return 0 - - request_iterator = self._write_blob_request_iterator( - blob_sequence_id, seq_index, blob - ) - upload_start_time = time.time() - count = 0 - # TODO(soergel): don't wait for responses for greater throughput - # See https://stackoverflow.com/questions/55029342/handling-async-streaming-request-in-grpc-python - try: - for response in self._api.WriteBlob(request_iterator): - count += 1 - # TODO(soergel): validate responses? probably not. - pass - upload_duration_secs = time.time() - upload_start_time - logger.info( - "Upload for %d chunks totaling %d bytes took %.3f seconds (%.3f MB/sec)", - count, - len(blob), - upload_duration_secs, - len(blob) / upload_duration_secs / (1024 * 1024), - ) - return 1 - except grpc.RpcError as e: - if e.code() == grpc.StatusCode.ALREADY_EXISTS: - logger.error("Attempted to re-upload existing blob. Skipping.") - return 0 - else: - logger.info("WriteBlob RPC call got error %s", e) - raise - - def _write_blob_request_iterator(self, blob_sequence_id, seq_index, blob): - # For now all use cases have the blob in memory already. - # In the future we may want to stream from disk; that will require - # refactoring here. - # TODO(soergel): compute crc32c's to allow server-side data validation. - for offset in range(0, len(blob), self._max_blob_request_size): - chunk = blob[offset : offset + self._max_blob_request_size] - finalize_object = offset + self._max_blob_request_size >= len(blob) - request = write_service_pb2.WriteBlobRequest( - blob_sequence_id=blob_sequence_id, - index=seq_index, - data=chunk, - offset=offset, - crc32c=None, - finalize_object=finalize_object, - final_crc32c=None, - blob_bytes=len(blob), - ) - yield request - - -@contextlib.contextmanager -def _request_logger(request, runs=None): - upload_start_time = time.time() - request_bytes = request.ByteSize() - logger.info("Trying request of %d bytes", request_bytes) - yield - upload_duration_secs = time.time() - upload_start_time - if runs: - logger.info( - "Upload for %d runs (%d bytes) took %.3f seconds", - len(runs), - request_bytes, - upload_duration_secs, - ) - else: - logger.info( - "Upload of (%d bytes) took %.3f seconds", - request_bytes, - upload_duration_secs, - ) - - -def _varint_cost(n): - """Computes the size of `n` encoded as an unsigned base-128 varint. - - This should be consistent with the proto wire format: - - - Args: - n: A non-negative integer. - - Returns: - An integer number of bytes. 
- """ - result = 1 - while n >= 128: - result += 1 - n >>= 7 - return result - - -def _prune_empty_tags_and_runs(request): - for (run_idx, run) in reversed(list(enumerate(request.runs))): - for (tag_idx, tag) in reversed(list(enumerate(run.tags))): - if not tag.points: - del run.tags[tag_idx] - if not run.tags: - del request.runs[run_idx] - - -def _filter_graph_defs(event): - for v in event.summary.value: - if v.metadata.plugin_data.plugin_name != graphs_metadata.PLUGIN_NAME: - continue - if v.tag == graphs_metadata.RUN_GRAPH_NAME: - data = list(v.tensor.string_val) - filtered_data = [_filtered_graph_bytes(x) for x in data] - filtered_data = [x for x in filtered_data if x is not None] - if filtered_data != data: - new_tensor = tensor_util.make_tensor_proto( - filtered_data, dtype=types_pb2.DT_STRING - ) - v.tensor.CopyFrom(new_tensor) - - -def _filtered_graph_bytes(graph_bytes): - try: - graph_def = graph_pb2.GraphDef().FromString(graph_bytes) - # The reason for the RuntimeWarning catch here is b/27494216, whereby - # some proto parsers incorrectly raise that instead of DecodeError - # on certain kinds of malformed input. Triggering this seems to require - # a combination of mysterious circumstances. - except (message.DecodeError, RuntimeWarning): - logger.warning( - "Could not parse GraphDef of size %d. Skipping.", - len(graph_bytes), - ) - return None - # Use the default filter parameters: - # limit_attr_size=1024, large_attrs_key="_too_large_attrs" - process_graph.prepare_graph_for_ui(graph_def) - return graph_def.SerializeToString() diff --git a/tensorboard/uploader/uploader_subcommand.py b/tensorboard/uploader/uploader_subcommand.py index 64ec292bfe..bd4bcaf4a0 100644 --- a/tensorboard/uploader/uploader_subcommand.py +++ b/tensorboard/uploader/uploader_subcommand.py @@ -16,19 +16,16 @@ import abc -import os import sys import textwrap from absl import logging import grpc -from tensorboard.compat import tf from tensorboard.uploader.proto import experiment_pb2 from tensorboard.uploader.proto import export_service_pb2_grpc from tensorboard.uploader.proto import write_service_pb2_grpc from tensorboard.uploader import auth -from tensorboard.uploader import dry_run_stubs from tensorboard.uploader import exporter as exporter_lib from tensorboard.uploader import flags_parser from tensorboard.uploader import formatters @@ -59,7 +56,7 @@ def _prompt_for_user_ack(intent): """Prompts for user consent, exiting the program if they decline.""" body = intent.get_ack_message_body() - header = "\n***** TensorBoard Uploader *****\n" + header = "\n***** TensorBoard.dev Uploader *****\n" user_ack_message = "\n".join((header, body, _MESSAGE_TOS)) sys.stderr.write(user_ack_message) sys.stderr.write("\n") @@ -87,6 +84,31 @@ def _run(flags, experiment_url_callback=None): sys.stderr.write("Logged out of uploader.\n") sys.stderr.flush() return + if isinstance(intent, UploadIntent): + sys.stderr.write( + textwrap.dedent( + """\ + **************************************************************** + **************************************************************** + **************************************************************** + + Uploading TensorBoard logs to https://tensorboard.dev/ is no longer + supported. + + TensorBoard.dev is shutting down. + + Please export your experiments by Dec 31, 2023. + + See the FAQ at https://tensorboard.dev. 
+ + **************************************************************** + **************************************************************** + **************************************************************** + """ + ) + ) + sys.stderr.flush() + return # TODO(b/141723268): maybe reconfirm Google Account prior to reuse. credentials = store.read_credentials() if not credentials: @@ -397,115 +419,25 @@ def _die_if_bad_experiment_description(description): class UploadIntent(_Intent): - """The user intends to upload an experiment from the given logdir.""" + """The user intends to upload an experiment from the given logdir. - _MESSAGE_TEMPLATE = textwrap.dedent( - """\ - This will upload your TensorBoard logs to https://tensorboard.dev/ from - the following directory: + However, TensorBoard.dev is being turned down and we no longer allow + upload. + """ - {logdir} + def get_ack_message_body(self): + """Does nothing. - This TensorBoard will be visible to everyone. Do not upload sensitive - data. + Uploading is no longer supported and is handled specially by main. """ - ) - - def __init__( - self, - logdir, - name=None, - description=None, - verbosity=None, - dry_run=None, - one_shot=None, - experiment_url_callback=None, - ): - self.logdir = logdir - self.name = name - self.description = description - self.verbosity = verbosity - self.dry_run = False if dry_run is None else dry_run - self.one_shot = False if one_shot is None else one_shot - self.experiment_url_callback = experiment_url_callback - - def get_ack_message_body(self): - return self._MESSAGE_TEMPLATE.format(logdir=self.logdir) + return "" def execute(self, server_info, channel): - if self.dry_run: - api_client = dry_run_stubs.DryRunTensorBoardWriterStub() - else: - api_client = write_service_pb2_grpc.TensorBoardWriterServiceStub( - channel - ) - _die_if_bad_experiment_name(self.name) - _die_if_bad_experiment_description(self.description) - uploader = uploader_lib.TensorBoardUploader( - api_client, - self.logdir, - allowed_plugins=server_info_lib.allowed_plugins(server_info), - upload_limits=server_info_lib.upload_limits(server_info), - name=self.name, - description=self.description, - verbosity=self.verbosity, - one_shot=self.one_shot, - ) - if self.one_shot and not tf.io.gfile.isdir(self.logdir): - print("%s: No such directory." % self.logdir) - print( - "User specified `one_shot` mode with an unavailable " - "logdir. Exiting without creating an experiment." - ) - return - experiment_id = uploader.create_experiment() - url = server_info_lib.experiment_url(server_info, experiment_id) - if self.experiment_url_callback is not None: - self.experiment_url_callback(url) - if not self.one_shot: - print( - "Upload started and will continue reading any new data as it's " - "added to the logdir.\n\nTo stop uploading, press Ctrl-C." - ) - if self.dry_run: - print( - "\n** This is a dry run. " - "No data will be sent to tensorboard.dev. **\n" - ) - else: - print( - "\nNew experiment created. View your TensorBoard at: %s\n" % url - ) - interrupted = False - try: - uploader.start_uploading() - except uploader_lib.ExperimentNotFoundError: - print("Experiment was deleted; uploading has been cancelled") - return - except KeyboardInterrupt: - interrupted = True - finally: - if self.one_shot and not uploader.has_data(): - print( - "TensorBoard was run in `one_shot` mode, but did not find " - "any uploadable data in the specified logdir: %s\n" - "An empty experiment was created. 
" - "To delete the empty experiment you can execute the " - "following\n\n" - " tensorboard dev delete --experiment_id=%s" - % (self.logdir, uploader.experiment_id) - ) - end_message = "\n\n" - if interrupted: - end_message += "Interrupted." - else: - end_message += "Done." - # Only Add the "View your TensorBoard" message if there was any - # data added at all. - if not self.dry_run and uploader.has_data(): - end_message += " View your TensorBoard at %s" % url - sys.stdout.write(end_message + "\n") - sys.stdout.flush() + """Does nothing. + + Uploading is no longer supported and is handled specially by main. + """ + pass class _ExportIntent(_Intent): @@ -575,20 +507,8 @@ def _get_intent(flags, experiment_url_callback=None): if cmd is None: raise base_plugin.FlagsError("Must specify subcommand (try --help).") if cmd == flags_parser.SUBCOMMAND_KEY_UPLOAD: - if flags.logdir: - return UploadIntent( - os.path.expanduser(flags.logdir), - name=flags.name, - description=flags.description, - verbosity=flags.verbose, - dry_run=flags.dry_run, - one_shot=flags.one_shot, - experiment_url_callback=experiment_url_callback, - ) - else: - raise base_plugin.FlagsError( - "Must specify directory to upload via `--logdir`." - ) + return UploadIntent() + if cmd == flags_parser.SUBCOMMAND_KEY_UPDATE_METADATA: if flags.experiment_id: if flags.name is not None or flags.description is not None: diff --git a/tensorboard/uploader/uploader_subcommand_test.py b/tensorboard/uploader/uploader_subcommand_test.py index 347b807b10..1e4a065e08 100644 --- a/tensorboard/uploader/uploader_subcommand_test.py +++ b/tensorboard/uploader/uploader_subcommand_test.py @@ -23,189 +23,14 @@ from tensorboard.uploader.proto import experiment_pb2 from tensorboard.uploader.proto import server_info_pb2 -from tensorboard.uploader.proto import write_service_pb2 from tensorboard.uploader.proto import write_service_pb2_grpc -from tensorboard.uploader import dry_run_stubs from tensorboard.uploader import exporter as exporter_lib from tensorboard.uploader import uploader as uploader_lib from tensorboard.uploader import uploader_subcommand -from tensorboard.plugins.histogram import metadata as histograms_metadata -from tensorboard.plugins.graph import metadata as graphs_metadata -from tensorboard.plugins.scalar import metadata as scalars_metadata from tensorboard.plugins import base_plugin -# By default allow at least one plugin for each upload type: Scalar, Tensor, and -# Blobs. -_SCALARS_HISTOGRAMS_AND_GRAPHS = frozenset( - ( - scalars_metadata.PLUGIN_NAME, - histograms_metadata.PLUGIN_NAME, - graphs_metadata.PLUGIN_NAME, - ) -) - - -class UploadIntentTest(tf.test.TestCase): - def testUploadIntentOneShotEmptyDirectoryFails(self): - """Test the upload intent under the one-shot mode with missing dir. - - In the case of a non-existent directoy, uploading should not - create an experiment. - """ - # Mock three places: - # 1. The uploader itself, we will inspect invocations of its methods but - # do not want to actually upload anything. - # 2. Writing to stdout, so we can inspect messages to the user. - # 3. The creation of the grpc WriteServiceChannel, which happens in the - # non dry_run execution, but we don't want to actually open a network - # communication. 
- mock_uploader = mock.MagicMock() - mock_stdout_write = mock.MagicMock() - with mock.patch.object( - uploader_lib, - "TensorBoardUploader", - return_value=mock_uploader, - ), mock.patch.object( - sys.stdout, "write", mock_stdout_write - ), mock.patch.object( - write_service_pb2_grpc, "TensorBoardWriterServiceStub" - ): - # Set up an UploadIntent configured with one_shot and a - # non-existent directory. - intent = uploader_subcommand.UploadIntent( - "/dev/null/non/existent/directory", one_shot=True - ) - # Execute the intent.execute method. - intent.execute(server_info_pb2.ServerInfoResponse(), None) - # Expect that there is no call to create an experiment. - self.assertEqual(mock_uploader.create_experiment.call_count, 0) - # Expect a message to the user indicating no experiment was created. - stdout_writes = [x[0][0] for x in mock_stdout_write.call_args_list] - self.assertRegex( - ",".join(stdout_writes), - ".*Exiting without creating an experiment.*", - ) - - def testUploadIntentOneShot(self): - """Test the upload intent under the one-shot mode.""" - # Mock three places: - # 1. The uploader itself, we will inspect invocations of its methods but - # do not want to actually upload anything. - # 2. Writing to stdout, so we can inspect messages to the user. - # 3. The creation of the grpc WriteServiceChannel, which happens in the - # non dry_run execution, but we don't want to actually open a network - # communication. mock_uploader = mock.MagicMock() - mock_uploader = mock.MagicMock() - mock_uploader.create_experiment = mock.MagicMock( - return_value="fake_experiment_id" - ) - mock_stdout_write = mock.MagicMock() - with mock.patch.object( - sys.stdout, "write", mock_stdout_write - ), mock.patch.object( - uploader_lib, "TensorBoardUploader", return_value=mock_uploader - ), mock.patch.object( - write_service_pb2_grpc, "TensorBoardWriterServiceStub" - ): - # Set up an UploadIntent configured with one_shot and an empty temp - # directory. - intent = uploader_subcommand.UploadIntent( - self.get_temp_dir(), one_shot=True - ) - # Execute the intent.execute method. - intent.execute(server_info_pb2.ServerInfoResponse(), None) - # Expect that there is one call to create_experiment. - self.assertEqual(mock_uploader.create_experiment.call_count, 1) - # Expect that there is one call to start_uploading. - self.assertEqual(mock_uploader.start_uploading.call_count, 1) - # Expect that ".*Done scanning logdir.*" is among the things printed. - stdout_writes = [x[0][0] for x in mock_stdout_write.call_args_list] - self.assertRegex( - ",".join(stdout_writes), - ".*experiment created.*", - ) - # Expect that the last thing written is the string "Done" and the - # experiment_id. 
- self.assertRegex(stdout_writes[-1], ".*Done.*") - self.assertRegex(stdout_writes[-1], ".*fake_experiment_id.*") - - def testUploadIntentWithExperimentUrlCallback(self): - """Test the upload intent with a callback.""" - server_info = server_info_pb2.ServerInfoResponse() - server_info.url_format.template = "https://tensorboard.dev/x/{}" - server_info.url_format.id_placeholder = "{}" - - stub = dry_run_stubs.DryRunTensorBoardWriterStub() - stub.CreateExperiment = ( - lambda req, **__: write_service_pb2.CreateExperimentResponse( - experiment_id="test_experiment_id", url="this URL is ignored" - ) - ) - - expected_url = "https://tensorboard.dev/x/test_experiment_id" - - with mock.patch.object( - dry_run_stubs, - "DryRunTensorBoardWriterStub", - wraps=lambda: stub, - ), mock.patch.object(sys.stdout, "write"): - mock_channel = mock.Mock() - mock_experiment_url_callback = mock.Mock() - intent = uploader_subcommand.UploadIntent( - self.get_temp_dir(), - dry_run=True, - one_shot=True, - experiment_url_callback=mock_experiment_url_callback, - ) - intent.execute(server_info, mock_channel) - mock_experiment_url_callback.assert_called_once_with(expected_url) - - def testUploadIntentDryRunNonOneShotInterrupted(self): - mock_server_info = mock.MagicMock() - mock_channel = mock.MagicMock() - mock_stdout_write = mock.MagicMock() - mock_uploader = mock.MagicMock() - with mock.patch.object( - mock_uploader, - "start_uploading", - side_effect=KeyboardInterrupt(), - ), mock.patch.object( - uploader_lib, "TensorBoardUploader", return_value=mock_uploader - ), mock.patch.object( - sys.stdout, "write", mock_stdout_write - ): - intent = uploader_subcommand.UploadIntent( - self.get_temp_dir(), dry_run=True, one_shot=False - ) - intent.execute(mock_server_info, mock_channel) - self.assertRegex( - mock_stdout_write.call_args_list[-1][0][0], ".*Interrupted.*" - ) - - def testUploadIntentNonDryRunNonOneShotInterrupted(self): - mock_server_info = mock.MagicMock() - mock_channel = mock.MagicMock() - mock_stdout_write = mock.MagicMock() - mock_uploader = mock.MagicMock() - with mock.patch.object( - mock_uploader, - "start_uploading", - side_effect=KeyboardInterrupt(), - ), mock.patch.object( - uploader_lib, "TensorBoardUploader", return_value=mock_uploader - ), mock.patch.object( - sys.stdout, "write", mock_stdout_write - ): - intent = uploader_subcommand.UploadIntent( - self.get_temp_dir(), dry_run=False, one_shot=False - ) - intent.execute(mock_server_info, mock_channel) - self.assertIn( - "\nInterrupted. 
View your TensorBoard at ", - mock_stdout_write.call_args_list[-1][0][0], - ) - +class IntentTest(tf.test.TestCase): def testListIntentSetsExperimentMask(self): mock_server_info = mock.MagicMock() mock_channel = mock.MagicMock() diff --git a/tensorboard/uploader/uploader_test.py b/tensorboard/uploader/uploader_test.py index db9d7d48f4..eb9de9d009 100644 --- a/tensorboard/uploader/uploader_test.py +++ b/tensorboard/uploader/uploader_test.py @@ -16,8 +16,6 @@ import itertools -import os -import re from unittest import mock import grpc @@ -25,50 +23,11 @@ import tensorflow as tf -from google.protobuf import message -from tensorboard import data_compat -from tensorboard import dataclass_compat -from tensorboard.compat.proto import tensor_shape_pb2 from tensorboard.uploader.proto import experiment_pb2 -from tensorboard.uploader.proto import scalar_pb2 -from tensorboard.uploader.proto import server_info_pb2 from tensorboard.uploader.proto import write_service_pb2 from tensorboard.uploader.proto import write_service_pb2_grpc from tensorboard.uploader import test_util -from tensorboard.uploader import upload_tracker from tensorboard.uploader import uploader as uploader_lib -from tensorboard.uploader import logdir_loader -from tensorboard.uploader import util -from tensorboard.compat.proto import event_pb2 -from tensorboard.compat.proto import graph_pb2 -from tensorboard.compat.proto import summary_pb2 -from tensorboard.compat.proto import tensor_pb2 -from tensorboard.compat.proto import types_pb2 -from tensorboard.plugins.histogram import metadata as histograms_metadata -from tensorboard.plugins.histogram import summary_v2 as histogram_v2 -from tensorboard.plugins.graph import metadata as graphs_metadata -from tensorboard.plugins.scalar import metadata as scalars_metadata -from tensorboard.plugins.scalar import summary_v2 as scalar_v2 -from tensorboard.summary import v1 as summary_v1 -from tensorboard.util import test_util as tb_test_util -from tensorboard.util import tensor_util - - -def _create_example_graph_bytes(large_attr_size): - graph_def = graph_pb2.GraphDef() - graph_def.node.add(name="alice", op="Person") - graph_def.node.add(name="bob", op="Person") - - graph_def.node[1].attr["small"].s = b"small_attr_value" - graph_def.node[1].attr["large"].s = b"l" * large_attr_size - graph_def.node.add( - name="friendship", op="Friendship", input=["alice", "bob"] - ) - return graph_def.SerializeToString() - - -class AbortUploadError(Exception): - """Exception used in testing to abort the upload process.""" def _create_mock_client(): @@ -93,1782 +52,6 @@ def _create_mock_client(): return mock_client -# By default allow at least one plugin for each upload type: Scalar, Tensor, and -# Blobs. -_SCALARS_HISTOGRAMS_AND_GRAPHS = frozenset( - ( - scalars_metadata.PLUGIN_NAME, - histograms_metadata.PLUGIN_NAME, - graphs_metadata.PLUGIN_NAME, - ) -) - -# Sentinel for `_create_*` helpers, for arguments for which we want to -# supply a default other than the `None` used by the code under test. -_USE_DEFAULT = object() - - -def _create_uploader( - writer_client=_USE_DEFAULT, - logdir=None, - max_scalar_request_size=_USE_DEFAULT, - max_blob_request_size=_USE_DEFAULT, - max_blob_size=_USE_DEFAULT, - logdir_poll_rate_limiter=_USE_DEFAULT, - rpc_rate_limiter=_USE_DEFAULT, - tensor_rpc_rate_limiter=_USE_DEFAULT, - blob_rpc_rate_limiter=_USE_DEFAULT, - name=None, - description=None, - verbosity=0, # Use 0 to minimize littering the test output. 
- one_shot=None, -): - if writer_client is _USE_DEFAULT: - writer_client = _create_mock_client() - if max_scalar_request_size is _USE_DEFAULT: - max_scalar_request_size = 128000 - if max_blob_request_size is _USE_DEFAULT: - max_blob_request_size = 128000 - if max_blob_size is _USE_DEFAULT: - max_blob_size = 12345 - if logdir_poll_rate_limiter is _USE_DEFAULT: - logdir_poll_rate_limiter = util.RateLimiter(0) - if rpc_rate_limiter is _USE_DEFAULT: - rpc_rate_limiter = util.RateLimiter(0) - if tensor_rpc_rate_limiter is _USE_DEFAULT: - tensor_rpc_rate_limiter = util.RateLimiter(0) - if blob_rpc_rate_limiter is _USE_DEFAULT: - blob_rpc_rate_limiter = util.RateLimiter(0) - - upload_limits = server_info_pb2.UploadLimits( - max_scalar_request_size=max_scalar_request_size, - max_tensor_request_size=128000, - max_tensor_point_size=11111, - max_blob_request_size=max_blob_request_size, - max_blob_size=max_blob_size, - ) - - return uploader_lib.TensorBoardUploader( - writer_client, - logdir, - allowed_plugins=_SCALARS_HISTOGRAMS_AND_GRAPHS, - upload_limits=upload_limits, - logdir_poll_rate_limiter=logdir_poll_rate_limiter, - rpc_rate_limiter=rpc_rate_limiter, - tensor_rpc_rate_limiter=tensor_rpc_rate_limiter, - blob_rpc_rate_limiter=blob_rpc_rate_limiter, - name=name, - description=description, - verbosity=verbosity, - one_shot=one_shot, - ) - - -def _create_request_sender( - experiment_id=None, - api=None, - allowed_plugins=_USE_DEFAULT, -): - if api is _USE_DEFAULT: - api = _create_mock_client() - if allowed_plugins is _USE_DEFAULT: - allowed_plugins = _SCALARS_HISTOGRAMS_AND_GRAPHS - - upload_limits = server_info_pb2.UploadLimits( - max_scalar_request_size=128000, - max_tensor_request_size=128000, - max_tensor_point_size=11111, - max_blob_size=12345, - ) - - rpc_rate_limiter = util.RateLimiter(0) - tensor_rpc_rate_limiter = util.RateLimiter(0) - blob_rpc_rate_limiter = util.RateLimiter(0) - - return uploader_lib._BatchedRequestSender( - experiment_id=experiment_id, - api=api, - allowed_plugins=allowed_plugins, - upload_limits=upload_limits, - rpc_rate_limiter=rpc_rate_limiter, - tensor_rpc_rate_limiter=tensor_rpc_rate_limiter, - blob_rpc_rate_limiter=blob_rpc_rate_limiter, - tracker=upload_tracker.UploadTracker(verbosity=0), - ) - - -def _create_scalar_request_sender( - experiment_id=None, - api=_USE_DEFAULT, - max_request_size=_USE_DEFAULT, - tracker=None, -): - if api is _USE_DEFAULT: - api = _create_mock_client() - if max_request_size is _USE_DEFAULT: - max_request_size = 128000 - return uploader_lib._ScalarBatchedRequestSender( - experiment_id=experiment_id, - api=api, - rpc_rate_limiter=util.RateLimiter(0), - max_request_size=max_request_size, - tracker=tracker or upload_tracker.UploadTracker(verbosity=0), - ) - - -def _create_tensor_request_sender( - experiment_id=None, - api=_USE_DEFAULT, - max_request_size=_USE_DEFAULT, - max_tensor_point_size=_USE_DEFAULT, - tracker=None, -): - if api is _USE_DEFAULT: - api = _create_mock_client() - if max_request_size is _USE_DEFAULT: - max_request_size = 128000 - if max_tensor_point_size is _USE_DEFAULT: - max_tensor_point_size = 11111 - return uploader_lib._TensorBatchedRequestSender( - experiment_id=experiment_id, - api=api, - rpc_rate_limiter=util.RateLimiter(0), - max_request_size=max_request_size, - max_tensor_point_size=max_tensor_point_size, - tracker=tracker or upload_tracker.UploadTracker(verbosity=0), - ) - - -class TensorboardUploaderTest(tf.test.TestCase): - def test_create_experiment(self): - logdir = "/logs/foo" - uploader = 
_create_uploader(_create_mock_client(), logdir) - eid = uploader.create_experiment() - self.assertEqual(eid, "123") - - def test_create_experiment_with_name(self): - logdir = "/logs/foo" - mock_client = _create_mock_client() - new_name = "This is the new name" - uploader = _create_uploader(mock_client, logdir, name=new_name) - eid = uploader.create_experiment() - self.assertEqual(eid, "123") - mock_client.CreateExperiment.assert_called_once() - (args, _) = mock_client.CreateExperiment.call_args - - expected_request = write_service_pb2.CreateExperimentRequest( - name=new_name, - ) - self.assertEqual(args[0], expected_request) - - def test_create_experiment_with_description(self): - logdir = "/logs/foo" - mock_client = _create_mock_client() - new_description = """ - **description**" - may have "strange" unicode chars 🌴 \\/<> - """ - uploader = _create_uploader( - mock_client, logdir, description=new_description - ) - eid = uploader.create_experiment() - self.assertEqual(eid, "123") - mock_client.CreateExperiment.assert_called_once() - (args, _) = mock_client.CreateExperiment.call_args - - expected_request = write_service_pb2.CreateExperimentRequest( - description=new_description, - ) - self.assertEqual(args[0], expected_request) - - def test_create_experiment_with_all_metadata(self): - logdir = "/logs/foo" - mock_client = _create_mock_client() - new_description = """ - **description**" - may have "strange" unicode chars 🌴 \\/<> - """ - new_name = "This is a cool name." - uploader = _create_uploader( - mock_client, logdir, name=new_name, description=new_description - ) - eid = uploader.create_experiment() - self.assertEqual(eid, "123") - mock_client.CreateExperiment.assert_called_once() - (args, _) = mock_client.CreateExperiment.call_args - - expected_request = write_service_pb2.CreateExperimentRequest( - name=new_name, - description=new_description, - ) - self.assertEqual(args[0], expected_request) - - def test_start_uploading_without_create_experiment_fails(self): - mock_client = _create_mock_client() - uploader = _create_uploader(mock_client, "/logs/foo") - with self.assertRaisesRegex(RuntimeError, "call create_experiment()"): - uploader.start_uploading() - - def test_start_uploading_scalars(self): - mock_client = _create_mock_client() - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tensor_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tracker = mock.MagicMock() - with mock.patch.object( - upload_tracker, "UploadTracker", return_value=mock_tracker - ): - uploader = _create_uploader( - mock_client, - "/logs/foo", - # Send each Event below in a separate WriteScalarRequest - max_scalar_request_size=100, - rpc_rate_limiter=mock_rate_limiter, - tensor_rpc_rate_limiter=mock_tensor_rate_limiter, - blob_rpc_rate_limiter=mock_blob_rate_limiter, - verbosity=1, # In order to test the upload tracker. 
- ) - uploader.create_experiment() - - def scalar_event(tag, value): - return event_pb2.Event(summary=scalar_v2.scalar_pb(tag, value)) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - { - "run 1": _apply_compat( - [scalar_event("1.1", 5.0), scalar_event("1.2", 5.0)] - ), - "run 2": _apply_compat( - [scalar_event("2.1", 5.0), scalar_event("2.2", 5.0)] - ), - }, - { - "run 3": _apply_compat( - [scalar_event("3.1", 5.0), scalar_event("3.2", 5.0)] - ), - "run 4": _apply_compat( - [scalar_event("4.1", 5.0), scalar_event("4.2", 5.0)] - ), - "run 5": _apply_compat( - [scalar_event("5.1", 5.0), scalar_event("5.2", 5.0)] - ), - }, - AbortUploadError, - ] - - with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(AbortUploadError): - uploader.start_uploading() - self.assertEqual(4 + 6, mock_client.WriteScalar.call_count) - self.assertEqual(4 + 6, mock_rate_limiter.tick.call_count) - self.assertEqual(0, mock_tensor_rate_limiter.tick.call_count) - self.assertEqual(0, mock_blob_rate_limiter.tick.call_count) - - # Check upload tracker calls. - self.assertEqual(mock_tracker.send_tracker.call_count, 2) - self.assertEqual(mock_tracker.scalars_tracker.call_count, 10) - self.assertLen(mock_tracker.scalars_tracker.call_args[0], 1) - self.assertEqual(mock_tracker.tensors_tracker.call_count, 0) - self.assertEqual(mock_tracker.blob_tracker.call_count, 0) - - def test_start_uploading_scalars_one_shot(self): - """Check that one-shot uploading stops without AbortUploadError.""" - mock_client = _create_mock_client() - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tensor_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tracker = mock.MagicMock() - with mock.patch.object( - upload_tracker, "UploadTracker", return_value=mock_tracker - ): - uploader = _create_uploader( - mock_client, - "/logs/foo", - # Send each Event below in a separate WriteScalarRequest - max_scalar_request_size=100, - rpc_rate_limiter=mock_rate_limiter, - tensor_rpc_rate_limiter=mock_tensor_rate_limiter, - blob_rpc_rate_limiter=mock_blob_rate_limiter, - verbosity=1, # In order to test the upload tracker. - one_shot=True, - ) - uploader.create_experiment() - - def scalar_event(tag, value): - return event_pb2.Event(summary=scalar_v2.scalar_pb(tag, value)) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - { - "run 1": _apply_compat( - [scalar_event("1.1", 5.0), scalar_event("1.2", 5.0)] - ), - "run 2": _apply_compat( - [scalar_event("2.1", 5.0), scalar_event("2.2", 5.0)] - ), - }, - # Note the lack of AbortUploadError here. - ] - - with mock.patch.object(uploader, "_logdir_loader", mock_logdir_loader): - uploader.start_uploading() - - self.assertEqual(4, mock_client.WriteScalar.call_count) - self.assertEqual(4, mock_rate_limiter.tick.call_count) - self.assertEqual(0, mock_tensor_rate_limiter.tick.call_count) - self.assertEqual(0, mock_blob_rate_limiter.tick.call_count) - - # Check upload tracker calls. 
- self.assertEqual(mock_tracker.send_tracker.call_count, 1) - self.assertEqual(mock_tracker.scalars_tracker.call_count, 4) - self.assertLen(mock_tracker.scalars_tracker.call_args[0], 1) - self.assertEqual(mock_tracker.tensors_tracker.call_count, 0) - self.assertEqual(mock_tracker.blob_tracker.call_count, 0) - - def test_start_uploading_tensors(self): - mock_client = _create_mock_client() - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tensor_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tracker = mock.MagicMock() - with mock.patch.object( - upload_tracker, "UploadTracker", return_value=mock_tracker - ): - uploader = _create_uploader( - mock_client, - "/logs/foo", - rpc_rate_limiter=mock_rate_limiter, - tensor_rpc_rate_limiter=mock_tensor_rate_limiter, - blob_rpc_rate_limiter=mock_blob_rate_limiter, - verbosity=1, # In order to test the upload tracker. - ) - uploader.create_experiment() - - def tensor_event(tag, value): - return event_pb2.Event( - summary=histogram_v2.histogram_pb(tag, value) - ) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - { - "run 1": _apply_compat( - [tensor_event("1.1", [5.0]), tensor_event("1.2", [5.0])] - ), - }, - AbortUploadError, - ] - - with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(AbortUploadError): - uploader.start_uploading() - self.assertEqual(1, mock_client.WriteTensor.call_count) - self.assertEqual(0, mock_rate_limiter.tick.call_count) - self.assertEqual(1, mock_tensor_rate_limiter.tick.call_count) - self.assertEqual(0, mock_blob_rate_limiter.tick.call_count) - - # Check upload tracker calls. - self.assertEqual(mock_tracker.send_tracker.call_count, 1) - self.assertEqual(mock_tracker.scalars_tracker.call_count, 0) - tensors_tracker = mock_tracker.tensors_tracker - self.assertEqual(tensors_tracker.call_count, 1) - self.assertLen(tensors_tracker.call_args[0], 4) - self.assertEqual(tensors_tracker.call_args[0][0], 2) # num_tensors - self.assertEqual( - tensors_tracker.call_args[0][1], 0 - ) # num_tensors_skipped - # tensor_bytes: avoid asserting the exact value as it's hard to reason about. - self.assertGreater(tensors_tracker.call_args[0][2], 0) - self.assertEqual( - tensors_tracker.call_args[0][3], 0 - ) # tensor_bytes_skipped - self.assertEqual(mock_tracker.blob_tracker.call_count, 0) - - def test_start_uploading_graphs(self): - mock_client = _create_mock_client() - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tensor_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tracker = mock.MagicMock() - with mock.patch.object( - upload_tracker, "UploadTracker", return_value=mock_tracker - ): - uploader = _create_uploader( - mock_client, - "/logs/foo", - # Verify behavior with lots of small chunks - max_blob_request_size=100, - rpc_rate_limiter=mock_rate_limiter, - tensor_rpc_rate_limiter=mock_tensor_rate_limiter, - blob_rpc_rate_limiter=mock_blob_rate_limiter, - verbosity=1, # In order to test tracker. - ) - uploader.create_experiment() - - # Of course a real Event stream will never produce the same Event twice, - # but is this test context it's fine to reuse this one. 
- graph_event = event_pb2.Event( - graph_def=_create_example_graph_bytes(950) - ) - expected_graph_def = graph_pb2.GraphDef.FromString( - graph_event.graph_def - ) - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - { - "run 1": _apply_compat([graph_event, graph_event]), - "run 2": _apply_compat([graph_event, graph_event]), - }, - { - "run 3": _apply_compat([graph_event, graph_event]), - "run 4": _apply_compat([graph_event, graph_event]), - "run 5": _apply_compat([graph_event, graph_event]), - }, - AbortUploadError, - ] - - with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(AbortUploadError): - uploader.start_uploading() - self.assertEqual(1, mock_client.CreateExperiment.call_count) - self.assertEqual(10, mock_client.WriteBlob.call_count) - for (i, call) in enumerate(mock_client.WriteBlob.call_args_list): - requests = list(call[0][0]) - data = b"".join(r.data for r in requests) - actual_graph_def = graph_pb2.GraphDef.FromString(data) - self.assertProtoEquals(expected_graph_def, actual_graph_def) - self.assertEqual( - set(r.blob_sequence_id for r in requests), - {"blob%d" % i}, - ) - self.assertEqual(0, mock_rate_limiter.tick.call_count) - self.assertEqual(0, mock_tensor_rate_limiter.tick.call_count) - self.assertEqual(10, mock_blob_rate_limiter.tick.call_count) - - # Check upload tracker calls. - self.assertEqual(mock_tracker.send_tracker.call_count, 2) - self.assertEqual(mock_tracker.scalars_tracker.call_count, 0) - self.assertEqual(mock_tracker.tensors_tracker.call_count, 0) - self.assertEqual(mock_tracker.blob_tracker.call_count, 10) - self.assertLen(mock_tracker.blob_tracker.call_args[0], 1) - self.assertGreater(mock_tracker.blob_tracker.call_args[0][0], 0) - - def test_upload_skip_large_blob(self): - mock_client = _create_mock_client() - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter) - uploader = _create_uploader( - mock_client, - "/logs/foo", - # Verify behavior with lots of small chunks - max_blob_request_size=100, - max_blob_size=100, - rpc_rate_limiter=mock_rate_limiter, - blob_rpc_rate_limiter=mock_blob_rate_limiter, - ) - uploader.create_experiment() - - graph_event = event_pb2.Event( - graph_def=_create_example_graph_bytes(950) - ) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - {"run 1": _apply_compat([graph_event])}, - AbortUploadError, - ] - - with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(AbortUploadError): - uploader.start_uploading() - self.assertEqual(1, mock_client.CreateExperiment.call_count) - self.assertEqual(0, mock_client.WriteBlob.call_count) - self.assertEqual(0, mock_rate_limiter.tick.call_count) - self.assertEqual(1, mock_blob_rate_limiter.tick.call_count) - - def test_filter_graphs(self): - # Three graphs: one short, one long, one corrupt. 
- bytes_0 = _create_example_graph_bytes(123) - bytes_1 = _create_example_graph_bytes(9999) - # invalid (truncated) proto: length-delimited field 1 (0x0a) of - # length 0x7f specified, but only len("bogus") = 5 bytes given - # - bytes_2 = b"\x0a\x7fbogus" - - logdir = self.get_temp_dir() - for (i, b) in enumerate([bytes_0, bytes_1, bytes_2]): - run_dir = os.path.join(logdir, "run_%04d" % i) - event = event_pb2.Event(step=0, wall_time=123 * i, graph_def=b) - with tb_test_util.FileWriter(run_dir) as writer: - writer.add_event(event) - - limiter = mock.create_autospec(util.RateLimiter) - limiter.tick.side_effect = [None, AbortUploadError] - mock_client = _create_mock_client() - uploader = _create_uploader( - mock_client, - logdir, - logdir_poll_rate_limiter=limiter, - ) - uploader.create_experiment() - - with self.assertRaises(AbortUploadError): - uploader.start_uploading() - - actual_blobs = [] - for call in mock_client.WriteBlob.call_args_list: - requests = call[0][0] - actual_blobs.append(b"".join(r.data for r in requests)) - - actual_graph_defs = [] - for blob in actual_blobs: - try: - actual_graph_defs.append(graph_pb2.GraphDef.FromString(blob)) - except message.DecodeError: - actual_graph_defs.append(None) - - with self.subTest("graphs with small attr values should be unchanged"): - expected_graph_def_0 = graph_pb2.GraphDef.FromString(bytes_0) - self.assertEqual(actual_graph_defs[0], expected_graph_def_0) - - with self.subTest("large attr values should be filtered out"): - expected_graph_def_1 = graph_pb2.GraphDef.FromString(bytes_1) - del expected_graph_def_1.node[1].attr["large"] - expected_graph_def_1.node[1].attr["_too_large_attrs"].list.s.append( - b"large" - ) - requests = list(mock_client.WriteBlob.call_args[0][0]) - self.assertEqual(actual_graph_defs[1], expected_graph_def_1) - - with self.subTest("corrupt graphs should be skipped"): - self.assertLen(actual_blobs, 2) - - def test_upload_server_error(self): - mock_client = _create_mock_client() - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter) - uploader = _create_uploader( - mock_client, - "/logs/foo", - rpc_rate_limiter=mock_rate_limiter, - blob_rpc_rate_limiter=mock_blob_rate_limiter, - ) - uploader.create_experiment() - - # Of course a real Event stream will never produce the same Event twice, - # but is this test context it's fine to reuse this one. - graph_event = event_pb2.Event( - graph_def=_create_example_graph_bytes(950) - ) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - {"run 1": _apply_compat([graph_event])}, - {"run 1": _apply_compat([graph_event])}, - AbortUploadError, - ] - - mock_client.WriteBlob.side_effect = [ - [write_service_pb2.WriteBlobResponse()], - test_util.grpc_error(grpc.StatusCode.INTERNAL, "nope"), - ] - - # This demonstrates that the INTERNAL error is NOT handled, so the - # uploader will die if this happens. 
- with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(grpc.RpcError): - uploader.start_uploading() - self.assertEqual(1, mock_client.CreateExperiment.call_count) - self.assertEqual(2, mock_client.WriteBlob.call_count) - self.assertEqual(0, mock_rate_limiter.tick.call_count) - self.assertEqual(2, mock_blob_rate_limiter.tick.call_count) - - def test_upload_same_graph_twice(self): - mock_client = _create_mock_client() - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter) - uploader = _create_uploader( - mock_client, - "/logs/foo", - rpc_rate_limiter=mock_rate_limiter, - blob_rpc_rate_limiter=mock_blob_rate_limiter, - ) - uploader.create_experiment() - - graph_event = event_pb2.Event( - graph_def=_create_example_graph_bytes(950) - ) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - {"run 1": _apply_compat([graph_event])}, - {"run 1": _apply_compat([graph_event])}, - AbortUploadError, - ] - - mock_client.WriteBlob.side_effect = [ - [write_service_pb2.WriteBlobResponse()], - test_util.grpc_error(grpc.StatusCode.ALREADY_EXISTS, "nope"), - ] - - # This demonstrates that the ALREADY_EXISTS error is handled gracefully. - with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(AbortUploadError): - uploader.start_uploading() - self.assertEqual(1, mock_client.CreateExperiment.call_count) - self.assertEqual(2, mock_client.WriteBlob.call_count) - self.assertEqual(0, mock_rate_limiter.tick.call_count) - self.assertEqual(2, mock_blob_rate_limiter.tick.call_count) - - def test_upload_empty_logdir(self): - logdir = self.get_temp_dir() - mock_client = _create_mock_client() - uploader = _create_uploader(mock_client, logdir) - uploader.create_experiment() - uploader._upload_once() - mock_client.WriteScalar.assert_not_called() - - def test_upload_polls_slowly_once_done(self): - class Success(Exception): - pass - - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - upload_call_count = 0 - - def mock_upload_once(): - nonlocal upload_call_count - upload_call_count += 1 - tick_count = mock_rate_limiter.tick.call_count - self.assertEqual(tick_count, upload_call_count) - if tick_count >= 3: - raise Success() - - uploader = _create_uploader( - logdir=self.get_temp_dir(), - logdir_poll_rate_limiter=mock_rate_limiter, - ) - uploader._upload_once = mock_upload_once - - uploader.create_experiment() - with self.assertRaises(Success): - uploader.start_uploading() - - def test_upload_swallows_rpc_failure(self): - logdir = self.get_temp_dir() - with tb_test_util.FileWriter(logdir) as writer: - writer.add_test_summary("foo") - mock_client = _create_mock_client() - uploader = _create_uploader(mock_client, logdir) - uploader.create_experiment() - error = test_util.grpc_error(grpc.StatusCode.INTERNAL, "Failure") - mock_client.WriteScalar.side_effect = error - uploader._upload_once() - mock_client.WriteScalar.assert_called_once() - - def test_upload_full_logdir(self): - logdir = self.get_temp_dir() - mock_client = _create_mock_client() - uploader = _create_uploader(mock_client, logdir) - uploader.create_experiment() - - # Convenience helpers for constructing expected requests. 
- run = write_service_pb2.WriteScalarRequest.Run - tag = write_service_pb2.WriteScalarRequest.Tag - point = scalar_pb2.ScalarPoint - - # First round - writer = tb_test_util.FileWriter(logdir) - writer.add_test_summary("foo", simple_value=5.0, step=1) - writer.add_test_summary("foo", simple_value=6.0, step=2) - writer.add_test_summary("foo", simple_value=7.0, step=3) - writer.add_test_summary("bar", simple_value=8.0, step=3) - writer.flush() - writer_a = tb_test_util.FileWriter(os.path.join(logdir, "a")) - writer_a.add_test_summary("qux", simple_value=9.0, step=2) - writer_a.flush() - uploader._upload_once() - self.assertEqual(1, mock_client.WriteScalar.call_count) - request1 = mock_client.WriteScalar.call_args[0][0] - _clear_wall_times(request1) - expected_request1 = write_service_pb2.WriteScalarRequest( - experiment_id="123", - runs=[ - run( - name=".", - tags=[ - tag( - name="foo", - metadata=test_util.scalar_metadata("foo"), - points=[ - point(step=1, value=5.0), - point(step=2, value=6.0), - point(step=3, value=7.0), - ], - ), - tag( - name="bar", - metadata=test_util.scalar_metadata("bar"), - points=[point(step=3, value=8.0)], - ), - ], - ), - run( - name="a", - tags=[ - tag( - name="qux", - metadata=test_util.scalar_metadata("qux"), - points=[point(step=2, value=9.0)], - ) - ], - ), - ], - ) - self.assertProtoEquals(expected_request1, request1) - mock_client.WriteScalar.reset_mock() - - # Second round - writer.add_test_summary("foo", simple_value=10.0, step=5) - writer.add_test_summary("baz", simple_value=11.0, step=1) - writer.flush() - writer_b = tb_test_util.FileWriter(os.path.join(logdir, "b")) - writer_b.add_test_summary("xyz", simple_value=12.0, step=1) - writer_b.flush() - uploader._upload_once() - self.assertEqual(1, mock_client.WriteScalar.call_count) - request2 = mock_client.WriteScalar.call_args[0][0] - _clear_wall_times(request2) - expected_request2 = write_service_pb2.WriteScalarRequest( - experiment_id="123", - runs=[ - run( - name=".", - tags=[ - tag( - name="foo", - metadata=test_util.scalar_metadata("foo"), - points=[point(step=5, value=10.0)], - ), - tag( - name="baz", - metadata=test_util.scalar_metadata("baz"), - points=[point(step=1, value=11.0)], - ), - ], - ), - run( - name="b", - tags=[ - tag( - name="xyz", - metadata=test_util.scalar_metadata("xyz"), - points=[point(step=1, value=12.0)], - ) - ], - ), - ], - ) - self.assertProtoEquals(expected_request2, request2) - mock_client.WriteScalar.reset_mock() - - # Empty third round - uploader._upload_once() - mock_client.WriteScalar.assert_not_called() - - def test_verbosity_zero_creates_upload_tracker_with_verbosity_zero(self): - mock_client = _create_mock_client() - mock_tracker = mock.MagicMock() - with mock.patch.object( - upload_tracker, "UploadTracker", return_value=mock_tracker - ) as mock_constructor: - uploader = _create_uploader( - mock_client, - "/logs/foo", - verbosity=0, # Explicitly set verbosity to 0. 
- ) - uploader.create_experiment() - - def scalar_event(tag, value): - return event_pb2.Event(summary=scalar_v2.scalar_pb(tag, value)) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - { - "run 1": _apply_compat( - [scalar_event("1.1", 5.0), scalar_event("1.2", 5.0)] - ), - }, - AbortUploadError, - ] - - with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(AbortUploadError): - uploader.start_uploading() - - self.assertEqual(mock_constructor.call_count, 1) - self.assertEqual( - mock_constructor.call_args[1], {"verbosity": 0, "one_shot": False} - ) - self.assertEqual(mock_tracker.scalars_tracker.call_count, 1) - - -class BatchedRequestSenderTest(tf.test.TestCase): - def _populate_run_from_events( - self, scalar_run, tensor_run, events, allowed_plugins=_USE_DEFAULT - ): - mock_client = _create_mock_client() - builder = _create_request_sender( - experiment_id="123", - api=mock_client, - allowed_plugins=allowed_plugins, - ) - builder.send_requests({"": _apply_compat(events)}) - scalar_requests = [ - c[0][0] for c in mock_client.WriteScalar.call_args_list - ] - if scalar_requests: - self.assertLen(scalar_requests, 1) - self.assertLen(scalar_requests[0].runs, 1) - scalar_run.MergeFrom(scalar_requests[0].runs[0]) - tensor_requests = [ - c[0][0] for c in mock_client.WriteTensor.call_args_list - ] - if tensor_requests: - self.assertLen(tensor_requests, 1) - self.assertLen(tensor_requests[0].runs, 1) - tensor_run.MergeFrom(tensor_requests[0].runs[0]) - - def test_empty_events(self): - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events(scalar_run, tensor_run, []) - self.assertProtoEquals( - scalar_run, write_service_pb2.WriteScalarRequest.Run() - ) - self.assertProtoEquals( - tensor_run, write_service_pb2.WriteTensorRequest.Run() - ) - - def test_scalar_and_tensor_events(self): - events = [ - event_pb2.Event(summary=scalar_v2.scalar_pb("scalar1", 5.0)), - event_pb2.Event(summary=scalar_v2.scalar_pb("scalar2", 5.0)), - event_pb2.Event( - summary=histogram_v2.histogram_pb("histogram", [5.0]) - ), - event_pb2.Event( - summary=histogram_v2.histogram_pb("histogram", [6.0]) - ), - ] - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events(scalar_run, tensor_run, events) - scalar_tag_counts = _extract_tag_counts(scalar_run) - self.assertEqual(scalar_tag_counts, {"scalar1": 1, "scalar2": 1}) - tensor_tag_counts = _extract_tag_counts(tensor_run) - self.assertEqual(tensor_tag_counts, {"histogram": 2}) - - def test_skips_non_scalar_and_non_tensor_events(self): - events = [ - event_pb2.Event(summary=scalar_v2.scalar_pb("scalar1", 5.0)), - event_pb2.Event(file_version="brain.Event:2"), - event_pb2.Event( - summary=histogram_v2.histogram_pb("histogram", [5.0]) - ), - ] - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events(scalar_run, tensor_run, events) - scalar_tag_counts = _extract_tag_counts(scalar_run) - self.assertEqual(scalar_tag_counts, {"scalar1": 1}) - tensor_tag_counts = _extract_tag_counts(tensor_run) - self.assertEqual(tensor_tag_counts, {"histogram": 1}) - - def test_skips_non_scalar_events_in_scalar_time_series(self): - events = [ - event_pb2.Event(file_version="brain.Event:2"), - 
event_pb2.Event(summary=scalar_v2.scalar_pb("scalar1", 5.0)), - event_pb2.Event(summary=scalar_v2.scalar_pb("scalar2", 5.0)), - event_pb2.Event( - summary=histogram_v2.histogram_pb("scalar2", [5.0]) - ), - ] - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events(scalar_run, tensor_run, events) - scalar_tag_counts = _extract_tag_counts(scalar_run) - self.assertEqual(scalar_tag_counts, {"scalar1": 1, "scalar2": 1}) - tensor_tag_counts = _extract_tag_counts(tensor_run) - self.assertEqual(tensor_tag_counts, {}) - - def test_skips_events_from_disallowed_plugins(self): - event = event_pb2.Event( - step=1, wall_time=123.456, summary=scalar_v2.scalar_pb("foo", 5.0) - ) - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events( - scalar_run, - tensor_run, - [event], - allowed_plugins=frozenset("not-scalars"), - ) - expected_scalar_run = write_service_pb2.WriteScalarRequest.Run() - self.assertProtoEquals(scalar_run, expected_scalar_run) - expected_tensor_run = write_service_pb2.WriteTensorRequest.Run() - self.assertProtoEquals(tensor_run, expected_tensor_run) - - def test_remembers_first_metadata_in_time_series(self): - scalar_1 = event_pb2.Event(summary=scalar_v2.scalar_pb("loss", 4.0)) - scalar_2 = event_pb2.Event(summary=scalar_v2.scalar_pb("loss", 3.0)) - scalar_2.summary.value[0].ClearField("metadata") - events = [ - event_pb2.Event(file_version="brain.Event:2"), - scalar_1, - scalar_2, - ] - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events(scalar_run, tensor_run, events) - scalar_tag_counts = _extract_tag_counts(scalar_run) - self.assertEqual(scalar_tag_counts, {"loss": 2}) - - def test_expands_multiple_values_in_event(self): - event = event_pb2.Event(step=1, wall_time=123.456) - event.summary.value.add(tag="foo", simple_value=1.0) - event.summary.value.add(tag="foo", simple_value=2.0) - event.summary.value.add(tag="foo", simple_value=3.0) - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events(scalar_run, tensor_run, [event]) - expected_scalar_run = write_service_pb2.WriteScalarRequest.Run() - foo_tag = expected_scalar_run.tags.add() - foo_tag.name = "foo" - foo_tag.metadata.display_name = "foo" - foo_tag.metadata.plugin_data.plugin_name = "scalars" - foo_tag.metadata.data_class = summary_pb2.DATA_CLASS_SCALAR - foo_tag.points.add( - step=1, wall_time=test_util.timestamp_pb(123456000000), value=1.0 - ) - foo_tag.points.add( - step=1, wall_time=test_util.timestamp_pb(123456000000), value=2.0 - ) - foo_tag.points.add( - step=1, wall_time=test_util.timestamp_pb(123456000000), value=3.0 - ) - self.assertProtoEquals(scalar_run, expected_scalar_run) - - -class ScalarBatchedRequestSenderTest(tf.test.TestCase): - def _add_events(self, sender, run_name, events): - for event in events: - for value in event.summary.value: - sender.add_event(run_name, event, value, value.metadata) - - def _add_events_and_flush(self, events): - mock_client = _create_mock_client() - sender = _create_scalar_request_sender( - experiment_id="123", - api=mock_client, - ) - self._add_events(sender, "", events) - sender.flush() - - requests = [c[0][0] for c in mock_client.WriteScalar.call_args_list] - self.assertLen(requests, 1) - 
self.assertLen(requests[0].runs, 1) - return requests[0].runs[0] - - def test_aggregation_by_tag(self): - def make_event(step, wall_time, tag, value): - return event_pb2.Event( - step=step, - wall_time=wall_time, - summary=scalar_v2.scalar_pb(tag, value), - ) - - events = [ - make_event(1, 1.0, "one", 11.0), - make_event(1, 2.0, "two", 22.0), - make_event(2, 3.0, "one", 33.0), - make_event(2, 4.0, "two", 44.0), - make_event( - 1, 5.0, "one", 55.0 - ), # Should preserve duplicate step=1. - make_event(1, 6.0, "three", 66.0), - ] - run_proto = self._add_events_and_flush(events) - tag_data = { - tag.name: [ - (p.step, p.wall_time.ToSeconds(), p.value) for p in tag.points - ] - for tag in run_proto.tags - } - self.assertEqual( - tag_data, - { - "one": [(1, 1.0, 11.0), (2, 3.0, 33.0), (1, 5.0, 55.0)], - "two": [(1, 2.0, 22.0), (2, 4.0, 44.0)], - "three": [(1, 6.0, 66.0)], - }, - ) - - def test_v1_summary(self): - event = event_pb2.Event(step=1, wall_time=123.456) - event.summary.value.add(tag="foo", simple_value=5.0) - run_proto = self._add_events_and_flush(_apply_compat([event])) - expected_run_proto = write_service_pb2.WriteScalarRequest.Run() - foo_tag = expected_run_proto.tags.add() - foo_tag.name = "foo" - foo_tag.metadata.display_name = "foo" - foo_tag.metadata.plugin_data.plugin_name = "scalars" - foo_tag.metadata.data_class = summary_pb2.DATA_CLASS_SCALAR - foo_tag.points.add( - step=1, wall_time=test_util.timestamp_pb(123456000000), value=5.0 - ) - self.assertProtoEquals(run_proto, expected_run_proto) - - def test_v1_summary_tb_summary(self): - tf_summary = summary_v1.scalar_pb("foo", 5.0) - tb_summary = summary_pb2.Summary.FromString( - tf_summary.SerializeToString() - ) - event = event_pb2.Event(step=1, wall_time=123.456, summary=tb_summary) - run_proto = self._add_events_and_flush(_apply_compat([event])) - expected_run_proto = write_service_pb2.WriteScalarRequest.Run() - foo_tag = expected_run_proto.tags.add() - foo_tag.name = "foo/scalar_summary" - foo_tag.metadata.display_name = "foo" - foo_tag.metadata.plugin_data.plugin_name = "scalars" - foo_tag.metadata.data_class = summary_pb2.DATA_CLASS_SCALAR - foo_tag.points.add( - step=1, wall_time=test_util.timestamp_pb(123456000000), value=5.0 - ) - self.assertProtoEquals(run_proto, expected_run_proto) - - def test_v2_summary(self): - event = event_pb2.Event( - step=1, wall_time=123.456, summary=scalar_v2.scalar_pb("foo", 5.0) - ) - run_proto = self._add_events_and_flush(_apply_compat([event])) - expected_run_proto = write_service_pb2.WriteScalarRequest.Run() - foo_tag = expected_run_proto.tags.add() - foo_tag.name = "foo" - foo_tag.metadata.plugin_data.plugin_name = "scalars" - foo_tag.metadata.data_class = summary_pb2.DATA_CLASS_SCALAR - foo_tag.points.add( - step=1, wall_time=test_util.timestamp_pb(123456000000), value=5.0 - ) - self.assertProtoEquals(run_proto, expected_run_proto) - - def test_propagates_experiment_deletion(self): - event = event_pb2.Event(step=1) - event.summary.value.add(tag="foo", simple_value=1.0) - - mock_client = _create_mock_client() - sender = _create_scalar_request_sender("123", mock_client) - self._add_events(sender, "run", _apply_compat([event])) - - error = test_util.grpc_error(grpc.StatusCode.NOT_FOUND, "nope") - mock_client.WriteScalar.side_effect = error - with self.assertRaises(uploader_lib.ExperimentNotFoundError): - sender.flush() - - def test_no_budget_for_base_request(self): - mock_client = _create_mock_client() - long_experiment_id = "A" * 12 - with self.assertRaises(RuntimeError) as cm: - 
_create_scalar_request_sender( - experiment_id=long_experiment_id, - api=mock_client, - max_request_size=12, - ) - self.assertEqual( - str(cm.exception), "Byte budget too small for base request" - ) - - def test_no_room_for_single_point(self): - mock_client = _create_mock_client() - event = event_pb2.Event(step=1, wall_time=123.456) - event.summary.value.add(tag="foo", simple_value=1.0) - long_run_name = "A" * 12 - sender = _create_scalar_request_sender( - "123", mock_client, max_request_size=12 - ) - with self.assertRaises(RuntimeError) as cm: - self._add_events(sender, long_run_name, [event]) - self.assertEqual(str(cm.exception), "add_event failed despite flush") - - def test_break_at_run_boundary(self): - mock_client = _create_mock_client() - # Choose run name sizes such that one run fits in a 1024 byte request, - # but not two. - long_run_1 = "A" * 768 - long_run_2 = "B" * 768 - event_1 = event_pb2.Event(step=1) - event_1.summary.value.add(tag="foo", simple_value=1.0) - event_2 = event_pb2.Event(step=2) - event_2.summary.value.add(tag="bar", simple_value=-2.0) - - sender = _create_scalar_request_sender( - "123", - mock_client, - # Set a limit to request size - max_request_size=1024, - ) - self._add_events(sender, long_run_1, _apply_compat([event_1])) - self._add_events(sender, long_run_2, _apply_compat([event_2])) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteScalar.call_args_list] - - for request in requests: - _clear_wall_times(request) - - # Expect two RPC calls despite a single explicit call to flush(). - expected = [ - write_service_pb2.WriteScalarRequest(experiment_id="123"), - write_service_pb2.WriteScalarRequest(experiment_id="123"), - ] - ( - expected[0] - .runs.add(name=long_run_1) - .tags.add(name="foo", metadata=test_util.scalar_metadata("foo")) - .points.add(step=1, value=1.0) - ) - ( - expected[1] - .runs.add(name=long_run_2) - .tags.add(name="bar", metadata=test_util.scalar_metadata("bar")) - .points.add(step=2, value=-2.0) - ) - self.assertEqual(requests, expected) - - def test_break_at_tag_boundary(self): - mock_client = _create_mock_client() - # Choose tag name sizes such that one tag fits in a 1024 byte requst, - # but not two. Note that tag names appear in both `Tag.name` and the - # summary metadata. - long_tag_1 = "a" * 384 - long_tag_2 = "b" * 384 - event = event_pb2.Event(step=1) - event.summary.value.add(tag=long_tag_1, simple_value=1.0) - event.summary.value.add(tag=long_tag_2, simple_value=2.0) - - sender = _create_scalar_request_sender( - "123", - mock_client, - # Set a limit to request size - max_request_size=1024, - ) - self._add_events(sender, "train", _apply_compat([event])) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteScalar.call_args_list] - for request in requests: - _clear_wall_times(request) - - # Expect two RPC calls despite a single explicit call to flush(). 
- expected = [ - write_service_pb2.WriteScalarRequest(experiment_id="123"), - write_service_pb2.WriteScalarRequest(experiment_id="123"), - ] - ( - expected[0] - .runs.add(name="train") - .tags.add( - name=long_tag_1, metadata=test_util.scalar_metadata(long_tag_1) - ) - .points.add(step=1, value=1.0) - ) - ( - expected[1] - .runs.add(name="train") - .tags.add( - name=long_tag_2, metadata=test_util.scalar_metadata(long_tag_2) - ) - .points.add(step=1, value=2.0) - ) - self.assertEqual(requests, expected) - - def test_break_at_scalar_point_boundary(self): - mock_client = _create_mock_client() - point_count = 2000 # comfortably saturates a single 1024-byte request - events = [] - for step in range(point_count): - summary = scalar_v2.scalar_pb("loss", -2.0 * step) - if step > 0: - summary.value[0].ClearField("metadata") - events.append(event_pb2.Event(summary=summary, step=step)) - tracker = upload_tracker.UploadTracker(verbosity=0) - sender = _create_scalar_request_sender( - "123", - mock_client, - # Set a limit to request size - max_request_size=1024, - tracker=tracker, - ) - self._add_events(sender, "train", _apply_compat(events)) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteScalar.call_args_list] - for request in requests: - _clear_wall_times(request) - - self.assertGreater(len(requests), 1) - self.assertLess(len(requests), point_count) - # This is the observed number of requests when running the test. There - # is no reasonable way to derive this value from just reading the code. - # The number of requests does not have to be 33 to be correct but if it - # changes it probably warrants some investigation or thought. - self.assertEqual(33, len(requests)) - - total_points_in_result = 0 - for request in requests: - self.assertLen(request.runs, 1) - run = request.runs[0] - self.assertEqual(run.name, "train") - self.assertLen(run.tags, 1) - tag = run.tags[0] - self.assertEqual(tag.name, "loss") - for point in tag.points: - self.assertEqual(point.step, total_points_in_result) - self.assertEqual(point.value, -2.0 * point.step) - total_points_in_result += 1 - self.assertLessEqual(request.ByteSize(), 1024) - self.assertEqual(total_points_in_result, point_count) - with self.subTest("Scalar report count correct."): - self.assertEqual(tracker._stats.num_scalars, point_count) - - def test_prunes_tags_and_runs(self): - mock_client = _create_mock_client() - event_1 = event_pb2.Event(step=1) - event_1.summary.value.add(tag="foo", simple_value=1.0) - event_2 = event_pb2.Event(step=2) - event_2.summary.value.add(tag="bar", simple_value=-2.0) - - add_point_call_count = 0 - - def mock_add_point(byte_budget_manager_self, point): - # Simulate out-of-space error the first time that we try to store - # the second point. 
- nonlocal add_point_call_count - add_point_call_count += 1 - if add_point_call_count == 2: - raise uploader_lib._OutOfSpaceError() - - with mock.patch.object( - uploader_lib._ByteBudgetManager, - "add_point", - mock_add_point, - ): - sender = _create_scalar_request_sender("123", mock_client) - self._add_events(sender, "train", _apply_compat([event_1])) - self._add_events(sender, "test", _apply_compat([event_2])) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteScalar.call_args_list] - for request in requests: - _clear_wall_times(request) - - expected = [ - write_service_pb2.WriteScalarRequest(experiment_id="123"), - write_service_pb2.WriteScalarRequest(experiment_id="123"), - ] - ( - expected[0] - .runs.add(name="train") - .tags.add(name="foo", metadata=test_util.scalar_metadata("foo")) - .points.add(step=1, value=1.0) - ) - ( - expected[1] - .runs.add(name="test") - .tags.add(name="bar", metadata=test_util.scalar_metadata("bar")) - .points.add(step=2, value=-2.0) - ) - self.assertEqual(expected, requests) - - def test_wall_time_precision(self): - # Test a wall time that is exactly representable in float64 but has enough - # digits to incur error if converted to nanoseconds the naive way (* 1e9). - event1 = event_pb2.Event(step=1, wall_time=1567808404.765432119) - event1.summary.value.add(tag="foo", simple_value=1.0) - # Test a wall time where as a float64, the fractional part on its own will - # introduce error if truncated to 9 decimal places instead of rounded. - event2 = event_pb2.Event(step=2, wall_time=1.000000002) - event2.summary.value.add(tag="foo", simple_value=2.0) - run_proto = self._add_events_and_flush(_apply_compat([event1, event2])) - self.assertEqual( - test_util.timestamp_pb(1567808404765432119), - run_proto.tags[0].points[0].wall_time, - ) - self.assertEqual( - test_util.timestamp_pb(1000000002), - run_proto.tags[0].points[1].wall_time, - ) - - -class TensorBatchedRequestSenderTest(tf.test.TestCase): - def _add_events(self, sender, run_name, events): - for event in events: - for value in event.summary.value: - sender.add_event(run_name, event, value, value.metadata) - - def _add_events_and_flush(self, events, max_tensor_point_size=_USE_DEFAULT): - mock_client = _create_mock_client() - sender = _create_tensor_request_sender( - experiment_id="123", - api=mock_client, - max_tensor_point_size=max_tensor_point_size, - ) - self._add_events(sender, "", events) - sender.flush() - - requests = [c[0][0] for c in mock_client.WriteTensor.call_args_list] - self.assertLen(requests, 1) - self.assertLen(requests[0].runs, 1) - return requests[0].runs[0] - - def test_histogram_event(self): - event = event_pb2.Event( - step=1, - wall_time=123.456, - summary=histogram_v2.histogram_pb("foo", [1.0]), - ) - - run_proto = self._add_events_and_flush(_apply_compat([event])) - expected_run_proto = write_service_pb2.WriteTensorRequest.Run() - foo_tag = expected_run_proto.tags.add() - foo_tag.name = "foo" - foo_tag.metadata.plugin_data.plugin_name = "histograms" - foo_tag.metadata.data_class = summary_pb2.DATA_CLASS_TENSOR - foo_tag.points.add( - step=1, - wall_time=test_util.timestamp_pb(123456000000), - value=tensor_pb2.TensorProto(dtype=types_pb2.DT_DOUBLE), - ) - # Simplify the tensor value a bit before making assertions on it. - # We care that it is copied to the request but we don't need it to be - # an extensive test. 
- run_proto.tags[0].points[0].value.ClearField("tensor_shape") - run_proto.tags[0].points[0].value.ClearField("tensor_content") - self.assertProtoEquals(run_proto, expected_run_proto) - - def test_histogram_event_with_empty_tensor_content_errors_out(self): - event = event_pb2.Event(step=42) - event.summary.value.add( - tag="one", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, - # Use empty tensor content to elicit an error. - tensor_content=b"", - ), - ) - - mock_client = _create_mock_client() - sender = _create_tensor_request_sender("123", mock_client) - with self.assertRaisesRegex( - ValueError, - re.compile( - r"failed to upload a tensor.*malformation.*tag.*\'one\'.*step.*42", - re.DOTALL, - ), - ): - self._add_events(sender, "run", _apply_compat([event])) - - def test_histogram_event_with_incorrect_tensor_shape_errors_out(self): - event = event_pb2.Event(step=1337) - tensor_proto = tensor_util.make_tensor_proto([1.0, 2.0]) - # Add an extraneous dimension to the tensor shape in order to - # elicit an error. - tensor_proto.tensor_shape.dim.append( - tensor_shape_pb2.TensorShapeProto.Dim(size=2) - ) - event.summary.value.add(tag="two", tensor=tensor_proto) - - mock_client = _create_mock_client() - sender = _create_tensor_request_sender("123", mock_client) - with self.assertRaisesRegex( - ValueError, - re.compile( - r"failed to upload a tensor.*malformation.*tag.*\'two\'.*step.*1337." - r"*shape", - re.DOTALL, - ), - ): - self._add_events(sender, "run", _apply_compat([event])) - - def test_aggregation_by_tag(self): - def make_event(step, wall_time, tag): - event = event_pb2.Event(step=step, wall_time=wall_time) - event.summary.value.add( - tag=tag, - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - return event - - events = [ - make_event(1, 1.0, "one"), - make_event(1, 2.0, "two"), - make_event(2, 3.0, "one"), - make_event(2, 4.0, "two"), - make_event(1, 5.0, "one"), # Should preserve duplicate step=1. 
- make_event(1, 6.0, "three"), - ] - run_proto = self._add_events_and_flush(events) - tag_data = { - tag.name: [(p.step, p.wall_time.ToSeconds()) for p in tag.points] - for tag in run_proto.tags - } - self.assertEqual( - tag_data, - { - "one": [(1, 1.0), (2, 3.0), (1, 5.0)], - "two": [(1, 2.0), (2, 4.0)], - "three": [(1, 6.0)], - }, - ) - - def test_propagates_experiment_deletion(self): - event = event_pb2.Event(step=1) - event.summary.value.add( - tag="one", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - - mock_client = _create_mock_client() - sender = _create_tensor_request_sender("123", mock_client) - self._add_events(sender, "run", _apply_compat([event])) - - error = test_util.grpc_error(grpc.StatusCode.NOT_FOUND, "nope") - mock_client.WriteTensor.side_effect = error - with self.assertRaises(uploader_lib.ExperimentNotFoundError): - sender.flush() - - def test_no_budget_for_base_request(self): - mock_client = _create_mock_client() - long_experiment_id = "A" * 12 - with self.assertRaises(RuntimeError) as cm: - _create_tensor_request_sender( - experiment_id=long_experiment_id, - api=mock_client, - max_request_size=12, - ) - self.assertEqual( - str(cm.exception), "Byte budget too small for base request" - ) - - def test_no_room_for_single_point(self): - mock_client = _create_mock_client() - event = event_pb2.Event(step=1) - event.summary.value.add( - tag="one", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - long_run_name = "A" * 12 - sender = _create_tensor_request_sender( - "123", mock_client, max_request_size=12 - ) - with self.assertRaises(RuntimeError) as cm: - self._add_events(sender, long_run_name, [event]) - self.assertEqual(str(cm.exception), "add_event failed despite flush") - - def test_break_at_run_boundary(self): - mock_client = _create_mock_client() - # Choose run name sizes such that one run fits in a 1024 byte request, - # but not two. - long_run_1 = "A" * 768 - long_run_2 = "B" * 768 - event_1 = event_pb2.Event(step=1) - event_1.summary.value.add( - tag="one", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - event_2 = event_pb2.Event(step=2) - event_2.summary.value.add( - tag="two", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[2.0] - ), - ) - - sender = _create_tensor_request_sender( - "123", - mock_client, - # Set a limit to request size - max_request_size=1024, - ) - self._add_events(sender, long_run_1, _apply_compat([event_1])) - self._add_events(sender, long_run_2, _apply_compat([event_2])) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteTensor.call_args_list] - - # Expect two RPC calls despite a single explicit call to flush(). - self.assertEqual(2, len(requests)) - self.assertEqual(1, len(requests[0].runs)) - self.assertEqual(long_run_1, requests[0].runs[0].name) - self.assertEqual(1, len(requests[1].runs)) - self.assertEqual(long_run_2, requests[1].runs[0].name) - - def test_break_at_tag_boundary(self): - mock_client = _create_mock_client() - # Choose tag name sizes such that one tag fits in a 1024 byte request, - # but not two. 
- long_tag_1 = "a" * 600 - long_tag_2 = "b" * 600 - event = event_pb2.Event(step=1, wall_time=1) - event.summary.value.add( - tag=long_tag_1, - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - event.summary.value.add( - tag=long_tag_2, - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[2.0] - ), - ) - - sender = _create_tensor_request_sender( - "123", - mock_client, - # Set a limit to request size - max_request_size=1024, - ) - self._add_events(sender, "train", _apply_compat([event])) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteTensor.call_args_list] - - # Expect two RPC calls despite a single explicit call to flush(). - self.assertEqual(2, len(requests)) - # First RPC contains one tag. - self.assertEqual(1, len(requests[0].runs)) - self.assertEqual("train", requests[0].runs[0].name) - self.assertEqual(1, len(requests[0].runs[0].tags)) - self.assertEqual(long_tag_1, requests[0].runs[0].tags[0].name) - # Second RPC contains the other tag. - self.assertEqual(1, len(requests[1].runs)) - self.assertEqual("train", requests[1].runs[0].name) - self.assertEqual(1, len(requests[1].runs[0].tags)) - self.assertEqual(long_tag_2, requests[1].runs[0].tags[0].name) - - def test_break_at_tensor_point_boundary(self): - mock_client = _create_mock_client() - point_count = 2000 # comfortably saturates a single 1024-byte request - events = [] - for step in range(point_count): - event = event_pb2.Event(step=step) - tensor_proto = tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0 * step, -1.0 * step] - ) - tensor_proto.tensor_shape.dim.append( - tensor_shape_pb2.TensorShapeProto.Dim(size=2) - ) - event.summary.value.add(tag="histo", tensor=tensor_proto) - events.append(event) - - tracker = upload_tracker.UploadTracker(verbosity=0) - sender = _create_tensor_request_sender( - "123", - mock_client, - # Set a limit to request size - max_request_size=1024, - tracker=tracker, - ) - self._add_events(sender, "train", _apply_compat(events)) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteTensor.call_args_list] - - self.assertGreater(len(requests), 1) - self.assertLess(len(requests), point_count) - self.assertEqual(72, len(requests)) - - total_points_in_result = 0 - for request in requests: - self.assertLen(request.runs, 1) - run = request.runs[0] - self.assertEqual(run.name, "train") - self.assertLen(run.tags, 1) - tag = run.tags[0] - self.assertEqual(tag.name, "histo") - for point in tag.points: - self.assertEqual(point.step, total_points_in_result) - self.assertEqual( - point.value.double_val, - [1.0 * point.step, -1.0 * point.step], - ) - total_points_in_result += 1 - self.assertLessEqual(request.ByteSize(), 1024) - self.assertEqual(total_points_in_result, point_count) - with self.subTest("Tensor report count correct."): - self.assertEqual(tracker._stats.num_tensors, point_count) - - def test_strip_large_tensors(self): - # Generate test data with varying tensor point sizes. Use raw bytes. - event_1 = event_pb2.Event(step=1) - event_1.summary.value.add( - tag="one", - # This TensorProto has a byte size of 18. - tensor=tensor_util.make_tensor_proto([1.0, 2.0]), - ) - event_1.summary.value.add( - tag="two", - # This TensorProto has a byte size of 22. - tensor=tensor_util.make_tensor_proto([1.0, 2.0, 3.0]), - ) - # This TensorProto has a 12-byte tensor_content. - event_2 = event_pb2.Event(step=2) - event_2.summary.value.add( - tag="one", - # This TensorProto has a byte size of 18. 
- tensor=tensor_util.make_tensor_proto([2.0, 4.0]), - ) - event_2.summary.value.add( - tag="two", - # This TensorProto has a byte size of 26. - tensor=tensor_util.make_tensor_proto([1.0, 2.0, 3.0, 4.0]), - ) - - run_proto = self._add_events_and_flush( - _apply_compat([event_1, event_2]), - # Set threshold that will filter out the tensor point with 26 bytes - # of data and above. The additional byte is for proto overhead. - max_tensor_point_size=24, - ) - tag_data = { - tag.name: [(p.step, p.value.tensor_content) for p in tag.points] - for tag in run_proto.tags - } - # A single tensor point is filtered out. - self.assertEqual( - tag_data, - { - "one": [ - (1, b"\x00\x00\x80?\x00\x00\x00@"), - (2, b"\x00\x00\x00@\x00\x00\x80@"), - ], - "two": [(1, b"\x00\x00\x80?\x00\x00\x00@\x00\x00@@")], - }, - ) - - run_proto_2 = self._add_events_and_flush( - _apply_compat([event_1, event_2]), - # Set threshold that will filter out the tensor points with 22 and 26 - # bytes of data and above. The additional byte is for proto overhead. - max_tensor_point_size=20, - ) - tag_data_2 = { - tag.name: [(p.step, p.value.tensor_content) for p in tag.points] - for tag in run_proto_2.tags - } - # All tensor points from the same tag are filtered out, and the tag is pruned. - self.assertEqual( - tag_data_2, - { - "one": [ - (1, b"\x00\x00\x80?\x00\x00\x00@"), - (2, b"\x00\x00\x00@\x00\x00\x80@"), - ], - }, - ) - - def test_prunes_tags_and_runs(self): - mock_client = _create_mock_client() - event_1 = event_pb2.Event(step=1) - event_1.summary.value.add( - tag="one", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - event_2 = event_pb2.Event(step=2) - event_2.summary.value.add( - tag="two", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[2.0] - ), - ) - - add_point_call_count = 0 - - def mock_add_point(byte_budget_manager_self, point): - # Simulate out-of-space error the first time that we try to store - # the second point. - nonlocal add_point_call_count - add_point_call_count += 1 - if add_point_call_count == 2: - raise uploader_lib._OutOfSpaceError() - - with mock.patch.object( - uploader_lib._ByteBudgetManager, - "add_point", - mock_add_point, - ): - sender = _create_tensor_request_sender("123", mock_client) - self._add_events(sender, "train", _apply_compat([event_1])) - self._add_events(sender, "test", _apply_compat([event_2])) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteTensor.call_args_list] - - # Expect two RPC calls despite a single explicit call to flush(). - self.assertEqual(2, len(requests)) - # First RPC contains one tag. - self.assertEqual(1, len(requests[0].runs)) - self.assertEqual("train", requests[0].runs[0].name) - self.assertEqual(1, len(requests[0].runs[0].tags)) - self.assertEqual("one", requests[0].runs[0].tags[0].name) - # Second RPC contains the other tag. - self.assertEqual(1, len(requests[1].runs)) - self.assertEqual("test", requests[1].runs[0].name) - self.assertEqual(1, len(requests[1].runs[0].tags)) - self.assertEqual("two", requests[1].runs[0].tags[0].name) - - def test_wall_time_precision(self): - # Test a wall time that is exactly representable in float64 but has enough - # digits to incur error if converted to nanoseconds the naive way (* 1e9). 
- event_1 = event_pb2.Event(step=1, wall_time=1567808404.765432119) - event_1.summary.value.add( - tag="tag", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - # Test a wall time where as a float64, the fractional part on its own will - # introduce error if truncated to 9 decimal places instead of rounded. - event_2 = event_pb2.Event(step=2, wall_time=1.000000002) - event_2.summary.value.add( - tag="tag", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[2.0] - ), - ) - run_proto = self._add_events_and_flush( - _apply_compat([event_1, event_2]) - ) - self.assertEqual( - test_util.timestamp_pb(1567808404765432119), - run_proto.tags[0].points[0].wall_time, - ) - self.assertEqual( - test_util.timestamp_pb(1000000002), - run_proto.tags[0].points[1].wall_time, - ) - - class DeleteExperimentTest(tf.test.TestCase): def _create_mock_client(self): # Create a stub instance (using a test channel) in order to derive a mock @@ -1992,39 +175,5 @@ def test_internal_error(self): self.assertIn("travesty", msg) -class VarintCostTest(tf.test.TestCase): - def test_varint_cost(self): - self.assertEqual(uploader_lib._varint_cost(0), 1) - self.assertEqual(uploader_lib._varint_cost(7), 1) - self.assertEqual(uploader_lib._varint_cost(127), 1) - self.assertEqual(uploader_lib._varint_cost(128), 2) - self.assertEqual(uploader_lib._varint_cost(128 * 128 - 1), 2) - self.assertEqual(uploader_lib._varint_cost(128 * 128), 3) - - -def _clear_wall_times(request): - """Clears the wall_time fields in a WriteScalarRequest to be - deterministic.""" - for run in request.runs: - for tag in run.tags: - for point in tag.points: - point.ClearField("wall_time") - - -def _apply_compat(events): - initial_metadata = {} - for event in events: - event = data_compat.migrate_event(event) - events = dataclass_compat.migrate_event( - event, initial_metadata=initial_metadata - ) - for event in events: - yield event - - -def _extract_tag_counts(run_proto): - return {tag.name: len(tag.points) for tag in run_proto.tags} - - if __name__ == "__main__": tf.test.main() diff --git a/tensorboard/uploader/util.py b/tensorboard/uploader/util.py index f8917e86f6..3b90fdbbae 100644 --- a/tensorboard/uploader/util.py +++ b/tensorboard/uploader/util.py @@ -19,28 +19,6 @@ import errno import os import os.path -import time - - -class RateLimiter: - """Helper class for rate-limiting using a fixed minimum interval.""" - - def __init__(self, interval_secs): - """Constructs a RateLimiter that permits a tick() every - `interval_secs`.""" - self._time = time # Use property for ease of testing. 
- self._interval_secs = interval_secs - self._last_called_secs = 0 - - def tick(self): - """Blocks until it has been at least `interval_secs` since last - tick().""" - wait_secs = ( - self._last_called_secs + self._interval_secs - self._time.time() - ) - if wait_secs > 0: - self._time.sleep(wait_secs) - self._last_called_secs = self._time.time() def get_user_config_directory(): diff --git a/tensorboard/uploader/util_test.py b/tensorboard/uploader/util_test.py index 49d12694d5..09cc7101eb 100644 --- a/tensorboard/uploader/util_test.py +++ b/tensorboard/uploader/util_test.py @@ -22,35 +22,10 @@ from unittest import mock from google.protobuf import timestamp_pb2 -from tensorboard.uploader import test_util from tensorboard.uploader import util from tensorboard import test as tb_test -class RateLimiterTest(tb_test.TestCase): - def test_rate_limiting(self): - rate_limiter = util.RateLimiter(10) - fake_time = test_util.FakeTime(current=1000) - with mock.patch.object(rate_limiter, "_time", fake_time): - self.assertEqual(1000, fake_time.time()) - # No sleeping for initial tick. - rate_limiter.tick() - self.assertEqual(1000, fake_time.time()) - # Second tick requires a full sleep. - rate_limiter.tick() - self.assertEqual(1010, fake_time.time()) - # Third tick requires a sleep just to make up the remaining second. - fake_time.sleep(9) - self.assertEqual(1019, fake_time.time()) - rate_limiter.tick() - self.assertEqual(1020, fake_time.time()) - # Fourth tick requires no sleep since we have no remaining seconds. - fake_time.sleep(11) - self.assertEqual(1031, fake_time.time()) - rate_limiter.tick() - self.assertEqual(1031, fake_time.time()) - - class GetUserConfigDirectoryTest(tb_test.TestCase): def test_windows(self): with mock.patch.object(os, "name", "nt"): From bd2789502c60842ce021201fdc66068625779563 Mon Sep 17 00:00:00 2001 From: Riley Jones <78179109+rileyajones@users.noreply.github.com> Date: Tue, 24 Oct 2023 17:09:53 -0700 Subject: [PATCH 5/8] Bug Fix: Make the runs data table only show a scrollbar when needed (#6656) ## Motivation for features / changes The runs table previously always showed the scroll bar. Now it only shows when there are enough runs. 
## Screenshots of UI changes (or N/A)
Before (no scroll):
![image](https://github.com/tensorflow/tensorboard/assets/78179109/ab6f197e-1494-4c3a-b11d-88e181860e8b)
Before (with scroll):
![image](https://github.com/tensorflow/tensorboard/assets/78179109/ff5908ee-5a75-4153-9a93-d541c5980d51)
After (no scroll):
![image](https://github.com/tensorflow/tensorboard/assets/78179109/e287cf02-8cc2-4386-baee-505a5b406266)
After (with scroll):
![image](https://github.com/tensorflow/tensorboard/assets/78179109/78c45e94-38aa-4997-971d-d7148d548bda)
---
 tensorboard/webapp/runs/views/runs_table/runs_data_table.scss | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorboard/webapp/runs/views/runs_table/runs_data_table.scss b/tensorboard/webapp/runs/views/runs_table/runs_data_table.scss
index d6c79bb5be..246b95cbe2 100644
--- a/tensorboard/webapp/runs/views/runs_table/runs_data_table.scss
+++ b/tensorboard/webapp/runs/views/runs_table/runs_data_table.scss
@@ -22,7 +22,7 @@ $_arrow_size: 16px;
 }
 
 :host {
-  overflow-y: scroll;
+  overflow-y: auto;
   width: 100%;
 }
 

From 9b76dbc3425d828ca5312a5f98fa83bc8cb786f9 Mon Sep 17 00:00:00 2001
From: Riley Jones <78179109+rileyajones@users.noreply.github.com>
Date: Tue, 24 Oct 2023 17:36:27 -0700
Subject: [PATCH 6/8] Sort run names with leading numbers differently (#6664)

## Motivation for features / changes
We have been treating all run names as plain strings, even though some run
names are (serialized) numbers and some begin with numbers. As a result,
sorting behaved unintuitively.

Note: I've also changed the behavior around sorting `undefined` values: when
sorting in descending order they now appear at the top of the list. The
previous behavior was a recent change that did not appear to be intentional,
and it made the code more complex.
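For illustration only (not part of this change): a minimal sketch of the
intended ordering using the new `sortTableDataItems` helper added below. The
specific ids and run names are made up, but the ordering matches the
`sorts runs by their leading numbers` case in the new `sorting_utils_test.ts`.

```ts
import {SortingOrder} from '../../../widgets/data_table/types';
import {sortTableDataItems} from './sorting_utils';

// Leading numbers are now compared numerically, so '2/myrun' sorts before
// '10/myrun'; plain string comparison would have put '10/myrun' first.
const sorted = sortTableDataItems(
  [
    {id: 'row-a', name: '10/myrun'},
    {id: 'row-b', name: '2/myrun'},
    {id: 'row-c', name: '1/myrun'},
  ],
  {name: 'name', order: SortingOrder.ASCENDING}
);
// Expected name order: '1/myrun', '2/myrun', '10/myrun'.
```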
https://github.com/tensorflow/tensorboard/issues/6651 Internal users see [b/278671226](http://b/278671226) ## Screenshots of UI changes (or N/A) Before: ![image](https://github.com/tensorflow/tensorboard/assets/78179109/5f805c37-283d-4f55-b1bf-3dfa4d9ea1da) After: ![image](https://github.com/tensorflow/tensorboard/assets/78179109/0d740b6e-2ed5-4762-aec0-22400eeb152d) ![image](https://github.com/tensorflow/tensorboard/assets/78179109/5283bade-b4da-401d-9893-932d9fb9d378) --- .../webapp/runs/views/runs_table/BUILD | 2 + .../views/runs_table/runs_table_container.ts | 30 +- .../runs/views/runs_table/sorting_utils.ts | 131 ++++++++ .../views/runs_table/sorting_utils_test.ts | 286 ++++++++++++++++++ 4 files changed, 420 insertions(+), 29 deletions(-) create mode 100644 tensorboard/webapp/runs/views/runs_table/sorting_utils.ts create mode 100644 tensorboard/webapp/runs/views/runs_table/sorting_utils_test.ts diff --git a/tensorboard/webapp/runs/views/runs_table/BUILD b/tensorboard/webapp/runs/views/runs_table/BUILD index 4d936d8af1..6ccc536e0b 100644 --- a/tensorboard/webapp/runs/views/runs_table/BUILD +++ b/tensorboard/webapp/runs/views/runs_table/BUILD @@ -64,6 +64,7 @@ tf_ng_module( "runs_table_component.ts", "runs_table_container.ts", "runs_table_module.ts", + "sorting_utils.ts", ], assets = [ ":regex_edit_dialog_styles", @@ -131,6 +132,7 @@ tf_ts_library( "regex_edit_dialog_test.ts", "runs_data_table_test.ts", "runs_table_test.ts", + "sorting_utils_test.ts", ], deps = [ ":runs_table", diff --git a/tensorboard/webapp/runs/views/runs_table/runs_table_container.ts b/tensorboard/webapp/runs/views/runs_table/runs_table_container.ts index 7487ab8edd..cdf7567b6d 100644 --- a/tensorboard/webapp/runs/views/runs_table/runs_table_container.ts +++ b/tensorboard/webapp/runs/views/runs_table/runs_table_container.ts @@ -70,7 +70,6 @@ import { ColumnHeader, FilterAddedEvent, SortingInfo, - SortingOrder, TableData, } from '../../../widgets/data_table/types'; import { @@ -101,6 +100,7 @@ import { getPotentialHparamColumns, } from '../../../metrics/views/main_view/common_selectors'; import {runsTableFullScreenToggled} from '../../../core/actions'; +import {sortTableDataItems} from './sorting_utils'; const getRunsLoading = createSelector< State, @@ -182,34 +182,6 @@ function sortRunTableItems( return sortedItems; } -function sortTableDataItems( - items: TableData[], - sort: SortingInfo -): TableData[] { - const sortedItems = [...items]; - - sortedItems.sort((a, b) => { - let aValue = a[sort.name]; - let bValue = b[sort.name]; - - if (sort.name === 'experimentAlias') { - aValue = (aValue as ExperimentAlias).aliasNumber; - bValue = (bValue as ExperimentAlias).aliasNumber; - } - - if (aValue === bValue) { - return 0; - } - - if (aValue === undefined || bValue === undefined) { - return bValue === undefined ? -1 : 1; - } - - return aValue < bValue === (sort.order === SortingOrder.ASCENDING) ? -1 : 1; - }); - return sortedItems; -} - function matchFilter( filter: DiscreteFilter | IntervalFilter, value: number | DiscreteHparamValue | undefined diff --git a/tensorboard/webapp/runs/views/runs_table/sorting_utils.ts b/tensorboard/webapp/runs/views/runs_table/sorting_utils.ts new file mode 100644 index 0000000000..cf67fbaf5f --- /dev/null +++ b/tensorboard/webapp/runs/views/runs_table/sorting_utils.ts @@ -0,0 +1,131 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +import { + SortingInfo, + SortingOrder, + TableData, +} from '../../../widgets/data_table/types'; +import {ExperimentAlias} from '../../../experiments/types'; + +enum UndefinedStrategy { + BEFORE, + AFTER, +} + +interface SortOptions { + insertUndefined: UndefinedStrategy; +} + +const POTENTIALLY_NUMERIC_TYPES = new Set(['string', 'number']); + +const DEFAULT_SORT_OPTIONS: SortOptions = { + insertUndefined: UndefinedStrategy.AFTER, +}; + +export function parseNumericPrefix(value: string | number) { + if (typeof value === 'number') { + return isNaN(value) ? undefined : value; + } + + if (!isNaN(parseInt(value))) { + return parseInt(value); + } + + for (let i = 0; i < value.length; i++) { + if (isNaN(parseInt(value[i]))) { + if (i === 0) return; + return parseInt(value.slice(0, i)); + } + } + + return; +} + +export function sortTableDataItems( + items: TableData[], + sort: SortingInfo +): TableData[] { + const sortedItems = [...items]; + + sortedItems.sort((a, b) => { + let aValue = a[sort.name]; + let bValue = b[sort.name]; + + if (sort.name === 'experimentAlias') { + aValue = (aValue as ExperimentAlias).aliasNumber; + bValue = (bValue as ExperimentAlias).aliasNumber; + } + + if (aValue === bValue) { + return 0; + } + + if (aValue === undefined || bValue === undefined) { + return compareValues(aValue, bValue); + } + + if ( + POTENTIALLY_NUMERIC_TYPES.has(typeof aValue) && + POTENTIALLY_NUMERIC_TYPES.has(typeof bValue) + ) { + const aPrefix = parseNumericPrefix(aValue as string | number); + const bPrefix = parseNumericPrefix(bValue as string | number); + // Show runs with numbers before to runs without numbers + if ( + (aPrefix === undefined || bPrefix === undefined) && + aPrefix !== bPrefix + ) { + return compareValues(aPrefix, bPrefix, { + insertUndefined: UndefinedStrategy.BEFORE, + }); + } + if (aPrefix !== undefined && bPrefix !== undefined) { + if (aPrefix === bPrefix) { + const aPostfix = + aValue.toString().slice(aPrefix.toString().length) || undefined; + const bPostfix = + bValue.toString().slice(bPrefix.toString().length) || undefined; + return compareValues(aPostfix, bPostfix, { + insertUndefined: UndefinedStrategy.BEFORE, + }); + } + + return compareValues(aPrefix, bPrefix); + } + } + + return compareValues(aValue, bValue); + }); + return sortedItems; + + function compareValues( + a: TableData[string] | undefined, + b: TableData[string] | undefined, + {insertUndefined}: SortOptions = DEFAULT_SORT_OPTIONS + ) { + if (a === b) { + return 0; + } + + if (a === undefined) { + return insertUndefined === UndefinedStrategy.AFTER ? 1 : -1; + } + if (b === undefined) { + return insertUndefined === UndefinedStrategy.AFTER ? -1 : 1; + } + + return a < b === (sort.order === SortingOrder.ASCENDING) ? 
-1 : 1; + } +} diff --git a/tensorboard/webapp/runs/views/runs_table/sorting_utils_test.ts b/tensorboard/webapp/runs/views/runs_table/sorting_utils_test.ts new file mode 100644 index 0000000000..25348ec939 --- /dev/null +++ b/tensorboard/webapp/runs/views/runs_table/sorting_utils_test.ts @@ -0,0 +1,286 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +import {SortingOrder} from '../../../widgets/data_table/types'; +import {parseNumericPrefix, sortTableDataItems} from './sorting_utils'; + +describe('sorting utils', () => { + describe('parseNumericPrefix', () => { + it('returns undefined when a non numeric value is provided', () => { + expect(parseNumericPrefix('')).toBeUndefined(); + expect(parseNumericPrefix('foo')).toBeUndefined(); + expect(parseNumericPrefix('foo123')).toBeUndefined(); + expect(parseNumericPrefix(NaN)).toBeUndefined(); + }); + + it('returns all leading numbers from a string', () => { + expect(parseNumericPrefix('0')).toEqual(0); + expect(parseNumericPrefix('123')).toEqual(123); + expect(parseNumericPrefix('123train')).toEqual(123); + expect(parseNumericPrefix('123/')).toEqual(123); + expect(parseNumericPrefix('123/foo')).toEqual(123); + expect(parseNumericPrefix('123/foo/456')).toEqual(123); + }); + + it('returns numbers when provided', () => { + expect(parseNumericPrefix(123)).toEqual(123); + }); + }); + + describe('sortTableDataItems', () => { + it('sorts experimentAlias by alias number', () => { + expect( + sortTableDataItems( + [ + { + id: 'row 1 id', + experimentAlias: { + aliasNumber: 5, + }, + }, + { + id: 'row 2 id', + experimentAlias: { + aliasNumber: 3, + }, + }, + ], + { + order: SortingOrder.ASCENDING, + name: 'experimentAlias', + } + ) + ).toEqual([ + { + id: 'row 2 id', + experimentAlias: { + aliasNumber: 3, + }, + }, + { + id: 'row 1 id', + experimentAlias: { + aliasNumber: 5, + }, + }, + ]); + }); + + it('sorts runs by their leading numbers', () => { + expect( + sortTableDataItems( + [ + { + id: 'row 1 id', + name: '1/myrun', + }, + { + id: 'row 2 id', + name: '2/myrun', + }, + { + id: 'row 3 id', + name: '10/myrun', + }, + ], + { + order: SortingOrder.ASCENDING, + name: 'name', + } + ) + ).toEqual([ + { + id: 'row 1 id', + name: '1/myrun', + }, + { + id: 'row 2 id', + name: '2/myrun', + }, + { + id: 'row 3 id', + name: '10/myrun', + }, + ]); + }); + + it('sorts runs with purely numeric run names before runs with leading numbers', () => { + expect( + sortTableDataItems( + [ + { + id: 'row 1 id', + name: '0', + }, + { + id: 'row 2 id', + name: '0/myrun2', + }, + { + id: 'row 3 id', + name: '0/myrun1', + }, + ], + { + order: SortingOrder.ASCENDING, + name: 'name', + } + ) + ).toEqual([ + { + id: 'row 1 id', + name: '0', + }, + { + id: 'row 3 id', + name: '0/myrun1', + }, + { + id: 'row 2 id', + name: '0/myrun2', + }, + ]); + }); + + it('sorts runs with string names', () => { + expect( + sortTableDataItems( + [ + { + id: 'row 1 id', + 
name: 'aaa', + }, + { + id: 'row 2 id', + name: 'bbb', + }, + { + id: 'row 3 id', + name: 'ccc', + }, + ], + { + order: SortingOrder.ASCENDING, + name: 'name', + } + ) + ).toEqual([ + { + id: 'row 1 id', + name: 'aaa', + }, + { + id: 'row 2 id', + name: 'bbb', + }, + { + id: 'row 3 id', + name: 'ccc', + }, + ]); + }); + + it('shows runs without numbers before runs with numbers', () => { + expect( + sortTableDataItems( + [ + { + id: 'row 1 id', + name: 'aaa', + }, + { + id: 'row 2 id', + name: '1aaa', + }, + { + id: 'row 3 id', + name: '2bbb', + }, + ], + { + order: SortingOrder.ASCENDING, + name: 'name', + } + ) + ).toEqual([ + { + id: 'row 1 id', + name: 'aaa', + }, + { + id: 'row 2 id', + name: '1aaa', + }, + { + id: 'row 3 id', + name: '2bbb', + }, + ]); + }); + + it('places undefined values at the end', () => { + const input: any = [ + { + id: 'row 1 id', + foo: '1/myrun', + }, + { + id: 'row 2 id', + }, + { + id: 'row 3 id', + foo: '10/myrun', + }, + ]; + + expect( + sortTableDataItems(input, { + order: SortingOrder.ASCENDING, + name: 'foo', + }) + ).toEqual([ + { + id: 'row 1 id', + foo: '1/myrun', + }, + { + id: 'row 3 id', + foo: '10/myrun', + }, + { + id: 'row 2 id', + }, + ]); + + expect( + sortTableDataItems(input, { + order: SortingOrder.DESCENDING, + name: 'foo', + }) + ).toEqual([ + { + id: 'row 3 id', + foo: '10/myrun', + }, + { + id: 'row 1 id', + foo: '1/myrun', + }, + { + id: 'row 2 id', + }, + ]); + }); + }); +}); From ec708ef1c40d201b3e856016877fdbafe380f67a Mon Sep 17 00:00:00 2001 From: Brian Dubois Date: Thu, 2 Nov 2023 08:18:40 -0400 Subject: [PATCH 7/8] Add 2.15.1 relnotes to RELEASE.md. --- RELEASE.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index ccdf1d931d..4045140f64 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,19 @@ +# Release 2.15.1 + +## Breaking Changes + +- TensorBoard.dev is shutting down. See the FAQ at https://tensorboard.dev. + - No longer able to upload new data to TensorBoard.dev. The `tensorboard dev upload` command will fail. (#6638) + - The experimental dataframe api has been deleted. (#6644) + +## Bug Fixes + +- Time Series dashboard: + - Sort run names with leading numbers differently. (#6664) + - Show scrollbar in runs table only when needed. (#6656) + - Fix 'Prev' and 'Next' buttons in dark mode. (#6663) + - Better loading/reloading behavior for runs table. (#6658) + # Release 2.15.0 The 2.15 minor series tracks TensorFlow 2.15. From 892e19990a1652f88fb32eeb368f17f50998f203 Mon Sep 17 00:00:00 2001 From: Brian Dubois Date: Thu, 2 Nov 2023 08:21:49 -0400 Subject: [PATCH 8/8] TensorBoard 2.15.1 --- tensorboard/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorboard/version.py b/tensorboard/version.py index 2630468e2f..87847ea5d7 100644 --- a/tensorboard/version.py +++ b/tensorboard/version.py @@ -15,7 +15,7 @@ """Contains the version string.""" -VERSION = "2.15.0" +VERSION = "2.15.1" if __name__ == "__main__": print(VERSION)