Skip to content

Commit 55d0455

Browse files
committed
uploader: fix rate limiting for logdir polling
Summary: Recent refactorings to the RPC rate limiting had the unintended side effect of removing rate limiting on logdir polling. This meant that after all data had been read from the logdir, the uploader would continue to poll it aggressively (thousands of times per second on my machine), which is bad for disks and expensive on network file systems. This commit adds a separate rate limiter for the logdir polling. We don’t reuse the RPC rate limiter because that would force us to tick twice on the same timer every cycle. Fixes #3001. Test Plan: Run `bazel run //tensorboard -- dev upload --logdir /nope --verbosity 0` (with any logdir path, but it’s easier when it’s empty), and note that the “Starting an upload cycle” logs now progress at 0.2 per second rather than 6000 per second. TODO: Unit tests. wchargin-branch: uploader-rate-limit-polling wchargin-source: 2036f1c749e0c2f4e6db2362257e1f3f3a4342f2
1 parent 5147a5e commit 55d0455

File tree

2 files changed

+20
-0
lines changed

2 files changed

+20
-0
lines changed

tensorboard/uploader/uploader.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,11 @@
3838
from tensorboard.util import tb_logging
3939
from tensorboard.util import tensor_util
4040

41+
# Minimum length of a logdir polling cycle in seconds. Shorter cycles will
42+
# sleep to avoid spinning over the logdir, which isn't great for disks and can
43+
# be expensive for network file systems.
44+
_MIN_LOGDIR_POLL_INTERVAL_SECS = 5
45+
4146
# Minimum interval between initiating write RPCs. When writes would otherwise
4247
# happen more frequently, the process will sleep to use up the rest of the time.
4348
_MIN_WRITE_RPC_INTERVAL_SECS = 5
@@ -70,6 +75,7 @@ def __init__(
7075
writer_client,
7176
logdir,
7277
allowed_plugins,
78+
logdir_poll_rate_limiter=None,
7379
rpc_rate_limiter=None,
7480
name=None,
7581
description=None,
@@ -82,6 +88,9 @@ def __init__(
8288
allowed_plugins: collection of string plugin names; events will only
8389
be uploaded if their time series's metadata specifies one of these
8490
plugin names
91+
logdir_poll_rate_limiter: a `RateLimiter` to use to limit logdir
92+
polling frequency, to avoid thrashing disks, especially on networked
93+
file systems.
8594
rpc_rate_limiter: a `RateLimiter` to use to limit write RPC frequency.
8695
Note this limit applies at the level of single RPCs in the Scalar
8796
and Tensor case, but at the level of an entire blob upload in the
@@ -98,6 +107,12 @@ def __init__(
98107
self._name = name
99108
self._description = description
100109
self._request_sender = None
110+
if logdir_poll_rate_limiter is None:
111+
self._logdir_poll_rate_limiter = util.RateLimiter(
112+
_MIN_LOGDIR_POLL_INTERVAL_SECS
113+
)
114+
else:
115+
self._logdir_poll_rate_limiter = logdir_poll_rate_limiter
101116
if rpc_rate_limiter is None:
102117
self._rpc_rate_limiter = util.RateLimiter(
103118
_MIN_WRITE_RPC_INTERVAL_SECS
@@ -147,6 +162,7 @@ def start_uploading(self):
147162
"Must call create_experiment() before start_uploading()"
148163
)
149164
while True:
165+
self._logdir_poll_rate_limiter.tick()
150166
self._upload_once()
151167

152168
def _upload_once(self):

tensorboard/uploader/uploader_test.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ def _create_uploader(
8181
writer_client=_USE_DEFAULT,
8282
logdir=None,
8383
allowed_plugins=_USE_DEFAULT,
84+
logdir_poll_rate_limiter=_USE_DEFAULT,
8485
rpc_rate_limiter=_USE_DEFAULT,
8586
name=None,
8687
description=None,
@@ -89,12 +90,15 @@ def _create_uploader(
8990
writer_client = _create_mock_client()
9091
if allowed_plugins is _USE_DEFAULT:
9192
allowed_plugins = _SCALARS_ONLY
93+
if logdir_poll_rate_limiter is _USE_DEFAULT:
94+
logdir_poll_rate_limiter = util.RateLimiter(0)
9295
if rpc_rate_limiter is _USE_DEFAULT:
9396
rpc_rate_limiter = util.RateLimiter(0)
9497
return uploader_lib.TensorBoardUploader(
9598
writer_client,
9699
logdir,
97100
allowed_plugins=allowed_plugins,
101+
logdir_poll_rate_limiter=logdir_poll_rate_limiter,
98102
rpc_rate_limiter=rpc_rate_limiter,
99103
name=name,
100104
description=description,

0 commit comments

Comments
 (0)