Skip to content

Commit 2681aed

Browse files
authored
server: when no port is specified, try to find one (#1851)
Summary:
This patch teaches TensorBoard about `--port=default` (which is the new default value for `--port`). If `--port=default` is specified, TensorBoard will attempt to bind to port 6006, just as prior to this patch. If this fails, TensorBoard will try to bind incrementally to ports 6007, 6008, …, giving up after 10 attempts.

If an explicit port is specified, TensorBoard will not try to search for other ports; it will fail (as before) if it cannot bind. This is the only way in which `--port=default` and `--port=6006` differ.

This patch also changes the behavior of one edge case, at least on Linux. Prior to this patch, explicit port numbers were interpreted modulo 65536 (TCP ports are u16s), so `--port=71542` and `--port=6006` had the same behavior. It is now an error on all platforms to specify a port that does not fit in a u16.

Resolves #1848.

Test Plan:

    $ bazel build //tensorboard
    $ ./bazel-bin/tensorboard/tensorboard --logdir ./logs/ & sleep 2
    [1] 223547
    TensorBoard 1.13.0a0 at http://<hostname>:6006 (Press CTRL+C to quit)
    $ ./bazel-bin/tensorboard/tensorboard --logdir ./logs/ & sleep 2
    [2] 223591
    TensorBoard 1.13.0a0 at http://<hostname>:6007 (Press CTRL+C to quit)
    $ ./bazel-bin/tensorboard/tensorboard --logdir ./logs/ --port default & sleep 2
    [3] 224292
    TensorBoard 1.13.0a0 at http://<hostname>:6008 (Press CTRL+C to quit)
    $ ./bazel-bin/tensorboard/tensorboard --logdir ./logs/ --port 6006 & sleep 2 # should fail
    [4] 225773
    E0214 15:35:08.939894 140033352959744 program.py:232] TensorBoard could not bind to port 6006, it was already in use
    ERROR: TensorBoard could not bind to port 6006, it was already in use
    [4]+ Exit 255 ./bazel-bin/tensorboard/tensorboard --logdir ./logs/ --port 6006
    $ ./bazel-bin/tensorboard/tensorboard --logdir ./logs/ --port 0 & sleep 2
    [4] 226298
    TensorBoard 1.13.0a0 at http://<hostname>:33673 (Press CTRL+C to quit)
    $ for i in {6009..6015}; do ./bazel-bin/tensorboard/tensorboard --logdir ./logs/ --port "$i" & done && sleep 2
    [5] 226638
    [6] 226639
    [7] 226640
    [8] 226641
    [9] 226642
    [10] 226643
    [11] 226644
    TensorBoard 1.13.0a0 at http://<hostname>:6011 (Press CTRL+C to quit)
    TensorBoard 1.13.0a0 at http://<hostname>:6013 (Press CTRL+C to quit)
    TensorBoard 1.13.0a0 at http://<hostname>:6015 (Press CTRL+C to quit)
    TensorBoard 1.13.0a0 at http://<hostname>:6012 (Press CTRL+C to quit)
    TensorBoard 1.13.0a0 at http://<hostname>:6014 (Press CTRL+C to quit)
    TensorBoard 1.13.0a0 at http://<hostname>:6009 (Press CTRL+C to quit)
    TensorBoard 1.13.0a0 at http://<hostname>:6010 (Press CTRL+C to quit)
    $ ./bazel-bin/tensorboard/tensorboard --logdir ./logs/ & sleep 2
    [12] 227173
    E0214 15:35:33.812504 140362460620544 program.py:232] TensorBoard could not bind to any port around 6006 (tried 10 times)
    ERROR: TensorBoard could not bind to any port around 6006 (tried 10 times)
    $ jobs -p | xargs kill

wchargin-branch: port-search
1 parent 9b6e29e commit 2681aed

File tree

2 files changed

+67
-27
lines changed

2 files changed

+67
-27
lines changed

tensorboard/plugins/core/core_plugin.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@
3737
logger = tb_logging.get_logger()
3838

3939

40+
# If no port is specified, try to bind to this port. See help for --port
41+
# for more details.
42+
DEFAULT_PORT = 6006
43+
44+
4045
class CorePlugin(base_plugin.TBPlugin):
4146
"""Core plugin for TensorBoard.
4247
@@ -294,12 +299,14 @@ def define_flags(self, parser):
294299
parser.add_argument(
295300
'--port',
296301
metavar='PORT',
297-
type=int,
298-
default=6006,
302+
type=lambda s: (None if s == "default" else int(s)),
303+
default="default",
299304
help='''\
300305
Port to serve TensorBoard on. Pass 0 to request an unused port selected
301-
by the operating system. (default: %(default)s)\
302-
''')
306+
by the operating system, or pass "default" to try to bind to the default
307+
port (%s) but search for a nearby free port if the default port is
308+
unavailable. (default: "default").\
309+
''' % DEFAULT_PORT)
303310

304311
parser.add_argument(
305312
'--purge_orphaned_data',

tensorboard/program.py

Lines changed: 56 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,15 @@
4545

4646
import six
4747
from six.moves import urllib
48+
from six.moves import xrange # pylint: disable=redefined-builtin
4849
from werkzeug import serving
4950

5051
from tensorboard import manager
5152
from tensorboard import version
5253
from tensorboard.backend import application
5354
from tensorboard.backend.event_processing import event_file_inspector as efi
5455
from tensorboard.plugins import base_plugin
56+
from tensorboard.plugins.core import core_plugin
5557
from tensorboard.util import tb_logging
5658
from tensorboard.util import util
5759

@@ -347,36 +349,67 @@ class WerkzeugServer(serving.ThreadedWSGIServer, TensorBoardServer):
347349
def __init__(self, wsgi_app, flags):
348350
self._flags = flags
349351
host = flags.host
352+
353+
# base_port: what's the first port to which we should try to bind?
354+
# should_scan: if that fails, shall we try additional ports?
355+
(base_port, should_scan) = (
356+
(flags.port, False)
357+
if flags.port is not None
358+
else (core_plugin.DEFAULT_PORT, True)
359+
)
360+
if base_port > 0xFFFF:
361+
raise TensorBoardServerException(
362+
'TensorBoard cannot bind to port %d > %d' % (base_port, 0xFFFF)
363+
)
364+
max_attempts = 10 if should_scan else 1
365+
base_port = min(base_port + max_attempts, 65536) - max_attempts
366+
350367
self._auto_wildcard = False
351368
if not host:
352369
# Without an explicit host, we default to serving on all interfaces,
353370
# and will attempt to serve both IPv4 and IPv6 traffic through one socket.
354-
host = self._get_wildcard_address(flags.port)
371+
host = self._get_wildcard_address(base_port)
355372
self._auto_wildcard = True
356-
try:
357-
super(WerkzeugServer, self).__init__(host, flags.port, wsgi_app)
358-
except socket.error as e:
359-
if hasattr(errno, 'EACCES') and e.errno == errno.EACCES:
360-
raise TensorBoardServerException(
361-
'TensorBoard must be run as superuser to bind to port %d' %
362-
flags.port)
363-
elif hasattr(errno, 'EADDRINUSE') and e.errno == errno.EADDRINUSE:
364-
if flags.port == 0:
373+
374+
for (attempt_index, port) in (
375+
enumerate(xrange(base_port, base_port + max_attempts))):
376+
try:
377+
# Yes, this invokes the super initializer potentially many
378+
# times. This seems to work fine, and looking at the superclass
379+
# chain (type(self).__mro__) it doesn't seem that anything
380+
# _should_ go wrong (nor does any superclass provide a facility
381+
# to do this natively).
382+
super(WerkzeugServer, self).__init__(host, port, wsgi_app)
383+
break
384+
except socket.error as e:
385+
if hasattr(errno, 'EACCES') and e.errno == errno.EACCES:
386+
raise TensorBoardServerException(
387+
'TensorBoard must be run as superuser to bind to port %d' %
388+
port)
389+
elif hasattr(errno, 'EADDRINUSE') and e.errno == errno.EADDRINUSE:
390+
if attempt_index < max_attempts - 1:
391+
continue
392+
if port == 0:
393+
raise TensorBoardServerException(
394+
'TensorBoard unable to find any open port')
395+
elif should_scan:
396+
raise TensorBoardServerException(
397+
'TensorBoard could not bind to any port around %s '
398+
'(tried %d times)'
399+
% (base_port, max_attempts))
400+
else:
401+
raise TensorBoardServerException(
402+
'TensorBoard could not bind to port %d, it was already in use' %
403+
port)
404+
elif hasattr(errno, 'EADDRNOTAVAIL') and e.errno == errno.EADDRNOTAVAIL:
365405
raise TensorBoardServerException(
366-
'TensorBoard unable to find any open port')
367-
else:
406+
'TensorBoard could not bind to unavailable address %s' % host)
407+
elif hasattr(errno, 'EAFNOSUPPORT') and e.errno == errno.EAFNOSUPPORT:
368408
raise TensorBoardServerException(
369-
'TensorBoard could not bind to port %d, it was already in use' %
370-
flags.port)
371-
elif hasattr(errno, 'EADDRNOTAVAIL') and e.errno == errno.EADDRNOTAVAIL:
372-
raise TensorBoardServerException(
373-
'TensorBoard could not bind to unavailable address %s' % host)
374-
elif hasattr(errno, 'EAFNOSUPPORT') and e.errno == errno.EAFNOSUPPORT:
375-
raise TensorBoardServerException(
376-
'Tensorboard could not bind to unsupported address family %s' %
377-
host)
378-
# Raise the raw exception if it wasn't identifiable as a user error.
379-
raise
409+
'Tensorboard could not bind to unsupported address family %s' %
410+
host)
411+
# Raise the raw exception if it wasn't identifiable as a user error.
412+
raise
380413

381414
def _get_wildcard_address(self, port):
382415
"""Returns a wildcard address for the port in question.

0 commit comments

Comments
 (0)