diff --git a/tensorboard/plugin_util.py b/tensorboard/plugin_util.py index 5952fd5d4c..130dc3a1dd 100644 --- a/tensorboard/plugin_util.py +++ b/tensorboard/plugin_util.py @@ -90,6 +90,22 @@ def __init__(self): _CLEANER_STORE = _CleanerStore() +def safe_html(unsafe_string): + """Return the input as a str, sanitized for insertion into the DOM. + + Arguments: + unsafe_string: A Unicode string or UTF-8--encoded bytestring + possibly containing unsafe HTML markup. + + Returns: + A string containing safe HTML. + """ + total_null_bytes = 0 + if isinstance(unsafe_string, bytes): + unsafe_string = unsafe_string.decode("utf-8") + return _CLEANER_STORE.cleaner.clean(unsafe_string) + + def markdown_to_safe_html(markdown_string): """Convert Markdown to HTML that's safe to splice into the DOM. diff --git a/tensorboard/plugin_util_test.py b/tensorboard/plugin_util_test.py index c22b03e4ea..3160a4cc1e 100644 --- a/tensorboard/plugin_util_test.py +++ b/tensorboard/plugin_util_test.py @@ -22,6 +22,58 @@ from tensorboard.backend import experiment_id +class SafeHTMLTest(tb_test.TestCase): + def test_empty_input(self): + self.assertEqual(plugin_util.safe_html(""), "") + + def test_whitelisted_tags_and_attributes_allowed(self): + s = ( + 'Check out ' + "my website!" + ) + self.assertEqual(plugin_util.safe_html(s), "%s" % s) + + def test_arbitrary_tags_and_attributes_removed(self): + self.assertEqual( + plugin_util.safe_html( + "We should bring back the ; " + '' + "sign the petition!" + ), + "We should bring back the " + "<blink>blink tag</blink>; " + 'sign the petition!', + ) + + def test_javascript_hrefs_sanitized(self): + self.assertEqual( + plugin_util.safe_html( + 'A sketchy link for you' + ), + "A sketchy link for you", + ) + + def test_byte_strings_interpreted_as_utf8(self): + s = "Look\u2014some UTF-8!".encode("utf-8") + assert isinstance(s, bytes), (type(s), bytes) + self.assertEqual(plugin_util.safe_html(s), "Look\u2014some UTF-8!") + + def test_unicode_strings_passed_through(self): + s = "Look\u2014some UTF-8!" + assert not isinstance(s, bytes), (type(s), bytes) + self.assertEqual(plugin_util.safe_html(s), "Look\u2014some UTF-8!") + + def test_null_bytes_stripped(self): + # If this function is mistakenly called with UTF-16 or UTF-32 encoded text, + # there will probably be a bunch of null bytes. Ensure these are stripped. + s = "un_der_score".encode("utf-32-le") + # UTF-32 encoding of ASCII will have 3 null bytes per char. 36 = 3 * 12. + self.assertEqual( + plugin_util.safe_html(s), + "un_der_score", + ) + + class MarkdownToSafeHTMLTest(tb_test.TestCase): def _test(self, markdown_string, expected): actual = plugin_util.markdown_to_safe_html(markdown_string) diff --git a/tensorboard/plugins/text/text_plugin.py b/tensorboard/plugins/text/text_plugin.py index 4574115ca3..daf74b3146 100644 --- a/tensorboard/plugins/text/text_plugin.py +++ b/tensorboard/plugins/text/text_plugin.py @@ -152,7 +152,7 @@ def reduce_to_2d(arr): return arr[slices] -def text_array_to_html(text_arr): +def text_array_to_html(text_arr, enable_markdown): """Take a numpy.ndarray containing strings, and convert it into html. If the ndarray contains a single scalar string, that string is converted to @@ -164,29 +164,42 @@ def text_array_to_html(text_arr): Args: text_arr: A numpy.ndarray containing strings. + enable_markdown: boolean, whether to enable Markdown Returns: The array converted to html. """ if not text_arr.shape: - # It is a scalar. No need to put it in a table, just apply markdown - return plugin_util.markdown_to_safe_html(text_arr.item()) + # It is a scalar. No need to put it in a table. + if enable_markdown: + return plugin_util.markdown_to_safe_html(text_arr.item()) + else: + return plugin_util.safe_html(text_arr.item()) warning = "" if len(text_arr.shape) > 2: warning = plugin_util.markdown_to_safe_html( WARNING_TEMPLATE % len(text_arr.shape) ) text_arr = reduce_to_2d(text_arr) - table = plugin_util.markdowns_to_safe_html( - text_arr.reshape(-1), - lambda xs: make_table(np.array(xs).reshape(text_arr.shape)), - ) + if enable_markdown: + table = plugin_util.markdowns_to_safe_html( + text_arr.reshape(-1), + lambda xs: make_table(np.array(xs).reshape(text_arr.shape)), + ) + else: + # Convert utf-8 bytes to str. The built-in np.char.decode doesn't work on + # object arrays, and converting to an numpy chararray is lossy. + decode = lambda bs: bs.decode("utf-8") if isinstance(bs, bytes) else bs + text_arr_str = np.array( + [decode(bs) for bs in text_arr.reshape(-1)] + ).reshape(text_arr.shape) + table = plugin_util.safe_html(make_table(text_arr_str)) return warning + table -def process_event(wall_time, step, string_ndarray): +def process_event(wall_time, step, string_ndarray, enable_markdown): """Convert a text event into a JSON-compatible response.""" - html = text_array_to_html(string_ndarray) + html = text_array_to_html(string_ndarray, enable_markdown) return { "wall_time": wall_time, "step": step, @@ -242,7 +255,7 @@ def tags_route(self, request): index = self.index_impl(ctx, experiment) return http_util.Respond(request, index, "application/json") - def text_impl(self, ctx, run, tag, experiment): + def text_impl(self, ctx, run, tag, experiment, enable_markdown): all_text = self._data_provider.read_tensors( ctx, experiment_id=experiment, @@ -253,7 +266,10 @@ def text_impl(self, ctx, run, tag, experiment): text = all_text.get(run, {}).get(tag, None) if text is None: return [] - return [process_event(d.wall_time, d.step, d.numpy) for d in text] + return [ + process_event(d.wall_time, d.step, d.numpy, enable_markdown) + for d in text + ] @wrappers.Request.application def text_route(self, request): @@ -261,7 +277,9 @@ def text_route(self, request): experiment = plugin_util.experiment_id(request.environ) run = request.args.get("run") tag = request.args.get("tag") - response = self.text_impl(ctx, run, tag, experiment) + markdown_arg = request.args.get("markdown") + enable_markdown = markdown_arg != "false" # Default to enabled. + response = self.text_impl(ctx, run, tag, experiment, enable_markdown) return http_util.Respond(request, response, "application/json") def get_plugin_apps(self): diff --git a/tensorboard/plugins/text/text_plugin_test.py b/tensorboard/plugins/text/text_plugin_test.py index aeedeb7144..2187f10b68 100644 --- a/tensorboard/plugins/text/text_plugin_test.py +++ b/tensorboard/plugins/text/text_plugin_test.py @@ -76,7 +76,8 @@ def generate_testdata(self, logdir=None): writer.add_summary(summ, global_step=step) step += 1 - vector_message = ["one", "two", "three", "four"] + # Test unicode superscript 4. + vector_message = ["one", "two", "three", "\u2074"] summ = sess.run( vector_summary, feed_dict={placeholder: vector_message} ) @@ -101,22 +102,38 @@ def testIndex(self): def testText(self): plugin = self.load_plugin() fry = plugin.text_impl( - context.RequestContext(), "fry", "message", experiment="123" + context.RequestContext(), + "fry", + "message", + experiment="123", + enable_markdown=True, ) leela = plugin.text_impl( - context.RequestContext(), "leela", "message", experiment="123" + context.RequestContext(), + "leela", + "message", + experiment="123", + enable_markdown=False, ) self.assertEqual(len(fry), 4) self.assertEqual(len(leela), 4) for i in range(4): self.assertEqual(fry[i]["step"], i) self.assertEqual(leela[i]["step"], i) - - table = plugin.text_impl( - context.RequestContext(), "fry", "vector", experiment="123" + self.assertEqual( + fry[i]["text"], "
fry loves %s
" % GEMS[i] + ) + self.assertEqual(leela[i]["text"], "leela *loves* %s" % GEMS[i]) + + md_table = plugin.text_impl( + context.RequestContext(), + "fry", + "vector", + experiment="123", + enable_markdown=True, )[0]["text"] self.assertEqual( - table, + md_table, textwrap.dedent( """\three |
|
four |
+ \u2074 |
+
| one | +
| two | +
| three | +
| \u2074 |
foo
" self.assertEqual(convert(scalar), scalar_expected) diff --git a/tensorboard/plugins/text/tf_text_dashboard/BUILD b/tensorboard/plugins/text/tf_text_dashboard/BUILD index d2199e1c44..37bc28b732 100644 --- a/tensorboard/plugins/text/tf_text_dashboard/BUILD +++ b/tensorboard/plugins/text/tf_text_dashboard/BUILD @@ -22,6 +22,7 @@ tf_ts_library( "//tensorboard/components/tf_markdown_view", "//tensorboard/components/tf_paginated_view", "//tensorboard/components/tf_runs_selector", + "//tensorboard/components/tf_storage", "@npm//@polymer/decorators", "@npm//@polymer/polymer", "@npm//@types/d3", diff --git a/tensorboard/plugins/text/tf_text_dashboard/tf-text-dashboard.ts b/tensorboard/plugins/text/tf_text_dashboard/tf-text-dashboard.ts index 2d25ee5d56..32ccad09d0 100644 --- a/tensorboard/plugins/text/tf_text_dashboard/tf-text-dashboard.ts +++ b/tensorboard/plugins/text/tf_text_dashboard/tf-text-dashboard.ts @@ -30,7 +30,7 @@ import '../../../components/tf_dashboard_common/dashboard-style'; import '../../../components/tf_dashboard_common/tf-dashboard-layout'; import '../../../components/tf_paginated_view/tf-category-paginated-view'; import '../../../components/tf_runs_selector/tf-runs-selector'; - +import * as tf_storage from '../../../components/tf_storage/storage'; import './tf-text-loader'; @customElement('tf-text-dashboard') @@ -38,6 +38,13 @@ class TfTextDashboard extends LegacyElementMixin(PolymerElement) { static readonly template = html`