diff --git a/tensorboard/BUILD b/tensorboard/BUILD index 585a173d65..e3c5641c0e 100644 --- a/tensorboard/BUILD +++ b/tensorboard/BUILD @@ -104,6 +104,30 @@ py_library( deps = ["//tensorboard:expect_sqlite3_installed"], ) +py_library( + name = "plugin_util", + srcs = ["plugin_util.py"], + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], + deps = [ + "@org_mozilla_bleach", + "@org_pythonhosted_markdown", + "@org_pythonhosted_six", + ], +) + +py_test( + name = "plugin_util_test", + size = "small", + srcs = ["plugin_util_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":plugin_util", + "//tensorboard:expect_tensorflow_installed", + "@org_pythonhosted_six", + ], +) + py_library( name = "util", srcs = ["util.py"], diff --git a/tensorboard/plugin_util.py b/tensorboard/plugin_util.py new file mode 100644 index 0000000000..6e754b65ec --- /dev/null +++ b/tensorboard/plugin_util.py @@ -0,0 +1,80 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Provides utilities that may be especially useful to plugins.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import bleach +# pylint: disable=g-bad-import-order +# Google-only: import markdown_freewisdom +import markdown +import six + + +_ALLOWED_ATTRIBUTES = { + 'a': ['href', 'title'], + 'img': ['src', 'title', 'alt'], +} + +_ALLOWED_TAGS = [ + 'ul', + 'ol', + 'li', + 'p', + 'pre', + 'code', + 'blockquote', + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', + 'hr', + 'br', + 'strong', + 'em', + 'a', + 'img', + 'table', + 'thead', + 'tbody', + 'td', + 'tr', + 'th', +] + + +def markdown_to_safe_html(markdown_string): + """Convert Markdown to HTML that's safe to splice into the DOM. + + Arguments: + markdown_string: A Unicode string or UTF-8--encoded bytestring + containing Markdown source. Markdown tables are supported. + + Returns: + A string containing safe HTML. + """ + # Convert to utf-8 whenever we have a binary input. + if isinstance(markdown_string, six.binary_type): + markdown_string = markdown_string.decode('utf-8') + + string_html = markdown.markdown( + markdown_string, extensions=['markdown.extensions.tables']) + string_sanitized = bleach.clean( + string_html, tags=_ALLOWED_TAGS, attributes=_ALLOWED_ATTRIBUTES) + return string_sanitized diff --git a/tensorboard/plugin_util_test.py b/tensorboard/plugin_util_test.py new file mode 100644 index 0000000000..6595b8a33e --- /dev/null +++ b/tensorboard/plugin_util_test.py @@ -0,0 +1,113 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import textwrap + +import six +import tensorflow as tf + +from tensorboard import plugin_util + + +class MarkdownToSafeHTMLTest(tf.test.TestCase): + + def _test(self, markdown_string, expected): + actual = plugin_util.markdown_to_safe_html(markdown_string) + self.assertEqual(expected, actual) + + def test_empty_input(self): + self._test(u'', u'') + + def test_basic_formatting(self): + self._test(u'# _Hello_, **world!**\n\n' + 'Check out [my website](http://example.com)!', + u'
Check out my website!
') + + def test_table_formatting(self): + self._test( + textwrap.dedent( + u"""\ + Here is some data: + + TensorBoard usage | Happiness + ------------------|---------- + 0.0 | 0.0 + 0.5 | 0.5 + 1.0 | 1.0 + + Wouldn't you agree?"""), + textwrap.dedent( + u"""\ +Here is some data:
+| TensorBoard usage | +Happiness | +
|---|---|
| 0.0 | +0.0 | +
| 0.5 | +0.5 | +
| 1.0 | +1.0 | +
Wouldn't you agree?
""")) + + def test_whitelisted_tags_and_attributes_allowed(self): + s = (u'Check out ' + 'my website!') + self._test(s, u'%s
' % s) + + def test_arbitrary_tags_and_attributes_removed(self): + self._test(u'We should bring back the ; ' + '' + 'sign the petition!', + u'We should bring back the ' + '<blink>blink tag</blink>; ' + 'sign the petition!
') + + def test_javascript_hrefs_sanitized(self): + self._test(u'A sketchy link for you', + u'A sketchy link for you
') + + def test_byte_strings_interpreted_as_utf8(self): + s = u'> Look\u2014some UTF-8!'.encode('utf-8') + assert isinstance(s, six.binary_type), (type(s), six.binary_type) + self._test(s, + u'\n') + + def test_unicode_strings_passed_through(self): + s = u'> Look\u2014some UTF-8!' + assert not isinstance(s, six.binary_type), (type(s), six.binary_type) + self._test(s, + u'Look\u2014some UTF-8!
\n
\n') + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorboard/plugins/histogram/BUILD b/tensorboard/plugins/histogram/BUILD index 8832751cfc..f345772232 100644 --- a/tensorboard/plugins/histogram/BUILD +++ b/tensorboard/plugins/histogram/BUILD @@ -15,6 +15,7 @@ py_library( visibility = ["//visibility:public"], deps = [ ":metadata", + "//tensorboard:plugin_util", "//tensorboard/backend:http_util", "//tensorboard/backend/event_processing:event_accumulator", "//tensorboard/plugins:base_plugin", diff --git a/tensorboard/plugins/histogram/histograms_plugin.py b/tensorboard/plugins/histogram/histograms_plugin.py index 64129918ef..8777401495 100644 --- a/tensorboard/plugins/histogram/histograms_plugin.py +++ b/tensorboard/plugins/histogram/histograms_plugin.py @@ -31,6 +31,7 @@ import numpy as np import tensorflow as tf +from tensorboard import plugin_util from tensorboard.backend import http_util from tensorboard.backend.event_processing import event_accumulator from tensorboard.plugins import base_plugin @@ -83,7 +84,8 @@ def index_impl(self): content = metadata.parse_summary_metadata(content) summary_metadata = self._multiplexer.SummaryMetadata(run, tag) result[run][tag] = {'displayName': summary_metadata.display_name, - 'description': summary_metadata.summary_description} + 'description': plugin_util.markdown_to_safe_html( + summary_metadata.summary_description)} return result diff --git a/tensorboard/plugins/histogram/histograms_plugin_test.py b/tensorboard/plugins/histogram/histograms_plugin_test.py index 1deeb2aaa1..cba04ce785 100644 --- a/tensorboard/plugins/histogram/histograms_plugin_test.py +++ b/tensorboard/plugins/histogram/histograms_plugin_test.py @@ -42,7 +42,8 @@ class HistogramsPluginTest(tf.test.TestCase): _SCALAR_TAG = 'my-boring-scalars' _DISPLAY_NAME = 'Important production statistics' - _DESCRIPTION = 'quod erat scribendum' + _DESCRIPTION = 'quod *erat* scribendum' + _HTML_DESCRIPTION = 'Look\u2014some UTF-8!
\n
quod erat scribendum
' _RUN_WITH_LEGACY_HISTOGRAM = '_RUN_WITH_LEGACY_HISTOGRAM' _RUN_WITH_HISTOGRAM = '_RUN_WITH_HISTOGRAM' @@ -115,7 +116,7 @@ def test_index(self): self._RUN_WITH_HISTOGRAM: { '%s/histogram_summary' % self._HISTOGRAM_TAG: { 'displayName': self._DISPLAY_NAME, - 'description': self._DESCRIPTION, + 'description': self._HTML_DESCRIPTION, }, }, }, self.plugin.index_impl()) diff --git a/tensorboard/plugins/text/BUILD b/tensorboard/plugins/text/BUILD index 5eb24b6c72..3c0a11c42f 100644 --- a/tensorboard/plugins/text/BUILD +++ b/tensorboard/plugins/text/BUILD @@ -15,6 +15,7 @@ py_library( visibility = ["//visibility:public"], deps = [ "//tensorboard:expect_tensorflow_installed", + "//tensorboard:plugin_util", "//tensorboard/backend:http_util", "//tensorboard/plugins:base_plugin", "@org_mozilla_bleach", @@ -33,6 +34,7 @@ py_test( deps = [ ":text_plugin", "//tensorboard:expect_tensorflow_installed", + "//tensorboard:plugin_util", "//tensorboard/backend:application", "//tensorboard/backend/event_processing:event_multiplexer", "//tensorboard/plugins:base_plugin", diff --git a/tensorboard/plugins/text/text_plugin.py b/tensorboard/plugins/text/text_plugin.py index f13a3cd637..a564cad432 100644 --- a/tensorboard/plugins/text/text_plugin.py +++ b/tensorboard/plugins/text/text_plugin.py @@ -27,15 +27,10 @@ import numpy as np # pylint: enable=g-bad-import-order -import bleach -# pylint: disable=g-bad-import-order -# Google-only: import markdown_freewisdom -import markdown -import six -# pylint: enable=g-bad-import-order import tensorflow as tf from werkzeug import wrappers +from tensorboard import plugin_util from tensorboard.backend import http_util from tensorboard.plugins import base_plugin @@ -46,67 +41,12 @@ TAGS_ROUTE = '/tags' TEXT_ROUTE = '/text' -ALLOWED_TAGS = [ - 'ul', - 'ol', - 'li', - 'p', - 'pre', - 'code', - 'blockquote', - 'h1', - 'h2', - 'h3', - 'h4', - 'h5', - 'h6', - 'hr', - 'br', - 'strong', - 'em', - 'a', - 'img', - 'table', - 'thead', - 'tbody', - 'td', - 'tr', - 'th', -] - -ALLOWED_ATTRIBUTES = {'a': ['href', 'title'], 'img': ['src', 'title', 'alt']} WARNING_TEMPLATE = textwrap.dedent("""\ **Warning:** This text summary contained data of dimensionality %d, but only \ 2d tables are supported. Showing a 2d slice of the data instead.""") -def markdown_and_sanitize(markdown_string): - """Takes a markdown string and converts it into sanitized html. - - It uses the table extension; while that's not a part of standard - markdown, it is sure to be useful for TensorBoard users. - - The sanitizer uses the allowed_tags and attributes specified above. Mostly, - we ensure that our standard use cases like tables and links are supported. - - Args: - markdown_string: Markdown string to sanitize - - Returns: - a string containing sanitized html for input markdown - """ - # Convert to utf-8 whenever we have a binary input. - if isinstance(markdown_string, six.binary_type): - markdown_string = markdown_string.decode('utf-8') - - string_html = markdown.markdown( - markdown_string, extensions=['markdown.extensions.tables']) - string_sanitized = bleach.clean( - string_html, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES) - return string_sanitized - - def make_table_row(contents, tag='td'): """Given an iterable of string contents, make a table row. @@ -226,13 +166,16 @@ def text_array_to_html(text_arr): """ if not text_arr.shape: # It is a scalar. No need to put it in a table, just apply markdown - return markdown_and_sanitize(text_arr.astype(np.dtype(str)).tostring()) + return plugin_util.markdown_to_safe_html( + text_arr.astype(np.dtype(str)).tostring()) warning = '' if len(text_arr.shape) > 2: - warning = markdown_and_sanitize(WARNING_TEMPLATE % len(text_arr.shape)) + warning = plugin_util.markdown_to_safe_html(WARNING_TEMPLATE + % len(text_arr.shape)) text_arr = reduce_to_2d(text_arr) - html_arr = [markdown_and_sanitize(x) for x in text_arr.reshape(-1)] + html_arr = [plugin_util.markdown_to_safe_html(x) + for x in text_arr.reshape(-1)] html_arr = np.array(html_arr).reshape(text_arr.shape) return warning + make_table(html_arr) diff --git a/tensorboard/plugins/text/text_plugin_test.py b/tensorboard/plugins/text/text_plugin_test.py index d8967f6e5b..49bb3a2724 100644 --- a/tensorboard/plugins/text/text_plugin_test.py +++ b/tensorboard/plugins/text/text_plugin_test.py @@ -26,6 +26,7 @@ import numpy as np import tensorflow as tf +from tensorboard import plugin_util from tensorboard.backend.event_processing import event_multiplexer from tensorboard.plugins import base_plugin from tensorboard.plugins.text import text_plugin @@ -49,10 +50,6 @@ def testRoutesProvided(self): self.assertIsInstance(routes['/tags'], collections.Callable) self.assertIsInstance(routes['/text'], collections.Callable) - def assertConverted(self, actual, expected): - expected_html = text_plugin.markdown_and_sanitize(expected) - self.assertEqual(actual, expected_html) - def generate_testdata(self, include_text=True, logdir=None): tf.reset_default_graph() sess = tf.Session() @@ -104,9 +101,7 @@ def testText(self): self.assertEqual(len(leela), 4) for i in range(4): self.assertEqual(fry[i]['step'], i) - self.assertConverted(fry[i]['text'], 'fry *loves* ' + GEMS[i]) self.assertEqual(leela[i]['step'], i) - self.assertConverted(leela[i]['text'], 'leela *loves* ' + GEMS[i]) table = self.plugin.text_impl('fry', 'vector')[0]['text'] self.assertEqual(table, @@ -128,82 +123,6 @@ def testText(self): """)) - def assertTextConverted(self, actual, expected): - self.assertEqual(text_plugin.markdown_and_sanitize(actual), expected) - - def testMarkdownConversion(self): - emphasis = '*Italics1* _Italics2_ **bold1** __bold2__' - emphasis_converted = ('Italics1 Italics2 ' - 'bold1 bold2
') - - self.assertEqual( - text_plugin.markdown_and_sanitize(emphasis), emphasis_converted) - - md_list = textwrap.dedent("""\ - 1. List item one. - 2. List item two. - * Sublist - * Sublist2 - 1. List continues. - """) - md_list_converted = textwrap.dedent("""\ -| An | -Example | -Table | -
|---|---|---|
| A | -B | -C | -
| 1 | -2 | -3 | -
hello you
' - self.assertEqual(text_plugin.markdown_and_sanitize(dangerous), sanitized) - def testTableGeneration(self): array2d = np.array([['one', 'two'], ['three', 'four']]) expected_table = textwrap.dedent("""\ @@ -386,8 +305,8 @@ def test_text_array_to_html(self): d3 = np.array([[['foo', 'bar'], ['zoink', 'zod']], [['FOO', 'BAR'], ['ZOINK', 'ZOD']]]) - warning = text_plugin.markdown_and_sanitize(text_plugin.WARNING_TEMPLATE % - 3) + warning = plugin_util.markdown_to_safe_html( + text_plugin.WARNING_TEMPLATE % 3) d3_expected = warning + textwrap.dedent("""\