tensorflow · wchargin · Jul 31, 2017 · Jul 27, 2017
diff --git a/tensorboard/BUILD b/tensorboard/BUILD
@@ -104,6 +104,30 @@ py_library(
     deps = ["//tensorboard:expect_sqlite3_installed"],
 )
 
+py_library(
+    name = "plugin_util",  # Helpers shared by plugins (see plugin_util.py).
+    srcs = ["plugin_util.py"],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],  # Plugin packages depend on this.
+    deps = [  # External packages imported by plugin_util.py.
+        "@org_mozilla_bleach",
+        "@org_pythonhosted_markdown",
+        "@org_pythonhosted_six",
+    ],
+)
+
+py_test(
+    name = "plugin_util_test",  # Unit tests for :plugin_util.
+    size = "small",
+    srcs = ["plugin_util_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":plugin_util",  # Module under test.
+        "//tensorboard:expect_tensorflow_installed",  # Test uses tf.test.
+        "@org_pythonhosted_six",
+    ],
+)
+
 py_library(
     name = "util",
     srcs = ["util.py"],

diff --git a/tensorboard/plugin_util.py b/tensorboard/plugin_util.py
@@ -0,0 +1,80 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Provides utilities that may be especially useful to plugins."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import bleach
+# pylint: disable=g-bad-import-order
+# Google-only: import markdown_freewisdom
+import markdown
+import six
+
+
+_ALLOWED_ATTRIBUTES = {  # Tag name -> attributes; `attributes` for bleach.
+    'a': ['href', 'title'],
+    'img': ['src', 'title', 'alt'],
+}
+
+_ALLOWED_TAGS = [  # Passed to bleach.clean as `tags`.
+    'ul',
+    'ol',
+    'li',
+    'p',
+    'pre',
+    'code',
+    'blockquote',
+    'h1',
+    'h2',
+    'h3',
+    'h4',
+    'h5',
+    'h6',
+    'hr',
+    'br',
+    'strong',
+    'em',
+    'a',
+    'img',
+    'table',  # Table tags below are what rendered Markdown tables contain.
+    'thead',
+    'tbody',
+    'td',
+    'tr',
+    'th',
+]
+
+
+def markdown_to_safe_html(markdown_string):
+  """Render Markdown source as sanitized HTML.
+
+  Arguments:
+    markdown_string: Markdown source, as text or as a bytestring
+      (bytestrings are decoded as UTF-8). Table syntax is enabled.
+
+  Returns:
+    The rendered HTML, cleaned against this module's whitelists.
+  """
+  # Rendering needs text, so bytes are decoded first.
+  source = markdown_string
+  if isinstance(source, six.binary_type):
+    source = source.decode('utf-8')
+
+  rendered = markdown.markdown(
+      source, extensions=['markdown.extensions.tables'])
+  return bleach.clean(
+      rendered, tags=_ALLOWED_TAGS, attributes=_ALLOWED_ATTRIBUTES)
diff --git a/tensorboard/plugin_util_test.py b/tensorboard/plugin_util_test.py
@@ -0,0 +1,113 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import textwrap
+
+import six
+import tensorflow as tf
+
+from tensorboard import plugin_util
+
+
+class MarkdownToSafeHTMLTest(tf.test.TestCase):  # markdown_to_safe_html tests.
+
+  def _test(self, markdown_string, expected):  # Convert, then compare exactly.
+    actual = plugin_util.markdown_to_safe_html(markdown_string)
+    self.assertEqual(expected, actual)
+
+  def test_empty_input(self):
+    self._test(u'', u'')
+
+  def test_basic_formatting(self):
+    self._test(u'# _Hello_, **world!**\n\n'
+               'Check out [my website](http://example.com)!',
+               u'<h1><em>Hello</em>, <strong>world!</strong></h1>\n'
+               '<p>Check out <a href="http://example.com">my website</a>!</p>')
+
+  def test_table_formatting(self):
+    self._test(
+        textwrap.dedent(
+            u"""\
+            Here is some data:
+
+            TensorBoard usage | Happiness
+            ------------------|----------
+                          0.0 |       0.0
+                          0.5 |       0.5
+                          1.0 |       1.0
+
+            Wouldn't you agree?"""),
+        textwrap.dedent(
+            u"""\
+            <p>Here is some data:</p>
+            <table>
+            <thead>
+            <tr>
+            <th>TensorBoard usage</th>
+            <th>Happiness</th>
+            </tr>
+            </thead>
+            <tbody>
+            <tr>
+            <td>0.0</td>
+            <td>0.0</td>
+            </tr>
+            <tr>
+            <td>0.5</td>
+            <td>0.5</td>
+            </tr>
+            <tr>
+            <td>1.0</td>
+            <td>1.0</td>
+            </tr>
+            </tbody>
+            </table>
+            <p>Wouldn't you agree?</p>"""))
+
+  def test_whitelisted_tags_and_attributes_allowed(self):
+    s = (u'Check out <a href="http://example.com" title="do it">'
+         'my website</a>!')
+    self._test(s, u'<p>%s</p>' % s)
+
+  def test_arbitrary_tags_and_attributes_removed(self):
+    self._test(u'We should bring back the <blink>blink tag</blink>; '
+               '<a name="bookmark" href="http://please-dont.com">'
+               'sign the petition!</a>',
+               u'<p>We should bring back the '
+               '&lt;blink&gt;blink tag&lt;/blink&gt;; '
+               '<a href="http://please-dont.com">sign the petition!</a></p>')
+
+  def test_javascript_hrefs_sanitized(self):  # href dropped, <a> itself kept.
+    self._test(u'A <a href="javascript:void0">sketchy link</a> for you',
+               u'<p>A <a>sketchy link</a> for you</p>')
+
+  def test_byte_strings_interpreted_as_utf8(self):
+    s = u'> Look\u2014some UTF-8!'.encode('utf-8')
+    self.assertIsInstance(s, six.binary_type)  # Guard the fixture's type.
+    self._test(s,
+               u'<blockquote>\n<p>Look\u2014some UTF-8!</p>\n</blockquote>')
+
+  def test_unicode_strings_passed_through(self):
+    s = u'> Look\u2014some UTF-8!'
+    self.assertNotIsInstance(s, six.binary_type)  # Must be text, not bytes.
+    self._test(s,
+               u'<blockquote>\n<p>Look\u2014some UTF-8!</p>\n</blockquote>')
+
+
+if __name__ == '__main__':  # Run the tests when executed as a script.
+  tf.test.main()
diff --git a/tensorboard/plugins/histogram/BUILD b/tensorboard/plugins/histogram/BUILD
@@ -15,6 +15,7 @@ py_library(
     visibility = ["//visibility:public"],
     deps = [
         ":metadata",
+        "//tensorboard:plugin_util",
         "//tensorboard/backend:http_util",
         "//tensorboard/backend/event_processing:event_accumulator",
         "//tensorboard/plugins:base_plugin",

diff --git a/tensorboard/plugins/histogram/histograms_plugin.py b/tensorboard/plugins/histogram/histograms_plugin.py
@@ -31,6 +31,7 @@
 import numpy as np
 import tensorflow as tf
 
+from tensorboard import plugin_util
 from tensorboard.backend import http_util
 from tensorboard.backend.event_processing import event_accumulator
 from tensorboard.plugins import base_plugin
@@ -83,7 +84,8 @@ def index_impl(self):
         content = metadata.parse_summary_metadata(content)
         summary_metadata = self._multiplexer.SummaryMetadata(run, tag)
         result[run][tag] = {'displayName': summary_metadata.display_name,
-                            'description': summary_metadata.summary_description}
+                            'description': plugin_util.markdown_to_safe_html(
+                                summary_metadata.summary_description)}
 
     return result
 

diff --git a/tensorboard/plugins/histogram/histograms_plugin_test.py b/tensorboard/plugins/histogram/histograms_plugin_test.py
@@ -42,7 +42,8 @@ class HistogramsPluginTest(tf.test.TestCase):
   _SCALAR_TAG = 'my-boring-scalars'
 
   _DISPLAY_NAME = 'Important production statistics'
-  _DESCRIPTION = 'quod erat scribendum'
+  _DESCRIPTION = 'quod *erat* scribendum'
+  _HTML_DESCRIPTION = '<p>quod <em>erat</em> scribendum</p>'
 
   _RUN_WITH_LEGACY_HISTOGRAM = '_RUN_WITH_LEGACY_HISTOGRAM'
   _RUN_WITH_HISTOGRAM = '_RUN_WITH_HISTOGRAM'
@@ -115,7 +116,7 @@ def test_index(self):
         self._RUN_WITH_HISTOGRAM: {
             '%s/histogram_summary' % self._HISTOGRAM_TAG: {
                 'displayName': self._DISPLAY_NAME,
-                'description': self._DESCRIPTION,
+                'description': self._HTML_DESCRIPTION,
             },
         },
     }, self.plugin.index_impl())

diff --git a/tensorboard/plugins/text/BUILD b/tensorboard/plugins/text/BUILD
@@ -15,6 +15,7 @@ py_library(
     visibility = ["//visibility:public"],
     deps = [
         "//tensorboard:expect_tensorflow_installed",
+        "//tensorboard:plugin_util",
         "//tensorboard/backend:http_util",
         "//tensorboard/plugins:base_plugin",
         "@org_mozilla_bleach",
@@ -33,6 +34,7 @@ py_test(
     deps = [
         ":text_plugin",
         "//tensorboard:expect_tensorflow_installed",
+        "//tensorboard:plugin_util",
         "//tensorboard/backend:application",
         "//tensorboard/backend/event_processing:event_multiplexer",
         "//tensorboard/plugins:base_plugin",

diff --git a/tensorboard/plugins/text/text_plugin.py b/tensorboard/plugins/text/text_plugin.py
@@ -27,15 +27,10 @@
 import numpy as np
 # pylint: enable=g-bad-import-order
 
-import bleach
-# pylint: disable=g-bad-import-order
-# Google-only: import markdown_freewisdom
-import markdown
-import six
-# pylint: enable=g-bad-import-order
 import tensorflow as tf
 from werkzeug import wrappers
 
+from tensorboard import plugin_util
 from tensorboard.backend import http_util
 from tensorboard.plugins import base_plugin
 
@@ -46,67 +41,12 @@
 TAGS_ROUTE = '/tags'
 TEXT_ROUTE = '/text'
 
-ALLOWED_TAGS = [
-    'ul',
-    'ol',
-    'li',
-    'p',
-    'pre',
-    'code',
-    'blockquote',
-    'h1',
-    'h2',
-    'h3',
-    'h4',
-    'h5',
-    'h6',
-    'hr',
-    'br',
-    'strong',
-    'em',
-    'a',
-    'img',
-    'table',
-    'thead',
-    'tbody',
-    'td',
-    'tr',
-    'th',
-]
-
-ALLOWED_ATTRIBUTES = {'a': ['href', 'title'], 'img': ['src', 'title', 'alt']}
 
 WARNING_TEMPLATE = textwrap.dedent("""\
   **Warning:** This text summary contained data of dimensionality %d, but only \
   2d tables are supported. Showing a 2d slice of the data instead.""")
 
 
-def markdown_and_sanitize(markdown_string):
-  """Takes a markdown string and converts it into sanitized html.
-
-  It uses the table extension; while that's not a part of standard
-  markdown, it is sure to be useful for TensorBoard users.
-
-  The sanitizer uses the allowed_tags and attributes specified above. Mostly,
-  we ensure that our standard use cases like tables and links are supported.
-
-  Args:
-    markdown_string: Markdown string to sanitize
-
-  Returns:
-    a string containing sanitized html for input markdown
-  """
-  # Convert to utf-8 whenever we have a binary input.
-  if isinstance(markdown_string, six.binary_type):
-    markdown_string = markdown_string.decode('utf-8')
-
-  string_html = markdown.markdown(
-      markdown_string, extensions=['markdown.extensions.tables'])
-  string_sanitized = bleach.clean(
-      string_html, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES)
-  return string_sanitized
-
-
 def make_table_row(contents, tag='td'):
   """Given an iterable of string contents, make a table row.
 
@@ -226,13 +166,16 @@ def text_array_to_html(text_arr):
   """
   if not text_arr.shape:
     # It is a scalar. No need to put it in a table, just apply markdown
-    return markdown_and_sanitize(text_arr.astype(np.dtype(str)).tostring())
+    return plugin_util.markdown_to_safe_html(
+        text_arr.astype(np.dtype(str)).tostring())
   warning = ''
   if len(text_arr.shape) > 2:
-    warning = markdown_and_sanitize(WARNING_TEMPLATE % len(text_arr.shape))
+    warning = plugin_util.markdown_to_safe_html(WARNING_TEMPLATE
+                                                % len(text_arr.shape))
     text_arr = reduce_to_2d(text_arr)
 
-  html_arr = [markdown_and_sanitize(x) for x in text_arr.reshape(-1)]
+  html_arr = [plugin_util.markdown_to_safe_html(x)
+              for x in text_arr.reshape(-1)]
   html_arr = np.array(html_arr).reshape(text_arr.shape)
 
   return warning + make_table(html_arr)