Skip to content

Commit 41809ee

Browse files
committed
Centralize a markdown_to_safe_html function
Summary: Now that every summary has a `summary_description` field, plugins will need to serve rendered Markdown to the frontend (assuming that we don't want to do Markdown rendering in JavaScript, which we don't). This has security implications (sanitization), so TensorBoard should expose a function that does the right thing. An alternative would be to provide a route that serves rendered Markdown for the `summary_description` only. This has disadvantages: (1) it would require at least one extra network roundtrip (plugins must first request the list of tags, then request the rendered description); (2) it would induce a FOUC-like state (akin to a flash of unstyled content) where the tags have been fetched but their metadata has not; and (3) it would not generalize to plugins that want to safely render other kinds of Markdown, like the text plugin. Test Plan: Run unit tests, and verify that the text and histogram plugins continue to work. wchargin-branch: markdown-to-safe-html
1 parent 99e3ca3 commit 41809ee

File tree

9 files changed

+236
-155
lines changed

9 files changed

+236
-155
lines changed

tensorboard/BUILD

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,30 @@ py_library(
104104
deps = ["//tensorboard:expect_sqlite3_installed"],
105105
)
106106

107+
py_library(
108+
name = "plugin_util",
109+
srcs = ["plugin_util.py"],
110+
srcs_version = "PY2AND3",
111+
visibility = ["//visibility:public"],
112+
deps = [
113+
"@org_mozilla_bleach",
114+
"@org_pythonhosted_markdown",
115+
"@org_pythonhosted_six",
116+
],
117+
)
118+
119+
py_test(
120+
name = "plugin_util_test",
121+
size = "small",
122+
srcs = ["plugin_util_test.py"],
123+
srcs_version = "PY2AND3",
124+
deps = [
125+
":plugin_util",
126+
"//tensorboard:expect_tensorflow_installed",
127+
"@org_pythonhosted_six",
128+
],
129+
)
130+
107131
py_library(
108132
name = "util",
109133
srcs = ["util.py"],

tensorboard/plugin_util.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ==============================================================================
15+
"""Provides utilities that may be especially useful to plugins."""
16+
17+
from __future__ import absolute_import
18+
from __future__ import division
19+
from __future__ import print_function
20+
21+
import bleach
22+
# pylint: disable=g-bad-import-order
23+
# Google-only: import markdown_freewisdom
24+
import markdown
25+
import six
26+
27+
28+
# Attribute whitelist passed to `bleach.clean`, keyed by tag name.
_ALLOWED_ATTRIBUTES = {'a': ['href', 'title'], 'img': ['src', 'title', 'alt']}

# Tag whitelist passed to `bleach.clean`. The grouping below is only for
# readability; the resulting list has the same elements in the same order.
_ALLOWED_TAGS = (
    ['ul', 'ol', 'li']  # lists
    + ['p', 'pre', 'code', 'blockquote']  # block-level text
    + ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']  # headings
    + ['hr', 'br']  # rules and line breaks
    + ['strong', 'em']  # inline emphasis
    + ['a', 'img']  # links and images
    + ['table', 'thead', 'tbody', 'td', 'tr', 'th']  # tables
)
60+
61+
62+
def markdown_to_safe_html(markdown_string):
  """Render Markdown source to HTML and sanitize the result.

  The source is rendered by `markdown.markdown` with the tables extension
  enabled, and the resulting HTML is passed through `bleach.clean` using
  the `_ALLOWED_TAGS` and `_ALLOWED_ATTRIBUTES` whitelists.

  Args:
    markdown_string: Markdown source, either as a Unicode string or as a
      bytestring. A bytestring is decoded as UTF-8 before rendering.

  Returns:
    The sanitized HTML string returned by `bleach.clean`.
  """
  source = markdown_string
  if isinstance(source, six.binary_type):
    # Binary input is decoded as UTF-8 before it is rendered.
    source = source.decode('utf-8')

  rendered_html = markdown.markdown(
      source, extensions=['markdown.extensions.tables'])
  return bleach.clean(
      rendered_html, tags=_ALLOWED_TAGS, attributes=_ALLOWED_ATTRIBUTES)

tensorboard/plugin_util_test.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import absolute_import
16+
from __future__ import division
17+
from __future__ import print_function
18+
19+
import textwrap
20+
21+
import six
22+
import tensorflow as tf
23+
24+
from tensorboard import plugin_util
25+
26+
27+
class MarkdownToSafeHTMLTest(tf.test.TestCase):
  """Tests for `plugin_util.markdown_to_safe_html`."""

  def _test(self, markdown_string, expected):
    """Asserts that `markdown_string` renders to exactly `expected`."""
    rendered = plugin_util.markdown_to_safe_html(markdown_string)
    self.assertEqual(expected, rendered)

  def test_empty_input(self):
    self._test(u'', u'')

  def test_basic_formatting(self):
    source = (u'# _Hello_, **world!**\n\n'
              'Check out [my website](http://example.com)!')
    expected_html = (
        u'<h1><em>Hello</em>, <strong>world!</strong></h1>\n'
        '<p>Check out <a href="http://example.com">my website</a>!</p>')
    self._test(source, expected_html)

  def test_table_formatting(self):
    # Both literals go through `textwrap.dedent`, so the common leading
    # indentation below is not part of the values under test.
    source = textwrap.dedent(
        u"""\
        Here is some data:

        TensorBoard usage | Happiness
        ------------------|----------
        0.0 | 0.0
        0.5 | 0.5
        1.0 | 1.0

        Wouldn't you agree?""")
    expected_html = textwrap.dedent(
        u"""\
        <p>Here is some data:</p>
        <table>
        <thead>
        <tr>
        <th>TensorBoard usage</th>
        <th>Happiness</th>
        </tr>
        </thead>
        <tbody>
        <tr>
        <td>0.0</td>
        <td>0.0</td>
        </tr>
        <tr>
        <td>0.5</td>
        <td>0.5</td>
        </tr>
        <tr>
        <td>1.0</td>
        <td>1.0</td>
        </tr>
        </tbody>
        </table>
        <p>Wouldn't you agree?</p>""")
    self._test(source, expected_html)

  def test_whitelisted_tags_and_attributes_allowed(self):
    # The input HTML is expected back unchanged, wrapped in a paragraph.
    s = (u'Check out <a href="http://example.com" title="do it">'
         'my website</a>!')
    expected_html = u'<p>%s</p>' % s
    self._test(s, expected_html)

  def test_arbitrary_tags_and_attributes_removed(self):
    # Expected output: the <blink> tag is escaped and the `name` attribute
    # is dropped from the anchor, while `href` is kept.
    source = (u'We should bring back the <blink>blink tag</blink>; '
              '<a name="bookmark" href="http://please-dont.com">'
              'sign the petition!</a>')
    expected_html = (
        u'<p>We should bring back the '
        '&lt;blink&gt;blink tag&lt;/blink&gt;; '
        '<a href="http://please-dont.com">sign the petition!</a></p>')
    self._test(source, expected_html)

  def test_javascript_hrefs_sanitized(self):
    source = u'A <a href="javascript:void0">sketchy link</a> for you'
    expected_html = u'<p>A <a>sketchy link</a> for you</p>'
    self._test(source, expected_html)

  def test_byte_strings_interpreted_as_utf8(self):
    s = u'> Look\u2014some UTF-8!'.encode('utf-8')
    # Sanity check: the input under test really is a bytestring.
    assert isinstance(s, six.binary_type), (type(s), six.binary_type)
    expected_html = (
        u'<blockquote>\n<p>Look\u2014some UTF-8!</p>\n</blockquote>')
    self._test(s, expected_html)

  def test_unicode_strings_passed_through(self):
    s = u'> Look\u2014some UTF-8!'
    # Sanity check: the input under test really is a text string.
    assert not isinstance(s, six.binary_type), (type(s), six.binary_type)
    expected_html = (
        u'<blockquote>\n<p>Look\u2014some UTF-8!</p>\n</blockquote>')
    self._test(s, expected_html)
110+
111+
112+
if __name__ == '__main__':
113+
tf.test.main()

tensorboard/plugins/histogram/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ py_library(
1515
visibility = ["//visibility:public"],
1616
deps = [
1717
":metadata",
18+
"//tensorboard:plugin_util",
1819
"//tensorboard/backend:http_util",
1920
"//tensorboard/backend/event_processing:event_accumulator",
2021
"//tensorboard/plugins:base_plugin",

tensorboard/plugins/histogram/histograms_plugin.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import numpy as np
3232
import tensorflow as tf
3333

34+
from tensorboard import plugin_util
3435
from tensorboard.backend import http_util
3536
from tensorboard.backend.event_processing import event_accumulator
3637
from tensorboard.plugins import base_plugin
@@ -83,7 +84,8 @@ def index_impl(self):
8384
content = metadata.parse_summary_metadata(content)
8485
summary_metadata = self._multiplexer.SummaryMetadata(run, tag)
8586
result[run][tag] = {'displayName': summary_metadata.display_name,
86-
'description': summary_metadata.summary_description}
87+
'description': plugin_util.markdown_to_safe_html(
88+
summary_metadata.summary_description)}
8789

8890
return result
8991

tensorboard/plugins/histogram/histograms_plugin_test.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ class HistogramsPluginTest(tf.test.TestCase):
4242
_SCALAR_TAG = 'my-boring-scalars'
4343

4444
_DISPLAY_NAME = 'Important production statistics'
45-
_DESCRIPTION = 'quod erat scribendum'
45+
_DESCRIPTION = 'quod *erat* scribendum'
46+
_HTML_DESCRIPTION = '<p>quod <em>erat</em> scribendum</p>'
4647

4748
_RUN_WITH_LEGACY_HISTOGRAM = '_RUN_WITH_LEGACY_HISTOGRAM'
4849
_RUN_WITH_HISTOGRAM = '_RUN_WITH_HISTOGRAM'
@@ -115,7 +116,7 @@ def test_index(self):
115116
self._RUN_WITH_HISTOGRAM: {
116117
'%s/histogram_summary' % self._HISTOGRAM_TAG: {
117118
'displayName': self._DISPLAY_NAME,
118-
'description': self._DESCRIPTION,
119+
'description': self._HTML_DESCRIPTION,
119120
},
120121
},
121122
}, self.plugin.index_impl())

tensorboard/plugins/text/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ py_library(
1515
visibility = ["//visibility:public"],
1616
deps = [
1717
"//tensorboard:expect_tensorflow_installed",
18+
"//tensorboard:plugin_util",
1819
"//tensorboard/backend:http_util",
1920
"//tensorboard/plugins:base_plugin",
2021
"@org_mozilla_bleach",
@@ -33,6 +34,7 @@ py_test(
3334
deps = [
3435
":text_plugin",
3536
"//tensorboard:expect_tensorflow_installed",
37+
"//tensorboard:plugin_util",
3638
"//tensorboard/backend:application",
3739
"//tensorboard/backend/event_processing:event_multiplexer",
3840
"//tensorboard/plugins:base_plugin",

tensorboard/plugins/text/text_plugin.py

Lines changed: 7 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,10 @@
2727
import numpy as np
2828
# pylint: enable=g-bad-import-order
2929

30-
import bleach
31-
# pylint: disable=g-bad-import-order
32-
# Google-only: import markdown_freewisdom
33-
import markdown
34-
import six
35-
# pylint: enable=g-bad-import-order
3630
import tensorflow as tf
3731
from werkzeug import wrappers
3832

33+
from tensorboard import plugin_util
3934
from tensorboard.backend import http_util
4035
from tensorboard.plugins import base_plugin
4136

@@ -46,67 +41,12 @@
4641
TAGS_ROUTE = '/tags'
4742
TEXT_ROUTE = '/text'
4843

49-
ALLOWED_TAGS = [
50-
'ul',
51-
'ol',
52-
'li',
53-
'p',
54-
'pre',
55-
'code',
56-
'blockquote',
57-
'h1',
58-
'h2',
59-
'h3',
60-
'h4',
61-
'h5',
62-
'h6',
63-
'hr',
64-
'br',
65-
'strong',
66-
'em',
67-
'a',
68-
'img',
69-
'table',
70-
'thead',
71-
'tbody',
72-
'td',
73-
'tr',
74-
'th',
75-
]
76-
77-
ALLOWED_ATTRIBUTES = {'a': ['href', 'title'], 'img': ['src', 'title', 'alt']}
7844

7945
WARNING_TEMPLATE = textwrap.dedent("""\
8046
**Warning:** This text summary contained data of dimensionality %d, but only \
8147
2d tables are supported. Showing a 2d slice of the data instead.""")
8248

8349

84-
def markdown_and_sanitize(markdown_string):
85-
"""Takes a markdown string and converts it into sanitized html.
86-
87-
It uses the table extension; while that's not a part of standard
88-
markdown, it is sure to be useful for TensorBoard users.
89-
90-
The sanitizer uses the allowed_tags and attributes specified above. Mostly,
91-
we ensure that our standard use cases like tables and links are supported.
92-
93-
Args:
94-
markdown_string: Markdown string to sanitize
95-
96-
Returns:
97-
a string containing sanitized html for input markdown
98-
"""
99-
# Convert to utf-8 whenever we have a binary input.
100-
if isinstance(markdown_string, six.binary_type):
101-
markdown_string = markdown_string.decode('utf-8')
102-
103-
string_html = markdown.markdown(
104-
markdown_string, extensions=['markdown.extensions.tables'])
105-
string_sanitized = bleach.clean(
106-
string_html, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES)
107-
return string_sanitized
108-
109-
11050
def make_table_row(contents, tag='td'):
11151
"""Given an iterable of string contents, make a table row.
11252
@@ -226,13 +166,16 @@ def text_array_to_html(text_arr):
226166
"""
227167
if not text_arr.shape:
228168
# It is a scalar. No need to put it in a table, just apply markdown
229-
return markdown_and_sanitize(text_arr.astype(np.dtype(str)).tostring())
169+
return plugin_util.markdown_to_safe_html(
170+
text_arr.astype(np.dtype(str)).tostring())
230171
warning = ''
231172
if len(text_arr.shape) > 2:
232-
warning = markdown_and_sanitize(WARNING_TEMPLATE % len(text_arr.shape))
173+
warning = plugin_util.markdown_to_safe_html(WARNING_TEMPLATE
174+
% len(text_arr.shape))
233175
text_arr = reduce_to_2d(text_arr)
234176

235-
html_arr = [markdown_and_sanitize(x) for x in text_arr.reshape(-1)]
177+
html_arr = [plugin_util.markdown_to_safe_html(x)
178+
for x in text_arr.reshape(-1)]
236179
html_arr = np.array(html_arr).reshape(text_arr.shape)
237180

238181
return warning + make_table(html_arr)

0 commit comments

Comments
 (0)