Skip to content

Commit

Permalink
Fix inline diff for rich text fields after PloneHotfix20210518.
Browse files Browse the repository at this point in the history
The first version of the hotfix escaped all html.
Now for the rich text field, use the safe html transform, otherwise the inline diff is no longer inline.
This may be added in a new version of the hotfix as well.
  • Loading branch information
mauritsvanrees committed Jun 4, 2021
1 parent 6141c4b commit dc1f9ff
Show file tree
Hide file tree
Showing 9 changed files with 66 additions and 37 deletions.
6 changes: 3 additions & 3 deletions Products/CMFDiffTool/BinaryDiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from os import linesep
from Products.CMFDiffTool.BaseDiff import _getValue
from Products.CMFDiffTool.FieldDiff import FieldDiff
from Products.CMFDiffTool.utils import html_encode
from Products.CMFDiffTool.utils import html_escape


class BinaryDiff(FieldDiff):
Expand Down Expand Up @@ -51,8 +51,8 @@ def inline_diff(self):
if self.oldFilename != self.newFilename:
html.append(
self.inlinediff_fmt % (css_class,
self.filenameTitle(html_encode(self.oldFilename)),
self.filenameTitle(html_encode(self.newFilename))),
self.filenameTitle(html_escape(self.oldFilename)),
self.filenameTitle(html_escape(self.newFilename))),
)

if html:
Expand Down
4 changes: 2 additions & 2 deletions Products/CMFDiffTool/CMFDTHtmlDiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from AccessControl.class_init import InitializeClass
from Products.CMFDiffTool.libs import htmldiff
from Products.CMFDiffTool.TextDiff import TextDiff
from Products.CMFDiffTool.utils import html_encode
from Products.CMFDiffTool.utils import html_safe


# Give it a dumb name so it doesn't conflict with all the other html diffs
Expand All @@ -19,7 +19,7 @@ def inline_diff(self):
filename=self.oldFilename))
b = '\n'.join(self._parseField(self.newValue,
filename=self.newFilename))
return htmldiff.htmldiff(html_encode(a), html_encode(b))
return htmldiff.htmldiff(html_safe(a), html_safe(b))

def _parseField(self, value, filename=None):
"""Use the field's raw value if available."""
Expand Down
12 changes: 6 additions & 6 deletions Products/CMFDiffTool/FieldDiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from AccessControl.class_init import InitializeClass
from Products.CMFDiffTool.BaseDiff import _getValue
from Products.CMFDiffTool.BaseDiff import BaseDiff
from Products.CMFDiffTool.utils import html_encode
from Products.CMFDiffTool.utils import html_escape
from six.moves import range

import difflib
Expand Down Expand Up @@ -81,18 +81,18 @@ def inline_diff(self):
for tag, alo, ahi, blo, bhi in self.getLineDiffs():
if tag == 'replace':
for i in range(alo, ahi):
r.append(inlinediff_fmt % (css_class, html_encode(a[i]), ''))
r.append(inlinediff_fmt % (css_class, html_escape(a[i]), ''))
for i in range(blo, bhi):
r.append(inlinediff_fmt % (css_class, '', html_encode(b[i])))
r.append(inlinediff_fmt % (css_class, '', html_escape(b[i])))
elif tag == 'delete':
for i in range(alo, ahi):
r.append(inlinediff_fmt % (css_class, html_encode(a[i]), ''))
r.append(inlinediff_fmt % (css_class, html_escape(a[i]), ''))
elif tag == 'insert':
for i in range(blo, bhi):
r.append(inlinediff_fmt % (css_class, '', html_encode(b[i])))
r.append(inlinediff_fmt % (css_class, '', html_escape(b[i])))
elif tag == 'equal':
for i in range(alo, ahi):
r.append(same_fmt % (css_class, html_encode(a[i])))
r.append(same_fmt % (css_class, html_escape(a[i])))
else:
raise ValueError('unknown tag "%s"' % tag)
return '\n'.join(r)
Expand Down
12 changes: 6 additions & 6 deletions Products/CMFDiffTool/ListDiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from Products.CMFDiffTool.choicediff import get_field_object
from Products.CMFDiffTool.choicediff import title_or_value
from Products.CMFDiffTool.FieldDiff import FieldDiff
from Products.CMFDiffTool.utils import html_encode
from Products.CMFDiffTool.utils import html_escape
from six.moves import range


Expand Down Expand Up @@ -89,34 +89,34 @@ def inline_diff(self):
if tag == 'replace':
for i in range(alo, ahi):
obj = self.oldValue[i]
obj_title = html_encode(obj.Title())
obj_title = html_escape(obj.Title())
obj_url = obj.absolute_url()
r.append(inlinediff_fmt %
(css_class, 'diff_sub', obj_url, obj_title))
for i in range(blo, bhi):
obj = self.newValue[i]
obj_title = html_encode(obj.Title())
obj_title = html_escape(obj.Title())
obj_url = obj.absolute_url()
r.append(inlinediff_fmt %
(css_class, 'diff_add', obj_url, obj_title))
elif tag == 'delete':
for i in range(alo, ahi):
obj = self.oldValue[i]
obj_title = html_encode(obj.Title())
obj_title = html_escape(obj.Title())
obj_url = obj.absolute_url()
r.append(inlinediff_fmt %
(css_class, 'diff_sub', obj_url, obj_title))
elif tag == 'insert':
for i in range(blo, bhi):
obj = self.newValue[i]
obj_title = html_encode(obj.Title())
obj_title = html_escape(obj.Title())
obj_url = obj.absolute_url()
r.append(inlinediff_fmt %
(css_class, 'diff_add', obj_url, obj_title))
elif tag == 'equal':
for i in range(alo, ahi):
obj = self.oldValue[i]
obj_title = html_encode(obj.Title())
obj_title = html_escape(obj.Title())
obj_url = obj.absolute_url()
r.append(same_fmt % (css_class, obj_url, obj_title))
else:
Expand Down
6 changes: 3 additions & 3 deletions Products/CMFDiffTool/TextDiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from os import linesep
from Products.CMFDiffTool import CMFDiffToolMessageFactory as _
from Products.CMFDiffTool.FieldDiff import FieldDiff
from Products.CMFDiffTool.utils import html_encode
from Products.CMFDiffTool.utils import html_escape
from Products.CMFDiffTool.utils import safe_unicode
from Products.CMFDiffTool.utils import safe_utf8
from zope.component.hooks import getSite
Expand Down Expand Up @@ -88,11 +88,11 @@ def inline_diff(self):
if old_fname != new_fname:
html.append(
self.inlinediff_fmt % ('%s FilenameDiff' % css_class,
html_encode(old_fname), html_encode(new_fname)),
html_escape(old_fname), html_escape(new_fname)),
)
if a != b:
html.append(
self.inlinediff_fmt % (css_class, html_encode(a), html_encode(b)),
self.inlinediff_fmt % (css_class, html_escape(a), html_escape(b)),
)
if html:
return linesep.join(html)
Expand Down
8 changes: 4 additions & 4 deletions Products/CMFDiffTool/namedfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from Products.CMFDiffTool.BinaryDiff import BinaryDiff
from Products.CMFDiffTool.ListDiff import ListDiff
from Products.CMFDiffTool.TextDiff import TextDiff
from Products.CMFDiffTool.utils import html_encode
from Products.CMFDiffTool.utils import html_escape


FILE_FIELD_TYPES = []
Expand Down Expand Up @@ -71,7 +71,7 @@ def inline_diff(self):
old = self._parseField(self.oldValue, self.oldFilename)[0]
new = self._parseField(self.newValue, self.newFilename)[0]

return '' if self.same else self.inlinediff_fmt % (css_class, html_encode(old), html_encode(new))
return '' if self.same else self.inlinediff_fmt % (css_class, html_escape(old), html_escape(new))


InitializeClass(NamedFileBinaryDiff)
Expand Down Expand Up @@ -137,9 +137,9 @@ def is_same_dict(d1, d2):
)

return '\n'.join([
((self.same_fmt % (css_class, html_encode(d_old['repr'])))
((self.same_fmt % (css_class, html_escape(d_old['repr'])))
if is_same_dict(d_old, d_new) else self.inlinediff_fmt
% (css_class, html_encode(d_old['repr']), html_encode(d_new['repr']))
% (css_class, html_escape(d_old['repr']), html_escape(d_new['repr']))
) for (d_old, d_new) in zip(old_data, new_data)])


Expand Down
14 changes: 8 additions & 6 deletions Products/CMFDiffTool/tests/test_richtextdiff.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from plone.app.testing import PLONE_INTEGRATION_TESTING
from plone.app.textfield.value import RichTextValue
from Products.CMFDiffTool.CMFDTHtmlDiff import CMFDTHtmlDiff
from Products.CMFDiffTool.interfaces import IDifference
Expand All @@ -13,6 +14,7 @@ def __init__(self, body):

class RichTextDiffTestCase(unittest.TestCase):
"""Test RichTextDiff"""
layer = PLONE_INTEGRATION_TESTING

def test_parseField_value_is_none(self):
value = None
Expand All @@ -37,9 +39,9 @@ def test_inline_diff_same_hacker(self):
value = RichTextValue(u'<script>alert("Hacker value")</script>')
diff = CMFDTHtmlDiff(DummyType(value), DummyType(value), 'body')
inline_diff = diff.inline_diff()
# The script tag should be escaped.
# The script tag should not be escaped, but totally not shown.
self.assertNotIn("<script", inline_diff)
self.assertIn("&gt;", inline_diff)
self.assertNotIn("&gt;", inline_diff)

def test_inline_diff_different(self):
old_value = RichTextValue(u'foo')
Expand All @@ -58,14 +60,14 @@ def test_inline_diff_different_hacker(self):
new_value = RichTextValue(u'<script>alert("Hacker value")</script>')
diff = CMFDTHtmlDiff(DummyType(old_value), DummyType(new_value), 'body')
inline_diff = diff.inline_diff()
# The script tag should be escaped.
# The script tag should not be escaped, but totally not shown.
self.assertNotIn("<script", inline_diff)
self.assertIn("&gt;", inline_diff)
self.assertNotIn("&gt;", inline_diff)

old_value = RichTextValue(u'<script>alert("Hacker value")</script>')
new_value = RichTextValue(u'clean')
diff = CMFDTHtmlDiff(DummyType(old_value), DummyType(new_value), 'body')
inline_diff = diff.inline_diff()
# The script tag should be escaped.
# The script tag should not be escaped, but totally not shown.
self.assertNotIn("<script", inline_diff)
self.assertIn("&gt;", inline_diff)
self.assertNotIn("&gt;", inline_diff)
37 changes: 31 additions & 6 deletions Products/CMFDiffTool/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# -*- coding: utf-8 -*-
from Products.PortalTransforms.data import datastream
from Products.PortalTransforms.transforms.safe_html import SafeHTML

import six

Expand All @@ -23,18 +25,41 @@ def safe_utf8(value):
return safe_unicode(value).encode('utf-8')


# In both Python 2 and 3, the escape function cannot handle a non string-like value,
# for example an integer. Seems good to always return a string-like value though.
def scrub_html(value):
# Strip illegal HTML tags from string text.
transform = SafeHTML()
# Available in Plone 5.2:
# return transform.scrub_html(value)
data = datastream("text/x-html-safe")
data = transform.convert(value, data)
return data.getData()


# We will have two functions:
# - html_escape: escape html, for example turn '<' into '&lt;'
# - html_safe: return html with dangerous tags removed, using safe html transform.
#
# In both Python 2 and 3, the convert function that we use in safe_html
# cannot handle a non string-like value, for example an integer.
# Same is true for the escape function.
# Seems good to always return a string-like value though.
# But should that be bytes or string or unicode?
if six.PY2:
# We use this in places where the result gets inserted in a string/bytes,
# so we should use a string (utf-8) here.
def html_encode(value):
def html_escape(value):
value = safe_utf8(value)
return escape(value, 1)

def html_safe(value):
value = safe_utf8(value)
return scrub_html(value)
else:
# In Python 3 this gets inserted in a string/text,
# and escape cannot handle a bytes value.
def html_encode(value):
# In Python 3 this gets inserted in a string/text.
def html_escape(value):
value = safe_unicode(value)
return escape(value, 1)

def html_safe(value):
value = safe_unicode(value)
return scrub_html(value)
4 changes: 3 additions & 1 deletion news/39.bugfix
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
Added XSS fix from PloneHotfix20210518.
Added XSS fix from PloneHotfix20210518 for inline diff.
See `vulnerability <https://plone.org/security/hotfix/20210518/xss-vulnerability-in-cmfdifftool>`_.
The first version of the hotfix escaped all html.
Now for the rich text field, use the safe html transform, otherwise the inline diff is no longer inline.
[maurits]

0 comments on commit dc1f9ff

Please sign in to comment.