Skip to content

Commit

Permalink
Add hook for deciding whether two tags can be merged
Browse files Browse the repository at this point in the history
  • Loading branch information
jsonn committed Mar 27, 2018
1 parent 8ef89a8 commit bfde931
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 2 deletions.
11 changes: 10 additions & 1 deletion html_sanitizer/sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,14 @@ def __init__(self, settings=None):
set(self.attributes.keys()) - self.tags,
))

@staticmethod
def is_mergeable(e1, e2):
"""
Decide whether the adjacent elements e1 and e2, which share the same
tag, can be merged into one. The default always allows merging; this
can be overridden to honour distinct classes etc.
"""
return True

def sanitize(self, html):
"""
Clean HTML code from ugly copy-pasted CSS and empty elements
Expand Down Expand Up @@ -232,7 +240,8 @@ def sanitize(self, html):
# tag type
nx = element.getnext()
if (whitespace_re.match(element.tail or '') and
nx is not None and nx.tag == element.tag):
nx is not None and nx.tag == element.tag and
self.is_mergeable(element, nx)):
# Yes, we should. Tail is empty, that is, no text between
# tags of a mergeable type.
if nx.text:
Expand Down
28 changes: 27 additions & 1 deletion html_sanitizer/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,9 @@ def test_14_classes(self):
), (
'<p class="centered">Test <span class="bla">span</span></p>',
'<p class="centered">Test <span class="bla">span</span></p>',
), (
'<p class="centered">Test <span class="bla">span</span><span class="blub">span</span></p>',
'<p class="centered">Test <span class="bla">span span</span></p>',
), (
'<h1 class="centered">Test</h1>',
'<h1 class="centered">Test</h1>',
Expand All @@ -263,7 +266,30 @@ def test_14_classes(self):
'<h2>Test</h2>',
)], sanitizer=sanitizer)

def test_15_emoji(self):
def test_15_classes(self):
"""
Class attributes may disable merging: a custom is_mergeable hook
keeps adjacent tags with differing classes apart.
"""
sanitizer = Sanitizer({
'tags': {'h1', 'h2', 'p', 'a', 'span'},
'attributes': {
'a': ('href', 'name', 'target', 'title', 'id'),
'h1': ('class',),
'p': ('class',),
'span': ('class',),
},
'empty': set(),
'separate': {'a', 'p'},
# Custom hook: only merge neighbours whose class attributes are equal.
'is_mergeable': lambda e1, e2: e1.get('class') == e2.get('class'),
})

self.run_tests([(
# Different classes ("bla" vs. "blub"): the spans must stay separate.
'<p class="centered">Test <span class="bla">span</span><span class="blub">span</span></p>',
'<p class="centered">Test <span class="bla">span</span><span class="blub">span</span></p>',
), (
# Identical classes: the spans are expected to be merged into one.
'<p class="centered">Test <span class="bla">span</span><span class="bla">span</span></p>',
'<p class="centered">Test <span class="bla">span span</span></p>',
)], sanitizer=sanitizer)

def test_16_emoji(self):
self.run_tests([(
'<p>😂</p>',
'<p>😂</p>',
Expand Down

0 comments on commit bfde931

Please sign in to comment.