diff --git a/html_sanitizer/sanitizer.py b/html_sanitizer/sanitizer.py index 81a1d16..a8436f0 100644 --- a/html_sanitizer/sanitizer.py +++ b/html_sanitizer/sanitizer.py @@ -138,6 +138,14 @@ def __init__(self, settings=None): set(self.attributes.keys()) - self.tags, )) + @staticmethod + def is_mergeable(e1, e2): + """ + Decide if the adjacent elements of the same type e1 and e2 can be + merged. This can be overridden to honour distinct classes etc. + """ + return True + def sanitize(self, html): """ Clean HTML code from ugly copy-pasted CSS and empty elements @@ -232,7 +240,8 @@ def sanitize(self, html): # tag type nx = element.getnext() if (whitespace_re.match(element.tail or '') and - nx is not None and nx.tag == element.tag): + nx is not None and nx.tag == element.tag and + self.is_mergeable(element, nx)): # Yes, we should. Tail is empty, that is, no text between # tags of a mergeable type. if nx.text: diff --git a/html_sanitizer/tests.py b/html_sanitizer/tests.py index c409906..ab27c42 100644 --- a/html_sanitizer/tests.py +++ b/html_sanitizer/tests.py @@ -255,6 +255,9 @@ def test_14_classes(self): ), ( '
Test span
', 'Test span
', + ), ( + 'Test spanspan
', + 'Test span span
', ), ( 'Test spanspan
', + 'Test spanspan
', + ), ( + 'Test spanspan
', + 'Test span span
', + )], sanitizer=sanitizer) + + def test_16_emoji(self): self.run_tests([( '😂
', '😂
',