Skip to content

Commit

Permalink
Translate <form> into <p> and disable lxml's form cleanup.
Browse files: browse the repository at this point in the history
  • Loading branch information
jsonn committed Mar 27, 2018
1 parent cac93be commit 8ef89a8
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 0 deletions.
4 changes: 4 additions & 0 deletions html_sanitizer/sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def normalize_whitespace_in_text_or_tail(element):
italic_span_to_em,
tag_replacer('b', 'strong'),
tag_replacer('i', 'em'),
tag_replacer('form', 'p'),
],
'element_postprocessors': [
],
Expand Down Expand Up @@ -163,6 +164,8 @@ def sanitize(self, html):
# information to convert spans into em/strong tags
safe_attrs_only=False,
inline_style=False,
# Do not strip all form tags; we will filter them below
forms=False
)(doc)

# walk the tree recursively, because we want to be able to remove
Expand Down Expand Up @@ -279,6 +282,7 @@ def sanitize(self, html):
remove_unknown_tags=False,
safe_attrs_only=True,
add_nofollow=self.add_nofollow,
forms=False
)(doc)

html = lxml.html.tostring(doc, encoding='unicode')
Expand Down
9 changes: 9 additions & 0 deletions html_sanitizer/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,15 @@ def test_01_sanitize(self):
'<p><strong>A</strong>, <strong>B</strong>'
' und <strong>C</strong></p>',
),
(
'<p><form>Zeile 1</form></p>',
'<p>Zeile 1</p>',
),
# Suboptimal, should be cleaned further
(
'<form><p>Zeile 2</p></form>',
'<p><p>Zeile 2</p></p>',
),
]

self.run_tests(entries)
Expand Down

0 comments on commit 8ef89a8

Please sign in to comment.