From a0744252196ea31dfa0edeb92e3bd45d048a7beb Mon Sep 17 00:00:00 2001 From: Lumir Balhar Date: Wed, 13 Nov 2024 10:27:32 +0100 Subject: [PATCH] Remove only the CSS comment if a suspicious content is detected --- CHANGES.rst | 1 + lxml_html_clean/clean.py | 62 ++++++++++++++++++++++++++-------------- tests/test_clean.py | 10 ++++--- 3 files changed, 47 insertions(+), 26 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 7433327..d939a06 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -16,6 +16,7 @@ Bugs fixed within CSS comments. In certain contexts, such as within ```` or ```` tags, ``' - return True + if _has_javascript_scheme(style): + return True + if 'expression(' in style: + return True + if '@import' in style: + return True + if '' + return True return False + def _remove_sneaky_css_comments(self, style): + """ + Look for suspicious code in CSS comment and if found, + remove the entire comment from the given style. + + Browsers might parse ' s = lxml.html.fragment_fromstring(html) + expected = f'<{tag}>'.encode() + self.assertEqual( - f'<{tag}>'.encode(), + expected, lxml.html.tostring(clean_html(s))) def test_sneaky_js_in_style_comment_noscript(self): - html = '' s = lxml.html.fragment_fromstring(html) self.assertEqual( - b'', + b'', lxml.html.tostring(clean_html(s))) def test_sneaky_import_in_style(self):