diff --git a/docs/user/checks.rst b/docs/user/checks.rst index de9de17c7240..0a9681f9c9b2 100644 --- a/docs/user/checks.rst +++ b/docs/user/checks.rst @@ -1225,8 +1225,8 @@ autofixer which can automatically sanitize the markup. .. seealso:: - The HTML check is performed by the `Bleach `_ - library developed by Mozilla. + The HTML check is performed by the `Ammonia `_ + library. diff --git a/requirements.txt b/requirements.txt index 9f1ba7ae7796..8d1c0c49090b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -bleach>=3.1.1,<6.1 borgbackup>=1.1.11,<1.3,!=1.2.0 celery[redis]>=5.0.3,<5.3 charset-normalizer>=2.0.12,<4.0 @@ -20,6 +19,7 @@ importlib-metadata>=4.12,<5; python_version < '3.8' jsonschema>=4.5,<5 lxml>=4.9.1,<4.10 misaka>=2.1.0,<2.2 +nh3>=0.2.2,<0.3 openpyxl>=2.6.0,<3.1,!=3.0.2 packaging>=22,<23.1 Pillow>=9.0.0,<10.0 diff --git a/weblate/checks/markup.py b/weblate/checks/markup.py index 847aab0e3fc4..7432dd6d9740 100644 --- a/weblate/checks/markup.py +++ b/weblate/checks/markup.py @@ -4,14 +4,14 @@ import re -import bleach +import nh3 from django.core.exceptions import ValidationError from django.core.validators import URLValidator from django.utils.functional import cached_property from django.utils.translation import gettext_lazy as _ from weblate.checks.base import TargetCheck -from weblate.utils.html import extract_bleach +from weblate.utils.html import extract_html_tags from weblate.utils.xml import parse_xml BBCODE_MATCH = re.compile( @@ -340,4 +340,4 @@ def check_single(self, source, target, unit): if "md-text" in unit.all_flags: target = MD_LINK.sub("", target) - return bleach.clean(target, **extract_bleach(source)) != target + return nh3.clean(target, **extract_html_tags(source)) != target diff --git a/weblate/trans/autofixes/html.py b/weblate/trans/autofixes/html.py index 1ba69cfee43d..6caa477f64ee 100644 --- a/weblate/trans/autofixes/html.py +++ b/weblate/trans/autofixes/html.py @@ -2,12 +2,12 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -import bleach +import nh3 from django.utils.translation import gettext_lazy as _ from weblate.checks.markup import MD_LINK from weblate.trans.autofixes.base import AutoFix -from weblate.utils.html import extract_bleach +from weblate.utils.html import extract_html_tags class BleachHTML(AutoFix): @@ -37,7 +37,7 @@ def handle_replace(match): if "md-text" in flags: target = MD_LINK.sub(handle_replace, target) - new_target = bleach.clean(target, **extract_bleach(source)) + new_target = nh3.clean(target, **extract_html_tags(source)) for text, replace in replacements.items(): new_target = new_target.replace(text, replace) return new_target, new_target != old_target diff --git a/weblate/utils/html.py b/weblate/utils/html.py index 7ef848ff2306..15636e04089e 100644 --- a/weblate/utils/html.py +++ b/weblate/utils/html.py @@ -21,8 +21,8 @@ def start(self, tag, attrs): self.found_attributes[tag].update(attrs.keys()) -def extract_bleach(text): - """Extract tags from text in a form suitable for bleach.""" +def extract_html_tags(text): + """Extract tags from text in a form suitable for HTML sanitization.""" extractor = MarkupExtractor() parser = HTMLParser(collect_ids=False, target=extractor) parser.feed(text) diff --git a/weblate/utils/requirements.py b/weblate/utils/requirements.py index dbd7d7b8b9f8..2b572bf3b330 100644 --- a/weblate/utils/requirements.py +++ b/weblate/utils/requirements.py @@ -26,7 +26,7 @@ "translate-toolkit", "lxml", "Pillow", - "bleach", + "nh3", "python-dateutil", "social-auth-core", "social-auth-app-django", diff --git a/weblate/utils/tests/test_html.py b/weblate/utils/tests/test_html.py index 16892a9d9178..d92573fe72f3 100644 --- a/weblate/utils/tests/test_html.py +++ b/weblate/utils/tests/test_html.py @@ -4,22 +4,23 @@ from django.test import SimpleTestCase -from weblate.utils.html import extract_bleach +from weblate.utils.html import extract_html_tags class HtmlTestCase(SimpleTestCase): def test_noattr(self): self.assertEqual( - extract_bleach("text"), {"tags": {"b"}, "attributes": {"b": set()}} + extract_html_tags("text"), + {"tags": {"b"}, "attributes": {"b": set()}}, ) def test_attrs(self): self.assertEqual( - extract_bleach('t'), + extract_html_tags('t'), {"tags": {"a"}, "attributes": {"a": {"href"}}}, ) def test_noclose(self): self.assertEqual( - extract_bleach("
"), {"tags": {"br"}, "attributes": {"br": set()}} + extract_html_tags("
"), {"tags": {"br"}, "attributes": {"br": set()}} )