Skip to content

Commit

Permalink
deps: Replace bleach with nh3
Browse files Browse the repository at this point in the history
Bleach is being deprecated, and nh3 is considerably faster.
  • Loading branch information
nijel committed Jan 24, 2023
1 parent 3ac0aef commit 6e33e1c
Show file tree
Hide file tree
Showing 7 changed files with 17 additions and 16 deletions.
4 changes: 2 additions & 2 deletions docs/user/checks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1225,8 +1225,8 @@ autofixer which can automatically sanitize the markup.

.. seealso::

The HTML check is performed by the `Bleach <https://bleach.readthedocs.io/>`_
library developed by Mozilla.
The HTML check is performed by the `Ammonia <https://github.com/rust-ammonia/ammonia>`_
library, which is used through its ``nh3`` Python bindings.



Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
bleach>=3.1.1,<6.1
borgbackup>=1.1.11,<1.3,!=1.2.0
celery[redis]>=5.0.3,<5.3
charset-normalizer>=2.0.12,<4.0
Expand All @@ -20,6 +19,7 @@ importlib-metadata>=4.12,<5; python_version < '3.8'
jsonschema>=4.5,<5
lxml>=4.9.1,<4.10
misaka>=2.1.0,<2.2
nh3>=0.2.2,<0.3
openpyxl>=2.6.0,<3.1,!=3.0.2
packaging>=22,<23.1
Pillow>=9.0.0,<10.0
Expand Down
6 changes: 3 additions & 3 deletions weblate/checks/markup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@

import re

import bleach
import nh3
from django.core.exceptions import ValidationError
from django.core.validators import URLValidator
from django.utils.functional import cached_property
from django.utils.translation import gettext_lazy as _

from weblate.checks.base import TargetCheck
from weblate.utils.html import extract_bleach
from weblate.utils.html import extract_html_tags
from weblate.utils.xml import parse_xml

BBCODE_MATCH = re.compile(
Expand Down Expand Up @@ -340,4 +340,4 @@ def check_single(self, source, target, unit):
if "md-text" in unit.all_flags:
target = MD_LINK.sub("", target)

return bleach.clean(target, **extract_bleach(source)) != target
return nh3.clean(target, **extract_html_tags(source)) != target
6 changes: 3 additions & 3 deletions weblate/trans/autofixes/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
#
# SPDX-License-Identifier: GPL-3.0-or-later

import bleach
import nh3
from django.utils.translation import gettext_lazy as _

from weblate.checks.markup import MD_LINK
from weblate.trans.autofixes.base import AutoFix
from weblate.utils.html import extract_bleach
from weblate.utils.html import extract_html_tags


class BleachHTML(AutoFix):
Expand Down Expand Up @@ -37,7 +37,7 @@ def handle_replace(match):
if "md-text" in flags:
target = MD_LINK.sub(handle_replace, target)

new_target = bleach.clean(target, **extract_bleach(source))
new_target = nh3.clean(target, **extract_html_tags(source))
for text, replace in replacements.items():
new_target = new_target.replace(text, replace)
return new_target, new_target != old_target
4 changes: 2 additions & 2 deletions weblate/utils/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ def start(self, tag, attrs):
self.found_attributes[tag].update(attrs.keys())


def extract_bleach(text):
"""Extract tags from text in a form suitable for bleach."""
def extract_html_tags(text):
"""Extract tags from text in a form suitable for HTML sanitization."""
extractor = MarkupExtractor()
parser = HTMLParser(collect_ids=False, target=extractor)
parser.feed(text)
Expand Down
2 changes: 1 addition & 1 deletion weblate/utils/requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
"translate-toolkit",
"lxml",
"Pillow",
"bleach",
"nh3",
"python-dateutil",
"social-auth-core",
"social-auth-app-django",
Expand Down
9 changes: 5 additions & 4 deletions weblate/utils/tests/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,23 @@

from django.test import SimpleTestCase

from weblate.utils.html import extract_bleach
from weblate.utils.html import extract_html_tags


class HtmlTestCase(SimpleTestCase):
def test_noattr(self):
self.assertEqual(
extract_bleach("<b>text</b>"), {"tags": {"b"}, "attributes": {"b": set()}}
extract_html_tags("<b>text</b>"),
{"tags": {"b"}, "attributes": {"b": set()}},
)

def test_attrs(self):
self.assertEqual(
extract_bleach('<a href="#">t</a>'),
extract_html_tags('<a href="#">t</a>'),
{"tags": {"a"}, "attributes": {"a": {"href"}}},
)

def test_noclose(self):
self.assertEqual(
extract_bleach("<br>"), {"tags": {"br"}, "attributes": {"br": set()}}
extract_html_tags("<br>"), {"tags": {"br"}, "attributes": {"br": set()}}
)

0 comments on commit 6e33e1c

Please sign in to comment.