From de7d4ad95714ee3de12b2279cc4b1e24be8c09ea Mon Sep 17 00:00:00 2001 From: Arthur Date: Wed, 10 Jan 2024 13:33:27 -0800 Subject: [PATCH 1/2] 12851 replace bleach with nh3 --- base_requirements.txt | 8 ++++---- netbox/utilities/utils.py | 18 +++++++++--------- requirements.txt | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/base_requirements.txt b/base_requirements.txt index 0c7e54b13fa..87a3066c4f0 100644 --- a/base_requirements.txt +++ b/base_requirements.txt @@ -1,7 +1,3 @@ -# HTML sanitizer -# https://github.com/mozilla/bleach/blob/main/CHANGES -bleach - # The Python web framework on which NetBox is built # https://docs.djangoproject.com/en/stable/releases/ Django<5.1 @@ -108,6 +104,10 @@ mkdocstrings[python-legacy] # https://github.com/netaddr/netaddr/blob/master/CHANGELOG netaddr +# Python bindings to the ammonia HTML sanitization library. +# https://github.com/messense/nh3 +nh3 + # Fork of PIL (Python Imaging Library) for image processing # https://github.com/python-pillow/Pillow/blob/main/CHANGES.rst Pillow diff --git a/netbox/utilities/utils.py b/netbox/utilities/utils.py index f3f8c7c5042..3fdf733b59d 100644 --- a/netbox/utilities/utils.py +++ b/netbox/utilities/utils.py @@ -1,11 +1,11 @@ import datetime import decimal import json +import nh3 import re from decimal import Decimal from itertools import count, groupby -import bleach from django.contrib.contenttypes.models import ContentType from django.core import serializers from django.db.models import Count, ManyToOneRel, OuterRef, Subquery @@ -522,19 +522,19 @@ def clean_html(html, schemes): } ALLOWED_ATTRIBUTES = { - "div": ['class'], - "h1": ["id"], "h2": ["id"], "h3": ["id"], "h4": ["id"], "h5": ["id"], "h6": ["id"], - "a": ["href", "title"], - "img": ["src", "title", "alt"], - "th": ["align"], - "td": ["align"], + "div": {'class'}, + "h1": {"id"}, "h2": {"id"}, "h3": {"id"}, "h4": {"id"}, "h5": {"id"}, "h6": {"id"}, + "a": {"href", "title"}, + "img": {"src", "title", "alt"}, + "th": {"align"}, + "td": {"align"}, } - return bleach.clean( + return nh3.clean( html, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES, - protocols=schemes + url_schemes=set(schemes) ) diff --git a/requirements.txt b/requirements.txt index 788a22f9c25..cc733d4b908 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -bleach==6.1.0 Django==5.0.1 django-cors-headers==4.3.1 django-debug-toolbar==4.2.0 @@ -24,6 +23,7 @@ Markdown==3.5.1 mkdocs-material==9.5.3 mkdocstrings[python-legacy]==0.24.0 netaddr==0.9.0 +nh3==0.2.15 Pillow==10.1.0 psycopg[binary,pool]==3.1.16 PyYAML==6.0.1 From 806d35bc689ed40f0ce8b39e9d0828abe1d798ec Mon Sep 17 00:00:00 2001 From: Jeremy Stretch Date: Thu, 11 Jan 2024 08:50:59 -0500 Subject: [PATCH 2/2] Move tags & attributes lists to constants.py --- netbox/utilities/constants.py | 24 ++++++++++++++++++++++++ netbox/utilities/utils.py | 24 +++--------------------- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/netbox/utilities/constants.py b/netbox/utilities/constants.py index 3458940659b..c7c26f6b3ab 100644 --- a/netbox/utilities/constants.py +++ b/netbox/utilities/constants.py @@ -69,3 +69,27 @@ 'semicolon': ';', 'tab': '\t', } + + +# +# HTML allowed tags & attributes +# + +HTML_ALLOWED_TAGS = { + "a", "b", "blockquote", "br", "code", "dd", "del", "div", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5", "h6", + "hr", "i", "img", "li", "ol", "p", "pre", "strong", "table", "tbody", "td", "th", "thead", "tr", "ul" +} + +HTML_ALLOWED_ATTRIBUTES = { + "a": {"href", "title"}, + "div": {"class"}, + "h1": {"id"}, + "h2": {"id"}, + "h3": {"id"}, + "h4": {"id"}, + "h5": {"id"}, + "h6": {"id"}, + "img": {"alt", "src", "title"}, + "td": {"align"}, + "th": {"align"}, +} diff --git a/netbox/utilities/utils.py b/netbox/utilities/utils.py index 3fdf733b59d..ca05c35bbef 100644 --- a/netbox/utilities/utils.py +++ b/netbox/utilities/utils.py @@ -24,6 +24,7 @@ from netbox.plugins import PluginConfig from urllib.parse import urlencode from utilities.constants import HTTP_REQUEST_META_SAFE_COPY +from .constants import HTML_ALLOWED_ATTRIBUTES, HTML_ALLOWED_TAGS def title(value): @@ -511,29 +512,10 @@ def clean_html(html, schemes): Sanitizes HTML based on a whitelist of allowed tags and attributes. Also takes a list of allowed URI schemes. """ - - ALLOWED_TAGS = { - "div", "pre", "code", "blockquote", "del", - "hr", "h1", "h2", "h3", "h4", "h5", "h6", - "ul", "ol", "li", "p", "br", - "strong", "em", "a", "b", "i", "img", - "table", "thead", "tbody", "tr", "th", "td", - "dl", "dt", "dd", - } - - ALLOWED_ATTRIBUTES = { - "div": {'class'}, - "h1": {"id"}, "h2": {"id"}, "h3": {"id"}, "h4": {"id"}, "h5": {"id"}, "h6": {"id"}, - "a": {"href", "title"}, - "img": {"src", "title", "alt"}, - "th": {"align"}, - "td": {"align"}, - } - return nh3.clean( html, - tags=ALLOWED_TAGS, - attributes=ALLOWED_ATTRIBUTES, + tags=HTML_ALLOWED_TAGS, + attributes=HTML_ALLOWED_ATTRIBUTES, url_schemes=set(schemes) )