Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix <a href=" javascript:"> with leading space bypassing HTML filter #1465

Merged
merged 1 commit into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions libweasyl/defang.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@
""", re.X | re.I)


_C0_OR_SPACE = "".join(map(chr, range(0x21)))


def get_scheme(url):
"""
Get the scheme from a URL, if the URL is valid.
Expand Down Expand Up @@ -128,13 +131,15 @@ def defang(fragment):
extend_attributes = []

for key, value in child.items():
if key == "href" and child.tag == "a" and get_scheme(value) in allowed_schemes:
url = urlparse(value)
# Stripping leading and trailing C0 control characters and spaces (giving `value_stripped`) is the correct handling according to the WHATWG URL spec (though it is not the only possible validation error, and not all of them are handled here yet). It also works around CVE-2023-24329 while on Python <3.10.12.
if key == "href" and child.tag == "a" and get_scheme(value_stripped := value.strip(_C0_OR_SPACE)) in allowed_schemes:
url = urlparse(value_stripped)
extend_attributes.append((key, value_stripped))

if url.hostname not in (None, "www.weasyl.com", "weasyl.com"):
extend_attributes.append(("rel", "nofollow ugc"))
elif key == "src" and child.tag == "img" and get_scheme(value) in allowed_schemes:
pass
elif key == "src" and child.tag == "img" and get_scheme(value_stripped := value.strip(_C0_OR_SPACE)) in allowed_schemes:
extend_attributes.append((key, value_stripped))
elif key == "style" and ALLOWED_STYLE.match(value):
pass
elif key == "class":
Expand Down
23 changes: 22 additions & 1 deletion libweasyl/test/test_text.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# encoding: utf-8
from lxml import html
from lxml.etree import LIBXML_VERSION
import pytest

from libweasyl.text import markdown, markdown_excerpt, markdown_link
from libweasyl.defang import defang
from libweasyl.text import markdown, markdown_excerpt, markdown_link, strip_outer_tag


libxml_xfail = pytest.mark.xfail(LIBXML_VERSION < (2, 9), reason='libxml2 too old to preserve whitespace')
Expand Down Expand Up @@ -133,6 +135,7 @@ def test_markdown_strikethrough():
('<a href="http://example.com/">external</a>', '<a href="http://example.com/" rel="nofollow ugc">external</a>'),
('<a href="http://example.com/" rel="noreferrer">external</a>', '<a href="http://example.com/" rel="nofollow ugc">external</a>'),
("[external](//example.com/)", '<a href="//example.com/" rel="nofollow ugc">external</a>'),
('<a href=" //example.com/">external</a>', '<a href="//example.com/" rel="nofollow ugc">external</a>'),
])
def test_markdown_external_link_noreferrer(target, expected):
assert markdown(target) == "<p>%s</p>\n" % (expected,)
Expand All @@ -156,6 +159,24 @@ def test_tag_stripping():
assert markdown("<!--[if IE]><script>alert(1)</script><![endif]-->") == "\n"


# Links whose `javascript:` scheme is disguised must come out of `markdown`
# with the href attribute removed, leaving a bare <a> element.
@pytest.mark.parametrize(('target', 'expected'), [
# Scheme preceded by a leading space.
('<a href=" javascript:alert(1)">no</a>', "<a>no</a>"),
# Scheme split by `&#x09;`, the HTML character reference for a tab.
('<a href="java&#x09;script:alert(1)">no</a>', "<a>no</a>"),
])
def test_unsafe_link(target, expected):
# The rendered output is expected to be wrapped in a single paragraph.
assert markdown(target) == "<p>%s</p>\n" % (expected,)


# Exercises `defang` directly on a parsed fragment instead of going through
# `markdown`; the case here has a literal newline inside the scheme.
# NOTE(review): presumably this input cannot reach `defang` unchanged via
# `markdown` -- confirm against the Markdown renderer.
@pytest.mark.parametrize(('target', 'expected'), [
('<a href="java\nscript:alert(1)">no</a>', "<a>no</a>"),
])
def test_unsafe_link_direct(target, expected):
# Parse with a wrapping parent element so `defang` receives a single root.
fragment = html.fragment_fromstring(target, create_parent=True)
defang(fragment)
# Serialize, then drop the wrapping parent before comparing; only the middle
# part of the three returned values is checked.
start, stripped, end = strip_outer_tag(html.tostring(fragment, encoding="unicode"))
assert stripped == expected


markdown_excerpt_tests = [
('', ''),
('short', 'short'),
Expand Down
Loading