Skip to content

Commit

Permalink
Fix #80 and #87
Browse files Browse the repository at this point in the history
  • Loading branch information
qll authored and lepture committed Feb 26, 2016
1 parent 0ad832f commit 6a9d23d
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 11 deletions.
21 changes: 17 additions & 4 deletions mistune.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@


_key_pattern = re.compile(r'\s+')
_nonalpha_pattern = re.compile(r'\W')
_escape_pattern = re.compile(r'&(?!#?\w+;)')
_newline_pattern = re.compile(r'\r\n|\r')
_block_quote_leading_pattern = re.compile(r'^ *> ?', flags=re.M)
Expand All @@ -36,6 +37,7 @@
_valid_end = r'(?!:/|[^\w\s@]*@)\b'
_valid_attr = r'''"[^"]*"|'[^']*'|[^'">]'''
_block_tag = r'(?!(?:%s)\b)\w+%s' % ('|'.join(_inline_tags), _valid_end)
_scheme_blacklist = ('javascript', 'data', 'vbscript')


def _pure_pattern(regex):
Expand Down Expand Up @@ -70,6 +72,19 @@ def escape(text, quote=False, smart_amp=True):
return text


def escape_link(url, **kwargs):
    """Remove dangerous URL schemes like javascript: and escape afterwards.

    :param url: raw link target taken from the markdown source.
    :param kwargs: extra keyword arguments forwarded to ``escape``
                   (e.g. ``quote=True``); ``smart_amp`` is always
                   overridden to ``False``.
    :return: an empty string when the URL scheme is blacklisted,
             otherwise the escaped URL.
    """
    if ':' in url:
        scheme, _ = url.split(':', 1)
        # Drop non-word characters (e.g. a leading control character such
        # as \x1A) before comparing, and lowercase the result: URL schemes
        # are case-insensitive, so "JavaScript:" must be caught as well.
        scheme = _nonalpha_pattern.sub('', scheme).lower()
        # whitelist would be better but mistune's use case is too general
        if scheme in _scheme_blacklist:
            return ''
    # escape &entities; to &amp;entities;
    kwargs['smart_amp'] = False
    return escape(url, **kwargs)


def preprocessing(text, tab=4):
text = _newline_pattern.sub('\n', text)
text = text.replace('\t', ' ' * tab)
Expand Down Expand Up @@ -838,8 +853,7 @@ def link(self, link, title, text):
:param title: title content for `title` attribute.
:param text: text content for description.
"""
if link.startswith('javascript:'):
link = ''
link = escape_link(link, quote=True)
if not title:
return '<a href="%s">%s</a>' % (link, text)
title = escape(title, quote=True)
Expand All @@ -852,8 +866,7 @@ def image(self, src, title, text):
:param title: title text of the image.
:param text: alt text of the image.
"""
if src.startswith('javascript:'):
src = ''
src = escape_link(src, quote=True)
text = escape(text, quote=True)
if title:
title = escape(title, quote=True)
Expand Down
6 changes: 3 additions & 3 deletions tests/fixtures/normal/amps_and_angles_encoding.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@

<p>6 &gt; 5.</p>

<p>Here's a <a href="http://example.com/?foo=1&bar=2">link</a> with an ampersand in the URL.</p>
<p>Here's a <a href="http://example.com/?foo=1&amp;bar=2">link</a> with an ampersand in the URL.</p>

<p>Here's a link with an amersand in the link text: <a href="http://att.com/" title="AT&amp;T">AT&amp;T</a>.</p>

<p>Here's an inline <a href="/script?foo=1&bar=2">link</a>.</p>
<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.</p>

<p>Here's an inline <a href="/script?foo=1&bar=2">link</a>.</p>
<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.</p>
23 changes: 19 additions & 4 deletions tests/test_extra.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,25 @@ def test_linebreak():


def test_safe_links():
    """Check that unsafe link/image targets are emptied or escaped."""
    html = mistune.markdown('javascript ![foo](<javascript:alert>) alert')
    assert 'src=""' in html
    html = mistune.markdown('javascript [foo](<javascript:alert>) alert')
    assert 'href=""' in html

    # Each entry pairs a raw markdown target with the attribute value the
    # renderer is expected to emit for it.
    cases = (
        # "standard" javascript pseudo protocol
        ('javascript:alert`1`', ''),
        # javascript pseudo protocol with entities
        ('javascript&colon;alert`1`', 'javascript&amp;colon;alert`1`'),
        # javascript pseudo protocol with prefix (dangerous in Chrome)
        ('\x1Ajavascript:alert`1`', ''),
        # data-URI (dangerous in Firefox)
        ('data:text/html,<script>alert`1`</script>', ''),
        # vbscript-URI (dangerous in Internet Explorer)
        ('vbscript:msgbox', ''),
        # breaking out of the attribute
        ('"<>', '&quot;&lt;&gt;'),
    )
    for payload, wanted in cases:
        # image
        src_attr = 'src="%s"' % wanted
        assert src_attr in mistune.markdown('![atk](%s)' % payload)
        # link
        href_attr = 'href="%s"' % wanted
        assert href_attr in mistune.markdown('[atk](%s)' % payload)


def test_skip_style():
Expand Down

0 comments on commit 6a9d23d

Please sign in to comment.