-
Notifications
You must be signed in to change notification settings - Fork 27.4k
fix(ngSanitize): follow HTML parser rules for start tags / allow < in text content #8212
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ describe('HTML', function() { | |
|
||
var handler, start, text, comment; | ||
beforeEach(function() { | ||
text = ""; | ||
handler = { | ||
start: function(tag, attrs, unary){ | ||
start = { | ||
|
@@ -35,7 +36,7 @@ describe('HTML', function() { | |
}); | ||
}, | ||
chars: function(text_){ | ||
text = text_; | ||
text += text_; | ||
}, | ||
end:function(tag) { | ||
expect(tag).toEqual(start.tag); | ||
|
@@ -81,8 +82,31 @@ describe('HTML', function() { | |
expect(text).toEqual('text'); | ||
}); | ||
|
||
it('should not treat "<" followed by a non-/ or non-letter as a tag', function() { | ||
expectHTML('<- text1 text2 <1 text1 text2 <{', handler). | ||
toBe('<- text1 text2 <1 text1 text2 <{'); | ||
}); | ||
|
||
it('should throw badparse if text content contains "<" followed by "/" without matching ">"', function() { | ||
expect(function() { | ||
htmlParser('foo </ bar', handler); | ||
}).toThrowMinErr('$sanitize', 'badparse', 'The sanitizer was unable to parse the following block of html: </ bar'); | ||
}); | ||
|
||
it('should throw badparse if text content contains "<" followed by an ASCII letter without matching ">"', function() { | ||
expect(function() { | ||
htmlParser('foo <a bar', handler); | ||
}).toThrowMinErr('$sanitize', 'badparse', 'The sanitizer was unable to parse the following block of html: <a bar'); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this really a bad text string? I would let it go as a text block. For instance:
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As far as HTML parsing is concerned, … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Although arguably we are not trying to "parse" HTML here, only sanitize text that may be inadvertently parsed by a browser later. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think that this is right. We shouldn't try to fix broken HTML. |
||
}); | ||
|
||
it('should accept tag delimiters such as "<" inside real tags', function() { | ||
// Assert that the < is part of the text node content, and not part of a tag name. | ||
htmlParser('<p> 10 < 100 </p>', handler); | ||
expect(text).toEqual(' 10 < 100 '); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. shouldn't this … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It is encoded in the real world; however, in the test, the chars handler just appends the value to a string. |
||
}); | ||
|
||
it('should parse newlines in tags', function() { | ||
htmlParser('<\ntag\n attr="value"\n>text<\n/\ntag\n>', handler); | ||
htmlParser('<tag\n attr="value"\n>text</\ntag\n>', handler); | ||
expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false}); | ||
expect(text).toEqual('text'); | ||
}); | ||
|
@@ -123,8 +147,9 @@ describe('HTML', function() { | |
expectHTML('a<!DocTyPe html>c.').toEqual('ac.'); | ||
}); | ||
|
||
it('should remove nested script', function() { | ||
expectHTML('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.').toEqual('ac.'); | ||
it('should escape non-start tags', function() { | ||
expectHTML('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.'). | ||
toBe('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.'); | ||
}); | ||
|
||
it('should remove attrs', function() { | ||
|
@@ -165,14 +190,16 @@ describe('HTML', function() { | |
expectHTML(everything).toEqual(everything); | ||
}); | ||
|
||
it('should handle improper html', function() { | ||
it('should mangle improper html', function() { | ||
// This text is encoded more than a real HTML parser would, but it should render the same. | ||
expectHTML('< div rel="</div>" alt=abc dir=\'"\' >text< /div>'). | ||
toEqual('<div rel="</div>" alt="abc" dir=""">text</div>'); | ||
toBe('< div rel="" alt=abc dir=\'"\' >text< /div>'); | ||
}); | ||
|
||
it('should handle improper html2', function() { | ||
it('should mangle improper html2', function() { | ||
// A proper HTML parser would clobber this more in most cases, but it looks reasonable. | ||
expectHTML('< div rel="</div>" / >'). | ||
toEqual('<div rel="</div>"/>'); | ||
toBe('< div rel="" / >'); | ||
}); | ||
|
||
it('should ignore back slash as escape', function() { | ||
|
@@ -195,6 +222,12 @@ describe('HTML', function() { | |
expectHTML('\na\n').toEqual(' a '); | ||
}); | ||
|
||
it('should accept tag delimiters such as "<" inside real tags (with nesting)', function() { | ||
//this is an integrated version of the 'should accept tag delimiters such as "<" inside real tags' test | ||
expectHTML('<p> 10 < <span>100</span> </p>') | ||
.toEqual('<p> 10 < <span>100</span> </p>'); | ||
}); | ||
|
||
describe('htmlSanitizerWriter', function() { | ||
/* global htmlSanitizeWriter: false */ | ||
if (angular.isUndefined(window.htmlSanitizeWriter)) return; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
/cc @IgorMinar PTAL --- This particular block is only here to make sure that we throw if we find an apparent start-tag without a trailing `>`.
This might not be the right thing to do --- if we don't have a trailing `>`, we could potentially just treat it as a text node. I'm not sure what the best thing to do in this case is.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it is better to treat it as a text node. IMO the sanitizer should be secure but tolerant.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that's fine.