Skip to content
This repository was archived by the owner on Apr 12, 2024. It is now read-only.

Commit 3d0b49c

Browse files
memologcaitp
authored and committed
fix(ngSanitize): encode surrogate pair properly
The encodeEntities function encodes non-alphanumeric characters to entities using charCodeAt. charCodeAt does not return a single value for a character whose Unicode code point is higher than 65,535 (0xFFFF). Such a character is stored as a surrogate pair, and charCodeAt returns each half separately, which is why emoji with higher code points are garbled. We need to handle them properly. Closes #5088 Closes #6911
1 parent b6aec56 commit 3d0b49c

File tree

2 files changed

+11
-0
lines changed

2 files changed

+11
-0
lines changed

src/ngSanitize/sanitize.js

+6
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ var START_TAG_REGEXP =
161161
COMMENT_REGEXP = /<!--(.*?)-->/g,
162162
DOCTYPE_REGEXP = /<!DOCTYPE([^>]*?)>/i,
163163
CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g,
164+
SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
164165
// Match everything outside of normal chars and " (quote character)
165166
NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g;
166167

@@ -399,6 +400,11 @@ function decodeEntities(value) {
399400
function encodeEntities(value) {
400401
return value.
401402
replace(/&/g, '&amp;').
403+
replace(SURROGATE_PAIR_REGEXP, function (value) {
404+
var hi = value.charCodeAt(0);
405+
var low = value.charCodeAt(1);
406+
return '&#' + (((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000) + ';';
407+
}).
402408
replace(NON_ALPHANUMERIC_REGEXP, function(value){
403409
return '&#' + value.charCodeAt(0) + ';';
404410
}).

test/ngSanitize/sanitizeSpec.js

+5
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,11 @@ describe('HTML', function() {
239239
expect(html).toEqual('<div>');
240240
});
241241

242+
it('should handle surrogate pair', function() {
243+
writer.chars(String.fromCharCode(55357, 56374));
244+
expect(html).toEqual('&#128054;');
245+
});
246+
242247
describe('explicitly disallow', function() {
243248
it('should not allow attributes', function() {
244249
writer.start('div', {id:'a', name:'a', style:'a'});

0 commit comments

Comments
 (0)