From 5700b1691d11acd5bcbeef8d82a097666ec84038 Mon Sep 17 00:00:00 2001 From: Yutaka Yamaguchi Date: Sun, 30 Mar 2014 06:45:36 +0900 Subject: [PATCH] fix(ngSanitize): encode surrogate pair properly The encodeEntities function encodes non-alphanumeric characters to entities with charCodeAt. charCodeAt does not return a single value for characters whose Unicode code points are higher than 65,535. Instead, it returns one half of a surrogate pair, and this is why emoji, which have higher code points, are garbled. We need to handle them properly. Closes #5088 --- src/ngSanitize/sanitize.js | 6 ++++++ test/ngSanitize/sanitizeSpec.js | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js index 38d088bbe407..74441b3bf903 100644 --- a/src/ngSanitize/sanitize.js +++ b/src/ngSanitize/sanitize.js @@ -161,6 +161,7 @@ var START_TAG_REGEXP = COMMENT_REGEXP = //g, DOCTYPE_REGEXP = /]*?)>/i, CDATA_REGEXP = //g, + SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g, // Match everything outside of normal chars and " (quote character) NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g; @@ -399,6 +400,11 @@ function decodeEntities(value) { function encodeEntities(value) { return value. replace(/&/g, '&'). + replace(SURROGATE_PAIR_REGEXP, function (value) { + var hi = value.charCodeAt(0); + var low = value.charCodeAt(1); + return '&#' + (((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000) + ';'; + }). replace(NON_ALPHANUMERIC_REGEXP, function(value){ return '&#' + value.charCodeAt(0) + ';'; }). diff --git a/test/ngSanitize/sanitizeSpec.js b/test/ngSanitize/sanitizeSpec.js index fbffbba5cbeb..a086d2f7582d 100644 --- a/test/ngSanitize/sanitizeSpec.js +++ b/test/ngSanitize/sanitizeSpec.js @@ -239,6 +239,11 @@ describe('HTML', function() { expect(html).toEqual('
'); }); + it('should handle surrogate pair', function() { + writer.chars(String.fromCharCode(55357, 56374)); + expect(html).toEqual('🐶'); + }); + describe('explicitly disallow', function() { it('should not allow attributes', function() { writer.start('div', {id:'a', name:'a', style:'a'});