Skip to content
This repository has been archived by the owner on Jan 29, 2020. It is now read-only.

Commit

Permalink
Merge pull request #3 from zerocrates/escapehtmlattr-supplementary
Browse files Browse the repository at this point in the history
Fix encoding of supplementary characters
  • Loading branch information
weierophinney committed Jun 30, 2016
2 parents 37227d6 + 3a30cef commit e04091f
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 3 deletions.
12 changes: 9 additions & 3 deletions src/Escaper.php
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ protected function htmlAttrMatcher($matches)
* replace it with while grabbing the integer value of the character.
*/
if (strlen($chr) > 1) {
- $chr = $this->convertEncoding($chr, 'UTF-16BE', 'UTF-8');
+ $chr = $this->convertEncoding($chr, 'UTF-32BE', 'UTF-8');
}

$hex = bin2hex($chr);
Expand Down Expand Up @@ -277,7 +277,13 @@ protected function jsMatcher($matches)
return sprintf('\\x%02X', ord($chr));
}
$chr = $this->convertEncoding($chr, 'UTF-16BE', 'UTF-8');
- return sprintf('\\u%04s', strtoupper(bin2hex($chr)));
+ $hex = strtoupper(bin2hex($chr));
+ if (strlen($hex) <= 4) {
+ return sprintf('\\u%04s', $hex);
+ }
+ $highSurrogate = substr($hex, 0, 4);
+ $lowSurrogate = substr($hex, 4, 4);
+ return sprintf('\\u%04s\\u%04s', $highSurrogate, $lowSurrogate);
}

/**
Expand All @@ -293,7 +299,7 @@ protected function cssMatcher($matches)
if (strlen($chr) == 1) {
$ord = ord($chr);
} else {
- $chr = $this->convertEncoding($chr, 'UTF-16BE', 'UTF-8');
+ $chr = $this->convertEncoding($chr, 'UTF-32BE', 'UTF-8');
$ord = hexdec(bin2hex($chr));
}
return sprintf('\\%X ', $ord);
Expand Down
6 changes: 6 additions & 0 deletions test/EscaperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ class EscaperTest extends \PHPUnit_Framework_TestCase
'&' => '&amp;',
/* Characters beyond ASCII value 255 to unicode escape */
'Ā' => '&#x0100;',
+ /* Characters beyond Unicode BMP to unicode escape */
+ "\xF0\x90\x80\x80" => '&#x10000;',
/* Immune chars excluded */
',' => ',',
'.' => '.',
Expand Down Expand Up @@ -79,6 +81,8 @@ class EscaperTest extends \PHPUnit_Framework_TestCase
'&' => '\\x26',
/* Characters beyond ASCII value 255 to unicode escape */
'Ā' => '\\u0100',
+ /* Characters beyond Unicode BMP to unicode escape */
+ "\xF0\x90\x80\x80" => '\\uD800\\uDC00',
/* Immune chars excluded */
',' => ',',
'.' => '.',
Expand Down Expand Up @@ -143,6 +147,8 @@ class EscaperTest extends \PHPUnit_Framework_TestCase
'&' => '\\26 ',
/* Characters beyond ASCII value 255 to unicode escape */
'Ā' => '\\100 ',
+ /* Characters beyond Unicode BMP to unicode escape */
+ "\xF0\x90\x80\x80" => '\\10000 ',
/* Immune chars excluded */
',' => '\\2C ',
'.' => '\\2E ',
Expand Down

0 comments on commit e04091f

Please sign in to comment.