diff --git a/lib/Sabberworm/CSS/Parser.php b/lib/Sabberworm/CSS/Parser.php index 409b796d..dac27adf 100644 --- a/lib/Sabberworm/CSS/Parser.php +++ b/lib/Sabberworm/CSS/Parser.php @@ -32,11 +32,18 @@ class Parser { private $aText; private $iCurrentPosition; private $oParserSettings; - private $sCharset; + private $sOriginalCharset; private $iLength; private $blockRules; private $aSizeUnits; private $iLineNo; + private $sTextLibrary; + + const BOM8 = "\xef\xbb\xbf"; + const BOM16BE = "\xfe\xff"; + const BOM16LE = "\xff\xfe"; + const BOM32BE = "\x00\x00\xfe\xff"; + const BOM32LE = "\xff\xfe\x00\x00"; /** * Parser constructor. @@ -64,20 +71,91 @@ public function __construct($sText, Settings $oParserSettings = null, $iLineNo = $this->aSizeUnits[$iSize][strtolower($val)] = $val; } ksort($this->aSizeUnits, SORT_NUMERIC); - } + $this->fixCharset(); + } + + private function fixCharset() { + // We need to know the charset before the parsing starts, + // UTF BOMs have the highest precedence and must be removed before other processing. 
+ $this->sOriginalCharset = strtolower($this->oParserSettings->sDefaultCharset); + if (strpos($this->sText, self::BOM8) === 0) { + $this->sText = substr($this->sText, strlen(self::BOM8)); + $this->sOriginalCharset = 'utf-8'; + } else if (strpos($this->sText, self::BOM32BE) === 0) { + $this->sText = substr($this->sText, strlen(self::BOM32BE)); + $this->sOriginalCharset = 'utf-32be'; + } else if (strpos($this->sText, self::BOM32LE) === 0) { + $this->sText = substr($this->sText, strlen(self::BOM32LE)); + $this->sOriginalCharset = 'utf-32le'; + } else if (strpos($this->sText, self::BOM16BE) === 0) { + $this->sText = substr($this->sText, strlen(self::BOM16BE)); + $this->sOriginalCharset = 'utf-16be'; + } else if (strpos($this->sText, self::BOM16LE) === 0) { + $this->sText = substr($this->sText, strlen(self::BOM16LE)); + $this->sOriginalCharset = 'utf-16le'; + } else if (preg_match('/(.*)@charset\s+["\']([a-z0-9-]+)["\']\s*;/ims', $this->sText, $aMatches)) { + // This is a simplified guessing, the charset atRule location is validated later, + // hopefully this is not used much these days. + if (trim($aMatches[1]) === '' and preg_match('/^@charset\s+["\']([a-z0-9-]+)["\']\s*;/im', $aMatches[0])) { + $this->sOriginalCharset = strtolower($aMatches[2]); + } + } + + // Convert all text to utf-8 so that code does not have to deal with encoding conversions and incompatible characters. + if ($this->sOriginalCharset !== 'utf-8') { + if (function_exists('mb_convert_encoding')) { + $this->sText = mb_convert_encoding($this->sText, 'utf-8', $this->sOriginalCharset); + } else { + $this->sText = iconv($this->sOriginalCharset, 'utf-8', $this->sText); + } + } - public function setCharset($sCharset) { - $this->sCharset = $sCharset; - $this->aText = $this->strsplit($this->sText); - $this->iLength = count($this->aText); + // Multibyte support can make the parsing slower, + // but even if it is disabled the unicode characters usually survive this parsing unharmed. 
+ $this->sTextLibrary = 'ascii'; + if (!$this->oParserSettings->bMultibyteSupport) { + $this->iLength = strlen($this->sText); + return; + } + + // If there are only ASCII characters in the CSS then we can safely use good old PHP string functions here. + if (function_exists('mb_convert_encoding')) { + $sSubst = mb_substitute_character(); + mb_substitute_character('none'); + $asciiText = mb_convert_encoding($this->sText, 'ASCII', 'utf-8'); + mb_substitute_character($sSubst); + } else { + $asciiText = @iconv('utf-8', 'ASCII//IGNORE', $this->sText); + } + if ($this->sText !== $asciiText) { + if (function_exists('mb_convert_encoding')) { + // Usually mbstring extension is much faster than iconv. + $this->sTextLibrary = 'mb'; + } else { + $this->sTextLibrary = 'iconv'; + } + } + unset($asciiText); + $this->iLength = $this->strlen($this->sText); + + // Substring operations are slower with unicode, aText array is used for faster emulation. + if ($this->sTextLibrary !== 'ascii') { + $this->aText = preg_split('//u', $this->sText, null, PREG_SPLIT_NO_EMPTY); + if (!is_array($this->aText) || count($this->aText) !== $this->iLength) { + $this->aText = null; + } + } } public function getCharset() { - return $this->sCharset; + return 'utf-8'; + } + + public function getOriginalCharset() { + return $this->sOriginalCharset; } public function parse() { - $this->setCharset($this->oParserSettings->sDefaultCharset); $oResult = new Document($this->iLineNo); $this->parseDocument($oResult); return $oResult; @@ -88,8 +166,12 @@ private function parseDocument(Document $oDocument) { } private function parseList(CSSList $oList, $bIsRoot = false) { - while (!$this->isEnd()) { + while (true) { $comments = $this->consumeWhiteSpace(); + if ($this->isEnd()) { + // End of file, ignore any trailing comments. 
+ break; + } $oListItem = null; if($this->oParserSettings->bLenientParsing) { try { @@ -113,7 +195,7 @@ private function parseList(CSSList $oList, $bIsRoot = false) { throw new SourceException("Unexpected end of document", $this->iLineNo); } } - + private function parseListItem(CSSList $oList, $bIsRoot = false) { if ($this->comes('@')) { $oAtRule = $this->parseAtRule(); @@ -124,11 +206,11 @@ private function parseListItem(CSSList $oList, $bIsRoot = false) { if(count($oList->getContents()) > 0) { throw new UnexpectedTokenException('@charset must be the first parseable token in a document', '', 'custom', $this->iLineNo); } - $this->setCharset($oAtRule->getCharset()->getString()); + // We have already guessed the charset in the constructor, it cannot be changed now. } return $oAtRule; } else if ($this->comes('}')) { - $this->consume('}'); + $this->consumeUnsafe(1); if ($bIsRoot) { throw new SourceException("Unopened {", $this->iLineNo); } else { @@ -154,10 +236,22 @@ private function parseAtRule() { $this->consume(';'); return new Import($oLocation, $sMediaQuery, $iIdentifierLineNum); } else if ($sIdentifier === 'charset') { - $sCharset = $this->parseStringValue(); + $oCharset = $this->parseStringValue(); $this->consumeWhiteSpace(); $this->consume(';'); - return new Charset($sCharset, $iIdentifierLineNum); + if (!$this->oParserSettings->bLenientParsing) { + $sExpectedCharset = $this->getOriginalCharset(); + if ($sExpectedCharset === 'utf-16le' || $sExpectedCharset === 'utf-16be') { + $sExpectedCharset = 'utf-16'; + } else if ($sExpectedCharset === 'utf-32le' || $sExpectedCharset === 'utf-32be') { + $sExpectedCharset = 'utf-32'; + } + if (strtolower($oCharset->getString()) !== $sExpectedCharset) { + throw new UnexpectedTokenException('@charset value does not match detected value', '', 'custom', $this->iLineNo); + } + } + // Replace the original charset with utf-8 because we have changed the encoding in the constructor. 
+ return new Charset(new CSSString('utf-8', $this->iLineNo), $iIdentifierLineNum); } else if ($this->identifierIs($sIdentifier, 'keyframes')) { $oResult = new KeyFrame($iIdentifierLineNum); $oResult->setVendorKeyFrame($sIdentifier); @@ -213,7 +307,7 @@ private function parseIdentifier($bAllowFunctions = true, $bIgnoreCase = true) { $sResult = $this->strtolower($sResult); } if ($bAllowFunctions && $this->comes('(')) { - $this->consume('('); + $this->consumeUnsafe(1); $aArguments = $this->parseValue(array('=', ' ', ',')); $sResult = new CSSFunction($sResult, $aArguments, ',', $this->iLineNo); $this->consume(')'); @@ -230,7 +324,7 @@ private function parseStringValue() { $sQuote = '"'; } if ($sQuote !== null) { - $this->consume($sQuote); + $this->consumeUnsafe(1); } $sResult = ""; $sContent = null; @@ -247,33 +341,39 @@ private function parseStringValue() { } $sResult .= $sContent; } - $this->consume($sQuote); + $this->consumeUnsafe(1); // Consuming quote. } return new CSSString($sResult, $this->iLineNo); } private function parseCharacter($bIsForIdentifier) { - if ($this->peek() === '\\') { + $peek = $this->peek(); + if ($peek === '\\') { if ($bIsForIdentifier && $this->oParserSettings->bLenientParsing && ($this->comes('\0') || $this->comes('\9'))) { // Non-strings can contain \0 or \9 which is an IE hack supported in lenient parsing. 
return null; } - $this->consume('\\'); - if ($this->comes('\n') || $this->comes('\r')) { + $this->consumeUnsafe(1); // Consuming \ + $peek = $this->peek(); + if ($peek === '\n' || $peek === '\r') { return ''; } - if (preg_match('/[0-9a-fA-F]/Su', $this->peek()) === 0) { - return $this->consume(1); - } - $sUnicode = $this->consumeExpression('/^[0-9a-fA-F]{1,6}/u'); - if ($this->strlen($sUnicode) < 6) { - //Consume whitespace after incomplete unicode escape - if (preg_match('/\\s/isSu', $this->peek())) { - if ($this->comes('\r\n')) { - $this->consume(2); - } else { - $this->consume(1); - } + if (preg_match('/[0-9a-fA-F]/Su', $peek) === 0) { + $this->consumeUnsafe($peek); + return $peek; + } + $peek6 = $this->peek(6); + if (!preg_match('/^[0-9a-fA-F]{1,6}/', $peek6, $aMatches)) { + throw new UnexpectedTokenException('Invalid hex encoded unicode character', $peek6, 'custom', $this->iLineNo); + } + $sUnicode = $aMatches[0]; + $iUnicodeLength = strlen($sUnicode); + $this->consumeUnsafe($iUnicodeLength); // Consuming hex string + if ($iUnicodeLength < 6) { + // Consume one space after incomplete unicode escape if present + $peek = $this->peek(); + if ($peek === ' ') { + $this->consumeUnsafe(1); } } $iUnicode = intval($sUnicode, 16); @@ -282,23 +382,30 @@ private function parseCharacter($bIsForIdentifier) { $sUtf32 .= chr($iUnicode & 0xff); $iUnicode = $iUnicode >> 8; } - return iconv('utf-32le', $this->sCharset, $sUtf32); + $sChar = iconv('utf-32le', 'utf-8', $sUtf32); + if ($sChar === chr(0)) { + // PHP does not like null characters in strings for security reasons, just ignore them. 
+ return ''; + } + return $sChar; } if ($bIsForIdentifier) { - $peek = ord($this->peek()); + $ordPeek = ord($peek); // Ranges: a-z A-Z 0-9 - _ - if (($peek >= 97 && $peek <= 122) || - ($peek >= 65 && $peek <= 90) || - ($peek >= 48 && $peek <= 57) || - ($peek === 45) || - ($peek === 95) || - ($peek > 0xa1)) { - return $this->consume(1); - } + if (($ordPeek >= 97 && $ordPeek <= 122) || + ($ordPeek >= 65 && $ordPeek <= 90) || + ($ordPeek >= 48 && $ordPeek <= 57) || + ($ordPeek === 45) || + ($ordPeek === 95) || + ($ordPeek > 0xa1)) { + $this->consumeUnsafe($peek); + return $peek; + } + return null; } else { - return $this->consume(1); + $this->consumeUnsafe($peek); + return $peek; } - return null; } private function parseSelector() { @@ -312,9 +419,19 @@ private function parseSelector() { private function parseRuleSet($oRuleSet) { while ($this->comes(';')) { - $this->consume(';'); + $this->consumeUnsafe(1); } - while (!$this->comes('}')) { + while (true) { + $peek = $this->peek(); + if ($peek === '}') { + $this->consumeUnsafe(1); + return; + } + if ($peek === '') { + // End of file reached + return; + } + $oRule = null; if($this->oParserSettings->bLenientParsing) { try { @@ -323,11 +440,11 @@ private function parseRuleSet($oRuleSet) { try { $sConsume = $this->consumeUntil(array("\n", ";", '}'), true); // We need to “unfind” the matches to the end of the ruleSet as this will be matched later - if($this->streql(substr($sConsume, -1), '}')) { + if(substr($sConsume, -1) === '}') { // Safe with utf-8 now --$this->iCurrentPosition; } else { while ($this->comes(';')) { - $this->consume(';'); + $this->consumeUnsafe(1); } } } catch (UnexpectedTokenException $e) { @@ -342,11 +459,14 @@ private function parseRuleSet($oRuleSet) { $oRuleSet->addRule($oRule); } } - $this->consume('}'); } private function parseRule() { $aComments = $this->consumeWhiteSpace(); + if ($this->peek() === '}') { + // We have reached the end of rule set, any comments at the end will be ignored + return 
null; + } $oRule = new Rule($this->parseIdentifier(), $this->iLineNo); $oRule->setComments($aComments); $oRule->addComments($this->consumeWhiteSpace()); @@ -355,19 +475,19 @@ private function parseRule() { $oRule->setValue($oValue); if ($this->oParserSettings->bLenientParsing) { while ($this->comes('\\')) { - $this->consume('\\'); + $this->consumeUnsafe(1); $oRule->addIeHack($this->consume()); $this->consumeWhiteSpace(); } } if ($this->comes('!')) { - $this->consume('!'); + $this->consumeUnsafe(1); $this->consumeWhiteSpace(); $this->consume('important'); $oRule->setIsImportant(true); } while ($this->comes(';')) { - $this->consume(';'); + $this->consumeUnsafe(1); } return $oRule; } @@ -428,16 +548,18 @@ private static function listDelimiterForRule($sRule) { private function parsePrimitiveValue() { $oValue = null; $this->consumeWhiteSpace(); - if (is_numeric($this->peek()) || ($this->comes('-.') && is_numeric($this->peek(1, 2))) || (($this->comes('-') || $this->comes('.')) && is_numeric($this->peek(1, 1)))) { - $oValue = $this->parseNumericValue(); - } else if ($this->comes('#') || $this->comes('rgb', true) || $this->comes('hsl', true)) { + $peek = $this->peek(); + $lowerpeek = $this->strtolower($peek); + if ($peek === "'" || $peek === '"') { + $oValue = $this->parseStringValue(); + } else if ($peek === '#' || ($lowerpeek === 'r' && $this->comes('rgb', true)) || ($lowerpeek === 'h' &&$this->comes('hsl', true))) { $oValue = $this->parseColorValue(); - } else if ($this->comes('url', true)) { + } else if ($lowerpeek === 'u' && $this->comes('url', true)) { $oValue = $this->parseURLValue(); - } else if ($this->comes("'") || $this->comes('"')) { - $oValue = $this->parseStringValue(); - } else if ($this->comes("progid:") && $this->oParserSettings->bLenientParsing) { + } else if ($lowerpeek === 'p' && $this->oParserSettings->bLenientParsing && $this->comes("progid:")) { $oValue = $this->parseMicrosoftFilter(); + } else if (is_numeric($peek) || ($this->comes('-.') && 
is_numeric($this->peek(1, 2))) || (($peek === '-' || $peek === '.') && is_numeric($this->peek(1, 1)))) { + $oValue = $this->parseNumericValue(); } else { $oValue = $this->parseIdentifier(true, false); } @@ -447,20 +569,35 @@ private function parsePrimitiveValue() { private function parseNumericValue($bForColor = false) { $sSize = ''; - if ($this->comes('-')) { - $sSize .= $this->consume('-'); - } - while (is_numeric($this->peek()) || $this->comes('.')) { - if ($this->comes('.')) { - $sSize .= $this->consume('.'); - } else { - $sSize .= $this->consume(1); + $bHasDot = false; + while (true) { + $peek = $this->peek(); + if ($peek === '') { + // End of file, this is weird + break; + } + if ($sSize === '' && $peek === '-') { + $sSize .= '-'; + $this->consumeUnsafe(1); + continue; } + if (!$bHasDot && $peek === '.') { + $bHasDot = true; + $sSize .= '.'; + $this->consumeUnsafe(1); + continue; + } + if (is_numeric($peek)) { + $sSize .= $peek; + $this->consumeUnsafe($peek); + continue; + } + break; } $sUnit = null; foreach ($this->aSizeUnits as $iLength => &$aValues) { - $sKey = strtolower($this->peek($iLength)); + $sKey = strtolower($this->peek($iLength)); // Length is always ascii if(array_key_exists($sKey, $aValues)) { if (($sUnit = $aValues[$sKey]) !== null) { $this->consume($iLength); @@ -474,7 +611,7 @@ private function parseNumericValue($bForColor = false) { private function parseColorValue() { $aColor = array(); if ($this->comes('#')) { - $this->consume('#'); + $this->consumeUnsafe(1); $sValue = $this->parseIdentifier(false); if ($this->strlen($sValue) === 3) { $sValue = $sValue[0] . $sValue[0] . $sValue[1] . $sValue[1] . $sValue[2] . $sValue[2]; @@ -529,8 +666,8 @@ private function identifierIs($sIdentifier, $sMatch) { } private function comes($sString, $bCaseInsensitive = false) { - $sPeek = $this->peek(strlen($sString)); - return ($sPeek == '') + $sPeek = $this->peek($this->strlen($sString)); + return ($sPeek === '') ? 
false : $this->streql($sPeek, $sString, $bCaseInsensitive); } @@ -540,6 +677,13 @@ private function peek($iLength = 1, $iOffset = 0) { if ($iOffset >= $this->iLength) { return ''; } + if ($iLength === 1) { + if ($this->aText !== null) { + return $this->aText[$iOffset]; + } else { + return $this->sText[$iOffset]; + } + } return $this->substr($iOffset, $iLength); } @@ -565,19 +709,32 @@ private function consume($mValue = 1) { } } - private function consumeExpression($mExpression) { - $aMatches = null; - if (preg_match($mExpression, $this->inputLeft(), $aMatches, PREG_OFFSET_CAPTURE) === 1) { - return $this->consume($aMatches[0][0]); + /** + * Consume characters without any safety checks. + * + * Make sure there are no newlines when giving integer value. + * + * NOTE: use only after peek() and comes()! + * + * @param int|string $mValue + * @return void + */ + private function consumeUnsafe($mValue) { + if (is_string($mValue)) { + $iLineCount = substr_count($mValue, "\n"); + $this->iLineNo += $iLineCount; + $this->iCurrentPosition += $this->strlen($mValue); + } else { + // Must not have newlines!!! + $this->iCurrentPosition += $mValue; } - throw new UnexpectedTokenException($mExpression, $this->peek(5), 'expression', $this->iLineNo); } private function consumeWhiteSpace() { $comments = array(); do { - while (preg_match('/\\s/isSu', $this->peek()) === 1) { - $this->consume(1); + while (preg_match('/^\\s/isSu', $this->peek(), $aMatches)) { + $this->consumeUnsafe($aMatches[0]); } if($this->oParserSettings->bLenientParsing) { try { @@ -585,7 +742,7 @@ private function consumeWhiteSpace() { } catch(UnexpectedTokenException $e) { // When we can’t find the end of a comment, we assume the document is finished. 
$this->iCurrentPosition = $this->iLength; - return; + return $comments; } } else { $oComment = $this->consumeComment(); @@ -601,26 +758,28 @@ private function consumeWhiteSpace() { * @return false|Comment */ private function consumeComment() { - $mComment = false; if ($this->comes('/*')) { $iLineNo = $this->iLineNo; - $this->consume(1); + $this->consumeUnsafe(2); $mComment = ''; - while (($char = $this->consume(1)) !== '') { - $mComment .= $char; - if ($this->comes('*/')) { - $this->consume(2); + while (true) { + $peek = $this->peek(); + if ($peek === '') { + if (!$this->oParserSettings->bLenientParsing) { + throw new UnexpectedTokenException('*/', '', 'search', $this->iLineNo); + } + break; + } + if ($peek === '*' && $this->comes('*/')) { + $this->consumeUnsafe(2); break; } + $this->consumeUnsafe($peek); + $mComment .= $peek; } + return new Comment($mComment, $iLineNo); } - - if ($mComment !== false) { - // We skip the * which was included in the comment. - return new Comment(substr($mComment, 1), $iLineNo); - } - - return $mComment; + return false; } private function isEnd() { @@ -651,14 +810,21 @@ private function consumeUntil($aEnd, $bIncludeEnd = false, $consumeEnd = false, throw new UnexpectedTokenException('One of ("'.implode('","', $aEnd).'")', $this->peek(5), 'search', $this->iLineNo); } - private function inputLeft() { - return $this->substr($this->iCurrentPosition, -1); - } - private function substr($iStart, $iLength) { - if ($iLength < 0) { - $iLength = $this->iLength - $iStart + $iLength; + if ($iLength <= 0 || $iStart >= $this->iLength) { + return ''; + } + if ($this->sTextLibrary === 'ascii') { + return substr($this->sText, $iStart, $iLength); } + if ($iLength > 100 || $iStart < 0 || !isset($this->aText)) { + if ($this->sTextLibrary === 'mb') { + return mb_substr($this->sText, $iStart, $iLength, 'utf-8'); + } else { + return iconv_substr($this->sText, $iStart, $iLength, 'utf-8'); + } + } + // Use faster substr emulation for short unicode lengths. 
if ($iStart + $iLength > $this->iLength) { $iLength = $this->iLength - $iStart; } @@ -672,8 +838,10 @@ private function substr($iStart, $iLength) { } private function strlen($sString) { - if ($this->oParserSettings->bMultibyteSupport) { - return mb_strlen($sString, $this->sCharset); + if ($this->sTextLibrary === 'mb') { + return mb_strlen($sString, 'utf-8'); + } else if ($this->sTextLibrary === 'iconv') { + return iconv_strlen($sString, 'utf-8'); } else { return strlen($sString); } @@ -688,40 +856,12 @@ private function streql($sString1, $sString2, $bCaseInsensitive = true) { } private function strtolower($sString) { - if ($this->oParserSettings->bMultibyteSupport) { - return mb_strtolower($sString, $this->sCharset); + if ($this->sTextLibrary === 'mb') { + return mb_strtolower($sString, 'utf-8'); } else { + // Iconv cannot lowercase strings, bad luck return strtolower($sString); } } - private function strsplit($sString) { - if ($this->oParserSettings->bMultibyteSupport) { - if ($this->streql($this->sCharset, 'utf-8')) { - return preg_split('//u', $sString, null, PREG_SPLIT_NO_EMPTY); - } else { - $iLength = mb_strlen($sString, $this->sCharset); - $aResult = array(); - for ($i = 0; $i < $iLength; ++$i) { - $aResult[] = mb_substr($sString, $i, 1, $this->sCharset); - } - return $aResult; - } - } else { - if($sString === '') { - return array(); - } else { - return str_split($sString); - } - } - } - - private function strpos($sString, $sNeedle, $iOffset) { - if ($this->oParserSettings->bMultibyteSupport) { - return mb_strpos($sString, $sNeedle, $iOffset, $this->sCharset); - } else { - return strpos($sString, $sNeedle, $iOffset); - } - } - } diff --git a/lib/Sabberworm/CSS/Property/Charset.php b/lib/Sabberworm/CSS/Property/Charset.php index 61c6ebc5..e0b29aa1 100644 --- a/lib/Sabberworm/CSS/Property/Charset.php +++ b/lib/Sabberworm/CSS/Property/Charset.php @@ -1,6 +1,7 @@ sCharset = $sCharset; + /** + * @param CSSString $oCharset + * @param int $iLineNo + */ + public 
function __construct(CSSString $oCharset, $iLineNo = 0) { + $this->oCharset = $oCharset; $this->iLineNo = $iLineNo; $this->aComments = array(); } @@ -28,12 +33,18 @@ public function getLineNo() { return $this->iLineNo; } - public function setCharset($sCharset) { - $this->sCharset = $sCharset; + /** + * @param CSSString $oCharset + */ + public function setCharset(CSSString $oCharset) { + $this->oCharset = $oCharset; } + /** + * @return CSSString + */ public function getCharset() { - return $this->sCharset; + return $this->oCharset; } public function __toString() { @@ -41,7 +52,7 @@ public function __toString() { } public function render(\Sabberworm\CSS\OutputFormat $oOutputFormat) { - return "@charset {$this->sCharset->render($oOutputFormat)};"; + return "@charset {$this->oCharset->render($oOutputFormat)};"; } public function atRuleName() { @@ -49,7 +60,7 @@ public function atRuleName() { } public function atRuleArgs() { - return $this->sCharset; + return $this->oCharset; } public function addComments(array $aComments) { diff --git a/lib/Sabberworm/CSS/RuleSet/DeclarationBlock.php b/lib/Sabberworm/CSS/RuleSet/DeclarationBlock.php index e18f5d82..9e32af4c 100644 --- a/lib/Sabberworm/CSS/RuleSet/DeclarationBlock.php +++ b/lib/Sabberworm/CSS/RuleSet/DeclarationBlock.php @@ -226,7 +226,11 @@ public function expandFontShorthand() { } foreach ($aValues as $mValue) { if (!$mValue instanceof Value) { - $mValue = mb_strtolower($mValue); + if (function_exists('mb_strtolower')) { + $mValue = mb_strtolower($mValue, 'utf-8'); + } else { + $mValue = strtolower($mValue); + } } if (in_array($mValue, array('normal', 'inherit'))) { foreach (array('font-style', 'font-weight', 'font-variant') as $sProperty) { @@ -300,7 +304,11 @@ public function expandBackgroundShorthand() { $iNumBgPos = 0; foreach ($aValues as $mValue) { if (!$mValue instanceof Value) { - $mValue = mb_strtolower($mValue); + if (function_exists('mb_strtolower')) { + $mValue = mb_strtolower($mValue, 'utf-8'); + } else { 
+ $mValue = strtolower($mValue); + } } if ($mValue instanceof URL) { $aBgProperties['background-image'] = $mValue; @@ -369,7 +377,11 @@ public function expandListStyleShorthand() { } foreach ($aValues as $mValue) { if (!$mValue instanceof Value) { - $mValue = mb_strtolower($mValue); + if (function_exists('mb_strtolower')) { + $mValue = mb_strtolower($mValue, 'utf-8'); + } else { + $mValue = strtolower($mValue); + } } if ($mValue instanceof Url) { $aListProperties['list-style-image'] = $mValue; diff --git a/lib/Sabberworm/CSS/Value/CSSString.php b/lib/Sabberworm/CSS/Value/CSSString.php index b0700081..f466e7be 100644 --- a/lib/Sabberworm/CSS/Value/CSSString.php +++ b/lib/Sabberworm/CSS/Value/CSSString.php @@ -24,9 +24,40 @@ public function __toString() { } public function render(\Sabberworm\CSS\OutputFormat $oOutputFormat) { - $sString = addslashes($this->sString); - $sString = str_replace("\n", '\A', $sString); - return $oOutputFormat->getStringQuotingType() . $sString . $oOutputFormat->getStringQuotingType(); + $sQuote = $oOutputFormat->getStringQuotingType(); + $aString = preg_split('//u', $this->sString, null, PREG_SPLIT_NO_EMPTY); + $iLength = count($aString); + foreach ($aString as $i => $sChar) { + if (strlen($sChar) === 1) { + if ($sChar === $sQuote || $sChar === '\\') { + // Encode quoting related characters as hex values + } else { + $iOrd = ord($sChar); + if ($iOrd > 31 && $iOrd < 127) { + // Keep only human readable ascii characters + continue; + } + } + } + + $sHex = ''; + $sUtf32 = iconv('utf-8', 'utf-32le', $sChar); + $aBytes = str_split($sUtf32); + foreach (array_reverse($aBytes) as $sByte) { + $sHex .= str_pad(dechex(ord($sByte)), 2, '0', STR_PAD_LEFT); + } + $sHex = ltrim($sHex, '0'); + if ($i + 1 < $iLength && strlen($sHex) < 6) { + // Add space after incomplete unicode escape if there can be any confusion + $sNextChar = $aString[$i + 1]; + if (preg_match('/^[a-fA-F0-9\s]/u', $sNextChar)) { + $sHex .= ' '; + } + } + $aString[$i] = '\\' . 
$sHex; + } + + return $sQuote . implode($aString) . $sQuote; } } \ No newline at end of file diff --git a/tests/Sabberworm/CSS/ParserTest.php b/tests/Sabberworm/CSS/ParserTest.php index 6df3e872..10e855c5 100644 --- a/tests/Sabberworm/CSS/ParserTest.php +++ b/tests/Sabberworm/CSS/ParserTest.php @@ -89,40 +89,236 @@ function testUnicodeParsing() { } $aContentRules = $oRuleSet->getRules('content'); $aContents = $aContentRules[0]->getValues(); - $sString = $aContents[0][0]->__toString(); if ($sSelector == '.test-1') { - $this->assertSame('" "', $sString); + $this->assertSame(' ', $aContents[0][0]->getString()); + $this->assertSame('" "', $aContents[0][0]->__toString()); } if ($sSelector == '.test-2') { - $this->assertSame('"é"', $sString); + $this->assertSame('é', $aContents[0][0]->getString()); + $this->assertSame('"\e9"', $aContents[0][0]->__toString()); } if ($sSelector == '.test-3') { - $this->assertSame('" "', $sString); + $this->assertSame(' ', $aContents[0][0]->getString()); + $this->assertSame('" "', $aContents[0][0]->__toString()); } if ($sSelector == '.test-4') { - $this->assertSame('"𝄞"', $sString); + $this->assertSame('𝄞', $aContents[0][0]->getString()); + $this->assertSame('"\1d11e"', $aContents[0][0]->__toString()); } if ($sSelector == '.test-5') { - $this->assertSame('"水"', $sString); + $this->assertSame('水', $aContents[0][0]->getString()); + $this->assertSame('"\6c34"', $aContents[0][0]->__toString()); } if ($sSelector == '.test-6') { - $this->assertSame('"¥"', $sString); + $this->assertSame('¥', $aContents[0][0]->getString()); + $this->assertSame('"\a5"', $aContents[0][0]->__toString()); } if ($sSelector == '.test-7') { - $this->assertSame('"\A"', $sString); + $this->assertSame("\n", $aContents[0][0]->getString()); + $this->assertSame('"\a"', $aContents[0][0]->__toString()); } if ($sSelector == '.test-8') { - $this->assertSame('"\"\""', $sString); + $this->assertSame('""', $aContents[0][0]->getString()); + $this->assertSame('"\22\22"', 
$aContents[0][0]->__toString()); } if ($sSelector == '.test-9') { - $this->assertSame('"\"\\\'"', $sString); + $this->assertSame('"\'', $aContents[0][0]->getString()); + $this->assertSame('"\22\'"', $aContents[0][0]->__toString()); } if ($sSelector == '.test-10') { - $this->assertSame('"\\\'\\\\"', $sString); + $this->assertSame("'\\", $aContents[0][0]->getString()); + $this->assertSame('"\'\5c"', $aContents[0][0]->__toString()); } if ($sSelector == '.test-11') { - $this->assertSame('"test"', $sString); + $this->assertSame('test', $aContents[0][0]->getString()); + $this->assertSame('"test"', $aContents[0][0]->__toString()); } + if ($sSelector == '.test-12') { + $this->assertSame('test', $aContents[0][0]->getString()); + $this->assertSame('"test"', $aContents[0][0]->__toString()); + } + if ($sSelector == '.test-13') { + $this->assertSame('éo', $aContents[0][0]->getString()); + $this->assertSame('"\e9o"', $aContents[0][0]->__toString()); + } + if ($sSelector == '.test-14') { + $this->assertSame('éo', $aContents[0][0]->getString()); + $this->assertSame('"\e9o"', $aContents[0][0]->__toString()); + } + if ($sSelector == '.test-15') { + $this->assertSame('é o', $aContents[0][0]->getString()); + $this->assertSame('"\e9 o"', $aContents[0][0]->__toString()); + } + } + + $sExpected = $oDoc->render(); + $oDoc = $this->parsedStructureForFile('unicode', Settings::create()->withMultibyteSupport(true)); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('unicode', Settings::create()->withMultibyteSupport(false)); + $this->assertSame($sExpected, $oDoc->render()); + } + + function testCharsetConversionsLenient() { + $oDoc = $this->parsedStructureForFile('charset-utf-8'); + $sExpected = $oDoc->render(); + $oDoc = $this->parsedStructureForFile('charset-utf-8-bom'); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('-charset-iso-8859-2', Settings::create()->withDefaultCharset('iso-8859-2')); + 
$this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-utf-16be'); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-utf-16le'); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-utf-32be'); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-utf-32le'); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-utf-8-bom', Settings::create()->withDefaultCharset('iso-8859-1')); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-utf-16be', Settings::create()->withDefaultCharset('iso-8859-1')); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-utf-16le', Settings::create()->withDefaultCharset('iso-8859-1')); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-utf-32be', Settings::create()->withDefaultCharset('iso-8859-1')); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-utf-32le', Settings::create()->withDefaultCharset('iso-8859-1')); + $this->assertSame($sExpected, $oDoc->render()); + + $oDoc = $this->parsedStructureForFile('charset-utf-8-declared'); + $sExpected = $oDoc->render(); + $oDoc = $this->parsedStructureForFile('charset-utf-8-bom-misdeclared'); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-utf-16be-misdeclared'); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-utf-16le-misdeclared'); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-utf-32be-misdeclared'); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-utf-32le-misdeclared'); + 
$this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-iso-8859-2-declared'); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-utf-8-bom-misdeclared', Settings::create()->withDefaultCharset('iso-8859-1')); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-utf-16be-misdeclared', Settings::create()->withDefaultCharset('iso-8859-1')); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-utf-16le-misdeclared', Settings::create()->withDefaultCharset('iso-8859-1')); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-utf-32be-misdeclared', Settings::create()->withDefaultCharset('iso-8859-1')); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-utf-32le-misdeclared', Settings::create()->withDefaultCharset('iso-8859-1')); + $this->assertSame($sExpected, $oDoc->render()); + $oDoc = $this->parsedStructureForFile('charset-iso-8859-2-declared', Settings::create()->withDefaultCharset('iso-8859-1')); + $this->assertSame($sExpected, $oDoc->render()); + } + + function testCharsetConversionsStrict() { + $this->parsedStructureForFile('charset-utf-8', Settings::create()->withLenientParsing(false)); + $this->parsedStructureForFile('charset-utf-8-bom', Settings::create()->withLenientParsing(false)); + $this->parsedStructureForFile('charset-utf-8-declared', Settings::create()->withLenientParsing(false)); + try { + $this->parsedStructureForFile('charset-utf-8-bom-misdeclared', Settings::create()->withLenientParsing(false)); + $this->fail('Exception expected when @charset does not match BOM'); + } catch (UnexpectedTokenException $e) { + $this->assertSame('@charset value does not match detected value [line no: 1]', $e->getMessage()); + } + + $this->parsedStructureForFile('charset-utf-16be', 
Settings::create()->withLenientParsing(false)); + $this->parsedStructureForFile('charset-utf-16be-declared', Settings::create()->withLenientParsing(false)); + try { + $this->parsedStructureForFile('charset-utf-16be-misdeclared', Settings::create()->withLenientParsing(false)); + $this->fail('Exception expected when @charset does not match BOM'); + } catch (UnexpectedTokenException $e) { + $this->assertSame('@charset value does not match detected value [line no: 1]', $e->getMessage()); + } + + $this->parsedStructureForFile('charset-utf-16le', Settings::create()->withLenientParsing(false)); + $this->parsedStructureForFile('charset-utf-16le-declared', Settings::create()->withLenientParsing(false)); + try { + $this->parsedStructureForFile('charset-utf-16le-misdeclared', Settings::create()->withLenientParsing(false)); + $this->fail('Exception expected when @charset does not match BOM'); + } catch (UnexpectedTokenException $e) { + $this->assertSame('@charset value does not match detected value [line no: 1]', $e->getMessage()); + } + + $this->parsedStructureForFile('charset-utf-32be', Settings::create()->withLenientParsing(false)); + $this->parsedStructureForFile('charset-utf-32be-declared', Settings::create()->withLenientParsing(false)); + try { + $this->parsedStructureForFile('charset-utf-32be-misdeclared', Settings::create()->withLenientParsing(false)); + $this->fail('Exception expected when @charset does not match BOM'); + } catch (UnexpectedTokenException $e) { + $this->assertSame('@charset value does not match detected value [line no: 1]', $e->getMessage()); + } + + $this->parsedStructureForFile('charset-utf-32le', Settings::create()->withLenientParsing(false)); + $this->parsedStructureForFile('charset-utf-32le-declared', Settings::create()->withLenientParsing(false)); + try { + $this->parsedStructureForFile('charset-utf-32le-misdeclared', Settings::create()->withLenientParsing(false)); + $this->fail('Exception expected when @charset does not match BOM in non-lenient 
mode'); + } catch (UnexpectedTokenException $e) { + $this->assertSame('@charset value does not match detected value [line no: 1]', $e->getMessage()); + } + } + + function testOriginalCharset() { + $oParser = $this->parserForFile('charset-utf-8'); + $this->assertSame('utf-8', $oParser->getOriginalCharset()); + + $oParser = $this->parserForFile('charset-utf-8-declared'); + $this->assertSame('utf-8', $oParser->getOriginalCharset()); + $oParser = $this->parserForFile('charset-utf-8-declared', Settings::create()->withDefaultCharset('iso-8859-1')); + $this->assertSame('utf-8', $oParser->getOriginalCharset()); + + $oParser = $this->parserForFile('charset-utf-16be'); + $this->assertSame('utf-16be', $oParser->getOriginalCharset()); + $oParser = $this->parserForFile('charset-utf-16be', Settings::create()->withDefaultCharset('iso-8859-1')); + $this->assertSame('utf-16be', $oParser->getOriginalCharset()); + + $oParser = $this->parserForFile('charset-utf-16le'); + $this->assertSame('utf-16le', $oParser->getOriginalCharset()); + $oParser = $this->parserForFile('charset-utf-16le', Settings::create()->withDefaultCharset('iso-8859-1')); + $this->assertSame('utf-16le', $oParser->getOriginalCharset()); + + $oParser = $this->parserForFile('charset-utf-32be'); + $this->assertSame('utf-32be', $oParser->getOriginalCharset()); + $oParser = $this->parserForFile('charset-utf-32be', Settings::create()->withDefaultCharset('iso-8859-1')); + $this->assertSame('utf-32be', $oParser->getOriginalCharset()); + + $oParser = $this->parserForFile('charset-utf-32le'); + $this->assertSame('utf-32le', $oParser->getOriginalCharset()); + $oParser = $this->parserForFile('charset-utf-32le', Settings::create()->withDefaultCharset('iso-8859-1')); + $this->assertSame('utf-32le', $oParser->getOriginalCharset()); + + $oParser = $this->parserForFile('charset-utf-8-bom'); + $this->assertSame('utf-8', $oParser->getOriginalCharset()); + $oParser = $this->parserForFile('charset-utf-8-bom', 
Settings::create()->withDefaultCharset('iso-8859-1')); + $this->assertSame('utf-8', $oParser->getOriginalCharset()); + + $oParser = $this->parserForFile('-charset-iso-8859-2', Settings::create()->withDefaultCharset('iso-8859-2')); + $this->assertSame('iso-8859-2', $oParser->getOriginalCharset()); + + $oParser = $this->parserForFile('charset-iso-8859-2-declared'); + $this->assertSame('iso-8859-2', $oParser->getOriginalCharset()); + $oParser = $this->parserForFile('charset-iso-8859-2-declared', Settings::create()->withDefaultCharset('iso-8859-1')); + $this->assertSame('iso-8859-2', $oParser->getOriginalCharset()); + } + + function testMultipleCharsets() { + $sDoubleCharset = "@charset 'utf-8'; +@charset 'utf-16'; +.test {}"; + $sExpected = "@charset \"utf-8\"; +.test {}"; + $parser = new Parser($sDoubleCharset); + $doc = $parser->parse(); + $this->assertSame($sExpected, $doc->render()); + + try { + $parser = new Parser($sDoubleCharset, Settings::create()->withLenientParsing(false)); + $doc = $parser->parse(); + $this->fail('Exception expected when @charset is repeated in non-lenient mode'); + } catch (UnexpectedTokenException $e) { + $this->assertSame('@charset value does not match detected value [line no: 2]', $e->getMessage()); } } @@ -262,7 +458,7 @@ function testSlashedValues() { function testFunctionSyntax() { $oDoc = $this->parsedStructureForFile('functions'); $sExpected = 'div.main {background-image: linear-gradient(#000,#fff);} -.collapser::before, .collapser::-moz-before, .collapser::-webkit-before {content: "»";font-size: 1.2em;margin-right: .2em;-moz-transition-property: -moz-transform;-moz-transition-duration: .2s;-moz-transform-origin: center 60%;} +.collapser::before, .collapser::-moz-before, .collapser::-webkit-before {content: "\bb";font-size: 1.2em;margin-right: .2em;-moz-transition-property: -moz-transform;-moz-transition-duration: .2s;-moz-transform-origin: center 60%;} .collapser.expanded::before, .collapser.expanded::-moz-before, 
.collapser.expanded::-webkit-before {-moz-transform: rotate(90deg);} .collapser + * {height: 0;overflow: hidden;-moz-transition-property: height;-moz-transition-duration: .3s;} .collapser.expanded + * {height: auto;}'; @@ -355,7 +551,7 @@ function testListValueRemoval() { } /** - * @expectedException Sabberworm\CSS\Parsing\OutputException + * @expectedException \Sabberworm\CSS\Parsing\OutputException */ function testSelectorRemoval() { $oDoc = $this->parsedStructureForFile('1readme'); @@ -419,22 +615,38 @@ function testCharsetLenient2() { } /** - * @expectedException Sabberworm\CSS\Parsing\UnexpectedTokenException + * @expectedException \Sabberworm\CSS\Parsing\UnexpectedTokenException */ function testCharsetFailure1() { $this->parsedStructureForFile('-charset-after-rule', Settings::create()->withLenientParsing(false)); } /** - * @expectedException Sabberworm\CSS\Parsing\UnexpectedTokenException + * @expectedException \Sabberworm\CSS\Parsing\UnexpectedTokenException */ function testCharsetFailure2() { $this->parsedStructureForFile('-charset-in-block', Settings::create()->withLenientParsing(false)); } - function parsedStructureForFile($sFileName, $oSettings = null) { + /** + * Get parser instance for the given file + * @param string $sFileName + * @param Settings $oSettings + * @return Parser + */ + protected function parserForFile($sFileName, $oSettings = null) { $sFile = dirname(__FILE__) . '/../../files' . DIRECTORY_SEPARATOR . 
"$sFileName.css"; - $oParser = new Parser(file_get_contents($sFile), $oSettings); + return new Parser(file_get_contents($sFile), $oSettings); + } + + /** + * Get parser document instance for the given file + * @param string $sFileName + * @param Settings $oSettings + * @return CSSList\Document + */ + protected function parsedStructureForFile($sFileName, $oSettings = null) { + $oParser = $this->parserForFile($sFileName, $oSettings); return $oParser->parse(); } @@ -507,7 +719,7 @@ function testUnexpectedTokenExceptionLineNo() { } /** - * @expectedException Sabberworm\CSS\Parsing\UnexpectedTokenException + * @expectedException \Sabberworm\CSS\Parsing\UnexpectedTokenException */ function testIeHacksStrictParsing() { // We can't strictly parse IE hacks. @@ -516,7 +728,40 @@ function testIeHacksStrictParsing() { function testIeHacksParsing() { $oDoc = $this->parsedStructureForFile('ie-hacks', Settings::create()->withLenientParsing(true)); - $sExpected = 'p {padding-right: .75rem \9;background-image: none \9;color: red \9\0;background-color: red \9\0;background-color: red \9\0 !important;content: "red \0";content: "red઼";}'; + foreach ($oDoc->getAllRuleSets() as $oRuleSet) { + if (!$oRuleSet instanceof DeclarationBlock) { + continue; + } + $sSelector = $oRuleSet->getSelectors(); + $sSelector = $sSelector[0]->getSelector(); + if ($sSelector === 'p') { + $aRule = $oRuleSet->getRules('padding-right'); + $this->assertCount(1, $aRule); + $this->assertSame('.75rem', (string)$aRule[0]->getValue()); + + $aRule = $oRuleSet->getRules('background-image'); + $this->assertCount(1, $aRule); + $this->assertSame('none', (string)$aRule[0]->getValue()); + + $aRule = $oRuleSet->getRules('color'); + $this->assertCount(1, $aRule); + $this->assertSame('red', (string)$aRule[0]->getValue()); + + $aRule = $oRuleSet->getRules('background-color'); + $this->assertCount(2, $aRule); + $this->assertSame('red', (string)$aRule[0]->getValue()); + $this->assertSame('red', (string)$aRule[1]->getValue()); + 
+ $aRule = $oRuleSet->getRules('content'); + $this->assertCount(2, $aRule); + $this->assertSame('"red \9"', (string)$aRule[0]->getValue()); + $this->assertSame('red ' . chr(9), $aRule[0]->getValue()->getString()); + $this->assertSame('"red\abc"', (string)$aRule[1]->getValue()); + $this->assertSame('red' . urldecode('%E0%AA%BC'), $aRule[1]->getValue()->getString()); + } + } + + $sExpected = 'p {padding-right: .75rem \9;background-image: none \9;color: red \9\0;background-color: red \9\0;background-color: red \9\0 !important;content: "red \9";content: "red\abc";}'; $this->assertEquals($sExpected, $oDoc->render()); } @@ -584,7 +829,7 @@ function testTopLevelCommentExtracting() { } /** - * @expectedException Sabberworm\CSS\Parsing\UnexpectedTokenException + * @expectedException \Sabberworm\CSS\Parsing\UnexpectedTokenException */ function testMicrosoftFilterStrictParsing() { $oDoc = $this->parsedStructureForFile('ms-filter', Settings::create()->beStrict()); diff --git a/tests/files/-charset-iso-8859-2.css b/tests/files/-charset-iso-8859-2.css new file mode 100644 index 00000000..9e7d324f --- /dev/null +++ b/tests/files/-charset-iso-8859-2.css @@ -0,0 +1,4 @@ +.test { + /* lut konek */ + content: ""; +} diff --git a/tests/files/charset-iso-8859-2-declared.css b/tests/files/charset-iso-8859-2-declared.css new file mode 100644 index 00000000..bf2b8360 --- /dev/null +++ b/tests/files/charset-iso-8859-2-declared.css @@ -0,0 +1,6 @@ +@charset "iso-8859-2"; + +.test { + /* lut konek */ + content: ""; +} diff --git a/tests/files/charset-utf-16be-declared.css b/tests/files/charset-utf-16be-declared.css new file mode 100644 index 00000000..349164b7 Binary files /dev/null and b/tests/files/charset-utf-16be-declared.css differ diff --git a/tests/files/charset-utf-16be-misdeclared.css b/tests/files/charset-utf-16be-misdeclared.css new file mode 100644 index 00000000..f18b484b Binary files /dev/null and b/tests/files/charset-utf-16be-misdeclared.css differ diff --git 
a/tests/files/charset-utf-16be.css b/tests/files/charset-utf-16be.css new file mode 100644 index 00000000..6225ef93 Binary files /dev/null and b/tests/files/charset-utf-16be.css differ diff --git a/tests/files/charset-utf-16le-declared.css b/tests/files/charset-utf-16le-declared.css new file mode 100644 index 00000000..5c1bc943 Binary files /dev/null and b/tests/files/charset-utf-16le-declared.css differ diff --git a/tests/files/charset-utf-16le-misdeclared.css b/tests/files/charset-utf-16le-misdeclared.css new file mode 100644 index 00000000..1c7deecc Binary files /dev/null and b/tests/files/charset-utf-16le-misdeclared.css differ diff --git a/tests/files/charset-utf-16le.css b/tests/files/charset-utf-16le.css new file mode 100644 index 00000000..4b5077a5 Binary files /dev/null and b/tests/files/charset-utf-16le.css differ diff --git a/tests/files/charset-utf-32be-declared.css b/tests/files/charset-utf-32be-declared.css new file mode 100644 index 00000000..9165e19e Binary files /dev/null and b/tests/files/charset-utf-32be-declared.css differ diff --git a/tests/files/charset-utf-32be-misdeclared.css b/tests/files/charset-utf-32be-misdeclared.css new file mode 100644 index 00000000..16dd77ae Binary files /dev/null and b/tests/files/charset-utf-32be-misdeclared.css differ diff --git a/tests/files/charset-utf-32be.css b/tests/files/charset-utf-32be.css new file mode 100644 index 00000000..71e2664b Binary files /dev/null and b/tests/files/charset-utf-32be.css differ diff --git a/tests/files/charset-utf-32le-declared.css b/tests/files/charset-utf-32le-declared.css new file mode 100644 index 00000000..9ffb4e8d Binary files /dev/null and b/tests/files/charset-utf-32le-declared.css differ diff --git a/tests/files/charset-utf-32le-misdeclared.css b/tests/files/charset-utf-32le-misdeclared.css new file mode 100644 index 00000000..3e1ce26f Binary files /dev/null and b/tests/files/charset-utf-32le-misdeclared.css differ diff --git a/tests/files/charset-utf-32le.css 
b/tests/files/charset-utf-32le.css new file mode 100644 index 00000000..326cfdfb Binary files /dev/null and b/tests/files/charset-utf-32le.css differ diff --git a/tests/files/charset-utf-8-bom-misdeclared.css b/tests/files/charset-utf-8-bom-misdeclared.css new file mode 100644 index 00000000..b7819651 --- /dev/null +++ b/tests/files/charset-utf-8-bom-misdeclared.css @@ -0,0 +1,6 @@ +@charset "iso-8859-2"; + +.test { + /* Žlutý koníček */ + content: "Ž"; +} diff --git a/tests/files/charset-utf-8-bom.css b/tests/files/charset-utf-8-bom.css new file mode 100644 index 00000000..775af19b --- /dev/null +++ b/tests/files/charset-utf-8-bom.css @@ -0,0 +1,4 @@ +.test { + /* Žlutý koníček */ + content: "Ž"; +} diff --git a/tests/files/charset-utf-8-declared.css b/tests/files/charset-utf-8-declared.css new file mode 100644 index 00000000..646002eb --- /dev/null +++ b/tests/files/charset-utf-8-declared.css @@ -0,0 +1,6 @@ +@charset "utf-8"; + +.test { + /* Žlutý koníček */ + content: "Ž"; +} diff --git a/tests/files/charset-utf-8.css b/tests/files/charset-utf-8.css new file mode 100644 index 00000000..6574534a --- /dev/null +++ b/tests/files/charset-utf-8.css @@ -0,0 +1,4 @@ +.test { + /* Žlutý koníček */ + content: "Ž"; +} diff --git a/tests/files/unicode.css b/tests/files/unicode.css index 24823200..4f8d654a 100644 --- a/tests/files/unicode.css +++ b/tests/files/unicode.css @@ -2,11 +2,14 @@ .test-2 { content: "\E9"; } /* Same as "é" */ .test-3 { content: "\0020"; } /* Same as " " */ .test-5 { content: "\6C34" } /* Same as "水" */ +.test-4 { content: "\1D11E" } /* Beyond the Basic Multilingual Plane */ .test-6 { content: "\00A5" } /* Same as "¥" */ .test-7 { content: '\a' } /* Same as "\A" (Newline) */ .test-8 { content: "\"\22" } /* Same as "\"\"" */ .test-9 { content: "\"\27" } /* Same as ""\"\'"" */ .test-10 { content: "\'\\" } /* Same as "'\" */ .test-11 { content: "\test" } /* Same as "test" */ - -.test-4 { content: "\1D11E" } /* Beyond the Basic Multilingual Plane */ 
+.test-12 { content: "te\0st" } /* Same as "test" without the null character */ +.test-13 { content: "\e9o" } /* Same as "éo" */ +.test-14 { content: "\e9 o" } /* Same as "éo" */ +.test-15 { content: "\e9  o" } /* Same as "é o" */