Skip to content

Commit 376023a

Browse files
committed
Extract StringUnescaper from ConstExprParser
1 parent 5e2f2e0 commit 376023a

File tree

3 files changed

+105
-92
lines changed

3 files changed

+105
-92
lines changed

src/Ast/ConstExpr/QuoteAwareConstExprStringNode.php

+8-5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
use function sprintf;
1212
use function str_pad;
1313
use function strlen;
14+
use const STR_PAD_LEFT;
1415

1516
class QuoteAwareConstExprStringNode implements ConstExprNode
1617
{
@@ -47,9 +48,10 @@ public function __toString(): string
4748
return sprintf('"%s"', $this->escapeDoubleQuotedString());
4849
}
4950

50-
private function escapeDoubleQuotedString() {
51+
private function escapeDoubleQuotedString()
52+
{
5153
$quote = '"';
52-
$escaped = addcslashes($this->value, "\n\r\t\f\v$" . $quote . "\\");
54+
$escaped = addcslashes($this->value, "\n\r\t\f\v$" . $quote . '\\');
5355

5456
// Escape control characters and non-UTF-8 characters.
5557
// Regex based on https://stackoverflow.com/a/11709412/385378.
@@ -68,10 +70,11 @@ private function escapeDoubleQuotedString() {
6870
| (?<=[\xF0-\xF4])[\x80-\xBF](?![\x80-\xBF]{2}) # Short 4 byte sequence
6971
| (?<=[\xF0-\xF4][\x80-\xBF])[\x80-\xBF](?![\x80-\xBF]) # Short 4 byte sequence (2)
7072
)/x';
71-
return preg_replace_callback($regex, function ($matches) {
73+
return preg_replace_callback($regex, static function ($matches) {
7274
assert(strlen($matches[0]) === 1);
73-
$hex = dechex(ord($matches[0]));;
74-
return '\\x' . str_pad($hex, 2, '0', \STR_PAD_LEFT);
75+
$hex = dechex(ord($matches[0]));
76+
77+
return '\\x' . str_pad($hex, 2, '0', STR_PAD_LEFT);
7578
}, $escaped);
7679
}
7780

src/Parser/ConstExprParser.php

+1-87
Original file line numberDiff line numberDiff line change
@@ -4,27 +4,12 @@
44

55
use PHPStan\PhpDocParser\Ast;
66
use PHPStan\PhpDocParser\Lexer\Lexer;
7-
use function chr;
8-
use function hexdec;
9-
use function octdec;
10-
use function preg_replace_callback;
11-
use function str_replace;
127
use function strtolower;
138
use function substr;
149

1510
class ConstExprParser
1611
{
1712

18-
private const REPLACEMENTS = [
19-
'\\' => '\\',
20-
'n' => "\n",
21-
'r' => "\r",
22-
't' => "\t",
23-
'f' => "\f",
24-
'v' => "\v",
25-
'e' => "\x1B",
26-
];
27-
2813
/** @var bool */
2914
private $unescapeStrings;
3015

@@ -56,7 +41,7 @@ public function parse(TokenIterator $tokens, bool $trimStrings = false): Ast\Con
5641
$type = $tokens->currentTokenType();
5742
if ($trimStrings) {
5843
if ($this->unescapeStrings) {
59-
$value = self::unescapeString($value);
44+
$value = StringUnescaper::unescapeString($value);
6045
} else {
6146
$value = substr($value, 1, -1);
6247
}
@@ -171,75 +156,4 @@ private function parseArrayItem(TokenIterator $tokens): Ast\ConstExpr\ConstExprA
171156
return new Ast\ConstExpr\ConstExprArrayItemNode($key, $value);
172157
}
173158

174-
private static function unescapeString(string $string): string
175-
{
176-
$quote = $string[0];
177-
178-
if ($quote === '\'') {
179-
return str_replace(
180-
['\\\\', '\\\''],
181-
['\\', '\''],
182-
substr($string, 1, -1)
183-
);
184-
}
185-
186-
return self::parseEscapeSequences(substr($string, 1, -1), '"');
187-
}
188-
189-
/**
190-
* Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L90-L130
191-
*/
192-
private static function parseEscapeSequences(string $str, string $quote): string
193-
{
194-
$str = str_replace('\\' . $quote, $quote, $str);
195-
196-
return preg_replace_callback(
197-
'~\\\\([\\\\nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3}|u\{([0-9a-fA-F]+)\})~',
198-
static function ($matches) {
199-
$str = $matches[1];
200-
201-
if (isset(self::REPLACEMENTS[$str])) {
202-
return self::REPLACEMENTS[$str];
203-
}
204-
if ($str[0] === 'x' || $str[0] === 'X') {
205-
return chr(hexdec(substr($str, 1)));
206-
}
207-
if ($str[0] === 'u') {
208-
return self::codePointToUtf8(hexdec($matches[2]));
209-
}
210-
211-
return chr(octdec($str));
212-
},
213-
$str
214-
);
215-
}
216-
217-
/**
218-
* Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L132-L154
219-
*/
220-
private static function codePointToUtf8(int $num): string
221-
{
222-
if ($num <= 0x7F) {
223-
return chr($num);
224-
}
225-
if ($num <= 0x7FF) {
226-
return chr(($num >> 6) + 0xC0)
227-
. chr(($num & 0x3F) + 0x80);
228-
}
229-
if ($num <= 0xFFFF) {
230-
return chr(($num >> 12) + 0xE0)
231-
. chr((($num >> 6) & 0x3F) + 0x80)
232-
. chr(($num & 0x3F) + 0x80);
233-
}
234-
if ($num <= 0x1FFFFF) {
235-
return chr(($num >> 18) + 0xF0)
236-
. chr((($num >> 12) & 0x3F) + 0x80)
237-
. chr((($num >> 6) & 0x3F) + 0x80)
238-
. chr(($num & 0x3F) + 0x80);
239-
}
240-
241-
// Invalid UTF-8 codepoint escape sequence: Codepoint too large
242-
return "\xef\xbf\xbd";
243-
}
244-
245159
}

src/Parser/StringUnescaper.php

+96
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
<?php declare(strict_types = 1);
2+
3+
namespace PHPStan\PhpDocParser\Parser;
4+
5+
use function chr;
6+
use function hexdec;
7+
use function octdec;
8+
use function preg_replace_callback;
9+
use function str_replace;
10+
use function substr;
11+
12+
/**
 * Turns a quoted string literal (quotes included) into the raw string it denotes
 * by stripping the surrounding quotes and resolving backslash escape sequences.
 */
class StringUnescaper
{

	/**
	 * Single-character escapes: the character following the backslash mapped to
	 * the byte it produces inside a double-quoted literal.
	 */
	private const REPLACEMENTS = [
		'\\' => '\\',
		'n' => "\n",
		'r' => "\r",
		't' => "\t",
		'f' => "\f",
		'v' => "\v",
		'e' => "\x1B",
	];

	/** Largest value codePointToUtf8() is able to encode (upper bound of its four-byte branch). */
	private const MAX_ENCODABLE_CODE_POINT = 0x1FFFFF;

	/** UTF-8 bytes of U+FFFD, emitted in place of a code point that is too large to encode. */
	private const INVALID_CODE_POINT_REPLACEMENT = "\xef\xbf\xbd";

	/**
	 * Unescapes a quoted string literal.
	 *
	 * The first character of $string selects the quoting style. The first and last
	 * characters are always dropped from the result.
	 *
	 * - Single quote: only `\\` and `\'` are unescaped; everything else is kept verbatim.
	 * - Anything else: the content is treated as a double-quoted literal and passed
	 *   through parseEscapeSequences().
	 *
	 * @param string $string The literal including its surrounding quotes.
	 * @return string The unescaped content without the surrounding quotes.
	 */
	public static function unescapeString(string $string): string
	{
		$quote = $string[0];

		if ($quote === '\'') {
			return str_replace(
				['\\\\', '\\\''],
				['\\', '\''],
				substr($string, 1, -1)
			);
		}

		return self::parseEscapeSequences(substr($string, 1, -1), '"');
	}

	/**
	 * Resolves the escape sequences of a double-quoted literal: the escaped quote,
	 * the single-character escapes listed in REPLACEMENTS, `\xHH` / `\XHH` hex bytes,
	 * `\NNN` octal bytes and `\u{...}` code points.
	 *
	 * Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L90-L130
	 *
	 * @param string $str   Literal content without the surrounding quotes.
	 * @param string $quote Quote character whose backslash-escaped form is unescaped first.
	 * @return string The content with all recognised escape sequences replaced.
	 */
	private static function parseEscapeSequences(string $str, string $quote): string
	{
		$str = str_replace('\\' . $quote, $quote, $str);

		return preg_replace_callback(
			'~\\\\([\\\\nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3}|u\{([0-9a-fA-F]+)\})~',
			static function ($matches) {
				$str = $matches[1];

				if (isset(self::REPLACEMENTS[$str])) {
					return self::REPLACEMENTS[$str];
				}
				if ($str[0] === 'x' || $str[0] === 'X') {
					return chr(hexdec(substr($str, 1)));
				}
				if ($str[0] === 'u') {
					// hexdec() yields a float once the digits no longer fit into an int.
					// Handing that float to the int-typed codePointToUtf8() would throw a
					// TypeError, so oversized values are rejected here before the cast.
					$codePoint = hexdec($matches[2]);
					if ($codePoint > self::MAX_ENCODABLE_CODE_POINT) {
						return self::INVALID_CODE_POINT_REPLACEMENT;
					}

					return self::codePointToUtf8((int) $codePoint);
				}

				return chr(octdec($str));
			},
			$str
		);
	}

	/**
	 * Encodes a code point as a one- to four-byte UTF-8 sequence.
	 *
	 * Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L132-L154
	 *
	 * @param int $num Code point to encode.
	 * @return string The encoded bytes, or the bytes of U+FFFD when $num exceeds
	 *                MAX_ENCODABLE_CODE_POINT.
	 */
	private static function codePointToUtf8(int $num): string
	{
		if ($num <= 0x7F) {
			return chr($num);
		}
		if ($num <= 0x7FF) {
			return chr(($num >> 6) + 0xC0)
				. chr(($num & 0x3F) + 0x80);
		}
		if ($num <= 0xFFFF) {
			return chr(($num >> 12) + 0xE0)
				. chr((($num >> 6) & 0x3F) + 0x80)
				. chr(($num & 0x3F) + 0x80);
		}
		if ($num <= self::MAX_ENCODABLE_CODE_POINT) {
			return chr(($num >> 18) + 0xF0)
				. chr((($num >> 12) & 0x3F) + 0x80)
				. chr((($num >> 6) & 0x3F) + 0x80)
				. chr(($num & 0x3F) + 0x80);
		}

		// Invalid UTF-8 codepoint escape sequence: Codepoint too large
		return self::INVALID_CODE_POINT_REPLACEMENT;
	}

}

0 commit comments

Comments
 (0)