From 3776332be20dfa4d1c6ace3b1fdb136e5a0a97d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Vo=C5=99=C3=AD=C5=A1ek?= Date: Wed, 26 Mar 2025 15:28:04 +0100 Subject: [PATCH 1/4] add tests for bug2792 --- .../Analyser/nsrt/preg_match_shapes.php | 60 ++++++++++++++++++- .../nsrt/preg_replace_callback_shapes.php | 11 ++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 5e87970c8e..c69e8abb42 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -1011,7 +1011,65 @@ function bug12749f(string $str): void } } -function bug12397(string $string) : array { +function bug12397(string $string): void { $m = preg_match('#\b([A-Z]{2,})-(\d+)#', $string, $match); assertType('list{0?: string, 1?: non-falsy-string, 2?: numeric-string}', $match); } + +function bug2792(string $string): void { + if (preg_match('~a\Kb~', $string, $match) === 1) { + assertType('array{\'b\'}', $match); + } + + if (preg_match('~a\K~', $string, $match) === 1) { + assertType('array{\'\'}', $match); + } + + if (preg_match('~a\K.+~', $string, $match) === 1) { + assertType('array{non-empty-string}', $match); + } + + if (preg_match('~a\K.*~', $string, $match) === 1) { + assertType('array{string}', $match); + } + + if (preg_match('~a\K(.+)~', $string, $match) === 1) { + assertType('array{non-empty-string, non-empty-string}', $match); + } + + if (preg_match('~a\K(.*)~', $string, $match) === 1) { + assertType('array{string, string}', $match); + } + + if (preg_match('~a\K(.+?)~', $string, $match) === 1) { + assertType('array{non-empty-string, non-empty-string}', $match); + } + + if (preg_match('~a\K(.*?)~', $string, $match) === 1) { + assertType('array{string, string}', $match); + } + + if (preg_match('~a\K(?=.+)~', $string, $match) === 1) { + assertType('array{\'\'}', $match); + } + + if (preg_match('~a\K(?=.*)~', $string, $match) === 1) { + assertType('array{\'\'}', $match); + } + + if (preg_match('~a(?:x\Kb|c)~', $string, $match) === 1) { + assertType('array{\'ac\'|\'b\'}', $match); + } + + if (preg_match('~a(?:c|x\Kb)~', $string, $match) === 1) { + assertType('array{\'ac\'|\'b\'}', $match); + } + + if (preg_match('~a(y|(?:x\Kb|c))d~', $string, $match) === 1) { + assertType('array{\'acd\'|\'ayd\'|\'bd\', \'c\'|\'xb\'|\'y\'}', $match); + } + + if (preg_match('~a((?:c|x\Kb)|y)d~', $string, $match) === 1) { + assertType('array{\'acd\'|\'ayd\'|\'bd\', \'c\'|\'xb\'|\'y\'}', $match); + } +} diff --git a/tests/PHPStan/Analyser/nsrt/preg_replace_callback_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_replace_callback_shapes.php index c6ba4824c2..596d74b450 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_replace_callback_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_replace_callback_shapes.php @@ -45,3 +45,14 @@ function ($matches) { PREG_OFFSET_CAPTURE|PREG_UNMATCHED_AS_NULL ); }; + +function bug2792(string $string) : void { + preg_replace_callback( + '~\'(?:[^\']+|\'\')*+\'\K|\[(\w*)\]~', + function ($matches) { + assertType("array{0: string, 1?: string}", $matches); + return ''; + }, + $string + ); +} From fc34a342bb8ea032d57c345a8100824adc42e71d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Vo=C5=99=C3=AD=C5=A1ek?= Date: Sat, 5 Apr 2025 23:29:20 +0200 Subject: [PATCH 2/4] impl --- src/Type/Regex/RegexGroupParser.php | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/Type/Regex/RegexGroupParser.php b/src/Type/Regex/RegexGroupParser.php index 0383ea4c5a..2f90e08c22 100644 --- a/src/Type/Regex/RegexGroupParser.php +++ b/src/Type/Regex/RegexGroupParser.php @@ -110,7 +110,7 @@ public function parseGroups(string $regex): ?RegexAstWalkResult RegexGroupWalkResult::createEmpty(), ); - if (!$subjectAsGroupResult->mightContainEmptyStringLiteral()) { + if (!$subjectAsGroupResult->mightContainEmptyStringLiteral() && !$this->containsEscapeK($ast)) { // we could handle numeric-string, in case we know the regex is delimited by ^ and $ if ($subjectAsGroupResult->isNonFalsy()->yes()) { $astWalkResult = $astWalkResult->withSubjectBaseType( @@ -171,6 +171,21 @@ private function updateCapturingAstAddEmptyToken(TreeNode $ast): void $ast->setChildren([$emptyAlternationAst]); } + private function containsEscapeK(TreeNode $ast): bool + { + if ($ast->getId() === 'token' && $ast->getValueToken() === 'match_point_reset') { + return true; + } + + foreach ($ast->getChildren() as $child) { + if ($this->containsEscapeK($child)) { + return true; + } + } + + return false; + } + private function walkRegexAst( TreeNode $ast, ?RegexAlternation $alternation, From a6b65036e3ef5022b0c08adeeb716504023d1eea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Vo=C5=99=C3=AD=C5=A1ek?= Date: Sat, 5 Apr 2025 23:36:00 +0200 Subject: [PATCH 3/4] adjust --- .../Analyser/nsrt/preg_match_shapes.php | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index c69e8abb42..6a500b9f78 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -1018,15 +1018,15 @@ function bug12397(string $string): void { function bug2792(string $string): void { if (preg_match('~a\Kb~', $string, $match) === 1) { - assertType('array{\'b\'}', $match); + assertType('array{string}', $match); // could be array{'b'} } if (preg_match('~a\K~', $string, $match) === 1) { - assertType('array{\'\'}', $match); + assertType('array{string}', $match); // could be array{''} } if (preg_match('~a\K.+~', $string, $match) === 1) { - assertType('array{non-empty-string}', $match); + assertType('array{string}', $match); // could be array{non-empty-string} } if (preg_match('~a\K.*~', $string, $match) === 1) { @@ -1034,7 +1034,7 @@ function bug2792(string $string): void { } if (preg_match('~a\K(.+)~', $string, $match) === 1) { - assertType('array{non-empty-string, non-empty-string}', $match); + assertType('array{string, non-empty-string}', $match); // could be array{non-empty-string, non-empty-string} } if (preg_match('~a\K(.*)~', $string, $match) === 1) { @@ -1042,7 +1042,7 @@ function bug2792(string $string): void { } if (preg_match('~a\K(.+?)~', $string, $match) === 1) { - assertType('array{non-empty-string, non-empty-string}', $match); + assertType('array{string, non-empty-string}', $match); // could be array{non-empty-string, non-empty-string} } if (preg_match('~a\K(.*?)~', $string, $match) === 1) { @@ -1050,26 +1050,26 @@ function bug2792(string $string): void { } if (preg_match('~a\K(?=.+)~', $string, $match) === 1) { - assertType('array{\'\'}', $match); + assertType('array{string}', $match); // could be array{''} } if (preg_match('~a\K(?=.*)~', $string, $match) === 1) { - assertType('array{\'\'}', $match); + assertType('array{string}', $match); // could be array{''} } if (preg_match('~a(?:x\Kb|c)~', $string, $match) === 1) { - assertType('array{\'ac\'|\'b\'}', $match); + assertType('array{string}', $match); // could be array{'ac'|'b'} } if (preg_match('~a(?:c|x\Kb)~', $string, $match) === 1) { - assertType('array{\'ac\'|\'b\'}', $match); + assertType('array{string}', $match); // could be array{'ac'|'b'} } if (preg_match('~a(y|(?:x\Kb|c))d~', $string, $match) === 1) { - assertType('array{\'acd\'|\'ayd\'|\'bd\', \'c\'|\'xb\'|\'y\'}', $match); + assertType('array{string, non-empty-string}', $match); // could be array{'acd'|'ayd'|'bd', 'c'|'xb'|'y'} } if (preg_match('~a((?:c|x\Kb)|y)d~', $string, $match) === 1) { - assertType('array{\'acd\'|\'ayd\'|\'bd\', \'c\'|\'xb\'|\'y\'}', $match); + assertType('array{string, non-empty-string}', $match); // could be array{'acd'|'ayd'|'bd', 'c'|'xb'|'y'} } } From e164bd1f3f920c4b82d92cfaa2221369062b707b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Vo=C5=99=C3=AD=C5=A1ek?= Date: Sun, 6 Apr 2025 11:16:11 +0200 Subject: [PATCH 4/4] fix typo --- tests/PHPStan/Analyser/nsrt/preg_match_shapes.php | 2 +- tests/PHPStan/Analyser/nsrt/preg_replace_callback_shapes.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 6a500b9f78..545fd191f1 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -1016,7 +1016,7 @@ function bug12397(string $string): void { assertType('list{0?: string, 1?: non-falsy-string, 2?: numeric-string}', $match); } -function bug2792(string $string): void { +function bug12792(string $string): void { if (preg_match('~a\Kb~', $string, $match) === 1) { assertType('array{string}', $match); // could be array{'b'} } diff --git a/tests/PHPStan/Analyser/nsrt/preg_replace_callback_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_replace_callback_shapes.php index 596d74b450..7bd70492ee 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_replace_callback_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_replace_callback_shapes.php @@ -46,7 +46,7 @@ function ($matches) { ); }; -function bug2792(string $string) : void { +function bug12792(string $string) : void { preg_replace_callback( '~\'(?:[^\']+|\'\')*+\'\K|\[(\w*)\]~', function ($matches) {