From 07477a787434e39b8e4f5cac87ca66cdbec90407 Mon Sep 17 00:00:00 2001 From: jrfnl Date: Tue, 21 May 2024 16:20:55 +0200 Subject: [PATCH] Tokenizer/PHP: don't retokenize tokens in a broken type DNF declaration A DNF type always needs to have matching open and close parentheses. If the parentheses are unbalanced, we can't be sure this is a DNF type and run the risk of retokenizing non-type tokens. Even if the code under scan was intended as a DNF type, it will be invalid code/contain a parse error if the parentheses are unbalanced. In that case, retokenizing to the type specific tokens is likely to make the effect of this parse error on sniffs _worse_. With that in mind, I'm adding extra hardening to the type handling layer in the PHP tokenizer, which will ensure that the retokenization to type specific tokens _only_ happens when there are balanced pairs of DNF parentheses. Includes tests safeguarding the behaviour of the type handling layer for this type of invalid/parse error types. Safe for two, all these tests would previously fail on containing at least _some_ type specific tokens. --- src/Tokenizers/PHP.php | 21 +- .../Tokenizer/PHP/DNFTypesParseError2Test.inc | 48 ++++ .../Tokenizer/PHP/DNFTypesParseError2Test.php | 218 ++++++++++++++++++ 3 files changed, 280 insertions(+), 7 deletions(-) create mode 100644 tests/Core/Tokenizer/PHP/DNFTypesParseError2Test.inc create mode 100644 tests/Core/Tokenizer/PHP/DNFTypesParseError2Test.php diff --git a/src/Tokenizers/PHP.php b/src/Tokenizers/PHP.php index 3e7be371cf..60a0d41df8 100644 --- a/src/Tokenizers/PHP.php +++ b/src/Tokenizers/PHP.php @@ -3132,9 +3132,14 @@ protected function processAdditional() $typeTokenCountBefore = 0; $typeOperators = [$i]; + $parenthesesCount = 0; $confirmed = false; $maybeNullable = null; + if ($this->tokens[$i]['code'] === T_OPEN_PARENTHESIS || $this->tokens[$i]['code'] === T_CLOSE_PARENTHESIS) { + ++$parenthesesCount; + } + for ($x = ($i - 1); $x >= 0; $x--) { if (isset(Tokens::$emptyTokens[$this->tokens[$x]['code']]) === true) { continue; @@ -3201,11 +3206,13 @@ protected function processAdditional() continue; } - if ($this->tokens[$x]['code'] === T_BITWISE_OR - || $this->tokens[$x]['code'] === T_BITWISE_AND - || $this->tokens[$x]['code'] === T_OPEN_PARENTHESIS - || $this->tokens[$x]['code'] === T_CLOSE_PARENTHESIS - ) { + if ($this->tokens[$x]['code'] === T_BITWISE_OR || $this->tokens[$x]['code'] === T_BITWISE_AND) { + $typeOperators[] = $x; + continue; + } + + if ($this->tokens[$x]['code'] === T_OPEN_PARENTHESIS || $this->tokens[$x]['code'] === T_CLOSE_PARENTHESIS) { + ++$parenthesesCount; $typeOperators[] = $x; continue; } @@ -3287,8 +3294,8 @@ protected function processAdditional() unset($parens, $last); }//end if - if ($confirmed === false) { - // Not a union or intersection type after all, move on. + if ($confirmed === false || ($parenthesesCount % 2) !== 0) { + // Not a (valid) union, intersection or DNF type after all, move on. continue; } diff --git a/tests/Core/Tokenizer/PHP/DNFTypesParseError2Test.inc b/tests/Core/Tokenizer/PHP/DNFTypesParseError2Test.inc new file mode 100644 index 0000000000..7929758286 --- /dev/null +++ b/tests/Core/Tokenizer/PHP/DNFTypesParseError2Test.inc @@ -0,0 +1,48 @@ + + * @copyright 2024 PHPCSStandards and contributors + * @license https://github.com/PHPCSStandards/PHP_CodeSniffer/blob/master/licence.txt BSD Licence + */ + +namespace PHP_CodeSniffer\Tests\Core\Tokenizer\PHP; + +use PHP_CodeSniffer\Tests\Core\Tokenizer\AbstractTokenizerTestCase; +use PHP_CodeSniffer\Util\Tokens; + +final class DNFTypesParseError2Test extends AbstractTokenizerTestCase +{ + + + /** + * Document handling for a DNF type / parse error where the type declaration contains an unmatched parenthesis. + * + * @param string $testMarker The comment prefacing the target token. + * + * @dataProvider dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens + * @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional + * + * @return void + */ + public function testBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens($testMarker) + { + $tokens = $this->phpcsFile->getTokens(); + + // Verify that the type union is still tokenized as T_BITWISE_OR as the type declaration + // is not recognized as a valid type declaration. + $unionPtr = $this->getTargetToken($testMarker, [T_BITWISE_OR, T_TYPE_UNION], '|'); + $token = $tokens[$unionPtr]; + + $this->assertSame(T_BITWISE_OR, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (code)'); + $this->assertSame('T_BITWISE_OR', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (type)'); + + // Verify that the unmatched open parenthesis is tokenized as a normal parenthesis. + $openPtr = $this->getTargetToken($testMarker, [T_OPEN_PARENTHESIS, T_TYPE_OPEN_PARENTHESIS], '('); + $token = $tokens[$openPtr]; + + $this->assertSame(T_OPEN_PARENTHESIS, $token['code'], 'Token tokenized as '.$token['type'].', not T_OPEN_PARENTHESIS (code)'); + $this->assertSame('T_OPEN_PARENTHESIS', $token['type'], 'Token tokenized as '.$token['type'].', not T_OPEN_PARENTHESIS (type)'); + + // Verify that the type intersection is still tokenized as T_BITWISE_AND as the type declaration + // is not recognized as a valid type declaration. + $intersectPtr = $this->getTargetToken($testMarker, [T_BITWISE_AND, T_TYPE_INTERSECTION], '&'); + $token = $tokens[$intersectPtr]; + + $this->assertSame(T_BITWISE_AND, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_AND (code)'); + $this->assertSame('T_BITWISE_AND', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_AND (type)'); + + }//end testBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens() + + + /** + * Data provider. + * + * @see testBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens() + * + * @return array> + */ + public static function dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens() + { + return [ + 'OO const type' => ['/* testBrokenConstDNFTypeParensMissingClose */'], + 'OO property type' => ['/* testBrokenPropertyDNFTypeParensMissingClose */'], + 'Parameter type' => ['/* testBrokenParamDNFTypeParensMissingClose */'], + 'Return type' => ['/* testBrokenReturnDNFTypeParensMissingClose */'], + ]; + + }//end dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens() + + + /** + * Document handling for a DNF type / parse error where the type declaration contains an unmatched parenthesis. + * + * @param string $testMarker The comment prefacing the target token. + * + * @dataProvider dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens + * @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional + * + * @return void + */ + public function testBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens($testMarker) + { + $tokens = $this->phpcsFile->getTokens(); + + // Verify that the type union is still tokenized as T_BITWISE_OR as the type declaration + // is not recognized as a valid type declaration. + $unionPtr = $this->getTargetToken($testMarker, [T_BITWISE_OR, T_TYPE_UNION], '|'); + $token = $tokens[$unionPtr]; + + $this->assertSame(T_BITWISE_OR, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (code)'); + $this->assertSame('T_BITWISE_OR', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (type)'); + + // Verify that the unmatched open parenthesis is tokenized as a normal parenthesis. + $closePtr = $this->getTargetToken($testMarker, [T_CLOSE_PARENTHESIS, T_TYPE_CLOSE_PARENTHESIS], ')'); + $token = $tokens[$closePtr]; + + $this->assertSame(T_CLOSE_PARENTHESIS, $token['code'], 'Token tokenized as '.$token['type'].', not T_CLOSE_PARENTHESIS (code)'); + $this->assertSame('T_CLOSE_PARENTHESIS', $token['type'], 'Token tokenized as '.$token['type'].', not T_CLOSE_PARENTHESIS (type)'); + + // Verify that the type intersection is still tokenized as T_BITWISE_AND as the type declaration + // is not recognized as a valid type declaration. + $intersectPtr = $this->getTargetToken($testMarker, [T_BITWISE_AND, T_TYPE_INTERSECTION], '&'); + $token = $tokens[$intersectPtr]; + + $this->assertSame(T_BITWISE_AND, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_AND (code)'); + $this->assertSame('T_BITWISE_AND', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_AND (type)'); + + }//end testBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens() + + + /** + * Data provider. + * + * @see testBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens() + * + * @return array> + */ + public static function dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens() + { + return [ + 'OO const type' => ['/* testBrokenConstDNFTypeParensMissingOpen */'], + 'OO property type' => ['/* testBrokenPropertyDNFTypeParensMissingOpen */'], + 'Parameter type' => ['/* testBrokenParamDNFTypeParensMissingOpen */'], + 'Return type' => ['/* testBrokenReturnDNFTypeParensMissingOpen */'], + ]; + + }//end dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens() + + + /** + * Document handling for a DNF type / parse error where the type declaration contains an unmatched parenthesis, + * but also contains a set of matched parentheses. + * + * @param string $testMarker The comment prefacing the target token. + * + * @dataProvider dataBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched + * @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional + * + * @return void + */ + public function testBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched($testMarker) + { + $tokens = $this->phpcsFile->getTokens(); + $startPtr = $this->getTargetToken($testMarker, [T_OPEN_PARENTHESIS, T_TYPE_OPEN_PARENTHESIS], '('); + + for ($i = $startPtr; $i < $this->phpcsFile->numTokens; $i++) { + if (isset(Tokens::$emptyTokens[$tokens[$i]['code']]) === true) { + continue; + } + + if ($tokens[$i]['code'] === T_EQUAL + || $tokens[$i]['code'] === T_VARIABLE + || $tokens[$i]['code'] === T_OPEN_CURLY_BRACKET + ) { + // Reached the end of the type. + break; + } + + $errorPrefix = 'Token tokenized as '.$tokens[$i]['type']; + + // Verify that type tokens have not been retokenized to `T_TYPE_*` tokens for broken type declarations. + switch ($tokens[$i]['content']) { + case '|': + $this->assertSame(T_BITWISE_OR, $tokens[$i]['code'], $errorPrefix.', not T_BITWISE_OR (code)'); + $this->assertSame('T_BITWISE_OR', $tokens[$i]['type'], $errorPrefix.', not T_BITWISE_OR (type)'); + break; + + case '&': + $this->assertSame(T_BITWISE_AND, $tokens[$i]['code'], $errorPrefix.', not T_BITWISE_AND (code)'); + $this->assertSame('T_BITWISE_AND', $tokens[$i]['type'], $errorPrefix.', not T_BITWISE_AND (type)'); + break; + + case '(': + // Verify that the open parenthesis is tokenized as a normal parenthesis. + $this->assertSame(T_OPEN_PARENTHESIS, $tokens[$i]['code'], $errorPrefix.', not T_OPEN_PARENTHESIS (code)'); + $this->assertSame('T_OPEN_PARENTHESIS', $tokens[$i]['type'], $errorPrefix.', not T_OPEN_PARENTHESIS (type)'); + break; + + case ')': + $this->assertSame(T_CLOSE_PARENTHESIS, $tokens[$i]['code'], $errorPrefix.', not T_CLOSE_PARENTHESIS (code)'); + $this->assertSame('T_CLOSE_PARENTHESIS', $tokens[$i]['type'], $errorPrefix.', not T_CLOSE_PARENTHESIS (type)'); + break; + + default: + break; + }//end switch + }//end for + + }//end testBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched() + + + /** + * Data provider. + * + * @see testBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched() + * + * @return array> + */ + public static function dataBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched() + { + return [ + 'OO const type - missing one close parenthesis' => ['/* testBrokenConstDNFTypeParensMissingOneClose */'], + 'OO property type - missing one open parenthesis' => ['/* testBrokenPropertyDNFTypeParensMissingOneOpen */'], + 'Parameter type - missing one close parenthesis' => ['/* testBrokenParamDNFTypeParensMissingOneClose */'], + 'Return type - missing one open parenthesis' => ['/* testBrokenReturnDNFTypeParensMissingOneOpen */'], + ]; + + }//end dataBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched() + + +}//end class