diff --git a/src/Tokenizers/PHP.php b/src/Tokenizers/PHP.php index 2e8c87bed3..03096887bb 100644 --- a/src/Tokenizers/PHP.php +++ b/src/Tokenizers/PHP.php @@ -992,20 +992,24 @@ protected function tokenize($string) if ($tokens[$i][0] === T_LNUMBER || $tokens[$i][0] === T_DNUMBER - || ($tokens[$i][0] === T_STRING - && $tokens[$i][1][0] === '_') ) { $newContent .= $tokens[$i][1]; + continue; + } - // Any T_DNUMBER token needs to make the - // new number a T_DNUMBER as well. - if ($tokens[$i][0] === T_DNUMBER) { - $newType = T_DNUMBER; - } + if ($tokens[$i][0] === T_STRING + && $tokens[$i][1][0] === '_' + && ((strpos($newContent, '0x') === 0 + && preg_match('`^((? PHP_INT_MAX) + || (stripos($newContent, '0b') === 0 && bindec(str_replace('_', '', $newContent)) > PHP_INT_MAX) + || (stripos($newContent, '0x') !== 0 + && stripos($newContent, 'e') !== false || strpos($newContent, '.') !== false) + || (strpos($newContent, '0') === 0 && stripos($newContent, '0x') !== 0 + && stripos($newContent, '0b') !== 0 && octdec(str_replace('_', '', $newContent)) > PHP_INT_MAX) + || (strpos($newContent, '0') !== 0 && str_replace('_', '', $newContent) > PHP_INT_MAX)) + ) { + $newType = T_DNUMBER; + } + $newToken = []; $newToken['code'] = $newType; - $newToken['type'] = Util\Tokens::tokenName($token[0]); + $newToken['type'] = Util\Tokens::tokenName($newType); $newToken['content'] = $newContent; $finalTokens[$newStackPtr] = $newToken; diff --git a/tests/Core/Tokenizer/BackfillNumericSeparatorTest.inc b/tests/Core/Tokenizer/BackfillNumericSeparatorTest.inc index bfdfcb9ac4..eef53f593b 100644 --- a/tests/Core/Tokenizer/BackfillNumericSeparatorTest.inc +++ b/tests/Core/Tokenizer/BackfillNumericSeparatorTest.inc @@ -1,10 +1,14 @@ getTokens(); - $number = $this->getTargetToken($testData['marker'], $testData['type']); - $this->assertSame($tokens[$number]['content'], $testData['value']); + $number = $this->getTargetToken($testData['marker'], [T_LNUMBER, T_DNUMBER]); + + $this->assertSame(constant($testData['type']), $tokens[$number]['code']); + $this->assertSame($testData['type'], $tokens[$number]['type']); + $this->assertSame($testData['value'], $tokens[$number]['content']); }//end testBackfill() @@ -43,66 +46,335 @@ public function testBackfill($testData) */ public function dataTestBackfill() { + $testHexType = 'T_LNUMBER'; + if (PHP_INT_MAX < 0xCAFEF00D) { + $testHexType = 'T_DNUMBER'; + } + + $testHexMultipleType = 'T_LNUMBER'; + if (PHP_INT_MAX < 0x42726F776E) { + $testHexMultipleType = 'T_DNUMBER'; + } + + $testIntMoreThanMaxType = 'T_LNUMBER'; + if (PHP_INT_MAX < 10223372036854775807) { + $testIntMoreThanMaxType = 'T_DNUMBER'; + } + return [ [ [ 'marker' => '/* testSimpleLNumber */', - 'type' => T_LNUMBER, + 'type' => 'T_LNUMBER', 'value' => '1_000_000_000', ], ], [ [ 'marker' => '/* testSimpleDNumber */', - 'type' => T_DNUMBER, + 'type' => 'T_DNUMBER', 'value' => '107_925_284.88', ], ], [ [ 'marker' => '/* testFloat */', - 'type' => T_DNUMBER, + 'type' => 'T_DNUMBER', 'value' => '6.674_083e-11', ], ], [ [ 'marker' => '/* testFloat2 */', - 'type' => T_DNUMBER, + 'type' => 'T_DNUMBER', 'value' => '6.674_083e+11', ], ], + [ + [ + 'marker' => '/* testFloat3 */', + 'type' => 'T_DNUMBER', + 'value' => '1_2.3_4e1_23', + ], + ], [ [ 'marker' => '/* testHex */', - 'type' => T_LNUMBER, + 'type' => $testHexType, 'value' => '0xCAFE_F00D', ], ], [ [ 'marker' => '/* testHexMultiple */', - 'type' => T_LNUMBER, + 'type' => $testHexMultipleType, 'value' => '0x42_72_6F_77_6E', ], ], + [ + [ + 'marker' => '/* testHexInt */', + 'type' => 'T_LNUMBER', + 'value' => '0x42_72_6F', + ], + ], [ [ 'marker' => '/* testBinary */', - 'type' => T_LNUMBER, + 'type' => 'T_LNUMBER', 'value' => '0b0101_1111', ], ], [ [ 'marker' => '/* testOctal */', - 'type' => T_LNUMBER, + 'type' => 'T_LNUMBER', 'value' => '0137_041', ], ], + [ + [ + 'marker' => '/* testIntMoreThanMax */', + 'type' => $testIntMoreThanMaxType, + 'value' => '10_223_372_036_854_775_807', + ], + ], ]; }//end dataTestBackfill() + /** + * Test that numbers using numeric seperators which are considered parse errors and/or + * which aren't relevant to the backfill, do not incorrectly trigger the backfill anyway. + * + * @param string $testMarker The comment which prefaces the target token in the test file. + * @param array $expectedTokens The token type and content of the expected token sequence. + * + * @dataProvider dataNoBackfill + * @covers PHP_CodeSniffer\Tokenizers\PHP::tokenize + * + * @return void + */ + public function testNoBackfill($testMarker, $expectedTokens) + { + $tokens = self::$phpcsFile->getTokens(); + $number = $this->getTargetToken($testMarker, [T_LNUMBER, T_DNUMBER]); + + foreach ($expectedTokens as $key => $expectedToken) { + $i = ($number + $key); + $this->assertSame($expectedToken['code'], $tokens[$i]['code']); + $this->assertSame($expectedToken['content'], $tokens[$i]['content']); + } + + }//end testNoBackfill() + + + /** + * Data provider. + * + * @see testBackfill() + * + * @return array + */ + public function dataNoBackfill() + { + return [ + [ + '/* testInvalid1 */', + [ + [ + 'code' => T_LNUMBER, + 'content' => '100', + ], + [ + 'code' => T_STRING, + 'content' => '_', + ], + ], + ], + [ + '/* testInvalid2 */', + [ + [ + 'code' => T_LNUMBER, + 'content' => '1', + ], + [ + 'code' => T_STRING, + 'content' => '__1', + ], + ], + ], + [ + '/* testInvalid3 */', + [ + [ + 'code' => T_LNUMBER, + 'content' => '1', + ], + [ + 'code' => T_STRING, + 'content' => '_', + ], + [ + 'code' => T_DNUMBER, + 'content' => '.0', + ], + ], + ], + [ + '/* testInvalid4 */', + [ + [ + 'code' => T_DNUMBER, + 'content' => '1.', + ], + [ + 'code' => T_STRING, + 'content' => '_0', + ], + ], + ], + [ + '/* testInvalid5 */', + [ + [ + 'code' => T_LNUMBER, + 'content' => '0', + ], + [ + 'code' => T_STRING, + 'content' => 'x_123', + ], + ], + ], + [ + '/* testInvalid6 */', + [ + [ + 'code' => T_LNUMBER, + 'content' => '0', + ], + [ + 'code' => T_STRING, + 'content' => 'b_101', + ], + ], + ], + [ + '/* testInvalid7 */', + [ + [ + 'code' => T_LNUMBER, + 'content' => '1', + ], + [ + 'code' => T_STRING, + 'content' => '_e2', + ], + ], + ], + [ + '/* testInvalid8 */', + [ + [ + 'code' => T_LNUMBER, + 'content' => '1', + ], + [ + 'code' => T_STRING, + 'content' => 'e_2', + ], + ], + ], + [ + '/* testInvalid9 */', + [ + [ + 'code' => T_LNUMBER, + 'content' => '107_925_284', + ], + [ + 'code' => T_WHITESPACE, + 'content' => ' ', + ], + [ + 'code' => T_DNUMBER, + 'content' => '.88', + ], + ], + ], + [ + '/* testInvalid10 */', + [ + [ + 'code' => T_LNUMBER, + 'content' => '107_925_284', + ], + [ + 'code' => T_COMMENT, + 'content' => '/*comment*/', + ], + [ + 'code' => T_DNUMBER, + 'content' => '.88', + ], + ], + ], + [ + '/* testCalc1 */', + [ + [ + 'code' => T_LNUMBER, + 'content' => '667_083', + ], + [ + 'code' => T_WHITESPACE, + 'content' => ' ', + ], + [ + 'code' => T_MINUS, + 'content' => '-', + ], + [ + 'code' => T_WHITESPACE, + 'content' => ' ', + ], + [ + 'code' => T_LNUMBER, + 'content' => '11', + ], + ], + ], + [ + '/* test Calc2 */', + [ + [ + 'code' => T_DNUMBER, + 'content' => '6.674_08e3', + ], + [ + 'code' => T_WHITESPACE, + 'content' => ' ', + ], + [ + 'code' => T_PLUS, + 'content' => '+', + ], + [ + 'code' => T_WHITESPACE, + 'content' => ' ', + ], + [ + 'code' => T_LNUMBER, + 'content' => '11', + ], + ], + ], + ]; + + }//end dataNoBackfill() + + }//end class