diff --git a/docs/Traversal.md b/docs/Traversal.md new file mode 100644 index 00000000..9c395889 --- /dev/null +++ b/docs/Traversal.md @@ -0,0 +1,92 @@ + +# AST Traversal + +All Nodes implement the `IteratorAggregate` interface, which means their immediate children can be directly traversed with `foreach`: + +```php +foreach ($node as $key => $child) { + var_dump($key); + var_dump($child); +} +``` + +`$key` is set to the child name (e.g. `parameters`). +Multiple child nodes may have the same key. + +The Iterator that is returned to `foreach` from `$node->getIterator()` implements the `RecursiveIterator` interface. +To traverse all descendant nodes, you need to "flatten" it with PHP's built-in `RecursiveIteratorIterator`: + +```php +$it = new \RecursiveIteratorIterator($node, \RecursiveIteratorIterator::SELF_FIRST); +foreach ($it as $node) { + var_dump($node); +} +``` + +The code above will walk all nodes and tokens depth-first, visiting each Node before its children. +Passing `RecursiveIteratorIterator::CHILD_FIRST` would also traverse depth-first but visit each Node after its children, while `RecursiveIteratorIterator::LEAVES_ONLY` (the default) would only traverse terminal Tokens. + +## Exclude Tokens + +To exclude terminal Tokens and only traverse Nodes, use PHP's built-in `ParentIterator`: + +```php +$nodes = new \RecursiveIteratorIterator(new \ParentIterator($node->getIterator()), \RecursiveIteratorIterator::SELF_FIRST); +``` + +## Skipping child traversal + +To skip traversal of certain Nodes, use PHP's `RecursiveCallbackFilterIterator`. 
+Naive example of traversing all nodes in the current scope: + +```php +// Find all nodes in the current scope +$nodesInScope = new \RecursiveCallbackFilterIterator($node->getIterator(), function ($current, string $key, \RecursiveIterator $it) { + // Don't traverse into function nodes, they form a different scope + return !($current instanceof Node\Statement\FunctionDeclaration); +}); +// Convert the RecursiveIterator to a flat Iterator +$it = new \RecursiveIteratorIterator($nodesInScope, \RecursiveIteratorIterator::SELF_FIRST); +``` + +## Filtering + +Building on that example, to get all variables in that scope use a non-recursive `CallbackFilterIterator`: + +```php +// Keep only the variables +$vars = new \CallbackFilterIterator($it, function ($current, string $key, \Iterator $it) { + return $current instanceof Node\Expression\Variable && $current->name instanceof Token; +}); + +foreach ($vars as $var) { + echo $var->name . PHP_EOL; +} +``` + +## Traversing ancestors + +Use the `NodeAncestorIterator` to walk the AST upwards from a Node to the root. +Example that finds the closest namespace Node to a Node: + +```php +use Microsoft\PhpParser\Iterator\NodeAncestorIterator; +use Microsoft\PhpParser\Node; + +foreach (new NodeAncestorIterator($node) as $ancestor) { + if ($ancestor instanceof Node\Statement\NamespaceDefinition) { + var_dump($ancestor->name); + break; + } +} +``` + +## Converting to an array + +You can convert your iterator to a flat array with + +```php +$arr = iterator_to_array($it, false); +``` + +The `false` ensures that the array is indexed numerically and not by Iterator keys (otherwise later Nodes with the same key will override previous Nodes). 
diff --git a/docs/a.md b/docs/a.md deleted file mode 100644 index e69de29b..00000000 diff --git a/phpunit.xml b/phpunit.xml index 1812dacd..69114f79 100644 --- a/phpunit.xml +++ b/phpunit.xml @@ -35,6 +35,8 @@ tests/api/getResolvedName.php tests/api/PositionUtilitiesTest.php tests/api/TextEditTest.php + tests/api/NodeIteratorTest.php + tests/api/NodeAncestorIteratorTest.php diff --git a/src/Iterator/NodeAncestorIterator.php b/src/Iterator/NodeAncestorIterator.php new file mode 100644 index 00000000..5f5c399f --- /dev/null +++ b/src/Iterator/NodeAncestorIterator.php @@ -0,0 +1,75 @@ +start = $node; + } + + /** + * Rewinds the Iterator to the beginning + * + * @return void + */ + public function rewind() { + $this->current = $this->start; + } + + /** + * Returns `true` if `current()` can be called to get the current node. + * Returns `false` if the last Node was the root node. + * + * @return bool + */ + public function valid() { + return $this->current !== null; + } + + /** + * Always returns null. + * + * @return null + */ + public function key() { + return null; + } + + /** + * Returns the current Node + * + * @return Node + */ + public function current() { + return $this->current; + } + + /** + * Advances the Iterator to the parent of the current Node + * + * @return void + */ + public function next() { + $this->current = $this->current->parent; + } +} diff --git a/src/Iterator/NodeIterator.php b/src/Iterator/NodeIterator.php new file mode 100644 index 00000000..522c706f --- /dev/null +++ b/src/Iterator/NodeIterator.php @@ -0,0 +1,161 @@ +node = $node; + $this->childNames = $node::CHILD_NAMES; + $this->childNamesLength = \count($node::CHILD_NAMES); + } + + /** + * Rewinds the Iterator to the beginning + * + * @return void + */ + public function rewind() { + $this->childNamesIndex = -1; + $this->next(); + } + + /** + * Returns `true` if `current()` can be called to get the current child. + * Returns `false` if this Node has no more children (direct descendants). 
+ * + * @return bool + */ + public function valid() { + return $this->childNamesIndex < $this->childNamesLength; + } + + /** + * Returns the current child name being iterated. + * Multiple values may have the same key. + * + * @return string + */ + public function key() { + return $this->childName; + } + + /** + * Returns the current child (direct descendant) + * + * @return Node|Token + */ + public function current() { + if ($this->valueIndex === null) { + return $this->node->{$this->childName}; + } else { + return $this->node->{$this->childName}[$this->valueIndex]; + } + } + + /** + * Advances the Iterator to the next child (direct descendant) + * + * @return void + */ + public function next() { + if ($this->valueIndex === $this->valueLength) { + // If not iterating value array or finished with it, go to next child name + $this->childNamesIndex++; + if ($this->childNamesIndex === $this->childNamesLength) { + // If child names index is invalid, become invalid + return; + } + $this->childName = $this->childNames[$this->childNamesIndex]; + $value = $this->node->{$this->childName}; + // If new value is null or empty array, skip it + if (empty($value)) { + $this->next(); + } else if (\is_array($value)) { + // If new value is an array, start index at 0 + $this->valueIndex = 0; + $this->valueLength = \count($value); + } else { + // Else reset everything to null + $this->valueIndex = null; + $this->valueLength = null; + } + } else { + // Else go to next item in value array + $this->valueIndex++; + // If new value is null or empty array, skip it + if (empty($this->node->{$this->childName}[$this->valueIndex])) { + $this->next(); + } + } + } + + /** + * Returns true if the current child is another Node (not a Token) + * and can be used to create another NodeIterator + * + * @return bool + */ + public function hasChildren(): bool { + return $this->current() instanceof Node; + } + + /** + * Returns a NodeIterator for the children of the current child Node + * + * @return 
NodeIterator + */ + public function getChildren() { + return new NodeIterator($this->current()); + } +} diff --git a/src/Node.php b/src/Node.php index d6ddbe66..b4a721d1 100644 --- a/src/Node.php +++ b/src/Node.php @@ -12,7 +12,7 @@ use Microsoft\PhpParser\Node\Statement\NamespaceDefinition; use Microsoft\PhpParser\Node\Statement\NamespaceUseDeclaration; -abstract class Node implements \JsonSerializable { +abstract class Node implements \JsonSerializable, \IteratorAggregate { /** @var array[] Map from node class to array of child keys */ private static $childNames = []; @@ -149,6 +149,15 @@ public function getRoot() : Node { return $node; } + /** + * Gets an Iterator to iterate all descendant nodes + * + * @return NodeIterator + */ + public function getIterator() { + return new Iterator\NodeIterator($this); + } + /** * Gets generator containing all descendant Nodes and Tokens. * diff --git a/tests/api/NodeAncestorIteratorTest.php b/tests/api/NodeAncestorIteratorTest.php new file mode 100644 index 00000000..91e1e6d8 --- /dev/null +++ b/tests/api/NodeAncestorIteratorTest.php @@ -0,0 +1,54 @@ +sourceFile = $parser->parseSourceFile(self::FILE_CONTENTS); + } + + public function testIteratesAncestors() { + $it = new NodeAncestorIterator($this->sourceFile->statementList[1]->compoundStatementOrSemicolon->statements[0]->expression->leftOperand); + $it->rewind(); + + $this->assertTrue($it->valid()); + $this->assertSame($this->sourceFile->statementList[1]->compoundStatementOrSemicolon->statements[0]->expression->leftOperand, $it->current()); + $it->next(); + + $this->assertTrue($it->valid()); + $this->assertSame($this->sourceFile->statementList[1]->compoundStatementOrSemicolon->statements[0]->expression, $it->current()); + $it->next(); + + $this->assertTrue($it->valid()); + $this->assertSame($this->sourceFile->statementList[1]->compoundStatementOrSemicolon->statements[0], $it->current()); + $it->next(); + + $this->assertTrue($it->valid()); + 
$this->assertSame($this->sourceFile->statementList[1]->compoundStatementOrSemicolon, $it->current()); + $it->next(); + + $this->assertTrue($it->valid()); + $this->assertSame($this->sourceFile->statementList[1], $it->current()); + $it->next(); + + $this->assertTrue($it->valid()); + $this->assertSame($this->sourceFile, $it->current()); + $it->next(); + + $this->assertFalse($it->valid()); + } +} diff --git a/tests/api/NodeIteratorTest.php b/tests/api/NodeIteratorTest.php new file mode 100644 index 00000000..5cf133e9 --- /dev/null +++ b/tests/api/NodeIteratorTest.php @@ -0,0 +1,186 @@ +sourceFile = $parser->parseSourceFile(self::FILE_CONTENTS); + } + + public function testIteratesChildren() { + $it = new NodeIterator($this->sourceFile); + $it->rewind(); + + // Node\Statement\InlineHtml + $this->assertTrue($it->valid()); + $this->assertSame('statementList', $it->key()); + $this->assertSame($this->sourceFile->statementList[0], $it->current()); + $it->next(); + + // Node\Statement\FunctionDeclaration + $this->assertTrue($it->valid()); + $this->assertSame('statementList', $it->key()); + $this->assertSame($this->sourceFile->statementList[1], $it->current()); + $it->next(); + + // Token(kind=EndOfFileToken) + $this->assertTrue($it->valid()); + $this->assertSame('endOfFileToken', $it->key()); + $this->assertSame($this->sourceFile->endOfFileToken, $it->current()); + $it->next(); + + $this->assertFalse($it->valid()); + } + + public function testRecursiveIteratorIteratorIteratesDescendants() { + + $it = new \RecursiveIteratorIterator(new NodeIterator($this->sourceFile), \RecursiveIteratorIterator::SELF_FIRST); + $it->rewind(); + + // Node\Statement\InlineHtml + $this->assertTrue($it->valid()); + $this->assertSame('statementList', $it->key()); + $this->assertSame($this->sourceFile->statementList[0], $it->current()); + $it->next(); + + // Token(kind=InlineHtml) + $this->assertTrue($it->valid()); + $this->assertSame('text', $it->key()); + 
$this->assertSame($this->sourceFile->statementList[0]->text, $it->current()); + $it->next(); + + // assertTrue($it->valid()); + $this->assertSame('scriptSectionStartTag', $it->key()); + $this->assertSame($this->sourceFile->statementList[0]->scriptSectionStartTag, $it->current()); + $it->next(); + + // Node\Statement\FunctionDeclaration + $this->assertTrue($it->valid()); + $this->assertSame('statementList', $it->key()); + $this->assertSame($this->sourceFile->statementList[1], $it->current()); + $it->next(); + + // function + // Token(kind=FunctionKeyword) + $this->assertTrue($it->valid()); + $this->assertSame('functionKeyword', $it->key()); + $this->assertSame($this->sourceFile->statementList[1]->functionKeyword, $it->current()); + $it->next(); + + // a + // Token(kind=Name) + $this->assertTrue($it->valid()); + $this->assertSame('name', $it->key()); + $this->assertSame($this->sourceFile->statementList[1]->name, $it->current()); + $it->next(); + + // ( + // Token(kind=OpenParenToken) + $this->assertTrue($it->valid()); + $this->assertSame('openParen', $it->key()); + $this->assertSame($this->sourceFile->statementList[1]->openParen, $it->current()); + $it->next(); + + // ) + // Token(kind=CloseParenToken) + $this->assertTrue($it->valid()); + $this->assertSame('closeParen', $it->key()); + $this->assertSame($this->sourceFile->statementList[1]->closeParen, $it->current()); + $it->next(); + + // Node\Statement\CompoundStatementNode + $this->assertTrue($it->valid()); + $this->assertSame('compoundStatementOrSemicolon', $it->key()); + $this->assertSame($this->sourceFile->statementList[1]->compoundStatementOrSemicolon, $it->current()); + $it->next(); + + // Node\Statement\CompoundStatementNode + $this->assertTrue($it->valid()); + $this->assertSame('openBrace', $it->key()); + $this->assertSame($this->sourceFile->statementList[1]->compoundStatementOrSemicolon->openBrace, $it->current()); + $it->next(); + + // Node\Statement\ExpressionStatement + $this->assertTrue($it->valid()); + 
$this->assertSame('statements', $it->key()); + $this->assertSame($this->sourceFile->statementList[1]->compoundStatementOrSemicolon->statements[0], $it->current()); + $it->next(); + + // Node\Expression\AssignmentExpression + $this->assertTrue($it->valid()); + $this->assertSame('expression', $it->key()); + $this->assertSame($this->sourceFile->statementList[1]->compoundStatementOrSemicolon->statements[0]->expression, $it->current()); + $it->next(); + + // Node\Expression\Variable + $this->assertTrue($it->valid()); + $this->assertSame('leftOperand', $it->key()); + $this->assertSame($this->sourceFile->statementList[1]->compoundStatementOrSemicolon->statements[0]->expression->leftOperand, $it->current()); + $it->next(); + + // $a + // Token(kind=VariableName) + $this->assertTrue($it->valid()); + $this->assertSame('name', $it->key()); + $this->assertSame($this->sourceFile->statementList[1]->compoundStatementOrSemicolon->statements[0]->expression->leftOperand->name, $it->current()); + $it->next(); + + // = + // Token(kind=EqualsToken) + $this->assertTrue($it->valid()); + $this->assertSame('operator', $it->key()); + $this->assertSame($this->sourceFile->statementList[1]->compoundStatementOrSemicolon->statements[0]->expression->operator, $it->current()); + $it->next(); + + // Node\NumericLiteral + $this->assertTrue($it->valid()); + $this->assertSame('rightOperand', $it->key()); + $this->assertSame($this->sourceFile->statementList[1]->compoundStatementOrSemicolon->statements[0]->expression->rightOperand, $it->current()); + $it->next(); + + // 1 + // Token(kind=IntegerLiteralToken) + $this->assertTrue($it->valid()); + $this->assertSame('children', $it->key()); + $this->assertSame($this->sourceFile->statementList[1]->compoundStatementOrSemicolon->statements[0]->expression->rightOperand->children, $it->current()); + $it->next(); + + // ; + // Token(kind=SemicolonToken) + $this->assertTrue($it->valid()); + $this->assertSame('semicolon', $it->key()); + 
$this->assertSame($this->sourceFile->statementList[1]->compoundStatementOrSemicolon->statements[0]->semicolon, $it->current()); + $it->next(); + + // } + // Token(kind=CloseBraceToken) + $this->assertTrue($it->valid()); + $this->assertSame('closeBrace', $it->key()); + $this->assertSame($this->sourceFile->statementList[1]->compoundStatementOrSemicolon->closeBrace, $it->current()); + $it->next(); + + // Token(kind=EndOfFileToken) + $this->assertTrue($it->valid()); + $this->assertSame('endOfFileToken', $it->key()); + $this->assertSame($this->sourceFile->endOfFileToken, $it->current()); + $it->next(); + + $this->assertFalse($it->valid()); + } +} diff --git a/validation/iteratorPerf1.php b/validation/iteratorPerf1.php new file mode 100644 index 00000000..dabd0753 --- /dev/null +++ b/validation/iteratorPerf1.php @@ -0,0 +1,95 @@ +getSize(); + $sourceFiles[] = $parser->parseSourceFile(file_get_contents($file->getPathname())); + } +} + +$asts = []; + +$startMemory = memory_get_peak_usage(true); +$startTime = microtime(true); + +function iterate($n) { + $i2 = 0; + foreach ($n::CHILD_NAMES as $name) { + $node = $n->$name; + + if ($node === null) { + continue; + } + + if (\is_array($node)) { + foreach ($node as $nodeArrItem) { + if ($nodeArrItem instanceof \Microsoft\PhpParser\Node) { + $i2++; + $i2 += iterate($nodeArrItem); + } + } + } else if ($node instanceof \Microsoft\PhpParser\Node) { + $i2++; + $i2 += iterate($node); + } else { + $i2++; + } + } + + return $i2; +} + +$i = 0; +foreach ($sourceFiles as $idx=>$sourceFile) { + $i += iterate($sourceFile); + $asts[] = $sourceFile; + + if ($idx % 10 === 0) { + echo $idx; + } + if ($idx > 100) { + break; + } +} + +echo PHP_EOL . "Total nodes: $i" . PHP_EOL; + +if (!isset($idx)) { + exit("Validation directory does not exist. 
First run `git submodule update --init --recursive from project root.`"); +} + +$asts = SplFixedArray::fromArray($asts); + +$endTime = microtime(true); +$endMemory = memory_get_peak_usage(true); + +// TODO - multiple runs, calculate statistical significance +$memoryUsage = $endMemory - $startMemory; +$timeUsage = $endTime - $startTime; +$totalSize /= 1024*1024; +$memoryUsage /= 1024*1024; + +echo "MACHINE INFO\n"; +echo "============\n"; +echo "PHP int size: " . PHP_INT_SIZE . PHP_EOL; +echo "PHP version: " . phpversion() . PHP_EOL; +echo "OS: " . php_uname() . PHP_EOL . PHP_EOL; + +echo "PERF STATS\n"; +echo "==========\n"; +echo "Input Source Files (#): $idx\n"; +echo "Input Source Size (MB): $totalSize\n"; +echo PHP_EOL; +echo "Time Usage (seconds): $timeUsage\n"; +echo "Memory Usage (MB): $memoryUsage\n"; diff --git a/validation/iteratorPerf2.php b/validation/iteratorPerf2.php new file mode 100644 index 00000000..72e34c85 --- /dev/null +++ b/validation/iteratorPerf2.php @@ -0,0 +1,82 @@ +getSize(); + $sourceFiles[] = $parser->parseSourceFile(file_get_contents($file->getPathname())); + } +} + +$asts = []; + +$startMemory = memory_get_peak_usage(true); +$startTime = microtime(true); + + +function iterate($n) { + // Pass a callback to simulate filtering + $cb = function ($n) { + return true; + }; + $i = 0; + foreach ($n->getDescendantNodesAndTokens($cb) as $child) { + $i++; + } + + return $i; +} + +$i = 0; +foreach ($sourceFiles as $idx => $sourceFile) { + $i += iterate($sourceFile); + $asts[] = $sourceFile; + + if ($idx % 10 === 0) { + echo $idx; + } + if ($idx > 100) { + break; + } +} + +echo PHP_EOL . "Total nodes and tokens: $i" . PHP_EOL; + +if (!isset($idx)) { + exit("Validation directory does not exist. 
First run `git submodule update --init --recursive from project root.`"); +} + +$asts = SplFixedArray::fromArray($asts); + +$endTime = microtime(true); +$endMemory = memory_get_peak_usage(true); + +// TODO - multiple runs, calculate statistical significance +$memoryUsage = $endMemory - $startMemory; +$timeUsage = $endTime - $startTime; +$totalSize /= 1024*1024; +$memoryUsage /= 1024*1024; + +echo "MACHINE INFO\n"; +echo "============\n"; +echo "PHP int size: " . PHP_INT_SIZE . PHP_EOL; +echo "PHP version: " . phpversion() . PHP_EOL; +echo "OS: " . php_uname() . PHP_EOL . PHP_EOL; + +echo "PERF STATS\n"; +echo "==========\n"; +echo "Input Source Files (#): $idx\n"; +echo "Input Source Size (MB): $totalSize\n"; +echo PHP_EOL; +echo "Time Usage (seconds): $timeUsage\n"; +echo "Memory Usage (MB): $memoryUsage\n"; diff --git a/validation/iteratorPerf3.php b/validation/iteratorPerf3.php new file mode 100644 index 00000000..17add021 --- /dev/null +++ b/validation/iteratorPerf3.php @@ -0,0 +1,80 @@ +getSize(); + $sourceFiles[] = $parser->parseSourceFile(file_get_contents($file->getPathname())); + } +} + +$asts = []; + +$startMemory = memory_get_peak_usage(true); +$startTime = microtime(true); + +function iterate($node) { + $i = 0; + $it = new \RecursiveIteratorIterator($node, \RecursiveIteratorIterator::SELF_FIRST); + foreach ($it as $node) { + $i++; + } + + return $i; +} + +$i = 0; +foreach ($sourceFiles as $idx=>$sourceFile) { + $i += iterate($sourceFile); + $asts[] = $sourceFile; + + if ($idx % 10 === 0) { + echo $idx; + } + if ($idx > 100) { + break; + } +} + +echo PHP_EOL . "Total nodes: $i" . PHP_EOL; + +if (!isset($idx)) { + exit("Validation directory does not exist. 
First run `git submodule update --init --recursive from project root.`"); +} + +$asts = SplFixedArray::fromArray($asts); + +$endTime = microtime(true); +$endMemory = memory_get_peak_usage(true); + +// TODO - multiple runs, calculate statistical significance +$memoryUsage = $endMemory - $startMemory; +$timeUsage = $endTime - $startTime; +$totalSize /= 1024*1024; +$memoryUsage /= 1024*1024; + +echo "MACHINE INFO\n"; +echo "============\n"; +echo "PHP int size: " . PHP_INT_SIZE . PHP_EOL; +echo "PHP version: " . phpversion() . PHP_EOL; +echo "OS: " . php_uname() . PHP_EOL . PHP_EOL; + +echo "PERF STATS\n"; +echo "==========\n"; +echo "Input Source Files (#): $idx\n"; +echo "Input Source Size (MB): $totalSize\n"; +echo PHP_EOL; +echo "Time Usage (seconds): $timeUsage\n"; +echo "Memory Usage (MB): $memoryUsage\n";