diff --git a/src/abstract-interpretation/normalized-ast-fold.ts b/src/abstract-interpretation/normalized-ast-fold.ts new file mode 100644 index 0000000000..fcab6c2bd8 --- /dev/null +++ b/src/abstract-interpretation/normalized-ast-fold.ts @@ -0,0 +1,228 @@ +import type { NoInfo, RNode } from '../r-bridge/lang-4.x/ast/model/model'; +import type { RExpressionList } from '../r-bridge/lang-4.x/ast/model/nodes/r-expression-list'; +import type { RFunctionCall } from '../r-bridge/lang-4.x/ast/model/nodes/r-function-call'; +import { EmptyArgument } from '../r-bridge/lang-4.x/ast/model/nodes/r-function-call'; +import type { RFunctionDefinition } from '../r-bridge/lang-4.x/ast/model/nodes/r-function-definition'; +import { RType } from '../r-bridge/lang-4.x/ast/model/type'; +import type { RForLoop } from '../r-bridge/lang-4.x/ast/model/nodes/r-for-loop'; +import type { RWhileLoop } from '../r-bridge/lang-4.x/ast/model/nodes/r-while-loop'; +import type { RRepeatLoop } from '../r-bridge/lang-4.x/ast/model/nodes/r-repeat-loop'; +import type { RIfThenElse } from '../r-bridge/lang-4.x/ast/model/nodes/r-if-then-else'; +import type { RBinaryOp } from '../r-bridge/lang-4.x/ast/model/nodes/r-binary-op'; +import type { RPipe } from '../r-bridge/lang-4.x/ast/model/nodes/r-pipe'; +import type { RUnaryOp } from '../r-bridge/lang-4.x/ast/model/nodes/r-unary-op'; +import type { RParameter } from '../r-bridge/lang-4.x/ast/model/nodes/r-parameter'; +import type { RArgument } from '../r-bridge/lang-4.x/ast/model/nodes/r-argument'; +import type { RAccess } from '../r-bridge/lang-4.x/ast/model/nodes/r-access'; +import type { RLogical } from '../r-bridge/lang-4.x/ast/model/nodes/r-logical'; +import type { RBreak } from '../r-bridge/lang-4.x/ast/model/nodes/r-break'; +import type { RComment } from '../r-bridge/lang-4.x/ast/model/nodes/r-comment'; +import type { RNext } from '../r-bridge/lang-4.x/ast/model/nodes/r-next'; +import type { RNumber } from '../r-bridge/lang-4.x/ast/model/nodes/r-number'; 
+import type { RLineDirective } from '../r-bridge/lang-4.x/ast/model/nodes/r-line-directive'; +import type { RString } from '../r-bridge/lang-4.x/ast/model/nodes/r-string'; +import type { RSymbol } from '../r-bridge/lang-4.x/ast/model/nodes/r-symbol'; + + +type FoldOfType = (node: Extract, { type: T }>) => Returns; + +/** explicitly excludes types that are not visitable */ +export type FoldableRType = Exclude; + +/** + * Describes the fold functions for each node type. + */ +export type NormalizedAstFold = { + [K in FoldableRType as `fold${Capitalize}`]: FoldOfType; +} + +/** + * Describes the type of a mapping object, + * which maps the type of the normalized AST node to the corresponding fold function. + */ +export type FittingNormalizedAstFold = Readonly<{ + [K in FoldableRType]: FoldOfType; +}> + +export type SingleOrArrayOrNothing = T | readonly (T | null | undefined)[] | null | undefined; + +export type EntryExitVisitor = ((node: RNode) => void) | undefined; + +/** + * Default implementation of a fold over the normalized AST (using the classic fold traversal). + * To modify the behavior, please extend this class and overwrite the methods of interest. + * You can control the value passing (`Returns` generic) + * by providing sensible Monoid behavior overwriting the {@link DefaultNormalizedAstFold#concat|concat} method + * and supplying the empty value in the constructor. + * + * @note By providing `entry` and `exit` you can use this as an extension to the simpler {@link visitAst} function but without + * the early termination within the visitors (for this, you can overwrite the respective `fold*` methods). 
+ * + * @example First you want to create your own fold: + * + * ```ts + * let marker = false; + * class MyNumberFold extends DefaultNormalizedAstFold { + * override foldRNumber(node: RNumber) { + * super.foldRNumber(node); + * marker = true; + * } + * } + * ``` + * This one does explicitly not use the return functionality (and hence acts more as a conventional visitor). + * Now let us suppose we have a normalized AST as an {@link RNode} in the variable `ast` + * and want to check if the AST contains a number: + * + * ```ts + * const result = new MyNumberFold().fold(ast); + * ``` + * + * Please take a look at the corresponding tests or the wiki pages for more information on how to use this fold. + */ +export class DefaultNormalizedAstFold implements NormalizedAstFold { + protected readonly enter: EntryExitVisitor; + protected readonly exit: EntryExitVisitor; + protected readonly empty: Returns; + + /** + * Empty must provide a sensible default whenever you want to have `Returns` as non-`void` + * (e.g., whenever you want your visitors to be able to return a value). 
+ */ + constructor(empty: Returns, enter?: EntryExitVisitor, exit?: EntryExitVisitor) { + this.empty = empty; + this.enter = enter; + this.exit = exit; + } + + /** + * Monoid::concat + * + * + * @see {@link https://en.wikipedia.org/wiki/Monoid} + * @see {@link DefaultNormalizedAstFold#concatAll|concatAll} + */ + protected concat(_a: Returns, _b: Returns): Returns { + return this.empty; + } + + /** + * overwrite this method, if you have a faster way to concat multiple nodes + * + * @see {@link DefaultNormalizedAstFold#concatAll|concatAll} + */ + protected concatAll(nodes: readonly Returns[]): Returns { + return nodes.reduce((acc, n) => this.concat(acc, n), this.empty); + } + + public fold(nodes: SingleOrArrayOrNothing | typeof EmptyArgument>): Returns { + if(Array.isArray(nodes)) { + const n = nodes as readonly (RNode | null | undefined | typeof EmptyArgument)[]; + return this.concatAll(n.filter(n => n && n !== EmptyArgument).map(node => this.foldSingle(node as RNode))); + } else if(nodes) { + return this.foldSingle(nodes as RNode); + } + return this.empty; + } + + protected foldSingle(node: RNode): Returns { + this.enter?.(node); + const type = node.type; + // @ts-expect-error -- ts may be unable to infer that the type is correct + const result = this.folds[type]?.(node); + this.exit?.(node); + return result; + } + + foldRAccess(access: RAccess) { + let accessed = this.foldSingle(access.accessed); + if(access.operator === '[' || access.operator === '[[') { + accessed = this.concat(accessed, this.fold(access.access)); + } + return accessed; + } + foldRArgument(argument: RArgument) { + return this.concat(this.fold(argument.name), this.fold(argument.value)); + } + foldRBinaryOp(binaryOp: RBinaryOp) { + return this.concat(this.foldSingle(binaryOp.lhs), this.foldSingle(binaryOp.rhs)); + } + foldRExpressionList(exprList: RExpressionList) { + return this.concat(this.fold(exprList.grouping), this.fold(exprList.children)); + } + foldRForLoop(loop: RForLoop) { + return 
this.concatAll([this.foldSingle(loop.variable), this.foldSingle(loop.vector), this.foldSingle(loop.body)]); + } + foldRFunctionCall(call: RFunctionCall) { + return this.concat(this.foldSingle(call.named ? call.functionName : call.calledFunction), this.fold(call.arguments)); + } + foldRFunctionDefinition(definition: RFunctionDefinition) { + return this.concat(this.fold(definition.parameters), this.foldSingle(definition.body)); + } + foldRIfThenElse(ite: RIfThenElse) { + return this.concatAll([this.foldSingle(ite.condition), this.foldSingle(ite.then), this.fold(ite.otherwise)]); + } + foldRParameter(parameter: RParameter) { + return this.concat(this.foldSingle(parameter.name), this.fold(parameter.defaultValue)); + } + foldRPipe(pipe: RPipe) { + return this.concat(this.foldSingle(pipe.lhs), this.foldSingle(pipe.rhs)); + } + foldRRepeatLoop(loop: RRepeatLoop) { + return this.foldSingle(loop.body); + } + foldRUnaryOp(unaryOp: RUnaryOp) { + return this.foldSingle(unaryOp.operand); + } + foldRWhileLoop(loop: RWhileLoop) { + return this.concat(this.foldSingle(loop.condition), this.foldSingle(loop.body)); + } + foldRBreak(_node: RBreak) { + return this.empty; + } + foldRComment(_node: RComment) { + return this.empty; + } + foldRLineDirective(_node: RLineDirective) { + return this.empty; + } + foldRLogical(_node: RLogical) { + return this.empty; + } + foldRNext(_node: RNext) { + return this.empty; + } + foldRNumber(_node: RNumber) { + return this.empty; + } + foldRString(_node: RString) { + return this.empty; + } + foldRSymbol(_node: RSymbol) { + return this.empty; + } + + protected readonly folds: FittingNormalizedAstFold = { + [RType.Access]: n => this.foldRAccess(n), + [RType.Argument]: n => this.foldRArgument(n), + [RType.BinaryOp]: n => this.foldRBinaryOp(n), + [RType.Break]: n => this.foldRBreak(n), + [RType.Comment]: n => this.foldRComment(n), + [RType.ExpressionList]: n => this.foldRExpressionList(n), + [RType.ForLoop]: n => this.foldRForLoop(n), + 
[RType.FunctionCall]: n => this.foldRFunctionCall(n), + [RType.FunctionDefinition]: n => this.foldRFunctionDefinition(n), + [RType.IfThenElse]: n => this.foldRIfThenElse(n), + [RType.LineDirective]: n => this.foldRLineDirective(n), + [RType.Logical]: n => this.foldRLogical(n), + [RType.Next]: n => this.foldRNext(n), + [RType.Number]: n => this.foldRNumber(n), + [RType.Parameter]: n => this.foldRParameter(n), + [RType.Pipe]: n => this.foldRPipe(n), + [RType.RepeatLoop]: n => this.foldRRepeatLoop(n), + [RType.String]: n => this.foldRString(n), + [RType.Symbol]: n => this.foldRSymbol(n), + [RType.UnaryOp]: n => this.foldRUnaryOp(n), + [RType.WhileLoop]: n => this.foldRWhileLoop(n), + }; +} + diff --git a/src/abstract-interpretation/normalized-ast-visitor.ts b/src/abstract-interpretation/normalized-ast-visitor.ts deleted file mode 100644 index 830d7dfd88..0000000000 --- a/src/abstract-interpretation/normalized-ast-visitor.ts +++ /dev/null @@ -1,177 +0,0 @@ -import type { NoInfo, RNode } from '../r-bridge/lang-4.x/ast/model/model'; -import type { RAccess } from '../r-bridge/lang-4.x/ast/model/nodes/r-access'; -import type { RArgument } from '../r-bridge/lang-4.x/ast/model/nodes/r-argument'; -import type { RBinaryOp } from '../r-bridge/lang-4.x/ast/model/nodes/r-binary-op'; -import type { RBreak } from '../r-bridge/lang-4.x/ast/model/nodes/r-break'; -import type { RComment } from '../r-bridge/lang-4.x/ast/model/nodes/r-comment'; -import type { RExpressionList } from '../r-bridge/lang-4.x/ast/model/nodes/r-expression-list'; -import type { RForLoop } from '../r-bridge/lang-4.x/ast/model/nodes/r-for-loop'; -import type { RFunctionCall } from '../r-bridge/lang-4.x/ast/model/nodes/r-function-call'; -import { EmptyArgument } from '../r-bridge/lang-4.x/ast/model/nodes/r-function-call'; -import type { RFunctionDefinition } from '../r-bridge/lang-4.x/ast/model/nodes/r-function-definition'; -import type { RIfThenElse } from '../r-bridge/lang-4.x/ast/model/nodes/r-if-then-else'; 
-import type { RLineDirective } from '../r-bridge/lang-4.x/ast/model/nodes/r-line-directive'; -import type { RLogical } from '../r-bridge/lang-4.x/ast/model/nodes/r-logical'; -import type { RNext } from '../r-bridge/lang-4.x/ast/model/nodes/r-next'; -import type { RNumber } from '../r-bridge/lang-4.x/ast/model/nodes/r-number'; -import type { RParameter } from '../r-bridge/lang-4.x/ast/model/nodes/r-parameter'; -import type { RPipe } from '../r-bridge/lang-4.x/ast/model/nodes/r-pipe'; -import type { RRepeatLoop } from '../r-bridge/lang-4.x/ast/model/nodes/r-repeat-loop'; -import type { RString } from '../r-bridge/lang-4.x/ast/model/nodes/r-string'; -import type { RSymbol } from '../r-bridge/lang-4.x/ast/model/nodes/r-symbol'; -import type { RUnaryOp } from '../r-bridge/lang-4.x/ast/model/nodes/r-unary-op'; -import type { RWhileLoop } from '../r-bridge/lang-4.x/ast/model/nodes/r-while-loop'; -import { RType } from '../r-bridge/lang-4.x/ast/model/type'; -import { assertUnreachable } from '../util/assert'; - -export interface Visitor { - visitNumber?(num: RNumber): void; - visitString?(str: RString): void; - visitLogical?(logical: RLogical): void; - visitSymbol?(symbol: RSymbol): void; - visitAccess?(node: RAccess): void; - visitBinaryOp?(op: RBinaryOp): void; - visitPipe?(op: RPipe): void; - visitUnaryOp?(op: RUnaryOp): void; - visitFor?(loop: RForLoop): void; - visitWhile?(loop: RWhileLoop): void; - visitRepeat?(loop: RRepeatLoop): void; - visitNext?(next: RNext): void; - visitBreak?(next: RBreak): void; - visitComment?(comment: RComment): void; - visitLineDirective?(comment: RLineDirective): void; - visitIfThenElse?(ifThenExpr: RIfThenElse): void; - visitExprList?(exprList: RExpressionList): void; - visitFunctionDefinition?(definition: RFunctionDefinition): void; - visitFunctionCall?(call: RFunctionCall): void; - visitArgument?(argument: RArgument): void; - visitParameter?(parameter: RParameter): void; -} - - -export class NormalizedAstVisitor { - private readonly 
root: RNode; - - constructor(root: RNode) { - this.root = root; - } - - accept(v: Visitor): void { - this.visit(this.root, v); - } - - private visit(nodes: RNode | readonly (RNode | null | undefined | typeof EmptyArgument)[] | undefined | null, v: Visitor): void { - if(Array.isArray(nodes)) { - const n = nodes as readonly (RNode | null | undefined | typeof EmptyArgument)[]; - for(const node of n) { - if(node && node !== EmptyArgument) { - this.visitSingle(node, v); - } - } - } else if(nodes) { - this.visitSingle(nodes as RNode, v); - } - } - - private visitSingle(node: RNode, v: Visitor): void { - /* let the type system know that the type does not change */ - const type = node.type; - switch(type) { - case RType.FunctionCall: - v.visitFunctionCall?.(node); - this.visitSingle(node.named ? node.functionName : node.calledFunction, v); - this.visit(node.arguments, v); - break; - case RType.FunctionDefinition: - v.visitFunctionDefinition?.(node); - this.visit(node.parameters, v); - this.visitSingle(node.body, v); - break; - case RType.ExpressionList: - v.visitExprList?.(node); - this.visit(node.grouping, v); - this.visit(node.children, v); - break; - case RType.ForLoop: - v.visitFor?.(node); - this.visitSingle(node.variable, v); - this.visitSingle(node.vector, v); - this.visitSingle(node.body, v); - break; - case RType.WhileLoop: - v.visitWhile?.(node); - this.visitSingle(node.condition, v); - this.visitSingle(node.body, v); - break; - case RType.RepeatLoop: - v.visitRepeat?.(node); - this.visitSingle(node.body, v); - break; - case RType.IfThenElse: - v.visitIfThenElse?.(node); - this.visitSingle(node.condition, v); - this.visitSingle(node.then, v); - this.visit(node.otherwise, v); - break; - case RType.BinaryOp: - v.visitBinaryOp?.(node); - this.visitSingle(node.lhs, v); - this.visitSingle(node.rhs, v); - break; - case RType.Pipe: - v.visitPipe?.(node); - this.visitSingle(node.lhs, v); - this.visitSingle(node.rhs, v); - break; - case RType.UnaryOp: - 
v.visitUnaryOp?.(node); - this.visitSingle(node.operand, v); - break; - case RType.Parameter: - v.visitParameter?.(node); - this.visitSingle(node.name, v); - this.visit(node.defaultValue, v); - break; - case RType.Argument: - v.visitArgument?.(node); - this.visit(node.name, v); - this.visit(node.value, v); - break; - case RType.Access: - v.visitAccess?.(node); - this.visitSingle(node.accessed, v); - if(node.operator === '[' || node.operator === '[[') { - this.visit(node.access, v); - } - break; - case RType.Symbol: - v.visitSymbol?.(node); - break; - case RType.Logical: - v.visitLogical?.(node); - break; - case RType.Number: - v.visitNumber?.(node); - break; - case RType.String: - v.visitString?.(node); - break; - case RType.Comment: - v.visitComment?.(node); - break; - case RType.Break: - v.visitBreak?.(node); - break; - case RType.Next: - v.visitNext?.(node); - break; - case RType.LineDirective: - v.visitLineDirective?.(node); - break; - default: - assertUnreachable(type); - } - } - -} - diff --git a/src/documentation/doc-util/doc-normalized-ast.ts b/src/documentation/doc-util/doc-normalized-ast.ts index 3db18a09fb..723c5b0762 100644 --- a/src/documentation/doc-util/doc-normalized-ast.ts +++ b/src/documentation/doc-util/doc-normalized-ast.ts @@ -32,7 +32,7 @@ export async function printNormalizedAstForCode(shell: RShell, code: string, { s }).allRemainingSteps(); const duration = performance.now() - now; - const metaInfo = `The analysis required _${printAsMs(duration)}_ (including parsing) within the generation environment.`; + const metaInfo = `The analysis required _${printAsMs(duration)}_ (including parsing with the R shell) within the generation environment.`; return '\n\n' + printNormalizedAst(result.normalize.ast, prefix) + (showCode ? `
diff --git a/src/documentation/doc-util/doc-types.ts b/src/documentation/doc-util/doc-types.ts index e892bfadf8..65537107e7 100644 --- a/src/documentation/doc-util/doc-types.ts +++ b/src/documentation/doc-util/doc-types.ts @@ -12,7 +12,7 @@ import { details } from './doc-structure'; export interface TypeElementInSource { name: string; node: ts.Node; - kind: 'interface' | 'type' | 'enum'; + kind: 'interface' | 'type' | 'enum' | 'class'; extends: string[]; generics: string[]; filePath: string; @@ -162,6 +162,29 @@ function collectHierarchyInformation(sourceFiles: readonly ts.SourceFile[], opti return `${name}${escapeMarkdown(': ' + getType(member, typeChecker))}`; }) }); + } else if(ts.isClassDeclaration(node)) { + const className = node.name?.getText(sourceFile) ?? ''; + const baseTypes = node.heritageClauses?.flatMap(clause => + clause.types + .map(type => type.getText(sourceFile) ?? '') + .map(dropGenericsFromType) + ) ?? []; + const generics = node.typeParameters?.map(param => param.getText(sourceFile) ?? '') ?? []; + + hierarchyList.push({ + name: dropGenericsFromType(className), + node, + kind: 'class', + extends: baseTypes, + comments: getTextualComments(node), + generics, + filePath: sourceFile.fileName, + lineNumber: getStartLine(node, sourceFile), + properties: node.members.map(member => { + const name = member.name?.getText(sourceFile) ?? 
''; + return `${name}${escapeMarkdown(': ' + getType(member, typeChecker))}`; + }), + }); } ts.forEachChild(node, child => visit(child, sourceFile)); diff --git a/src/documentation/print-normalized-ast-wiki.ts b/src/documentation/print-normalized-ast-wiki.ts index c5ed8a9b28..cd4f2ea3bc 100644 --- a/src/documentation/print-normalized-ast-wiki.ts +++ b/src/documentation/print-normalized-ast-wiki.ts @@ -6,9 +6,15 @@ import { codeBlock } from './doc-util/doc-code'; import { printNormalizedAstForCode } from './doc-util/doc-normalized-ast'; import { mermaidHide, printHierarchy, getTypesFromFolderAsMermaid } from './doc-util/doc-types'; import path from 'path'; -import { FlowrGithubBaseRef, FlowrWikiBaseRef, getFileContentFromRoot, getFilePathMd } from './doc-util/doc-files'; +import { FlowrGithubBaseRef, FlowrWikiBaseRef, getFilePathMd } from './doc-util/doc-files'; import { getReplCommand } from './doc-util/doc-cli-option'; import { printAsMs } from '../util/time'; +import { details } from './doc-util/doc-structure'; +import { PipelineExecutor } from '../core/pipeline-executor'; +import { requestFromInput } from '../r-bridge/retriever'; +import { visitAst } from '../r-bridge/lang-4.x/ast/model/processing/visitor'; +import { collectAllIds } from '../r-bridge/lang-4.x/ast/model/collect'; +import { DefaultNormalizedAstFold } from '../abstract-interpretation/normalized-ast-fold'; async function getText(shell: RShell) { const rversion = (await shell.usedRVersion())?.format() ?? 
'unknown'; @@ -16,17 +22,12 @@ async function getText(shell: RShell) { const now = performance.now(); const types = getTypesFromFolderAsMermaid({ rootFolder: path.resolve('./src/r-bridge/lang-4.x/ast/model/'), + files: [path.resolve('./src/abstract-interpretation/normalized-ast-fold.ts')], typeName: 'RNode', inlineTypes: mermaidHide }); const elapsed = performance.now() - now; - const visitorInterface = getTypesFromFolderAsMermaid({ - rootFolder: path.resolve('./src/abstract-interpretation'), - typeName: 'Visitor', - inlineTypes: mermaidHide - }); - return `${autoGenHeader({ filename: module.filename, purpose: 'normalized ast', rVersion: rversion })} _flowR_ produces a normalized version of R's abstract syntax tree (AST), @@ -45,7 +46,7 @@ ${codeBlock('r', 'x <- 2 * 3 + 1')} Each node in the AST contains the type, the id, and the lexeme (if applicable). Each edge is labeled with the type of the parent-child relationship (the "role"). -${await printNormalizedAstForCode(shell, 'x <- 2 * 3 + 1')} +${await printNormalizedAstForCode(shell, 'x <- 2 * 3 + 1', { showCode: false, prefix: 'flowchart LR\n' })}   @@ -54,7 +55,7 @@ ${await printNormalizedAstForCode(shell, 'x <- 2 * 3 + 1')} > you can either use the [Visual Studio Code extension](${FlowrGithubBaseRef}/vscode-flowr) or the ${getReplCommand('normalize*')} > command in the REPL (see the [Interface wiki page](${FlowrWikiBaseRef}/Interface) for more information). -Indicative is the root expression list node, which is present in every normalized AST. +Indicative of the normalization is the root expression list node, which is present in every normalized AST. In general, we provide node types for: 1. 
literals (e.g., numbers and strings) @@ -83,74 +84,109 @@ Most notably, the \`info\` field holds the \`id\` of the node, which is used to In summary, we have the following types: ${ - printHierarchy({ program: types.program, hierarchy: types.info, root: 'RNode', collapseFromNesting: Number.MAX_VALUE }) + details('Normalized AST Node Types', + printHierarchy({ program: types.program, hierarchy: types.info, root: 'RNode', collapseFromNesting: Number.MAX_VALUE }) + ) } -With this, the example file produced the following AST (shown from left to right for space reasons): - -${await printNormalizedAstForCode(shell, getFileContentFromRoot('test/testfiles/example.R'), { prefix: 'flowchart LR\n' })} +The following segments intend to give you an overview of how to work with the normalized AST: +* [How to get a normalized AST](#how-get-a-normalized-ast) +* [Visitors and Folds](#visitors-and-folds) -# Working with the Normalized AST -## Visiting -This chapter will outline how to use the \`NormalizedAstVisitor\` to go over the AST. +## How Get a Normalized AST -### **Step 1**: Get the ast +As explained alongside the [Interface](${FlowrWikiBaseRef}/Interface#the-pipeline-executor) wiki page, you can use the +\`${PipelineExecutor.name}\` to get the normalized AST. 
If you are only interested in the normalization, +a pipeline like the \`DEFAULT_NORMALIZE_PIPELINE\` suffices: -We can get the AST by running a parse & normalize _flowr_ pipeline: ${codeBlock('ts', ` -async function getAst(code: string) { - const result = await new PipelineExecutor(DEFAULT_NORMALIZE_PIPELINE, { - shell: new RShell(), - request: requestFromInput(code.trim()) +async function getAst(code: string): Promise { + const result = await new ${PipelineExecutor.name}(DEFAULT_NORMALIZE_PIPELINE, { + shell: new ${RShell.name}(), + request: ${requestFromInput.name}(code.trim()) }).allRemainingSteps(); return result.normalize.ast; }`)} -### **Step 2**: Implement the \`Visitor\` Interface -To use the NormalizedAstVisitor we have to implement the Visitor interface: -${ - printHierarchy({ program: visitorInterface.program, hierarchy: visitorInterface.info, root: 'Visitor', collapseFromNesting: Number.MAX_VALUE }) -} +From the REPL, you can use the ${getReplCommand('normalize')} command. +## Traversing the Normalized AST -In this example we will implement a Visitor that counts the occurances of _if-statements_. For this we only implement the \`visitIfThenElse\` function. -${codeBlock('ts', ` -const ifCountVisitor: Visitor & {count: number } = { - visitIfThenElse() { - this.count++; - }, - count: 0 -} -`)} +We provide two ways to traverse the normalized AST: [Visitors](#visitors) and [Folds](#folds). + +### Visitors + +If you want a simple visitor which traverses the AST, the \`${visitAst.name}\` function from +${getFilePathMd('../r-bridge/lang-4.x/ast/model/processing/visitor.ts')} is a good starting point. +You may specify functions to be called whenever you enter and exit a node during the traversal, and any +computation is to be done by side effects. 
+For example, if you want to collect all the \`id\`s present within a normalized (sub-)ast, +as it is done by the ${collectAllIds.name} function, you can use the following visitor: -### **Step 3**: Run the NormalizedAstVisitor ${codeBlock('ts', ` -new NormalizedAstVisitor(ast).accept(ifCount); -`)} +const ids = new Set(); +visitAst(nodes, node => { + ids.add(node.info.id); +}); +return ids; +`)} + +### Folds + +We formulate a fold with the base class \`${DefaultNormalizedAstFold.name}\` in ${getFilePathMd('../abstract-interpretation/normalized-ast-fold.ts')}. +Using this class, you can create your own fold behavior by overwriting the default methods. +By default, the class provides a monoid abstraction using the _empty_ from the constructor and the _concat_ method. + + +${printHierarchy({ program: types.program, hierarchy: types.info, root: 'DefaultNormalizedAstFold' })} + +Now, of course, we could provide hundreds of examples here, but we use tests to verify that the fold behaves as expected +and happily point to them at ${getFilePathMd('../../test/functionality/r-bridge/normalize-ast-fold.test.ts')}. + +As a simple showcase, we want to use the fold to evaluate numeric expressions containing numbers, \`+\`, and \`*\` operators. 
-### Complete Code ${codeBlock('ts', ` -async function countIfs(code: string) { - const result = await new PipelineExecutor(DEFAULT_NORMALIZE_PIPELINE, { - shell: new RShell(), - request: requestFromInput(code.trim()) - }).allRemainingSteps(); +class MyMathFold extends ${DefaultNormalizedAstFold.name} { + constructor() { + /* use \`0\` as a placeholder empty for the monoid */ + super(0); + } - const ast = result.normalize.ast; + protected override concat(a: number, b: number): number { + /* for this example, we ignore cases that we cannot handle */ + return b; + } - const ifCountVisitor: Visitor & {count: number } = { - visitIfThenElse() { - this.count++; - }, - count: 0 + override foldRNumber(node: RNumber) { + /* return the value of the number */ + return node.content.num; } - new NormalizedAstVisitor(ast).accept(ifCountVisitor); - return ifCount.count; + override foldRBinaryOp(node: RBinaryOp) { + if(node.operator === '+') { + return this.fold(node.lhs) + this.fold(node.rhs); + } else if(node.operator === '*') { + return this.fold(node.lhs) * this.fold(node.rhs); + } else { + /* in case we cannot handle the operator we could throw an error, or just use the default behavior: */ + return super.foldRBinaryOp(node); + } + } } `)} +Now, we can use the \`${PipelineExecutor.name}\` to get the normalized AST and apply the fold: + +${codeBlock('ts', ` +const shell = new ${RShell.name}(); +const ast = (await new ${PipelineExecutor.name}(DEFAULT_NORMALIZE_PIPELINE, { + shell, request: retrieveNormalizedAst(${RShell.name}, '1 + 3 * 2') +}).allRemainingSteps()).normalize.ast; + +const result = new MyMathFold().fold(ast); +console.log(result); // -> 7 +`)} `; } diff --git a/src/r-bridge/lang-4.x/ast/model/processing/visitor.ts b/src/r-bridge/lang-4.x/ast/model/processing/visitor.ts index d4ed438896..075dd5b738 100644 --- a/src/r-bridge/lang-4.x/ast/model/processing/visitor.ts +++ b/src/r-bridge/lang-4.x/ast/model/processing/visitor.ts @@ -111,7 +111,7 @@ class NodeVisitor { } 
/** - * Collects all node ids within a tree given by a respective root node + * Visits all node ids within a tree given by a respective root node using a depth-first search with prefix order. * * @param nodes - The root id nodes to start collecting from * @param onVisit - Called before visiting the subtree of each node. Can be used to stop visiting the subtree starting with this node (return `true` stop) diff --git a/test/functionality/r-bridge/normalize-ast-fold.test.ts b/test/functionality/r-bridge/normalize-ast-fold.test.ts new file mode 100644 index 0000000000..000690414a --- /dev/null +++ b/test/functionality/r-bridge/normalize-ast-fold.test.ts @@ -0,0 +1,90 @@ +import { beforeAll, describe, expect, test } from 'vitest'; +import { DefaultNormalizedAstFold } from '../../../src/abstract-interpretation/normalized-ast-fold'; +import { retrieveNormalizedAst, withShell } from '../_helper/shell'; +import type { RString } from '../../../src/r-bridge/lang-4.x/ast/model/nodes/r-string'; +import type { RNumber } from '../../../src/r-bridge/lang-4.x/ast/model/nodes/r-number'; +import type { NormalizedAst } from '../../../src/r-bridge/lang-4.x/ast/model/processing/decorate'; +import type { RBinaryOp } from '../../../src/r-bridge/lang-4.x/ast/model/nodes/r-binary-op'; +import type { RExpressionList } from '../../../src/r-bridge/lang-4.x/ast/model/nodes/r-expression-list'; + +describe('normalize-visitor', withShell(shell => { + let normalized: NormalizedAst | undefined; + beforeAll(async() => { + normalized = await retrieveNormalizedAst(shell, 'x <- 42\ny <- "hello world"\nprint("foo")'); + }); + test('find the number', () => { + let marker = false; + class MyNumberFold extends DefaultNormalizedAstFold { + override foldRNumber(node: RNumber) { + super.foldRNumber(node); + marker = true; + } + } + const astFold = new MyNumberFold(); + astFold.fold(normalized?.ast); + expect(marker).toBe(true); + }); + test('find the number of strings within my program (monoid)', () => { + 
class MyStringFold extends DefaultNormalizedAstFold { + constructor() { + super(0); + } + + protected concat(a: number, b: number): number { + return a + b; + } + + override foldRString(_node: RString) { + return 1; + } + } + const astFold = new MyStringFold(); + const result = astFold.fold(normalized?.ast); + expect(result).toBe(2); + }); + test('do basic math (monoid)', async() => { + class MyMathFold extends DefaultNormalizedAstFold { + constructor() { + super(0); + } + + protected override concat(a: number, b: number): number { + return b; + } + + override foldRNumber(node: RNumber) { + return node.content.num; + } + + override foldRBinaryOp(node: RBinaryOp) { + if(node.operator === '+') { + return this.fold(node.lhs) + this.fold(node.rhs); + } else if(node.operator === '*') { + return this.fold(node.lhs) * this.fold(node.rhs); + } else { + return super.foldRBinaryOp(node); + } + } + } + const astFold = new MyMathFold(); + const math = await retrieveNormalizedAst(shell, '1 + 3 * 2'); + const result = astFold.fold(math?.ast); + expect(result).toBe(7); + }); + test('fold should stop if overwritten and no continue', async() => { + let foundNumber = false; + class MyMathFold extends DefaultNormalizedAstFold { + override foldRNumber(_node: RNumber) { + foundNumber = true; + } + + override foldRExpressionList(_node: RExpressionList) { + + } + } + const astFold = new MyMathFold(); + const math = await retrieveNormalizedAst(shell, '1 + 3 * 2'); + astFold.fold(math?.ast); + expect(foundNumber).toBe(false); + }); +})); diff --git a/wiki/Normalized AST.md b/wiki/Normalized AST.md index 2f5d5a9db6..ecb71c4c0c 100644 --- a/wiki/Normalized AST.md +++ b/wiki/Normalized AST.md @@ -1,4 +1,4 @@ -_This document was generated from 'src/documentation/print-normalized-ast-wiki.ts' on 2024-11-19, 16:10:08 UTC presenting an overview of flowR's normalized ast (v2.1.7, using R v4.4.0)._ +_This document was generated from 'src/documentation/print-normalized-ast-wiki.ts' on 2024-11-22, 
09:56:33 UTC presenting an overview of flowR's normalized ast (v2.1.7, using R v4.4.1)._ _flowR_ produces a normalized version of R's abstract syntax tree (AST), offering the following benefits: @@ -24,7 +24,7 @@ Each edge is labeled with the type of the parent-child relationship (the "role") ```mermaid -flowchart TD +flowchart LR n7(["RExpressionList (7) "]) n6(["RBinaryOp (6) @@ -51,51 +51,7 @@ x"]) ``` -
- -R Code of the Normalized AST - -The analysis required _8.86 ms_ (including parsing) within the generation environment. - -```r -x <- 2 * 3 + 1 -``` - -
- -Mermaid Code - -``` -flowchart TD - n7(["RExpressionList (7) - "]) - n6(["RBinaryOp (6) -#60;#45;"]) - n7 -->|"expr-list-child-0"| n6 - n0(["RSymbol (0) -x"]) - n6 -->|"binop-lhs"| n0 - n5(["RBinaryOp (5) -#43;"]) - n6 -->|"binop-rhs"| n5 - n3(["RBinaryOp (3) -#42;"]) - n5 -->|"binop-lhs"| n3 - n1(["RNumber (1) -2"]) - n3 -->|"binop-lhs"| n1 - n2(["RNumber (2) -3"]) - n3 -->|"binop-rhs"| n2 - n4(["RNumber (4) -1"]) - n5 -->|"binop-rhs"| n4 - -``` - -
- -
+(The analysis required _7.04 ms_ (including parsing with the R shell) within the generation environment.) @@ -106,7 +62,7 @@ x"]) > you can either use the [Visual Studio Code extension](https://github.com/flowr-analysis/vscode-flowr) or the `:normalize*` > command in the REPL (see the [Interface wiki page](https://github.com/flowr-analysis/flowr/wiki//Interface) for more information). -Indicative is the root expression list node, which is present in every normalized AST. +Indicative of the normalization is the root expression list node, which is present in every normalized AST. In general, we provide node types for: 1. literals (e.g., numbers and strings) @@ -278,7 +234,7 @@ click RNamedAccess href "https://github.com/flowr-analysis/flowr/tree/main//src/ class RIndexAccess~Info = NoInfo~ <> RIndexAccess RIndexAccess : operator#58; #34;#91;#34; | #34;#91;#91;#34; - RIndexAccess : access#58; readonly (RArgument#60;Info#62; | #34;#60;#62;#34;)#91;#93; + RIndexAccess : access#58; readonly (#34;#60;#62;#34; | RArgument#60;Info#62;)#91;#93; click RIndexAccess href "https://github.com/flowr-analysis/flowr/tree/main//src/r-bridge/lang-4.x/ast/model/nodes/r-access.ts#L25" "access can be a number, a variable or an expression that resolves to one, a filter etc." RIndexAccess : type#58; RType.Access [from RAccessBase] RIndexAccess : accessed#58; RNode#60;Info#62; [from RAccessBase] @@ -424,7 +380,7 @@ Info .. RNode ``` -_The generation of the class diagram required 813.93 ms._ +_The generation of the class diagram required 622.61 ms._
Node types are controlled by the `RType` enum (see [`./src/r-bridge/lang-4.x/ast/model/type.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/r-bridge/lang-4.x/ast/model/type.ts)), @@ -434,6 +390,9 @@ Most notably, the `info` field holds the `id` of the node, which is used to refe In summary, we have the following types: + +
Normalized AST Node Types + * [RNode](https://github.com/flowr-analysis/flowr/tree/main//src/r-bridge/lang-4.x/ast/model/model.ts#L163) The `RNode` type is the union of all possible nodes in the R-ast. It should be used whenever you either not care what kind of @@ -1147,483 +1106,330 @@ In summary, we have the following types: -With this, the example file produced the following AST (shown from left to right for space reasons): +
+ +The following segments intend to give you an overview of how to work with the normalized AST: +* [How to get a normalized AST](#how-to-get-a-normalized-ast) +* [Traversing the Normalized AST](#traversing-the-normalized-ast) +## How to Get a Normalized AST -```mermaid -flowchart LR - n53(["RExpressionList (53) - "]) - n2(["RBinaryOp (2) -#60;#45;"]) - n53 -->|"expr-list-child-0"| n2 - n0(["RSymbol (0) -sum"]) - n2 -->|"binop-lhs"| n0 - n1(["RNumber (1) -0"]) - n2 -->|"binop-rhs"| n1 - n5(["RBinaryOp (5) -#60;#45;"]) - n53 -->|"expr-list-child-1"| n5 - n3(["RSymbol (3) -product"]) - n5 -->|"binop-lhs"| n3 - n4(["RNumber (4) -1"]) - n5 -->|"binop-rhs"| n4 - n8(["RBinaryOp (8) -#60;#45;"]) - n53 -->|"expr-list-child-2"| n8 - n6(["RSymbol (6) -w"]) - n8 -->|"binop-lhs"| n6 - n7(["RNumber (7) -7"]) - n8 -->|"binop-rhs"| n7 - n11(["RBinaryOp (11) -#60;#45;"]) - n53 -->|"expr-list-child-3"| n11 - n9(["RSymbol (9) -N"]) - n11 -->|"binop-lhs"| n9 - n10(["RNumber (10) -10"]) - n11 -->|"binop-rhs"| n10 - n36(["RForLoop (36) -for"]) - n53 -->|"expr-list-child-4"| n36 - n12(["RSymbol (12) -i"]) - n36 -->|"for-variable"| n12 - n20(["RBinaryOp (20) -#58;"]) - n36 -->|"for-vector"| n20 - n13(["RNumber (13) -1"]) - n20 -->|"binop-lhs"| n13 - n19(["RExpressionList (19) - "]) - n20 -->|"binop-rhs"| n19 - n19 -.-|"group-open"| n14 - n19 -.-|"group-close"| n15 - n14(["RSymbol (14) -("]) - n15(["RSymbol (15) -)"]) - n18(["RBinaryOp (18) -#45;"]) - n19 -->|"expr-list-child-0"| n18 - n16(["RSymbol (16) -N"]) - n18 -->|"binop-lhs"| n16 - n17(["RNumber (17) -1"]) - n18 -->|"binop-rhs"| n17 - n35(["RExpressionList (35) - "]) - n36 -->|"for-body"| n35 - n35 -.-|"group-open"| n21 - n35 -.-|"group-close"| n22 - n21(["RSymbol (21) -#123;"]) - n22(["RSymbol (22) -#125;"]) - n29(["RBinaryOp (29) -#60;#45;"]) - n35 -->|"expr-list-child-0"| n29 - n23(["RSymbol (23) -sum"]) - n29 -->|"binop-lhs"| n23 - n28(["RBinaryOp (28) -#43;"]) - n29 -->|"binop-rhs"| n28 - n26(["RBinaryOp (26) -#43;"]) - n28 -->|"binop-lhs"| n26 - 
n24(["RSymbol (24) -sum"]) - n26 -->|"binop-lhs"| n24 - n25(["RSymbol (25) -i"]) - n26 -->|"binop-rhs"| n25 - n27(["RSymbol (27) -w"]) - n28 -->|"binop-rhs"| n27 - n34(["RBinaryOp (34) -#60;#45;"]) - n35 -->|"expr-list-child-1"| n34 - n30(["RSymbol (30) -product"]) - n34 -->|"binop-lhs"| n30 - n33(["RBinaryOp (33) -#42;"]) - n34 -->|"binop-rhs"| n33 - n31(["RSymbol (31) -product"]) - n33 -->|"binop-lhs"| n31 - n32(["RSymbol (32) -i"]) - n33 -->|"binop-rhs"| n32 - n44(["RFunctionCall (44) -cat"]) - n53 -->|"expr-list-child-5"| n44 - n37(["RSymbol (37) -cat"]) - n44 -->|"call-name"| n37 - n39(["RArgument (39) -#34;Sum#58;#34;"]) - n44 -->|"call-argument-1"| n39 - n38(["RString (38) -#34;Sum#58;#34;"]) - n39 -->|"arg-value"| n38 - n41(["RArgument (41) -sum"]) - n44 -->|"call-argument-2"| n41 - n40(["RSymbol (40) -sum"]) - n41 -->|"arg-value"| n40 - n43(["RArgument (43) -#34; -#34;"]) - n44 -->|"call-argument-3"| n43 - n42(["RString (42) -#34; -#34;"]) - n43 -->|"arg-value"| n42 - n52(["RFunctionCall (52) -cat"]) - n53 -->|"expr-list-child-6"| n52 - n45(["RSymbol (45) -cat"]) - n52 -->|"call-name"| n45 - n47(["RArgument (47) -#34;Product#58;#34;"]) - n52 -->|"call-argument-1"| n47 - n46(["RString (46) -#34;Product#58;#34;"]) - n47 -->|"arg-value"| n46 - n49(["RArgument (49) -product"]) - n52 -->|"call-argument-2"| n49 - n48(["RSymbol (48) -product"]) - n49 -->|"arg-value"| n48 - n51(["RArgument (51) -#34; -#34;"]) - n52 -->|"call-argument-3"| n51 - n50(["RString (50) -#34; -#34;"]) - n51 -->|"arg-value"| n50 - -``` - -
- -R Code of the Normalized AST +As explained alongside the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface#the-pipeline-executor) wiki page, you can use the +`PipelineExecutor` to get the normalized AST. If you are only interested in the normalization, +a pipeline like the `DEFAULT_NORMALIZE_PIPELINE` suffices: -The analysis required _6.09 ms_ (including parsing) within the generation environment. -```r -sum <- 0 -product <- 1 -w <- 7 -N <- 10 - -for (i in 1:(N-1)) { - sum <- sum + i + w - product <- product * i +```ts +async function getAst(code: string): Promise { + const result = await new PipelineExecutor(DEFAULT_NORMALIZE_PIPELINE, { + shell: new RShell(), + request: requestFromInput(code.trim()) + }).allRemainingSteps(); + return result.normalize.ast; } - -cat("Sum:", sum, "\n") -cat("Product:", product, "\n") - -``` - -
- -Mermaid Code - -``` -flowchart LR - n53(["RExpressionList (53) - "]) - n2(["RBinaryOp (2) -#60;#45;"]) - n53 -->|"expr-list-child-0"| n2 - n0(["RSymbol (0) -sum"]) - n2 -->|"binop-lhs"| n0 - n1(["RNumber (1) -0"]) - n2 -->|"binop-rhs"| n1 - n5(["RBinaryOp (5) -#60;#45;"]) - n53 -->|"expr-list-child-1"| n5 - n3(["RSymbol (3) -product"]) - n5 -->|"binop-lhs"| n3 - n4(["RNumber (4) -1"]) - n5 -->|"binop-rhs"| n4 - n8(["RBinaryOp (8) -#60;#45;"]) - n53 -->|"expr-list-child-2"| n8 - n6(["RSymbol (6) -w"]) - n8 -->|"binop-lhs"| n6 - n7(["RNumber (7) -7"]) - n8 -->|"binop-rhs"| n7 - n11(["RBinaryOp (11) -#60;#45;"]) - n53 -->|"expr-list-child-3"| n11 - n9(["RSymbol (9) -N"]) - n11 -->|"binop-lhs"| n9 - n10(["RNumber (10) -10"]) - n11 -->|"binop-rhs"| n10 - n36(["RForLoop (36) -for"]) - n53 -->|"expr-list-child-4"| n36 - n12(["RSymbol (12) -i"]) - n36 -->|"for-variable"| n12 - n20(["RBinaryOp (20) -#58;"]) - n36 -->|"for-vector"| n20 - n13(["RNumber (13) -1"]) - n20 -->|"binop-lhs"| n13 - n19(["RExpressionList (19) - "]) - n20 -->|"binop-rhs"| n19 - n19 -.-|"group-open"| n14 - n19 -.-|"group-close"| n15 - n14(["RSymbol (14) -("]) - n15(["RSymbol (15) -)"]) - n18(["RBinaryOp (18) -#45;"]) - n19 -->|"expr-list-child-0"| n18 - n16(["RSymbol (16) -N"]) - n18 -->|"binop-lhs"| n16 - n17(["RNumber (17) -1"]) - n18 -->|"binop-rhs"| n17 - n35(["RExpressionList (35) - "]) - n36 -->|"for-body"| n35 - n35 -.-|"group-open"| n21 - n35 -.-|"group-close"| n22 - n21(["RSymbol (21) -#123;"]) - n22(["RSymbol (22) -#125;"]) - n29(["RBinaryOp (29) -#60;#45;"]) - n35 -->|"expr-list-child-0"| n29 - n23(["RSymbol (23) -sum"]) - n29 -->|"binop-lhs"| n23 - n28(["RBinaryOp (28) -#43;"]) - n29 -->|"binop-rhs"| n28 - n26(["RBinaryOp (26) -#43;"]) - n28 -->|"binop-lhs"| n26 - n24(["RSymbol (24) -sum"]) - n26 -->|"binop-lhs"| n24 - n25(["RSymbol (25) -i"]) - n26 -->|"binop-rhs"| n25 - n27(["RSymbol (27) -w"]) - n28 -->|"binop-rhs"| n27 - n34(["RBinaryOp (34) -#60;#45;"]) - n35 
-->|"expr-list-child-1"| n34 - n30(["RSymbol (30) -product"]) - n34 -->|"binop-lhs"| n30 - n33(["RBinaryOp (33) -#42;"]) - n34 -->|"binop-rhs"| n33 - n31(["RSymbol (31) -product"]) - n33 -->|"binop-lhs"| n31 - n32(["RSymbol (32) -i"]) - n33 -->|"binop-rhs"| n32 - n44(["RFunctionCall (44) -cat"]) - n53 -->|"expr-list-child-5"| n44 - n37(["RSymbol (37) -cat"]) - n44 -->|"call-name"| n37 - n39(["RArgument (39) -#34;Sum#58;#34;"]) - n44 -->|"call-argument-1"| n39 - n38(["RString (38) -#34;Sum#58;#34;"]) - n39 -->|"arg-value"| n38 - n41(["RArgument (41) -sum"]) - n44 -->|"call-argument-2"| n41 - n40(["RSymbol (40) -sum"]) - n41 -->|"arg-value"| n40 - n43(["RArgument (43) -#34; -#34;"]) - n44 -->|"call-argument-3"| n43 - n42(["RString (42) -#34; -#34;"]) - n43 -->|"arg-value"| n42 - n52(["RFunctionCall (52) -cat"]) - n53 -->|"expr-list-child-6"| n52 - n45(["RSymbol (45) -cat"]) - n52 -->|"call-name"| n45 - n47(["RArgument (47) -#34;Product#58;#34;"]) - n52 -->|"call-argument-1"| n47 - n46(["RString (46) -#34;Product#58;#34;"]) - n47 -->|"arg-value"| n46 - n49(["RArgument (49) -product"]) - n52 -->|"call-argument-2"| n49 - n48(["RSymbol (48) -product"]) - n49 -->|"arg-value"| n48 - n51(["RArgument (51) -#34; -#34;"]) - n52 -->|"call-argument-3"| n51 - n50(["RString (50) -#34; -#34;"]) - n51 -->|"arg-value"| n50 - ``` -
- -
+From the REPL, you can use the `:normalize` command. +## Traversing the Normalized AST +We provide two ways to traverse the normalized AST: [Visitors](#visitors) and [Folds](#folds). -# Working with the Normalized AST -## Visiting -This chapter will outline how to use the `NormalizedAstVisitor` to go over the AST. +### Visitors -### **Step 1**: Get the ast +If you want a simple visitor which traverses the AST, the `visitAst` function from +[`./src/r-bridge/lang-4.x/ast/model/processing/visitor.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/r-bridge/lang-4.x/ast/model/processing/visitor.ts) is a good starting point. +You may specify functions to be called whenever you enter and exit a node during the traversal, and any +computation is to be done by side effects. +For example, if you want to collect all the `id`s present within a normalized (sub-)ast, +as it is done by the collectAllIds function, you can use the following visitor: -We can get the AST by running a parse & normalize _flowr_ pipeline: ```ts -async function getAst(code: string) { - const result = await new PipelineExecutor(DEFAULT_NORMALIZE_PIPELINE, { - shell: new RShell(), - request: requestFromInput(code.trim()) - }).allRemainingSteps(); - return result.normalize.ast; -} +const ids = new Set(); +visitAst(nodes, node => { + ids.add(node.info.id); +}); +return ids; ``` + +### Folds -### **Step 2**: Implement the `Visitor` Interface -To use the NormalizedAstVisitor we have to implement the Visitor interface: - * **[Visitor](https://github.com/flowr-analysis/flowr/tree/main//src/abstract-interpretation/normalized-ast-visitor.ts#L27)** - -
Defined at ./src/abstract-interpretation/normalized-ast-visitor.ts#L27 +We formulate a fold with the base class `DefaultNormalizedAstFold` in [`./src/abstract-interpretation/normalized-ast-fold.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/abstract-interpretation/normalized-ast-fold.ts). +Using this class, you can create your own fold behavior by overwriting the default methods. +By default, the class provides a monoid abstraction using the _empty_ from the constructor and the _concat_ method. + + + * [DefaultNormalizedAstFold](https://github.com/flowr-analysis/flowr/tree/main//src/abstract-interpretation/normalized-ast-fold.ts#L82) + Default implementation of a fold over the normalized AST (using the classic fold traversal). + To modify the behavior, please extend this class and overwrite the methods of interest. + You can control the value passing (`Returns` generic) + by providing sensible Monoid behavior overwriting the + concat + method + and supplying the empty value in the constructor. +
Defined at ./src/abstract-interpretation/normalized-ast-fold.ts#L82 ```ts - export interface Visitor { - visitNumber?(num: RNumber): void; - visitString?(str: RString): void; - visitLogical?(logical: RLogical): void; - visitSymbol?(symbol: RSymbol): void; - visitAccess?(node: RAccess): void; - visitBinaryOp?(op: RBinaryOp): void; - visitPipe?(op: RPipe): void; - visitUnaryOp?(op: RUnaryOp): void; - visitFor?(loop: RForLoop): void; - visitWhile?(loop: RWhileLoop): void; - visitRepeat?(loop: RRepeatLoop): void; - visitNext?(next: RNext): void; - visitBreak?(next: RBreak): void; - visitComment?(comment: RComment): void; - visitLineDirective?(comment: RLineDirective): void; - visitIfThenElse?(ifThenExpr: RIfThenElse): void; - visitExprList?(exprList: RExpressionList): void; - visitFunctionDefinition?(definition: RFunctionDefinition): void; - visitFunctionCall?(call: RFunctionCall): void; - visitArgument?(argument: RArgument): void; - visitParameter?(parameter: RParameter): void; + /** + * Default implementation of a fold over the normalized AST (using the classic fold traversal). + * To modify the behavior, please extend this class and overwrite the methods of interest. + * You can control the value passing (`Returns` generic) + * by providing sensible Monoid behavior overwriting the {@link DefaultNormalizedAstFold#concat|concat} method + * and supplying the empty value in the constructor. + * + * @note By providing `enter` and `exit` you can use this as an extension to the simpler {@link visitAst} function but without + * the early termination within the visitors (for this, you can overwrite the respective `fold*` methods). 
+ * + * @example First you want to create your own fold: + * + * ```ts + * let marker = false; + * class MyNumberFold extends DefaultNormalizedAstFold { + * override foldRNumber(node: RNumber) { + * super.foldRNumber(node); + * marker = true; + * } + * } + * ``` + * This one does explicitly not use the return functionality (and hence acts more as a conventional visitor). + * Now let us suppose we have a normalized AST as an {@link RNode} in the variable `ast` + * and want to check if the AST contains a number: + * + * ```ts + * const result = new MyNumberFold().fold(ast); + * ``` + * + * Please take a look at the corresponding tests or the wiki pages for more information on how to use this fold. + */ + export class DefaultNormalizedAstFold implements NormalizedAstFold { + protected readonly enter: EntryExitVisitor; + protected readonly exit: EntryExitVisitor; + protected readonly empty: Returns; + + /** + * Empty must provide a sensible default whenever you want to have `Returns` as non-`void` + * (e.g., whenever you want your visitors to be able to return a value). 
+ */ + constructor(empty: Returns, enter?: EntryExitVisitor, exit?: EntryExitVisitor) { + this.empty = empty; + this.enter = enter; + this.exit = exit; + } + + /** + * Monoid::concat + * + * + * @see {@link https://en.wikipedia.org/wiki/Monoid} + * @see {@link DefaultNormalizedAstFold#concatAll|concatAll} + */ + protected concat(_a: Returns, _b: Returns): Returns { + return this.empty; + } + + /** + * overwrite this method, if you have a faster way to concat multiple nodes + * + * @see {@link DefaultNormalizedAstFold#concatAll|concatAll} + */ + protected concatAll(nodes: readonly Returns[]): Returns { + return nodes.reduce((acc, n) => this.concat(acc, n), this.empty); + } + + public fold(nodes: SingleOrArrayOrNothing | typeof EmptyArgument>): Returns { + if(Array.isArray(nodes)) { + const n = nodes as readonly (RNode | null | undefined | typeof EmptyArgument)[]; + return this.concatAll(n.filter(n => n && n !== EmptyArgument).map(node => this.foldSingle(node as RNode))); + } else if(nodes) { + return this.foldSingle(nodes as RNode); + } + return this.empty; + } + + protected foldSingle(node: RNode): Returns { + this.enter?.(node); + const type = node.type; + // @ts-expect-error -- ts may be unable to infer that the type is correct + const result = this.folds[type]?.(node); + this.exit?.(node); + return result; + } + + foldRAccess(access: RAccess) { + let accessed = this.foldSingle(access.accessed); + if(access.operator === '[' || access.operator === '[[') { + accessed = this.concat(accessed, this.fold(access.access)); + } + return accessed; + } + foldRArgument(argument: RArgument) { + return this.concat(this.fold(argument.name), this.fold(argument.value)); + } + foldRBinaryOp(binaryOp: RBinaryOp) { + return this.concat(this.foldSingle(binaryOp.lhs), this.foldSingle(binaryOp.rhs)); + } + foldRExpressionList(exprList: RExpressionList) { + return this.concat(this.fold(exprList.grouping), this.fold(exprList.children)); + } + foldRForLoop(loop: RForLoop) { + return 
this.concatAll([this.foldSingle(loop.variable), this.foldSingle(loop.vector), this.foldSingle(loop.body)]); + } + foldRFunctionCall(call: RFunctionCall) { + return this.concat(this.foldSingle(call.named ? call.functionName : call.calledFunction), this.fold(call.arguments)); + } + foldRFunctionDefinition(definition: RFunctionDefinition) { + return this.concat(this.fold(definition.parameters), this.foldSingle(definition.body)); + } + foldRIfThenElse(ite: RIfThenElse) { + return this.concatAll([this.foldSingle(ite.condition), this.foldSingle(ite.then), this.fold(ite.otherwise)]); + } + foldRParameter(parameter: RParameter) { + return this.concat(this.foldSingle(parameter.name), this.fold(parameter.defaultValue)); + } + foldRPipe(pipe: RPipe) { + return this.concat(this.foldSingle(pipe.lhs), this.foldSingle(pipe.rhs)); + } + foldRRepeatLoop(loop: RRepeatLoop) { + return this.foldSingle(loop.body); + } + foldRUnaryOp(unaryOp: RUnaryOp) { + return this.foldSingle(unaryOp.operand); + } + foldRWhileLoop(loop: RWhileLoop) { + return this.concat(this.foldSingle(loop.condition), this.foldSingle(loop.body)); + } + foldRBreak(_node: RBreak) { + return this.empty; + } + foldRComment(_node: RComment) { + return this.empty; + } + foldRLineDirective(_node: RLineDirective) { + return this.empty; + } + foldRLogical(_node: RLogical) { + return this.empty; + } + foldRNext(_node: RNext) { + return this.empty; + } + foldRNumber(_node: RNumber) { + return this.empty; + } + foldRString(_node: RString) { + return this.empty; + } + foldRSymbol(_node: RSymbol) { + return this.empty; + } + + protected readonly folds: FittingNormalizedAstFold = { + [RType.Access]: n => this.foldRAccess(n), + [RType.Argument]: n => this.foldRArgument(n), + [RType.BinaryOp]: n => this.foldRBinaryOp(n), + [RType.Break]: n => this.foldRBreak(n), + [RType.Comment]: n => this.foldRComment(n), + [RType.ExpressionList]: n => this.foldRExpressionList(n), + [RType.ForLoop]: n => this.foldRForLoop(n), + 
[RType.FunctionCall]: n => this.foldRFunctionCall(n), + [RType.FunctionDefinition]: n => this.foldRFunctionDefinition(n), + [RType.IfThenElse]: n => this.foldRIfThenElse(n), + [RType.LineDirective]: n => this.foldRLineDirective(n), + [RType.Logical]: n => this.foldRLogical(n), + [RType.Next]: n => this.foldRNext(n), + [RType.Number]: n => this.foldRNumber(n), + [RType.Parameter]: n => this.foldRParameter(n), + [RType.Pipe]: n => this.foldRPipe(n), + [RType.RepeatLoop]: n => this.foldRRepeatLoop(n), + [RType.String]: n => this.foldRString(n), + [RType.Symbol]: n => this.foldRSymbol(n), + [RType.UnaryOp]: n => this.foldRUnaryOp(n), + [RType.WhileLoop]: n => this.foldRWhileLoop(n), + }; } ```
+
View more (NormalizedAstFold) + * [NormalizedAstFold](https://github.com/flowr-analysis/flowr/tree/main//src/abstract-interpretation/normalized-ast-fold.ts#L35) + Describes the fold functions for each node type. +
Defined at ./src/abstract-interpretation/normalized-ast-fold.ts#L35 + + + ```ts + /** + * Describes the fold functions for each node type. + */ + export type NormalizedAstFold = { + [K in FoldableRType as `fold${Capitalize}`]: FoldOfType; + } + ``` + + +
+ -In this example we will implement a Visitor that counts the occurances of _if-statements_. For this we only implement the `visitIfThenElse` function. - -```ts -const ifCountVisitor: Visitor & {count: number } = { - visitIfThenElse() { - this.count++; - }, - count: 0 -} -``` - - -### **Step 3**: Run the NormalizedAstVisitor +
+ -```ts -new NormalizedAstVisitor(ast).accept(ifCount); -``` +Now, of course, we could provide hundreds of examples here, but we use tests to verify that the fold behaves as expected +and happily point to them at [`./test/functionality/r-bridge/normalize-ast-fold.test.ts`](https://github.com/flowr-analysis/flowr/tree/main/./test/functionality/r-bridge/normalize-ast-fold.test.ts). +As a simple showcase, we want to use the fold to evaluate numeric expressions containing numbers, `+`, and `*` operators. -### Complete Code ```ts -async function countIfs(code: string) { - const result = await new PipelineExecutor(DEFAULT_NORMALIZE_PIPELINE, { - shell: new RShell(), - request: requestFromInput(code.trim()) - }).allRemainingSteps(); +class MyMathFold extends DefaultNormalizedAstFold { + constructor() { + /* use `0` as a placeholder empty for the monoid */ + super(0); + } - const ast = result.normalize.ast; + protected override concat(a: number, b: number): number { + /* for this example, we ignore cases that we cannot handle */ + return b; + } - const ifCountVisitor: Visitor & {count: number } = { - visitIfThenElse() { - this.count++; - }, - count: 0 + override foldRNumber(node: RNumber) { + /* return the value of the number */ + return node.content.num; } - new NormalizedAstVisitor(ast).accept(ifCountVisitor); - return ifCount.count; + override foldRBinaryOp(node: RBinaryOp) { + if(node.operator === '+') { + return this.fold(node.lhs) + this.fold(node.rhs); + } else if(node.operator === '*') { + return this.fold(node.lhs) * this.fold(node.rhs); + } else { + /* in case we cannot handle the operator we could throw an error, or just use the default behavior: */ + return super.foldRBinaryOp(node); + } + } } ``` +Now, we can use the `PipelineExecutor` to get the normalized AST and apply the fold: + + +```ts +const shell = new RShell(); +const ast = (await new PipelineExecutor(DEFAULT_NORMALIZE_PIPELINE, { + shell, request: requestFromInput('1 + 3 * 2') 
+}).allRemainingSteps()).normalize.ast; + +const result = new MyMathFold().fold(ast); +console.log(result); // 7 +``` +