diff --git a/.githooks/check-package-update.sh b/.githooks/check-package-update.sh index f3b4d03b83..eb8111d942 100644 --- a/.githooks/check-package-update.sh +++ b/.githooks/check-package-update.sh @@ -14,3 +14,4 @@ changed() { echo "$changedFiles" | (grep --quiet "package.json" && changed) +exit 0 diff --git a/.github/workflows/check-broken-links.yaml b/.github/workflows/broken-links-and-wiki.yaml similarity index 73% rename from .github/workflows/check-broken-links.yaml rename to .github/workflows/broken-links-and-wiki.yaml index 8e45f973af..f18571ba0b 100644 --- a/.github/workflows/check-broken-links.yaml +++ b/.github/workflows/broken-links-and-wiki.yaml @@ -1,4 +1,4 @@ -name: Check for Broken Links +name: Check for Broken Links and Publish Wiki 'on': push: @@ -17,10 +17,19 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - name: "Setup the Environment" + uses: Code-Inspect/flowr/.github/actions/setup@main + + - name: "Checkout Repository" + uses: actions/checkout@v4 with: lfs: true + - name: Update the Capabilities Wiki Page + run: | + npm ci + npm run capabilities-markdown --silent > wiki/Capabilities-New.md + - name: Check the README for broken links uses: becheran/mlc@v0.16.3 with: diff --git a/.gitignore b/.gitignore index 16c0ad3e85..08b1b136dc 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,8 @@ statistics-out*/ **/*.R.slice **/*.R.stats **/*.R*.dataflow +tmp*log* +temp*log* benchmark-*.json ### VisualStudioCode template diff --git a/package-lock.json b/package-lock.json index 837d8deaba..88d3306121 100644 --- a/package-lock.json +++ b/package-lock.json @@ -22,7 +22,7 @@ "devDependencies": { "@commitlint/cli": "^18.4.3", "@commitlint/config-angular": "^18.4.3", - "@eagleoutice/eslint-config-flowr": "^1.0.2", + "@eagleoutice/eslint-config-flowr": "^1.0.5", "@j-ulrich/release-it-regex-bumper": "^5.1.0", "@stylistic/eslint-plugin": "^1.6.2", "@stylistic/eslint-plugin-plus": "^1.6.2", @@ -770,9 +770,9 @@ } }, 
"node_modules/@eagleoutice/eslint-config-flowr": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@eagleoutice/eslint-config-flowr/-/eslint-config-flowr-1.0.4.tgz", - "integrity": "sha512-RIbhOwAjKtr6aTEv9agNF8NXkPziEGelSRq5eut3O6XAc2ZoEpYzQyOuuMcvVavATfaoPidQtaValQ4OU9uGqQ==", + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/@eagleoutice/eslint-config-flowr/-/eslint-config-flowr-1.0.5.tgz", + "integrity": "sha512-zIyQBHyvBL4wJlb1Jh+1lvgsWuNJ7w6nuqSYDSMDpYnDoWfZR5A6szwzjfpH4Tq60d3RS8YybrxTIwakczcPJw==", "dev": true, "peerDependencies": { "@stylistic/eslint-plugin": "^1.6.2", diff --git a/package.json b/package.json index 24c1cad17a..07e4f9e32f 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ "scripts": { "publish-library": "cp .npmignore package.json README.md LICENSE dist/src/ && cd dist/src && npm publish --access public", "release": "release-it --ci", + "capabilities-markdown": "ts-node src/r-bridge/data/print.ts", "build": "tsc --project .", "lint-local": "eslint src/ test/ --rule \"no-warning-comments: off\"", "lint": "npm run license-compat -- --summary && eslint src/ test/", @@ -37,20 +38,23 @@ "mocha": { "require": "ts-node/register", "timeout": 60000, - "spec": "test/**/*.spec.ts" + "spec": "test/**/*.spec.ts", + "source-map": true, + "recursive": true, + "exit": true, + "produce-source-map": true }, "nyc": { "all": true, "per-file": true, "check-coverage": false, - "skip-full": false, + "skip-full": true, "lines": 70, "extension": [ ".ts" ], "include": "src/**/*.ts", "reporter": [ - "html", "text", "lcov", "cobertura" @@ -216,7 +220,7 @@ "devDependencies": { "@commitlint/cli": "^18.4.3", "@commitlint/config-angular": "^18.4.3", - "@eagleoutice/eslint-config-flowr": "^1.0.2", + "@eagleoutice/eslint-config-flowr": "^1.0.5", "@stylistic/eslint-plugin": "^1.6.2", "@stylistic/eslint-plugin-plus": "^1.6.2", "@stylistic/eslint-plugin-ts": "^1.6.2", diff --git a/scripts/normalize.R b/scripts/normalize.R new file mode 
100644 index 0000000000..4916f0a15d --- /dev/null +++ b/scripts/normalize.R @@ -0,0 +1,83 @@ +input <- file("../test/testfiles/example-cfg.R") +exprs <- rlang::parse_exprs(input) + +# the conversion code is based on lazyeval::ast_ + +# currently no json at all :D +flowr.expr_to_json <- function(x) { + if (base::is.expression(x) || base::is.list(x)) { + trees <- base::vapply(x, flowr.tree, character(1)) + out <- base::paste0(trees, collapse = "\n\n") + } else { + out <- flowr.tree(x) + } + + cat(out, "\n") +} + +flowr.is_atomic <- function(x) { + typeof(x) %in% c("logical", "integer", "double", "complex", "character", "raw") +} + +flowr.is_name <- function(x) { + typeof(x) == "symbol" +} + +flowr.is_call <- function(x) { + typeof(x) == "language" +} + +flowr.is_pairlist <- function(x) { + typeof(x) == "pairlist" +} + + +flowr.tree <- function(x, level = 1) { + if (flowr.is_atomic(x) && base::length(x) == 1) { + label <- base::paste0(" ", base::deparse(x)[1]) + children <- NULL + } else if (flowr.is_name(x)) { + x <- base::as.character(x) + if (x == "") { + # Special case the missing argument + label <- "`MISSING" + } else { + label <- base::paste0("`", base::as.character(x)) + } + + children <- NULL + } else if (flowr.is_call(x)) { + label <- "()" + children <- base::vapply(base::as.list(x), flowr.tree, character(1), level = level + 1) + } else if (flowr.is_pairlist(x)) { + label <- "[]" + + branches <- paste("\u2517", format(names(x)), "=") + children <- character(length(x)) + for (i in seq_along(x)) { + children[i] <- flowr.tree(x[[i]], level = level + 1) + } + } else { + # Special case for srcrefs, since they're commonly seen + if (inherits(x, "srcref")) { + label <- "" + } else { + label <- paste0("<", typeof(x), ">") + } + children <- NULL + } + + indent <- paste0(str_dup(" ", level - 1), "> ") + + if (is.null(children)) { + paste0(indent, label) + } else { + paste0(indent, label, "\n", paste0(children, collapse = "\n")) + } +} + +str_dup <- function(x, n) { + 
paste0(rep(x, n), collapse = "") +} + +print(flowr.expr_to_json(exprs)) diff --git a/src/abstract-interpretation/handler/binop/binop.ts b/src/abstract-interpretation/handler/binop/binop.ts index 03db62a968..296765d1fe 100644 --- a/src/abstract-interpretation/handler/binop/binop.ts +++ b/src/abstract-interpretation/handler/binop/binop.ts @@ -1,13 +1,11 @@ import type { Handler } from '../handler' import type { AINode } from '../../processor' import { aiLogger } from '../../processor' -import type { BinaryOperatorFlavor, ParentInformation, RBinaryOp } from '../../../r-bridge' +import type { ParentInformation, RBinaryOp } from '../../../r-bridge' import { guard } from '../../../util/assert' import { operators } from './operators' -export type BinOpOperators = { - [key in BinaryOperatorFlavor]: (lhs: AINode, rhs: AINode, node: RBinaryOp) => AINode -} +export type BinaryOpProcessor = (lhs: AINode, rhs: AINode, node: RBinaryOp) => AINode export class BinOp implements Handler { lhs: AINode | undefined @@ -16,7 +14,7 @@ export class BinOp implements Handler { constructor(readonly node: RBinaryOp) {} getName(): string { - return `Bin Op (${this.node.flavor})` + return `Bin Op (${this.node.operator})` } enter(): void { @@ -27,7 +25,9 @@ export class BinOp implements Handler { aiLogger.trace(`Exited ${this.getName()}`) guard(this.lhs !== undefined, `No LHS found for assignment ${this.node.info.id}`) guard(this.rhs !== undefined, `No RHS found for assignment ${this.node.info.id}`) - return operators[this.node.flavor](this.lhs, this.rhs, this.node) + const processor: BinaryOpProcessor | undefined = operators[this.node.operator] + guard(processor !== undefined, `No processor found for binary operator ${this.node.operator}`) + return processor(this.lhs, this.rhs, this.node) } next(node: AINode): void { @@ -40,4 +40,4 @@ export class BinOp implements Handler { guard(false, `BinOp ${this.node.info.id} already has both LHS and RHS`) } } -} \ No newline at end of file +} diff --git 
a/src/abstract-interpretation/handler/binop/operators.ts b/src/abstract-interpretation/handler/binop/operators.ts index 366f974c05..42c35e284b 100644 --- a/src/abstract-interpretation/handler/binop/operators.ts +++ b/src/abstract-interpretation/handler/binop/operators.ts @@ -1,40 +1,26 @@ -import { guard } from '../../../util/assert' -import type { BinOpOperators } from './binop' +import type { BinaryOpProcessor } from './binop' import { addDomains, subtractDomains } from '../../domain' -export const operators: BinOpOperators = { - 'assignment': (lhs, rhs, node) => { +export const operators: Record = { + '<-': (lhs, rhs, node) => { return { id: lhs.id, domain: rhs.domain, astNode: node.lhs, } }, - 'arithmetic': (lhs, rhs, node) => { - switch(node.operator) { - case '+': - return { - id: lhs.id, - domain: addDomains(lhs.domain, rhs.domain), - astNode: node, - } - case '-': - return { - id: lhs.id, - domain: subtractDomains(lhs.domain, rhs.domain), - astNode: node, - } - default: - guard(false, `Unknown binary operator ${node.operator}`) + '+': (lhs, rhs, node) => { + return { + id: lhs.id, + domain: addDomains(lhs.domain, rhs.domain), + astNode: node, } }, - 'logical': () => { - guard(false, 'Not implemented yet') - }, - 'model formula': () => { - guard(false, 'Not implemented yet') - }, - 'comparison': () => { - guard(false, 'Not implemented yet') - }, + '-': (lhs, rhs, node) => { + return { + id: lhs.id, + domain: subtractDomains(lhs.domain, rhs.domain), + astNode: node, + } + } } diff --git a/src/abstract-interpretation/processor.ts b/src/abstract-interpretation/processor.ts index d5b015730e..8986bee7ec 100644 --- a/src/abstract-interpretation/processor.ts +++ b/src/abstract-interpretation/processor.ts @@ -1,4 +1,4 @@ -import type { DataflowInformation } from '../dataflow/internal/info' +import type { DataflowInformation } from '../dataflow/info' import type { NodeId, NormalizedAst, ParentInformation, RNodeWithParent } from '../r-bridge' import { RType } from 
'../r-bridge' import { CfgVertexType, extractCFG } from '../util/cfg/cfg' @@ -23,13 +23,13 @@ class Stack { private backingStore: ElementType[] = [] size(): number { - return this.backingStore.length + return this.backingStore.length } peek(): ElementType | undefined { - return this.backingStore[this.size() - 1] + return this.backingStore[this.size() - 1] } pop(): ElementType | undefined { - return this.backingStore.pop() + return this.backingStore.pop() } push(item: ElementType): ElementType { this.backingStore.push(item) diff --git a/src/config.ts b/src/config.ts index 27dde3b307..4bc981f3cb 100644 --- a/src/config.ts +++ b/src/config.ts @@ -2,7 +2,7 @@ import type { MergeableRecord } from './util/objects' import { deepMergeObject } from './util/objects' import path from 'path' import fs from 'fs' -import { log } from './util/log' +import { log, LogLevel } from './util/log' import { getParentDirectory } from './util/files' import Joi from 'joi' @@ -66,7 +66,9 @@ function parseConfigOptions(workingDirectory: string, configFile: string): Flowr if(!validate.error) { // assign default values to all config options except for the specified ones const ret = deepMergeObject(defaultConfigOptions, parsed) - log.info(`Using config ${JSON.stringify(ret)} from ${configPath}`) + if(log.settings.minLevel <= LogLevel.Info) { + log.info(`Using config ${JSON.stringify(ret)} from ${configPath}`) + } return ret } else { log.error(`Failed to validate config file at ${configPath}: ${validate.error.message}`) @@ -79,6 +81,8 @@ function parseConfigOptions(workingDirectory: string, configFile: string): Flowr searchPath = getParentDirectory(searchPath) } while(fs.existsSync(searchPath)) - log.info(`Using default config ${JSON.stringify(defaultConfigOptions)}`) + if(log.settings.minLevel <= LogLevel.Info) { + log.info(`Using default config ${JSON.stringify(defaultConfigOptions)}`) + } return defaultConfigOptions } diff --git a/src/core/index.ts b/src/core/index.ts deleted file mode 100644 
index 84b443c457..0000000000 --- a/src/core/index.ts +++ /dev/null @@ -1,4 +0,0 @@ -export { SteppingSlicer } from './slicer' -export * from './steps' -export * from './input' -export * from './output' diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts new file mode 100644 index 0000000000..1e92d3dbc9 --- /dev/null +++ b/src/core/pipeline-executor.ts @@ -0,0 +1,256 @@ +import { guard } from '../util/assert' +import type { + Pipeline, + PipelineInput, + PipelineOutput, + PipelinePerRequestInput, + PipelineStepNames, + PipelineStepOutputWithName +} from './steps/pipeline' +import type { PipelineStepName } from './steps/step' +import { PipelineStepStage } from './steps/step' + +/** + * The pipeline executor allows to execute arbitrary {@link Pipeline|pipelines} in a step-by-step fashion. + * If you are not yet in the possession of a {@link Pipeline|pipeline}, you can use the {@link createPipeline} function + * to create one for yourself, based on the steps that you want to execute. + * + * Those steps are split into two phases or "stages" (which is the name that we will use in the following), represented + * by the {@link PipelineStepStage} type. These allow us to separate things that have to be done + * once per-file, e.g., actually parsing the AST, from those, that we need to repeat 'once per request' (whatever this + * request may be). In other words, what can be cached between operations and what can not. + * + * Furthermore, this executor follows an iterable fashion to be *as flexible as possible* + * (e.g., to be instrumented with measurements). So, you can use the pipeline executor like this: + * + * ```ts + * const stepper = new PipelineExecutor( ... 
) + * while(stepper.hasNextStep()) { + * await stepper.nextStep() + * } + * + * stepper.switchToRequestStage() + * + * while(stepper.hasNextStep()) { + * await stepper.nextStep() + * } + * + * const result = stepper.getResults() + * ``` + * + * Of course, you might think, that this is rather overkill if you simply want to receive the result. + * And this is true. Therefore, if you do not want to perform some kind of magic in-between steps, you can use the + * **{@link allRemainingSteps}** function like this: + * + * ```ts + * const stepper = new PipelineExecutor( ... ) + * const result = await stepper.allRemainingSteps() + * ``` + * + * As the name suggest, you can combine this name with previous calls to {@link nextStep} to only execute the remaining + * steps in case, for whatever reason you only want to instrument some steps. + * + * By default, the {@link PipelineExecutor} does not offer an automatic way to repeat requests (mostly to prevent accidental errors). + * However, you can use the + * **{@link updateRequest}** function to reset the request steps and re-execute them for a new request. This allows something like the following: + * + * ```ts + * const stepper = new PipelineExecutor( ... ) + * const result = await stepper.allRemainingSteps() + * + * stepper.updateRequest( ... ) + * const result2 = await stepper.allRemainingSteps() + * ``` + * + * **Example - Slicing With the Pipeline Executor**: + * + * Suppose, you want to... you know _slice_ a file (which was, at one point the origin of flowR), then you can + * either create a pipeline yourself with the respective steps, or you can use the {@link DEFAULT_SLICING_PIPELINE} (and friends). + * With it, slicing essentially becomes 'easy-as-pie': + * + * ```ts + * const slicer = new PipelineExecutor(DEFAULT_SLICING_PIPELINE, { + * shell: new RShell(), + * // of course, the criterion and request given here are just examples, you can use whatever you want to slice! 
+ * criterion: ['2@b'], + * request: requestFromInput('b <- 3; x <- 5\ncat(b)'), + * }) + * const result = await slicer.allRemainingSteps() + * ``` + * + * But now, we want to slice for `x` in the first line as well! We can do that by adding: + * + * ```ts + * stepper.updateRequest({ criterion: ['1@x'] }) + * const result2 = await stepper.allRemainingSteps() + * ``` + * + * @note Even though using the pipeline executor introduces a small performance overhead, we consider + * it to be the baseline for performance benchmarking. It may very well be possible to squeeze out a little bit more by + * directly constructing the steps in the right order. However, we consider this to be negligible when compared with the time required + * for, for example, the dataflow analysis of larger files. + * + * @see PipelineExecutor#allRemainingSteps + * @see PipelineExecutor#nextStep + */ +export class PipelineExecutor

{ + private readonly pipeline: P + private readonly length: number + + private input: PipelineInput

+ private output: PipelineOutput

= {} as PipelineOutput

+ private currentExecutionStage = PipelineStepStage.OncePerFile + private stepCounter = 0 + + /** + * Construct a new pipeline executor. + * The required additional input is specified by the {@link IPipelineStep#requiredInput|required input configuration} of each step in the `pipeline`. + * + * @param pipeline - The {@link Pipeline} to execute, probably created with {@link createPipeline}. + * @param input - External {@link PipelineInput|configuration and input} required to execute the given pipeline. + */ + constructor(pipeline: P, input: PipelineInput

) { + this.pipeline = pipeline + this.length = pipeline.order.length + this.input = input + } + + /** + * Retrieve the current {@link PipelineStepStage|stage} the pipeline executor is in. + * + * @see currentExecutionStage + * @see switchToRequestStage + * @see PipelineStepStage + */ + public getCurrentStage(): PipelineStepStage { + return this.currentExecutionStage + } + + /** + * Switch to the next {@link PipelineStepStage|stage} of the pipeline executor. + * + * This will fail if either a step change is currently not valid (as not all steps have been executed), + * or if there is no next stage (i.e., the pipeline is already completed or in the last stage). + * + * @see PipelineExecutor + * @see getCurrentStage + */ + public switchToRequestStage(): void { + guard(this.stepCounter === this.pipeline.firstStepPerRequest, 'First need to complete all steps before switching') + guard(this.currentExecutionStage === PipelineStepStage.OncePerFile, 'Cannot switch to next stage, already in per-request stage.') + this.currentExecutionStage = PipelineStepStage.OncePerRequest + } + + + public getResults(intermediate?:false): PipelineOutput

+ public getResults(intermediate: true): Partial> + public getResults(intermediate: boolean): PipelineOutput

| Partial> + /** + * Returns the results of the pipeline. + * + * @param intermediate - Normally you can only receive the results *after* the stepper completed the step of interested. + * However, if you pass `true` to this parameter, you can also receive the results *before* the {@link PipelineExecutor|pipeline executor} + * completed, although the typing system then can not guarantee which of the steps have already happened. + */ + public getResults(intermediate = false): PipelineOutput

| Partial> { + guard(intermediate || this.stepCounter >= this.length, 'Without the intermediate flag, the pipeline must be completed before providing access to the results.') + return this.output + } + + /** + * Returns true only if + * 1) there are more {@link IPipelineStep|steps} to-do for the current {@link PipelineStepStage|stage} and + * 2) we have not yet reached the end of the {@link Pipeline|pipeline}. + */ + public hasNextStep(): boolean { + return (this.stepCounter < this.length && this.currentExecutionStage !== PipelineStepStage.OncePerFile) + || this.stepCounter < this.pipeline.firstStepPerRequest + } + + /** + * Execute the next {@link IPipelineStep|step} and return the name of the {@link IPipelineStep|step} that was executed, + * so you can guard if the {@link IPipelineStep|step} differs from what you are interested in. + * Furthermore, it returns the {@link IPipelineStep|step's} result. + * + * @param expectedStepName - A safeguard if you want to retrieve the result. + * If given, it causes the execution to fail if the next step is not the one you expect. + * + * _Without `expectedStepName`, please refrain from accessing the result, as you have no safeguards if the pipeline changes._ + */ + public async nextStep(expectedStepName?: PassedName): Promise<{ + name: typeof expectedStepName extends undefined ? PipelineStepName : PassedName + result: typeof expectedStepName extends undefined ? unknown : PipelineStepOutputWithName + }> { + const [step, result] = this._doNextStep(expectedStepName) + const awaitedResult = await result + + this.output[step as PipelineStepNames

] = awaitedResult + this.stepCounter++ + + return { name: step as PassedName, result: awaitedResult } + } + + private _doNextStep(expectedStepName: Readonly): [ + step: PipelineStepName, + result: Promise> + ] { + const step = this.pipeline.steps.get(this.pipeline.order[this.stepCounter]) + guard(step !== undefined, () => `Cannot execute next step, step ${this.pipeline.order[this.stepCounter]} does not exist.`) + + if(expectedStepName !== undefined) { + guard(step.name === expectedStepName, () => `Cannot execute next step, expected step ${JSON.stringify(expectedStepName)} but got ${step.name}.`) + } + + return [step.name, step.processor(this.output, this.input) as unknown as PipelineStepOutputWithName] + } + + /** + * This only makes sense if you have already run a request and want to re-use the per-file results for a new one. + * (or if for whatever reason you did not pass information for the pipeline with the constructor). + * + * @param newRequestData - Data for the new request + */ + public updateRequest(newRequestData: PipelinePerRequestInput

): void { + const requestStep = this.pipeline.firstStepPerRequest + guard(this.stepCounter >= requestStep, 'Cannot reset request prior to once-per-request stage') + this.input = { + ...(this.input as object), + ...newRequestData + } as PipelineInput

+ this.stepCounter = requestStep + // clear the results for all steps with an index >= firstStepPerRequest, this is more of a sanity check + for(let i = requestStep; i < this.length; i++) { + this.output[this.pipeline.order[i] as PipelineStepNames

] = undefined as unknown as PipelineStepOutputWithName + } + } + + public async allRemainingSteps(canSwitchStage: false): Promise>> + public async allRemainingSteps(canSwitchStage?: true): Promise> + public async allRemainingSteps(canSwitchStage: boolean): Promise | Partial>> + /** + * Execute all remaining steps and automatically call {@link switchToRequestStage} if necessary. + * @param canSwitchStage - If true, automatically switch to the request stage if necessary + * (i.e., this is what you want if you have never executed {@link nextStep} and you want to execute *all* steps). + * However, passing false allows you to only execute the steps of the 'once-per-file' stage (i.e., the steps that can be cached). + * + * @note There is a small type difference if you pass 'false' and already have manually switched to the 'once-per-request' stage. + * Because now, the results of these steps are no longer part of the result type (although they are still included). + * In such a case, you may be better off with simply passing 'true' as the function will detect that the stage is already switched. + * We could solve this type problem by separating the {@link PipelineExecutor} class into two for each stage, + * but this would break the improved readability and unified handling of the executor that I wanted to achieve with this class. + */ + public async allRemainingSteps(canSwitchStage = true): Promise | Partial>> { + while(this.hasNextStep()) { + await this.nextStep() + } + + if(canSwitchStage && this.stepCounter < this.length && this.currentExecutionStage === PipelineStepStage.OncePerFile) { + this.switchToRequestStage() + while(this.hasNextStep()) { + await this.nextStep() + } + } + + return this.stepCounter < this.length ? 
this.getResults(true) : this.getResults() + } +} diff --git a/src/core/print/dataflow-printer.ts b/src/core/print/dataflow-printer.ts index a87c33bacf..931642a5c0 100644 --- a/src/core/print/dataflow-printer.ts +++ b/src/core/print/dataflow-printer.ts @@ -1,5 +1,5 @@ import { jsonReplacer } from '../../util/json' -import type { DataflowInformation } from '../../dataflow/internal/info' +import type { DataflowInformation } from '../../dataflow/info' import type { QuadSerializationConfiguration } from '../../util/quads' import { df2quads } from '../../dataflow/graph/quads' import { graphToMermaid, graphToMermaidUrl } from '../../util/mermaid' @@ -50,7 +50,7 @@ export function dataflowGraphToJson(df: DataflowInformation): string { } export function dataflowGraphToMermaid(df: DataflowInformation, idMap: DataflowMap): string { - return graphToMermaid(df.graph, idMap) + return graphToMermaid({ graph: df.graph, dataflowIdMap: idMap }).string } export function dataflowGraphToMermaidUrl(df: DataflowInformation, idMap: DataflowMap): string { diff --git a/src/core/print/parse-printer.ts b/src/core/print/parse-printer.ts index 7805a47217..e0bd19fdbe 100644 --- a/src/core/print/parse-printer.ts +++ b/src/core/print/parse-printer.ts @@ -1,9 +1,9 @@ import type { QuadSerializationConfiguration } from '../../util/quads' import { serialize2quads } from '../../util/quads' import type { XmlBasedJson } from '../../r-bridge' -import { attributesKey, childrenKey, contentKey } from '../../r-bridge' -import { prepareParsedData } from '../../r-bridge/lang-4.x/ast/parser/json/format' import { convertPreparedParsedData } from '../../r-bridge/lang-4.x/ast/parser/json/parser' +import { prepareParsedData } from '../../r-bridge/lang-4.x/ast/parser/json/format' +import { attributesKey, childrenKey, contentKey } from '../../r-bridge/lang-4.x/ast/parser/xml' function filterObject(obj: XmlBasedJson, keys: Set): XmlBasedJson[] | XmlBasedJson { if(typeof obj !== 'object') { diff --git 
a/src/core/print/print.ts b/src/core/print/print.ts index 6eadbd5c87..8d9049e858 100644 --- a/src/core/print/print.ts +++ b/src/core/print/print.ts @@ -1,4 +1,6 @@ -import type { StepFunction } from '../steps' +import type { IPipelineStep, StepProcessingFunction } from '../steps/step' +import type { TailOfArray } from '../../util/arrays' +import { guard } from '../../util/assert' /** * Defines the output format of a step that you are interested in. @@ -39,7 +41,7 @@ export const enum StepOutputFormat { /** * Helper function to support the {@link Internal} format, as it is simply returning the input. * - * @see IStepPrinter + * @see IPipelineStepPrinter */ export function internalPrinter(input: Input): Input { return input @@ -47,8 +49,28 @@ export function internalPrinter(input: Input): Input { /** * A mapping function that maps the result of a step (i.e., the dataflow graph) - * to another representation (linked by {@link StepOutputFormat} in an {@link IStep}). + * to another representation (linked by {@link StepOutputFormat} in an {@link IPipelineStep}). + * + * For the internal format, refer to {@link InternalStepPrinter} as a shorthand. */ -export type IStepPrinter = +export type IPipelineStepPrinter = Format extends StepOutputFormat.Internal ? (input: Awaited>) => Awaited> : (input: Awaited>, ...additional: AdditionalInput) => Promise | string + +export type InternalStepPrinter = IPipelineStepPrinter + +/** + * For a `step` of the given name, which returned the given `data`. Convert that data into the given `format`. + * Depending on your step and the format this may require `additional` inputs. 
+ */ +export function printStepResult< + Step extends IPipelineStep, + Processor extends Step['processor'], + Format extends Exclude & number, + Printer extends Step['printer'][Format], + AdditionalInput extends TailOfArray>, +>(step: Step, data: Awaited>, format: Format, ...additional: AdditionalInput): Promise { + const printer = step.printer[format] as IPipelineStepPrinter | undefined + guard(printer !== undefined, `printer for ${step.name} does not support ${String(format)}`) + return printer(data, ...additional) as Promise +} diff --git a/src/core/print/slice-diff-ansi.ts b/src/core/print/slice-diff-ansi.ts index c5f88967b4..808fa7f89c 100644 --- a/src/core/print/slice-diff-ansi.ts +++ b/src/core/print/slice-diff-ansi.ts @@ -45,9 +45,9 @@ export function sliceDiffAnsi(slice: Set, normalized: NormalizedAst, cri const lines = originalCode.split('\n') for(const { selected, location } of importantLocations) { - const { start, end } = location - const line = lines[start.line - 1] - lines[start.line - 1] = `${line.substring(0, start.column - 1)}${ansiFormatter.reset()}${highlight(line.substring(start.column - 1, end.column), selected)}${grayOut()}${line.substring(end.column)}` + const [sl, sc, , ec] = location + const line = lines[sl - 1] + lines[sl - 1] = `${line.substring(0, sc - 1)}${ansiFormatter.reset()}${highlight(line.substring(sc - 1, ec), selected)}${grayOut()}${line.substring(ec)}` } return `${grayOut()}${lines.join('\n')}${ansiFormatter.reset()}` diff --git a/src/core/slicer.ts b/src/core/stepping-slicer.ts similarity index 51% rename from src/core/slicer.ts rename to src/core/stepping-slicer.ts index 13649e1f76..1f303d130d 100644 --- a/src/core/slicer.ts +++ b/src/core/stepping-slicer.ts @@ -1,25 +1,32 @@ -import type { - NormalizedAst, IdGenerator, - NoInfo, - RParseRequest, - RShell, - XmlParserHooks -} from '../r-bridge' -import type { LAST_PER_FILE_STEP, - StepRequired, STEPS, - StepName, StepResult -} from './steps' -import { - executeSingleSubStep, 
LAST_STEP, - STEPS_PER_FILE, - STEPS_PER_SLICE -} from './steps' -import { guard } from '../util/assert' -import type { SliceResult, SlicingCriteria } from '../slicing' -import type { DeepPartial } from 'ts-essentials' -import type { SteppingSlicerInput } from './input' -import type { StepResults } from './output' -import type { DataflowInformation } from '../dataflow/internal/info' +import type { SlicingCriteria } from '../slicing' +import type { Pipeline, PipelineOutput, PipelineStepOutputWithName } from './steps/pipeline' +import { createPipeline } from './steps/pipeline' +import { PARSE_WITH_R_SHELL_STEP } from './steps/all/core/00-parse' +import { NORMALIZE } from './steps/all/core/10-normalize' +import { STATIC_DATAFLOW } from './steps/all/core/20-dataflow' +import { STATIC_SLICE } from './steps/all/static-slicing/00-slice' +import { NAIVE_RECONSTRUCT } from './steps/all/static-slicing/10-reconstruct' +import { PipelineExecutor } from './pipeline-executor' +import type { LAST_PER_FILE_STEP, StepName } from './steps/steps' +import { LAST_STEP } from './steps/steps' +import type { SteppingSlicerInput } from './steps/input' +import type { PipelineStepName, PipelineStepStage } from './steps/step' + +const legacyPipelines = { + // brrh, but who cares, it is legacy! 
+ 'parse': createPipeline(PARSE_WITH_R_SHELL_STEP), + 'normalize': createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE), + 'dataflow': createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW), + 'ai': createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW), + 'slice': createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW, STATIC_SLICE), + 'reconstruct': createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW, STATIC_SLICE, NAIVE_RECONSTRUCT) +} as const + +type LegacyPipelineType = typeof legacyPipelines[InterestedIn] + +function getLegacyPipeline(interestedIn: StepName): Pipeline { + return legacyPipelines[interestedIn] +} /** * This is ultimately the root of flowR's static slicing procedure. @@ -78,46 +85,25 @@ import type { DataflowInformation } from '../dataflow/internal/info' * for, for example, the dataflow analysis. * * @see retrieveResultOfStep - * @see SteppingSlicer#doNextStep * @see StepName */ -export class SteppingSlicer { - public static readonly maximumNumberOfStepsPerFile = Object.keys(STEPS_PER_FILE).length - public static readonly maximumNumberOfStepsPerSlice = SteppingSlicer.maximumNumberOfStepsPerFile + Object.keys(STEPS_PER_SLICE).length - - private readonly shell: RShell - private readonly stepOfInterest: InterestedIn - private readonly request: RParseRequest - private readonly hooks?: DeepPartial - private readonly getId?: IdGenerator - - private criterion?: SlicingCriteria - - private results = {} as Record - - private stage: StepRequired = 'once-per-file' - private stepCounter = 0 - private reachedWanted = false +export class SteppingSlicer { + private executor: PipelineExecutor> /** * Create a new stepping slicer. For more details on the arguments please see {@link SteppingSlicerInput}. */ constructor(input: SteppingSlicerInput) { - this.shell = input.shell - this.request = input.request - this.hooks = input.hooks - this.getId = input.getId - this.stepOfInterest = (input.stepOfInterest ?? 
LAST_STEP) as InterestedIn - this.criterion = input.criterion + this.executor = new PipelineExecutor(getLegacyPipeline(input.stepOfInterest ?? LAST_STEP), input) as PipelineExecutor> } /** * Retrieve the current stage the stepping slicer is in. - * @see StepRequired + * @see PipelineStepStage * @see switchToSliceStage */ - public getCurrentStage(): StepRequired { - return this.stage + public getCurrentStage(): PipelineStepStage { + return this.executor.getCurrentStage() } /** @@ -126,14 +112,12 @@ export class SteppingSlicer - public getResults(intermediate: true): Partial> + public getResults(intermediate?:false): PipelineOutput> + public getResults(intermediate: true): Partial>> /** * Returns the result of the step of interest, as well as the results of all steps before it. * @@ -141,19 +125,15 @@ export class SteppingSlicer | Partial> { - guard(intermediate || this.reachedWanted, 'Before reading the results, we need to reach the step we are interested in') - return this.results as StepResults + public getResults(intermediate = false): PipelineOutput> | Partial>> { + return this.executor.getResults(intermediate) } /** * Returns true only if 1) there are more steps to-do for the current stage and 2) we have not yet reached the step we are interested in */ public hasNextStep(): boolean { - return !this.reachedWanted && (this.stage === 'once-per-file' ? - this.stepCounter < SteppingSlicer.maximumNumberOfStepsPerFile - : this.stepCounter < SteppingSlicer.maximumNumberOfStepsPerSlice - ) + return this.executor.hasNextStep() } /** @@ -164,65 +144,11 @@ export class SteppingSlicer(expectedStepName?: PassedName): Promise<{ - name: typeof expectedStepName extends undefined ? StepName : PassedName - result: typeof expectedStepName extends undefined ? unknown : StepResult> + public async nextStep(expectedStepName?: PassedName): Promise<{ + name: typeof expectedStepName extends undefined ? PipelineStepName : PassedName + result: typeof expectedStepName extends undefined ? 
unknown : PipelineStepOutputWithName, Exclude> }> { - guard(this.hasNextStep(), 'No more steps to do') - - const guardStep = this.getGuardStep(expectedStepName) - - const { step, result } = await this.doNextStep(guardStep) - - this.results[step] = result - this.stepCounter += 1 - if(this.stepOfInterest === step) { - this.reachedWanted = true - } - - return { name: step as PassedName, result: result as StepResult } - } - - private getGuardStep(expectedStepName: StepName | undefined) { - return expectedStepName === undefined ? - (name: K): K => name - : - (name: K): K => { - guard(expectedStepName === name, `Expected step ${expectedStepName} but got ${name}`) - return name - } - } - - private async doNextStep(guardStep: (name: K) => K) { - let step: StepName - let result: unknown - - switch(this.stepCounter) { - case 0: - step = guardStep('parse') - result = await executeSingleSubStep(step, this.request, this.shell) - break - case 1: - step = guardStep('normalize') - result = executeSingleSubStep(step, this.results.parse as string, this.hooks, this.getId) - break - case 2: - step = guardStep('dataflow') - result = executeSingleSubStep(step, this.request, this.results.normalize as NormalizedAst) - break - case 3: - guard(this.criterion !== undefined, 'Cannot decode criteria without a criterion') - step = guardStep('slice') - result = executeSingleSubStep(step, (this.results.dataflow as DataflowInformation).graph, this.results.normalize as NormalizedAst, this.criterion) - break - case 4: - step = guardStep('reconstruct') - result = executeSingleSubStep(step, this.results.normalize as NormalizedAst, (this.results.slice as SliceResult).result) - break - default: - throw new Error(`Unknown step ${this.stepCounter}, reaching this should not happen!`) - } - return { step, result } + return this.executor.nextStep(expectedStepName) } /** @@ -232,18 +158,12 @@ export class SteppingSlicer= SteppingSlicer.maximumNumberOfStepsPerFile , 'Cannot reset slice prior to once-per-slice 
stage') - this.criterion = newCriterion - this.stepCounter = SteppingSlicer.maximumNumberOfStepsPerFile - this.results.slice = undefined - this.results.reconstruct = undefined - if(this.stepOfInterest === 'slice' || this.stepOfInterest === 'reconstruct') { - this.reachedWanted = false - } + // @ts-expect-error -- it is legacy + this.executor.updateRequest({ criterion: newCriterion }) } - public async allRemainingSteps(canSwitchStage: false): Promise>> - public async allRemainingSteps(canSwitchStage?: true): Promise> + public async allRemainingSteps(canSwitchStage: false): Promise>>> + public async allRemainingSteps(canSwitchStage?: true): Promise>> /** * Execute all remaining steps and automatically call {@link switchToSliceStage} if necessary. * @param canSwitchStage - if true, automatically switch to the slice stage if necessary @@ -256,16 +176,7 @@ export class SteppingSlicer | Partial>> { - while(this.hasNextStep()) { - await this.nextStep() - } - if(canSwitchStage && !this.reachedWanted && this.stage === 'once-per-file') { - this.switchToSliceStage() - while(this.hasNextStep()) { - await this.nextStep() - } - } - return this.reachedWanted ? 
this.getResults() : this.getResults(true) + public async allRemainingSteps(canSwitchStage = true): Promise> | Partial>>> { + return this.executor.allRemainingSteps(canSwitchStage) } } diff --git a/src/core/steps.ts b/src/core/steps.ts index c188ec2a4c..9092384379 100644 --- a/src/core/steps.ts +++ b/src/core/steps.ts @@ -14,10 +14,7 @@ */ import type { MergeableRecord } from '../util/objects' -import { retrieveParseDataFromRCode } from '../r-bridge' -import { produceDataFlowGraph } from '../dataflow' -import { reconstructToCode, staticSlicing } from '../slicing' -import type { IStepPrinter } from './print/print' +import type { IPipelineStepPrinter } from './print/print' import { internalPrinter, StepOutputFormat } from './print/print' import { normalizedAstToJson, @@ -33,13 +30,13 @@ import { dataflowGraphToMermaidUrl, dataflowGraphToQuads } from './print/dataflow-printer' -import { normalize } from '../r-bridge/lang-4.x/ast/parser/json/parser' +import type { StepProcessingFunction } from './steps/step' +import { PARSE_WITH_R_SHELL_STEP } from './steps/all/core/00-parse' +import { NORMALIZE } from './steps/all/core/10-normalize' +import { STATIC_DATAFLOW } from './steps/all/core/20-dataflow' +import { STATIC_SLICE } from './steps/all/static-slicing/00-slice' +import { NAIVE_RECONSTRUCT } from './steps/all/static-slicing/10-reconstruct' -/** - * This represents close a function that we know completely nothing about. - * Nevertheless, this is the basis of what a step processor should look like. - */ -export type StepFunction = (...args: never[]) => unknown /** * This represents the required execution frequency of a step. */ @@ -50,7 +47,7 @@ export type StepRequired = 'once-per-file' | 'once-per-slice' * Defines what is to be known of a single step in the slicing process. 
*/ export interface IStep< - Fn extends StepFunction, + Fn extends StepProcessingFunction, > extends MergeableRecord { /** Human-readable description of this step */ description: string @@ -59,10 +56,10 @@ export interface IStep< /* does this step has to be repeated for each new slice or can it be performed only once in the initialization */ required: StepRequired printer: { - [K in StepOutputFormat]?: IStepPrinter + [K in StepOutputFormat]?: IPipelineStepPrinter } & { // we always want to have the internal printer - [StepOutputFormat.Internal]: IStepPrinter + [StepOutputFormat.Internal]: IPipelineStepPrinter } } @@ -70,17 +67,17 @@ export interface IStep< export const STEPS_PER_FILE = { 'parse': { description: 'Parse the given R code into an AST', - processor: (r, s) => retrieveParseDataFromRCode(r, s), + processor: PARSE_WITH_R_SHELL_STEP.processor, required: 'once-per-file', printer: { [StepOutputFormat.Internal]: internalPrinter, [StepOutputFormat.Json]: text => text, [StepOutputFormat.RdfQuads]: parseToQuads } - } satisfies IStep, + } satisfies IStep, 'normalize': { description: 'Normalize the AST to flowR\'s AST (first step of the normalization)', - processor: (j, h, g) => normalize(j, h, g), + processor: NORMALIZE.processor, required: 'once-per-file', printer: { [StepOutputFormat.Internal]: internalPrinter, @@ -89,10 +86,10 @@ export const STEPS_PER_FILE = { [StepOutputFormat.Mermaid]: printNormalizedAstToMermaid, [StepOutputFormat.MermaidUrl]: printNormalizedAstToMermaidUrl } - } satisfies IStep, + } satisfies IStep, 'dataflow': { description: 'Construct the dataflow graph', - processor: (r, a) => produceDataFlowGraph(r, a), + processor: STATIC_DATAFLOW.processor, required: 'once-per-file', printer: { [StepOutputFormat.Internal]: internalPrinter, @@ -101,26 +98,26 @@ export const STEPS_PER_FILE = { [StepOutputFormat.Mermaid]: dataflowGraphToMermaid, [StepOutputFormat.MermaidUrl]: dataflowGraphToMermaidUrl } - } satisfies IStep + } satisfies IStep } as const 
export const STEPS_PER_SLICE = { 'slice': { description: 'Calculate the actual static slice from the dataflow graph and the given slicing criteria', - processor: (d, a, c) => staticSlicing(d, a, c), + processor: STATIC_SLICE.processor, required: 'once-per-slice', printer: { [StepOutputFormat.Internal]: internalPrinter } - } satisfies IStep, + } satisfies IStep, 'reconstruct': { description: 'Reconstruct R code from the static slice', - processor: (a, s) => reconstructToCode(a, s), + processor: NAIVE_RECONSTRUCT.processor, required: 'once-per-slice', printer: { [StepOutputFormat.Internal]: internalPrinter } - } satisfies IStep + } satisfies IStep } as const export const STEPS = { ...STEPS_PER_FILE, ...STEPS_PER_SLICE } as const @@ -151,7 +148,7 @@ export function printStepResult< AdditionalInput extends Tail>, >(step: Name, data: Awaited>, format: Format, ...additional: AdditionalInput): Promise { const base = STEPS[step].printer - const printer = base[format as keyof typeof base] as IStepPrinter, Format, AdditionalInput> | undefined + const printer = base[format as keyof typeof base] as IPipelineStepPrinter, Format, AdditionalInput> | undefined guard(printer !== undefined, `printer for ${step} does not support ${String(format)}`) return printer(data, ...additional) as Promise } diff --git a/src/core/steps/all/core/00-parse.ts b/src/core/steps/all/core/00-parse.ts new file mode 100644 index 0000000000..3d953ff2ac --- /dev/null +++ b/src/core/steps/all/core/00-parse.ts @@ -0,0 +1,34 @@ +import { internalPrinter, StepOutputFormat } from '../../../print/print' +import { parseToQuads } from '../../../print/parse-printer' +import type { IPipelineStep } from '../../step' +import { PipelineStepStage } from '../../step' +import type { RParseRequest, RShell } from '../../../../r-bridge' +import type { DeepReadonly } from 'ts-essentials' +import type { RShellExecutor } from '../../../../r-bridge/shell-executor' +import { retrieveParseDataFromRCode } from 
'../../../../r-bridge' + +export interface ParseRequiredInput { + /** This is the {@link RShell} or {@link RShellExecutor} connection to be used to obtain the original parses AST of the R code */ + readonly shell: RShell | RShellExecutor + /** The request which essentially indicates the input to extract the AST from */ + readonly request: RParseRequest +} + +function processor(_results: unknown, input: Partial) { + return retrieveParseDataFromRCode(input.request as RParseRequest, input.shell as RShell) +} + +export const PARSE_WITH_R_SHELL_STEP = { + name: 'parse', + humanReadableName: 'parse with R shell', + description: 'Parse the given R code into an AST', + processor, + executed: PipelineStepStage.OncePerFile, + printer: { + [StepOutputFormat.Internal]: internalPrinter, + [StepOutputFormat.Json]: text => text, + [StepOutputFormat.RdfQuads]: parseToQuads + }, + dependencies: [], + requiredInput: undefined as unknown as ParseRequiredInput +} as const satisfies DeepReadonly> diff --git a/src/core/steps/all/core/10-normalize.ts b/src/core/steps/all/core/10-normalize.ts new file mode 100644 index 0000000000..11d3a0ee01 --- /dev/null +++ b/src/core/steps/all/core/10-normalize.ts @@ -0,0 +1,44 @@ +import type { + IdGenerator, + NoInfo +} from '../../../../r-bridge' +import { internalPrinter, StepOutputFormat } from '../../../print/print' +import { + normalizedAstToJson, + normalizedAstToQuads, + printNormalizedAstToMermaid, + printNormalizedAstToMermaidUrl +} from '../../../print/normalize-printer' +import type { IPipelineStep } from '../../step' +import { PipelineStepStage } from '../../step' +import type { DeepReadonly } from 'ts-essentials' +import type { ParseRequiredInput } from './00-parse' +import { normalize } from '../../../../r-bridge/lang-4.x/ast/parser/json/parser' + +export interface NormalizeRequiredInput extends ParseRequiredInput { + /** This id generator is only necessary if you want to retrieve a dataflow from the parsed R AST, it determines the id 
generator to use and by default uses the {@link deterministicCountingIdGenerator}*/ + readonly getId?: IdGenerator +} + +function processor(results: { parse?: string }, input: Partial) { + return normalize(results.parse as string, input.getId) +} + +export const NORMALIZE = { + name: 'normalize', + humanReadableName: 'normalize', + description: 'Normalize the AST to flowR\'s AST', + processor, + executed: PipelineStepStage.OncePerFile, + printer: { + [StepOutputFormat.Internal]: internalPrinter, + [StepOutputFormat.Json]: normalizedAstToJson, + [StepOutputFormat.RdfQuads]: normalizedAstToQuads, + [StepOutputFormat.Mermaid]: printNormalizedAstToMermaid, + [StepOutputFormat.MermaidUrl]: printNormalizedAstToMermaidUrl + }, + dependencies: [ 'parse' ], + requiredInput: undefined as unknown as NormalizeRequiredInput +} as const satisfies DeepReadonly> + + diff --git a/src/core/steps/all/core/20-dataflow.ts b/src/core/steps/all/core/20-dataflow.ts new file mode 100644 index 0000000000..ca589dab6f --- /dev/null +++ b/src/core/steps/all/core/20-dataflow.ts @@ -0,0 +1,40 @@ +import { internalPrinter, StepOutputFormat } from '../../../print/print' +import type { IPipelineStep } from '../../step' +import { PipelineStepStage } from '../../step' +import { + dataflowGraphToJson, + dataflowGraphToMermaid, + dataflowGraphToMermaidUrl, + dataflowGraphToQuads +} from '../../../print/dataflow-printer' +import type { DeepReadonly } from 'ts-essentials' +import type { NormalizedAst, RParseRequest } from '../../../../r-bridge' +import { produceDataFlowGraph } from '../../../../dataflow' + +const staticDataflowCommon = { + name: 'dataflow', + description: 'Construct the dataflow graph', + executed: PipelineStepStage.OncePerFile, + printer: { + [StepOutputFormat.Internal]: internalPrinter, + [StepOutputFormat.Json]: dataflowGraphToJson, + [StepOutputFormat.RdfQuads]: dataflowGraphToQuads, + [StepOutputFormat.Mermaid]: dataflowGraphToMermaid, + [StepOutputFormat.MermaidUrl]: 
dataflowGraphToMermaidUrl + }, + dependencies: [ 'normalize' ], +} as const + +function legacyProcessor(results: { normalize?: NormalizedAst }, input: { request?: RParseRequest }) { + return produceDataFlowGraph(input.request as RParseRequest, results.normalize as NormalizedAst) +} + +export const STATIC_DATAFLOW = { + ...staticDataflowCommon, + humanReadableName: 'dataflow', + processor: legacyProcessor, + requiredInput: { + request: undefined as unknown as RParseRequest + } +} as const satisfies DeepReadonly> + diff --git a/src/core/steps/all/core/21-abstract-interpretation.ts b/src/core/steps/all/core/21-abstract-interpretation.ts new file mode 100644 index 0000000000..fc6d91e0d6 --- /dev/null +++ b/src/core/steps/all/core/21-abstract-interpretation.ts @@ -0,0 +1,25 @@ +import { internalPrinter, StepOutputFormat } from '../../../print/print' +import type { DataflowInformation } from '../../../../dataflow/info' +import type { IPipelineStep } from '../../step' +import type { DeepReadonly } from 'ts-essentials' +import { PipelineStepStage } from '../../step' + +// Use runAbstractInterpretation here when it's ready +function processor(results: { dataflow?: DataflowInformation }, _input: unknown): DataflowInformation { + return results.dataflow as DataflowInformation +} + +export const ABSTRACT_INTERPRETATION = { + humanReadableName: 'Abstract Interpretation', + description: 'Run abstract interpretation', + processor: processor, + required: 'once-per-file', + executed: PipelineStepStage.OncePerFile, + dependencies: [ 'dataflow' ], + decorates: 'dataflow', + name: 'ai', + requiredInput: {}, + printer: { + [StepOutputFormat.Internal]: internalPrinter + } +} as const satisfies DeepReadonly> diff --git a/src/core/steps/all/static-slicing/00-slice.ts b/src/core/steps/all/static-slicing/00-slice.ts new file mode 100644 index 0000000000..8b09c6cfcc --- /dev/null +++ b/src/core/steps/all/static-slicing/00-slice.ts @@ -0,0 +1,33 @@ +import { internalPrinter, StepOutputFormat 
} from '../../../print/print' +import type { IPipelineStep } from '../../step' +import { PipelineStepStage } from '../../step' +import type { SlicingCriteria } from '../../../../slicing' +import { staticSlicing } from '../../../../slicing' +import type { DeepReadonly } from 'ts-essentials' +import type { NormalizeRequiredInput } from '../core/10-normalize' +import type { DataflowInformation } from '../../../../dataflow/info' +import type { NormalizedAst } from '../../../../r-bridge' + +export interface SliceRequiredInput extends NormalizeRequiredInput { + /** The slicing criterion is only of interest if you actually want to slice the R code */ + readonly criterion: SlicingCriteria, + /** How many re-visits of the same node are ok? */ + readonly threshold?: number +} + +function processor(results: { dataflow?: DataflowInformation, normalize?: NormalizedAst }, input: Partial) { + return staticSlicing((results.dataflow as DataflowInformation).graph, results.normalize as NormalizedAst, input.criterion as SlicingCriteria, input.threshold) +} + +export const STATIC_SLICE = { + name: 'slice', + humanReadableName: 'static slice', + description: 'Calculate the actual static slice from the dataflow graph and the given slicing criteria', + processor, + executed: PipelineStepStage.OncePerRequest, + printer: { + [StepOutputFormat.Internal]: internalPrinter + }, + dependencies: [ 'dataflow' ], + requiredInput: undefined as unknown as SliceRequiredInput +} as const satisfies DeepReadonly> diff --git a/src/core/steps/all/static-slicing/10-reconstruct.ts b/src/core/steps/all/static-slicing/10-reconstruct.ts new file mode 100644 index 0000000000..56f8cf76c9 --- /dev/null +++ b/src/core/steps/all/static-slicing/10-reconstruct.ts @@ -0,0 +1,30 @@ +import { internalPrinter, StepOutputFormat } from '../../../print/print' +import type { IPipelineStep } from '../../step' +import { PipelineStepStage } from '../../step' +import type { AutoSelectPredicate } from '../../../../slicing' +import 
{ reconstructToCode } from '../../../../slicing' +import type { DeepReadonly } from 'ts-essentials' +import type { NormalizedAst } from '../../../../r-bridge' +import type { SliceRequiredInput } from './00-slice' +import type { SliceResult } from '../../../../slicing/static/slicer-types' + +export interface ReconstructRequiredInput extends SliceRequiredInput { + autoSelectIf?: AutoSelectPredicate +} + +function processor(results: { normalize?: NormalizedAst, slice?: SliceResult }, input: Partial) { + return reconstructToCode(results.normalize as NormalizedAst, (results.slice as SliceResult).result, input.autoSelectIf) +} + +export const NAIVE_RECONSTRUCT = { + name: 'reconstruct', + humanReadableName: 'static code reconstruction', + description: 'Reconstruct R code from the static slice', + processor, + executed: PipelineStepStage.OncePerRequest, + printer: { + [StepOutputFormat.Internal]: internalPrinter + }, + dependencies: [ 'slice' ], + requiredInput: undefined as unknown as ReconstructRequiredInput +} as const satisfies DeepReadonly> diff --git a/src/core/input.ts b/src/core/steps/input.ts similarity index 84% rename from src/core/input.ts rename to src/core/steps/input.ts index 27d08060f4..c105cef504 100644 --- a/src/core/input.ts +++ b/src/core/steps/input.ts @@ -1,7 +1,6 @@ -import type { MergeableRecord } from '../util/objects' -import type { IdGenerator, NoInfo, RParseRequest, RShell, XmlParserHooks } from '../r-bridge' -import type { DeepPartial } from 'ts-essentials' -import type { AutoSelectPredicate, SlicingCriteria } from '../slicing' +import type { MergeableRecord } from '../../util/objects' +import type { IdGenerator, NoInfo, RParseRequest, RShell } from '../../r-bridge' +import type { AutoSelectPredicate, SlicingCriteria } from '../../slicing' import type { STEPS_PER_SLICE, StepName, STEPS_PER_FILE } from './steps' /** @@ -19,8 +18,6 @@ interface BaseSteppingSlicerInput ext shell: RShell /** The request which essentially indicates the input to 
extract the AST from */ request: RParseRequest - /** These hooks only make sense if you at least want to normalize the parsed R AST. They can augment the normalization process */ - hooks?: DeepPartial /** This id generator is only necessary if you want to retrieve a dataflow from the parsed R AST, it determines the id generator to use and by default uses the {@link deterministicCountingIdGenerator}*/ getId?: IdGenerator /** The slicing criterion is only of interest if you actually want to slice the R code */ diff --git a/src/core/output.ts b/src/core/steps/output.ts similarity index 100% rename from src/core/output.ts rename to src/core/steps/output.ts diff --git a/src/core/steps/pipeline/create.ts b/src/core/steps/pipeline/create.ts new file mode 100644 index 0000000000..9df2b42908 --- /dev/null +++ b/src/core/steps/pipeline/create.ts @@ -0,0 +1,146 @@ +import type { IPipelineStep, PipelineStepName } from '../step' +import { PipelineStepStage } from '../step' +import { InvalidPipelineError } from './invalid-pipeline-error' +import type { Pipeline } from './pipeline' +import { jsonReplacer } from '../../../util/json' +import { partitionArray } from '../../../util/arrays' + +/** + * Given a set of {@link IPipelineStep|steps} with their dependencies, this function verifies all requirements of {@link createPipeline}. 
+ */ +export function verifyAndBuildPipeline(steps: readonly IPipelineStep[]): Pipeline { + if(steps.length === 0) { + throw new InvalidPipelineError('0) Pipeline is empty') + } + + const [perFileSteps, perRequestSteps] = partitionArray(steps, s => s.executed === PipelineStepStage.OncePerFile) + + // we construct a map linking each name to its respective step + const perFileStepMap = new Map() + const initsPerFile: PipelineStepName[] = [] + const visited = new Set() + + // we start by working on the per-file steps + initializeSteps(perFileSteps, perFileStepMap, initsPerFile, visited) + // first, we sort the per-file steps + const sortedPerFile = topologicalSort(initsPerFile, perFileStepMap, visited) + validateStepOutput(sortedPerFile, perFileStepMap, steps) + + const perRequestStepMap = new Map(perFileStepMap) + // we track all elements without dependencies, i.e., those that start the pipeline + const initsPerRequest: PipelineStepName[] = [] + + // now, we do the same for the per-request steps, keeping the per-file steps known + initializeSteps(perRequestSteps, perRequestStepMap, initsPerRequest, visited) + + const sortedPerRequest = topologicalSort(initsPerRequest, perRequestStepMap, visited) + const sorted = [...sortedPerFile, ...sortedPerRequest] + validateStepOutput(sorted, perRequestStepMap, steps) + + return { + steps: perRequestStepMap, + order: sorted, + firstStepPerRequest: sortedPerFile.length + } +} + +function validateStepOutput(sorted: PipelineStepName[], stepMap: Map, steps: readonly IPipelineStep[]) { + if(sorted.length !== stepMap.size) { + // check if any of the dependencies in the map are invalid + checkForInvalidDependency(steps, stepMap) + // otherwise, we assume a cycle + throw new InvalidPipelineError(`3) Pipeline contains at least one cycle; sorted: ${JSON.stringify(sorted)}, steps: ${JSON.stringify([...stepMap.keys()])}`) + } +} + +function allDependenciesAreVisited(step: IPipelineStep, visited: ReadonlySet) { + return 
step.dependencies.every(d => visited.has(d)) +} + +function handleStep(step: IPipelineStep, init: PipelineStepName, visited: Set, sorted: PipelineStepName[], elem: PipelineStepName, decoratorsOfLastOthers: Set, inits: PipelineStepName[]) { + if(step.decorates === init) { + if(allDependenciesAreVisited(step, visited)) { + sorted.push(elem) + visited.add(elem) + } else { + decoratorsOfLastOthers.add(elem) + } + } else if(step.decorates === undefined && allDependenciesAreVisited(step, visited)) { + inits.push(elem) + } +} + +function topologicalSort(inits: PipelineStepName[], stepMap: Map, visited: Set) { + const sorted: PipelineStepName[] = [] + + while(inits.length > 0) { + const init = inits.pop() as PipelineStepName + sorted.push(init) + visited.add(init) + + // these decorators still have dependencies open; we have to check if they can be satisfied by the other steps to add + const decoratorsOfLastOthers = new Set() + for(const [elem, step] of stepMap.entries()) { + if(visited.has(elem)) { + continue + } + handleStep(step, init, visited, sorted, elem, decoratorsOfLastOthers, inits) + } + + // for the other decorators we have to cycle until we find a solution, or know, that no solution exists + topologicallyInsertDecoratorElements(decoratorsOfLastOthers, stepMap, visited, sorted) + } + return sorted +} + +function topologicallyInsertDecoratorElements(decoratorsOfLastOthers: Set, stepMap: Map, visited: Set, sorted: PipelineStepName[]) { + if(decoratorsOfLastOthers.size === 0) { + return + } + + let changed = true + while(changed) { + changed = false + for(const elem of [...decoratorsOfLastOthers]) { + const step = stepMap.get(elem) as IPipelineStep + if(allDependenciesAreVisited(step, visited)) { + decoratorsOfLastOthers.delete(elem) + sorted.push(elem) + visited.add(elem) + changed = true + } + } + } + if(decoratorsOfLastOthers.size > 0) { + throw new InvalidPipelineError(`5) Pipeline contains at least one decoration cycle: ${JSON.stringify(decoratorsOfLastOthers, 
jsonReplacer)}`) + } +} + +function checkForInvalidDependency(steps: readonly IPipelineStep[], stepMap: Map) { + for(const step of steps) { + for(const dep of step.dependencies) { + if(!stepMap.has(dep)) { + throw new InvalidPipelineError(`2) Step "${step.name}" depends on step "${dep}" which does not exist`) + } + } + if(step.decorates && !stepMap.has(step.decorates)) { + throw new InvalidPipelineError(`4) Step "${step.name}" decorates step "${step.decorates}" which does not exist`) + } + } +} + +function initializeSteps(steps: readonly IPipelineStep[], stepMap: Map, inits: PipelineStepName[], visited: ReadonlySet) { + for(const step of steps) { + const name = step.name + // if the name is already in the map we have a duplicate + if(stepMap.has(name)) { + throw new InvalidPipelineError(`1) Step name "${name}" is not unique in the pipeline`) + } + stepMap.set(name, step) + // only steps that have no dependencies and do not decorate others can be initial steps + if(allDependenciesAreVisited(step, visited) && (step.decorates === undefined || visited.has(step.decorates))) { + inits.push(name) + } + } +} + diff --git a/src/core/steps/pipeline/default.ts b/src/core/steps/pipeline/default.ts new file mode 100644 index 0000000000..640888f3f4 --- /dev/null +++ b/src/core/steps/pipeline/default.ts @@ -0,0 +1,13 @@ +/** + * Contains the default pipeline for working with flowr + */ +import { createPipeline } from './pipeline' +import { PARSE_WITH_R_SHELL_STEP } from '../all/core/00-parse' +import { NORMALIZE } from '../all/core/10-normalize' +import { STATIC_DATAFLOW } from '../all/core/20-dataflow' +import { STATIC_SLICE } from '../all/static-slicing/00-slice' +import { NAIVE_RECONSTRUCT } from '../all/static-slicing/10-reconstruct' + +export const DEFAULT_SLICING_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW, STATIC_SLICE, NAIVE_RECONSTRUCT) + +export const DEFAULT_DATAFLOW_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, 
STATIC_DATAFLOW) diff --git a/src/core/steps/pipeline/index.ts b/src/core/steps/pipeline/index.ts new file mode 100644 index 0000000000..38bb83622a --- /dev/null +++ b/src/core/steps/pipeline/index.ts @@ -0,0 +1,3 @@ +export * from './pipeline' +export * from './invalid-pipeline-error' +export * from './default' diff --git a/src/core/steps/pipeline/invalid-pipeline-error.ts b/src/core/steps/pipeline/invalid-pipeline-error.ts new file mode 100644 index 0000000000..a673273528 --- /dev/null +++ b/src/core/steps/pipeline/invalid-pipeline-error.ts @@ -0,0 +1,9 @@ +/** + * Thrown if for whatever reason, the pipeline is invalid. + */ +export class InvalidPipelineError extends Error { + constructor(message: string) { + super(message) + this.name = 'InvalidPipelineError' + } +} diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts new file mode 100644 index 0000000000..efedd959bd --- /dev/null +++ b/src/core/steps/pipeline/pipeline.ts @@ -0,0 +1,71 @@ +import type { IPipelineStep, PipelineStepName, PipelineStepStage } from '../step' +import { verifyAndBuildPipeline } from './create' +import type { DeepReadonly, UnionToIntersection } from 'ts-essentials' + +/** + * A pipeline is a collection of {@link Pipeline#steps|steps} that are executed in a certain {@link Pipeline#order|order}. + * It is to be created {@link createPipeline}. + * + * If you want to get the type of all steps in the pipeline (given they are created canonically using const step names), refer to {@link PipelineStepNames}. + */ +export interface Pipeline { + readonly steps: ReadonlyMap> + readonly order: readonly T['name'][] + /** + * In the order, this is the index of the first step that + * is executed {@link PipelineStepStage#OncePerRequest|once per request}. + * If it is "out of bounds" (i.e., the number of steps), all steps are executed {@link PipelineStepStage#OncePerFile|once per file}. 
+ */ + readonly firstStepPerRequest: number +} + +/** + * Returns the types of all step names in the given pipeline. + * + * @see Pipeline for details + */ +export type PipelineStepNames

= PipelineStep

['name'] +export type PipelineStep

= P extends Pipeline ? U : never + +export type PipelineStepWithName

= P extends Pipeline ? U extends IPipelineStep ? U : never : never +export type PipelineStepProcessorWithName

= PipelineStepWithName['processor'] +export type PipelineStepPrintersWithName

= PipelineStepWithName['printer'] +export type PipelineStepOutputWithName

= Awaited>> + + +export type PipelineInput

= UnionToIntersection['requiredInput']> + +/** + * Only gets the union of 'requiredInput' of those PipelineSteps which have a 'execute' field of type 'OncePerRequest'. + * In other words, information that you may want to change for another request (e.g., another slice) with the same file. + */ +export type PipelinePerRequestInput

= { + [K in PipelineStepNames

]: PipelineStep

['executed'] extends PipelineStepStage.OncePerFile ? never : PipelineStepWithName['requiredInput'] +}[PipelineStepNames

] + +export type PipelineOutput

= { + [K in PipelineStepNames

]: PipelineStepOutputWithName +} + +/** + * Creates a {@link Pipeline|pipeline} from a given collection of {@link IPipelineStep|steps}. + * In order to be valid, the collection of {@link IPipelineStep|steps} must satisfy the following set of constraints + * (which should be logical, when you consider what a pipeline should accomplish): + * + * 0) the collection of {@link IPipelineStep|steps} is not empty + * 1) all {@link IPipelineStepOrder#name|names} of {@link IPipelineStep|steps} are unique for the given pipeline + * 2) all {@link IPipelineStepOrder#dependencies|dependencies} of all {@link IPipelineStep|steps} are exist + * 3) there are no cycles in the dependency graph + * 4) the target of a {@link IPipelineStepOrder#decorates|step's decoration} exists + * 5) if a {@link IPipelineStepOrder#decorates|decoration} applies, all of its {@link IPipelineStepOrder#dependencies|dependencies} are already in the pipeline + * 6) in the resulting {@link Pipeline|pipeline}, there is a strict cut between {@link IPipelineStep|steps} that are executed + * {@link PipelineStepStage#OncePerFile|once per file} and {@link PipelineStepStage#OncePerRequest|once per request}. + * + * @returns The function will try to order your collection steps so that all the constraints hold. + * If it succeeds it will return the resulting {@link Pipeline|pipeline}, otherwise it will throw an {@link InvalidPipelineError}. + * + * @throws InvalidPipelineError If any of the constraints listed above are not satisfied. + */ +export function createPipeline(...steps: T): Pipeline { + return verifyAndBuildPipeline(steps) +} diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts new file mode 100644 index 0000000000..741bb87a69 --- /dev/null +++ b/src/core/steps/step.ts @@ -0,0 +1,99 @@ +/** + * Defines the {@link IPipelineStep} interface which specifies all data available for a single step. 
+ * + * @module + */ + +import type { MergeableRecord } from '../../util/objects' +import type { InternalStepPrinter, IPipelineStepPrinter, StepOutputFormat } from '../print/print' + +/** + * This represents the format of a step processor which retrieves two things: + * + * 1) the input configuration as passed to the {@link PipelineExecutor}. + * 2) the output produced by the previous steps. + * + * Please be aware, that if the respective information is available is not ensured by the type system but rather + * ensured at runtime by your dependencies. If you want to make sure, that the information is present, + * list all steps that you require as your {@link IPipelineStepOrder#dependencies|dependencies}, even if they would be + * already covered transitively. + */ +export type StepProcessingFunction = + (results: Record, input: Record) => unknown +/** + * This represents the required execution frequency of a step. + */ +export const enum PipelineStepStage { + /** This step has to be executed once per file */ + OncePerFile, + /** This step has to be executed once per request (e.g., slice for a given variable) */ + OncePerRequest +} + +export type PipelineStepName = string & { __brand?: 'StepName' } + +/** + * Contains the data to specify the order of {@link IPipelineStep|steps} in a pipeline. + */ +export interface IPipelineStepOrder< + Name extends PipelineStepName = PipelineStepName, +> { + /** + * Name of the respective step, it does not have to be unique in general but only unique per-pipeline. + * In other words, you can have multiple steps with a name like `parse` as long as you use only one of them in a given pipeline. + * This is, because these names are required in the {@link IPipelineStep#dependencies} field to refer to other steps this one relies on. + */ + readonly name: Name + /** + * Give the names of other steps this one requires to be completed as a prerequisite (e.g., to gain access to their input). 
+ * Does not have to be transitive, this will be checked by the scheduler of the pipeline. + */ + readonly dependencies: readonly PipelineStepName[] + /* does this step has to be repeated for each new request or can it be performed only once in the initialization */ + readonly executed: PipelineStepStage + /** + * This is similar to {@link dependencies}, but is used to say that a given step _decorates_ another one. + * This imbues two requirements: + * The step must take the output of the decorated step as input, and produce the same output as the decorated step. + * + * If so, it is ensured that _this_ step is executed _after_ the step it decorates, but before any step that depends on it. + */ + readonly decorates?: PipelineStepName +} + +/** + * Defines what is to be known of a single step in a pipeline. + * It wraps around a single {@link IPipelineStep#processor|processor} function, providing additional information. + * Steps will be executed synchronously, in-sequence, based on their {@link IPipelineStep#dependencies|dependencies}. + */ +export interface IPipelineStep< + Name extends PipelineStepName = PipelineStepName, + // eslint-disable-next-line -- by default, we assume nothing about the function shape + Fn extends StepProcessingFunction = (...args: any[]) => any, +> extends MergeableRecord, IPipelineStepOrder { + /** Human-readable name of this step */ + readonly humanReadableName: string + /** Human-readable description of this step */ + readonly description: string + /** The main processor that essentially performs the logic of this step */ + readonly processor: (...input: Parameters) => ReturnType + /** + * How to visualize the results of the respective step to the user? + */ + readonly printer: { + [K in StepOutputFormat]?: IPipelineStepPrinter + } & { + // we always want to have the internal printer + [StepOutputFormat.Internal]: InternalStepPrinter + } + /** + * Input configuration required to perform the respective steps. 
+ * Required inputs of dependencies do not have to, but can be repeated. + *

+ * Use the pattern `undefined as unknown as T` to indicate that the value is required but not provided. + */ + readonly requiredInput: object +} + + + diff --git a/src/core/steps/steps.ts b/src/core/steps/steps.ts new file mode 100644 index 0000000000..f1b01d2f1f --- /dev/null +++ b/src/core/steps/steps.ts @@ -0,0 +1,41 @@ +/** + * This file defines *all* steps of the slicing process and the data they require. + * + * Note, that the order of elements here also describes the *desired* order of their desired execution for readability. + * However, it is the {@link SteppingSlicer} which controls the order of execution and the steps required to achieve a given result. + * + * If you add a new step, you have to (at least) update the {@link SteppingSlicer} as well as the corresponding type predicate {@link SteppingSlicerInput}. + * Furthermore, if your step is the new *last* step, please update {@link LAST_STEP}. + * + * Please note that the combination of `satisfies` and `as` seems to be required. 
+ * With `satisfies` we make sure that the respective element has all the keys it requires, and the `as` force the type to be exactly the given one + * + * @module + */ + +import { PARSE_WITH_R_SHELL_STEP } from './all/core/00-parse' +import { NORMALIZE } from './all/core/10-normalize' +import { STATIC_DATAFLOW } from './all/core/20-dataflow' +import { STATIC_SLICE } from './all/static-slicing/00-slice' +import { NAIVE_RECONSTRUCT } from './all/static-slicing/10-reconstruct' + + +export const STEPS_PER_FILE = { + 'parse': PARSE_WITH_R_SHELL_STEP, + 'normalize': NORMALIZE, + 'dataflow': STATIC_DATAFLOW +} as const + +export const STEPS_PER_SLICE = { + 'slice': STATIC_SLICE, + 'reconstruct': NAIVE_RECONSTRUCT +} as const + +export const STEPS = { ...STEPS_PER_FILE, ...STEPS_PER_SLICE } as const +export const LAST_PER_FILE_STEP = 'dataflow' as const +export const LAST_STEP = 'reconstruct' as const + +export type StepName = keyof typeof STEPS +export type Step = typeof STEPS[Name] +export type StepProcessor = Step['processor'] +export type StepResult = Awaited>> diff --git a/src/dataflow/environments/append.ts b/src/dataflow/environments/append.ts index d1f497c2ab..b044af741e 100644 --- a/src/dataflow/environments/append.ts +++ b/src/dataflow/environments/append.ts @@ -1,8 +1,9 @@ import { guard } from '../../util/assert' -import type { REnvironmentInformation, IEnvironment, IdentifierDefinition } from './environment' -import { Environment } from './environment' +import type { REnvironmentInformation, IEnvironment } from './environment' +import { Environment, BuiltInEnvironment } from './environment' +import type { IdentifierDefinition } from './identifier' -function uniqueMergeValues(old: IdentifierDefinition[], value: IdentifierDefinition[]): IdentifierDefinition[] { +function uniqueMergeValues(old: IdentifierDefinition[], value: readonly IdentifierDefinition[]): IdentifierDefinition[] { const result = old for(const v of value) { const find = result.findIndex(o => 
o.nodeId === v.nodeId && o.definedAt === v.definedAt) @@ -26,7 +27,7 @@ function appendIEnvironmentWith(base: IEnvironment | undefined, next: IEnvironme } } - const parent = base.parent === undefined ? undefined : appendIEnvironmentWith(base.parent, next.parent) + const parent = base.parent === BuiltInEnvironment ? BuiltInEnvironment : appendIEnvironmentWith(base.parent, next.parent) const out = new Environment(base.name, parent) out.memory = map @@ -37,11 +38,11 @@ function appendIEnvironmentWith(base: IEnvironment | undefined, next: IEnvironme /** * Adds all writes of `next` to `base` (i.e., the operations of `next` *might* happen). */ -export function appendEnvironments(base: REnvironmentInformation, next: REnvironmentInformation | undefined): REnvironmentInformation -export function appendEnvironments(base: REnvironmentInformation | undefined, next: REnvironmentInformation): REnvironmentInformation -export function appendEnvironments(base: undefined, next: undefined): undefined -export function appendEnvironments(base: REnvironmentInformation | undefined, next: REnvironmentInformation | undefined): REnvironmentInformation | undefined -export function appendEnvironments(base: REnvironmentInformation | undefined, next: REnvironmentInformation | undefined): REnvironmentInformation | undefined { +export function appendEnvironment(base: REnvironmentInformation, next: REnvironmentInformation | undefined): REnvironmentInformation +export function appendEnvironment(base: REnvironmentInformation | undefined, next: REnvironmentInformation): REnvironmentInformation +export function appendEnvironment(base: undefined, next: undefined): undefined +export function appendEnvironment(base: REnvironmentInformation | undefined, next: REnvironmentInformation | undefined): REnvironmentInformation | undefined +export function appendEnvironment(base: REnvironmentInformation | undefined, next: REnvironmentInformation | undefined): REnvironmentInformation | undefined { if(base === 
undefined) { return next } else if(next === undefined) { diff --git a/src/dataflow/environments/built-in.ts b/src/dataflow/environments/built-in.ts new file mode 100644 index 0000000000..9f0535e460 --- /dev/null +++ b/src/dataflow/environments/built-in.ts @@ -0,0 +1,168 @@ +import type { NodeId, ParentInformation, RFunctionArgument, RSymbol } from '../../r-bridge' +import type { DataflowProcessorInformation } from '../processor' +import { ExitPointType } from '../info' +import type { DataflowInformation } from '../info' +import { processKnownFunctionCall } from '../internal/process/functions/call/known-call-handling' +import { EdgeType } from '../graph' +import { processSourceCall } from '../internal/process/functions/call/built-in/built-in-source' +import { processAccess } from '../internal/process/functions/call/built-in/built-in-access' +import { processIfThenElse } from '../internal/process/functions/call/built-in/built-in-if-then-else' +import { processAssignment } from '../internal/process/functions/call/built-in/built-in-assignment' +import { processSpecialBinOp } from '../internal/process/functions/call/built-in/built-in-logical-bin-op' +import { processPipe } from '../internal/process/functions/call/built-in/built-in-pipe' +import { processForLoop } from '../internal/process/functions/call/built-in/built-in-for-loop' +import { processRepeatLoop } from '../internal/process/functions/call/built-in/built-in-repeat-loop' +import { processWhileLoop } from '../internal/process/functions/call/built-in/built-in-while-loop' +import type { Identifier, IdentifierDefinition, IdentifierReference } from './identifier' +import { guard } from '../../util/assert' +import { processReplacementFunction } from '../internal/process/functions/call/built-in/built-in-replacement' +import { processQuote } from '../internal/process/functions/call/built-in/built-in-quote' +import { processFunctionDefinition } from 
'../internal/process/functions/call/built-in/built-in-function-definition' +import { processExpressionList } from '../internal/process/functions/call/built-in/built-in-expression-list' + +export const BuiltIn = 'built-in' + +export type BuiltInIdentifierProcessor = ( + name: RSymbol, + args: readonly RFunctionArgument[], + rootId: NodeId, + data: DataflowProcessorInformation, +) => DataflowInformation + +export type BuiltInIdentifierProcessorWithConfig = ( + name: RSymbol, + args: readonly RFunctionArgument[], + rootId: NodeId, + data: DataflowProcessorInformation, + config: Config +) => DataflowInformation + +export interface BuiltInIdentifierDefinition extends IdentifierReference { + kind: 'built-in-function' + definedAt: typeof BuiltIn + processor: BuiltInIdentifierProcessor +} + +export interface BuiltInIdentifierConstant extends IdentifierReference { + kind: 'built-in-value' + definedAt: typeof BuiltIn + value: T +} + +function defaultBuiltInProcessor( + name: RSymbol, + args: readonly RFunctionArgument[], + rootId: NodeId, + data: DataflowProcessorInformation, + config: { returnsNthArgument?: number | 'last', cfg?: ExitPointType, readAllArguments?: boolean } +): DataflowInformation { + const { information: res, processedArguments } = processKnownFunctionCall({ name, args, rootId, data }) + if(config.returnsNthArgument !== undefined) { + const arg = config.returnsNthArgument === 'last' ? 
processedArguments[args.length - 1] : processedArguments[config.returnsNthArgument] + if(arg !== undefined) { + res.graph.addEdge(rootId, arg.entryPoint, { type: EdgeType.Returns }) + } + } + if(config.readAllArguments) { + for(const arg of processedArguments) { + if(arg) { + res.graph.addEdge(rootId, arg.entryPoint, { type: EdgeType.Reads }) + } + } + } + + if(config.cfg !== undefined) { + res.exitPoints = [...res.exitPoints, { type: config.cfg, nodeId: rootId, controlDependencies: data.controlDependencies }] + } + return res +} + +export function registerBuiltInFunctions>( + processor: Proc, + config: Config, + ...names: readonly Identifier[] +): void { + for(const name of names) { + guard(!BuiltInMemory.has(name), `Built-in ${name} already defined`) + BuiltInMemory.set(name, [{ + kind: 'built-in-function', + definedAt: BuiltIn, + controlDependencies: undefined, + processor: (name, args, rootId, data) => processor(name, args, rootId, data, config), + name, + nodeId: BuiltIn + }]) + } +} + +/* registers all combinations of replacements */ +export function registerReplacementFunctions( + standardConfig: {makeMaybe?: boolean}, + assignments: readonly ('<-' | '<<-')[], + ...prefixes: readonly Identifier[] +): void { + for(const assignment of assignments) { + for(const prefix of prefixes) { + const effectiveName = `${prefix}${assignment}` + guard(!BuiltInMemory.has(effectiveName), `Built-in ${effectiveName} already defined`) + BuiltInMemory.set(effectiveName, [{ + kind: 'built-in-function', + definedAt: BuiltIn, + processor: (name, args, rootId, data) => processReplacementFunction(name, args, rootId, data, { ...standardConfig, assignmentOperator: assignment }), + name: effectiveName, + controlDependencies: undefined, + nodeId: BuiltIn + }]) + } + } +} + + +function registerSimpleFunctions(...names: readonly Identifier[]): void { + registerBuiltInFunctions(defaultBuiltInProcessor, { readAllArguments: true }, ...names) +} + +function registerBuiltInConstant(name: 
Identifier, value: T): void { + guard(!BuiltInMemory.has(name), `Built-in ${name} already defined`) + BuiltInMemory.set(name, [{ + kind: 'built-in-value', + definedAt: BuiltIn, + controlDependencies: undefined, + value, + name, + nodeId: BuiltIn + }]) +} + +export const BuiltInMemory = new Map() + +registerBuiltInConstant('NULL', null) +registerBuiltInConstant('NA', null) +registerBuiltInConstant('TRUE', true) +registerBuiltInConstant('T', true) +registerBuiltInConstant('FALSE', false) +registerBuiltInConstant('F', false) +registerSimpleFunctions('~', '+', '-', '*', '/', '^', '!', '?', '**', '==', '!=', '>', '<', '>=', '<=', '%%', '%/%', '%*%', ':', 'list') +registerBuiltInFunctions(defaultBuiltInProcessor, {}, 'cat', 'switch') /* returns null */ +registerBuiltInFunctions(defaultBuiltInProcessor, { returnsNthArgument: 0 }, 'print', '(') +registerBuiltInFunctions(defaultBuiltInProcessor, { returnsNthArgument: 0, cfg: ExitPointType.Return }, 'return') +registerBuiltInFunctions(defaultBuiltInProcessor, { cfg: ExitPointType.Break }, 'break') +registerBuiltInFunctions(defaultBuiltInProcessor, { cfg: ExitPointType.Next }, 'next') +registerBuiltInFunctions(processExpressionList, {}, '{') +registerBuiltInFunctions(processSourceCall, {}, 'source') +registerBuiltInFunctions(processAccess, { treatIndicesAsString: false }, '[', '[[') +registerBuiltInFunctions(processAccess, { treatIndicesAsString: true }, '$', '@') +registerBuiltInFunctions(processIfThenElse, {}, 'if') +registerBuiltInFunctions(processAssignment, {}, '<-', ':=', '=', 'assign', 'delayedAssign') +registerBuiltInFunctions(processAssignment, { superAssignment: true }, '<<-') +registerBuiltInFunctions(processAssignment, { swapSourceAndTarget: true }, '->') +registerBuiltInFunctions(processAssignment, { superAssignment: true, swapSourceAndTarget: true }, '->>') +registerBuiltInFunctions(processSpecialBinOp, { lazy: true }, '&&', '||', '&', '|') +registerBuiltInFunctions(processPipe, {}, '|>') 
+registerBuiltInFunctions(processFunctionDefinition, {}, 'function', '\\') +registerBuiltInFunctions(processQuote, { quoteArgumentsWithIndex: new Set([0]) }, 'quote', 'substitute', 'bquote') +registerBuiltInFunctions(processForLoop, {}, 'for') +registerBuiltInFunctions(processRepeatLoop, {}, 'repeat') +registerBuiltInFunctions(processWhileLoop, {}, 'while') +/* they are all mapped to `<-` but we separate super assignments */ +registerReplacementFunctions({ makeMaybe: true }, ['<-', '<<-'], '[', '[[', '$', '@', 'names', 'dimnames', 'attributes', 'attr', 'class', 'levels', 'rownames', 'colnames') diff --git a/src/dataflow/environments/clone.ts b/src/dataflow/environments/clone.ts new file mode 100644 index 0000000000..eaffcd112f --- /dev/null +++ b/src/dataflow/environments/clone.ts @@ -0,0 +1,28 @@ +import type { + IEnvironment, + REnvironmentInformation +} from './environment' +import { + Environment, + BuiltInEnvironment +} from './environment' +import type { Identifier, IdentifierDefinition } from './identifier' + +function cloneEnvironment(environment: IEnvironment, recurseParents: boolean): IEnvironment +function cloneEnvironment(environment: IEnvironment | undefined, recurseParents: boolean): IEnvironment | undefined { + if(environment === undefined) { + return undefined + } else if(environment.id === BuiltInEnvironment.id) { + return BuiltInEnvironment + } + const clone = new Environment(environment.name, recurseParents ? 
cloneEnvironment(environment.parent, recurseParents) : environment.parent) + clone.memory = new Map(JSON.parse(JSON.stringify([...environment.memory])) as [Identifier, IdentifierDefinition[]][]) + return clone +} + +export function cloneEnvironmentInformation(environment: REnvironmentInformation, recurseParents = true): REnvironmentInformation { + return { + current: cloneEnvironment(environment.current, recurseParents), + level: environment.level + } +} diff --git a/src/dataflow/environments/define.ts b/src/dataflow/environments/define.ts new file mode 100644 index 0000000000..86e0e1cad1 --- /dev/null +++ b/src/dataflow/environments/define.ts @@ -0,0 +1,49 @@ +import { guard } from '../../util/assert' +import { BuiltInEnvironment } from './environment' +import type { IEnvironment, REnvironmentInformation } from './environment' + +import { cloneEnvironmentInformation } from './clone' +import type { IdentifierDefinition } from './identifier' + +function defInEnv(newEnvironments: IEnvironment, name: string, definition: IdentifierDefinition) { + const existing = newEnvironments.memory.get(name) + // check if it is maybe or not + if(existing === undefined || definition.controlDependencies === undefined) { + newEnvironments.memory.set(name, [definition]) + } else { + existing.push(definition) + } +} + +/** + * Insert the given `definition` --- defined within the given scope --- into the passed along `environments` will take care of propagation. + * Does not modify the passed along `environments` in-place! It returns the new reference. 
+ */ +export function define(definition: IdentifierDefinition, superAssign: boolean, environment: REnvironmentInformation): REnvironmentInformation { + const name = definition.name + guard(name !== undefined, () => `Name must be defined, but isn't for ${JSON.stringify(definition)}`) + let newEnvironment + if(superAssign) { + newEnvironment = cloneEnvironmentInformation(environment, true) + let current: IEnvironment = newEnvironment.current + let last = undefined + let found = false + do{ + if(current.memory.has(name)) { + current.memory.set(name, [definition]) + found = true + break + } + last = current + current = current.parent + } while(current.id !== BuiltInEnvironment.id) + if(!found) { + guard(last !== undefined, () => `Could not find global scope for ${name}`) + last.memory.set(name, [definition]) + } + } else { + newEnvironment = cloneEnvironmentInformation(environment, false) + defInEnv(newEnvironment.current, name, definition) + } + return newEnvironment +} diff --git a/src/dataflow/environments/diff.ts b/src/dataflow/environments/diff.ts new file mode 100644 index 0000000000..6c1a0999da --- /dev/null +++ b/src/dataflow/environments/diff.ts @@ -0,0 +1,87 @@ +import type { GenericDifferenceInformation, WriteableDifferenceReport } from '../../util/diff' +import { setDifference } from '../../util/diff' +import type { IEnvironment, REnvironmentInformation } from './environment' +import { jsonReplacer } from '../../util/json' +import type { IdentifierReference } from './identifier' +import { arrayEqual } from '../../util/arrays' + +export function diffIdentifierReferences(a: IdentifierReference | undefined, b: IdentifierReference | undefined, info: GenericDifferenceInformation): void { + if(a === undefined || b === undefined) { + if(a !== b) { + info.report.addComment(`${info.position}Different identifier references: ${info.leftname}: ${JSON.stringify(a, jsonReplacer)} vs. 
${info.rightname}: ${JSON.stringify(b, jsonReplacer)}`) + } + return + } + if(a.name !== b.name) { + info.report.addComment(`${info.position}Different identifier names: ${info.leftname}: ${a.name} vs. ${info.rightname}: ${b.name}`) + } + if(a.nodeId !== b.nodeId) { + info.report.addComment(`${info.position}Different nodeIds: ${info.leftname}: ${a.nodeId} vs. ${info.rightname}: ${b.nodeId}`) + } + if(!arrayEqual(a.controlDependencies, b.controlDependencies)) { + info.report.addComment(`${info.position}Different control dependency: ${info.leftname}: ${JSON.stringify(a.controlDependencies)} vs. ${info.rightname}: ${JSON.stringify(b.controlDependencies)}`) + } +} + +function diffMemory(a: IEnvironment, b: IEnvironment, info: GenericDifferenceInformation) { + for(const [key, value] of a.memory) { + const value2 = b.memory.get(key) + if(value2 === undefined || value.length !== value2.length) { + info.report.addComment(`${info.position}Different definitions for ${key}. ${info.leftname}: ${JSON.stringify(value, jsonReplacer)} vs. ${info.rightname}: ${JSON.stringify(value2, jsonReplacer)}`) + continue + } + + // we sort both value arrays by their id so that we have no problems with differently ordered arrays (which have no impact) + const sorted = [...value].sort((a, b) => String(a.nodeId).localeCompare(String(b.nodeId))) + const sorted2 = [...value2].sort((a, b) => String(a.nodeId).localeCompare(String(b.nodeId))) + + for(let i = 0; i < value.length; ++i) { + const aVal = sorted[i] + const bVal = sorted2[i] + if(aVal.name !== bVal.name) { + info.report.addComment(`${info.position}Different names for ${key}. ${info.leftname}: ${aVal.name} vs. ${info.rightname}: ${bVal.name}`) + } + if(aVal.nodeId !== bVal.nodeId) { + info.report.addComment(`${info.position}Different ids for ${key}. ${info.leftname}: ${aVal.nodeId} vs. 
${info.rightname}: ${bVal.nodeId}`) + } + if(!arrayEqual(aVal.controlDependencies, bVal.controlDependencies)) { + info.report.addComment(`${info.position}Different controlDependency for ${key} (${aVal.nodeId}). ${info.leftname}: ${JSON.stringify(aVal.controlDependencies)} vs. ${info.rightname}: ${JSON.stringify(bVal.controlDependencies)}`) + } + if(aVal.definedAt !== bVal.definedAt) { + info.report.addComment(`${info.position}Different definition ids (definedAt) for ${key} (${aVal.nodeId}). ${info.leftname}: ${aVal.definedAt} vs. ${info.rightname}: ${bVal.definedAt}`) + } + if(aVal.kind !== bVal.kind) { + info.report.addComment(`${info.position}Different kinds for ${key} (${aVal.nodeId}). ${info.leftname}: ${aVal.kind} vs. ${info.rightname}: ${bVal.kind}`) + } + } + } +} + +export function diffEnvironment(a: IEnvironment | undefined, b: IEnvironment | undefined, info: GenericDifferenceInformation): void { + if(a === undefined || b === undefined) { + if(a !== b) { + info.report.addComment(`${info.position}Different environments. ${info.leftname}: ${a !== undefined ? 'present' : 'undefined'} vs. ${info.rightname}: ${b !== undefined ? 'present' : 'undefined'}`) + } + return + } + if(a.name !== b.name) { + info.report.addComment(`${info.position}Different environment names. ${info.leftname}: ${a.name} vs. ${info.rightname}: ${b.name}`) + } + if(a.memory.size !== b.memory.size) { + info.report.addComment(`${info.position}Different environment sizes. ${info.leftname}: ${a.memory.size} vs. ${info.rightname}: ${b.memory.size}`) + setDifference(new Set([...a.memory.keys()]), new Set([...b.memory.keys()]), { + ...info, + position: `${info.position}Key comparison. ` + }) + } + diffMemory(a, b, info) + diffEnvironment(a.parent, b.parent, { ...info, position: `${info.position}Parents of ${a.id} & ${b.id}. 
` }) +} + +export function diffEnvironmentInformation(a: REnvironmentInformation | undefined, b: REnvironmentInformation | undefined, info: GenericDifferenceInformation): void { + if(a === undefined || b === undefined) { + info.report.addComment(`${info.position}Different environments: ${JSON.stringify(a, jsonReplacer)} vs. ${JSON.stringify(b, jsonReplacer)}`) + return + } + diffEnvironment(a.current, b.current, info) +} diff --git a/src/dataflow/environments/environment.ts b/src/dataflow/environments/environment.ts index 342bc4313b..17d2c3fdd0 100644 --- a/src/dataflow/environments/environment.ts +++ b/src/dataflow/environments/environment.ts @@ -4,81 +4,42 @@ * * @module */ -import type { NodeId } from '../../r-bridge' -import type { DataflowGraph, DataflowGraphEdgeAttribute } from '../graph' +import type { DataflowGraph } from '../' import { resolveByName } from './resolve-by-name' -import type { DataflowScopeName } from './scopes' -import { GlobalScope, LocalScope } from './scopes' -import type { GenericDifferenceInformation } from '../../util/diff' -import { setDifference } from '../../util/diff' -import { jsonReplacer } from '../../util/json' - -/** identifiers are branded to avoid confusion with other string-like types */ -export type Identifier = string & { __brand?: 'identifier' } - - -export const BuiltIn = 'built-in' - - -/** - * Stores the definition of an identifier within an {@link IEnvironment} - */ -export interface IdentifierDefinition extends IdentifierReference { - kind: 'function' | 'variable' | 'parameter' | 'unknown' | 'built-in-function' | 'argument' - /** The assignment (or whatever, like `assign` function call) node which ultimately defined this identifier */ - definedAt: NodeId -} +import type { Identifier, IdentifierDefinition, IdentifierReference } from './identifier' +import { BuiltInMemory } from './built-in' +import type { NodeId } from '../../r-bridge' -/** - * Something like `a` in `b <- a`. 
- * Without any surrounding information, `a` will produce the - * identifier reference `a` in the current scope (like the global environment). - * Similarly, `b` will create a reference. - */ -export interface IdentifierReference { - name: Identifier, - scope: DataflowScopeName, - /** Node which represents the reference in the AST */ - nodeId: NodeId - /** - * Is this reference used in every execution path of the program or only if some of them. This can be too-conservative regarding `maybe`. - * For example, if we can not detect `if(FALSE)`, this will be `maybe` even if we could statically determine, that the `then` branch is never executed. - */ - used: DataflowGraphEdgeAttribute -} -export function diffIdentifierReferences(a: IdentifierReference, b: IdentifierReference, info: GenericDifferenceInformation): void { - if(a.name !== b.name) { - info.report.addComment(`${info.position}Different identifier names: ${a.name} vs. ${b.name}`) - } - if(a.scope !== b.scope) { - info.report.addComment(`${info.position}Different scopes: ${a.scope} vs. ${b.scope}`) - } - if(a.nodeId !== b.nodeId) { - info.report.addComment(`${info.position}Different nodeIds: ${a.nodeId} vs. ${b.nodeId}`) +export function makeReferenceMaybe(ref: IdentifierReference, graph: DataflowGraph, environments: REnvironmentInformation, includeDefs: boolean, defaultCd: NodeId | undefined = undefined): IdentifierReference { + const node = graph.get(ref.nodeId, true) + if(includeDefs) { + const definitions = ref.name ? resolveByName(ref.name, environments) : undefined + for(const definition of definitions ?? []) { + if(definition.kind !== 'built-in-function' && definition.kind !== 'built-in-value') { + if(definition.controlDependencies && defaultCd && !definition.controlDependencies.includes(defaultCd)) { + definition.controlDependencies.push(defaultCd) + } else { + definition.controlDependencies = defaultCd ? 
[defaultCd] : [] + } + } + } } - if(a.used !== b.used) { - info.report.addComment(`${info.position}Different used: ${a.used} vs. ${b.used}`) + if(node) { + if(node[0].controlDependencies && defaultCd && !node[0].controlDependencies.includes(defaultCd)) { + node[0].controlDependencies.push(defaultCd) + } else { + node[0].controlDependencies = defaultCd ? [defaultCd] : [] + } } + return { ...ref, controlDependencies: [...ref.controlDependencies ?? [], ...(defaultCd ? [defaultCd]: []) ] } } -export function makeAllMaybe(references: IdentifierReference[] | undefined, graph: DataflowGraph, environments: REnvironmentInformation): IdentifierReference[] { +export function makeAllMaybe(references: readonly IdentifierReference[] | undefined, graph: DataflowGraph, environments: REnvironmentInformation, includeDefs: boolean, defaultCd: NodeId | undefined = undefined): IdentifierReference[] { if(references === undefined) { return [] } - return references.map(ref => { - const node = graph.get(ref.nodeId, true) - const definitions = resolveByName(ref.name, LocalScope, environments) - for(const definition of definitions ?? 
[]) { - if(definition.kind !== 'built-in-function') { - definition.used = 'maybe' - } - } - if(node) { - node[0].when = 'maybe' - } - return { ...ref, used: 'maybe' } - }) + return references.map(ref => makeReferenceMaybe(ref, graph, environments, includeDefs, defaultCd)) } @@ -87,7 +48,7 @@ export interface IEnvironment { readonly id: string readonly name: string /** Lexical parent of the environment, if any (can be manipulated by R code) */ - parent?: IEnvironment + parent: IEnvironment /** * Maps to exactly one definition of an identifier if the source is known, otherwise to a list of all possible definitions */ @@ -99,10 +60,10 @@ let environmentIdCounter = 0 export class Environment implements IEnvironment { readonly name: string readonly id: string = `${environmentIdCounter++}` - parent?: IEnvironment + parent: IEnvironment memory: Map - constructor(name: string, parent?: IEnvironment) { + constructor(name: string, parent: IEnvironment) { this.name = name this.parent = parent this.memory = new Map() @@ -123,120 +84,19 @@ export interface REnvironmentInformation { readonly level: number } -export const DefaultEnvironmentMemory = new Map([ - ['return', [{ - kind: 'built-in-function', - scope: GlobalScope, - used: 'always', - definedAt: BuiltIn, - name: 'return', - nodeId: BuiltIn - }]], - ['cat', [{ - kind: 'built-in-function', - scope: GlobalScope, - used: 'always', - definedAt: BuiltIn, - name: 'cat', - nodeId: BuiltIn - }]], - ['print', [{ - kind: 'built-in-function', - scope: GlobalScope, - used: 'always', - definedAt: BuiltIn, - name: 'print', - nodeId: BuiltIn - }]], - ['source', [{ - kind: 'built-in-function', - scope: GlobalScope, - used: 'always', - definedAt: BuiltIn, - name: 'source', - nodeId: BuiltIn - }]] -]) -export function initializeCleanEnvironments(): REnvironmentInformation { - const global = new Environment(GlobalScope) - // use a copy - global.memory = new Map(DefaultEnvironmentMemory) - return { - current: global, - level: 0 - } -} +/* 
the built-in environment is the root of all environments */ +export const BuiltInEnvironment = new Environment('built-in', undefined as unknown as IEnvironment) +BuiltInEnvironment.memory = BuiltInMemory -export function diffEnvironment(a: IEnvironment | undefined, b: IEnvironment | undefined, info: GenericDifferenceInformation): void { - if(a === undefined || b === undefined) { - if(a !== b) { - info.report.addComment(`${info.position}Different environments. ${info.leftname}: ${JSON.stringify(a, jsonReplacer)} vs. ${info.rightname}: ${JSON.stringify(b, jsonReplacer)}`) - } - return - } - if(a.name !== b.name) { - info.report.addComment(`${info.position}Different environment names. ${info.leftname}: ${JSON.stringify(a, jsonReplacer)} vs. ${info.rightname}: ${JSON.stringify(b, jsonReplacer)}`) - } - if(a.memory.size !== b.memory.size) { - info.report.addComment(`${info.position}Different environment sizes. ${info.leftname}: ${JSON.stringify(a, jsonReplacer)} vs. ${info.rightname}: ${JSON.stringify(b, jsonReplacer)}`) - setDifference(new Set([...a.memory.keys()]), new Set([...b.memory.keys()]), { ...info, position: `${info.position}Key comparison. ` }) - } - for(const [key, value] of a.memory) { - const value2 = b.memory.get(key) - if(value2 === undefined || value.length !== value2.length) { - info.report.addComment(`${info.position}Different definitions for ${key}. ${info.leftname}: ${JSON.stringify(value, jsonReplacer)} vs. ${info.rightname}: ${JSON.stringify(value2, jsonReplacer)}`) - continue - } +export const GLOBAL_ENV_NAME = 'global' - for(let i = 0; i < value.length; ++i) { - const aVal = value[i] - const bVal = value2[i] - if(aVal.name !== bVal.name) { - info.report.addComment(`${info.position}Different names for ${key}. ${info.leftname}: ${aVal.name} vs. ${info.rightname}: ${bVal.name}`) - } - if(aVal.nodeId !== bVal.nodeId) { - info.report.addComment(`${info.position}Different ids for ${key}. ${info.leftname}: ${aVal.nodeId} vs. 
${info.rightname}: ${bVal.nodeId}`) - } - if(aVal.scope !== bVal.scope) { - info.report.addComment(`${info.position}Different scopes for ${key}. ${info.leftname}: ${aVal.scope} vs. ${info.rightname}: ${bVal.scope}`) - } - if(aVal.used !== bVal.used) { - info.report.addComment(`${info.position}Different used for ${key}. ${info.leftname}: ${aVal.used} vs. ${info.rightname}: ${bVal.used}`) - } - if(aVal.definedAt !== bVal.definedAt) { - info.report.addComment(`${info.position}Different definition ids (definedAt) for ${key}. ${info.leftname}: ${aVal.definedAt} vs. ${info.rightname}: ${bVal.definedAt}`) - } - if(aVal.kind !== bVal.kind) { - info.report.addComment(`${info.position}Different kinds for ${key}. ${info.leftname}: ${aVal.kind} vs. ${info.rightname}: ${bVal.kind}`) - } - } +export function initializeCleanEnvironments(): REnvironmentInformation { + return { + current: new Environment(GLOBAL_ENV_NAME, BuiltInEnvironment), + level: 0 } - diffEnvironment(a.parent, b.parent, { ...info, position: `${info.position}Parents of ${a.id} & ${b.id}. ` }) } -export function diffEnvironments(a: REnvironmentInformation | undefined, b: REnvironmentInformation | undefined, info: GenericDifferenceInformation): void { - if(a === undefined || b === undefined) { - info.report.addComment(`${info.position}Different environments: ${JSON.stringify(a, jsonReplacer)} vs. ${JSON.stringify(b, jsonReplacer)}`) - return - } - diffEnvironment(a.current, b.current, info) -} -function cloneEnvironment(environment: IEnvironment, recurseParents: boolean): IEnvironment -function cloneEnvironment(environment: IEnvironment | undefined, recurseParents: boolean): IEnvironment | undefined -function cloneEnvironment(environment: IEnvironment | undefined, recurseParents: boolean): IEnvironment | undefined { - if(environment === undefined) { - return undefined - } - const clone = new Environment(environment.name, recurseParents ? 
cloneEnvironment(environment.parent, recurseParents) : environment.parent) - clone.memory = new Map(JSON.parse(JSON.stringify([...environment.memory])) as [Identifier, IdentifierDefinition[]][]) - return clone -} -export function cloneEnvironments(environment: REnvironmentInformation, recurseParents = true): REnvironmentInformation { - return { - current: cloneEnvironment(environment.current, recurseParents), - level: environment.level - } -} diff --git a/src/dataflow/environments/identifier.ts b/src/dataflow/environments/identifier.ts new file mode 100644 index 0000000000..f1db650ef2 --- /dev/null +++ b/src/dataflow/environments/identifier.ts @@ -0,0 +1,32 @@ +import type { NodeId } from '../../r-bridge' +import type { BuiltInIdentifierConstant, BuiltInIdentifierDefinition } from './built-in' + +export type Identifier = string & { __brand?: 'identifier' } + +interface InGraphIdentifierDefinition extends IdentifierReference { + kind: 'function' | 'variable' | 'parameter' | 'argument' + /** The assignment (or whatever, like `assign` function call) node which ultimately defined this identifier */ + definedAt: NodeId +} + +/** + * Stores the definition of an identifier within an {@link IEnvironment} + */ +export type IdentifierDefinition = InGraphIdentifierDefinition | BuiltInIdentifierDefinition | BuiltInIdentifierConstant + +/** + * Something like `a` in `b <- a`. + * Without any surrounding information, `a` will produce the identifier reference `a`. + * Similarly, `b` will create a reference. + */ +export interface IdentifierReference { + /** Node which represents the reference in the AST */ + readonly nodeId: NodeId + /** Name the reference is identified by (e.g., the name of the variable), undefined if the reference is "artificial" (e.g., anonymous) */ + readonly name: Identifier | undefined, + /** + * If the reference is only effective if, e.g. an if-then-else condition is true, this references the root of the `if`. 
+ * As a hackey intermediate solution (until we have pointer-analysis), an empty array may indicate a `maybe` which is due to pointer access (e.g., in `a[x] <- 3`). + */ + controlDependencies: NodeId[] | undefined +} diff --git a/src/dataflow/environments/index.ts b/src/dataflow/environments/index.ts index afb1c24245..c582dd3e61 100644 --- a/src/dataflow/environments/index.ts +++ b/src/dataflow/environments/index.ts @@ -3,5 +3,12 @@ export * from './resolve-by-name' export * from './scoping' export * from './overwrite' export * from './environment' -export * from './register' -export { DataflowScopeName } from './scopes' +export * from './define' +export { diffIdentifierReferences } from './diff' +export { diffEnvironmentInformation } from './diff' +export { diffEnvironment } from './diff' +export { cloneEnvironmentInformation } from './clone' +export { BuiltInMemory, BuiltIn } from './built-in' +export { IdentifierReference } from './identifier' +export { IdentifierDefinition } from './identifier' +export { Identifier } from './identifier' diff --git a/src/dataflow/environments/overwrite.ts b/src/dataflow/environments/overwrite.ts index 03f576b87e..546461e0c1 100644 --- a/src/dataflow/environments/overwrite.ts +++ b/src/dataflow/environments/overwrite.ts @@ -1,28 +1,16 @@ import { guard } from '../../util/assert' -import type { REnvironmentInformation, IEnvironment, IdentifierDefinition } from './environment' -import { Environment } from './environment' +import type { REnvironmentInformation, IEnvironment } from './environment' +import { BuiltInEnvironment , Environment } from './environment' +import type { IdentifierDefinition } from './identifier' -function anyIsMaybeGuardingSame(values: IdentifierDefinition[]): boolean { +function anyIsMaybeOrEmpty(values: readonly IdentifierDefinition[]): boolean { if(values.length === 0) { return true } - const attr = values[0].used - if(attr === 'maybe') { - return true - } - let same = true - for(let i = 1; i < 
values.length; i++) { - const used = values[i].used - if(used === 'maybe') { + for(const val of values) { + if(val.controlDependencies !== undefined) { return true } - // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- in case we want to add more attributes in the future - if(used !== attr) { - same = false - } - } - if(!same) { - throw new Error('all values must have either a maybe or are all the same') } return false } @@ -32,7 +20,7 @@ export function overwriteIEnvironmentWith(base: IEnvironment | undefined, next: guard(base.name === next.name, 'cannot overwrite environments with different names') const map = new Map(base.memory) for(const [key, values] of next.memory) { - const hasMaybe = anyIsMaybeGuardingSame(values) + const hasMaybe = anyIsMaybeOrEmpty(values) if(hasMaybe) { const old = map.get(key) // we need to make a copy to avoid side effects for old reference in other environments @@ -49,9 +37,9 @@ export function overwriteIEnvironmentWith(base: IEnvironment | undefined, next: } } - let parent: IEnvironment | undefined + let parent: IEnvironment if(includeParent) { - parent = base.parent === undefined ? undefined : overwriteIEnvironmentWith(base.parent, next.parent) + parent = base.parent.id === BuiltInEnvironment.id ? 
BuiltInEnvironment : overwriteIEnvironmentWith(base.parent, next.parent) } else { parent = base.parent } @@ -62,15 +50,15 @@ export function overwriteIEnvironmentWith(base: IEnvironment | undefined, next: } -export function overwriteEnvironments(base: REnvironmentInformation, next: REnvironmentInformation | undefined): REnvironmentInformation -export function overwriteEnvironments(base: REnvironmentInformation | undefined, next: REnvironmentInformation): REnvironmentInformation -export function overwriteEnvironments(base: undefined, next: undefined): undefined -export function overwriteEnvironments(base: REnvironmentInformation | undefined, next: REnvironmentInformation | undefined): REnvironmentInformation | undefined +export function overwriteEnvironment(base: REnvironmentInformation, next: REnvironmentInformation | undefined): REnvironmentInformation +export function overwriteEnvironment(base: REnvironmentInformation | undefined, next: REnvironmentInformation): REnvironmentInformation +export function overwriteEnvironment(base: undefined, next: undefined): undefined +export function overwriteEnvironment(base: REnvironmentInformation | undefined, next: REnvironmentInformation | undefined): REnvironmentInformation | undefined /** * Assumes, that all definitions within next replace those within base (given the same name). - * But if all definitions within next are maybe, then they are appended to the base definitions (updating them to be `maybe` from now on as well), similar to {@link appendEnvironments}. + * But if all definitions within next are maybe, then they are appended to the base definitions (updating them to be `maybe` from now on as well), similar to {@link appendEnvironment}. 
*/ -export function overwriteEnvironments(base: REnvironmentInformation | undefined, next: REnvironmentInformation | undefined): REnvironmentInformation | undefined { +export function overwriteEnvironment(base: REnvironmentInformation | undefined, next: REnvironmentInformation | undefined): REnvironmentInformation | undefined { if(base === undefined) { return next } else if(next === undefined) { diff --git a/src/dataflow/environments/register.ts b/src/dataflow/environments/register.ts deleted file mode 100644 index 8b8e7a66ad..0000000000 --- a/src/dataflow/environments/register.ts +++ /dev/null @@ -1,37 +0,0 @@ -import type { IdentifierDefinition, IEnvironment, REnvironmentInformation } from './environment' -import { cloneEnvironments } from './environment' -import type { DataflowScopeName } from './scopes' -import { GlobalScope, LocalScope } from './scopes' -import { guard } from '../../util/assert' - -/** - * Insert the given `definition` --- defined within the given scope --- into the passed along `environments` will take care of propagation. - * Does not modify the passed along `environments` in-place! It returns the new reference. 
- */ -export function define(definition: IdentifierDefinition, withinScope: DataflowScopeName, environments: REnvironmentInformation): REnvironmentInformation { - let newEnvironments = environments - guard(withinScope === definition.scope, 'Mismatching scopes') - if(withinScope === LocalScope) { - newEnvironments = cloneEnvironments(environments, false) - newEnvironments.current.memory.set(definition.name, [definition]) - } else if(withinScope === GlobalScope) { - newEnvironments = cloneEnvironments(environments, true) - let current: IEnvironment | undefined = newEnvironments.current - let last = undefined - let found = false - do{ - if(current.memory.has(definition.name)) { - current.memory.set(definition.name, [definition]) - found = true - break - } - last = current - current = current.parent - } while(current !== undefined) - if(!found) { - guard(last !== undefined, () => `Could not find global scope for ${definition.name}`) - last.memory.set(definition.name, [definition]) - } - } - return newEnvironments -} diff --git a/src/dataflow/environments/resolve-by-name.ts b/src/dataflow/environments/resolve-by-name.ts index dddb6d4b24..fd3c67fbfd 100644 --- a/src/dataflow/environments/resolve-by-name.ts +++ b/src/dataflow/environments/resolve-by-name.ts @@ -1,36 +1,58 @@ -import type { Identifier, IdentifierDefinition, IEnvironment, REnvironmentInformation } from './environment' -import { dataflowLogger } from '../index' -import type { DataflowScopeName } from './scopes' -import { LocalScope } from './scopes' +import type { + IEnvironment, + REnvironmentInformation +} from './environment' +import { + BuiltInEnvironment +} from './environment' +import type { Ternary } from '../../util/logic' +import type { Identifier, IdentifierDefinition } from './identifier' + /** * Resolves a given identifier name to a list of its possible definition location using R scoping and resolving rules. 
* - * @param name - The name of the identifier to resolve - * @param withinScope - The scope in which the identifier is used - * @param environments - The current environments used for name resolution + * @param name - The name of the identifier to resolve + * @param environment - The current environment used for name resolution * * @returns A list of possible definitions of the identifier (one if the definition location is exactly and always known), or `undefined` if the identifier is undefined in the current scope/with the current environment information. */ -export function resolveByName(name: Identifier, withinScope: DataflowScopeName, environments: REnvironmentInformation): IdentifierDefinition[] | undefined { - if(withinScope !== LocalScope) { - throw new Error('Non-local scoping is not yet supported') - } - return resolve(name, withinScope, environments) -} - -function resolve(name: Identifier, withinScope: DataflowScopeName, environments: REnvironmentInformation) { - dataflowLogger.trace(`Resolving local identifier ${name} (scope name: ${withinScope}, local stack size: ${environments.level})`) - - let current: IEnvironment | undefined = environments.current +export function resolveByName(name: Identifier, environment: REnvironmentInformation): IdentifierDefinition[] | undefined { + let current: IEnvironment = environment.current do{ const definition = current.memory.get(name) if(definition !== undefined) { return definition } current = current.parent - } while(current !== undefined) + } while(current.id !== BuiltInEnvironment.id) + + return current.memory.get(name) +} - dataflowLogger.trace(`Unable to find identifier ${name} in stack`) - return undefined +export function resolvesToBuiltInConstant(name: Identifier | undefined, environment: REnvironmentInformation, wantedValue: unknown): Ternary { + if(name === undefined) { + return 'never' + } + const definition = resolveByName(name, environment) + + if(definition === undefined) { + return 'never' + } + + 
let all = true + let some = false + for(const def of definition) { + if(def.kind === 'built-in-value' && def.value === wantedValue) { + some = true + } else { + all = false + } + } + + if(all) { + return 'always' + } else { + return some ? 'maybe' : 'never' + } } diff --git a/src/dataflow/environments/scopes.ts b/src/dataflow/environments/scopes.ts deleted file mode 100644 index 8ed706a24d..0000000000 --- a/src/dataflow/environments/scopes.ts +++ /dev/null @@ -1,10 +0,0 @@ -export const GlobalScope = '.GlobalEnv' -export const LocalScope = 'local' - -/** - * Used to represent usual R scopes - */ -export type DataflowScopeName = - | /** default R global environment */ typeof GlobalScope - | /** unspecified automatic local environment */ typeof LocalScope - | /** named environments */ string diff --git a/src/dataflow/environments/scoping.ts b/src/dataflow/environments/scoping.ts index 56b422919e..1b8a9e0c01 100644 --- a/src/dataflow/environments/scoping.ts +++ b/src/dataflow/environments/scoping.ts @@ -1,15 +1,11 @@ import type { REnvironmentInformation } from './environment' import { Environment } from './environment' import { guard } from '../../util/assert' -import { LocalScope } from './scopes' /** Add a new local environment scope to the stack, returns the modified variant - sharing the original environments in the stack (no deep-clone) */ export function pushLocalEnvironment(base: REnvironmentInformation): REnvironmentInformation { - const local = new Environment(LocalScope) - local.parent = base.current - return { - current: local, + current: new Environment('local', base.current), level: base.level + 1 } } diff --git a/src/dataflow/extractor.ts b/src/dataflow/extractor.ts index f4c35b7fee..15a5971d79 100644 --- a/src/dataflow/extractor.ts +++ b/src/dataflow/extractor.ts @@ -1,69 +1,58 @@ -import type { NormalizedAst, ParentInformation, RAssignmentOp, RBinaryOp, RParseRequest } from '../r-bridge' -import { requestFingerprint , RType } from '../r-bridge' - 
-import type { DataflowInformation } from './internal/info' -import type { DataflowProcessorInformation, DataflowProcessors } from './processor' +import type { NormalizedAst, ParentInformation, RParseRequest } from '../r-bridge' +import { RType, requestFingerprint } from '../r-bridge' +import type { DataflowInformation } from './info' +import type { DataflowProcessors } from './processor' import { processDataflowFor } from './processor' -import { processUninterestingLeaf } from './internal/process/uninteresting-leaf' -import { processSymbol } from './internal/process/symbol' -import { processNonAssignmentBinaryOp } from './internal/process/operators/non-assignment-binary-op' -import { processUnaryOp } from './internal/process/operators/unary-op' -import { processExpressionList } from './internal/process/expression-list' -import { processRepeatLoop } from './internal/process/loops/repeat-loop' -import { processForLoop } from './internal/process/loops/for-loop' -import { processWhileLoop } from './internal/process/loops/while-loop' -import { processIfThenElse } from './internal/process/if-then-else' -import { processFunctionCall } from './internal/process/functions/function-call' -import { processFunctionDefinition } from './internal/process/functions/function-definition' -import { processFunctionParameter } from './internal/process/functions/parameter' -import type { DataflowScopeName } from './environments' +import { processUninterestingLeaf } from './internal/process/process-uninteresting-leaf' +import { processSymbol } from './internal/process/process-symbol' +import { processFunctionCall } from './internal/process/functions/call/default-call-handling' +import { processFunctionParameter } from './internal/process/functions/process-parameter' import { initializeCleanEnvironments } from './environments' -import { processFunctionArgument } from './internal/process/functions/argument' -import { processAssignment } from './internal/process/operators/assignment' 
-import { processAccess } from './internal/process/access' -import { processPipeOperation } from './internal/process/operators/pipe' -import { LocalScope } from './environments/scopes' +import { processFunctionArgument } from './internal/process/functions/process-argument' +import { processAsNamedCall } from './internal/process/process-named-call' +import { processValue } from './internal/process/process-value' +import { processNamedCall } from './internal/process/functions/call/named-call-handling' +import { wrapArgumentsUnnamed } from './internal/process/functions/call/argument/make-argument' +import { rangeFrom } from '../util/range' -// eslint-disable-next-line @typescript-eslint/no-explicit-any -- allows type adaption without re-creation -const processors: DataflowProcessors = { - [RType.Number]: processUninterestingLeaf, - [RType.String]: processUninterestingLeaf, - [RType.Logical]: processUninterestingLeaf, - [RType.Access]: processAccess, - [RType.Symbol]: processSymbol, - [RType.BinaryOp]: processBinaryOp, - [RType.Pipe]: processPipeOperation, - [RType.UnaryOp]: processUnaryOp, - [RType.ForLoop]: processForLoop, - [RType.WhileLoop]: processWhileLoop, - [RType.RepeatLoop]: processRepeatLoop, - [RType.IfThenElse]: processIfThenElse, - [RType.Break]: processUninterestingLeaf, - [RType.Next]: processUninterestingLeaf, +export const processors: DataflowProcessors = { + [RType.Number]: processValue, + [RType.String]: processValue, + [RType.Logical]: processValue, [RType.Comment]: processUninterestingLeaf, [RType.LineDirective]: processUninterestingLeaf, + [RType.Symbol]: processSymbol, + [RType.Access]: (n, d) => processAsNamedCall(n, d, n.operator, [n.accessed, ...n.access]), + [RType.BinaryOp]: (n, d) => processAsNamedCall(n, d, n.operator, [n.lhs, n.rhs]), + [RType.Pipe]: (n, d) => processAsNamedCall(n, d, n.lexeme, [n.lhs, n.rhs]), + [RType.UnaryOp]: (n, d) => processAsNamedCall(n, d, n.operator, [n.operand]), + [RType.ForLoop]: (n, d) => 
processAsNamedCall(n, d, n.lexeme, [n.variable, n.vector, n.body]), + [RType.WhileLoop]: (n, d) => processAsNamedCall(n, d, n.lexeme, [n.condition, n.body]), + [RType.RepeatLoop]: (n, d) => processAsNamedCall(n, d, n.lexeme, [n.body]), + [RType.IfThenElse]: (n, d) => processAsNamedCall(n, d, n.lexeme, [n.condition, n.then, n.otherwise]), + [RType.Break]: (n, d) => processAsNamedCall(n, d, n.lexeme, []), + [RType.Next]: (n, d) => processAsNamedCall(n, d, n.lexeme, []), [RType.FunctionCall]: processFunctionCall, - [RType.FunctionDefinition]: processFunctionDefinition, + [RType.FunctionDefinition]: (n, d) => processAsNamedCall(n, d, n.lexeme, [...n.parameters, n.body]), [RType.Parameter]: processFunctionParameter, [RType.Argument]: processFunctionArgument, - [RType.ExpressionList]: processExpressionList, + [RType.ExpressionList]: (n, d) => processNamedCall({ + type: RType.Symbol, + info: n.info, + content: n.grouping?.[0].content ?? '{', + lexeme: n.grouping?.[0].lexeme ?? '{', + location: n.location ?? rangeFrom(-1, -1, -1, -1), + namespace: n.grouping?.[0].content ? 
undefined : 'base' + }, wrapArgumentsUnnamed(n.children, d.completeAst.idMap), n.info.id, d) } -export function produceDataFlowGraph(request: RParseRequest, ast: NormalizedAst, initialScope: DataflowScopeName = LocalScope): DataflowInformation { +export function produceDataFlowGraph(request: RParseRequest, ast: NormalizedAst): DataflowInformation { return processDataflowFor(ast.ast, { - completeAst: ast, - activeScope: initialScope, - environments: initializeCleanEnvironments(), - processors: processors as DataflowProcessors, - currentRequest: request, - referenceChain: [requestFingerprint(request)] + completeAst: ast, + environment: initializeCleanEnvironments(), + processors, + currentRequest: request, + controlDependencies: undefined, + referenceChain: [requestFingerprint(request)] }) } - -export function processBinaryOp(node: RBinaryOp, data: DataflowProcessorInformation) { - if(node.flavor === 'assignment') { - return processAssignment(node as RAssignmentOp, data) - } else { - return processNonAssignmentBinaryOp(node, data) - } -} diff --git a/src/dataflow/graph/diff.ts b/src/dataflow/graph/diff.ts index 29fc7e48bd..a0c43d0ce5 100644 --- a/src/dataflow/graph/diff.ts +++ b/src/dataflow/graph/diff.ts @@ -1,32 +1,56 @@ import type { IdentifierReference } from '../environments' -import { diffIdentifierReferences, diffEnvironments } from '../environments' +import { diffEnvironmentInformation, diffIdentifierReferences } from '../environments' import type { NodeId } from '../../r-bridge' -import type { DataflowGraph, FunctionArgument, OutgoingEdges, PositionalFunctionArgument } from './graph' -import { guard } from '../../util/assert' -import type { - GenericDifferenceInformation, - WriteableDifferenceReport, DifferenceReport -} from '../../util/diff' -import { - setDifference -} from '../../util/diff' +import { EmptyArgument } from '../../r-bridge' +import type { DataflowGraph, FunctionArgument, OutgoingEdges } from './graph' +import { isNamedArgument } from 
'./graph' +import type { GenericDifferenceInformation, WriteableDifferenceReport } from '../../util/diff' +import { setDifference } from '../../util/diff' import { jsonReplacer } from '../../util/json' +import { arrayEqual } from '../../util/arrays' +import { VertexType } from './vertex' +import type { DataflowGraphEdge } from './edge' -class DataflowDifferenceReport implements WriteableDifferenceReport { - _comments: string[] | undefined = undefined +interface ProblematicVertex { + tag: 'vertex', + id: NodeId +} + +interface ProblematicEdge { + tag: 'edge', + from: NodeId, + to: NodeId +} + +export type ProblematicDiffInfo = ProblematicVertex | ProblematicEdge + +export class DataflowDifferenceReport implements WriteableDifferenceReport { + _comments: string[] | undefined = undefined + _problematic: ProblematicDiffInfo[] | undefined = undefined - addComment(comment: string): void { + addComment(comment: string, ...related: ProblematicDiffInfo[]): void { if(this._comments === undefined) { this._comments = [comment] } else { this._comments.push(comment) } + if(related.length > 0) { + if(this._problematic === undefined) { + this._problematic = [...related] + } else { + this._problematic.push(...related) + } + } } comments(): readonly string[] | undefined { return this._comments } + problematic(): readonly ProblematicDiffInfo[] | undefined { + return this._problematic + } + isEqual(): boolean { return this._comments === undefined } @@ -37,7 +61,7 @@ export interface NamedGraph { graph: DataflowGraph } -interface DataflowDiffContext extends GenericDifferenceInformation { +interface DataflowDiffContext extends GenericDifferenceInformation { left: DataflowGraph right: DataflowGraph } @@ -62,15 +86,21 @@ function diff(ctx: DataflowDiffContext): boolean { function diffOutgoingEdges(ctx: DataflowDiffContext): void { - const lEdges = new Map(ctx.left.edges()) - const rEdges = new Map(ctx.right.edges()) + const lEdges = new Map([...ctx.left.edges()]) + const rEdges = new 
Map([...ctx.right.edges()]) if(lEdges.size !== rEdges.size) { - ctx.report.addComment(`Detected different number of edges! ${ctx.leftname} has ${lEdges.size}, ${ctx.rightname} has ${rEdges.size}`) + ctx.report.addComment(`Detected different number of edges! ${ctx.leftname} has ${lEdges.size} (${JSON.stringify(lEdges, jsonReplacer)}). ${ctx.rightname} has ${rEdges.size} ${JSON.stringify(rEdges, jsonReplacer)}`) } for(const [id, edge] of lEdges) { diffEdges(ctx, id, edge, rEdges.get(id)) } + // just to make it both ways in case the length differs + for(const [id, edge] of rEdges) { + if(!lEdges.has(id)) { + diffEdges(ctx, id, undefined, edge) + } + } } function diffRootVertices(ctx: DataflowDiffContext): void { @@ -78,7 +108,7 @@ function diffRootVertices(ctx: DataflowDiffContext): void { } -export function diffOfDataflowGraphs(left: NamedGraph, right: NamedGraph): DifferenceReport { +export function diffOfDataflowGraphs(left: NamedGraph, right: NamedGraph): DataflowDifferenceReport { if(left.graph === right.graph) { return new DataflowDifferenceReport() } @@ -88,67 +118,68 @@ export function diffOfDataflowGraphs(left: NamedGraph, right: NamedGraph): Diffe } -function diffFunctionArgumentsReferences(a: IdentifierReference | '', b: IdentifierReference | '', ctx: GenericDifferenceInformation): void { +function diffFunctionArgumentsReferences(fn: NodeId, a: IdentifierReference | '', b: IdentifierReference | '', ctx: GenericDifferenceInformation): void { if(a === '' || b === '') { if(a !== b) { - ctx.report.addComment(`${ctx.position}${ctx.leftname}: ${JSON.stringify(a, jsonReplacer)} vs ${ctx.rightname}: ${JSON.stringify(b, jsonReplacer)}`) + ctx.report.addComment( + `${ctx.position}${ctx.leftname}: ${JSON.stringify(a, jsonReplacer)} vs ${ctx.rightname}: ${JSON.stringify(b, jsonReplacer)}`, + { tag: 'vertex', id: fn } + ) } return } diffIdentifierReferences(a, b, ctx) } -export function equalExitPoints(a: NodeId[] | undefined, b: NodeId[] | undefined): boolean { - if(a 
=== undefined || b === undefined) { - return a === b - } - if(a.length !== b.length) { - return false - } - for(let i = 0; i < a.length; ++i) { - if(a[i] !== b[i]) { - return false - } - } - return true -} - -export function equalFunctionArguments(a: false | FunctionArgument[], b: false | FunctionArgument[]): boolean { - const ctx: GenericDifferenceInformation = { +export function equalFunctionArguments(fn: NodeId, a: false | readonly FunctionArgument[], b: false | readonly FunctionArgument[]): boolean { + const ctx: GenericDifferenceInformation = { report: new DataflowDifferenceReport(), leftname: 'left', rightname: 'right', position: '' } - diffFunctionArguments(a, b, ctx) + diffFunctionArguments(fn, a, b, ctx) return ctx.report.isEqual() } -export function diffFunctionArguments(a: false | FunctionArgument[], b: false | FunctionArgument[], ctx: GenericDifferenceInformation): void { +export function diffFunctionArguments(fn: NodeId, a: false | readonly FunctionArgument[], b: false | readonly FunctionArgument[], ctx: GenericDifferenceInformation): void { if(a === false || b === false) { if(a !== b) { - ctx.report.addComment(`${ctx.position}${ctx.leftname}: ${JSON.stringify(a, jsonReplacer)} vs ${ctx.rightname}: ${JSON.stringify(b, jsonReplacer)}`) + ctx.report.addComment(`${ctx.position}${ctx.leftname}: ${JSON.stringify(a, jsonReplacer)} vs ${ctx.rightname}: ${JSON.stringify(b, jsonReplacer)}`, { tag: 'vertex', id: fn }) } return } else if(a.length !== b.length) { - ctx.report.addComment(`${ctx.position}Differs in number of arguments. ${ctx.leftname}: ${JSON.stringify(a, jsonReplacer)} vs ${ctx.rightname}: ${JSON.stringify(b, jsonReplacer)}`) + ctx.report.addComment(`${ctx.position}Differs in number of arguments. 
${ctx.leftname}: ${JSON.stringify(a, jsonReplacer)} vs ${ctx.rightname}: ${JSON.stringify(b, jsonReplacer)}`, { tag: 'vertex', id: fn }) return } for(let i = 0; i < a.length; ++i) { const aArg = a[i] const bArg = b[i] - if(Array.isArray(aArg) && Array.isArray(bArg)) { + if(aArg === EmptyArgument || bArg === EmptyArgument) { + if(aArg !== bArg) { + ctx.report.addComment(`${ctx.position}In argument #${i} (of ${ctx.leftname}, empty) the argument differs: ${JSON.stringify(aArg)} vs ${JSON.stringify(bArg)}.`) + } + continue + } else if(isNamedArgument(aArg) && isNamedArgument(bArg)) { // must have same name - if(aArg[0] !== bArg[0]) { - ctx.report.addComment(`${ctx.position}In the ${i}th argument (of ${ctx.leftname}, named) the name differs: ${aArg[0]} vs ${bArg[0]}.`) + if(aArg.name !== bArg.name) { + ctx.report.addComment(`${ctx.position }In argument #${i} (of ${ctx.leftname}, named) the name differs: ${aArg.name} vs ${bArg.name}.`) continue } - diffFunctionArgumentsReferences(aArg[1], bArg[1], { + diffFunctionArgumentsReferences(fn, aArg, bArg, { ...ctx, - position: `${ctx.position} In the ${i}th argument (of ${ctx.leftname}, named). ` + position: `${ctx.position} In argument #${i} (of ${ctx.leftname}, named). 
` }) } else { - diffFunctionArgumentsReferences(aArg as PositionalFunctionArgument, bArg as PositionalFunctionArgument, { ...ctx, position: `${ctx.position} In the ${i}th argument (of ${ctx.leftname}, unnamed).` }) + if(aArg.name !== bArg.name) { + ctx.report.addComment(`${ctx.position}In argument #${i} (of ${ctx.leftname}, unnamed) the name differs: ${aArg.name} vs ${bArg.name}.`) + } + if(!arrayEqual(aArg.controlDependencies, bArg.controlDependencies)) { + ctx.report.addComment( + `${ctx.position}In argument #${i} (of ${ctx.leftname}, unnamed) the control dependency differs: ${JSON.stringify(aArg.controlDependencies)} vs ${JSON.stringify(bArg.controlDependencies)}.`, + { tag: 'vertex', id: fn } + ) + } } } } @@ -156,85 +187,113 @@ export function diffFunctionArguments(a: false | FunctionArgument[], b: false | export function diffVertices(ctx: DataflowDiffContext): void { // collect vertices from both sides - const lVert = [...ctx.left.vertices(true)] - const rVert = [...ctx.right.vertices(true)] + const lVert = [...ctx.left.vertices(true)].map(([id, info]) => ([id, info] as const)) + const rVert = [...ctx.right.vertices(true)].map(([id, info]) => ([id, info] as const)) if(lVert.length !== rVert.length) { ctx.report.addComment(`Detected different number of vertices! ${ctx.leftname} has ${lVert.length}, ${ctx.rightname} has ${rVert.length}`) } for(const [id, lInfo] of lVert) { const rInfoMay = ctx.right.get(id) if(rInfoMay === undefined) { - ctx.report.addComment(`Vertex ${id} is not present in ${ctx.rightname}`) + ctx.report.addComment(`Vertex ${id} is not present in ${ctx.rightname}`, { tag: 'vertex', id }) continue } const [rInfo] = rInfoMay if(lInfo.tag !== rInfo.tag) { - ctx.report.addComment(`Vertex ${id} has different tags. ${ctx.leftname}: ${lInfo.tag} vs. ${ctx.rightname}: ${rInfo.tag}`) + ctx.report.addComment(`Vertex ${id} differs in tags. ${ctx.leftname}: ${lInfo.tag} vs. 
${ctx.rightname}: ${rInfo.tag}`, { tag: 'vertex', id }) } if(lInfo.name !== rInfo.name) { - ctx.report.addComment(`Vertex ${id} has different names. ${ctx.leftname}: ${lInfo.name} vs ${ctx.rightname}: ${rInfo.name}`) + ctx.report.addComment(`Vertex ${id} differs in names. ${ctx.leftname}: ${lInfo.name} vs ${ctx.rightname}: ${rInfo.name}`, { tag: 'vertex', id }) } - - if(lInfo.tag === 'variable-definition' || lInfo.tag === 'function-definition') { - guard(lInfo.tag === rInfo.tag, () => `node ${id} does not match on tag (${lInfo.tag} vs ${rInfo.tag})`) - if(lInfo.scope !== rInfo.scope) { - ctx.report.addComment(`Vertex ${id} has different scopes. ${ctx.leftname}: ${lInfo.scope} vs ${ctx.rightname}: ${rInfo.scope}`) - } - } - - if(lInfo.when !== rInfo.when) { - ctx.report.addComment(`Vertex ${id} has different when. ${ctx.leftname}: ${lInfo.when} vs ${ctx.rightname}: ${rInfo.when}`) + if(!arrayEqual(lInfo.controlDependencies, rInfo.controlDependencies)) { + ctx.report.addComment( + `Vertex ${id} differs in controlDependency. ${ctx.leftname}: ${JSON.stringify(lInfo.controlDependencies)} vs ${ctx.rightname}: ${JSON.stringify(rInfo.controlDependencies)}`, + { tag: 'vertex', id } + ) } - diffEnvironments(lInfo.environment, rInfo.environment, { ...ctx, position: `${ctx.position}Vertex ${id} differs in environments. ` }) + diffEnvironmentInformation(lInfo.environment, rInfo.environment, { ...ctx, position: `${ctx.position}Vertex ${id} differs in environment. ` }) - if(lInfo.tag === 'function-call') { - guard(rInfo.tag === 'function-call', 'otherInfo must be a function call as well') - diffFunctionArguments(lInfo.args, rInfo.args, { ...ctx, position: `${ctx.position}Vertex ${id} (function call) differs in arguments. ` }) + if(lInfo.tag === VertexType.FunctionCall) { + if(rInfo.tag !== VertexType.FunctionCall) { + ctx.report.addComment(`Vertex ${id} differs in tags. ${ctx.leftname}: ${lInfo.tag} vs. 
${ctx.rightname}: ${rInfo.tag}`) + } else { + if(lInfo.onlyBuiltin !== rInfo.onlyBuiltin) { + ctx.report.addComment(`Vertex ${id} differs in onlyBuiltin. ${ctx.leftname}: ${lInfo.onlyBuiltin} vs ${ctx.rightname}: ${rInfo.onlyBuiltin}`, { tag: 'vertex', id }) + } + diffFunctionArguments(lInfo.id, lInfo.args, rInfo.args, { + ...ctx, + position: `${ctx.position}Vertex ${id} (function call) differs in arguments. ` + }) + } } if(lInfo.tag === 'function-definition') { - guard(rInfo.tag === 'function-definition', 'otherInfo must be a function definition as well') + if(rInfo.tag !== 'function-definition') { + ctx.report.addComment(`Vertex ${id} differs in tags. ${ctx.leftname}: ${lInfo.tag} vs. ${ctx.rightname}: ${rInfo.tag}`, { tag: 'vertex', id }) + } else { + if(!arrayEqual(lInfo.exitPoints, rInfo.exitPoints)) { + ctx.report.addComment( + `Vertex ${id} differs in exit points. ${ctx.leftname}: ${JSON.stringify(lInfo.exitPoints, jsonReplacer)} vs ${ctx.rightname}: ${JSON.stringify(rInfo.exitPoints, jsonReplacer)}`, + { tag: 'vertex', id } + ) + } - if(!equalExitPoints(lInfo.exitPoints, rInfo.exitPoints)) { - ctx.report.addComment(`Vertex ${id} has different exit points. ${ctx.leftname}: ${JSON.stringify(lInfo.exitPoints, jsonReplacer)} vs ${ctx.rightname}: ${JSON.stringify(rInfo.exitPoints, jsonReplacer)}`) + diffEnvironmentInformation(lInfo.subflow.environment, rInfo.subflow.environment, { + ...ctx, + position: `${ctx.position}Vertex ${id} (function definition) differs in subflow environments. ` + }) + setDifference(lInfo.subflow.graph, rInfo.subflow.graph, { + ...ctx, + position: `${ctx.position}Vertex ${id} differs in subflow graph. ` + }) } - - if(lInfo.subflow.scope !== rInfo.subflow.scope) { - ctx.report.addComment(`Vertex ${id} has different subflow scope. 
${ctx.leftname}: ${JSON.stringify(lInfo.subflow, jsonReplacer)} vs ${ctx.rightname}: ${JSON.stringify(rInfo.subflow, jsonReplacer)}`) - } - diffEnvironments(lInfo.subflow.environments, rInfo.subflow.environments, { ...ctx, position: `${ctx.position}Vertex ${id} (function definition) differs in subflow environments. ` }) - setDifference(lInfo.subflow.graph, rInfo.subflow.graph, { ...ctx, position: `${ctx.position}Vertex ${id} differs in subflow graph. ` }) } } } +function diffEdge(edge: DataflowGraphEdge, otherEdge: DataflowGraphEdge, ctx: DataflowDiffContext, id: NodeId, target: NodeId) { + if(edge.types.size !== otherEdge.types.size) { + ctx.report.addComment( + `Target of ${id}->${target} in ${ctx.leftname} differs in number of edge types: ${JSON.stringify([...edge.types])} vs ${JSON.stringify([...otherEdge.types])}`, + { tag: 'edge', from: id, to: target } + ) + } + if([...edge.types].some(e => !otherEdge.types.has(e))) { + ctx.report.addComment( + `Target of ${id}->${target} in ${ctx.leftname} differs in edge types: ${JSON.stringify([...edge.types])} vs ${JSON.stringify([...otherEdge.types])}`, + { tag: 'edge', from: id, to: target } + ) + } +} + export function diffEdges(ctx: DataflowDiffContext, id: NodeId, lEdges: OutgoingEdges | undefined, rEdges: OutgoingEdges | undefined): void { if(lEdges === undefined || rEdges === undefined) { if(lEdges !== rEdges) { - ctx.report.addComment(`Vertex ${id} has undefined outgoing edges. ${ctx.leftname}: ${JSON.stringify(lEdges, jsonReplacer)} vs ${ctx.rightname}: ${JSON.stringify(rEdges, jsonReplacer)}`) + ctx.report.addComment( + `Vertex ${id} has undefined outgoing edges. ${ctx.leftname}: ${JSON.stringify(lEdges, jsonReplacer)} vs ${ctx.rightname}: ${JSON.stringify(rEdges, jsonReplacer)}`, + { tag: 'vertex', id } + ) } return } if(lEdges.size !== rEdges.size) { - ctx.report.addComment(`Vertex ${id} has different number of outgoing edges. 
${ctx.leftname}: ${JSON.stringify(lEdges, jsonReplacer)} vs ${ctx.rightname}: ${JSON.stringify(rEdges, jsonReplacer)}`) + ctx.report.addComment( + `Vertex ${id} differs in number of outgoing edges. ${ctx.leftname}: [${[...lEdges.keys()].join(',')}] vs ${ctx.rightname}: [${[...rEdges.keys()].join(',')}] `, + { tag: 'vertex', id } + ) } // order independent compare for(const [target, edge] of lEdges) { const otherEdge = rEdges.get(target) if(otherEdge === undefined) { - ctx.report.addComment(`Target of ${id}->${target} in ${ctx.leftname} is not present in ${ctx.rightname}`) + ctx.report.addComment( + `Target of ${id}->${target} in ${ctx.leftname} is not present in ${ctx.rightname}`, + { tag: 'edge', from: id, to: target } + ) continue } - if(edge.types.size !== otherEdge.types.size) { - ctx.report.addComment(`Target of ${id}->${target} in ${ctx.leftname} has different number of edge types: ${JSON.stringify([...edge.types])} vs ${JSON.stringify([...otherEdge.types])}`) - } - if([...edge.types].some(e => !otherEdge.types.has(e))) { - ctx.report.addComment(`Target of ${id}->${target} in ${ctx.leftname} has different edge types: ${JSON.stringify([...edge.types])} vs ${JSON.stringify([...otherEdge.types])}`) - } - if(edge.attribute !== otherEdge.attribute) { - ctx.report.addComment(`Target of ${id}->${target} in ${ctx.leftname} has different attributes: ${JSON.stringify(edge.attribute)} vs ${JSON.stringify(otherEdge.attribute)}`) - } + diffEdge(edge, otherEdge, ctx, id, target) } } diff --git a/src/dataflow/graph/edge.ts b/src/dataflow/graph/edge.ts index 407180b9d6..1c81d8aa9b 100644 --- a/src/dataflow/graph/edge.ts +++ b/src/dataflow/graph/edge.ts @@ -1,22 +1,19 @@ /** - * An edge consist of the target node (i.e., the variable or processing node), - * a type (if it is read or used in the context), and an attribute (if this edge exists for every program execution or - * if it is only one possible execution path). 
+ * An edge consist of: + * - the target node (i.e., the variable or processing node), + * - a type (if it is read or used in the context), and + * - an attribute (if this edge exists for every program execution or if it is only one possible execution path). */ export interface DataflowGraphEdge { // currently multiple edges are represented by multiple types - types: Set - attribute: DataflowGraphEdgeAttribute + types: Set } -// context -- is it always read/defined-by -export type DataflowGraphEdgeAttribute = 'always' | 'maybe' - /** * Represents the relationship between the source and the target vertex in the dataflow graph. */ -export enum EdgeType { +export const enum EdgeType { /** The edge determines that source reads target */ Reads = 'reads', /** The edge determines that source is defined by target */ @@ -37,6 +34,45 @@ export enum EdgeType { Argument = 'argument', /** The edge determines that the source is a side effect that happens when the target is called */ SideEffectOnCall = 'side-effect-on-call', - /** The source and edge relate to each other bidirectionally */ - Relates = 'relates' + /** The Edge determines that the reference is affected by a non-standard evaluation (e.g., a for-loop body or a quotation) */ + NonStandardEvaluation = 'non-standard-evaluation' +} + +export const enum TraverseEdge { + /** Do not traverse this edge */ + Never = 0, + /** Traverse the edge as a side effect */ + SideEffect = 1, + /** Traverse this edge if the definition is relevant */ + DefinedByOnCall = 2, + /** Always traverse this edge */ + Always = 3 } + +const traverseEdge: Record = { + [EdgeType.Reads]: TraverseEdge.Always, + [EdgeType.DefinedBy]: TraverseEdge.Always, + [EdgeType.Argument]: TraverseEdge.Always, + [EdgeType.Calls]: TraverseEdge.Always, + [EdgeType.DefinesOnCall]: TraverseEdge.Always, + [EdgeType.DefinedByOnCall]: TraverseEdge.DefinedByOnCall, + [EdgeType.SideEffectOnCall]: TraverseEdge.SideEffect, + [EdgeType.NonStandardEvaluation]: TraverseEdge.Never, 
+ [EdgeType.SameReadRead]: TraverseEdge.Never, + [EdgeType.SameDefDef]: TraverseEdge.Never, + [EdgeType.Returns]: TraverseEdge.Never +} as const + +export function shouldTraverseEdge(types: ReadonlySet): TraverseEdge { + let highest = TraverseEdge.Never + for(const type of types) { + const v = traverseEdge[type] + if(v === TraverseEdge.Always) { + return v + } else if(v > highest) { + highest = v + } + } + return highest +} + diff --git a/src/dataflow/graph/graph.ts b/src/dataflow/graph/graph.ts index 37e7329997..fc72269f85 100644 --- a/src/dataflow/graph/graph.ts +++ b/src/dataflow/graph/graph.ts @@ -1,56 +1,77 @@ import { guard } from '../../util/assert' import type { NodeId, NoInfo, RNodeWithParent } from '../../r-bridge' -import type { - IdentifierDefinition, - IdentifierReference -} from '../environments' -import { - cloneEnvironments, - initializeCleanEnvironments -} from '../environments' +import { EmptyArgument } from '../../r-bridge' +import type { IdentifierDefinition, IdentifierReference, REnvironmentInformation } from '../environments' +import { cloneEnvironmentInformation, initializeCleanEnvironments } from '../environments' import type { BiMap } from '../../util/bimap' -import { log } from '../../util/log' -import type { DataflowGraphEdge, DataflowGraphEdgeAttribute } from './edge' +import type { DataflowGraphEdge } from './edge' import { EdgeType } from './edge' -import type { DataflowInformation } from '../internal/info' -import { - diffOfDataflowGraphs, - equalExitPoints, equalFunctionArguments -} from './diff' +import type { DataflowInformation } from '../info' +import type { DataflowDifferenceReport } from './diff' +import { diffOfDataflowGraphs, equalFunctionArguments } from './diff' import type { DataflowGraphVertexArgument, DataflowGraphVertexFunctionCall, DataflowGraphVertexFunctionDefinition, DataflowGraphVertexInfo, - DataflowGraphVertices + DataflowGraphVertices } from './vertex' +import { + VertexType } from './vertex' -import type { 
DifferenceReport } from '../../util/diff' +import { arrayEqual } from '../../util/arrays' /** Used to get an entry point for every id, after that it allows reference-chasing of the graph */ export type DataflowMap = BiMap> -export type DataflowFunctionFlowInformation = Omit & { graph: Set } +export type DataflowFunctionFlowInformation = Omit & { graph: Set } + +export interface NamedFunctionArgument extends IdentifierReference { + readonly name: string +} +export interface PositionalFunctionArgument extends Omit { + readonly name?: undefined +} +export type FunctionArgument = NamedFunctionArgument | PositionalFunctionArgument | typeof EmptyArgument -export type NamedFunctionArgument = [string, IdentifierReference | ''] -export type PositionalFunctionArgument = IdentifierReference | '' -export type FunctionArgument = NamedFunctionArgument | PositionalFunctionArgument | 'empty' +export function isPositionalArgument(arg: FunctionArgument): arg is PositionalFunctionArgument { + return arg !== EmptyArgument && arg.name === undefined +} + +export function isNamedArgument(arg: FunctionArgument): arg is NamedFunctionArgument { + return arg !== EmptyArgument && arg.name !== undefined +} + +export function getReferenceOfArgument(arg: FunctionArgument): NodeId | undefined { + if(arg !== EmptyArgument) { + return arg.nodeId + } + return undefined +} -type ReferenceForEdge = Pick | IdentifierDefinition +type ReferenceForEdge = Pick | IdentifierDefinition /** * Maps the edges target to the edge information */ -export type OutgoingEdges = Map +export type OutgoingEdges = Map /** * Similar to {@link OutgoingEdges}, but inverted regarding the edge direction. * In other words, it maps the source to the edge information. */ -export type IngoingEdges = Map +export type IngoingEdges = Map +function extractEdgeIds(from: NodeId | ReferenceForEdge, to: NodeId | ReferenceForEdge): { fromId: NodeId, toId: NodeId } { + const fromId = typeof from === 'object' ? 
from.nodeId : from + const toId = typeof to === 'object' ? to.nodeId : to + return { fromId, toId } +} + +type EdgeData = Omit & { type: EdgeType } + /** * The dataflow graph holds the dataflow information found within the given AST. * We differentiate the directed edges in {@link EdgeType} and the vertices indicated by {@link DataflowGraphVertexArgument} @@ -62,15 +83,19 @@ export type IngoingEdges = Map * * All methods return the modified graph to allow for chaining. */ -export class DataflowGraph { - private static DEFAULT_ENVIRONMENT = initializeCleanEnvironments() +export class DataflowGraph { + private static DEFAULT_ENVIRONMENT: REnvironmentInformation | undefined = undefined + + constructor() { + DataflowGraph.DEFAULT_ENVIRONMENT = initializeCleanEnvironments() + } /** Contains the vertices of the root level graph (i.e., included those vertices from the complete graph, that are nested within function definitions) */ private rootVertices: Set = new Set() /** All vertices in the complete graph (including those nested in function definition) */ - private vertexInformation: DataflowGraphVertices = new Map() + private vertexInformation: DataflowGraphVertices = new Map() /** All edges in the complete graph (including those nested in function definition) */ - private edgeInformation: Map = new Map>() + private edgeInformation: Map> = new Map>() /** * Get the {@link DataflowGraphVertexInfo} attached to a node as well as all outgoing edges. 
@@ -78,23 +103,38 @@ export class DataflowGraph { * @param id - The id of the node to get * @param includeDefinedFunctions - If true this will search function definitions as well and not just the toplevel * @returns the node info for the given id (if it exists) + * + * @see #getVertex */ - public get(id: NodeId, includeDefinedFunctions = true): [DataflowGraphVertexInfo, OutgoingEdges] | undefined { + public get(id: NodeId, includeDefinedFunctions = true): [Vertex, OutgoingEdges] | undefined { // if we do not want to include function definitions, only retrieve the value if the id is part of the root vertices - const vertex: DataflowGraphVertexInfo | undefined = includeDefinedFunctions || this.rootVertices.has(id) ? this.vertexInformation.get(id) : undefined + const vertex: Vertex | undefined = this.getVertex(id, includeDefinedFunctions) return vertex === undefined ? undefined : [vertex, this.outgoingEdges(id) ?? new Map()] } + /** + * Get the {@link DataflowGraphVertexInfo} attached to a vertex. + * + * @param id - The id of the node to get + * @param includeDefinedFunctions - If true this will search function definitions as well and not just the toplevel + * @returns the node info for the given id (if it exists) + * + * @see #get + */ + public getVertex(id: NodeId, includeDefinedFunctions = true): Vertex | undefined { + return includeDefinedFunctions || this.rootVertices.has(id) ? 
this.vertexInformation.get(id) : undefined + } + public outgoingEdges(id: NodeId): OutgoingEdges | undefined { return this.edgeInformation.get(id) } public ingoingEdges(id: NodeId): IngoingEdges | undefined { - const edges = new Map() + const edges = new Map() for(const [source, outgoing] of this.edgeInformation.entries()) { if(outgoing.has(id)) { - edges.set(source, outgoing.get(id) as DataflowGraphEdge) + edges.set(source, outgoing.get(id) as Edge) } } return edges @@ -107,12 +147,12 @@ export class DataflowGraph { * * @see #edges */ - public* vertices(includeDefinedFunctions: boolean): IterableIterator<[NodeId, DataflowGraphVertexInfo]> { + public* vertices(includeDefinedFunctions: boolean): IterableIterator<[NodeId, Vertex]> { if(includeDefinedFunctions) { yield* this.vertexInformation.entries() } else { for(const id of this.rootVertices) { - yield [id, this.vertexInformation.get(id) as DataflowGraphVertexInfo] + yield [id, this.vertexInformation.get(id) as Vertex] } } } @@ -132,7 +172,7 @@ export class DataflowGraph { * @param id - The id to check for * @param includeDefinedFunctions - If true this will check function definitions as well and not just the toplevel */ - public hasNode(id: NodeId, includeDefinedFunctions: boolean): boolean { + public hasVertex(id: NodeId, includeDefinedFunctions: boolean): boolean { return includeDefinedFunctions ? this.vertexInformation.has(id) : this.rootVertices.has(id) } @@ -151,77 +191,54 @@ export class DataflowGraph { * Adds a new vertex to the graph, for ease of use, some arguments are optional and filled automatically. * * @param vertex - The vertex to add - * @param asRoot - If false, this will only add the vertex but do not add it to the {@link rootIds | root vertices} of the graph. + * @param asRoot - If false, this will only add the vertex but do not add it to the {@link rootIds|root vertices} of the graph. * This is probably only of use, when you construct dataflow graphs for tests. 
* * @see DataflowGraphVertexInfo * @see DataflowGraphVertexArgument */ - public addVertex(vertex: DataflowGraphVertexArgument, asRoot = true): this { + public addVertex(vertex: DataflowGraphVertexArgument & Omit, asRoot = true): this { const oldVertex = this.vertexInformation.get(vertex.id) if(oldVertex !== undefined) { - guard(oldVertex.name === vertex.name, 'vertex names must match for the same id if added') + guard(oldVertex.name === vertex.name, `vertex names must match for the same id ${vertex.id} if added, but: ${JSON.stringify(oldVertex.name)} vs ${JSON.stringify(vertex.name)}`) return this } // keep a clone of the original environment - const environment = vertex.environment === undefined ? DataflowGraph.DEFAULT_ENVIRONMENT : cloneEnvironments(vertex.environment) + const environment = vertex.environment === undefined ? DataflowGraph.DEFAULT_ENVIRONMENT : cloneEnvironmentInformation(vertex.environment) this.vertexInformation.set(vertex.id, { ...vertex, - when: vertex.when ?? 'always', + when: vertex.controlDependencies ?? 
'always', environment - }) + } as unknown as Vertex) if(asRoot) { this.rootVertices.add(vertex.id) } return this } - /** Basically only exists for creations in tests, within the dataflow-extraction, this 3-argument variant will determine `attribute` automatically */ - public addEdge(from: NodeId, to: NodeId, type: EdgeType, attribute: DataflowGraphEdgeAttribute): this /** {@inheritDoc} */ - public addEdge(from: ReferenceForEdge, to: ReferenceForEdge, type: EdgeType): this + public addEdge(from: NodeId, to: NodeId, edgeInfo: EdgeData): this /** {@inheritDoc} */ - public addEdge(from: NodeId | ReferenceForEdge, to: NodeId | ReferenceForEdge, type: EdgeType, attribute?: DataflowGraphEdgeAttribute, promote?: boolean): this + public addEdge(from: ReferenceForEdge, to: ReferenceForEdge, edgeInfo: EdgeData): this + /** {@inheritDoc} */ + public addEdge(from: NodeId | ReferenceForEdge, to: NodeId | ReferenceForEdge, edgeInfo: EdgeData): this /** * Will insert a new edge into the graph, * if the direction of the edge is of no importance (`same-read-read` or `same-def-def`), source * and target will be sorted so that `from` has the lower, and `to` the higher id (default ordering). - *

- * If you omit the last argument but set promote, this will make the edge `maybe` if at least one of the {@link IdentifierReference | references} or {@link DataflowGraphVertexInfo | nodes} has a used flag of `maybe`. - * Promote will probably only be used internally and not by tests etc. */ - public addEdge(from: NodeId | ReferenceForEdge, to: NodeId | ReferenceForEdge, type: EdgeType, attribute?: DataflowGraphEdgeAttribute, promote= false): this { - const fromId = typeof from === 'object' ? from.nodeId : from - const toId = typeof to === 'object' ? to.nodeId : to + public addEdge(from: NodeId | ReferenceForEdge, to: NodeId | ReferenceForEdge, edgeInfo: EdgeData): this { + const { fromId, toId } = extractEdgeIds(from, to) + const { type, ...rest } = edgeInfo if(fromId === toId) { - log.trace(`ignoring self-edge from ${fromId} to ${toId} (${type}, ${attribute ?? '?'}, ${promote ? 'y' : 'n'})`) return this } - if(promote) { - attribute ??= (from as ReferenceForEdge).used === 'maybe' ? 'maybe' : (to as ReferenceForEdge).used - - // reduce the load on attribute checks - if(attribute !== 'maybe') { - const fromInfo = this.get(fromId, true) - if(fromInfo?.[0].when === 'maybe') { - log.trace(`automatically promoting edge from ${fromId} to ${toId} as maybe because at least one of the nodes is maybe`) - attribute = 'maybe' - } else { - const toInfo = this.get(toId, true) - if(toInfo?.[0].when === 'maybe') { - log.trace(`automatically promoting edge from ${fromId} to ${toId} as maybe because at least one of the nodes is maybe`) - attribute = 'maybe' - } - } - } - } - - guard(attribute !== undefined, 'attribute must be set') - const edge: DataflowGraphEdge = { types: new Set([type]), attribute } + /* we now that we pass all required arguments */ + const edge = { types: new Set([type]), ...rest } as unknown as Edge const existingFrom = this.edgeInformation.get(fromId) const edgeInFrom = existingFrom?.get(toId) @@ -232,40 +249,37 @@ export class DataflowGraph { } else { 
existingFrom.set(toId, edge) } + this.installEdge(type, toId, fromId, edge) + } else if(!edgeInFrom.types.has(type)) { + // adding the type + edgeInFrom.types.add(type) + } + return this + } - // sort (on id so that sorting is the same, independent of the attribute) - const bidirectional = type === 'same-read-read' || type === 'same-def-def' || type === 'relates' + private installEdge(type: EdgeType, toId: NodeId, fromId: NodeId, edge: Edge) { + // sort (on id so that sorting is the same, independent of the attribute) + const bidirectional = type === EdgeType.SameReadRead || type === EdgeType.SameDefDef - if(bidirectional) { - const existingTo = this.edgeInformation.get(toId) - if(existingTo === undefined) { - this.edgeInformation.set(toId, new Map([[fromId, edge]])) - } else { - existingTo.set(fromId, edge) - } - } else if(type === 'defines-on-call') { - const otherEdge: DataflowGraphEdge = { ...edge, - types: new Set([EdgeType.DefinedByOnCall]) - } - const existingTo = this.edgeInformation.get(toId) - if(existingTo === undefined) { - this.edgeInformation.set(toId, new Map([[fromId, otherEdge]])) - } else { - existingTo.set(fromId, otherEdge) - } + if(bidirectional) { + const existingTo = this.edgeInformation.get(toId) + if(existingTo === undefined) { + this.edgeInformation.set(toId, new Map([[fromId, edge]])) + } else { + existingTo.set(fromId, edge) } - } else { - if(attribute === 'maybe') { - // as the data is shared, we can just set it for one direction - edgeInFrom.attribute = 'maybe' + } else if(type === EdgeType.DefinesOnCall) { + const otherEdge: Edge = { + ...edge, + types: new Set([EdgeType.DefinedByOnCall]) } - - if(!edgeInFrom.types.has(type)) { - // adding the type - edgeInFrom.types.add(type) + const existingTo = this.edgeInformation.get(toId) + if(existingTo === undefined) { + this.edgeInformation.set(toId, new Map([[fromId, otherEdge]])) + } else { + existingTo.set(fromId, otherEdge) } } - return this } /** @@ -275,7 +289,7 @@ export class 
DataflowGraph { * @param mergeRootVertices - If false, this will only merge the vertices and edges but exclude the root vertices this is probably only of use * in the context of function definitions */ - public mergeWith(otherGraph: DataflowGraph | undefined, mergeRootVertices = true): this { + public mergeWith(otherGraph: DataflowGraph | undefined, mergeRootVertices = true): this { if(otherGraph === undefined) { return this } @@ -296,7 +310,7 @@ export class DataflowGraph { return this } - private mergeEdges(otherGraph: DataflowGraph) { + private mergeEdges(otherGraph: DataflowGraph) { for(const [id, edges] of otherGraph.edgeInformation.entries()) { for(const [target, edge] of edges) { const existing = this.edgeInformation.get(id) @@ -308,18 +322,15 @@ export class DataflowGraph { existing.set(target, edge) } else { get.types = new Set([...get.types, ...edge.types]) - if(edge.attribute === 'maybe') { - get.attribute = 'maybe' - } } } } } } - public equals(other: DataflowGraph, diff: true, names?: { left: string, right: string }): DifferenceReport - public equals(other: DataflowGraph, diff?: false, names?: { left: string, right: string }): boolean - public equals(other: DataflowGraph, diff = false, names = { left: 'left', right: 'right' }): boolean | DifferenceReport { + public equals(other: DataflowGraph, diff: true, names?: { left: string, right: string }): DataflowDifferenceReport + public equals(other: DataflowGraph, diff?: false, names?: { left: string, right: string }): boolean + public equals(other: DataflowGraph, diff = false, names = { left: 'left', right: 'right' }): boolean | DataflowDifferenceReport { const report = diffOfDataflowGraphs({ name: names.left, graph: this }, { name: names.right, graph: other }) if(diff) { return report @@ -333,36 +344,32 @@ export class DataflowGraph { * @param reference - The reference to the vertex to mark as definition */ public setDefinitionOfVertex(reference: IdentifierReference): void { - const got = 
this.get(reference.nodeId, true) - guard(got !== undefined, () => `node must be defined for ${JSON.stringify(reference)} to set definition scope to ${reference.scope}`) - const [node] = got - if(node.tag === 'function-definition' || node.tag === 'variable-definition') { - guard(node.scope === reference.scope, () => `node ${JSON.stringify(node)} must not be previously defined at position or have same scope for ${JSON.stringify(reference)}`) - guard(node.when === reference.used || node.when === 'maybe' || reference.used === 'maybe', () => `node ${JSON.stringify(node)} must not be previously defined at position or have same scope for ${JSON.stringify(reference)}`) - node.scope = reference.scope - node.when = reference.used === 'maybe' ? 'maybe' : node.when + const vertex = this.getVertex(reference.nodeId, true) + guard(vertex !== undefined, () => `node must be defined for ${JSON.stringify(reference)} to set reference`) + if(vertex.tag === VertexType.FunctionDefinition || vertex.tag === VertexType.VariableDefinition) { + guard(vertex.controlDependencies !== undefined + || reference.controlDependencies !== undefined + || arrayEqual(vertex.controlDependencies, reference.controlDependencies), + () => `node ${JSON.stringify(vertex)} must not be previously defined at position or have same scope for ${JSON.stringify(reference)}`) + vertex.controlDependencies = reference.controlDependencies } else { - this.vertexInformation.set(reference.nodeId, { - ...node, - tag: 'variable-definition', - scope: reference.scope, - }) + this.vertexInformation.set(reference.nodeId, { ...vertex, tag: 'variable-definition' }) } } } -function mergeNodeInfos(current: DataflowGraphVertexInfo, next: DataflowGraphVertexInfo): DataflowGraphVertexInfo { +function mergeNodeInfos(current: Vertex, next: Vertex): Vertex { guard(current.tag === next.tag, () => `nodes to be joined for the same id must have the same tag, but ${JSON.stringify(current)} vs ${JSON.stringify(next)}`) guard(current.name === 
next.name, () => `nodes to be joined for the same id must have the same name, but ${JSON.stringify(current)} vs ${JSON.stringify(next)}`) guard(current.environment === next.environment, 'nodes to be joined for the same id must have the same environment') if(current.tag === 'variable-definition') { guard(current.scope === next.scope, 'nodes to be joined for the same id must have the same scope') - } else if(current.tag === 'function-call') { - guard(equalFunctionArguments(current.args, (next as DataflowGraphVertexFunctionCall).args), 'nodes to be joined for the same id must have the same function call information') + } else if(current.tag === VertexType.FunctionCall) { + guard(equalFunctionArguments(current.id, current.args, (next as DataflowGraphVertexFunctionCall).args), 'nodes to be joined for the same id must have the same function call information') } else if(current.tag === 'function-definition') { guard(current.scope === next.scope, 'nodes to be joined for the same id must have the same scope') - guard(equalExitPoints(current.exitPoints, (next as DataflowGraphVertexFunctionDefinition).exitPoints), 'nodes to be joined must have same exist points') + guard(arrayEqual(current.exitPoints, (next as DataflowGraphVertexFunctionDefinition).exitPoints), 'nodes to be joined must have same exist points') } // make a copy diff --git a/src/dataflow/graph/quads.ts b/src/dataflow/graph/quads.ts index 5e5fa1c667..b7b869a38e 100644 --- a/src/dataflow/graph/quads.ts +++ b/src/dataflow/graph/quads.ts @@ -20,7 +20,6 @@ export function df2quads(graph: DataflowGraph, config: QuadSerializationConfigur from: fromId, to: toId, type: [...info.types], - when: info.attribute })) ) }, diff --git a/src/dataflow/graph/vertex.ts b/src/dataflow/graph/vertex.ts index aa42fc1d17..faab8748c1 100644 --- a/src/dataflow/graph/vertex.ts +++ b/src/dataflow/graph/vertex.ts @@ -1,10 +1,18 @@ import type { MergeableRecord } from '../../util/objects' import type { NodeId } from '../../r-bridge' -import 
type { DataflowScopeName, REnvironmentInformation } from '../environments' -import type { DataflowGraphEdgeAttribute } from './edge' +import type { REnvironmentInformation } from '../environments' import type { DataflowFunctionFlowInformation, FunctionArgument } from './graph' -export type DataflowGraphVertices = Map +export type DataflowGraphVertices = Map + + +export const enum VertexType { + Value = 'value', + Use = 'use', + FunctionCall = 'function-call', + VariableDefinition = 'variable-definition', + FunctionDefinition = 'function-definition' +} /** * Arguments required to construct a vertex in the dataflow graph. @@ -17,66 +25,62 @@ interface DataflowGraphVertexBase extends MergeableRecord { /** * Used to identify and separate different types of vertices. */ - readonly tag: string + readonly tag: VertexType /** * The id of the node (the id assigned by the {@link ParentInformation} decoration) */ - id: NodeId + id: NodeId /** * The name of the node, usually the variable name */ - name: string + name: string /** - * The environment in which the node is defined. - * If you do not provide an explicit environment, this will use the same clean one (with {@link initializeCleanEnvironments}). + * The environment in which the vertex is set. */ - environment?: REnvironmentInformation + environment?: REnvironmentInformation | undefined /** - * Is this node part of every local execution trace or only in some. - * If you do not provide an explicit value, this will default to `always`. + * See {@link IdentifierReference} */ - when?: DataflowGraphEdgeAttribute + controlDependencies: NodeId[] | undefined } -/** - * Arguments required to construct a vertex which represents the usage of a variable in the dataflow graph. 
- */ -export interface DataflowGraphExitPoint extends DataflowGraphVertexBase { - readonly tag: 'exit-point' +export const CONSTANT_NAME = '__@@C@@__' +export interface DataflowGraphValue extends DataflowGraphVertexBase { + readonly tag: VertexType.Value + readonly name: typeof CONSTANT_NAME + /* currently without containing the 'real' value as it is part of the normalized AST as well */ + readonly environment?: undefined } /** * Arguments required to construct a vertex which represents the usage of a variable in the dataflow graph. */ export interface DataflowGraphVertexUse extends DataflowGraphVertexBase { - readonly tag: 'use' + readonly tag: VertexType.Use + readonly environment?: undefined } /** * Arguments required to construct a vertex which represents the usage of a variable in the dataflow graph. */ export interface DataflowGraphVertexFunctionCall extends DataflowGraphVertexBase { - readonly tag: 'function-call' - args: FunctionArgument[] + readonly tag: VertexType.FunctionCall + args: FunctionArgument[] + /** a performance flag to indicate that the respective call is _only_ calling a builtin function without any df graph attached */ + onlyBuiltin: boolean + readonly environment?: REnvironmentInformation } /** * Arguments required to construct a vertex which represents the definition of a variable in the dataflow graph. */ export interface DataflowGraphVertexVariableDefinition extends DataflowGraphVertexBase { - readonly tag: 'variable-definition' - /** - * The scope in which the vertex is defined (can be global or local to the current environment). - */ - scope: DataflowScopeName + readonly tag: VertexType.VariableDefinition + readonly environment?: undefined } export interface DataflowGraphVertexFunctionDefinition extends DataflowGraphVertexBase { - readonly tag: 'function-definition' - /** - * The scope in which the vertex is defined (can be global or local to the current environment). 
- */ - scope: DataflowScopeName + readonly tag: VertexType.FunctionDefinition /** * The static subflow of the function definition, constructed within {@link processFunctionDefinition}. * If the vertex is (for example) a function, it can have a subgraph which is used as a template for each call. @@ -86,8 +90,9 @@ export interface DataflowGraphVertexFunctionDefinition extends DataflowGraphVert * All exist points of the function definitions. * In other words: last expressions/return calls */ - exitPoints: NodeId[] + exitPoints: readonly NodeId[] + environment?: REnvironmentInformation } -export type DataflowGraphVertexArgument = DataflowGraphVertexUse | DataflowGraphExitPoint | DataflowGraphVertexVariableDefinition | DataflowGraphVertexFunctionDefinition | DataflowGraphVertexFunctionCall +export type DataflowGraphVertexArgument = DataflowGraphVertexUse | DataflowGraphVertexVariableDefinition | DataflowGraphVertexFunctionDefinition | DataflowGraphVertexFunctionCall | DataflowGraphValue export type DataflowGraphVertexInfo = Required diff --git a/src/dataflow/index.ts b/src/dataflow/index.ts index 10953050ef..4e4ca54294 100644 --- a/src/dataflow/index.ts +++ b/src/dataflow/index.ts @@ -6,3 +6,5 @@ export * from './graph' export * from './extractor' export * from './environments/environment' export * from '../util/mermaid/dfg' +export { diffIdentifierReferences, diffEnvironmentInformation, diffEnvironment, cloneEnvironmentInformation } from './environments' +export { Identifier, IdentifierDefinition, IdentifierReference, BuiltInEnvironment, BuiltIn } from './environments' diff --git a/src/dataflow/info.ts b/src/dataflow/info.ts new file mode 100644 index 0000000000..d671f1517b --- /dev/null +++ b/src/dataflow/info.ts @@ -0,0 +1,72 @@ +import { DataflowGraph } from './graph' +import type { REnvironmentInformation, IdentifierReference } from './environments' +import type { DataflowProcessorInformation } from './processor' +import type { NodeId } from '../r-bridge' + +export 
const enum ExitPointType { + Default = 0, + Return = 1, + Break = 2, + Next = 3 +} + +export interface ExitPoint { + readonly type: ExitPointType, + readonly nodeId: NodeId, + readonly controlDependencies: NodeId[] | undefined +} + +export function addNonDefaultExitPoints(existing: ExitPoint[], add: readonly ExitPoint[]): void { + existing.push(...add.filter(({ type }) => type !== ExitPointType.Default)) +} + +/** + * The control flow information for the current {@link DataflowInformation}. + */ +export interface DataflowCfgInformation { + /** + * The entry node into the subgraph + */ + entryPoint: NodeId, + /** + * All already identified exit points (active 'return'/'break'/'next'-likes) of the respective structure. + */ + exitPoints: readonly ExitPoint[] +} + +/** + * The dataflow information is continuously updated during the dataflow analysis + * and holds its current state for the respective subtree processed. + */ +export interface DataflowInformation extends DataflowCfgInformation { + /** References that have not been identified as read or write and will be so on higher */ + unknownReferences: readonly IdentifierReference[] + /** References which are read */ + in: readonly IdentifierReference[] + /** References which are written to */ + out: readonly IdentifierReference[] + /** Current environments used for name resolution, probably updated on the next expression-list processing */ + environment: REnvironmentInformation + /** The current constructed dataflow graph */ + graph: DataflowGraph +} + +export function initializeCleanDataflowInformation(entryPoint: NodeId, data: Pick, 'environment'>): DataflowInformation { + return { + unknownReferences: [], + in: [], + out: [], + environment: data.environment, + graph: new DataflowGraph(), + entryPoint, + exitPoints: [{ nodeId: entryPoint, type: ExitPointType.Default, controlDependencies: undefined }] + } +} + +export function alwaysExits(data: DataflowInformation): boolean { + return data.exitPoints?.some(e => 
e.type !== ExitPointType.Default && e.controlDependencies === undefined) ?? false +} + +export function filterOutLoopExitPoints(exitPoints: readonly ExitPoint[]): readonly ExitPoint[] { + return exitPoints.filter(({ type }) => type === ExitPointType.Return || type === ExitPointType.Default) +} diff --git a/src/dataflow/internal/info.ts b/src/dataflow/internal/info.ts deleted file mode 100644 index 2afa60c555..0000000000 --- a/src/dataflow/internal/info.ts +++ /dev/null @@ -1,32 +0,0 @@ -import { DataflowGraph } from '../graph' -import type { REnvironmentInformation, IdentifierReference, DataflowScopeName } from '../environments' -import type { DataflowProcessorInformation } from '../processor' - -/** - * Continuously updated during the dataflow analysis to hold the current state. - */ -export interface DataflowInformation { - /** Nodes that have not been identified as read or write and will be so on higher */ - unknownReferences: IdentifierReference[] - /** Nodes which are read */ - in: IdentifierReference[] - /** Nodes which are written to */ - out: IdentifierReference[] - /** The current scope during the fold */ - scope: DataflowScopeName - /** Current environments used for name resolution, probably updated on the next expression-list processing */ - environments: REnvironmentInformation - /** The current constructed dataflow graph */ - graph: DataflowGraph -} - -export function initializeCleanInfo(data: DataflowProcessorInformation): DataflowInformation { - return { - unknownReferences: [], - in: [], - out: [], - scope: data.activeScope, - environments: data.environments, - graph: new DataflowGraph() - } -} diff --git a/src/dataflow/internal/linker.ts b/src/dataflow/internal/linker.ts index 01e7e79926..8ae1dad62e 100644 --- a/src/dataflow/internal/linker.ts +++ b/src/dataflow/internal/linker.ts @@ -2,26 +2,21 @@ import type { DataflowGraph, DataflowGraphVertexFunctionCall, DataflowGraphVertexInfo, - FunctionArgument, - NamedFunctionArgument, - 
PositionalFunctionArgument -} from '../graph' -import type { - DataflowScopeName, - IdentifierReference, - REnvironmentInformation } from '../environments' + FunctionArgument } from '../graph' import { - BuiltIn, - resolveByName -} from '../environments' + CONSTANT_NAME, + isNamedArgument, + VertexType +} from '../graph' +import type { IdentifierReference, REnvironmentInformation } from '../environments' +import { BuiltIn, resolveByName } from '../environments' import { DefaultMap } from '../../util/defaultmap' import { guard } from '../../util/assert' -import { log } from '../../util/log' +import { expensiveTrace, log } from '../../util/log' import type { DecoratedAstMap, NodeId, ParentInformation, RParameter } from '../../r-bridge' -import { RType } from '../../r-bridge' +import { EmptyArgument, RType } from '../../r-bridge' import { slicerLogger } from '../../slicing' import { dataflowLogger, EdgeType } from '../index' -import { LocalScope } from '../environments/scopes' export function linkIngoingVariablesInSameScope(graph: DataflowGraph, references: IdentifierReference[]): void { const nameIdShares = produceNameSharedIdMap(references) @@ -33,41 +28,27 @@ export type NameIdMap = DefaultMap export function produceNameSharedIdMap(references: IdentifierReference[]): NameIdMap { const nameIdShares = new DefaultMap(() => []) for(const reference of references) { - nameIdShares.get(reference.name).push(reference) + if(reference.name) { + nameIdShares.get(reference.name).push(reference) + } } return nameIdShares } export function linkReadVariablesInSameScopeWithNames(graph: DataflowGraph, nameIdShares: DefaultMap) { - for(const ids of nameIdShares.values()) { - if(ids.length <= 1) { + for(const [name, ids] of nameIdShares.entries()) { + if(ids.length <= 1 || name === CONSTANT_NAME) { continue } const base = ids[0] for(let i = 1; i < ids.length; i++) { - graph.addEdge(base.nodeId, ids[i].nodeId, EdgeType.SameReadRead, 'always', true) + graph.addEdge(base.nodeId, 
ids[i].nodeId, { type: EdgeType.SameReadRead }) } } } -function specialReturnFunction(info: DataflowGraphVertexFunctionCall, graph: DataflowGraph, id: NodeId) { - if(info.args.length > 1) { - dataflowLogger.error(`expected up to one argument for return, but got ${info.args.length}`) - } - for(const arg of info.args) { - if(Array.isArray(arg)) { - if(arg[1] !== '') { - graph.addEdge(id, arg[1], EdgeType.Returns, 'always') - } - } else if(arg !== '') { - graph.addEdge(id, arg, EdgeType.Returns, 'always') - } - } -} - - export function linkArgumentsOnCall(args: FunctionArgument[], params: RParameter[], graph: DataflowGraph): void { - const nameArgMap = new Map'>(args.filter(Array.isArray) as NamedFunctionArgument[]) + const nameArgMap = new Map(args.filter(isNamedArgument).map(a => [a.name, a] as const)) const nameParamMap = new Map>(params.map(p => [p.name.content, p])) const specialDotParameter = params.find(p => p.special) @@ -78,34 +59,30 @@ export function linkArgumentsOnCall(args: FunctionArgument[], params: RParameter // first map names for(const [name, arg] of nameArgMap) { - if(arg === '') { - dataflowLogger.trace(`skipping value argument for ${name}`) - continue - } const param = nameParamMap.get(name) if(param !== undefined) { dataflowLogger.trace(`mapping named argument "${name}" to parameter "${param.name.content}"`) - graph.addEdge(arg.nodeId, param.name.info.id, EdgeType.DefinesOnCall, 'always') + graph.addEdge(arg.nodeId, param.name.info.id, { type: EdgeType.DefinesOnCall }) matchedParameters.add(name) } else if(specialDotParameter !== undefined) { dataflowLogger.trace(`mapping named argument "${name}" to dot-dot-dot parameter`) - graph.addEdge(arg.nodeId, specialDotParameter.name.info.id, EdgeType.DefinesOnCall, 'always') + graph.addEdge(arg.nodeId, specialDotParameter.name.info.id, { type: EdgeType.DefinesOnCall }) } } const remainingParameter = params.filter(p => !matchedParameters.has(p.name.content)) - const remainingArguments = args.filter(a => 
!Array.isArray(a)) as (PositionalFunctionArgument | 'empty')[] + const remainingArguments = args.filter(a => !isNamedArgument(a)) for(let i = 0; i < remainingArguments.length; i++) { - const arg: PositionalFunctionArgument | 'empty' = remainingArguments[i] - if(arg === '' || arg === 'empty') { + const arg = remainingArguments[i] + if(arg === EmptyArgument) { dataflowLogger.trace(`skipping value argument for ${i}`) continue } if(remainingParameter.length <= i) { if(specialDotParameter !== undefined) { dataflowLogger.trace(`mapping unnamed argument ${i} (id: ${arg.nodeId}) to dot-dot-dot parameter`) - graph.addEdge(arg.nodeId, specialDotParameter.name.info.id, EdgeType.DefinesOnCall, 'always') + graph.addEdge(arg.nodeId, specialDotParameter.name.info.id, { type: EdgeType.DefinesOnCall }) } else { dataflowLogger.error(`skipping argument ${i} as there is no corresponding parameter - R should block that`) } @@ -113,7 +90,7 @@ export function linkArgumentsOnCall(args: FunctionArgument[], params: RParameter } const param = remainingParameter[i] dataflowLogger.trace(`mapping unnamed argument ${i} (id: ${arg.nodeId}) to parameter "${param.name.content}"`) - graph.addEdge(arg.nodeId, param.name.info.id, EdgeType.DefinesOnCall, 'always') + graph.addEdge(arg.nodeId, param.name.info.id, { type: EdgeType.DefinesOnCall }) } } @@ -130,7 +107,7 @@ function linkFunctionCallArguments(targetId: NodeId, idMap: DecoratedAstMap, fun dataflowLogger.trace(`function call definition base ${functionCallName} does not lead to a function definition (${functionRootId}) but got ${linkedFunction.type}`) return } - dataflowLogger.trace(`linking arguments for ${functionCallName} (${functionRootId}) to ${JSON.stringify(linkedFunction.location)}`) + expensiveTrace(dataflowLogger, () => `linking arguments for ${functionCallName} (${functionRootId}) to ${JSON.stringify(linkedFunction.location)}`) linkArgumentsOnCall(callArgs, linkedFunction.parameters, finalGraph) } @@ -139,35 +116,37 @@ function 
linkFunctionCall(graph: DataflowGraph, id: NodeId, info: DataflowGraphV functionCall: NodeId; called: DataflowGraphVertexInfo[] }[]) { - const edges = graph.get(id, true) - guard(edges !== undefined, () => `id ${id} must be present in graph`) + const edges = graph.outgoingEdges(id) + if(edges === undefined) { + /* no outgoing edges */ + return + } - const functionDefinitionReadIds = [...edges[1]].filter(([_, e]) => e.types.has(EdgeType.Reads) || e.types.has(EdgeType.Calls) || e.types.has(EdgeType.Relates)).map(([target, _]) => target) + const functionDefinitionReadIds = [...edges].filter(([_, e]) => !e.types.has(EdgeType.Argument) && (e.types.has(EdgeType.Reads) || e.types.has(EdgeType.Calls))).map(([target, _]) => target) const functionDefs = getAllLinkedFunctionDefinitions(new Set(functionDefinitionReadIds), graph) - for(const def of functionDefs.values()) { - guard(def.tag === 'function-definition', () => `expected function definition, but got ${def.tag}`) + guard(def.tag === VertexType.FunctionDefinition, () => `expected function definition, but got ${def.tag}`) if(info.environment !== undefined) { - // for each open ingoing reference, try to resolve it here, and if so add a read edge from the call to signal that it reads it + // for each open ingoing reference, try to resolve it here, and if so, add a read edge from the call to signal that it reads it for(const ingoing of def.subflow.in) { - const defs = resolveByName(ingoing.name, LocalScope, info.environment) + const defs = ingoing.name ? 
resolveByName(ingoing.name, info.environment) : undefined if(defs === undefined) { continue } for(const def of defs) { - graph.addEdge(id, def, EdgeType.Reads, 'always') + graph.addEdge(id, def, { type: EdgeType.Reads }) } } } const exitPoints = def.exitPoints for(const exitPoint of exitPoints) { - graph.addEdge(id, exitPoint, EdgeType.Returns, 'always') + graph.addEdge(id, exitPoint, { type: EdgeType.Returns }) } dataflowLogger.trace(`recording expression-list-level call from ${info.name} to ${def.name}`) - graph.addEdge(id, def.id, EdgeType.Calls, 'always') + graph.addEdge(id, def.id, { type: EdgeType.Calls }) linkFunctionCallArguments(def.id, idMap, def.name, id, info.args, graph) } if(thisGraph.isRoot(id)) { @@ -179,17 +158,16 @@ function linkFunctionCall(graph: DataflowGraph, id: NodeId, info: DataflowGraphV * Returns the called functions within the current graph, which can be used to merge the environments with the call. * Furthermore, it links the corresponding arguments. */ -export function linkFunctionCalls(graph: DataflowGraph, idMap: DecoratedAstMap, functionCalls: [NodeId, DataflowGraphVertexInfo][], thisGraph: DataflowGraph): { functionCall: NodeId, called: DataflowGraphVertexInfo[] }[] { +export function linkFunctionCalls( + graph: DataflowGraph, + idMap: DecoratedAstMap, + thisGraph: DataflowGraph +): { functionCall: NodeId, called: readonly DataflowGraphVertexInfo[] }[] { + const functionCalls = [...thisGraph.vertices(true)] + .filter(([_,info]) => info.tag === VertexType.FunctionCall) const calledFunctionDefinitions: { functionCall: NodeId, called: DataflowGraphVertexInfo[] }[] = [] for(const [id, info] of functionCalls) { - guard(info.tag === 'function-call', () => `encountered non-function call in function call linkage ${JSON.stringify(info)}`) - - if(info.name === 'return') { - specialReturnFunction(info, graph, id) - graph.addEdge(id, BuiltIn, EdgeType.Calls, 'always') - continue - } - linkFunctionCall(graph, id, info, idMap, thisGraph, 
calledFunctionDefinitions) + linkFunctionCall(graph, id, info as DataflowGraphVertexFunctionCall, idMap, thisGraph, calledFunctionDefinitions) } return calledFunctionDefinitions } @@ -218,12 +196,11 @@ export function getAllLinkedFunctionDefinitions(functionDefinitionReadIds: Set e.types.has(EdgeType.Returns)) if(returnEdges.length > 0) { - // only traverse return edges and do not follow calls etc. as this indicates that we have a function call which returns a result, and not the function call itself + // only traverse return edges and do not follow calls etc. as this indicates that we have a function call which returns a result, and not the function calls itself potential.push(...returnEdges.map(([target]) => target).filter(id => !visited.has(id))) continue } - const followEdges = outgoingEdges.filter(([_, e]) => e.types.has(EdgeType.Reads) || e.types.has(EdgeType.DefinedBy) || e.types.has(EdgeType.DefinedByOnCall) || e.types.has(EdgeType.Relates)) - + const followEdges = outgoingEdges.filter(([_, e]) => e.types.has(EdgeType.Reads) || e.types.has(EdgeType.DefinedBy) || e.types.has(EdgeType.DefinedByOnCall)) if(currentInfo[0].subflow !== undefined) { result.set(currentId, currentInfo[0]) @@ -239,7 +216,6 @@ export function getAllLinkedFunctionDefinitions(functionDefinitionReadIds: Set() for(const out of outgoing) { - lastOutgoing.set(out.name, out) + if(out.name) { + lastOutgoing.set(out.name, out) + } } for(const [name, targets] of openIns.entries()) { for(const out of lastOutgoing.values()) { if(out.name === name) { for(const target of targets) { - graph.addEdge(target.nodeId, out.nodeId, EdgeType.Reads, 'maybe') + graph.addEdge(target.nodeId, out.nodeId, { type: EdgeType.Reads }) } } } diff --git a/src/dataflow/internal/process/access.ts b/src/dataflow/internal/process/access.ts deleted file mode 100644 index 524ddf0d68..0000000000 --- a/src/dataflow/internal/process/access.ts +++ /dev/null @@ -1,56 +0,0 @@ -import type { ParentInformation, RAccess } from 
'../../../r-bridge' -import type { DataflowInformation } from '../info' -import type { DataflowProcessorInformation } from '../../processor' -import { processDataflowFor } from '../../processor' -import { makeAllMaybe, overwriteEnvironments } from '../../environments' -import { EdgeType } from '../../graph' - -export function processAccess(node: RAccess, data: DataflowProcessorInformation): DataflowInformation { - const processedAccessed = processDataflowFor(node.accessed, data) - const nextGraph = processedAccessed.graph - const outgoing = processedAccessed.out - const ingoing = processedAccessed.in - const environments = processedAccessed.environments - - const accessedNodes = processedAccessed.unknownReferences - - if(node.operator === '[' || node.operator === '[[') { - for(const access of node.access) { - if(access === null || access.value === undefined) { - continue - } - const processedAccess = processDataflowFor(access, data) - - nextGraph.mergeWith(processedAccess.graph) - // outgoing.push() - // we link to *out* instead of *in*, as access uses arguments for parsing and the arguments are defined - for(const newIn of [...processedAccess.out, ...processedAccess.unknownReferences]) { - for(const accessedNode of accessedNodes) { - nextGraph.addEdge(accessedNode, newIn, EdgeType.Reads, 'always') - } - } - ingoing.push(...processedAccess.in, ...processedAccess.unknownReferences) - overwriteEnvironments(environments, processedAccess.environments) - } - } - - return { - /* - * keep active nodes in case of assignments etc. - * We make them maybe as a kind of hack. - * This way when using - * ```ts - * a[[1]] <- 3 - * a[[2]] <- 4 - * a - * ``` - * the read for a will use both accesses as potential definitions and not just the last one! 
- */ - unknownReferences: makeAllMaybe(processedAccessed.unknownReferences, nextGraph, environments), - in: ingoing, - out: outgoing, - environments: environments, - scope: data.activeScope, - graph: nextGraph - } -} diff --git a/src/dataflow/internal/process/expression-list.ts b/src/dataflow/internal/process/expression-list.ts deleted file mode 100644 index c350805faa..0000000000 --- a/src/dataflow/internal/process/expression-list.ts +++ /dev/null @@ -1,188 +0,0 @@ -/** - * Processes a list of expressions joining their dataflow graphs accordingly. - * @module - */ -import type { DataflowInformation } from '../info' -import { initializeCleanInfo } from '../info' -import type { NodeId, ParentInformation, RExpressionList } from '../../../r-bridge' -import { RType, visitAst } from '../../../r-bridge' -import type { DataflowProcessorInformation } from '../../processor' -import { processDataflowFor } from '../../processor' -import type { - IdentifierReference, IEnvironment, - REnvironmentInformation } from '../../environments' -import { makeAllMaybe, - overwriteEnvironments, popLocalEnvironment, - resolveByName -} from '../../environments' -import { linkFunctionCalls, linkReadVariablesInSameScopeWithNames } from '../linker' -import { DefaultMap } from '../../../util/defaultmap' -import type { DataflowGraphVertexInfo } from '../../graph' -import { DataflowGraph } from '../../graph' -import { dataflowLogger, EdgeType } from '../../index' -import { guard } from '../../../util/assert' - - -const dotDotDotAccess = /\.\.\d+/ -function linkReadNameToWriteIfPossible(read: IdentifierReference, data: DataflowProcessorInformation, environments: REnvironmentInformation, listEnvironments: Set, remainingRead: Map, nextGraph: DataflowGraph) { - const readName = dotDotDotAccess.test(read.name) ? '...' 
: read.name - - const probableTarget = resolveByName(readName, data.activeScope, environments) - - // record if at least one has not been defined - if(probableTarget === undefined || probableTarget.some(t => !listEnvironments.has(t.nodeId))) { - if(remainingRead.has(readName)) { - remainingRead.get(readName)?.push(read) - } else { - remainingRead.set(readName, [read]) - } - } - - // keep it, for we have no target, as read-ids are unique within same fold, this should work for same links - // we keep them if they are defined outside the current parent and maybe throw them away later - if(probableTarget === undefined) { - return - } - - for(const target of probableTarget) { - // we can stick with maybe even if readId.attribute is always - nextGraph.addEdge(read, target, EdgeType.Reads, undefined, true) - } -} - - -function processNextExpression( - currentElement: DataflowInformation, - data: DataflowProcessorInformation, - environments: REnvironmentInformation, - listEnvironments: Set, - remainingRead: Map, - nextGraph: DataflowGraph -) { - // all inputs that have not been written until know, are read! 
- for(const read of [...currentElement.in, ...currentElement.unknownReferences]) { - linkReadNameToWriteIfPossible(read, data, environments, listEnvironments, remainingRead, nextGraph) - } - // add same variable reads for deferred if they are read previously but not dependent - for(const writeTarget of currentElement.out) { - const writeName = writeTarget.name - - const resolved = resolveByName(writeName, data.activeScope, environments) - if(resolved !== undefined) { - // write-write - for(const target of resolved) { - nextGraph.addEdge(target, writeTarget, EdgeType.SameDefDef, undefined, true) - } - } - } -} - -function updateSideEffectsForCalledFunctions(calledEnvs: { - functionCall: NodeId; - called: DataflowGraphVertexInfo[] -}[], environments: REnvironmentInformation, nextGraph: DataflowGraph) { - for(const { functionCall, called } of calledEnvs) { - for(const calledFn of called) { - guard(calledFn.tag === 'function-definition', 'called function must call a function definition') - // only merge the environments they have in common - let environment = calledFn.environment - while(environment.level > environments.level) { - environment = popLocalEnvironment(environment) - } - // update alle definitions to be defined at this function call - let current: IEnvironment | undefined = environment.current - while(current !== undefined) { - for(const definitions of current.memory.values()) { - for(const def of definitions) { - if(def.kind !== 'built-in-function') { - nextGraph.addEdge(def.nodeId, functionCall, EdgeType.SideEffectOnCall, def.used) - } - } - } - current = current.parent - } - // we update all definitions to be linked with teh corresponding function call - environments = overwriteEnvironments(environments, environment) - } - } - return environments -} - -export function processExpressionList(exprList: RExpressionList, data: DataflowProcessorInformation): DataflowInformation { - const expressions = exprList.children - dataflowLogger.trace(`processing 
expression list with ${expressions.length} expressions`) - if(expressions.length === 0) { - return initializeCleanInfo(data) - } - - let environments = data.environments - // used to detect if a "write" happens within the same expression list - const listEnvironments: Set = new Set() - - const remainingRead = new Map() - - const nextGraph = new DataflowGraph() - const out = [] - - let expressionCounter = 0 - let foundNextOrBreak = false - for(const expression of expressions) { - dataflowLogger.trace(`processing expression ${++expressionCounter} of ${expressions.length}`) - // use the current environments for processing - data = { ...data, environments } - const processed = processDataflowFor(expression, data) - // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- seems to be a bug in eslint - if(!foundNextOrBreak) { - visitAst(expression, n => { - if(n.type === RType.Next || n.type === RType.Break) { - foundNextOrBreak = true - } - return n.type === RType.ForLoop || n.type === RType.WhileLoop || n.type === RType.RepeatLoop || n.type === RType.FunctionDefinition - }) - } - // if the expression contained next or break anywhere before the next loop, the overwrite should be an append because we do not know if the rest is executed - // update the environments for the next iteration with the previous writes - // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- seems to be a bug in eslint - if(foundNextOrBreak) { - processed.out = makeAllMaybe(processed.out, nextGraph, processed.environments) - processed.in = makeAllMaybe(processed.in, nextGraph, processed.environments) - processed.unknownReferences = makeAllMaybe(processed.unknownReferences, nextGraph, processed.environments) - } - - nextGraph.mergeWith(processed.graph) - out.push(...processed.out) - - dataflowLogger.trace(`expression ${expressionCounter} of ${expressions.length} has ${processed.unknownReferences.length} unknown nodes`) - - processNextExpression(processed, 
data, environments, listEnvironments, remainingRead, nextGraph) - const functionCallIds = [...processed.graph.vertices(true)] - .filter(([_,info]) => info.tag === 'function-call') - - const calledEnvs = linkFunctionCalls(nextGraph, data.completeAst.idMap, functionCallIds, processed.graph) - - environments = overwriteEnvironments(environments, processed.environments) - - // if the called function has global redefinitions, we have to keep them within our environment - environments = updateSideEffectsForCalledFunctions(calledEnvs, environments, nextGraph) - - for(const { nodeId } of processed.out) { - listEnvironments.add(nodeId) - } - } - - - // now, we have to link same reads - linkReadVariablesInSameScopeWithNames(nextGraph, new DefaultMap(() => [], remainingRead)) - - dataflowLogger.trace(`expression list exits with ${remainingRead.size} remaining read names`) - - return { - /* no active nodes remain, they are consumed within the remaining read collection */ - unknownReferences: [], - in: [...remainingRead.values()].flat(), - out, - environments, - scope: data.activeScope, - graph: nextGraph - } -} diff --git a/src/dataflow/internal/process/functions/call/argument/make-argument.ts b/src/dataflow/internal/process/functions/call/argument/make-argument.ts new file mode 100644 index 0000000000..304cfb69b7 --- /dev/null +++ b/src/dataflow/internal/process/functions/call/argument/make-argument.ts @@ -0,0 +1,43 @@ +import type { + DecoratedAstMap, + ParentInformation, + RNode, + RUnnamedArgument +} from '../../../../../../r-bridge' +import { + EmptyArgument, + RType +} from '../../../../../../r-bridge' +import { rangeFrom } from '../../../../../../util/range' + +const voidRange = rangeFrom(-1, -1, -1, -1) + +export function toUnnamedArgument( + node: RNode | undefined, + idMap: DecoratedAstMap +): RUnnamedArgument | typeof EmptyArgument { + if(node === undefined) { + return EmptyArgument + } + const arg: RUnnamedArgument = { + type: RType.Argument, + lexeme: node.lexeme 
?? '', + // is this correct? + location: node.location ?? voidRange, + info: { + ...node.info, + id: node.info.id + '-arg' + }, + name: undefined, + value: node + } + idMap.set(arg.info.id, node) + return arg +} + +export function wrapArgumentsUnnamed( + nodes: readonly (RNode | typeof EmptyArgument | undefined)[], + idMap: DecoratedAstMap +) { + return nodes.map(n => n === EmptyArgument || n?.type === RType.Argument ? n : toUnnamedArgument(n, idMap)) +} diff --git a/src/dataflow/internal/process/functions/call/argument/unpack-argument.ts b/src/dataflow/internal/process/functions/call/argument/unpack-argument.ts new file mode 100644 index 0000000000..b6fb53d2af --- /dev/null +++ b/src/dataflow/internal/process/functions/call/argument/unpack-argument.ts @@ -0,0 +1,14 @@ +import type { RFunctionArgument, RNode } from '../../../../../../r-bridge' +import { EmptyArgument } from '../../../../../../r-bridge' +import { log } from '../../../../../../util/log' + +export function unpackArgument(arg: RFunctionArgument): RNode | undefined { + if(arg === EmptyArgument) { + log.trace('Argument is empty, skipping') + return undefined + } else if(arg.name !== undefined) { + log.trace(`Argument ${JSON.stringify(arg)} is not unnamed, skipping`) + return undefined + } + return arg.value +} diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-access.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-access.ts new file mode 100644 index 0000000000..3401c0d52f --- /dev/null +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-access.ts @@ -0,0 +1,91 @@ +import type { NodeId, ParentInformation, RFunctionArgument, RSymbol } from '../../../../../../r-bridge' +import { RType, EmptyArgument } from '../../../../../../r-bridge' +import type { DataflowProcessorInformation } from '../../../../../processor' +import type { DataflowInformation } from '../../../../../info' +import { makeAllMaybe, makeReferenceMaybe } from 
'../../../../../environments' +import { dataflowLogger, EdgeType } from '../../../../../index' +import { guard } from '../../../../../../util/assert' +import type { ProcessKnownFunctionCallResult } from '../known-call-handling' +import { processKnownFunctionCall } from '../known-call-handling' + +export function processAccess( + name: RSymbol, + args: readonly RFunctionArgument[], + rootId: NodeId, + data: DataflowProcessorInformation, + config: { treatIndicesAsString: boolean } +): DataflowInformation { + if(args.length < 2) { + dataflowLogger.warn(`Access ${name.content} has less than 2 arguments, skipping`) + return processKnownFunctionCall({ name, args, rootId, data }).information + } + const head = args[0] + guard(head !== EmptyArgument, () => `Access ${name.content} has no source, impossible!`) + + let fnCall: ProcessKnownFunctionCallResult + if(!config.treatIndicesAsString) { + fnCall = processKnownFunctionCall({ name, args, rootId, data }) + } else { + const newArgs = [...args] + // if the argument is a symbol, we convert it to a string for this perspective + for(let i = 1; i < newArgs.length; i++) { + const arg = newArgs[i] + if(arg !== EmptyArgument && arg.value?.type === RType.Symbol) { + newArgs[i] = { + ...arg, + value: { + type: RType.String, + info: arg.value.info, + lexeme: arg.value.lexeme, + location: arg.value.location, + content: { + quotes: 'none', + str: arg.value.lexeme + } + } + } + } + } + fnCall = processKnownFunctionCall({ name, args: newArgs, rootId, data }) + } + + const info = fnCall.information + + info.graph.addEdge(name.info.id, fnCall.processedArguments[0]?.entryPoint ?? 
head.info.id, { type: EdgeType.Returns }) + + /* access always reads all of its indices */ + for(const arg of fnCall.processedArguments) { + if(arg !== undefined) { + info.graph.addEdge(name.info.id, arg.entryPoint, { type: EdgeType.Reads }) + } + if(config.treatIndicesAsString) { + // everything but the first is disabled here + break + } + } + + return { + ...info, + /* + * Keep active nodes in case of assignments etc. + * We make them maybe as a kind of hack. + * This way when using + * ```ts + * a[[1]] <- 3 + * a[[2]] <- 4 + * a + * ``` + * the read for a will use both accesses as potential definitions and not just the last one! + */ + unknownReferences: makeAllMaybe(info.unknownReferences, info.graph, info.environment, false), + entryPoint: rootId, + /** it is, to be precise, the accessed element we want to map to maybe */ + in: info.in.map(ref => { + if(ref.nodeId === head.value?.info.id) { + return makeReferenceMaybe(ref, info.graph, info.environment, false) + } else { + return ref + } + }) + } +} diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts new file mode 100644 index 0000000000..13d15846ff --- /dev/null +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts @@ -0,0 +1,192 @@ +import type { + Base, + Location, + NodeId, + ParentInformation, + RFunctionArgument, + RNode, + RNodeWithParent, + RString, + RSymbol, + RUnnamedArgument +} from '../../../../../../r-bridge' +import { removeRQuotes, RType } from '../../../../../../r-bridge' +import type { DataflowProcessorInformation } from '../../../../../processor' +import type { DataflowInformation } from '../../../../../info' +import type { IdentifierDefinition, IdentifierReference } from '../../../../../index' +import { dataflowLogger, EdgeType, VertexType } from '../../../../../index' +import { processKnownFunctionCall } from '../known-call-handling' +import { 
guard } from '../../../../../../util/assert' +import { log, LogLevel } from '../../../../../../util/log' +import { define, overwriteEnvironment } from '../../../../../environments' +import { unpackArgument } from '../argument/unpack-argument' +import { processAsNamedCall } from '../../../process-named-call' +import { toUnnamedArgument } from '../argument/make-argument' + +function toReplacementSymbol(target: RNodeWithParent & Base & Location, prefix: string, superAssignment: boolean): RSymbol { + return { + type: RType.Symbol, + info: target.info, + /* they are all mapped to <- in R, but we mark super as well */ + content: `${prefix}${superAssignment ? '<<-' : '<-'}`, + lexeme: target.lexeme, + location: target.location, + namespace: undefined + } +} + +function getEffectiveOrder(config: { + swapSourceAndTarget?: boolean +}, args: [T, T]): [T, T] { + return config.swapSourceAndTarget ? [args[1], args[0]] : args +} + +export interface AssignmentConfiguration { + readonly superAssignment?: boolean + readonly swapSourceAndTarget?: boolean + /* Make maybe if assigned to symbol */ + readonly makeMaybe?: boolean +} + +/** + * Processes an assignment, i.e., ` <- `. + * Handling it as a function call \`<-\` `(, )`. + * This includes handling of replacement functions (e.g., `names(x) <- ...` as \`names<-\` `(x, ...)`). 
+ */ +export function processAssignment( + name: RSymbol, + /* we expect them to be ordered in the sense that we have (source, target): ` <- ` */ + args: readonly RFunctionArgument[], + rootId: NodeId, + data: DataflowProcessorInformation, + config: AssignmentConfiguration +): DataflowInformation { + if(args.length != 2) { + dataflowLogger.warn(`Assignment ${name.content} has something else than 2 arguments, skipping`) + return processKnownFunctionCall({ name, args, rootId, data }).information + } + + const effectiveArgs = getEffectiveOrder(config, args as [RFunctionArgument, RFunctionArgument]) + const { target, source } = extractSourceAndTarget(effectiveArgs, name) + + if(target.type === RType.Symbol) { + const res = processKnownFunctionCall({ name, args, rootId, data, reverseOrder: !config.swapSourceAndTarget }) + return processAssignmentToSymbol(config.superAssignment ?? false, name, source, target, getEffectiveOrder(config, res.processedArguments as [DataflowInformation, DataflowInformation]), rootId, data, res.information, config.makeMaybe) + } else if(target.type === RType.FunctionCall && target.flavor === 'named') { + /* as replacement functions take precedence over the lhs fn-call (i.e., `names(x) <- ...` is independent from the definition of `names`), we do not have to process the call */ + dataflowLogger.debug(`Assignment ${name.content} has a function call as target => replacement function ${target.lexeme}`) + const replacement = toReplacementSymbol(target, target.functionName.content, config.superAssignment ?? false) + return processAsNamedCall(replacement, data, replacement.content, [...target.arguments, source]) + } else if(target.type === RType.Access) { + dataflowLogger.debug(`Assignment ${name.content} has an access as target => replacement function ${target.lexeme}`) + const replacement = toReplacementSymbol(target, target.operator, config.superAssignment ?? 
false) + return processAsNamedCall(replacement, data, replacement.content, [toUnnamedArgument(target.accessed, data.completeAst.idMap), ...target.access, source]) + } else if(target.type === RType.String) { + return processAssignmentToString(target, args, name, rootId, data, config, source) + } + + dataflowLogger.warn(`Assignment ${name.content} has an unknown target type ${target.type}, skipping`) + return processKnownFunctionCall({ name, args: effectiveArgs, rootId, data }).information +} + +function extractSourceAndTarget(args: readonly RFunctionArgument[], name: RSymbol) { + const source = unpackArgument(args[1]) + const target = unpackArgument(args[0]) + + guard(source !== undefined, () => `Assignment ${name.content} has no source, impossible!`) + guard(target !== undefined, () => `Assignment ${name.content} has no target, impossible!`) + + return { source, target } +} + +function produceWrittenNodes(rootId: NodeId, target: DataflowInformation, isFunctionDef: boolean, data: DataflowProcessorInformation, makeMaybe: boolean): IdentifierDefinition[] { + return target.in.map(ref => ({ + ...ref, + kind: isFunctionDef ? 'function' : 'variable', + definedAt: rootId, + controlDependencies: data.controlDependencies ?? (makeMaybe ? [] : undefined) + })) +} + +function processAssignmentToString(target: RString, args: readonly RFunctionArgument[], name: RSymbol, rootId: NodeId, data: DataflowProcessorInformation, config: { + superAssignment?: boolean; + swapSourceAndTarget?: boolean +}, source: RNode) { + const symbol: RSymbol = { + type: RType.Symbol, + info: target.info, + content: removeRQuotes(target.lexeme), + lexeme: target.lexeme, + location: target.location, + namespace: undefined + } + + // treat first argument to Symbol + const mappedArgs = config.swapSourceAndTarget ? 
[args[0], { ...(args[1] as RUnnamedArgument), value: symbol }] : [{ ...(args[0] as RUnnamedArgument), value: symbol }, args[1]] + const res = processKnownFunctionCall({ name, args: mappedArgs, rootId, data, reverseOrder: !config.swapSourceAndTarget }) + return processAssignmentToSymbol(config.superAssignment ?? false, name, source, symbol, getEffectiveOrder(config, res.processedArguments as [DataflowInformation, DataflowInformation]), rootId, data, res.information) +} + +function checkFunctionDef(source: RNode, sourceInfo: DataflowInformation) { + return sourceInfo.graph.getVertex(source.info.id)?.tag === VertexType.FunctionDefinition +} + +/** + * Helper function whenever it is known that the _target_ of an assignment is a (single) symbol (i.e. `x <- ...`, but not `names(x) <- ...`). + */ +function processAssignmentToSymbol( + superAssignment: boolean, + name: RSymbol, + source: RNode, + target: RSymbol, + [targetArg, sourceArg]: [DataflowInformation, DataflowInformation], + rootId: NodeId, + data: DataflowProcessorInformation, + information: DataflowInformation, + makeMaybe?: boolean +): DataflowInformation { + const isFunctionDef = checkFunctionDef(source, sourceArg) + + const writeNodes = produceWrittenNodes(rootId, targetArg, isFunctionDef, data, makeMaybe ?? 
false) + + if(writeNodes.length !== 1 && log.settings.minLevel <= LogLevel.Warn) { + log.warn(`Unexpected write number in assignment: ${JSON.stringify(writeNodes)}`) + } + + // we drop the first arg which we use to pass along arguments :D + const readFromSourceWritten = sourceArg.out.slice(1) + const readTargets: readonly IdentifierReference[] = [{ nodeId: name.info.id, name: name.content, controlDependencies: data.controlDependencies }, ...sourceArg.unknownReferences, ...sourceArg.in, ...targetArg.in.filter(i => i.nodeId !== target.info.id), ...readFromSourceWritten] + const writeTargets = [...writeNodes, ...writeNodes, ...readFromSourceWritten] + + information.environment = overwriteEnvironment(targetArg.environment, sourceArg.environment) + + // install assigned variables in environment + for(const write of writeNodes) { + information.environment = define(write, superAssignment, information.environment) + information.graph.setDefinitionOfVertex(write) + information.graph.addEdge(write, source.info.id, { type: EdgeType.DefinedBy }) + information.graph.addEdge(write, rootId, { type: EdgeType.DefinedBy }) + // kinda dirty, but we have to remove existing read edges for the symbol, added by the child + const out = information.graph.outgoingEdges(write.nodeId) + for(const [id,edge] of (out?? 
[])) { + if(edge.types.has(EdgeType.Reads)) { + if(edge.types.size === 1) { + out?.delete(id) + } else { + edge.types.delete(EdgeType.Reads) + } + } + } + } + + information.graph.addEdge(name.info.id, targetArg.entryPoint, { type: EdgeType.Returns }) + + return { + ...information, + unknownReferences: [], + entryPoint: name.info.id, + in: readTargets, + out: writeTargets + } +} + diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts new file mode 100644 index 0000000000..c58194fdc1 --- /dev/null +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts @@ -0,0 +1,227 @@ +/** + * Processes a list of expressions joining their dataflow graphs accordingly. + * @module + */ +import type { ExitPoint, DataflowInformation } from '../../../../../info' +import { addNonDefaultExitPoints , alwaysExits , ExitPointType } from '../../../../../info' +import type { NodeId, ParentInformation, RFunctionArgument, RSymbol } from '../../../../../../r-bridge' +import type { DataflowProcessorInformation } from '../../../../../processor' +import { processDataflowFor } from '../../../../../processor' +import type { IdentifierReference, IEnvironment, REnvironmentInformation } from '../../../../../environments' +import { BuiltIn , makeAllMaybe, overwriteEnvironment, popLocalEnvironment, resolveByName } from '../../../../../environments' +import { linkFunctionCalls, linkReadVariablesInSameScopeWithNames } from '../../../../linker' +import { DefaultMap } from '../../../../../../util/defaultmap' +import type { DataflowGraphVertexInfo } from '../../../../../graph' +import { CONSTANT_NAME, DataflowGraph } from '../../../../../graph' +import { dataflowLogger, EdgeType } from '../../../../../index' +import { guard, isNotUndefined } from '../../../../../../util/assert' +import { unpackArgument } from '../argument/unpack-argument' +import { 
patchFunctionCall } from '../common' + + +const dotDotDotAccess = /\.\.\d+/ +function linkReadNameToWriteIfPossible(read: IdentifierReference, environments: REnvironmentInformation, listEnvironments: Set, remainingRead: Map, nextGraph: DataflowGraph) { + const readName = read.name && dotDotDotAccess.test(read.name) ? '...' : read.name ?? CONSTANT_NAME + + const probableTarget = resolveByName(readName, environments) + + // record if at least one has not been defined + if(probableTarget === undefined || probableTarget.some(t => !listEnvironments.has(t.nodeId))) { + if(remainingRead.has(readName)) { + remainingRead.get(readName)?.push(read) + } else { + remainingRead.set(readName, [read]) + } + } + + // keep it, for we have no target, as read-ids are unique within the same fold, this should work for same links + // we keep them if they are defined outside the current parent and maybe throw them away later + if(probableTarget === undefined) { + return + } + + for(const target of probableTarget) { + // we can stick with maybe even if readId.attribute is always + nextGraph.addEdge(read, target, { type: EdgeType.Reads }) + } +} + + +function processNextExpression( + currentElement: DataflowInformation, + environment: REnvironmentInformation, + listEnvironments: Set, + remainingRead: Map, + nextGraph: DataflowGraph +) { + // all inputs that have not been written until know, are read! + for(const read of [...currentElement.in, ...currentElement.unknownReferences]) { + linkReadNameToWriteIfPossible(read, environment, listEnvironments, remainingRead, nextGraph) + } + // add same variable reads for deferred if they are read previously but not dependent + for(const writeTarget of currentElement.out) { + const writeName = writeTarget.name + + const resolved = writeName ? 
resolveByName(writeName, environment) : undefined + if(resolved !== undefined) { + // write-write + for(const target of resolved) { + nextGraph.addEdge(target, writeTarget, { type: EdgeType.SameDefDef }) + } + } + } +} + +function updateSideEffectsForCalledFunctions(calledEnvs: { + functionCall: NodeId; + called: readonly DataflowGraphVertexInfo[] +}[], inputEnvironment: REnvironmentInformation, nextGraph: DataflowGraph) { + for(const { functionCall, called } of calledEnvs) { + for(const calledFn of called) { + guard(calledFn.tag === 'function-definition', 'called function must call a function definition') + // only merge the environments they have in common + let environment = calledFn.environment + while(environment.level > inputEnvironment.level) { + environment = popLocalEnvironment(environment) + } + // update alle definitions to be defined at this function call + let current: IEnvironment | undefined = environment.current + while(current !== undefined) { + for(const definitions of current.memory.values()) { + for(const def of definitions) { + if(def.definedAt !== BuiltIn) { + nextGraph.addEdge(def.nodeId, functionCall, { type: EdgeType.SideEffectOnCall }) + } + } + } + current = current.parent + } + // we update all definitions to be linked with the corresponding function call + inputEnvironment = overwriteEnvironment(inputEnvironment, environment) + } + } + return inputEnvironment +} + +export function processExpressionList( + name: RSymbol, + args: readonly RFunctionArgument[], + rootId: NodeId, + data: DataflowProcessorInformation +): DataflowInformation { + const expressions = args.map(unpackArgument) + + dataflowLogger.trace(`processing expression list with ${expressions.length} expressions`) + + let environment = data.environment + // used to detect if a "write" happens within the same expression list + const listEnvironments: Set = new Set() + + const remainingRead = new Map() + + const nextGraph = new DataflowGraph() + const out = [] + const 
exitPoints: ExitPoint[] = [] + + let expressionCounter = 0 + const processedExpressions: (DataflowInformation | undefined)[] = [] + let defaultReturnExpr: undefined | DataflowInformation = undefined + + for(const expression of expressions) { + dataflowLogger.trace(`processing expression ${++expressionCounter} of ${expressions.length}`) + if(expression === undefined) { + processedExpressions.push(undefined) + continue + } + // use the current environments for processing + data = { ...data, environment: environment } + const processed = processDataflowFor(expression, data) + processedExpressions.push(processed) + nextGraph.mergeWith(processed.graph) + defaultReturnExpr = processed + + // if the expression contained next or break anywhere before the next loop, the overwrite should be an append because we do not know if the rest is executed + // update the environments for the next iteration with the previous writes + if(exitPoints.length > 0) { + processed.out = makeAllMaybe(processed.out, nextGraph, processed.environment, true) + processed.in = makeAllMaybe(processed.in, nextGraph, processed.environment, false) + processed.unknownReferences = makeAllMaybe(processed.unknownReferences, nextGraph, processed.environment, false) + } + + addNonDefaultExitPoints(exitPoints, processed.exitPoints) + + out.push(...processed.out) + + dataflowLogger.trace(`expression ${expressionCounter} of ${expressions.length} has ${processed.unknownReferences.length} unknown nodes`) + + processNextExpression(processed, environment, listEnvironments, remainingRead, nextGraph) + + const calledEnvs = linkFunctionCalls(nextGraph, data.completeAst.idMap, processed.graph) + + environment = exitPoints.length > 0 ? 
overwriteEnvironment(environment, processed.environment) : processed.environment + // if the called function has global redefinitions, we have to keep them within our environment + environment = updateSideEffectsForCalledFunctions(calledEnvs, environment, nextGraph) + + for(const { nodeId } of processed.out) { + listEnvironments.add(nodeId) + } + + /** if at least built-one of the exit points encountered happens unconditionally, we exit here (dead code)! */ + if(alwaysExits(processed)) { + /* if there is an always-exit expression, there is no default return active anymore */ + defaultReturnExpr = undefined + break + } + } + + if(expressions.length > 0) { + // now, we have to link same reads + linkReadVariablesInSameScopeWithNames(nextGraph, new DefaultMap(() => [], remainingRead)) + } + + dataflowLogger.trace(`expression list exits with ${remainingRead.size} remaining read names`) + + if(defaultReturnExpr) { + exitPoints.push({ + type: ExitPointType.Default, + nodeId: defaultReturnExpr.entryPoint, + controlDependencies: data.controlDependencies + }) + } + + const ingoing = [...remainingRead.values()].flat() + + const rootNode = data.completeAst.idMap.get(rootId) + const withGroup = rootNode?.grouping + + if(withGroup) { + ingoing.push({ nodeId: rootId, name: name.content, controlDependencies: data.controlDependencies }) + patchFunctionCall({ + nextGraph, + rootId, + name, + data, + argumentProcessResult: processedExpressions + }) + // process all exit points as potential returns: + for(const exit of exitPoints) { + if(exit.type === ExitPointType.Return || exit.type === ExitPointType.Default) { + nextGraph.addEdge(rootId, exit.nodeId, { type: EdgeType.Returns }) + } + } + } + + const meId = withGroup ? rootId : (processedExpressions.find(isNotUndefined)?.entryPoint ?? 
rootId) + return { + /* no active nodes remain, they are consumed within the remaining read collection */ + unknownReferences: [], + in: ingoing, + out, + environment: environment, + graph: nextGraph, + /* if we have no group we take the last evaluated expr */ + entryPoint: meId, + exitPoints: withGroup ? [{ nodeId: rootId, type: ExitPointType.Default, controlDependencies: data.controlDependencies }] + : exitPoints + } +} diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-for-loop.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-for-loop.ts new file mode 100644 index 0000000000..f5216349bb --- /dev/null +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-for-loop.ts @@ -0,0 +1,120 @@ +import type { NodeId, ParentInformation, RFunctionArgument, RSymbol } from '../../../../../../r-bridge' +import type { DataflowProcessorInformation } from '../../../../../processor' +import { processDataflowFor } from '../../../../../processor' +import type { DataflowInformation } from '../../../../../info' +import { filterOutLoopExitPoints, alwaysExits } from '../../../../../info' +import { + appendEnvironment, + define, + initializeCleanEnvironments, + makeAllMaybe, + overwriteEnvironment +} from '../../../../../environments' +import { + linkCircularRedefinitionsWithinALoop, + linkIngoingVariablesInSameScope, + produceNameSharedIdMap +} from '../../../../linker' +import { EdgeType } from '../../../../../graph' +import { dataflowLogger } from '../../../../../index' +import { processKnownFunctionCall } from '../known-call-handling' +import { guard } from '../../../../../../util/assert' +import { patchFunctionCall } from '../common' +import { unpackArgument } from '../argument/unpack-argument' + +export function processForLoop( + name: RSymbol, + args: readonly RFunctionArgument[], + rootId: NodeId, + data: DataflowProcessorInformation +): DataflowInformation { + if(args.length !== 3) { + dataflowLogger.warn(`For-Loop 
${name.content} does not have three arguments, skipping`) + return processKnownFunctionCall({ name, args, rootId, data }).information + } + + const [variableArg, vectorArg, bodyArg] = args.map(unpackArgument) + + guard(variableArg !== undefined && vectorArg !== undefined && bodyArg !== undefined, () => `For-Loop ${JSON.stringify(args)} has missing arguments! Bad!`) + const vector = processDataflowFor(vectorArg, data) + if(alwaysExits(vector)) { + dataflowLogger.warn(`For-Loop ${rootId} forces exit in vector, skipping rest`) + return vector + } + + const variable = processDataflowFor(variableArg, data) + // this should not be able to exit always! + + const originalDependency = data.controlDependencies + data = { ...data, controlDependencies: [...data.controlDependencies ?? [], name.info.id] } + + let headEnvironments = overwriteEnvironment(vector.environment, variable.environment) + const headGraph = variable.graph.mergeWith(vector.graph) + + const writtenVariable = [...variable.unknownReferences, ...variable.in] + for(const write of writtenVariable) { + headEnvironments = define({ ...write, definedAt: name.info.id, kind: 'variable' }, false, headEnvironments) + } + data = { ...data, environment: headEnvironments } + /* process the body without any environment first, to retrieve all open references */ + const body = processDataflowFor(bodyArg, { ...data, environment: initializeCleanEnvironments() }) + + const nextGraph = headGraph.mergeWith(body.graph) + const outEnvironment = appendEnvironment(headEnvironments, body.environment) + + // again within an if-then-else we consider all actives to be read + // currently I add it at the end, but is this correct? + const ingoing = [ + ...vector.in, + ...makeAllMaybe(body.in, nextGraph, outEnvironment, false), + ...vector.unknownReferences, + ...makeAllMaybe(body.unknownReferences, nextGraph, outEnvironment, false) + ] + + // now we have to bind all open reads with the given name to the locally defined writtenVariable! 
+ const nameIdShares = produceNameSharedIdMap(ingoing) + + for(const write of writtenVariable) { + nextGraph.addEdge(write.nodeId, vector.entryPoint, { type: EdgeType.DefinedBy }) + + const name = write.name + if(name) { + const readIdsToLink = nameIdShares.get(name) + for(const readId of readIdsToLink) { + nextGraph.addEdge(readId.nodeId, write.nodeId, { type: EdgeType.Reads }) + } + // now, we remove the name from the id shares as they are no longer needed + nameIdShares.delete(name) + nextGraph.setDefinitionOfVertex(write) + } + } + + const outgoing = [...variable.out, ...writtenVariable, ...makeAllMaybe(body.out, nextGraph, outEnvironment, true)] + + linkIngoingVariablesInSameScope(nextGraph, ingoing) + linkCircularRedefinitionsWithinALoop(nextGraph, nameIdShares, body.out) + + patchFunctionCall({ + nextGraph, + rootId, + name, + data: { ...data, controlDependencies: originalDependency }, + argumentProcessResult: [variable, vector, body] + }) + /* mark the last argument as nse */ + nextGraph.addEdge(rootId, body.entryPoint, { type: EdgeType.NonStandardEvaluation }) + // as the for-loop always evaluates its variable and condition + nextGraph.addEdge(name.info.id, variable.entryPoint, { type: EdgeType.Reads }) + nextGraph.addEdge(name.info.id, vector.entryPoint, { type: EdgeType.Reads }) + + return { + unknownReferences: [], + // we only want those not bound by a local variable + in: [{ nodeId: rootId, name: name.content, controlDependencies: originalDependency }, ...variable.in, ...[...nameIdShares.values()].flat()], + out: outgoing, + graph: nextGraph, + entryPoint: name.info.id, + exitPoints: filterOutLoopExitPoints(body.exitPoints), + environment: outEnvironment + } +} diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-function-definition.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-function-definition.ts new file mode 100644 index 0000000000..64a5fdc44d --- /dev/null +++ 
b/src/dataflow/internal/process/functions/call/built-in/built-in-function-definition.ts @@ -0,0 +1,220 @@ +import type { NodeId, ParentInformation, RFunctionArgument, RSymbol } from '../../../../../../r-bridge' +import { EmptyArgument } from '../../../../../../r-bridge' +import type { DataflowProcessorInformation } from '../../../../../processor' +import { processDataflowFor } from '../../../../../processor' +import type { DataflowInformation, ExitPoint } from '../../../../../info' +import { ExitPointType } from '../../../../../info' +import { linkInputs } from '../../../../linker' +import { + type DataflowFunctionFlowInformation, + DataflowGraph, + dataflowLogger, + EdgeType, + type IdentifierReference, + initializeCleanEnvironments, + type REnvironmentInformation, + VertexType +} from '../../../../../index' +import { processKnownFunctionCall } from '../known-call-handling' +import { unpackArgument } from '../argument/unpack-argument' +import { guard } from '../../../../../../util/assert' +import { + overwriteEnvironment, + popLocalEnvironment, + pushLocalEnvironment, + resolveByName +} from '../../../../../environments' + +export function processFunctionDefinition( + name: RSymbol, + args: readonly RFunctionArgument[], + rootId: NodeId, + data: DataflowProcessorInformation +): DataflowInformation { + if(args.length < 1) { + dataflowLogger.warn(`Function Definition ${name.content} does not have an argument, skipping`) + return processKnownFunctionCall({ name, args, rootId, data }).information + } + + /* we remove the last argument, as it is the body */ + const parameters = args.slice(0, -1) + const bodyArg = unpackArgument(args[args.length - 1]) + guard(bodyArg !== undefined, () => `Function Definition ${JSON.stringify(args)} has missing body! 
Bad!`) + + const originalEnvironment = data.environment + // within a function def we do not pass on the outer binds as they could be overwritten when called + data = prepareFunctionEnvironment(data) + + const subgraph = new DataflowGraph() + + let readInParameters: IdentifierReference[] = [] + for(const param of parameters) { + guard(param !== EmptyArgument, () => `Empty argument in function definition ${name.content}, ${JSON.stringify(args)}`) + const processed = processDataflowFor(param, data) + subgraph.mergeWith(processed.graph) + const read = [...processed.in, ...processed.unknownReferences] + linkInputs(read, data.environment, readInParameters, subgraph, false) + data = { ...data, environment: overwriteEnvironment(data.environment, processed.environment) } + } + const paramsEnvironments = data.environment + + const body = processDataflowFor(bodyArg, data) + // As we know, parameters cannot technically duplicate (i.e., their names are unique), we overwrite their environments. + // This is the correct behavior, even if someone uses non-`=` arguments in functions. + const bodyEnvironment = body.environment + + readInParameters = findPromiseLinkagesForParameters(subgraph, readInParameters, paramsEnvironments, body) + + const readInBody = [...body.in, ...body.unknownReferences] + // there is no uncertainty regarding the arguments, as if a function header is executed, so is its body + const remainingRead = linkInputs(readInBody, paramsEnvironments, readInParameters.slice(), body.graph, true /* functions do not have to be called */) + + subgraph.mergeWith(body.graph) + + dataflowLogger.trace(`Function definition with id ${name.info.id} has ${remainingRead.length} remaining reads`) + + // link same-def-def with arguments + for(const writeTarget of body.out) { + const writeName = writeTarget.name + + const resolved = writeName ? 
resolveByName(writeName, paramsEnvironments) : undefined + if(resolved !== undefined) { + // write-write + for(const target of resolved) { + subgraph.addEdge(target, writeTarget, { type: EdgeType.SameDefDef }) + } + } + } + + const outEnvironment = overwriteEnvironment(paramsEnvironments, bodyEnvironment) + + for(const read of remainingRead) { + if(read.name) { + subgraph.addVertex({ + tag: VertexType.Use, + id: read.nodeId, + name: read.name, + environment: undefined, + controlDependencies: [] + }) + } + } + + const flow: DataflowFunctionFlowInformation = { + unknownReferences: [], + in: remainingRead, + out: [], + entryPoint: body.entryPoint, + graph: new Set(subgraph.rootIds()), + environment: outEnvironment + } + + const exitPoints = body.exitPoints + updateNestedFunctionClosures(exitPoints, subgraph, outEnvironment, name) + + const graph = new DataflowGraph().mergeWith(subgraph, false) + graph.addVertex({ + tag: VertexType.FunctionDefinition, + id: name.info.id, + name: String(name.info.id), + environment: popLocalEnvironment(outEnvironment), + controlDependencies: data.controlDependencies, + subflow: flow, + exitPoints: exitPoints?.filter(e => e.type === ExitPointType.Return || e.type === ExitPointType.Default).map(e => e.nodeId) ?? 
[] + }) + return { + /* nothing escapes a function definition, but the function itself, will be forced in assignment: { nodeId: functionDefinition.info.id, scope: data.activeScope, used: 'always', name: functionDefinition.info.id as string } */ + unknownReferences: [], + in: [], + out: [], + exitPoints: [], + entryPoint: name.info.id, + graph, + environment: originalEnvironment + } +} + + + +function updateNestedFunctionClosures( + exitPoints: readonly ExitPoint[], + subgraph: DataflowGraph, + outEnvironment: REnvironmentInformation, + name: RSymbol +) { + // track *all* function definitions - including those nested within the current graph + // try to resolve their 'in' by only using the lowest scope which will be popped after this definition + for(const [id, info] of subgraph.vertices(true)) { + if(info.tag !== VertexType.FunctionDefinition) { + continue + } + + const ingoingRefs = info.subflow.in + const remainingIn: Set = new Set() + for(const ingoing of ingoingRefs) { + for(const { nodeId } of exitPoints) { + const node = subgraph.getVertex(nodeId, true) + const env = initializeCleanEnvironments() + env.current.memory = node === undefined ? outEnvironment.current.memory : (node.environment?.current.memory ?? outEnvironment.current.memory) + const resolved = ingoing.name ? 
resolveByName(ingoing.name, env) : undefined + if(resolved === undefined) { + remainingIn.add(ingoing) + continue + } + dataflowLogger.trace(`Found ${resolved.length} references to open ref ${id} in closure of function definition ${name.info.id}`) + for(const ref of resolved) { + subgraph.addEdge(ingoing, ref, { type: EdgeType.Reads }) + } + } + } + dataflowLogger.trace(`Keeping ${remainingIn.size} (unique) references to open ref ${id} in closure of function definition ${name.info.id}`) + info.subflow.in = [...remainingIn] + } +} + +function prepareFunctionEnvironment(data: DataflowProcessorInformation) { + let env = initializeCleanEnvironments() + for(let i = 0; i < data.environment.level + 1 /* add another env */; i++) { + env = pushLocalEnvironment(env) + } + return { ...data, environment: env } +} + +/** + * Within something like `f <- function(a=b, m=3) { b <- 1; a; b <- 5; a + 1 }` + * `a` will be defined by `b` and `b`will be a promise object bound by the first definition of b it can find. + * This means, that this function returns `2` due to the first `b <- 1` definition. + * If the code is `f <- function(a=b, m=3) { if(m > 3) { b <- 1; }; a; b <- 5; a + 1 }`, we need a link to `b <- 1` and `b <- 6` + * as `b` can be defined by either one of them. + *

+ * Currently we may be unable to narrow down every definition within the body as we have not implemented ways to track what covers a first definitions + */ +function findPromiseLinkagesForParameters(parameters: DataflowGraph, readInParameters: readonly IdentifierReference[], parameterEnvs: REnvironmentInformation, body: DataflowInformation): IdentifierReference[] { + // first, we try to bind again within parameters - if we have it, fine + const remainingRead: IdentifierReference[] = [] + for(const read of readInParameters) { + const resolved = read.name ? resolveByName(read.name, parameterEnvs) : undefined + if(resolved !== undefined) { + for(const ref of resolved) { + parameters.addEdge(read, ref, { type: EdgeType.Reads }) + } + continue + } + // If not resolved, link all outs within the body as potential reads. + // Regarding the sort, we can ignore equality as nodeIds are unique. + // We sort to get the lowest id - if it is an 'always' flag, we can safely use it instead of all of them. + const writingOuts = body.out.filter(o => o.name === read.name).sort((a, b) => String(a.nodeId) < String(b.nodeId) ? 
1 : -1) + if(writingOuts.length === 0) { + remainingRead.push(read) + continue + } + if(writingOuts[0].controlDependencies === undefined) { + parameters.addEdge(read, writingOuts[0], { type: EdgeType.Reads }) + continue + } + for(const out of writingOuts) { + parameters.addEdge(read, out, { type: EdgeType.Reads }) + } + } + return remainingRead +} diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-if-then-else.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-if-then-else.ts new file mode 100644 index 0000000000..5c9e3327db --- /dev/null +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-if-then-else.ts @@ -0,0 +1,125 @@ +import type { NodeId, ParentInformation, RFunctionArgument, RSymbol } from '../../../../../../r-bridge' +import type { DataflowProcessorInformation } from '../../../../../processor' +import { processDataflowFor } from '../../../../../processor' +import type { DataflowInformation } from '../../../../../info' +import { alwaysExits } from '../../../../../info' +import { + appendEnvironment, + type IdentifierReference, + makeAllMaybe, + resolvesToBuiltInConstant +} from '../../../../../environments' +import { dataflowLogger, EdgeType } from '../../../../../index' +import { processKnownFunctionCall } from '../known-call-handling' +import { linkIngoingVariablesInSameScope } from '../../../../linker' +import { patchFunctionCall } from '../common' +import { unpackArgument } from '../argument/unpack-argument' + +export function processIfThenElse( + name: RSymbol, + args: readonly RFunctionArgument[], + rootId: NodeId, + data: DataflowProcessorInformation +): DataflowInformation { + if(args.length !== 2 && args.length !== 3) { + dataflowLogger.warn(`If-then-else ${name.content} has something different from 2 or 3 arguments, skipping`) + return processKnownFunctionCall({ name, args, rootId, data }).information + } + + const [condArg, thenArg, otherwiseArg] = args.map(unpackArgument) + + if(condArg 
=== undefined || thenArg === undefined) { + dataflowLogger.warn(`If-then-else ${name.content} has empty condition or then case in ${JSON.stringify(args)}, skipping`) + return processKnownFunctionCall({ name, args, rootId, data }).information + } + + const cond = processDataflowFor(condArg, data) + + if(alwaysExits(cond)) { + dataflowLogger.warn(`If-then-else ${rootId} forces exit in condition, skipping rest`) + return cond + } + + const originalDependency = data.controlDependencies + // currently we update the cd afterward :sweat: + data = { ...data, environment: cond.environment } + + let then: DataflowInformation | undefined + let makeThenMaybe = false + + // we should defer this to the abstract interpretation + const conditionIsFalse = resolvesToBuiltInConstant(condArg?.lexeme, data.environment, false) + const conditionIsTrue = resolvesToBuiltInConstant(condArg?.lexeme, data.environment, true) + if(conditionIsFalse !== 'always') { + then = processDataflowFor(thenArg, data) + if(then.entryPoint) { + then.graph.addEdge(name.info.id, then.entryPoint, { type: EdgeType.Returns }) + } + if(conditionIsTrue !== 'always') { + makeThenMaybe = true + } + } + + let otherwise: DataflowInformation | undefined + let makeOtherwiseMaybe = false + if(otherwiseArg !== undefined && conditionIsTrue !== 'always') { + otherwise = processDataflowFor(otherwiseArg, data) + if(otherwise.entryPoint) { + otherwise.graph.addEdge(name.info.id, otherwise.entryPoint, { type: EdgeType.Returns }) + } + if(conditionIsFalse !== 'always') { + makeOtherwiseMaybe = true + } + } + + const nextGraph = cond.graph.mergeWith(then?.graph).mergeWith(otherwise?.graph) + const thenEnvironment = then?.environment ?? cond.environment + + // if there is no "else" case, we have to recover whatever we had before as it may be not executed + const finalEnvironment = appendEnvironment(thenEnvironment, otherwise ? 
otherwise.environment : cond.environment) + + // again within an if-then-else we consider all actives to be read + const ingoing: IdentifierReference[] = [ + ...cond.in, + ...(makeThenMaybe ? makeAllMaybe(then?.in, nextGraph, finalEnvironment, false, rootId) : then?.in ?? []), + ...(makeOtherwiseMaybe ? makeAllMaybe(otherwise?.in, nextGraph, finalEnvironment, false, rootId) : otherwise?.in ?? []), + ...cond.unknownReferences, + ...(makeThenMaybe ? makeAllMaybe(then?.unknownReferences, nextGraph, finalEnvironment, false, rootId) : then?.unknownReferences ?? []), + ...(makeOtherwiseMaybe ? makeAllMaybe(otherwise?.unknownReferences, nextGraph, finalEnvironment, false, rootId) : otherwise?.unknownReferences ?? []), + ] + + // we assign all with a maybe marker + // we do not merge even if they appear in both branches because the maybe links will refer to different ids + const outgoing = [ + ...cond.out, + ...(makeThenMaybe ? makeAllMaybe(then?.out, nextGraph, finalEnvironment, true, rootId) : then?.out ?? []), + ...(makeOtherwiseMaybe ? makeAllMaybe(otherwise?.out, nextGraph, finalEnvironment, true, rootId) : otherwise?.out ?? []), + ] + linkIngoingVariablesInSameScope(nextGraph, ingoing) + + patchFunctionCall({ + nextGraph, + rootId, + name, + data: { ...data, controlDependencies: originalDependency }, + argumentProcessResult: [cond, then, otherwise] + }) + + // as an if always evaluates its condition, we add a 'reads'-edge + nextGraph.addEdge(name.info.id, cond.entryPoint, { type: EdgeType.Reads }) + + const exitPoints = [ + ...(then?.exitPoints ?? []).map(e => ({ ...e, controlDependencies: makeThenMaybe ? [...data.controlDependencies ?? []] : e.controlDependencies })), + ...(otherwise?.exitPoints ?? []).map(e => ({ ...e, controlDependencies: makeOtherwiseMaybe ? [...data.controlDependencies ?? 
[]] : e.controlDependencies })) + ] + + return { + unknownReferences: [], + in: [{ nodeId: rootId, name: name.content, controlDependencies: originalDependency }, ...ingoing], + out: outgoing, + exitPoints, + entryPoint: rootId, + environment: finalEnvironment, + graph: nextGraph + } +} diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-logical-bin-op.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-logical-bin-op.ts new file mode 100644 index 0000000000..59176f1e32 --- /dev/null +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-logical-bin-op.ts @@ -0,0 +1,48 @@ +import type { + NodeId, + ParentInformation, + RFunctionArgument, + RSymbol +} from '../../../../../../r-bridge' +import type { DataflowProcessorInformation } from '../../../../../processor' +import type { DataflowInformation } from '../../../../../info' +import { dataflowLogger, EdgeType } from '../../../../../index' +import { processKnownFunctionCall } from '../known-call-handling' + + +export function processSpecialBinOp( + name: RSymbol, + args: readonly RFunctionArgument[], + rootId: NodeId, + data: DataflowProcessorInformation, + config: { lazy: boolean } +): DataflowInformation { + if(!config.lazy) { + return processKnownFunctionCall({ name, args, rootId, data }).information + } else if(args.length != 2) { + dataflowLogger.warn(`Logical bin-op ${name.content} has something else than 2 arguments, skipping`) + return processKnownFunctionCall({ name, args, rootId, data }).information + } + + const { information, processedArguments } = processKnownFunctionCall({ name, args, rootId, data, + patchData: (d, i) => { + if(i === 1) { + // the rhs will be overshadowed by the lhs + return { ...d, controlDependencies: [...d.controlDependencies ?? 
[], name.info.id] } + } + return d + } + }) + + for(const arg of processedArguments) { + if(arg) { + information.graph.addEdge(name.info.id, arg.entryPoint, { type: EdgeType.Reads }) + } + // only do first if lazy + if(config.lazy) { + break + } + } + + return information +} diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-pipe.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-pipe.ts new file mode 100644 index 0000000000..f07303594c --- /dev/null +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-pipe.ts @@ -0,0 +1,52 @@ +import type { + NodeId, + ParentInformation, + RFunctionArgument, + RSymbol +} from '../../../../../../r-bridge' +import { RType } from '../../../../../../r-bridge' +import type { DataflowProcessorInformation } from '../../../../../processor' +import type { DataflowInformation } from '../../../../../info' +import { dataflowLogger, EdgeType, VertexType } from '../../../../../index' +import { processKnownFunctionCall } from '../known-call-handling' +import { guard } from '../../../../../../util/assert' +import { unpackArgument } from '../argument/unpack-argument' + + +export function processPipe( + name: RSymbol, + args: readonly RFunctionArgument[], + rootId: NodeId, + data: DataflowProcessorInformation +): DataflowInformation { + + const { information } = processKnownFunctionCall({ name, args, rootId, data }) + if(args.length !== 2) { + dataflowLogger.warn(`Pipe ${name.content} has something else than 2 arguments, skipping`) + return information + } + + const [lhs, rhs] = args.map(unpackArgument) + + guard(lhs !== undefined && rhs !== undefined, () => `lhs and rhs must be present, but ${JSON.stringify(lhs)} and ${JSON.stringify(rhs)} were found instead.`) + + if(rhs.type !== RType.FunctionCall) { + dataflowLogger.warn(`Expected rhs of pipe to be a function call, but got ${rhs.type} instead.`) + } else { + const functionCallNode = information.graph.getVertex(rhs.info.id, true) + 
guard(functionCallNode?.tag === VertexType.FunctionCall, () => `Expected function call node with id ${rhs.info.id} to be a function call node, but got ${functionCallNode?.tag} instead.`) + + // make the lhs an argument node: + const argId = lhs.info.id + + dataflowLogger.trace(`Linking pipe arg ${argId} as first argument of ${rhs.info.id}`) + functionCallNode.args.unshift({ + name: undefined, + nodeId: argId, + controlDependencies: data.controlDependencies + }) + information.graph.addEdge(functionCallNode.id, argId, { type: EdgeType.Argument }) + } + + return information +} diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-quote.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-quote.ts new file mode 100644 index 0000000000..61ef9255f5 --- /dev/null +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-quote.ts @@ -0,0 +1,43 @@ +import type { + NodeId, + ParentInformation, RFunctionArgument, + RSymbol +} from '../../../../../../r-bridge' +import { type DataflowProcessorInformation } from '../../../../../processor' +import type { DataflowInformation } from '../../../../../info' +import type { IdentifierReference } from '../../../../../index' +import { EdgeType } from '../../../../../index' +import { processKnownFunctionCall } from '../known-call-handling' + + +export function processQuote( + name: RSymbol, + args: readonly RFunctionArgument[], + rootId: NodeId, + data: DataflowProcessorInformation, + config?: { quoteArgumentsWithIndex?: Set } +): DataflowInformation { + const { information, processedArguments, fnRef } = processKnownFunctionCall({ name, args, rootId, data }) + + const inRefs: IdentifierReference[] = [fnRef] + const outRefs: IdentifierReference[] = [] + const unknownRefs: IdentifierReference[] = [] + + for(let i = 0; i < args.length; i++) { + const processedArg = processedArguments[i] + if(processedArg && !config?.quoteArgumentsWithIndex?.has(i)) { + inRefs.push(...processedArg.in) + 
outRefs.push(...processedArg.out) + unknownRefs.push(...processedArg.unknownReferences) + } else if(processedArg) { + information.graph.addEdge(rootId, processedArg.entryPoint, { type: EdgeType.NonStandardEvaluation }) + } + } + + return { + ...information, + in: inRefs, + out: outRefs, + unknownReferences: unknownRefs + } +} diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-repeat-loop.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-repeat-loop.ts new file mode 100644 index 0000000000..b77c142568 --- /dev/null +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-repeat-loop.ts @@ -0,0 +1,44 @@ +import type { NodeId, ParentInformation, RFunctionArgument, RSymbol } from '../../../../../../r-bridge' +import { EmptyArgument } from '../../../../../../r-bridge' +import type { DataflowProcessorInformation } from '../../../../../processor' +import type { DataflowInformation } from '../../../../../info' +import { filterOutLoopExitPoints } from '../../../../../info' +import { + linkCircularRedefinitionsWithinALoop, + produceNameSharedIdMap +} from '../../../../linker' +import { dataflowLogger } from '../../../../../index' +import { processKnownFunctionCall } from '../known-call-handling' +import { guard } from '../../../../../../util/assert' +import { unpackArgument } from '../argument/unpack-argument' + +export function processRepeatLoop( + name: RSymbol, + args: readonly RFunctionArgument[], + rootId: NodeId, + data: DataflowProcessorInformation +): DataflowInformation { + if(args.length !== 1 || args[0] === EmptyArgument) { + dataflowLogger.warn(`Repeat-Loop ${name.content} does not have 1 argument, skipping`) + return processKnownFunctionCall({ name, args, rootId, data }).information + } + + const unpacked = unpackArgument(args[0]) + const { information, processedArguments } = processKnownFunctionCall({ + name, + args: unpacked ? 
[unpacked] : args, + rootId, + data, + markAsNSE: [0] + }) + + const body = processedArguments[0] + guard(body !== undefined, () => `Repeat-Loop ${name.content} has no body, impossible!`) + + const namedIdShares = produceNameSharedIdMap([...body.in, ...body.unknownReferences]) + linkCircularRedefinitionsWithinALoop(information.graph, namedIdShares, body.out) + + information.exitPoints = filterOutLoopExitPoints(information.exitPoints) + + return information +} diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-replacement.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-replacement.ts new file mode 100644 index 0000000000..b25f866841 --- /dev/null +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-replacement.ts @@ -0,0 +1,54 @@ +import type { NodeId, ParentInformation, RFunctionArgument, RSymbol } from '../../../../../../r-bridge' +import type { DataflowProcessorInformation } from '../../../../../processor' +import type { DataflowInformation } from '../../../../../info' +import { initializeCleanDataflowInformation } from '../../../../../info' +import { dataflowLogger, EdgeType, getReferenceOfArgument, VertexType } from '../../../../../index' +import { processKnownFunctionCall } from '../known-call-handling' +import { expensiveTrace } from '../../../../../../util/log' +import { processAssignment } from './built-in-assignment' +import { processAllArguments } from '../common' +import { guard } from '../../../../../../util/assert' + +export function processReplacementFunction( + name: RSymbol, + /** last one has to be the value */ + args: readonly RFunctionArgument[], + rootId: NodeId, + data: DataflowProcessorInformation, + config: { makeMaybe?: boolean, assignmentOperator?: '<-' | '<<-' } +): DataflowInformation { + if(args.length < 2) { + dataflowLogger.warn(`Replacement ${name.content} has less than 2 arguments, skipping`) + return processKnownFunctionCall({ name, args, rootId, data }).information + } + 
+ /* we only get here if <-, <<-, ... or whatever is part of the replacement is not overwritten */ + expensiveTrace(dataflowLogger, () => `Replacement ${name.content} with ${JSON.stringify(args)}, processing`) + + /* we assign the first argument by the last for now and maybe mark as maybe!, we can keep the symbol as we now know we have an assignment */ + const res = processAssignment(name, [args[0], args[args.length - 1]], rootId, data, { superAssignment: config.assignmentOperator === '<<-', makeMaybe: config.makeMaybe }) + + /* now, we soft-inject other arguments, so that calls like `x[y] <- 3` are linked correctly */ + const { callArgs } = processAllArguments({ + functionName: initializeCleanDataflowInformation(rootId, data), + args: args.slice(1, -1), + data, + functionRootId: rootId, + finalGraph: res.graph, + }) + const fn = res.graph.getVertex(rootId) + guard(fn?.tag === VertexType.FunctionCall && fn.args.length === 2, () => `Function ${rootId} not found in graph or not 2-arg fn-call (${JSON.stringify(fn)})`) + fn.args = [fn.args[0], ...callArgs, fn.args[1]] + + + /* a replacement reads all of its call args as well, at least as far as I am aware of */ + for(const arg of callArgs) { + const ref = getReferenceOfArgument(arg) + if(ref !== undefined) { + res.graph.addEdge(rootId, ref, { type: EdgeType.Reads }) + } + } + + return res +} + diff --git a/src/dataflow/internal/process/functions/source.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts similarity index 50% rename from src/dataflow/internal/process/functions/source.ts rename to src/dataflow/internal/process/functions/call/built-in/built-in-source.ts index ae48709b33..245ecf3c0d 100644 --- a/src/dataflow/internal/process/functions/source.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts @@ -1,12 +1,24 @@ -import type { IdGenerator, NoInfo, RArgument, RParseRequest, RParseRequestProvider , NormalizedAst, ParentInformation, RFunctionCall } from 
'../../../../r-bridge' -import { requestFingerprint , removeRQuotes, RType , sourcedDeterministicCountingIdGenerator , requestProviderFromFile } from '../../../../r-bridge' -import { RShellExecutor } from '../../../../r-bridge/shell-executor' -import { executeSingleSubStep } from '../../../../core' -import { type DataflowProcessorInformation, processDataflowFor } from '../../../processor' -import { type DataflowScopeName, type Identifier, overwriteEnvironments, type REnvironmentInformation, resolveByName } from '../../../environments' -import type { DataflowInformation } from '../../info' -import { dataflowLogger } from '../../../index' -import { getConfig } from '../../../../config' +import type { + IdGenerator, NodeId, + NoInfo, + NormalizedAst, + ParentInformation, RFunctionArgument, + RParseRequest, + RParseRequestProvider, + RSymbol +} from '../../../../../../r-bridge' +import { + EmptyArgument + , requestFingerprint, sourcedDeterministicCountingIdGenerator, requestProviderFromFile, RType, retrieveParseDataFromRCode, + removeRQuotes } from '../../../../../../r-bridge' +import { RShellExecutor } from '../../../../../../r-bridge/shell-executor' +import { type DataflowProcessorInformation, processDataflowFor } from '../../../../../processor' +import type { DataflowInformation } from '../../../../../info' +import { dataflowLogger } from '../../../../../index' +import { overwriteEnvironment } from '../../../../../environments' +import { getConfig } from '../../../../../../config' +import { normalize } from '../../../../../../r-bridge/lang-4.x/ast/parser/json/parser' +import { processKnownFunctionCall } from '../known-call-handling' let sourceProvider = requestProviderFromFile() @@ -14,28 +26,22 @@ export function setSourceProvider(provider: RParseRequestProvider): void { sourceProvider = provider } -export function isSourceCall(name: Identifier, scope: DataflowScopeName, environments: REnvironmentInformation): boolean { - const definitions = resolveByName(name, 
scope, environments) - if(definitions === undefined) { - return false - } - // fail if there are multiple definitions because then we must treat the complete import as a maybe because it might do something different - if(definitions.length !== 1) { - return false - } - const def = definitions[0] - return def.name == 'source' && def.kind == 'built-in-function' -} +export function processSourceCall( + name: RSymbol, + args: readonly RFunctionArgument[], + rootId: NodeId, + data: DataflowProcessorInformation +): DataflowInformation { + const information = processKnownFunctionCall({ name, args, rootId, data }).information -export function processSourceCall(functionCall: RFunctionCall, data: DataflowProcessorInformation, information: DataflowInformation): DataflowInformation { - const sourceFile = functionCall.arguments[0] as RArgument | undefined + const sourceFile = args[0] if(getConfig().ignoreSourceCalls) { dataflowLogger.info(`Skipping source call ${JSON.stringify(sourceFile)} (disabled in config file)`) return information } - if(sourceFile?.value?.type == RType.String) { + if(sourceFile !== EmptyArgument && sourceFile?.value?.type == RType.String) { const path = removeRQuotes(sourceFile.lexeme) const request = sourceProvider.createRequest(path) @@ -45,7 +51,7 @@ export function processSourceCall(functionCall: RFunctionCall(request: RParseRequest, data: DataflowP let normalized: NormalizedAst let dataflow: DataflowInformation try { - const parsed = executeSingleSubStep('parse', request, executor) as string - normalized = executeSingleSubStep('normalize', parsed, undefined, getId) as NormalizedAst + const parsed = retrieveParseDataFromRCode(request, executor) as string + normalized = normalize(parsed, getId) as NormalizedAst dataflow = processDataflowFor(normalized.ast, { ...data, currentRequest: request, - environments: information.environments, + environment: information.environment, referenceChain: [...data.referenceChain, requestFingerprint(request)] }) } 
catch(e) { @@ -74,7 +80,7 @@ export function sourceRequest(request: RParseRequest, data: DataflowP // update our graph with the sourced file's information const newInformation = { ...information } - newInformation.environments = overwriteEnvironments(information.environments, dataflow.environments) + newInformation.environment = overwriteEnvironment(information.environment, dataflow.environment) newInformation.graph.mergeWith(dataflow.graph) // this can be improved, see issue #628 for(const [k, v] of normalized.idMap) { diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-while-loop.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-while-loop.ts new file mode 100644 index 0000000000..84cef5cd09 --- /dev/null +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-while-loop.ts @@ -0,0 +1,74 @@ +import type { NodeId, ParentInformation, RFunctionArgument, RNode, RSymbol } from '../../../../../../r-bridge' +import { EmptyArgument } from '../../../../../../r-bridge' +import type { DataflowProcessorInformation } from '../../../../../processor' +import type { DataflowInformation } from '../../../../../info' +import { alwaysExits , filterOutLoopExitPoints } from '../../../../../info' +import { + linkCircularRedefinitionsWithinALoop, linkInputs, + produceNameSharedIdMap +} from '../../../../linker' +import { dataflowLogger, EdgeType, makeAllMaybe } from '../../../../../index' +import { processKnownFunctionCall } from '../known-call-handling' +import { guard, isUndefined } from '../../../../../../util/assert' +import { unpackArgument } from '../argument/unpack-argument' + +export function processWhileLoop( + name: RSymbol, + args: readonly RFunctionArgument[], + rootId: NodeId, + data: DataflowProcessorInformation +): DataflowInformation { + if(args.length !== 2 || args[1] === EmptyArgument) { + dataflowLogger.warn(`While-Loop ${name.content} does not have 2 arguments, skipping`) + return processKnownFunctionCall({ name, 
args, rootId, data }).information + } + + const unpackedArgs = args.map(unpackArgument) + + if(unpackedArgs.some(isUndefined)) { + dataflowLogger.warn(`While-Loop ${name.content} has empty arguments in ${JSON.stringify(args)}, skipping`) + return processKnownFunctionCall({ name, args, rootId, data }).information + } + + /* we inject the cf-dependency of the while-loop after the condition */ + const { information, processedArguments } = processKnownFunctionCall({ + name, + args: unpackedArgs as RNode[], + rootId, + data, + markAsNSE: [1], + patchData: (d, i) => { + if(i === 1) { + return { ...d, controlDependencies: [...d.controlDependencies ?? [], name.info.id] } + } + return d + } }) + const [condition, body] = processedArguments + + guard(condition !== undefined && body !== undefined, () => `While-Loop ${name.content} has no condition or body, impossible!`) + const originalDependency = data.controlDependencies + + if(alwaysExits(condition)) { + dataflowLogger.warn(`While-Loop ${rootId} forces exit in condition, skipping rest`) + return condition + } + + const remainingInputs = linkInputs([ + ...makeAllMaybe(body.unknownReferences, information.graph, information.environment, false), + ...makeAllMaybe(body.in, information.graph, information.environment, false) + ], information.environment, [...condition.in, ...condition.unknownReferences], information.graph, true) + linkCircularRedefinitionsWithinALoop(information.graph, produceNameSharedIdMap(remainingInputs), body.out) + + // as the while-loop always evaluates its condition + information.graph.addEdge(name.info.id, condition.entryPoint, { type: EdgeType.Reads }) + + return { + unknownReferences: [], + in: [{ nodeId: name.info.id, name: name.lexeme, controlDependencies: originalDependency }, ...remainingInputs], + out: [...makeAllMaybe(body.out, information.graph, information.environment, true), ...condition.out], + entryPoint: name.info.id, + exitPoints: filterOutLoopExitPoints(body.exitPoints), + graph: 
information.graph, + environment: information.environment + } +} diff --git a/src/dataflow/internal/process/functions/call/common.ts b/src/dataflow/internal/process/functions/call/common.ts new file mode 100644 index 0000000000..ef9a0a52ca --- /dev/null +++ b/src/dataflow/internal/process/functions/call/common.ts @@ -0,0 +1,107 @@ +import type { DataflowInformation } from '../../../../info' +import type { NodeId, ParentInformation, RFunctionArgument, RNode, RSymbol } from '../../../../../r-bridge' +import { EmptyArgument, RType } from '../../../../../r-bridge' +import type { DataflowProcessorInformation } from '../../../../processor' +import { processDataflowFor } from '../../../../processor' +import type { DataflowGraph, FunctionArgument } from '../../../../graph' +import { VertexType , EdgeType } from '../../../../graph' +import type { IdentifierReference, REnvironmentInformation } from '../../../../environments' +import { overwriteEnvironment, resolveByName } from '../../../../environments' + +export interface ProcessAllArgumentInput { + readonly functionName: DataflowInformation + readonly args: readonly (RNode | RFunctionArgument)[] + readonly data: DataflowProcessorInformation + readonly finalGraph: DataflowGraph + readonly functionRootId: NodeId + /* allows passing a data processor in-between each argument; cannot modify env currently */ + readonly patchData?: (data: DataflowProcessorInformation, i: number) => DataflowProcessorInformation + /** which arguments are to be marked as {@link EdgeType#NonStandardEvaluation|non-standard-evaluation}? 
*/ + readonly markAsNSE?: readonly number[] +} + +export interface ProcessAllArgumentResult { + readonly finalEnv: REnvironmentInformation + readonly callArgs: FunctionArgument[] + readonly remainingReadInArgs: IdentifierReference[] + readonly processedArguments: (DataflowInformation | undefined)[] +} + +export function processAllArguments( + { functionName, args, data, finalGraph, functionRootId, patchData = d => d }: ProcessAllArgumentInput +): ProcessAllArgumentResult { + let finalEnv = functionName.environment + // arg env contains the environments with other args defined + let argEnv = functionName.environment + const callArgs: FunctionArgument[] = [] + const processedArguments: (DataflowInformation | undefined)[] = [] + const remainingReadInArgs = [] + let i = -1 + for(const arg of args) { + i++ + data = patchData(data, i) + if(arg === EmptyArgument) { + callArgs.push(EmptyArgument) + processedArguments.push(undefined) + continue + } + + const processed = processDataflowFor(arg, { ...data, environment: argEnv }) + processedArguments.push(processed) + + finalEnv = overwriteEnvironment(finalEnv, processed.environment) + + // resolve reads within argument, we resolve before adding the `processed.environment` to avoid cyclic dependencies + for(const ingoing of [...processed.in, ...processed.unknownReferences]) { + const tryToResolve = ingoing.name ? 
resolveByName(ingoing.name, argEnv) : undefined + + if(tryToResolve === undefined) { + remainingReadInArgs.push(ingoing) + } else { + for(const resolved of tryToResolve) { + finalGraph.addEdge(ingoing.nodeId, resolved.nodeId, { type: EdgeType.Reads }) + } + } + } + argEnv = overwriteEnvironment(argEnv, processed.environment) + + finalGraph.mergeWith(processed.graph) + + if(arg.type !== RType.Argument || !arg.name) { + callArgs.push({ nodeId: processed.entryPoint, controlDependencies: undefined }) + } else { + callArgs.push({ nodeId: processed.entryPoint, name: arg.name.content, controlDependencies: undefined }) + } + + finalGraph.addEdge(functionRootId, processed.entryPoint, { type: EdgeType.Argument }) + } + return { finalEnv, callArgs, remainingReadInArgs, processedArguments } +} + +export interface PatchFunctionCallInput { + readonly nextGraph: DataflowGraph + readonly rootId: NodeId + readonly name: RSymbol + readonly data: DataflowProcessorInformation + readonly argumentProcessResult: readonly (DataflowInformation | undefined)[] +} + +export function patchFunctionCall( + { nextGraph, rootId, name, data, argumentProcessResult }: PatchFunctionCallInput +): void { + nextGraph.addVertex({ + tag: VertexType.FunctionCall, + id: rootId, + name: name.content, + environment: data.environment, + /* will be overwritten accordingly */ + onlyBuiltin: false, + controlDependencies: data.controlDependencies, + args: argumentProcessResult.map(arg => arg === undefined ? 
EmptyArgument : { nodeId: arg.entryPoint, controlDependencies: undefined }) + }) + for(const arg of argumentProcessResult) { + if(arg) { + nextGraph.addEdge(rootId, arg.entryPoint, { type: EdgeType.Argument }) + } + } +} diff --git a/src/dataflow/internal/process/functions/call/default-call-handling.ts b/src/dataflow/internal/process/functions/call/default-call-handling.ts new file mode 100644 index 0000000000..af8b2df907 --- /dev/null +++ b/src/dataflow/internal/process/functions/call/default-call-handling.ts @@ -0,0 +1,14 @@ +import type { DataflowInformation } from '../../../../info' +import type { DataflowProcessorInformation } from '../../../../processor' +import type { ParentInformation, RFunctionCall } from '../../../../../r-bridge' +import { processNamedCall } from './named-call-handling' +import { processUnnamedFunctionCall } from './unnamed-call-handling' + + +export function processFunctionCall(functionCall: RFunctionCall, data: DataflowProcessorInformation): DataflowInformation { + if(functionCall.flavor === 'named') { + return processNamedCall(functionCall.functionName, functionCall.arguments, functionCall.info.id, data) + } else { + return processUnnamedFunctionCall(functionCall, data) + } +} diff --git a/src/dataflow/internal/process/functions/call/known-call-handling.ts b/src/dataflow/internal/process/functions/call/known-call-handling.ts new file mode 100644 index 0000000000..6b93159d40 --- /dev/null +++ b/src/dataflow/internal/process/functions/call/known-call-handling.ts @@ -0,0 +1,99 @@ +import type { NodeId, ParentInformation, RFunctionArgument, RNode, RSymbol } from '../../../../../r-bridge' +import type { DataflowProcessorInformation } from '../../../../processor' +import { processDataflowFor } from '../../../../processor' +import type { DataflowInformation } from '../../../../info' +import { ExitPointType } from '../../../../info' +import { DataflowGraph, EdgeType, VertexType } from '../../../../graph' +import type { IdentifierReference } 
from '../../../../index' +import { dataflowLogger } from '../../../../index' +import { processAllArguments } from './common' + +export interface ProcessKnownFunctionCallInput { + readonly name: RSymbol + readonly args: readonly (RNode | RFunctionArgument)[] + readonly rootId: NodeId + readonly data: DataflowProcessorInformation + /* should arguments be processed from right to left? This does not affect the order recorded in the call but of the environments */ + readonly reverseOrder?: boolean + /** which arguments are to be marked as {@link EdgeType#NonStandardEvaluation|non-standard-evaluation}? */ + readonly markAsNSE?: readonly number[] + /* allows passing a data processor in-between each argument */ + readonly patchData?: (data: DataflowProcessorInformation, arg: number) => DataflowProcessorInformation +} + +export interface ProcessKnownFunctionCallResult { + readonly information: DataflowInformation + readonly processedArguments: readonly (DataflowInformation | undefined)[] + readonly fnRef: IdentifierReference +} + +export function markNonStandardEvaluationEdges( + markAsNSE: readonly number[] | undefined, + callArgs: readonly (DataflowInformation | undefined)[], + finalGraph: DataflowGraph, + rootId: NodeId +) { + if(markAsNSE === undefined) { + return + } + for(const nse of markAsNSE) { + if(nse < callArgs.length) { + const arg = callArgs[nse] + if(arg !== undefined) { + finalGraph.addEdge(rootId, arg.entryPoint, { type: EdgeType.NonStandardEvaluation }) + } + } else { + dataflowLogger.warn(`Trying to mark argument ${nse} as non-standard-evaluation, but only ${callArgs.length} arguments are available`) + } + } +} + +export function processKnownFunctionCall( + { name,args, rootId,data, reverseOrder = false, markAsNSE = undefined, patchData = d => d }: ProcessKnownFunctionCallInput +): ProcessKnownFunctionCallResult { + const functionName = processDataflowFor(name, data) + + const finalGraph = new DataflowGraph() + const functionCallName = name.content + 
dataflowLogger.debug(`Using ${rootId} (name: ${functionCallName}) as root for the named function call`) + + const processArgs = reverseOrder ? [...args].reverse() : args + + const { + finalEnv, + callArgs, + remainingReadInArgs, + processedArguments + } = processAllArguments({ functionName, args: processArgs, data, finalGraph, functionRootId: rootId, patchData }) + markNonStandardEvaluationEdges(markAsNSE, processedArguments, finalGraph, rootId) + + finalGraph.addVertex({ + tag: VertexType.FunctionCall, + id: rootId, + name: functionCallName, + environment: data.environment, + /* will be overwritten accordingly */ + onlyBuiltin: false, + controlDependencies: data.controlDependencies, + args: reverseOrder ? [...callArgs].reverse() : callArgs + }) + + const inIds = remainingReadInArgs + const fnRef = { nodeId: rootId, name: functionCallName, controlDependencies: data.controlDependencies } + inIds.push(fnRef) + + return { + information: { + unknownReferences: [], + in: inIds, + /* we do not keep the argument out as it has been linked by the function */ + out: functionName.out, + graph: finalGraph, + environment: finalEnv, + entryPoint: rootId, + exitPoints: [{ nodeId: rootId, type: ExitPointType.Default, controlDependencies: data.controlDependencies }] + }, + processedArguments: reverseOrder ? 
[...processedArguments].reverse() : processedArguments, + fnRef + } +} diff --git a/src/dataflow/internal/process/functions/call/named-call-handling.ts b/src/dataflow/internal/process/functions/call/named-call-handling.ts new file mode 100644 index 0000000000..9db30cb29f --- /dev/null +++ b/src/dataflow/internal/process/functions/call/named-call-handling.ts @@ -0,0 +1,69 @@ +import type { NodeId, ParentInformation, RFunctionArgument, RSymbol } from '../../../../../r-bridge' +import type { DataflowProcessorInformation } from '../../../../processor' +import type { DataflowInformation } from '../../../../info' +import { initializeCleanDataflowInformation } from '../../../../info' +import { processKnownFunctionCall } from './known-call-handling' +import { appendEnvironment, resolveByName } from '../../../../environments' +import { VertexType } from '../../../../graph' + + +function mergeInformation(info: DataflowInformation | undefined, newInfo: DataflowInformation): DataflowInformation { + if(info === undefined) { + return newInfo + } + + return { + unknownReferences: [...info.unknownReferences, ...newInfo.unknownReferences], + in: [...info.in, ...newInfo.in], + out: [...info.out, ...newInfo.out], + graph: info.graph.mergeWith(newInfo.graph), + environment: appendEnvironment(info.environment, newInfo.environment), + entryPoint: newInfo.entryPoint, + exitPoints: [...info.exitPoints, ...newInfo.exitPoints], + } +} + +function processDefaultFunctionProcessor( + information: DataflowInformation | undefined, + name: RSymbol, + args: readonly RFunctionArgument[], + rootId: NodeId, + data: DataflowProcessorInformation +) { + const call = processKnownFunctionCall({ name, args, rootId, data }) + return mergeInformation(information, call.information) +} + +export function processNamedCall( + name: RSymbol, + args: readonly RFunctionArgument[], + rootId: NodeId, + data: DataflowProcessorInformation +): DataflowInformation { + const resolved = resolveByName(name.content, 
data.environment) ?? [] + let defaultProcessor = resolved.length === 0 + + let information: DataflowInformation | undefined = undefined + let builtIn = false + + for(const resolvedFunction of resolved) { + if(resolvedFunction.kind === 'built-in-function') { + builtIn = true + information = mergeInformation(information, resolvedFunction.processor(name, args, rootId, data)) + } else { + defaultProcessor = true + } + } + + if(defaultProcessor) { + information = processDefaultFunctionProcessor(information, name, args, rootId, data) + } else if(information && builtIn) { + // mark the function call as built in only + const v = information.graph.getVertex(rootId) + if(v?.tag === VertexType.FunctionCall) { + v.onlyBuiltin = true + } + } + + return information ?? initializeCleanDataflowInformation(rootId, data) +} diff --git a/src/dataflow/internal/process/functions/call/unnamed-call-handling.ts b/src/dataflow/internal/process/functions/call/unnamed-call-handling.ts new file mode 100644 index 0000000000..9f21b57c99 --- /dev/null +++ b/src/dataflow/internal/process/functions/call/unnamed-call-handling.ts @@ -0,0 +1,69 @@ +import type { ParentInformation, RUnnamedFunctionCall } from '../../../../../r-bridge' +import { RType } from '../../../../../r-bridge' +import type { DataflowProcessorInformation } from '../../../../processor' +import { processDataflowFor } from '../../../../processor' +import type { DataflowInformation } from '../../../../info' +import { DataflowGraph, EdgeType, VertexType } from '../../../../graph' +import { dataflowLogger } from '../../../../index' +import { processAllArguments } from './common' +import { linkArgumentsOnCall } from '../../../linker' + +export const UnnamedFunctionCallPrefix = 'unnamed-function-call-' + +export function processUnnamedFunctionCall(functionCall: RUnnamedFunctionCall, data: DataflowProcessorInformation): DataflowInformation { + const calledFunction = processDataflowFor(functionCall.calledFunction, data) + + const finalGraph 
= new DataflowGraph() + const functionRootId = functionCall.info.id + const calledRootId = functionCall.calledFunction.info.id + const functionCallName = `${UnnamedFunctionCallPrefix}${functionRootId}` + dataflowLogger.debug(`Using ${functionRootId} as root for the unnamed function call`) + // we know, that it calls the toplevel: + finalGraph.addEdge(functionRootId, calledRootId, { type: EdgeType.Calls }) + finalGraph.addEdge(functionRootId, calledRootId, { type: EdgeType.Reads }) + // keep the defined function + finalGraph.mergeWith(calledFunction.graph) + + const { + finalEnv, + callArgs, + remainingReadInArgs + } = processAllArguments({ + functionName: calledFunction, + args: functionCall.arguments, + data, + finalGraph, + functionRootId + }) + + finalGraph.addVertex({ + tag: VertexType.FunctionCall, + id: functionRootId, + name: functionCallName, + environment: data.environment, + /* can never be a direct built-in-call */ + onlyBuiltin: false, + controlDependencies: data.controlDependencies, + args: callArgs // same reference + }) + + const inIds = remainingReadInArgs + inIds.push({ nodeId: functionRootId, name: functionCallName, controlDependencies: data.controlDependencies }) + + if(functionCall.calledFunction.type === RType.FunctionDefinition) { + linkArgumentsOnCall(callArgs, functionCall.calledFunction.parameters, finalGraph) + } + // push the called function to the ids: + inIds.push(...calledFunction.in, ...calledFunction.unknownReferences) + + return { + unknownReferences: [], + in: inIds, + // we do not keep the argument out as it has been linked by the function + out: calledFunction.out, + graph: finalGraph, + environment: finalEnv, + entryPoint: calledRootId, + exitPoints: calledFunction.exitPoints + } +} diff --git a/src/dataflow/internal/process/functions/exit-points.ts b/src/dataflow/internal/process/functions/exit-points.ts deleted file mode 100644 index dcf6ac9e80..0000000000 --- a/src/dataflow/internal/process/functions/exit-points.ts +++ 
/dev/null @@ -1,141 +0,0 @@ -import type { - NodeId, - ParentInformation, - RExpressionList, - RFunctionDefinition, RIfThenElse, RLoopConstructs, - RNode } from '../../../../r-bridge' -import { - RType -} from '../../../../r-bridge' -import { assertUnreachable } from '../../../../util/assert' - -interface ExitPointsInformation { - knownIds: NodeId[] - potentialIds: NodeId[] -} - -export function retrieveExitPointsOfFunctionDefinition(functionDefinition: RFunctionDefinition): NodeId[] { - const exitPoints = visitExitPoints(functionDefinition.body) - return exitPoints.knownIds.concat(exitPoints.potentialIds) -} - -function visitExitPoints(node: RNode): ExitPointsInformation { - const type = node.type - switch(type) { - case RType.ExpressionList: - return visitExpressionList(node) - case RType.FunctionCall: - if(node.flavor === 'named' && node.functionName.content === 'return') { - return { - knownIds: [ node.info.id ], - potentialIds: [] - } - } - break - case RType.FunctionDefinition: - // do not further investigate - break - case RType.ForLoop: - case RType.WhileLoop: - case RType.RepeatLoop: - // loops return invisible null, as we do not trace values, but they may contain return statements - return visitLoops(node) - case RType.IfThenElse: - return visitIf(node) - case RType.Pipe: - case RType.BinaryOp: - // assignments return invisible rhs - return knownIdsOfChildren(node.info.id, node.lhs, node.rhs) - case RType.UnaryOp: - return knownIdsOfChildren(node.info.id, node.operand) - case RType.Parameter: - return node.defaultValue ? knownIdsOfChildren(node.info.id, node.defaultValue) : { knownIds: [], potentialIds: [] } - case RType.Argument: - return node.value ? 
knownIdsOfChildren(node.info.id, node.value) : { knownIds: [], potentialIds: [] } - case RType.Symbol: - case RType.Logical: - case RType.Number: - case RType.String: - case RType.Access: - // just use this node - break - // contain noting to return/return `invisible(null)` - case RType.Comment: - case RType.LineDirective: - case RType.Break: - case RType.Next: - return { knownIds: [], potentialIds: [] } - default: - assertUnreachable(type) - } - - return { - knownIds: [], - potentialIds: [ node.info.id ] - } -} - - -// we use keepSelfAsPotential in order to track nodes like 2 + 3, which keep themselves as potential exit points if there are no knownIds -function knownIdsOfChildren(keepSelfAsPotential: NodeId, ...children: RNode[]): ExitPointsInformation { - const knownIds = children.flatMap(child => visitExitPoints(child).knownIds) - return { - knownIds, - potentialIds: knownIds.length === 0 ? [ keepSelfAsPotential ] : [] - } -} -function visitLoops(loop: RLoopConstructs): ExitPointsInformation { - const result = visitExitPoints(loop.body) - // conditions may contain return statements which we have to keep - let otherKnownIds: NodeId[] = [] - if(loop.type === RType.ForLoop) { - otherKnownIds = visitExitPoints(loop.variable).knownIds - otherKnownIds.push(...visitExitPoints(loop.vector).knownIds) - } else if(loop.type === RType.WhileLoop) { - otherKnownIds = visitExitPoints(loop.condition).knownIds - } - return { - knownIds: [...result.knownIds, ...otherKnownIds], - potentialIds: [] - } -} - -function visitExpressionList(node: RExpressionList): ExitPointsInformation { - const known: NodeId[] = [] - let lastPotentialIds: NodeId[] = [] - - // we only keep the potential ids of the last expression, which is no comment - for(const child of node.children) { - const { knownIds, potentialIds } = visitExitPoints(child) - known.push(...knownIds) - if(child.type !== RType.Comment) { - lastPotentialIds = potentialIds - } - } - - return { - knownIds: known, - potentialIds: 
lastPotentialIds - } -} - -function visitIf(node: RIfThenElse): ExitPointsInformation { - // conditions can contain return statements - const known: NodeId[] = visitExitPoints(node.condition).knownIds - const potential: NodeId[] = [] - - const thenCase = visitExitPoints(node.then) - known.push(...thenCase.knownIds) - potential.push(...thenCase.potentialIds) - - if(node.otherwise !== undefined) { - const otherwiseCase = visitExitPoints(node.otherwise) - known.push(...otherwiseCase.knownIds) - potential.push(...otherwiseCase.potentialIds) - } - - return { - knownIds: known, - potentialIds: potential - } -} diff --git a/src/dataflow/internal/process/functions/function-call.ts b/src/dataflow/internal/process/functions/function-call.ts deleted file mode 100644 index f3bbfbee51..0000000000 --- a/src/dataflow/internal/process/functions/function-call.ts +++ /dev/null @@ -1,125 +0,0 @@ -import type { DataflowInformation } from '../../info' -import type { DataflowProcessorInformation } from '../../../processor' -import { processDataflowFor } from '../../../processor' -import { define, overwriteEnvironments, resolveByName } from '../../../environments' -import type { ParentInformation, RFunctionCall } from '../../../../r-bridge' -import { RType } from '../../../../r-bridge' -import { guard } from '../../../../util/assert' -import type { FunctionArgument } from '../../../index' -import { DataflowGraph, dataflowLogger, EdgeType } from '../../../index' -import { linkArgumentsOnCall } from '../../linker' -import { LocalScope } from '../../../environments/scopes' -import { isSourceCall, processSourceCall } from './source' - -export const UnnamedFunctionCallPrefix = 'unnamed-function-call-' - -export function processFunctionCall(functionCall: RFunctionCall, data: DataflowProcessorInformation): DataflowInformation { - const named = functionCall.flavor === 'named' - const functionName = processDataflowFor(named ? 
functionCall.functionName : functionCall.calledFunction, data) - - let finalEnv = functionName.environments - // arg env contains the environments with other args defined - let argEnv = functionName.environments - const finalGraph = new DataflowGraph() - const callArgs: FunctionArgument[] = [] - const args = [] - const remainingReadInArgs = [] - - const functionRootId = functionCall.info.id - - let functionCallName: string - - if(named) { - functionCallName = functionCall.functionName.content - dataflowLogger.debug(`Using ${functionRootId} (name: ${functionCallName}) as root for the function call`) - } else { - functionCallName = `${UnnamedFunctionCallPrefix}${functionRootId}` - dataflowLogger.debug(`Using ${functionRootId} as root for the unnamed function call`) - // we know, that it calls the toplevel: - finalGraph.addEdge(functionRootId, functionCall.calledFunction.info.id, EdgeType.Calls, 'always') - // keep the defined function - finalGraph.mergeWith(functionName.graph) - } - - for(const arg of functionCall.arguments) { - if(arg === undefined) { - callArgs.push('empty') - args.push(undefined) - continue - } - - const processed = processDataflowFor(arg, { ...data, environments: argEnv }) - args.push(processed) - - finalEnv = overwriteEnvironments(finalEnv, processed.environments) - argEnv = overwriteEnvironments(argEnv, processed.environments) - - finalGraph.mergeWith(processed.graph) - - guard(processed.out.length > 0, () => `Argument ${JSON.stringify(arg)} has no out references, but needs one for the unnamed arg`) - if(arg.name === undefined) { - callArgs.push(processed.out[0]) - } else { - callArgs.push([arg.name.content, processed.out[0]]) - } - - // add an argument edge to the final graph - finalGraph.addEdge(functionRootId, processed.out[0], EdgeType.Argument, 'always') - // resolve reads within argument - for(const ingoing of [...processed.in, ...processed.unknownReferences]) { - const tryToResolve = resolveByName(ingoing.name, LocalScope, argEnv) - - 
if(tryToResolve === undefined) { - remainingReadInArgs.push(ingoing) - } else { - for(const resolved of tryToResolve) { - finalGraph.addEdge(ingoing.nodeId, resolved.nodeId,EdgeType.Reads, 'always') - } - } - } - if(arg.type as RType === RType.Argument && arg.name !== undefined) { - argEnv = define( - { ...processed.out[0], definedAt: arg.info.id, kind: 'argument' }, - LocalScope, - argEnv - ) - } - } - - finalGraph.addVertex({ - tag: 'function-call', - id: functionRootId, - name: functionCallName, - environment: data.environments, - when: 'always', - scope: data.activeScope, - args: callArgs // same reference - }) - - const inIds = remainingReadInArgs - inIds.push({ nodeId: functionRootId, name: functionCallName, scope: data.activeScope, used: 'always' }) - - if(!named) { - if(functionCall.calledFunction.type === RType.FunctionDefinition) { - linkArgumentsOnCall(callArgs, functionCall.calledFunction.parameters, finalGraph) - } - // push the called function to the ids: - inIds.push(...functionName.in, ...functionName.unknownReferences) - } - - let info: DataflowInformation = { - unknownReferences: [], - in: inIds, - out: functionName.out, // we do not keep argument out as it has been linked by the function - graph: finalGraph, - environments: finalEnv, - scope: data.activeScope - } - - // parse a source call and analyze the referenced code - if(isSourceCall(functionCallName, data.activeScope,finalEnv)) { - info = processSourceCall(functionCall, data, info) - } - - return info -} diff --git a/src/dataflow/internal/process/functions/function-definition.ts b/src/dataflow/internal/process/functions/function-definition.ts deleted file mode 100644 index bed0e463f9..0000000000 --- a/src/dataflow/internal/process/functions/function-definition.ts +++ /dev/null @@ -1,210 +0,0 @@ -import type { DataflowInformation } from '../../info' -import type { DataflowProcessorInformation } from '../../../processor' -import { processDataflowFor } from '../../../processor' -import type { 
- IdentifierReference, - REnvironmentInformation } from '../../../environments' -import { - initializeCleanEnvironments, - overwriteEnvironments, - popLocalEnvironment, - pushLocalEnvironment, - resolveByName -} from '../../../environments' -import { linkInputs } from '../../linker' -import type { DataflowFunctionFlowInformation, DataflowMap } from '../../../index' -import { DataflowGraph, dataflowLogger, EdgeType } from '../../../index' -import type { NodeId, ParentInformation, RFunctionDefinition } from '../../../../r-bridge' -import { collectAllIds } from '../../../../r-bridge' -import { retrieveExitPointsOfFunctionDefinition } from './exit-points' -import { guard } from '../../../../util/assert' -import { LocalScope } from '../../../environments/scopes' - - -function updateNestedFunctionClosures(exitPoints: NodeId[], subgraph: DataflowGraph, outEnvironment: REnvironmentInformation, data: DataflowProcessorInformation, functionDefinition: RFunctionDefinition) { - // track *all* function definitions - included those nested within the current graph - // try to resolve their 'in' by only using the lowest scope which will be popped after this definition - for(const [id, info] of subgraph.vertices(true)) { - if(info.tag !== 'function-definition') { - continue - } - const ingoingRefs = info.subflow.in - const remainingIn: IdentifierReference[] = [] - for(const ingoing of ingoingRefs) { - for(const exitPoint of exitPoints) { - const node = subgraph.get(exitPoint, true) - const env = initializeCleanEnvironments() - env.current.memory = node === undefined ? 
outEnvironment.current.memory : node[0].environment.current.memory - const resolved = resolveByName(ingoing.name, data.activeScope, env) - if(resolved === undefined) { - remainingIn.push(ingoing) - continue - } - dataflowLogger.trace(`Found ${resolved.length} references to open ref ${id} in closure of function definition ${functionDefinition.info.id}`) - for(const ref of resolved) { - subgraph.addEdge(ingoing, ref, EdgeType.Reads, exitPoints.length > 1 ? 'maybe' : 'always') - } - } - } - dataflowLogger.trace(`Keeping ${remainingIn.length} references to open ref ${id} in closure of function definition ${functionDefinition.info.id}`) - info.subflow.in = [...new Set(remainingIn)] - } -} - -function prepareFunctionEnvironment(data: DataflowProcessorInformation) { - let env = initializeCleanEnvironments() - for(let i = 0; i < data.environments.level + 1 /* add another env */; i++) { - env = pushLocalEnvironment(env) - } - return { ...data, environments: env } -} - -/** - * Within something like `f <- function(a=b, m=3) { b <- 1; a; b <- 5; a + 1 }` - * `a` will be defined by `b` and `b`will be a promise object bound by the first definition of b it can find. - * This means, that this function returns `2` due to the first `b <- 1` definition. - * If the code would be `f <- function(a=b, m=3) { if(m > 3) { b <- 1; }; a; b <- 5; a + 1 }`, we need a link to `b <- 1` and `b <- 6` - * as `b` can be defined by either one of them. - *

- * Currently we may be unable to narrow down every definition within the body as we have not implemented ways to track what covers a first definitions - */ -function findPromiseLinkagesForParameters(parameters: DataflowGraph, readInParameters: IdentifierReference[], parameterEnvs: REnvironmentInformation, body: DataflowInformation): IdentifierReference[] { - // first we try to bind again within parameters - if we have it, fine - const remainingRead: IdentifierReference[] = [] - for(const read of readInParameters) { - const resolved = resolveByName(read.name, LocalScope, parameterEnvs) - if(resolved !== undefined) { - for(const ref of resolved) { - parameters.addEdge(read, ref, EdgeType.Reads, 'always') - } - continue - } - // if not resolved, link all outs within the body as potential reads - // regarding the sort we can ignore equality as nodeIds are unique - // we sort to get the lowest id - if it is an 'always' flag we can safely use it instead of all of them - const writingOuts = body.out.filter(o => o.name === read.name).sort((a, b) => a.nodeId < b.nodeId ? 
1 : -1) - if(writingOuts.length === 0) { - remainingRead.push(read) - continue - } - if(writingOuts[0].used === 'always') { - parameters.addEdge(read, writingOuts[0], EdgeType.Reads, 'always') - continue - } - for(const out of writingOuts) { - parameters.addEdge(read, out, EdgeType.Reads, 'maybe') - } - } - return remainingRead -} - -export function processFunctionDefinition(functionDefinition: RFunctionDefinition, data: DataflowProcessorInformation): DataflowInformation { - dataflowLogger.trace(`Processing function definition with id ${functionDefinition.info.id}`) - - const originalEnvironments = data.environments - // within a function def we do not pass on the outer binds as they could be overwritten when called - data = prepareFunctionEnvironment(data) - - const subgraph = new DataflowGraph() - - let readInParameters: IdentifierReference[] = [] - for(const param of functionDefinition.parameters) { - const processed = processDataflowFor(param, data) - subgraph.mergeWith(processed.graph) - const read = [...processed.in, ...processed.unknownReferences] - linkInputs(read, data.activeScope, data.environments, readInParameters, subgraph, false) - data = { ...data, environments: overwriteEnvironments(data.environments, processed.environments) } - } - const paramsEnvironments = data.environments - - const body = processDataflowFor(functionDefinition.body, data) - // as we know, that parameters can not duplicate, we overwrite their environments (which is the correct behavior, if someone uses non-`=` arguments in functions) - const bodyEnvironment = body.environments - - - readInParameters = findPromiseLinkagesForParameters(subgraph, readInParameters, paramsEnvironments, body) - - const readInBody = [...body.in, ...body.unknownReferences] - // there is no uncertainty regarding the arguments, as if a function header is executed, so is its body - const remainingRead = linkInputs(readInBody, data.activeScope, paramsEnvironments, readInParameters.slice(), body.graph, true 
/* functions do not have to be called */) - - subgraph.mergeWith(body.graph) - - dataflowLogger.trace(`Function definition with id ${functionDefinition.info.id} has ${remainingRead.length} remaining reads`) - - // link same-def-def with arguments - for(const writeTarget of body.out) { - const writeName = writeTarget.name - - const resolved = resolveByName(writeName, data.activeScope, paramsEnvironments) - if(resolved !== undefined) { - // write-write - for(const target of resolved) { - subgraph.addEdge(target, writeTarget, EdgeType.SameDefDef, undefined, true) - } - } - } - - const outEnvironment = overwriteEnvironments(paramsEnvironments, bodyEnvironment) - for(const read of remainingRead) { - subgraph.addVertex({ tag: 'use', id: read.nodeId, name: read.name, environment: outEnvironment, when: 'maybe' }) - } - - - const flow: DataflowFunctionFlowInformation = { - unknownReferences: [], - in: remainingRead, - out: [], - graph: new Set(subgraph.rootIds()), - environments: outEnvironment, - scope: data.activeScope - } - - const exitPoints = retrieveExitPointsOfFunctionDefinition(functionDefinition) - // if exit points are extra, we must link them to all dataflow nodes they relate to. 
- linkExitPointsInGraph(exitPoints, subgraph, data.completeAst.idMap, outEnvironment) - updateNestedFunctionClosures(exitPoints, subgraph, outEnvironment, data, functionDefinition) - - const graph = new DataflowGraph().mergeWith(subgraph, false) - graph.addVertex({ - tag: 'function-definition', - id: functionDefinition.info.id, - name: functionDefinition.info.id, - environment: popLocalEnvironment(outEnvironment), - scope: data.activeScope, - when: 'always', - subflow: flow, - exitPoints - }) - return { - unknownReferences: [] /* nothing escapes a function definition, but the function itself, will be forced in assignment: { nodeId: functionDefinition.info.id, scope: data.activeScope, used: 'always', name: functionDefinition.info.id as string } */, - in: [], - out: [], - graph, - environments: originalEnvironments, - scope: data.activeScope - } -} - - -function linkExitPointsInGraph(exitPoints: string[], graph: DataflowGraph, idMap: DataflowMap, environment: REnvironmentInformation): void { - for(const exitPoint of exitPoints) { - const exitPointNode = graph.get(exitPoint, true) - // if there already is an exit point it is either a variable or already linked - if(exitPointNode !== undefined) { - continue - } - const nodeInAst = idMap.get(exitPoint) - - guard(nodeInAst !== undefined, `Could not find exit point node with id ${exitPoint} in ast`) - graph.addVertex({ tag: 'exit-point', id: exitPoint, name: `${nodeInAst.lexeme ?? 
'??'}`, when: 'always', environment }) - - const allIds = [...collectAllIds(nodeInAst)].filter(id => graph.get(id, true) !== undefined) - for(const relatedId of allIds) { - if(relatedId !== exitPoint) { - graph.addEdge(exitPoint, relatedId, EdgeType.Relates, 'always') - } - } - } -} - diff --git a/src/dataflow/internal/process/functions/argument.ts b/src/dataflow/internal/process/functions/process-argument.ts similarity index 52% rename from src/dataflow/internal/process/functions/argument.ts rename to src/dataflow/internal/process/functions/process-argument.ts index c2b1f16904..923c0eb84f 100644 --- a/src/dataflow/internal/process/functions/argument.ts +++ b/src/dataflow/internal/process/functions/process-argument.ts @@ -1,39 +1,49 @@ -import type { DataflowInformation } from '../../info' -import type { DataflowProcessorInformation } from '../../../processor' -import { processDataflowFor } from '../../../processor' import type { ParentInformation, RArgument, RNode } from '../../../../r-bridge' import { collectAllIds, RType } from '../../../../r-bridge' -import { DataflowGraph, EdgeType } from '../../../graph' import type { IdentifierReference } from '../../../environments' -import { LocalScope } from '../../../environments/scopes' +import { DataflowGraph, EdgeType, VertexType } from '../../../graph' +import type { DataflowProcessorInformation } from '../../../processor' +import { processDataflowFor } from '../../../processor' +import type { DataflowInformation } from '../../../info' +import { ExitPointType } from '../../../info' -export const UnnamedArgumentPrefix = 'unnamed-argument-' -export function linkReadsForArgument(root: RNode, ingoingRefs: IdentifierReference[], graph: DataflowGraph) { +export function linkReadsForArgument(root: RNode, ingoingRefs: readonly IdentifierReference[], graph: DataflowGraph) { const allIdsBeforeArguments = new Set(collectAllIds(root, n => n.type === RType.Argument && n.info.id !== root.info.id)) const ingoingBeforeArgs = 
ingoingRefs.filter(r => allIdsBeforeArguments.has(r.nodeId)) for(const ref of ingoingBeforeArgs) { // link against the root reference currently I do not know how to deal with nested function calls otherwise - graph.addEdge(root.info.id, ref, EdgeType.Reads, 'always') + graph.addEdge(root.info.id, ref, { type: EdgeType.Reads }) } } -export function processFunctionArgument(argument: RArgument, data: DataflowProcessorInformation): DataflowInformation { +export function processFunctionArgument( + argument: RArgument, + data: DataflowProcessorInformation +): DataflowInformation { const name = argument.name === undefined ? undefined : processDataflowFor(argument.name, data) const value = argument.value === undefined ? undefined : processDataflowFor(argument.value, data) // we do not keep the graph of the name, as this is no node that should ever exist const graph = value?.graph ?? new DataflowGraph() - const argContent = argument.name?.content - const argumentName = argContent ?? `${UnnamedArgumentPrefix}${argument.info.id}` - graph.addVertex({ tag: 'use', id: argument.info.id, name: argumentName, environment: data.environments, when: 'always' }) + const argumentName = argument.name?.content + let entryPoint = value?.entryPoint + if(argumentName) { + graph.addVertex({ + tag: VertexType.Use, + id: argument.info.id, + name: argumentName, + controlDependencies: data.controlDependencies + }) + entryPoint = argument.info.id + } const ingoingRefs = [...value?.unknownReferences ?? [], ...value?.in ?? [], ...(name === undefined ? 
[] : [...name.in])] - if(argument.value?.type === RType.FunctionDefinition) { - graph.addEdge(argument.info.id, argument.value.info.id, EdgeType.Reads, 'always') - } else { + if(entryPoint && argument.value?.type === RType.FunctionDefinition) { + graph.addEdge(entryPoint, argument.value.info.id, { type: EdgeType.Reads }) + } else if(argumentName) { // we only need to link against those which are not already bound to another function call argument linkReadsForArgument(argument, [...ingoingRefs, ...value?.out ?? [] /* value may perform definitions */], graph) } @@ -41,11 +51,11 @@ export function processFunctionArgument(argument: RArgument r.name !== undefined), + out: [...value?.out ?? [], ...(name?.out ?? [])], graph: graph, - environments: value?.environments ?? data.environments, - scope: data.activeScope + environment: value?.environment ?? data.environment, + entryPoint: entryPoint ?? argument.info.id, + exitPoints: value?.exitPoints ?? name?.exitPoints ?? [{ nodeId: argument.info.id, type: ExitPointType.Default, controlDependencies: data.controlDependencies }] } } diff --git a/src/dataflow/internal/process/functions/parameter.ts b/src/dataflow/internal/process/functions/process-parameter.ts similarity index 75% rename from src/dataflow/internal/process/functions/parameter.ts rename to src/dataflow/internal/process/functions/process-parameter.ts index 9f307a2972..8c7559ad43 100644 --- a/src/dataflow/internal/process/functions/parameter.ts +++ b/src/dataflow/internal/process/functions/process-parameter.ts @@ -1,4 +1,5 @@ -import type { DataflowInformation } from '../../info' +import type { DataflowInformation } from '../../../info' +import { ExitPointType } from '../../../info' import type { DataflowProcessorInformation } from '../../../processor' import { processDataflowFor } from '../../../processor' import type { IdentifierDefinition } from '../../../environments' @@ -7,7 +8,6 @@ import type { ParentInformation, RParameter } from '../../../../r-bridge' import 
{ RType } from '../../../../r-bridge' import { log } from '../../../../util/log' import { EdgeType } from '../../../graph' -import { LocalScope } from '../../../environments/scopes' export function processFunctionParameter(parameter: RParameter, data: DataflowProcessorInformation): DataflowInformation { const name = processDataflowFor(parameter.name, data) @@ -17,24 +17,22 @@ export function processFunctionParameter(parameter: RParameter ({ ...n, kind: 'parameter', - used: 'always', - definedAt: parameter.info.id, - scope: LocalScope + definedAt: parameter.info.id })) - let environments = name.environments + let environment = name.environment for(const writtenNode of writtenNodes) { log.trace(`parameter ${writtenNode.name} (${writtenNode.nodeId}) is defined at id ${writtenNode.definedAt} with ${defaultValue === undefined ? 'no default value' : ' no default value'}`) graph.setDefinitionOfVertex(writtenNode) - environments = define(writtenNode, LocalScope, environments) + environment = define(writtenNode, false, environment) if(defaultValue !== undefined) { if(parameter.defaultValue?.type === RType.FunctionDefinition) { - graph.addEdge(writtenNode, parameter.defaultValue.info.id, EdgeType.DefinedBy, 'maybe' /* default arguments can be overridden! */) + graph.addEdge(writtenNode, parameter.defaultValue.info.id, { type: EdgeType.DefinedBy }) } else { const definedBy = [...defaultValue.in, ...defaultValue.unknownReferences] for(const node of definedBy) { - graph.addEdge(writtenNode, node, EdgeType.DefinedBy, 'maybe' /* default arguments can be overridden! 
*/) + graph.addEdge(writtenNode, node, { type: EdgeType.DefinedBy }) } } } @@ -45,7 +43,8 @@ export function processFunctionParameter(parameter: RParameter(ifThen: RIfThenElse, data: DataflowProcessorInformation): DataflowInformation { - const cond = processDataflowFor(ifThen.condition, data) - - data = { ...data, environments: cond.environments } - - let then: DataflowInformation | undefined - let makeThenMaybe = false - if(ifThen.condition.lexeme !== 'FALSE') { - then = processDataflowFor(ifThen.then, data) - if(ifThen.condition.lexeme !== 'TRUE') { - makeThenMaybe = true - } - } - - let otherwise: DataflowInformation | undefined - let makeOtherwiseMaybe = false - if(ifThen.otherwise !== undefined && ifThen.condition.lexeme !== 'TRUE') { - otherwise = processDataflowFor(ifThen.otherwise, data) - if(ifThen.condition.lexeme !== 'FALSE') { - makeOtherwiseMaybe = true - } - } - - const nextGraph = cond.graph.mergeWith(then?.graph).mergeWith(otherwise?.graph) - - const thenEnvironment = appendEnvironments(cond.environments, then?.environments) - const finalEnvironment = otherwise ? appendEnvironments(thenEnvironment, otherwise.environments) : thenEnvironment - - // again within an if-then-else we consider all actives to be read - const ingoing: IdentifierReference[] = [ - ...cond.in, - ...(makeThenMaybe ? makeAllMaybe(then?.in, nextGraph, finalEnvironment) : then?.in ?? []), - ...(makeOtherwiseMaybe ? makeAllMaybe(otherwise?.in, nextGraph, finalEnvironment) : otherwise?.in ?? []), - ...cond.unknownReferences, - ...(makeThenMaybe ? makeAllMaybe(then?.unknownReferences, nextGraph, finalEnvironment) : then?.unknownReferences ?? []), - ...(makeOtherwiseMaybe ? makeAllMaybe(otherwise?.unknownReferences, nextGraph, finalEnvironment) : otherwise?.unknownReferences ?? 
[]), - ] - - // we assign all with a maybe marker - // we do not merge even if they appear in both branches because the maybe links will refer to different ids - const outgoing = [ - ...cond.out, - ...(makeThenMaybe ? makeAllMaybe(then?.out, nextGraph, finalEnvironment) : then?.out ?? []), - ...(makeOtherwiseMaybe ? makeAllMaybe(otherwise?.out, nextGraph, finalEnvironment) : otherwise?.out ?? []), - ] - - linkIngoingVariablesInSameScope(nextGraph, ingoing) - - return { - unknownReferences: [], - in: ingoing, - out: outgoing, - environments: finalEnvironment, - graph: nextGraph, - scope: data.activeScope, - } -} diff --git a/src/dataflow/internal/process/loops/for-loop.ts b/src/dataflow/internal/process/loops/for-loop.ts deleted file mode 100644 index 7a428fa917..0000000000 --- a/src/dataflow/internal/process/loops/for-loop.ts +++ /dev/null @@ -1,72 +0,0 @@ -import { - linkCircularRedefinitionsWithinALoop, - linkIngoingVariablesInSameScope, - produceNameSharedIdMap -} from '../../linker' -import type { DataflowInformation } from '../../info' -import type { DataflowProcessorInformation } from '../../../processor' -import { processDataflowFor } from '../../../processor' -import { appendEnvironments, define, makeAllMaybe, overwriteEnvironments } from '../../../environments' -import type { ParentInformation, RForLoop } from '../../../../r-bridge' -import { EdgeType } from '../../../graph' -import { LocalScope } from '../../../environments/scopes' - -export function processForLoop( - loop: RForLoop, - data: DataflowProcessorInformation -): DataflowInformation { - const variable = processDataflowFor(loop.variable, data) - const vector = processDataflowFor(loop.vector, data) - let headEnvironments = overwriteEnvironments(vector.environments, variable.environments) - const headGraph= variable.graph.mergeWith(vector.graph) - - const writtenVariable = variable.unknownReferences - for(const write of writtenVariable) { - headEnvironments = define({ ...write, used: 'always', 
definedAt: loop.info.id, kind: 'variable' }, LocalScope, headEnvironments) - } - data = { ...data, environments: headEnvironments } - const body = processDataflowFor(loop.body, data) - - const nextGraph = headGraph.mergeWith(body.graph) - - const outEnvironments = appendEnvironments(headEnvironments, body.environments) - - // again within an if-then-else we consider all actives to be read - // currently i add it at the end, but is this correct? - const ingoing = [...vector.in, ...makeAllMaybe(body.in, nextGraph, outEnvironments), ...vector.unknownReferences, ...makeAllMaybe(body.unknownReferences, nextGraph, outEnvironments)] - - - // now we have to bind all open reads with the given name to the locally defined writtenVariable! - const nameIdShares = produceNameSharedIdMap(ingoing) - - for(const write of writtenVariable) { - for(const link of [...vector.in, ...vector.unknownReferences]) { - nextGraph.addEdge(write.nodeId, link.nodeId, EdgeType.DefinedBy, 'always', true) - } - - const name = write.name - const readIdsToLink = nameIdShares.get(name) - for(const readId of readIdsToLink) { - nextGraph.addEdge(readId.nodeId, write.nodeId, EdgeType.Reads, 'always', true) - } - // now, we remove the name from the id shares as they are no longer needed - nameIdShares.delete(name) - nextGraph.setDefinitionOfVertex(write) - } - - const outgoing = [...variable.out, ...writtenVariable, ...makeAllMaybe(body.out, nextGraph, outEnvironments)] - - linkIngoingVariablesInSameScope(nextGraph, ingoing) - - linkCircularRedefinitionsWithinALoop(nextGraph, nameIdShares, body.out) - - return { - unknownReferences: [], - // we only want those not bound by a local variable - in: [...variable.in, ...[...nameIdShares.values()].flat()], - out: outgoing, - graph: nextGraph, - environments: outEnvironments, - scope: data.activeScope - } -} diff --git a/src/dataflow/internal/process/loops/repeat-loop.ts b/src/dataflow/internal/process/loops/repeat-loop.ts deleted file mode 100644 index 
771ff735d3..0000000000 --- a/src/dataflow/internal/process/loops/repeat-loop.ts +++ /dev/null @@ -1,22 +0,0 @@ -import type { DataflowInformation } from '../../info' -import type { DataflowProcessorInformation } from '../../../processor' -import { processDataflowFor } from '../../../processor' -import { linkCircularRedefinitionsWithinALoop, produceNameSharedIdMap } from '../../linker' -import type { ParentInformation, RRepeatLoop } from '../../../../r-bridge' - -export function processRepeatLoop(loop: RRepeatLoop, data: DataflowProcessorInformation): DataflowInformation { - const body = processDataflowFor(loop.body, data) - - const graph = body.graph - const namedIdShares = produceNameSharedIdMap([...body.in, ...body.unknownReferences]) - linkCircularRedefinitionsWithinALoop(graph, namedIdShares, body.out) - - return { - unknownReferences: [], - in: [...body.in, ...body.unknownReferences], - out: body.out, - environments: body.environments, - scope: data.activeScope, - graph: body.graph - } -} diff --git a/src/dataflow/internal/process/loops/while-loop.ts b/src/dataflow/internal/process/loops/while-loop.ts deleted file mode 100644 index dd830a3768..0000000000 --- a/src/dataflow/internal/process/loops/while-loop.ts +++ /dev/null @@ -1,38 +0,0 @@ -import type { DataflowInformation } from '../../info' -import type { DataflowProcessorInformation } from '../../../processor' -import { processDataflowFor } from '../../../processor' -import { - appendEnvironments, - makeAllMaybe, -} from '../../../environments' -import { linkCircularRedefinitionsWithinALoop, linkInputs, produceNameSharedIdMap } from '../../linker' -import type { ParentInformation, RWhileLoop } from '../../../../r-bridge' - -export function processWhileLoop(loop: RWhileLoop, data: DataflowProcessorInformation): DataflowInformation { - const condition = processDataflowFor(loop.condition, data) - data = { ...data, environments: condition.environments } - const body = processDataflowFor(loop.body, data) - - 
const environments = condition.environments - const nextGraph = condition.graph.mergeWith(body.graph) - - const finalEnvironments = appendEnvironments(condition.environments, body.environments) - - // this is theoretically redundant, but we would have to manually mark all affected edges as maybe this way. This does that for us. - const remainingInputs = linkInputs([ - ...makeAllMaybe(body.unknownReferences, nextGraph, finalEnvironments), - ...makeAllMaybe(body.in, nextGraph, finalEnvironments)], - data.activeScope, environments, [...condition.in, ...condition.unknownReferences], nextGraph, true) - - linkCircularRedefinitionsWithinALoop(nextGraph, produceNameSharedIdMap(remainingInputs), body.out) - - return { - unknownReferences: [], - in: remainingInputs, - out: [...makeAllMaybe(body.out, nextGraph, finalEnvironments), ...condition.out], - graph: nextGraph, - /* the body might not happen if the condition is false */ - environments: finalEnvironments, - scope: data.activeScope - } -} diff --git a/src/dataflow/internal/process/operators/assignment.ts b/src/dataflow/internal/process/operators/assignment.ts deleted file mode 100644 index b58915fba4..0000000000 --- a/src/dataflow/internal/process/operators/assignment.ts +++ /dev/null @@ -1,165 +0,0 @@ -import type { NodeId, ParentInformation, RAssignmentOp, RNode } from '../../../../r-bridge' -import { collectAllIds, RType } from '../../../../r-bridge' -import type { DataflowInformation } from '../../info' -import type { DataflowProcessorInformation } from '../../../processor' -import { processDataflowFor } from '../../../processor' -import { EdgeType } from '../../../graph' -import type { - IdentifierDefinition, - IdentifierReference } from '../../../environments' -import { - define, - overwriteEnvironments -} from '../../../environments' -import { log } from '../../../../util/log' -import { dataflowLogger } from '../../../index' -import { GlobalScope, LocalScope } from '../../../environments/scopes' - -export 
function processAssignment(op: RAssignmentOp, data: DataflowProcessorInformation): DataflowInformation { - dataflowLogger.trace(`Processing assignment with id ${op.info.id}`) - const lhs = processDataflowFor(op.lhs, data) - const rhs = processDataflowFor(op.rhs, data) - const { readTargets, newWriteNodes, writeTargets, environments, swap } = processReadAndWriteForAssignmentBasedOnOp(op, lhs, rhs, data) - const nextGraph = lhs.graph.mergeWith(rhs.graph) - - // deal with special cases based on the source node and the determined read targets - - const isFunctionSide = swap ? op.lhs : op.rhs - const isFunction = isFunctionSide.type === RType.FunctionDefinition - - for(const write of newWriteNodes) { - nextGraph.setDefinitionOfVertex(write) - - if(isFunction) { - nextGraph.addEdge(write, isFunctionSide.info.id, EdgeType.DefinedBy, 'always', true) - } else { - const impactReadTargets = determineImpactOfSource(swap ? op.lhs : op.rhs, readTargets) - - for(const read of impactReadTargets) { - nextGraph.addEdge(write, read, EdgeType.DefinedBy, undefined, true) - } - } - } - return { - unknownReferences: [], - in: readTargets, - out: writeTargets, - graph: nextGraph, - environments, - scope: data.activeScope - } -} - -interface SourceAndTarget { - source: DataflowInformation - target: DataflowInformation - global: boolean - /** true if `->` or `->>` */ - swap: boolean -} - -function identifySourceAndTarget( - op: RNode, - lhs: DataflowInformation, - rhs: DataflowInformation -) : SourceAndTarget { - let source: DataflowInformation - let target: DataflowInformation - let global = false - let swap = false - - switch(op.lexeme) { - case '<-': - case '=': - case ':=': - [target, source] = [lhs, rhs] - break - case '<<-': - [target, source, global] = [lhs, rhs, true] - break - case '->': - [target, source, swap] = [rhs, lhs, true] - break - case '->>': - [target, source, global, swap] = [rhs, lhs, true, true] - break - default: - throw new Error(`Unknown assignment operator 
${JSON.stringify(op)}`) - } - return { source, target, global, swap } -} - -function produceWrittenNodes(op: RAssignmentOp, target: DataflowInformation, global: boolean, data: DataflowProcessorInformation, functionTypeCheck: RNode): IdentifierDefinition[] { - const writeNodes: IdentifierDefinition[] = [] - const isFunctionDef = functionTypeCheck.type === RType.FunctionDefinition - for(const active of target.unknownReferences) { - writeNodes.push({ - ...active, - scope: global ? GlobalScope : data.activeScope, - kind: isFunctionDef ? 'function' : 'variable', - definedAt: op.info.id - }) - } - return writeNodes -} - -function processReadAndWriteForAssignmentBasedOnOp( - op: RAssignmentOp, - lhs: DataflowInformation, rhs: DataflowInformation, - data: DataflowProcessorInformation -) { - // what is written/read additionally is based on lhs/rhs - assignments read written variables as well - const read = [...lhs.in, ...rhs.in] - const { source, target, global, swap } = identifySourceAndTarget(op, lhs, rhs) - - const funcTypeCheck = swap ? op.lhs : op.rhs - - const writeNodes = produceWrittenNodes(op, target, global, data, funcTypeCheck) - - if(writeNodes.length !== 1) { - log.warn(`Unexpected write number in assignment: ${JSON.stringify(writeNodes)}`) - } - - const readFromSourceWritten = source.out - let environments = overwriteEnvironments(source.environments, target.environments) - - // install assigned variables in environment - for(const write of writeNodes) { - environments = define(write, global ? GlobalScope: LocalScope, environments) - } - - return { - readTargets: [...source.unknownReferences, ...read, ...readFromSourceWritten], - writeTargets: [...writeNodes, ...target.out, ...readFromSourceWritten], - environments: environments, - newWriteNodes: writeNodes, - swap - } -} - -/** - * Some R-constructs like loops are known to return values completely independent of their input (loops return an invisible `NULL`). 
- * This returns only those of `readTargets` that actually impact the target. - */ -function determineImpactOfSource(source: RNode, readTargets: IdentifierReference[]): Set { - // collect all ids from the source but stop at Loops, function calls, definitions and everything which links its own return - // for loops this is necessary as they *always* return an invisible null, for function calls we do not know if they do - // yet, we need to keep the ids of these elements - const keepEndIds: NodeId[] = [] - const allIds = new Set(collectAllIds(source, n => { - if(n.type === RType.FunctionCall || n.type === RType.FunctionDefinition) { - keepEndIds.push(n.info.id) - return true - } - return n.type === RType.ForLoop || n.type === RType.WhileLoop || n.type === RType.RepeatLoop - }) - ) - for(const id of keepEndIds) { - allIds.add(id) - } - if(allIds.size === 0) { - return new Set() - } else { - return new Set(readTargets.filter(ref => allIds.has(ref.nodeId))) - } -} diff --git a/src/dataflow/internal/process/operators/non-assignment-binary-op.ts b/src/dataflow/internal/process/operators/non-assignment-binary-op.ts deleted file mode 100644 index 8d1d83556f..0000000000 --- a/src/dataflow/internal/process/operators/non-assignment-binary-op.ts +++ /dev/null @@ -1,27 +0,0 @@ -import type { DataflowInformation } from '../../info' -import type { DataflowProcessorInformation } from '../../../processor' -import { processDataflowFor } from '../../../processor' -import { linkIngoingVariablesInSameScope } from '../../linker' -import type { ParentInformation, RBinaryOp } from '../../../../r-bridge' -import { appendEnvironments, overwriteEnvironments } from '../../../environments' - -export function processNonAssignmentBinaryOp(op: RBinaryOp, data: DataflowProcessorInformation): DataflowInformation { - const lhs = processDataflowFor(op.lhs, data) - const rhs = processDataflowFor(op.rhs, data) - - const ingoing = [...lhs.in, ...rhs.in, ...lhs.unknownReferences, ...rhs.unknownReferences] 
- const nextGraph = lhs.graph.mergeWith(rhs.graph) - linkIngoingVariablesInSameScope(nextGraph, ingoing) - - // logical operations may not execute the right hand side (e.g., `FALSE && (x <- TRUE)`) - const merger = op.flavor === 'logical' ? appendEnvironments : overwriteEnvironments - - return { - unknownReferences: [], // binary ops require reads as without assignments there is no definition - in: ingoing, - out: [...lhs.out, ...rhs.out], - environments: merger(lhs.environments, rhs.environments), - graph: nextGraph, - scope: data.activeScope, - } -} diff --git a/src/dataflow/internal/process/operators/pipe.ts b/src/dataflow/internal/process/operators/pipe.ts deleted file mode 100644 index 15e44d577a..0000000000 --- a/src/dataflow/internal/process/operators/pipe.ts +++ /dev/null @@ -1,51 +0,0 @@ -import type { DataflowInformation } from '../../info' -import type { DataflowProcessorInformation } from '../../../processor' -import { processDataflowFor } from '../../../processor' -import { linkIngoingVariablesInSameScope } from '../../linker' -import type { ParentInformation, RPipe } from '../../../../r-bridge' -import { RType } from '../../../../r-bridge' -import { overwriteEnvironments } from '../../../environments' -import { dataflowLogger, EdgeType, graphToMermaidUrl } from '../../../index' -import { guard } from '../../../../util/assert' -import { UnnamedArgumentPrefix } from '../functions/argument' - -export function processPipeOperation(op: RPipe, data: DataflowProcessorInformation): DataflowInformation { - const lhs = processDataflowFor(op.lhs, data) - const rhs = processDataflowFor(op.rhs, data) - - // in-and outgoing are similar to that of a binary operation, we only 1) expect the rhs to be a function call and 2) modify the arguments. 
- const ingoing = [...lhs.in, ...rhs.in, ...lhs.unknownReferences, ...rhs.unknownReferences] - const nextGraph = lhs.graph.mergeWith(rhs.graph) - linkIngoingVariablesInSameScope(nextGraph, ingoing) - if(op.rhs.type !== RType.FunctionCall) { - dataflowLogger.warn(`Expected rhs of pipe to be a function call, but got ${op.rhs.type} instead.`) - } else { - const maybeFunctionCallNode = nextGraph.get(op.rhs.info.id, true) - guard(maybeFunctionCallNode !== undefined, () => `Expected function call node with id ${op.rhs.info.id} to be present in graph, but got undefined instead (graph: ${graphToMermaidUrl(nextGraph, data.completeAst.idMap)}).`) - - - const functionCallNode = maybeFunctionCallNode[0] - guard(functionCallNode.tag === 'function-call', () => `Expected function call node with id ${op.rhs.info.id} to be a function call node, but got ${functionCallNode.tag} instead.`) - - // make the lhs an argument node: - const argId = op.lhs.info.id - - dataflowLogger.trace(`Linking pipe arg ${argId} as first argument of ${op.rhs.info.id}`) - functionCallNode.args.unshift({ - nodeId: argId, - name: `${UnnamedArgumentPrefix}${argId}`, - scope: data.activeScope, - used: 'always' - }) - nextGraph.addEdge(functionCallNode.id, argId, EdgeType.Argument, 'always') - } - - return { - unknownReferences: [], - in: ingoing, - out: [...lhs.out, ...rhs.out], - environments: overwriteEnvironments(lhs.environments, rhs.environments), - graph: nextGraph, - scope: data.activeScope, - } -} diff --git a/src/dataflow/internal/process/operators/unary-op.ts b/src/dataflow/internal/process/operators/unary-op.ts deleted file mode 100644 index eec7302383..0000000000 --- a/src/dataflow/internal/process/operators/unary-op.ts +++ /dev/null @@ -1,9 +0,0 @@ -import type { DataflowInformation } from '../../info' -import type { DataflowProcessorInformation } from '../../../processor' -import { processDataflowFor } from '../../../processor' -import type { ParentInformation, RUnaryOp } from 
'../../../../r-bridge' - -export function processUnaryOp(op: RUnaryOp, data: DataflowProcessorInformation): DataflowInformation { - /* nothing has to happen to our knowledge */ - return processDataflowFor(op.operand, data) -} diff --git a/src/dataflow/internal/process/process-named-call.ts b/src/dataflow/internal/process/process-named-call.ts new file mode 100644 index 0000000000..55e3bb2e61 --- /dev/null +++ b/src/dataflow/internal/process/process-named-call.ts @@ -0,0 +1,22 @@ +import type { Base, EmptyArgument, Location, ParentInformation, RNode } from '../../../r-bridge' +import { RType } from '../../../r-bridge' +import type { DataflowProcessorInformation } from '../../processor' +import type { DataflowInformation } from '../../info' +import { processNamedCall } from './functions/call/named-call-handling' +import { wrapArgumentsUnnamed } from './functions/call/argument/make-argument' + +export function processAsNamedCall( + functionName: RNode & Base & Location, + data: DataflowProcessorInformation, + name: string, + args: readonly (RNode | typeof EmptyArgument | undefined)[] +): DataflowInformation { + return processNamedCall({ + type: RType.Symbol, + info: functionName.info, + content: name, + lexeme: functionName.lexeme, + location: functionName.location, + namespace: undefined + }, wrapArgumentsUnnamed(args, data.completeAst.idMap), functionName.info.id, data) +} diff --git a/src/dataflow/internal/process/process-symbol.ts b/src/dataflow/internal/process/process-symbol.ts new file mode 100644 index 0000000000..31ce647e7f --- /dev/null +++ b/src/dataflow/internal/process/process-symbol.ts @@ -0,0 +1,28 @@ +import type { ParentInformation, RSymbol } from '../../../r-bridge' +import { RNa, RNull } from '../../../r-bridge' +import { DataflowGraph, VertexType } from '../../graph' +import { type DataflowInformation, ExitPointType } from '../../info' +import type { DataflowProcessorInformation } from '../../processor' +import { processValue } from 
'./process-value' + +export function processSymbol(symbol: RSymbol, data: DataflowProcessorInformation): DataflowInformation { + if(symbol.content === RNull || symbol.content === RNa) { + return processValue(symbol, data) + } + + return { + unknownReferences: [ { nodeId: symbol.info.id, name: symbol.content, controlDependencies: data.controlDependencies } ], + in: [], + out: [], + environment: data.environment, + graph: new DataflowGraph().addVertex({ + tag: VertexType.Use, + id: symbol.info.id, + name: symbol.content, + controlDependencies: + data.controlDependencies + }), + entryPoint: symbol.info.id, + exitPoints: [{ nodeId: symbol.info.id, type: ExitPointType.Default, controlDependencies: data.controlDependencies }] + } +} diff --git a/src/dataflow/internal/process/process-uninteresting-leaf.ts b/src/dataflow/internal/process/process-uninteresting-leaf.ts new file mode 100644 index 0000000000..75e9c6faa2 --- /dev/null +++ b/src/dataflow/internal/process/process-uninteresting-leaf.ts @@ -0,0 +1,7 @@ +import { initializeCleanDataflowInformation, type DataflowInformation } from '../../info' +import type { DataflowProcessorInformation } from '../../processor' +import type { RNodeWithParent } from '../../../r-bridge' + +export function processUninterestingLeaf(leaf: RNodeWithParent, info: DataflowProcessorInformation): DataflowInformation { + return initializeCleanDataflowInformation(leaf.info.id, info) +} diff --git a/src/dataflow/internal/process/process-value.ts b/src/dataflow/internal/process/process-value.ts new file mode 100644 index 0000000000..82c393d2a2 --- /dev/null +++ b/src/dataflow/internal/process/process-value.ts @@ -0,0 +1,22 @@ +import { type DataflowInformation, ExitPointType } from '../../info' +import type { DataflowProcessorInformation } from '../../processor' +import { CONSTANT_NAME, DataflowGraph, VertexType } from '../../graph' +import type { RNodeWithParent } from '../../../r-bridge' + +export function processValue(value: RNodeWithParent, 
data: DataflowProcessorInformation): DataflowInformation { + return { + unknownReferences: [], + in: [{ nodeId: value.info.id, name: undefined, controlDependencies: data.controlDependencies }], + out: [], + environment: data.environment, + graph: new DataflowGraph().addVertex({ + tag: VertexType.Value, + id: value.info.id, + name: CONSTANT_NAME, + value: value.lexeme, + controlDependencies: data.controlDependencies + }), + exitPoints: [{ nodeId: value.info.id, type: ExitPointType.Default, controlDependencies: data.controlDependencies }], + entryPoint: value.info.id + } +} diff --git a/src/dataflow/internal/process/symbol.ts b/src/dataflow/internal/process/symbol.ts deleted file mode 100644 index 0f0e5730ff..0000000000 --- a/src/dataflow/internal/process/symbol.ts +++ /dev/null @@ -1,21 +0,0 @@ -import type { ParentInformation, RSymbol } from '../../../r-bridge' -import { RNa, RNull } from '../../../r-bridge' -import { DataflowGraph } from '../../graph' -import type { DataflowInformation } from '../info' -import { initializeCleanInfo } from '../info' -import type { DataflowProcessorInformation } from '../../processor' - -export function processSymbol(symbol: RSymbol, data: DataflowProcessorInformation): DataflowInformation { - if(symbol.content === RNull || symbol.content === RNa) { - return initializeCleanInfo(data) - } - - return { - unknownReferences: [ { nodeId: symbol.info.id, scope: data.activeScope, name: symbol.content, used: 'always' } ], - in: [], - out: [], - environments: data.environments, - scope: data.activeScope, - graph: new DataflowGraph().addVertex({ tag: 'use', id: symbol.info.id, name: symbol.content, environment: data.environments }) - } -} diff --git a/src/dataflow/internal/process/uninteresting-leaf.ts b/src/dataflow/internal/process/uninteresting-leaf.ts deleted file mode 100644 index 17bbf266ec..0000000000 --- a/src/dataflow/internal/process/uninteresting-leaf.ts +++ /dev/null @@ -1,7 +0,0 @@ -import type { DataflowInformation } from 
'../info' -import { initializeCleanInfo } from '../info' -import type { DataflowProcessorInformation } from '../../processor' - -export function processUninterestingLeaf(_leaf: unknown, info: DataflowProcessorInformation): DataflowInformation { - return initializeCleanInfo(info) -} diff --git a/src/dataflow/processor.ts b/src/dataflow/processor.ts index 8e9497129b..4c3ce8d6e0 100644 --- a/src/dataflow/processor.ts +++ b/src/dataflow/processor.ts @@ -1,52 +1,48 @@ /** * Based on a two-way fold, this processor will automatically supply scope information */ -import type { - NormalizedAst, - ParentInformation, RNode, - RNodeWithParent, RParseRequest -} from '../r-bridge' -import type { DataflowInformation } from './internal/info' -import type { DataflowScopeName, REnvironmentInformation } from './environments' +import type { NodeId, NormalizedAst, ParentInformation, RNode, RNodeWithParent, RParseRequest } from '../r-bridge' +import type { REnvironmentInformation } from './environments' +import type { DataflowInformation } from './info' export interface DataflowProcessorInformation { /** * Initial and frozen ast-information */ - readonly completeAst: NormalizedAst + readonly completeAst: NormalizedAst /** * Correctly contains pushed local scopes introduced by `function` scopes. - * Will by default *not* contain any symbol-bindings introduces along the way, they have to be decorated when moving up the tree. + * Will by default *not* contain any symbol-bindings introduced along the way; they have to be decorated when moving up the tree. 
*/ - readonly environments: REnvironmentInformation - /** - * Name of the currently active scope, (hopefully) always {@link LocalScope | Local} - */ - readonly activeScope: DataflowScopeName + readonly environment: REnvironmentInformation /** * Other processors to be called by the given functions */ - readonly processors: DataflowProcessors + readonly processors: DataflowProcessors /** * The {@link RParseRequest} that is currently being parsed */ - readonly currentRequest: RParseRequest + readonly currentRequest: RParseRequest /** * The chain of {@link RParseRequest} fingerprints ({@link requestFingerprint}) that lead to the {@link currentRequest}. * The most recent (last) entry is expected to always be the {@link currentRequest}. */ - readonly referenceChain: string[] + readonly referenceChain: string[] + /** + * The chain of control-flow {@link NodeId}s that lead to the current node (e.g. of known ifs). + */ + readonly controlDependencies: NodeId[] | undefined } export type DataflowProcessor> = (node: NodeType, data: DataflowProcessorInformation) => DataflowInformation -type NodeWithKey, TypeKey> = Node['type'] extends TypeKey ? Node : never +type NodeWithKey = RNode & { type: Key } /** * This way, a processor mapped to a {@link RType#Symbol} require a {@link RSymbol} as first parameter and so on. */ export type DataflowProcessors = { - [key in RNode['type']]: DataflowProcessor, key>> + [key in RNode['type']]: DataflowProcessor> } /** @@ -59,8 +55,11 @@ export type DataflowProcessors = { * Now this method can be called recursively within the other processors to parse the dataflow for nodes that you can not narrow down. 
* * @param current - The current node to start processing from - * @param data - The initial information to be passed down + * @param data - The initial (/current) information to be passed down */ -export function processDataflowFor(current: RNodeWithParent, data: DataflowProcessorInformation): DataflowInformation { - return data.processors[current.type](current as never, data) +export function processDataflowFor( + current: RNode, + data: DataflowProcessorInformation +): DataflowInformation { + return (data.processors[current.type] as DataflowProcessor)(current, data) } diff --git a/src/index.ts b/src/index.ts index bb309b2fc6..c650c8b913 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,4 +1,2 @@ -export * from './core' export * from './slicing' -export * from './dataflow' export * from './r-bridge' diff --git a/src/r-bridge/data/data.ts b/src/r-bridge/data/data.ts new file mode 100644 index 0000000000..effa9105d1 --- /dev/null +++ b/src/r-bridge/data/data.ts @@ -0,0 +1,751 @@ +import type { FlowrCapabilities } from './types' + +export const flowrCapabilities = { + name: 'Capabilities of flowR', + description: 'This is an evolving representation of what started with #636 to formulate capabilities in a structured format.', + version: '0.0.1', + capabilities: [ + { + name: 'Names and Identifiers', + id: 'names-and-identifiers', + capabilities: [ + { + name: 'Form', + id: 'form', + capabilities: [ + { + name: 'Normal', + id: 'name-normal', + supported: 'fully', + description: '_Recognize constructs like `a`, `plot`, ..._' + }, + { + name: 'Quoted', + id: 'name-quoted', + supported: 'fully', + description: "_Recognize `\"a\"`, `'plot'`, ..._" + }, + { + name: 'Escaped', + id: 'name-escaped', + supported: 'fully', + description: '_Recognize `` `a` ``, `` `plot` ``, ..._' + } + ] + }, + { + name: 'Resolution', + id: 'resolution', + capabilities: [ + { + name: 'Global Scope', + id: 'global-scope', + supported: 'fully', + description: '_For example, tracking a big table 
of current identifier bindings_' + }, + { + name: 'Lexicographic Scope', + id: 'lexicographic-scope', + supported: 'fully', + description: '_For example, support function definition scopes_' + }, + { + name: 'Closures', + id: 'closures', + supported: 'partially', + description: '_Handling [function factories](https://adv-r.hadley.nz/function-factories.html) and friends._ Currently, we do not have enough tests to be sure.' + }, + { + name: 'Dynamic Environment Resolution', + id: 'dynamic-environment-resolution', + supported: 'not', + description: '_For example, using `new.env` and friends_' + }, + { + name: 'Environment Sharing', + id: 'environment-sharing', + supported: 'not', + description: '_Handling side-effects by environments which are not copied when modified_' + }, + { + name: 'Search Path', + id: 'search-path', + supported: 'not', + description: "_Handling [R's search path](https://cran.r-project.org/doc/manuals/r-release/R-lang.html#Search-path) as explained in [Advanced R](https://adv-r.hadley.nz/environments.html#search-path)._ Currently, _flowR_ does not support dynamic modifications with `attach`, `search`, or `fn_env` and tests are definitely missing. Yet, theoretically, the tooling is all there." + }, + { + name: 'Namespaces', + id: 'namespaces', + supported: 'not', + description: "_Handling R's namespaces as explained in [Advanced R](https://adv-r.hadley.nz/environments.html#namespaces)_" + }, + { + name: 'Accessing Exported Names', + id: 'accessing-exported-names', + supported: 'partially', + description: '_Resolving calls with `::` to their origin._ Accessing external files is allowed, although the name of packages etc. is not resolved correctly.' 
+ }, + { + name: 'Accessing Internal Names', + id: 'accessing-internal-names', + supported: 'not', + description: '_Similar to `::` but for internal names._' + }, + { + name: 'Library Loading', + id: 'library-loading', + supported: 'not', + description: '_Resolve libraries identified with `library`, `require`, `attachNamespace`, ... and attach them to the search path_' + } + ] + } + ] + }, + { + name: 'Expressions', + id: 'expressions', + capabilities: [ + { + name: 'Function Calls', + id: 'function-calls', + capabilities: [ + { + name: 'Grouping', + id: 'grouping', + supported: 'fully', + description: '_Recognize groups done with `(`, `{`, ... (more precisely, their default mapping to the primitive implementations)._' + }, + { + name: 'Normal Call', + id: 'call-normal', + supported: 'fully', + description: '_Recognize and resolve calls like `f(x)`, `foo::bar(x, y)`, ..._', + capabilities: [ + { + name: 'Unnamed Arguments', + id: 'unnamed-arguments', + supported: 'fully', + description: '_Recognize and resolve calls like `f(3)`, `foo::bar(3, c(1,2))`, ..._' + }, + { + name: 'Empty Arguments', + id: 'empty-arguments', + supported: 'fully', + description: '_Essentially a special form of an unnamed argument as in `foo::bar(3, ,42)`, ..._' + }, + { + name: 'Named Arguments', + id: 'named-arguments', + supported: 'fully', + description: '_Recognize and resolve calls like `f(x = 3)`, `foo::bar(x = 3, y = 4)`, ..._' + }, + { + name: 'String Arguments', + id: 'string-arguments', + supported: 'fully', + description: '_Recognize and resolve calls like `f(\'x\' = 3)`, `foo::bar(\'x\' = 3, "y" = 4)`, ..._' + }, + { + name: 'Resolve Arguments', + id: 'resolve-arguments', + supported: 'partially', + description: '_Correctly bind arguments (including [`pmatch`](https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/pmatch))._ Currently, we do not have a correct implementation for `pmatch`. Furthermore, more tests would be nice.' 
+ }, + { + name: 'Side-Effects in Argument', + id: 'side-effects-in-argument', + supported: 'partially', + description: '_Handle side-effects of arguments (e.g., `f(x <- 3)`, `f(x = y <- 3)`, ...)._ We do not have enough tests to be sure.' + }, + { + name: 'Side-Effects in Function Call', + id: 'side-effects-in-function-call', + supported: 'partially', + description: '_Handle side-effects of function calls (e.g., `setXTo(3)`, ...) for example achieved with the super assignment._ We need more tests and handlings. Furthermore, we do not detect side effects with external files, network, logging, etc.' + } + ] + }, + { + name: 'Anonymous Calls', + id: 'call-anonymous', + supported: 'fully', + description: '_Recognize and resolve calls like `(function(x) x)(3)`, `factory(0)()`, ..._' + }, + { + name: 'Infix Calls', + id: 'infix-calls', + supported: 'fully', + description: '_Recognize and resolve calls like `x + y`, `x %>% f(y)`, ..._' + }, + { + name: 'Redefinition of Built-In Functions/primitives', + id: 'redefinition-of-built-in-functions-primitives', + supported: 'partially', + description: '_Handle cases like `print <- function(x) x`, `` `for` <- function(a,b,c) a``, ..._ Currently, we cannot handle all of them and there are no tests. Still WIP as part of desugaring.' + }, + { + name: 'Index Access', + id: 'index-access', + capabilities: [ + { + name: 'Single Bracket Access', + id: 'single-bracket-access', + supported: 'fully', + description: '_Detect calls like `x[i]`, `x[i, ,b]`, `x[3][y]`, ... This does not include the real separation of cells, which is handled extra._' + }, + { + name: 'Double Bracket Access', + id: 'double-bracket-access', + supported: 'fully', + description: '_Detect calls like `x[[i]]`, `x[[i, b]]`, ... 
Similar to single bracket._' + }, + { + name: 'Dollar Access', + id: 'dollar-access', + supported: 'fully', + description: '_Detect calls like `x$y`, `x$"y"`, `x$y$z`, ..._' + }, + { + name: 'Slot Access', + id: 'slot-access', + supported: 'fully', + description: '_Detect calls like `x@y`, `x@y@z`, ..._' + }, + { + name: 'Access with Argument-Names', + id: 'access-with-argument-names', + supported: 'fully', + description: '_Detect calls like `x[i = 3]`, `x[[i=]]`, ..._' + }, + { + name: 'Access with Empty', + id: 'access-with-empty', + supported: 'fully', + description: '_Detect calls like `x[]`, `x[2,,42]`, ..._' + }, + { + name: 'Subsetting', + id: 'subsetting', + supported: 'fully', + description: '_Detect calls like `x[i > 3]`, `x[c(1,3)]`, ..._' + } + ] + }, + { + name: 'Operators', + id: 'operators', + capabilities: [ + { + name: 'Unary Operator', + id: 'unary-operator', + supported: 'fully', + description: '_Recognize and resolve calls like `+3`, `-3`, ..._' + }, + { + name: 'Binary Operator', + id: 'binary-operator', + supported: 'fully', + description: '_Recognize and resolve calls like `3 + 4`, `3 * 4`, ..._', + capabilities: [ + { + name: 'Special Operator', + id: 'special-operator', + supported: 'fully', + description: '_Recognize and resolve calls like `3 %in% 4`, `3 %*% 4`, ..._' + }, + { + name: 'Model Formula', + id: 'model-formula', + supported: 'partially', + description: '_Recognize and resolve calls like `y ~ x`, `y ~ x + z`, ... 
including their implicit redefinitions of some functions._ Currently, we do not handle their redefinition and only treat model formulas as normal binary operators' + }, + { + name: 'Assignments and Bindings', + id: 'assignments-and-bindings', + capabilities: [ + { + name: 'Local Left Assignment', + id: 'local-left-assignment', + supported: 'fully', + description: '_Handle `x <- 3`, `x$y <- 3`, ..._' + }, + { + name: 'Local Right Assignment', + id: 'local-right-assignment', + supported: 'fully', + description: '_Handle `3 -> x`, `3 -> x$y`, ..._' + }, + { + name: 'Local Equal Assignment', + id: 'local-equal-assignment', + supported: 'fully', + description: '_Handle `x = 3`, `x$y := 3`, ..._' + }, + { + name: 'Super Left Assignment', + id: 'super-left-assignment', + supported: 'fully', + description: '_Handle `x <<- 42`, `x$y <<- 42`, ..._' + }, + { + name: 'Super Right Assignment', + id: 'super-right-assignment', + supported: 'fully', + description: '_Handle `42 ->> x`, `42 ->> x$y`, ..._' + }, + { + name: 'Return Value of Assignments', + id: 'return-value-of-assignments', + supported: 'fully', + description: '_Handle `x <- 3` returning `3`, e.g., in `x <- y <- 3`_' + }, + { + name: 'Assignment Functions', + id: 'assignment-functions', + supported: 'partially', + description: '_Handle `assign(x, 3)`, `delayedAssign(x, 3)`, ..._ Currently we can not handle all of them and tests are rare.' + }, + { + name: 'Range Assignment', + id: 'range-assignment', + supported: 'fully', + description: '_Handle `x[1:3] <- 3`, `x$y[1:3] <- 3`, ..._' + }, + { + name: 'Replacement Functions', + id: 'replacement-functions', + supported: 'partially', + description: '_Handle `x[i] <- 3`, `x$y <- 3`, ... as `` `[<-`(x, 3) ``, ..._ Currently work in progress as part of the desugaring but still untested.' 
+ }, + { + name: 'Locked Bindings', + id: 'locked-bindings', + supported: 'not', + description: '_Handle `lockBinding(x, 3)`, ..._' + } + ] + } + ] + } + ] + }, + { + name: 'Control-Flow', + id: 'control-flow', + capabilities: [ + { + name: 'if', + id: 'if', + supported: 'fully', + description: '_Handle `if (x) y else z`, `if (x) y`, ..._' + }, + { + name: 'for loop', + id: 'for-loop', + supported: 'fully', + description: '_Handle `for (i in 1:3) print(i)`, ..._' + }, + { + name: 'while loop', + id: 'while-loop', + supported: 'fully', + description: '_Handle `while (x) b`, ..._' + }, + { + name: 'repeat loop', + id: 'repeat-loop', + supported: 'fully', + description: '_Handle `repeat {b; if (x) break}`, ..._' + }, + { + name: 'break', + id: 'break', + supported: 'fully', + description: '_Handle `break` (including `break()`) ..._' + }, + { + name: 'next', + id: 'next', + supported: 'fully', + description: '_Handle `next` (including `next()`) ..._' + }, + { + name: 'switch', + id: 'switch', + supported: 'fully', + description: '_Handle `switch(3, "a", "b", "c")`, ..._' + }, + { + name: 'return', + id: 'return', + supported: 'fully', + description: '_Handle `return(3)`, ... in function definitions_' + }, + { + name: 'exceptions', + id: 'exceptions', + supported: 'not', + description: '_Handle `try`, `stop`, ..._' + } + ] + }, + { + name: 'Function Definitions', + id: 'function-definitions', + capabilities: [ + { + name: 'Normal', + id: 'normal-definition', + supported: 'fully', + description: '_Handle `function() 3`, ..._' + }, + { + name: 'Formals', + id: 'formals', + capabilities: [ + { + name: 'Named', + id: 'formals-named', + supported: 'fully', + description: '_Handle `function(x) x`, ..._' + }, + { + name: 'Default', + id: 'formals-default', + supported: 'fully', + description: '_Handle `function(x = 3) x`, ..._' + }, + { + name: 'Dot-Dot-Dot', + id: 'formals-dot-dot-dot', + supported: 'fully', + description: '_Handle `function(...) 
3`, ..._' + }, + { + name: 'Promises', + id: 'formals-promises', + supported: 'partially', + description: '_Handle `function(x = y) { y <- 3; x }`, `function(x = { x <- 3; x}) { x * x }`, ..._ We _try_ to identify promises correctly but this is really rudimentary.' + } + ] + }, + { + name: 'Implicit Return', + id: 'implicit-return', + supported: 'fully', + description: '_Handle the return of `function() 3`, ..._' + }, + { + name: 'Lambda Syntax', + id: 'lambda-syntax', + supported: 'fully', + description: '_Support `\\(x) x`, ..._' + } + ] + }, + { + name: 'Important Built-Ins', + id: 'important-built-ins', + capabilities: [ + { + name: 'Non-Strict Logical Operators', + id: 'non-strict-logical-operators', + supported: 'fully', + description: '_Handle `&&`, `||`, ..._' + }, + { + name: 'Pipe and Pipe-Bind', + id: 'built-in-pipe-and-pipe-bind', + supported: 'partially', + description: '_Handle the [new (4.1) pipe and pipe-bind syntax](https://www.r-bloggers.com/2021/05/the-new-r-pipe/): `|>`, and `=>`._ We have not enough tests and do not support pipe-bind.' + }, + { + name: 'Sequencing', + id: 'built-in-sequencing', + supported: 'not', + description: '_Handle `:`, `seq`, ... as they are used often._' + }, + { + name: 'Internal and Primitive Functions', + id: 'built-in-internal-and-primitive-functions', + supported: 'not', + description: '_Handle `.Internal`, `.Primitive`, ..._ In general we can not handle them as they refer to non-R code. We currently do not support them when used with the function.' + }, + { + name: 'Options', + id: 'built-in-options', + supported: 'not', + description: '_Handle `options`, `getOption`, ..._ Currently, we do not support the function at all.' + }, + { + name: 'Help', + id: 'built-in-help', + supported: 'partially', + description: '_Handle `help`, `?`, ..._ We do not support the function in a sensible way but just ignore it (although this does not happen resolved).' 
+ }, + { + name: 'Reflection / "Computing on the Language"', + id: 'reflection-"computing-on-the-language"', + capabilities: [ + { + name: 'Get Function Structure', + id: 'get-function-structure', + supported: 'not', + description: '_Handle `body`, `formals`, `environment` to access the respective parts of a function._ We do not support the functions at all.' + }, + { + name: 'Modify Function Structure', + id: 'modify-function-structure', + supported: 'not', + description: '_Handle `body<-`, `formals<-`, `environment<-` to modify the respective parts of a function._ We do not support the functions at all.' + }, + { + name: 'Quoting', + id: 'built-in-quoting', + supported: 'partially', + description: '_Handle `quote`, `substitute`, `bquote`, ..._ We partially ignore some of them but most likely not all.' + }, + { + name: 'Evaluation', + id: 'built-in-evaluation', + supported: 'not', + description: '_Handle `eval`, `evalq`, `eval.parent`, ..._ We do not handle them at all.' + }, + { + name: 'Parsing', + id: 'built-in-parsing', + supported: 'not', + description: '_Handle `parse`, `deparse`, ..._ We handle them as unknown function calls, but not specifically besides that.' 
+ } + ] + } + ] + } + ] + }, + { + name: 'Literal Values', + id: 'literal-values', + capabilities: [ + { + name: 'Numbers', + id: 'numbers', + supported: 'fully', + description: '_Recognize numbers like `3`, `3.14`, `NA`, float-hex, ..._' + }, + { + name: 'Strings', + id: 'strings', + supported: 'fully', + description: "_Recognize strings like `\"a\"`, `'b'`, ..._", + capabilities: [ + { + name: 'Raw Strings', + id: 'raw-strings', + supported: 'fully', + description: '_Recognize raw strings like `r"(a)"`, ..._' + } + ] + }, + { + name: 'Logical', + id: 'logical', + supported: 'fully', + description: '_Recognize the logicals `TRUE` and `FALSE`, ..._' + }, + { + name: 'NULL', + id: 'null', + supported: 'fully', + description: '_Recognize `NULL`_' + }, + { + name: 'Inf and NaN', + id: 'inf-and-nan', + supported: 'fully', + description: '_Recognize `Inf` and `NaN`_' + } + ] + } + ] + }, + { + name: 'Non-Standard Evaluations/Semantics', + id: 'non-standard-evaluations-semantics', + capabilities: [ + { + name: 'Recycling', + id: 'recycling', + supported: 'not', + description: '_Handle recycling of vectors as explained in [Advanced R](https://adv-r.hadley.nz/vectors-chap.html)._ We do not support recycling.' + }, + { + name: 'Vectorized Operator or Functions', + id: 'vectorized-operator-or-functions', + supported: 'not', + description: '_Handle vectorized operations as explained in [Advanced R](https://adv-r.hadley.nz/perf-improve.html?q=vectorised#vectorise)._ We do not support vectorized operations.' + }, + { + name: 'Hooks', + id: 'hooks', + supported: 'not', + description: '_Handle hooks like [`userhooks`](https://stat.ethz.ch/R-manual/R-devel/library/base/html/userhooks.html) and [`on.exit`](https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/on.exit)._ We do not support hooks.' 
+ }, + { + name: 'Precedence', + id: 'precedence', + supported: 'fully', + description: '_Handle the precedence of operators as explained in the [Documentation](https://rdrr.io/r/base/Syntax.html)._ We handle the precedence of operators (implicitly with the parser).' + }, + { + name: 'Attributes', + id: 'attributes', + capabilities: [ + { + name: 'User-Defined', + id: 'user-defined', + supported: 'not', + description: '_Handle [attributes](https://cran.r-project.org/doc/manuals/r-release/R-lang.html#Attributes) like `attr`, `attributes`, ..._ We do not support attributes.' + }, + { + name: 'Built-In', + id: 'built-in', + supported: 'not', + description: '_Handle built-in attributes like `dim`, ..._ We do not support them.' + } + ] + } + ] + }, + { + name: 'Types', + id: 'types', + capabilities: [ + { + name: 'Primitive', + id: 'types-primitive', + supported: 'not', + description: '_Recognize and resolve primitive types like `numeric`, `character`, ..._ We do not support typing currently.' + }, + { + name: 'Non-Primitive', + id: 'types-non-primitive', + supported: 'not', + description: '_Recognize and resolve non-primitive/composite types._ We do not support typing currently.' + }, + { + name: 'Inference', + id: 'types-inference', + supported: 'not', + description: '_Infer types from the code._ We do not support typing currently.' + }, + { + name: 'Coercion', + id: 'types-coercion', + supported: 'not', + description: '_Handle coercion of types._ We do not support typing currently.' + }, + { + name: 'Object-Oriented Programming', + id: 'object-oriented-programming', + capabilities: [ + { + name: 'S3', + id: 'oop-s3', + note: 'https://adv-r.hadley.nz/s3.html', + supported: 'not', + description: '_Handle S3 classes and methods as one unit (with attributes etc.). 
Including Dispatch and Inheritance._ We do not support typing currently and do not handle objects of these classes "as units."' + }, + { + name: 'S4', + id: 'oop-s4', + note: 'https://adv-r.hadley.nz/s4.html', + supported: 'not', + description: '_Handle S4 classes and methods as one unit. Including Dispatch and Inheritance_ We do not support typing currently and do not handle objects of these classes "as units."' + }, + { + name: 'R6', + id: 'oop-r6', + note: 'https://adv-r.hadley.nz/r6.html', + supported: 'not', + description: '_Handle R6 classes and methods as one unit. Including Dispatch and Inheritance, as well as its Reference Semantics, Access Control, Finalizers, and Introspection._ We do not support typing currently and do not handle objects of these classes "as units."' + }, + { + name: 'R7/S7', + id: 'r7-s7', + note: 'https://www.r-bloggers.com/2022/12/what-is-r7-a-new-oop-system-for-r/, https://cran.r-project.org/web/packages/S7/index.html', + supported: 'not', + description: '_Handle R7 classes and methods as one unit. Including Dispatch and Inheritance, as well as its Reference Semantics, Validators, ..._ We do not support typing currently and do not handle objects of these classes "as units."' + } + ] + } + ] + }, + { + name: 'Structure', + id: 'structure', + capabilities: [ + { + name: 'Comments', + id: 'comments', + supported: 'fully', + description: '_Recognize comments like `# this is a comment`, ... 
and line-directives_' + }, + { + name: 'Semicolons', + id: 'semicolons', + supported: 'fully', + description: '_Recognize and resolve semicolons like `a; b; c`, ..._' + }, + { + name: 'Newlines', + id: 'newlines', + supported: 'fully', + description: '_Recognize and resolve newlines like `a\nb\nc`, ..._' + } + ] + }, + { + name: 'System, I/O, FFI, and Other Files', + id: 'system-i-o-ffi-and-other-files', + capabilities: [ + { + name: 'Sourcing External Files', + id: 'sourcing-external-files', + supported: 'partially', + description: '_Handle `source`, `sys.source`, ..._ We are currently working on supporting the inclusion of external files. Currently we can handle `source`.' + }, + { + name: 'Handling Binary Files', + id: 'handling-binary-riles', + supported: 'not', + description: '_Handle files dumped with, e.g., [`save`](https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/save), ... due to their frequent usage._ We do not support binary files.' + }, + { + name: 'I/O', + id: 'i-o', + supported: 'not', + description: '_Handle `read.csv`, `write.csv`, ..._ We do not support I/O for the time being but treat them as unknown function calls.' + }, + { + name: 'Foreign Function Interface', + id: 'foreign-function-interface', + supported: 'not', + description: '_Handle `.Fortran`, `C`,..._ We do not support FFI but treat them as unknown function calls.' + }, + { + name: 'System Calls', + id: 'system-calls', + supported: 'not', + description: '_Handle [`system`](https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/system), `system.*`, ..._ We do not support system calls but treat them as unknown function calls.' 
+ } + ] + }, + { + name: 'Pre-Processors/external Tooling', + id: 'pre-processors-external-tooling', + supported: 'not', + description: '_Handle pre-processors like `knitr`, `rmarkdown`, `roxygen2` ..._ We do not support pre-processors for the time being (being unable to handle things like `@importFrom`)' + } + ] +} as const satisfies FlowrCapabilities diff --git a/src/r-bridge/data/get.ts b/src/r-bridge/data/get.ts new file mode 100644 index 0000000000..ba0f25b3a4 --- /dev/null +++ b/src/r-bridge/data/get.ts @@ -0,0 +1,72 @@ +import { guard } from '../../util/assert' +import type { FlowrCapability } from './types' +import { flowrCapabilities } from './data' + + +type CapabilityIdFilter = T extends Filter ? T['id'] : never + +/** Recursively extract all valid identifiers (which have the given support predicate) */ +type ExtractAllIds = + T extends { readonly capabilities: infer U } + ? U extends readonly FlowrCapability[] + ? (CapabilityIdFilter | ExtractAllIds) + : CapabilityIdFilter + : CapabilityIdFilter + +type Capabilities = (typeof flowrCapabilities)['capabilities'][number] +export type FlowrCapabilityId = ExtractAllIds +export type SupportedFlowrCapabilityId = ExtractAllIds + +type PathToCapability = readonly number[] + +export interface FlowrCapabilityWithPath extends FlowrCapability{ + path: PathToCapability +} + +function search(id: FlowrCapabilityId, capabilities: readonly FlowrCapability[], path: number[] = []): FlowrCapabilityWithPath | undefined { + let idx = 0 + for(const capability of capabilities) { + idx++ // index by one :) + if(capability.id === id) { + return { ...capability, path: [...path, idx] } + } + if(capability.capabilities) { + const found = search(id, capability.capabilities, [...path, idx]) + if(found) { + return found + } + } + } + return undefined +} + +const capabilityCache = new Map() + +export function getCapabilityById(id: FlowrCapabilityId): FlowrCapabilityWithPath { + const cached = capabilityCache.get(id) + if(cached) { + 
return cached + } + const value = search(id, flowrCapabilities.capabilities) + guard(value !== undefined, () => `Could not find capability with id ${id}`) + capabilityCache.set(id, value) + return value +} + +export function getAllCapabilities(): readonly FlowrCapabilityWithPath[] { + const result: FlowrCapabilityWithPath[] = [] + function traverse(capabilities: readonly FlowrCapability[], currentPath: PathToCapability = []) { + let idx = 0 + for(const capability of capabilities) { + idx++ + const nextPath = [...currentPath, idx] + result.push({ ...capability, path: nextPath }) + if(capability.capabilities) { + traverse(capability.capabilities, nextPath) + } + } + } + traverse(flowrCapabilities.capabilities, []) + return result +} + diff --git a/src/r-bridge/data/index.ts b/src/r-bridge/data/index.ts new file mode 100644 index 0000000000..cd7457bd55 --- /dev/null +++ b/src/r-bridge/data/index.ts @@ -0,0 +1,4 @@ +export * from './types' +export * from './get' +export * from './data' +export * from './print' diff --git a/src/r-bridge/data/print.ts b/src/r-bridge/data/print.ts new file mode 100644 index 0000000000..7c24d91925 --- /dev/null +++ b/src/r-bridge/data/print.ts @@ -0,0 +1,65 @@ +import type { FlowrCapability } from './types' +import { flowrCapabilities } from './data' + +const supportedSymbolMap: Map = new Map([ + ['not', ':red_circle:'], + ['partially', ':large_orange_diamond:'], + ['fully', ':green_square:'] +]) + +function printSingleCapability(depth: number, index: number, capability: FlowrCapability) { + const indent = ' '.repeat(depth) + const indexStr = index.toString().padStart(2, ' ') + const nextLineIndent = ' '.repeat(depth + indexStr.length) + const mainLine = `${indent}${indexStr}. 
**${capability.name}** (\`${capability.id}\`)` + let nextLine = '' + + if(capability.supported) { + nextLine += `${supportedSymbolMap.get(capability.supported)} ` + } + if(capability.description) { + nextLine += capability.description + } + if(capability.note) { + nextLine += `\\\n${nextLineIndent}_${capability.note}_` + } + return nextLine ? `${mainLine}\\\n${nextLineIndent}${nextLine}` : mainLine +} + +function printAsMarkdown(capabilities: readonly FlowrCapability[], depth = 0, lines: string[] = []): string { + for(let i = 0; i < capabilities.length; i++) { + const capability = capabilities[i] + const result = printSingleCapability(depth, i + 1, capability) + lines.push(result) + if(capability.capabilities) { + printAsMarkdown(capability.capabilities, depth + 1, lines) + } + } + return lines.join('\n') +} + +function getPreamble(): string { + const currentDateAndTime = new Date().toISOString().replace('T', ', ').replace(/\.\d+Z$/, ' UTC') + return ` +# Flowr Capabilities + +_This document was generated automatically from '${module.filename}' on ${currentDateAndTime}_ + +The code-font behind each capability name is a link to the capability's id. This id can be used to reference the capability in a labeled test within flowR. +Besides we use colored bullets like this: + +| | | +| ---------------------- | ----------------------------------------------------- | +| :green_square: | _flowR_ is capable of handling this feature fully | +| :large_orange_diamond: | _flowR_ is capable of handling this feature partially | +| :red_circle: | _flowR_ is not capable of handling this feature | + +:cloud: This could be a feature diagram... 
:cloud: + +` +} + +/** if we run this script, we want a markdown representation of the capabilities */ +if(require.main === module) { + console.log(getPreamble() + printAsMarkdown(flowrCapabilities.capabilities)) +} diff --git a/src/r-bridge/data/types.ts b/src/r-bridge/data/types.ts new file mode 100644 index 0000000000..922b4f4d1f --- /dev/null +++ b/src/r-bridge/data/types.ts @@ -0,0 +1,34 @@ +const enum RequiredFeature { + /** https://github.com/Code-Inspect/flowr/labels/typing */ + Typing, + /** https://github.com/Code-Inspect/flowr/labels/abstract%20interpretation */ + AbstractInterpretation, +} + +export interface FlowrCapability { + /** The human-readable name of the capability */ + readonly name: string + /** + * The unique identifier of the capability, used to refer to it independent of the location. + * We could use a key-value mapping. However, this way, an id is self-contained and can be moved around as one object. + */ + readonly id: string + /** A list of features that are required for the capability, extend at need. 
*/ + readonly needs?: RequiredFeature[] + readonly description?: string + readonly note?: string + /** The level of support for the capability, undefined if it is a meta-capability that does not need such an attribute */ + readonly supported?: 'not' | 'partially' | 'fully' + readonly capabilities?: readonly FlowrCapability[] +} + +export interface FlowrCapabilities { + /** The human-readable name of the capabilities */ + readonly name: string + /** A description of the capabilities */ + readonly description: string + /** The version of the capabilities */ + readonly version: string + /** A list of the capabilities */ + readonly capabilities: readonly FlowrCapability[] +} diff --git a/src/r-bridge/init.ts b/src/r-bridge/init.ts new file mode 100644 index 0000000000..a6a678b580 --- /dev/null +++ b/src/r-bridge/init.ts @@ -0,0 +1,19 @@ +import { ts2r } from './lang-4.x' + +export const ErrorMarker = 'err' + +/** Command(s) to be issued at the start of each shell */ +export function initCommand(eol: string): string { + /* define the get function complete wrapped in a try so that we can handle failures gracefully on stdout + * furthermore, we compile for performance reasons + */ + return 'flowr_get_ast<-compiler::cmpfun(function(...){tryCatch({' + /* the actual code to parse the R code, ... allows us to keep the old 'file=path' and 'text=content' semantics. we define flowr_output using the super assignment to persist it in the env! */ + + 'flowr_output<<-getParseData(parse(...,keep.source=TRUE),includeText=TRUE);' + /* json conversion of the output, dataframe="values" allows us to receive a list of lists (which is more compact)! 
+ * so we do not depend on jsonlite and friends, we do so manually (:sparkles:) + */ + + 'cat(paste0(sprintf("[%s,%s,%s,%s,%s,%s,%s,%s,%s]",flowr_output$line1,flowr_output$col1,flowr_output$line2,flowr_output$col2,flowr_output$id,flowr_output$parent,encodeString(flowr_output$token,quote="\\""),ifelse(flowr_output$terminal,"true","false"),encodeString(flowr_output$text,quote="\\"")),collapse=","))' + /* error handling (just produce the marker) */ + + `},error=function(e){cat("${ErrorMarker}")});cat(${ts2r(eol)})},options=compiler::setCompilerOptions(optimize=3));` +} diff --git a/src/r-bridge/lang-4.x/ast/index.ts b/src/r-bridge/lang-4.x/ast/index.ts index 6f15feb89d..071fb1dcc1 100644 --- a/src/r-bridge/lang-4.x/ast/index.ts +++ b/src/r-bridge/lang-4.x/ast/index.ts @@ -1,3 +1,2 @@ export * from './model' export * from './parser/xml' -export { parseLog } from './parser/json/parser' diff --git a/src/r-bridge/lang-4.x/ast/model/model.ts b/src/r-bridge/lang-4.x/ast/model/model.ts index 5ce228c22e..743d4434b1 100644 --- a/src/r-bridge/lang-4.x/ast/model/model.ts +++ b/src/r-bridge/lang-4.x/ast/model/model.ts @@ -1,7 +1,7 @@ import type { SourceRange } from '../../../../util/range' import type { RType } from './type' import type { MergeableRecord } from '../../../../util/objects' -import type { RNa, RNull } from '../../values' +import type { RNa, RNull } from '../../convert-values' import type { RExpressionList, RNumber, @@ -58,7 +58,7 @@ interface Source { } /** - * Provides the common base of all {@link RNode | RNodes}. + * Provides the common base of all {@link RNode|RNodes}. 
* * @typeParam Info - can be used to store additional information about the node * @typeParam LexemeType - the type of the lexeme, probably always a `string` or `string | undefined` @@ -72,7 +72,7 @@ export interface Base extends MergeableRecord { } export interface WithChildren> { - children: Children[] + children: readonly Children[] } /** diff --git a/src/r-bridge/lang-4.x/ast/model/nodes/r-access.ts b/src/r-bridge/lang-4.x/ast/model/nodes/r-access.ts index 202d766d85..bd34b750c7 100644 --- a/src/r-bridge/lang-4.x/ast/model/nodes/r-access.ts +++ b/src/r-bridge/lang-4.x/ast/model/nodes/r-access.ts @@ -1,6 +1,7 @@ import type { Base, Location, NoInfo, RNode } from '../model' import type { RType } from '../type' -import type { RArgument } from './r-argument' +import type { RArgument, RUnnamedArgument } from './r-argument' +import type { EmptyArgument } from './r-function-call' /** * Represents an R Indexing operation with `$`, `@`, `[[`, or `[`. @@ -14,14 +15,14 @@ interface RAccessBase extends Base, Location { export interface RNamedAccess extends RAccessBase { operator: '$' | '@'; - access: string; + access: [RUnnamedArgument]; } /** access can be a number, a variable or an expression that resolves to one, a filter etc. */ export interface RIndexAccess extends RAccessBase { operator: '[' | '[['; /** is null if the access is empty, e.g. 
`a[,3]` */ - access: (RArgument | null)[] + access: readonly (RArgument | typeof EmptyArgument)[] } export type RAccess = RNamedAccess | RIndexAccess diff --git a/src/r-bridge/lang-4.x/ast/model/nodes/r-argument.ts b/src/r-bridge/lang-4.x/ast/model/nodes/r-argument.ts index c3a2d08517..1d1791aed8 100644 --- a/src/r-bridge/lang-4.x/ast/model/nodes/r-argument.ts +++ b/src/r-bridge/lang-4.x/ast/model/nodes/r-argument.ts @@ -11,3 +11,8 @@ export interface RArgument extends Base, Location { name: RSymbol | undefined; value: RNode | undefined; } + +export interface RUnnamedArgument extends RArgument { + name: undefined; + value: RNode; +} diff --git a/src/r-bridge/lang-4.x/ast/model/nodes/r-binary-op.ts b/src/r-bridge/lang-4.x/ast/model/nodes/r-binary-op.ts index ba3ce4d80a..195f281c80 100644 --- a/src/r-bridge/lang-4.x/ast/model/nodes/r-binary-op.ts +++ b/src/r-bridge/lang-4.x/ast/model/nodes/r-binary-op.ts @@ -1,31 +1,10 @@ import type { Base, Location, NoInfo, RNode } from '../model' import type { RType } from '../type' -import type { BinaryOperatorFlavor } from '../operators' export interface RBinaryOp extends Base, Location { - readonly type: RType.BinaryOp; - readonly flavor: BinaryOperatorFlavor; - operator: string; - lhs: RNode; - rhs: RNode; + readonly type: RType.BinaryOp; + operator: string; + lhs: RNode; + rhs: RNode; } -export interface RLogicalBinaryOp extends RBinaryOp { - flavor: 'logical' -} - -export interface RArithmeticBinaryOp extends RBinaryOp { - flavor: 'arithmetic' -} - -export interface RComparisonBinaryOp extends RBinaryOp { - flavor: 'comparison' -} - -export interface RAssignmentOp extends RBinaryOp { - flavor: 'assignment' -} - -export interface RModelFormulaBinaryOp extends RBinaryOp { - flavor: 'model formula' -} diff --git a/src/r-bridge/lang-4.x/ast/model/nodes/r-expression-list.ts b/src/r-bridge/lang-4.x/ast/model/nodes/r-expression-list.ts index d3d55bc33a..9afb7d7f6a 100644 --- 
a/src/r-bridge/lang-4.x/ast/model/nodes/r-expression-list.ts +++ b/src/r-bridge/lang-4.x/ast/model/nodes/r-expression-list.ts @@ -1,8 +1,10 @@ import type { Base, Location, NoInfo, RNode, WithChildren } from '../model' import type { RType } from '../type' +import type { RSymbol } from './r-symbol' export interface RExpressionList extends WithChildren>, Base, Partial { readonly type: RType.ExpressionList; - readonly content?: string; + /** encodes wrappers like `{}` or `()` */ + readonly grouping: undefined | [start: RSymbol, end: RSymbol] } diff --git a/src/r-bridge/lang-4.x/ast/model/nodes/r-function-call.ts b/src/r-bridge/lang-4.x/ast/model/nodes/r-function-call.ts index df0918b051..0ab9934d93 100644 --- a/src/r-bridge/lang-4.x/ast/model/nodes/r-function-call.ts +++ b/src/r-bridge/lang-4.x/ast/model/nodes/r-function-call.ts @@ -3,17 +3,21 @@ import type { RType } from '../type' import type { RSymbol } from './r-symbol' import type { RArgument } from './r-argument' +export const EmptyArgument = '<>' + +export type RFunctionArgument = RArgument | typeof EmptyArgument + /** * Calls of functions like `a()` and `foo(42, "hello")`. 
* * @see RUnnamedFunctionCall */ export interface RNamedFunctionCall extends Base, Location { - readonly type: RType.FunctionCall; - readonly flavor: 'named'; - functionName: RSymbol; - /** arguments can be undefined, for example when calling as `a(1, ,3)` */ - arguments: (RArgument | undefined)[]; + readonly type: RType.FunctionCall; + readonly flavor: 'named'; + functionName: RSymbol; + /** arguments can be empty, for example when calling as `a(1, ,3)` */ + readonly arguments: readonly RFunctionArgument[]; } @@ -23,13 +27,13 @@ export interface RNamedFunctionCall extends Base, Location * @see RNamedFunctionCall */ export interface RUnnamedFunctionCall extends Base, Location { - readonly type: RType.FunctionCall; - readonly flavor: 'unnamed'; - calledFunction: RNode; /* can be either a function definition or another call that returns a function etc. */ - /** marks function calls like `3 %xx% 4` which have been written in special infix notation */ - infixSpecial?: boolean; + readonly type: RType.FunctionCall; + readonly flavor: 'unnamed'; + calledFunction: RNode; /* can be either a function definition or another call that returns a function etc. 
*/ + /** marks function calls like `3 %xx% 4` which have been written in special infix notation; deprecated in v2 */ + infixSpecial?: boolean; /** arguments can be undefined, for example when calling as `a(1, ,3)` */ - arguments: (RArgument | undefined)[]; + readonly arguments: readonly RFunctionArgument[]; } export type RFunctionCall = RNamedFunctionCall | RUnnamedFunctionCall; diff --git a/src/r-bridge/lang-4.x/ast/model/nodes/r-function-definition.ts b/src/r-bridge/lang-4.x/ast/model/nodes/r-function-definition.ts index e98de76d45..a030a060ee 100644 --- a/src/r-bridge/lang-4.x/ast/model/nodes/r-function-definition.ts +++ b/src/r-bridge/lang-4.x/ast/model/nodes/r-function-definition.ts @@ -1,11 +1,10 @@ -import type { Base, Location, NoInfo } from '../model' +import type { Base, Location, NoInfo, RNode } from '../model' import type { RType } from '../type' import type { RParameter } from './r-parameter' -import type { RExpressionList } from './r-expression-list' export interface RFunctionDefinition extends Base, Location { readonly type: RType.FunctionDefinition; /** the R formals, to our knowledge they must be unique */ parameters: RParameter[]; - body: RExpressionList; + body: RNode; } diff --git a/src/r-bridge/lang-4.x/ast/model/nodes/r-number.ts b/src/r-bridge/lang-4.x/ast/model/nodes/r-number.ts index f1a0d750c5..aa7d9580c2 100644 --- a/src/r-bridge/lang-4.x/ast/model/nodes/r-number.ts +++ b/src/r-bridge/lang-4.x/ast/model/nodes/r-number.ts @@ -1,6 +1,6 @@ import type { Leaf, Location, NoInfo } from '../model' import type { RType } from '../type' -import type { RNumberValue } from '../../../values' +import type { RNumberValue } from '../../../convert-values' /** includes numeric, integer, and complex */ export interface RNumber extends Leaf, Location { diff --git a/src/r-bridge/lang-4.x/ast/model/nodes/r-string.ts b/src/r-bridge/lang-4.x/ast/model/nodes/r-string.ts index ea97545767..6bdb840b74 100644 --- a/src/r-bridge/lang-4.x/ast/model/nodes/r-string.ts 
+++ b/src/r-bridge/lang-4.x/ast/model/nodes/r-string.ts @@ -1,6 +1,6 @@ import type { Leaf, Location, NoInfo } from '../model' import type { RType } from '../type' -import type { RStringValue } from '../../../values' +import type { RStringValue } from '../../../convert-values' export interface RString extends Leaf, Location { readonly type: RType.String; diff --git a/src/r-bridge/lang-4.x/ast/model/nodes/r-symbol.ts b/src/r-bridge/lang-4.x/ast/model/nodes/r-symbol.ts index 903cf2d3a9..3acd6db88c 100644 --- a/src/r-bridge/lang-4.x/ast/model/nodes/r-symbol.ts +++ b/src/r-bridge/lang-4.x/ast/model/nodes/r-symbol.ts @@ -1,6 +1,6 @@ import type { Leaf, Location, Namespace, NoInfo } from '../model' import type { RType } from '../type' -import { RNa, RNull } from '../../../values' +import { RNa, RNull } from '../../../convert-values' export function isSpecialSymbol(symbol: RSymbol): boolean { return symbol.content === RNull || symbol.content === RNa diff --git a/src/r-bridge/lang-4.x/ast/model/nodes/r-unary-op.ts b/src/r-bridge/lang-4.x/ast/model/nodes/r-unary-op.ts index f1fe29bf91..07214f7eb2 100644 --- a/src/r-bridge/lang-4.x/ast/model/nodes/r-unary-op.ts +++ b/src/r-bridge/lang-4.x/ast/model/nodes/r-unary-op.ts @@ -1,23 +1,9 @@ import type { Base, Location, NoInfo, RNode } from '../model' import type { RType } from '../type' -import type { UnaryOperatorFlavor } from '../operators' export interface RUnaryOp extends Base, Location { - readonly type: RType.UnaryOp; - readonly flavor: UnaryOperatorFlavor; - operator: string; - operand: RNode; -} - -export interface RLogicalUnaryOp extends RUnaryOp { - flavor: 'logical' -} - -export interface RArithmeticUnaryOp extends RUnaryOp { - flavor: 'arithmetic' -} - -export interface RModelFormulaUnaryOp extends RUnaryOp { - flavor: 'model formula' + readonly type: RType.UnaryOp; + operator: string; + operand: RNode; } diff --git a/src/r-bridge/lang-4.x/ast/model/operators.ts b/src/r-bridge/lang-4.x/ast/model/operators.ts index 
3d5d29a167..0006f477e9 100644 --- a/src/r-bridge/lang-4.x/ast/model/operators.ts +++ b/src/r-bridge/lang-4.x/ast/model/operators.ts @@ -1,5 +1,6 @@ import type { MergeableRecord } from '../../../../util/objects' import { RawRType } from './type' +import type { SupportedFlowrCapabilityId } from '../../../data' /** * Just a type-alias so that type declarations become more readable. @@ -17,70 +18,63 @@ export const enum OperatorArity { Both = 3, } -export type UnaryOperatorFlavor = 'arithmetic' | 'logical' | 'model formula'; -export type BinaryOperatorFlavor = - | UnaryOperatorFlavor - | 'comparison' - | 'assignment'; -export type BinaryOperatorFlavorInAst = BinaryOperatorFlavor | 'special'; export type OperatorWrittenAs = 'infix' | 'prefix'; export type OperatorUsedAs = 'assignment' | 'operation' | 'access'; -export type OperatorName = string; export interface OperatorInformationValue extends MergeableRecord { - name: OperatorName; + name: string; stringUsedInRAst: RawRType | `%${string}%`; stringUsedInternally: string; // precedence: number // handled by R - flavorInRAst: BinaryOperatorFlavorInAst; - flavor: BinaryOperatorFlavor; writtenAs: OperatorWrittenAs; arity: OperatorArity; usedAs: OperatorUsedAs; + /** The capabilities this operator maps to using the new desugaring */ + capabilities: readonly SupportedFlowrCapabilityId[]; } /* eslint-disable */ export const OperatorDatabase: Record & MergeableRecord = { /* model formulae */ - '~': { name: 'model formulae', stringUsedInRAst: RawRType.Tilde, stringUsedInternally: '~', flavorInRAst: 'model formula', flavor: 'model formula', writtenAs: 'infix', arity: OperatorArity.Both, usedAs: 'operation' }, + '~': { name: 'model formulae', stringUsedInRAst: RawRType.Tilde, stringUsedInternally: '~', writtenAs: 'infix', arity: OperatorArity.Both, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'model-formula', 'function-calls'] }, /* arithmetic */ - '+': { name: 'addition or unary +', stringUsedInRAst: 
RawRType.Plus, stringUsedInternally: '+', flavorInRAst: 'arithmetic', flavor: 'arithmetic', writtenAs: 'infix', arity: OperatorArity.Both, usedAs: 'operation' }, - '-': { name: 'subtraction or unary -', stringUsedInRAst: RawRType.Minus, stringUsedInternally: '-', flavorInRAst: 'arithmetic', flavor: 'arithmetic', writtenAs: 'infix', arity: OperatorArity.Both, usedAs: 'operation' }, - '*': { name: 'multiplication', stringUsedInRAst: RawRType.Times, stringUsedInternally: '*', flavorInRAst: 'arithmetic', flavor: 'arithmetic', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, - '/': { name: 'division', stringUsedInRAst: RawRType.Div, stringUsedInternally: '/', flavorInRAst: 'arithmetic', flavor: 'arithmetic', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, - '^': { name: 'exponentiation', stringUsedInRAst: RawRType.Exp, stringUsedInternally: '^', flavorInRAst: 'arithmetic', flavor: 'arithmetic', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, + '+': { name: 'addition or unary +', stringUsedInRAst: RawRType.Plus, stringUsedInternally: '+', writtenAs: 'infix', arity: OperatorArity.Both, usedAs: 'operation', capabilities: ['function-calls'] }, + '-': { name: 'subtraction or unary -', stringUsedInRAst: RawRType.Minus, stringUsedInternally: '-', writtenAs: 'infix', arity: OperatorArity.Both, usedAs: 'operation', capabilities: ['function-calls'] }, + '*': { name: 'multiplication', stringUsedInRAst: RawRType.Times, stringUsedInternally: '*', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'function-calls'] }, + '/': { name: 'division', stringUsedInRAst: RawRType.Div, stringUsedInternally: '/', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'function-calls'] }, + '^': { name: 'exponentiation', stringUsedInRAst: RawRType.Exp, stringUsedInternally: '^', writtenAs: 
'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'function-calls'] }, /* no error, R uses ^ to represent ** in the AST */ - '**': { name: 'alternative exponentiation', stringUsedInRAst: RawRType.Exp, stringUsedInternally: '**', flavorInRAst: 'arithmetic', flavor: 'arithmetic', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, - '%%': { name: 'modulus', stringUsedInRAst: '%%', stringUsedInternally: '%%', flavorInRAst: 'special', flavor: 'arithmetic', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, - '%/%': { name: 'integer division', stringUsedInRAst: '%/%', stringUsedInternally: '%/%', flavorInRAst: 'special', flavor: 'arithmetic', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, - '%*%': { name: 'matrix product', stringUsedInRAst: '%*%', stringUsedInternally: '%*%', flavorInRAst: 'special', flavor: 'arithmetic', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, - '%o%': { name: 'outer product', stringUsedInRAst: '%o%', stringUsedInternally: '%o%', flavorInRAst: 'special', flavor: 'arithmetic', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, - '%x%': { name: 'kronecker product', stringUsedInRAst: '%x%', stringUsedInternally: '%x%', flavorInRAst: 'special', flavor: 'arithmetic', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, + '**': { name: 'alternative exponentiation', stringUsedInRAst: RawRType.Exp, stringUsedInternally: '**', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'function-calls'] }, + '%%': { name: 'modulus', stringUsedInRAst: '%%', stringUsedInternally: '%%', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'special-operator', 'function-calls'] }, + '%/%': { name: 'integer division', stringUsedInRAst: '%/%', 
stringUsedInternally: '%/%', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'special-operator', 'function-calls'] }, + '%*%': { name: 'matrix product', stringUsedInRAst: '%*%', stringUsedInternally: '%*%', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'special-operator', 'function-calls'] }, + '%o%': { name: 'outer product', stringUsedInRAst: '%o%', stringUsedInternally: '%o%', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'special-operator', 'function-calls'] }, + '%x%': { name: 'kronecker product', stringUsedInRAst: '%x%', stringUsedInternally: '%x%', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'special-operator', 'function-calls'] }, /* comparison */ - '==': { name: 'equal to', stringUsedInRAst: RawRType.Eq, stringUsedInternally: '==', flavorInRAst: 'comparison', flavor: 'comparison', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, - '!=': { name: 'not equal to', stringUsedInRAst: RawRType.Ne, stringUsedInternally: '!=', flavorInRAst: 'comparison', flavor: 'comparison', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, - '>': { name: 'greater than', stringUsedInRAst: RawRType.Gt, stringUsedInternally: '>', flavorInRAst: 'comparison', flavor: 'comparison', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, - '>=': { name: 'greater than or equal to', stringUsedInRAst: RawRType.Ge, stringUsedInternally: '>=', flavorInRAst: 'comparison', flavor: 'comparison', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, - '<': { name: 'less than', stringUsedInRAst: RawRType.Lt, stringUsedInternally: '<', flavorInRAst: 'comparison', flavor: 'comparison', writtenAs: 'infix', arity: OperatorArity.Binary, 
usedAs: 'operation' }, - '<=': { name: 'less than or equal to', stringUsedInRAst: RawRType.Le, stringUsedInternally: '<=', flavorInRAst: 'comparison', flavor: 'comparison', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, + '==': { name: 'equal to', stringUsedInRAst: RawRType.Eq, stringUsedInternally: '==', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'function-calls'] }, + '!=': { name: 'not equal to', stringUsedInRAst: RawRType.Ne, stringUsedInternally: '!=', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'function-calls'] }, + '>': { name: 'greater than', stringUsedInRAst: RawRType.Gt, stringUsedInternally: '>', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'function-calls'] }, + '>=': { name: 'greater than or equal to', stringUsedInRAst: RawRType.Ge, stringUsedInternally: '>=', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'function-calls'] }, + '<': { name: 'less than', stringUsedInRAst: RawRType.Lt, stringUsedInternally: '<', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'function-calls'] }, + '<=': { name: 'less than or equal to', stringUsedInRAst: RawRType.Le, stringUsedInternally: '<=', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'function-calls'] }, /* logical */ - '&': { name: 'logical and (vectorized)', stringUsedInRAst: RawRType.And, stringUsedInternally: '&', flavorInRAst: 'logical', flavor: 'logical', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, - '&&': { name: 'logical and (non-vectorized)', stringUsedInRAst: RawRType.And2, stringUsedInternally: '&&', flavorInRAst: 
'logical', flavor: 'logical', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, - '|': { name: 'logical or (vectorized)', stringUsedInRAst: RawRType.Or, stringUsedInternally: '|', flavorInRAst: 'logical', flavor: 'logical', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, - '||': { name: 'logical or (not-vectorized)', stringUsedInRAst: RawRType.Or2, stringUsedInternally: '||', flavorInRAst: 'logical', flavor: 'logical', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, - '!': { name: 'unary not', stringUsedInRAst: RawRType.Exclamation, stringUsedInternally: '!', flavorInRAst: 'logical', flavor: 'logical', writtenAs: 'prefix', arity: OperatorArity.Unary, usedAs: 'operation' }, - '%in%': { name: 'matching operator', stringUsedInRAst: '%in%', stringUsedInternally: '%in%', flavorInRAst: 'special', flavor: 'logical', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, + '&': { name: 'logical and (vectorized)', stringUsedInRAst: RawRType.And, stringUsedInternally: '&', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'function-calls', 'non-strict-logical-operators'] }, + '&&': { name: 'logical and (non-vectorized)', stringUsedInRAst: RawRType.And2, stringUsedInternally: '&&', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'function-calls', 'non-strict-logical-operators'] }, + '|': { name: 'logical or (vectorized)', stringUsedInRAst: RawRType.Or, stringUsedInternally: '|', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'function-calls', 'non-strict-logical-operators'] }, + '||': { name: 'logical or (not-vectorized)', stringUsedInRAst: RawRType.Or2, stringUsedInternally: '||', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: 
['binary-operator', 'infix-calls', 'function-calls', 'non-strict-logical-operators'] }, + '!': { name: 'unary not', stringUsedInRAst: RawRType.Exclamation, stringUsedInternally: '!', writtenAs: 'prefix', arity: OperatorArity.Unary, usedAs: 'operation', capabilities: ['unary-operator', 'function-calls'] }, + '%in%': { name: 'matching operator', stringUsedInRAst: '%in%', stringUsedInternally: '%in%', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'special-operator', 'function-calls'] }, /* assignment */ - '<-': { name: 'left assignment', stringUsedInRAst: RawRType.LeftAssign, stringUsedInternally: '<-', flavorInRAst: 'special', flavor: 'assignment', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'assignment' }, - ':=': { name: 'left assignment', stringUsedInRAst: RawRType.LeftAssign, stringUsedInternally: ':=', flavorInRAst: 'special', flavor: 'assignment', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'assignment' }, - '<<-': { name: 'left global assignment', stringUsedInRAst: RawRType.LeftAssign, stringUsedInternally: '<<-', flavorInRAst: 'special', flavor: 'assignment', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'assignment' }, - '->': { name: 'right assignment', stringUsedInRAst: RawRType.RightAssign, stringUsedInternally: '->', flavorInRAst: 'special', flavor: 'assignment', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'assignment' }, - '->>': { name: 'right global assignment', stringUsedInRAst: RawRType.RightAssign, stringUsedInternally: '->>', flavorInRAst: 'special', flavor: 'assignment', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'assignment' }, - '=': { name: 'equal assignment', stringUsedInRAst: RawRType.EqualAssign, stringUsedInternally: '=', flavorInRAst: 'special', flavor: 'assignment', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'assignment' }, + '<-': { name: 'left assignment', stringUsedInRAst: 
RawRType.LeftAssign, stringUsedInternally: '<-', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'assignment', capabilities: ['binary-operator', 'infix-calls', 'assignment-functions', 'local-left-assignment', 'function-calls'] }, + ':=': { name: 'left assignment', stringUsedInRAst: RawRType.LeftAssign, stringUsedInternally: ':=', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'assignment', capabilities: ['binary-operator', 'infix-calls', 'assignment-functions', 'local-equal-assignment', 'function-calls'] }, + '<<-': { name: 'left global assignment', stringUsedInRAst: RawRType.LeftAssign, stringUsedInternally: '<<-', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'assignment', capabilities: ['binary-operator', 'infix-calls', 'assignment-functions', 'super-left-assignment', 'function-calls'] }, + '->': { name: 'right assignment', stringUsedInRAst: RawRType.RightAssign, stringUsedInternally: '->', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'assignment', capabilities: ['binary-operator', 'infix-calls', 'assignment-functions', 'local-right-assignment', 'function-calls'] }, + '->>': { name: 'right global assignment', stringUsedInRAst: RawRType.RightAssign, stringUsedInternally: '->>', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'assignment', capabilities: ['binary-operator', 'infix-calls', 'assignment-functions', 'super-right-assignment', 'function-calls'] }, + '=': { name: 'equal assignment', stringUsedInRAst: RawRType.EqualAssign, stringUsedInternally: '=', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'assignment', capabilities: ['binary-operator', 'infix-calls', 'assignment-functions', 'local-equal-assignment', 'function-calls'] }, /* others */ /* maybe introduce custom in-r-ast flavor for these? we consider it arithmetic, as it works on numbers => if we change this we have to create custom tests! 
(with arithmetic, there is the automatic test set) */ - ':': { name: 'sequence operator', stringUsedInRAst: RawRType.Colon, stringUsedInternally: ':', flavorInRAst: 'special', flavor: 'arithmetic', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation' }, - '?': { name: 'question', stringUsedInRAst: RawRType.Question, stringUsedInternally: '?', flavorInRAst: 'special', flavor: 'logical', writtenAs: 'prefix', arity: OperatorArity.Unary, usedAs: 'operation' } + ':': { name: 'sequence operator', stringUsedInRAst: RawRType.Colon, stringUsedInternally: ':', writtenAs: 'infix', arity: OperatorArity.Binary, usedAs: 'operation', capabilities: ['binary-operator', 'infix-calls', 'function-calls', 'built-in-sequencing'] }, + '?': { name: 'question', stringUsedInRAst: RawRType.Question, stringUsedInternally: '?', writtenAs: 'prefix', arity: OperatorArity.Unary, usedAs: 'operation', capabilities: ['unary-operator', 'built-in-help'] } } /* eslint-enable */ @@ -88,34 +82,8 @@ function buildOperatorRAstCollection(operators: readonly string[]): Set return new Set(operators.map(op => OperatorDatabase[op].stringUsedInRAst)) } -export const ArithmeticOperators: readonly string[] = Object.keys( - OperatorDatabase -).filter((op) => OperatorDatabase[op].flavor === 'arithmetic') -// '**' will be treated as '^' -export const ArithmeticOperatorsRAst = buildOperatorRAstCollection(ArithmeticOperators) -export const ComparisonOperators: readonly string[] = Object.keys( - OperatorDatabase -).filter((op) => OperatorDatabase[op].flavor === 'comparison') -export const ComparisonOperatorsRAst = buildOperatorRAstCollection(ComparisonOperators) -export const LogicalOperators: readonly string[] = Object.keys( - OperatorDatabase -).filter((op) => OperatorDatabase[op].flavor === 'logical') -export const LogicalOperatorsRAst = buildOperatorRAstCollection(LogicalOperators) - -export const ModelFormulaOperators: readonly string[] = Object.keys( - OperatorDatabase -).filter((op) => 
OperatorDatabase[op].flavor === 'model formula') -export const ModelFormulaOperatorsRAst = buildOperatorRAstCollection(ModelFormulaOperators) - -export const Assignments: readonly string[] = Object.keys( - OperatorDatabase -).filter((op) => OperatorDatabase[op].flavor === 'assignment') -export const AssignmentsRAst = buildOperatorRAstCollection(Assignments) +export const Operators: readonly string[] = Object.keys(OperatorDatabase) -export const Operators = [ - ...ArithmeticOperators, - ...ComparisonOperators, - ...LogicalOperators, -] as const - -export type Operator = (typeof Operators)[number]; +// '**' will be treated as '^' +export const OperatorsInRAst = buildOperatorRAstCollection(Operators) +export const UnaryOperatorsInRAst = buildOperatorRAstCollection(Operators.filter(op => OperatorDatabase[op].arity !== OperatorArity.Binary)) diff --git a/src/r-bridge/lang-4.x/ast/model/processing/decorate.ts b/src/r-bridge/lang-4.x/ast/model/processing/decorate.ts index 284ea7ee42..74cce15d42 100644 --- a/src/r-bridge/lang-4.x/ast/model/processing/decorate.ts +++ b/src/r-bridge/lang-4.x/ast/model/processing/decorate.ts @@ -13,22 +13,24 @@ import type { NoInfo, RNode } from '../model' import { guard } from '../../../../../util/assert' import type { SourceRange } from '../../../../../util/range' import { BiMap } from '../../../../../util/bimap' -import { foldAst } from './fold' import type { RArgument, RBinaryOp, + RExpressionList, RFunctionCall, RNamedFunctionCall, RParameter, RPipe, RUnnamedFunctionCall } from '../nodes' +import { EmptyArgument } from '../nodes' import type { MergeableRecord } from '../../../../../util/objects' import { RoleInParent } from './role' import { RType } from '../type' +import type { RDelimiter } from '../nodes/info' +import { foldAstStateful } from './stateful-fold' +import type { NodeId } from './node-id' -/** The type of the id assigned to each node. Branded to avoid problematic usages with other string types. 
*/ -export type NodeId = string & { __brand?: 'node-id'}; /** * A function that given an RNode returns a (guaranteed) unique id for it @@ -43,7 +45,7 @@ export type IdGenerator = (data: RNode) => NodeId */ export function deterministicCountingIdGenerator(start = 0): () => NodeId { let id = start - return () => `${id++}` + return () => id++ } export function sourcedDeterministicCountingIdGenerator(path: string, location: SourceRange, start = 0): () => NodeId { @@ -51,8 +53,8 @@ export function sourcedDeterministicCountingIdGenerator(path: string, location: return () => `${path}-${loc2Id(location)}-${id++}` } -function loc2Id(loc: SourceRange) { - return `${loc.start.line}:${loc.start.column}-${loc.end.line}:${loc.end.column}` +function loc2Id([sl,sc,el,ec]: SourceRange): string { + return `${sl}:${sc}-${el}:${ec}` } /** @@ -60,7 +62,7 @@ function loc2Id(loc: SourceRange) { * * @param data - the node to generate an id for, must have location information */ -export function nodeToLocationId(data: RNode): NodeId { +export function nodeToLocationId(data: RNode | RDelimiter): NodeId { const loc = data.location guard(loc !== undefined, 'location must be defined to generate a location id') return loc2Id(loc) @@ -68,9 +70,9 @@ export function nodeToLocationId(data: RNode): NodeId { /** * Generates unique ids based on the locations of the node (see {@link nodeToLocationId}). - * If a node has no location information, it will be assigned a unique counter value. + * If a node has no location information, it will be assigned a unique counter-value. 
* - * @param start - the start value for the counter in case nodes do not have a location information + * @param start - the start value for the counter, in case nodes do not have location information */ export function deterministicLocationIdGenerator(start = 0): IdGenerator { let id = start @@ -80,15 +82,24 @@ export function deterministicLocationIdGenerator(start = 0): IdGenera export interface ParentContextInfo extends MergeableRecord { role: RoleInParent /** - * 0-based index of the child in the parent (code semantics, e.g., for an if-then-else, the condition will be 0, the then-case 1, ...) + * The depth of the node in the AST * - * The index is adaptive, that means that if the name of an argument exists, it will have the index 0, and the value the index 1. + * The root node has a depth of 0, its children a depth of 1, and so on. + */ + depth: number + /** + * 0-based index of the child in the parent (code semantics, e.g., for an if-then-else, the condition will be 0, the then-case will be 1, ...) + * + * The index is adaptive, that means that if the name of an argument exists, it will have index 0, and the value will have index 1. * But if the argument is unnamed, its value will get the index 0 instead. */ index: number } -const defaultParentContext = { role: RoleInParent.Root, index: 0 } +const defaultParentContext: Omit = { + role: RoleInParent.Root, + index: 0 +} export interface ParentInformation extends ParentContextInfo { /** uniquely identifies an AST-Node */ @@ -107,11 +118,11 @@ interface FoldInfo { idMap: DecoratedAstMap, getId: IdGene * Contains the normalized AST as a doubly linked tree * and a map from ids to nodes so that parent links can be chased easily. 
*/ -export interface NormalizedAst { +export interface NormalizedAst> { /** Bidirectional mapping of ids to the corresponding nodes and the other way */ idMap: DecoratedAstMap /** The root of the AST with parent information */ - ast: RNodeWithParent + ast: Node } /** @@ -122,37 +133,29 @@ export interface NormalizedAst { * * @typeParam OtherInfo - The original decoration of the ast nodes (probably is nothing as the id decoration is most likely the first step to be performed after extraction) * - * @returns A {@link DecoratedAst | decorated AST} based on the input and the id provider. + * @returns A decorated AST based on the input and the id provider. */ export function decorateAst(ast: RNode, getId: IdGenerator = deterministicCountingIdGenerator(0)): NormalizedAst { const idMap: DecoratedAstMap = new BiMap>() const info: FoldInfo = { idMap, getId } - /* Please note, that all fold processors do not re-create copies in higher folding steps so that the idMap stays intact. */ + /* Please note, that all fold processors do not re-create copies in higher-folding steps so that the idMap stays intact. 
*/ const foldLeaf = createFoldForLeaf(info) const foldBinaryOp = createFoldForBinaryOp(info) const unaryOp = createFoldForUnaryOp(info) - const decoratedAst: RNodeWithParent = foldAst(ast, { - foldNumber: foldLeaf, - foldString: foldLeaf, - foldLogical: foldLeaf, - foldSymbol: foldLeaf, - foldAccess: createFoldForAccess(info), - binaryOp: { - foldLogicalOp: foldBinaryOp, - foldArithmeticOp: foldBinaryOp, - foldComparisonOp: foldBinaryOp, - foldAssignment: foldBinaryOp, - foldPipe: foldBinaryOp, - foldModelFormula: foldBinaryOp - }, - unaryOp: { - foldArithmeticOp: unaryOp, - foldLogicalOp: unaryOp, - foldModelFormula: unaryOp - }, - other: { + /* we pass down the depth */ + const decoratedAst: RNodeWithParent = foldAstStateful(ast, -1,{ + down: (_, down: number): number => down + 1, + foldNumber: foldLeaf, + foldString: foldLeaf, + foldLogical: foldLeaf, + foldSymbol: foldLeaf, + foldAccess: createFoldForAccess(info), + foldBinaryOp: foldBinaryOp, + foldPipe: foldBinaryOp, + foldUnaryOp: unaryOp, + other: { foldComment: foldLeaf, foldLineDirective: foldLeaf }, @@ -183,18 +186,27 @@ export function decorateAst(ast: RNode, getId: Id } function createFoldForLeaf(info: FoldInfo) { - return (data: RNode): RNodeWithParent => { + return (data: RNode, depth: number): RNodeWithParent => { const id = info.getId(data) - const decorated = { ...data, info: { ...data.info, id, parent: undefined, ...defaultParentContext } } as RNodeWithParent + const decorated = { + ...data, + info: { + ...data.info, + id, + parent: undefined, + ...defaultParentContext, + depth + } + } as RNodeWithParent info.idMap.set(id, decorated) return decorated } } function createFoldForBinaryOp(info: FoldInfo) { - return (data: RBinaryOp | RPipe, lhs: RNodeWithParent, rhs: RNodeWithParent): RNodeWithParent => { + return (data: RBinaryOp | RPipe, lhs: RNodeWithParent, rhs: RNodeWithParent, depth: number): RNodeWithParent => { const id = info.getId(data) - const decorated = { ...data, info: { ...data.info, 
id, parent: undefined }, lhs, rhs } as RNodeWithParent + const decorated = { ...data, info: { ...data.info, id, parent: undefined, depth }, lhs, rhs } as RNodeWithParent info.idMap.set(id, decorated) const lhsInfo = lhs.info lhsInfo.parent = id @@ -213,9 +225,9 @@ function createFoldForBinaryOp(info: FoldInfo) { } function createFoldForUnaryOp(info: FoldInfo) { - return (data: RNode, operand: RNodeWithParent): RNodeWithParent => { + return (data: RNode, operand: RNodeWithParent, depth: number): RNodeWithParent => { const id = info.getId(data) - const decorated = { ...data, info: { ...data.info, id, parent: undefined }, operand } as RNodeWithParent + const decorated = { ...data, info: { ...data.info, id, parent: undefined, depth }, operand } as RNodeWithParent info.idMap.set(id, decorated) const opInfo = operand.info opInfo.parent = id @@ -225,9 +237,9 @@ function createFoldForUnaryOp(info: FoldInfo) { } function createFoldForAccess(info: FoldInfo) { - return (data: RNode, accessed: RNodeWithParent, access: string | (RNodeWithParent | null)[]): RNodeWithParent => { + return (data: RNode, accessed: RNodeWithParent, access: readonly (RNodeWithParent | typeof EmptyArgument)[], depth: number): RNodeWithParent => { const id = info.getId(data) - const decorated = { ...data, info: { ...data.info, id, parent: undefined }, accessed, access } as RNodeWithParent + const decorated = { ...data, info: { ...data.info, id, parent: undefined, depth }, accessed, access } as RNodeWithParent info.idMap.set(id, decorated) const accessedInfo = accessed.info accessedInfo.parent = id @@ -236,7 +248,7 @@ function createFoldForAccess(info: FoldInfo) { let idx = 0 // the first oe will be skipped in the first iter for(const acc of access) { idx++ - if(acc !== null) { + if(acc !== EmptyArgument) { const curInfo = acc.info curInfo.parent = id curInfo.index = idx @@ -249,9 +261,9 @@ function createFoldForAccess(info: FoldInfo) { } function createFoldForForLoop(info: FoldInfo) { - return (data: 
RNode, variable: RNodeWithParent, vector: RNodeWithParent, body: RNodeWithParent): RNodeWithParent => { + return (data: RNode, variable: RNodeWithParent, vector: RNodeWithParent, body: RNodeWithParent, depth: number): RNodeWithParent => { const id = info.getId(data) - const decorated = { ...data, info: { ...data.info, id, parent: undefined }, variable, vector, body } as RNodeWithParent + const decorated = { ...data, info: { ...data.info, id, parent: undefined, depth }, variable, vector, body } as RNodeWithParent info.idMap.set(id, decorated) const varInfo = variable.info varInfo.parent = id @@ -269,9 +281,9 @@ function createFoldForForLoop(info: FoldInfo) { } function createFoldForRepeatLoop(info: FoldInfo) { - return (data: RNode, body: RNodeWithParent): RNodeWithParent => { + return (data: RNode, body: RNodeWithParent, depth: number): RNodeWithParent => { const id = info.getId(data) - const decorated = { ...data, info: { ...data.info, id, parent: undefined }, body } as RNodeWithParent + const decorated = { ...data, info: { ...data.info, id, parent: undefined, depth }, body } as RNodeWithParent info.idMap.set(id, decorated) const bodyInfo = body.info bodyInfo.parent = id @@ -281,9 +293,9 @@ function createFoldForRepeatLoop(info: FoldInfo) { } function createFoldForWhileLoop(info: FoldInfo) { - return (data: RNode, condition: RNodeWithParent, body: RNodeWithParent): RNodeWithParent => { + return (data: RNode, condition: RNodeWithParent, body: RNodeWithParent, depth: number): RNodeWithParent => { const id = info.getId(data) - const decorated = { ...data, info: { ...data.info, id, parent: undefined }, condition, body } as RNodeWithParent + const decorated = { ...data, info: { ...data.info, id, parent: undefined, depth }, condition, body } as RNodeWithParent info.idMap.set(id, decorated) const condInfo = condition.info condInfo.parent = id @@ -297,9 +309,9 @@ function createFoldForWhileLoop(info: FoldInfo) { } function createFoldForIfThenElse(info: FoldInfo) { - 
return (data: RNode, condition: RNodeWithParent, then: RNodeWithParent, otherwise?: RNodeWithParent): RNodeWithParent => { + return (data: RNode, condition: RNodeWithParent, then: RNodeWithParent, otherwise: RNodeWithParent | undefined, depth: number): RNodeWithParent => { const id = info.getId(data) - const decorated = { ...data, info: { ...data.info, id, parent: undefined }, condition, then, otherwise } as RNodeWithParent + const decorated = { ...data, info: { ...data.info, id, parent: undefined, depth }, condition, then, otherwise } as RNodeWithParent info.idMap.set(id, decorated) const condInfo = condition.info condInfo.parent = id @@ -319,9 +331,9 @@ function createFoldForIfThenElse(info: FoldInfo) { } function createFoldForExprList(info: FoldInfo) { - return (data: RNode, children: RNodeWithParent[]): RNodeWithParent => { + return (data: RExpressionList, grouping: [RNodeWithParent, RNodeWithParent] | undefined, children: readonly RNodeWithParent[], depth: number): RNodeWithParent => { const id = info.getId(data) - const decorated = { ...data, info: { ...data.info, id, parent: undefined }, children } as RNodeWithParent + const decorated = { ...data, info: { ...data.info, id, parent: undefined, depth }, grouping, children } as RNodeWithParent info.idMap.set(id, decorated) let i = 0 for(const child of children) { @@ -335,13 +347,13 @@ function createFoldForExprList(info: FoldInfo) { } function createFoldForFunctionCall(info: FoldInfo) { - return (data: RFunctionCall, functionName: RNodeWithParent, args: (RNodeWithParent | undefined)[]): RNodeWithParent => { + return (data: RFunctionCall, functionName: RNodeWithParent, args: readonly (RNodeWithParent | typeof EmptyArgument)[], depth: number): RNodeWithParent => { const id = info.getId(data) let decorated: RFunctionCall if(data.flavor === 'named') { - decorated = { ...data, info: { ...data.info, id, parent: undefined }, functionName, arguments: args } as RNamedFunctionCall + decorated = { ...data, info: { 
...data.info, id, parent: undefined, depth }, functionName, arguments: args } as RNamedFunctionCall } else { - decorated = { ...data, info: { ...data.info, id, parent: undefined }, calledFunction: functionName, arguments: args } as RUnnamedFunctionCall + decorated = { ...data, info: { ...data.info, id, parent: undefined, depth }, calledFunction: functionName, arguments: args } as RUnnamedFunctionCall } info.idMap.set(id, decorated) const funcInfo = functionName.info @@ -350,10 +362,11 @@ function createFoldForFunctionCall(info: FoldInfo) { let idx = 0 for(const arg of args) { idx++ - if(arg !== undefined) { + if(arg !== EmptyArgument) { const argInfo = arg.info argInfo.parent = id argInfo.index = idx + argInfo.role = RoleInParent.FunctionCallArgument } } return decorated @@ -361,9 +374,9 @@ function createFoldForFunctionCall(info: FoldInfo) { } function createFoldForFunctionDefinition(info: FoldInfo) { - return (data: RNode, params: RNodeWithParent[], body: RNodeWithParent): RNodeWithParent => { + return (data: RNode, params: RNodeWithParent[], body: RNodeWithParent, depth: number): RNodeWithParent => { const id = info.getId(data) - const decorated = { ...data, info: { ...data.info, id, parent: undefined }, parameters: params, body } as RNodeWithParent + const decorated = { ...data, info: { ...data.info, id, parent: undefined, depth }, parameters: params, body } as RNodeWithParent info.idMap.set(id, decorated) let idx = 0 for(const param of params) { @@ -381,9 +394,9 @@ function createFoldForFunctionDefinition(info: FoldInfo) { } function createFoldForFunctionParameter(info: FoldInfo) { - return (data: RParameter, name: RNodeWithParent, defaultValue: RNodeWithParent | undefined): RNodeWithParent => { + return (data: RParameter, name: RNodeWithParent, defaultValue: RNodeWithParent | undefined, depth: number): RNodeWithParent => { const id = info.getId(data) - const decorated = { ...data, info: { ...data.info, id, parent: undefined }, name, defaultValue } as 
RParameter + const decorated = { ...data, info: { ...data.info, id, parent: undefined, depth }, name, defaultValue } as RParameter info.idMap.set(id, decorated) const nameInfo = name.info nameInfo.parent = id @@ -399,9 +412,9 @@ function createFoldForFunctionParameter(info: FoldInfo) { } function createFoldForFunctionArgument(info: FoldInfo) { - return (data: RArgument, name: RNodeWithParent | undefined, value: RNodeWithParent | undefined): RNodeWithParent => { + return (data: RArgument, name: RNodeWithParent | undefined, value: RNodeWithParent | undefined, depth: number): RNodeWithParent => { const id = info.getId(data) - const decorated = { ...data, info: { ...data.info, id, parent: undefined }, name, value } as RNodeWithParent + const decorated = { ...data, info: { ...data.info, id, parent: undefined, depth }, name, value } as RNodeWithParent info.idMap.set(id, decorated) let idx = 0 if(name) { diff --git a/src/r-bridge/lang-4.x/ast/model/processing/index.ts b/src/r-bridge/lang-4.x/ast/model/processing/index.ts index 98f3473fa1..5d784554c5 100644 --- a/src/r-bridge/lang-4.x/ast/model/processing/index.ts +++ b/src/r-bridge/lang-4.x/ast/model/processing/index.ts @@ -3,3 +3,4 @@ export * from './decorate' export * from './stateful-fold' export * from './visitor' export * from './role' +export type { NodeId } from './node-id' diff --git a/src/r-bridge/lang-4.x/ast/model/processing/node-id.ts b/src/r-bridge/lang-4.x/ast/model/processing/node-id.ts new file mode 100644 index 0000000000..5fe00a3f01 --- /dev/null +++ b/src/r-bridge/lang-4.x/ast/model/processing/node-id.ts @@ -0,0 +1,12 @@ +/** The type of the id assigned to each node. Branded to avoid problematic usages with other string or numeric types. 
*/ +export type NodeId = T & { __brand?: 'node-id' }; +const numIdRegex = /^\d+$/ + +/** used so that we do not have to store strings for the default numeric ids */ +export function normalizeIdToNumberIfPossible(id: NodeId): NodeId { + // check if string is number + if(typeof id === 'string' && numIdRegex.test(id)) { + return Number(id) + } + return id +} diff --git a/src/r-bridge/lang-4.x/ast/model/processing/stateful-fold.ts b/src/r-bridge/lang-4.x/ast/model/processing/stateful-fold.ts index a25d78b949..1c2ed63784 100644 --- a/src/r-bridge/lang-4.x/ast/model/processing/stateful-fold.ts +++ b/src/r-bridge/lang-4.x/ast/model/processing/stateful-fold.ts @@ -7,13 +7,7 @@ import type { RSymbol, RLogical, RString, - RArithmeticBinaryOp, - RAssignmentOp, RBinaryOp, - RComparisonBinaryOp, - RLogicalBinaryOp, - RArithmeticUnaryOp, - RLogicalUnaryOp, RUnaryOp, RIfThenElse, RForLoop, @@ -27,10 +21,9 @@ import type { RArgument, RFunctionDefinition, RAccess, - RModelFormulaBinaryOp, - RModelFormulaUnaryOp, - RLineDirective, - RPipe + RLineDirective, RPipe +} from '../nodes' +import { EmptyArgument } from '../nodes' import type { RNode } from '../model' @@ -47,25 +40,15 @@ export type DownFold = (node: RNode, down: Down) => Down * The `down` argument holds information obtained during the down-pass, issued by the `down` function. 
*/ export interface StatefulFoldFunctions { - down: DownFold - foldNumber: (num: RNumber, down: Down) => Up; - foldString: (str: RString, down: Down) => Up; - foldLogical: (logical: RLogical, down: Down) => Up; - foldSymbol: (symbol: RSymbol, down: Down) => Up; - foldAccess: (node: RAccess, name: Up, access: string | (null | Up)[], down: Down) => Up; - binaryOp: { - foldLogicalOp: (op: RLogicalBinaryOp, lhs: Up, rhs: Up, down: Down) => Up; - foldArithmeticOp: (op: RArithmeticBinaryOp, lhs: Up, rhs: Up, down: Down) => Up; - foldComparisonOp: (op: RComparisonBinaryOp, lhs: Up, rhs: Up, down: Down) => Up; - foldAssignment: (op: RAssignmentOp, lhs: Up, rhs: Up, down: Down) => Up; - foldPipe: (op: RPipe, lhs: Up, rhs: Up, down: Down) => Up; - foldModelFormula: (op: RModelFormulaBinaryOp, lhs: Up, rhs: Up, down: Down) => Up; - }; - unaryOp: { - foldLogicalOp: (op: RLogicalUnaryOp, operand: Up, down: Down) => Up; - foldArithmeticOp: (op: RArithmeticUnaryOp, operand: Up, down: Down) => Up; - foldModelFormula: (op: RModelFormulaUnaryOp, operand: Up, down: Down) => Up; - }; + down: DownFold + foldNumber: (num: RNumber, down: Down) => Up; + foldString: (str: RString, down: Down) => Up; + foldLogical: (logical: RLogical, down: Down) => Up; + foldSymbol: (symbol: RSymbol, down: Down) => Up; + foldAccess: (node: RAccess, name: Up, access: readonly (typeof EmptyArgument | Up)[], down: Down) => Up; + foldBinaryOp: (op: RBinaryOp, lhs: Up, rhs: Up, down: Down) => Up; + foldPipe: (op: RPipe, lhs: Up, rhs: Up, down: Down) => Up; + foldUnaryOp: (op: RUnaryOp, operand: Up, down: Down) => Up; loop: { foldFor: (loop: RForLoop, variable: Up, vector: Up, body: Up, down: Down) => Up; foldWhile: (loop: RWhileLoop, condition: Up, body: Up, down: Down) => Up; @@ -79,11 +62,11 @@ export interface StatefulFoldFunctions { }; /** The `otherwise` argument is `undefined` if the `else` branch is missing */ foldIfThenElse: (ifThenExpr: RIfThenElse, cond: Up, then: Up, otherwise: Up | undefined, down: 
Down ) => Up; - foldExprList: (exprList: RExpressionList, expressions: Up[], down: Down) => Up; + foldExprList: (exprList: RExpressionList, grouping: [start: Up, end: Up] | undefined, expressions: Up[], down: Down) => Up; functions: { foldFunctionDefinition: (definition: RFunctionDefinition, params: Up[], body: Up, down: Down) => Up; /** folds named and unnamed function calls */ - foldFunctionCall: (call: RFunctionCall, functionNameOrExpression: Up, args: (Up | undefined)[], down: Down) => Up; + foldFunctionCall: (call: RFunctionCall, functionNameOrExpression: Up, args: (Up | typeof EmptyArgument)[], down: Down) => Up; /** The `name` is `undefined` if the argument is unnamed, the value, if we have something like `x=,...` */ foldArgument: (argument: RArgument, name: Up | undefined, value: Up | undefined, down: Down) => Up; /** The `defaultValue` is `undefined` if the argument was not initialized with a default value */ @@ -112,13 +95,13 @@ export function foldAstStateful(ast: RNode, down: Down, fo case RType.LineDirective: return folds.other.foldLineDirective(ast, down) case RType.Pipe: - return folds.binaryOp.foldPipe(ast, foldAstStateful(ast.lhs, down, folds), foldAstStateful(ast.rhs, down, folds), down) + return folds.foldPipe(ast, foldAstStateful(ast.lhs, down, folds), foldAstStateful(ast.rhs, down, folds), down) case RType.BinaryOp: - return foldBinaryOp(ast, down, folds) + return folds.foldBinaryOp(ast, foldAstStateful(ast.lhs, down, folds), foldAstStateful(ast.rhs, down, folds), down) case RType.UnaryOp: - return foldUnaryOp(ast, down, folds) + return folds.foldUnaryOp(ast, foldAstStateful(ast.operand, down, folds), down) case RType.Access: - return folds.foldAccess(ast, foldAstStateful(ast.accessed, down, folds), ast.operator === '[' || ast.operator === '[[' ? ast.access.map(access => access === null ? 
null : foldAstStateful(access, down, folds)) : ast.access as string, down) + return folds.foldAccess(ast, foldAstStateful(ast.accessed, down, folds), ast.access.map(access => access === EmptyArgument ? EmptyArgument : foldAstStateful(access, down, folds)), down) case RType.ForLoop: return folds.loop.foldFor(ast, foldAstStateful(ast.variable, down, folds), foldAstStateful(ast.vector, down, folds), foldAstStateful(ast.body, down, folds), down) case RType.WhileLoop: @@ -126,7 +109,7 @@ export function foldAstStateful(ast: RNode, down: Down, fo case RType.RepeatLoop: return folds.loop.foldRepeat(ast, foldAstStateful(ast.body, down, folds), down) case RType.FunctionCall: - return folds.functions.foldFunctionCall(ast, foldAstStateful(ast.flavor === 'named' ? ast.functionName : ast.calledFunction, down, folds), ast.arguments.map(param => param === undefined ? param : foldAstStateful(param, down, folds)), down) + return folds.functions.foldFunctionCall(ast, foldAstStateful(ast.flavor === 'named' ? ast.functionName : ast.calledFunction, down, folds), ast.arguments.map(param => param === EmptyArgument ? param : foldAstStateful(param, down, folds)), down) case RType.FunctionDefinition: return folds.functions.foldFunctionDefinition(ast, ast.parameters.map(param => foldAstStateful(param, down, folds)), foldAstStateful(ast.body, down, folds), down) case RType.Parameter: @@ -140,39 +123,9 @@ export function foldAstStateful(ast: RNode, down: Down, fo case RType.IfThenElse: return folds.foldIfThenElse(ast, foldAstStateful(ast.condition, down, folds), foldAstStateful(ast.then, down, folds), ast.otherwise === undefined ? undefined : foldAstStateful(ast.otherwise, down, folds), down) case RType.ExpressionList: - return folds.foldExprList(ast, ast.children.map(expr => foldAstStateful(expr, down, folds)), down) + return folds.foldExprList(ast, ast.grouping ? 
[foldAstStateful(ast.grouping[0], down, folds), foldAstStateful(ast.grouping[1], down, folds)] : undefined , ast.children.map(expr => foldAstStateful(expr, down, folds)), down) default: assertUnreachable(type) } } -function foldBinaryOp(ast: RBinaryOp, down: Down, folds: StatefulFoldFunctions): Up { - switch(ast.flavor) { - case 'logical': - return folds.binaryOp.foldLogicalOp(ast as RLogicalBinaryOp, foldAstStateful(ast.lhs, down, folds), foldAstStateful(ast.rhs, down, folds), down) - case 'arithmetic': - return folds.binaryOp.foldArithmeticOp(ast as RArithmeticBinaryOp, foldAstStateful(ast.lhs, down, folds), foldAstStateful(ast.rhs, down, folds), down) - case 'comparison': - return folds.binaryOp.foldComparisonOp(ast as RComparisonBinaryOp, foldAstStateful(ast.lhs, down, folds), foldAstStateful(ast.rhs, down, folds), down) - case 'assignment': - return folds.binaryOp.foldAssignment(ast as RAssignmentOp, foldAstStateful(ast.lhs, down, folds), foldAstStateful(ast.rhs, down, folds), down) - case 'model formula': - return folds.binaryOp.foldModelFormula(ast as RModelFormulaBinaryOp, foldAstStateful(ast.lhs, down, folds), foldAstStateful(ast.rhs, down, folds), down) - default: - assertUnreachable(ast.flavor) - } -} - - -function foldUnaryOp(ast: RUnaryOp, down: Down, folds: StatefulFoldFunctions): Up { - switch(ast.flavor) { - case 'logical': - return folds.unaryOp.foldLogicalOp(ast as RLogicalUnaryOp, foldAstStateful(ast.operand, down, folds), down) - case 'arithmetic': - return folds.unaryOp.foldArithmeticOp(ast as RArithmeticUnaryOp, foldAstStateful(ast.operand, down, folds), down) - case 'model formula': - return folds.unaryOp.foldModelFormula(ast as RModelFormulaUnaryOp, foldAstStateful(ast.operand, down, folds), down) - default: - assertUnreachable(ast.flavor) - } -} diff --git a/src/r-bridge/lang-4.x/ast/model/processing/visitor.ts b/src/r-bridge/lang-4.x/ast/model/processing/visitor.ts index 3515f1200c..351273a31b 100644 --- 
a/src/r-bridge/lang-4.x/ast/model/processing/visitor.ts +++ b/src/r-bridge/lang-4.x/ast/model/processing/visitor.ts @@ -1,6 +1,7 @@ import type { NoInfo, RNode } from '../model' import { RType } from '../type' import { assertUnreachable } from '../../../../../util/assert' +import { EmptyArgument } from '../nodes' /** Return `true` to stop visiting from this node (i.e., do not continue to visit this node *and* the children) */ @@ -36,6 +37,7 @@ class NodeVisitor { this.visitSingle(node.body) break case RType.ExpressionList: + this.visit(node.grouping) this.visit(node.children) break case RType.ForLoop: @@ -94,15 +96,16 @@ class NodeVisitor { this.onExit?.(node) } - visit(nodes: RNode | (RNode | null | undefined)[] | undefined | null): void { + visit(nodes: RNode | readonly (RNode | null | undefined | typeof EmptyArgument)[] | undefined | null): void { if(Array.isArray(nodes)) { - for(const node of nodes) { - if(node) { + const n = nodes as (RNode | null | undefined | typeof EmptyArgument)[] + for(const node of n) { + if(node && node !== EmptyArgument) { this.visitSingle(node) } } } else if(nodes) { - this.visitSingle(nodes) + this.visitSingle(nodes as RNode) } } diff --git a/src/r-bridge/lang-4.x/ast/model/type.ts b/src/r-bridge/lang-4.x/ast/model/type.ts index 95d45274da..9a1be232af 100644 --- a/src/r-bridge/lang-4.x/ast/model/type.ts +++ b/src/r-bridge/lang-4.x/ast/model/type.ts @@ -201,18 +201,24 @@ export const enum RType { Delimiter = 'RDelimiter', } + +const validSymbolTypes = new Set([ + RawRType.Symbol, + RawRType.SymbolPackage, + RawRType.SymbolFunctionCall, + RawRType.NullConst, + RawRType.StringConst, + RawRType.ParenLeft, + RawRType.ParenRight, + RawRType.BraceLeft, + RawRType.BraceRight, + RawRType.Slot, +]) /** * Validates, whether the given type can be used as a symbol in R * * @see RawRType */ export function isSymbol(type: string): boolean { - return ( - type === RawRType.Symbol || - type === RawRType.SymbolPackage || - type === 
RawRType.SymbolFunctionCall || - type === RawRType.NullConst || - type === RawRType.StringConst || - type === RawRType.Slot - ) + return validSymbolTypes.has(type as RawRType) } diff --git a/src/r-bridge/lang-4.x/ast/parser/json/format.ts b/src/r-bridge/lang-4.x/ast/parser/json/format.ts index f6ad121e7a..67ebf8b176 100644 --- a/src/r-bridge/lang-4.x/ast/parser/json/format.ts +++ b/src/r-bridge/lang-4.x/ast/parser/json/format.ts @@ -18,14 +18,19 @@ export interface Entry extends Record { type ParsedDataRow = [line1: number, col1: number, line2: number, col2: number, id: number, parent: number, token: string, terminal: boolean, text: string] -export function prepareParsedData(data: string): Map { - const json: unknown = JSON.parse(data) +/** + * Parses the given data and sets child relationship, return the list of root entries (with a parent of {@link RootId}). + */ +export function prepareParsedData(data: string): Entry[] { + const json: unknown = JSON.parse(`[${data}]`) guard(Array.isArray(json), () => `Expected ${data} to be an array but was not`) const ret = new Map((json as ParsedDataRow[]).map(([line1, col1, line2, col2, id, parent, token, terminal, text]) => { return [id, { line1, col1, line2, col2, id, parent, token: removeRQuotes(token), terminal, text }] satisfies [number, Entry] })) + const roots: Entry[] = [] + // iterate a second time to set parent-child relations (since they may be out of order in the csv) for(const entry of ret.values()) { if(entry.parent != RootId) { @@ -34,8 +39,10 @@ export function prepareParsedData(data: string): Map { parent.children ??= [] parent.children.push(entry) } + } else { + roots.push(entry) } } - return ret + return roots } diff --git a/src/r-bridge/lang-4.x/ast/parser/json/parser.ts b/src/r-bridge/lang-4.x/ast/parser/json/parser.ts index 7bfeec0964..a5333addad 100644 --- a/src/r-bridge/lang-4.x/ast/parser/json/parser.ts +++ b/src/r-bridge/lang-4.x/ast/parser/json/parser.ts @@ -1,48 +1,41 @@ -import type { DeepPartial 
} from 'ts-essentials' -import type { XmlBasedJson, XmlParserHooks , ParserData } from '../xml' -import { nameKey , DEFAULT_PARSER_HOOKS , attributesKey, contentKey , childrenKey } from '../xml' -import { decorateAst, deterministicCountingIdGenerator } from '../../model' -import type { IdGenerator, NoInfo , NormalizedAst } from '../../model' -import { deepMergeObject } from '../../../../../util/objects' +import { childrenKey, nameKey, attributesKey, contentKey } from '../xml' +import type { IdGenerator, NoInfo, NormalizedAst } from '../../model' +import { decorateAst, deterministicCountingIdGenerator, RawRType } from '../../model' import type { Entry } from './format' -import { RootId, prepareParsedData } from './format' -import { parseRootObjToAst } from '../xml/internal' +import { prepareParsedData } from './format' import { log } from '../../../../../util/log' +import type { NormalizerData, XmlBasedJson } from '../xml' +import { normalizeRootObjToAst } from '../xml/internal' export const parseLog = log.getSubLogger({ name: 'ast-parser' }) -export function normalize(jsonString: string, hooks?: DeepPartial, getId: IdGenerator = deterministicCountingIdGenerator(0)): NormalizedAst { - const hooksWithDefaults = deepMergeObject(DEFAULT_PARSER_HOOKS, hooks) as XmlParserHooks - - const data: ParserData = { hooks: hooksWithDefaults, currentRange: undefined, currentLexeme: undefined } +export function normalize(jsonString: string, getId: IdGenerator = deterministicCountingIdGenerator(0)): NormalizedAst { + const data: NormalizerData = { currentRange: undefined, currentLexeme: undefined } const object = convertPreparedParsedData(prepareParsedData(jsonString)) - return decorateAst(parseRootObjToAst(data, object), getId) + return decorateAst(normalizeRootObjToAst(data, object), getId) } -export function convertPreparedParsedData(valueMapping: Map): XmlBasedJson { - const exprlist: XmlBasedJson = {} - exprlist[nameKey] = 'exprlist' - const children = [] - for(const entry of 
valueMapping.values()) { - if(entry.parent == RootId) { - children.push(convertEntry(entry)) +export function convertPreparedParsedData(rootEntries: Entry[]): XmlBasedJson { + return { + [RawRType.ExpressionList]: { + [nameKey]: RawRType.ExpressionList, + [childrenKey]: rootEntries.map(convertEntry) } } - exprlist[childrenKey] = children - return { 'exprlist': exprlist } } function convertEntry(csvEntry: Entry): XmlBasedJson { - const xmlEntry: XmlBasedJson = {} - - xmlEntry[attributesKey] = { - 'line1': csvEntry.line1, - 'col1': csvEntry.col1, - 'line2': csvEntry.line2, - 'col2': csvEntry.col2 + const xmlEntry: XmlBasedJson = { + [nameKey]: csvEntry.token, + [attributesKey]: { + 'line1': csvEntry.line1, + 'col1': csvEntry.col1, + 'line2': csvEntry.line2, + 'col2': csvEntry.col2 + } } - xmlEntry[nameKey] = csvEntry.token + if(csvEntry.text) { xmlEntry[contentKey] = csvEntry.text } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/hooks.ts b/src/r-bridge/lang-4.x/ast/parser/xml/hooks.ts deleted file mode 100644 index 32c4b14421..0000000000 --- a/src/r-bridge/lang-4.x/ast/parser/xml/hooks.ts +++ /dev/null @@ -1,378 +0,0 @@ -import type { NamedXmlBasedJson, XmlBasedJson } from './input-format' -import type { - RNumber, - NoInfo, - RBinaryOp, - RComment, - RForLoop, - RFunctionCall, - RIfThenElse, - RNode, - RRepeatLoop, - RString, - RUnaryOp, - RWhileLoop, - RSymbol, - RLogical, - BinaryOperatorFlavor, - RParameter, - RFunctionDefinition, - RArgument, - UnaryOperatorFlavor, - RBreak, - RNext, - RAccess, - RLineDirective, - RPipe -} from '../../model' -import type { RNa } from '../../../values' -import type { ParserData } from './data' -import type { DeepReadonly, DeepRequired } from 'ts-essentials' - -/** Denotes that if you return `undefined`, the parser will automatically take the original arguments (unchanged) */ -// eslint-disable-next-line @typescript-eslint/no-invalid-void-type -- makes writing hooks easier -type AutoIfOmit = T | undefined | void - -/** - * 
Hooks for every action the parser does. They can process the object before and after the actual parsing. - * - * There are two main hooks: - *

    - *
  • before: can transform the inputs to the function and is called before construction of the normalized ast. However, it can be called after initial checks are performed.
  • - *
  • after: can transform the result of the function and is called after construction of the normalized ast.
  • - *
- * Furthermore, for events that "try" to identify a structure, there is a third hook: - *
    - *
  • unknown: is called if the structure could not be identified.
  • - *
- * - * For those marked with {@link AutoIfOmit} you can return `undefined` to automatically take the original arguments (unchanged). - *

- * Use {@link executeHook} and {@link executeUnknownHook} to execute the hooks. - *

- * Please note, that there is no guarantee, that a hook is not using any other. For example, {@link tryNormalizeIfThen} is used by {@link tryNormalizeIfThenElse}. - */ -export interface XmlParserHooks { - values: { - /** {@link normalizeNumber} */ - onNumber: { - before(data: ParserData, inputObj: XmlBasedJson): AutoIfOmit - after(data: ParserData, result: RNumber | RLogical | RSymbol): AutoIfOmit> - }, - /** {@link parseString} */ - onString: { - before(data: ParserData, inputObj: XmlBasedJson): AutoIfOmit - after(data: ParserData, result: RString): AutoIfOmit - }, - /** {@link tryNormalizeSymbol} */ - onSymbol: { - /** - * triggered if {@link tryNormalizeSymbol} could not determine the namespace and or symbol. - * Can emit non-symbol values easily due to special symbols like `T`. - */ - unknown(data: ParserData, inputObjs: NamedXmlBasedJson[]): AutoIfOmit - before(data: ParserData, inputObjs: NamedXmlBasedJson[]): AutoIfOmit - after(data: ParserData, result: RSymbol | undefined): AutoIfOmit - } - }, - /** {@link tryNormalizeAccess} */ - onAccess: { - /** - * triggered if {@link tryNormalizeAccess} could not determine the access - */ - unknown(data: ParserData, inputObjs: NamedXmlBasedJson[]): AutoIfOmit - before(data: ParserData, inputObjs: NamedXmlBasedJson[]): AutoIfOmit - after(data: ParserData, result: RAccess): AutoIfOmit - }, - other: { - /** {@link normalizeComment} */ - onComment: { - before(data: ParserData, inputObj: XmlBasedJson): AutoIfOmit - after(data: ParserData, result: RComment): AutoIfOmit - } - /** {@link normalizeLineDirective} */ - onLineDirective: { - before(data: ParserData, inputObj: XmlBasedJson): AutoIfOmit - after(data: ParserData, result: RLineDirective | RComment): AutoIfOmit - } - }, - operators: { - /** {@link tryNormalizeBinary}, includes {@link RPipe} and {@link RFunctionCall} in case of special infix binary operations */ - onBinary: { - /** triggered if {@link tryNormalizeBinary} could not find a matching operator, you probably 
still want to return `undefined` */ - unknown(data: ParserData, input: { lhs: NamedXmlBasedJson, operator: NamedXmlBasedJson, rhs: NamedXmlBasedJson }): AutoIfOmit - before(data: ParserData, input: { flavor: BinaryOperatorFlavor | 'special' | 'pipe', lhs: NamedXmlBasedJson, operator: NamedXmlBasedJson, rhs: NamedXmlBasedJson }): AutoIfOmit<{flavor: BinaryOperatorFlavor | 'special' | 'pipe', lhs: NamedXmlBasedJson, operator: NamedXmlBasedJson, rhs: NamedXmlBasedJson}> - after(data: ParserData, result: RFunctionCall | RBinaryOp | RPipe): AutoIfOmit - }, - /** {@link tryNormalizeUnary} */ - onUnary: { - /** triggered if {@link tryNormalizeUnary} could not find a matching operator, you probably still want to return `undefined` */ - unknown(data: ParserData, input: { operator: NamedXmlBasedJson, operand: NamedXmlBasedJson } ): AutoIfOmit - before(data: ParserData, input: { flavor: UnaryOperatorFlavor, operator: NamedXmlBasedJson, operand: NamedXmlBasedJson }): AutoIfOmit<{flavor: UnaryOperatorFlavor, operator: NamedXmlBasedJson, operand: NamedXmlBasedJson}> - after(data: ParserData, result: RUnaryOp): AutoIfOmit - }, - }, - loops: { - /** {@link tryNormalizeFor} */ - onForLoop: { - /** triggered if {@link tryNormalizeFor} could not detect a for-loop, you probably still want to return `undefined` */ - unknown(data: ParserData, input: { forToken: NamedXmlBasedJson, condition: NamedXmlBasedJson, body: NamedXmlBasedJson }): AutoIfOmit - before(data: ParserData, input: { forToken: NamedXmlBasedJson, condition: NamedXmlBasedJson, body: NamedXmlBasedJson }): AutoIfOmit<{ forToken: NamedXmlBasedJson, condition: NamedXmlBasedJson, body: NamedXmlBasedJson }> - after(data: ParserData, result: RForLoop): AutoIfOmit - }, - /** {@link tryNormalizeRepeat} */ - onRepeatLoop: { - /** triggered if {@link tryNormalizeRepeat} could not detect a repeat-loop, you probably still want to return `undefined` */ - unknown(data: ParserData, input: { repeatToken: NamedXmlBasedJson, body: 
NamedXmlBasedJson }): AutoIfOmit - before(data: ParserData, input: { repeatToken: NamedXmlBasedJson, body: NamedXmlBasedJson }): AutoIfOmit<{ repeatToken: NamedXmlBasedJson, body: NamedXmlBasedJson }> - after(data: ParserData, result: RRepeatLoop): AutoIfOmit - }, - /** {@link tryNormalizeWhile} */ - onWhileLoop: { - /** triggered if {@link tryNormalizeWhile} could not detect a while-loop, you probably still want to return `undefined` */ - unknown(data: ParserData, input: { whileToken: NamedXmlBasedJson, leftParen: NamedXmlBasedJson, condition: NamedXmlBasedJson, rightParen: NamedXmlBasedJson, body: NamedXmlBasedJson }): AutoIfOmit - before(data: ParserData, input: { whileToken: NamedXmlBasedJson, leftParen: NamedXmlBasedJson, condition: NamedXmlBasedJson, rightParen: NamedXmlBasedJson, body: NamedXmlBasedJson }): AutoIfOmit<{ whileToken: NamedXmlBasedJson, leftParen: NamedXmlBasedJson, condition: NamedXmlBasedJson, rightParen: NamedXmlBasedJson, body: NamedXmlBasedJson }> - after(data: ParserData, result: RWhileLoop): AutoIfOmit - } - /** {@link normalizeBreak} */ - onBreak: { - before (data: ParserData, input: XmlBasedJson): AutoIfOmit - after (data: ParserData, result: RBreak): AutoIfOmit - }, - onNext: { - before (data: ParserData, input: XmlBasedJson): AutoIfOmit - after (data: ParserData, result: RNext): AutoIfOmit - } - }, - functions: { - /** {@link tryNormalizeFunctionDefinition} */ - onFunctionDefinition: { - /** triggered if {@link tryNormalizeFunctionDefinition} could not detect a function definition, you probably still want to return `undefined` */ - unknown(data: ParserData, mappedWithName: NamedXmlBasedJson[]): AutoIfOmit - before(data: ParserData, mappedWithName: NamedXmlBasedJson[]): AutoIfOmit - after(data: ParserData, result: RFunctionDefinition): AutoIfOmit - } - /** {@link tryNormalizeParameter} */ - onParameter: { - /** triggered if {@link tryNormalizeParameter} could not detect a parameter, you probably still want to return `undefined` */ - 
unknown(data: ParserData, mappedWithName: NamedXmlBasedJson[]): AutoIfOmit - before(data: ParserData, mappedWithName: NamedXmlBasedJson[]): AutoIfOmit - after(data: ParserData, result: RParameter): AutoIfOmit - } - /** {@link tryNormalizeFunctionCall} */ - onFunctionCall: { - /** triggered if {@link tryNormalizeFunctionCall} could not detect a function call, you probably still want to return `undefined` */ - unknown(data: ParserData, mappedWithName: NamedXmlBasedJson[]): AutoIfOmit - before(data: ParserData, mappedWithName: NamedXmlBasedJson[]): AutoIfOmit - after(data: ParserData, result: RFunctionCall | RNext | RBreak): AutoIfOmit - } - /** {@link tryToNormalizeArgument} */ - onArgument: { - /** triggered if {@link tryToNormalizeArgument} could not detect an argument, you probably still want to return `undefined` */ - unknown(data: ParserData, mappedWithName: NamedXmlBasedJson[]): AutoIfOmit - before(data: ParserData, mappedWithName: NamedXmlBasedJson[]): AutoIfOmit - after(data: ParserData, result: RArgument): AutoIfOmit - } - }, - expression: { - /** {@link normalizeExpression} */ - onExpression: { - /** *Warning:* can be a function call/definition as well, is not known when issuing before! 
*/ - before(data: ParserData, inputObj: XmlBasedJson): AutoIfOmit - after(data: ParserData, result: RNode): AutoIfOmit - } - }, - control: { - /** {@link tryNormalizeIfThen}, triggered by {@link onIfThenElse} as well */ - onIfThen: { - /** triggered if {@link tryNormalizeIfThen} could not detect an if-then, you probably still want to return `undefined` */ - unknown(data: ParserData, tokens: [ - ifToken: NamedXmlBasedJson, - leftParen: NamedXmlBasedJson, - condition: NamedXmlBasedJson, - rightParen: NamedXmlBasedJson, - then: NamedXmlBasedJson - ]): AutoIfOmit - before(data: ParserData, tokens: [ - ifToken: NamedXmlBasedJson, - leftParen: NamedXmlBasedJson, - condition: NamedXmlBasedJson, - rightParen: NamedXmlBasedJson, - then: NamedXmlBasedJson - ]): AutoIfOmit<[ - ifToken: NamedXmlBasedJson, - leftParen: NamedXmlBasedJson, - condition: NamedXmlBasedJson, - rightParen: NamedXmlBasedJson, - then: NamedXmlBasedJson - ]> - after(data: ParserData, result: RIfThenElse): AutoIfOmit - }, - /** {@link tryNormalizeIfThenElse}, triggers {@link onIfThen} */ - onIfThenElse: { - /** triggered if {@link tryNormalizeIfThenElse} could not detect an if-then-else, you probably still want to return `undefined`, this is probably called as a consequence of the unknown hook of if-then */ - unknown(data: ParserData, tokens: [ - ifToken: NamedXmlBasedJson, - leftParen: NamedXmlBasedJson, - condition: NamedXmlBasedJson, - rightParen: NamedXmlBasedJson, - then: NamedXmlBasedJson, - elseToken: NamedXmlBasedJson, - elseBlock: NamedXmlBasedJson - ]): AutoIfOmit - /** be aware, that the if-then part triggers another hook! 
*/ - before(data: ParserData, tokens: [ - ifToken: NamedXmlBasedJson, - leftParen: NamedXmlBasedJson, - condition: NamedXmlBasedJson, - rightParen: NamedXmlBasedJson, - then: NamedXmlBasedJson, - elseToken: NamedXmlBasedJson, - elseBlock: NamedXmlBasedJson - ]): AutoIfOmit<[ - ifToken: NamedXmlBasedJson, - leftParen: NamedXmlBasedJson, - condition: NamedXmlBasedJson, - rightParen: NamedXmlBasedJson, - then: NamedXmlBasedJson, - elseToken: NamedXmlBasedJson, - elseBlock: NamedXmlBasedJson - ]> - after(data: ParserData, result: RIfThenElse): AutoIfOmit - } - } -} - -/* eslint-disable -- hooks are unsafe */ -/** - * simple (rather type-wise unsafe ^^) function you can use to execute hooks and deal with {@link AutoIfOmit} - * - * @see executeUnknownHook - */ -export function executeHook(hook: (data: ParserData, input: T) => AutoIfOmit, data: ParserData, input: T): R { - const result = hook(data, input) - if (result === undefined) { - return input as unknown as R - } - return result -} - -/** - * @see executeHook - */ -export function executeUnknownHook(hook: (data: ParserData, input: T) => AutoIfOmit, data: ParserData, input: T): Exclude, void> { - return hook(data, input) as Exclude, void> -} -/* eslint-enable */ - -const doNothing = () => undefined - -export const DEFAULT_PARSER_HOOKS: DeepReadonly> = { - values: { - onNumber: { - before: doNothing, - after: doNothing - }, - onString: { - before: doNothing, - after: doNothing - }, - onSymbol: { - unknown: doNothing, - before: doNothing, - after: doNothing - } - }, - onAccess: { - unknown: doNothing, - before: doNothing, - after: doNothing - }, - other: { - onComment: { - before: doNothing, - after: doNothing - }, - onLineDirective: { - before: doNothing, - after: doNothing - } - }, - control: { - onIfThen: { - unknown: doNothing, - before: doNothing, - after: doNothing - }, - onIfThenElse: { - unknown: doNothing, - before: doNothing, - after: doNothing - } - }, - loops: { - onForLoop: { - unknown: doNothing, - 
before: doNothing, - after: doNothing - }, - onRepeatLoop: { - unknown: doNothing, - before: doNothing, - after: doNothing - }, - onWhileLoop: { - unknown: doNothing, - before: doNothing, - after: doNothing - }, - onBreak: { - before: doNothing, - after: doNothing - }, - onNext: { - before: doNothing, - after: doNothing - } - }, - operators: { - onBinary: { - unknown: doNothing, - before: doNothing, - after: doNothing - }, - onUnary: { - unknown: doNothing, - before: doNothing, - after: doNothing - } - }, - functions: { - onFunctionDefinition: { - unknown: doNothing, - before: doNothing, - after: doNothing - }, - onParameter: { - unknown: doNothing, - before: doNothing, - after: doNothing - }, - onFunctionCall: { - unknown: doNothing, - before: doNothing, - after: doNothing - }, - onArgument: { - unknown: doNothing, - before: doNothing, - after: doNothing - }, - }, - expression: { - onExpression: { - before: doNothing, - after: doNothing - } - } -} as const diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/index.ts b/src/r-bridge/lang-4.x/ast/parser/xml/index.ts index e85d65afca..a3fe76cb3b 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/index.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/index.ts @@ -1,3 +1,3 @@ -export * from './data' -export * from './hooks' +export * from './normalizer-data' export * from './input-format' +export * from './normalize-meta' diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/input-format.ts b/src/r-bridge/lang-4.x/ast/parser/xml/input-format.ts index ea753a79f7..8af32f9d50 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/input-format.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/input-format.ts @@ -22,35 +22,37 @@ export type XmlBasedJson = Record /** * A xml element in the json can either link to a string or another xml element */ -export type XmlBasedJsonValue = string | Record | XmlBasedJson[] - +export type XmlBasedJsonValue = string | Record | readonly XmlBasedJson[] /** * We expect all xml elements to have a name attached 
which represents their R token type. */ export interface NamedXmlBasedJson { /** corresponds to the R token type */ - name: RawRType, + readonly name: RawRType, /** remaining content (e.g., children, ...) */ - content: XmlBasedJson + readonly content: XmlBasedJson } +function error(key: string, obj: XmlBasedJson) { + throw new XmlParseError(`expected obj to have key ${key}, yet received ${JSON.stringify(obj)}`) +} + /** - * Retrieves the given key(s) from the converted xml. + * Single-key variant of {@link getKeysGuarded}. Will throw an {@link XmlParseError} if the key is not present. + */ +export function getKeyGuarded(obj: XmlBasedJson, key: string): T { + return (obj[key] ?? error(key, obj)) as T +} + +/** + * Retrieves the given keys from the converted xml. For a single key, see {@link getKeyGuarded}. * Will throw an {@link XmlParseError} if at least one of the keys is not present * * @typeParam T - the type of the values to retrieve. Note, that this type is not checked at runtime. */ -export function getKeysGuarded(obj: XmlBasedJson, key: string): T -export function getKeysGuarded(obj: XmlBasedJson, ...key: readonly string[]): Record -export function getKeysGuarded(obj: XmlBasedJson, ...key: readonly string[]): (Record | T) { - if(key.length === 1) { - return obj[key[0]] as T - } else { - return key.reduce>((acc, key) => { - acc[key] = obj[key] as T - return acc - }, {}) - } +export function getKeysGuarded(obj: XmlBasedJson, ...keys: readonly string[]): Record { + return Object.fromEntries(keys.map(k => [k, getKeyGuarded(obj, k)] as const)) } + diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/access.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/access.ts deleted file mode 100644 index 8252f965b8..0000000000 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/access.ts +++ /dev/null @@ -1,124 +0,0 @@ -import type { NamedXmlBasedJson } from '../input-format' -import { retrieveMetaStructure } from './meta' -import type { ParserData } from '../data' 
-import type { RAccess, RNode, RArgument } from '../../../model' -import { RType, RawRType } from '../../../model' -import { executeHook, executeUnknownHook } from '../hooks' -import { normalizeBasedOnType } from './structure' -import { guard } from '../../../../../../util/assert' -import { splitArrayOn } from '../../../../../../util/arrays' -import { tryToNormalizeArgument } from './functions/argument' -import { parseLog } from '../../json/parser' - -/** - * Tries to normalize the given data as access (e.g., indexing). - * - * @param data - The data used by the parser (see {@link ParserData}) - * @param mappedWithName - The json object to extract the meta-information from - * - * @returns The parsed {@link RAccess} or `undefined` if the given construct is not accessing a value - */ -export function tryNormalizeAccess(data: ParserData, mappedWithName: NamedXmlBasedJson[]): RAccess | undefined { - parseLog.trace('trying to parse access') - mappedWithName = executeHook(data.hooks.onAccess.before, data, mappedWithName) - - if(mappedWithName.length < 3) { - parseLog.trace('expected at least three elements are required to parse an access') - return executeUnknownHook(data.hooks.onAccess.unknown, data, mappedWithName) - } - - const accessOp = mappedWithName[1] - - let operator: RAccess['operator'] - let closingLength = 0 - - switch(accessOp.name) { - case RawRType.BracketLeft: - operator = '[' - closingLength = 1 - break - case RawRType.Dollar: - operator = '$' - break - case RawRType.At: - operator = '@' - break - case RawRType.DoubleBracketLeft: - operator = '[[' - closingLength = 2 - break - default: - parseLog.trace(`expected second element to be an access operator, yet received ${accessOp.name}`) - return executeUnknownHook(data.hooks.onAccess.unknown, data, mappedWithName) - } - - const accessed = mappedWithName[0] - if(accessed.name !== RawRType.Expression && accessed.name !== RawRType.ExprOfAssignOrHelp) { - parseLog.trace(`expected accessed element to be wrapped 
an expression, yet received ${accessed.name}`) - return executeUnknownHook(data.hooks.onAccess.unknown, data, mappedWithName) - } - - const parsedAccessed = normalizeBasedOnType(data, [accessed]) - if(parsedAccessed.length !== 1) { - parseLog.trace(`expected accessed element to be wrapped an expression, yet received ${accessed.name}`) - return executeUnknownHook(data.hooks.onAccess.unknown, data, mappedWithName) - } - - const remaining = mappedWithName.slice(2, mappedWithName.length - closingLength) - - parseLog.trace(`${remaining.length} remaining arguments for access`) - - const splitAccessOnComma = splitArrayOn(remaining, x => x.name === RawRType.Comma) - - const parsedAccess: (RNode | null)[] = splitAccessOnComma.map(x => { - if(x.length === 0) { - parseLog.trace('record empty access') - return null - } - parseLog.trace('trying to parse access') - const gotAccess = parseAccessArgument(operator, data, x) - guard(gotAccess !== undefined, () => `expected one access result in access as argument, yet received ${JSON.stringify(gotAccess)} for ${JSON.stringify([operator, x])}`) - return gotAccess - }) - - let resultingAccess: (RNode | null)[] | string = parsedAccess - - if(operator === '@' || operator === '$') { - guard(parsedAccess.length === 1, () => `expected one access result in access with ${JSON.stringify(operator)}, yet received ${JSON.stringify(parsedAccess)}`) - const first = parsedAccess[0] - guard(first !== null && (first.type === RType.Symbol || first.type === RType.String || first.type === RType.Logical), () => `${JSON.stringify(operator)} requires one symbol, yet received ${JSON.stringify(parsedAccess)}`) - resultingAccess = first.type === RType.String ? 
first.content.str : first.lexeme - } - - const { - content, location - } = retrieveMetaStructure(accessOp.content) - - const result = { - type: RType.Access, - location, - lexeme: content, - accessed: parsedAccessed[0], - operator, - access: resultingAccess, - info: { - fullRange: data.currentRange, - additionalTokens: [], - fullLexeme: data.currentLexeme - } - } as RAccess - return executeHook(data.hooks.onAccess.after, data, result) -} - - -function parseAccessArgument(operator: RAccess['operator'], data: ParserData, elements: NamedXmlBasedJson[]): RArgument | RNode | undefined { - // within access the content is *not* wrapped within another expression, that means if we have a SYMBOL_SUB we can directly parse the argument, - // otherwise we have to add the expression layer - // console.log('parseAccessArgument', elements.map(x => x.name)) - if(operator === '@' || operator === '$') { - const parse = normalizeBasedOnType(data, elements) - return parse.length !== 1 ? undefined : parse[0] as RNode - } else { - return tryToNormalizeArgument(data, elements) - } -} diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/control/index.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/control/index.ts index 6a0a196425..12abe6cc6d 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/control/index.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/control/index.ts @@ -1,3 +1,3 @@ -export * from './if-then' -export * from './if-then-else' +export * from './normalize-if-then' +export * from './normalize-if-then-else' diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/control/if-then-else.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/control/normalize-if-then-else.ts similarity index 66% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/control/if-then-else.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/control/normalize-if-then-else.ts index 4ddbb54651..5c48670a05 100644 --- 
a/src/r-bridge/lang-4.x/ast/parser/xml/internal/control/if-then-else.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/control/normalize-if-then-else.ts @@ -1,19 +1,19 @@ +import type { NormalizerData } from '../../normalizer-data' import type { NamedXmlBasedJson } from '../../input-format' -import { tryNormalizeSingleNode } from '../structure' -import type { ParserData } from '../../data' -import { tryNormalizeIfThen } from './if-then' -import { guard } from '../../../../../../../util/assert' import type { RIfThenElse } from '../../../../model' -import { RawRType, RType } from '../../../../model' -import { executeHook, executeUnknownHook } from '../../hooks' -import { ensureExpressionList } from '../meta' +import { RType , RawRType } from '../../../../model' import { parseLog } from '../../../json/parser' +import { tryNormalizeIfThen } from './normalize-if-then' +import { guard } from '../../../../../../../util/assert' +import { normalizeSingleNode } from '../structure' +import { ensureExpressionList } from '../../normalize-meta' + /** * Try to parse the construct as a {@link RIfThenElse}. 
*/ export function tryNormalizeIfThenElse( - data: ParserData, + data: NormalizerData, tokens: [ ifToken: NamedXmlBasedJson, leftParen: NamedXmlBasedJson, @@ -25,21 +25,19 @@ export function tryNormalizeIfThenElse( ]): RIfThenElse | undefined { // we start by parsing a regular if-then structure parseLog.trace('trying to parse if-then-else structure') - tokens = executeHook(data.hooks.control.onIfThenElse.before, data, tokens) const parsedIfThen = tryNormalizeIfThen(data, [tokens[0], tokens[1], tokens[2], tokens[3], tokens[4]]) if(parsedIfThen === undefined) { - return executeUnknownHook(data.hooks.control.onIfThenElse.unknown, data, tokens) + return undefined } parseLog.trace('if-then part successful, now parsing else part') guard(tokens[5].name === RawRType.Else, () => `expected else token for if-then-else but found ${JSON.stringify(tokens[5])}`) - const parsedElse = tryNormalizeSingleNode(data, tokens[6]) + const parsedElse = normalizeSingleNode(data, tokens[6]) guard(parsedElse.type !== RType.Delimiter, () => `unexpected missing else-part of if-then-else, received ${JSON.stringify([parsedIfThen, parsedElse])} for ${JSON.stringify(tokens)}`) - const result: RIfThenElse = { + return { ...parsedIfThen, otherwise: ensureExpressionList(parsedElse) } - return executeHook(data.hooks.control.onIfThenElse.after, data, result) } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/control/if-then.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/control/normalize-if-then.ts similarity index 72% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/control/if-then.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/control/normalize-if-then.ts index 777db02dfd..17c84c1c86 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/control/if-then.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/control/normalize-if-then.ts @@ -1,18 +1,18 @@ +import type { NormalizerData } from '../../normalizer-data' import type { NamedXmlBasedJson } from 
'../../input-format' import { XmlParseError } from '../../input-format' -import { tryNormalizeSingleNode } from '../structure' -import { ensureExpressionList, retrieveMetaStructure } from '../meta' -import type { ParserData } from '../../data' import type { RIfThenElse } from '../../../../model' -import { RawRType, RType } from '../../../../model' -import { executeHook, executeUnknownHook } from '../../hooks' +import { RType , RawRType } from '../../../../model' import { parseLog } from '../../../json/parser' +import { normalizeSingleNode } from '../structure' +import { ensureExpressionList, retrieveMetaStructure } from '../../normalize-meta' + /** * Try to parse the construct as a {@link RIfThenElse}. */ export function tryNormalizeIfThen( - data: ParserData, + data: NormalizerData, tokens: [ ifToken: NamedXmlBasedJson, leftParen: NamedXmlBasedJson, @@ -23,17 +23,15 @@ export function tryNormalizeIfThen( parseLog.trace('trying to parse if-then structure') if(tokens[0].name !== RawRType.If) { parseLog.debug('encountered non-if token for supposed if-then structure') - return executeUnknownHook(data.hooks.control.onIfThen.unknown, data, tokens) + return undefined } else if(tokens[1].name !== RawRType.ParenLeft) { throw new XmlParseError(`expected left-parenthesis for if but found ${JSON.stringify(tokens[1])}`) } else if(tokens[3].name !== RawRType.ParenRight) { throw new XmlParseError(`expected right-parenthesis for if but found ${JSON.stringify(tokens[3])}`) } - tokens = executeHook(data.hooks.control.onIfThen.before, data, tokens) - - const parsedCondition = tryNormalizeSingleNode(data, tokens[2]) - const parsedThen = tryNormalizeSingleNode(data, tokens[4]) + const parsedCondition = normalizeSingleNode(data, tokens[2]) + const parsedThen = normalizeSingleNode(data, tokens[4]) if(parsedCondition.type === RType.Delimiter || parsedThen.type === RType.Delimiter) { @@ -42,7 +40,7 @@ export function tryNormalizeIfThen( const { location, content } = 
retrieveMetaStructure(tokens[0].content) - const result: RIfThenElse = { + return { type: RType.IfThenElse, condition: parsedCondition, then: ensureExpressionList(parsedThen), @@ -54,5 +52,4 @@ export function tryNormalizeIfThen( fullLexeme: data.currentLexeme } } - return executeHook(data.hooks.control.onIfThen.after, data, result) } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/expression/index.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/expression/index.ts index 9262f2f04c..bfc97106c1 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/expression/index.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/expression/index.ts @@ -1 +1 @@ -export * from './expression' +export * from './normalize-expression' diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/expression/expression.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/expression/normalize-expression.ts similarity index 66% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/expression/expression.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/expression/normalize-expression.ts index 2d7bfba491..b19b8e1e49 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/expression/expression.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/expression/normalize-expression.ts @@ -1,40 +1,33 @@ +import type { NormalizerData } from '../../normalizer-data' import type { NamedXmlBasedJson, XmlBasedJson } from '../../input-format' -import { childrenKey , getKeysGuarded } from '../../input-format' - -import { getWithTokenType, retrieveMetaStructure } from '../meta' -import type { ParserData } from '../../data' -import { normalizeBasedOnType, splitComments } from '../structure' -import { tryNormalizeFunctionCall, tryNormalizeFunctionDefinition } from '../functions' +import { childrenKey, getKeyGuarded } from '../../input-format' import type { RNode } from '../../../../model' import { RType } from '../../../../model' -import { executeHook } from '../../hooks' 
-import { tryNormalizeAccess } from '../access' +import { parseLog } from '../../../json/parser' +import { getWithTokenType, retrieveMetaStructure } from '../../normalize-meta' +import { normalizeExpressions, splitComments } from '../structure' +import { tryNormalizeFunctionCall, tryNormalizeFunctionDefinition } from '../functions' +import { tryNormalizeAccess } from '../normalize-access' import { normalizeComment } from '../other' import { partition } from '../../../../../../../util/arrays' -import { parseLog } from '../../../json/parser' /** * Returns an expression list if there are multiple children, otherwise returns the single child directly with no expr wrapper * - * @param data - The data used by the parser (see {@link ParserData}) + * @param data - The data used by the parser (see {@link NormalizerData}) * @param obj - The json object to extract the meta-information from */ -export function normalizeExpression(data: ParserData, obj: XmlBasedJson): RNode { - parseLog.debug('Parsing expr') - obj = executeHook(data.hooks.expression.onExpression.before, data, obj) +export function normalizeExpression(data: NormalizerData, obj: XmlBasedJson): RNode { + parseLog.debug('[expr]') - const { - unwrappedObj, - content, - location - } = retrieveMetaStructure(obj) + const { unwrappedObj, content, location } = retrieveMetaStructure(obj) - const childrenSource = getKeysGuarded(unwrappedObj, childrenKey) + const childrenSource = getKeyGuarded(unwrappedObj, childrenKey) const typed: NamedXmlBasedJson[] = getWithTokenType(childrenSource) const { others, comments } = splitComments(typed) - const childData: ParserData = { ...data, currentRange: location, currentLexeme: content } + const childData: NormalizerData = { ...data, currentRange: location, currentLexeme: content } const maybeFunctionCall = tryNormalizeFunctionCall(childData, others) if(maybeFunctionCall !== undefined) { @@ -55,17 +48,18 @@ export function normalizeExpression(data: ParserData, obj: XmlBasedJson): RNode 
} - const children = normalizeBasedOnType(childData, childrenSource) + const children = normalizeExpressions(childData, childrenSource) const [delimiters, nodes] = partition(children, x => x.type === RType.Delimiter) - let result: RNode if(nodes.length === 1) { - result = nodes[0] as RNode + const result = nodes[0] as RNode result.info.additionalTokens = [...result.info.additionalTokens ?? [], ...delimiters] + return result } else { - result = { + return { type: RType.ExpressionList, + grouping: undefined, location, children: nodes as RNode[], lexeme: content, @@ -76,5 +70,4 @@ export function normalizeExpression(data: ParserData, obj: XmlBasedJson): RNode } } } - return executeHook(data.hooks.expression.onExpression.after, data, result) } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/index.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/index.ts index deee410b40..200691777c 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/index.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/index.ts @@ -1,3 +1,3 @@ -export * from './call' -export * from './definition' +export * from './normalize-call' +export * from './normalize-definition' /* will not expose argument as we know when to expect arguments and only call them during parsing call/definition */ diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/argument.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/normalize-argument.ts similarity index 61% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/argument.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/normalize-argument.ts index 00659c903d..f53d1dd6ca 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/argument.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/normalize-argument.ts @@ -1,31 +1,29 @@ +import type { NormalizerData } from '../../normalizer-data' import type { NamedXmlBasedJson } from 
'../../input-format' -import { retrieveMetaStructure } from '../meta' -import type { RNode, RSymbol, RArgument } from '../../../../model' -import { RType, RawRType } from '../../../../model' -import type { ParserData } from '../../data' -import { executeHook, executeUnknownHook } from '../../hooks' -import { log } from '../../../../../../../util/log' -import { guard } from '../../../../../../../util/assert' -import { tryNormalizeSingleNode } from '../structure' -import type { RDelimiter } from '../../../../model/nodes/info' +import type { RArgument, RNode, RSymbol } from '../../../../model' +import { RawRType, RType } from '../../../../model' import { parseLog } from '../../../json/parser' +import { retrieveMetaStructure } from '../../normalize-meta' +import type { RDelimiter } from '../../../../model/nodes/info' +import { normalizeSingleNode } from '../structure' +import { guard } from '../../../../../../../util/assert' + /** * Either parses `[expr]` or `[SYMBOL_SUB, EQ_SUB, expr]` as an argument of a function call in R. * Probably directly called by the function call parser as otherwise, we do not expect to find arguments. * - * @param data - The data used by the parser (see {@link ParserData}) + * @param data - The data used by the parser (see {@link NormalizerData}) * @param objs - Either `[expr]` or `[SYMBOL_FORMALS, EQ_FORMALS, expr]` * * @returns The parsed argument or `undefined` if the given object is not an argument. 
*/ -export function tryToNormalizeArgument(data: ParserData, objs: NamedXmlBasedJson[]): RArgument | undefined { +export function tryToNormalizeArgument(data: NormalizerData, objs: readonly NamedXmlBasedJson[]): RArgument | undefined { parseLog.debug('[argument]') - objs = executeHook(data.hooks.functions.onArgument.before, data, objs) if(objs.length < 1 || objs.length > 3) { - log.warn(`Either [expr|value], [SYMBOL_SUB, EQ_SUB], or [SYMBOL_SUB, EQ_SUB, expr], but got: ${objs.map(o => o.name).join(', ')}`) - return executeUnknownHook(data.hooks.functions.onArgument.unknown, data, objs) + parseLog.warn(`Either [expr|value], [SYMBOL_SUB, EQ_SUB], or [SYMBOL_SUB, EQ_SUB, expr], but got: ${objs.map(o => o.name).join(', ')}`) + return undefined } @@ -36,7 +34,7 @@ export function tryToNormalizeArgument(data: ParserData, objs: NamedXmlBasedJson let name: RSymbol | undefined if(symbolOrExpr.name === RawRType.Expression) { name = undefined - parsedValue = tryNormalizeSingleNode(data, symbolOrExpr) + parsedValue = normalizeSingleNode(data, symbolOrExpr) } else if(symbolOrExpr.name === RawRType.SymbolSub || symbolOrExpr.name === RawRType.StringConst) { name = { type: RType.Symbol, @@ -52,13 +50,13 @@ export function tryToNormalizeArgument(data: ParserData, objs: NamedXmlBasedJson } parsedValue = parseWithValue(data, objs) } else { - log.warn(`expected symbol or expr for argument, yet received ${objs.map(o => o.name).join(',')}`) - return executeUnknownHook(data.hooks.functions.onArgument.unknown, data, objs) + parseLog.warn(`expected symbol or expr for argument, yet received ${objs.map(o => o.name).join(',')}`) + return undefined } guard(parsedValue !== undefined && parsedValue?.type !== RType.Delimiter, () => `[argument] parsed value must not be undefined, yet: ${JSON.stringify(objs)}`) - const result: RArgument = { + return { type: RType.Argument, location, lexeme: content, @@ -70,12 +68,10 @@ export function tryToNormalizeArgument(data: ParserData, objs: NamedXmlBasedJson 
additionalTokens: [] } } - - return executeHook(data.hooks.functions.onArgument.after, data, result) } -function parseWithValue(data: ParserData, objs: NamedXmlBasedJson[]): RNode | RDelimiter | undefined | null{ +function parseWithValue(data: NormalizerData, objs: readonly NamedXmlBasedJson[]): RNode | RDelimiter | undefined | null{ guard(objs[1].name === RawRType.EqualSub, () => `[arg-default] second element of parameter must be ${RawRType.EqualFormals}, but: ${JSON.stringify(objs)}`) guard(objs.length === 2 || objs[2].name === RawRType.Expression, () => `[arg-default] third element of parameter must be an Expression or undefined (for 'x=') but: ${JSON.stringify(objs)}`) - return objs[2] ? tryNormalizeSingleNode(data, objs[2]) : null + return objs[2] ? normalizeSingleNode(data, objs[2]) : null } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/call.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/normalize-call.ts similarity index 73% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/call.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/normalize-call.ts index 14f0f9e5f9..c7cf035b74 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/call.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/normalize-call.ts @@ -1,42 +1,39 @@ +import type { NormalizerData } from '../../normalizer-data' import type { NamedXmlBasedJson, XmlBasedJson } from '../../input-format' -import { childrenKey , getKeysGuarded } from '../../input-format' - -import { guard } from '../../../../../../../util/assert' -import { getWithTokenType, retrieveMetaStructure } from '../meta' -import { splitArrayOn } from '../../../../../../../util/arrays' -import { normalizeString, tryNormalizeSymbol } from '../values' -import type { ParserData } from '../../data' +import { childrenKey, getKeyGuarded } from '../../input-format' import type { - RNode, + RArgument, + RBreak, RFunctionCall, - 
RUnnamedFunctionCall, RNamedFunctionCall, RNext, - RBreak, - RArgument } from '../../../../model' -import { - RType, RawRType + RNode, + RUnnamedFunctionCall } from '../../../../model' -import { executeHook, executeUnknownHook } from '../../hooks' -import { tryToNormalizeArgument } from './argument' +import { RawRType, RType, EmptyArgument } from '../../../../model' +import { parseLog } from '../../../json/parser' +import { getWithTokenType, retrieveMetaStructure } from '../../normalize-meta' +import { splitArrayOn } from '../../../../../../../util/arrays' +import { guard } from '../../../../../../../util/assert' +import { tryToNormalizeArgument } from './normalize-argument' import type { SourceRange } from '../../../../../../../util/range' import { normalizeExpression } from '../expression' -import { parseLog } from '../../../json/parser' +import { normalizeString, tryNormalizeSymbol } from '../values' /** * Tries to parse the given data as a function call. * - * @param data - The data used by the parser (see {@link ParserData}) + * @param data - The data used by the parser (see {@link NormalizerData}) * @param mappedWithName - The json object to extract the meta-information from * * @returns The parsed {@link RFunctionCall} (either named or unnamed) or `undefined` if the given construct is not a function call * May return a {@link RNext} or {@link RBreak} as `next()` and `break()` work as such. 
*/ -export function tryNormalizeFunctionCall(data: ParserData, mappedWithName: NamedXmlBasedJson[]): RFunctionCall | RNext | RBreak | undefined { +export function tryNormalizeFunctionCall(data: NormalizerData, mappedWithName: NamedXmlBasedJson[]): RFunctionCall | RNext | RBreak | undefined { const fnBase = mappedWithName[0] if(fnBase.name !== RawRType.Expression && fnBase.name !== RawRType.ExprOfAssignOrHelp) { parseLog.trace(`expected function call name to be wrapped an expression, yet received ${fnBase.name}`) - return executeUnknownHook(data.hooks.functions.onFunctionCall.unknown, data, mappedWithName) + return undefined } if(mappedWithName.length < 3 || mappedWithName[1].name !== RawRType.ParenLeft || mappedWithName[mappedWithName.length - 1].name !== RawRType.ParenRight) { @@ -45,33 +42,25 @@ export function tryNormalizeFunctionCall(data: ParserData, mappedWithName: Named } parseLog.trace('trying to parse function call') - mappedWithName = executeHook(data.hooks.functions.onFunctionCall.before, data, mappedWithName) const { unwrappedObj, content, location } = retrieveMetaStructure(fnBase.content) - const symbolContent: XmlBasedJson[] = getKeysGuarded(unwrappedObj, childrenKey) - - let result: RFunctionCall | RNext | RBreak + const symbolContent: XmlBasedJson[] = getKeyGuarded(unwrappedObj, childrenKey) const namedSymbolContent = getWithTokenType(symbolContent) if(namedSymbolContent.length === 1 && namedSymbolContent[0].name === RawRType.StringConst) { // special handling when someone calls a function by string - result = parseNamedFunctionCall(data, namedSymbolContent, mappedWithName, location, content) + return parseNamedFunctionCall(data, namedSymbolContent, mappedWithName, location, content) } else if(namedSymbolContent.findIndex(x => x.name === RawRType.SymbolFunctionCall) < 0) { parseLog.trace(`is not named function call, as the name is not of type ${RType.FunctionCall}, but: ${namedSymbolContent.map(n => n.name).join(',')}`) const mayResult = 
tryParseUnnamedFunctionCall(data, mappedWithName, location, content) - if(mayResult === undefined) { - return executeUnknownHook(data.hooks.functions.onFunctionCall.unknown, data, mappedWithName) - } - result = mayResult + return mayResult } else { - result = parseNamedFunctionCall(data, namedSymbolContent, mappedWithName, location, content) + return parseNamedFunctionCall(data, namedSymbolContent, mappedWithName, location, content) } - - return executeHook(data.hooks.functions.onFunctionCall.after, data, result) } -function parseArguments(mappedWithName: NamedXmlBasedJson[], data: ParserData): (RArgument | undefined)[] { +function parseArguments(mappedWithName: readonly NamedXmlBasedJson[], data: NormalizerData): (RArgument | undefined)[] { const argContainer = mappedWithName.slice(1) guard(argContainer.length > 1 && argContainer[0].name === RawRType.ParenLeft && argContainer[argContainer.length - 1].name === RawRType.ParenRight, 'expected args in parenthesis') const splitArgumentsOnComma = splitArrayOn(argContainer.slice(1, argContainer.length - 1), x => x.name === RawRType.Comma) @@ -81,7 +70,7 @@ function parseArguments(mappedWithName: NamedXmlBasedJson[], data: ParserData): }) } -function tryParseUnnamedFunctionCall(data: ParserData, mappedWithName: NamedXmlBasedJson[], location: SourceRange, content: string): RUnnamedFunctionCall | RNext | RBreak | undefined { +function tryParseUnnamedFunctionCall(data: NormalizerData, mappedWithName: NamedXmlBasedJson[], location: SourceRange, content: string): RUnnamedFunctionCall | RNext | RBreak | undefined { // maybe remove symbol-content again because I just use the root expr of mapped with name if(mappedWithName.length < 3) { parseLog.trace('expected unnamed function call to have 3 elements [like ()], but was not') @@ -127,7 +116,7 @@ function tryParseUnnamedFunctionCall(data: ParserData, mappedWithName: NamedXmlB location, lexeme: content, calledFunction: calledFunction, - arguments: parsedArguments, + arguments: 
parsedArguments.map(x => x ?? EmptyArgument), info: { fullRange: data.currentRange, additionalTokens: [], @@ -137,7 +126,7 @@ function tryParseUnnamedFunctionCall(data: ParserData, mappedWithName: NamedXmlB } -function parseNamedFunctionCall(data: ParserData, symbolContent: NamedXmlBasedJson[], mappedWithName: NamedXmlBasedJson[], location: SourceRange, content: string): RNamedFunctionCall { +function parseNamedFunctionCall(data: NormalizerData, symbolContent: NamedXmlBasedJson[], mappedWithName: NamedXmlBasedJson[], location: SourceRange, content: string): RNamedFunctionCall { let functionName: RNode | undefined if(symbolContent.length === 1 && symbolContent[0].name === RawRType.StringConst) { const stringBase = normalizeString(data, symbolContent[0].content) @@ -163,7 +152,7 @@ function parseNamedFunctionCall(data: ParserData, symbolContent: NamedXmlBasedJs location, lexeme: content, functionName, - arguments: parsedArguments, + arguments: parsedArguments.map(x => x ?? EmptyArgument), info: { fullRange: data.currentRange, additionalTokens: [], diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/definition.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/normalize-definition.ts similarity index 67% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/definition.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/normalize-definition.ts index e3694476b4..12e85302ca 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/definition.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/normalize-definition.ts @@ -1,34 +1,29 @@ -import type { ParserData } from '../../data' +import type { NormalizerData } from '../../normalizer-data' import type { NamedXmlBasedJson } from '../../input-format' import type { RFunctionDefinition, RParameter } from '../../../../model' -import { RawRType, RType } from '../../../../model' -import { executeHook, executeUnknownHook } from '../../hooks' -import 
{ ensureExpressionList, retrieveMetaStructure } from '../meta' +import { RType , RawRType } from '../../../../model' +import { parseLog } from '../../../json/parser' +import { ensureExpressionList, retrieveMetaStructure } from '../../normalize-meta' import { guard, isNotUndefined } from '../../../../../../../util/assert' import { splitArrayOn } from '../../../../../../../util/arrays' -import { normalizeBasedOnType } from '../structure' -import { tryNormalizeParameter } from './parameter' -import { log } from '../../../../../../../util/log' -import { parseLog } from '../../../json/parser' +import { tryNormalizeParameter } from './normalize-parameter' +import { normalizeExpressions } from '../structure' /** * Tries to parse the given data as a function definition. * - * @param data - The data used by the parser (see {@link ParserData}) + * @param data - The data used by the parser (see {@link NormalizerData}) * @param mappedWithName - The json object to extract the meta-information from * * @returns The parsed {@link RFunctionDefinition} or `undefined` if the given construct is not a function definition */ -export function tryNormalizeFunctionDefinition(data: ParserData, mappedWithName: NamedXmlBasedJson[]): RFunctionDefinition | undefined { +export function tryNormalizeFunctionDefinition(data: NormalizerData, mappedWithName: readonly NamedXmlBasedJson[]): RFunctionDefinition | undefined { const fnBase = mappedWithName[0] if(fnBase.name !== RawRType.Function && fnBase.name !== RawRType.Lambda) { parseLog.trace(`expected function definition to be identified by keyword, yet received ${fnBase.name}`) - return executeUnknownHook(data.hooks.functions.onFunctionDefinition.unknown, data, mappedWithName) + return undefined } - parseLog.trace('trying to parse function definition') - mappedWithName = executeHook(data.hooks.functions.onFunctionDefinition.before, data, mappedWithName) - const { content, location } = retrieveMetaStructure(fnBase.content) const openParen = 
mappedWithName[1] @@ -44,8 +39,8 @@ export function tryNormalizeFunctionDefinition(data: ParserData, mappedWithName: const parameters: (undefined | RParameter)[] = splitParameters.map(x => tryNormalizeParameter(data, x)) if(parameters.some(p => p === undefined)) { - log.error(`function had unexpected unknown parameters: ${JSON.stringify(parameters.filter(isNotUndefined))}, aborting.`) - return executeUnknownHook(data.hooks.functions.onFunctionDefinition.unknown, data, mappedWithName) + parseLog.error(`function had unexpected unknown parameters: ${JSON.stringify(parameters.filter(isNotUndefined))}, aborting.`) + return undefined } parseLog.trace(`function definition retained ${parameters.length} parameters after parsing, moving to body.`) @@ -53,11 +48,11 @@ export function tryNormalizeFunctionDefinition(data: ParserData, mappedWithName: const bodyStructure = mappedWithName.slice(closingParenIndex + 1) guard(bodyStructure.length === 1, () => `expected function body to be unique, yet received ${bodyStructure.length}`) - const body = normalizeBasedOnType(data, bodyStructure) + const body = normalizeExpressions(data, bodyStructure) guard(body.length === 1 && body[0].type !== RType.Delimiter, () => `expected function body to yield one normalized expression, but ${body.length}`) - const result: RFunctionDefinition = { + return { type: RType.FunctionDefinition, location, lexeme: content, @@ -69,5 +64,4 @@ export function tryNormalizeFunctionDefinition(data: ParserData, mappedWithName: fullLexeme: data.currentLexeme } } - return executeHook(data.hooks.functions.onFunctionDefinition.after, data, result) } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/parameter.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/normalize-parameter.ts similarity index 63% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/parameter.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/normalize-parameter.ts index 
6ccdd9544e..3383b0746b 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/parameter.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/functions/normalize-parameter.ts @@ -1,45 +1,42 @@ -import type { NamedXmlBasedJson } from '../../input-format' -import { retrieveMetaStructure } from '../meta' +import type { NormalizerData } from '../../normalizer-data' import type { RNode, RParameter } from '../../../../model' import { RType, RawRType } from '../../../../model' -import type { ParserData } from '../../data' -import { executeHook, executeUnknownHook } from '../../hooks' -import { log } from '../../../../../../../util/log' -import { guard } from '../../../../../../../util/assert' -import { tryNormalizeSingleNode } from '../structure' -import type { RDelimiter } from '../../../../model/nodes/info' +import type { NamedXmlBasedJson } from '../../input-format' import { parseLog } from '../../../json/parser' +import type { RDelimiter } from '../../../../model/nodes/info' +import { retrieveMetaStructure } from '../../normalize-meta' +import { guard } from '../../../../../../../util/assert' +import { normalizeSingleNode } from '../structure' /** * Either parses `[SYMBOL_FORMALS]` or `[SYMBOL_FORMALS, EQ_FORMALS, expr]` as a parameter of a function definition in R. * Probably directly called by the function definition parser as otherwise, we do not expect to find parameters. * - * @param data - The data used by the parser (see {@link ParserData}) + * @param data - The data used by the parser (see {@link NormalizerData}) * @param objs - Either `[SYMBOL_FORMALS]` or `[SYMBOL_FORMALS, EQ_FORMALS, expr]` * * @returns The parsed parameter or `undefined` if the given object is not a parameter. 
*/ -export function tryNormalizeParameter(data: ParserData, objs: NamedXmlBasedJson[]): RParameter | undefined { +export function tryNormalizeParameter(data: NormalizerData, objs: readonly NamedXmlBasedJson[]): RParameter | undefined { parseLog.debug('[parameter]') - objs = executeHook(data.hooks.functions.onParameter.before, data, objs) if(objs.length !== 1 && objs.length !== 3) { - log.warn(`Either [SYMBOL_FORMALS] or [SYMBOL_FORMALS, EQ_FORMALS, expr], but got: ${JSON.stringify(objs)}`) - return executeUnknownHook(data.hooks.functions.onParameter.unknown, data, objs) + parseLog.warn(`Either [SYMBOL_FORMALS] or [SYMBOL_FORMALS, EQ_FORMALS, expr], but got: ${JSON.stringify(objs)}`) + return undefined } const symbol = objs[0] if(symbol.name !== RawRType.SymbolFormals) { - log.warn(`expected symbol for parameter, yet received ${JSON.stringify(objs)}`) - return executeUnknownHook(data.hooks.functions.onParameter.unknown, data, objs) + parseLog.warn(`expected symbol for parameter, yet received ${JSON.stringify(objs)}`) + return undefined } const defaultValue: RNode | RDelimiter | undefined = objs.length === 3 ? parseWithDefaultValue(data, objs) : undefined const { location, content } = retrieveMetaStructure(symbol.content) - const result: RParameter = { + return { type: RType.Parameter, location, special: content === '...', @@ -62,12 +59,10 @@ export function tryNormalizeParameter(data: ParserData, objs: NamedXmlBasedJson[ additionalTokens: defaultValue?.type === RType.Delimiter ? 
[defaultValue] : [] } } - - return executeHook(data.hooks.functions.onParameter.after, data, result) } -function parseWithDefaultValue(data: ParserData, objs: NamedXmlBasedJson[]): RNode | RDelimiter { +function parseWithDefaultValue(data: NormalizerData, objs: readonly NamedXmlBasedJson[]): RNode | RDelimiter { guard(objs[1].name === RawRType.EqualFormals, () => `[arg-default] second element of parameter must be ${RawRType.EqualFormals}, but: ${JSON.stringify(objs)}`) guard(objs[2].name === RawRType.Expression, () => `[arg-default] third element of parameter must be an Expression but: ${JSON.stringify(objs)}`) - return tryNormalizeSingleNode(data, objs[2]) + return normalizeSingleNode(data, objs[2]) } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/index.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/index.ts index 86be1b333a..257f7c5eba 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/index.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/index.ts @@ -6,4 +6,3 @@ export * from './functions' export * from './operators' export * from './structure' export * from './expression' -export * from './meta' diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/break.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/break.ts deleted file mode 100644 index cc16ea2c7d..0000000000 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/break.ts +++ /dev/null @@ -1,26 +0,0 @@ -import type { ParserData } from '../../data' -import type { XmlBasedJson } from '../../input-format' -import { executeHook } from '../../hooks' -import { retrieveMetaStructure } from '../meta' -import type { RBreak } from '../../../../model' -import { RType } from '../../../../model' -import { parseLog } from '../../../json/parser' - -export function normalizeBreak(data: ParserData, obj: XmlBasedJson): RBreak { - parseLog.debug(`[break] try: ${JSON.stringify(obj)}`) - obj = executeHook(data.hooks.loops.onBreak.before, data, obj) - - const { location, 
content } = retrieveMetaStructure(obj) - - const result: RBreak = { - type: RType.Break, - location, - lexeme: content, - info: { - fullRange: location, - additionalTokens: [], - fullLexeme: content - } - } - return executeHook(data.hooks.loops.onBreak.after, data, result) -} diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/index.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/index.ts index f05a92078f..9acc3a560d 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/index.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/index.ts @@ -1,5 +1,5 @@ -export * from './for' -export * from './while' -export * from './repeat' -export * from './break' -export * from './next' +export * from './normalize-for' +export * from './normalize-while' +export * from './normalize-repeat' +export * from './normalize-break' +export * from './normalize-next' diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/normalize-break.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/normalize-break.ts new file mode 100644 index 0000000000..859a2467b5 --- /dev/null +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/normalize-break.ts @@ -0,0 +1,25 @@ +import type { NormalizerData } from '../../normalizer-data' +import type { XmlBasedJson } from '../../input-format' +import type { RBreak } from '../../../../model' +import { RType } from '../../../../model' +import { parseLog } from '../../../json/parser' +import { expensiveTrace } from '../../../../../../../util/log' +import { retrieveMetaStructure } from '../../normalize-meta' + + +export function normalizeBreak(data: NormalizerData, obj: XmlBasedJson): RBreak { + expensiveTrace(parseLog, () => `[break] ${JSON.stringify(obj)}`) + + const { location, content } = retrieveMetaStructure(obj) + + return { + type: RType.Break, + location, + lexeme: content, + info: { + fullRange: location, + additionalTokens: [], + fullLexeme: content + } + } +} diff --git 
a/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/for.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/normalize-for.ts similarity index 69% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/for.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/normalize-for.ts index 793d02bfe0..37b60d18d3 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/for.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/normalize-for.ts @@ -1,27 +1,24 @@ +import type { NormalizerData } from '../../normalizer-data' import type { NamedXmlBasedJson, XmlBasedJson } from '../../input-format' -import { childrenKey , getKeysGuarded, XmlParseError } from '../../input-format' - -import { ensureExpressionList, getTokenType, retrieveMetaStructure } from '../meta' +import { XmlParseError, childrenKey, getKeyGuarded } from '../../input-format' +import { RType, RawRType } from '../../../../model' +import type { RComment, RForLoop, RNode, RSymbol } from '../../../../model' +import { parseLog } from '../../../json/parser' +import { normalizeExpressions, splitComments, normalizeSingleNode } from '../structure' +import { ensureExpressionList, getTokenType, retrieveMetaStructure } from '../../normalize-meta' import { guard } from '../../../../../../../util/assert' -import type { ParserData } from '../../data' import { tryNormalizeSymbol } from '../values' -import { normalizeBasedOnType, splitComments, tryNormalizeSingleNode } from '../structure' -import type { RComment, RForLoop, RNode, RSymbol } from '../../../../model' -import { RawRType, RType } from '../../../../model' -import { executeHook, executeUnknownHook } from '../../hooks' import { normalizeComment } from '../other' -import { parseLog } from '../../../json/parser' + export function tryNormalizeFor( - data: ParserData, - forToken: NamedXmlBasedJson, - head: NamedXmlBasedJson, - body: NamedXmlBasedJson + data: NormalizerData, + [forToken, head, body]: [NamedXmlBasedJson, 
NamedXmlBasedJson, NamedXmlBasedJson] ): RForLoop | undefined { // funny, for does not use top-level parenthesis if(forToken.name !== RawRType.For) { parseLog.debug('encountered non-for token for supposed for-loop structure') - return executeUnknownHook(data.hooks.loops.onForLoop.unknown, data, { forToken, condition: head, body }) + return undefined } else if(head.name !== RawRType.ForCondition) { throw new XmlParseError(`expected condition for for-loop but found ${JSON.stringify(head)}`) } else if(body.name !== RawRType.Expression && body.name !== RawRType.ExprOfAssignOrHelp) { @@ -30,13 +27,11 @@ export function tryNormalizeFor( parseLog.debug('trying to parse for-loop') - const newParseData = { ...data, data, currentRange: undefined, currentLexeme: undefined }; - - ({ forToken, condition: head, body } = executeHook(data.hooks.loops.onForLoop.before, data, { forToken, condition: head, body })) + const newParseData = { ...data, data, currentRange: undefined, currentLexeme: undefined } const { variable: parsedVariable, vector: parsedVector, comments } = normalizeForHead(newParseData, head.content) - const parseBody = tryNormalizeSingleNode(newParseData, body) + const parseBody = normalizeSingleNode(newParseData, body) if( parsedVariable === undefined || @@ -54,7 +49,7 @@ export function tryNormalizeFor( const { location, content } = retrieveMetaStructure(forToken.content) - const result: RForLoop = { + return { type: RType.ForLoop, variable: parsedVariable, vector: parsedVector, @@ -67,12 +62,11 @@ export function tryNormalizeFor( }, location } - return executeHook(data.hooks.loops.onForLoop.after, data, result) } -function normalizeForHead(data: ParserData, forCondition: XmlBasedJson): { variable: RSymbol | undefined, vector: RNode | undefined, comments: RComment[] } { +function normalizeForHead(data: NormalizerData, forCondition: XmlBasedJson): { variable: RSymbol | undefined, vector: RNode | undefined, comments: RComment[] } { // must have a child which is `in`, 
a variable on the left, and a vector on the right - const children: NamedXmlBasedJson[] = getKeysGuarded(forCondition, childrenKey).map(content => ({ name: getTokenType(content), content })) + const children: NamedXmlBasedJson[] = getKeyGuarded(forCondition, childrenKey).map(content => ({ name: getTokenType(content), content })) const { comments, others } = splitComments(children) const inPosition = others.findIndex(elem => elem.name === RawRType.ForIn) @@ -81,7 +75,7 @@ function normalizeForHead(data: ParserData, forCondition: XmlBasedJson): { varia guard(variable !== undefined, () => `for loop variable should have been parsed to a symbol but was ${JSON.stringify(variable)}`) guard((variable as RNode).type === RType.Symbol, () => `for loop variable should have been parsed to a symbol but was ${JSON.stringify(variable)}`) - const vector = normalizeBasedOnType(data, [others[inPosition + 1]]) + const vector = normalizeExpressions(data, [others[inPosition + 1]]) guard(vector.length === 1 && vector[0].type !== RType.Delimiter, () => `for loop vector should have been parsed to a single element but was ${JSON.stringify(vector)}`) const parsedComments = comments.map(c => normalizeComment(data, c.content)) diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/next.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/normalize-next.ts similarity index 51% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/next.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/normalize-next.ts index 97e1c42794..4c62c666d1 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/next.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/normalize-next.ts @@ -1,18 +1,17 @@ -import type { ParserData } from '../../data' -import type { XmlBasedJson } from '../../input-format' -import { executeHook } from '../../hooks' -import { retrieveMetaStructure } from '../meta' +import type { NormalizerData } from '../../normalizer-data' import type 
{ RNext } from '../../../../model' import { RType } from '../../../../model' +import type { XmlBasedJson } from '../../input-format' +import { expensiveTrace } from '../../../../../../../util/log' import { parseLog } from '../../../json/parser' +import { retrieveMetaStructure } from '../../normalize-meta' -export function normalizeNext(data: ParserData, obj: XmlBasedJson): RNext { - parseLog.debug(`[next] try: ${JSON.stringify(obj)}`) - obj = executeHook(data.hooks.loops.onNext.before, data, obj) +export function normalizeNext(data: NormalizerData, obj: XmlBasedJson): RNext { + expensiveTrace(parseLog, () => `[next] ${JSON.stringify(obj)}`) const { location, content } = retrieveMetaStructure(obj) - const result: RNext = { + return { type: RType.Next, location, lexeme: content, @@ -22,5 +21,4 @@ export function normalizeNext(data: ParserData, obj: XmlBasedJson): RNext { fullLexeme: data.currentLexeme } } - return executeHook(data.hooks.loops.onNext.after, data, result) } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/repeat.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/normalize-repeat.ts similarity index 50% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/repeat.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/normalize-repeat.ts index 4db1b8bf6b..bbc2dcf7ff 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/repeat.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/normalize-repeat.ts @@ -1,39 +1,35 @@ +import type { NormalizerData } from '../../normalizer-data' import type { NamedXmlBasedJson } from '../../input-format' -import { ensureExpressionList, retrieveMetaStructure } from '../meta' -import type { ParserData } from '../../data' -import { tryNormalizeSingleNode } from '../structure' import type { RRepeatLoop } from '../../../../model' import { RawRType, RType } from '../../../../model' -import { guard } from '../../../../../../../util/assert' -import { executeHook, 
executeUnknownHook } from '../../hooks' import { parseLog } from '../../../json/parser' +import { normalizeSingleNode } from '../structure' +import { guard } from '../../../../../../../util/assert' +import { ensureExpressionList, retrieveMetaStructure } from '../../normalize-meta' /** * Try to parse the construct as a {@link RRepeatLoop}. * - * @param data - The data used by the parser (see {@link ParserData}) + * @param data - The data used by the parser (see {@link NormalizerData}) * @param repeatToken - Token which represents the `repeat` keyword - * @param body - The `body` of the repeat-loop + * @param bodyToken - The `body` of the repeat-loop * * @returns The parsed {@link RRepeatLoop} or `undefined` if the given construct is not a repeat-loop */ -export function tryNormalizeRepeat(data: ParserData, repeatToken: NamedXmlBasedJson, body: NamedXmlBasedJson): RRepeatLoop | undefined { +export function tryNormalizeRepeat(data: NormalizerData, [repeatToken, bodyToken]: [NamedXmlBasedJson, NamedXmlBasedJson]): RRepeatLoop | undefined { if(repeatToken.name !== RawRType.Repeat) { parseLog.debug('encountered non-repeat token for supposed repeat-loop structure') - return executeUnknownHook(data.hooks.loops.onRepeatLoop.unknown, data, { repeatToken, body }) + return undefined } - parseLog.debug('trying to parse repeat-loop'); - ({ repeatToken, body } = executeHook(data.hooks.loops.onRepeatLoop.before, data, { repeatToken, body })) + parseLog.debug('trying to parse repeat-loop') - const parseBody = tryNormalizeSingleNode(data, body) - guard(parseBody.type !== RType.Delimiter, () => `no body for repeat-loop ${JSON.stringify(repeatToken)} (${JSON.stringify(body)})`) + const parseBody = normalizeSingleNode(data, bodyToken) + guard(parseBody.type !== RType.Delimiter, () => `no body for repeat-loop ${JSON.stringify(repeatToken)} (${JSON.stringify(bodyToken)})`) - const { - location, - content - } = retrieveMetaStructure(repeatToken.content) - const result: RRepeatLoop = { + 
const { location, content } = retrieveMetaStructure(repeatToken.content) + + return { type: RType.RepeatLoop, location, lexeme: content, @@ -44,5 +40,4 @@ export function tryNormalizeRepeat(data: ParserData, repeatToken: NamedXmlBasedJ fullLexeme: data.currentLexeme } } - return executeHook(data.hooks.loops.onRepeatLoop.after, data, result) } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/while.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/normalize-while.ts similarity index 67% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/while.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/normalize-while.ts index a8e832313f..be8ed5b9bb 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/while.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/loops/normalize-while.ts @@ -1,26 +1,21 @@ +import type { NormalizerData } from '../../normalizer-data' import type { NamedXmlBasedJson } from '../../input-format' import { XmlParseError } from '../../input-format' -import { ensureExpressionList, retrieveMetaStructure } from '../meta' -import { tryNormalizeSingleNode } from '../structure' -import type { ParserData } from '../../data' import type { RWhileLoop } from '../../../../model' import { RawRType, RType } from '../../../../model' -import { executeHook, executeUnknownHook } from '../../hooks' import { parseLog } from '../../../json/parser' +import { normalizeSingleNode } from '../structure' +import { ensureExpressionList, retrieveMetaStructure } from '../../normalize-meta' export function tryNormalizeWhile( - data: ParserData, - whileToken: NamedXmlBasedJson, - leftParen: NamedXmlBasedJson, - condition: NamedXmlBasedJson, - rightParen: NamedXmlBasedJson, - body: NamedXmlBasedJson + data: NormalizerData, + [whileToken, leftParen, condition, rightParen, body]: [NamedXmlBasedJson, NamedXmlBasedJson, NamedXmlBasedJson, NamedXmlBasedJson, NamedXmlBasedJson] ): RWhileLoop | undefined { if(whileToken.name !== 
RawRType.While) { parseLog.debug( 'encountered non-while token for supposed while-loop structure' ) - return executeUnknownHook(data.hooks.loops.onWhileLoop.unknown, data, { whileToken, leftParen, condition, rightParen, body }) + return undefined } else if(leftParen.name !== RawRType.ParenLeft) { throw new XmlParseError( `expected left-parenthesis for while but found ${JSON.stringify( @@ -35,13 +30,11 @@ export function tryNormalizeWhile( ) } - parseLog.debug( - 'trying to parse while-loop' - ) + parseLog.debug('trying to parse while-loop') - const parsedCondition = tryNormalizeSingleNode(data, condition) - const parseBody = tryNormalizeSingleNode(data, body) + const parsedCondition = normalizeSingleNode(data, condition) + const parseBody = normalizeSingleNode(data, body) if(parsedCondition.type === RType.Delimiter || parseBody.type === RType.Delimiter) { throw new XmlParseError( @@ -54,7 +47,7 @@ export function tryNormalizeWhile( const { location, content } = retrieveMetaStructure(whileToken.content) - const result: RWhileLoop = { + return { type: RType.WhileLoop, condition: parsedCondition, body: ensureExpressionList(parseBody), @@ -66,5 +59,4 @@ export function tryNormalizeWhile( fullLexeme: data.currentLexeme } } - return executeHook(data.hooks.loops.onWhileLoop.after, data, result) } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/normalize-access.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/normalize-access.ts new file mode 100644 index 0000000000..922f931d7c --- /dev/null +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/normalize-access.ts @@ -0,0 +1,115 @@ +import type { NamedXmlBasedJson } from '../input-format' +import type { NormalizerData } from '../normalizer-data' +import { normalizeExpressions, normalizeSingleNode } from './structure' +import { tryToNormalizeArgument } from './functions/normalize-argument' +import { parseLog } from '../../json/parser' +import type { RAccess, RArgument, RNode } from '../../../model' +import { 
EmptyArgument, RawRType, RType } from '../../../model' +import { splitArrayOn } from '../../../../../../util/arrays' +import { guard } from '../../../../../../util/assert' +import { retrieveMetaStructure } from '../normalize-meta' + +function normalizeAbstractArgument(x: readonly NamedXmlBasedJson[], data: NormalizerData, operator: '$' | '@' | '[' | '[['): RArgument | typeof EmptyArgument { + if(x.length === 0) { + return EmptyArgument + } else if(x.length !== 1 || x[0].name === RawRType.Expression) { + const gotAccess = tryToNormalizeArgument(data, x) + guard(gotAccess !== undefined, () => `expected one access result in access as argument, yet received ${JSON.stringify(gotAccess)} for ${JSON.stringify([operator, x])}`) + return gotAccess + } else { + const node = normalizeSingleNode(data, x[0]) as RNode + guard(node.type !== RType.ExpressionList, () => `expected expression list to be parsed as argument, yet received ${JSON.stringify(node)} for ${JSON.stringify(x)}`) + return { + type: RType.Argument, + location: node.location, + lexeme: node.lexeme, + name: undefined, + value: node, + info: { + fullRange: node.location, + fullLexeme: node.lexeme, + additionalTokens: [] + } + } + } +} + +/** + * Tries to normalize the given data as access (e.g., indexing). 
+ * + * @param data - The data used by the parser (see {@link NormalizerData}) + * @param mappedWithName - The json object to extract the meta-information from + * + * @returns The parsed {@link RAccess} or `undefined` if the given construct is not accessing a value + */ +export function tryNormalizeAccess(data: NormalizerData, mappedWithName: NamedXmlBasedJson[]): RAccess | undefined { + parseLog.trace('trying to parse access') + + if(mappedWithName.length < 3) { + parseLog.trace('expected at least three elements are required to parse an access') + return undefined + } + + const accessOp = mappedWithName[1] + + let operator: RAccess['operator'] + let closingLength = 0 + + switch(accessOp.name) { + case RawRType.BracketLeft: + operator = '[' + closingLength = 1 + break + case RawRType.Dollar: + operator = '$' + break + case RawRType.At: + operator = '@' + break + case RawRType.DoubleBracketLeft: + operator = '[[' + closingLength = 2 + break + default: + parseLog.trace(`expected second element to be an access operator, yet received ${accessOp.name}`) + return undefined + } + + const accessed = mappedWithName[0] + if(accessed.name !== RawRType.Expression && accessed.name !== RawRType.ExprOfAssignOrHelp) { + parseLog.trace(`expected accessed element to be wrapped an expression, yet received ${accessed.name}`) + return undefined + } + + const parsedAccessed = normalizeExpressions(data, [accessed]) + if(parsedAccessed.length !== 1) { + parseLog.trace(`expected accessed element to be wrapped an expression, yet received ${accessed.name}`) + return undefined + } + + const remaining = mappedWithName.slice(2, mappedWithName.length - closingLength) + + parseLog.trace(`${remaining.length} remaining arguments for access`) + + const splitAccessOnComma = splitArrayOn(remaining, x => x.name === RawRType.Comma) + + const parsedAccess: (RArgument | typeof EmptyArgument)[] = splitAccessOnComma.map(x => { + return normalizeAbstractArgument(x, data, operator) + }) + + const { content, 
location } = retrieveMetaStructure(accessOp.content) + + return { + type: RType.Access, + location, + lexeme: content, + accessed: parsedAccessed[0], + operator, + access: parsedAccess, + info: { + fullRange: data.currentRange, + additionalTokens: [], + fullLexeme: data.currentLexeme + } + } as RAccess +} diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/index.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/index.ts index 6b53da363f..7d2dbf283f 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/index.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/index.ts @@ -1,3 +1,2 @@ -export * from './unary' -export * from './binary' -export * from './special' +export * from './normalize-unary' +export * from './normalize-binary' diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/binary.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/normalize-binary.ts similarity index 50% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/binary.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/normalize-binary.ts index 764e5a8c24..742c7f7b16 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/binary.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/normalize-binary.ts @@ -1,66 +1,43 @@ +import type { NormalizerData } from '../../normalizer-data' import type { NamedXmlBasedJson } from '../../input-format' import { XmlParseError } from '../../input-format' -import { ensureChildrenAreLhsAndRhsOrdered, retrieveMetaStructure, retrieveOpName } from '../meta' -import { identifySpecialOp } from './special' -import type { ParserData } from '../../data' -import { tryNormalizeSingleNode } from '../structure' import type { - BinaryOperatorFlavor, - RBinaryOp, RFunctionCall, RNamedFunctionCall, - RNode, - RPipe, - RSymbol } from '../../../../model' + RBinaryOp, RFunctionCall, + RNode, RPipe +} from '../../../../model' import { - 
ArithmeticOperatorsRAst, - AssignmentsRAst, - ComparisonOperatorsRAst, - LogicalOperatorsRAst, - ModelFormulaOperatorsRAst, RawRType, - RType + RType, + OperatorsInRAst, + RawRType } from '../../../../model' -import { executeHook, executeUnknownHook } from '../../hooks' -import { guard } from '../../../../../../../util/assert' import { parseLog } from '../../../json/parser' +import { ensureChildrenAreLhsAndRhsOrdered, retrieveMetaStructure, retrieveOpName } from '../../normalize-meta' +import { normalizeSingleNode } from '../structure' +import { guard } from '../../../../../../../util/assert' +import { expensiveTrace } from '../../../../../../../util/log' +import { startAndEndsWith } from '../../../../../../../util/strings' + /** * Parsing binary operations includes the pipe, even though the produced PIPE construct is not a binary operation, * to ensure it is handled separately from the others (especially in the combination of a pipe bind) */ export function tryNormalizeBinary( - data: ParserData, - lhs: NamedXmlBasedJson, - operator: NamedXmlBasedJson, - rhs: NamedXmlBasedJson + data: NormalizerData, + [lhs, operator, rhs]: [NamedXmlBasedJson, NamedXmlBasedJson, NamedXmlBasedJson] ): RNode | undefined { - parseLog.trace(`binary op for ${lhs.name} [${operator.name}] ${rhs.name}`) - let flavor: BinaryOperatorFlavor | 'special' | 'pipe' - if(RawRType.Special === operator.name) { - flavor = 'special' - } else if(ArithmeticOperatorsRAst.has(operator.name)) { - flavor = 'arithmetic' - } else if(ComparisonOperatorsRAst.has(operator.name)) { - flavor = 'comparison' - } else if(LogicalOperatorsRAst.has(operator.name)) { - flavor = 'logical' - } else if(ModelFormulaOperatorsRAst.has(operator.name)) { - flavor = 'model formula' - } else if(AssignmentsRAst.has(operator.name)) { - flavor = 'assignment' - } else if(operator.name === RawRType.Pipe) { - flavor = 'pipe' + expensiveTrace(parseLog, () => `binary op for ${lhs.name} [${operator.name}] ${rhs.name}`) + if(operator.name 
=== RawRType.Special || OperatorsInRAst.has(operator.name) || operator.name === RawRType.Pipe) { + return parseBinaryOp(data, lhs, operator, rhs) } else { - return executeUnknownHook(data.hooks.operators.onBinary.unknown, data, { lhs, operator, rhs }) + return undefined } - return parseBinaryOp(data, flavor, lhs, operator, rhs) } -function parseBinaryOp(data: ParserData, flavor: BinaryOperatorFlavor | 'special' | 'pipe', lhs: NamedXmlBasedJson, operator: NamedXmlBasedJson, rhs: NamedXmlBasedJson): RFunctionCall | RBinaryOp | RPipe { - parseLog.debug(`[binary op] trying to parse ${flavor}`); - ({ flavor, lhs, rhs, operator } = executeHook(data.hooks.operators.onBinary.before, data, { flavor, lhs, operator, rhs })) - +function parseBinaryOp(data: NormalizerData, lhs: NamedXmlBasedJson, operator: NamedXmlBasedJson, rhs: NamedXmlBasedJson): RFunctionCall | RBinaryOp | RPipe { ensureChildrenAreLhsAndRhsOrdered(lhs.content, rhs.content) - let parsedLhs = tryNormalizeSingleNode(data, lhs) - let parsedRhs = tryNormalizeSingleNode(data, rhs) + const parsedLhs = normalizeSingleNode(data, lhs) + const parsedRhs = normalizeSingleNode(data, rhs) if(parsedLhs.type === RType.Delimiter || parsedRhs.type === RType.Delimiter) { throw new XmlParseError(`unexpected under-sided binary op, received ${JSON.stringify([parsedLhs, parsedRhs])} for ${JSON.stringify([lhs, operator, rhs])}`) @@ -68,24 +45,13 @@ function parseBinaryOp(data: ParserData, flavor: BinaryOperatorFlavor | 'special const operationName = retrieveOpName(operator) - // special support for strings in assignments - if(flavor === 'assignment') { - [parsedLhs, parsedRhs] = processLhsAndRhsForAssignment(operationName, parsedLhs, parsedRhs) - } - - - const { location, content } = retrieveMetaStructure(operator.content) - if(flavor === 'special') { - flavor = identifySpecialOp(content) - } - - if(flavor === 'special') { + if(startAndEndsWith(operationName, '%')) { guard(parsedLhs.location !== undefined && parsedLhs.lexeme !== 
undefined && parsedRhs.location !== undefined && parsedRhs.lexeme !== undefined, () => `special op lhs and rhs must have a locations and lexemes, but ${JSON.stringify(parsedLhs)} and ${JSON.stringify(parsedRhs)})`) // parse as infix function call! - const result: RNamedFunctionCall = { + return { type: RType.FunctionCall, flavor: 'named', infixSpecial: true, @@ -119,14 +85,10 @@ function parseBinaryOp(data: ParserData, flavor: BinaryOperatorFlavor | 'special ], info: {} } - return executeHook(data.hooks.operators.onBinary.after, data, result) - } - - let result: RBinaryOp | RPipe - if(flavor === 'pipe') { + } else if(operator.name === RawRType.Pipe) { guard(parsedLhs.location !== undefined, () => `pipe lhs must have a location, but ${JSON.stringify(parsedLhs)})`) guard(parsedLhs.lexeme !== undefined, () => `pipe lhs must have a full lexeme, but ${JSON.stringify(parsedLhs)})`) - result = { + return { type: RType.Pipe, location, lhs: { @@ -146,9 +108,8 @@ function parseBinaryOp(data: ParserData, flavor: BinaryOperatorFlavor | 'special } } } else { - result = { + return { type: RType.BinaryOp, - flavor, location, lhs: parsedLhs, rhs: parsedRhs, @@ -161,24 +122,4 @@ function parseBinaryOp(data: ParserData, flavor: BinaryOperatorFlavor | 'special } } } - return executeHook(data.hooks.operators.onBinary.after, data, result) -} - -function processLhsAndRhsForAssignment(opName: string, parsedLhs: RNode, parsedRhs: RNode): [RNode, RNode] { - const isRhs = opName === '->' || opName === '->>' - const assigned = isRhs ? parsedRhs : parsedLhs - if(assigned.type !== RType.String) { - return [parsedLhs, parsedRhs] - } - - // update the assigned value to be parsed as a symbol - const result: RSymbol = { - type: RType.Symbol, - lexeme: assigned.lexeme, - location: assigned.location, - content: assigned.content.str, - namespace: undefined, - info: assigned.info - } - return isRhs ? 
[parsedLhs, result] : [result, parsedRhs] } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/normalize-unary.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/normalize-unary.ts new file mode 100644 index 0000000000..b0dc65af93 --- /dev/null +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/normalize-unary.ts @@ -0,0 +1,55 @@ +import type { NormalizerData } from '../../normalizer-data' +import type { NamedXmlBasedJson } from '../../input-format' +import type { + RNode, RUnaryOp } from '../../../../model' +import { + RType, + UnaryOperatorsInRAst +} from '../../../../model' +import { parseLog } from '../../../json/parser' +import { normalizeSingleNode } from '../structure' +import { retrieveMetaStructure, retrieveOpName } from '../../normalize-meta' +import { guard } from '../../../../../../../util/assert' +import { expensiveTrace } from '../../../../../../../util/log' + + +/** + * Parses the construct as a {@link RUnaryOp}. + * + * @param data - The data used by the parser (see {@link NormalizerData}) + * @param operator - The operator token + * @param operand - The operand of the unary operator + * + * @returns The parsed {@link RUnaryOp} or `undefined` if the given construct is not a unary operator + */ +export function tryNormalizeUnary(data: NormalizerData, [operator, operand]: [NamedXmlBasedJson, NamedXmlBasedJson]): RNode | undefined { + expensiveTrace(parseLog, () => `unary op for ${operator.name} ${operand.name}`) + + if(UnaryOperatorsInRAst.has(operator.name)) { + return parseUnaryOp(data, operator, operand) + } else { + return undefined + } +} + +function parseUnaryOp(data: NormalizerData, operator: NamedXmlBasedJson, operand: NamedXmlBasedJson): RUnaryOp { + const parsedOperand = normalizeSingleNode(data, operand) + + guard(parsedOperand.type !== RType.Delimiter, 'unexpected under-sided unary op') + + const operationName = retrieveOpName(operator) + const { location, content } = 
retrieveMetaStructure(operator.content) + + return { + type: RType.UnaryOp, + location, + operator: operationName, + lexeme: content, + operand: parsedOperand, + info: { + fullRange: data.currentRange, + additionalTokens: [], + fullLexeme: data.currentLexeme + } + } +} diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/special.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/special.ts deleted file mode 100644 index d80a141f8b..0000000000 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/special.ts +++ /dev/null @@ -1,23 +0,0 @@ -import type { - BinaryOperatorFlavor } from '../../../../model' -import { - ArithmeticOperatorsRAst, - ComparisonOperatorsRAst, - LogicalOperatorsRAst -} from '../../../../model' - -/** - * Identify the flavor of a given operator, as we do not really have a use for "special" - * operators within our internal AST. - */ -export function identifySpecialOp(content: string): BinaryOperatorFlavor | 'special' { - if(ComparisonOperatorsRAst.has(content)) { - return 'comparison' - } else if(LogicalOperatorsRAst.has(content)) { - return 'logical' - } else if(ArithmeticOperatorsRAst.has(content)) { - return 'arithmetic' - } else { - return 'special' - } -} diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/unary.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/unary.ts deleted file mode 100644 index c69cdc89fc..0000000000 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/operators/unary.ts +++ /dev/null @@ -1,69 +0,0 @@ -import type { NamedXmlBasedJson } from '../../input-format' -import { retrieveMetaStructure, retrieveOpName } from '../meta' -import { tryNormalizeSingleNode } from '../structure' -import type { ParserData } from '../../data' -import { guard } from '../../../../../../../util/assert' -import type { - RNode, - RUnaryOp, - UnaryOperatorFlavor -} from '../../../../model' -import { - ArithmeticOperatorsRAst, - LogicalOperatorsRAst, - ModelFormulaOperatorsRAst, - 
RType -} from '../../../../model' -import { executeHook, executeUnknownHook } from '../../hooks' -import { parseLog } from '../../../json/parser' - -/** - * Parses the construct as a {@link RUnaryOp} (automatically identifies the flavor). - * - * @param data - The data used by the parser (see {@link ParserData}) - * @param operator - The operator token - * @param operand - The operand of the unary operator - * - * @returns The parsed {@link RUnaryOp} or `undefined` if the given construct is not a unary operator - */ -export function tryNormalizeUnary(data: ParserData, operator: NamedXmlBasedJson, operand: NamedXmlBasedJson): RNode | undefined { - parseLog.trace(`unary op for ${operator.name} ${operand.name}`) - let flavor: UnaryOperatorFlavor - if(ArithmeticOperatorsRAst.has(operator.name)) { - flavor = 'arithmetic' - } else if(LogicalOperatorsRAst.has(operator.name)) { - flavor = 'logical' - } else if(ModelFormulaOperatorsRAst.has(operator.name)) { - flavor = 'model formula' - } else { - return executeUnknownHook(data.hooks.operators.onUnary.unknown, data, { operator, operand }) - } - return parseUnaryOp(data, flavor, operator, operand) -} - -function parseUnaryOp(data: ParserData, flavor: UnaryOperatorFlavor, operator: NamedXmlBasedJson, operand: NamedXmlBasedJson): RUnaryOp { - parseLog.debug(`[unary op] parse ${flavor}`); // <- semicolon sadly required for not miss-interpreting the destructuring match as call - ({ flavor, operator, operand } = executeHook(data.hooks.operators.onUnary.before, data, { flavor, operator, operand })) - - const parsedOperand = tryNormalizeSingleNode(data, operand) - - guard(parsedOperand.type !== RType.Delimiter, () => 'unexpected under-sided unary op') - - const operationName = retrieveOpName(operator) - const { location, content } = retrieveMetaStructure(operator.content) - - const result: RUnaryOp = { - type: RType.UnaryOp, - flavor, - location, - operator: operationName, - lexeme: content, - operand: parsedOperand, - info: { - 
fullRange: data.currentRange, - additionalTokens: [], - fullLexeme: data.currentLexeme - } - } - return executeHook(data.hooks.operators.onUnary.after, data, result) -} diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/other/index.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/other/index.ts index ed051c229a..9d4db77520 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/other/index.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/other/index.ts @@ -1 +1 @@ -export * from './comment' +export * from './normalize-comment' diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/other/comment.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/other/normalize-comment.ts similarity index 57% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/other/comment.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/other/normalize-comment.ts index b29619731d..71260ca8d7 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/other/comment.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/other/normalize-comment.ts @@ -1,27 +1,22 @@ +import type { NormalizerData } from '../../normalizer-data' import type { XmlBasedJson } from '../../input-format' import type { RComment } from '../../../../model' import { RType } from '../../../../model' -import { retrieveMetaStructure } from '../meta' +import { retrieveMetaStructure } from '../../normalize-meta' import { guard } from '../../../../../../../util/assert' -import { executeHook } from '../../hooks' -import type { ParserData } from '../../data' -import { parseLog } from '../../../json/parser' /** * Normalize the given object as an R comment. * This requires you to check the corresponding name beforehand. 
* - * @param data - The data used by the parser (see {@link ParserData}) + * @param data - The data used by the parser (see {@link NormalizerData}) * @param obj - The json object to extract the meta-information from */ -export function normalizeComment(data: ParserData, obj: XmlBasedJson): RComment { - parseLog.debug('[comment]') - obj = executeHook(data.hooks.other.onComment.before, data, obj) - +export function normalizeComment(data: NormalizerData, obj: XmlBasedJson): RComment { const { location, content } = retrieveMetaStructure(obj) guard(content.startsWith('#'), 'comment must start with #') - const result: RComment = { + return { type: RType.Comment, location, content: content.slice(1), @@ -32,5 +27,4 @@ export function normalizeComment(data: ParserData, obj: XmlBasedJson): RComment fullLexeme: content } } - return executeHook(data.hooks.other.onComment.after, data, result) } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/other/line-directive.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/other/normalize-line-directive.ts similarity index 72% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/other/line-directive.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/other/normalize-line-directive.ts index 3afaac7b0e..d00fa7a5ac 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/other/line-directive.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/other/normalize-line-directive.ts @@ -1,11 +1,11 @@ +import type { NormalizerData } from '../../normalizer-data' import type { XmlBasedJson } from '../../input-format' import type { RComment, RLineDirective } from '../../../../model' import { RType } from '../../../../model' -import { retrieveMetaStructure } from '../meta' -import { guard } from '../../../../../../../util/assert' -import { executeHook } from '../../hooks' -import type { ParserData } from '../../data' import { parseLog } from '../../../json/parser' +import { retrieveMetaStructure } from '../../normalize-meta' 
+import { guard } from '../../../../../../../util/assert' + const LineDirectiveRegex = /^#line\s+(\d+)\s+"([^"]+)"\s*$/ @@ -14,20 +14,16 @@ const LineDirectiveRegex = /^#line\s+(\d+)\s+"([^"]+)"\s*$/ * This requires you to check the corresponding name beforehand. * If the given object turns out to be no line directive, this returns a normal comment instead. * - * @param data - The data used by the parser (see {@link ParserData}) + * @param data - The data used by the parser (see {@link NormalizerData}) * @param obj - The json object to extract the meta-information from */ -export function normalizeLineDirective(data: ParserData, obj: XmlBasedJson): RLineDirective | RComment { - parseLog.debug('[line-directive]') - obj = executeHook(data.hooks.other.onLineDirective.before, data, obj) - +export function normalizeLineDirective(data: NormalizerData, obj: XmlBasedJson): RLineDirective | RComment { const { location, content } = retrieveMetaStructure(obj) guard(content.startsWith('#line'), 'line directive must start with #line') const match = LineDirectiveRegex.exec(content) - let result: RLineDirective | RComment if(match === null) { parseLog.debug(`[line-directive] does not match the regex ${LineDirectiveRegex.source} given ${JSON.stringify(content)}`) - result = { + return { type: RType.Comment, location, lexeme: content, @@ -39,7 +35,7 @@ export function normalizeLineDirective(data: ParserData, obj: XmlBasedJson): RLi content: content.slice(1) } } else { - result = { + return { type: RType.LineDirective, location, line: parseInt(match[1]), @@ -52,5 +48,4 @@ export function normalizeLineDirective(data: ParserData, obj: XmlBasedJson): RLi } } } - return executeHook(data.hooks.other.onLineDirective.after, data, result) } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/elements.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/elements.ts deleted file mode 100644 index fce4f2e05c..0000000000 --- 
a/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/elements.ts +++ /dev/null @@ -1,196 +0,0 @@ -import type { NamedXmlBasedJson, XmlBasedJson } from '../../input-format' -import { splitArrayOn } from '../../../../../../../util/arrays' -import { getWithTokenType, retrieveMetaStructure } from '../meta' -import type { ParserData } from '../../data' -import { tryNormalizeSingleNode } from './single-element' -import { tryNormalizeSymbol } from '../values' -import { tryNormalizeUnary, tryNormalizeBinary } from '../operators' -import { - tryNormalizeRepeat, - tryNormalizeFor, - tryNormalizeWhile -} from '../loops' -import { tryNormalizeIfThenElse, tryNormalizeIfThen } from '../control' -import type { RNode } from '../../../../model' -import { RType, RawRType } from '../../../../model' -import { log } from '../../../../../../../util/log' -import { normalizeComment } from '../other' -import type { RDelimiter } from '../../../../model/nodes/info' -import { parseLog } from '../../../json/parser' - -function normalizeMappedWithoutSemicolonBasedOnType(mappedWithName: NamedXmlBasedJson[], data: ParserData): (RNode | RDelimiter)[] { - if(mappedWithName.length === 1) { - return [tryNormalizeSingleNode(data, mappedWithName[0])] - } else if(mappedWithName.length === 2) { - const unaryOp = tryNormalizeUnary( - data, - mappedWithName[0], - mappedWithName[1] - ) - if(unaryOp !== undefined) { - return [unaryOp] - } - const repeatLoop = tryNormalizeRepeat( - data, - mappedWithName[0], - mappedWithName[1] - ) - if(repeatLoop !== undefined) { - return [repeatLoop] - } - } else if(mappedWithName.length === 3) { - const binary = tryNormalizeBinary( - data, - mappedWithName[0], - mappedWithName[1], - mappedWithName[2] - ) - if(binary !== undefined) { - return [binary] - } else { - const forLoop = tryNormalizeFor( - data, - mappedWithName[0], - mappedWithName[1], - mappedWithName[2] - ) - if(forLoop !== undefined) { - return [forLoop] - } else { - // could be a symbol with namespace 
information - const symbol = tryNormalizeSymbol(data, mappedWithName) - if(symbol !== undefined) { - return [symbol] - } - } - } - } else if(mappedWithName.length === 5) { - const ifThen = tryNormalizeIfThen(data, [ - mappedWithName[0], - mappedWithName[1], - mappedWithName[2], - mappedWithName[3], - mappedWithName[4] - ]) - if(ifThen !== undefined) { - return [ifThen] - } else { - const whileLoop = tryNormalizeWhile( - data, - mappedWithName[0], - mappedWithName[1], - mappedWithName[2], - mappedWithName[3], - mappedWithName[4] - ) - if(whileLoop !== undefined) { - return [whileLoop] - } - } - } else if(mappedWithName.length === 7) { - const ifThenElse = tryNormalizeIfThenElse(data, [ - mappedWithName[0], - mappedWithName[1], - mappedWithName[2], - mappedWithName[3], - mappedWithName[4], - mappedWithName[5], - mappedWithName[6] - ]) - if(ifThenElse !== undefined) { - return [ifThenElse] - } - } - - // otherwise perform default parsing - return parseNodesWithUnknownType(data, mappedWithName) -} - -export function splitComments(mappedWithName: NamedXmlBasedJson[]) { - const comments = [] - const others = [] - for(const elem of mappedWithName) { - if(elem.name === RawRType.Comment) { - comments.push(elem) - } else { - others.push(elem) - } - } - return { comments, others } -} - -export function normalizeBasedOnType( - data: ParserData, - obj: XmlBasedJson[] | NamedXmlBasedJson[] -): (RNode | RDelimiter)[] { - if(obj.length === 0) { - parseLog.warn('no children received, skipping') - return [] - } - - let mappedWithName: NamedXmlBasedJson[] - - if(obj[0].name) { - mappedWithName = obj as NamedXmlBasedJson[] - } else { - mappedWithName = getWithTokenType(obj as XmlBasedJson[]) - } - - log.trace(`[parseBasedOnType] names: [${mappedWithName.map(({ name }) => name).join(', ')}]`) - - const semiColons: RDelimiter[] = [] - const splitOnSemicolon = splitArrayOn( - mappedWithName, - node => { - const res = node.name === RawRType.Semicolon - if(res) { - const { location, 
content } = retrieveMetaStructure(node.content) - semiColons.push({ - type: RType.Delimiter, - subtype: RawRType.Semicolon, - location: location, - lexeme: content - }) - } - return res - } - ) - - if(splitOnSemicolon.length > 1) { - log.trace(`found ${splitOnSemicolon.length} expressions by semicolon-split, parsing them separately`) - const flattened = [] - for(const sub of splitOnSemicolon) { - const result = normalizeBasedOnType(data, sub) - if(result.length === 1 && result[0].type === RType.ExpressionList) { - flattened.push(...result[0].children) - } else { - flattened.push(...result) - } - } - return [...flattened, ...semiColons] - } - - /* - * if splitOnSemicolon.length === 1, we can continue with the normal parsing, but we may have had a trailing semicolon, with this, it is removed as well. - * splitOnSemicolon.length === 0 is not possible, as we would have had an empty array before, split does not add elements. - */ - mappedWithName = splitOnSemicolon[0] - const { comments, others } = splitComments(mappedWithName) - - const parsedComments = comments.map(c => normalizeComment(data, c.content)) - - const result = normalizeMappedWithoutSemicolonBasedOnType(others, data) - - // we hoist comments - return [...parsedComments, ...result] -} - -export function parseNodesWithUnknownType(data: ParserData, mappedWithName: NamedXmlBasedJson[]): (RNode | RDelimiter)[] { - const parsedNodes: (RNode | RDelimiter)[] = [] - // used to indicate the new root node of this set of nodes - for(const elem of mappedWithName) { - const retrieved = tryNormalizeSingleNode(data, elem) - parsedNodes.push(retrieved) - } - return parsedNodes -} diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/index.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/index.ts index b33d20458f..a5795abc2d 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/index.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/index.ts @@ -1,3 +1,4 @@ -export * 
from './root' -export * from './elements' -export * from './single-element' +export * from './normalize-root' +export * from './normalize-expressions' +export * from './normalize-single-node' +export { normalizeDelimiter } from './normalize-delimiter' diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/normalize-delimiter.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/normalize-delimiter.ts new file mode 100644 index 0000000000..8ee7f73be5 --- /dev/null +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/normalize-delimiter.ts @@ -0,0 +1,14 @@ +import type { NamedXmlBasedJson } from '../../input-format' +import type { RDelimiter } from '../../../../model/nodes/info' +import { retrieveMetaStructure } from '../../normalize-meta' +import { RType } from '../../../../model' + +export function normalizeDelimiter(elem: NamedXmlBasedJson): RDelimiter { + const { location, content } = retrieveMetaStructure(elem.content) + return { + type: RType.Delimiter, + location, + lexeme: content, + subtype: elem.name + } +} diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/normalize-expressions.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/normalize-expressions.ts new file mode 100644 index 0000000000..477968dd3f --- /dev/null +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/normalize-expressions.ts @@ -0,0 +1,198 @@ +import type { NamedXmlBasedJson, XmlBasedJson } from '../../input-format' +import type { NormalizerData } from '../../normalizer-data' +import type { RComment, RExpressionList, RNode } from '../../../../model' +import { RawRType, RType } from '../../../../model' +import type { RDelimiter } from '../../../../model/nodes/info' +import { normalizeSingleNode } from './normalize-single-node' +import { tryNormalizeBinary, tryNormalizeUnary } from '../operators' +import { tryNormalizeFor, tryNormalizeRepeat, tryNormalizeWhile } from '../loops' +import { tryNormalizeSymbol } from '../values' 
+import { tryNormalizeIfThen, tryNormalizeIfThenElse } from '../control' +import { parseLog } from '../../../json/parser' +import { getWithTokenType } from '../../normalize-meta' +import { expensiveTrace, log } from '../../../../../../../util/log' +import { normalizeComment } from '../other' +import { guard } from '../../../../../../../util/assert' +import { jsonReplacer } from '../../../../../../../util/json' + +function normalizeMappedWithoutSemicolonBasedOnType(mappedWithName: readonly NamedXmlBasedJson[], data: NormalizerData): (RNode | RDelimiter)[] { + let result: RNode | RDelimiter | undefined = undefined + switch(mappedWithName.length) { + case 1: + result = normalizeSingleNode(data, mappedWithName[0]) + break + case 2: + result = tryNormalizeUnary(data, mappedWithName as [NamedXmlBasedJson, NamedXmlBasedJson]) + ?? tryNormalizeRepeat(data, mappedWithName as [NamedXmlBasedJson, NamedXmlBasedJson]) + break + case 3: + result = tryNormalizeBinary(data, mappedWithName as [NamedXmlBasedJson, NamedXmlBasedJson, NamedXmlBasedJson]) + ?? tryNormalizeFor(data, mappedWithName as [NamedXmlBasedJson, NamedXmlBasedJson, NamedXmlBasedJson]) + ?? tryNormalizeSymbol(data, mappedWithName) + break + case 5: + result = tryNormalizeIfThen(data, mappedWithName as [NamedXmlBasedJson, NamedXmlBasedJson, NamedXmlBasedJson, NamedXmlBasedJson, NamedXmlBasedJson]) + ?? tryNormalizeWhile(data, mappedWithName as [NamedXmlBasedJson, NamedXmlBasedJson, NamedXmlBasedJson, NamedXmlBasedJson, NamedXmlBasedJson]) + break + case 7: + result = tryNormalizeIfThenElse(data, mappedWithName as [NamedXmlBasedJson, NamedXmlBasedJson, NamedXmlBasedJson, NamedXmlBasedJson, NamedXmlBasedJson, NamedXmlBasedJson, NamedXmlBasedJson]) + break + } + + // otherwise perform default parsing + return result !== undefined ? 
[result] : parseNodesWithUnknownType(data, mappedWithName) +} + +interface HandledExpressionList { + segments: readonly NamedXmlBasedJson[][] + comments: readonly NamedXmlBasedJson[] + braces: undefined | [start: NamedXmlBasedJson, end: NamedXmlBasedJson] +} + +export function splitComments(tokens: readonly NamedXmlBasedJson[]) { + const comments = [] + const others = [] + for(const elem of tokens) { + if(elem.name === RawRType.Comment) { + comments.push(elem) + } else { + others.push(elem) + } + } + return { comments, others } +} + + +function splitExprs(tokens: readonly NamedXmlBasedJson[]) { + let last = 0, i = 0 + let lastExpr = false + const segments: NamedXmlBasedJson[][] = [] + for(const token of tokens) { + if(token.name === RawRType.Semicolon) { + segments.push(tokens.slice(last, i)) + lastExpr = false + last = i + 1 + } else { + const thisExpr = token.name === RawRType.Expression || token.name === RawRType.ExprOfAssignOrHelp + if(thisExpr && lastExpr) { + if(i > last) { + segments.push(tokens.slice(last, i)) + } + segments.push([tokens[i]]) + last = i + 1 + } + lastExpr = thisExpr + } + i++ + } + if(last < tokens.length) { + segments.push(tokens.slice(last, tokens.length)) + } + return segments +} + +/** + * Handles semicolons within _and_ braces at the start and end of the expression + * @param raw - The tokens to split + */ +function handleExpressionList(raw: readonly NamedXmlBasedJson[]): HandledExpressionList { + if(raw.length === 0) { + return { segments: [], comments: [], braces: undefined } + } + const { comments, others: tokens } = splitComments(raw) + const first = tokens[0].name + if(first === RawRType.BraceLeft) { + const endType = tokens[tokens.length - 1].name + guard(endType === RawRType.BraceRight, () => `expected a brace at the end of the expression list as well, but ${endType} :: ${JSON.stringify(tokens[tokens.length - 1], jsonReplacer)}`) + return { + segments: [tokens.slice(1, tokens.length - 1)], + comments, + braces: [tokens[0], 
tokens[tokens.length - 1]] + } + } else if(first === RawRType.ParenLeft) { + const endType = tokens[tokens.length - 1].name + guard(endType === RawRType.ParenRight, () => `expected a parenthesis at the end of the expression list as well, but ${endType} :: ${JSON.stringify(tokens[tokens.length - 1], jsonReplacer)}`) + return { + segments: [tokens.slice(1, tokens.length - 1)], + comments, + braces: [tokens[0], tokens[tokens.length - 1]] + } + } else { + return { segments: splitExprs(tokens), comments, braces: undefined } + } +} + + +function processBraces([start, end]: [start: NamedXmlBasedJson, end: NamedXmlBasedJson], processed: readonly RNode[], comments: RComment[], data: NormalizerData) : RExpressionList { + const [newStart, newEnd] = [tryNormalizeSymbol(data, [start]), tryNormalizeSymbol(data, [end])] + guard(newStart !== undefined && newEnd !== undefined, () => `expected both start and end to be symbols, but ${JSON.stringify(start, jsonReplacer)} :: ${JSON.stringify(end, jsonReplacer)}`) + return { + type: RType.ExpressionList, + children: processed, + grouping: [newStart, newEnd], + lexeme: undefined, + location: undefined, + info: { + additionalTokens: comments, + } + } + +} + +export function normalizeExpressions( + data: NormalizerData, + tokens: readonly XmlBasedJson[] | readonly NamedXmlBasedJson[] +): (RNode | RDelimiter)[] { + if(tokens.length === 0) { + parseLog.warn('no children received, skipping') + return [] + } + + let mappedWithName = tokens[0].name ? tokens as readonly NamedXmlBasedJson[] : getWithTokenType(tokens as XmlBasedJson[]) + + expensiveTrace(log, () => `[parseBasedOnType] names: [${mappedWithName.map(({ name }) => name).join(', ')}]`) + + let parsedComments: RComment[] = [] + + if(mappedWithName.length > 1) { + // iterate over types, find all semicolons, and segment the tokens based on them. 
+ // we could potentially optimize as not all expr may have semicolons but not for now + const { segments, braces, comments } = handleExpressionList(mappedWithName) + parsedComments = comments.map(c => normalizeComment(data, c.content)) + + if(segments.length > 1 || braces) { + const processed = segments.flatMap(s => normalizeExpressions(data, s)) as RNode[] + guard(!processed.some(x => (x as RNode | RDelimiter).type === RType.Delimiter), () => `expected no delimiter tokens in ${JSON.stringify(processed)}`) + if(braces) { + return [processBraces(braces, processed, parsedComments, data)] + } else if(processed.length > 0) { + if(parsedComments) { + processed[0].info.additionalTokens ??= [] + processed[0].info.additionalTokens.push(...parsedComments) + } + return processed + } else { + return parsedComments + } + } + + /* + * if splitOnSemicolon.length === 1, we can continue with the normal parsing, but we may have had a trailing semicolon, with this, it is removed as well. + * splitOnSemicolon.length === 0 is not possible, as we would have had an empty array before, split does not add elements. 
+ */ + mappedWithName = segments[0] + } + + + return [...parsedComments, ...normalizeMappedWithoutSemicolonBasedOnType(mappedWithName, data)] +} + +export function parseNodesWithUnknownType(data: NormalizerData, mappedWithName: readonly NamedXmlBasedJson[]): (RNode | RDelimiter)[] { + const parsedNodes: (RNode | RDelimiter)[] = [] + // used to indicate the new root node of this set of nodes + for(const elem of mappedWithName) { + const retrieved = normalizeSingleNode(data, elem) + parsedNodes.push(retrieved) + } + return parsedNodes +} diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/root.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/normalize-root.ts similarity index 59% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/root.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/normalize-root.ts index 8f45f60eaf..977aec0561 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/root.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/normalize-root.ts @@ -1,27 +1,28 @@ +import type { NormalizerData } from '../../normalizer-data' import type { XmlBasedJson } from '../../input-format' -import { childrenKey, getKeysGuarded } from '../../input-format' -import { assureTokenType } from '../meta' -import { normalizeBasedOnType } from './elements' -import type { ParserData } from '../../data' +import { childrenKey, getKeyGuarded } from '../../input-format' import type { RExpressionList, RNode } from '../../../../model' -import { RType, RawRType } from '../../../../model' +import { RawRType, RType } from '../../../../model' +import { assureTokenType } from '../../normalize-meta' +import type { RDelimiter } from '../../../../model/nodes/info' +import { normalizeExpressions } from './normalize-expressions' import { log } from '../../../../../../../util/log' import { partition } from '../../../../../../../util/arrays' -import type { RDelimiter } from '../../../../model/nodes/info' 
-export function parseRootObjToAst( - data: ParserData, + +export function normalizeRootObjToAst( + data: NormalizerData, obj: XmlBasedJson ): RExpressionList { - const exprContent = getKeysGuarded(obj, RawRType.ExpressionList) + const exprContent = getKeyGuarded(obj, RawRType.ExpressionList) assureTokenType(exprContent, RawRType.ExpressionList) let parsedChildren: (RNode | RDelimiter)[] = [] if(childrenKey in exprContent) { - const children = getKeysGuarded(exprContent, childrenKey) + const children = getKeyGuarded(exprContent, childrenKey) - parsedChildren = normalizeBasedOnType(data, children) + parsedChildren = normalizeExpressions(data, children) } else { log.debug('no children found, assume empty input') } @@ -31,6 +32,7 @@ export function parseRootObjToAst( return { type: RType.ExpressionList, children: nodes as RNode[], + grouping: undefined, lexeme: undefined, info: { fullRange: data.currentRange, diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/single-element.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/normalize-single-node.ts similarity index 69% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/single-element.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/normalize-single-node.ts index 90bc4c73cd..c61d06918c 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/single-element.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/structure/normalize-single-node.ts @@ -1,45 +1,33 @@ +import { getWithTokenType } from '../../normalize-meta' +import type { NormalizerData } from '../../normalizer-data' import type { NamedXmlBasedJson } from '../../input-format' import { XmlParseError } from '../../input-format' -import { normalizeNumber, normalizeString, tryNormalizeSymbol } from '../values' -import { guard } from '../../../../../../../util/assert' -import type { ParserData } from '../../data' -import { normalizeExpression } from '../expression' -import { 
getWithTokenType, retrieveMetaStructure } from '../meta' +import type { RDelimiter } from '../../../../model/nodes/info' import type { RNode } from '../../../../model' -import { RawRType, RType } from '../../../../model' +import { RawRType } from '../../../../model' import { normalizeComment } from '../other' +import { normalizeLineDirective } from '../other/normalize-line-directive' +import { normalizeExpression } from '../expression' +import { normalizeNumber, normalizeString, tryNormalizeSymbol } from '../values' import { normalizeBreak, normalizeNext } from '../loops' -import { normalizeLineDirective } from '../other/line-directive' -import type { RDelimiter } from '../../../../model/nodes/info' - -function normalizeDelimiter(data: ParserData, elem: NamedXmlBasedJson): RDelimiter { - const { - location, - content - } = retrieveMetaStructure(elem.content) - return { - type: RType.Delimiter, - location, - lexeme: content, - subtype: elem.name - } -} +import { guard } from '../../../../../../../util/assert' +import { normalizeDelimiter } from './normalize-delimiter' /** * Parses a single structure in the ast based on its type (e.g., a string, a number, a symbol, ...) * - * @param data - The data used by the parser (see {@link ParserData}) + * @param data - The data used by the parser (see {@link NormalizerData}) * @param elem - The element to parse * * @returns The parsed element as an `RNode` or an `RDelimiter` if it is such. 
*/ -export function tryNormalizeSingleNode(data: ParserData, elem: NamedXmlBasedJson): RNode | RDelimiter { +export function normalizeSingleNode(data: NormalizerData, elem: NamedXmlBasedJson): RNode | RDelimiter { switch(elem.name) { case RawRType.ParenLeft: case RawRType.ParenRight: case RawRType.BraceLeft: case RawRType.BraceRight: - return normalizeDelimiter(data, elem) + return normalizeDelimiter(elem) case RawRType.Comment: return normalizeComment(data, elem.content) case RawRType.LineDirective: @@ -59,7 +47,7 @@ export function tryNormalizeSingleNode(data: ParserData, elem: NamedXmlBasedJson case RawRType.Symbol: case RawRType.Slot: case RawRType.NullConst: { - const symbol = tryNormalizeSymbol(data, getWithTokenType([elem.content])) + const symbol = tryNormalizeSymbol(data, getWithTokenType([elem.content])) guard(symbol !== undefined, () => `should have been parsed to a symbol but was ${JSON.stringify(symbol)}`) return symbol } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/values/index.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/values/index.ts index 2c0e58070b..8317815162 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/values/index.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/values/index.ts @@ -1,3 +1,3 @@ -export * from './number' -export * from './string' -export * from './symbol' +export * from './normalize-number' +export * from './normalize-string' +export * from './normalize-symbol' diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/values/number.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/values/normalize-number.ts similarity index 53% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/values/number.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/values/normalize-number.ts index 14d91efa92..516afbc404 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/values/number.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/values/normalize-number.ts @@ -1,25 +1,21 @@ 
+import type { NormalizerData } from '../../normalizer-data' import type { XmlBasedJson } from '../../input-format' -import type { RNa } from '../../../../../values' -import { boolean2ts, isBoolean, isNA, number2ts } from '../../../../../values' -import { retrieveMetaStructure } from '../meta' -import type { RLogical, RSymbol, NoInfo, RNumber } from '../../../../model' +import type { NoInfo, RLogical, RNumber, RSymbol } from '../../../../model' import { RType } from '../../../../model' -import type { ParserData } from '../../data' -import { executeHook } from '../../hooks' -import { parseLog } from '../../../json/parser' +import type { RNa } from '../../../../../convert-values' +import { number2ts, boolean2ts, isBoolean, isNA } from '../../../../../convert-values' +import { retrieveMetaStructure } from '../../normalize-meta' + /** * Normalize the given object as a R number (see {@link number2ts}), supporting booleans (see {@link boolean2ts}), * and special values. * This requires you to check the corresponding name beforehand. 
* - * @param data - The data used by the parser (see {@link ParserData}) + * @param data - The data used by the parser (see {@link NormalizerData}) * @param obj - The json object to extract the meta-information from */ -export function normalizeNumber(data: ParserData, obj: XmlBasedJson): RNumber | RLogical | RSymbol { - parseLog.debug('[number]') - obj = executeHook(data.hooks.values.onNumber.before, data, obj) - +export function normalizeNumber(data: NormalizerData, obj: XmlBasedJson): RNumber | RLogical | RSymbol { const { location, content } = retrieveMetaStructure(obj) const common = { location, @@ -31,27 +27,25 @@ export function normalizeNumber(data: ParserData, obj: XmlBasedJson): RNumber | } } - let result: RNumber | RLogical | RSymbol /* the special symbol */ if(isNA(content)) { - result = { + return { ...common, namespace: undefined, type: RType.Symbol, content } } else if(isBoolean(content)) { - result = { + return { ...common, type: RType.Logical, content: boolean2ts(content) } } else { - result = { + return { ...common, type: RType.Number, content: number2ts(content) } } - return executeHook(data.hooks.values.onNumber.after, data, result) } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/values/string.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/values/normalize-string.ts similarity index 68% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/values/string.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/values/normalize-string.ts index d0999cbf15..292e3db075 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/values/string.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/values/normalize-string.ts @@ -1,24 +1,19 @@ +import type { NormalizerData } from '../../normalizer-data' import type { XmlBasedJson } from '../../input-format' -import { retrieveMetaStructure } from '../meta' -import { string2ts } from '../../../../../values' import type { RString } from '../../../../model' import { RType } from 
'../../../../model' -import { executeHook } from '../../hooks' -import type { ParserData } from '../../data' +import { retrieveMetaStructure } from '../../normalize-meta' import { guard } from '../../../../../../../util/assert' -import { parseLog } from '../../../json/parser' +import { string2ts } from '../../../../../convert-values' /** * Normalize the given object as a R string (see {@link string2ts}). * This requires you to check the corresponding name beforehand. * - * @param data - The data used by the parser (see {@link ParserData}) + * @param data - The data used by the parser (see {@link NormalizerData}) * @param obj - The json object to extract the meta-information from */ -export function normalizeString(data: ParserData, obj: XmlBasedJson): RString { - parseLog.debug('[string]') - obj = executeHook(data.hooks.values.onString.before, data, obj) - +export function normalizeString(data: NormalizerData, obj: XmlBasedJson): RString { const { location, content } = retrieveMetaStructure(obj) // based on https://www.rdocumentation.org/packages/utils/versions/3.6.2/topics/getParseData we do not get strings with 1000 characters or more within the text field. 
@@ -29,7 +24,7 @@ export function normalizeString(data: ParserData, obj: XmlBasedJson): RString { stringContent = data.currentLexeme } - const result: RString = { + return { type: RType.String, location, content: string2ts(stringContent), @@ -40,5 +35,4 @@ export function normalizeString(data: ParserData, obj: XmlBasedJson): RString { fullLexeme: data.currentLexeme } } - return executeHook(data.hooks.values.onString.after, data, result) } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/values/symbol.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/values/normalize-symbol.ts similarity index 71% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/values/symbol.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/internal/values/normalize-symbol.ts index 1dd6bbefe5..5f3c813e32 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/values/symbol.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/values/normalize-symbol.ts @@ -1,27 +1,25 @@ +import type { NormalizerData } from '../../normalizer-data' import type { NamedXmlBasedJson } from '../../input-format' -import { guard } from '../../../../../../../util/assert' -import { retrieveMetaStructure } from '../meta' import type { RSymbol } from '../../../../model' -import { isSymbol, RType } from '../../../../model' -import type { ParserData } from '../../data' -import { executeHook, executeUnknownHook } from '../../hooks' -import { startAndEndsWith } from '../../../../../../../util/strings' +import { RType, isSymbol } from '../../../../model' +import { guard } from '../../../../../../../util/assert' import { parseLog } from '../../../json/parser' +import { retrieveMetaStructure } from '../../normalize-meta' +import { startAndEndsWith } from '../../../../../../../util/strings' /** * Normalize the given object as an R symbol (incorporating namespace information). *

* The special symbols `T` and `F` are parsed as logic values. * - * @param data - The data used by the parser (see {@link ParserData}) + * @param data - The data used by the parser (see {@link NormalizerData}) * @param objs - The json object to extract the meta-information from * * @returns The parsed symbol (with populated namespace information) or `undefined` if the given object is not a symbol. */ -export function tryNormalizeSymbol(data: ParserData, objs: NamedXmlBasedJson[]): RSymbol | undefined { +export function tryNormalizeSymbol(data: NormalizerData, objs: readonly NamedXmlBasedJson[]): RSymbol | undefined { guard(objs.length > 0, 'to parse symbols we need at least one object to work on!') parseLog.debug('trying to parse symbol') - objs = executeHook(data.hooks.values.onSymbol.before, data, objs) let location, content, namespace @@ -36,10 +34,10 @@ export function tryNormalizeSymbol(data: ParserData, objs: NamedXmlBasedJson[]): content = meta.content namespace = retrieveMetaStructure(objs[0].content).content } else { - return executeUnknownHook(data.hooks.values.onSymbol.unknown, data, objs) + return undefined } - const result: RSymbol = { + return { type: RType.Symbol, namespace, location, @@ -52,6 +50,4 @@ export function tryNormalizeSymbol(data: ParserData, objs: NamedXmlBasedJson[]): fullLexeme: data.currentLexeme } } - - return executeHook(data.hooks.values.onSymbol.after, data, result) } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/meta.ts b/src/r-bridge/lang-4.x/ast/parser/xml/normalize-meta.ts similarity index 84% rename from src/r-bridge/lang-4.x/ast/parser/xml/internal/meta.ts rename to src/r-bridge/lang-4.x/ast/parser/xml/normalize-meta.ts index 4953620506..2e1edb3dd6 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/meta.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/normalize-meta.ts @@ -1,13 +1,10 @@ -import type { NamedXmlBasedJson, XmlBasedJson } from '../input-format' -import { nameKey , contentKey , attributesKey , 
getKeysGuarded, XmlParseError } from '../input-format' - - - -import type { SourceRange } from '../../../../../../util/range' -import { rangeFrom, rangeStartsCompletelyBefore } from '../../../../../../util/range' -import type { RawRType, RExpressionList, RNode } from '../../../model' -import { RType } from '../../../model' -import { guard } from '../../../../../../util/assert' +import type { SourceRange } from '../../../../../util/range' +import { rangeFrom, rangeStartsCompletelyBefore } from '../../../../../util/range' +import type { RawRType, RExpressionList, RNode } from '../../model' +import { RType } from '../../model' +import { guard } from '../../../../../util/assert' +import type { NamedXmlBasedJson, XmlBasedJson } from './input-format' +import { XmlParseError, attributesKey, contentKey, nameKey } from './input-format' /** * if the passed object is an array with only one element, remove the array wrapper @@ -29,13 +26,12 @@ export function objectWithArrUnwrap(obj: XmlBasedJson[] | XmlBasedJson): XmlBase * given a xml element, extract the source location of the corresponding element in the R-ast */ export function extractLocation(ast: XmlBasedJson): SourceRange { - const { - line1, - col1, - line2, - col2 - } = getKeysGuarded(ast, 'line1', 'col1', 'line2', 'col2') - return rangeFrom(line1, col1, line2, col2) + return rangeFrom( + ast['line1'] as string, + ast['col1'] as string, + ast['line2'] as string, + ast['col2'] as string + ) } /** @@ -53,8 +49,8 @@ export function retrieveMetaStructure(obj: XmlBasedJson): { } { const unwrappedObj = objectWithArrUnwrap(obj) const attributes = obj[attributesKey] as XmlBasedJson | undefined - const content = obj[contentKey] as string | undefined ?? '' guard(attributes !== undefined, () => `expected attributes to be defined for ${JSON.stringify(obj)}`) + const content = obj[contentKey] as string | undefined ?? 
'' const location = extractLocation(attributes) return { unwrappedObj, @@ -77,7 +73,7 @@ export function assureTokenType(obj: XmlBasedJson, expectedName: RawRType): void * @param content - the json object to extract the token-type from */ export function getTokenType(content: XmlBasedJson): RawRType { - return getKeysGuarded(content, nameKey) as RawRType + return content[nameKey] as RawRType } export function getWithTokenType(obj: XmlBasedJson[]) { @@ -112,6 +108,7 @@ export function ensureExpressionList(node: RNode): RExpressionList +export interface NormalizerData extends MergeableRecord { /** * The currently active source range during parsing, i.e. the full range of the current element. */ - currentRange: SourceRange | undefined + currentRange: SourceRange | undefined /** * The currently active lexeme during parsing, i.e. the full lexeme of the current element. */ - currentLexeme: string | undefined + currentLexeme: string | undefined } diff --git a/src/r-bridge/lang-4.x/values.ts b/src/r-bridge/lang-4.x/convert-values.ts similarity index 95% rename from src/r-bridge/lang-4.x/values.ts rename to src/r-bridge/lang-4.x/convert-values.ts index a5b6d1cfc3..5944bc4547 100644 --- a/src/r-bridge/lang-4.x/values.ts +++ b/src/r-bridge/lang-4.x/convert-values.ts @@ -9,9 +9,7 @@ class ValueConversionError extends Error { * transforms a value to something R can understand (e.g., booleans to TRUE/FALSE) */ export function ts2r(value: T): string { - if(typeof value === 'undefined') { - return 'NA' - } else if(typeof value === 'string') { + if(typeof value === 'string') { return JSON.stringify(value) } else if(typeof value === 'number') { if(isNaN(value)) { @@ -24,6 +22,8 @@ export function ts2r(value: T): string { return value ? 
'TRUE' : 'FALSE' } else if(value === null) { return 'NULL' + } else if(typeof value === 'undefined') { + return 'NA' } else if(Array.isArray(value)) { return `c(${value.map(ts2r).join(', ')})` } else if(typeof value === 'object') { @@ -126,8 +126,8 @@ export function number2ts(value: string): RNumberValue { export interface RStringValue { str: string - /** from the R-language definition a string is either delimited by a pair of single or double quotes */ - quotes: '"' | '\'' + /** from the R-language definition a string is either delimited by a pair of single or double quotes, 'none' strings are syntactically unquoted but treated as strings */ + quotes: '"' | '\'' | 'none' /** a string is raw if prefixed with r */ flag?: 'raw' } diff --git a/src/r-bridge/lang-4.x/index.ts b/src/r-bridge/lang-4.x/index.ts index 55705614c7..c2a55dd3c3 100644 --- a/src/r-bridge/lang-4.x/index.ts +++ b/src/r-bridge/lang-4.x/index.ts @@ -1,2 +1,2 @@ -export * from './values' +export * from './convert-values' export * from './ast' diff --git a/src/r-bridge/retriever.ts b/src/r-bridge/retriever.ts index 22f6b29cb4..e32a2fde35 100644 --- a/src/r-bridge/retriever.ts +++ b/src/r-bridge/retriever.ts @@ -1,18 +1,19 @@ import { type RShell } from './shell' -import type { XmlParserHooks, NormalizedAst } from './lang-4.x' +import type { NormalizedAst } from './lang-4.x' import { ts2r } from './lang-4.x' import { startAndEndsWith } from '../util/strings' -import type { AsyncOrSync, DeepPartial } from 'ts-essentials' +import type { AsyncOrSync } from 'ts-essentials' import { guard } from '../util/assert' import { RShellExecutor } from './shell-executor' import objectHash from 'object-hash' import { normalize } from './lang-4.x/ast/parser/json/parser' +import { ErrorMarker } from './init' export const fileProtocol = 'file://' as const export interface RParseRequestFromFile { readonly request: 'file'; - /** The path to the file (absolute paths are probably best here */ + /** The path to the file 
(absolute paths are probably best here) */ readonly content: string; } @@ -74,8 +75,6 @@ export function requestFingerprint(request: RParseRequest): string { return objectHash(request) } -const ErrorMarker = 'err' - /** * Provides the capability to parse R files/R code using the R parser. * Depends on {@link RShell} to provide a connection to R. @@ -84,25 +83,12 @@ const ErrorMarker = 'err' * If successful, allows to further query the last result with {@link retrieveNumberOfRTokensOfLastParse}. */ export function retrieveParseDataFromRCode(request: RParseRequest, shell: (RShell | RShellExecutor)): AsyncOrSync { + if(request.content.trim() === '') { + return Promise.resolve('') + } const suffix = request.request === 'file' ? ', encoding="utf-8"' : '' - const eol = ts2r(shell.options.eol) - const command = - /* first check if flowr_get is already part of the environment */ - 'if(!exists("flowr_get")){' - /* if not, define it complete wrapped in a try so that we can handle failures gracefully on stdout */ - + 'flowr_get<-function(...){tryCatch({' - /* the actual code to parse the R code, ... allows us to keep the old 'file=path' and 'text=content' semantics. we define flowr_output using the super assignment to persist it in the env! */ - + 'flowr_output<<-utils::getParseData(parse(...,keep.source=TRUE),includeText=TRUE);' - /* json conversion of the output, dataframe="values" allows us to receive a list of lists (which is more compact)! - * so we do not depend on jsonlite and friends, we do so manually (:sparkles:) - */ - + `cat("[",paste0(apply(flowr_output,1,function(o)sprintf("[%s,%s,%s,%s,%s,%s,%s,%s,%s]",o[[1]],o[[2]],o[[3]],o[[4]],o[[5]],o[[6]],deparse(o[[7]]),if(o[[8]])"true"else"false",deparse(o[[9]]))),collapse=","),"]",${eol},sep="")` - /* error handling (just produce the marker) */ - + `},error=function(e){cat("${ErrorMarker}",${eol})})};` - /* compile the function to improve perf. 
*/ - + 'flowr_get<-compiler::cmpfun(flowr_get)};' - /* call the function with the request */ - + `flowr_get(${request.request}=${JSON.stringify(request.content)}${suffix})` + /* call the function with the request */ + const command =`flowr_get_ast(${request.request}=${JSON.stringify(request.content)}${suffix})` if(shell instanceof RShellExecutor) { return guardRetrievedOutput(shell.run(command), request) @@ -117,9 +103,9 @@ export function retrieveParseDataFromRCode(request: RParseRequest, shell: (RShel * Uses {@link retrieveParseDataFromRCode} and returns the nicely formatted object-AST. * If successful, allows to further query the last result with {@link retrieveNumberOfRTokensOfLastParse}. */ -export async function retrieveNormalizedAstFromRCode(request: RParseRequest, shell: RShell, hooks?: DeepPartial): Promise { +export async function retrieveNormalizedAstFromRCode(request: RParseRequest, shell: RShell): Promise { const data = await retrieveParseDataFromRCode(request, shell) - return normalize(data, hooks) + return normalize(data) } /** diff --git a/src/r-bridge/shell-executor.ts b/src/r-bridge/shell-executor.ts index bc4c5ca5d0..2eebb9556b 100644 --- a/src/r-bridge/shell-executor.ts +++ b/src/r-bridge/shell-executor.ts @@ -5,22 +5,18 @@ import { spawnSync } from 'child_process' import { ts2r } from './lang-4.x' import type { SemVer } from 'semver' import semver from 'semver/preload' -import { log, LogLevel } from '../util/log' +import { expensiveTrace, log } from '../util/log' +import { initCommand } from './init' const executorLog = log.getSubLogger({ name: 'RShellExecutor' }) export class RShellExecutor { public readonly options: Readonly - private readonly prerequisites: string[] = [] + private readonly prerequisites: string[] public constructor(options?: Partial) { this.options = deepMergeObject(DEFAULT_R_SHELL_OPTIONS, options) - } - - public continueOnError(): this { - executorLog.info('continue in case of Errors') - 
this.addPrerequisites('options(error=function() {})') - return this + this.prerequisites = [initCommand(this.options.eol)] } public addPrerequisites(commands: string | string[]): this { @@ -30,15 +26,13 @@ export class RShellExecutor { public usedRVersion(): SemVer | null{ const version = this.run(`cat(paste0(R.version$major,".",R.version$minor), ${ts2r(this.options.eol)})`) - executorLog.trace(`raw version: ${JSON.stringify(version)}`) + expensiveTrace(executorLog, () => `raw version: ${JSON.stringify(version)}`) return semver.coerce(version) } public run(command: string, returnErr = false): string { command += ';base::quit()' - if(executorLog.settings.minLevel >= LogLevel.Trace) { - executorLog.trace(`> ${JSON.stringify(command)}`) - } + expensiveTrace(executorLog, () => `> ${JSON.stringify(command)}`) const returns = spawnSync(this.options.pathToRExecutable, this.options.commandLineOptions, { env: this.options.env, diff --git a/src/r-bridge/shell.ts b/src/r-bridge/shell.ts index 76e3fcf46b..dc1540f97e 100644 --- a/src/r-bridge/shell.ts +++ b/src/r-bridge/shell.ts @@ -1,15 +1,16 @@ -import { type ChildProcessWithoutNullStreams, spawn } from 'child_process' +import { type ChildProcessWithoutNullStreams, spawn } from 'node:child_process' import { deepMergeObject, type MergeableRecord } from '../util/objects' import { type ILogObj, type Logger } from 'tslog' import * as readline from 'readline' import { ts2r } from './lang-4.x' -import { log, LogLevel } from '../util/log' +import { expensiveTrace, log, LogLevel } from '../util/log' import type { SemVer } from 'semver' import semver from 'semver/preload' import { getPlatform } from '../util/os' import fs from 'fs' +import type { DeepReadonly , AsyncOrSync } from 'ts-essentials' +import { initCommand } from './init' import { getConfig } from '../config' -import type { AsyncOrSync } from 'ts-essentials' export type OutputStreamSelector = 'stdout' | 'stderr' | 'both'; @@ -18,7 +19,7 @@ export interface CollectorTimeout 
extends MergeableRecord { * number of milliseconds to wait for the collection to finish */ ms: number - /** + /* * if true, the timeout will reset whenever we receive new data */ resetOnNewData: boolean @@ -65,6 +66,12 @@ export const DEFAULT_OUTPUT_COLLECTOR_CONFIGURATION: OutputCollectorConfiguratio errorStopsWaiting: true } +export const enum RShellReviveOptions { + Never, + OnError, + Always +} + export interface RShellExecutionOptions extends MergeableRecord { /** The path to the R executable, can be only the executable if it is to be found on the PATH. */ readonly pathToRExecutable: string @@ -75,14 +82,14 @@ export interface RShellExecutionOptions extends MergeableRecord { /** The character to use to mark the end of a line. Is probably always `\n` (even on windows). */ readonly eol: string /** The environment variables available in the R session. */ - readonly env: NodeJS.ProcessEnv + readonly env: NodeJS.ProcessEnv | undefined /** The path to the library directory, use undefined to let R figure that out for itself */ readonly homeLibPath: string | undefined } export interface RShellSessionOptions extends RShellExecutionOptions { /** If set, the R session will be restarted if it exits due to an error */ - readonly revive: 'never' | 'on-error' | 'always' + readonly revive: RShellReviveOptions /** Called when the R session is restarted, this makes only sense if `revive` is not set to `'never'` */ readonly onRevive: (code: number, signal: string | null) => void } @@ -98,9 +105,9 @@ export interface RShellOptions extends RShellSessionOptions { export const DEFAULT_R_PATH = getPlatform() === 'windows' ? 'R.exe' : 'R' export const DEFAULT_R_SHELL_EXEC_OPTIONS: RShellExecutionOptions = { pathToRExecutable: getConfig().rPath ?? 
DEFAULT_R_PATH, - commandLineOptions: ['--vanilla', '--quiet', '--no-echo', '--no-save'], + commandLineOptions: ['--vanilla', '--quiet', '--no-echo', '--no-save', '--slave'], cwd: process.cwd(), - env: process.env, + env: undefined, eol: '\n', homeLibPath: getPlatform() === 'windows' ? undefined : '~/.r-libs' } as const @@ -108,7 +115,7 @@ export const DEFAULT_R_SHELL_EXEC_OPTIONS: RShellExecutionOptions = { export const DEFAULT_R_SHELL_OPTIONS: RShellOptions = { ...DEFAULT_R_SHELL_EXEC_OPTIONS, sessionName: 'default', - revive: 'never', + revive: RShellReviveOptions.Never, onRevive: () => { /* do nothing */ } } as const @@ -129,7 +136,7 @@ export class RShell { private tempDirs = new Set() public constructor(options?: Partial) { - this.options = deepMergeObject(DEFAULT_R_SHELL_OPTIONS, options) + this.options = { ...DEFAULT_R_SHELL_OPTIONS, ...options } this.log = log.getSubLogger({ name: this.options.sessionName }) this.session = new RShellSession(this.options, this.log) @@ -137,12 +144,12 @@ export class RShell { } private revive() { - if(this.options.revive === 'never') { + if(this.options.revive === RShellReviveOptions.Never) { return } this.session.onExit((code, signal) => { - if(this.options.revive === 'always' || (this.options.revive === 'on-error' && code !== 0)) { + if(this.options.revive === RShellReviveOptions.Always || (this.options.revive === RShellReviveOptions.OnError && code !== 0)) { this.log.warn(`R session exited with code ${code}, reviving!`) this.options.onRevive(code, signal) this.session = new RShellSession(this.options, this.log) @@ -156,7 +163,7 @@ export class RShell { * will not do anything to alter input markers! 
*/ public sendCommand(command: string): void { - if(this.log.settings.minLevel >= LogLevel.Trace) { + if(this.log.settings.minLevel <= LogLevel.Trace) { this.log.trace(`> ${JSON.stringify(command)}`) } this._sendCommand(command) @@ -175,7 +182,7 @@ export class RShell { onTimeout: resolve => resolve([]) } }) - this.log.trace(`raw version: ${JSON.stringify(result)}`) + expensiveTrace(this.log, () => `raw version: ${JSON.stringify(result)}`) if(result.length === 1) { this.versionCache = semver.coerce(result[0]) return this.versionCache @@ -184,8 +191,8 @@ export class RShell { } } - public injectLibPaths(...paths: string[]): void { - this.log.debug(`injecting lib paths ${JSON.stringify(paths)}`) + public injectLibPaths(...paths: readonly string[]): void { + expensiveTrace(this.log, () => `injecting lib paths ${JSON.stringify(paths)}`) this._sendCommand(`.libPaths(c(.libPaths(), ${paths.map(ts2r).join(',')}))`) } @@ -220,9 +227,7 @@ export class RShell { */ public async sendCommandWithOutput(command: string, addonConfig?: Partial): Promise { const config = deepMergeObject(DEFAULT_OUTPUT_COLLECTOR_CONFIGURATION, addonConfig) - if(this.log.settings.minLevel >= LogLevel.Trace) { - this.log.trace(`> ${JSON.stringify(command)}`) - } + expensiveTrace(this.log, () => `> ${JSON.stringify(command)}`) const output = await this.session.collectLinesUntil(config.from, { predicate: data => data === config.postamble, @@ -230,7 +235,7 @@ export class RShell { }, config.timeout, () => { this._sendCommand(command) if(config.from === 'stderr') { - this._sendCommand(`cat("${config.postamble}${this.options.eol}", file=stderr())`) + this._sendCommand(`cat("${config.postamble}${this.options.eol}",file=stderr())`) } else { this._sendCommand(`cat("${config.postamble}${this.options.eol}")`) } @@ -247,7 +252,7 @@ export class RShell { * * @see sendCommand */ - public sendCommands(...commands: string[]): void { + public sendCommands(...commands: readonly string[]): void { for(const element of 
commands) { this.sendCommand(element) } @@ -261,22 +266,13 @@ export class RShell { this._sendCommand('rm(list=ls())') } - /** - * usually R will stop execution on errors, with this the R session will try to - * continue working! - */ - public continueOnError(): void { - this.log.info('continue in case of Errors') - this._sendCommand('options(error=function() {})') - } - /** * Obtain the temporary directory used by R. * Additionally, this marks the directory for removal when the shell exits. */ public async obtainTmpDir(): Promise { - this.sendCommand('temp <- tempdir()') - const [tempdir] = await this.sendCommandWithOutput(`cat(temp, ${ts2r(this.options.eol)})`) + this.sendCommand('temp<-tempdir()') + const [tempdir] = await this.sendCommandWithOutput(`cat(temp,${ts2r(this.options.eol)})`) this.tempDirs.add(tempdir) return tempdir } @@ -302,11 +298,10 @@ class RShellSession { private readonly bareSession: ChildProcessWithoutNullStreams private readonly sessionStdOut: readline.Interface private readonly sessionStdErr: readline.Interface - private readonly options: RShellSessionOptions - private readonly log: Logger + private readonly options: DeepReadonly private collectionTimeout: NodeJS.Timeout | undefined - public constructor(options: RShellSessionOptions, log: Logger) { + public constructor(options: DeepReadonly, log: Logger) { this.bareSession = spawn(options.pathToRExecutable, options.commandLineOptions, { env: options.env, cwd: options.cwd, @@ -324,8 +319,21 @@ class RShellSession { this.end() }) this.options = options - this.log = log - this.setupRSessionLoggers() + // initialize the session + this.writeLine(initCommand(options.eol)) + + if(log.settings.minLevel <= LogLevel.Trace) { + this.bareSession.stdout.on('data', (data: Buffer) => { + log.trace(`< ${data.toString()}`) + }) + this.bareSession.on('close', (code: number) => { + log.trace(`session exited with code ${code}`) + }) + } + + this.bareSession.stderr.on('data', (data: string) => { + log.warn(`< 
${data}`) + }) } public write(data: string): void { @@ -396,7 +404,7 @@ class RShellSession { * @returns true if the kill succeeds, false otherwise * @see RShell#close */ - end(filesToUnlink?: string[]): boolean { + end(filesToUnlink?: readonly string[]): boolean { if(filesToUnlink !== undefined) { log.info(`unlinking ${filesToUnlink.length} files (${JSON.stringify(filesToUnlink)})`) for(const f of filesToUnlink) { @@ -414,20 +422,6 @@ class RShellSession { return killResult } - private setupRSessionLoggers(): void { - if(this.log.settings.minLevel >= LogLevel.Trace) { - this.bareSession.stdout.on('data', (data: Buffer) => { - this.log.trace(`< ${data.toString()}`) - }) - this.bareSession.on('close', (code: number) => { - this.log.trace(`session exited with code ${code}`) - }) - } - this.bareSession.stderr.on('data', (data: string) => { - this.log.warn(`< ${data}`) - }) - } - public onExit(callback: (code: number, signal: string | null) => void): void { this.bareSession.on('exit', callback) this.bareSession.stdin.on('error', callback) diff --git a/src/reconstruct/reconstruct.ts b/src/reconstruct/reconstruct.ts index b452dabb96..f5ae21afce 100644 --- a/src/reconstruct/reconstruct.ts +++ b/src/reconstruct/reconstruct.ts @@ -27,12 +27,13 @@ import type { import { RType, foldAstStateful + , EmptyArgument } from '../r-bridge' -import { log, LogLevel } from '../util/log' -import { guard, isNotNull } from '../util/assert' +import { expensiveTrace, log, LogLevel } from '../util/log' +import { guard } from '../util/assert' import type { MergeableRecord } from '../util/objects' -// -type Selection = Set + +type Selection = ReadonlySet interface PrettyPrintLine { line: string indent: number @@ -49,13 +50,7 @@ const getLexeme = (n: RNodeWithParent) => n.info.fullLexeme ?? n.lexeme ?? 
'' const reconstructAsLeaf = (leaf: RNodeWithParent, configuration: ReconstructionConfiguration): Code => { const selectionHasLeaf = configuration.selection.has(leaf.info.id) || configuration.autoSelectIf(leaf) - if(selectionHasLeaf) { - return foldToConst(leaf) - } else { - return [] - } - // reconstructLogger.trace(`reconstructAsLeaf: ${leaf.info.id} (${selectionHasLeaf ? 'y' : 'n'}): ${JSON.stringify(wouldBe)}`) - // return selectionHasLeaf ? wouldBe : [] + return selectionHasLeaf ? foldToConst(leaf) : [] } const foldToConst = (n: RNodeWithParent): Code => plain(getLexeme(n)) @@ -64,21 +59,34 @@ function indentBy(lines: Code, indent: number): Code { return lines.map(({ line, indent: i }) => ({ line, indent: i + indent })) } -function reconstructExpressionList(exprList: RExpressionList, expressions: Code[], configuration: ReconstructionConfiguration): Code { - if(isSelected(configuration, exprList)) { - return plain(getLexeme(exprList)) - } - +function reconstructExpressionList(exprList: RExpressionList, _grouping: [Code, Code] | undefined, expressions: Code[], config: ReconstructionConfiguration): Code { const subExpressions = expressions.filter(e => e.length > 0) if(subExpressions.length === 0) { - return [] + if(isSelected(config, exprList)) { + return plain('{}') + } else { + return [] + } } else if(subExpressions.length === 1) { - return subExpressions[0] + if(!isSelected(config, exprList)) { + return subExpressions[0] + } + const [fst] = subExpressions + const g = exprList.grouping + + if(g && fst.length > 0) { + const start = g[0].content + const end = g[1].content + fst[0].line = `${start}${start === '{' ? ' ' : ''}${fst[0].line}` + fst[fst.length - 1].line = `${fst[fst.length - 1].line}${end === '}' ? ' ' : ''}${end}` + } + return fst } else { + const g = exprList.grouping return [ - { line: '{', indent: 0 }, + ...(g ? plain(g[0].content) : plain('{')), ...indentBy(subExpressions.flat(), 1), - { line: '}', indent: 0 } + ...(g ? 
plain(g[1].content) : plain('}')) ] } } @@ -106,11 +114,7 @@ function reconstructUnaryOp(leaf: RNodeWithParent, operand: Code, configuration: } } -function reconstructBinaryOp(n: RBinaryOp | RPipe, lhs: Code, rhs: Code, configuration: ReconstructionConfiguration): Code { - if(isSelected(configuration, n)) { - return plain(getLexeme(n)) - } - +function reconstructBinaryOp(n: RBinaryOp | RPipe, lhs: Code, rhs: Code): Code { if(lhs.length === 0 && rhs.length === 0) { return [] } @@ -124,131 +128,120 @@ function reconstructBinaryOp(n: RBinaryOp | RPipe' : n.operator, rhs) } -function reconstructForLoop(loop: RForLoop, variable: Code, vector: Code, body: Code, configuration: ReconstructionConfiguration): Code { - if(isSelected(configuration, loop)) { - return plain(getLexeme(loop)) - } - if(body.length === 0 && variable.length === 0 && vector.length === 0) { +function reconstructForLoop(loop: RForLoop, variable: Code, vector: Code, body: Code, config: ReconstructionConfiguration): Code { + if(!isSelected(config, loop) && variable.length === 0 && vector.length === 0) { + return body + } else if(body.length === 0 && variable.length === 0 && vector.length === 0) { return [] + } else if(body.length <= 1) { + // 'inline' + return [{ + line: `for(${getLexeme(loop.variable)} in ${getLexeme(loop.vector)}) ${body.length === 0 ? '{}' : body[0].line}`, + indent: 0 + }] + } else if(body[0].line === '{' && body[body.length - 1].line === '}') { + // 'block' + return [ + { line: `for(${getLexeme(loop.variable)} in ${getLexeme(loop.vector)}) {`, indent: 0 }, + ...body.slice(1, body.length - 1), + { line: '}', indent: 0 } + ] } else { - if(body.length <= 1) { - // 'inline' - return [{ line: `for(${getLexeme(loop.variable)} in ${getLexeme(loop.vector)}) ${body.length === 0 ? 
'{}' : body[0].line}`, indent: 0 }] - } else if(body[0].line === '{' && body[body.length - 1].line === '}') { - // 'block' - return [ - { line: `for(${getLexeme(loop.variable)} in ${getLexeme(loop.vector)}) {`, indent: 0 }, - ...body.slice(1, body.length - 1), - { line: '}', indent: 0 } - ] - } else { - // unknown - return [ - { line: `for(${getLexeme(loop.variable)} in ${getLexeme(loop.vector)})`, indent: 0 }, - ...indentBy(body, 1) - ] - } + // unknown + return [ + { line: `for(${getLexeme(loop.variable)} in ${getLexeme(loop.vector)})`, indent: 0 }, + ...indentBy(body, 1) + ] } } function reconstructRepeatLoop(loop: RRepeatLoop, body: Code, configuration: ReconstructionConfiguration): Code { - if(isSelected(configuration, loop)) { - return plain(getLexeme(loop)) - } else if(body.length === 0) { - return [] + const sel = isSelected(configuration, loop) + if(!sel) { + return body + } else if(body.length <= 1) { + // 'inline' + return [{ line: `repeat ${body.length === 0 ? '{}' : body[0].line}`, indent: 0 }] + } else if(body[0].line === '{' && body[body.length - 1].line === '}') { + // 'block' + return [ + { line: 'repeat {', indent: 0 }, + ...body.slice(1, body.length - 1), + { line: '}', indent: 0 } + ] } else { - if(body.length <= 1) { - // 'inline' - return [{ line: `repeat ${body.length === 0 ? 
'{}' : body[0].line}`, indent: 0 }] - } else if(body[0].line === '{' && body[body.length - 1].line === '}') { - // 'block' + // unknown + return [ + { line: 'repeat', indent: 0 }, + ...indentBy(body, 1) + ] + } +} + +function reconstructIfThenElse(ifThenElse: RIfThenElse, condition: Code, then: Code, otherwise: Code | undefined, config: ReconstructionConfiguration): Code { + otherwise ??= [] + if(then.length === 0 && otherwise.length === 0) { + if(isSelected(config, ifThenElse)) { + return [{ line: `if(${getLexeme(ifThenElse.condition)}) { }`, indent: 0 }] + } else if(condition.length > 0) { + return condition + } else { + return [] + } + } else if(otherwise.length === 0) { + if(isSelected(config, ifThenElse)) { return [ - { line: 'repeat {', indent: 0 }, - ...body.slice(1, body.length - 1), - { line: '}', indent: 0 } + { line: `if(${getLexeme(ifThenElse.condition)}) ${then[0].line}`, indent: 0 }, + ...indentBy(then.splice(1), 1) ] } else { - // unknown + return then + } + } else if(then.length === 0) { + if(isSelected(config, ifThenElse)) { return [ - { line: 'repeat', indent: 0 }, - ...indentBy(body, 1) + { line: `if(${getLexeme(ifThenElse.condition)}) { } else ${otherwise[0].line}`, indent: 0 }, + ...indentBy(otherwise.splice(1), 1) ] + } else { + return otherwise } - } -} - -function removeExpressionListWrap(code: Code) { - if(code.length > 0 && code[0].line === '{' && code[code.length - 1].line === '}') { - return indentBy(code.slice(1, code.length - 1), -1) } else { - return code + return [ + { line: `if(${getLexeme(ifThenElse.condition)}) ${then[0].line}`, indent: 0 }, + ...indentBy(then.splice(1), 1), + { line: `else ${otherwise[1].line}`, indent: 0 }, + ...indentBy(otherwise.splice(1), 1) + ] } } -function reconstructIfThenElse(ifThenElse: RIfThenElse, condition: Code, when: Code, otherwise: Code | undefined, configuration: ReconstructionConfiguration): Code { - if(isSelected(configuration, ifThenElse)) { - return plain(getLexeme(ifThenElse)) - } - 
otherwise ??= [] - if(condition.length === 0 && when.length === 0 && otherwise.length === 0) { +function reconstructWhileLoop(loop: RWhileLoop, condition: Code, body: Code, configuration: ReconstructionConfiguration): Code { + const sel = isSelected(configuration, loop) + if(!sel && condition.length === 0) { + return body + } else if(body.length === 0 && condition.length === 0) { return [] - } - if(otherwise.length === 0 && when.length === 0) { - return [ - { line: `if(${getLexeme(ifThenElse.condition)}) { }`, indent: 0 } - ] - } else if(otherwise.length === 0) { - return [ - { line: `if(${getLexeme(ifThenElse.condition)}) {`, indent: 0 }, - ...indentBy(removeExpressionListWrap(when), 1), - { line: '}', indent: 0 } - ] - } else if(when.length === 0) { + } else if(body.length <= 1) { + // 'inline' + return [{ line: `while(${getLexeme(loop.condition)}) ${body.length === 0 ? '{}' : body[0].line}`, indent: 0 }] + } else if(body[0].line === '{' && body[body.length - 1].line === '}') { + // 'block' return [ - { line: `if(${getLexeme(ifThenElse.condition)}) { } else {`, indent: 0 }, - ...indentBy(removeExpressionListWrap(otherwise), 1), + { line: `while(${getLexeme(loop.condition)}) {`, indent: 0 }, + ...body.slice(1, body.length - 1), { line: '}', indent: 0 } ] } else { + // unknown return [ - { line: `if(${getLexeme(ifThenElse.condition)}) {`, indent: 0 }, - ...indentBy(removeExpressionListWrap(when), 1), - { line: '} else {', indent: 0 }, - ...indentBy(removeExpressionListWrap(otherwise), 1), - { line: '}', indent: 0 } + { line: `while(${getLexeme(loop.condition)})`, indent: 0 }, + ...indentBy(body, 1) ] } } - -function reconstructWhileLoop(loop: RWhileLoop, condition: Code, body: Code, configuration: ReconstructionConfiguration): Code { - if(isSelected(configuration, loop)) { - return plain(getLexeme(loop)) - } - if(body.length === 0 && condition.length === 0) { - return [] - } else { - if(body.length <= 1) { - // 'inline' - return [{ line: 
`while(${getLexeme(loop.condition)}) ${body.length === 0 ? '{}' : body[0].line}`, indent: 0 }] - } else if(body[0].line === '{' && body[body.length - 1].line === '}') { - // 'block' - return [ - { line: `while(${getLexeme(loop.condition)}) {`, indent: 0 }, - ...body.slice(1, body.length - 1), - { line: '}', indent: 0 } - ] - } else { - // unknown - return [ - { line: `while(${getLexeme(loop.condition)})`, indent: 0 }, - ...indentBy(body, 1) - ] - } - } -} - function reconstructParameters(parameters: RParameter[]): string[] { // const baseParameters = parameters.flatMap(p => plain(getLexeme(p))) return parameters.map(p => { @@ -260,28 +253,21 @@ function reconstructParameters(parameters: RParameter[]): str }) } +function isNotEmptyArgument(a: Code | typeof EmptyArgument): a is Code { + return a !== EmptyArgument +} -function reconstructFoldAccess(node: RAccess, accessed: Code, access: string | (Code | null)[], configuration: ReconstructionConfiguration): Code { - if(isSelected(configuration, node)) { - return plain(getLexeme(node)) - } - +function reconstructFoldAccess(node: RAccess, accessed: Code, access: readonly (Code | typeof EmptyArgument)[]): Code { if(accessed.length === 0) { - if(typeof access === 'string') { - return [] - } else { - return access.filter(isNotNull).flat() - } + return access.filter(isNotEmptyArgument).flat() + } else if(access.every(a => a === EmptyArgument || a.length === 0)) { + return accessed } return plain(getLexeme(node)) } -function reconstructArgument(argument: RArgument, name: Code | undefined, value: Code | undefined, configuration: ReconstructionConfiguration): Code { - if(isSelected(configuration, argument)) { - return plain(getLexeme(argument)) - } - +function reconstructArgument(argument: RArgument, name: Code | undefined, value: Code | undefined): Code { if(argument.name !== undefined && name !== undefined && name.length > 0) { return plain(`${getLexeme(argument.name)}=${argument.value ? 
getLexeme(argument.value) : ''}`) } else { @@ -290,11 +276,7 @@ function reconstructArgument(argument: RArgument, name: Code } -function reconstructParameter(parameter: RParameter, name: Code, value: Code | undefined, configuration: ReconstructionConfiguration): Code { - if(isSelected(configuration, parameter)) { - return plain(getLexeme(parameter)) - } - +function reconstructParameter(parameter: RParameter, name: Code): Code { if(parameter.defaultValue !== undefined && name.length > 0) { return plain(`${getLexeme(parameter.name)}=${getLexeme(parameter.defaultValue)}`) } else if(parameter.defaultValue !== undefined && name.length === 0) { @@ -305,49 +287,46 @@ function reconstructParameter(parameter: RParameter, name: Co } -function reconstructFunctionDefinition(definition: RFunctionDefinition, functionParameters: Code[], body: Code, configuration: ReconstructionConfiguration): Code { +function reconstructFunctionDefinition(definition: RFunctionDefinition, functionParameters: readonly Code[], body: Code, config: ReconstructionConfiguration): Code { // if a definition is not selected, we only use the body - slicing will always select the definition - if(!isSelected(configuration, definition) && functionParameters.every(p => p.length === 0)) { - return body + if(functionParameters.every(p => p.length === 0)) { + const empty = body === undefined || body.length === 0 + const selected = isSelected(config, definition) + if(empty && selected) { // give function stub + return plain(`function(${reconstructParameters(definition.parameters).join(', ')}) { }`) + } else if(!selected) { // do not require function + return body + } } const parameters = reconstructParameters(definition.parameters).join(', ') if(body.length <= 1) { // 'inline' - const bodyStr = body.length === 0 ? '' : `${body[0].line} ` /* add suffix space */ + const bodyStr = body.length === 0 ? 
'{ }' : `${body[0].line}` // we keep the braces in every case because I do not like no-brace functions - return [{ line: `function(${parameters}) { ${bodyStr}}`, indent: 0 }] - } else if(body[0].line === '{' && body[body.length - 1].line === '}') { - // 'block' - return [ - { line: `function(${parameters}) {`, indent: 0 }, - ...body.slice(1, body.length - 1), - { line: '}', indent: 0 } - ] + return [{ line: `function(${parameters}) ${bodyStr}`, indent: 0 }] } else { - // unknown -> we add the braces just to be sure + // 'block' return [ - { line: `function(${parameters}) {`, indent: 0 }, - ...indentBy(body, 1), - { line: '}', indent: 0 } + { line: `function(${parameters}) ${body[0].line}`, indent: 0 }, + ...body.slice(1), ] } } -function reconstructSpecialInfixFunctionCall(args: (Code | undefined)[], call: RFunctionCall): Code { +function reconstructSpecialInfixFunctionCall(args: (Code | typeof EmptyArgument)[], call: RFunctionCall): Code { guard(args.length === 2, () => `infix special call must have exactly two arguments, got: ${args.length} (${JSON.stringify(args)})`) guard(call.flavor === 'named', `infix special call must be named, got: ${call.flavor}`) - const lhs = args[0] - const rhs = args[1] + const [lhs, rhs] = args if((lhs === undefined || lhs.length === 0) && (rhs === undefined || rhs.length === 0)) { return [] } // else if (rhs === undefined || rhs.length === 0) { // if rhs is undefined we still have to keep both now, but reconstruct manually :/ - if(lhs !== undefined && lhs.length > 0) { + if(lhs !== EmptyArgument && lhs.length > 0) { const lhsText = lhs.map(l => `${getIndentString(l.indent)}${l.line}`).join('\n') - if(rhs !== undefined && rhs.length > 0) { + if(rhs !== EmptyArgument && rhs.length > 0) { const rhsText = rhs.map(l => `${getIndentString(l.indent)}${l.line}`).join('\n') return plain(`${lhsText} ${call.functionName.content} ${rhsText}`) } else { @@ -357,7 +336,7 @@ function reconstructSpecialInfixFunctionCall(args: (Code | undefined)[], 
call: R return plain(`${getLexeme(call.arguments[0] as RArgument)} ${call.functionName.content} ${getLexeme(call.arguments[1] as RArgument)}`) } -function reconstructFunctionCall(call: RFunctionCall, functionName: Code, args: (Code | undefined)[], configuration: ReconstructionConfiguration): Code { +function reconstructFunctionCall(call: RFunctionCall, functionName: Code, args: (Code | typeof EmptyArgument)[], configuration: ReconstructionConfiguration): Code { if(call.infixSpecial === true) { return reconstructSpecialInfixFunctionCall(args, call) } @@ -370,16 +349,18 @@ function reconstructFunctionCall(call: RFunctionCall, functio } if(args.length === 0) { - guard(functionName.length === 1, `without args, we need the function name to be present! got: ${JSON.stringify(functionName)}`) - if(call.flavor === 'unnamed' && !functionName[0].line.endsWith(')')) { - functionName[0].line = `(${functionName[0].line})` + guard(functionName.length > 0, `without args, we need the function name to be present! 
got: ${JSON.stringify(functionName)}`) + const last = functionName[functionName.length - 1] + if(call.flavor === 'unnamed' && !last.line.endsWith(')')) { + functionName[0].line = `(${functionName[0].line}` + last.line += ')' } - if(!functionName[0].line.endsWith('()')) { + if(!last.line.endsWith('()')) { // add empty call braces if not present - functionName[0].line += '()' + last.line += '()' } - return [{ line: functionName[0].line, indent: functionName[0].indent }] + return functionName } else { return plain(getLexeme(call)) } @@ -415,26 +396,16 @@ export function autoSelectLibrary(node: RNode): boolean { // escalates with undefined if all are undefined const reconstructAstFolds: StatefulFoldFunctions = { // we just pass down the state information so everyone has them - down: (_n, c) => c, - foldNumber: reconstructAsLeaf, - foldString: reconstructAsLeaf, - foldLogical: reconstructAsLeaf, - foldSymbol: reconstructAsLeaf, - foldAccess: reconstructFoldAccess, - binaryOp: { - foldLogicalOp: reconstructBinaryOp, - foldArithmeticOp: reconstructBinaryOp, - foldComparisonOp: reconstructBinaryOp, - foldAssignment: reconstructBinaryOp, - foldPipe: reconstructBinaryOp, - foldModelFormula: reconstructBinaryOp - }, - unaryOp: { - foldArithmeticOp: reconstructUnaryOp, - foldLogicalOp: reconstructUnaryOp, - foldModelFormula: reconstructUnaryOp - }, - other: { + down: (_n, c) => c, + foldNumber: reconstructAsLeaf, + foldString: reconstructAsLeaf, + foldLogical: reconstructAsLeaf, + foldSymbol: reconstructAsLeaf, + foldAccess: reconstructFoldAccess, + foldBinaryOp: reconstructBinaryOp, + foldPipe: reconstructBinaryOp, + foldUnaryOp: reconstructUnaryOp, + other: { foldComment: reconstructAsLeaf, foldLineDirective: reconstructAsLeaf }, @@ -490,7 +461,7 @@ function removeOuterExpressionListIfApplicable(result: PrettyPrintLine[], autoSe * @returns The number of times `autoSelectIf` triggered, as well as the reconstructed code itself. 
*/ export function reconstructToCode(ast: NormalizedAst, selection: Selection, autoSelectIf: AutoSelectPredicate = autoSelectLibrary): ReconstructionResult { - if(reconstructLogger.settings.minLevel >= LogLevel.Trace) { + if(reconstructLogger.settings.minLevel <= LogLevel.Trace) { reconstructLogger.trace(`reconstruct ast with ids: ${JSON.stringify([...selection])}`) } @@ -507,9 +478,7 @@ export function reconstructToCode(ast: NormalizedAst, selection: Sel // fold of the normalized ast const result = foldAstStateful(ast.ast, { selection, autoSelectIf: autoSelectIfWrapper }, reconstructAstFolds) - if(reconstructLogger.settings.minLevel >= LogLevel.Trace) { - reconstructLogger.trace('reconstructed ast before string conversion: ', JSON.stringify(result)) - } + expensiveTrace(reconstructLogger, () => `reconstructed ast before string conversion: ${JSON.stringify(result)}`) return removeOuterExpressionListIfApplicable(result, autoSelected) } diff --git a/src/slicing/criterion/collect-all.ts b/src/slicing/criterion/collect-all.ts index 85c03afa8a..f9908f08c8 100644 --- a/src/slicing/criterion/collect-all.ts +++ b/src/slicing/criterion/collect-all.ts @@ -19,12 +19,12 @@ import { getUniqueCombinationsOfSize } from '../../util/arrays' export interface SlicingCriteriaFilter extends MergeableRecord { /** * Inclusive minimum size of the slicing criteria (number of included slice points). - * Should be at least `1` to make sense (and of course at most {@link SlicingCriteriaFilter#maximumSize | maximum size}). + * Should be at least `1` to make sense (and of course at most {@link SlicingCriteriaFilter#maximumSize|maximum size}). */ minimumSize: number /** * Inclusive maximum size of the slicing criteria (number of included slice points). - * Should be at least `1` to make sense (and of course at least {@link SlicingCriteriaFilter#minimumSize | minimum size}). + * Should be at least `1` to make sense (and of course at least {@link SlicingCriteriaFilter#minimumSize|minimum size}). *

* Be really careful with this one, as the number of possible slicing criteria can grow exponentially with the maximum size. */ diff --git a/src/slicing/criterion/filters/all-variables.ts b/src/slicing/criterion/filters/all-variables.ts index 4f4fd5c7a2..953e1ec284 100644 --- a/src/slicing/criterion/filters/all-variables.ts +++ b/src/slicing/criterion/filters/all-variables.ts @@ -1,14 +1,16 @@ import type { FoldFunctions, - NodeId, ParentInformation, RFunctionCall, RNodeWithParent, RSymbol } from '../../../r-bridge' import { + EmptyArgument + , foldAst, isSpecialSymbol } from '../../../r-bridge' import type { SlicingCriteriaFilter } from '../collect-all' import { isNotNull } from '../../../util/assert' +import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id' export const DefaultAllVariablesFilter: SlicingCriteriaFilter = { minimumSize: 1, @@ -19,25 +21,15 @@ export const DefaultAllVariablesFilter: SlicingCriteriaFilter = { const onLeaf = () => [] const onBinary = (_: unknown, lhs: NodeId[], rhs: NodeId[]) => [...lhs, ...rhs] const defaultAllVariablesCollectorFolds: FoldFunctions = { - foldNumber: onLeaf, - foldString: onLeaf, - foldLogical: onLeaf, - foldSymbol: (symbol: RSymbol) => isSpecialSymbol(symbol) ? [] : [symbol.info.id], - foldAccess: (_: unknown, name: NodeId[], access: string | (null | NodeId[])[]) => Array.isArray(access) ? [...name, ...access.filter(isNotNull).flat()] : name, - binaryOp: { - foldLogicalOp: onBinary, - foldArithmeticOp: onBinary, - foldComparisonOp: onBinary, - foldAssignment: onBinary, - foldPipe: onBinary, - foldModelFormula: onBinary - }, - unaryOp: { - foldLogicalOp: (_: unknown, operator: NodeId[]) => operator, - foldArithmeticOp: (_: unknown, operator: NodeId[]) => operator, - foldModelFormula: (_: unknown, operator: NodeId[]) => operator - }, - loop: { + foldNumber: onLeaf, + foldString: onLeaf, + foldLogical: onLeaf, + foldSymbol: (symbol: RSymbol) => isSpecialSymbol(symbol) ? 
[] : [symbol.info.id], + foldAccess: (_: unknown, name: NodeId[], access: readonly (typeof EmptyArgument | NodeId[])[]) => [...name, ...access.filter(isNotNull).flat()], + foldBinaryOp: onBinary, + foldPipe: onBinary, + foldUnaryOp: (_: unknown, operator: NodeId[]) => operator, + loop: { foldFor: (_: unknown, a: NodeId[], b: NodeId[], c: NodeId[]) => [...a,...b,...c], foldWhile: (_: unknown, a: NodeId[], b: NodeId[]) => [...a,...b], foldRepeat: (_: unknown, a: NodeId[]) => a, @@ -49,19 +41,19 @@ const defaultAllVariablesCollectorFolds: FoldFunctions [...a,...b,...(c??[])], - foldExprList: (_: unknown, a: NodeId[][]) => a.flat(), + foldExprList: (_: unknown, _grouping: unknown, a: NodeId[][]) => a.flat(), functions: { foldFunctionDefinition: (_: unknown, a: NodeId[][], b: NodeId[]) => [...a.flat(),...b], - foldFunctionCall: (c: RFunctionCall, a: NodeId[], b: (NodeId[] | undefined)[]) => { - const args = b.flatMap(b => b !== undefined ? b.flat() : []) + foldFunctionCall: (c: RFunctionCall, a: NodeId[], b: (NodeId[] | typeof EmptyArgument)[]) => { + const args = b.flatMap(b => b !== EmptyArgument ? b.flat() : []) if(c.flavor === 'named') { return c.functionName.content === 'library' ? args.slice(1) : args } else { return [...a, ...args] } }, - foldArgument: (_: unknown, a: unknown, b: NodeId[] | undefined) => b ?? [], - foldParameter: (_: unknown, a: unknown, b: NodeId[] | undefined) => b ?? [] + foldArgument: (_: unknown, _a: unknown, b: NodeId[] | undefined) => b ?? [], + foldParameter: (_: unknown, _a: unknown, b: NodeId[] | undefined) => b ?? 
[] } } diff --git a/src/slicing/criterion/parse.ts b/src/slicing/criterion/parse.ts index 5698a4c374..43ca3a9983 100644 --- a/src/slicing/criterion/parse.ts +++ b/src/slicing/criterion/parse.ts @@ -2,6 +2,7 @@ import type { NormalizedAst, DecoratedAstMap, NodeId, NoInfo, ParentInformation, import { RType } from '../../r-bridge' import { slicerLogger } from '../static' import type { SourcePosition } from '../../util/range' +import { expensiveTrace } from '../../util/log' /** Either `line:column`, `line@variable-name`, or `$id` */ export type SingleSlicingCriterion = `${number}:${number}` | `${number}@${string}` | `$${number}` @@ -24,7 +25,7 @@ export function slicingCriterionToId(criterion: SingleSlicin let resolved: NodeId | undefined if(criterion.includes(':')) { const [line, column] = criterion.split(':').map(c => parseInt(c)) - resolved = locationToId({ line, column }, decorated.idMap) + resolved = locationToId([line, column], decorated.idMap) } else if(criterion.includes('@')) { const [line, name] = criterion.split(/@(.*)/s) // only split at first occurrence resolved = conventionalCriteriaToId(parseInt(line), name, decorated.idMap) @@ -43,13 +44,13 @@ export function slicingCriterionToId(criterion: SingleSlicin function locationToId(location: SourcePosition, dataflowIdMap: DecoratedAstMap): NodeId | undefined { let candidate: RNodeWithParent | undefined for(const [id, nodeInfo] of dataflowIdMap.entries()) { - if(nodeInfo.location === undefined || nodeInfo.location.start.line !== location.line || nodeInfo.location.start.column !== location.column) { + if(nodeInfo.location === undefined || nodeInfo.location[0] !== location[0] || nodeInfo.location[1] !== location[1]) { continue // only consider those with position information } - slicerLogger.trace(`can resolve id ${id} (${JSON.stringify(nodeInfo.location)}) for location ${JSON.stringify(location)}`) + expensiveTrace(slicerLogger, () => `can resolve id ${id} (${JSON.stringify(nodeInfo.location)}) for location 
${JSON.stringify(location)}`) // function calls have the same location as the symbol they refer to, so we need to prefer the function call - if(candidate !== undefined && nodeInfo.type !== RType.FunctionCall && nodeInfo.type !== RType.Argument || nodeInfo.type === RType.ExpressionList) { + if(candidate !== undefined && nodeInfo.type !== RType.FunctionCall || nodeInfo.type === RType.Argument || nodeInfo.type === RType.ExpressionList) { continue } @@ -57,7 +58,7 @@ function locationToId(location: SourcePosition, dataflowIdMap: Decora } const id = candidate?.info.id if(id) { - slicerLogger.trace(`resolve id ${id} (${JSON.stringify(candidate?.info)}) for location ${JSON.stringify(location)}`) + expensiveTrace(slicerLogger, () =>`resolve id ${id} (${JSON.stringify(candidate?.info)}) for location ${JSON.stringify(location)}`) } return id } @@ -66,13 +67,13 @@ function conventionalCriteriaToId(line: number, name: string, dataflo let candidate: RNodeWithParent | undefined for(const [id, nodeInfo] of dataflowIdMap.entries()) { - if(nodeInfo.location === undefined || nodeInfo.location.start.line !== line || nodeInfo.lexeme !== name) { + if(nodeInfo.location === undefined || nodeInfo.location[0] !== line || nodeInfo.lexeme !== name) { continue } slicerLogger.trace(`can resolve id ${id} (${JSON.stringify(nodeInfo)}) for line ${line} and name ${name}`) // function calls have the same location as the symbol they refer to, so we need to prefer the function call - if(candidate !== undefined && nodeInfo.type !== RType.FunctionCall && nodeInfo.type !== RType.Argument || nodeInfo.type === RType.ExpressionList) { + if(candidate !== undefined && nodeInfo.type !== RType.FunctionCall || nodeInfo.type === RType.Argument || nodeInfo.type === RType.ExpressionList) { continue } candidate = nodeInfo @@ -89,7 +90,7 @@ export interface DecodedCriterion { id: NodeId } -export type DecodedCriteria = DecodedCriterion[] +export type DecodedCriteria = ReadonlyArray export function 
convertAllSlicingCriteriaToIds(criteria: SlicingCriteria, decorated: NormalizedAst): DecodedCriteria { return criteria.map(l => ({ criterion: l, id: slicingCriterionToId(l, decorated) })) diff --git a/src/slicing/static/fingerprint.ts b/src/slicing/static/fingerprint.ts new file mode 100644 index 0000000000..a177630b07 --- /dev/null +++ b/src/slicing/static/fingerprint.ts @@ -0,0 +1,13 @@ +import type { REnvironmentInformation } from '../../dataflow' +import objectHash from 'object-hash' +import type { NodeId } from '../../r-bridge' + +export type Fingerprint = string + +export function envFingerprint(env: REnvironmentInformation): Fingerprint { + return objectHash(env, { excludeKeys: key => key === 'id' }) +} + +export function fingerprint(id: NodeId, envFingerprint: Fingerprint, onlyForSideEffects: boolean): Fingerprint { + return `${id}-${envFingerprint}-${onlyForSideEffects ? '0' : '1'}` +} diff --git a/src/slicing/static/slice-call.ts b/src/slicing/static/slice-call.ts new file mode 100644 index 0000000000..3d5d432d19 --- /dev/null +++ b/src/slicing/static/slice-call.ts @@ -0,0 +1,98 @@ +import { + BuiltIn, + EdgeType +} from '../../dataflow' +import type { + DataflowGraph, + DataflowGraphVertexInfo, + REnvironmentInformation, + DataflowGraphVertexFunctionDefinition + , OutgoingEdges +} from '../../dataflow' +import { overwriteEnvironment, pushLocalEnvironment, resolveByName } from '../../dataflow/environments' +import type { NodeToSlice } from './slicer-types' +import type { VisitingQueue } from './visiting-queue' +import { guard } from '../../util/assert' +import type { Fingerprint } from './fingerprint' +import { envFingerprint } from './fingerprint' +import { getAllLinkedFunctionDefinitions } from '../../dataflow/internal/linker' + +function retrieveActiveEnvironment(callerInfo: DataflowGraphVertexInfo, baseEnvironment: REnvironmentInformation): REnvironmentInformation { + let callerEnvironment = callerInfo.environment + + if(baseEnvironment.level !== 
callerEnvironment.level) { + while(baseEnvironment.level < callerEnvironment.level) { + baseEnvironment = pushLocalEnvironment(baseEnvironment) + } + while(baseEnvironment.level > callerEnvironment.level) { + callerEnvironment = pushLocalEnvironment(callerEnvironment) + } + } + + return overwriteEnvironment(baseEnvironment, callerEnvironment) +} + +/** returns the new threshold hit count */ +export function sliceForCall(current: NodeToSlice, callerInfo: DataflowGraphVertexInfo, dataflowGraph: DataflowGraph, queue: VisitingQueue): void { + // bind with call-local environments during slicing + const outgoingEdges = dataflowGraph.get(callerInfo.id, true) + guard(outgoingEdges !== undefined, () => `outgoing edges of id: ${callerInfo.id} must be in graph but can not be found, keep in slice to be sure`) + + // lift baseEnv on the same level + const baseEnvironment = current.baseEnvironment + const baseEnvPrint = envFingerprint(baseEnvironment) + + const activeEnvironment = retrieveActiveEnvironment(callerInfo, baseEnvironment) + const activeEnvironmentFingerprint = envFingerprint(activeEnvironment) + + const functionCallDefs = resolveByName(callerInfo.name, activeEnvironment)?.filter(d => d.definedAt !== BuiltIn)?.map(d => d.nodeId) ?? [] + + for(const [target, outgoingEdge] of outgoingEdges[1].entries()) { + if(outgoingEdge.types.has(EdgeType.Calls)) { + functionCallDefs.push(target) + } + } + + const functionCallTargets = getAllLinkedFunctionDefinitions(new Set(functionCallDefs), dataflowGraph) + + for(const [_, functionCallTarget] of functionCallTargets) { + // all those linked within the scopes of other functions are already linked when exiting a function definition + for(const openIn of (functionCallTarget as DataflowGraphVertexFunctionDefinition).subflow.in) { + const defs = openIn.name ? 
resolveByName(openIn.name, activeEnvironment) : undefined + if(defs === undefined) { + continue + } + for(const def of defs.filter(d => d.nodeId !== BuiltIn)) { + queue.add(def.nodeId, baseEnvironment, baseEnvPrint, current.onlyForSideEffects) + } + } + + for(const exitPoint of (functionCallTarget as DataflowGraphVertexFunctionDefinition).exitPoints) { + queue.add(exitPoint, activeEnvironment, activeEnvironmentFingerprint, current.onlyForSideEffects) + } + } +} + +/** Returns true if we found at least one return edge */ +export function handleReturns(queue: VisitingQueue, currentEdges: OutgoingEdges, baseEnvFingerprint: Fingerprint, baseEnvironment: REnvironmentInformation): boolean { + let found = false + for(const [, edge] of currentEdges) { + if(edge.types.has(EdgeType.Returns)) { + found = true + break + } + } + if(!found) { + return false + } + for(const [target, edge] of currentEdges) { + if(edge.types.has(EdgeType.Returns)) { + queue.add(target, baseEnvironment, baseEnvFingerprint, false) + } else if(edge.types.has(EdgeType.Reads)) { + queue.add(target, baseEnvironment, baseEnvFingerprint, false) + } else if(edge.types.has(EdgeType.Argument)) { + queue.potentialArguments.add(target) + } + } + return true +} diff --git a/src/slicing/static/slicer-types.ts b/src/slicing/static/slicer-types.ts new file mode 100644 index 0000000000..df3728ecc9 --- /dev/null +++ b/src/slicing/static/slicer-types.ts @@ -0,0 +1,36 @@ +import type { NodeId } from '../../r-bridge' +import type { REnvironmentInformation } from '../../dataflow' +import type { DecodedCriteria } from '../criterion' + + +/** + * Represents a node during the slicing process, together with the environment it is traversed in + * (modified by function calls) and whether it is only used for its side effects. + */ +export interface NodeToSlice { + readonly id: NodeId + /** used for calling context, etc. 
*/ + readonly baseEnvironment: REnvironmentInformation + /** if we add a function call, we may need it only for its side effects (e.g., a redefinition of a global variable), if so, 'returns' links will not be traced */ + readonly onlyForSideEffects: boolean +} + +/** + * The result of the slice step + */ +export interface SliceResult { + /** + * Number of times the set threshold was hit (i.e., the same node was visited too often). + * While any number above 0 might indicate a wrong slice, it does not have to as usually even revisiting the same node + * seldom causes more ids to be included in the slice. + */ + readonly timesHitThreshold: number + /** + * The ids of the nodes in the normalized ast that are part of the slice. + */ + readonly result: ReadonlySet + /** + * The mapping produced to decode the entered criteria + */ + readonly decodedCriteria: DecodedCriteria +} diff --git a/src/slicing/static/static-slicer.ts b/src/slicing/static/static-slicer.ts index 035d7390e8..49fbe67636 100644 --- a/src/slicing/static/static-slicer.ts +++ b/src/slicing/static/static-slicer.ts @@ -1,272 +1,100 @@ -import type { - DataflowGraph, - DataflowGraphVertexFunctionDefinition, - DataflowGraphVertexInfo, - REnvironmentInformation -} from '../../dataflow' +import type { DataflowGraph } from '../../dataflow' import { + BuiltIn, EdgeType, - graphToMermaidUrl, - initializeCleanEnvironments + initializeCleanEnvironments, + shouldTraverseEdge, + TraverseEdge, + VertexType } from '../../dataflow' import { guard } from '../../util/assert' -import type { - DecoratedAstMap, - NodeId, - NormalizedAst, - RNodeWithParent } from '../../r-bridge' -import { - collectAllIds, - RType -} from '../../r-bridge' -import { log, LogLevel } from '../../util/log' -import { getAllLinkedFunctionDefinitions } from '../../dataflow/internal/linker' -import { overwriteEnvironments, pushLocalEnvironment, resolveByName } from '../../dataflow/environments' -import objectHash from 'object-hash' -import { 
DefaultMap } from '../../util/defaultmap' -import { LocalScope } from '../../dataflow/environments/scopes' -import type { DecodedCriteria, SlicingCriteria } from '../criterion' +import type { NodeId, NormalizedAst } from '../../r-bridge' +import { expensiveTrace, log } from '../../util/log' +import type { SlicingCriteria } from '../criterion' import { convertAllSlicingCriteriaToIds } from '../criterion' +import type { SliceResult } from './slicer-types' +import { envFingerprint } from './fingerprint' +import { VisitingQueue } from './visiting-queue' +import { handleReturns, sliceForCall } from './slice-call' export const slicerLogger = log.getSubLogger({ name: 'slicer' }) - -/** - * Represents a node during the slicing process, together with the environment it is traversed in - * (modified by function calls) and whether it is only used for its side effects. - */ -interface NodeToSlice { - id: NodeId - /** used for calling context etc. */ - baseEnvironment: REnvironmentInformation - /** if we add a function call we may need it only for its side effects (e.g., a redefinition of a global variable), if so, 'returns' links will not be traced */ - onlyForSideEffects: boolean -} - - - -type Fingerprint = string - -function envFingerprint(env: REnvironmentInformation): string { - return objectHash(env, { excludeKeys: key => key === 'id' }) -} - -function fingerprint(id: NodeId, envFingerprint: string, onlyForSideEffects: boolean): Fingerprint { - return `${id}-${envFingerprint}-${onlyForSideEffects ? '0' : '1'}` -} - - -/** - * The result of the slice step - */ -export interface SliceResult { - /** - * Number of times the set threshold was hit (i.e., the same node was visited too often). - * While any number above 0 might indicate a wrong slice, it does not have to as usually even revisiting the same node does not - * often cause more ids to be included in the slice. 
- */ - timesHitThreshold: number - /** - * The ids of the nodes in the normalized ast that are part of the slice. - */ - result: Set - /** - * The mapping produced to decode the entered criteria - */ - decodedCriteria: DecodedCriteria -} - -class VisitingQueue { - private readonly threshold: number - private timesHitThreshold = 0 - private seen = new Map() - private idThreshold = new DefaultMap(() => 0) - private queue: NodeToSlice[] = [] - - constructor(threshold: number) { - this.threshold = threshold - } - - public add(target: NodeId, env: REnvironmentInformation, envFingerprint: string, onlyForSideEffects: boolean): void { - const idCounter = this.idThreshold.get(target) - if(idCounter > this.threshold) { - slicerLogger.warn(`id: ${target} has been visited ${idCounter} times, skipping`) - this.timesHitThreshold++ - return - } else { - this.idThreshold.set(target, idCounter + 1) - } - - const print = fingerprint(target, envFingerprint, onlyForSideEffects) - - if(!this.seen.has(print)) { - this.seen.set(print, target) - this.queue.push({ id: target, baseEnvironment: env, onlyForSideEffects: onlyForSideEffects }) - } - } - - public next(): NodeToSlice | undefined { - return this.queue.pop() - } - - public has(): boolean { - return this.queue.length > 0 - } - - public status(): Readonly> { - return { - timesHitThreshold: this.timesHitThreshold, - result: new Set(this.seen.values()) - } - } -} - - /** * This returns the ids to include in the slice, when slicing with the given seed id's (must be at least one). *

- * The returned ids can be used to {@link reconstructToCode | reconstruct the slice to R code}. + * The returned ids can be used to {@link reconstructToCode|reconstruct the slice to R code}. */ -export function staticSlicing(dataflowGraph: DataflowGraph, ast: NormalizedAst, criteria: SlicingCriteria, threshold = 75): Readonly { +export function staticSlicing(graph: DataflowGraph, ast: NormalizedAst, criteria: SlicingCriteria, threshold = 75): Readonly { guard(criteria.length > 0, 'must have at least one seed id to calculate slice') const decodedCriteria = convertAllSlicingCriteriaToIds(criteria, ast) - const idMap = ast.idMap - if(slicerLogger.settings.minLevel <= LogLevel.Trace) { - slicerLogger.trace(`calculating slice for ${decodedCriteria.length} seed criteria: ${decodedCriteria.map(s => JSON.stringify(s)).join(', ')}`) - } + expensiveTrace(slicerLogger, () =>`calculating slice for ${decodedCriteria.length} seed criteria: ${decodedCriteria.map(s => JSON.stringify(s)).join(', ')}`) const queue = new VisitingQueue(threshold) + let minDepth = Number.MAX_SAFE_INTEGER + const sliceSeedIds = new Set() // every node ships the call environment which registers the calling environment { - const basePrint = envFingerprint(initializeCleanEnvironments()) + const emptyEnv = initializeCleanEnvironments() + const basePrint = envFingerprint(emptyEnv) for(const startId of decodedCriteria) { - queue.add(startId.id, initializeCleanEnvironments(), basePrint, false) + queue.add(startId.id, emptyEnv, basePrint, false) + // retrieve the minimum depth of all nodes to only add control dependencies if they are "part" of the current execution + minDepth = Math.min(minDepth, ast.idMap.get(startId.id)?.info.depth ?? 
minDepth) + sliceSeedIds.add(startId.id) } } - - while(queue.has()) { + while(queue.nonEmpty()) { const current = queue.next() + const { baseEnvironment, id, onlyForSideEffects } = current + const baseEnvFingerprint = envFingerprint(baseEnvironment) - if(current === undefined) { - continue - } - - const baseEnvFingerprint = envFingerprint(current.baseEnvironment) - - const currentInfo = dataflowGraph.get(current.id, true) - // slicerLogger.trace(`visiting id: ${current.id} with name: ${currentInfo?.[0].name ?? ''}`) - + const currentInfo = graph.get(id, true) if(currentInfo === undefined) { - slicerLogger.warn(`id: ${current.id} must be in graph but can not be found, keep in slice to be sure`) + slicerLogger.warn(`id: ${id} must be in graph but can not be found, keep in slice to be sure`) continue } - if(currentInfo[0].tag === 'function-call' && !current.onlyForSideEffects) { - slicerLogger.trace(`${current.id} is a function call`) - sliceForCall(current, idMap, currentInfo[0], dataflowGraph, queue) - } - - const currentNode = idMap.get(current.id) - guard(currentNode !== undefined, () => `id: ${current.id} must be in dataflowIdMap is not in ${graphToMermaidUrl(dataflowGraph, idMap)}`) + const [currentVertex, currentEdges] = currentInfo - for(const [target, edge] of currentInfo[1]) { - if(edge.types.has(EdgeType.SideEffectOnCall)) { - queue.add(target, current.baseEnvironment, baseEnvFingerprint, true) + // we only add control dependencies iff 1) we are in different function call or 2) they have, at least, the same depth as the slicing seed + if(currentVertex.controlDependencies) { + const topLevel = graph.isRoot(id) || sliceSeedIds.has(id) + for(const cd of currentVertex.controlDependencies) { + if(!topLevel || (ast.idMap.get(cd)?.info.depth ?? 
0) <= minDepth) { + queue.add(cd, baseEnvironment, baseEnvFingerprint, false) + } } - if(edge.types.has(EdgeType.Reads) || edge.types.has(EdgeType.DefinedBy) || edge.types.has(EdgeType.Argument) || edge.types.has(EdgeType.Calls) || edge.types.has(EdgeType.Relates) || edge.types.has(EdgeType.DefinesOnCall)) { - queue.add(target, current.baseEnvironment, baseEnvFingerprint, false) - } - } - for(const controlFlowDependency of addControlDependencies(currentInfo[0].id, idMap)) { - queue.add(controlFlowDependency, current.baseEnvironment, baseEnvFingerprint, false) - } - } - - // slicerLogger.trace(`static slicing produced: ${JSON.stringify([...seen])}`) - return { ...queue.status(), decodedCriteria } -} - - -function addAllFrom(current: RNodeWithParent, collected: Set) { - for(const id of collectAllIds(current)) { - collected.add(id) - } -} - -function addControlDependencies(source: NodeId, ast: DecoratedAstMap): Set { - const start = ast.get(source) - - const collected = new Set() - - let current = start - while(current !== undefined) { - if(current.type === RType.IfThenElse) { - addAllFrom(current.condition, collected) - } else if(current.type === RType.WhileLoop) { - addAllFrom(current.condition, collected) - } else if(current.type === RType.ForLoop) { - addAllFrom(current.variable, collected) - // vector not needed, if required, it is linked by defined-by - } - // nothing to do for repeat and rest! - current = current.info.parent ? 
ast.get(current.info.parent) : undefined - } - return collected -} - -function retrieveActiveEnvironment(callerInfo: DataflowGraphVertexInfo, baseEnvironment: REnvironmentInformation) { - let callerEnvironment = callerInfo.environment - - if(baseEnvironment.level !== callerEnvironment.level) { - while(baseEnvironment.level < callerEnvironment.level) { - baseEnvironment = pushLocalEnvironment(baseEnvironment) - } - while(baseEnvironment.level > callerEnvironment.level) { - callerEnvironment = pushLocalEnvironment(callerEnvironment) } - } - - return overwriteEnvironments(baseEnvironment, callerEnvironment) -} - -//// returns the new threshold hit count -function sliceForCall(current: NodeToSlice, idMap: DecoratedAstMap, callerInfo: DataflowGraphVertexInfo, dataflowGraph: DataflowGraph, queue: VisitingQueue): void { - // bind with call-local environments during slicing - const outgoingEdges = dataflowGraph.get(callerInfo.id, true) - guard(outgoingEdges !== undefined, () => `outgoing edges of id: ${callerInfo.id} must be in graph but can not be found, keep in slice to be sure`) - - // lift baseEnv on the same level - const baseEnvironment = current.baseEnvironment - const baseEnvPrint = envFingerprint(baseEnvironment) - const activeEnvironment = retrieveActiveEnvironment(callerInfo, baseEnvironment) - const activeEnvironmentFingerprint = envFingerprint(activeEnvironment) - const functionCallDefs = resolveByName(callerInfo.name, LocalScope, activeEnvironment)?.map(d => d.nodeId) ?? 
[] + if(!onlyForSideEffects) { + if(currentVertex.tag === VertexType.FunctionCall && !currentVertex.onlyBuiltin) { + sliceForCall(current, currentVertex, graph, queue) + } - for(const [target, outgoingEdge] of outgoingEdges[1].entries()) { - if(outgoingEdge.types.has(EdgeType.Calls)) { - functionCallDefs.push(target) + const ret = handleReturns(queue, currentEdges, baseEnvFingerprint, baseEnvironment) + if(ret) { + continue + } } - } - - const functionCallTargets = getAllLinkedFunctionDefinitions(new Set(functionCallDefs), dataflowGraph) - for(const [_, functionCallTarget] of functionCallTargets) { - // all those linked within the scopes of other functions are already linked when exiting a function definition - for(const openIn of (functionCallTarget as DataflowGraphVertexFunctionDefinition).subflow.in) { - const defs = resolveByName(openIn.name, LocalScope, activeEnvironment) - if(defs === undefined) { + for(const [target, { types }] of currentEdges) { + if(target === BuiltIn || types.has(EdgeType.NonStandardEvaluation)) { continue } - for(const def of defs) { - queue.add(def.nodeId, baseEnvironment, baseEnvPrint, current.onlyForSideEffects) + const t = shouldTraverseEdge(types) + if(t === TraverseEdge.Always) { + queue.add(target, baseEnvironment, baseEnvFingerprint, false) + } else if(t === TraverseEdge.DefinedByOnCall && queue.potentialArguments.has(target)) { + queue.add(target, baseEnvironment, baseEnvFingerprint, false) + queue.potentialArguments.delete(target) + } else if(t === TraverseEdge.SideEffect) { + queue.add(target, baseEnvironment, baseEnvFingerprint, true) } } - - for(const exitPoint of (functionCallTarget as DataflowGraphVertexFunctionDefinition).exitPoints) { - queue.add(exitPoint, activeEnvironment, activeEnvironmentFingerprint, current.onlyForSideEffects) - } } + + return { ...queue.status(), decodedCriteria } } + diff --git a/src/slicing/static/visiting-queue.ts b/src/slicing/static/visiting-queue.ts new file mode 100644 index 
0000000000..651d65d3fa --- /dev/null +++ b/src/slicing/static/visiting-queue.ts @@ -0,0 +1,62 @@ +import type { Fingerprint } from './fingerprint' +import { fingerprint } from './fingerprint' +import type { NodeId } from '../../r-bridge' +import type { NodeToSlice, SliceResult } from './slicer-types' +import type { REnvironmentInformation } from '../../dataflow' +import { slicerLogger } from './static-slicer' + +export class VisitingQueue { + private readonly threshold: number + private timesHitThreshold = 0 + private seen = new Map() + private idThreshold = new Map() + private queue: NodeToSlice[] = [] + // the set of potential arguments holds arguments which may be added if found with the `defined-by-on-call` edge + public potentialArguments: Set = new Set() + + constructor(threshold: number) { + this.threshold = threshold + } + + /** + * Adds a node to the queue if it has not been seen before. + * @param target - the node to add + * @param env - the environment the node is traversed in + * @param envFingerprint - the fingerprint of the environment + * @param onlyForSideEffects - whether the node is only used for its side effects + */ + public add(target: NodeId, env: REnvironmentInformation, envFingerprint: string, onlyForSideEffects: boolean): void { + const idCounter = this.idThreshold.get(target) ?? 
0 + + if(idCounter > this.threshold) { + slicerLogger.warn(`id: ${target} has been visited ${idCounter} times, skipping`) + this.timesHitThreshold++ + return + } else { + this.idThreshold.set(target, idCounter + 1) + } + + /* we do not include the in call part in the fingerprint as it is 'deterministic' from the source position */ + const print = fingerprint(target, envFingerprint, onlyForSideEffects) + + if(!this.seen.has(print)) { + this.seen.set(print, target) + this.queue.push({ id: target, baseEnvironment: env, onlyForSideEffects }) + } + } + + public next(): NodeToSlice { + return this.queue.pop() as NodeToSlice + } + + public nonEmpty(): boolean { + return this.queue.length > 0 + } + + public status(): Readonly> { + return { + timesHitThreshold: this.timesHitThreshold, + result: new Set(this.seen.values()) + } + } +} diff --git a/src/util/ansi.ts b/src/util/ansi.ts index 789b2564c1..5871abce1a 100644 --- a/src/util/ansi.ts +++ b/src/util/ansi.ts @@ -57,14 +57,14 @@ export const voidFormatter: OutputFormatter = new class implements OutputFormatt }() /** - * This does not work if the {@link setFormatter | formatter} is void. Tries to format the text with a bold font weight. + * This does not work if the {@link setFormatter|formatter} is void. Tries to format the text with a bold font weight. */ export function italic(s: string, f: OutputFormatter = formatter, options?: FormatOptions): string { return f.format(s, { style: FontStyles.Italic, ...options }) } /** - * This does not work if the {@link setFormatter | formatter} is void. Tries to format the text with an italic font shape. + * This does not work if the {@link setFormatter|formatter} is void. Tries to format the text with an italic font shape. 
*/ export function bold(s: string, f: OutputFormatter = formatter, options?: FormatOptions): string { return f.format(s, { style: FontStyles.Bold, ...options }) diff --git a/src/util/arrays.ts b/src/util/arrays.ts index bd25ed406c..07d6fa36b2 100644 --- a/src/util/arrays.ts +++ b/src/util/arrays.ts @@ -1,5 +1,10 @@ import { guard } from './assert' +/** + * Returns the tail of an array (all elements except the first one). + */ +export type TailOfArray = T extends [infer _, ...infer Rest] ? Rest : never; + /** * Splits the array every time the given predicate fires. * The element the split appears on will not be included! @@ -15,7 +20,7 @@ import { guard } from './assert' * // => [[], [], [], []] * ``` */ -export function splitArrayOn(arr: T[], predicate: (elem: T) => boolean): T[][] { +export function splitArrayOn(arr: readonly T[], predicate: (elem: T) => boolean): T[][] { const result: T[][] = [] let current: T[] = [] let fired = false @@ -35,6 +40,23 @@ export function splitArrayOn(arr: T[], predicate: (elem: T) => boolean): T[][ return result } +/** + * Returns a tuple of two arrays, where the first one contains all elements for which the predicate returned true, + * and the second one contains all elements for which the predicate returned false. + */ +export function partitionArray(arr: readonly T[], predicate: (elem: T) => boolean): [T[], T[]] { + const left: T[] = [] + const right: T[] = [] + for(const elem of arr) { + if(predicate(elem)) { + left.push(elem) + } else { + right.push(elem) + } + } + return [left, right] +} + /** * Generate all permutations of the given array using Heap's algorithm (with its non-recursive variant). 
* @@ -140,3 +162,18 @@ export function array2bag(arr: T[]): Map { } return result } + +export function arrayEqual(a: readonly T[] | undefined, b: readonly T[] | undefined): boolean { + if(a === undefined || b === undefined) { + return a === b + } + if(a.length !== b.length) { + return false + } + for(let i = 0; i < a.length; ++i) { + if(a[i] !== b[i]) { + return false + } + } + return true +} diff --git a/src/util/assert.ts b/src/util/assert.ts index f1fec8018a..d0df2159ac 100644 --- a/src/util/assert.ts +++ b/src/util/assert.ts @@ -9,6 +9,10 @@ export function isNotUndefined(x: T | undefined): x is T { return x !== undefined } +export function isUndefined(x: T | undefined): x is undefined { + return x === undefined +} + export function isNotNull(x: T | null): x is T { return x !== null } diff --git a/src/util/cfg/cfg.ts b/src/util/cfg/cfg.ts index bcbe9f3ec9..3806b90454 100644 --- a/src/util/cfg/cfg.ts +++ b/src/util/cfg/cfg.ts @@ -16,7 +16,8 @@ import { RFalse, RoleInParent, RTrue -} from '../../r-bridge' + , + EmptyArgument } from '../../r-bridge' import type { MergeableRecord } from '../objects' import { setEquals } from '../set' import type { QuadSerializationConfiguration } from '../quads' @@ -131,25 +132,15 @@ export function emptyControlFlowInformation(): ControlFlowInformation { const cfgFolds: FoldFunctions = { - foldNumber: cfgLeaf(CfgVertexType.Expression), - foldString: cfgLeaf(CfgVertexType.Expression), - foldLogical: cfgLeaf(CfgVertexType.Expression), - foldSymbol: cfgLeaf(CfgVertexType.Expression), - foldAccess: cfgAccess, - binaryOp: { - foldLogicalOp: cfgBinaryOp, - foldArithmeticOp: cfgBinaryOp, - foldComparisonOp: cfgBinaryOp, - foldAssignment: cfgBinaryOp, - foldPipe: cfgBinaryOp, - foldModelFormula: cfgBinaryOp - }, - unaryOp: { - foldArithmeticOp: cfgUnaryOp, - foldLogicalOp: cfgUnaryOp, - foldModelFormula: cfgUnaryOp - }, - other: { + foldNumber: cfgLeaf(CfgVertexType.Expression), + foldString: cfgLeaf(CfgVertexType.Expression), + 
foldLogical: cfgLeaf(CfgVertexType.Expression), + foldSymbol: cfgLeaf(CfgVertexType.Expression), + foldAccess: cfgAccess, + foldBinaryOp: cfgBinaryOp, + foldPipe: cfgBinaryOp, + foldUnaryOp: cfgUnaryOp, + other: { foldComment: cfgIgnore, foldLineDirective: cfgIgnore }, @@ -370,7 +361,7 @@ function cfgFunctionDefinition(fn: RFunctionDefinition, param return { graph: graph, breaks: [], nexts: [], returns: body.returns, exitPoints: [fn.info.id], entryPoints: [fn.info.id] } } -function cfgFunctionCall(call: RFunctionCall, name: ControlFlowInformation, args: (ControlFlowInformation | undefined)[]): ControlFlowInformation { +function cfgFunctionCall(call: RFunctionCall, name: ControlFlowInformation, args: (ControlFlowInformation | typeof EmptyArgument)[]): ControlFlowInformation { const graph = name.graph const info = { graph, breaks: [...name.breaks], nexts: [...name.nexts], returns: [...name.returns], exitPoints: [call.info.id + '-exit'], entryPoints: [call.info.id] } @@ -391,7 +382,7 @@ function cfgFunctionCall(call: RFunctionCall, name: ControlFl let lastArgExits: NodeId[] = [call.info.id + '-name'] for(const arg of args) { - if(arg === undefined) { + if(arg === EmptyArgument) { continue } graph.merge(arg.graph) @@ -485,7 +476,7 @@ function cfgBinaryOp(binOp: RBinaryOp | RPipe, name: ControlFlowInformation, accessors: string | (ControlFlowInformation | null)[]): ControlFlowInformation { +function cfgAccess(access: RAccess, name: ControlFlowInformation, accessors: readonly (ControlFlowInformation | typeof EmptyArgument)[]): ControlFlowInformation { const result = name const graph = result.graph graph.addVertex({ id: access.info.id, name: access.type, type: CfgVertexType.Expression }) @@ -498,11 +489,8 @@ function cfgAccess(access: RAccess, name: ControlFlowInformat } result.entryPoints = [access.info.id] result.exitPoints = [access.info.id + '-exit'] - if(typeof accessors === 'string') { - return result - } for(const accessor of accessors) { - if(accessor === null) { 
+ if(accessor === EmptyArgument) { continue } graph.merge(accessor.graph) @@ -526,7 +514,7 @@ function cfgUnaryOp(unary: RNodeWithParent, operand: ControlFlowInformation): Co } -function cfgExprList(_node: RNodeWithParent, expressions: ControlFlowInformation[]): ControlFlowInformation { +function cfgExprList(_node: RNodeWithParent, _grouping: unknown, expressions: ControlFlowInformation[]): ControlFlowInformation { const result: ControlFlowInformation = { graph: new ControlFlowGraph(), breaks: [], nexts: [], returns: [], exitPoints: [], entryPoints: [] } let first = true for(const expression of expressions) { diff --git a/src/util/diff.ts b/src/util/diff.ts index ec74e737c8..dfd37c1ddc 100644 --- a/src/util/diff.ts +++ b/src/util/diff.ts @@ -21,7 +21,7 @@ export interface DifferenceReport { */ comments(): readonly string[] | undefined /** - * @returns true iff the compared structures are equal + * @returns true iff the compared structures are equal (i.e., the diff is empty) */ isEqual(): boolean } @@ -35,19 +35,19 @@ export interface WriteableDifferenceReport extends DifferenceReport { * The `leftname` and `rightname` fields are only used to provide more useful * information in the difference report. */ -export interface GenericDifferenceInformation extends MergeableRecord { +export interface GenericDifferenceInformation extends MergeableRecord { /** A human-readable name for the left structure in `left == right`. */ readonly leftname: string /** A human-readable name for the right structure in `left == right`. */ readonly rightname: string /** The report on the difference of the two structures. 
*/ - readonly report: WriteableDifferenceReport + readonly report: Report /** A human-readable indication of where we are (the prefix of the information if the structures differ) */ readonly position: string } -export function setDifference(left: ReadonlySet, right: ReadonlySet, info: GenericDifferenceInformation): void { +export function setDifference(left: ReadonlySet, right: ReadonlySet, info: GenericDifferenceInformation): void { const lWithoutR = setMinus(left, right) const rWithoutL = setMinus(right, left) if(lWithoutR.size === 0 && rWithoutL.size === 0) { diff --git a/src/util/log.ts b/src/util/log.ts index b9087ff2c7..e7a096dc51 100644 --- a/src/util/log.ts +++ b/src/util/log.ts @@ -1,6 +1,12 @@ import { type ILogObj, type ISettingsParam, Logger } from 'tslog' import { createStream, type Options } from 'rotating-file-stream' +export const expensiveTrace = (log: Logger, supplier: () => string): void => { + if(log.settings.minLevel <= LogLevel.Trace) { + log.trace(supplier()) + } +} + export class FlowrLogger extends Logger { /** by keeping track of all children we can propagate updates of the settings (e.g., in tests) */ private readonly childLoggers: Logger[] = [] diff --git a/src/util/logic.ts b/src/util/logic.ts new file mode 100644 index 0000000000..f1d5590756 --- /dev/null +++ b/src/util/logic.ts @@ -0,0 +1,2 @@ +// divering from boolean | maybe requires explicit handling +export type Ternary = 'always' | 'maybe' | 'never' diff --git a/src/util/mermaid/dfg.ts b/src/util/mermaid/dfg.ts index 9651c647e6..e4a637f86c 100644 --- a/src/util/mermaid/dfg.ts +++ b/src/util/mermaid/dfg.ts @@ -1,52 +1,55 @@ import type { NodeId } from '../../r-bridge' +import { EmptyArgument } from '../../r-bridge' import type { SourceRange } from '../range' import type { DataflowFunctionFlowInformation, DataflowGraph, - DataflowGraphEdgeAttribute, DataflowGraphVertexInfo, DataflowMap, FunctionArgument, - IdentifierReference -} from '../../dataflow' -import { + 
IdentifierDefinition, + IdentifierReference, + IEnvironment } from '../../dataflow' +import { isNamedArgument + , isPositionalArgument + , VertexType, BuiltIn, + BuiltInEnvironment, + CONSTANT_NAME, EdgeType } from '../../dataflow' import { guard } from '../assert' -import { jsonReplacer } from '../json' -import type { DataflowScopeName } from '../../dataflow/environments' import { escapeMarkdown, mermaidCodeToUrl } from './mermaid' +type MarkVertex = NodeId +type MarkEdge = `${string}->${string}` + +type Mark = MarkVertex | MarkEdge + interface MermaidGraph { nodeLines: string[] edgeLines: string[] hasBuiltIn: boolean includeEnvironments: boolean - mark: Set | undefined + mark: ReadonlySet | undefined /** in the form of from-\>to because I am lazy, see {@link encodeEdge} */ presentEdges: Set - // keep for subflows + // keep for sub-flows rootGraph: DataflowGraph } export function formatRange(range: SourceRange | undefined): string { if(range === undefined) { - return '??' + return '??-??' } - return `${range.start.line}.${range.start.column}-${range.end.line}.${range.end.column}` -} - -function scopeToMermaid(scope: DataflowScopeName, when: DataflowGraphEdgeAttribute): string { - const whenText = when === 'always' ? '' : `, ${when}` - return `, *${scope.replace('<', '#lt;')}${whenText}*` + return `${range[0]}.${range[1]}-${range[2]}.${range[3]}` } -function createArtificialExitPoints(exitPoints: NodeId[], mermaid: MermaidGraph, dataflowIdMap: DataflowMap, idPrefix: string) { +function createArtificialExitPoints(exitPoints: readonly NodeId[], mermaid: MermaidGraph, dataflowIdMap: DataflowMap, idPrefix: string) { for(const exitPoint of exitPoints) { - if(!mermaid.rootGraph.hasNode(exitPoint, true)) { + if(!mermaid.rootGraph.hasVertex(exitPoint, true)) { const node = dataflowIdMap.get(exitPoint) guard(node !== undefined, 'exit point not found') mermaid.nodeLines.push(` ${idPrefix}${exitPoint}{{"${node.lexeme ?? 
'??'} (${exitPoint})\n ${formatRange(dataflowIdMap.get(exitPoint)?.location)}"}}`) @@ -55,13 +58,21 @@ function createArtificialExitPoints(exitPoints: NodeId[], mermaid: MermaidGraph, } } -function subflowToMermaid(nodeId: NodeId, exitPoints: NodeId[], subflow: DataflowFunctionFlowInformation | undefined, dataflowIdMap: DataflowMap | undefined, mermaid: MermaidGraph, idPrefix = ''): void { +function subflowToMermaid(nodeId: NodeId, exitPoints: readonly NodeId[], subflow: DataflowFunctionFlowInformation | undefined, dataflowIdMap: DataflowMap | undefined, mermaid: MermaidGraph, idPrefix = ''): void { if(subflow === undefined) { return } const subflowId = `${idPrefix}flow-${nodeId}` mermaid.nodeLines.push(`\nsubgraph "${subflowId}" [function ${nodeId}]`) - const subgraph = graphToMermaidGraph(subflow.graph, mermaid.rootGraph, dataflowIdMap, null, idPrefix, mermaid.includeEnvironments, mermaid.mark, mermaid.rootGraph) + const subgraph = graphToMermaidGraph(subflow.graph, { + graph: mermaid.rootGraph, + rootGraph: mermaid.rootGraph, + dataflowIdMap, + idPrefix, + includeEnvironments: mermaid.includeEnvironments, + mark: mermaid.mark, + prefix: null + }) mermaid.nodeLines.push(...subgraph.nodeLines) mermaid.edgeLines.push(...subgraph.edgeLines) for(const [color, pool] of [['purple', subflow.in], ['green', subflow.out], ['orange', subflow.unknownReferences]]) { @@ -82,42 +93,51 @@ function subflowToMermaid(nodeId: NodeId, exitPoints: NodeId[], subflow: Dataflo } -function printArg(arg: IdentifierReference | '' | 'empty' | undefined): string { - if(arg === 'empty') { - return '' - } - if(arg === undefined || arg === '') { +function printArg(arg: FunctionArgument | undefined): string { + if(arg === undefined) { + return '??' + } else if(arg === EmptyArgument) { + return '[empty]' + } else if(isNamedArgument(arg)) { + const deps = arg.controlDependencies ? 
', :maybe:' + arg.controlDependencies.join(',') : '' + return `${arg.name} (${arg.nodeId}${deps})` + } else if(isPositionalArgument(arg)) { + const deps = arg.controlDependencies ? ' (:maybe:' + arg.controlDependencies.join(',') + ')': '' + return `${arg.nodeId}${deps}` + } else { return '??' } - return `${arg.nodeId}` } -function displayFunctionArgMapping(argMapping: FunctionArgument[]): string { +function displayFunctionArgMapping(argMapping: readonly FunctionArgument[]): string { const result = [] for(const arg of argMapping) { - result.push(Array.isArray(arg) ? `${arg[0]} -> ${printArg(arg[1])}` : `${printArg(arg)}`) + result.push(printArg(arg)) } return result.length === 0 ? '' : `\n (${result.join(', ')})` } -function encodeEdge(from: string, to: string, types: Set, attribute: string): string { +function encodeEdge(from: string, to: string, types: Set): string { // sort from and to for same edges and relates be order independent - if(types.has(EdgeType.SameReadRead) || types.has(EdgeType.SameDefDef) || types.has(EdgeType.Relates)) { + if(types.has(EdgeType.SameReadRead) || types.has(EdgeType.SameDefDef)) { if(from > to) { ({ from, to } = { from: to, to: from }) } } - return `${from}->${to}["${[...types].join(':')} (${attribute})"]` + return `${from}->${to}["${[...types].join(':')}"]` } -function mermaidNodeBrackets(def: boolean, fCall: boolean) { +function mermaidNodeBrackets(tag: DataflowGraphVertexInfo['tag']): { open: string, close: string } { let open: string let close: string - if(def) { + if(tag === 'function-definition' || tag === 'variable-definition') { open = '[' close = ']' - } else if(fCall) { + } else if(tag === VertexType.FunctionCall) { open = '[[' close = ']]' + } else if(tag === 'value') { + open = '{{' + close = '}}' } else { open = '([' close = '])' @@ -125,16 +145,45 @@ function mermaidNodeBrackets(def: boolean, fCall: boolean) { return { open, close } } -function nodeToMermaid(graph: DataflowGraph, info: DataflowGraphVertexInfo, mermaid: 
MermaidGraph, id: NodeId, idPrefix: string, dataflowIdMap: DataflowMap | undefined, mark: Set | undefined): void { - const def = info.tag === 'variable-definition' || info.tag === 'function-definition' - const fCall = info.tag === 'function-call' - const defText = def ? scopeToMermaid(info.scope, info.when) : '' - const { open, close } = mermaidNodeBrackets(def, fCall) +function printIdentifier(id: IdentifierDefinition): string { + return `${id.name} (${id.nodeId}, ${id.kind},${id.controlDependencies? ' {' + id.controlDependencies.join(',') + '},' : ''} def. @${id.definedAt})` +} - if(mermaid.includeEnvironments) { - mermaid.nodeLines.push(` %% ${id}: ${JSON.stringify(info.environment, jsonReplacer)}`) +function printEnvironmentToLines(env: IEnvironment | undefined): string[] { + if(env === undefined) { + return ['??'] + } else if(env.id === BuiltInEnvironment.id) { + return ['Built-in'] + } + const lines = [...printEnvironmentToLines(env.parent), `${env.id}--${env.name}${'-'.repeat(40)}`] + const longestName = Math.max(...[...env.memory.keys()].map(x => x.length)) + for(const [name, defs] of env.memory.entries()) { + const printName = `${name}:` + lines.push(` ${printName.padEnd(longestName + 1, ' ')} {${defs.map(printIdentifier).join(', ')}}`) } - mermaid.nodeLines.push(` ${idPrefix}${id}${open}"\`${escapeMarkdown(info.name)} (${id}${defText})\n *${formatRange(dataflowIdMap?.get(id)?.location)}*${ + return lines +} + +function recoverConstantName(dataflowIdMap: DataflowMap | undefined, info: DataflowGraphVertexInfo): string { + const node = dataflowIdMap?.get(info.id) + return node ? `[${node.type}] ${node.lexeme ?? '??'}` : '??' 
+} + +function vertexToMermaid(info: DataflowGraphVertexInfo, mermaid: MermaidGraph, id: NodeId, idPrefix: string, dataflowIdMap: DataflowMap | undefined, mark: ReadonlySet | undefined): void { + const fCall = info.tag === VertexType.FunctionCall + const { open, close } = mermaidNodeBrackets(info.tag) + + if(info.environment && mermaid.includeEnvironments) { + if(info.environment.level > 0 || info.environment.current.memory.size !== 0) { + mermaid.nodeLines.push( + ` %% Environment of ${id} [level: ${info.environment.level}]:`, + printEnvironmentToLines(info.environment.current).map(x => ` %% ${x}`).join('\n')) + } + } + const escapedName = escapeMarkdown(info.name === CONSTANT_NAME ? recoverConstantName(dataflowIdMap, info) : info.name) + + const deps = info.controlDependencies ? ', :maybe:' + info.controlDependencies.join(',') : '' + mermaid.nodeLines.push(` ${idPrefix}${id}${open}"\`${escapedName}${escapedName.length > 10 ? '\n ' : ' '}(${id}${deps})\n *${formatRange(dataflowIdMap?.get(id)?.location)}*${ fCall ? displayFunctionArgMapping(info.args) : '' }\`"${close}`) if(mark?.has(id)) { @@ -143,12 +192,20 @@ function nodeToMermaid(graph: DataflowGraph, info: DataflowGraphVertexInfo, merm const edges = mermaid.rootGraph.get(id, true) guard(edges !== undefined, `node ${id} must be found`) - for(const [target, edge] of [...edges[1]]) { - const dotEdge = edge.types.has(EdgeType.SameDefDef) || edge.types.has(EdgeType.SameReadRead) || edge.types.has(EdgeType.Relates) - const edgeId = encodeEdge(idPrefix + id, idPrefix + target, edge.types, edge.attribute) + const artificialCdEdges = (info.controlDependencies ?? 
[]).map(x => [x, { types: new Set(['CD']) }] as const) + for(const [target, edge] of [...edges[1], ...artificialCdEdges]) { + const dotEdge = edge.types.has(EdgeType.SameDefDef) || edge.types.has(EdgeType.SameReadRead) + const edgeId = encodeEdge(idPrefix + id, idPrefix + target, edge.types) if(!mermaid.presentEdges.has(edgeId)) { mermaid.presentEdges.add(edgeId) - mermaid.edgeLines.push(` ${idPrefix}${id} ${dotEdge ? '-.-' : '-->'}|"${[...edge.types].join(', ')} (${edge.attribute})"| ${idPrefix}${target}`) + mermaid.edgeLines.push(` ${idPrefix}${id} ${dotEdge ? '-.-' : '-->'}|"${[...edge.types].join(', ')}"| ${idPrefix}${target}`) + if(mermaid.mark?.has(id + '->' + target)) { + // who invented this syntax?! + mermaid.edgeLines.push(` linkStyle ${mermaid.presentEdges.size - 1} stroke:red,color:red,stroke-width:4px;`) + } + if(edge.types.has('CD')) { + mermaid.edgeLines.push(` linkStyle ${mermaid.presentEdges.size - 1} stroke:gray,color:gray;`) + } if(target === BuiltIn) { mermaid.hasBuiltIn = true } @@ -159,14 +216,28 @@ function nodeToMermaid(graph: DataflowGraph, info: DataflowGraphVertexInfo, merm } } +interface MermaidGraphConfiguration { + graph: DataflowGraph, + dataflowIdMap: DataflowMap | undefined, + prefix?: string | null, + idPrefix?: string, + includeEnvironments?: boolean, + mark?: ReadonlySet, + rootGraph?: DataflowGraph, + presentEdges?: Set +} + // make the passing of root ids more performant again -function graphToMermaidGraph(rootIds: ReadonlySet, graph: DataflowGraph, dataflowIdMap: DataflowMap | undefined, prefix: string | null = 'flowchart TD', idPrefix = '', includeEnvironments = true, mark?: Set, rootGraph?: DataflowGraph): MermaidGraph { - const mermaid: MermaidGraph = { nodeLines: prefix === null ? [] : [prefix], edgeLines: [], presentEdges: new Set(), hasBuiltIn: false, mark, rootGraph: rootGraph ?? 
graph, includeEnvironments } +function graphToMermaidGraph( + rootIds: ReadonlySet, + { graph, dataflowIdMap, prefix = 'flowchart TD', idPrefix = '', includeEnvironments = true, mark, rootGraph, presentEdges = new Set() }: MermaidGraphConfiguration +): MermaidGraph { + const mermaid: MermaidGraph = { nodeLines: prefix === null ? [] : [prefix], edgeLines: [], presentEdges, hasBuiltIn: false, mark, rootGraph: rootGraph ?? graph, includeEnvironments } for(const [id, info] of graph.vertices(true)) { if(rootIds.has(id)) { - nodeToMermaid(graph, info, mermaid, id, idPrefix, dataflowIdMap, mark) + vertexToMermaid(info, mermaid, id, idPrefix, dataflowIdMap, mark) } } if(mermaid.hasBuiltIn) { @@ -175,9 +246,9 @@ function graphToMermaidGraph(rootIds: ReadonlySet, graph: DataflowGraph, return mermaid } -export function graphToMermaid(graph: DataflowGraph, dataflowIdMap: DataflowMap | undefined, prefix: string | null = 'flowchart TD', idPrefix = '', includeEnvironments?: boolean, mark?: Set, rootGraph?: DataflowGraph): string { - const mermaid = graphToMermaidGraph(graph.rootIds(), graph, dataflowIdMap, prefix, idPrefix, includeEnvironments, mark, rootGraph) - return `${mermaid.nodeLines.join('\n')}\n${mermaid.edgeLines.join('\n')}` +export function graphToMermaid(config: MermaidGraphConfiguration): { string: string, mermaid: MermaidGraph } { + const mermaid = graphToMermaidGraph(config.graph.rootIds(), config) + return { string: `${mermaid.nodeLines.join('\n')}\n${mermaid.edgeLines.join('\n')}`, mermaid } } /** @@ -188,20 +259,21 @@ export function graphToMermaid(graph: DataflowGraph, dataflowIdMap: DataflowMap * @param includeEnvironments - Whether to include the environments in the mermaid graph code * @param mark - Special nodes to mark (e.g. 
those included in the slice) */ -export function graphToMermaidUrl(graph: DataflowGraph, dataflowIdMap: DataflowMap, includeEnvironments?: boolean, mark?: Set): string { - return mermaidCodeToUrl(graphToMermaid(graph, dataflowIdMap, undefined, undefined, includeEnvironments, mark)) +export function graphToMermaidUrl(graph: DataflowGraph, dataflowIdMap: DataflowMap, includeEnvironments?: boolean, mark?: ReadonlySet): string { + return mermaidCodeToUrl(graphToMermaid({ graph, dataflowIdMap, includeEnvironments, mark }).string) } export interface LabeledDiffGraph { label: string graph: DataflowGraph + mark?: Set } /** uses same id map but ensures, it is different from the rhs so that mermaid can work with that */ export function diffGraphsToMermaid(left: LabeledDiffGraph, right: LabeledDiffGraph, dataflowIdMap: DataflowMap | undefined, prefix: string): string { // we add the prefix ourselves - const leftGraph = graphToMermaid(left.graph, dataflowIdMap, '', `l-${left.label}`) - const rightGraph = graphToMermaid(right.graph, dataflowIdMap, '', `r-${right.label}`) + const { string: leftGraph, mermaid } = graphToMermaid({ graph: left.graph, dataflowIdMap, prefix: '', idPrefix: `l-${left.label}`, includeEnvironments: true, mark: left.mark }) + const { string: rightGraph } = graphToMermaid({ graph: right.graph, dataflowIdMap, prefix: '', idPrefix: `r-${right.label}`, includeEnvironments: true, mark: right.mark, presentEdges: mermaid.presentEdges }) return `${prefix}flowchart TD\nsubgraph "${left.label}"\n${leftGraph}\nend\nsubgraph "${right.label}"\n${rightGraph}\nend` } diff --git a/src/util/mermaid/mermaid.ts b/src/util/mermaid/mermaid.ts index 8f0beea3b6..46f07e2299 100644 --- a/src/util/mermaid/mermaid.ts +++ b/src/util/mermaid/mermaid.ts @@ -1,5 +1,5 @@ export function escapeMarkdown(text: string): string { - return text.replaceAll(/([+*])/g, '\\$1').replaceAll('"', '\'\'') + return text.replaceAll(/([+*<>-])/g, '\\$1').replaceAll('"', '\'\'') } /** @@ -7,13 +7,12 @@ 
export function escapeMarkdown(text: string): string { * * @param code - code to convert */ -export function mermaidCodeToUrl(code: string): string { +export function mermaidCodeToUrl(code: string, edit = false): string { const obj = { code, - mermaid: {}, - updateEditor: false, - autoSync: true, - updateDiagram: false + mermaid: { + autoSync: true + } } - return `https://mermaid.live/edit#base64:${Buffer.from(JSON.stringify(obj)).toString('base64')}` + return `https://mermaid.live/${edit ? 'edit' : 'view'}#base64:${Buffer.from(JSON.stringify(obj)).toString('base64')}` } diff --git a/src/util/quads.ts b/src/util/quads.ts index 95681795a7..dbb9a3f995 100644 --- a/src/util/quads.ts +++ b/src/util/quads.ts @@ -35,8 +35,8 @@ export type QuadIgnoreIf = (key: string, value: unknown) => boolean /** * Deterministically retrieve a unique id for a given object. - * @param obj - the object to retrieve the id for - * @param context - In order to provide unique ids even for different contexts, we add the context to the id. + * @param obj - The object to retrieve the id for + * @param context - to provide unique ids even for different contexts, we add the context to the id. 
*/ export type QuadIdRetriever = (obj: unknown, context: ContextForQuad) => string @@ -279,7 +279,6 @@ function serializeObject(obj: DataForQuad | undefined | null, quads: Quad[], con } else if(obj instanceof Set) { let i = 0 for(const value of obj.values()) { - console.log('set', value) processObjectEntry('idx-'+String(i++), value, obj, quads, config) } } else { diff --git a/src/util/range.ts b/src/util/range.ts index 851ea577cf..274e44da9b 100644 --- a/src/util/range.ts +++ b/src/util/range.ts @@ -1,61 +1,72 @@ import { guard } from './assert' // xmlparsedata uses its own start and end only to break ties and calculates them on max col width approximation -export interface SourcePosition { +export type SourcePosition = [ /** starts with 1 */ - line: number + line: number, /** starts with 1 */ column: number -} +] -export interface SourceRange { +export type SourceRange = [ /** inclusive start position */ - readonly start: SourcePosition + startLine: number, + startColumn: number, /** inclusive end position */ - readonly end: SourcePosition + endLine: number, + endColumn: number +] + +export function getRangeStart(p: undefined): undefined +export function getRangeStart(p: SourceRange): SourcePosition +export function getRangeStart(p: SourceRange | undefined): SourcePosition | undefined +export function getRangeStart(p: SourceRange | undefined): SourcePosition | undefined { + return p === undefined ? undefined : [p[0], p[1]] +} + +export function getRangeEnd(p: undefined): undefined +export function getRangeEnd(p: SourceRange): SourcePosition +export function getRangeEnd(p: SourceRange | undefined): SourcePosition | undefined +export function getRangeEnd(p: SourceRange | undefined): SourcePosition | undefined { + return p === undefined ? undefined : [p[2], p[3]] } /** - * at the moment this does not ensure ordering of start and end! + * This does not ensure ordering of start and end! 
+ * + * @param sl - start line + * @param sc - start column + * @param el - end line + * @param ec - end column */ -export function rangeFrom(line1: number | string, col1: number | string, line2: number | string, col2: number | string): SourceRange { - return { - start: { - line: Number(line1), - column: Number(col1) - }, - end: { - line: Number(line2), - column: Number(col2) - } - } +export function rangeFrom(sl: number | string, sc: number | string, el: number | string, ec: number | string): SourceRange { + return [Number(sl), Number(sc), Number(el), Number(ec)] } export function mergeRanges(...rs: SourceRange[]): SourceRange { guard(rs.length > 0, 'Cannot merge no ranges') - - return { - start: rs.reduce((acc, r) => acc.line < r.start.line || (acc.line === r.start.line && acc.column < r.start.column) ? acc : r.start, rs[0].start), - end: rs.reduce((acc, r) => acc.line > r.end.line || (acc.line === r.end.line && acc.column > r.end.column) ? acc : r.end, rs[0].end) - } + return rs.reduce(([sl, sc, el, ec], [nsl, nsc, nel, nec]) => [ + ...(sl < nsl || (sl === nsl && sc < nsc) ? [sl, sc] : [nsl, nsc]), + ...(el > nel || (el === nel && ec > nec) ? [el, ec] : [nel, nec]) + ] as SourceRange, rs[0]) } /** * @returns true iff `r1` starts and ends before `r2` starts (i.e., if `r1` and `r2` do not overlap and `r1` comes before `r2` */ -export function rangeStartsCompletelyBefore(r1: SourceRange, r2: SourceRange): boolean { - return r1.end.line < r2.start.line || (r1.end.line === r2.start.line && r1.end.column < r2.start.column) +export function rangeStartsCompletelyBefore([,,r1el,r1ec]: SourceRange, [r2sl,r2sc,,]: SourceRange): boolean { + return r1el < r2sl || (r1el === r2sl && r1ec < r2sc) } /** * Checks if the two ranges overlap. 
*/ -export function rangesOverlap(r1: SourceRange, r2: SourceRange): boolean { - return r1.start.line <= r2.end.line && r2.start.line <= r1.end.line && r1.start.column <= r2.end.column && r2.start.column <= r1.end.column +export function rangesOverlap([r1sl,r1sc,r1el,r1ec]: SourceRange, [r2sl,r2sc,r2el,r2ec]: SourceRange): boolean { + return r1sl <= r2el && r2sl <= r1el && r1sc <= r2ec && r2sc <= r1ec } -export function addRanges(r1: SourceRange, r2: SourceRange): SourceRange { - return rangeFrom(r1.start.line + r2.start.line, r1.start.column + r2.start.column, r1.end.line + r2.end.line, r1.end.column + r2.end.column) +export function addRanges([r1sl,r1sc,r1el,r1ec]: SourceRange, [r2sl,r2sc,r2el,r2ec]: SourceRange): SourceRange { + return [r1sl+r2sl, r1sc+r2sc, r1el+r2el, r1ec+r2ec] } /** @@ -63,10 +74,10 @@ export function addRanges(r1: SourceRange, r2: SourceRange): SourceRange { * * @returns a positive number if `r1` comes after `r2`, a negative number if `r1` comes before `r2`, and `0` if they are equal */ -export function rangeCompare(r1: SourceRange, r2: SourceRange): number { - if(r1.start.line === r2.start.line) { - return r1.start.column - r2.start.column +export function rangeCompare([r1sl,r1sc,,]: SourceRange, [r2sl,r2sc,,]: SourceRange): number { + if(r1sl === r2sl) { + return r1sc - r2sc } else { - return r1.start.line - r2.start.line + return r1sl - r2sl } } diff --git a/test/functionality/_helper/ast-builder.ts b/test/functionality/_helper/ast-builder.ts index 2305255afc..2e8f544da5 100644 --- a/test/functionality/_helper/ast-builder.ts +++ b/test/functionality/_helper/ast-builder.ts @@ -1,11 +1,11 @@ -import { RType } from '../../../src/r-bridge' import type { RExpressionList, RNode, RParameter, RNumberValue } from '../../../src/r-bridge' +import { RType } from '../../../src/r-bridge' import type { SourceRange } from '../../../src/util/range' -const emptyInfo = { fullRange: undefined, additionalTokens: [], fullLexeme: undefined } +const emptyInfo = 
{ fullRange: undefined, additionalTokens: [], fullLexeme: undefined, depth: 0 } export function exprList(...children: RNode[]): RExpressionList { - return { type: RType.ExpressionList, children, lexeme: undefined, info: emptyInfo } + return { type: RType.ExpressionList, children, lexeme: undefined, info: emptyInfo, grouping: undefined, location: undefined } } export function numVal(value: number, markedAsInt = false, complexNumber = false): RNumberValue { return { num: value, markedAsInt, complexNumber } diff --git a/test/functionality/_helper/dataflow/dataflow-builder-printer.ts b/test/functionality/_helper/dataflow/dataflow-builder-printer.ts new file mode 100644 index 0000000000..a05290ddce --- /dev/null +++ b/test/functionality/_helper/dataflow/dataflow-builder-printer.ts @@ -0,0 +1,320 @@ +/** + * The builder printer takes a dataflow graph and produces a string-code representation of what a builder would look like to create the graph. + * The goal is to create syntactically correct TypeScript code in a best-effort approach. + */ +import type { + DataflowGraph, + DataflowGraphVertexFunctionCall, DataflowGraphVertexFunctionDefinition, + DataflowGraphVertexInfo, + DataflowGraphVertexUse, + FunctionArgument, + REnvironmentInformation +} from '../../../../src/dataflow' +import { isPositionalArgument, + EdgeType, + VertexType +} from '../../../../src/dataflow' +import type { NodeId } from '../../../../src' +import { EmptyArgument } from '../../../../src' +import { assertUnreachable, isNotUndefined } from '../../../../src/util/assert' +import { DefaultMap } from '../../../../src/util/defaultmap' +import { EnvironmentBuilderPrinter } from './environment-builder-printer' +import { wrap, wrapReference } from './printer' + + +/** we add the node id to allow convenience sorting if we want that in the future (or grouping or, ...) 
*/ +type Lines = [NodeId, string][] + + +export function printAsBuilder(graph: DataflowGraph): string { + return new DataflowBuilderPrinter(graph).print() +} + +const EdgeTypeFnMap: Record = { + [EdgeType.Reads]: 'reads', + [EdgeType.DefinedBy]: 'definedBy', + [EdgeType.SameReadRead]: 'sameRead', + [EdgeType.SameDefDef]: 'sameDef', + [EdgeType.Calls]: 'calls', + [EdgeType.Returns]: 'returns', + [EdgeType.DefinesOnCall]: 'definesOnCall', + [EdgeType.Argument]: 'argument', + [EdgeType.NonStandardEvaluation]: 'nse', + [EdgeType.SideEffectOnCall]: 'sideEffectOnCall', + /* treated specially as done by automated mirroring */ + [EdgeType.DefinedByOnCall]: undefined +} + +class DataflowBuilderPrinter { + private lines: Lines = [] + private graph: DataflowGraph + private rootIds: Set + private coveredVertices: Set = new Set() + private coveredEdges: Set = new Set() + + constructor(graph: DataflowGraph) { + this.rootIds = new Set(graph.rootIds()) + this.graph = graph + } + + private process() { + // we start by processing all uses and calls as they can automate a lot of things + this.processUseInitial() + this.processCalls() + for(const [id, vertex] of this.graph.vertices(true)) { + this.processVertex(id, vertex) + } + } + + private processUseInitial() { + for(const [id, vertex] of this.graph.vertices(true)) { + if(vertex.tag === 'use') { + const res = this.processUseVertexInitial(id, vertex) + if(res) { + this.processEdges(id) + } + } + } + } + private processCalls() { + for(const [id, vertex] of this.graph.vertices(true)) { + if(vertex.tag === VertexType.FunctionCall) { + this.processVertex(id, vertex) + } + } + } + + private groupEdgeTypesFrom(id: NodeId): DefaultMap { + const outgoing = this.graph.outgoingEdges(id) + const map: DefaultMap = new DefaultMap(() => []) + if(outgoing) { + for(const [target, edge] of outgoing) { + for(const type of edge.types) { + map.get(type).push(target) + } + } + } + return map + } + + private processCall(id: NodeId, vertex: 
DataflowGraphVertexFunctionCall) { + const outgoing = this.groupEdgeTypesFrom(id) + + const returns = outgoing.get(EdgeType.Returns) + const reads = outgoing.get(EdgeType.Reads) + + for(const target of returns ?? []) { + this.coveredEdges.add(edgeId(id, target, EdgeType.Returns)) + } + for(const target of reads ?? []) { + this.coveredEdges.add(edgeId(id, target, EdgeType.Reads)) + } + + let readSuffix = '' + if(reads.length > 1 && vertex.onlyBuiltin) { + readSuffix = ', onlyBuiltIn: true' + } + this.recordFnCall(id,'call', [ + wrap(id), + wrap(vertex.name), + `[${vertex.args.map(a => this.processArgumentInCall(vertex.id, a)).join(', ')}]`, + `{ returns: [${returns?.map(wrap).join(', ') ?? ''}], reads: [${reads?.map(wrap).join(', ') ?? ''}]${readSuffix}${this.getControlDependencySuffix(vertex.controlDependencies, ', ', '') ?? ''}${this.getEnvironmentSuffix(vertex.environment, ', ', '') ?? ''} }`, + this.asRootArg(id) + ]) + } + + private asRootArg(id: NodeId) { + return this.rootIds.has(id) ? undefined : 'false' + } + + private processArgumentInCall(fn: NodeId, arg: FunctionArgument | undefined): string { + if(arg === undefined || arg === EmptyArgument) { + return 'EmptyArgument' + } else if(isPositionalArgument(arg)) { + const suffix = this.getControlDependencySuffix(this.controlDependencyForArgument(arg.nodeId), ', { ') ?? '' + this.handleArgumentArgLinkage(fn, arg.nodeId) + return `argumentInCall('${arg.nodeId}'${suffix})` + } else { + this.coveredVertices.add(arg.nodeId) + this.handleArgumentArgLinkage(fn, arg.nodeId) + const suffix = this.getControlDependencySuffix(this.controlDependencyForArgument(arg.nodeId), ', ', '') ?? 
'' + return `argumentInCall('${arg.nodeId}', { name: '${arg.name}'${suffix} } )` + } + } + + private handleArgumentArgLinkage(fn: NodeId, id: NodeId) { + if(typeof id === 'string' && id.endsWith('-arg')) { + const withoutSuffix = id.slice(0, -4) + this.coveredEdges.add(edgeId(id, withoutSuffix, EdgeType.Reads)) + if(!this.graph.hasVertex(withoutSuffix, true)) { + // we have to add the argument linkage manually + this.recordFnCall(fn, 'argument', [wrap(fn), wrap(id)]) + this.coveredEdges.add(edgeId(fn, id, EdgeType.Argument)) + } + } else if(this.coveredVertices.has(id)) { + this.recordFnCall(fn, 'argument', [wrap(fn), wrap(id)]) + this.coveredEdges.add(edgeId(fn, id, EdgeType.Argument)) + } + } + + private controlDependencyForArgument(id: NodeId): NodeId[] | undefined { + // we ignore the control dependency of the argument in the call as it is usually separate, and the auto creation + // will respect the corresponding node! + return this.graph.getVertex(id, true)?.controlDependencies + } + + private processVertex(id: NodeId, vertex: DataflowGraphVertexInfo): void { + if(this.coveredVertices.has(id)) { + // otherwise at the end to have a fresh covered edges cache + this.processEdges(id) + return + } + this.coveredVertices.add(id) + const tag = vertex.tag + switch(tag) { + case VertexType.FunctionCall: + this.processCall(id, vertex) + break + case VertexType.Use: + this.processVertexUse(id, vertex) + break + case VertexType.Value: { + const root = this.asRootArg(id) + this.recordFnCall(id, 'constant', [ + wrap(id), + this.getControlDependencySuffix(vertex.controlDependencies) ?? (root ? 
'undefined' : undefined), + root + ]) + break + } case VertexType.VariableDefinition: + this.processVariableDefinition(id, vertex) + break + case VertexType.FunctionDefinition: + this.processFunctionDefinition(id, vertex) + break + default: + assertUnreachable(tag) + } + this.processEdges(id) + } + + private processUseVertexInitial(id: NodeId, vertex: DataflowGraphVertexUse): boolean { + // if the id ends in arg and there is a vertex without the arg suffix we reset the vertex use and wait for the call + if(typeof id === 'string' && id.endsWith('-arg') && this.graph.hasVertex(id.slice(0, -4), true)) { + return false + } + this.coveredVertices.add(id) + this.processVertexUse(id, vertex) + return true + } + + private processVertexUse(id: NodeId, vertex: DataflowGraphVertexUse) { + const root = this.asRootArg(id) + this.recordFnCall(id, 'use', [ + wrap(id), + wrap(vertex.name), + this.getControlDependencySuffix(vertex.controlDependencies) ?? (root ? 'undefined' : undefined), + root + ]) + } + + private processFunctionDefinition(id: NodeId, vertex: DataflowGraphVertexFunctionDefinition) { + const root = this.asRootArg(id) + const suffix = this.getEnvironmentSuffix(vertex.environment, '{ ', ' }') ?? (root ? 
'undefined' : undefined) + this.recordFnCall(id,'defineFunction', [ + wrap(id), + wrap(vertex.name), + `[${vertex.exitPoints.map(wrap).join(', ')}]`, + `{ + out: [${vertex.subflow.out.map(wrapReference).join(', ')}], + in: [${vertex.subflow.in.map(wrapReference).join(', ')}], + unknownReferences: [${vertex.subflow.unknownReferences.map(wrapReference).join(', ')}], + entryPoint: ${wrap(vertex.subflow.entryPoint)}, + graph: new Set([${[...vertex.subflow.graph].map(wrap).join(', ')}]), + environment: ${new EnvironmentBuilderPrinter(vertex.subflow.environment).print()} + }`, suffix, root + ]) + } + + + private processVariableDefinition(id: NodeId, vertex: DataflowGraphVertexInfo) { + const definedBy = this.groupEdgeTypesFrom(id).get(EdgeType.DefinedBy) + + for(const target of definedBy ?? []) { + this.coveredEdges.add(edgeId(id, target, EdgeType.DefinedBy)) + } + + this.recordFnCall(id,'defineVariable', [ + wrap(id), + wrap(vertex.name), + '{ definedBy: [' + (definedBy?.map(wrap).join(', ') ?? '') + ']' + (this.getControlDependencySuffix(vertex.controlDependencies, ', ', '') ?? '') + ' }', + this.asRootArg(id) + ]) + } + + private getControlDependencySuffix(arg: NodeId[] | undefined, prefix: string = '{ ', suffix: string = ' }'): string | undefined { + if(arg !== undefined) { + return `${prefix}controlDependency: [${arg.map(id => wrap(id)).join(', ')}]${suffix}` + } + return undefined + } + + private getEnvironmentSuffix(env: REnvironmentInformation | undefined, prefix: string = '{ ', suffix: string = ' }'): string | undefined { + if(env === undefined) { + return undefined + } + const printed = new EnvironmentBuilderPrinter(env).print() + return printed === '' ? 
undefined : `${prefix}environment: ${printed}${suffix}` + } + + private processEdges(id: NodeId): void { + const outgoing = this.groupEdgeTypesFrom(id) + if(!outgoing) { + return + } + for(const [type, edges] of outgoing.entries()) { + const remainingEdges = edges.filter(target => !this.coveredEdges.has(edgeId(id, target, type))) + this.processEdge(id, type, remainingEdges) + } + } + + private processEdge(from: NodeId, type: EdgeType, to: NodeId[]): void { + if(to.length === 0) { + return + } + for(const target of to) { + this.coveredEdges.add(edgeId(from, target, type)) + } + + const mappedName = EdgeTypeFnMap[type] + if(mappedName === undefined) { + // we ignore this edge type as it is a special case + if(type !== EdgeType.DefinedByOnCall) { + console.log('TODO: edge type', type) + } + return + } + this.recordFnCall(from, mappedName, [wrap(from), this.optionalArrayWrap(to)]) + } + + private optionalArrayWrap(to: NodeId[]) { + return to.length === 1 ? wrap(to[0]) : `[${to.map(wrap).join(', ')}]` + } + + private recordFnCall(id: NodeId, name: string, args: (string | undefined)[]): void { + this.lines.push([id, ` .${name}(${args.filter(isNotUndefined).join(', ')})`]) + } + + public print(): string { + this.process() + return 'emptyGraph()\n' + this.lines.map(([, line]) => line).join('\n') + } +} + +function edgeId(from: NodeId, to: NodeId, type: EdgeType): string { + if(type === EdgeType.SameReadRead || type === EdgeType.SameDefDef) { + // we don't care about the direction + [from, to] = from > to ? 
[to, from] : [from, to] + } + return `${from}->${to}[${type}]` +} diff --git a/test/functionality/_helper/dataflow/dataflowgraph-builder.ts b/test/functionality/_helper/dataflow/dataflowgraph-builder.ts new file mode 100644 index 0000000000..535d8b1302 --- /dev/null +++ b/test/functionality/_helper/dataflow/dataflowgraph-builder.ts @@ -0,0 +1,296 @@ +import type { NodeId } from '../../../../src' +import { EmptyArgument } from '../../../../src' +import type { + DataflowFunctionFlowInformation, + DataflowGraphVertexUse, + FunctionArgument, + REnvironmentInformation +} from '../../../../src/dataflow' +import { + BuiltIn, + CONSTANT_NAME, + DataflowGraph, + EdgeType, + isPositionalArgument, + VertexType +} from '../../../../src/dataflow' +import { deepMergeObject } from '../../../../src/util/objects' +import { normalizeIdToNumberIfPossible } from '../../../../src/r-bridge/lang-4.x/ast/model/processing/node-id' + +export function emptyGraph() { + return new DataflowGraphBuilder() +} + +export type DataflowGraphEdgeTarget = NodeId | (readonly NodeId[]); + +/** + * This DataflowGraphBuilder extends {@link DataflowGraph} with builder methods to + * easily and compactly add vertices and edges to a dataflow graph. Its usage thus + * simplifies writing tests for dataflow graphs. + */ +export class DataflowGraphBuilder extends DataflowGraph { + /** + * Adds a **vertex** for a **function definition** (V1). + * + * @param id - AST node ID + * @param name - AST node text + * @param subflow - Subflow data graph for the defined function. + * @param exitPoints - Node IDs for exit point vertices. + * @param info - Additional/optional properties. 
+ * @param asRoot - should the vertex be part of the root vertex set of the graph + * (i.e., be a valid entry point), or is it nested (e.g., as part of a function definition) + */ + public defineFunction(id: NodeId, name: string, + exitPoints: readonly NodeId[], subflow: DataflowFunctionFlowInformation, + info?: { environment?: REnvironmentInformation, controlDependency?: NodeId[] }, + asRoot: boolean = true) { + return this.addVertex({ + tag: VertexType.FunctionDefinition, + id: normalizeIdToNumberIfPossible(id), + name, + subflow: { + ...subflow, + entryPoint: normalizeIdToNumberIfPossible(subflow.entryPoint), + graph: new Set([...subflow.graph].map(normalizeIdToNumberIfPossible)), + out: subflow.out.map(o => ({ ...o, nodeId: normalizeIdToNumberIfPossible(o.nodeId), controlDependencies: o.controlDependencies?.map(normalizeIdToNumberIfPossible) })), + in: subflow.in.map(o => ({ ...o, nodeId: normalizeIdToNumberIfPossible(o.nodeId), controlDependencies: o.controlDependencies?.map(normalizeIdToNumberIfPossible) })), + unknownReferences: subflow.unknownReferences.map(o => ({ ...o, nodeId: normalizeIdToNumberIfPossible(o.nodeId), controlDependencies: o.controlDependencies?.map(normalizeIdToNumberIfPossible) })) + } as DataflowFunctionFlowInformation, + exitPoints: exitPoints.map(normalizeIdToNumberIfPossible), + controlDependencies: info?.controlDependency?.map(normalizeIdToNumberIfPossible), + environment: info?.environment + }, asRoot) + } + + /** + * Adds a **vertex** for a **function call** (V2). + * + * @param id - AST node ID + * @param name - Function name + * @param args - Function arguments; may be empty + * @param info - Additional/optional properties. 
+ * @param asRoot - should the vertex be part of the root vertex set of the graph + * (i.e., be a valid entry point), or is it nested (e.g., as part of a function definition) + */ + public call(id: NodeId, name: string, args: FunctionArgument[], + info?: { + returns?: readonly NodeId[], + reads?: readonly NodeId[], + onlyBuiltIn?: boolean, + environment?: REnvironmentInformation, + controlDependency?: NodeId[] + }, + asRoot: boolean = true) { + const onlyBuiltInAuto = info?.reads?.length === 1 && info?.reads[0] === BuiltIn + this.addVertex({ + tag: VertexType.FunctionCall, + id: normalizeIdToNumberIfPossible(id), + name, + args: args.map(a => a === EmptyArgument ? EmptyArgument : { ...a, nodeId: normalizeIdToNumberIfPossible(a.nodeId), controlDependency: undefined }), + environment: info?.environment, + controlDependencies: info?.controlDependency?.map(normalizeIdToNumberIfPossible), + onlyBuiltin: info?.onlyBuiltIn ?? onlyBuiltInAuto ?? false + }, asRoot) + this.addArgumentLinks(id, args) + if(info?.returns) { + for(const ret of info.returns) { + this.returns(id, ret) + } + } + if(info?.reads) { + for(const call of info.reads) { + this.reads(id, call) + } + } + return this + } + + /** automatically adds argument links if they do not already exist */ + private addArgumentLinks(id: NodeId, args: readonly FunctionArgument[]) { + for(const arg of args) { + if(arg === EmptyArgument) { + continue + } + if(isPositionalArgument(arg)) { + this.argument(id, arg.nodeId) + if(typeof arg.nodeId === 'string' && arg.nodeId.endsWith('-arg')) { + const withoutSuffix = arg.nodeId.slice(0, -4) + this.reads(arg.nodeId, withoutSuffix) + } + } else if(!this.hasVertex(arg.nodeId, true)) { + this.use(arg.nodeId, arg.name, { controlDependencies: arg.controlDependencies }) + this.argument(id, arg.nodeId) + } + } + } + + /** + * Adds a **vertex** for a **variable definition** (V4). 
+ * + * @param id - AST node ID + * @param name - Variable name + * @param info - Additional/optional properties. + * @param asRoot - Should the vertex be part of the root vertex set of the graph + * (i.e., be a valid entry point), or is it nested (e.g., as part of a function definition) + */ + public defineVariable(id: NodeId, name: string, + info?: { controlDependency?: NodeId[], definedBy?: NodeId[]}, asRoot: boolean = true) { + this.addVertex({ + tag: VertexType.VariableDefinition, + id: normalizeIdToNumberIfPossible(id), + name, + controlDependencies: info?.controlDependency?.map(normalizeIdToNumberIfPossible), + }, asRoot) + if(info?.definedBy) { + for(const def of info.definedBy) { + this.definedBy(id, def) + } + } + return this + } + + /** + * Adds a **vertex** for **variable use** (V5). Intended for creating dataflow graphs as part of function tests. + * + * @param id - AST node id + * @param name - Variable name + * @param info - Additional/optional properties; i.e., scope, when, or environment. + * @param asRoot - should the vertex be part of the root vertex set of the graph + * (i.e., be a valid entry point) or is it nested (e.g., as part of a function definition) + */ + public use(id: NodeId, name: string, info?: Partial, asRoot: boolean = true) { + return this.addVertex(deepMergeObject({ + tag: VertexType.Use, + id: normalizeIdToNumberIfPossible(id), + name, + controlDependencies: undefined, + environment: undefined + }, { + ...info, + controlDependencies: info?.controlDependencies?.map(normalizeIdToNumberIfPossible) + } as Partial), asRoot) + } + + + /** + * Adds a **vertex** for a **constant value** (V6). 
+ * + * @param id - AST node ID + * @param options - Additional/optional properties; + * @param asRoot - should the vertex be part of the root vertex set of the graph + * (i.e., be a valid entry point), or is it nested (e.g., as part of a function definition) + */ + public constant(id: NodeId, options?: { controlDependency?: NodeId[] }, asRoot: boolean = true) { + return this.addVertex({ + tag: VertexType.Value, + name: CONSTANT_NAME, + id: normalizeIdToNumberIfPossible(id), + controlDependencies: options?.controlDependency?.map(normalizeIdToNumberIfPossible), + environment: undefined + }, asRoot) + } + + private edgeHelper(from: NodeId, to: DataflowGraphEdgeTarget, type: EdgeType) { + if(Array.isArray(to)) { + for(const t of to) { + this.edgeHelper(from, t as NodeId, type) + } + return this + } + return this.addEdge(normalizeIdToNumberIfPossible(from), normalizeIdToNumberIfPossible(to as NodeId), { type }) + } + + /** + * Adds a **read edge** (E1). + * + * @param from - Vertex/NodeId + * @param to - see from + */ + public reads(from: NodeId, to: DataflowGraphEdgeTarget) { + return this.edgeHelper(from, to, EdgeType.Reads) + } + + /** + * Adds a **defined-by edge** (E2), with from as defined variable, and to + * as a variable/function contributing to its definition. + * + * @see reads for parameters. + */ + public definedBy(from: NodeId, to: DataflowGraphEdgeTarget) { + return this.edgeHelper(from, to, EdgeType.DefinedBy) + } + + /** + * Adds a **same-read-read edge** (E3), with from and to as two variable uses + * on the same variable. + * + * @see reads for parameters. + */ + public sameRead(from: NodeId, to: DataflowGraphEdgeTarget) { + return this.edgeHelper(from, to, EdgeType.SameReadRead) + } + + /** + * Adds a **same-def-def edge** (E4), with from and to as two variables + * that share a defining variable. + * + * @see reads for parameters. 
+ */ + public sameDef(from: NodeId, to: DataflowGraphEdgeTarget) { + return this.edgeHelper(from, to, EdgeType.SameDefDef) + } + + /** + * Adds a **call edge** (E5) with from as caller, and to as callee. + * + * @see reads for parameters. + */ + public calls(from: NodeId, to: DataflowGraphEdgeTarget) { + return this.edgeHelper(from, to, EdgeType.Calls) + } + + /** + * Adds a **return edge** (E6) with from as function, and to as exit point. + * + * @see reads for parameters. + */ + public returns(from: NodeId, to: DataflowGraphEdgeTarget) { + return this.edgeHelper(from, to, EdgeType.Returns) + } + + /** + * Adds a **defines-on-call edge** (E7) with from as variable, and to as its definition + * + * @see reads for parameters. + */ + public definesOnCall(from: NodeId, to: DataflowGraphEdgeTarget) { + return this.edgeHelper(from, to, EdgeType.DefinesOnCall) + } + + /** + * Adds an **argument edge** (E9) with from as function call, and to as argument. + * + * @see reads for parameters. + */ + public argument(from: NodeId, to: DataflowGraphEdgeTarget) { + return this.edgeHelper(from, to, EdgeType.Argument) + } + + /** + * Adds a **non-standard evaluation edge** with from as vertex, and to as vertex. + * + * @see reads for parameters. + */ + public nse(from: NodeId, to: DataflowGraphEdgeTarget) { + return this.edgeHelper(from, to, EdgeType.NonStandardEvaluation) + } + + /** + * Adds a **side-effect-on-call edge** with from as vertex, and to as vertex. + * + * @see reads for parameters. 
+ */ + public sideEffectOnCall(from: NodeId, to: DataflowGraphEdgeTarget) { + return this.edgeHelper(from, to, EdgeType.SideEffectOnCall) + } +} diff --git a/test/functionality/_helper/dataflow/environment-builder-printer.ts b/test/functionality/_helper/dataflow/environment-builder-printer.ts new file mode 100644 index 0000000000..625b31af12 --- /dev/null +++ b/test/functionality/_helper/dataflow/environment-builder-printer.ts @@ -0,0 +1,97 @@ +import type { IdentifierDefinition, IEnvironment, REnvironmentInformation } from '../../../../src/dataflow' +import { BuiltInEnvironment } from '../../../../src/dataflow' +import { assertUnreachable, isNotUndefined } from '../../../../src/util/assert' +import { wrap } from './printer' + +export class EnvironmentBuilderPrinter { + private env: REnvironmentInformation + private lines: string[] = [] + + constructor(env: REnvironmentInformation) { + this.env = env + } + + private process() { + let current = this.env.current + let i = this.env.level + while(current !== undefined && current.id !== BuiltInEnvironment.id) { + if(i-- > 0) { + this.push() + } + this.processEnvironment(current) + current = current.parent + } + } + + private processEnvironment(env: IEnvironment) { + for(const [name, defs] of env.memory.entries()) { + for(const def of defs) { + this.processDefinition(name, def) + } + } + } + + private processDefinition(name: string, def: IdentifierDefinition) { + const kind = def.kind + switch(kind) { + case 'variable': + this.recordFnCall('defineVariable', [ + wrap(name), + wrap(def.nodeId), + wrap(def.definedAt), + this.getControlDependencyArgument(def) + ]) + break + case 'function': + this.recordFnCall('defineFunction', [ + wrap(name), + wrap(def.nodeId), + wrap(def.definedAt), + this.getControlDependencyArgument(def) + ]) + break + case 'built-in-value': + case 'built-in-function': + /* shouldn't happen, only we can define built-in stuff */ + break + case 'argument': + this.recordFnCall('defineArgument', [ + 
wrap(name), + wrap(def.nodeId), + wrap(def.definedAt), + this.getControlDependencyArgument(def) + ]) + break + case 'parameter': + this.recordFnCall('defineParameter', [ + wrap(name), + wrap(def.nodeId), + wrap(def.definedAt), + this.getControlDependencyArgument(def) + ]) + break + default: + assertUnreachable(kind) + } + } + + private getControlDependencyArgument(def: IdentifierDefinition) { + return def.controlDependencies ? `[${def.controlDependencies.map(wrap).join(', ')}]` : undefined + } + + private push() { + this.recordFnCall('pushEnv', []) + } + + private recordFnCall(name: string, args: (string | undefined)[]): void { + this.lines.push(`.${name}(${args.filter(isNotUndefined).join(', ')})`) + } + + public print(): string { + if(this.env.level === 0 && this.env.current.memory.size === 0) { + return '' + } + this.process() + return 'defaultEnv()' + this.lines.join('') + } +} diff --git a/test/functionality/_helper/dataflow/environment-builder.ts b/test/functionality/_helper/dataflow/environment-builder.ts new file mode 100644 index 0000000000..28ffff937e --- /dev/null +++ b/test/functionality/_helper/dataflow/environment-builder.ts @@ -0,0 +1,159 @@ +import type { NodeId } from '../../../../src' +import type { FunctionArgument, IdentifierDefinition, REnvironmentInformation, Environment } from '../../../../src/dataflow' +import { + initializeCleanEnvironments +} from '../../../../src/dataflow' +import { + appendEnvironment, + define, + popLocalEnvironment, + pushLocalEnvironment +} from '../../../../src/dataflow/environments' +import { normalizeIdToNumberIfPossible } from '../../../../src/r-bridge/lang-4.x/ast/model/processing/node-id' + +export function variable(name: string, definedAt: NodeId): IdentifierDefinition { + return { name, kind: 'variable', nodeId: '_0', definedAt, controlDependencies: undefined } +} + +/** + * Provides a FunctionArgument to use with function call vertices. 
+ * @param nodeId - AST Node ID + * @param options - optional allows to give further options + */ +export function argumentInCall(nodeId: NodeId, options?: { name?: string, controlDependency?: NodeId[] }): FunctionArgument { + return { nodeId: normalizeIdToNumberIfPossible(nodeId), name: options?.name, controlDependencies: options?.controlDependency?.map(normalizeIdToNumberIfPossible) } +} +/** + * The constant global environment with all pre-defined functions. + */ +export const defaultEnv = () => { + const global = initializeCleanEnvironments() + return new EnvironmentBuilder(global.current, 0) +} + +/** + * EnvironmentBuilder extends REnvironmentInformation with builder pattern methods. + */ +export class EnvironmentBuilder implements REnvironmentInformation { + /** + * Use global environment. + */ + current: Environment + /** + * Level is 0. + */ + level: number + + constructor(env: Environment, level: number) { + this.current = env + this.level = level + } + + /** + * Defines a new argument in the top environment. + * @param name - Argument name + * @param nodeId - AST Node ID of usage + * @param definedAt - AST Node ID of definition + * @param controlDependencies - Control dependencies + */ + defineArgument(name: string, nodeId: NodeId, definedAt: NodeId, controlDependencies: NodeId[] | undefined = undefined) { + return this.defineInEnv({ + kind: 'argument', + name, + definedAt, + nodeId, + controlDependencies }) + } + + /** + * Defines a new function in the top environment. + * @param name - Function name + * @param nodeId - AST Node ID of usage + * @param definedAt - AST Node ID of definition + * @param controlDependencies - Control dependencies + */ + defineFunction(name: string, nodeId: NodeId, definedAt: NodeId, controlDependencies: NodeId[] | undefined = undefined) { + return this.defineInEnv({ + kind: 'function', + name, + definedAt, + nodeId, + controlDependencies + }) + } + + /** + * Defines a new parameter in the top environment. 
+ * @param name - Parameter name + * @param nodeId - AST Node ID of usage + * @param definedAt - AST Node ID of definition + * @param controlDependencies - Control dependencies + * */ + defineParameter(name: string, nodeId: NodeId, definedAt: NodeId, controlDependencies: NodeId[] | undefined = undefined) { + return this.defineInEnv({ + kind: 'parameter', + name, + definedAt, + nodeId, + controlDependencies + }) + } + + /** + * Defines a new parameter in the top environment. + * @param name - Variable name + * @param nodeId - AST Node ID of usage + * @param definedAt - AST Node ID of definition + * @param controlDependencies - Control dependencies + */ + defineVariable(name: string, nodeId: NodeId, definedAt: NodeId = nodeId, controlDependencies: NodeId[] | undefined = undefined) { + return this.defineInEnv({ + kind: 'variable', + name, + definedAt, + nodeId, + controlDependencies + }) + } + + /** + * Adds definitions to the current environment. + * @param def - Definition to add. + * @param superAssignment - If true, the definition is treated as if defined by a super assignment. + */ + defineInEnv(def: IdentifierDefinition, superAssignment = false) { + const envWithDefinition = define({ + ...def, + definedAt: normalizeIdToNumberIfPossible(def.definedAt), + nodeId: normalizeIdToNumberIfPossible(def.nodeId), + controlDependencies: def.controlDependencies?.map(normalizeIdToNumberIfPossible) + } as IdentifierDefinition, superAssignment, this) + return new EnvironmentBuilder(envWithDefinition.current, envWithDefinition.level) + } + + /** + * Adds a new, local environment on the environment stack and returns it. + */ + pushEnv(): EnvironmentBuilder { + const newEnvironment = pushLocalEnvironment(this) + return new EnvironmentBuilder(newEnvironment.current, newEnvironment.level) + } + + /** + * Pops the last environment (must be local) from the environment stack. 
+ */ + popEnv(): EnvironmentBuilder { + const underlyingEnv = popLocalEnvironment(this) + return new EnvironmentBuilder(underlyingEnv.current, underlyingEnv.level) + } + + /** + * Appends the writes in other to the given environment + * (i.e. those _may_ happen). + * @param other - The next environment. + */ + appendWritesOf(other: REnvironmentInformation) { + const appendedEnv = appendEnvironment(this, other) + return new EnvironmentBuilder(appendedEnv.current, appendedEnv.level) + } +} diff --git a/test/functionality/_helper/dataflow/printer.ts b/test/functionality/_helper/dataflow/printer.ts new file mode 100644 index 0000000000..1a3529307f --- /dev/null +++ b/test/functionality/_helper/dataflow/printer.ts @@ -0,0 +1,32 @@ +import type { NodeId } from '../../../../src' +import { EmptyArgument } from '../../../../src' +import type { IdentifierReference } from '../../../../src/dataflow' +import { BuiltIn } from '../../../../src/dataflow' +import { + UnnamedFunctionCallPrefix +} from '../../../../src/dataflow/internal/process/functions/call/unnamed-call-handling' + +export function wrap(id: string | NodeId | undefined): string { + if(id === undefined) { + return 'undefined' + } else if(id === EmptyArgument) { + return 'EmptyArgument' + } else if(id === BuiltIn) { + return 'BuiltIn' + } else if(typeof id === 'string' && id.startsWith(UnnamedFunctionCallPrefix)) { + return `\`\${UnnamedFunctionCallPrefix}${id.slice(UnnamedFunctionCallPrefix.length)}\`` + } else { + return `'${id}'` + } +} + +function wrapControlDependency(controlDependency: NodeId[] | undefined): string { + if(controlDependency === undefined) { + return 'undefined' + } else { + return `[${controlDependency.map(wrap).join(', ')}]` + } +} +export function wrapReference(ref: IdentifierReference): string { + return `{ nodeId: ${wrap(ref.nodeId)}, name: ${wrap(ref.name)}, controlDependencies: ${wrapControlDependency(ref.controlDependencies)} }` +} diff --git 
a/test/functionality/_helper/dataflowgraph-builder.ts b/test/functionality/_helper/dataflowgraph-builder.ts deleted file mode 100644 index 9285a89d8e..0000000000 --- a/test/functionality/_helper/dataflowgraph-builder.ts +++ /dev/null @@ -1,181 +0,0 @@ -import type { DataflowFunctionFlowInformation, DataflowGraphEdgeAttribute, DataflowGraphExitPoint, DataflowGraphVertexFunctionCall, DataflowGraphVertexFunctionDefinition, DataflowGraphVertexUse, DataflowGraphVertexVariableDefinition, FunctionArgument, NodeId, REnvironmentInformation } from '../../../src' -import { DataflowGraph, EdgeType } from '../../../src' -import { LocalScope } from '../../../src/dataflow/environments/scopes' -import { deepMergeObject } from '../../../src/util/objects' - -export function emptyGraph() { - return new DataflowGraphBuilder() -} - -/** - * This DataflowGraphBuilder extends {@link DataflowGraph} with builder methods to - * easily and compactly add vertices and edges to a dataflow graph. Its usage thus - * simplifies writing tests for dataflows. - */ -export class DataflowGraphBuilder extends DataflowGraph { - /** - * Adds a vertex for a function definition (V1). - * - * @param id - AST node ID - * @param name - AST node text - * @param subflow - Subflow data graph for the defined function. - * @param exitPoints - Node IDs for exit point vertices. - * @param info - Additional/optional properties. - * @param asRoot - should the vertex be part of the root vertex set of the graph - * (i.e., be a valid entry point), or is it nested (e.g., as part of a function definition) - */ - public defineFunction(id: NodeId, name: string, - exitPoints: NodeId[], subflow: DataflowFunctionFlowInformation, - info?: Partial, - asRoot: boolean = true) { - const scope = (info && info.scope) ? info.scope : LocalScope - return this.addVertex(deepMergeObject({ tag: 'function-definition', id, name, subflow, exitPoints, scope }, info), asRoot) - } - - /** - * Adds a vertex for a function call (V2). 
- * - * @param id - AST node ID - * @param name - Function name - * @param args - Function arguments; may be empty - * @param info - Additional/optional properties. - * @param asRoot - should the vertex be part of the root vertex set of the graph - * (i.e., be a valid entry point), or is it nested (e.g., as part of a function definition) - */ - public call(id: NodeId, name: string, args: FunctionArgument[], - info?: Partial, - asRoot: boolean = true) { - return this.addVertex(deepMergeObject({ tag: 'function-call', id, name, args }, info), asRoot) - } - - /** - * Adds a vertex for an exit point of a function (V3). - * - * @param id - AST node ID - * @param name - AST node text - * @param env - Environment of the function we exit. - * @param info - Additional/optional properties. - * @param asRoot - should the vertex be part of the root vertex set of the graph - * (i.e., be a valid entry point), or is it nested (e.g., as part of a function definition) - */ - public exit(id: NodeId, name: string, environment?: REnvironmentInformation, - info?: Partial, - asRoot: boolean = true) { - return this.addVertex(deepMergeObject({ tag: 'exit-point', id, environment, name }, info), asRoot) - } - - /** - * Adds a vertex for a variable definition (V4). - * - * @param id - AST node ID - * @param name - Variable name - * @param scope - Scope (global/local/custom), defaults to local. - * @param info - Additional/optional properties. - * @param asRoot - should the vertex be part of the root vertex set of the graph - * (i.e., be a valid entry point), or is it nested (e.g., as part of a function definition) - */ - public defineVariable(id: NodeId, name: string, scope: string = LocalScope, - info?: Partial, asRoot: boolean = true) { - return this.addVertex(deepMergeObject({ tag: 'variable-definition', id, name, scope }, info), asRoot) - } - - /** - * Adds a vertex for variable use (V5). Intended for creating dataflow graphs as part of function tests. 
- * - * @param id - AST node id - * @param name - Variable name - * @param info - Additional/optional properties; - * i.e. scope, when, or environment. - * @param asRoot - should the vertex be part of the root vertex set of the graph - * (i.e., be a valid entry point) or is it nested (e.g., as part of a function definition) - */ - public use(id: NodeId, name: string, info?: Partial, asRoot: boolean = true) { - return this.addVertex(deepMergeObject({ tag: 'use', id, name }, info), asRoot) - } - - /** - * Adds a read edge (E1) for simple testing. - * - * @param from - Vertex/NodeId - * @param to - see from - * @param when - always (default), or maybe - */ - public reads(from: NodeId, to: NodeId, when: DataflowGraphEdgeAttribute = 'always') { - return this.addEdge(from, to, EdgeType.Reads, when) - } - - /** - * Adds a defined-by edge (E2), with from as defined variable, and to as - * as a variable/function contributing to its definition. - * - * @see reads for parameters. - */ - public definedBy(from: NodeId, to: NodeId, when: DataflowGraphEdgeAttribute = 'always') { - return this.addEdge(from, to, EdgeType.DefinedBy, when) - } - - /** - * Adds a same-read-read edge (E3), with from and to as two variable uses - * on the same variable. - * - * @see reads for parameters. - */ - public sameRead(from: NodeId, to: NodeId, when: DataflowGraphEdgeAttribute = 'always') { - return this.addEdge(from, to, EdgeType.SameReadRead, when) - } - - /** - * Adds a same-def-def edge (E4), with from and to as two variables - * that share a defining variable. - * - * @see reads for parameters. - */ - public sameDef(from: NodeId, to: NodeId, when: DataflowGraphEdgeAttribute = 'always') { - return this.addEdge(from, to, EdgeType.SameDefDef, when) - } - - /** - * Adds a call edge (E5) with from as caller, and to as callee. - * - * @see reads for parameters. 
- */ - public calls(from: NodeId, to: NodeId, when: DataflowGraphEdgeAttribute = 'always') { - return this.addEdge(from, to, EdgeType.Calls, when) - } - - /** - * Adds a return edge (E6) with from as function, and to as exit point. - * - * @see reads for parameters. - */ - public returns(from: NodeId, to: NodeId, when: DataflowGraphEdgeAttribute = 'always') { - return this.addEdge(from, to, EdgeType.Returns, when) - } - - /** - * Adds a defines-on-call edge (E7) with from as variable, and to as its definition - * - * @see reads for parameters. - */ - public definesOnCall(from: NodeId, to: NodeId, when: DataflowGraphEdgeAttribute = 'always') { - return this.addEdge(from, to, EdgeType.DefinesOnCall, when) - } - - /** - * Adds an argument edge (E9) with from as function call, and to as argument. - * - * @see reads for parameters. - */ - public argument(from: NodeId, to: NodeId, when: DataflowGraphEdgeAttribute = 'always') { - return this.addEdge(from, to, EdgeType.Argument, when) - } - - /** - * Adds an relation (E10) with from as exit point, and to as any other vertex. - * - * @see reads for parameters. 
- */ - public relates(from: NodeId, to: NodeId, when: DataflowGraphEdgeAttribute = 'always') { - return this.addEdge(from, to, EdgeType.Relates, when) - } -} \ No newline at end of file diff --git a/test/functionality/_helper/environment-builder.ts b/test/functionality/_helper/environment-builder.ts deleted file mode 100644 index 663779632b..0000000000 --- a/test/functionality/_helper/environment-builder.ts +++ /dev/null @@ -1,141 +0,0 @@ -import type { NodeId } from '../../../src/r-bridge' -import type { DataflowGraphEdgeAttribute as WhenUsed, FunctionArgument, IdentifierDefinition, REnvironmentInformation, Identifier } from '../../../src/dataflow' -import { appendEnvironments, DefaultEnvironmentMemory, define, Environment, popLocalEnvironment, pushLocalEnvironment, type DataflowScopeName as RScope } from '../../../src/dataflow/environments' -import { GlobalScope, LocalScope } from '../../../src/dataflow/environments/scopes' -import { UnnamedArgumentPrefix } from '../../../src/dataflow/internal/process/functions/argument' - -export function variable(name: string, nodeId: NodeId, definedAt: NodeId = nodeId, scope: RScope = LocalScope, used: WhenUsed = 'always'): IdentifierDefinition { - return { name, kind: 'variable', nodeId, definedAt, scope, used } -} - -/** - * Provides a FunctionArgument to use with function call vertices. 
- * @param nodeId - AST Node ID - * @param name - optional; can be removed for unnamed arguments - * @param scope - optional; default is LocalScope - * @param used - optional; default is always - */ -export function argumentInCall(nodeId: NodeId, name?: string, scope: RScope = LocalScope, used: WhenUsed = 'always'): FunctionArgument { - if(name === undefined) { - return { nodeId, name: unnamedArgument(nodeId), scope, used } - } else { - return [name, { nodeId, name, scope, used }] - } -} - -export function unnamedArgument(id: NodeId) { - return `${UnnamedArgumentPrefix}${id}` -} - -/** - * The constant global environment with all pre-defined functions. - */ -export const defaultEnvironment = () => { - const builtIns = new Map(DefaultEnvironmentMemory) - const globalEnv = new Environment(GlobalScope) - globalEnv.memory = builtIns - return new EnvironmentBuilder(globalEnv, 0) -} - -/** - * EnvironmentBuilder extends REnvironmentInformation with builder pattern methods. - */ -export class EnvironmentBuilder implements REnvironmentInformation { - /** - * Use global environment. - */ - current: Environment - /** - * Level is 0. - */ - level: number - - constructor(env: Environment, level: number) { - this.current = env - this.level = level - } - - /** - * Defines a new argument in the top environment. - * @param name - Argument name - * @param nodeId - AST Node ID of usage - * @param definedAt - AST Node Id of definition - * @param scope - local (default) or optional - * @param used - always (default) or optional - */ - defineArgument(name: string, nodeId: NodeId, definedAt: NodeId, scope: RScope = LocalScope, used: WhenUsed = 'always') { - return this.defineEnv({ name, kind: 'argument', definedAt, nodeId, scope, used }) - } - - /** - * Defines a new function in the top environment. 
- * @param name - Function name - * @param nodeId - AST Node ID of usage - * @param definedAt - AST Node Id of definition - * @param scope - local (default) or optional - * @param used - always (default) or optional - */ - defineFunction(name: string, nodeId: NodeId, definedAt: NodeId, scope: RScope = LocalScope, used: WhenUsed = 'always') { - return this.defineEnv({ name, kind: 'function', definedAt, nodeId, scope, used }) - } - - /** - * Defines a new parameter in the top environment. - * @param name - Parameter name - * @param nodeId - AST Node ID of usage - * @param definedAt - AST Node Id of definition - * @param scope - local (default) or optional - * @param used - always (default) or optional - */ - defineParameter(name: string, nodeId: NodeId, definedAt: NodeId, scope: RScope = LocalScope, used: WhenUsed = 'always') { - return this.defineEnv({ name, kind: 'parameter', definedAt, nodeId, scope, used }) - } - - /** - * Defines a new parameter in the top environment. - * @param name - Variable name - * @param nodeId - AST Node Id of usage - * @param definedAt - AST Node ID of definition - * @param scope - local (default) or optional - * @param used - always (default) or optional - */ - defineVariable(name: string, nodeId: NodeId, definedAt: NodeId = nodeId, scope: RScope = LocalScope, used: WhenUsed = 'always') { - return this.defineEnv({ name, kind: 'variable', definedAt, nodeId, scope, used }) - } - - /** - * Adds definitions to the current environment. - * @param def - Definition to add. - */ - defineEnv(def: IdentifierDefinition) { - const envWithDefinition = define(def, def.scope, this) - return new EnvironmentBuilder(envWithDefinition.current, envWithDefinition.level) - } - - /** - * Adds a new, local environment on the environment stack and returns it. - * @param definitions - Definitions to add to the local environment. 
- */ - pushEnv(): EnvironmentBuilder { - const newEnvironment = pushLocalEnvironment(this) - return new EnvironmentBuilder(newEnvironment.current, newEnvironment.level) - } - - /** - * Pops the last environment (must be local) from the environment stack. - */ - popEnv(): EnvironmentBuilder { - const underlyingEnv = popLocalEnvironment(this) - return new EnvironmentBuilder(underlyingEnv.current, underlyingEnv.level) - } - - /** - * Appends the writes in other to the given environment - * (i.e. those _may_ happen). - * @param other - The next environment. - */ - appendWritesOf(other: REnvironmentInformation) { - const appendedEnv = appendEnvironments(this, other) - return new EnvironmentBuilder(appendedEnv.current, appendedEnv.level) - } -} \ No newline at end of file diff --git a/test/functionality/_helper/label.ts b/test/functionality/_helper/label.ts new file mode 100644 index 0000000000..a01a9178eb --- /dev/null +++ b/test/functionality/_helper/label.ts @@ -0,0 +1,155 @@ +/** + * Labels can be used whenever a test name is expected, to wrap around the original + * string and link it to functionality it refers to. As this is currently work in + * progress, no automated linkage or validation is performed. 
+ * @module + */ + + +import { DefaultMap } from '../../../src/util/defaultmap' +import type { FlowrCapabilityWithPath, SupportedFlowrCapabilityId } from '../../../src/r-bridge/data' +import { getAllCapabilities } from '../../../src/r-bridge/data' +import type { MergeableRecord } from '../../../src/util/objects' + +// map flowr ids to the capabilities +const TheGlobalLabelMap: DefaultMap = new DefaultMap(() => []) + +const uniqueTestId = (() => { + let id = 0 + return () => id++ +})() + + +export type TestLabelContext = 'parse' | 'desugar' | 'dataflow' | 'other' | 'slice' +export interface TestLabel extends MergeableRecord { + readonly id: number + readonly name: string + /** even if ids appear multiple times we only want to count each one once */ + readonly capabilities: ReadonlySet + /** this is automatically set (hihi) by functions like `assertAst` to correctly derive what part of capability we check */ + readonly context: Set +} + + +/** + * Wraps a test name with a unique identifier and label it with the given ids. + * @param testname - the name of the test (`it`) to be labeled + * @param ids - the capability ids to attach to the test + * @param context - the context in which the test is run, if not given this returns the label information for a test-helper to attach it + */ +export function label(testname: string, ids: readonly SupportedFlowrCapabilityId[], context: readonly TestLabelContext[]): string +export function label(testname: string, ids: readonly SupportedFlowrCapabilityId[], context?: readonly TestLabelContext[]): TestLabel +export function label(testname: string, ids: readonly SupportedFlowrCapabilityId[], context?: readonly TestLabelContext[]): TestLabel | string { + const capabilities: Set = new Set(ids) + const label: TestLabel = { + id: uniqueTestId(), + name: testname.toLowerCase(), + capabilities, + context: context === undefined ? 
new Set() : new Set(context) + } + + for(const i of capabilities) { + TheGlobalLabelMap.get(i).push(label) + } + + if(context === undefined) { + return label + } else { + return getFullNameOfLabel(label) + } +} + +function getFullNameOfLabel(label: TestLabel): string { + return `#${label.id} ${label.name} [${[...label.capabilities].join(', ')}]` +} + + +/** + * Returns the full name of the testlabel and adds the respective contexts + */ +export function decorateLabelContext(label: TestLabel | string, context: readonly TestLabelContext[]): string { + if(typeof label === 'string') { + return label + } + + for(const c of context) { + label.context.add(c) + } + + return getFullNameOfLabel(label) +} + +function printIdRange(start: number, last: number): string { + if(start === last) { + return `#${start}` + } else { + return `#${start}-#${last}` + } +} + +function mergeConsecutiveIds(ids: readonly number[]): string { + if(ids.length === 0) { + return '' + } + + const sorted = [...ids].sort((a, b) => a - b) + const result: string[] = [] + let start: number = sorted[0] + let last: number = start + + for(const id of sorted.slice(1)) { + if(id === last + 1) { + last = id + } else { + result.push(printIdRange(start, last)) + start = id + last = id + } + } + result.push(printIdRange(start, last)) + return `\x1b[36m${result.join('\x1b[m, \x1b[36m')}\x1b[m` +} + +function printCapability(label: FlowrCapabilityWithPath, testNames: TestLabel[]) { + const supportClaim = label.supported ? ` (claim: ${label.supported} supported)` : '' + const paddedLabel = `${' '.repeat(label.path.length * 2 - 2)}[${label.path.join('/')}] ${label.name}${supportClaim}` + const tests = testNames.length > 1 ? 
'tests:' : 'test: ' + // we only have to warn if we claim to support but do not offer + if(testNames.length === 0) { + if(label.supported !== 'not' && label.supported !== undefined) { + console.log(`\x1b[1;31m${paddedLabel} is not covered by any tests\x1b[0m`) + } else { + console.log(`${paddedLabel}`) + } + return + } + + // group by contexts + const contextMap = new DefaultMap(() => []) + for(const t of testNames) { + for(const c of t.context) { + contextMap.get(c).push(t) + } + } + let formattedTestNames = '' + for(const [context, tests] of contextMap.entries()) { + const formatted = mergeConsecutiveIds(tests.map(t => t.id)) + formattedTestNames += `\n${' '.repeat(label.path.length * 2 - 2)} - ${context} [${tests.length}]: ${formatted}` + } + + console.log(`\x1b[1m${paddedLabel}\x1b[0m is covered by ${testNames.length} ${tests}${formattedTestNames}`) +} + +function printLabelSummary(): void { + console.log('== Test Capability Coverage ' + '='.repeat(80)) + // only list those for which we have a support claim + const allCapabilities = [...getAllCapabilities()] + const entries = allCapabilities.map(c => [c, TheGlobalLabelMap.get(c.id)] as const) + + for(const [capability, testNames] of entries) { + printCapability(capability, testNames) + } +} + +after(printLabelSummary) +process.on('exit', printLabelSummary) diff --git a/test/functionality/_helper/log.ts b/test/functionality/_helper/log.ts new file mode 100644 index 0000000000..828eb9c6be --- /dev/null +++ b/test/functionality/_helper/log.ts @@ -0,0 +1,24 @@ +import { log, LogLevel } from '../../../src/util/log' + +/** + * Update the minimum level of all flowr loggers. 
+ * @param minLevel - The new minimum level to show messages from (inclusive) + * @param log2File - Whether to log to a file as well + */ +export function setMinLevelOfAllLogs(minLevel: LogLevel, log2File = false) { + if(log2File) { + log.logToFile() + } + log.updateSettings(logger => { + logger.settings.minLevel = minLevel + }) +} + +/** + * Just a convenience function to enable all logs. + */ +export function enableLog(minLevel: LogLevel = LogLevel.Trace) { + // we use a test hook as well to be more flexible + before(() => setMinLevelOfAllLogs(minLevel, false)) + setMinLevelOfAllLogs(minLevel, false) +} diff --git a/test/functionality/_helper/provider.ts b/test/functionality/_helper/provider.ts index 2c6c6e7b06..b33d835f96 100644 --- a/test/functionality/_helper/provider.ts +++ b/test/functionality/_helper/provider.ts @@ -1,14 +1,11 @@ // all examples are based on the R language def (Draft of 2023-03-15, 10.3.1) +import type { NamespaceIdentifier, RNumberValue, RStringValue } from '../../../src' import { - type NamespaceIdentifier, - RNa, RNull, type RNumberValue, type RStringValue, - ArithmeticOperators, - Assignments, - ComparisonOperators, - LogicalOperators, + RNa, RNull, + Operators, OperatorArity, - OperatorDatabase, ModelFormulaOperators -} from '../../../src/r-bridge' + OperatorDatabase +} from '../../../src' // maps a string to the expected R number parse value export const RNumberPool: { val: RNumberValue, str: string }[] = [ @@ -91,61 +88,30 @@ export const RStringPool: { val: RStringValue, str: string }[] = [ { str: '"\\U{10AFFE}"', val: { str: '\\U{10AFFE}', quotes: '"' } } // unicode 4 ] -export const RSymbolPool: { val: string, str: string, namespace: NamespaceIdentifier | undefined, symbolStart: number }[] = [ +export const RSymbolPool: { val: string, str: string, namespace: NamespaceIdentifier | undefined, symbolStart: number, internal?: boolean }[] = [ { str: 'NA', val: RNa, namespace: undefined, symbolStart: 1 }, { str: 'NULL', val: RNull, 
namespace: undefined, symbolStart: 1 }, { str: 'x', val: 'x', namespace: undefined, symbolStart: 1 }, { str: 'x.y', val: 'x.y', namespace: undefined, symbolStart: 1 }, { str: 'x::y', val: 'y', namespace: 'x', symbolStart: 4 }, // ::: for non-exported? - { str: 'x:::y', val: 'y', namespace: 'x', symbolStart: 5 } + { str: 'x:::y', val: 'y', namespace: 'x', symbolStart: 5, internal: true } ] -const canBeABinaryOp = (op: string): boolean => { +function canBeABinaryOp(op: string) { const arity = OperatorDatabase[op].arity return arity === OperatorArity.Binary || arity === OperatorArity.Both } -const canBeAUnaryOp = (op: string): boolean => { +function canBeAUnaryOp(op: string): boolean { const arity = OperatorDatabase[op].arity return arity === OperatorArity.Unary || arity === OperatorArity.Both } -export const RArithmeticBinaryOpPool: { flavor: 'arithmetic', str: string }[] = - ArithmeticOperators.filter(canBeABinaryOp).map(op => ({ str: op, flavor: 'arithmetic' })) +export const BinaryOperatorPool: ReadonlySet = new Set(Operators.filter(canBeABinaryOp)) +export const BinaryNonAssignmentOperators: readonly string[] = [...BinaryOperatorPool].filter(op => OperatorDatabase[op].usedAs !== 'assignment') -export const RLogicalBinaryOpPool: { flavor: 'logical', str: string }[] = - LogicalOperators.filter(canBeABinaryOp).map(op => ({ str: op, flavor: 'logical' })) +export const UnaryOperatorPool: ReadonlySet = new Set(Operators.filter(canBeAUnaryOp)) -export const RComparisonBinaryOpPool: { flavor: 'comparison', str: string }[] = - ComparisonOperators.filter(canBeABinaryOp).map(op => ({ str: op, flavor: 'comparison' })) - -export const RModelFormulaBinaryOpPool: { flavor: 'model formula', str: string }[] = - ModelFormulaOperators.filter(canBeABinaryOp).map(op => ({ str: op, flavor: 'model formula' })) - -export const RAssignmentOpPool: { flavor: 'assignment', str: string }[] = - Assignments.filter(canBeABinaryOp).map(op => ({ str: op, flavor: 'assignment' })) - -export const 
RNonAssignmentBinaryOpPool: { label: 'arithmetic' | 'logical' | 'comparison' | 'model formula', pool: typeof RArithmeticBinaryOpPool | typeof RLogicalBinaryOpPool | typeof RComparisonBinaryOpPool | typeof RModelFormulaBinaryOpPool }[] = - [ - { label: 'arithmetic', pool: RArithmeticBinaryOpPool }, - { label: 'logical', pool: RLogicalBinaryOpPool }, - { label: 'comparison', pool: RComparisonBinaryOpPool }, - { label: 'model formula', pool: RModelFormulaBinaryOpPool } - ] - -export const RArithmeticUnaryOpPool: { flavor: 'arithmetic', str: string }[] = - ArithmeticOperators.filter(canBeAUnaryOp).map(op => ({ str: op, flavor: 'arithmetic' })) - -export const RLogicalUnaryOpPool: { flavor: 'logical', str: string }[] = - LogicalOperators.filter(canBeAUnaryOp).map(op => ({ str: op, flavor: 'logical' })) - -export const RModelFormulaUnaryOpPool: { flavor: 'model formula', str: string }[] = - ModelFormulaOperators.filter(canBeAUnaryOp).map(op => ({ str: op, flavor: 'model formula' })) - -export const RUnaryOpPool: { label: 'arithmetic' | 'logical' | 'model formula', pool: typeof RArithmeticUnaryOpPool | typeof RLogicalUnaryOpPool | typeof RModelFormulaUnaryOpPool }[] = [ - { label: 'arithmetic', pool: RArithmeticUnaryOpPool }, - { label: 'logical', pool: RLogicalUnaryOpPool }, - { label: 'model formula', pool: RModelFormulaUnaryOpPool } -] +export const AssignmentOperators: readonly string[] = Operators.filter(op => OperatorDatabase[op].usedAs === 'assignment') diff --git a/test/functionality/_helper/shell.ts b/test/functionality/_helper/shell.ts index 68e18cd631..82e5c87fba 100644 --- a/test/functionality/_helper/shell.ts +++ b/test/functionality/_helper/shell.ts @@ -1,10 +1,5 @@ import { it } from 'mocha' import { testRequiresNetworkConnection } from './network' -import type { DeepPartial } from 'ts-essentials' -import { - deterministicCountingIdGenerator, requestFromInput, - RShell -} from '../../../src/r-bridge' import type { DecoratedAstMap, fileProtocol, IdGenerator, 
@@ -13,20 +8,30 @@ import type { RExpressionList, RNode, RNodeWithParent, - XmlParserHooks - -} from '../../../src/r-bridge' - + SlicingCriteria } from '../../../src' +import { + deterministicCountingIdGenerator, requestFromInput, + RShell +} from '../../../src' import { assert } from 'chai' -import type { DataflowGraph } from '../../../src/dataflow' -import { diffGraphsToMermaidUrl, graphToMermaidUrl } from '../../../src/dataflow' -import type { SlicingCriteria } from '../../../src/slicing' import { testRequiresRVersion } from './version' import type { MergeableRecord } from '../../../src/util/objects' import { deepMergeObject } from '../../../src/util/objects' -import { executeSingleSubStep, LAST_STEP, SteppingSlicer } from '../../../src/core' +import { NAIVE_RECONSTRUCT } from '../../../src/core/steps/all/static-slicing/10-reconstruct' import { guard } from '../../../src/util/assert' -import type { DifferenceReport } from '../../../src/util/diff' +import { createPipeline } from '../../../src/core/steps/pipeline' +import { PipelineExecutor } from '../../../src/core/pipeline-executor' +import { PARSE_WITH_R_SHELL_STEP } from '../../../src/core/steps/all/core/00-parse' +import { NORMALIZE } from '../../../src/core/steps/all/core/10-normalize' +import { SteppingSlicer } from '../../../src/core/stepping-slicer' +import { LAST_STEP } from '../../../src/core/steps/steps' +import type { TestLabel } from './label' +import { decorateLabelContext } from './label' +import { STATIC_DATAFLOW } from '../../../src/core/steps/all/core/20-dataflow' +import { graphToMermaidUrl, diffGraphsToMermaidUrl } from '../../../src/dataflow' +import type { DataflowDifferenceReport, DataflowGraph , ProblematicDiffInfo } from '../../../src/dataflow' +import { printAsBuilder } from './dataflow/dataflow-builder-printer' +import { normalizedAstToMermaidUrl } from '../../../src/util/mermaid' export const testWithShell = (msg: string, fn: (shell: RShell, test: Mocha.Context) => void | Promise): 
Mocha.Test => { return it(msg, async function(): Promise { @@ -48,16 +53,14 @@ export const testWithShell = (msg: string, fn: (shell: RShell, test: Mocha.Conte export function withShell(fn: (shell: RShell) => void): () => void { return function() { const shell = new RShell() + after(() => shell.close()) fn(shell) - after(() => { - shell.close() - }) } } function removeInformation>(obj: T, includeTokens: boolean): T { return JSON.parse(JSON.stringify(obj, (key, value) => { - if(key === 'fullRange' || key === 'fullLexeme' || key === 'id' || key === 'parent' || key === 'index' || key === 'role') { + if(key === 'fullRange' || key === 'fullLexeme' || key === 'id' || key === 'parent' || key === 'index' || key === 'role' || key === 'depth') { return undefined } else if(key === 'additionalTokens' && (!includeTokens || (Array.isArray(value) && value.length === 0))) { return undefined @@ -81,13 +84,12 @@ function assertAstEqualIgnoreSourceInformation(ast: RNode, expected: } } -export const retrieveNormalizedAst = async(shell: RShell, input: `${typeof fileProtocol}${string}` | string, hooks?: DeepPartial): Promise => { +export const retrieveNormalizedAst = async(shell: RShell, input: `${typeof fileProtocol}${string}` | string): Promise => { const request = requestFromInput(input) return (await new SteppingSlicer({ stepOfInterest: 'normalize', shell, - request, - hooks + request }).allRemainingSteps()).normalize.ast } @@ -115,7 +117,7 @@ async function testRequiresPackages(shell: RShell, requiredPackages: string[], t } export async function ensureConfig(shell: RShell, test: Mocha.Context, userConfig?: Partial): Promise { - const config = deepMergeObject>(defaultTestConfiguration, userConfig) + const config = deepMergeObject(defaultTestConfiguration, userConfig) if(config.needsNetworkConnection) { await testRequiresNetworkConnection(test) } @@ -127,15 +129,37 @@ export async function ensureConfig(shell: RShell, test: Mocha.Context, userConfi } } -/** call within describeSession 
*/ -export function assertAst(name: string, shell: RShell, input: string, expected: RExpressionList, userConfig?: Partial(steps: S[], wanted: T): { step: S, wanted: T }[] { + return steps.map(step => ({ step, wanted })) +} + +const normalizePipeline = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE) + +/** + * For a given input code this takes multiple ASTs depending on the respective normalizer step to run! + * + * @see sameForSteps + */ +export function assertAst(name: TestLabel | string, shell: RShell, input: string, expected: RExpressionList, userConfig?: Partial): Mocha.Test { +}>): Mocha.Suite | Mocha.Test { + const fullname = decorateLabelContext(name, ['desugar']) // the ternary operator is to support the legacy way I wrote these tests - by mirroring the input within the name - return it(name === input ? name : `${name} (input: ${input})`, async function() { + return it(`${fullname} (input: ${input})`, async function() { await ensureConfig(shell, this, userConfig) - const ast = await retrieveNormalizedAst(shell, input) - assertAstEqualIgnoreSourceInformation(ast, expected, !userConfig?.ignoreAdditionalTokens, () => `got: ${JSON.stringify(ast)}, vs. expected: ${JSON.stringify(expected)}`) + + const pipeline = new PipelineExecutor(normalizePipeline, { + shell, + request: requestFromInput(input) + }) + const result = await pipeline.allRemainingSteps() + const ast = result.normalize.ast + + assertAstEqualIgnoreSourceInformation(ast, expected, !userConfig?.ignoreAdditionalTokens, + () => `got: ${JSON.stringify(ast)}, vs. 
expected: ${JSON.stringify(expected)}`) }) } @@ -156,28 +180,44 @@ export function assertDecoratedAst(name: string, shell: RShell, input }) } -export function assertDataflow(name: string, shell: RShell, input: string, expected: DataflowGraph, userConfig?: Partial, startIndexForDeterministicIds = 0): void { - it(`${name} (input: ${JSON.stringify(input)})`, async function() { + +const legacyDataflow = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW) + +function mapProblematicNodesToIds(problematic: readonly ProblematicDiffInfo[] | undefined): Set | undefined { + return problematic === undefined ? undefined : new Set(problematic.map(p => p.tag === 'vertex' ? p.id : `${p.from}->${p.to}`)) +} + +export function assertDataflow( + name: string | TestLabel, + shell: RShell, + input: string, + expected: DataflowGraph, + userConfig?: Partial, + startIndexForDeterministicIds = 0 +): void { + const effectiveName = decorateLabelContext(name, ['dataflow']) + it(`${effectiveName} (input: ${JSON.stringify(input)})`, async function() { await ensureConfig(shell, this, userConfig) - const info = await new SteppingSlicer({ - stepOfInterest: 'dataflow', - request: requestFromInput(input), + const info = await new PipelineExecutor(legacyDataflow, { shell, - getId: deterministicCountingIdGenerator(startIndexForDeterministicIds), + request: requestFromInput(input), + getId: deterministicCountingIdGenerator(startIndexForDeterministicIds) }).allRemainingSteps() - const report: DifferenceReport = expected.equals(info.dataflow.graph, true, { left: 'expected', right: 'got' }) + const report: DataflowDifferenceReport = expected.equals(info.dataflow.graph, true, { left: 'expected', right: 'got' }) // with the try catch the diff graph is not calculated if everything is fine try { guard(report.isEqual(), () => `report:\n * ${report.comments()?.join('\n * ') ?? 
''}`) } catch(e) { const diff = diffGraphsToMermaidUrl( - { label: 'expected', graph: expected }, - { label: 'got', graph: info.dataflow.graph }, + { label: 'expected', graph: expected, mark: mapProblematicNodesToIds(report.problematic()) }, + { label: 'got', graph: info.dataflow.graph, mark: mapProblematicNodesToIds(report.problematic()) }, info.normalize.idMap, - `%% ${input.replace(/\n/g, '\n%% ')}\n` + `%% ${input.replace(/\n/g, '\n%% ')}\n` + report.comments()?.map(n => `%% ${n}\n`).join('') ?? '' + '\n' ) + console.error('best-effort reconstruction:\n', printAsBuilder(info.dataflow.graph)) + console.error('diff:\n', diff) throw e } @@ -191,11 +231,11 @@ function printIdMapping(ids: NodeId[], map: DecoratedAstMap): string { } /** - * Please note, that theis executes the reconstruction step separately, as it predefines the result of the slice with the given ids. + * Please note, that this executes the reconstruction step separately, as it predefines the result of the slice with the given ids. */ -export function assertReconstructed(name: string, shell: RShell, input: string, ids: NodeId | NodeId[], expected: string, userConfig?: Partial, getId: IdGenerator = deterministicCountingIdGenerator(0)): Mocha.Test { +export function assertReconstructed(name: string | TestLabel, shell: RShell, input: string, ids: NodeId | NodeId[], expected: string, userConfig?: Partial, getId: IdGenerator = deterministicCountingIdGenerator(0)): Mocha.Test { const selectedIds = Array.isArray(ids) ? 
ids : [ids] - return it(name, async function() { + return it(decorateLabelContext(name, ['slice']), async function() { await ensureConfig(shell, this, userConfig) const result = await new SteppingSlicer({ @@ -204,14 +244,24 @@ export function assertReconstructed(name: string, shell: RShell, input: string, request: requestFromInput(input), shell }).allRemainingSteps() - const reconstructed = executeSingleSubStep('reconstruct', result.normalize, new Set(selectedIds)) - assert.strictEqual(reconstructed.code, expected, `got: ${reconstructed.code}, vs. expected: ${expected}, for input ${input} (ids: ${printIdMapping(selectedIds, result.normalize.idMap)})`) + const reconstructed = NAIVE_RECONSTRUCT.processor({ + normalize: result.normalize, + slice: { + decodedCriteria: [], + timesHitThreshold: 0, + result: new Set(selectedIds) + } + }, {}) + assert.strictEqual(reconstructed.code, expected, + `got: ${reconstructed.code}, vs. expected: ${expected}, for input ${input} (ids ${JSON.stringify(ids)}:\n${[...result.normalize.idMap].map(i => `${i[0]}: '${i[1].lexeme}'`).join('\n')})`) }) } -export function assertSliced(name: string, shell: RShell, input: string, criteria: SlicingCriteria, expected: string, getId: IdGenerator = deterministicCountingIdGenerator(0)): Mocha.Test { - return it(`${JSON.stringify(criteria)} ${name}`, async function() { +export function assertSliced(name: string | TestLabel, shell: RShell, input: string, criteria: SlicingCriteria, expected: string, getId: IdGenerator = deterministicCountingIdGenerator(0)): Mocha.Test { + const fullname = decorateLabelContext(name, ['slice']) + + return it(`${JSON.stringify(criteria)} ${fullname}`, async function() { const result = await new SteppingSlicer({ stepOfInterest: LAST_STEP, getId, @@ -220,14 +270,14 @@ export function assertSliced(name: string, shell: RShell, input: string, criteri criterion: criteria, }).allRemainingSteps() - try { assert.strictEqual( result.reconstruct.code, expected, - `got: 
${result.reconstruct.code}, vs. expected: ${expected}, for input ${input} (slice: ${printIdMapping(result.slice.decodedCriteria.map(({ id }) => id), result.normalize.idMap)}), url: ${graphToMermaidUrl(result.dataflow.graph, result.normalize.idMap, true, result.slice.result)}` + `got: ${result.reconstruct.code}, vs. expected: ${expected}, for input ${input} (slice for ${JSON.stringify(criteria)}: ${printIdMapping(result.slice.decodedCriteria.map(({ id }) => id), result.normalize.idMap)}), url: ${graphToMermaidUrl(result.dataflow.graph, result.normalize.idMap, true, result.slice.result)}` ) } catch(e) { - console.error('vis-got:\n', graphToMermaidUrl(result.dataflow.graph, result.normalize.idMap)) + console.error(normalizedAstToMermaidUrl(result.normalize.ast)) + console.error(`got:\n${result.reconstruct.code}\nvs. expected:\n${expected}`) throw e } }) diff --git a/test/functionality/dataflow/environments/initialization-tests.ts b/test/functionality/dataflow/environments/initialization-tests.ts index 99badd049e..c3741f94a2 100644 --- a/test/functionality/dataflow/environments/initialization-tests.ts +++ b/test/functionality/dataflow/environments/initialization-tests.ts @@ -1,29 +1,21 @@ -import { DefaultEnvironmentMemory, Environment, initializeCleanEnvironments } from '../../../../src/dataflow' +import { BuiltInEnvironment, Environment, initializeCleanEnvironments } from '../../../../src/dataflow' import { expect } from 'chai' -import { GlobalScope } from '../../../../src/dataflow/environments/scopes' +import { label } from '../../_helper/label' describe('Initialization', () => { - it('Clean creation should have no info but the default information', () => { + it(label('Clean creation should have no info but the default information', ['global-scope'], ['other']), () => { const clean = initializeCleanEnvironments() expect(clean.current,'there should be a current environment').to.be.not.undefined - expect(clean.current.memory, 'the current environment should have the 
default map').to.be.deep.equal(DefaultEnvironmentMemory) - expect(clean.current.name, 'the current environment must have the correct scope name').to.be.equal(GlobalScope) + expect(clean.current.memory.size, 'the current environment should have no memory').to.be.equal(0) + expect(clean.current.name, 'the current environment must have the correct scope name').to.be.equal('global') expect(clean.level, 'the level of the clean environment is predefined as 0').to.be.equal(0) }) - it('Clean creation should create independent new environments', () => { + it(label('Clean creation should create independent new environments', ['lexicographic-scope'], ['other']), () => { const clean = initializeCleanEnvironments() - clean.current.parent = new Environment('test') + clean.current.parent = new Environment('test', clean.current.parent) const second = initializeCleanEnvironments() - expect(second.current.parent, 'the new one should not have a parent ').to.be.undefined + expect(second.current.parent.id, 'the new one should have a parent, the built-in environment').to.be.equal(BuiltInEnvironment.id) expect(clean.current.parent, 'the old one should still have the parent').to.be.not.undefined }) - it('The default memory map should be copied', () => { - const clean = initializeCleanEnvironments() - clean.current.memory.clear() - - const second = initializeCleanEnvironments() - expect(second.current.memory, 'the new one should have the default environment map').to.be.deep.equal(DefaultEnvironmentMemory) - expect(clean.current.memory, 'the cleared one should be empty').to.be.have.length(0) - }) }) diff --git a/test/functionality/dataflow/environments/modification-tests.ts b/test/functionality/dataflow/environments/modification-tests.ts index 09d9800242..d5e115cee2 100644 --- a/test/functionality/dataflow/environments/modification-tests.ts +++ b/test/functionality/dataflow/environments/modification-tests.ts @@ -1,11 +1,10 @@ -import type { NodeId } from '../../../../src/r-bridge' +import 
type { NodeId } from '../../../../src' import type { IEnvironment } from '../../../../src/dataflow' -import { DefaultEnvironmentMemory } from '../../../../src/dataflow' import { guard } from '../../../../src/util/assert' import { expect } from 'chai' -import { appendEnvironments, overwriteEnvironments } from '../../../../src/dataflow/environments' -import { GlobalScope, } from '../../../../src/dataflow/environments/scopes' -import { defaultEnvironment } from '../../_helper/environment-builder' +import { appendEnvironment, overwriteEnvironment } from '../../../../src/dataflow/environments' +import { defaultEnv } from '../../_helper/dataflow/environment-builder' +import { label } from '../../_helper/label' /** if you pass multiple `definedAt`, this will expect the node to have multiple definitions */ function existsDefinedAt(name: string, definedAt: NodeId[], result: IEnvironment | undefined, message?: string) { @@ -20,45 +19,46 @@ function existsDefinedAt(name: string, definedAt: NodeId[], result: IEnvironment describe('Modification', () => { describe('Global', () => { - it('Different variables', () => { - const clean = defaultEnvironment().defineVariable('x', '_1') - const overwrite = defaultEnvironment().defineVariable('y', '_2') - const result = overwriteEnvironments(clean, overwrite) + it(label('Different variables', ['global-scope', 'name-normal'], ['other']), () => { + const clean = defaultEnv().defineVariable('x', '_1') + const overwrite = defaultEnv().defineVariable('y', '_2') + const result = overwriteEnvironment(clean, overwrite) expect(result, 'there should be a result').to.be.not.undefined - expect(result.current.memory, 'there should be two definitions for x and y').to.have.length(2 + DefaultEnvironmentMemory.size) + expect(result.current.memory, 'there should be two definitions for x and y').to.have.length(2) existsDefinedAt('x', ['_1'], result.current, 'globals must be defined locally as well') existsDefinedAt('y', ['_2'], result.current, 'globals 
must be defined locally as well') }) - it('Same variables', () => { - const clean = defaultEnvironment().defineVariable('x', '_1') - const overwrite = defaultEnvironment().defineVariable('x', '_2') - const result = overwriteEnvironments(clean, overwrite) + it(label('Same variables', ['global-scope'], ['other']), () => { + const clean = defaultEnv().defineVariable('x', '_1') + const overwrite = defaultEnv().defineVariable('x', '_2') + const result = overwriteEnvironment(clean, overwrite) expect(result, 'there should be a result').to.be.not.undefined - expect(result.current.memory, 'there should be only one definition for x').to.have.length(1 + DefaultEnvironmentMemory.size) + expect(result.current.memory, 'there should be only one definition for x').to.have.length(1) existsDefinedAt('x', ['_2'], result.current) }) }) describe('Local', () => { - it('Different variables', () => { - const clean = defaultEnvironment().defineVariable('long', '_1') - const overwrite = defaultEnvironment().defineVariable('short', '_2') - const result = overwriteEnvironments(clean, overwrite) + + it(label('Different variables', ['lexicographic-scope'], ['other']), () => { + const clean = defaultEnv().defineVariable('long', '_1') + const overwrite = defaultEnv().defineVariable('short', '_2') + const result = overwriteEnvironment(clean, overwrite) expect(result, 'there should be a result').to.be.not.undefined expect(result.level, 'neither definitions nor overwrites should produce new local scopes').to.be.equal(0) - expect(result.current.memory, 'there should be two definitions for long and short').to.have.length(2 + DefaultEnvironmentMemory.size) + expect(result.current.memory, 'there should be two definitions for long and short').to.have.length(2) existsDefinedAt('long', ['_1'], result.current) existsDefinedAt('short', ['_2'], result.current) }) - it('Same variables', () => { - const clean = defaultEnvironment().defineVariable('long', '_1') - const overwrite = 
defaultEnvironment().defineVariable('long', '_2') - const result = overwriteEnvironments(clean, overwrite) + it(label('Same variables', ['lexicographic-scope'], ['other']), () => { + const clean = defaultEnv().defineVariable('long', '_1') + const overwrite = defaultEnv().defineVariable('long', '_2') + const result = overwriteEnvironment(clean, overwrite) expect(result, 'there should be a result').to.be.not.undefined expect(result.level, 'neither definitions nor overwrites should produce new local scopes').to.be.equal(0) - expect(result.current.memory, 'there should be only one definition for long').to.have.length(1 + DefaultEnvironmentMemory.size) + expect(result.current.memory, 'there should be only one definition for long').to.have.length(1) existsDefinedAt('long', ['_2'], result.current) }) }) @@ -66,45 +66,46 @@ describe('Modification', () => { describe('Append', () => { describe('Global', () => { - it('Different variables', () => { - const clean = defaultEnvironment().defineVariable('x', '_1', '_1', GlobalScope) - const append = defaultEnvironment().defineVariable('y', '_2', '_2', GlobalScope) - const result = appendEnvironments(clean, append) + it(label('Different variables', ['global-scope', 'lexicographic-scope'], ['other']), () => { + const clean = defaultEnv().defineVariable('x', '_1', '_1') + const append = defaultEnv().defineVariable('y', '_2', '_2') + const result = appendEnvironment(clean, append) expect(result, 'there should be a result').to.be.not.undefined - expect(result.current.memory, 'there should be two definitions for x and y').to.have.length(2 + DefaultEnvironmentMemory.size) + expect(result.current.memory, 'there should be two definitions for x and y').to.have.length(2) existsDefinedAt('x', ['_1'], result.current, 'globals must be defined locally as well') existsDefinedAt('y', ['_2'], result.current, 'globals must be defined locally as well') }) - it('Same variables', () => { - const clean = defaultEnvironment().defineVariable('x', '_1', 
'_1', GlobalScope) - const append = defaultEnvironment().defineVariable('x', '_2', '_2', GlobalScope) - const result = appendEnvironments(clean, append) + it(label('Same variables', ['global-scope', 'lexicographic-scope'], ['other']), () => { + const clean = defaultEnv().defineVariable('x', '_1', '_1') + const append = defaultEnv().defineVariable('x', '_2', '_2') + const result = appendEnvironment(clean, append) expect(result, 'there should be a result').to.be.not.undefined - expect(result.current.memory, 'there should be only one symbol defined (for x)').to.have.length(1 + DefaultEnvironmentMemory.size) + expect(result.current.memory, 'there should be only one symbol defined (for x)').to.have.length(1) existsDefinedAt('x', ['_1', '_2'], result.current) }) }) describe('Local', () => { - it('Different variables', () => { - const clean = defaultEnvironment().defineVariable('local-long', '_1') - const append = defaultEnvironment().defineVariable('local-short', '_2') - const result = appendEnvironments(clean, append) + + it(label('Different variables', ['lexicographic-scope'], ['other']), () => { + const clean = defaultEnv().defineVariable('local-long', '_1') + const append = defaultEnv().defineVariable('local-short', '_2') + const result = appendEnvironment(clean, append) expect(result, 'there should be a result').to.be.not.undefined expect(result.level, 'neither definitions nor appends should produce new local scopes').to.be.equal(0) - expect(result.current.memory, 'there should be two definitions for local-long and local-short').to.have.length(2 + DefaultEnvironmentMemory.size) + expect(result.current.memory, 'there should be two definitions for local-long and local-short').to.have.length(2) existsDefinedAt('local-long', ['_1'], result.current) existsDefinedAt('local-short', ['_2'], result.current) }) - it('Same variables', () => { - const clean = defaultEnvironment().defineVariable('local-long', '_1') - const append = 
defaultEnvironment().defineVariable('local-long', '_2') - const result = appendEnvironments(clean, append) + it(label('Same variables', ['lexicographic-scope'], ['other']), () => { + const clean = defaultEnv().defineVariable('local-long', '_1') + const append = defaultEnv().defineVariable('local-long', '_2') + const result = appendEnvironment(clean, append) expect(result, 'there should be a result').to.be.not.undefined expect(result.level, 'neither definitions nor overwrites should produce new local scopes').to.be.equal(0) - expect(result.current.memory, 'there should be only one definition for local-long').to.have.length(1 + DefaultEnvironmentMemory.size) + expect(result.current.memory, 'there should be only one definition for local-long').to.have.length(1) existsDefinedAt('local-long', ['_1', '_2'], result.current) }) }) diff --git a/test/functionality/dataflow/environments/resolve-tests.ts b/test/functionality/dataflow/environments/resolve-tests.ts index a6ebb1e09b..a781d60f4e 100644 --- a/test/functionality/dataflow/environments/resolve-tests.ts +++ b/test/functionality/dataflow/environments/resolve-tests.ts @@ -1,28 +1,28 @@ import { resolveByName } from '../../../../src/dataflow/environments' import { expect } from 'chai' import { guard } from '../../../../src/util/assert' -import { GlobalScope, LocalScope } from '../../../../src/dataflow/environments/scopes' -import { defaultEnvironment, variable } from '../../_helper/environment-builder' +import { defaultEnv, variable } from '../../_helper/dataflow/environment-builder' +import { label } from '../../_helper/label' describe('Resolve', () => { describe('ByName', () => { - it('Locally without distracting elements', () => { + it(label('Locally without distracting elements', ['global-scope', 'lexicographic-scope'], ['other']), () => { const xVar = variable('x', '_1') - const env = defaultEnvironment().defineEnv(xVar) - const result = resolveByName('x', LocalScope, env) + const env = defaultEnv().defineInEnv(xVar) 
+ const result = resolveByName('x', env) guard(result !== undefined, 'there should be a result') expect(result, 'there should be exactly one definition for x').to.have.length(1) - expect(result[0], 'it should be x').to.be.equal(xVar) + expect(result[0], 'it should be x').to.deep.equal(xVar) }) - it('Locally with global distract', () => { - let env = defaultEnvironment() - .defineVariable('x', '_2', '_1', GlobalScope) + it(label('Locally with global distract', ['global-scope', 'lexicographic-scope'], ['other']), () => { + let env = defaultEnv() + .defineVariable('x', '_2', '_1') const xVar = variable('x', '_1') - env = env.defineEnv(xVar) - const result = resolveByName('x', LocalScope , env) + env = env.defineInEnv(xVar) + const result = resolveByName('x', env) guard(result !== undefined, 'there should be a result') expect(result, 'there should be exactly one definition for x').to.have.length(1) - expect(result[0], 'it should be x').to.be.equal(xVar) + expect(result[0], 'it should be x').to.be.deep.equal(xVar) }) }) }) diff --git a/test/functionality/dataflow/graph/equal-tests.ts b/test/functionality/dataflow/graph/equal-tests.ts index d9769b3288..cdc9db985f 100644 --- a/test/functionality/dataflow/graph/equal-tests.ts +++ b/test/functionality/dataflow/graph/equal-tests.ts @@ -1,7 +1,7 @@ import type { DataflowGraph } from '../../../../src/dataflow' import { diffGraphsToMermaidUrl } from '../../../../src/dataflow' import { assert } from 'chai' -import { emptyGraph } from '../../_helper/dataflowgraph-builder' +import { emptyGraph } from '../../_helper/dataflow/dataflowgraph-builder' function test(cmp: (x: boolean) => void, a: DataflowGraph, b: DataflowGraph, text: string) { try { @@ -15,7 +15,7 @@ function test(cmp: (x: boolean) => void, a: DataflowGraph, b: DataflowGraph, tex } describe('Equal', () => { - const raw = (name: string, a: DataflowGraph, b: DataflowGraph, text: string, cmp: (x: boolean) => void) => { + function raw(name: string, a: DataflowGraph, b: 
DataflowGraph, text: string, cmp: (x: boolean) => void) { return it(name, () => { // as the comparison is relatively quick, we allow explicit checks for commutativity test(cmp, a, b, 'a;b' + text) @@ -24,7 +24,7 @@ describe('Equal', () => { } describe('Positive', () => { - const eq = (name: string, a: DataflowGraph, b: DataflowGraph) => { + function eq(name: string, a: DataflowGraph, b: DataflowGraph) { raw(name, a, b, 'should be equal', x => assert.isTrue(x)) } @@ -32,7 +32,7 @@ describe('Equal', () => { eq('Same vertex', emptyGraph().use('0', 'x'), emptyGraph().use('0', 'x')) }) describe('Negative', () => { - const neq = (name: string, a: DataflowGraph, b: DataflowGraph) => { + function neq(name: string, a: DataflowGraph, b: DataflowGraph) { raw(name, a, b, 'should differ', x => assert.isFalse(x)) } describe('More elements', () => { @@ -45,14 +45,14 @@ describe('Equal', () => { const rhs = emptyGraph().use('0', 'x') neq('Id', emptyGraph().use('1', 'x'), rhs) neq('Name', emptyGraph().use('0', 'y'), rhs) - neq('Tag', emptyGraph().exit('0', 'x'), rhs) + neq('Control Dependency', emptyGraph().use('0', 'x', { controlDependencies: ['1'] }), rhs) + neq('Tag', emptyGraph().constant('0'), rhs) }) describe('Different edges', () => { const rhs = emptyGraph().reads('0', '1') neq('Source Id', emptyGraph().reads('2', '1'), rhs) neq('Target Id', emptyGraph().reads('0', '2'), rhs) neq('Type', emptyGraph().calls('0', '1'), rhs) - neq('Attribute', emptyGraph().reads('0', '1', 'maybe'), rhs) }) }) }) diff --git a/test/functionality/dataflow/processing-of-elements/atomic/atomic-tests.ts b/test/functionality/dataflow/processing-of-elements/atomic/atomic-tests.ts deleted file mode 100644 index 2d05abfa2a..0000000000 --- a/test/functionality/dataflow/processing-of-elements/atomic/atomic-tests.ts +++ /dev/null @@ -1,506 +0,0 @@ -/** - * Here we cover dataflow extraction for atomic statements (no expression lists). 
- * Yet, some constructs (like for-loops) require the combination of statements, they are included as well. - * This will not include functions! - */ -import { assertDataflow, withShell } from '../../../_helper/shell' -import { RAssignmentOpPool, RNonAssignmentBinaryOpPool, RUnaryOpPool } from '../../../_helper/provider' -import { GlobalScope, LocalScope } from '../../../../../src/dataflow/environments/scopes' -import { MIN_VERSION_PIPE } from '../../../../../src/r-bridge/lang-4.x/ast/model/versions' -import { emptyGraph } from '../../../_helper/dataflowgraph-builder' -import { argumentInCall, defaultEnvironment, unnamedArgument } from '../../../_helper/environment-builder' - -describe('Atomic (dataflow information)', withShell((shell) => { - describe('uninteresting leafs', () => { - for(const input of ['42', '"test"', 'TRUE', 'NA', 'NULL']) { - assertDataflow(input, shell, input, emptyGraph()) - } - }) - - assertDataflow('simple variable', shell, - 'xylophone', - emptyGraph().use('0', 'xylophone') - ) - - describe('access', () => { - describe('const access', () => { - assertDataflow('single constant', shell, - 'a[2]', - emptyGraph().use('0', 'a', { when: 'maybe' }) - .use('2', unnamedArgument('2')) - .reads('0', '2') - ) - assertDataflow('double constant', shell, - 'a[[2]]', - emptyGraph().use('0', 'a', { when: 'maybe' }) - .use('2', unnamedArgument('2')) - .reads('0', '2') - ) - assertDataflow('dollar constant', shell, - 'a$b', - emptyGraph().use('0', 'a', { when: 'maybe' }) - ) - assertDataflow('at constant', shell, - 'a@b', - emptyGraph().use('0', 'a', { when: 'maybe' }) - ) - assertDataflow('chained constant', shell, - 'a[2][3]', - emptyGraph().use('0', 'a', { when: 'maybe' }) - .use('2', unnamedArgument('2')) - .reads('0', '2') - .use('5', unnamedArgument('5')) - .reads('0', '5') - ) - assertDataflow('chained mixed constant', shell, - 'a[2]$a', - emptyGraph().use('0', 'a', { when: 'maybe' }) - .use('2', unnamedArgument('2')) - .reads('0', '2') - ) - }) - 
assertDataflow('chained bracket access with variables', shell, - 'a[x][y]', - emptyGraph() - .use('0', 'a', { when: 'maybe' }) - .use('1', 'x') - .use('4', 'y') - .use('2', unnamedArgument('2')) - .use('5', unnamedArgument('5')) - .reads('0', '2') - .reads('0', '5') - .reads('2', '1') - .reads('5', '4') - ) - assertDataflow('assign on access', shell, - 'a[x] <- 5', - emptyGraph() - .defineVariable('0', 'a', LocalScope, { when: 'maybe' }) - .use('1', 'x') - .use('2', unnamedArgument('2')) - .reads('0', '2') - .reads('2', '1') - ) - }) - - describe('unary operators', () => { - for(const opSuite of RUnaryOpPool) { - describe(`${opSuite.label} operations`, () => { - for(const op of opSuite.pool) { - const inputDifferent = `${op.str}x` - assertDataflow(`${op.str}x`, shell, - inputDifferent, - emptyGraph().use('0', 'x') - ) - } - }) - } - }) - - // these will be more interesting whenever we have more information on the edges (like modification etc.) - describe('non-assignment binary operators', () => { - for(const opSuite of RNonAssignmentBinaryOpPool) { - describe(`${opSuite.label}`, () => { - for(const op of opSuite.pool) { - describe(`${op.str}`, () => { - const inputDifferent = `x ${op.str} y` - assertDataflow(`${inputDifferent} (different variables)`, - shell, - inputDifferent, - emptyGraph().use('0', 'x').use('1', 'y') - ) - - const inputSame = `x ${op.str} x` - assertDataflow(`${inputSame} (same variables)`, - shell, - inputSame, - emptyGraph() - .use('0', 'x') - .use('1', 'x') - .sameRead('0', '1') - ) - }) - } - }) - } - }) - - describe('Pipes', () => { - describe('Passing one argument', () => { - assertDataflow('No parameter function', shell, 'x |> f()', - emptyGraph() - .use('0', 'x') - .call('3', 'f', [argumentInCall('1')]) - .use('1', unnamedArgument('1')) - .argument('3', '1') - .reads('1', '0'), - { minRVersion: MIN_VERSION_PIPE } - ) - assertDataflow('Nested calling', shell, 'x |> f() |> g()', - emptyGraph() - .use('0', 'x') - .call('3', 'f', 
[argumentInCall('1')]) - .call('7', 'g', [argumentInCall('5')]) - .use('1', unnamedArgument('1')) - .use('5', unnamedArgument('5')) - .argument('3', '1') - .argument('7', '5') - .reads('5', '3') - .reads('1', '0'), - { minRVersion: MIN_VERSION_PIPE } - ) - assertDataflow('Multi-Parameter function', shell, 'x |> f(y,z)', - emptyGraph() - .use('0', 'x') - .call('7', 'f', [argumentInCall('1'), argumentInCall('4'), argumentInCall('6')]) - .use('1', unnamedArgument('1')) - .use('4', unnamedArgument('4')) - .use('6', unnamedArgument('6')) - .use('0', 'x') - .use('3', 'y') - .use('5', 'z') - .argument('7', '1') - .argument('7', '4') - .argument('7', '6') - .reads('1', '0') - .reads('4', '3') - .reads('6', '5'), - { minRVersion: MIN_VERSION_PIPE } - ) - }) - }) - - describe('assignments', () => { - for(const op of RAssignmentOpPool) { - describe(`${op.str}`, () => { - const scope = op.str.length > 2 ? GlobalScope : LocalScope // love it - const swapSourceAndTarget = op.str === '->' || op.str === '->>' - - const constantAssignment = swapSourceAndTarget ? `5 ${op.str} x` : `x ${op.str} 5` - assertDataflow(`${constantAssignment} (constant assignment)`, - shell, - constantAssignment, - emptyGraph().defineVariable(swapSourceAndTarget ? 
'1' : '0', 'x', scope) - ) - - const variableAssignment = `x ${op.str} y` - const dataflowGraph = emptyGraph() - if(swapSourceAndTarget) { - dataflowGraph - .use('0', 'x') - .defineVariable('1', 'y', scope) - .definedBy('1', '0') - } else { - dataflowGraph - .defineVariable('0', 'x', scope) - .use('1', 'y') - .definedBy('0', '1') - } - assertDataflow(`${variableAssignment} (variable assignment)`, - shell, - variableAssignment, - dataflowGraph - ) - - const circularAssignment = `x ${op.str} x` - - const circularGraph = emptyGraph() - if(swapSourceAndTarget) { - circularGraph - .use('0', 'x') - .defineVariable('1', 'x', scope) - .definedBy('1', '0') - } else { - circularGraph - .defineVariable('0', 'x', scope) - .use('1', 'x') - .definedBy('0', '1') - } - - assertDataflow(`${circularAssignment} (circular assignment)`, - shell, - circularAssignment, - circularGraph - ) - }) - } - describe('nested assignments', () => { - assertDataflow('"x <- y <- 1"', shell, - 'x <- y <- 1', - emptyGraph() - .defineVariable('0', 'x') - .defineVariable('1', 'y') - .definedBy('0', '1') - ) - assertDataflow('"1 -> x -> y"', shell, - '1 -> x -> y', - emptyGraph() - .defineVariable('1', 'x') - .defineVariable('3', 'y') - .definedBy('3', '1') - ) - // still by indirection (even though y is overwritten?) 
- assertDataflow('"x <- 1 -> y"', shell, - 'x <- 1 -> y', - emptyGraph() - .defineVariable('0', 'x') - .defineVariable('2', 'y') - .definedBy('0', '2') - ) - assertDataflow('"x <- y <- z"', shell, - 'x <- y <- z', - emptyGraph() - .defineVariable('0', 'x') - .defineVariable('1', 'y') - .use('2', 'z') - .definedBy('0', '1') - .definedBy('1', '2') - .definedBy('0', '2') - ) - assertDataflow('nested global assignments', shell, - 'x <<- y <<- z', - emptyGraph() - .defineVariable('0', 'x', GlobalScope) - .defineVariable('1', 'y', GlobalScope) - .use('2', 'z') - .definedBy('0', '1') - .definedBy('1', '2') - .definedBy('0', '2') - ) - assertDataflow('nested global mixed with local assignments', shell, - 'x <<- y <- y2 <<- z', - emptyGraph() - .defineVariable('0', 'x', GlobalScope) - .defineVariable('1', 'y') - .defineVariable('2', 'y2', GlobalScope) - .use('3', 'z') - .definedBy('0', '1') - .definedBy('0', '2') - .definedBy('0', '3') - .definedBy('1', '2') - .definedBy('1', '3') - .definedBy('2', '3') - ) - }) - - describe('known impact assignments', () => { - describe('loops return invisible null', () => { - for(const assignment of [ { str: '<-', defId: ['0','0','0'], readId: ['1','1','1'], swap: false }, - { str: '<<-', defId: ['0','0','0'], readId: ['1','1','1'], swap: false }, { str: '=', defId: ['0','0','0'], readId: ['1','1','1'], swap: false }, - /* two for parenthesis necessary for precedence */ - { str: '->', defId: ['3', '4', '7'], readId: ['0','0','0'], swap: true }, { str: '->>', defId: ['3', '4', '7'], readId: ['0','0','0'], swap: true }] ) { - describe(`${assignment.str}`, () => { - const scope = assignment.str.length > 2 ? GlobalScope : LocalScope - - for(const wrapper of [(x: string) => x, (x: string) => `{ ${x} }`]) { - const build = (a: string, b: string) => assignment.swap ? 
`(${wrapper(b)}) ${assignment.str} ${a}` : `${a} ${assignment.str} ${wrapper(b)}` - - const repeatCode = build('x', 'repeat x') - assertDataflow(`"${repeatCode}"`, shell, repeatCode, emptyGraph() - .defineVariable(assignment.defId[0], 'x', scope) - .use(assignment.readId[0], 'x') - ) - - const whileCode = build('x', 'while (x) 3') - assertDataflow(`"${whileCode}"`, shell, whileCode, emptyGraph() - .defineVariable(assignment.defId[1], 'x', scope) - .use(assignment.readId[1], 'x')) - - const forCode = build('x', 'for (x in 1:4) 3') - assertDataflow(`"${forCode}"`, shell, forCode, - emptyGraph() - .defineVariable(assignment.defId[2], 'x', scope) - .defineVariable(assignment.readId[2], 'x') - ) - } - }) - } - }) - }) - describe('assignment with function call', () => { - const environmentWithX = defaultEnvironment().defineArgument('x', '4', '4') - assertDataflow('define call with multiple args should only be defined by the call-return', shell, 'a <- foo(x=3,y,z)', - emptyGraph() - .defineVariable('0', 'a') - .call('9', 'foo', [ - argumentInCall('4', 'x'), - argumentInCall('6'), - argumentInCall('8') - ]) - .use('4', 'x') - .use('5', 'y', { environment: environmentWithX }) - .use('6', unnamedArgument('6'), { environment: environmentWithX }) - .use('7', 'z', { environment: environmentWithX }) - .use('8', unnamedArgument('8'), { environment: environmentWithX }) - .definedBy('0', '9') - .argument('9', '4') - .argument('9', '6') - .argument('9', '8') - .reads('6', '5') - .reads('8', '7') - ) - }) - }) - - describe('if-then-else', () => { - // spacing issues etc. 
are dealt with within the parser, however, braces are not allowed to introduce scoping artifacts - for(const b of [ - { label: 'without braces', func: (x: string) => `${x}` }, - { label: 'with braces', func: (x: string) => `{ ${x} }` }, - ]) { - describe(`Variant ${b.label}`, () => { - describe('if-then, no else', () => { - assertDataflow('completely constant', shell, - `if (TRUE) ${b.func('1')}`, - emptyGraph() - ) - assertDataflow('compare cond.', shell, - `if (x > 5) ${b.func('1')}`, - emptyGraph().use('0', 'x') - ) - assertDataflow('compare cond. symbol in then', shell, - `if (x > 5) ${b.func('y')}`, - emptyGraph().use('0', 'x') - .use('3', 'y', { when: 'maybe' }) - ) - assertDataflow('all variables', shell, - `if (x > y) ${b.func('z')}`, - emptyGraph() - .use('0', 'x') - .use('1', 'y') - .use('3', 'z', { when: 'maybe' }) - ) - assertDataflow('all variables, some same', shell, - `if (x > y) ${b.func('x')}`, - emptyGraph() - .use('0', 'x') - .use('1', 'y') - .use('3', 'x', { when: 'maybe' }) - .sameRead('0', '3', 'maybe') - ) - assertDataflow('all same variables', shell, - `if (x > x) ${b.func('x')}`, - emptyGraph() - .use('0', 'x') - .use('1', 'x') - .use('3', 'x', { when: 'maybe' }) - .sameRead('0', '1') - // theoretically, they just have to be connected, so 0 is just hardcoded - .sameRead('0', '3', 'maybe') - ) - assertDataflow('definition in if', shell, - `if (x <- 3) ${b.func('x')}`, - emptyGraph() - .defineVariable('0', 'x', LocalScope) - .use('3', 'x', { when: 'maybe', environment: defaultEnvironment().defineVariable('x', '0', '2') }) - .reads('3', '0') - ) - }) - - describe('if-then, with else', () => { - assertDataflow('completely constant', shell, - 'if (TRUE) { 1 } else { 2 }', - emptyGraph() - ) - assertDataflow('compare cond.', shell, - 'if (x > 5) { 1 } else { 42 }', - emptyGraph().use('0', 'x') - ) - assertDataflow('compare cond. 
symbol in then', shell, - 'if (x > 5) { y } else { 42 }', - emptyGraph().use('0', 'x').use('3', 'y', { when: 'maybe' }) - ) - assertDataflow('compare cond. symbol in then & else', shell, - 'if (x > 5) { y } else { z }', - emptyGraph() - .use('0', 'x') - .use('3', 'y', { when: 'maybe' }) - .use('5', 'z', { when: 'maybe' }) - ) - assertDataflow('all variables', shell, - 'if (x > y) { z } else { a }', - emptyGraph() - .use('0', 'x') - .use('1', 'y') - .use('3', 'z', { when: 'maybe' }) - .use('5', 'a', { when: 'maybe' }) - ) - assertDataflow('all variables, some same', shell, - 'if (y > x) { x } else { y }', - emptyGraph() - .use('0', 'y') - .use('1', 'x') - .use('3', 'x', { when: 'maybe' }) - .use('5', 'y', { when: 'maybe' }) - .sameRead('1', '3', 'maybe') - .sameRead('0', '5', 'maybe') - ) - assertDataflow('all same variables', shell, - 'if (x > x) { x } else { x }', - emptyGraph() - .use('0', 'x') - .use('1', 'x') - .use('3', 'x', { when: 'maybe' }) - .use('5', 'x', { when: 'maybe' }) - // 0 is just hardcoded, they actually just have to be connected - .sameRead('0', '1') - .sameRead('0', '3', 'maybe') - .sameRead('0', '5', 'maybe') - ) - }) - }) - } - }) - describe('inline non-strict boolean operations', () => { - const environmentWithY = defaultEnvironment().defineVariable('y', '0', '2') - const environmentWithOtherY = defaultEnvironment().defineVariable('y', '4', '6') - assertDataflow('define call with multiple args should only be defined by the call-return', shell, 'y <- 15; x && (y <- 13); y', - emptyGraph() - .defineVariable('0', 'y') - .defineVariable('4', 'y', LocalScope, { environment: environmentWithY }) - .use('3', 'x', { environment: environmentWithY }) - .use('8', 'y', { environment: environmentWithY.appendWritesOf(environmentWithOtherY) }) - .reads('8', '0') - .reads('8', '4') - .sameDef('0', '4') - ) - }) - - describe('loops', () => { - describe('for', () => { - assertDataflow('simple constant for-loop', shell, - 'for(i in 1:10) { 1 }', - 
emptyGraph().defineVariable('0', 'i') - ) - assertDataflow('using loop variable in body', shell, - 'for(i in 1:10) { i }', - emptyGraph() - .defineVariable('0', 'i') - .use('4', 'i', { when: 'maybe', environment: defaultEnvironment().defineVariable('i', '0', '6') }) - .reads('4', '0', 'maybe') - ) - }) - - describe('repeat', () => { - assertDataflow('simple constant repeat', shell, - 'repeat 2', - emptyGraph() - ) - assertDataflow('using loop variable in body', shell, - 'repeat x', - emptyGraph().use('0', 'x') - ) - assertDataflow('using loop variable in body', shell, - 'repeat { x <- 1 }', - emptyGraph().defineVariable('0', 'x') - ) - assertDataflow('using variable in body', shell, - 'repeat { x <- y }', - emptyGraph() - .defineVariable('0', 'x') - .use('1', 'y') - .definedBy('0', '1') - ) - }) - }) -})) diff --git a/test/functionality/dataflow/processing-of-elements/atomic/dataflow-atomic-tests.ts b/test/functionality/dataflow/processing-of-elements/atomic/dataflow-atomic-tests.ts new file mode 100644 index 0000000000..91318ac055 --- /dev/null +++ b/test/functionality/dataflow/processing-of-elements/atomic/dataflow-atomic-tests.ts @@ -0,0 +1,705 @@ +/** + * Here we cover dataflow extraction for atomic statements (no expression lists). + * Yet, some constructs (like for-loops) require the combination of statements, they are included as well. + * This will not include functions! 
+ */ +import { assertDataflow, withShell } from '../../../_helper/shell' +import { MIN_VERSION_PIPE } from '../../../../../src/r-bridge/lang-4.x/ast/model/versions' +import { label } from '../../../_helper/label' +import { emptyGraph } from '../../../_helper/dataflow/dataflowgraph-builder' +import { argumentInCall, defaultEnv } from '../../../_helper/dataflow/environment-builder' +import { AssignmentOperators, BinaryNonAssignmentOperators, UnaryOperatorPool } from '../../../_helper/provider' +import { EmptyArgument, OperatorDatabase } from '../../../../../src' +import type { SupportedFlowrCapabilityId } from '../../../../../src/r-bridge/data' +import type { FunctionArgument } from '../../../../../src/dataflow' +import { startAndEndsWith } from '../../../../../src/util/strings' +import { BuiltIn } from '../../../../../src/dataflow' + +describe('Atomic (dataflow information)', withShell(shell => { + describe('Uninteresting Leafs', () => { + for(const [input, id] of [ + ['42', 'numbers'], + ['"test"', 'strings'], + ['\'test\'', 'strings'], + ['TRUE', 'logical'], + ['FALSE', 'logical'], + ['NA', 'numbers'], + ['NULL', 'null'], + ['Inf', 'inf-and-nan'], + ['NaN', 'inf-and-nan'] + ] as [string, SupportedFlowrCapabilityId][]) { + assertDataflow(label(input, [id]), shell, input, + emptyGraph().constant('0') + ) + } + }) + + assertDataflow(label('simple variable', ['name-normal']), shell, + 'xylophone', emptyGraph().use('0', 'xylophone') + ) + + describe('Access', () => { + describe('Access with Constant', () => { + assertDataflow(label('single constant', ['name-normal', 'numbers', 'single-bracket-access']), + shell,'a[2]', emptyGraph() + .use('0', 'a', { controlDependencies: [] }) + .argument('3', '0') + .call('3', '[', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: ['0', '1', BuiltIn], onlyBuiltIn: true }) + .argument('3', '1') + .constant('1') + ) + assertDataflow(label('double constant', ['name-normal', 'numbers', 'double-bracket-access']), + 
shell, 'a[[2]]', emptyGraph() + .use('0', 'a', { controlDependencies: [] }) + .argument('3', '0') + .argument('3', '1') + .constant('1') + .call('3', '[[', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: ['0', '1', BuiltIn], onlyBuiltIn: true }) + ) + assertDataflow(label('dollar constant', ['name-normal', 'dollar-access']), + shell, 'a$b', emptyGraph() + .use('0', 'a', { controlDependencies: [] }) + .argument('3', '0') + .argument('3', '1') + .call('3', '$', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: ['0', BuiltIn], onlyBuiltIn: true }) + .constant('1') + ) + assertDataflow(label('at constant', ['name-normal', 'slot-access']), + shell, 'a@b', emptyGraph() + .use('0', 'a', { controlDependencies: [] }) + .argument('3', '0') + .call('3', '@', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: ['0', BuiltIn], onlyBuiltIn: true }) + .argument('3', '1') + .constant('1') + ) + assertDataflow(label('chained constant', ['name-normal', 'numbers', 'single-bracket-access']), shell, + 'a[2][3]', emptyGraph() + .use('0', 'a', { controlDependencies: [] }) + .argument('3', '0') + .call('3', '[', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn, '0', '1'], onlyBuiltIn: true }) + .argument('3', '1') + .argument('6', '3') + .call('6', '[', [argumentInCall('3'), argumentInCall('4')], { returns: ['3'], reads: ['3', '4', BuiltIn], onlyBuiltIn: true }) + .argument('6', '4') + .constant('1') + .constant('4') + + ) + assertDataflow(label('chained mixed constant', ['dollar-access', 'single-bracket-access', 'name-normal', 'numbers']), shell, + 'a[2]$a', emptyGraph() + .use('0', 'a', { controlDependencies: [] }) + .argument('3', '0') + .call('3', '[', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn, '0', '1'], onlyBuiltIn: true }) + .argument('3', '1') + .argument('6', '3') + .call('6', '$', [argumentInCall('3'), argumentInCall('4')], { returns: ['3'], reads: ['3', 
BuiltIn], onlyBuiltIn: true }) + .argument('6', '4') + .constant('1') + .constant('4') + ) + }) + assertDataflow(label('chained bracket access with variables', ['name-normal', 'single-bracket-access', ...OperatorDatabase['<-'].capabilities]), shell, + 'a[x][y]', emptyGraph() + .use('0', 'a', { controlDependencies: [] }) + .use('1', 'x') + .use('4', 'y') + .argument('3', ['0', '1']) + .call('3', '[', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn, '0', '1'], onlyBuiltIn: true }) + .argument('6', ['3', '4']) + .call('6', '[', [argumentInCall('3'), argumentInCall('4')], { returns: ['3'], reads: ['3', '4', BuiltIn], onlyBuiltIn: true }) + ) + assertDataflow(label('assign on access', ['name-normal', 'single-bracket-access', ...OperatorDatabase['<-'].capabilities, 'replacement-functions']), shell, + 'a[x] <- 5', emptyGraph() + .use('1', 'x') + .call('3', '[<-', [argumentInCall('0'), argumentInCall('1'), argumentInCall('4')], { returns: ['0'], reads: ['1', BuiltIn], onlyBuiltIn: true }) + .constant('4') + .defineVariable('0', 'a', { definedBy: ['4', '3'] }) + ) + }) + + describe('Unary Operators', () => { + for(const op of UnaryOperatorPool) { + const inputDifferent = `${op}x` + const opData = OperatorDatabase[op] + assertDataflow(label(`${op}x`, ['unary-operator', 'name-normal', ...opData.capabilities]), shell, + inputDifferent, + emptyGraph() + .use('0', 'x').reads('1', '0') + .call('1', op, [argumentInCall('0')], { reads: [BuiltIn] }) + ) + } + }) + + // these will be more interesting whenever we have more information on the edges (like modification etc.) 
+ describe('Non-Assignment Binary Operators', () => { + for(const op of BinaryNonAssignmentOperators.filter(x => !startAndEndsWith(x, '%'))) { + describe(`${op}`, () => { + const inputDifferent = `x ${op} y` + const inputSame = `x ${op} x` + + const capabilities = OperatorDatabase[op].capabilities + if(capabilities.includes('non-strict-logical-operators')) { + assertDataflow(label(`${inputDifferent} (different variables)`, ['name-normal', ...capabilities]), + shell, + inputDifferent, + emptyGraph() + .use('0', 'x') + .use('1', 'y', { controlDependencies: ['2'] }) + .call('2', op, [argumentInCall('0'), argumentInCall('1')], { returns: [], reads: [BuiltIn] }) + .reads('2', '0') + ) + + assertDataflow(label(`${inputSame} (same variables)`, ['name-normal', ...capabilities]), + shell, inputSame, + emptyGraph() + .use('0', 'x') + .sameRead('0', '1') + .use('1', 'x', { controlDependencies: ['2'] }) + .call('2', op, [argumentInCall('0'), argumentInCall('1')], { returns: [], reads: [BuiltIn] }) + .reads('2', '0') + ) + } else { + assertDataflow(label(`${inputDifferent} (different variables)`, ['name-normal', ...capabilities]), + shell, + inputDifferent, + emptyGraph() + .call('2', op, [argumentInCall('0'), argumentInCall('1')], { reads: [BuiltIn] }) + .use('0', 'x').use('1', 'y').reads('2', ['0', '1']) + ) + + assertDataflow(label(`${inputSame} (same variables)`, ['name-normal', ...capabilities]), + shell, + inputSame, + emptyGraph() + .call('2', op, [argumentInCall('0'), argumentInCall('1')], { reads: [BuiltIn] }) + .use('0', 'x').use('1', 'x') + .sameRead('0', '1').reads('2', ['0', '1']) + ) + } + }) + } + }) + + describe('Assignment Operators', () => { + for(const op of AssignmentOperators) { + describe(`${op}`, () => { + const swapSourceAndTarget = op === '->' || op === '->>' + const [variableId, constantId] = swapSourceAndTarget ? 
['1', '0'] : ['0', '1'] + + const args: FunctionArgument[] = [argumentInCall('0'), argumentInCall('1')] + + const constantAssignment = swapSourceAndTarget ? `5 ${op} x` : `x ${op} 5` + assertDataflow(label(`${constantAssignment} (constant assignment)`, ['name-normal', ...OperatorDatabase[op].capabilities, 'numbers']), + shell, constantAssignment, + emptyGraph() + .call('2', op, args, { reads: [BuiltIn], returns: [`${variableId}`] }) + .defineVariable(variableId, 'x', { definedBy: [constantId, '2'] }) + .constant(constantId) + ) + + const variableAssignment = `x ${op} y` + const dataflowGraph = emptyGraph() + .call('2', op, args, { reads: [BuiltIn], returns: [`${variableId}`] }) + if(swapSourceAndTarget) { + dataflowGraph + .use('0', 'x') + .defineVariable('1', 'y', { definedBy: ['0', '2'] }) + } else { + dataflowGraph + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .use('1', 'y') + } + assertDataflow(label(`${variableAssignment} (variable assignment)`, ['name-normal', ...OperatorDatabase[op].capabilities]), + shell, + variableAssignment, + dataflowGraph + ) + + const circularAssignment = `x ${op} x` + + const circularGraph = emptyGraph() + .call('2', op, args, { reads: [BuiltIn], returns: [`${variableId}`] }) + if(swapSourceAndTarget) { + circularGraph + .use('0', 'x') + .defineVariable('1', 'x', { definedBy: ['0', '2'] }) + } else { + circularGraph + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .use('1', 'x') + } + + assertDataflow(label(`${circularAssignment} (circular assignment)`, ['name-normal', ...OperatorDatabase[op].capabilities, 'return-value-of-assignments']), + shell, circularAssignment, + circularGraph + ) + }) + } + describe('Nested Assignments', () => { + assertDataflow(label('"x <- y <- 1"', ['name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities, 'return-value-of-assignments']), + shell, 'x <- y <- 1', emptyGraph() + .call('3', '<-', [argumentInCall('1'), argumentInCall('2')], { returns: ['1'], reads: [BuiltIn] }) + 
.argument('3', ['2', '1']) + .argument('4', '3') + .call('4', '<-', [argumentInCall('0'), argumentInCall('3')], { returns: ['0'], reads: [BuiltIn] }) + .argument('4', '0') + .sameRead('4', '3') + .constant('2') + .defineVariable('1', 'y', { definedBy: ['2', '3'] }) + .defineVariable('0', 'x', { definedBy: ['3', '4'] }) + ) + assertDataflow(label('"1 -> x -> y"', ['name-normal', 'numbers', ...OperatorDatabase['->'].capabilities, 'return-value-of-assignments']), + shell, '1 -> x -> y', emptyGraph() + .call('2', '->', [argumentInCall('0'), argumentInCall('1')], { returns: ['1'], reads: [BuiltIn] }) + .argument('2', ['0', '1']) + .argument('4', '2') + .call('4', '->', [argumentInCall('2'), argumentInCall('3')], { returns: ['3'], reads: [BuiltIn] }) + .argument('4', '3') + .sameRead('4', '2') + .constant('0') + .defineVariable('1', 'x', { definedBy: ['0', '2'] }) + .defineVariable('3', 'y', { definedBy: ['2', '4'] }) + ) + assertDataflow(label('"x <- 1 -> y"', ['name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities, ...OperatorDatabase['->'].capabilities, 'return-value-of-assignments']), + shell, 'x <- 1 -> y', emptyGraph() + .call('3', '->', [argumentInCall('1'), argumentInCall('2')], { returns: ['2'], reads: [BuiltIn] }) + .argument('3', ['1', '2']) + .argument('4', '3') + .call('4', '<-', [argumentInCall('0'), argumentInCall('3')], { returns: ['0'], reads: [BuiltIn] }) + .argument('4', '0') + .constant('1') + .defineVariable('2', 'y', { definedBy: ['1', '3'] }) + .defineVariable('0', 'x', { definedBy: ['3', '4'] }) + ) + assertDataflow(label('"x <- y <- z"', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'return-value-of-assignments']), + shell, 'x <- y <- z', emptyGraph() + .use('2', 'z') + .argument('3', '2') + .call('3', '<-', [argumentInCall('1'), argumentInCall('2')], { returns: ['1'], reads: [BuiltIn] }) + .argument('3', '1') + .argument('4', '3') + .call('4', '<-', [argumentInCall('0'), argumentInCall('3')], { returns: ['0'], reads: 
[BuiltIn] }) + .argument('4', '0') + .sameRead('4', '3') + .defineVariable('1', 'y', { definedBy: ['2', '3'] }) + .defineVariable('0', 'x', { definedBy: ['3', '4'] }) + ) + assertDataflow(label('Nested Global Assignments', ['name-normal', ...OperatorDatabase['<<-'].capabilities, 'return-value-of-assignments']), + shell, 'x <<- y <<- z', emptyGraph() + .use('2', 'z') + .argument('3', '2') + .call('3', '<<-', [argumentInCall('1'), argumentInCall('2')], { returns: ['1'], reads: [BuiltIn] }) + .argument('3', '1') + .argument('4', '3') + .call('4', '<<-', [argumentInCall('0'), argumentInCall('3')], { returns: ['0'], reads: [BuiltIn] }) + .argument('4', '0') + .sameRead('4', '3') + .defineVariable('1', 'y', { definedBy: ['2', '3'] }) + .defineVariable('0', 'x', { definedBy: ['3', '4'] }) + ) + assertDataflow(label('Nested Global Mixed with Local Assignments', ['name-normal', ...OperatorDatabase['<<-'].capabilities, ...OperatorDatabase['<-'].capabilities, 'return-value-of-assignments']), + shell, 'x <<- y <- y2 <<- z', emptyGraph() + .use('3', 'z') + .argument('4', '3') + .call('4', '<<-', [argumentInCall('2'), argumentInCall('3')], { returns: ['2'], reads: [BuiltIn] }) + .argument('4', '2') + .sameRead('4', '6') + .argument('5', '4') + .call('5', '<-', [argumentInCall('1'), argumentInCall('4')], { returns: ['1'], reads: [BuiltIn] }) + .argument('5', '1') + .argument('6', '5') + .call('6', '<<-', [argumentInCall('0'), argumentInCall('5')], { returns: ['0'], reads: [BuiltIn] }) + .argument('6', '0') + .defineVariable('2', 'y2', { definedBy: ['3', '4'] }) + .defineVariable('1', 'y', { definedBy: ['4', '5'] }) + .defineVariable('0', 'x', { definedBy: ['5', '6'] }) + ) + assertDataflow(label('Use Assignment on Target Side', ['numbers', 'single-bracket-access', 'replacement-functions', 'name-normal', ...OperatorDatabase['<-'].capabilities, 'return-value-of-assignments']), + shell, 'a[x] <- x <- 3', emptyGraph() + .use('1', 'x') + .call('6', '<-', [argumentInCall('4'), 
argumentInCall('5')], { returns: ['4'], reads: [BuiltIn] }) + .argument('6', ['5', '4']) + .argument('3', '1') + .argument('3', '6') + .call('3', '[<-', [argumentInCall('0'), argumentInCall('1'), argumentInCall('6')], { returns: ['0'], reads: ['1', BuiltIn], onlyBuiltIn: true }) + .argument('3', '0') + .constant('5') + .defineVariable('4', 'x', { definedBy: ['5', '6'] }) + .defineVariable('0', 'a', { definedBy: ['6', '3'] }) + ) + assertDataflow(label('Use Assignment on Target Side (inv)', ['numbers', 'single-bracket-access', 'replacement-functions', 'name-normal', ...OperatorDatabase['->'].capabilities, 'return-value-of-assignments']), + shell, '3 -> x -> a[x]', emptyGraph() + .use('4', 'x') + .call('2', '->', [argumentInCall('0'), argumentInCall('1')], { returns: ['1'], reads: [BuiltIn] }) + .argument('2', ['0', '1']) + .argument('6', '4') + .argument('6', '2') + .call('6', '[<-', [argumentInCall('3'), argumentInCall('4'), argumentInCall('2')], { returns: ['3'], reads: ['4', BuiltIn], onlyBuiltIn: true }) + .argument('6', '3') + .constant('0') + .defineVariable('1', 'x', { definedBy: ['0', '2'] }) + .defineVariable('3', 'a', { definedBy: ['2', '6'] }) + ) + }) + + describe('Known Impact Assignments', () => { + describe('Loops Return Invisible Null', () => { + describe('With <-', () => { + assertDataflow(label('Repeat', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'return-value-of-assignments', 'repeat-loop']), + shell, 'x <- repeat x', emptyGraph() + .use('1', 'x') + .argument('3', '1') + .call('3', 'repeat', [argumentInCall('1')], { returns: [], reads: [BuiltIn] }) + .nse('3', '1') + .argument('4', '3') + .call('4', '<-', [argumentInCall('0'), argumentInCall('3')], { returns: ['0'], reads: [BuiltIn] }) + .argument('4', '0') + .defineVariable('0', 'x', { definedBy: ['3', '4'] }) + ) + + assertDataflow(label('While', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'return-value-of-assignments', 'while-loop', 'numbers']), + shell, 'x <- while (x) 
3', emptyGraph() + .use('1', 'x') + .argument('4', '1') + .call('4', 'while', [argumentInCall('1'), argumentInCall('2')], { returns: [], reads: [BuiltIn, '1'], onlyBuiltIn: true }) + .argument('4', '2') + .nse('4', '2') + .argument('5', '4') + .call('5', '<-', [argumentInCall('0'), argumentInCall('4')], { returns: ['0'], reads: [BuiltIn] }) + .argument('5', '0') + .constant('2', { controlDependency: [] }) + .defineVariable('0', 'x', { definedBy: ['4', '5'] }) + ) + + assertDataflow(label('For', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'return-value-of-assignments', 'for-loop', 'numbers', 'built-in-sequencing']), + shell, 'x <- for (i in 1:4) 3', emptyGraph() + .call('4', ':', [argumentInCall('2'), argumentInCall('3')], { returns: [], reads: [BuiltIn, '2', '3'], onlyBuiltIn: true }) + .call('7', 'for', [argumentInCall('1'), argumentInCall('4'), argumentInCall('5')], { returns: [], reads: [BuiltIn, '1', '4'], onlyBuiltIn: true, environment: defaultEnv().defineVariable('i', '1', '7') }) + .nse('7', '5') + .call('8', '<-', [argumentInCall('0'), argumentInCall('7')], { returns: ['0'], reads: [BuiltIn] }) + .defineVariable('1', 'i', { definedBy: ['4'] }) + .constant('2') + .constant('3') + .constant('5', { controlDependency: [] }) + .defineVariable('0', 'x', { definedBy: ['7', '8'] }) + ) + }) + }) + }) + describe('Assignment with Function Call', () => { + assertDataflow(label('define call with multiple args should only be defined by the call-return', ['name-normal', 'numbers', 'unnamed-arguments', 'call-normal', ...OperatorDatabase['<-'].capabilities, 'return-value-of-assignments']), + shell, 'a <- foo(x=3,y,z)', emptyGraph() + .reads('4', '3') + .use('5', 'y') + .use('7', 'z') + .call('9', 'foo', [argumentInCall('4', { name: 'x' }), argumentInCall('5'), argumentInCall('7')], { returns: [], reads: [] }) + .argument('10', '9') + .call('10', '<-', [argumentInCall('0'), argumentInCall('9')], { returns: ['0'], reads: [BuiltIn] }) + .argument('10', '0') + 
.constant('3') + .defineVariable('0', 'a', { definedBy: ['9', '10'] }) + ) + }) + }) + + describe('Pipes', () => { + describe('Passing one argument', () => { + assertDataflow(label('No parameter function', ['built-in-pipe-and-pipe-bind', 'name-normal', 'call-normal']), + shell, 'x |> f()', emptyGraph() + .use('0', 'x') + .argument('3', '0') + .call('3', 'f', [argumentInCall('0')], { returns: [], reads: [] }) + .argument('4', '0') + .argument('4', '3') + .call('4', '|>', [argumentInCall('0'), argumentInCall('3')], { returns: [], reads: [BuiltIn] }), + { minRVersion: MIN_VERSION_PIPE } + ) + assertDataflow(label('Nested calling', ['built-in-pipe-and-pipe-bind', 'call-normal', 'built-in-pipe-and-pipe-bind', 'name-normal']), + shell, 'x |> f() |> g()', emptyGraph() + .use('0', 'x') + .argument('3', '0') + .call('3', 'f', [argumentInCall('0')], { returns: [], reads: [] }) + .argument('4', '0') + .argument('4', '3') + .call('4', '|>', [argumentInCall('0'), argumentInCall('3')], { returns: [], reads: [BuiltIn] }) + .argument('7', '4') + .call('7', 'g', [argumentInCall('4')], { returns: [], reads: [] }) + .argument('8', '4') + .argument('8', '7') + .call('8', '|>', [argumentInCall('4'), argumentInCall('7')], { returns: [], reads: [BuiltIn] }), + { minRVersion: MIN_VERSION_PIPE } + ) + assertDataflow(label('Multi-Parameter function', ['built-in-pipe-and-pipe-bind', 'call-normal', 'built-in-pipe-and-pipe-bind', 'name-normal', 'unnamed-arguments']), + shell, 'x |> f(y,z)', emptyGraph() + .use('0', 'x') + .use('3', 'y') + .use('5', 'z') + .argument('7', '0') + .argument('7', '3') + .argument('7', '5') + .call('7', 'f', [argumentInCall('0'), argumentInCall('3'), argumentInCall('5')], { returns: [], reads: [] }) + .argument('8', '0') + .argument('8', '7') + .call('8', '|>', [argumentInCall('0'), argumentInCall('7')], { returns: [], reads: [BuiltIn] }), + { minRVersion: MIN_VERSION_PIPE } + ) + }) + }) + + + describe('if-then-else', () => { + // spacing issues etc. 
are dealt with within the parser; however, braces are not allowed to introduce scoping artifacts + describe('if-then, no else', () => { + assertDataflow(label('completely constant', ['if', 'logical', 'numbers']), + shell, 'if (TRUE) 1', + emptyGraph() + .call('3', 'if', [argumentInCall('0'), argumentInCall('1'), EmptyArgument], { returns: ['1'], reads: ['0', BuiltIn], onlyBuiltIn: true }) + .constant('0') + .constant('1') + ) + assertDataflow(label('compare condition', ['if', 'logical', 'numbers', ...OperatorDatabase['>'].capabilities]), + shell, 'if (x > 5) 1', emptyGraph() + .use('0', 'x') + .call('2', '>', [argumentInCall('0'), argumentInCall('1')], { returns: [], reads: [BuiltIn] }) + .reads('2', ['0', '1']) + .call('5', 'if', [argumentInCall('2'), argumentInCall('3'), EmptyArgument], { returns: ['3'], reads: ['2', BuiltIn], onlyBuiltIn: true }) + .constant('1') + .constant('3', { controlDependency: ['5'] }) + ) + assertDataflow(label('compare cond. symbol in then', ['if', 'logical', 'numbers', 'name-normal', ...OperatorDatabase['>'].capabilities]), + shell, 'if (x > 5) y', emptyGraph() + .use('0', 'x') + .use('3', 'y', { controlDependencies: ['5'] }) + .argument('2', '0') + .call('2', '>', [argumentInCall('0'), argumentInCall('1')], { returns: [], reads: [BuiltIn] }) + .reads('2', ['0', '1']) + .call('5', 'if', [argumentInCall('2'), argumentInCall('3'), EmptyArgument], { returns: ['3'], reads: ['2', BuiltIn], onlyBuiltIn: true }) + .constant('1') + ) + assertDataflow(label('all variables', ['if', 'logical', 'name-normal', ...OperatorDatabase['>'].capabilities]), + shell, 'if (x > y) z', emptyGraph() + .use('0', 'x') + .use('1', 'y') + .use('3', 'z', { controlDependencies: ['5'] }) + .call('2', '>', [argumentInCall('0'), argumentInCall('1')], { returns: [], reads: [BuiltIn] }) + .reads('2', ['0', '1']) + .call('5', 'if', [argumentInCall('2'), argumentInCall('3'), EmptyArgument], { returns: ['3'], reads: ['2', BuiltIn], onlyBuiltIn: true }) + ) + 
assertDataflow(label('all variables, some same', ['if', 'logical', 'name-normal', ...OperatorDatabase['>'].capabilities]), + shell, 'if (x > y) x', emptyGraph() + .use('0', 'x') + .sameRead('0', '3') + .use('1', 'y') + .use('3', 'x', { controlDependencies: ['5'] }) + .call('2', '>', [argumentInCall('0'), argumentInCall('1')], { returns: [], reads: [BuiltIn] }) + .reads('2', ['0', '1']) + .call('5', 'if', [argumentInCall('2'), argumentInCall('3'), EmptyArgument], { returns: ['3'], reads: ['2', BuiltIn], onlyBuiltIn: true }) + ) + assertDataflow(label('all same variables', ['if', 'logical', 'name-normal', ...OperatorDatabase['>'].capabilities]), + shell, 'if (x > x) x', emptyGraph() + .use('0', 'x') + // theoretically, they just have to be connected, so 0 is just hardcoded + .sameRead('0', ['1', '3']) + .use('1', 'x') + .use('3', 'x', { controlDependencies: ['5'] }) + .call('2', '>', [argumentInCall('0'), argumentInCall('1')], { returns: [], reads: [BuiltIn] }) + .reads('2', ['0', '1']) + .call('5', 'if', [argumentInCall('2'), argumentInCall('3'), EmptyArgument], { returns: ['3'], reads: ['2', BuiltIn], onlyBuiltIn: true }) + ) + assertDataflow(label('definition in if', ['if', 'logical', 'name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities]), + shell, 'if (x <- 3) x', emptyGraph() + .use('3', 'x', { controlDependencies: ['5'] }) + .reads('3', '0') + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .call('5', 'if', [argumentInCall('2'), argumentInCall('3'), EmptyArgument], { returns: ['3'], reads: ['2', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '0', '2') }) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + ) + }) + + describe('if-then, with else', () => { + assertDataflow(label('completely constant', ['if', 'logical', 'numbers', 'grouping']), + shell, 'if (TRUE) { 1 } else { 2 }', + emptyGraph() + .call('4', '{', [argumentInCall('3')], { returns: 
['3'], reads: [BuiltIn] }) + .argument('4', '3') + .call('9', 'if', [argumentInCall('0'), argumentInCall('4'), EmptyArgument], { returns: ['4'], reads: [BuiltIn] }) + .argument('9', ['4', '0']) + .reads('9', '0') + .constant('0') + .constant('3') + ) + assertDataflow(label('compare cond.', ['if', 'logical', 'numbers', 'name-normal', 'grouping', ...OperatorDatabase['>'].capabilities]), + shell, 'if (x > 5) { 1 } else { 42 }', + emptyGraph() + .use('0', 'x') + .call('2', '>', [argumentInCall('0'), argumentInCall('1')], { returns: [], reads: [BuiltIn] }) + .reads('2', ['0', '1']) + .call('6', '{', [argumentInCall('5')], { returns: ['5'], reads: [BuiltIn], controlDependency: ['11'] }) + .sameRead('6', '10') + .call('10', '{', [argumentInCall('9')], { returns: ['9'], reads: [BuiltIn], controlDependency: ['11'] }) + .call('11', 'if', [argumentInCall('2'), argumentInCall('6'), argumentInCall('10')], { returns: ['6', '10'], reads: [BuiltIn] }) + .reads('11', '2') + .constant('1') + .constant('5', { controlDependency: ['11'] }) + .constant('9', { controlDependency: ['11'] }) + ) + assertDataflow(label('compare cond. 
symbol in then', ['if', 'logical', 'numbers', 'name-normal', 'grouping', ...OperatorDatabase['>'].capabilities]), + shell, 'if (x > 5) { y } else { 42 }', + emptyGraph() + .use('0', 'x') + .use('5', 'y', { controlDependencies: ['11'] }) + .call('2', '>', [argumentInCall('0'), argumentInCall('1')], { returns: [], reads: [BuiltIn] }) + .reads('2', ['0', '1']) + .argument('2', ['1']) + .call('6', '{', [argumentInCall('5')], { returns: ['5'], reads: [BuiltIn], controlDependency: ['11'] }) + .argument('6', '5') + .sameRead('6', '10') + .call('10', '{', [argumentInCall('9')], { returns: ['9'], reads: [BuiltIn], controlDependency: ['11'] }) + .argument('10', '9') + .call('11', 'if', [argumentInCall('2'), argumentInCall('6'), argumentInCall('10')], { returns: ['6', '10'], reads: [BuiltIn] }) + .argument('11', ['6', '10', '2']) + .reads('11', '2') + .constant('1') + .constant('9', { controlDependency: ['11'] }) + ) + assertDataflow(label('compare cond. symbol in then & else', ['if', 'logical', 'numbers', 'name-normal', 'grouping', ...OperatorDatabase['>'].capabilities]), + shell, 'if (x > 5) { y } else { z }', emptyGraph() + .use('0', 'x') + .use('5', 'y', { controlDependencies: ['11'] }) + .use('9', 'z', { controlDependencies: ['11'] }) + .call('2', '>', [argumentInCall('0'), argumentInCall('1')], { returns: [], reads: [BuiltIn] }) + .reads('2', ['0', '1']) + .call('6', '{', [argumentInCall('5')], { returns: ['5'], reads: [BuiltIn], controlDependency: ['11'] }) + .sameRead('6', '10') + .call('10', '{', [argumentInCall('9')], { returns: ['9'], reads: [BuiltIn], controlDependency: ['11'] }) + .call('11', 'if', [argumentInCall('2'), argumentInCall('6'), argumentInCall('10')], { returns: ['6', '10'], reads: ['2', BuiltIn], onlyBuiltIn: true }) + .constant('1') + ) + assertDataflow(label('all variables', ['if', 'logical', 'name-normal', 'grouping', ...OperatorDatabase['>'].capabilities]), + shell, 'if (x > y) { z } else { a }', emptyGraph() + .use('0', 'x') + .use('1', 'y') + 
.use('5', 'z', { controlDependencies: ['11'] }) + .use('9', 'a', { controlDependencies: ['11'] }) + .call('2', '>', [argumentInCall('0'), argumentInCall('1')], { returns: [], reads: [BuiltIn] }) + .reads('2', ['0', '1']) + .call('6', '{', [argumentInCall('5')], { returns: ['5'], reads: [BuiltIn], controlDependency: ['11'] }) + .sameRead('6', '10') + .call('10', '{', [argumentInCall('9')], { returns: ['9'], reads: [BuiltIn], controlDependency: ['11'] }) + .call('11', 'if', [argumentInCall('2'), argumentInCall('6'), argumentInCall('10')], { returns: ['6', '10'], reads: ['2', BuiltIn], onlyBuiltIn: true }) + ) + assertDataflow(label('all variables, some same', ['if', 'logical', 'name-normal', 'grouping', ...OperatorDatabase['>'].capabilities]), + shell, 'if (y > x) { x } else { y }', emptyGraph() + .use('0', 'y') + .sameRead('0', '9') + .use('1', 'x') + .sameRead('1', '5') + .use('5', 'x', { controlDependencies: ['11'] }) + .use('9', 'y', { controlDependencies: ['11'] }) + .call('2', '>', [argumentInCall('0'), argumentInCall('1')], { returns: [], reads: [BuiltIn] }) + .reads('2', ['0', '1']) + .call('6', '{', [argumentInCall('5')], { returns: ['5'], reads: [BuiltIn], controlDependency: ['11'] }) + .sameRead('6', '10') + .call('10', '{', [argumentInCall('9')], { returns: ['9'], reads: [BuiltIn], controlDependency: ['11'] }) + .call('11', 'if', [argumentInCall('2'), argumentInCall('6'), argumentInCall('10')], { returns: ['6', '10'], reads: ['2', BuiltIn], onlyBuiltIn: true }) + ) + assertDataflow(label('all same variables', ['if', 'logical', 'name-normal', 'grouping', ...OperatorDatabase['>'].capabilities]), + shell, 'if (x > x) { x } else { x }', emptyGraph() + .use('0', 'x') + .sameRead('0', ['1', '5', '9']) + .use('1', 'x') + .use('5', 'x', { controlDependencies: ['11'] }) + .use('9', 'x', { controlDependencies: ['11'] }) + .call('2', '>', [argumentInCall('0'), argumentInCall('1')], { returns: [], reads: [BuiltIn] }) + .reads('2', ['0', '1']) + .call('6', '{', 
[argumentInCall('5')], { returns: ['5'], reads: [BuiltIn], controlDependency: ['11'] }) + .sameRead('6', '10') + .call('10', '{', [argumentInCall('9')], { returns: ['9'], reads: [BuiltIn], controlDependency: ['11'] }) + .call('11', 'if', [argumentInCall('2'), argumentInCall('6'), argumentInCall('10')], { returns: ['6', '10'], reads: ['2', BuiltIn], onlyBuiltIn: true }) + ) + }) + }) + describe('Inline Non-Strict Boolean Operations', () => { + assertDataflow(label('rhs has to depend on x', ['name-normal', 'logical', 'numbers', 'semicolons', ...OperatorDatabase['&&'].capabilities, ...OperatorDatabase['<-'].capabilities]), + shell, 'y <- 15; x && (y <- 13); y', + emptyGraph() + .use('3', 'x') + .use('11', 'y') + .reads('11', ['0', '6']) + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .argument('2', ['1', '0']) + .call('8', '<-', [argumentInCall('6'), argumentInCall('7')], { returns: ['6'], reads: [BuiltIn], controlDependency: ['10'], environment: defaultEnv().defineVariable('y', '0', '2') }) + .argument('8', ['7', '6']) + .call('9', '(', [argumentInCall('8')], { returns: ['8'], reads: [BuiltIn], controlDependency: ['10'], environment: defaultEnv().defineVariable('y', '0', '2').defineVariable('y', '6', '8', ['10']) }) + .call('10', '&&', [argumentInCall('3'), argumentInCall('9')], { returns: [], reads: [BuiltIn], environment: defaultEnv().defineVariable('y', '0', '2').defineVariable('y', '6', '8', ['10']) }) + .reads('10', '3') + .argument('10', ['3', '9']) + .constant('1') + .defineVariable('0', 'y', { definedBy: ['1', '2'] }) + .constant('7', { controlDependency: ['10'] }) + .defineVariable('6', 'y', { definedBy: ['7', '8'], controlDependency: ['10'] }) + ) + }) + + describe('Loops', () => { + describe('For', () => { + assertDataflow(label('simple constant for-loop', ['for-loop', 'numbers', 'name-normal', 'built-in-sequencing', 'grouping']), + shell, 'for(i in 1:10) { 1 }', emptyGraph() + .call('3', ':', 
[argumentInCall('1'), argumentInCall('2')], { returns: [], reads: ['1', '2', BuiltIn], onlyBuiltIn: true }) + .call('7', '{', [argumentInCall('6')], { returns: ['6'], reads: [BuiltIn], controlDependency: [] }) + .call('8', 'for', [argumentInCall('0'), argumentInCall('3'), argumentInCall('7')], { returns: [], reads: ['0', '3', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('i', '0', '8') }) + .nse('8', '7') + .defineVariable('0', 'i', { definedBy: ['3'] }) + .constant('1') + .constant('2') + .constant('6', { controlDependency: [] }) + ) + assertDataflow(label('using loop variable in body', ['for-loop', 'numbers', 'name-normal', 'built-in-sequencing', 'grouping']), + shell, 'for(i in 1:10) { i }', emptyGraph() + .use('6', 'i', { controlDependencies: [] }) + .reads('6', '0') + .call('3', ':', [argumentInCall('1'), argumentInCall('2')], { returns: [], reads: ['1', '2', BuiltIn], onlyBuiltIn: true }) + .call('7', '{', [argumentInCall('6')], { returns: ['6'], reads: [BuiltIn], controlDependency: [] }) + .call('8', 'for', [argumentInCall('0'), argumentInCall('3'), argumentInCall('7')], { returns: [], reads: ['0', '3', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('i', '0', '8') }) + .nse('8', '7') + .defineVariable('0', 'i', { definedBy: ['3'] }) + .constant('1') + .constant('2') + ) + }) + + describe('Repeat', () => { + assertDataflow(label('simple constant repeat', ['repeat-loop', 'numbers']), + shell, 'repeat 2', emptyGraph() + .call('2', 'repeat', [argumentInCall('0')], { returns: [], reads: [BuiltIn] }) + .nse('2', '0') + .constant('0') + ) + assertDataflow(label('using loop variable in body', ['repeat-loop', 'name-normal']), + shell, 'repeat x', emptyGraph() + .use('0', 'x') + .call('2', 'repeat', [argumentInCall('0')], { returns: [], reads: [BuiltIn] }) + .nse('2', '0') + ) + assertDataflow(label('using loop variable in body', ['repeat-loop', 'name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities, 
'grouping']), + shell, 'repeat { x <- 1 }', emptyGraph() + .call('4', '<-', [argumentInCall('2'), argumentInCall('3')], { returns: ['2'], reads: [BuiltIn] }) + .call('5', '{', [argumentInCall('4')], { returns: ['4'], reads: [BuiltIn] }) + .call('6', 'repeat', [argumentInCall('5')], { returns: [], reads: [BuiltIn] }) + .nse('6', '5') + .constant('3') + .defineVariable('2', 'x', { definedBy: ['3', '4'] }) + ) + assertDataflow(label('using variable in body', ['repeat-loop', 'name-normal', ...OperatorDatabase['<-'].capabilities, 'grouping']), + shell, 'repeat { x <- y }', emptyGraph() + .use('3', 'y') + .call('4', '<-', [argumentInCall('2'), argumentInCall('3')], { returns: ['2'], reads: [BuiltIn] }) + .call('5', '{', [argumentInCall('4')], { returns: ['4'], reads: [BuiltIn] }) + .call('6', 'repeat', [argumentInCall('5')], { returns: [], reads: [BuiltIn] }) + .nse('6', '5') + .defineVariable('2', 'x', { definedBy: ['3', '4'] }) + ) + }) + }) +})) diff --git a/test/functionality/dataflow/processing-of-elements/expression-lists/dataflow-if-then-tests.ts b/test/functionality/dataflow/processing-of-elements/expression-lists/dataflow-if-then-tests.ts new file mode 100644 index 0000000000..d1e42ad04d --- /dev/null +++ b/test/functionality/dataflow/processing-of-elements/expression-lists/dataflow-if-then-tests.ts @@ -0,0 +1,220 @@ +import { assertDataflow, withShell } from '../../../_helper/shell' +import { emptyGraph } from '../../../_helper/dataflow/dataflowgraph-builder' +import { argumentInCall, defaultEnv } from '../../../_helper/dataflow/environment-builder' +import { EmptyArgument, OperatorDatabase } from '../../../../../src' +import { BuiltIn } from '../../../../../src/dataflow' +import { label } from '../../../_helper/label' + +describe('Lists with if-then constructs', withShell(shell => { + for(const assign of ['<-', '<<-', '=']) { + describe(`using ${assign}`, () => { + describe('reads within if', () => { + for(const b of [ + { label: 'without else', text: '' }, + 
{ label: 'with else', text: ' else { 1 }' }, + ]) { + describe(`${b.label}`, () => { + const cd = b.text === '' ? ['8'] : ['12'] + const baseGraph = emptyGraph() + .use('3', 'x') + .reads('3', '0') + .call('2', assign, [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .call('7', '{', [argumentInCall('6')], { returns: ['6'], reads: [BuiltIn], controlDependency: cd, environment: defaultEnv().defineVariable('x', '0', '2') }) + if(b.text !== '') { + baseGraph.sameRead('7', '11') + .call('11', '{', [argumentInCall('10')], { returns: ['10'], reads: [BuiltIn], controlDependency: ['12'], environment: defaultEnv().defineVariable('x', '0', '2') }) + .call('12', 'if', [argumentInCall('3'), argumentInCall('7'), argumentInCall('11')], { returns: ['7', '11'], reads: ['3', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '0', '2') }) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .constant('6', { controlDependency: ['12'] }) + .constant('10', { controlDependency: ['12'] }) + } else { + baseGraph.call('8', 'if', [argumentInCall('3'), argumentInCall('7'), EmptyArgument], { returns: ['7'], reads: ['3', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '0', '2') }) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .constant('6', { controlDependency: ['8'] }) + } + assertDataflow(label('read previous def in cond', [...OperatorDatabase[assign].capabilities, 'name-normal', 'numbers', 'newlines', 'if']), + shell, + `x ${assign} 2\nif(x) { 1 } ${b.text}`, + baseGraph + ) + const previousGraph = emptyGraph() + .use('6', 'x') + .reads('6', '0') + .call('2', assign, [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .call('7', '{', [argumentInCall('6')], { returns: ['6'], reads: [BuiltIn], environment: defaultEnv().defineVariable('x', '0', '2') }) + .call(cd[0], 'if', [argumentInCall('3'), argumentInCall('7'), EmptyArgument], 
{ returns: ['7'], reads: ['3', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '0', '2') }) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .constant('3') + // otherwise will be pruned by TRUE + assertDataflow(label('read previous def in then', [...OperatorDatabase[assign].capabilities, 'name-normal', 'numbers', 'newlines', 'if', 'logical']), + shell, + `x ${assign} 2\nif(TRUE) { x } ${b.text}`, + previousGraph + ) + }) + } + assertDataflow(label('read previous def in else', [...OperatorDatabase[assign].capabilities, 'name-normal', 'numbers', 'newlines', 'if', 'logical']), + shell, + `x ${assign} 2\nif(FALSE) { 42 } else { x }`, emptyGraph() + .use('10', 'x') + .reads('10', '0') + .call('2', assign, [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .call('11', '{', [argumentInCall('10')], { returns: ['10'], reads: [BuiltIn], environment: defaultEnv().defineVariable('x', '0', '2') }) + .call('12', 'if', [argumentInCall('3'), EmptyArgument, argumentInCall('11')], { returns: ['11'], reads: ['3', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '0', '2') }) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .constant('3') + ) + }) + describe('write within if', () => { + assertDataflow(label('without else directly together', ['if', 'logical', 'name-normal', ...OperatorDatabase[assign].capabilities, 'numbers', 'newlines']), + shell, + `if(TRUE) { x ${assign} 2 }\nx`, emptyGraph() + .use('8', 'x') + .reads('8', '3') + .call('5', assign, [argumentInCall('3'), argumentInCall('4')], { returns: ['3'], reads: [BuiltIn] }) + .call('6', '{', [argumentInCall('5')], { returns: ['5'], reads: [BuiltIn] }) + .call('7', 'if', [argumentInCall('0'), argumentInCall('6'), EmptyArgument], { returns: ['6'], reads: ['0', BuiltIn], onlyBuiltIn: true }) + .constant('0') + .constant('4') + .defineVariable('3', 'x', { definedBy: ['4', '5'] }) + ) + 
assertDataflow(label('def in else read afterwards', ['if', 'logical', 'numbers', 'name-normal', ...OperatorDatabase[assign].capabilities, 'newlines']), + shell, + `if(FALSE) { 42 } else { x ${assign} 5 }\nx`, emptyGraph() + .use('12', 'x') + .reads('12', '7') + .call('9', assign, [argumentInCall('7'), argumentInCall('8')], { returns: ['7'], reads: [BuiltIn] }) + .call('10', '{', [argumentInCall('9')], { returns: ['9'], reads: [BuiltIn] }) + .call('11', 'if', [argumentInCall('0'), EmptyArgument, argumentInCall('10')], { returns: ['10'], reads: ['0', BuiltIn], onlyBuiltIn: true }) + .constant('0') + .constant('8') + .defineVariable('7', 'x', { definedBy: ['8', '9'] }) + ) + + assertDataflow(label('def in then and else read afterward', ['if', 'name-normal', ...OperatorDatabase[assign].capabilities, 'numbers', 'newlines']), + shell, + `if(z) { x ${assign} 7 } else { x ${assign} 5 }\nx`, emptyGraph() + .use('0', 'z') + .use('14', 'x') + .reads('14', ['3', '9']) + .call('5', assign, [argumentInCall('3'), argumentInCall('4')], { returns: ['3'], reads: [BuiltIn], controlDependency: ['13'] }) + .sameRead('5', '11') + .call('6', '{', [argumentInCall('5')], { returns: ['5'], reads: [BuiltIn], controlDependency: ['13'] }) + .sameRead('6', '12') + .call('11', assign, [argumentInCall('9'), argumentInCall('10')], { returns: ['9'], reads: [BuiltIn], controlDependency: ['13'] }) + .call('12', '{', [argumentInCall('11')], { returns: ['11'], reads: [BuiltIn], controlDependency: ['13'] }) + .call('13', 'if', [argumentInCall('0'), argumentInCall('6'), argumentInCall('12')], { returns: ['6', '12'], reads: ['0', BuiltIn], onlyBuiltIn: true }) + .constant('4') + .defineVariable('3', 'x', { definedBy: ['4', '5'], controlDependency: ['13'] }) + .constant('10') + .defineVariable('9', 'x', { definedBy: ['10', '11'], controlDependency: ['13'] }) + ) + }) + }) + } + describe('Branch Coverage', () => { + //All test related to branch coverage (testing the interaction between then end else block) 
+ assertDataflow(label('assignment both branches in if', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'newlines', 'if']), shell, 'x <- 1\nif(r) { x <- 2 } else { x <- 3 }\n y <- x', emptyGraph() + .use('3', 'r') + .use('18', 'x') + .reads('18', ['6', '12']) + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .sameRead('2', ['8', '14', '19']) + .call('8', '<-', [argumentInCall('6'), argumentInCall('7')], { returns: ['6'], reads: [BuiltIn], controlDependency: ['16'], environment: defaultEnv().defineVariable('x', '0', '2') }) + .sameRead('8', '14') + .call('9', '{', [argumentInCall('8')], { returns: ['8'], reads: [BuiltIn], controlDependency: ['16'], environment: defaultEnv().defineVariable('x', '0', '2') }) + .sameRead('9', '15') + .call('14', '<-', [argumentInCall('12'), argumentInCall('13')], { returns: ['12'], reads: [BuiltIn], controlDependency: ['16'], environment: defaultEnv().defineVariable('x', '0', '2') }) + .call('15', '{', [argumentInCall('14')], { returns: ['14'], reads: [BuiltIn], controlDependency: ['16'], environment: defaultEnv().defineVariable('x', '0', '2') }) + .call('16', 'if', [argumentInCall('3'), argumentInCall('9'), argumentInCall('15')], { returns: ['9', '15'], reads: ['3', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '0', '2') }) + .call('19', '<-', [argumentInCall('17'), argumentInCall('18')], { returns: ['17'], reads: [BuiltIn], environment: defaultEnv().defineVariable('x', '6', '8', ['16']).defineVariable('x', '12', '14', ['16']) }) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .sameDef('0', ['6', '12']) + .constant('7') + .defineVariable('6', 'x', { definedBy: ['7', '8'], controlDependency: ['16'] }) + .constant('13') + .defineVariable('12', 'x', { definedBy: ['13', '14'], controlDependency: ['16'] }) + .defineVariable('17', 'y', { definedBy: ['18', '19'] }) + ) + + assertDataflow(label('assignment if one 
branch', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'newlines', 'if', 'numbers']), shell, 'x <- 1\nif(r) { x <- 2 } \n y <- x', emptyGraph() + .use('3', 'r') + .use('12', 'x') + .reads('12', ['6', '0']) + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .sameRead('2', ['8', '13']) + .call('8', '<-', [argumentInCall('6'), argumentInCall('7')], { returns: ['6'], reads: [BuiltIn], controlDependency: ['10'], environment: defaultEnv().defineVariable('x', '0', '2') }) + .call('9', '{', [argumentInCall('8')], { returns: ['8'], reads: [BuiltIn], controlDependency: ['10'], environment: defaultEnv().defineVariable('x', '0', '2') }) + .call('10', 'if', [argumentInCall('3'), argumentInCall('9'), EmptyArgument], { returns: ['9'], reads: ['3', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '0', '2', ['10']) }) + .call('13', '<-', [argumentInCall('11'), argumentInCall('12')], { returns: ['11'], reads: [BuiltIn], environment: defaultEnv().defineVariable('x', '6', '8', ['10']).defineVariable('x', '0', '2', ['10']) }) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .sameDef('0', '6') + .constant('7') + .defineVariable('6', 'x', { definedBy: ['7', '8'], controlDependency: ['10'] }) + .defineVariable('11', 'y', { definedBy: ['12', '13'] }) + ) + + assertDataflow(label('assignment if multiple variables with else', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'newlines', 'if']), + shell, + 'x <- 1 \n y <- 2 \n if(r){ x <- 3 \n y <- 4} else {x <- 5} \n w <- x \n z <- y', + emptyGraph() + .use('6', 'r') + .use('24', 'x') + .reads('24', ['9', '18']) + .use('27', 'y') + .reads('27', ['12', '3']) + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .sameRead('2', ['5', '11', '14', '20', '25', '28']) + .call('5', '<-', [argumentInCall('3'), argumentInCall('4')], { returns: ['3'], reads: [BuiltIn], 
environment: defaultEnv().defineVariable('x', '0', '2') }) + .call('11', '<-', [argumentInCall('9'), argumentInCall('10')], { returns: ['9'], reads: [BuiltIn], controlDependency: ['22'], environment: defaultEnv().defineVariable('x', '0', '2').defineVariable('y', '3', '5') }) + .sameRead('11', ['14', '20']) + .call('14', '<-', [argumentInCall('12'), argumentInCall('13')], { returns: ['12'], reads: [BuiltIn], controlDependency: ['22'], environment: defaultEnv().defineVariable('x', '9', '11').defineVariable('y', '3', '5') }) + .call('15', '{', [argumentInCall('11'), argumentInCall('14')], { returns: ['14'], reads: [BuiltIn], controlDependency: ['22'], environment: defaultEnv().defineVariable('x', '9', '11').defineVariable('y', '3', '5') }) + .sameRead('15', '21') + .call('20', '<-', [argumentInCall('18'), argumentInCall('19')], { returns: ['18'], reads: [BuiltIn], controlDependency: ['22'], environment: defaultEnv().defineVariable('x', '0', '2').defineVariable('y', '3', '5') }) + .call('21', '{', [argumentInCall('20')], { returns: ['20'], reads: [BuiltIn], controlDependency: ['22'], environment: defaultEnv().defineVariable('x', '0', '2').defineVariable('y', '3', '5') }) + .call('22', 'if', [argumentInCall('6'), argumentInCall('15'), argumentInCall('21')], { returns: ['15', '21'], reads: ['6', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '0', '2').defineVariable('y', '3', '5') }) + .call('25', '<-', [argumentInCall('23'), argumentInCall('24')], { returns: ['23'], reads: [BuiltIn], environment: defaultEnv().defineVariable('x', '9', '11', ['22']).defineVariable('x', '18', '20', ['22']).defineVariable('y', '12', '14', ['22']).defineVariable('y', '3', '5', ['22']) }) + .call('28', '<-', [argumentInCall('26'), argumentInCall('27')], { returns: ['26'], reads: [BuiltIn], environment: defaultEnv().defineVariable('x', '9', '11', ['22']).defineVariable('x', '18', '20', ['22']).defineVariable('y', '12', '14', ['22']).defineVariable('y', '3', '5', 
['22']).defineVariable('w', '23', '25') }) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .sameDef('0', ['9', '18']) + .constant('4') + .defineVariable('3', 'y', { definedBy: ['4', '5'] }) + .sameDef('3', '12') + .constant('10') + .defineVariable('9', 'x', { definedBy: ['10', '11'], controlDependency: ['22'] }) + .constant('13') + .defineVariable('12', 'y', { definedBy: ['13', '14'], controlDependency: ['22'] }) + .constant('19') + .defineVariable('18', 'x', { definedBy: ['19', '20'], controlDependency: ['22'] }) + .defineVariable('23', 'w', { definedBy: ['24', '25'] }) + .defineVariable('26', 'z', { definedBy: ['27', '28'] }) + ) + assertDataflow(label('assignment in else block', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'newlines', 'if']), shell, 'x <- 1 \n if(r){} else{x <- 2} \n y <- x', emptyGraph() + .use('3', 'r') + .use('15', 'x') + .reads('15', ['0', '9']) + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .sameRead('2', ['11', '16']) + .call('6', '{', [], { returns: [], reads: [BuiltIn], controlDependency: ['13'], environment: defaultEnv().defineVariable('x', '0', '2') }) + .sameRead('6', '12') + .call('11', '<-', [argumentInCall('9'), argumentInCall('10')], { returns: ['9'], reads: [BuiltIn], controlDependency: ['13'], environment: defaultEnv().defineVariable('x', '0', '2') }) + .call('12', '{', [argumentInCall('11')], { returns: ['11'], reads: [BuiltIn], controlDependency: ['13'], environment: defaultEnv().defineVariable('x', '0', '2') }) + .call('13', 'if', [argumentInCall('3'), argumentInCall('6'), argumentInCall('12')], { returns: ['6', '12'], reads: ['3', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '0', '2', ['13']).defineVariable('x', '9', '11', ['13']) }) + .call('16', '<-', [argumentInCall('14'), argumentInCall('15')], { returns: ['14'], reads: [BuiltIn], environment: defaultEnv().defineVariable('x', '0', '2', 
['13']).defineVariable('x', '9', '11', ['13']) }) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .sameDef('0', '9') + .constant('10') + .defineVariable('9', 'x', { definedBy: ['10', '11'], controlDependency: ['13'] }) + .defineVariable('14', 'y', { definedBy: ['15', '16'] }) + ) + }) +})) diff --git a/test/functionality/dataflow/processing-of-elements/expression-lists/dataflow-read-tests.ts b/test/functionality/dataflow/processing-of-elements/expression-lists/dataflow-read-tests.ts new file mode 100644 index 0000000000..f3b2c44c5d --- /dev/null +++ b/test/functionality/dataflow/processing-of-elements/expression-lists/dataflow-read-tests.ts @@ -0,0 +1,108 @@ +import { assertDataflow, withShell } from '../../../_helper/shell' +import { emptyGraph } from '../../../_helper/dataflow/dataflowgraph-builder' +import { argumentInCall, defaultEnv } from '../../../_helper/dataflow/environment-builder' +import { BuiltIn } from '../../../../../src/dataflow' +import { label } from '../../../_helper/label' +import { OperatorDatabase } from '../../../../../src' + +describe('Lists with variable references', withShell(shell => { + describe('read-read same variable', () => { + assertDataflow(label('directly together', ['name-normal', 'newlines']), shell, + 'x\nx', emptyGraph() + .use('0', 'x') + .use('1', 'x') + .sameRead('0', '1') + ) + + assertDataflow(label('multiple occurrences of same variable', ['name-normal', 'newlines']), shell, + 'x\nx\nx', emptyGraph() + .use('0', 'x') + .use('1', 'x') + .use('2', 'x') + .sameRead('0', '1') + .sameRead('0', '2') + ) + }) + describe('def-def same variable', () => { + assertDataflow(label('directly together', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'newlines']), shell, + 'x <- 1\nx <- 2', emptyGraph() + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .sameRead('2', '5') + .call('5', '<-', [argumentInCall('3'), argumentInCall('4')], { 
returns: ['3'], reads: [BuiltIn], environment: defaultEnv().defineVariable('x', '0', '2') }) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .sameDef('0', '3') + .constant('4') + .defineVariable('3', 'x', { definedBy: ['4', '5'] }) + ) + + assertDataflow(label('multiple occurrences of same variable', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'newlines']), shell, + 'x <- 1\nx <- 3\n3\nx <- 9', emptyGraph() + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .sameRead('2', ['5', '9']) + .call('5', '<-', [argumentInCall('3'), argumentInCall('4')], { returns: ['3'], reads: [BuiltIn], environment: defaultEnv().defineVariable('x', '0', '2') }) + .call('9', '<-', [argumentInCall('7'), argumentInCall('8')], { returns: ['7'], reads: [BuiltIn], environment: defaultEnv().defineVariable('x', '3', '5') }) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .sameDef('0', '3') + .constant('4') + .defineVariable('3', 'x', { definedBy: ['4', '5'] }) + .sameDef('3', '7') + .constant('6') + .constant('8') + .defineVariable('7', 'x', { definedBy: ['8', '9'] }) + ) + }) + describe('def followed by read', () => { + assertDataflow(label('directly together', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'newlines']), shell, + 'x <- 1\nx', emptyGraph() + .use('3', 'x') + .reads('3', '0') + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + ) + assertDataflow(label('redefinition links correctly', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'semicolons']), shell, + 'x <- 2; x <- 3; x', + emptyGraph() + .use('6', 'x') + .reads('6', '3') + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .sameRead('2', '5') + .call('5', '<-', [argumentInCall('3'), 
argumentInCall('4')], { returns: ['3'], reads: [BuiltIn], environment: defaultEnv().defineVariable('x', '0', '2') }) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .sameDef('0', '3') + .constant('4') + .defineVariable('3', 'x', { definedBy: ['4', '5'] }) + ) + assertDataflow(label('multiple redefinition with circular definition', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'semicolons']), shell, + 'x <- 2; x <- x; x', + emptyGraph() + .use('4', 'x') + .reads('4', '0') + .use('6', 'x') + .reads('6', '3') + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .sameRead('2', '5') + .call('5', '<-', [argumentInCall('3'), argumentInCall('4')], { returns: ['3'], reads: [BuiltIn], environment: defaultEnv().defineVariable('x', '0', '2') }) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .sameDef('0', '3') + .defineVariable('3', 'x', { definedBy: ['4', '5'] }) + ) + assertDataflow(label('duplicate circular definition', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'semicolons']), shell, + 'x <- x; x <- x;', + emptyGraph() + .use('1', 'x') + .use('4', 'x') + .reads('4', '0') + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .sameRead('2', '5') + .call('5', '<-', [argumentInCall('3'), argumentInCall('4')], { returns: ['3'], reads: [BuiltIn], environment: defaultEnv().defineVariable('x', '0', '2') }) + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .sameDef('0', '3') + .defineVariable('3', 'x', { definedBy: ['4', '5'] }) + ) + }) +})) diff --git a/test/functionality/dataflow/processing-of-elements/expression-lists/if-then-tests.ts b/test/functionality/dataflow/processing-of-elements/expression-lists/if-then-tests.ts deleted file mode 100644 index dae98c02d5..0000000000 --- a/test/functionality/dataflow/processing-of-elements/expression-lists/if-then-tests.ts +++ /dev/null @@ -1,83 +0,0 
@@ -import { assertDataflow, withShell } from '../../../_helper/shell' -import { GlobalScope, LocalScope } from '../../../../../src/dataflow/environments/scopes' -import { emptyGraph } from '../../../_helper/dataflowgraph-builder' -import { defaultEnvironment } from '../../../_helper/environment-builder' - -describe('Lists with if-then constructs', withShell(shell => { - for(const assign of [ '<-', '<<-', '=']) { - const scope = assign === '<<-' ? GlobalScope : LocalScope - describe(`using ${assign}`, () => { - describe('reads within if', () => { - for(const b of [ - { label: 'without else', text: '' }, - { label: 'with else', text: ' else { 1 }' }, - ]) { - describe(`${b.label}`, () => { - assertDataflow('read previous def in cond', - shell, - `x ${assign} 2\nif(x) { 1 } ${b.text}`, - emptyGraph() - .defineVariable('0', 'x', scope) - .use('3', 'x', { environment: defaultEnvironment().defineVariable('x', '0', '2', scope) }) - .reads('3', '0') - ) - assertDataflow('read previous def in then', - shell, - `x ${assign} 2\nif(TRUE) { x } ${b.text}`, - emptyGraph() - .defineVariable('0', 'x', scope) - .use('4', 'x', { environment: defaultEnvironment().defineVariable('x', '0', '2', scope) }) - .reads('4', '0') - ) - }) - } - assertDataflow('read previous def in else', - shell, - `x ${assign} 2\nif(FALSE) { 42 } else { x }`, - emptyGraph() - .defineVariable('0', 'x', scope) - .use('6', 'x', { environment: defaultEnvironment().defineVariable('x', '0', '2', scope) }) - .reads('6', '0') - ) - }) - describe('write within if', () => { - for(const b of [ - { label: 'without else', text: '' }, - { label: 'with else', text: ' else { 1 }' }, - ]) { - assertDataflow(`${b.label} directly together`, - shell, - `if(TRUE) { x ${assign} 2 }\nx`, - emptyGraph() - .defineVariable('1', 'x', scope) - .use('6', 'x', { environment: defaultEnvironment().defineVariable('x', '1', '3', scope) }) - .reads('6', '1') - ) - } - assertDataflow('def in else read afterwards', - shell, - `if(FALSE) { 42 } 
else { x ${assign} 5 }\nx`, - emptyGraph() - .defineVariable('3', 'x', scope) - .use('8', 'x', { environment: defaultEnvironment().defineVariable('x', '3', '5', scope) }) - .reads('8', '3') - ) - - const whenEnvironment = defaultEnvironment().defineVariable('x', '1', '3', scope, 'maybe') - const otherwiseEnvironment = defaultEnvironment().defineVariable('x', '5', '7', scope, 'maybe') - - assertDataflow('def in then and else read afterward', - shell, - `if(z) { x ${assign} 7 } else { x ${assign} 5 }\nx`, - emptyGraph() - .use('0', 'z', { scope }) - .defineVariable('1', 'x', scope, { when: 'maybe' }) - .defineVariable('5', 'x', scope, { when: 'maybe' }) - .use('10', 'x', { environment: whenEnvironment.appendWritesOf(otherwiseEnvironment) }) - .reads('10', '1', 'maybe') - .reads('10', '5', 'maybe') - ) - }) - }) - } -})) diff --git a/test/functionality/dataflow/processing-of-elements/expression-lists/read-tests.ts b/test/functionality/dataflow/processing-of-elements/expression-lists/read-tests.ts deleted file mode 100644 index 189f0bbaf4..0000000000 --- a/test/functionality/dataflow/processing-of-elements/expression-lists/read-tests.ts +++ /dev/null @@ -1,134 +0,0 @@ -import type { NodeId } from '../../../../../src/r-bridge' -import { assertDataflow, withShell } from '../../../_helper/shell' -import { LocalScope } from '../../../../../src/dataflow/environments/scopes' -import { emptyGraph } from '../../../_helper/dataflowgraph-builder' -import { defaultEnvironment } from '../../../_helper/environment-builder' - -describe('Lists with variable references', withShell(shell => { - describe('read-read same variable', () => { - const sameGraph = (id1: NodeId, id2: NodeId) => - emptyGraph() - .use(id1, 'x') - .use(id2, 'x') - .sameRead(id1, id2) - assertDataflow('directly together', shell, - 'x\nx', - sameGraph('0', '1') - ) - assertDataflow('surrounded by uninteresting elements', shell, - '3\nx\n1\nx\n2', - sameGraph('1', '3') - ) - assertDataflow('using braces', shell, - 
'{ x }\n{{ x }}', - sameGraph('0', '1') - ) - assertDataflow('using braces and uninteresting elements', shell, - '{ x + 2 }; 4 - { x }', - sameGraph('0', '4') - ) - - assertDataflow('multiple occurrences of same variable', shell, - 'x\nx\n3\nx', - emptyGraph() - .use('0', 'x') - .use('1', 'x') - .use('3', 'x') - .sameRead('0', '1') - .sameRead('0', '3') - ) - }) - describe('def-def same variable', () => { - const sameGraph = (id1: NodeId, id2: NodeId, definedAt: NodeId) => - emptyGraph() - .defineVariable(id1, 'x') - .defineVariable(id2, 'x', LocalScope, { environment: defaultEnvironment().defineVariable('x', id1, definedAt) }) - .sameDef(id1, id2) - assertDataflow('directly together', shell, - 'x <- 1\nx <- 2', - sameGraph('0', '3', '2') - ) - assertDataflow('directly together with mixed sides', shell, - '1 -> x\nx <- 2', - sameGraph('1', '3', '2') - ) - assertDataflow('surrounded by uninteresting elements', shell, - '3\nx <- 1\n1\nx <- 3\n2', - sameGraph('1', '5', '3') - ) - assertDataflow('using braces', shell, - '{ x <- 42 }\n{{ x <- 50 }}', - sameGraph('0', '3', '2') - ) - assertDataflow('using braces and uninteresting elements', shell, - '5; { x <- 2 }; 17; 4 -> x; 9', - sameGraph('1', '6', '3') - ) - - assertDataflow('multiple occurrences of same variable', shell, - 'x <- 1\nx <- 3\n3\nx <- 9', - emptyGraph() - .defineVariable('0', 'x') - .defineVariable('3', 'x', LocalScope, { environment: defaultEnvironment().defineVariable('x', '0', '2') }) - .defineVariable('7', 'x', LocalScope, { environment: defaultEnvironment().defineVariable('x', '3', '5') }) - .sameDef('0', '3') - .sameDef('3', '7') - ) - }) - describe('def followed by read', () => { - const sameGraph = (id1: NodeId, id2: NodeId, definedAt: NodeId) => - emptyGraph() - .defineVariable(id1, 'x') - .use(id2, 'x', { environment: defaultEnvironment().defineVariable('x', id1, definedAt) }) - .reads(id2, id1) - assertDataflow('directly together', shell, - 'x <- 1\nx', - sameGraph('0', '3', '2') - ) - 
assertDataflow('surrounded by uninteresting elements', shell, - '3\nx <- 1\n1\nx\n2', - sameGraph('1', '5', '3') - ) - assertDataflow('using braces', shell, - '{ x <- 1 }\n{{ x }}', - sameGraph('0', '3', '2') - ) - assertDataflow('using braces and uninteresting elements', shell, - '{ x <- 2 }; 5; x', - sameGraph('0', '4', '2') - ) - assertDataflow('redefinition links correctly', shell, - 'x <- 2; x <- 3; x', - emptyGraph() - .defineVariable('0', 'x') - .defineVariable('3', 'x', LocalScope, { environment: defaultEnvironment().defineVariable('x', '0', '2') }) - .use('6', 'x', { environment: defaultEnvironment().defineVariable('x', '3', '5') }) - .reads('6', '3') - .sameDef('0', '3') - ) - assertDataflow('multiple redefinition with circular definition', shell, - 'x <- 2; x <- x; x', - emptyGraph() - .defineVariable('0', 'x') - .defineVariable('3', 'x', LocalScope, { environment: defaultEnvironment().defineVariable('x', '0', '2') }) - .use('4', 'x' , { environment: defaultEnvironment().defineVariable('x', '0', '2') }) - .use('6', 'x', { environment: defaultEnvironment().defineVariable('x', '3', '5') }) - .reads('4', '0') - .definedBy('3', '4') - .sameDef('0', '3') - .reads('6', '3') - ) - assertDataflow('duplicate circular definition', shell, - 'x <- x; x <- x;', - emptyGraph() - .defineVariable('0', 'x') - .use('1', 'x') - .defineVariable('3', 'x', LocalScope, { environment: defaultEnvironment().defineVariable('x', '0', '2') }) - .use('4', 'x', { environment: defaultEnvironment().defineVariable('x', '0', '2') }) - .definedBy('0', '1') - .definedBy('3', '4') - .reads('4', '0') - .sameDef('0', '3') - ) - }) -})) diff --git a/test/functionality/dataflow/processing-of-elements/expression-lists/uninteresting-tests.ts b/test/functionality/dataflow/processing-of-elements/expression-lists/uninteresting-tests.ts deleted file mode 100644 index 317ca10285..0000000000 --- a/test/functionality/dataflow/processing-of-elements/expression-lists/uninteresting-tests.ts +++ /dev/null @@ 
-1,11 +0,0 @@ -import { assertDataflow, withShell } from '../../../_helper/shell' -import { emptyGraph } from '../../../_helper/dataflowgraph-builder' - -describe('Lists without variable references ', withShell(shell => { - for(const b of ['1\n2\n3', '1;2;3', '{ 1 + 2 }\n{ 3 * 4 }']) { - assertDataflow(`${JSON.stringify(b)}`, shell, - b, - emptyGraph() - ) - } -})) diff --git a/test/functionality/dataflow/processing-of-elements/functions/dataflow-function-call-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/dataflow-function-call-tests.ts new file mode 100644 index 0000000000..a9f1bdaeeb --- /dev/null +++ b/test/functionality/dataflow/processing-of-elements/functions/dataflow-function-call-tests.ts @@ -0,0 +1,278 @@ +import { assertDataflow, withShell } from '../../../_helper/shell' +import { MIN_VERSION_LAMBDA } from '../../../../../src/r-bridge/lang-4.x/ast/model/versions' +import { emptyGraph } from '../../../_helper/dataflow/dataflowgraph-builder' +import { argumentInCall, defaultEnv } from '../../../_helper/dataflow/environment-builder' +import { + UnnamedFunctionCallPrefix +} from '../../../../../src/dataflow/internal/process/functions/call/unnamed-call-handling' +import { EmptyArgument, OperatorDatabase } from '../../../../../src' +import { BuiltIn } from '../../../../../src/dataflow' +import { label } from '../../../_helper/label' + +describe('Function Call', withShell(shell => { + describe('Calling previously defined functions', () => { + assertDataflow(label('Calling function a', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'semicolons', 'formals-named', 'implicit-return', 'unnamed-arguments']), shell, 'i <- 4; a <- function(x) { x }\na(i)', emptyGraph() + .use('8', 'x', undefined, false) + .reads('8', '4') + .use('13', 'i', undefined) + .reads('13', '0') + .definesOnCall('13', '4') + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .sameRead('2', '11') + 
.call('9', '{', [argumentInCall('8')], { returns: ['8'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineParameter('x', '4', '5') }, false) + .call('11', '<-', [argumentInCall('3'), argumentInCall('10')], { returns: ['3'], reads: [BuiltIn], environment: defaultEnv().defineVariable('i', '0', '2') }) + .call('15', 'a', [argumentInCall('13')], { returns: ['9'], reads: ['3'], environment: defaultEnv().defineVariable('i', '0', '2').defineFunction('a', '3', '11') }) + .calls('15', '10') + .constant('1') + .defineVariable('0', 'i', { definedBy: ['1', '2'] }) + .defineVariable('4', 'x', { definedBy: [] }, false) + .defineFunction('10', '10', ['9'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: '9', + graph: new Set(['4', '8', '9']), + environment: defaultEnv().pushEnv().defineParameter('x', '4', '5') + }) + .defineVariable('3', 'a', { definedBy: ['10', '11'] }) + ) + + assertDataflow(label('Calling function a with an indirection', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'semicolons', 'formals-named', 'implicit-return', 'newlines', 'unnamed-arguments']), shell, 'i <- 4; a <- function(x) { x }\nb <- a\nb(i)', + emptyGraph() + .use('8', 'x', undefined, false) + .reads('8', '4') + .use('13', 'a', undefined) + .reads('13', '3') + .use('16', 'i', undefined) + .reads('16', '0') + .definesOnCall('16', '4') + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .sameRead('2', ['11', '14']) + .call('9', '{', [argumentInCall('8')], { returns: ['8'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineParameter('x', '4', '5') }, false) + .call('11', '<-', [argumentInCall('3'), argumentInCall('10')], { returns: ['3'], reads: [BuiltIn], environment: defaultEnv().defineVariable('i', '0', '2') }) + .call('14', '<-', [argumentInCall('12'), argumentInCall('13')], { returns: ['12'], reads: [BuiltIn], environment: defaultEnv().defineVariable('i', '0', '2').defineFunction('a', '3', 
'11') }) + .call('18', 'b', [argumentInCall('16')], { returns: ['9'], reads: ['12'], environment: defaultEnv().defineVariable('i', '0', '2').defineFunction('a', '3', '11').defineVariable('b', '12', '14') }) + .calls('18', '10') + .constant('1') + .defineVariable('0', 'i', { definedBy: ['1', '2'] }) + .defineVariable('4', 'x', { definedBy: [] }, false) + .defineFunction('10', '10', ['9'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: '9', + graph: new Set(['4', '8', '9']), + environment: defaultEnv().pushEnv().defineParameter('x', '4', '5') + }) + .defineVariable('3', 'a', { definedBy: ['10', '11'] }) + .defineVariable('12', 'b', { definedBy: ['13', '14'] }) + ) + + assertDataflow(label('Calling with a constant function', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'newlines', 'formals-named', 'implicit-return', 'unnamed-arguments']), shell, `i <- 4 +a <- function(x) { x <- x; x <- 3; 1 } +a(i)`, emptyGraph() + .use('9', 'x', undefined, false) + .reads('9', '4') + .use('19', 'i', undefined) + .reads('19', '0') + .definesOnCall('19', '4') + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .sameRead('2', '17') + .call('10', '<-', [argumentInCall('8'), argumentInCall('9')], { returns: ['8'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineParameter('x', '4', '5') }, false) + .sameRead('10', '13') + .call('13', '<-', [argumentInCall('11'), argumentInCall('12')], { returns: ['11'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineVariable('x', '8', '10') }, false) + .call('15', '{', [argumentInCall('10'), argumentInCall('13'), argumentInCall('14')], { returns: ['14'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineVariable('x', '11', '13') }, false) + .call('17', '<-', [argumentInCall('3'), argumentInCall('16')], { returns: ['3'], reads: [BuiltIn], environment: defaultEnv().defineVariable('i', '0', '2') }) + .call('21', 'a', 
[argumentInCall('19')], { returns: ['15'], reads: ['3'], environment: defaultEnv().defineVariable('i', '0', '2').defineFunction('a', '3', '17') }) + .calls('21', '16') + .constant('1') + .defineVariable('0', 'i', { definedBy: ['1', '2'] }) + .defineVariable('4', 'x', { definedBy: [] }, false) + .sameDef('4', ['8', '11']) + .defineVariable('8', 'x', { definedBy: ['9', '10'] }, false) + .sameDef('8', '11') + .constant('12', undefined, false) + .defineVariable('11', 'x', { definedBy: ['12', '13'] }, false) + .constant('14', undefined, false) + .defineFunction('16', '16', ['15'], { + out: [], + in: [{ nodeId: '14', name: undefined, controlDependencies: [] }], + unknownReferences: [], + entryPoint: '15', + graph: new Set(['4', '9', '8', '10', '12', '11', '13', '14', '15']), + environment: defaultEnv().pushEnv().defineVariable('x', '11', '13') + }) + .defineVariable('3', 'a', { definedBy: ['16', '17'] }) + ) + }) + + describe('Directly calling a function', () => { + const outGraph = emptyGraph() + .use('6', 'x', undefined, false) + .reads('6', '2') + .call('8', '+', [argumentInCall('6'), argumentInCall('7')], { returns: [], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineParameter('x', '2', '3') }, false) + .reads('8', ['6', '7']) + .call('9', '{', [argumentInCall('8')], { returns: ['8'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineParameter('x', '2', '3') }, false) + .call('11', '(', [argumentInCall('10')], { returns: ['10'], reads: [BuiltIn] }) + .call('14', `${UnnamedFunctionCallPrefix}14`, [argumentInCall('12')], { returns: ['9'], reads: ['11'] }) + .calls('14', ['11', '10']) + .defineVariable('2', 'x', { definedBy: [] }, false) + .constant('7', undefined, false) + .defineFunction('10', '10', ['9'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: '9', + graph: new Set(['2', '6', '7', '8', '9']), + environment: defaultEnv().pushEnv().defineParameter('x', '2', '3') + }) + .constant('12', undefined) + .definesOnCall('12', '2') + 
+ assertDataflow(label('Calling with constant argument using lambda', ['lambda-syntax', 'implicit-return', 'binary-operator', 'infix-calls', 'call-anonymous', 'unnamed-arguments', 'numbers', ...OperatorDatabase['+'].capabilities]), shell, '(\\(x) { x + 1 })(2)', + outGraph, + { minRVersion: MIN_VERSION_LAMBDA } + ) + assertDataflow(label('Calling with constant argument', ['formals-named', 'implicit-return', 'binary-operator', 'infix-calls', 'call-anonymous', 'unnamed-arguments', 'numbers', ...OperatorDatabase['+'].capabilities]), shell, '(function(x) { x + 1 })(2)', + outGraph + ) + + assertDataflow(label('Calling a function which returns another', ['name-normal', 'normal-definition', 'implicit-return', 'call-normal', 'numbers']), shell, `a <- function() { function() { 42 } } +a()()`, emptyGraph() + .call('6', '{', [argumentInCall('5')], { returns: ['5'], reads: [BuiltIn], environment: defaultEnv().pushEnv().pushEnv() }, false) + .call('8', '{', [argumentInCall('7')], { returns: ['7'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .call('10', '<-', [argumentInCall('0'), argumentInCall('9')], { returns: ['0'], reads: [BuiltIn] }) + .call('12', 'a', [], { returns: ['8'], reads: ['0'], environment: defaultEnv().defineFunction('a', '0', '10') }) + .calls('12', '9') + .call('13', `${UnnamedFunctionCallPrefix}13`, [], { returns: ['6'], reads: ['12'], environment: defaultEnv().defineFunction('a', '0', '10') }) + .calls('13', ['12', '7']) + .constant('5', undefined, false) + .defineFunction('7', '7', ['6'], { + out: [], + in: [{ nodeId: '5', name: undefined, controlDependencies: [] }], + unknownReferences: [], + entryPoint: '6', + graph: new Set(['5', '6']), + environment: defaultEnv().pushEnv().pushEnv() + }, { environment: defaultEnv().pushEnv() }, false) + .defineFunction('9', '9', ['8'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: '8', + graph: new Set(['7', '8']), + environment: defaultEnv().pushEnv() + }) + .defineVariable('0', 
'a', { definedBy: ['9', '10'] }) + ) + }) + + describe('Argument which is expression', () => { + assertDataflow(label('Calling with 1 + x', ['unnamed-arguments', 'binary-operator', 'infix-calls', 'name-normal', 'numbers', ...OperatorDatabase['+'].capabilities]), shell, 'foo(1 + x)', emptyGraph() + .use('2', 'x') + .call('3', '+', [argumentInCall('1'), argumentInCall('2')], { returns: [], reads: [BuiltIn] }) + .reads('3', ['1', '2']) + .call('5', 'foo', [argumentInCall('3')], { returns: [], reads: [] }) + .constant('1') + ) + }) + + describe('Argument which is anonymous function call', () => { + assertDataflow(label('Calling with a constant function', ['call-anonymous', 'unnamed-arguments', 'implicit-return', 'numbers']), shell, 'f(function() { 3 })', emptyGraph() + .call('4', '{', [argumentInCall('3')], { returns: ['3'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .call('7', 'f', [argumentInCall('5')], { returns: [], reads: [] }) + .constant('3', undefined, false) + .defineFunction('5', '5', ['4'], { + out: [], + in: [{ nodeId: '3', name: undefined, controlDependencies: [] }], + unknownReferences: [], + entryPoint: '4', + graph: new Set(['3', '4']), + environment: defaultEnv().pushEnv() + }) + ) + }) + + describe('Multiple out refs in arguments', () => { + assertDataflow(label('Calling \'seq\'', ['function-calls', 'numbers', 'unnamed-arguments', 'named-arguments']), shell, 'seq(1, length(pkgnames), by = stepsize)', + emptyGraph() + .use('4', 'pkgnames') + .use('9', 'stepsize') + .use('10', 'by') + .reads('10', '9') + .call('6', 'length', [argumentInCall('4')], { returns: [], reads: [] }) + .call('11', 'seq', [argumentInCall('1'), argumentInCall('6'), argumentInCall('10', { name: 'by' } )], { returns: [], reads: [] }) + .argument('11', '10') + .constant('1') + ) + }) + + describe('Late function bindings', () => { + assertDataflow(label('Late binding of y', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'normal-definition', 
'implicit-return', 'newlines', 'numbers', 'call-normal']), shell, 'a <- function() { y }\ny <- 12\na()', emptyGraph() + .use('3', 'y', undefined, false) + .call('4', '{', [argumentInCall('3')], { returns: ['3'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .call('6', '<-', [argumentInCall('0'), argumentInCall('5')], { returns: ['0'], reads: [BuiltIn] }) + .sameRead('6', '9') + .call('9', '<-', [argumentInCall('7'), argumentInCall('8')], { returns: ['7'], reads: [BuiltIn], environment: defaultEnv().defineFunction('a', '0', '6') }) + .call('11', 'a', [], { returns: ['4'], reads: ['0', '7'], environment: defaultEnv().defineFunction('a', '0', '6').defineVariable('y', '7', '9') }) + .calls('11', '5') + .defineFunction('5', '5', ['4'], { + out: [], + in: [{ nodeId: '3', name: 'y', controlDependencies: [] }], + unknownReferences: [], + entryPoint: '4', + graph: new Set(['3', '4']), + environment: defaultEnv().pushEnv() + }) + .defineVariable('0', 'a', { definedBy: ['5', '6'] }) + .constant('8') + .defineVariable('7', 'y', { definedBy: ['8', '9'] }) + ) + }) + + describe('Deal with empty calls', () => { + assertDataflow(label('Not giving first parameter', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'formals-named', 'formals-default', 'implicit-return', 'newlines', 'empty-arguments', 'unnamed-arguments']), shell, `a <- function(x=3,y) { y } +a(,3)`, emptyGraph() + .use('8', 'y', undefined, false) + .reads('8', '4') + .call('9', '{', [argumentInCall('8')], { returns: ['8'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineParameter('x', '1', '3').defineParameter('y', '4', '5') }, false) + .call('11', '<-', [argumentInCall('0'), argumentInCall('10')], { returns: ['0'], reads: [BuiltIn] }) + .call('15', 'a', [EmptyArgument, argumentInCall('13')], { returns: ['9'], reads: ['0'], environment: defaultEnv().defineFunction('a', '0', '11') }) + .calls('15', '10') + .defineVariable('1', 'x', { definedBy: ['2'] }, false) + .constant('2', 
undefined, false) + .defineVariable('4', 'y', { definedBy: [] }, false) + .defineFunction('10', '10', ['9'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: '9', + graph: new Set(['1', '2', '4', '8', '9']), + environment: defaultEnv().pushEnv().defineParameter('x', '1', '3').defineParameter('y', '4', '5') + }) + .defineVariable('0', 'a', { definedBy: ['10', '11'] }) + .constant('13') + .definesOnCall('13', '4') + ) + }) + describe('Reuse parameters in call', () => { + assertDataflow(label('Not giving first argument', ['named-arguments', 'unnamed-arguments', 'numbers', 'name-normal']), shell, 'a(x=3, x)', emptyGraph() + .use('3', 'x') + .reads('3', '2') + .use('4', 'x') + .call('6', 'a', [argumentInCall('3', { name: 'x' } ), argumentInCall('4')], { returns: [], reads: [] }) + .argument('6', '3') + .constant('2') + ) + }) + describe('Define in parameters', () => { + assertDataflow(label('Support assignments in function calls', ['function-calls', 'side-effects-in-argument', 'name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'semicolons']), shell, 'foo(x <- 3); x', emptyGraph() + .use('6', 'x') + .reads('6', '1') + .call('3', '<-', [argumentInCall('1'), argumentInCall('2')], { returns: ['1'], reads: [BuiltIn] }) + .call('5', 'foo', [argumentInCall('3')], { returns: [], reads: [] }) + .constant('2') + .defineVariable('1', 'x', { definedBy: ['2', '3'] }) + ) + }) +})) diff --git a/test/functionality/dataflow/processing-of-elements/functions/dataflow-function-definition-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/dataflow-function-definition-tests.ts new file mode 100644 index 0000000000..d38d40e40b --- /dev/null +++ b/test/functionality/dataflow/processing-of-elements/functions/dataflow-function-definition-tests.ts @@ -0,0 +1,452 @@ +import { assertDataflow, withShell } from '../../../_helper/shell' +import { BuiltIn } from '../../../../../src/dataflow' +import { emptyGraph } from 
'../../../_helper/dataflow/dataflowgraph-builder' +import { argumentInCall, defaultEnv } from '../../../_helper/dataflow/environment-builder' +import { EmptyArgument, OperatorDatabase } from '../../../../../src' +import { label } from '../../../_helper/label' + +describe('Function Definition', withShell(shell => { + describe('Only functions', () => { + assertDataflow(label('unknown read in function', ['normal-definition', 'implicit-return', 'name-normal']), shell, 'function() { x }', emptyGraph() + .use('2', 'x', undefined, false) + .argument('3', '2') + .call('3', '{', [argumentInCall('2')], { returns: ['2'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .defineFunction('4', '4', ['3'], { + out: [], + in: [{ nodeId: '2', name: 'x', controlDependencies: [] }], + unknownReferences: [], + entryPoint: '3', + graph: new Set(['2', '3']), + environment: defaultEnv().pushEnv() + }) + ) + + assertDataflow(label('read of parameter', ['formals-named', 'implicit-return', 'name-normal']), shell, 'function(x) { x }', emptyGraph() + .use('4', 'x', undefined, false) + .reads('4', '0') + .argument('5', '4') + .call('5', '{', [argumentInCall('4')], { returns: ['4'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineParameter('x', '0', '1') }, false) + .defineVariable('0', 'x', { definedBy: [] }, false) + .defineFunction('6', '6', ['5'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: '5', + graph: new Set(['0', '4', '5']), + environment: defaultEnv().pushEnv().defineParameter('x', '0', '1') + }) + ) + assertDataflow(label('read of parameter in return', ['formals-named', 'return', 'name-normal']), shell, 'function(x) { return(x) }', emptyGraph() + .use('5', 'x', undefined, false) + .reads('5', '0') + .argument('7', '5') + .call('7', 'return', [argumentInCall('5')], { returns: ['5'], reads: [BuiltIn], onlyBuiltIn: true, environment: defaultEnv().pushEnv().defineParameter('x', '0', '1') }, false) + .argument('8', '7') + .call('8', '{', 
[argumentInCall('7')], { returns: ['7'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineParameter('x', '0', '1') }, false) + .defineVariable('0', 'x', { definedBy: [] }, false) + .defineFunction('9', '9', ['8'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: '8', + graph: new Set(['0', '5', '7', '8']), + environment: defaultEnv().pushEnv().defineParameter('x', '0', '1') + }) + ) + + describe('x', () => { + assertDataflow(label('return parameter named', ['formals-named', 'return', 'named-arguments']), shell, 'function(x) { return(x=x) }', emptyGraph() + .use('6', 'x', undefined, false) + .reads('6', '0') + .use('7', 'x', undefined, false) + .reads('7', '6') + .call('8', 'return', [argumentInCall('7', { name: 'x' } )], { returns: ['7'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineParameter('x', '0', '1') }, false) + .argument('8', '7') + .call('9', '{', [argumentInCall('8')], { returns: ['8'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineParameter('x', '0', '1') }, false) + .defineVariable('0', 'x', { definedBy: [] }, false) + .defineFunction('10', '10', ['9'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: '9', + graph: new Set(['0', '6', '7', '8', '9']), + environment: defaultEnv().pushEnv().defineParameter('x', '0', '1') + }) + ) + }) + + const envWithoutParams = defaultEnv().pushEnv() + const envWithXParam = envWithoutParams.defineParameter('x', '0', '1') + const envWithXYParam = envWithXParam.defineParameter('y', '2', '3') + const envWithXYZParam = envWithXYParam.defineParameter('z', '4', '5') + + assertDataflow(label('read of one parameter', ['formals-named', 'implicit-return', 'name-normal']), shell, 'function(x,y,z) y', + emptyGraph() + .defineFunction('8', '8', ['6'], { + out: [], + unknownReferences: [], + in: [], + entryPoint: '8', + graph: new Set(['0', '2', '4', '6']), + environment: envWithXYZParam + }) + .defineVariable('0', 'x', { }, false) + .defineVariable('2', 'y', { }, 
false) + .defineVariable('4', 'z', { }, false) + .use('6', 'y', { }, false) + .reads('6', '2') + ) + }) + describe('Scoping of body', () => { + assertDataflow(label('previously defined read in function', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'semicolons', 'normal-definition', 'implicit-return']), shell, 'x <- 3; function() { x }', emptyGraph() + .use('5', 'x', undefined, false) + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .argument('2', ['1', '0']) + .argument('6', '5') + .call('6', '{', [argumentInCall('5')], { returns: ['5'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .defineFunction('7', '7', ['6'], { + out: [], + in: [{ nodeId: '5', name: 'x', controlDependencies: [] }], + unknownReferences: [], + entryPoint: '6', + graph: new Set(['5', '6']), + environment: defaultEnv().pushEnv() + }) + ) + assertDataflow(label('local define with <- in function, read after', ['normal-definition', 'semicolons', 'name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers']), shell, 'function() { x <- 3; }; x', emptyGraph() + .use('7', 'x') + .call('4', '<-', [argumentInCall('2'), argumentInCall('3')], { returns: ['2'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .call('5', '{', [argumentInCall('4')], { returns: ['4'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .constant('3', undefined, false) + .defineVariable('2', 'x', { definedBy: ['3', '4'] }, false) + .defineFunction('6', '6', ['5'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: '5', + graph: new Set(['3', '2', '4', '5']), + environment: defaultEnv().pushEnv().defineVariable('x', '2', '4') + }) + ) + assertDataflow(label('local define with = in function, read after', ['normal-definition', ...OperatorDatabase['='].capabilities, 'semicolons', 'name-normal', 'numbers']), shell, 
'function() { x = 3; }; x', emptyGraph() + .use('7', 'x') + .call('4', '=', [argumentInCall('2'), argumentInCall('3')], { returns: ['2'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .call('5', '{', [argumentInCall('4')], { returns: ['4'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .constant('3', undefined, false) + .defineVariable('2', 'x', { definedBy: ['3', '4'] }, false) + .defineFunction('6', '6', ['5'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: '5', + graph: new Set(['3', '2', '4', '5']), + environment: defaultEnv().pushEnv().defineVariable('x', '2', '4') + }) + ) + + assertDataflow(label('local define with -> in function, read after', ['normal-definition', 'numbers', ...OperatorDatabase['->'].capabilities, 'semicolons', 'name-normal']), shell, 'function() { 3 -> x; }; x', emptyGraph() + .use('7', 'x') + .call('4', '->', [argumentInCall('2'), argumentInCall('3')], { returns: ['3'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .call('5', '{', [argumentInCall('4')], { returns: ['4'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .constant('2', undefined, false) + .defineVariable('3', 'x', { definedBy: ['2', '4'] }, false) + .defineFunction('6', '6', ['5'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: '5', + graph: new Set(['2', '3', '4', '5']), + environment: defaultEnv().pushEnv().defineVariable('x', '3', '4') + }) + ) + assertDataflow(label('global define with <<- in function, read after', ['normal-definition', 'name-normal', 'numbers', ...OperatorDatabase['<<-'].capabilities, 'semicolons']), shell, 'function() { x <<- 3; }; x', emptyGraph() + .use('7', 'x') + .call('4', '<<-', [argumentInCall('2'), argumentInCall('3')], { returns: ['2'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .call('5', '{', [argumentInCall('4')], { returns: ['4'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .constant('3', 
undefined, false) + .defineVariable('2', 'x', { definedBy: ['3', '4'] }, false) + .defineFunction('6', '6', ['5'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: '5', + graph: new Set(['3', '2', '4', '5']), + environment: defaultEnv().defineVariable('x', '2', '4').pushEnv() + }, { environment: defaultEnv().defineVariable('x', '2', '4') }) + ) + assertDataflow(label('global define with ->> in function, read after', ['normal-definition', 'numbers', ...OperatorDatabase['->>'].capabilities, 'semicolons', 'name-normal']), shell, 'function() { 3 ->> x; }; x', emptyGraph() + .use('7', 'x') + .call('4', '->>', [argumentInCall('2'), argumentInCall('3')], { returns: ['3'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .call('5', '{', [argumentInCall('4')], { returns: ['4'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .constant('2', undefined, false) + .defineVariable('3', 'x', { definedBy: ['2', '4'] }, false) + .defineFunction('6', '6', ['5'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: '5', + graph: new Set(['2', '3', '4', '5']), + environment: defaultEnv().defineVariable('x', '3', '4').pushEnv() + }, { environment: defaultEnv().defineVariable('x', '3', '4') }) + ) + assertDataflow(label('shadow in body', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'semicolons', 'normal-definition']), shell, 'x <- 2; function() { x <- 3; x }; x', emptyGraph() + .use('8', 'x', undefined, false) + .reads('8', '5') + .use('11', 'x') + .reads('11', '0') + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .call('7', '<-', [argumentInCall('5'), argumentInCall('6')], { returns: ['5'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .call('9', '{', [argumentInCall('7'), argumentInCall('8')], { returns: ['8'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineVariable('x', '5', '7') }, false) + .constant('1') + 
.defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .constant('6', undefined, false) + .defineVariable('5', 'x', { definedBy: ['6', '7'] }, false) + .defineFunction('10', '10', ['9'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: '9', + graph: new Set(['6', '5', '7', '8', '9']), + environment: defaultEnv().pushEnv().defineVariable('x', '5', '7') + }) + ) + assertDataflow(label('shadow in body with closure', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'semicolons', 'normal-definition']), shell, 'x <- 2; function() { x <- x; x }; x', emptyGraph() + .use('6', 'x', undefined, false) + .use('8', 'x', undefined, false) + .reads('8', '5') + .use('11', 'x') + .reads('11', '0') + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .call('7', '<-', [argumentInCall('5'), argumentInCall('6')], { returns: ['5'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .call('9', '{', [argumentInCall('7'), argumentInCall('8')], { returns: ['8'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineVariable('x', '5', '7') }, false) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .defineVariable('5', 'x', { definedBy: ['6', '7'] }, false) + .defineFunction('10', '10', ['9'], { + out: [], + in: [{ nodeId: '6', name: 'x', controlDependencies: [] }], + unknownReferences: [], + entryPoint: '9', + graph: new Set(['6', '5', '7', '8', '9']), + environment: defaultEnv().pushEnv().defineVariable('x', '5', '7') + }) + ) + }) + describe('Scoping of parameters', () => { + assertDataflow(label('parameter shadows', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'semicolons', 'formals-named', 'implicit-return']), shell, 'x <- 3; function(x) { x }', emptyGraph() + .use('7', 'x', undefined, false) + .reads('7', '3') + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .argument('2', ['1', '0']) + 
.argument('8', '7') + .call('8', '{', [argumentInCall('7')], { returns: ['7'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineParameter('x', '3', '4') }, false) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .defineVariable('3', 'x', { definedBy: [] }, false) + .defineFunction('9', '9', ['8'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: '8', + graph: new Set(['3', '7', '8']), + environment: defaultEnv().pushEnv().defineParameter('x', '3', '4') + }) + ) + }) + describe('Access dot-dot-dot', () => { + assertDataflow(label('parameter shadows', ['formals-dot-dot-dot', 'implicit-return']), shell, 'function(...) { ..11 }', emptyGraph() + .use('4', '..11', undefined, false) + .reads('4', '0') + .argument('5', '4') + .call('5', '{', [argumentInCall('4')], { returns: ['4'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineParameter('...', '0', '1') }, false) + .defineVariable('0', '...', { definedBy: [] }, false) + .defineFunction('6', '6', ['5'], { + out: [], + in: [{ nodeId: '4', name: '..11', controlDependencies: [] }], + unknownReferences: [], + entryPoint: '5', + graph: new Set(['0', '4', '5']), + environment: defaultEnv().pushEnv().defineParameter('...', '0', '1') + }) + ) + }) + describe('Using named arguments', () => { + assertDataflow(label('Read first parameter', ['formals-default', 'implicit-return', 'name-normal']), shell, 'function(a=3, b=a) { b }', emptyGraph() + .use('4', 'a', undefined, false) + .reads('4', '0') + .use('8', 'b', undefined, false) + .reads('8', '3') + .call('9', '{', [argumentInCall('8')], { returns: ['8'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineParameter('a', '0', '2').defineParameter('b', '3', '5') }, false) + .defineVariable('0', 'a', { definedBy: ['1'] }, false) + .constant('1', undefined, false) + .defineVariable('3', 'b', { definedBy: ['4'] }, false) + .defineFunction('10', '10', ['9'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: 
'9', + graph: new Set(['0', '1', '3', '4', '8', '9']), + environment: defaultEnv().pushEnv().defineParameter('a', '0', '2').defineParameter('b', '3', '5') + }) + ) + + assertDataflow(label('Read later definition', ['formals-named', 'name-normal', 'name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities, 'semicolons', 'binary-operator', 'infix-calls', ...OperatorDatabase['+'].capabilities]), shell, 'function(a=b, m=3) { b <- 1; a; b <- 5; a + 1 }', emptyGraph() + .use('1', 'b', undefined, false) + .reads('1', '8') + .use('11', 'a', undefined, false) + .reads('11', '0') + .use('15', 'a', undefined, false) + .reads('15', '0') + .call('10', '<-', [argumentInCall('8'), argumentInCall('9')], { returns: ['8'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineParameter('a', '0', '2').defineParameter('m', '3', '5') }, false) + .sameRead('10', '14') + .call('14', '<-', [argumentInCall('12'), argumentInCall('13')], { returns: ['12'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineParameter('a', '0', '2').defineParameter('m', '3', '5').defineVariable('b', '8', '10') }, false) + .call('17', '+', [argumentInCall('15'), argumentInCall('16')], { returns: [], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineParameter('a', '0', '2').defineParameter('m', '3', '5').defineVariable('b', '12', '14') }, false) + .reads('17', ['15', '16']) + .call('18', '{', [argumentInCall('10'), argumentInCall('11'), argumentInCall('14'), argumentInCall('17')], { returns: ['17'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineParameter('a', '0', '2').defineParameter('m', '3', '5').defineVariable('b', '12', '14') }, false) + .defineVariable('0', 'a', { definedBy: ['1'] }, false) + .defineVariable('3', 'm', { definedBy: ['4'] }, false) + .constant('4', undefined, false) + .constant('9', undefined, false) + .defineVariable('8', 'b', { definedBy: ['9', '10'] }, false) + .sameDef('8', '12') + .constant('13', undefined, false) + .defineVariable('12', 'b', 
{ definedBy: ['13', '14'] }, false) + .constant('16', undefined, false) + .defineFunction('19', '19', ['18'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: '18', + graph: new Set(['0', '1', '3', '4', '9', '8', '10', '11', '13', '12', '14', '15', '16', '17', '18']), + environment: defaultEnv().pushEnv().defineParameter('a', '0', '2').defineParameter('m', '3', '5').defineVariable('b', '12', '14') + }) + ) + }) + describe('Using special argument', () => { + assertDataflow(label('Return ...', ['formals-named', 'formals-dot-dot-dot', 'unnamed-arguments', 'implicit-return']), shell, 'function(a, ...) { foo(...) }', emptyGraph() + .use('7', '...', undefined, false) + .reads('7', '2') + .argument('9', '7') + .call('9', 'foo', [argumentInCall('7')], { returns: [], reads: [], environment: defaultEnv().pushEnv().defineParameter('a', '0', '1').defineParameter('...', '2', '3') }, false) + .call('10', '{', [argumentInCall('9')], { returns: ['9'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineParameter('a', '0', '1').defineParameter('...', '2', '3') }, false) + .defineVariable('0', 'a', { definedBy: [] }, false) + .defineVariable('2', '...', { definedBy: [] }, false) + .defineFunction('11', '11', ['10'], { + out: [], + in: [{ nodeId: '9', name: 'foo', controlDependencies: [] }], + unknownReferences: [], + entryPoint: '10', + graph: new Set(['0', '2', '7', '9', '10']), + environment: defaultEnv().pushEnv().defineParameter('a', '0', '1').defineParameter('...', '2', '3') + }) + ) + }) + describe('Bind environment to correct exit point', () => { + assertDataflow(label('Two possible exit points to bind y closure', ['normal-definition', 'name-normal', ...OperatorDatabase['<-'].capabilities, 'implicit-return', 'if', 'return']), shell, `function() { + g <- function() { y } + y <- 5 + if(z) + return(g) + y <- 3 + g +}`, emptyGraph() + .use('5', 'y', undefined, false) + .reads('5', ['9', '19']) + .use('12', 'z', undefined, false) + .use('14', 'g', undefined, 
false) + .reads('14', '2') + .use('22', 'g', { controlDependencies: [] }, false) + .reads('22', '2') + .call('6', '{', [argumentInCall('5')], { returns: ['5'], reads: [BuiltIn], environment: defaultEnv().pushEnv().pushEnv() }, false) + .call('8', '<-', [argumentInCall('2'), argumentInCall('7')], { returns: ['2'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .sameRead('8', ['11', '21']) + .call('11', '<-', [argumentInCall('9'), argumentInCall('10')], { returns: ['9'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineFunction('g', '2', '8') }, false) + .call('16', 'return', [argumentInCall('14')], { returns: ['14'], reads: [BuiltIn], controlDependency: ['18'], environment: defaultEnv().pushEnv().defineFunction('g', '2', '8').defineVariable('y', '9', '11') }, false) + .call('18', 'if', [argumentInCall('12'), argumentInCall('16'), EmptyArgument], { returns: ['16'], reads: ['12', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().pushEnv().defineFunction('g', '2', '8').defineVariable('y', '9', '11') }, false) + .call('21', '<-', [argumentInCall('19'), argumentInCall('20')], { returns: ['19'], reads: [BuiltIn], controlDependency: [], environment: defaultEnv().pushEnv().defineFunction('g', '2', '8').defineVariable('y', '9', '11') }, false) + .call('23', '{', [argumentInCall('8'), argumentInCall('11'), argumentInCall('18'), argumentInCall('21'), argumentInCall('22')], { returns: ['22'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineFunction('g', '2', '8').defineVariable('y', '9', '11').defineVariable('y', '19', '21', []) }, false) + .returns('23', '16') + .defineFunction('7', '7', ['6'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: '6', + graph: new Set(['5', '6']), + environment: defaultEnv().pushEnv().pushEnv() + }, { environment: defaultEnv().pushEnv() }, false) + .defineVariable('2', 'g', { definedBy: ['7', '8'] }, false) + .constant('10', undefined, false) + .defineVariable('9', 'y', { definedBy: 
['10', '11'] }, false) + .sameDef('9', '19') + .constant('20', undefined, false) + .defineVariable('19', 'y', { definedBy: ['20', '21'], controlDependency: [] }, false) + .defineFunction('24', '24', ['23'], { + out: [], + in: [{ nodeId: '12', name: 'z', controlDependencies: [] }], + unknownReferences: [], + entryPoint: '23', + graph: new Set(['7', '2', '8', '10', '9', '11', '12', '14', '16', '18', '20', '19', '21', '22', '23']), + environment: defaultEnv().pushEnv().defineFunction('g', '2', '8').defineVariable('y', '9', '11').defineVariable('y', '19', '21', []) + }) + ) + }) + describe('Late binding of environment variables', () => { + assertDataflow(label('define after function definition', ['normal-definition', 'implicit-return', 'semicolons', 'name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers']), shell, 'function() { x }; x <- 3', emptyGraph() + .use('2', 'x', undefined, false) + .call('3', '{', [argumentInCall('2')], { returns: ['2'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .call('7', '<-', [argumentInCall('5'), argumentInCall('6')], { returns: ['5'], reads: [BuiltIn] }) + .defineFunction('4', '4', ['3'], { + out: [], + in: [{ nodeId: '2', name: 'x', controlDependencies: [] }], + unknownReferences: [], + entryPoint: '3', + graph: new Set(['2', '3']), + environment: defaultEnv().pushEnv() + }) + .constant('6') + .defineVariable('5', 'x', { definedBy: ['6', '7'] }) + ) + }) + + describe('Nested Function Definitions', () => { + assertDataflow(label('double nested functions', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'normal-definition', 'unnamed-arguments', 'semicolons']), shell, 'a <- function() { x <- function(x) { x <- b }; x }; b <- 3; a', emptyGraph() + .use('9', 'b', undefined, false) + .use('14', 'x', undefined, false) + .reads('14', '3') + .use('21', 'a') + .reads('21', '0') + .call('10', '<-', [argumentInCall('8'), argumentInCall('9')], { returns: ['8'], reads: [BuiltIn], environment: 
defaultEnv().pushEnv().pushEnv().defineParameter('x', '4', '5') }, false) + .call('11', '{', [argumentInCall('10')], { returns: ['10'], reads: [BuiltIn], environment: defaultEnv().pushEnv().pushEnv().defineParameter('x', '4', '5') }, false) + .call('13', '<-', [argumentInCall('3'), argumentInCall('12')], { returns: ['3'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) + .call('15', '{', [argumentInCall('13'), argumentInCall('14')], { returns: ['14'], reads: [BuiltIn], environment: defaultEnv().pushEnv().defineFunction('x', '3', '13') }, false) + .call('17', '<-', [argumentInCall('0'), argumentInCall('16')], { returns: ['0'], reads: [BuiltIn] }) + .sameRead('17', '20') + .call('20', '<-', [argumentInCall('18'), argumentInCall('19')], { returns: ['18'], reads: [BuiltIn], environment: defaultEnv().defineFunction('a', '0', '17') }) + .defineVariable('4', 'x', { definedBy: [] }, false) + .sameDef('4', '8') + .defineVariable('8', 'x', { definedBy: ['9', '10'] }, false) + .defineFunction('12', '12', ['11'], { + out: [], + in: [{ nodeId: '9', name: 'b', controlDependencies: [] }], + unknownReferences: [], + entryPoint: '11', + graph: new Set(['4', '9', '8', '10', '11']), + environment: defaultEnv().pushEnv().pushEnv().defineVariable('x', '8', '10') + }, { environment: defaultEnv().pushEnv() }, false) + .defineVariable('3', 'x', { definedBy: ['12', '13'] }, false) + .defineFunction('16', '16', ['15'], { + out: [], + in: [], + unknownReferences: [], + entryPoint: '15', + graph: new Set(['12', '3', '13', '14', '15']), + environment: defaultEnv().pushEnv().defineFunction('x', '3', '13') + }) + .defineVariable('0', 'a', { definedBy: ['16', '17'] }) + .constant('19') + .defineVariable('18', 'b', { definedBy: ['19', '20'] }) + ) + }) +})) diff --git a/test/functionality/dataflow/processing-of-elements/functions/dataflow-source-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/dataflow-source-tests.ts new file mode 100644 index 
0000000000..e8e68d4f81 --- /dev/null +++ b/test/functionality/dataflow/processing-of-elements/functions/dataflow-source-tests.ts @@ -0,0 +1,95 @@ +import { EmptyArgument, OperatorDatabase, requestProviderFromFile, requestProviderFromText } from '../../../../../src' +import { BuiltIn } from '../../../../../src/dataflow' +import { setSourceProvider } from '../../../../../src/dataflow/internal/process/functions/call/built-in/built-in-source' +import { emptyGraph } from '../../../_helper/dataflow/dataflowgraph-builder' +import { argumentInCall, defaultEnv } from '../../../_helper/dataflow/environment-builder' +import { assertDataflow, withShell } from '../../../_helper/shell' +import { label } from '../../../_helper/label' + +describe('source', withShell(shell => { + // reset the source provider back to the default value after our tests + after(() => setSourceProvider(requestProviderFromFile())) + + const sources = { + simple: 'N <- 9', + recursive1: 'x <- 1\nsource("recursive2")', + recursive2: 'cat(x)\nsource("recursive1")' + } + setSourceProvider(requestProviderFromText(sources)) + + assertDataflow(label('simple source', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'unnamed-arguments', 'strings', 'sourcing-external-files','newlines']), shell, 'source("simple")\ncat(N)', emptyGraph() + .use('5', 'N') + .reads('5', 'simple-1:1-1:6-0') + .call('3', 'source', [argumentInCall('1')], { returns: [], reads: [BuiltIn] }) + .call('simple-1:1-1:6-2', '<-', [argumentInCall('simple-1:1-1:6-0'), argumentInCall('simple-1:1-1:6-1')], { returns: ['simple-1:1-1:6-0'], reads: [BuiltIn] }) + .call('7', 'cat', [argumentInCall('5')], { returns: [], reads: [BuiltIn], environment: defaultEnv().defineVariable('N', 'simple-1:1-1:6-0', 'simple-1:1-1:6-2') }) + .constant('1') + .constant('simple-1:1-1:6-1') + .defineVariable('simple-1:1-1:6-0', 'N', { definedBy: ['simple-1:1-1:6-1', 'simple-1:1-1:6-2'] }) + ) + + assertDataflow(label('multiple source', 
['sourcing-external-files', 'strings', 'unnamed-arguments', 'normal-definition', 'newlines']), shell, 'source("simple")\nN <- 0\nsource("simple")\ncat(N)', emptyGraph() + .use('12', 'N') + .reads('12', 'simple-3:1-3:6-0') + .call('3', 'source', [argumentInCall('1')], { returns: [], reads: [BuiltIn] }) + .sameRead('3', '10') + .call('simple-1:1-1:6-2', '<-', [argumentInCall('simple-1:1-1:6-0'), argumentInCall('simple-1:1-1:6-1')], { returns: ['simple-1:1-1:6-0'], reads: [BuiltIn] }) + .call('6', '<-', [argumentInCall('4'), argumentInCall('5')], { returns: ['4'], reads: [BuiltIn], environment: defaultEnv().defineVariable('N', 'simple-1:1-1:6-0', 'simple-1:1-1:6-2') }) + .call('10', 'source', [argumentInCall('8')], { returns: [], reads: [BuiltIn], environment: defaultEnv().defineVariable('N', '4', '6') }) + .call('simple-3:1-3:6-2', '<-', [argumentInCall('simple-3:1-3:6-0'), argumentInCall('simple-3:1-3:6-1')], { returns: ['simple-3:1-3:6-0'], reads: [BuiltIn], environment: defaultEnv().defineVariable('N', '4', '6') }) + .call('14', 'cat', [argumentInCall('12')], { returns: [], reads: [BuiltIn], environment: defaultEnv().defineVariable('N', 'simple-3:1-3:6-0', 'simple-3:1-3:6-2') }) + .constant('1') + .constant('simple-1:1-1:6-1') + .defineVariable('simple-1:1-1:6-0', 'N', { definedBy: ['simple-1:1-1:6-1', 'simple-1:1-1:6-2'] }) + .sameDef('simple-1:1-1:6-0', '4') + .constant('5') + .defineVariable('4', 'N', { definedBy: ['5', '6'] }) + .sameDef('4', 'simple-3:1-3:6-0') + .constant('8') + .constant('simple-3:1-3:6-1') + .defineVariable('simple-3:1-3:6-0', 'N', { definedBy: ['simple-3:1-3:6-1', 'simple-3:1-3:6-2'] }) + ) + + assertDataflow(label('conditional', ['if', 'name-normal', 'sourcing-external-files', 'unnamed-arguments', 'strings']), shell, 'if (x) { source("simple") }\ncat(N)', emptyGraph() + .use('0', 'x') + .use('10', 'N') + .reads('10', 'simple-1:10-1:15-0') + .call('6', 'source', [argumentInCall('4')], { returns: [], reads: [BuiltIn], controlDependency: 
['8'] }) + .call('simple-1:10-1:15-2', '<-', [argumentInCall('simple-1:10-1:15-0'), argumentInCall('simple-1:10-1:15-1')], { returns: ['simple-1:10-1:15-0'], reads: [BuiltIn] }) + .call('7', '{', [argumentInCall('6')], { returns: ['6'], reads: [BuiltIn], controlDependency: ['8'] }) + .call('8', 'if', [argumentInCall('0'), argumentInCall('7'), EmptyArgument], { returns: ['7'], reads: ['0', BuiltIn], onlyBuiltIn: true }) + .call('12', 'cat', [argumentInCall('10')], { returns: [], reads: [BuiltIn], environment: defaultEnv().defineVariable('N', 'simple-1:10-1:15-0', 'simple-1:10-1:15-2') }) + .constant('4') + .constant('simple-1:10-1:15-1') + .defineVariable('simple-1:10-1:15-0', 'N', { definedBy: ['simple-1:10-1:15-1', 'simple-1:10-1:15-2'] }) + ) + + // missing sources should just be ignored + assertDataflow(label('missing source', ['unnamed-arguments', 'strings', 'sourcing-external-files']), shell, 'source("missing")', emptyGraph() + .call('3', 'source', [argumentInCall('1')], { returns: [], reads: [BuiltIn] }) + .constant('1') + ) + + assertDataflow(label('recursive source', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'unnamed-arguments', 'strings', 'sourcing-external-files', 'newlines']), shell, sources.recursive1, emptyGraph() + .use('recursive2-2:1-2:6-1', 'x') + .reads('recursive2-2:1-2:6-1', '0') + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .call('6', 'source', [argumentInCall('4')], { returns: [], reads: [BuiltIn], environment: defaultEnv().defineVariable('x', '0', '2') }) + .call('recursive2-2:1-2:6-3', 'cat', [argumentInCall('recursive2-2:1-2:6-1')], { returns: [], reads: [BuiltIn], environment: defaultEnv().defineVariable('x', '0', '2') }) + .call('recursive2-2:1-2:6-7', 'source', [argumentInCall('recursive2-2:1-2:6-5')], { returns: [], reads: [BuiltIn], environment: defaultEnv().defineVariable('x', '0', '2') }) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', 
'2'] }) + .constant('4') + .constant('recursive2-2:1-2:6-5') + ) + + // we currently don't support (and ignore) source calls with non-constant arguments! + assertDataflow(label('non-constant source', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'strings', 'newlines', 'unnamed-arguments']), shell, 'x <- "recursive1"\nsource(x)', emptyGraph() + .use('4', 'x') + .reads('4', '0') + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .call('6', 'source', [argumentInCall('4')], { returns: [], reads: [BuiltIn], environment: defaultEnv().defineVariable('x', '0', '2') }) + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + ) +})) diff --git a/test/functionality/dataflow/processing-of-elements/functions/function-call-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/function-call-tests.ts deleted file mode 100644 index ae80a5a936..0000000000 --- a/test/functionality/dataflow/processing-of-elements/functions/function-call-tests.ts +++ /dev/null @@ -1,329 +0,0 @@ -import { assertDataflow, withShell } from '../../../_helper/shell' -import { UnnamedFunctionCallPrefix } from '../../../../../src/dataflow/internal/process/functions/function-call' -import { LocalScope } from '../../../../../src/dataflow/environments/scopes' -import { MIN_VERSION_LAMBDA } from '../../../../../src/r-bridge/lang-4.x/ast/model/versions' -import { emptyGraph } from '../../../_helper/dataflowgraph-builder' -import { argumentInCall, defaultEnvironment, unnamedArgument } from '../../../_helper/environment-builder' - -describe('Function Call', withShell(shell => { - describe('Calling previously defined functions', () => { - const envWithXParamDefined = defaultEnvironment().pushEnv().defineParameter('x', '4', '5') - const envWithFirstI = defaultEnvironment().defineVariable('i', '0', '2') - const envWithIA = envWithFirstI.defineFunction('a', '3', '9') - - assertDataflow('Calling function a', shell, 'i <- 4; a 
<- function(x) { x }\na(i)', - emptyGraph() - .defineVariable('0', 'i') - .defineVariable('3', 'a', LocalScope, { environment: envWithFirstI }) - .use('11', 'i', { environment: envWithIA }) - .use('12', unnamedArgument('12'), { environment: envWithIA }) - .call('13', 'a', [argumentInCall('12')], { environment: envWithIA }) - .defineFunction('8', '8', ['6'], { - out: [], - in: [], - unknownReferences: [], - scope: LocalScope, - environments: envWithXParamDefined, - graph: new Set(['4', '6']), - }, { environment: envWithXParamDefined.popEnv() }) - .defineVariable('4', 'x', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .use('6', 'x', { environment: envWithXParamDefined }, false) - .reads('6', '4') - .reads('11', '0') - .definedBy('3', '8') - .argument('13', '12') - .reads('12', '11') - .reads('13', '3') - .calls('13', '8') - .returns('13', '6') - .definesOnCall('12', '4') - ) - const envWithIAB = envWithIA.defineVariable('b', '10', '12') - assertDataflow('Calling function a with an indirection', shell, 'i <- 4; a <- function(x) { x }\nb <- a\nb(i)', - emptyGraph() - .defineVariable('0', 'i') - .defineVariable('3', 'a', LocalScope, { environment: envWithFirstI }) - .defineVariable('10', 'b', LocalScope, { environment: envWithIA }) - .use('11', 'a', { environment: envWithIA }) - .use('14', 'i', { environment: envWithIAB }) - .use('15', unnamedArgument('15'), { environment: envWithIAB }) - .call('16', 'b', [argumentInCall('15')], { environment: envWithIAB }) - .defineFunction('8', '8', ['6'], { - out: [], - in: [], - unknownReferences: [], - scope: LocalScope, - environments: envWithXParamDefined, - graph: new Set(['4', '6']) - }, - { environment: envWithXParamDefined.popEnv() }) - .defineVariable('4', 'x', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .use('6', 'x', { environment: envWithXParamDefined }, false) - .reads('6', '4') - .reads('14', '0') - .definedBy('3', '8') - .definedBy('10', '11') - .reads('11', '3') - 
.argument('16', '15') - .reads('15', '14') - .reads('16', '10') - .calls('16', '8') - .returns('16', '6') - .definesOnCall('15', '4') - ) - const envWithXConstDefined = defaultEnvironment().pushEnv().defineParameter('x', '4', '5') - const envWithXDefinedForFunc = defaultEnvironment().pushEnv().defineVariable('x', '6', '8') - const envWithLastXDefined = defaultEnvironment().pushEnv().defineVariable('x', '9', '11') - const envWithIAndLargeA = envWithFirstI.defineFunction('a', '3', '15') - - assertDataflow('Calling with a constant function', shell, `i <- 4 -a <- function(x) { x <- x; x <- 3; 1 } -a(i)`, emptyGraph() - .defineVariable('0', 'i') - .defineVariable('3', 'a', LocalScope, { environment: envWithFirstI }) - .use('17', 'i', { environment: envWithIAndLargeA }) - .use('18', unnamedArgument('18'), { environment: envWithIAndLargeA }) - .reads('17', '0') - .call('19', 'a', [argumentInCall('18')], { environment: envWithIAndLargeA }) - .defineFunction('14', '14', ['12'], { - out: [], - in: [], - unknownReferences: [], - scope: LocalScope, - environments: envWithLastXDefined, - graph: new Set(['4', '6', '7', '9']) - }, - { environment: defaultEnvironment() } - ) - .defineVariable('4', 'x', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .defineVariable('6', 'x', LocalScope, { environment: envWithXConstDefined }, false) - .defineVariable('9', 'x', LocalScope, { environment: envWithXDefinedForFunc }, false) - .use('7', 'x', { environment: envWithXConstDefined }, false) - .exit('12', '1', envWithLastXDefined, {}, false) - .definedBy('6', '7') - .reads('7', '4') - .sameDef('6', '9') - .sameDef('4', '9') - .sameDef('4', '6') - - .definedBy('3', '14') - .argument('19', '18') - .reads('18', '17') - .reads('19', '3') - .calls('19', '14') - .returns('19', '12') - .definesOnCall('18', '4') - ) - }) - - describe('Directly calling a function', () => { - const envWithXParameter = defaultEnvironment().pushEnv().defineParameter('x', '0', '1') - const outGraph = 
emptyGraph() - .call('9', `${UnnamedFunctionCallPrefix}9`,[argumentInCall('8')]) - .defineFunction('6', '6', ['4'], { - out: [], - in: [], - unknownReferences: [], - scope: LocalScope, - environments: envWithXParameter, - graph: new Set(['0', '2']) - }, - { environment: defaultEnvironment() }) - .defineVariable('0', 'x', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .use('2', 'x', { environment: envWithXParameter }, false) - .exit('4', '+', envWithXParameter , {}, false) - .relates('2', '4') - .reads('2', '0') - - .use('8', unnamedArgument('8')) - .argument('9', '8') - .calls('9', '6') - .returns('9', '4') - .definesOnCall('8', '0') - - assertDataflow('Calling with constant argument using lambda', shell, '(\\(x) { x + 1 })(2)', - outGraph, - { minRVersion: MIN_VERSION_LAMBDA } - ) - assertDataflow('Calling with constant argument', shell, '(function(x) { x + 1 })(2)', - outGraph - ) - - const envWithADefined = defaultEnvironment().defineFunction('a', '0', '6') - - assertDataflow('Calling a function which returns another', shell, `a <- function() { function() { 42 } } -a()()`, - emptyGraph() - .call('9', `${UnnamedFunctionCallPrefix}9`, [], { environment: envWithADefined }) - .call('8', 'a', [], { environment: envWithADefined }) - .defineVariable('0', 'a') - .defineFunction('5', '5', ['3'], { - out: [], - in: [], - unknownReferences: [], - scope: LocalScope, - environments: defaultEnvironment().pushEnv(), - graph: new Set(['3']) - }, - { environment: defaultEnvironment() } - ) - .defineFunction('3', '3', ['1'], { - out: [], - in: [], - unknownReferences: [], - scope: LocalScope, - environments: defaultEnvironment().pushEnv().pushEnv(), - graph: new Set() - }, - { environment: defaultEnvironment().pushEnv() }, false) - .exit('1', '42', defaultEnvironment().pushEnv().pushEnv(), {}, false) - .calls('9', '8') - .reads('8', '0') - .definedBy('0', '5') - .calls('8', '5') - .returns('8', '3') - .calls('9', '3') - .returns('9', '1') - ) - }) - - 
describe('Argument which is expression', () => { - assertDataflow('Calling with 1 + x', shell, 'foo(1 + x)', - emptyGraph() - .call('5', 'foo', [argumentInCall('4')], { environment: defaultEnvironment() }) - .use('4', unnamedArgument('4')) - .use('2', 'x') - .reads('4', '2') - .argument('5', '4') - ) - }) - - describe('Argument which is anonymous function call', () => { - assertDataflow('Calling with a constant function', shell, 'f(function() { 3 })', - emptyGraph() - .call('5', 'f', [argumentInCall('4')], { environment: defaultEnvironment() }) - .use('4', unnamedArgument('4')) - .defineFunction('3', '3', ['1'], { - out: [], - in: [], - unknownReferences: [], - scope: LocalScope, - environments: defaultEnvironment().pushEnv(), - graph: new Set() - }) - .exit('1', '3', defaultEnvironment().pushEnv() , {}, false) - .reads('4', '3') - .argument('5', '4') - ) - }) - - describe('Multiple out refs in arguments', () => { - assertDataflow('Calling \'seq\'', shell, 'seq(1, length(pkgnames), by = stepsize)', - emptyGraph() - .call('11', 'seq', [argumentInCall('2'), argumentInCall('7'), argumentInCall('10', 'by')],{ environment: defaultEnvironment() }) - .use('2', unnamedArgument('2')) - .use('7', unnamedArgument('7')) - .use('10', 'by') - .argument('11', '2') - .argument('11', '7') - .argument('11', '10') - .use('9', 'stepsize' ) - .reads('10', '9') - .call('6', 'length', [argumentInCall('5')], { environment: defaultEnvironment() }) - .reads('7', '6') - .use('5', unnamedArgument('5')) - .argument('6', '5') - .use('4', 'pkgnames' ) - .reads('5', '4') - - ) - }) - - describe('Late function bindings', () => { - const innerEnv = defaultEnvironment().pushEnv() - const defWithA = defaultEnvironment().defineFunction('a', '0', '4') - const defWithAY = defWithA.defineVariable('y', '5', '7') - - assertDataflow('Late binding of y', shell, 'a <- function() { y }\ny <- 12\na()', - emptyGraph() - .defineVariable('0', 'a') - .defineVariable('5', 'y', LocalScope, { environment: defWithA }) 
- .call('9', 'a', [], { environment: defWithAY }) - .defineFunction('3', '3', ['1'], { - out: [], - in: [{ nodeId: '1', name: 'y', scope: LocalScope, used: 'always' }], - unknownReferences: [], - scope: LocalScope, - environments: innerEnv, - graph: new Set(['1']) - }) - .use('1', 'y', { environment: innerEnv }, false) - .definedBy('0', '3') - .calls('9', '3') - .reads('9', '0') - .returns('9', '1') - .reads('9', '5') - ) - }) - - describe('Deal with empty calls', () => { - const withXParameter = defaultEnvironment() - .pushEnv().defineParameter('x', '1', '3') - const withXYParameter = withXParameter.defineParameter('y', '4', '5') - const withADefined = defaultEnvironment().defineFunction('a', '0', '9') - - assertDataflow('Not giving first parameter', shell, `a <- function(x=3,y) { y } -a(,3)`, emptyGraph() - .call('13', 'a', [ - 'empty', - { nodeId: '12', name: unnamedArgument('12'), scope: LocalScope, used: 'always' } - ], - { environment: withADefined }) - .defineVariable('0', 'a') - .defineFunction('8', '8', ['6'], { - out: [], - in: [], - unknownReferences: [], - scope: LocalScope, - environments: withXYParameter, - graph: new Set(['1', '4', '6']) - }, - { environment: withXYParameter.popEnv() }) - .defineVariable('1', 'x', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .defineVariable('4', 'y', LocalScope, { environment: withXParameter }, false) - .use('6', 'y', { environment: withXYParameter }, false) - .reads('6', '4') - .use('12', unnamedArgument('12'), { environment: withADefined }) - .reads('13', '0') - .calls('13', '8') - .definedBy('0', '8') - .argument('13', '12') - .returns('13', '6') - .definesOnCall('12', '4') - ) - }) - describe('Reuse parameters in call', () => { - const envWithX = defaultEnvironment().defineArgument('x', '3', '3') - assertDataflow('Not giving first argument', shell, 'a(x=3, x)', emptyGraph() - .call('6', 'a', [argumentInCall('3', 'x'), argumentInCall('5')]) - .use('3', 'x') - .use('5', 
unnamedArgument('5'), { environment: envWithX }) - .use('4', 'x', { environment: envWithX }) - .argument('6', '3') - .argument('6', '5') - .reads('5', '4') - .reads('4', '3') - ) - }) - describe('Define in parameters', () => { - assertDataflow('Support assignments in function calls', shell, 'foo(x <- 3); x', emptyGraph() - .call('5', 'foo', [argumentInCall('4')]) - .use('4', unnamedArgument('4')) - .defineVariable('1', 'x') - .use('6', 'x', { environment: defaultEnvironment().defineVariable('x', '1', '3') }) - .argument('5', '4') - .reads('4', '1') - .reads('6', '1') - ) - }) -})) diff --git a/test/functionality/dataflow/processing-of-elements/functions/function-definition-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/function-definition-tests.ts deleted file mode 100644 index 5df1fbf2f9..0000000000 --- a/test/functionality/dataflow/processing-of-elements/functions/function-definition-tests.ts +++ /dev/null @@ -1,487 +0,0 @@ -import { assertDataflow, withShell } from '../../../_helper/shell' -import { BuiltIn } from '../../../../../src/dataflow' -import { GlobalScope, LocalScope } from '../../../../../src/dataflow/environments/scopes' -import { emptyGraph } from '../../../_helper/dataflowgraph-builder' -import { argumentInCall, defaultEnvironment, unnamedArgument } from '../../../_helper/environment-builder' - -describe('Function Definition', withShell(shell => { - describe('Only functions', () => { - assertDataflow('unknown read in function', shell, 'function() { x }', - emptyGraph() - .defineFunction('2', '2', ['0'], { - out: [], - unknownReferences: [], - in: [{ nodeId: '0', used: 'always', name: 'x', scope: LocalScope }], - scope: LocalScope, - graph: new Set(['0']), - environments: defaultEnvironment().pushEnv() - }) - .use('0', 'x', { environment: defaultEnvironment().pushEnv() }, false) - ) - - const envWithXDefined = defaultEnvironment().pushEnv().defineParameter('x', '0', '1') - assertDataflow('read of parameter', shell, 
'function(x) { x }', - emptyGraph() - .defineFunction('4', '4', ['2'], { - out: [], - unknownReferences: [], - in: [], - scope: LocalScope, - graph: new Set(['0', '2']), - environments: envWithXDefined - }) - .defineVariable('0', 'x', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .use('2', 'x', { environment: envWithXDefined }, false) - .reads('2', '0') - ) - assertDataflow('read of parameter in return', shell, 'function(x) { return(x) }', - emptyGraph() - .defineFunction('7', '7', ['5'], { - out: [], - unknownReferences: [], - in: [], - scope: LocalScope, - graph: new Set(['4', '5', '3', '0']), - environments: envWithXDefined - }) - .defineVariable('0', 'x', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .use('3', 'x', { environment: envWithXDefined }, false) - .call('5', 'return', [argumentInCall('4')], { environment: envWithXDefined }, false) - .use('4',unnamedArgument('4'), { environment: envWithXDefined }, false) - .reads('5', BuiltIn) - .calls('5', BuiltIn) - .reads('3', '0') - .argument('5', '4') - .returns('5', '4') - .reads('4', '3') - ) - - describe('x', () => { - assertDataflow('return parameter named', shell, 'function(x) { return(x=x) }', - emptyGraph() - .defineFunction('8', '8', ['6'], { - out: [], - unknownReferences: [], - in: [], - scope: LocalScope, - graph: new Set(['5', '6', '4', '0']), - environments: envWithXDefined - }) - .defineVariable('0', 'x', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .use('4', 'x', { environment: envWithXDefined }, false) - .call('6', 'return', [argumentInCall('5', 'x')], { environment: envWithXDefined }, false) - .use('5', 'x', { environment: envWithXDefined }, false) - .reads('6', BuiltIn) - .calls('6', BuiltIn) - .reads('4', '0') - .argument('6', '5') - .returns('6', '5') - .reads('5', '4') - ) - }) - - const envWithoutParams = defaultEnvironment().pushEnv() - const envWithXParam = envWithoutParams.defineParameter('x', '0', '1') - const 
envWithXYParam = envWithXParam.defineParameter('y', '2', '3') - const envWithXYZParam = envWithXYParam.defineParameter('z', '4', '5') - - assertDataflow('read of one parameter', shell, 'function(x,y,z) y', - emptyGraph() - .defineFunction('8', '8', ['6'], { - out: [], - unknownReferences: [], - in: [], - scope: LocalScope, - graph: new Set(['0', '2', '4', '6']), - environments: envWithXYZParam - }) - .defineVariable('0', 'x', LocalScope, { environment: envWithoutParams }, false) - .defineVariable('2', 'y', LocalScope, { environment: envWithXParam }, false) - .defineVariable('4', 'z', LocalScope, { environment: envWithXYParam }, false) - .use('6', 'y', { environment: envWithXYZParam }, false) - .reads('6', '2') - ) - }) - describe('Scoping of body', () => { - assertDataflow('previously defined read in function', shell, 'x <- 3; function() { x }', - emptyGraph() - .defineVariable('0', 'x') - .defineFunction('5', '5', ['3'], { - out: [], - unknownReferences: [], - in: [ { nodeId: '3', scope: LocalScope, name: 'x', used: 'always' } ], - scope: LocalScope, - graph: new Set(['3']), - environments: defaultEnvironment().pushEnv() - }) - .use('3', 'x', { environment: defaultEnvironment().pushEnv() }, false) - ) - const envWithXDefined = defaultEnvironment().pushEnv().defineVariable('x', '0', '2') - assertDataflow('local define with <- in function, read after', shell, 'function() { x <- 3; }; x', - emptyGraph() - .use('5', 'x') - .defineFunction('4', '4', ['2' /* the assignment */], { - out: [], - unknownReferences: [], - in: [], - scope: LocalScope, - graph: new Set(['0']), - environments: envWithXDefined - }) - .defineVariable('0', 'x', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .exit('2', '<-', envWithXDefined, {}, false) - .relates('2', '0') - ) - assertDataflow('local define with = in function, read after', shell, 'function() { x = 3; }; x', - emptyGraph() - .use('5', 'x') - .defineFunction('4', '4', ['2'], { - out: [], - unknownReferences: 
[], - in: [], - scope: LocalScope, - graph: new Set(['0']), - environments: envWithXDefined - }) - .defineVariable('0', 'x', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .exit('2', '=', envWithXDefined, {}, false) - .relates('2', '0') - ) - - const envWithXDefinedR = defaultEnvironment().pushEnv().defineVariable('x', '1', '2') - assertDataflow('local define with -> in function, read after', shell, 'function() { 3 -> x; }; x', - emptyGraph() - .use('5', 'x') - .defineFunction('4', '4', ['2'], { - out: [], - unknownReferences: [], - in: [], - scope: LocalScope, - graph: new Set(['1']), - environments: envWithXDefinedR - }) - .defineVariable('1', 'x', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .exit('2', '->', envWithXDefinedR, {}, false) - .relates('2', '1') - ) - const envWithXDefinedGlobal = defaultEnvironment().pushEnv().defineVariable('x', '0', '2', GlobalScope) - assertDataflow('global define with <<- in function, read after', shell, 'function() { x <<- 3; }; x', - emptyGraph() - .use('5', 'x') - .defineFunction('4', '4', ['2'], { - out: [], - unknownReferences: [], - in: [], - scope: LocalScope, - graph: new Set(['0']), - environments: envWithXDefinedGlobal - }, - { environment: envWithXDefinedGlobal.popEnv() } - ) - .defineVariable('0', 'x', GlobalScope, { environment: defaultEnvironment().pushEnv() }, false) - .exit('2', '<<-', envWithXDefinedGlobal, {}, false) - .relates('2', '0') - ) - const envWithXDefinedGlobalR = defaultEnvironment().pushEnv().defineVariable('x', '1', '2', GlobalScope) - assertDataflow('global define with ->> in function, read after', shell, 'function() { 3 ->> x; }; x', - emptyGraph() - .use('5', 'x') - .defineFunction('4', '4', ['2'], { - out: [], - unknownReferences: [], - in: [], - scope: LocalScope, - graph: new Set(['1']), - environments: envWithXDefinedGlobalR - }, - { environment: envWithXDefinedGlobalR.popEnv() } - ) - .defineVariable('1', 'x', GlobalScope, { environment: 
defaultEnvironment().pushEnv() }, false) - .exit('2', '->>', envWithXDefinedGlobalR, {}, false) - .relates('2', '1') - ) - const envDefXSingle = defaultEnvironment().pushEnv().defineVariable('x', '3', '5') - assertDataflow('shadow in body', shell, 'x <- 2; function() { x <- 3; x }; x', - emptyGraph() - .defineVariable('0', 'x') - .use('9', 'x', { - environment: defaultEnvironment().defineVariable('x', '0', '2') - }) - .reads('9', '0') - .defineFunction('8', '8', ['6'], { - out: [], - unknownReferences: [], - in: [], - scope: LocalScope, - graph: new Set(['6', '3']), - environments: envDefXSingle - }) - .use('6', 'x', { environment: defaultEnvironment().pushEnv().defineVariable('x', '3', '5') }, false) - .defineVariable('3', 'x', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .reads('6', '3') - ) - assertDataflow('shadow in body with closure', shell, 'x <- 2; function() { x <- x; x }; x', - emptyGraph() - .defineVariable('0', 'x') - .use('9', 'x', { - environment: defaultEnvironment().defineVariable('x', '0', '2') - }) - .reads('9', '0') - .defineFunction('8', '8', ['6'], { - out: [], - unknownReferences: [], - in: [ { nodeId: '4', used: 'always', name: 'x', scope: LocalScope } ], - scope: LocalScope, - graph: new Set(['3', '4', '6']), - environments: envDefXSingle - }) - .defineVariable('3', 'x', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .use('4', 'x', { environment: defaultEnvironment().pushEnv() }, false) - .use('6', 'x', { - environment: defaultEnvironment().pushEnv().defineVariable('x', '3', '5'), - }, false) - .reads('6', '3') - .definedBy('3', '4') - ) - }) - describe('Scoping of parameters', () => { - const envWithXDefined = defaultEnvironment().pushEnv().defineParameter('x', '3', '4') - assertDataflow('parameter shadows', shell, 'x <- 3; function(x) { x }', - emptyGraph() - .defineVariable('0', 'x') - .defineFunction('7', '7', ['5'], { - out: [], - unknownReferences: [], - in: [], - scope: LocalScope, - 
graph: new Set(['3', '5']), - environments: envWithXDefined - }) - .defineVariable('3', 'x', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .use('5', 'x', { environment: envWithXDefined }, false) - .reads('5', '3') - ) - }) - describe('Access dot-dot-dot', () => { - const envWithParam = defaultEnvironment().pushEnv().defineParameter('...', '0', '1') - assertDataflow('parameter shadows', shell, 'function(...) { ..11 }', - emptyGraph() - .defineFunction('4', '4', ['2'], { - out: [], - unknownReferences: [], - in: [], - scope: LocalScope, - graph: new Set(['0', '2']), - environments: envWithParam - }) - .defineVariable('0', '...', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .use('2', '..11', { environment: envWithParam }, false) - .reads('2', '0') - ) - }) - describe('Using named arguments', () => { - const envWithA = defaultEnvironment().pushEnv().defineParameter('a', '0', '2') - const envWithAB = envWithA.defineParameter('b', '3', '5') - - assertDataflow('Read first parameter', shell, 'function(a=3, b=a) { b }', - emptyGraph() - .defineFunction('8', '8', ['6'], { - out: [], - unknownReferences: [], - in: [], - scope: LocalScope, - environments: envWithAB, - graph: new Set(['0', '3', '4', '6']) - }) - .defineVariable('0', 'a', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .defineVariable('3', 'b', LocalScope, { environment: envWithA }, false) - .use('4', 'a', { environment: envWithA }, false) - .use('6', 'b', { environment: envWithAB }, false) - .reads('4', '0') - .definedBy('3', '4', 'maybe' /* default values can be overridden */) - .reads('6', '3') - ) - - const envWithFirstParam = defaultEnvironment().pushEnv().defineParameter('a', '0', '2') - const envWithBothParam = envWithFirstParam.defineParameter('m', '3', '5') - const envWithBothParamFirstB = envWithBothParam.defineVariable('b', '6', '8') - const envWithBothParamSecondB = envWithBothParam.defineVariable('b', '10', '12') - - 
assertDataflow('Read later definition', shell, 'function(a=b, m=3) { b <- 1; a; b <- 5; a + 1 }', - emptyGraph() - .defineFunction('17', '17', ['15'],{ - out: [], - unknownReferences: [], - in: [], - scope: LocalScope, - environments: envWithBothParamSecondB, - graph: new Set(['0', '3', '10', '6', '1', '9', '13']) - }) - .defineVariable('0', 'a', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .defineVariable('3', 'm', LocalScope, { environment: envWithFirstParam }, false) - .defineVariable('10', 'b', LocalScope, { environment: envWithBothParamFirstB }, false) - .defineVariable('6', 'b', LocalScope, { environment: envWithBothParam }, false) - .use('1', 'b', { environment: defaultEnvironment().pushEnv() }, false) - .use('9', 'a', { environment: envWithBothParamFirstB }, false) - .use('13', 'a', { environment: envWithBothParamSecondB }, false) - .exit('15', '+', envWithBothParamSecondB, {}, false) - .relates('15', '13') - .sameRead('13', '9') - .reads('9', '0') - .reads('13', '0') - .definedBy('0', '1', 'maybe') - .reads('1', '6') - .sameDef('10', '6') - ) - }) - describe('Using special argument', () => { - const envWithA = defaultEnvironment().pushEnv().defineParameter('a', '0', '1') - const envWithASpecial = envWithA.defineParameter('...', '2', '3') - - assertDataflow('Return ...', shell, 'function(a, ...) { foo(...) 
}', - emptyGraph() - .defineFunction('9', '9', ['7'], { - out: [], - unknownReferences: [], - in: [], - scope: LocalScope, - environments: envWithASpecial, - graph: new Set(['0', '2', '5', '7', '6']) - }) - .defineVariable('0', 'a', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .defineVariable('2', '...', LocalScope, { environment: envWithA }, false) - .use('5', '...', { environment: envWithASpecial }, false) - .call('7', 'foo', [argumentInCall('6')], { environment: envWithASpecial }, false) - .use('6',unnamedArgument('6'), { environment: envWithASpecial }, false) - .argument('7', '6') - .reads('6', '5') - .reads('5', '2') - ) - }) - describe('Bind environment to correct exit point', () => { - const envWithG = defaultEnvironment().pushEnv().defineFunction('g', '0', '4') - const envWithFirstY = envWithG.defineVariable('y', '5', '7') - const finalEnv = envWithG.defineVariable('y', '15', '17') - assertDataflow('Two possible exit points to bind y closure', shell, `function() { - g <- function() { y } - y <- 5 - if(z) - return(g) - y <- 3 - g -}`, - emptyGraph() - .defineFunction('20', '20', ['12', '18'], { - out: [], - unknownReferences: [], - in: [ { nodeId: '8', name: 'z', used: 'always', scope: LocalScope } ], - scope: LocalScope, - environments: finalEnv, - graph: new Set(['0', '5', '15', '8', '10', '18', '11', '12', '3']) - }) - .defineVariable('0', 'g', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .defineVariable('5', 'y', LocalScope, { environment: envWithG }, false) - .defineVariable('15', 'y', LocalScope, { environment: envWithFirstY }, false) - .use('8', 'z', { environment: envWithFirstY }, false) - .use('10', 'g', { environment: envWithFirstY }, false) - .use('18', 'g', { environment: finalEnv }, false) - .use('11', unnamedArgument('11'), { environment: envWithFirstY }, false) - .call('12', 'return', [argumentInCall('11')], { when: 'maybe', environment: envWithFirstY }, false) - .defineFunction('3', '3', 
['1'], { - out: [], - unknownReferences: [], - in: [], - scope: LocalScope, - environments: defaultEnvironment().pushEnv().pushEnv(), - graph: new Set(['1']) - }, - { environment: defaultEnvironment().pushEnv() }, false) - .definedBy('0', '3') - .reads('1', '5', 'maybe') - .reads('1', '15', 'maybe') - .reads('18', '0') - .reads('10', '0') - .reads('11', '10') - .argument('12', '11') - .returns('12', '11') - .reads('12', BuiltIn, 'maybe') - .calls('12', BuiltIn, 'maybe') - .sameDef('5', '15') - .use('1', 'y', { environment: defaultEnvironment().pushEnv().pushEnv() }, false) - ) - }) - describe('Late binding of environment variables', () => { - assertDataflow('define after function definition', shell, 'function() { x }; x <- 3', - emptyGraph() - .defineVariable('3', 'x') - .defineFunction('2', '2', ['0'], { - out: [], - unknownReferences: [], - in: [{ - nodeId: '0', - scope: LocalScope, - name: 'x', - used: 'always' - }], - scope: LocalScope, - graph: new Set(['0']), - environments: defaultEnvironment().pushEnv() - }) - .use('0', 'x', { environment: defaultEnvironment().pushEnv() }, false) - ) - }) - - describe('Nested Function Definitions', () => { - const withXParameterInOuter = defaultEnvironment().pushEnv().defineFunction('x', '1', '9') - const withinNestedFunctionWithoutParam = defaultEnvironment().pushEnv().pushEnv() - const withinNestedFunctionWithParam = withinNestedFunctionWithoutParam.defineParameter('x', '2', '3') - const withinNestedFunctionWithDef = defaultEnvironment().pushEnv().pushEnv().defineVariable('x', '4', '6') - const envWithA = defaultEnvironment().defineFunction('a', '0', '13') - const envWithAB = envWithA.defineVariable('b', '14', '16') - - assertDataflow('double nested functions', shell, 'a <- function() { x <- function(x) { x <- b }; x }; b <- 3; a', - emptyGraph() - .defineVariable('0', 'a') - .defineVariable('14', 'b', LocalScope, { environment: envWithA }) - .use('17', 'a', { environment: envWithAB }) - .reads('17', '0', 'always') - 
.defineFunction('12', '12', ['10'], { - out: [], - unknownReferences: [], - in: [], - scope: LocalScope, - graph: new Set(['10', '1', '8']), - environments: withXParameterInOuter - }) - .definedBy('0', '12') - - .use('10', 'x', { environment: withXParameterInOuter }, false) - .defineVariable('1', 'x', LocalScope, { environment: defaultEnvironment().pushEnv() }, false) - .defineFunction('8', '8', ['6'], { - out: [], - unknownReferences: [], - in: [{ - nodeId: '5', - scope: LocalScope, - name: 'x', - used: 'always' - }], - scope: LocalScope, - graph: new Set(['5', '4', '2']), - environments: withinNestedFunctionWithDef - }, - { environment: defaultEnvironment().pushEnv() }, false) - .reads('10', '1') - .definedBy('1', '8') - - .use('5', 'b', { environment: withinNestedFunctionWithParam }, false) - .exit('6', '<-', withinNestedFunctionWithDef, {}, false) - .relates('6', '4') - .relates('6', '5') - .defineVariable('4', 'x', LocalScope, { environment: withinNestedFunctionWithParam }, false) - .defineVariable('2', 'x', LocalScope, { environment: withinNestedFunctionWithoutParam }, false) - .definedBy('4', '5') - .sameDef('2', '4') - ) - }) -})) diff --git a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts deleted file mode 100644 index 5c51eb047d..0000000000 --- a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts +++ /dev/null @@ -1,121 +0,0 @@ -import { assertDataflow, withShell } from '../../../_helper/shell' -import { setSourceProvider } from '../../../../../src/dataflow/internal/process/functions/source' -import { BuiltIn, requestProviderFromFile, requestProviderFromText, sourcedDeterministicCountingIdGenerator } from '../../../../../src' -import { LocalScope } from '../../../../../src/dataflow/environments/scopes' -import { emptyGraph } from '../../../_helper/dataflowgraph-builder' -import { argumentInCall, defaultEnvironment, 
unnamedArgument } from '../../../_helper/environment-builder' - -describe('source', withShell(shell => { - // reset the source provider back to the default value after our tests - after(() => setSourceProvider(requestProviderFromFile())) - - const sources = { - simple: 'N <- 9', - recursive1: 'x <- 1\nsource("recursive2")', - recursive2: 'cat(x)\nsource("recursive1")' - } - setSourceProvider(requestProviderFromText(sources)) - - const envWithSimpleN = defaultEnvironment().defineVariable('N', 'simple-1:1-1:6-0', 'simple-1:1-1:6-2') - assertDataflow('simple source', shell, 'source("simple")\ncat(N)', emptyGraph() - .defineVariable('simple-1:1-1:6-0', 'N') - .call('3', 'source', [argumentInCall('2')], { environment: defaultEnvironment() }) - .call('7', 'cat', [argumentInCall('6')], { environment: envWithSimpleN }) - .use('5', 'N', { environment: envWithSimpleN }) - .use('2', unnamedArgument('2')) - .use('6', unnamedArgument('6'), { environment: envWithSimpleN }) - .argument('3', '2') - .reads('3', BuiltIn) - .reads('5', 'simple-1:1-1:6-0') - .reads('6', '5') - .argument('7', '6') - .reads('7', BuiltIn) - ) - - assertDataflow('multiple source', shell, 'source("simple")\nN <- 0\nsource("simple")\ncat(N)', emptyGraph() - .call('3', 'source', [argumentInCall('2')], { environment: defaultEnvironment() }) - .call('10', 'source', [argumentInCall('9')], - { environment: defaultEnvironment().defineVariable('N', '4', '6') }) - .call('14', 'cat', [argumentInCall('13')], - { environment: defaultEnvironment().defineVariable('N', 'simple-3:1-3:6-0', 'simple-3:1-3:6-2') }) - .defineVariable('simple-3:1-3:6-0', 'N', LocalScope, - { environment: defaultEnvironment().defineVariable('N', '4', '6') } - ) - .defineVariable('simple-1:1-1:6-0', 'N') - .defineVariable('4', 'N', LocalScope, { environment: envWithSimpleN }) - .use('2', unnamedArgument('2')) - .use('9', unnamedArgument('9'), { environment: defaultEnvironment().defineVariable('N', '4', '6') }) - .use('13', unnamedArgument('13'), 
{ environment: defaultEnvironment().defineVariable('N', 'simple-3:1-3:6-0', 'simple-3:1-3:6-2') }) - .use('12', 'N', { environment: defaultEnvironment().defineVariable('N', 'simple-3:1-3:6-0', 'simple-3:1-3:6-2') }) - .sameRead('3', '10') - .argument('3', '2') - .argument('14', '13') - .argument('10', '9') - .reads('3', BuiltIn) - .reads('10', BuiltIn) - .reads('14', BuiltIn) - .reads('13', '12') - .reads('12', 'simple-3:1-3:6-0') - .sameDef('simple-3:1-3:6-0', '4') - .sameDef('4', 'simple-1:1-1:6-0') - ) - - const envWithConditionalN = defaultEnvironment().defineVariable('N', 'simple-1:10-1:15-0', 'simple-1:10-1:15-2') - assertDataflow('conditional', shell, 'if (x) { source("simple") }\ncat(N)', emptyGraph() - .defineVariable('simple-1:10-1:15-0', 'N') - .call('4', 'source', [argumentInCall('3')], { environment: defaultEnvironment(), when: 'maybe' }) - .call('10', 'cat', [argumentInCall('9')], { environment: envWithConditionalN }) - .use('0', 'x') - .use('8', 'N', { environment: envWithConditionalN }) - .use('3', unnamedArgument('3')) - .use('9', unnamedArgument('9'), { environment: envWithConditionalN }) - .argument('4', '3') - .reads('4', BuiltIn, 'maybe') - .reads('8', 'simple-1:10-1:15-0') - .reads('9', '8') - .argument('10', '9') - .reads('10', BuiltIn) - ) - - // missing sources should just be ignored - assertDataflow('missing source', shell, 'source("missing")', emptyGraph() - .call('3', 'source',[argumentInCall('2')], { environment: defaultEnvironment() }) - .use('2', unnamedArgument('2')) - .argument('3', '2') - .reads('3', BuiltIn) - ) - - const recursive2Id = (id: number) => sourcedDeterministicCountingIdGenerator('recursive2', { start: { line: 2, column: 1 }, end: { line: 2, column: 6 } }, id)() - const envWithX = defaultEnvironment().defineVariable('x', '0', '2') - - assertDataflow('recursive source', shell, sources.recursive1, emptyGraph() - .call('6', 'source', [argumentInCall('5')], { environment: envWithX }) - .call(recursive2Id(7), 'source', 
[argumentInCall(recursive2Id(6))], { environment: envWithX }) - .call(recursive2Id(3), 'cat', [argumentInCall(recursive2Id(2))], { environment: envWithX }) - .defineVariable('0', 'x') - .use('5', unnamedArgument('5'), { environment: envWithX }) - .use(recursive2Id(6), unnamedArgument(recursive2Id(6)), { environment: envWithX }) - .use(recursive2Id(2), unnamedArgument(recursive2Id(2)), { environment: envWithX }) - .use(recursive2Id(1), 'x', { environment: envWithX }) - .argument('6', '5') - .reads('6', BuiltIn) - .reads(recursive2Id(3), BuiltIn) - .argument(recursive2Id(3), recursive2Id(2)) - .reads(recursive2Id(2), recursive2Id(1)) - .reads(recursive2Id(1), '0') - .argument(recursive2Id(7), recursive2Id(6)) - .reads(recursive2Id(7), BuiltIn) - ) - - // we currently don't support (and ignore) source calls with non-constant arguments! - assertDataflow('non-constant source', shell, 'x <- "recursive1"\nsource(x)', emptyGraph() - .call('6', 'source', [argumentInCall('5')], { environment: envWithX }) - .defineVariable('0', 'x') - .use('5', unnamedArgument('5'), { environment: envWithX }) - .use('4', 'x', { environment: envWithX }) - .argument('6', '5') - .reads('6', BuiltIn) - .reads('5', '4') - .reads('4', '0') - ) -})) diff --git a/test/functionality/dataflow/processing-of-elements/loops/dataflow-for-loop-tests.ts b/test/functionality/dataflow/processing-of-elements/loops/dataflow-for-loop-tests.ts new file mode 100644 index 0000000000..9fc9687cd4 --- /dev/null +++ b/test/functionality/dataflow/processing-of-elements/loops/dataflow-for-loop-tests.ts @@ -0,0 +1,161 @@ +import { assertDataflow, withShell } from '../../../_helper/shell' +import { emptyGraph } from '../../../_helper/dataflow/dataflowgraph-builder' +import { argumentInCall, defaultEnv } from '../../../_helper/dataflow/environment-builder' +import { BuiltIn } from '../../../../../src/dataflow' +import { EmptyArgument, OperatorDatabase } from '../../../../../src' +import { label } from 
'../../../_helper/label' + +describe('for', withShell(shell => { + assertDataflow(label('Single-vector for Loop', ['for-loop', 'name-normal', 'numbers']), + shell, 'for(i in 0) i', emptyGraph() + .use('2', 'i', { controlDependencies: [] }) + .reads('2', '0') + .argument('4', '2') + .call('4', 'for', [argumentInCall('0'), argumentInCall('1'), argumentInCall('2')], { returns: [], reads: ['0', '1', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('i', '0', '4') }) + .argument('4', ['0', '1']) + .nse('4', '2') + .defineVariable('0', 'i', { definedBy: ['1'] }) + .constant('1') + ) + + describe('Potential redefinition with break', () => { + assertDataflow(label('Potential redefinition inside the same loop', ['repeat-loop', 'name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'if', 'break']), + shell, + `repeat { + x <- 2 + if(z) break + x <- 3 +} +x`, emptyGraph() + .use('5', 'z') + .use('14', 'x') + .reads('14', ['2', '9']) + .call('4', '<-', [argumentInCall('2'), argumentInCall('3')], { returns: ['2'], reads: [BuiltIn] }) + .sameRead('4', '11') + .call('6', 'break', [], { returns: [], reads: [BuiltIn], controlDependency: ['8'], environment: defaultEnv().defineVariable('x', '2', '4') }) + .call('8', 'if', [argumentInCall('5'), argumentInCall('6'), EmptyArgument], { returns: ['6'], reads: [BuiltIn, '5'], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '2', '4') }) + .call('11', '<-', [argumentInCall('9'), argumentInCall('10')], { returns: ['9'], reads: [BuiltIn], controlDependency: [], environment: defaultEnv().defineVariable('x', '2', '4') }) + .call('12', '{', [argumentInCall('4'), argumentInCall('8'), argumentInCall('11')], { returns: ['11'], reads: [BuiltIn], environment: defaultEnv().defineVariable('x', '2', '4').defineVariable('x', '9', '11', []) }) + .call('13', 'repeat', [argumentInCall('12')], { returns: [], reads: [BuiltIn] }) + .nse('13', '12') + .constant('3') + .defineVariable('2', 'x', { definedBy: ['3', 
'4'] }) + .sameDef('2', '9') + .constant('10') + .defineVariable('9', 'x', { definedBy: ['10', '11'], controlDependency: [] }) + ) + }) + + assertDataflow(label('Read in for Loop', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'newlines', 'for-loop']), shell, 'x <- 12\nfor(i in 1:10) x ', emptyGraph() + .use('7', 'x', { controlDependencies: [] }) + .reads('7', '0') + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .call('6', ':', [argumentInCall('4'), argumentInCall('5')], { returns: [], reads: ['4', '5', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '0', '2') }) + .call('9', 'for', [argumentInCall('3'), argumentInCall('6'), argumentInCall('7')], { returns: [], reads: ['3', '6', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '0', '2').defineVariable('i', '3', '9') }) + .nse('9', '7') + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .defineVariable('3', 'i', { definedBy: ['6'] }) + .constant('4') + .constant('5') + ) + assertDataflow(label('Read after for loop', ['for-loop', 'name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'newlines']), shell, 'for(i in 1:10) { x <- 12 }\n x', emptyGraph() + .use('11', 'x') + .reads('11', '6') + .call('3', ':', [argumentInCall('1'), argumentInCall('2')], { returns: [], reads: ['1', '2', BuiltIn], onlyBuiltIn: true }) + .call('8', '<-', [argumentInCall('6'), argumentInCall('7')], { returns: ['6'], reads: [BuiltIn], controlDependency: [] }) + .call('9', '{', [argumentInCall('8')], { returns: ['8'], reads: [BuiltIn], controlDependency: [] }) + .call('10', 'for', [argumentInCall('0'), argumentInCall('3'), argumentInCall('9')], { returns: [], reads: ['0', '3', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('i', '0', '10') }) + .nse('10', '9') + .defineVariable('0', 'i', { definedBy: ['3'] }) + .constant('1') + .constant('2') + .constant('7', { 
controlDependency: ['10'] }) + .defineVariable('6', 'x', { definedBy: ['7', '8'], controlDependency: [] }) + ) + + + assertDataflow(label('Read after for loop with outer def', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'newlines', 'for-loop']), shell, 'x <- 9\nfor(i in 1:10) { x <- 12 }\n x', emptyGraph() + .use('14', 'x') + .reads('14', ['0', '9']) + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .sameRead('2', '11') + .call('6', ':', [argumentInCall('4'), argumentInCall('5')], { returns: [], reads: ['4', '5', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '0', '2') }) + .call('11', '<-', [argumentInCall('9'), argumentInCall('10')], { returns: ['9'], reads: [BuiltIn], controlDependency: [] }) + .call('12', '{', [argumentInCall('11')], { returns: ['11'], reads: [BuiltIn], controlDependency: [] }) + .call('13', 'for', [argumentInCall('3'), argumentInCall('6'), argumentInCall('12')], { returns: [], reads: ['3', '6', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '0', '2', []).defineVariable('x', '9', '11', []).defineVariable('i', '3', '13') }) + .nse('13', '12') + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .sameDef('0', '9') + .defineVariable('3', 'i', { definedBy: ['6'] }) + .constant('4') + .constant('5') + .constant('10', { controlDependency: ['13'] }) + .defineVariable('9', 'x', { definedBy: ['10', '11'], controlDependency: [] }) + ) + assertDataflow(label('redefinition within loop', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'newlines', 'for-loop']), shell, 'x <- 9\nfor(i in 1:10) { x <- x }\n x', emptyGraph() + .use('10', 'x', { controlDependencies: [] }) + .reads('10', ['9', '0']) + .use('14', 'x') + .reads('14', ['0', '9']) + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .sameRead('2', '11') + .call('6', ':', 
[argumentInCall('4'), argumentInCall('5')], { returns: [], reads: ['4', '5', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '0', '2') }) + .call('11', '<-', [argumentInCall('9'), argumentInCall('10')], { returns: ['9'], reads: [BuiltIn], controlDependency: [] }) + .call('12', '{', [argumentInCall('11')], { returns: ['11'], reads: [BuiltIn], controlDependency: [] }) + .call('13', 'for', [argumentInCall('3'), argumentInCall('6'), argumentInCall('12')], { returns: [], reads: ['3', '6', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '0', '2', []).defineVariable('x', '9', '11', []).defineVariable('i', '3', '13') }) + .nse('13', '12') + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .sameDef('0', '9') + .defineVariable('3', 'i', { definedBy: ['6'] }) + .constant('4') + .constant('5') + .defineVariable('9', 'x', { definedBy: ['10', '11'], controlDependency: [] }) + ) + + assertDataflow(label('double redefinition within loop', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'newlines', 'for-loop', 'semicolons']), shell, 'x <- 9\nfor(i in 1:10) { x <- x; x <- x }\n x', emptyGraph() + .use('10', 'x', { controlDependencies: [] }) + .reads('10', ['12', '0']) + .use('13', 'x', { controlDependencies: ['16'] }) + .reads('13', '9') + .use('17', 'x') + .reads('17', ['0', '9', '12']) + .call('2', '<-', [argumentInCall('0'), argumentInCall('1')], { returns: ['0'], reads: [BuiltIn] }) + .sameRead('2', ['11', '14']) + .call('6', ':', [argumentInCall('4'), argumentInCall('5')], { returns: [], reads: ['4', '5', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '0', '2') }) + .call('11', '<-', [argumentInCall('9'), argumentInCall('10')], { returns: ['9'], reads: [BuiltIn], controlDependency: [] }) + .sameRead('11', '14') + .call('14', '<-', [argumentInCall('12'), argumentInCall('13')], { returns: ['12'], reads: [BuiltIn], controlDependency: [], environment: 
defaultEnv().defineVariable('x', '9', '11', ['16']) }) + .call('15', '{', [argumentInCall('11'), argumentInCall('14')], { returns: ['14'], reads: [BuiltIn], controlDependency: [], environment: defaultEnv().defineVariable('x', '9', '11', ['16']) }) + .call('16', 'for', [argumentInCall('3'), argumentInCall('6'), argumentInCall('15')], { returns: [], reads: ['3', '6', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('x', '0', '2', []).defineVariable('x', '9', '11', []).defineVariable('x', '12', '14', []).defineVariable('i', '3', '16') }) + .nse('16', '15') + .constant('1') + .defineVariable('0', 'x', { definedBy: ['1', '2'] }) + .sameDef('0', ['9', '12']) + .defineVariable('3', 'i', { definedBy: ['6'] }) + .constant('4') + .constant('5') + .defineVariable('9', 'x', { definedBy: ['10', '11'], controlDependency: [] }) + .sameDef('9', '12') + .defineVariable('12', 'x', { definedBy: ['13', '14'], controlDependency: [] }) + ) + + assertDataflow(label('loop-variable redefined within loop', ['name-normal', 'for-loop', 'semicolons', 'newlines', 'numbers']), shell, 'for(i in 1:10) { i; i <- 12 }\n i', emptyGraph() + .use('6', 'i', { controlDependencies: [] }) + .reads('6', '0') + .use('12', 'i') + .reads('12', ['0', '7']) + .call('3', ':', [argumentInCall('1'), argumentInCall('2')], { returns: [], reads: ['1', '2', BuiltIn], onlyBuiltIn: true }) + .call('9', '<-', [argumentInCall('7'), argumentInCall('8')], { returns: ['7'], reads: [BuiltIn], controlDependency: [] }) + .call('10', '{', [argumentInCall('6'), argumentInCall('9')], { returns: ['9'], reads: [BuiltIn], controlDependency: [] }) + .call('11', 'for', [argumentInCall('0'), argumentInCall('3'), argumentInCall('10')], { returns: [], reads: ['0', '3', BuiltIn], onlyBuiltIn: true, environment: defaultEnv().defineVariable('i', '0', '11', []).defineVariable('i', '7', '9', []) }) + .nse('11', '10') + .defineVariable('0', 'i', { definedBy: ['3'] }) + .constant('1') + .constant('2') + .constant('8', { 
controlDependency: ['11'] }) + .defineVariable('7', 'i', { definedBy: ['8', '9'], controlDependency: [] }) + ) +})) diff --git a/test/functionality/dataflow/processing-of-elements/loops/dataflow-while-loop-tests.ts b/test/functionality/dataflow/processing-of-elements/loops/dataflow-while-loop-tests.ts new file mode 100644 index 0000000000..ae64670bbf --- /dev/null +++ b/test/functionality/dataflow/processing-of-elements/loops/dataflow-while-loop-tests.ts @@ -0,0 +1,53 @@ +import { assertDataflow, withShell } from '../../../_helper/shell' +import { emptyGraph } from '../../../_helper/dataflow/dataflowgraph-builder' +import { argumentInCall, defaultEnv } from '../../../_helper/dataflow/environment-builder' +import { BuiltIn } from '../../../../../src/dataflow' +import { label } from '../../../_helper/label' +import { OperatorDatabase } from '../../../../../src' + +describe('While', withShell(shell => { + assertDataflow(label('simple constant while', ['while-loop', 'logical', 'numbers']), shell, 'while (TRUE) 2', emptyGraph() + .call('3', 'while', [argumentInCall('0'), argumentInCall('1')], { returns: [], reads: ['0', BuiltIn], onlyBuiltIn: true }) + .nse('3', '1') + .constant('0') + .constant('1', { controlDependency: [] }) + ) + assertDataflow(label('using variable in body', ['while-loop', 'logical', 'name-normal']), shell, 'while (TRUE) x', emptyGraph() + .use('1', 'x', { controlDependencies: [] }) + .call('3', 'while', [argumentInCall('0'), argumentInCall('1')], { returns: [], reads: ['0', BuiltIn], onlyBuiltIn: true }) + .nse('3', '1') + .constant('0') + ) + assertDataflow(label('assignment in loop body', ['while-loop', 'logical', 'name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers']), shell, 'while (TRUE) { x <- 3 }', emptyGraph() + .call('5', '<-', [argumentInCall('3'), argumentInCall('4')], { returns: ['3'], reads: [BuiltIn], controlDependency: [] }) + .call('6', '{', [argumentInCall('5')], { returns: ['5'], reads: [BuiltIn], controlDependency: [] 
}) + .call('7', 'while', [argumentInCall('0'), argumentInCall('6')], { returns: [], reads: ['0', BuiltIn], onlyBuiltIn: true }) + .nse('7', '6') + .constant('0') + .constant('4', { controlDependency: ['7'] }) + .defineVariable('3', 'x', { definedBy: ['4', '5'], controlDependency: [] }) + ) + assertDataflow(label('def compare in loop', ['while-loop', 'grouping', ...OperatorDatabase['<-'].capabilities, 'name-normal', 'infix-calls', 'binary-operator', ...OperatorDatabase['-'].capabilities, ...OperatorDatabase['>'].capabilities, 'precedence']), shell, 'while ((x <- x - 1) > 0) { x }', emptyGraph() + .use('3', 'x') + .use('12', 'x', { controlDependencies: [] }) + .reads('12', '2') + .call('5', '-', [argumentInCall('3'), argumentInCall('4')], { returns: [], reads: [BuiltIn, '3', '4'], onlyBuiltIn: true }) + .call('6', '<-', [argumentInCall('2'), argumentInCall('5')], { returns: ['2'], reads: [BuiltIn] }) + .call('7', '(', [argumentInCall('6')], { returns: ['6'], reads: [BuiltIn] }) + .call('9', '>', [argumentInCall('7'), argumentInCall('8')], { returns: [], reads: [BuiltIn, '7', '8'], onlyBuiltIn: true }) + .call('13', '{', [argumentInCall('12')], { returns: ['12'], reads: [BuiltIn], controlDependency: [], environment: defaultEnv().defineVariable('x', '2', '6') }) + .call('14', 'while', [argumentInCall('9'), argumentInCall('13')], { returns: [], reads: ['9', BuiltIn], onlyBuiltIn: true }) + .nse('14', '13') + .constant('4') + .defineVariable('2', 'x', { definedBy: ['5', '6'] }) + .constant('8') + ) + assertDataflow(label('Endless while loop with variables', ['while-loop', 'name-normal']), shell, 'while(x) y', emptyGraph() + .use('0', 'x') + .use('1', 'y', { controlDependencies: [] }) + .argument('3', '0') + .argument('3', '1') + .call('3', 'while', [argumentInCall('0'), argumentInCall('1')], { returns: [], reads: ['0', BuiltIn], onlyBuiltIn: true }) + .nse('3', '1') + ) +})) diff --git a/test/functionality/dataflow/processing-of-elements/loops/for-loop-tests.ts 
b/test/functionality/dataflow/processing-of-elements/loops/for-loop-tests.ts deleted file mode 100644 index 51ab1e601b..0000000000 --- a/test/functionality/dataflow/processing-of-elements/loops/for-loop-tests.ts +++ /dev/null @@ -1,138 +0,0 @@ -import { assertDataflow, withShell } from '../../../_helper/shell' -import { LocalScope } from '../../../../../src/dataflow/environments/scopes' -import { emptyGraph } from '../../../_helper/dataflowgraph-builder' -import { defaultEnvironment } from '../../../_helper/environment-builder' - -describe('for', withShell(shell => { - assertDataflow('Single-vector for Loop', - shell, - 'for(i in 0) i ', - emptyGraph() - .defineVariable('0', 'i') - .use('2', 'i', { when: 'maybe', environment: defaultEnvironment().defineVariable('i', '0', '4') }) - .reads('2', '0', 'maybe') - ) - - describe('Potential redefinition with break', () => { - const withXDefined = defaultEnvironment().defineVariable('x', '0', '2') - const otherXDefined = defaultEnvironment().defineVariable('x', '7', '9', LocalScope, 'maybe') - assertDataflow('Potential redefinition inside the same loop', - shell, - `repeat { - x <- 2 - if(z) break - x <- 3 -} -x`, - emptyGraph() - .defineVariable('0', 'x') - .defineVariable('7', 'x', LocalScope, { environment: withXDefined }) - .use('3', 'z', { environment: withXDefined }) - .use('12', 'x', { environment: withXDefined.appendWritesOf(otherXDefined) }) - .reads('12', '0', 'always') - .reads('12', '7', 'maybe') - .sameDef('0', '7', 'maybe') - ) - }) - - const envWithX = () => defaultEnvironment().defineVariable('x', '0', '2') - assertDataflow('Read in for Loop', - shell, - 'x <- 12\nfor(i in 1:10) x ', - emptyGraph() - .defineVariable('0', 'x') - .defineVariable('3', 'i', LocalScope, { environment: envWithX() }) - .use('7', 'x', { when: 'maybe', environment: envWithX().defineVariable('i', '3', '9') }) - .reads('7', '0', 'maybe') - ) - const envWithI = () => defaultEnvironment().defineVariable('i', '0', '8') - 
assertDataflow('Read after for loop', - shell, - 'for(i in 1:10) { x <- 12 }\n x', - emptyGraph() - .defineVariable('0', 'i') - .defineVariable('4', 'x', LocalScope, { when: 'maybe', environment: envWithI() }) - .use('9', 'x', { environment: envWithI().defineVariable('x', '4', '6', LocalScope, 'maybe') }) - .reads('9', '4', 'maybe') - ) - - - const envWithFirstX = () => defaultEnvironment().defineVariable('x', '0', '2') - const envInFor = () => envWithFirstX().defineVariable('i', '3', '11') - const envOutFor = () => defaultEnvironment().defineVariable('i', '3', '11').defineVariable('x', '0', '2') - const envWithSecondX = () => defaultEnvironment().defineVariable('x', '7', '9', LocalScope, 'maybe') - - assertDataflow('Read after for loop with outer def', - shell, - 'x <- 9\nfor(i in 1:10) { x <- 12 }\n x', - emptyGraph() - .defineVariable('0', 'x') - .defineVariable('3', 'i', LocalScope, { environment: envWithFirstX() }) - .defineVariable('7', 'x', LocalScope, { when: 'maybe', environment: envInFor() }) - .use('12', 'x', { environment: envOutFor().appendWritesOf(envWithSecondX()) }) - .reads('12', '0') - .reads('12', '7', 'maybe') - .sameDef('0', '7', 'maybe') - ) - assertDataflow('Redefinition within loop', - shell, - 'x <- 9\nfor(i in 1:10) { x <- x }\n x', - emptyGraph() - .defineVariable('0', 'x') - .defineVariable('3', 'i', LocalScope, { environment: envWithFirstX() }) - .defineVariable('7', 'x', LocalScope, { when: 'maybe', environment: envInFor() }) - .use('8', 'x', { when: 'maybe', environment: envInFor() }) - .use('12', 'x', { environment: envOutFor().appendWritesOf(envWithSecondX()) }) - .reads('12', '0') - .reads('12', '7', 'maybe') - .reads('8', '0', 'maybe') - .reads('8', '7', 'maybe') - .definedBy('7', '8') - .sameDef('0', '7', 'maybe') - ) - - const envInLargeFor = () => envWithFirstX().defineVariable('i', '3', '14') - const envInLargeFor2 = () => envInLargeFor().defineVariable('x', '7', '9') - const envOutLargeFor = () => 
envInLargeFor().defineVariable('x', '10', '12', LocalScope, 'maybe') - - assertDataflow('Redefinition within loop', - shell, - 'x <- 9\nfor(i in 1:10) { x <- x; x <- x }\n x', - emptyGraph() - .defineVariable('0', 'x') - .defineVariable('3', 'i', LocalScope, { environment: envWithFirstX() }) - .defineVariable('7', 'x', LocalScope, { when: 'maybe', environment: envInLargeFor() }) - .use('8', 'x', { when: 'maybe', environment: envInLargeFor() }) - .defineVariable('10', 'x', LocalScope, { when: 'maybe', environment: envInLargeFor2() }) - .use('11', 'x', /* this is wrong, but uncertainty is not fully supported in the impl atm.*/ { environment: envInLargeFor2() }) - .use('15', 'x',{ environment: envWithFirstX().appendWritesOf(envOutLargeFor()) }) - .reads('11', '7')// second x <- *x* always reads first *x* <- x - .reads('8', '0', 'maybe') - .reads('8', '10', 'maybe') - .reads('15', '0') - .reads('15', '10', 'maybe') - .definedBy('7', '8') - .definedBy('10', '11') - .sameDef('0', '7', 'maybe') - .sameDef('0', '10', 'maybe') - .sameDef('7', '10') // both in same loop execution - ) - - const forLoopWithI = () => defaultEnvironment().defineVariable('i', '0', '9') - const forLoopWithIAfter = () => defaultEnvironment().defineVariable('i', '0', '9', LocalScope, 'maybe') - const forLoopAfterI = () => defaultEnvironment().defineVariable('i', '5', '7', LocalScope, 'maybe') - - assertDataflow('Redefinition within loop', - shell, - 'for(i in 1:10) { i; i <- 12 }\n i', - emptyGraph() - .defineVariable('0', 'i') - .defineVariable('5', 'i', LocalScope, { when: 'maybe', environment: forLoopWithI() }) - .use('4', 'i', { when: 'maybe', environment: forLoopWithI() }) - .use('10', 'i', { environment: forLoopWithIAfter().appendWritesOf(forLoopAfterI()) }) - .reads('4', '0', 'maybe') - .reads('10', '5', 'maybe') - .reads('10', '0', 'maybe') - .sameDef('5', '0') - ) -})) diff --git a/test/functionality/dataflow/processing-of-elements/loops/while-loop-tests.ts 
b/test/functionality/dataflow/processing-of-elements/loops/while-loop-tests.ts deleted file mode 100644 index f8d184f3de..0000000000 --- a/test/functionality/dataflow/processing-of-elements/loops/while-loop-tests.ts +++ /dev/null @@ -1,39 +0,0 @@ -import { LocalScope } from '../../../../../src/dataflow/environments/scopes' -import { assertDataflow, withShell } from '../../../_helper/shell' -import { emptyGraph } from '../../../_helper/dataflowgraph-builder' -import { defaultEnvironment } from '../../../_helper/environment-builder' - -describe('while', withShell(shell => { - assertDataflow('simple constant while', shell, - 'while (TRUE) 2', - emptyGraph() - ) - assertDataflow('using variable in body', shell, - 'while (TRUE) x', - emptyGraph().use('1', 'x', { when: 'maybe' }) - ) - assertDataflow('assignment in loop body', shell, - 'while (TRUE) { x <- 3 }', - emptyGraph().defineVariable('1', 'x', LocalScope, { when: 'maybe' }) - ) - assertDataflow('def compare in loop', shell, 'while ((x <- x - 1) > 0) { x }', - emptyGraph() - .defineVariable('0', 'x') - .use('1', 'x') - .use('7', 'x', { when: 'maybe', environment: defaultEnvironment().defineVariable('x', '0', '4') }) - .reads('7', '0', 'maybe') - .definedBy('0', '1') - ) - assertDataflow('Endless while loop', - shell, - 'while(TRUE) 1', - emptyGraph() - ) - assertDataflow('Endless while loop with variables', - shell, - 'while(x) y', - emptyGraph() - .use('0', 'x') - .use('1', 'y', { when: 'maybe' }) - ) -})) diff --git a/test/functionality/dataflow/processing-of-elements/processing-of-elements.ts b/test/functionality/dataflow/processing-of-elements/processing-of-elements.ts index 0caac30f7e..ddb40c268f 100644 --- a/test/functionality/dataflow/processing-of-elements/processing-of-elements.ts +++ b/test/functionality/dataflow/processing-of-elements/processing-of-elements.ts @@ -2,19 +2,19 @@ import { requireAllTestsInFolder } from '../../_helper/collect-tests' import path from 'path' describe('Processing of 
Elements', () => { - describe('atomic', () => + describe('Atomic', () => requireAllTestsInFolder(path.join(__dirname, 'atomic')) ) - describe('expression-lists', () => + describe('Expression Lists', () => requireAllTestsInFolder(path.join(__dirname, 'expression-lists')) ) - describe('functions', () => + describe('Functions', () => requireAllTestsInFolder(path.join(__dirname, 'functions')) ) - describe('loops', () => + describe('Loops', () => requireAllTestsInFolder(path.join(__dirname, 'loops')) ) }) diff --git a/test/functionality/main.spec.ts b/test/functionality/main.spec.ts index 52f19e3510..08adbcb138 100644 --- a/test/functionality/main.spec.ts +++ b/test/functionality/main.spec.ts @@ -4,36 +4,13 @@ * @module */ -import { log, LogLevel } from '../../src/util/log' +import { LogLevel } from '../../src/util/log' import chai from 'chai' +import { setMinLevelOfAllLogs } from './_helper/log' chai.config.includeStack = true chai.config.showDiff = true chai.config.truncateThreshold = 0 -/** - * Update the minimum level of all flowr loggers (including the detached {@link serverLog}). - * @param minLevel - The new minimum level to show messages from (inclusive) - * @param log2File - Whether to log to a file as well - */ -function setMinLevelOfAllLogs(minLevel: LogLevel, log2File = false) { - if(log2File) { - log.logToFile() - } - log.updateSettings(logger => { - logger.settings.minLevel = minLevel - }) -} - export const VERBOSE_TESTS = process.argv.includes('--verbose') before(() => setMinLevelOfAllLogs(VERBOSE_TESTS ? 
LogLevel.Trace : LogLevel.Error, VERBOSE_TESTS)) - -/** controlled with the `--test-installation` parameter */ -export const RUN_INSTALLATION_TESTS = process.argv.includes('--test-installation') - -export function isInstallTest(test: Mocha.Context): void { - if(!RUN_INSTALLATION_TESTS) { - console.warn('skipping installation test (set RUN_INSTALLATION_TESTS to run it)') - test.skip() - } -} diff --git a/test/functionality/pipelines/create/create-tests.ts b/test/functionality/pipelines/create/create-tests.ts new file mode 100644 index 0000000000..2a845e9bcb --- /dev/null +++ b/test/functionality/pipelines/create/create-tests.ts @@ -0,0 +1,164 @@ +import { createPipeline } from '../../../../src/core/steps/pipeline' +import type { IPipelineStep, PipelineStepName } from '../../../../src/core/steps/step' +import { expect } from 'chai' +import { PARSE_WITH_R_SHELL_STEP } from '../../../../src/core/steps/all/core/00-parse' +import { allPermutations } from '../../../../src/util/arrays' +import { NORMALIZE } from '../../../../src/core/steps/all/core/10-normalize' +import { STATIC_DATAFLOW } from '../../../../src/core/steps/all/core/20-dataflow' +import { STATIC_SLICE } from '../../../../src/core/steps/all/static-slicing/00-slice' +import { NAIVE_RECONSTRUCT } from '../../../../src/core/steps/all/static-slicing/10-reconstruct' + +describe('Create Pipeline (includes dependency checks)', () => { + describe('error-cases', () => { + function negative(name: string, rawSteps: IPipelineStep[], message: string | RegExp) { + it(`${name} (all permutations)`, () => { + for(const steps of allPermutations(rawSteps)) { + expect(() => createPipeline(...steps)).to.throw(message) + } + }) + } + describe('without decorators', () => { + negative('should throw on empty input', [], /empty/) + negative('should throw on duplicate names', + [PARSE_WITH_R_SHELL_STEP, PARSE_WITH_R_SHELL_STEP], /duplicate|not unique/) + negative('should throw on invalid dependencies', + [PARSE_WITH_R_SHELL_STEP, { + 
...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + dependencies: ['foo'] + }], /invalid dependency|not exist/) + negative('should throw on cycles', + [PARSE_WITH_R_SHELL_STEP, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v1', + dependencies: ['parse-v2'] + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + dependencies: ['parse-v1'] + } + ], /cycle/) + }) + describe('with decorators', () => { + negative('should throw on decoration cycles', + [PARSE_WITH_R_SHELL_STEP, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v1', + decorates: 'parse', + dependencies: ['parse-v2'] + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + decorates: 'parse', + dependencies: ['parse-v1'] + } + ], /decoration cycle/) + negative('decorate non-existing step', + [{ + ...PARSE_WITH_R_SHELL_STEP, + decorates: 'foo' + }], /decorates.+not exist/) + }) + }) + describe('default behavior', () => { + function positive(name: string, rawSteps: IPipelineStep[], expected: PipelineStepName[], indexOfFirstPerFile: number = expected.length) { + it(`${name} (all permutations)`, () => { + for(const steps of allPermutations(rawSteps)) { + const pipeline = createPipeline(...steps) + expect([...pipeline.steps.keys()]).to.have.members(expected, `should have the correct keys for ${JSON.stringify(steps)}`) + expect(pipeline.order).to.have.ordered.members(expected, `should have the correct keys for ${JSON.stringify(steps)}`) + expect(pipeline.firstStepPerRequest).to.equal(indexOfFirstPerFile, `should have the correct firstStepPerRequest for ${JSON.stringify(steps)}`) + } + }) + } + + describe('without decorators', () => { + positive('should work on a single step', [PARSE_WITH_R_SHELL_STEP], ['parse']) + positive('should work on a single step with dependencies', [ + PARSE_WITH_R_SHELL_STEP, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + dependencies: ['parse'] + } + ], ['parse', 'parse-v2']) + // they will be shuffled in all permutations + positive('default pipeline', [ + 
PARSE_WITH_R_SHELL_STEP, + NORMALIZE, + STATIC_DATAFLOW, + STATIC_SLICE, + NAIVE_RECONSTRUCT + ], ['parse', 'normalize', 'dataflow', 'slice', 'reconstruct'], 3) + }) + describe('with decorators', () => { + positive('simple decorator on first step', [ + PARSE_WITH_R_SHELL_STEP, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + dependencies: [], + decorates: 'parse', + } + ], ['parse', 'parse-v2'], 2) + positive('decorators can depend on each other', [ + PARSE_WITH_R_SHELL_STEP, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + decorates: 'parse', + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v3', + dependencies: ['parse-v2'], + decorates: 'parse', + } + ], ['parse', 'parse-v2', 'parse-v3']) + positive('not the first, and multiple decorators', [ + PARSE_WITH_R_SHELL_STEP, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + dependencies: ['parse'], + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v3', + decorates: 'parse-v2', + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v4', + dependencies: ['parse-v2'] + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v6', + dependencies: ['parse-v4'] + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v5', + decorates: 'parse-v6', + } + ], ['parse', 'parse-v2', 'parse-v3', 'parse-v4', 'parse-v6', 'parse-v5']) + positive('default pipeline with dataflow decoration', [ + PARSE_WITH_R_SHELL_STEP, + NORMALIZE, + STATIC_DATAFLOW, + { + ...STATIC_DATAFLOW, + name: 'dataflow-decorator', + decorates: 'dataflow' + }, + STATIC_SLICE, + NAIVE_RECONSTRUCT + ], ['parse', 'normalize', 'dataflow', 'dataflow-decorator', 'slice', 'reconstruct'], 4) + }) + }) +}) diff --git a/test/functionality/pipelines/pipelines.spec.ts b/test/functionality/pipelines/pipelines.spec.ts new file mode 100644 index 0000000000..c929e370e2 --- /dev/null +++ b/test/functionality/pipelines/pipelines.spec.ts @@ -0,0 +1,6 @@ +import { requireAllTestsInFolder } from '../_helper/collect-tests' +import path from 'node:path' + 
+describe('Pipelines', () => { + describe('create', () => requireAllTestsInFolder(path.join(__dirname, 'create'))) +}) diff --git a/test/functionality/r-bridge/executor.ts b/test/functionality/r-bridge/executor.ts index d57230dc27..d26bc1b35d 100644 --- a/test/functionality/r-bridge/executor.ts +++ b/test/functionality/r-bridge/executor.ts @@ -19,11 +19,5 @@ describe('RShellExecutor', function() { const error = executor.run('a', true) assert.match(error, /Error.*'a'/g) assert.match(error, /halted/g) - - // check continuing on error - executor.continueOnError() - const ignored = executor.run('a', true) - assert.match(ignored, /Error.*'a'/g) - assert.notMatch(ignored, /halted/g) }) }) diff --git a/test/functionality/r-bridge/lang/ast/parse-access.ts b/test/functionality/r-bridge/lang/ast/parse-access.ts index f9f30eb019..d27a2ed72f 100644 --- a/test/functionality/r-bridge/lang/ast/parse-access.ts +++ b/test/functionality/r-bridge/lang/ast/parse-access.ts @@ -1,270 +1,227 @@ import { assertAst, withShell } from '../../../_helper/shell' import { exprList, numVal } from '../../../_helper/ast-builder' import { rangeFrom } from '../../../../../src/util/range' -import { RType } from '../../../../../src/r-bridge' +import { EmptyArgument, OperatorDatabase, RType } from '../../../../../src' +import { label } from '../../../_helper/label' describe('Parse value access', withShell(shell => { describe('Single bracket', () => { - assertAst('Empty', shell, 'a[]', exprList({ - type: RType.Access, - location: rangeFrom(1, 2, 1, 2), - lexeme: '[', - operator: '[', - info: {}, - accessed: { - type: RType.Symbol, - location: rangeFrom(1, 1, 1, 1), - namespace: undefined, - lexeme: 'a', - content: 'a', - info: {} - }, - access: [] - })) - assertAst('One value', shell, 'a[1]', exprList({ - type: RType.Access, - location: rangeFrom(1, 2, 1, 2), - lexeme: '[', - operator: '[', - info: {}, - accessed: { - type: RType.Symbol, - location: rangeFrom(1, 1, 1, 1), - namespace: undefined, - 
lexeme: 'a', - content: 'a', - info: {} - }, - access: [{ - type: RType.Argument, - location: rangeFrom(1, 3, 1, 3), - lexeme: '1', - name: undefined, + assertAst(label('Empty Access', ['name-normal', 'single-bracket-access', 'access-with-empty']), + shell, 'a[]', exprList({ + type: RType.Access, + location: rangeFrom(1, 2, 1, 2), + lexeme: '[', + operator: '[', + info: {}, + accessed: { + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + namespace: undefined, + lexeme: 'a', + content: 'a', + info: {} + }, + access: [] + }) + ) + assertAst(label('One Constant', ['name-normal', 'single-bracket-access', 'numbers']), + shell, 'a[1]', exprList({ + type: RType.Access, + location: rangeFrom(1, 2, 1, 2), + lexeme: '[', + operator: '[', info: {}, - value: { - type: RType.Number, + accessed: { + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + namespace: undefined, + lexeme: 'a', + content: 'a', + info: {} + }, + access: [{ + type: RType.Argument, location: rangeFrom(1, 3, 1, 3), lexeme: '1', - content: numVal(1), - info: {} - } - }] - })) - assertAst('One variable', shell, 'a[x]', exprList({ - type: RType.Access, - location: rangeFrom(1, 2, 1, 2), - lexeme: '[', - operator: '[', - info: {}, - accessed: { - type: RType.Symbol, - location: rangeFrom(1, 1, 1, 1), - namespace: undefined, - lexeme: 'a', - content: 'a', - info: {} - }, - access: [{ - type: RType.Argument, - location: rangeFrom(1, 3, 1, 3), - lexeme: 'x', - name: undefined, + name: undefined, + info: {}, + value: { + type: RType.Number, + location: rangeFrom(1, 3, 1, 3), + lexeme: '1', + content: numVal(1), + info: {} + } + }] + }) + ) + assertAst(label('One Variable', ['name-normal', 'single-bracket-access']), + shell, 'a[x]', exprList({ + type: RType.Access, + location: rangeFrom(1, 2, 1, 2), + lexeme: '[', + operator: '[', info: {}, - value: { + accessed: { type: RType.Symbol, - location: rangeFrom(1, 3, 1, 3), + location: rangeFrom(1, 1, 1, 1), namespace: undefined, - lexeme: 'x', - content: 
'x', + lexeme: 'a', + content: 'a', info: {} - } - }] - })) - assertAst('One expression', shell, 'a[x + 3]', exprList({ - type: RType.Access, - location: rangeFrom(1, 2, 1, 2), - lexeme: '[', - operator: '[', - info: {}, - accessed: { - type: RType.Symbol, - location: rangeFrom(1, 1, 1, 1), - namespace: undefined, - lexeme: 'a', - content: 'a', - info: {} - }, - access: [{ - type: RType.Argument, - location: rangeFrom(1, 3, 1, 7), - lexeme: 'x + 3', - name: undefined, - info: {}, - value: { - type: RType.BinaryOp, - location: rangeFrom(1, 5, 1, 5), - flavor: 'arithmetic', - operator: '+', - lexeme: '+', + }, + access: [{ + type: RType.Argument, + location: rangeFrom(1, 3, 1, 3), + lexeme: 'x', + name: undefined, info: {}, - lhs: { + value: { type: RType.Symbol, location: rangeFrom(1, 3, 1, 3), namespace: undefined, lexeme: 'x', content: 'x', info: {} - }, - rhs: { + } + }] + }) + ) + assertAst(label('One Expression', ['name-normal', 'single-bracket-access', 'binary-operator', 'infix-calls', 'function-calls', 'numbers', ...OperatorDatabase['-'].capabilities]), + shell, 'a[x + 3]', exprList({ + type: RType.Access, + location: rangeFrom(1, 2, 1, 2), + lexeme: '[', + operator: '[', + info: {}, + accessed: { + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + namespace: undefined, + lexeme: 'a', + content: 'a', + info: {} + }, + access: [{ + type: RType.Argument, + location: rangeFrom(1, 3, 1, 7), + lexeme: 'x + 3', + name: undefined, + info: {}, + value: { + type: RType.BinaryOp, + location: rangeFrom(1, 5, 1, 5), + operator: '+', + lexeme: '+', + info: {}, + lhs: { + type: RType.Symbol, + location: rangeFrom(1, 3, 1, 3), + namespace: undefined, + lexeme: 'x', + content: 'x', + info: {} + }, + rhs: { + type: RType.Number, + location: rangeFrom(1, 7, 1, 7), + lexeme: '3', + content: numVal(3), + info: {} + } + } + }] + }) + ) + assertAst(label('Multiple Access', ['name-normal', 'single-bracket-access', 'numbers']), + shell, 'a[3,2]', exprList({ + type: 
RType.Access, + location: rangeFrom(1, 2, 1, 2), + lexeme: '[', + operator: '[', + info: {}, + accessed: { + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + namespace: undefined, + lexeme: 'a', + content: 'a', + info: {} + }, + access: [{ + type: RType.Argument, + location: rangeFrom(1, 3, 1, 3), + lexeme: '3', + name: undefined, + info: {}, + value: { type: RType.Number, - location: rangeFrom(1, 7, 1, 7), + location: rangeFrom(1, 3, 1, 3), lexeme: '3', content: numVal(3), info: {} } - } - }] - })) - assertAst('Multiple', shell, 'a[3,2]', exprList({ - type: RType.Access, - location: rangeFrom(1, 2, 1, 2), - lexeme: '[', - operator: '[', - info: {}, - accessed: { - type: RType.Symbol, - location: rangeFrom(1, 1, 1, 1), - namespace: undefined, - lexeme: 'a', - content: 'a', - info: {} - }, - access: [{ - type: RType.Argument, - location: rangeFrom(1, 3, 1, 3), - lexeme: '3', - name: undefined, - info: {}, - value: { - type: RType.Number, - location: rangeFrom(1, 3, 1, 3), - lexeme: '3', - content: numVal(3), - info: {} - } - }, { - type: RType.Argument, - location: rangeFrom(1, 5, 1, 5), - lexeme: '2', - name: undefined, - info: {}, - value: { - type: RType.Number, + }, { + type: RType.Argument, location: rangeFrom(1, 5, 1, 5), lexeme: '2', - content: numVal(2), - info: {} - } - }] - })) - assertAst('Multiple with empty', shell, 'a[,2,4]', exprList({ - type: RType.Access, - location: rangeFrom(1, 2, 1, 2), - lexeme: '[', - operator: '[', - info: {}, - accessed: { - type: RType.Symbol, - location: rangeFrom(1, 1, 1, 1), - namespace: undefined, - lexeme: 'a', - content: 'a', - info: {} - }, - access: [null, { - type: RType.Argument, - location: rangeFrom(1, 4, 1, 4), - lexeme: '2', - name: undefined, - info: {}, - value: { - type: RType.Number, - location: rangeFrom(1, 4, 1, 4), - lexeme: '2', - content: numVal(2), - info: {} - } - }, { - type: RType.Argument, - location: rangeFrom(1, 6, 1, 6), - lexeme: '4', - name: undefined, - info: {}, - value: { - type: 
RType.Number, - location: rangeFrom(1, 6, 1, 6), - lexeme: '4', - content: numVal(4), - info: {} - } - }] - })) - assertAst('Named argument', shell, 'a[1,super=4]', exprList({ - type: RType.Access, - location: rangeFrom(1, 2, 1, 2), - lexeme: '[', - operator: '[', - info: {}, - accessed: { - type: RType.Symbol, - location: rangeFrom(1, 1, 1, 1), - namespace: undefined, - lexeme: 'a', - content: 'a', - info: {} - }, - access: [{ - type: RType.Argument, - location: rangeFrom(1, 3, 1, 3), - lexeme: '1', - name: undefined, + name: undefined, + info: {}, + value: { + type: RType.Number, + location: rangeFrom(1, 5, 1, 5), + lexeme: '2', + content: numVal(2), + info: {} + } + }] + }) + ) + assertAst(label('Multiple with Empty', ['name-normal', 'single-bracket-access', 'numbers', 'access-with-empty']), + shell, 'a[,2,4]', exprList({ + type: RType.Access, + location: rangeFrom(1, 2, 1, 2), + lexeme: '[', + operator: '[', info: {}, - value: { - type: RType.Number, - location: rangeFrom(1, 3, 1, 3), - lexeme: '1', - content: numVal(1), - info: {} - } - }, { - type: RType.Argument, - location: rangeFrom(1, 5, 1, 9), - lexeme: 'super', - name: { + accessed: { type: RType.Symbol, - location: rangeFrom(1, 5, 1, 9), + location: rangeFrom(1, 1, 1, 1), namespace: undefined, - lexeme: 'super', - content: 'super', + lexeme: 'a', + content: 'a', info: {} }, - info: {}, - value: { - type: RType.Number, - location: rangeFrom(1, 11, 1, 11), + access: [EmptyArgument, { + type: RType.Argument, + location: rangeFrom(1, 4, 1, 4), + lexeme: '2', + name: undefined, + info: {}, + value: { + type: RType.Number, + location: rangeFrom(1, 4, 1, 4), + lexeme: '2', + content: numVal(2), + info: {} + } + }, { + type: RType.Argument, + location: rangeFrom(1, 6, 1, 6), lexeme: '4', - content: numVal(4), - info: {} - } - }] - })) - assertAst('Chained', shell, 'a[1][4]', exprList({ - type: RType.Access, - location: rangeFrom(1, 5, 1, 5), - lexeme: '[', - operator: '[', - info: {}, - accessed: { + name: 
undefined, + info: {}, + value: { + type: RType.Number, + location: rangeFrom(1, 6, 1, 6), + lexeme: '4', + content: numVal(4), + info: {} + } + }] + })) + assertAst(label('Named argument', ['name-normal', 'single-bracket-access', 'numbers', 'access-with-argument-names']), + shell, 'a[1,super=4]', exprList({ type: RType.Access, location: rangeFrom(1, 2, 1, 2), lexeme: '[', @@ -291,176 +248,272 @@ describe('Parse value access', withShell(shell => { content: numVal(1), info: {} } + }, { + type: RType.Argument, + location: rangeFrom(1, 5, 1, 9), + lexeme: 'super', + name: { + type: RType.Symbol, + location: rangeFrom(1, 5, 1, 9), + namespace: undefined, + lexeme: 'super', + content: 'super', + info: {} + }, + info: {}, + value: { + type: RType.Number, + location: rangeFrom(1, 11, 1, 11), + lexeme: '4', + content: numVal(4), + info: {} + } }] - }, - access: [{ - type: RType.Argument, - location: rangeFrom(1, 6, 1, 6), - lexeme: '4', - name: undefined, + }) + ) + assertAst(label('Chained', ['name-normal', 'single-bracket-access', 'numbers']), + shell, 'a[1][4]', exprList({ + type: RType.Access, + location: rangeFrom(1, 5, 1, 5), + lexeme: '[', + operator: '[', info: {}, - value: { - type: RType.Number, + accessed: { + type: RType.Access, + location: rangeFrom(1, 2, 1, 2), + lexeme: '[', + operator: '[', + info: {}, + accessed: { + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + namespace: undefined, + lexeme: 'a', + content: 'a', + info: {} + }, + access: [{ + type: RType.Argument, + location: rangeFrom(1, 3, 1, 3), + lexeme: '1', + name: undefined, + info: {}, + value: { + type: RType.Number, + location: rangeFrom(1, 3, 1, 3), + lexeme: '1', + content: numVal(1), + info: {} + } + }] + }, + access: [{ + type: RType.Argument, location: rangeFrom(1, 6, 1, 6), lexeme: '4', - content: numVal(4), - info: {} - } - }] - })) + name: undefined, + info: {}, + value: { + type: RType.Number, + location: rangeFrom(1, 6, 1, 6), + lexeme: '4', + content: numVal(4), + info: {} 
+ } + }] + }) + ) }) describe('Double bracket', () => { - assertAst('Empty', shell, 'b[[]]', exprList({ - type: RType.Access, - location: rangeFrom(1, 2, 1, 3), - lexeme: '[[', - operator: '[[', - info: {}, - accessed: { - type: RType.Symbol, - location: rangeFrom(1, 1, 1, 1), - namespace: undefined, - lexeme: 'b', - content: 'b', - info: {} - }, - access: [] - })) - assertAst('One element', shell, 'b[[5]]', exprList({ - type: RType.Access, - location: rangeFrom(1, 2, 1, 3), - lexeme: '[[', - operator: '[[', - info: {}, - accessed: { - type: RType.Symbol, - location: rangeFrom(1, 1, 1, 1), - namespace: undefined, - lexeme: 'b', - content: 'b', - info: {} - }, - access: [{ - type: RType.Argument, - location: rangeFrom(1, 4, 1, 4), - lexeme: '5', - name: undefined, + assertAst(label('Empty', ['name-normal', 'double-bracket-access', 'access-with-empty']), + shell, 'b[[]]', exprList({ + type: RType.Access, + location: rangeFrom(1, 2, 1, 3), + lexeme: '[[', + operator: '[[', info: {}, - value: { - type: RType.Number, + accessed: { + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + namespace: undefined, + lexeme: 'b', + content: 'b', + info: {} + }, + access: [] + }) + ) + assertAst(label('One Constant', ['name-normal', 'double-bracket-access']), + shell, 'b[[5]]', exprList({ + type: RType.Access, + location: rangeFrom(1, 2, 1, 3), + lexeme: '[[', + operator: '[[', + info: {}, + accessed: { + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + namespace: undefined, + lexeme: 'b', + content: 'b', + info: {} + }, + access: [{ + type: RType.Argument, location: rangeFrom(1, 4, 1, 4), lexeme: '5', - content: numVal(5), - info: {} - } - }] - })) - assertAst('Multiple', shell, 'b[[5,3]]', exprList({ - type: RType.Access, - location: rangeFrom(1, 2, 1, 3), - lexeme: '[[', - operator: '[[', - info: {}, - accessed: { - type: RType.Symbol, - location: rangeFrom(1, 1, 1, 1), - namespace: undefined, - lexeme: 'b', - content: 'b', - info: {} - }, - access: [{ - type: 
RType.Argument, - location: rangeFrom(1, 4, 1, 4), - lexeme: '5', - name: undefined, + name: undefined, + info: {}, + value: { + type: RType.Number, + location: rangeFrom(1, 4, 1, 4), + lexeme: '5', + content: numVal(5), + info: {} + } + }] + }) + ) + assertAst(label('Multiple', ['name-normal', 'double-bracket-access', 'numbers']), + shell, 'b[[5,3]]', exprList({ + type: RType.Access, + location: rangeFrom(1, 2, 1, 3), + lexeme: '[[', + operator: '[[', info: {}, - value: { - type: RType.Number, + accessed: { + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + namespace: undefined, + lexeme: 'b', + content: 'b', + info: {} + }, + access: [{ + type: RType.Argument, location: rangeFrom(1, 4, 1, 4), lexeme: '5', - content: numVal(5), - info: {} - } - }, { - type: RType.Argument, - location: rangeFrom(1, 6, 1, 6), - lexeme: '3', - name: undefined, - info: {}, - value: { - type: RType.Number, + name: undefined, + info: {}, + value: { + type: RType.Number, + location: rangeFrom(1, 4, 1, 4), + lexeme: '5', + content: numVal(5), + info: {} + } + }, { + type: RType.Argument, location: rangeFrom(1, 6, 1, 6), lexeme: '3', - content: numVal(3), - info: {} - } - }] - })) - assertAst('Multiple with empty', shell, 'b[[5,,]]', exprList({ - type: RType.Access, - location: rangeFrom(1, 2, 1, 3), - lexeme: '[[', - operator: '[[', - info: {}, - accessed: { - type: RType.Symbol, - location: rangeFrom(1, 1, 1, 1), - namespace: undefined, - lexeme: 'b', - content: 'b', - info: {} - }, - access: [{ - - type: RType.Argument, - location: rangeFrom(1, 4, 1, 4), - lexeme: '5', - name: undefined, + name: undefined, + info: {}, + value: { + type: RType.Number, + location: rangeFrom(1, 6, 1, 6), + lexeme: '3', + content: numVal(3), + info: {} + } + }] + }) + ) + assertAst(label('Multiple with empty', ['name-normal', 'double-bracket-access', 'numbers', 'access-with-empty']), + shell, 'b[[5,,]]', exprList({ + type: RType.Access, + location: rangeFrom(1, 2, 1, 3), + lexeme: '[[', + operator: 
'[[', info: {}, - value: { - type: RType.Number, + accessed: { + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + namespace: undefined, + lexeme: 'b', + content: 'b', + info: {} + }, + access: [{ + type: RType.Argument, location: rangeFrom(1, 4, 1, 4), lexeme: '5', - content: numVal(5), - info: {} - } - },null,null] - })) + name: undefined, + info: {}, + value: { + type: RType.Number, + location: rangeFrom(1, 4, 1, 4), + lexeme: '5', + content: numVal(5), + info: {} + } + }, EmptyArgument, EmptyArgument] + }) + ) }) describe('Dollar and Slot', () => { - assertAst('Dollar access', shell, 'c$x', exprList({ - type: RType.Access, - location: rangeFrom(1, 2, 1, 2), - lexeme: '$', - operator: '$', - info: {}, - accessed: { - type: RType.Symbol, - location: rangeFrom(1, 1, 1, 1), - namespace: undefined, - lexeme: 'c', - content: 'c', - info: {} - }, - access: 'x' - })) - assertAst('Slot based access', shell, 'd@y', exprList({ - type: RType.Access, - location: rangeFrom(1, 2, 1, 2), - lexeme: '@', - operator: '@', - info: {}, - accessed: { - type: RType.Symbol, - location: rangeFrom(1, 1, 1, 1), - namespace: undefined, - lexeme: 'd', - content: 'd', - info: {} - }, - access: 'y' - })) + assertAst(label('Dollar access', ['name-normal', 'dollar-access']), + shell, 'c$x', exprList({ + type: RType.Access, + location: rangeFrom(1, 2, 1, 2), + lexeme: '$', + operator: '$', + info: {}, + accessed: { + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + namespace: undefined, + lexeme: 'c', + content: 'c', + info: {} + }, + access: [{ + type: RType.Argument, + location: rangeFrom(1, 3, 1, 3), + lexeme: 'x', + name: undefined, + info: {}, + value: { + type: RType.Symbol, + location: rangeFrom(1, 3, 1, 3), + namespace: undefined, + lexeme: 'x', + content: 'x', + info: {} + } + }] + }) + ) + assertAst(label('Slot based access', ['name-normal', 'slot-access']), + shell, 'd@y', exprList({ + type: RType.Access, + location: rangeFrom(1, 2, 1, 2), + lexeme: '@', + operator: 
'@', + info: {}, + accessed: { + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + namespace: undefined, + lexeme: 'd', + content: 'd', + info: {} + }, + access: [{ + type: RType.Argument, + location: rangeFrom(1, 3, 1, 3), + lexeme: 'y', + name: undefined, + info: {}, + value: { + type: RType.Symbol, + location: rangeFrom(1, 3, 1, 3), + namespace: undefined, + lexeme: 'y', + content: 'y', + info: {} + } + }] + }) + ) }) })) - diff --git a/test/functionality/r-bridge/lang/ast/parse-assignments.ts b/test/functionality/r-bridge/lang/ast/parse-assignments.ts index 1f4e5ba824..be2f101891 100644 --- a/test/functionality/r-bridge/lang/ast/parse-assignments.ts +++ b/test/functionality/r-bridge/lang/ast/parse-assignments.ts @@ -1,24 +1,22 @@ import { assertAst, withShell } from '../../../_helper/shell' import { exprList, numVal } from '../../../_helper/ast-builder' -import { RAssignmentOpPool } from '../../../_helper/provider' import { rangeFrom } from '../../../../../src/util/range' -import { RType } from '../../../../../src/r-bridge' +import { OperatorDatabase, RType } from '../../../../../src' +import { label } from '../../../_helper/label' +import { AssignmentOperators } from '../../../_helper/provider' describe('Parse simple assignments', - withShell((shell) => { + withShell(shell => { describe('Constant Assignments', () => { - for(const op of RAssignmentOpPool) { - const opOffset = op.str.length - 1 - assertAst( - 'Assign to 5', - shell, - `x ${op.str} 5`, - exprList({ + for(const op of AssignmentOperators) { + const opOffset = op.length - 1 + const data = OperatorDatabase[op] + assertAst(label(`x ${op} 5`, ['binary-operator', 'infix-calls', 'function-calls', ...data.capabilities]), + shell, `x ${op} 5`,exprList({ type: RType.BinaryOp, location: rangeFrom(1, 3, 1, 3 + opOffset), - flavor: 'assignment', - lexeme: op.str, - operator: op.str, + lexeme: op, + operator: op, info: {}, lhs: { type: RType.Symbol, @@ -42,24 +40,23 @@ describe('Parse simple 
assignments', // allow assignments to strings and function calls describe('Assignments to strings', () => { - assertAst( - 'Assign to given string', - shell, - '\'a\' <- 5', - exprList({ + assertAst(label('Assign to Given String', ['binary-operator', 'infix-calls', 'function-calls', ...OperatorDatabase['<-'].capabilities, 'name-quoted', 'numbers']), + shell, '\'a\' <- 5', exprList({ type: RType.BinaryOp, location: rangeFrom(1, 5, 1, 6), - flavor: 'assignment', lexeme: '<-', operator: '<-', info: {}, lhs: { - type: RType.Symbol, + type: RType.String, location: rangeFrom(1, 1, 1, 3), namespace: undefined, lexeme: "'a'", - content: 'a', - info: {} + content: { + quotes: "'", + str: 'a' + }, + info: {} }, rhs: { type: RType.Number, @@ -73,14 +70,10 @@ describe('Parse simple assignments', }) describe('Assignment with an expression list', () => { - assertAst( - 'Assign to 5', - shell, - 'x <- { 2 * 3 }', - exprList({ + assertAst(label('x <- { 2 * 3 }', [...OperatorDatabase['*'].capabilities, 'function-calls', ...OperatorDatabase['<-'].capabilities, 'name-normal', 'numbers', 'grouping']), + shell, 'x <- { 2 * 3 }', exprList({ type: RType.BinaryOp, location: rangeFrom(1, 3, 1, 4), - flavor: 'assignment', lexeme: '<-', operator: '<-', info: {}, @@ -93,26 +86,46 @@ describe('Parse simple assignments', info: {} }, rhs: { - type: RType.BinaryOp, - location: rangeFrom(1, 10, 1, 10), - flavor: 'arithmetic', - lexeme: '*', - operator: '*', + type: RType.ExpressionList, + lexeme: undefined, + location: undefined, info: {}, - lhs: { - type: RType.Number, - location: rangeFrom(1, 8, 1, 8), - lexeme: '2', - content: numVal(2), - info: {} - }, - rhs: { - type: RType.Number, - location: rangeFrom(1, 12, 1, 12), - lexeme: '3', - content: numVal(3), - info: {} - } + grouping: [{ + type: RType.Symbol, + lexeme: '{', + content: '{', + info: {}, + namespace: undefined, + location: rangeFrom(1, 6, 1, 6) + }, { + type: RType.Symbol, + lexeme: '}', + content: '}', + info: {}, + namespace: 
undefined, + location: rangeFrom(1, 14, 1, 14) + }], + children: [{ + type: RType.BinaryOp, + location: rangeFrom(1, 10, 1, 10), + lexeme: '*', + operator: '*', + info: {}, + lhs: { + type: RType.Number, + location: rangeFrom(1, 8, 1, 8), + lexeme: '2', + content: numVal(2), + info: {} + }, + rhs: { + type: RType.Number, + location: rangeFrom(1, 12, 1, 12), + lexeme: '3', + content: numVal(3), + info: {} + } + }] }, }), { ignoreAdditionalTokens: true diff --git a/test/functionality/r-bridge/lang/ast/parse-constructs.ts b/test/functionality/r-bridge/lang/ast/parse-constructs.ts index 5ce8c34ee4..093aed2555 100644 --- a/test/functionality/r-bridge/lang/ast/parse-constructs.ts +++ b/test/functionality/r-bridge/lang/ast/parse-constructs.ts @@ -1,114 +1,203 @@ import { assertAst, withShell } from '../../../_helper/shell' import { exprList, numVal } from '../../../_helper/ast-builder' +import type { SourceRange } from '../../../../../src/util/range' import { addRanges, rangeFrom } from '../../../../../src/util/range' -import { RType } from '../../../../../src/r-bridge' -import { ensureExpressionList } from '../../../../../src/r-bridge/lang-4.x/ast/parser/xml/internal' +import type { RExpressionList, RNode } from '../../../../../src' +import { ensureExpressionList, RType } from '../../../../../src' +import type { SupportedFlowrCapabilityId } from '../../../../../src/r-bridge/data' +import { label } from '../../../_helper/label' -const IfThenSpacingVariants = [ +interface IfThenSpacing { + str: string + locationTrue: ReturnType + then: RNode + num: number, + locationNum: ReturnType + end: ReturnType + /* yes, we could give them just once, but if we ever want to modify the list this is more flexible */ + capabilities: SupportedFlowrCapabilityId[] +} + +const IfThenSpacingVariants: IfThenSpacing[] = [ { str: 'if(TRUE)1', locationTrue: rangeFrom(1, 4, 1, 7), - locationNum: rangeFrom(1, 9, 1, 9), + then: { type: RType.Number, location: rangeFrom(1, 9, 1, 9), lexeme: '1', 
content: numVal(1), info: {} }, num: 1, + locationNum: rangeFrom(1, 9, 1, 9), end: rangeFrom(1, 9, 1, 9), + capabilities: ['if', 'logical', 'numbers'] }, { str: 'if(TRUE) 1', locationTrue: rangeFrom(1, 4, 1, 7), - locationNum: rangeFrom(1, 10, 1, 10), + then: { type: RType.Number, location: rangeFrom(1, 10, 1, 10), lexeme: '1', content: numVal(1), info: {} }, num: 1, + locationNum: rangeFrom(1, 10, 1, 10), end: rangeFrom(1, 10, 1, 10), + capabilities: ['if', 'logical', 'numbers'] }, { str: 'if (TRUE) 1', locationTrue: rangeFrom(1, 5, 1, 8), - locationNum: rangeFrom(1, 11, 1, 11), num: 1, + locationNum: rangeFrom(1, 11, 1, 11), + then: { type: RType.Number, location: rangeFrom(1, 11, 1, 11), lexeme: '1', content: numVal(1), info: {} }, end: rangeFrom(1, 11, 1, 11), + capabilities: ['if', 'logical', 'numbers'] }, { str: 'if (TRUE) 42', locationTrue: rangeFrom(1, 9, 1, 12), - locationNum: rangeFrom(1, 16, 1, 17), num: 42, + locationNum: rangeFrom(1, 16, 1, 17), + then: { type: RType.Number, location: rangeFrom(1, 16, 1, 17), lexeme: '42', content: numVal(42), info: {} }, end: rangeFrom(1, 17, 1, 17), + capabilities: ['if', 'logical', 'numbers'] }, { str: 'if\n(TRUE)1', locationTrue: rangeFrom(2, 2, 2, 5), - locationNum: rangeFrom(2, 7, 2, 7), num: 1, + locationNum: rangeFrom(2,7,2,7), + then: { type: RType.Number, location: rangeFrom(2, 7, 2, 7), lexeme: '1', content: numVal(1), info: {} }, end: rangeFrom(2, 7, 2, 7), + capabilities: ['if', 'logical', 'numbers'] }, { str: 'if(TRUE)\n1', locationTrue: rangeFrom(1, 4, 1, 7), - locationNum: rangeFrom(2, 1, 2, 1), num: 1, + locationNum: rangeFrom(2,1,2,1), + then: { type: RType.Number, location: rangeFrom(2, 1, 2, 1), lexeme: '1', content: numVal(1), info: {} }, end: rangeFrom(2, 1, 2, 1), + capabilities: ['if', 'logical', 'numbers'] }, { str: 'if\n(\nTRUE\n)\n1', locationTrue: rangeFrom(3, 1, 3, 4), - locationNum: rangeFrom(5, 1, 5, 1), num: 1, + locationNum: rangeFrom(5,1,5,1), + then: { type: RType.Number, location: 
rangeFrom(5, 1, 5, 1), lexeme: '1', content: numVal(1), info: {} }, end: rangeFrom(5, 1, 5, 1), + capabilities: ['if', 'logical', 'numbers'] }, ] -const IfThenBraceVariants = [{ +function inBrace(start: SourceRange, end: SourceRange, content: RNode): RExpressionList { + return { + type: RType.ExpressionList, + location: undefined, + lexeme: undefined, + info: {}, + grouping: [{ + type: RType.Symbol, + lexeme: '{', + content: '{', + info: {}, + namespace: undefined, + location: start + }, { + type: RType.Symbol, + lexeme: '}', + content: '}', + info: {}, + namespace: undefined, + location: end + }], + children: [content] + } +} + +const IfThenBraceVariants: IfThenSpacing[] = [{ str: 'if(TRUE){1}', locationTrue: rangeFrom(1, 4, 1, 7), - locationNum: rangeFrom(1, 10, 1, 10), num: 1, - end: rangeFrom(1, 11, 1, 11) + locationNum: rangeFrom(1,10,1,10), + then: inBrace(rangeFrom(1, 9, 1, 9), rangeFrom(1, 11, 1, 11), { type: RType.Number, location: rangeFrom(1, 10, 1, 10), lexeme: '1', content: numVal(1), info: {} }), + end: rangeFrom(1, 11, 1, 11), + capabilities: ['if', 'logical', 'numbers', 'grouping'] }, { str: 'if(TRUE){42}', locationTrue: rangeFrom(1, 4, 1, 7), locationNum: rangeFrom(1, 10, 1, 11), + then: inBrace(rangeFrom(1, 9, 1, 9), rangeFrom(1, 12, 1, 12), { type: RType.Number, location: rangeFrom(1, 10, 1, 11), lexeme: '42', content: numVal(42), info: {} }), num: 42, - end: rangeFrom(1, 12, 1, 12) + end: rangeFrom(1, 12, 1, 12), + capabilities: ['if', 'logical', 'numbers', 'grouping'] }, { str: 'if(TRUE){{{1}}}', locationTrue: rangeFrom(1, 4, 1, 7), locationNum: rangeFrom(1, 12, 1, 12), + then: inBrace(rangeFrom(1, 9, 1, 9), rangeFrom(1, 15, 1, 15), + inBrace(rangeFrom(1, 10, 1, 10), rangeFrom(1, 14, 1, 14), + inBrace(rangeFrom(1, 11, 1, 11), rangeFrom(1, 13, 1, 13), + { type: RType.Number, location: rangeFrom(1, 12, 1, 12), lexeme: '1', content: numVal(1), info: {} } + ) + ) + ), num: 1, - end: rangeFrom(1, 15, 1, 15) + end: rangeFrom(1, 15, 1, 15), + 
capabilities: ['if', 'logical', 'numbers', 'grouping'] }] +interface ElseSpacing { + str: string + locationElse: ReturnType + otherwise: (offset: SourceRange) => RNode, + num: number, + capabilities: SupportedFlowrCapabilityId[] +} + // suffix of if-then counterparts -const ElseSpacingVariants = [{ +const ElseSpacingVariants: ElseSpacing[] = [{ /* one space/newline around is the minimum for R */ str: ' else 2', locationElse: rangeFrom(0, 7, 0, 7), - num: 2 + num: 2, + otherwise: off => ({ type: RType.Number, location: addRanges(off, rangeFrom(0, 7, 0, 7)), lexeme: '2', content: numVal(2), info: {} }), + capabilities: ['if', 'numbers'] }, { - str: ' else 2', + str: ' else 9', locationElse: rangeFrom(0, 8, 0, 8), - num: 2 + num: 9, + otherwise: off => ({ type: RType.Number, location: addRanges(off, rangeFrom(0, 8, 0, 8)), lexeme: '9', content: numVal(9), info: {} }), + capabilities: ['if', 'numbers'] }] -const ElseBracesVariants = [{ +const ElsegroupingVariants: ElseSpacing[] = [{ str: ' else {2}', locationElse: rangeFrom(0, 8, 0, 8), - num: 2 + otherwise: off => inBrace( + addRanges(off, rangeFrom(0, 7, 0, 7)), addRanges(off, rangeFrom(0, 9, 0, 9)), + { type: RType.Number, location: addRanges(off, rangeFrom(0, 8, 0, 8)), lexeme: '2', content: numVal(2), info: {} } + ), + num: 2, + capabilities: ['if', 'numbers', 'grouping'] }, { str: ' else {{{42}}}', locationElse: rangeFrom(0, 10, 0, 11), - num: 42 + otherwise: off => inBrace(addRanges(off, rangeFrom(0, 7, 0, 7)), addRanges(off, rangeFrom(0, 14, 0, 14)), + inBrace(addRanges(off, rangeFrom(0, 8, 0, 8)), addRanges(off, rangeFrom(0, 13, 0, 13)), + inBrace(addRanges(off, rangeFrom(0, 9, 0, 9)), addRanges(off, rangeFrom(0, 12, 0, 12)), + { type: RType.Number, location: addRanges(off, rangeFrom(0, 10, 0, 11)), lexeme: '42', content: numVal(42), info: {} } + ) + ) + ), + num: 42, + capabilities: ['if', 'numbers', 'grouping'] }] describe('Parse simple constructs', withShell(shell => { describe('if', () => { 
describe('if-then', () => { - for(const pool of [{ name: 'braces', variants: IfThenBraceVariants }, { + for(const pool of [{ name: 'grouping', variants: IfThenBraceVariants }, { name: 'spacing', variants: IfThenSpacingVariants }]) { describe(`${pool.name} variants`, () => { for(const variant of pool.variants) { - const strNum = `${variant.num}` - assertAst(JSON.stringify(variant.str), shell, variant.str, exprList({ + assertAst(label(JSON.stringify(variant.str), variant.capabilities), shell, variant.str, exprList({ type: RType.IfThenElse, location: rangeFrom(1, 1, 1, 2), lexeme: 'if', @@ -120,13 +209,7 @@ describe('Parse simple constructs', withShell(shell => { content: true, info: {} }, - then: ensureExpressionList({ - type: RType.Number, - location: variant.locationNum, - lexeme: strNum, - content: numVal(variant.num), - info: {} - }) + then: ensureExpressionList(variant.then) }), { ignoreAdditionalTokens: true }) @@ -135,21 +218,19 @@ describe('Parse simple constructs', withShell(shell => { } }) describe('if-then-else', () => { - for(const elsePool of [{ name: 'braces', variants: ElseBracesVariants }, { + for(const elsePool of [{ name: 'grouping', variants: ElsegroupingVariants }, { name: 'spacing', variants: ElseSpacingVariants }]) { - for(const ifThenPool of [{ name: 'braces', variants: IfThenBraceVariants }, { + for(const ifThenPool of [{ name: 'grouping', variants: IfThenBraceVariants }, { name: 'spacing', variants: IfThenSpacingVariants }]) { describe(`if-then: ${ifThenPool.name}, else: ${elsePool.name}`, () => { for(const elseVariant of elsePool.variants) { for(const ifThenVariant of ifThenPool.variants) { - const thenNum = `${ifThenVariant.num}` - const elseNum = `${elseVariant.num}` const input = `${ifThenVariant.str}${elseVariant.str}` - assertAst('if-then-else', shell, input, exprList({ + assertAst(label(JSON.stringify(input), [...ifThenVariant.capabilities, ...elseVariant.capabilities]), shell, input, exprList({ type: RType.IfThenElse, location: 
rangeFrom(1, 1, 1, 2), lexeme: 'if', @@ -161,20 +242,8 @@ describe('Parse simple constructs', withShell(shell => { content: true, info: {} }, - then: ensureExpressionList({ - type: RType.Number, - location: ifThenVariant.locationNum, - lexeme: thenNum, - content: numVal(ifThenVariant.num), - info: {} - }), - otherwise: ensureExpressionList({ - type: RType.Number, - location: addRanges(elseVariant.locationElse, ifThenVariant.end), - lexeme: elseNum, - content: numVal(elseVariant.num), - info: {} - }) + then: ensureExpressionList(ifThenVariant.then), + otherwise: ensureExpressionList(elseVariant.otherwise(ifThenVariant.end)) }), { ignoreAdditionalTokens: true }) @@ -187,58 +256,58 @@ describe('Parse simple constructs', withShell(shell => { }) describe('loops', () => { describe('for', () => { - assertAst('for(i in 1:10) 2', shell, 'for(i in 1:42)2', exprList({ - type: RType.ForLoop, - location: rangeFrom(1, 1, 1, 3), - lexeme: 'for', - info: {}, - variable: { - type: RType.Symbol, - location: rangeFrom(1, 5, 1, 5), - namespace: undefined, - lexeme: 'i', - content: 'i', - info: {} - }, - vector: { - type: RType.BinaryOp, - flavor: 'arithmetic', - operator: ':', - location: rangeFrom(1, 11, 1, 11), - lexeme: ':', + assertAst(label('for(i in 1:10) 2', ['for-loop', 'name-normal', 'numbers', 'built-in-sequencing']), shell, 'for(i in 1:42)2', + exprList({ + type: RType.ForLoop, + location: rangeFrom(1, 1, 1, 3), + lexeme: 'for', info: {}, - lhs: { - type: RType.Number, - location: rangeFrom(1, 10, 1, 10), - lexeme: '1', - content: numVal(1), - info: {} + variable: { + type: RType.Symbol, + location: rangeFrom(1, 5, 1, 5), + namespace: undefined, + lexeme: 'i', + content: 'i', + info: {} }, - rhs: { + vector: { + type: RType.BinaryOp, + operator: ':', + location: rangeFrom(1, 11, 1, 11), + lexeme: ':', + info: {}, + lhs: { + type: RType.Number, + location: rangeFrom(1, 10, 1, 10), + lexeme: '1', + content: numVal(1), + info: {} + }, + rhs: { + type: RType.Number, + location: 
rangeFrom(1, 12, 1, 13), + lexeme: '42', + content: numVal(42), + info: {} + } + }, + body: ensureExpressionList({ type: RType.Number, - location: rangeFrom(1, 12, 1, 13), - lexeme: '42', - content: numVal(42), + location: rangeFrom(1, 15, 1, 15), + lexeme: '2', + content: numVal(2), info: {} - } - }, - body: ensureExpressionList({ - type: RType.Number, - location: rangeFrom(1, 15, 1, 15), - lexeme: '2', - content: numVal(2), - info: {} - }) - }), { - ignoreAdditionalTokens: true - } + }) + }), { + ignoreAdditionalTokens: true + } ) - assertAst('for-loop with comment', shell, `for(#a + assertAst(label('for-loop with comment', ['for-loop', 'name-normal', 'numbers', 'built-in-sequencing', 'comments', 'newlines']), shell, `for(#a i#b in#c 1:42#d ) - 2`, exprList({ + 2`,exprList({ type: RType.ForLoop, location: rangeFrom(1, 1, 1, 3), lexeme: 'for', @@ -253,7 +322,6 @@ describe('Parse simple constructs', withShell(shell => { }, vector: { type: RType.BinaryOp, - flavor: 'arithmetic', operator: ':', location: rangeFrom(4, 34, 4, 34), lexeme: ':', @@ -280,159 +348,197 @@ describe('Parse simple constructs', withShell(shell => { content: numVal(2), info: {} }) - }), { + }) , { ignoreAdditionalTokens: true } ) }) describe('repeat', () => { - assertAst('Single instruction repeat', shell, 'repeat 2', exprList({ - type: RType.RepeatLoop, - location: rangeFrom(1, 1, 1, 6), - lexeme: 'repeat', - info: {}, - body: ensureExpressionList({ - type: RType.Number, - location: rangeFrom(1, 8, 1, 8), - lexeme: '2', - content: numVal(2), - info: {} + assertAst(label('Single instruction repeat', ['repeat-loop', 'numbers']), + shell, 'repeat 2', exprList({ + type: RType.RepeatLoop, + location: rangeFrom(1, 1, 1, 6), + lexeme: 'repeat', + info: {}, + body: ensureExpressionList({ + type: RType.Number, + location: rangeFrom(1, 8, 1, 8), + lexeme: '2', + content: numVal(2), + info: {} + }) + }), { + ignoreAdditionalTokens: true }) - }), { - ignoreAdditionalTokens: true - }) - assertAst('Two 
statement repeat', shell, 'repeat { x; y }', exprList({ - type: RType.RepeatLoop, - location: rangeFrom(1, 1, 1, 6), - lexeme: 'repeat', - info: {}, - body: { - type: RType.ExpressionList, - location: rangeFrom(1, 8, 1, 15), - lexeme: '{ x; y }', + assertAst(label('Two Statement Repeat', ['repeat-loop', 'numbers', 'grouping', 'semicolons']), + shell, 'repeat { x; y }', exprList({ + type: RType.RepeatLoop, + location: rangeFrom(1, 1, 1, 6), + lexeme: 'repeat', info: {}, - children: [{ - type: RType.Symbol, - location: rangeFrom(1, 10, 1, 10), - namespace: undefined, - lexeme: 'x', - content: 'x', - info: {}, - }, { - type: RType.Symbol, - location: rangeFrom(1, 13, 1, 13), - namespace: undefined, - lexeme: 'y', - content: 'y', - info: {} - }] - } - }), { - ignoreAdditionalTokens: true - }) + body: { + type: RType.ExpressionList, + location: undefined, + lexeme: undefined, + grouping: [{ + type: RType.Symbol, + lexeme: '{', + content: '{', + info: {}, + namespace: undefined, + location: rangeFrom(1, 8, 1, 8) + }, { + type: RType.Symbol, + lexeme: '}', + content: '}', + info: {}, + namespace: undefined, + location: rangeFrom(1, 15, 1, 15) + }], + info: {}, + children: [{ + type: RType.Symbol, + location: rangeFrom(1, 10, 1, 10), + namespace: undefined, + lexeme: 'x', + content: 'x', + info: {}, + }, { + type: RType.Symbol, + location: rangeFrom(1, 13, 1, 13), + namespace: undefined, + lexeme: 'y', + content: 'y', + info: {} + }] + } + }), { + ignoreAdditionalTokens: true + }) }) describe('while', () => { - assertAst('while (TRUE) 42', shell, 'while (TRUE) 42', exprList({ - type: RType.WhileLoop, - location: rangeFrom(1, 1, 1, 5), - lexeme: 'while', - info: {}, - condition: { - type: RType.Logical, - location: rangeFrom(1, 8, 1, 11), - lexeme: 'TRUE', - content: true, - info: {} - }, - body: ensureExpressionList({ - type: RType.Number, - location: rangeFrom(1, 14, 1, 15), - lexeme: '42', - content: numVal(42), - info: {} + assertAst(label('while (TRUE) 42', 
['while-loop', 'logical', 'numbers']), + shell, 'while (TRUE) 42', exprList({ + type: RType.WhileLoop, + location: rangeFrom(1, 1, 1, 5), + lexeme: 'while', + info: {}, + condition: { + type: RType.Logical, + location: rangeFrom(1, 8, 1, 11), + lexeme: 'TRUE', + content: true, + info: {} + }, + body: ensureExpressionList({ + type: RType.Number, + location: rangeFrom(1, 14, 1, 15), + lexeme: '42', + content: numVal(42), + info: {} + }) + }), { + ignoreAdditionalTokens: true }) - }), { - ignoreAdditionalTokens: true - }) - assertAst('Two statement while', shell, 'while (FALSE) { x; y }', exprList({ - type: RType.WhileLoop, - location: rangeFrom(1, 1, 1, 5), - lexeme: 'while', - info: {}, - condition: { - type: RType.Logical, - location: rangeFrom(1, 8, 1, 12), - lexeme: 'FALSE', - content: false, - info: {} - }, - body: ensureExpressionList({ - type: RType.ExpressionList, - location: rangeFrom(1, 15, 1, 22), - lexeme: '{ x; y }', - info: {}, - children: [{ - type: RType.Symbol, - location: rangeFrom(1, 17, 1, 17), - namespace: undefined, - lexeme: 'x', - content: 'x', - info: {} - }, { - type: RType.Symbol, - location: rangeFrom(1, 20, 1, 20), - namespace: undefined, - lexeme: 'y', - content: 'y', - info: {} - }] + assertAst(label('Two statement while', ['while-loop', 'logical', 'grouping', 'semicolons']), + shell, 'while (FALSE) { x; y }', exprList({ + type: RType.WhileLoop, + location: rangeFrom(1, 1, 1, 5), + lexeme: 'while', + info: {}, + condition: { + type: RType.Logical, + location: rangeFrom(1, 8, 1, 12), + lexeme: 'FALSE', + content: false, + info: {} + }, + body: { + type: RType.ExpressionList, + location: undefined, + lexeme: undefined, + grouping: [{ + type: RType.Symbol, + lexeme: '{', + content: '{', + info: {}, + namespace: undefined, + location: rangeFrom(1, 15, 1, 15) + }, { + type: RType.Symbol, + lexeme: '}', + content: '}', + info: {}, + namespace: undefined, + location: rangeFrom(1, 22, 1, 22) + }], + info: {}, + children: [{ + type: 
RType.Symbol, + location: rangeFrom(1, 17, 1, 17), + namespace: undefined, + lexeme: 'x', + content: 'x', + info: {} + }, { + type: RType.Symbol, + location: rangeFrom(1, 20, 1, 20), + namespace: undefined, + lexeme: 'y', + content: 'y', + info: {} + }] + } + }), { + ignoreAdditionalTokens: true }) - }), { - ignoreAdditionalTokens: true - }) }) describe('break', () => { - assertAst('while (TRUE) break', shell, 'while (TRUE) break', exprList({ - type: RType.WhileLoop, - location: rangeFrom(1, 1, 1, 5), - lexeme: 'while', - info: {}, - condition: { - type: RType.Logical, - location: rangeFrom(1, 8, 1, 11), - lexeme: 'TRUE', - content: true, - info: {} - }, - body: ensureExpressionList({ - type: RType.Break, - location: rangeFrom(1, 14, 1, 18), - lexeme: 'break', - info: {} + assertAst(label('while (TRUE) break', ['while-loop', 'logical', 'break']), + shell, 'while (TRUE) break', exprList({ + type: RType.WhileLoop, + location: rangeFrom(1, 1, 1, 5), + lexeme: 'while', + info: {}, + condition: { + type: RType.Logical, + location: rangeFrom(1, 8, 1, 11), + lexeme: 'TRUE', + content: true, + info: {} + }, + body: ensureExpressionList({ + type: RType.Break, + location: rangeFrom(1, 14, 1, 18), + lexeme: 'break', + info: {} + }) }) - })) + ) }) describe('next', () => { - assertAst('Next in while', shell, 'while (TRUE) next', exprList({ - type: RType.WhileLoop, - location: rangeFrom(1, 1, 1, 5), - lexeme: 'while', - info: {}, - condition: { - type: RType.Logical, - location: rangeFrom(1, 8, 1, 11), - lexeme: 'TRUE', - content: true, - info: {} - }, - body: ensureExpressionList({ - type: RType.Next, - location: rangeFrom(1, 14, 1, 17), - lexeme: 'next', - info: {} + assertAst(label('Next in while', ['while-loop', 'next']), + shell, 'while (TRUE) next', exprList({ + type: RType.WhileLoop, + location: rangeFrom(1, 1, 1, 5), + lexeme: 'while', + info: {}, + condition: { + type: RType.Logical, + location: rangeFrom(1, 8, 1, 11), + lexeme: 'TRUE', + content: true, + info: {} + }, 
+ body: ensureExpressionList({ + type: RType.Next, + location: rangeFrom(1, 14, 1, 17), + lexeme: 'next', + info: {} + }) }) - })) + ) }) }) })) diff --git a/test/functionality/r-bridge/lang/ast/parse-directives.ts b/test/functionality/r-bridge/lang/ast/parse-directives.ts index 68ffc5792f..ad13d356bf 100644 --- a/test/functionality/r-bridge/lang/ast/parse-directives.ts +++ b/test/functionality/r-bridge/lang/ast/parse-directives.ts @@ -1,13 +1,12 @@ import { assertAst, withShell } from '../../../_helper/shell' import { exprList } from '../../../_helper/ast-builder' import { rangeFrom } from '../../../../../src/util/range' -import { RType } from '../../../../../src/r-bridge' +import { RType } from '../../../../../src' +import { label } from '../../../_helper/label' describe('Parse the line directive', withShell(shell => { - assertAst( - 'Simple line', - shell, - '#line 42 "foo.R"', + assertAst(label('Simple line', ['comments']), + shell, '#line 42 "foo.R"', exprList({ type: RType.LineDirective, info: {}, diff --git a/test/functionality/r-bridge/lang/ast/parse-expression-lists.ts b/test/functionality/r-bridge/lang/ast/parse-expression-lists.ts index 48f5727a0b..cda38d4ac4 100644 --- a/test/functionality/r-bridge/lang/ast/parse-expression-lists.ts +++ b/test/functionality/r-bridge/lang/ast/parse-expression-lists.ts @@ -1,123 +1,198 @@ import { assertAst, withShell } from '../../../_helper/shell' import { exprList, numVal } from '../../../_helper/ast-builder' import { rangeFrom } from '../../../../../src/util/range' -import { RawRType, RType } from '../../../../../src/r-bridge' +import { RType } from '../../../../../src' +import { label } from '../../../_helper/label' -describe('Parse expression lists', - withShell((shell) => { - describe('Expression lists with newlines and braces', () => { - // this is already covered by other tests, yet it is good to state it here explicitly (expr list is the default top-level token for R) - assertAst('"42" (single element)', shell, 
- '42', - exprList({ +describe('Parse expression lists', withShell(shell => { + describe('Expression lists with newlines and braces', () => { + // this is already covered by other tests, yet it is good to state it here explicitly (expr list is the default top-level token for R) + assertAst(label('single element', ['numbers']), + shell, '42', exprList({ + type: RType.Number, + location: rangeFrom(1, 1, 1, 2), + lexeme: '42', + content: numVal(42), + info: {} + }) + + ) + // the r standard does not seem to allow '\r\n' or '\n\r' + assertAst(label('two lines', ['name-normal', 'numbers', 'newlines']), + shell, '42\na', + exprList( + { type: RType.Number, location: rangeFrom(1, 1, 1, 2), lexeme: '42', content: numVal(42), info: {} - }) + }, + { + type: RType.Symbol, + location: rangeFrom(2, 1, 2, 1), + namespace: undefined, + lexeme: 'a', + content: 'a', + info: {} + } ) - // the r standard does not seem to allow '\r\n' or '\n\r' - const twoLine = '42\na' - assertAst(`${JSON.stringify(twoLine)} (two lines)`, shell, - twoLine, - exprList( - { - type: RType.Number, - location: rangeFrom(1, 1, 1, 2), - lexeme: '42', - content: numVal(42), - info: {} - }, - { - type: RType.Symbol, - location: rangeFrom(2, 1, 2, 1), - namespace: undefined, - lexeme: 'a', - content: 'a', - info: {} - } - ) + ) + + assertAst(label('three lines', ['name-normal', 'numbers', 'newlines']), + shell, 'a\nb\nc', + exprList( + { + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + lexeme: 'a', + content: 'a', + namespace: undefined, + info: {} + }, + { + type: RType.Symbol, + location: rangeFrom(2, 1, 2, 1), + namespace: undefined, + lexeme: 'b', + content: 'b', + info: {} + }, + { + type: RType.Symbol, + location: rangeFrom(3, 1, 3, 1), + lexeme: 'c', + content: 'c', + namespace: undefined, + info: {} + }, ) + ) - const manyLines = 'a\nb\nc\nd\nn2\nz\n' - assertAst(`${JSON.stringify(manyLines)} (many lines)`, shell, - manyLines, - exprList( + assertAst(label('many lines', ['name-normal', 
'numbers', 'newlines']), + shell, 'a\nb\nc\nd\nn2\nz\n', + exprList( + { + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + namespace: undefined, + lexeme: 'a', + content: 'a', + info: {} + }, + { + type: RType.Symbol, + location: rangeFrom(2, 1, 2, 1), + namespace: undefined, + lexeme: 'b', + content: 'b', + info: {} + }, + { + type: RType.Symbol, + location: rangeFrom(3, 1, 3, 1), + namespace: undefined, + lexeme: 'c', + content: 'c', + info: {} + }, + { + type: RType.Symbol, + location: rangeFrom(4, 1, 4, 1), + namespace: undefined, + lexeme: 'd', + content: 'd', + info: {} + }, + { + type: RType.Symbol, + location: rangeFrom(5, 1, 5, 2), + namespace: undefined, + lexeme: 'n2', + content: 'n2', + info: {} + }, + { + type: RType.Symbol, + location: rangeFrom(6, 1, 6, 1), + namespace: undefined, + lexeme: 'z', + content: 'z', + info: {} + } + ) + ) + + assertAst(label('Two Lines With Braces', ['name-normal', 'numbers', 'grouping', 'newlines']), + shell, '{ 42\na }', exprList({ + type: RType.ExpressionList, + location: undefined, + grouping: [ { type: RType.Symbol, location: rangeFrom(1, 1, 1, 1), - namespace: undefined, - lexeme: 'a', - content: 'a', - info: {} + lexeme: '{', + content: '{', + info: {}, + namespace: undefined }, { type: RType.Symbol, - location: rangeFrom(2, 1, 2, 1), - namespace: undefined, - lexeme: 'b', - content: 'b', - info: {} - }, - { - type: RType.Symbol, - location: rangeFrom(3, 1, 3, 1), - namespace: undefined, - lexeme: 'c', - content: 'c', - info: {} - }, + location: rangeFrom(2, 3, 2, 3), + lexeme: '}', + content: '}', + info: {}, + namespace: undefined + } + ], + lexeme: undefined, + info: { }, + children: [ { - type: RType.Symbol, - location: rangeFrom(4, 1, 4, 1), - namespace: undefined, - lexeme: 'd', - content: 'd', - info: {} + type: RType.Number, + location: rangeFrom(1, 3, 1, 4), + lexeme: '42', + content: numVal(42), + info: {} }, { type: RType.Symbol, - location: rangeFrom(5, 1, 5, 2), + location: rangeFrom(2, 1, 2, 
1), namespace: undefined, - lexeme: 'n2', - content: 'n2', + lexeme: 'a', + content: 'a', info: {} }, - { - type: RType.Symbol, - location: rangeFrom(6, 1, 6, 1), - namespace: undefined, - lexeme: 'z', - content: 'z', - info: {} - } - ) - ) + ], + }) + ) - const twoLineWithBraces = '{ 42\na }' - assertAst(`${JSON.stringify(twoLineWithBraces)} (two lines with braces)`, shell, - twoLineWithBraces, - exprList({ + // { 42\na }{ x } seems to be illegal for R... + assertAst(label('Multiple Braces', ['name-normal', 'numbers', 'grouping', 'newlines']), + shell, '{ 42\na }\n{ x }', exprList( + { type: RType.ExpressionList, - location: rangeFrom(1, 1, 2, 3), - lexeme: '{ 42\na }', - info: { - additionalTokens: [ - { - type: RType.Delimiter, - subtype: RawRType.BraceLeft, - location: rangeFrom(1, 1, 1, 1), - lexeme: '{' - }, - { - type: RType.Delimiter, - subtype: RawRType.BraceRight, - location: rangeFrom(2, 3, 2, 3), - lexeme: '}' - } - ] - }, + location: undefined, + grouping: [ + { + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + lexeme: '{', + content: '{', + info: {}, + namespace: undefined + }, + { + type: RType.Symbol, + location: rangeFrom(2, 3, 2, 3), + lexeme: '}', + content: '}', + info: {}, + namespace: undefined + } + ], + lexeme: undefined, + info: { }, children: [ { type: RType.Number, @@ -135,193 +210,141 @@ describe('Parse expression lists', info: {} }, ], - }) - ) - - // { 42\na }{ x } seems to be illegal for R... 
- const multipleBraces = '{ 42\na }\n{ x }' - assertAst(`${JSON.stringify(multipleBraces)} (multiple braces)`, shell, - multipleBraces, - exprList( - { - type: RType.ExpressionList, - location: rangeFrom(1, 1, 2, 3), - lexeme: '{ 42\na }', - info: { - additionalTokens: [ - { - type: RType.Delimiter, - subtype: RawRType.BraceLeft, - location: rangeFrom(1, 1, 1, 1), - lexeme: '{' - }, - { - type: RType.Delimiter, - subtype: RawRType.BraceRight, - location: rangeFrom(2, 3, 2, 3), - lexeme: '}' - } - ] + }, + { + type: RType.ExpressionList, + location: undefined, + info: {}, + lexeme: undefined, + grouping: [ + { + type: RType.Symbol, + location: rangeFrom(3, 1, 3, 1), + lexeme: '{', + content: '{', + info: {}, + namespace: undefined }, - children: [ - { - type: RType.Number, - location: rangeFrom(1, 3, 1, 4), - lexeme: '42', - content: numVal(42), - info: {} - }, - { - type: RType.Symbol, - location: rangeFrom(2, 1, 2, 1), - namespace: undefined, - lexeme: 'a', - content: 'a', - info: {} - }, - ], - }, - { + { + type: RType.Symbol, + location: rangeFrom(3, 5, 3, 5), + lexeme: '}', + content: '}', + info: {}, + namespace: undefined + }], + children: [{ type: RType.Symbol, location: rangeFrom(3, 3, 3, 3), namespace: undefined, lexeme: 'x', content: 'x', - info: { - additionalTokens: [ - { - type: RType.Delimiter, - subtype: RawRType.BraceLeft, - location: rangeFrom(3, 1, 3, 1), - lexeme: '{' - }, - { - type: RType.Delimiter, - subtype: RawRType.BraceRight, - location: rangeFrom(3, 5, 3, 5), - lexeme: '}' - } - ] - } - } - ) + info: {} + }] + } ) - }) + ) + }) - describe('Expression lists with semicolons', () => { - assertAst('"42;a" (two elements in same line)', shell, - '42;a', - { - type: RType.ExpressionList, - lexeme: undefined, - info: { - additionalTokens: [ - { - type: RType.Delimiter, - subtype: RawRType.Semicolon, - location: rangeFrom(1, 3, 1, 3), - lexeme: ';' - } - ] + describe('Expression lists with semicolons', () => { + assertAst(label('Two Elements in 
Same Line', ['numbers', 'name-normal', 'semicolons']), + shell, '42;a', + { + type: RType.ExpressionList, + lexeme: undefined, + grouping: undefined, + info: { }, + children: [ + { + type: RType.Number, + location: rangeFrom(1, 1, 1, 2), + lexeme: '42', + content: numVal(42), + info: {} }, - children: [ - { - type: RType.Number, - location: rangeFrom(1, 1, 1, 2), - lexeme: '42', - content: numVal(42), - info: {} - }, - { - type: RType.Symbol, - location: rangeFrom(1, 4, 1, 4), - namespace: undefined, - lexeme: 'a', - content: 'a', - info: {} - } - ] - } - - ) + { + type: RType.Symbol, + location: rangeFrom(1, 4, 1, 4), + namespace: undefined, + lexeme: 'a', + content: 'a', + info: {} + } + ] + } + ) - assertAst('"{ 3; }" (empty)', shell, - '{ 3; }', - exprList({ + assertAst(label('Empty split with semicolon', ['numbers', 'semicolons', 'grouping']), + shell, '{ 3; }', exprList({ + type: RType.ExpressionList, + lexeme: undefined, + info: {}, + location: undefined, + grouping: [ + { + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + lexeme: '{', + content: '{', + info: {}, + namespace: undefined + }, + { + type: RType.Symbol, + location: rangeFrom(1, 6, 1, 6), + lexeme: '}', + content: '}', + info: {}, + namespace: undefined + } + ], + children: [{ type: RType.Number, location: rangeFrom(1, 3, 1, 3), lexeme: '3', content: numVal(3), - info: { - additionalTokens: [ - { - type: RType.Delimiter, - subtype: RawRType.Semicolon, - location: rangeFrom(1, 4, 1, 4), - lexeme: ';' - }, - { - type: RType.Delimiter, - subtype: RawRType.BraceLeft, - location: rangeFrom(1, 1, 1, 1), - lexeme: '{' - }, - { - type: RType.Delimiter, - subtype: RawRType.BraceRight, - location: rangeFrom(1, 6, 1, 6), - lexeme: '}' - } - ] - } - }) - ) + info: {} + }] + }) + ) - assertAst('Inconsistent split with semicolon', shell, - '1\n2; 3\n4', - { - type: RType.ExpressionList, - lexeme: undefined, - info: { - additionalTokens: [ - { - type: RType.Delimiter, - subtype: RawRType.Semicolon, - 
location: rangeFrom(2, 2, 2, 2), - lexeme: ';' - } - ] - }, - children: [ - { - type: RType.Number, - location: rangeFrom(1, 1, 1, 1), - lexeme: '1', - content: numVal(1), - info: {} - }, { - type: RType.Number, - location: rangeFrom(2, 1, 2, 1), - lexeme: '2', - content: numVal(2), - info: {} - }, { - type: RType.Number, - location: rangeFrom(2, 4, 2, 4), - lexeme: '3', - content: numVal(3), - info: {} - }, { - type: RType.Number, - location: rangeFrom(3, 1, 3, 1), - lexeme: '4', - content: numVal(4), - info: {} - } - ] - } - ) - }) + assertAst(label('Inconsistent split with semicolon', ['numbers', 'semicolons', 'newlines']), + shell, '1\n2; 3\n4', + { + type: RType.ExpressionList, + lexeme: undefined, + grouping: undefined, + info: { }, + children: [ + { + type: RType.Number, + location: rangeFrom(1, 1, 1, 1), + lexeme: '1', + content: numVal(1), + info: {} + }, { + type: RType.Number, + location: rangeFrom(2, 1, 2, 1), + lexeme: '2', + content: numVal(2), + info: {} + }, { + type: RType.Number, + location: rangeFrom(2, 4, 2, 4), + lexeme: '3', + content: numVal(3), + info: {} + }, { + type: RType.Number, + location: rangeFrom(3, 1, 3, 1), + lexeme: '4', + content: numVal(4), + info: {} + } + ] + } + ) }) +}) ) diff --git a/test/functionality/r-bridge/lang/ast/parse-function-call.ts b/test/functionality/r-bridge/lang/ast/parse-function-call.ts index 313f1db760..d52533ae2d 100644 --- a/test/functionality/r-bridge/lang/ast/parse-function-call.ts +++ b/test/functionality/r-bridge/lang/ast/parse-function-call.ts @@ -1,16 +1,13 @@ import { assertAst, withShell } from '../../../_helper/shell' import { exprList, numVal } from '../../../_helper/ast-builder' import { rangeFrom } from '../../../../../src/util/range' -import { RType } from '../../../../../src/r-bridge' -import { ensureExpressionList } from '../../../../../src/r-bridge/lang-4.x/ast/parser/xml/internal' +import { EmptyArgument, RType } from '../../../../../src' +import { label } from '../../../_helper/label' 
-describe('Parse function calls', withShell((shell) => { +describe('Parse function calls', withShell(shell => { describe('functions without arguments', () => { - assertAst( - 'f()', - shell, - 'f()', - exprList({ + assertAst(label('f()', ['call-normal', 'name-normal']), + shell, 'f()', exprList({ type: RType.FunctionCall, flavor: 'named', location: rangeFrom(1, 1, 1, 1), @@ -29,11 +26,8 @@ describe('Parse function calls', withShell((shell) => { ) }) describe('functions with arguments', () => { - assertAst( - 'f(1, 2)', - shell, - 'f(1, 2)', - exprList({ + assertAst(label('f(1, 2)', ['name-normal', 'call-normal', 'unnamed-arguments', 'numbers']), + shell, 'f(1, 2)', exprList({ type: RType.FunctionCall, flavor: 'named', location: rangeFrom(1, 1, 1, 1), @@ -78,13 +72,42 @@ describe('Parse function calls', withShell((shell) => { ], }) ) + assertAst(label('f(1,)', ['name-normal', 'call-normal', 'unnamed-arguments', 'numbers', 'empty-arguments']), + shell, 'f(1,)', exprList({ + type: RType.FunctionCall, + flavor: 'named', + location: rangeFrom(1, 1, 1, 1), + lexeme: 'f', + info: {}, + functionName: { + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + lexeme: 'f', + content: 'f', + namespace: undefined, + info: {} + }, + arguments: [ + { + type: RType.Argument, + location: rangeFrom(1, 3, 1, 3), + name: undefined, + info: {}, + lexeme: '1', + value: { + type: RType.Number, + location: rangeFrom(1, 3, 1, 3), + lexeme: '1', + content: numVal(1), + info: {} + } + }, EmptyArgument], + }) + ) }) describe('functions with named arguments', () => { - assertAst( - 'f(1, x=2, 4, y=3)', - shell, - 'f(1, x=2, 4, y=3)', - exprList({ + assertAst(label('f(1, x=2, 4, y=3)', ['name-normal', 'call-normal', 'unnamed-arguments', 'named-arguments', 'numbers']), + shell, 'f(1, x=2, 4, y=3)', exprList({ type: RType.FunctionCall, flavor: 'named', location: rangeFrom(1, 1, 1, 1), @@ -169,11 +192,8 @@ describe('Parse function calls', withShell((shell) => { ], }) ) - const code = 
'f("a"=3,\'x\'=2)' - assertAst( - `string arguments - ${code}`, - shell, - code, + assertAst(label('string arguments', ['name-normal', 'call-normal', 'string-arguments', 'strings']), + shell,'f("a"=3,\'x\'=2)', exprList({ type: RType.FunctionCall, flavor: 'named', @@ -232,49 +252,87 @@ describe('Parse function calls', withShell((shell) => { } } ], - }) - ) + })) }) describe('directly called functions', () => { - assertAst( - 'Directly call with 2', - shell, - '(function(x) { x })(2)', - exprList({ + assertAst(label('Directly call with 2', ['call-anonymous', 'formals-named', 'numbers', 'name-normal', 'normal-definition', 'grouping']), + shell, '(function(x) { x })(2)', exprList({ type: RType.FunctionCall, flavor: 'unnamed', location: rangeFrom(1, 1, 1, 19), lexeme: '(function(x) { x })', info: {}, calledFunction: { - type: RType.FunctionDefinition, - location: rangeFrom(1, 2, 1, 9), - lexeme: 'function', - parameters: [{ - type: RType.Parameter, - location: rangeFrom(1, 11, 1, 11), - special: false, - lexeme: 'x', - defaultValue: undefined, - name: { - type: RType.Symbol, - location: rangeFrom(1, 11, 1, 11), - lexeme: 'x', - content: 'x', - namespace: undefined, - info: {} - }, - info: {}, - }], - body: ensureExpressionList({ + type: RType.ExpressionList, + location: undefined, + lexeme: undefined, + info: {}, + grouping: [{ type: RType.Symbol, - location: rangeFrom(1, 16, 1, 16), - lexeme: 'x', - content: 'x', - namespace: undefined, - info: {} - }), - info: {} + location: rangeFrom(1, 1, 1, 1), + lexeme: '(', + content: '(', + info: {}, + namespace: undefined + }, { + type: RType.Symbol, + location: rangeFrom(1, 19, 1, 19), + lexeme: ')', + content: ')', + info: {}, + namespace: undefined + }], + children: [{ + type: RType.FunctionDefinition, + location: rangeFrom(1, 2, 1, 9), + lexeme: 'function', + parameters: [{ + type: RType.Parameter, + location: rangeFrom(1, 11, 1, 11), + special: false, + lexeme: 'x', + defaultValue: undefined, + name: { + type: 
RType.Symbol, + location: rangeFrom(1, 11, 1, 11), + lexeme: 'x', + content: 'x', + namespace: undefined, + info: {} + }, + info: {}, + }], + body: { + type: RType.ExpressionList, + location: undefined, + lexeme: undefined, + info: {}, + grouping: [{ + type: RType.Symbol, + location: rangeFrom(1, 14, 1, 14), + lexeme: '{', + content: '{', + info: {}, + namespace: undefined + }, { + type: RType.Symbol, + location: rangeFrom(1, 18, 1, 18), + lexeme: '}', + content: '}', + info: {}, + namespace: undefined + }], + children: [{ + type: RType.Symbol, + location: rangeFrom(1, 16, 1, 16), + lexeme: 'x', + content: 'x', + namespace: undefined, + info: {} + }] + }, + info: {} + }] }, arguments: [ { @@ -296,11 +354,8 @@ describe('Parse function calls', withShell((shell) => { ignoreAdditionalTokens: true } ) - assertAst( - 'Double call with only the second one being direct', - shell, - 'a(1)(2)', - exprList({ + assertAst(label('Double call with only the second one being direct', ['call-anonymous', 'numbers', 'name-normal', 'normal-definition']), + shell, 'a(1)(2)', exprList({ type: RType.FunctionCall, flavor: 'unnamed', location: rangeFrom(1, 1, 1, 4), @@ -355,10 +410,8 @@ describe('Parse function calls', withShell((shell) => { ) }) describe('functions with explicit namespacing', () => { - assertAst( - 'x::f()', - shell, - 'x::f()', + assertAst(label('x::f()', ['name-normal', 'call-normal', 'accessing-exported-names']), + shell, 'x::f()', exprList({ type: RType.FunctionCall, flavor: 'named', @@ -378,10 +431,8 @@ describe('Parse function calls', withShell((shell) => { ) }) describe('functions which are called as string', () => { - assertAst( - "'f'()", - shell, - "'f'()", + assertAst(label("'f'()", ['name-quoted', 'call-normal']), + shell, "'f'()", exprList({ type: RType.FunctionCall, flavor: 'named', @@ -400,31 +451,24 @@ describe('Parse function calls', withShell((shell) => { }) ) }) - describe('Reserved wrong functions', () => { - assertAst( - 'next()', - shell, - 'next()', 
- exprList({ + describe('Next and break as functions', () => { + assertAst(label('next()', ['name-normal', 'call-normal', 'next']), + shell, 'next()', exprList({ type: RType.Next, location: rangeFrom(1, 1, 1, 4), lexeme: 'next', info: {} - - }) - ) - assertAst( - 'break()', - shell, - 'break()', - exprList({ - type: RType.Break, - location: rangeFrom(1, 1, 1, 5), - lexeme: 'break', - info: {} - }) ) }) + assertAst(label('break()', ['name-normal', 'call-normal', 'break']), + shell, 'break()', exprList({ + type: RType.Break, + location: rangeFrom(1, 1, 1, 5), + lexeme: 'break', + info: {} + + }) + ) }) ) diff --git a/test/functionality/r-bridge/lang/ast/parse-function-definitions.ts b/test/functionality/r-bridge/lang/ast/parse-function-definitions.ts index 5c7fd1e700..c2d078a2e2 100644 --- a/test/functionality/r-bridge/lang/ast/parse-function-definitions.ts +++ b/test/functionality/r-bridge/lang/ast/parse-function-definitions.ts @@ -1,14 +1,13 @@ import { assertAst, withShell } from '../../../_helper/shell' import { exprList, numVal, parameter } from '../../../_helper/ast-builder' import { rangeFrom } from '../../../../../src/util/range' -import { RType } from '../../../../../src/r-bridge' -import { ensureExpressionList } from '../../../../../src/r-bridge/lang-4.x/ast/parser/xml/internal' +import { OperatorDatabase, RType } from '../../../../../src' +import { label } from '../../../_helper/label' -describe('Parse function definitions', withShell((shell) => { +describe('Parse function definitions', withShell(shell => { describe('without parameters', () => { - const noop = 'function() { }' - assertAst(`noop - ${noop}`, shell, noop, - exprList({ + assertAst(label('Noop', ['normal-definition', 'grouping']), + shell, 'function() { }', exprList({ type: RType.FunctionDefinition, location: rangeFrom(1, 1, 1, 8), lexeme: 'function', @@ -16,8 +15,23 @@ describe('Parse function definitions', withShell((shell) => { info: {}, body: { type: RType.ExpressionList, - location: 
rangeFrom(1, 12, 1, 14), - lexeme: '{ }', + grouping: [{ + type: RType.Symbol, + location: rangeFrom(1, 12, 1, 12), + lexeme: '{', + content: '{', + info: {}, + namespace: undefined + }, { + type: RType.Symbol, + location: rangeFrom(1, 14, 1, 14), + lexeme: '}', + content: '}', + info: {}, + namespace: undefined + }], + location: undefined, + lexeme: undefined, children: [], info: {} } @@ -25,61 +39,78 @@ describe('Parse function definitions', withShell((shell) => { ignoreAdditionalTokens: true } ) - const noArgs = 'function() { x + 2 * 3 }' - assertAst(`noArgs - ${noArgs}`, shell, noArgs, - exprList({ + assertAst(label('No Args', ['normal-definition', 'name-normal', 'numbers', 'grouping', ...OperatorDatabase['+'].capabilities, ...OperatorDatabase['*'].capabilities]), + shell, 'function() { x + 2 * 3 }',exprList({ type: RType.FunctionDefinition, location: rangeFrom(1, 1, 1, 8), lexeme: 'function', parameters: [], info: {}, - body: ensureExpressionList({ - type: RType.BinaryOp, - location: rangeFrom(1, 16, 1, 16), - flavor: 'arithmetic', - lexeme: '+', - operator: '+', + body: { + type: RType.ExpressionList, + location: undefined, + lexeme: undefined, info: {}, - lhs: { + grouping: [{ type: RType.Symbol, - location: rangeFrom(1, 14, 1, 14), - lexeme: 'x', - content: 'x', - namespace: undefined, - info: {} - }, - rhs: { + location: rangeFrom(1, 12, 1, 12), + lexeme: '{', + content: '{', + info: {}, + namespace: undefined + }, { + type: RType.Symbol, + location: rangeFrom(1, 24, 1, 24), + lexeme: '}', + content: '}', + info: {}, + namespace: undefined + }], + children: [{ type: RType.BinaryOp, - location: rangeFrom(1, 20, 1, 20), - flavor: 'arithmetic', - lexeme: '*', - operator: '*', + location: rangeFrom(1, 16, 1, 16), + lexeme: '+', + operator: '+', info: {}, lhs: { - type: RType.Number, - location: rangeFrom(1, 18, 1, 18), - lexeme: '2', - content: numVal(2), - info: {} + type: RType.Symbol, + location: rangeFrom(1, 14, 1, 14), + lexeme: 'x', + content: 'x', + 
namespace: undefined, + info: {} }, rhs: { - type: RType.Number, - location: rangeFrom(1, 22, 1, 22), - lexeme: '3', - content: numVal(3), - info: {} + type: RType.BinaryOp, + location: rangeFrom(1, 20, 1, 20), + lexeme: '*', + operator: '*', + info: {}, + lhs: { + type: RType.Number, + location: rangeFrom(1, 18, 1, 18), + lexeme: '2', + content: numVal(2), + info: {} + }, + rhs: { + type: RType.Number, + location: rangeFrom(1, 22, 1, 22), + lexeme: '3', + content: numVal(3), + info: {} + } } - } - }) + }] + } }), { ignoreAdditionalTokens: true } ) }) describe('with unnamed parameters', () => { - const oneParameter = 'function(x) { }' - assertAst(`one parameter - ${oneParameter}`, shell, oneParameter, - exprList({ + assertAst(label('One parameter', ['normal-definition', 'formals-named', 'grouping']), + shell, 'function(x) { }', exprList({ type: RType.FunctionDefinition, location: rangeFrom(1, 1, 1, 8), lexeme: 'function', @@ -87,8 +118,23 @@ describe('Parse function definitions', withShell((shell) => { info: {}, body: { type: RType.ExpressionList, - location: rangeFrom(1, 13, 1, 15), - lexeme: '{ }', + grouping: [{ + type: RType.Symbol, + location: rangeFrom(1, 13, 1, 13), + lexeme: '{', + content: '{', + info: {}, + namespace: undefined + }, { + type: RType.Symbol, + location: rangeFrom(1, 15, 1, 15), + lexeme: '}', + content: '}', + info: {}, + namespace: undefined + }], + location: undefined, + lexeme: undefined, children: [], info: {} } @@ -96,9 +142,8 @@ describe('Parse function definitions', withShell((shell) => { ignoreAdditionalTokens: true } ) - const multipleParameters = 'function(a,the,b) { b }' - assertAst(`multiple parameters - ${multipleParameters}`, shell, multipleParameters, - exprList({ + assertAst(label('Multiple parameters', ['normal-definition', 'name-normal', 'formals-named', 'grouping']), + shell, 'function(a,the,b) { b }', exprList({ type: RType.FunctionDefinition, location: rangeFrom(1, 1, 1, 8), lexeme: 'function', @@ -108,43 +153,77 @@ 
describe('Parse function definitions', withShell((shell) => { parameter('b', rangeFrom(1, 16, 1, 16)) ], info: {}, - body: ensureExpressionList({ - type: RType.Symbol, - location: rangeFrom(1, 21, 1, 21), - lexeme: 'b', - content: 'b', - namespace: undefined, - info: {} - }) + body: { + type: RType.ExpressionList, + location: undefined, + lexeme: undefined, + info: {}, + grouping: [{ + type: RType.Symbol, + location: rangeFrom(1, 19, 1, 19), + lexeme: '{', + content: '{', + info: {}, + namespace: undefined + }, { + type: RType.Symbol, + location: rangeFrom(1, 23, 1, 23), + lexeme: '}', + content: '}', + info: {}, + namespace: undefined + }], + children: [{ + type: RType.Symbol, + location: rangeFrom(1, 21, 1, 21), + lexeme: 'b', + content: 'b', + namespace: undefined, + info: {} + }] + } }), { ignoreAdditionalTokens: true } ) }) - describe('with special parameters (...)', () => { - const asSingleParameter = 'function(...) { }' - assertAst(`as single arg - ${asSingleParameter}`, shell, asSingleParameter, - exprList({ + describe('With Special Parameters (...)', () => { + assertAst(label('As Single Argument', ['normal-definition', 'formals-dot-dot-dot', 'grouping']), + shell, 'function(...) 
{ }', exprList({ type: RType.FunctionDefinition, location: rangeFrom(1, 1, 1, 8), lexeme: 'function', parameters: [parameter('...', rangeFrom(1, 10, 1, 12), undefined, true)], info: {}, - body: ensureExpressionList({ + body: { type: RType.ExpressionList, - location: rangeFrom(1, 15, 1, 17), - lexeme: '{ }', + grouping: [{ + type: RType.Symbol, + location: rangeFrom(1, 15, 1, 15), + lexeme: '{', + content: '{', + info: {}, + namespace: undefined + }, { + type: RType.Symbol, + location: rangeFrom(1, 17, 1, 17), + lexeme: '}', + content: '}', + info: {}, + namespace: undefined + }], + location: undefined, + lexeme: undefined, children: [], info: {} - }) + } }), { ignoreAdditionalTokens: true } ) - const asFirstParameters = 'function(..., a) { }' - assertAst(`as first arg - ${asFirstParameters}`, shell, asFirstParameters, - exprList({ + assertAst(label('As first arg', ['normal-definition', 'formals-dot-dot-dot', 'grouping', 'formals-named']), + shell, 'function(..., a) { }', exprList({ type: RType.FunctionDefinition, location: rangeFrom(1, 1, 1, 8), lexeme: 'function', @@ -155,8 +234,23 @@ describe('Parse function definitions', withShell((shell) => { info: {}, body: { type: RType.ExpressionList, - location: rangeFrom(1, 18, 1, 20), - lexeme: '{ }', + location: undefined, + grouping: [{ + type: RType.Symbol, + location: rangeFrom(1, 18, 1, 18), + lexeme: '{', + content: '{', + info: {}, + namespace: undefined + }, { + type: RType.Symbol, + location: rangeFrom(1, 20, 1, 20), + lexeme: '}', + content: '}', + info: {}, + namespace: undefined + }], + lexeme: undefined, children: [], info: {} } @@ -165,9 +259,8 @@ describe('Parse function definitions', withShell((shell) => { } ) - const asLastParameter = 'function(a, the, ...) { ... }' - assertAst(`as last arg - ${asLastParameter}`, shell, asLastParameter, - exprList({ + assertAst(label('As last arg', ['normal-definition', 'formals-dot-dot-dot', 'grouping', 'formals-named', 'name-normal']), + shell, 'function(a, the, ...) 
{ ... }', exprList({ type: RType.FunctionDefinition, location: rangeFrom(1, 1, 1, 8), lexeme: 'function', @@ -177,23 +270,43 @@ describe('Parse function definitions', withShell((shell) => { parameter('...', rangeFrom(1, 18, 1, 20), undefined, true) ], info: {}, - body: ensureExpressionList({ - type: RType.Symbol, - location: rangeFrom(1, 25, 1, 27), - lexeme: '...', - content: '...', - namespace: undefined, - info: {} - }) + body: { + type: RType.ExpressionList, + location: undefined, + lexeme: undefined, + info: {}, + grouping: [{ + type: RType.Symbol, + location: rangeFrom(1, 23, 1, 23), + lexeme: '{', + content: '{', + info: {}, + namespace: undefined + }, { + type: RType.Symbol, + location: rangeFrom(1, 29, 1, 29), + lexeme: '}', + content: '}', + info: {}, + namespace: undefined + }], + children: [{ + type: RType.Symbol, + location: rangeFrom(1, 25, 1, 27), + lexeme: '...', + content: '...', + namespace: undefined, + info: {} + }] + } }), { ignoreAdditionalTokens: true } ) }) - describe('with named parameters', () => { - const oneParameter = 'function(x=3) { }' - assertAst(`one parameter - ${oneParameter}`, shell, oneParameter, - exprList({ + describe('With Named Parameters', () => { + assertAst(label('One Parameter', ['normal-definition', 'formals-named', 'formals-default', 'grouping', 'name-normal', 'numbers']), + shell, 'function(x=3) { }', exprList({ type: RType.FunctionDefinition, location: rangeFrom(1, 1, 1, 8), lexeme: 'function', @@ -209,8 +322,23 @@ describe('Parse function definitions', withShell((shell) => { info: {}, body: { type: RType.ExpressionList, - location: rangeFrom(1, 15, 1, 17), - lexeme: '{ }', + grouping: [{ + type: RType.Symbol, + location: rangeFrom(1, 15, 1, 15), + lexeme: '{', + content: '{', + info: {}, + namespace: undefined + }, { + type: RType.Symbol, + location: rangeFrom(1, 17, 1, 17), + lexeme: '}', + content: '}', + info: {}, + namespace: undefined + }], + location: undefined, + lexeme: undefined, children: [], info: {} } @@ 
-219,9 +347,8 @@ describe('Parse function definitions', withShell((shell) => { } ) - const multipleParameters = 'function(a, x=3, huhu="hehe") { x }' - assertAst(`multiple parameter - ${multipleParameters}`, shell, multipleParameters, - exprList({ + assertAst(label('Multiple Parameter', ['normal-definition', 'formals-named', 'formals-default', 'grouping', 'name-normal', 'numbers', 'name-normal', 'strings']), + shell, 'function(a, x=3, huhu="hehe") { x }', exprList({ type: RType.FunctionDefinition, location: rangeFrom(1, 1, 1, 8), lexeme: 'function', @@ -243,14 +370,35 @@ describe('Parse function definitions', withShell((shell) => { }) ], info: {}, - body: ensureExpressionList({ - type: RType.Symbol, - location: rangeFrom(1, 33, 1, 33), - lexeme: 'x', - content: 'x', - namespace: undefined, - info: {} - }) + body: { + type: RType.ExpressionList, + lexeme: undefined, + location: undefined, + info: {}, + grouping: [{ + type: RType.Symbol, + location: rangeFrom(1, 31, 1, 31), + lexeme: '{', + content: '{', + info: {}, + namespace: undefined + }, { + type: RType.Symbol, + location: rangeFrom(1, 35, 1, 35), + lexeme: '}', + content: '}', + info: {}, + namespace: undefined + }], + children: [{ + type: RType.Symbol, + location: rangeFrom(1, 33, 1, 33), + lexeme: 'x', + content: 'x', + namespace: undefined, + info: {} + }] + } }), { ignoreAdditionalTokens: true } diff --git a/test/functionality/r-bridge/lang/ast/parse-hooks.ts b/test/functionality/r-bridge/lang/ast/parse-hooks.ts deleted file mode 100644 index 3221912e60..0000000000 --- a/test/functionality/r-bridge/lang/ast/parse-hooks.ts +++ /dev/null @@ -1,45 +0,0 @@ -import { retrieveNormalizedAst, withShell } from '../../../_helper/shell' -import { assert } from 'chai' -import { requestFromInput } from '../../../../../src/r-bridge' -import { SteppingSlicer } from '../../../../../src/core' - -describe('Check hooks are called appropriately', withShell(shell => { - it('Call the number hook!', async() => { - let before = 
false - let after = false - await retrieveNormalizedAst(shell, '1', { - values: { - onNumber: { - before: () => { - before = true; return undefined - }, - after: () => { - after = true; return undefined - } - }, - }, - }) - assert.isTrue(before, 'The number before-hook was not called!') - assert.isTrue(after, 'The number after-hook was not called!') - }) - it('Call the string hook!', async() => { - let counter = 0 - - await new SteppingSlicer({ - stepOfInterest: 'normalize', - shell, - request: requestFromInput('x <- "foo"'), - hooks: { - values: { - onString: { - after: () => { - counter++ - }, - } - } - } - }).allRemainingSteps() - - assert.equal(counter, 1, 'The string after-hook should be called once') - }) -})) diff --git a/test/functionality/r-bridge/lang/ast/parse-operations.ts b/test/functionality/r-bridge/lang/ast/parse-operations.ts index aa2ad946b5..a8855f5d56 100644 --- a/test/functionality/r-bridge/lang/ast/parse-operations.ts +++ b/test/functionality/r-bridge/lang/ast/parse-operations.ts @@ -1,231 +1,168 @@ import { assertAst, withShell } from '../../../_helper/shell' import { exprList, numVal } from '../../../_helper/ast-builder' -import { - RArithmeticBinaryOpPool, - RLogicalBinaryOpPool, - RUnaryOpPool, -} from '../../../_helper/provider' -import { type RShell, RType, ComparisonOperators } from '../../../../../src/r-bridge' +import { AssignmentOperators, BinaryOperatorPool, UnaryOperatorPool } from '../../../_helper/provider' +import { OperatorDatabase, type RShell, RType } from '../../../../../src' import { rangeFrom } from '../../../../../src/util/range' +import { label } from '../../../_helper/label' +import { startAndEndsWith } from '../../../../../src/util/strings' -describe('Parse simple operations', - withShell((shell) => { - describe('unary operations', () => { - for(const opSuite of RUnaryOpPool) { - describe(`${opSuite.label} operations`, () => { - for(const op of opSuite.pool) { - const simpleInput = `${op.str}42` - const opOffset = 
op.str.length - 1 - assertAst( - `${simpleInput}`, - shell, - simpleInput, - exprList({ - type: RType.UnaryOp, - operator: op.str, - flavor: op.flavor, - lexeme: op.str, - location: rangeFrom(1, 1, 1, 1 + opOffset), - info: {}, - operand: { - type: RType.Number, - location: rangeFrom(1, 2 + opOffset, 1, 3 + opOffset), - lexeme: '42', - content: numVal(42), - info: {} - }, - }) - ) - } - }) - } - }) - describe('? question', () => { - assertAst( - '? x', - shell, - '? x', - exprList({ +describe('Parse simple operations', withShell(shell => { + describe('unary operations', () => { + for(const op of UnaryOperatorPool) { + const simpleInput = `${op}42` + const opOffset = op.length - 1 + const opData = OperatorDatabase[op] + assertAst(label(`${simpleInput}`, ['unary-operator', 'numbers', ...opData.capabilities]), + shell, simpleInput, exprList({ type: RType.UnaryOp, - location: rangeFrom(1, 1, 1, 1), - operator: '?', - lexeme: '?', - flavor: 'logical', + operator: op, + lexeme: op, + location: rangeFrom(1, 1, 1, 1 + opOffset), info: {}, operand: { - type: RType.Symbol, - location: rangeFrom(1, 3, 1, 3), - lexeme: 'x', - content: 'x', - namespace: undefined, - info: {} - } + type: RType.Number, + location: rangeFrom(1, 2 + opOffset, 1, 3 + opOffset), + lexeme: '42', + content: numVal(42), + info: {} + }, }) ) - }) - - describe('binary operations', () => { - for(const opSuite of [ - { label: 'arithmetic', pool: RArithmeticBinaryOpPool }, - { - label: 'logical', - pool: RLogicalBinaryOpPool, - }, - ]) { - describe(`${opSuite.label} operations`, () => { - for(const op of opSuite.pool) { - describePrecedenceTestsForOp(op, shell) - } - }) - } - describe('comparison operations', () => { - for(const op of ComparisonOperators) { - describe(op, () => { - const simpleInput = `1 ${op} 1` - const opOffset = op.length - 1 - assertAst( - simpleInput, - shell, - simpleInput, - exprList({ - type: RType.BinaryOp, - operator: op, - lexeme: op, - flavor: 'comparison', - location: 
rangeFrom(1, 3, 1, 3 + opOffset), - info: {}, - lhs: { - type: RType.Number, - location: rangeFrom(1, 1, 1, 1), - lexeme: '1', - content: numVal(1), - info: {} - }, - rhs: { - type: RType.Number, - location: rangeFrom(1, 5 + opOffset, 1, 5 + opOffset), - lexeme: '1', - content: numVal(1), - info: {} - }, - }) - ) - }) + } + }) + describe('? question', () => { + assertAst(label('? x', ['unary-operator', 'built-in-help', 'name-normal']), + shell, '? x', exprList({ + type: RType.UnaryOp, + location: rangeFrom(1, 1, 1, 1), + operator: '?', + lexeme: '?', + info: {}, + operand: { + type: RType.Symbol, + location: rangeFrom(1, 3, 1, 3), + lexeme: 'x', + content: 'x', + namespace: undefined, + info: {} } }) + ) + }) - describe('intermixed with comments', () => { - assertAst( - '1 + # comment\n2', - shell, - '1 + # comment\n2', - exprList({ // hoist children - type: RType.ExpressionList, - location: rangeFrom(1, 1, 2, 1), - info: {}, - lexeme: '1 + # comment\n2', - children: [ - { - type: RType.Comment, - content: ' comment', - lexeme: '# comment', - location: rangeFrom(1, 5, 1, 13), - info: {} + describe('Binary Operations', () => { + for(const op of [...BinaryOperatorPool].filter(op => !startAndEndsWith(op, '%'))) { + describePrecedenceTestsForOp(op, shell) + } + + describe('Intermixed with comments', () => { + assertAst(label('1 + # comment\n2', ['binary-operator', 'infix-calls', 'function-calls', 'numbers', 'comments', 'newlines', ...OperatorDatabase['+'].capabilities]), + shell, '1 + # comment\n2', exprList({ // hoist children + type: RType.ExpressionList, + location: rangeFrom(1, 1, 2, 1), + grouping: undefined, + info: {}, + lexeme: '1 + # comment\n2', + children: [ + { + type: RType.Comment, + content: ' comment', + lexeme: '# comment', + location: rangeFrom(1, 5, 1, 13), + info: {} + }, + { + type: RType.BinaryOp, + info: {}, + lexeme: '+', + operator: '+', + location: rangeFrom(1, 3, 1, 3), + lhs: { + type: RType.Number, + content: numVal(1), + info: {}, + 
lexeme: '1', + location: rangeFrom(1, 1, 1, 1) }, - { - type: RType.BinaryOp, - flavor: 'arithmetic', + rhs: { + type: RType.Number, + content: numVal(2), info: {}, - lexeme: '+', - operator: '+', - location: rangeFrom(1, 3, 1, 3), - lhs: { - type: RType.Number, - content: numVal(1), - info: {}, - lexeme: '1', - location: rangeFrom(1, 1, 1, 1) - }, - rhs: { - type: RType.Number, - content: numVal(2), - info: {}, - lexeme: '2', - location: rangeFrom(2, 1, 2, 1) - } + lexeme: '2', + location: rangeFrom(2, 1, 2, 1) } - ] - }) - ) - }) - describe('Using unknown special infix operator', () => { - assertAst( - '1 %xx% 2', - shell, - '1 %xx% 2', - exprList( + } + ] + }), { + ignoreAdditionalTokens: false + } + ) + }) + describe('Using unknown special infix operator', () => { + assertAst(label('1 %xx% 2', ['binary-operator', 'infix-calls', 'function-calls', 'numbers', 'special-operator']), + shell, '1 %xx% 2', exprList({ + type: RType.FunctionCall, + flavor: 'named', + infixSpecial: true, + info: {}, + lexeme: '1 %xx% 2', + functionName: { + type: RType.Symbol, + lexeme: '%xx%', + content: '%xx%', + namespace: undefined, + location: rangeFrom(1, 3, 1, 6), + info: {} + }, + location: rangeFrom(1, 3, 1, 6), + arguments: [ { - type: RType.FunctionCall, - flavor: 'named', - infixSpecial: true, - info: {}, - lexeme: '1 %xx% 2', - functionName: { - type: RType.Symbol, - lexeme: '%xx%', - content: '%xx%', - namespace: undefined, - location: rangeFrom(1, 3, 1, 6), - info: {} - }, - location: rangeFrom(1, 3, 1, 6), - arguments: [ - { - type: RType.Argument, - info: {}, - lexeme: '1', - name: undefined, - location: rangeFrom(1, 1, 1, 1), - value: { - type: RType.Number, - content: numVal(1), - info: {}, - lexeme: '1', - location: rangeFrom(1, 1, 1, 1) - } - }, { - type: RType.Argument, - info: {}, - lexeme: '2', - name: undefined, - location: rangeFrom(1, 8, 1, 8), - value: { - type: RType.Number, - content: numVal(2), - info: {}, - lexeme: '2', - location: rangeFrom(1, 8, 1, 8) - } 
- } - ] + type: RType.Argument, + info: {}, + lexeme: '1', + name: undefined, + location: rangeFrom(1, 1, 1, 1), + value: { + type: RType.Number, + content: numVal(1), + info: {}, + lexeme: '1', + location: rangeFrom(1, 1, 1, 1) + } + }, { + type: RType.Argument, + info: {}, + lexeme: '2', + name: undefined, + location: rangeFrom(1, 8, 1, 8), + value: { + type: RType.Number, + content: numVal(2), + info: {}, + lexeme: '2', + location: rangeFrom(1, 8, 1, 8) + } } - ) - ) - }) + ] + }) + ) }) }) +}) ) -function describePrecedenceTestsForOp(op: typeof RArithmeticBinaryOpPool[number] | typeof RLogicalBinaryOpPool[number], shell: RShell): void { - describe(`${op.str} (${op.flavor})`, () => { - const simpleInput = `1 ${op.str} 1` - const opOffset = op.str.length - 1 - assertAst(simpleInput, shell, simpleInput, exprList( - { +function describePrecedenceTestsForOp(op: string, shell: RShell): void { + const comparisonPrecedenceOperators = new Set(['<', '<=', '>', '>=', '==', '!=', '', '==']) + + describe(`${op}`, () => { + const simpleInput = `1 ${op} 1` + const opOffset = op.length - 1 + const opData = OperatorDatabase[op] + assertAst(label(simpleInput, ['binary-operator', 'infix-calls', 'function-calls', 'numbers', ...opData.capabilities]), + shell, simpleInput, exprList({ type: RType.BinaryOp, - operator: op.str, - lexeme: op.str, - flavor: op.flavor, + operator: op, + lexeme: op, location: rangeFrom(1, 3, 1, 3 + opOffset), info: {}, lhs: { @@ -243,103 +180,256 @@ function describePrecedenceTestsForOp(op: typeof RArithmeticBinaryOpPool[number] info: {} } } - )) + )) - // offsets encode additional shifts by parenthesis - const precedenceTests = [ - { input: `(1 ${op.str} 1) ${op.str} 42`, offsetL: 1, offsetC: 2, offsetR: 2 }, - { input: `(1 ${op.str} 1) ${op.str} (42)`, offsetL: 1, offsetC: 2, offsetR: 3 } - ] - // exponentiation has a different behavior when nested without braces - if(op.str !== '^' && op.str !== '**') { - precedenceTests.push({ input: `1 ${op.str} 1 
${op.str} 42`, offsetL: 0, offsetC: 0, offsetR: 0 }) - } + if(!comparisonPrecedenceOperators.has(op)) { + let [offsetL, offsetC, offsetR] = [1, 2, 2] - for(const defaultPrecedence of precedenceTests) { - assertAst(defaultPrecedence.input, shell, defaultPrecedence.input, exprList( - { + assertAst(label('Single Parenthesis', ['binary-operator', 'infix-calls', 'function-calls', 'numbers', 'grouping', ...opData.capabilities]), + shell, `(1 ${op} 1) ${op} 42`, exprList({ type: RType.BinaryOp, - operator: op.str, - lexeme: op.str, - flavor: op.flavor, - location: rangeFrom(1, 7 + opOffset + defaultPrecedence.offsetC, 1, 7 + 2 * opOffset + defaultPrecedence.offsetC), + operator: op, + lexeme: op, + location: rangeFrom(1, 7 + opOffset + offsetC, 1, 7 + 2 * opOffset + offsetC), info: {}, lhs: { - type: RType.BinaryOp, - operator: op.str, - lexeme: op.str, - flavor: op.flavor, - location: rangeFrom(1, 3 + defaultPrecedence.offsetL, 1, 3 + opOffset + defaultPrecedence.offsetL), + type: RType.ExpressionList, + location: undefined, + grouping: [{ + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + lexeme: '(', + content: '(', + info: {}, + namespace: undefined + }, { + type: RType.Symbol, + location: rangeFrom(1, 6 + opOffset + offsetL, 1, 6 + opOffset + offsetL), + lexeme: ')', + content: ')', + info: {}, + namespace: undefined + }], + lexeme: undefined, info: {}, - lhs: { - type: RType.Number, - location: rangeFrom(1, 1 + defaultPrecedence.offsetL, 1, 1 + defaultPrecedence.offsetL), - lexeme: '1', - content: numVal(1), - info: {} - }, - rhs: { - type: RType.Number, - location: rangeFrom(1, 5 + opOffset + defaultPrecedence.offsetL, 1, 5 + opOffset + defaultPrecedence.offsetL), - lexeme: '1', - content: numVal(1), - info: {} - } + children: [{ + type: RType.BinaryOp, + operator: op, + lexeme: op, + location: rangeFrom(1, 3 + offsetL, 1, 3 + opOffset + offsetL), + info: {}, + lhs: { + type: RType.Number, + location: rangeFrom(1, 1 + offsetL, 1, 1 + offsetL), + lexeme: 
'1', + content: numVal(1), + info: {} + }, + rhs: { + type: RType.Number, + location: rangeFrom(1, 5 + opOffset + offsetL, 1, 5 + opOffset + offsetL), + lexeme: '1', + content: numVal(1), + info: {} + } + }] }, rhs: { type: RType.Number, - location: rangeFrom(1, 9 + 2 * opOffset + defaultPrecedence.offsetR, 1, 10 + 2 * opOffset + defaultPrecedence.offsetR), + location: rangeFrom(1, 9 + 2 * opOffset + offsetR, 1, 10 + 2 * opOffset + offsetR), lexeme: '42', content: numVal(42), info: {} } - } - ), { - ignoreAdditionalTokens: true - }) - } + }), { + ignoreAdditionalTokens: true + }); - const invertedPrecedenceInput = `1 ${op.str} (1 ${op.str} 42)` - assertAst(invertedPrecedenceInput, shell, invertedPrecedenceInput, exprList( - { - type: RType.BinaryOp, - operator: op.str, - lexeme: op.str, - flavor: op.flavor, - location: rangeFrom(1, 3, 1, 3 + opOffset), - info: {}, - lhs: { - type: RType.Number, - location: rangeFrom(1, 1, 1, 1), - content: numVal(1), - lexeme: '1', - info: {} - }, - rhs: { + ([offsetL, offsetC, offsetR] = [1, 2, 3]) + assertAst(label('Multiple Parenthesis', ['binary-operator', 'infix-calls', 'function-calls', 'numbers', 'grouping', ...opData.capabilities]), + shell, `(1 ${op} 1) ${op} (42)`, exprList({ type: RType.BinaryOp, - operator: op.str, - lexeme: op.str, - flavor: op.flavor, - location: rangeFrom(1, 8 + opOffset, 1, 8 + 2 * opOffset), + operator: op, + lexeme: op, + location: rangeFrom(1, 7 + opOffset + offsetC, 1, 7 + 2 * opOffset + offsetC), + info: {}, + lhs: { + type: RType.ExpressionList, + location: undefined, + grouping: [{ + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + lexeme: '(', + content: '(', + info: {}, + namespace: undefined + }, { + type: RType.Symbol, + location: rangeFrom(1, 6 + opOffset + offsetL, 1, 6 + opOffset + offsetL), + lexeme: ')', + content: ')', + info: {}, + namespace: undefined + }], + lexeme: undefined, + info: {}, + children: [{ + type: RType.BinaryOp, + operator: op, + lexeme: op, + location: 
rangeFrom(1, 3 + offsetL, 1, 3 + opOffset + offsetL), + info: {}, + lhs: { + type: RType.Number, + location: rangeFrom(1, 1 + offsetL, 1, 1 + offsetL), + lexeme: '1', + content: numVal(1), + info: {} + }, + rhs: { + type: RType.Number, + location: rangeFrom(1, 5 + opOffset + offsetL, 1, 5 + opOffset + offsetL), + lexeme: '1', + content: numVal(1), + info: {} + } + }] + }, + rhs: { + type: RType.ExpressionList, + location: undefined, + grouping: [{ + type: RType.Symbol, + location: rangeFrom(1, 8 + 2 * opOffset + offsetR, 1, 8 + 2 * opOffset + offsetR), + lexeme: '(', + content: '(', + info: {}, + namespace: undefined + }, { + type: RType.Symbol, + location: rangeFrom(1, 11 + 2 * opOffset + offsetR, 1, 11 + 2 * opOffset + offsetR), + lexeme: ')', + content: ')', + info: {}, + namespace: undefined + }], + lexeme: undefined, + info: {}, + children: [{ + type: RType.Number, + location: rangeFrom(1, 9 + 2 * opOffset + offsetR, 1, 10 + 2 * opOffset + offsetR), + lexeme: '42', + content: numVal(42), + info: {} + }] + } + }), { + ignoreAdditionalTokens: true + }) + + // exponentiation and assignments has a different behavior when nested without parenthesis + if(op !== '^' && op !== '**' && !AssignmentOperators.includes(op)) { + [offsetL, offsetC, offsetR] = [0, 0, 0] + + assertAst(label('No Parenthesis', ['binary-operator', 'infix-calls', 'function-calls', 'numbers', 'grouping', ...opData.capabilities]), + shell, `1 ${op} 1 ${op} 42`, exprList({ + type: RType.BinaryOp, + operator: op, + lexeme: op, + location: rangeFrom(1, 7 + opOffset + offsetC, 1, 7 + 2 * opOffset + offsetC), + info: {}, + lhs: { + type: RType.BinaryOp, + operator: op, + lexeme: op, + location: rangeFrom(1, 3 + offsetL, 1, 3 + opOffset + offsetL), + info: {}, + lhs: { + type: RType.Number, + location: rangeFrom(1, 1 + offsetL, 1, 1 + offsetL), + lexeme: '1', + content: numVal(1), + info: {} + }, + rhs: { + type: RType.Number, + location: rangeFrom(1, 5 + opOffset + offsetL, 1, 5 + opOffset + offsetL), + 
lexeme: '1', + content: numVal(1), + info: {} + } + }, + rhs: { + type: RType.Number, + location: rangeFrom(1, 9 + 2 * opOffset + offsetR, 1, 10 + 2 * opOffset + offsetR), + lexeme: '42', + content: numVal(42), + info: {} + } + }), { + ignoreAdditionalTokens: true + }) + } + + assertAst(label('Invert precedence', ['binary-operator', 'infix-calls', 'function-calls', 'numbers', 'grouping', ...opData.capabilities]), + shell, `1 ${op} (1 ${op} 42)`, exprList({ + type: RType.BinaryOp, + operator: op, + lexeme: op, + location: rangeFrom(1, 3, 1, 3 + opOffset), info: {}, lhs: { type: RType.Number, - location: rangeFrom(1, 6 + opOffset, 1, 6 + opOffset), + location: rangeFrom(1, 1, 1, 1), content: numVal(1), lexeme: '1', info: {} }, rhs: { - type: RType.Number, - location: rangeFrom(1, 10 + 2 * opOffset, 1, 11 + 2 * opOffset), - content: numVal(42), - lexeme: '42', - info: {} + type: RType.ExpressionList, + location: undefined, + lexeme: undefined, + info: {}, + grouping: [{ + type: RType.Symbol, + location: rangeFrom(1, 5 + opOffset, 1, 5 + opOffset), + lexeme: '(', + content: '(', + info: {}, + namespace: undefined + }, { + type: RType.Symbol, + location: rangeFrom(1, 12 + 2*opOffset, 1, 12 + 2*opOffset), + lexeme: ')', + content: ')', + info: {}, + namespace: undefined + }], + children: [{ + type: RType.BinaryOp, + operator: op, + lexeme: op, + location: rangeFrom(1, 8 + opOffset, 1, 8 + 2 * opOffset), + info: {}, + lhs: { + type: RType.Number, + location: rangeFrom(1, 6 + opOffset, 1, 6 + opOffset), + content: numVal(1), + lexeme: '1', + info: {} + }, + rhs: { + type: RType.Number, + location: rangeFrom(1, 10 + 2 * opOffset, 1, 11 + 2 * opOffset), + content: numVal(42), + lexeme: '42', + info: {} + } + }] } - } - } - ), { - ignoreAdditionalTokens: true - }) + }), { + ignoreAdditionalTokens: true + }) + } }) } diff --git a/test/functionality/r-bridge/lang/ast/parse-pipes.ts b/test/functionality/r-bridge/lang/ast/parse-pipes.ts index 6fa6bbd62f..0f5f5314bd 100644 --- 
a/test/functionality/r-bridge/lang/ast/parse-pipes.ts +++ b/test/functionality/r-bridge/lang/ast/parse-pipes.ts @@ -1,15 +1,13 @@ import { assertAst, withShell } from '../../../_helper/shell' import { exprList } from '../../../_helper/ast-builder' import { rangeFrom } from '../../../../../src/util/range' -import { RType } from '../../../../../src/r-bridge' +import { RType } from '../../../../../src' import { MIN_VERSION_PIPE } from '../../../../../src/r-bridge/lang-4.x/ast/model/versions' +import { label } from '../../../_helper/label' describe('Parse Pipes', withShell(shell => { - assertAst( - 'x |> f()', - shell, - 'x |> f()', - exprList({ + assertAst(label('x |> f()', ['name-normal', 'built-in-pipe-and-pipe-bind', 'call-normal']), + shell, 'x |> f()', exprList({ type: RType.Pipe, location: rangeFrom(1, 3, 1, 4), lexeme: '|>', @@ -48,11 +46,8 @@ describe('Parse Pipes', withShell(shell => { }), { minRVersion: MIN_VERSION_PIPE } ) - assertAst( - 'x |> f() |> g()', - shell, - 'x |> f() |> g()', - exprList({ + assertAst(label('x |> f() |> g()', ['name-normal', 'built-in-pipe-and-pipe-bind', 'call-normal']), + shell, 'x |> f() |> g()', exprList({ type: RType.Pipe, location: rangeFrom(1, 10, 1, 11), lexeme: '|>', diff --git a/test/functionality/r-bridge/lang/ast/parse-snippets.ts b/test/functionality/r-bridge/lang/ast/parse-snippets.ts index 3a2546b7ff..4b6a429c81 100644 --- a/test/functionality/r-bridge/lang/ast/parse-snippets.ts +++ b/test/functionality/r-bridge/lang/ast/parse-snippets.ts @@ -1,15 +1,15 @@ import { exprList, numVal } from '../../../_helper/ast-builder' import { assertAst, withShell } from '../../../_helper/shell' import { rangeFrom } from '../../../../../src/util/range' -import { RType } from '../../../../../src/r-bridge' -import { ensureExpressionList } from '../../../../../src/r-bridge/lang-4.x/ast/parser/xml/internal' +import { OperatorDatabase, RType } from '../../../../../src' +import { label } from '../../../_helper/label' -describe('Parse 
larger snippets', withShell((shell) => { +describe('Parse Larger Snippets', withShell((shell) => { describe('if-then, assignments, symbols, and comparisons', () => { - assertAst( - 'max function', - shell, - ` + assertAst(label('Manual Max Function', [ + 'name-normal', ...OperatorDatabase['<-'].capabilities, ...OperatorDatabase['='].capabilities, ...OperatorDatabase['->'].capabilities, ...OperatorDatabase['<<-'].capabilities, ...OperatorDatabase['->>'].capabilities, 'numbers', 'if', ...OperatorDatabase['>'].capabilities, 'grouping', 'newlines' + ]), shell, + ` a <- 3 b = 4 if (a >b) { @@ -19,141 +19,169 @@ if (a >b) { b ->> max } max - `, - exprList( - { + `, exprList( + { + type: RType.BinaryOp, + lexeme: '<-', + operator: '<-', + location: rangeFrom(2, 3, 2, 4), + info: {}, + lhs: { + type: RType.Symbol, + lexeme: 'a', + namespace: undefined, + content: 'a', + location: rangeFrom(2, 1, 2, 1), + info: {} + }, + rhs: { + type: RType.Number, + lexeme: '3', + content: numVal(3), + location: rangeFrom(2, 6, 2, 6), + info: {} + }, + }, + { + type: RType.BinaryOp, + lexeme: '=', + operator: '=', + location: rangeFrom(3, 3, 3, 3), + info: {}, + lhs: { + type: RType.Symbol, + lexeme: 'b', + namespace: undefined, + content: 'b', + location: rangeFrom(3, 1, 3, 1), + info: {} + }, + rhs: { + type: RType.Number, + lexeme: '4', + content: numVal(4), + location: rangeFrom(3, 5, 3, 5), + info: {} + }, + }, + { + type: RType.IfThenElse, + lexeme: 'if', + location: rangeFrom(4, 1, 4, 2), + info: {}, + condition: { type: RType.BinaryOp, - flavor: 'assignment', - lexeme: '<-', - operator: '<-', - location: rangeFrom(2, 3, 2, 4), + lexeme: '>', + operator: '>', + location: rangeFrom(4, 7, 4, 7), info: {}, lhs: { type: RType.Symbol, lexeme: 'a', namespace: undefined, content: 'a', - location: rangeFrom(2, 1, 2, 1), + location: rangeFrom(4, 5, 4, 5), info: {} }, rhs: { - type: RType.Number, - lexeme: '3', - content: numVal(3), - location: rangeFrom(2, 6, 2, 6), - info: {} - }, - }, - { 
- type: RType.BinaryOp, - flavor: 'assignment', - lexeme: '=', - operator: '=', - location: rangeFrom(3, 3, 3, 3), - info: {}, - lhs: { type: RType.Symbol, lexeme: 'b', namespace: undefined, content: 'b', - location: rangeFrom(3, 1, 3, 1), + location: rangeFrom(4, 8, 4, 8), info: {} }, - rhs: { - type: RType.Number, - lexeme: '4', - content: numVal(4), - location: rangeFrom(3, 5, 3, 5), - info: {} - }, }, - { - type: RType.IfThenElse, - lexeme: 'if', - location: rangeFrom(4, 1, 4, 2), - info: {}, - condition: { - type: RType.BinaryOp, - flavor: 'comparison', - lexeme: '>', - operator: '>', - location: rangeFrom(4, 7, 4, 7), - info: {}, - lhs: { - type: RType.Symbol, - lexeme: 'a', - namespace: undefined, - content: 'a', - location: rangeFrom(4, 5, 4, 5), - info: {} - }, - rhs: { - type: RType.Symbol, - lexeme: 'b', - namespace: undefined, - content: 'b', - location: rangeFrom(4, 8, 4, 8), - info: {} + then: { + type: RType.ExpressionList, + grouping: [{ + type: RType.Symbol, + lexeme: '{', + location: rangeFrom(4, 11, 4, 11), + content: '{', + info: {}, + namespace: undefined + }, { + type: RType.Symbol, + lexeme: '}', + location: rangeFrom(7, 1, 7, 1), + content: '}', + info: {}, + namespace: undefined + }], + lexeme: undefined, + location: undefined, + info: {}, + children: [ + { + type: RType.BinaryOp, + lexeme: '<<-', + operator: '<<-', + location: rangeFrom(5, 7, 5, 9), + info: {}, + lhs: { + type: RType.Symbol, + lexeme: 'max', + namespace: undefined, + content: 'max', + location: rangeFrom(5, 3, 5, 5), + info: {} + }, + rhs: { + type: RType.Symbol, + lexeme: 'a', + namespace: undefined, + content: 'a', + location: rangeFrom(5, 11, 5, 11), + info: {} + }, }, - }, - then: { - type: RType.ExpressionList, - lexeme: '{\n max <<- a\n i ->2\n}', - location: rangeFrom(4, 11, 7, 1), - info: {}, - children: [ - { - type: RType.BinaryOp, - flavor: 'assignment', - lexeme: '<<-', - operator: '<<-', - location: rangeFrom(5, 7, 5, 9), - info: {}, - lhs: { - type: 
RType.Symbol, - lexeme: 'max', - namespace: undefined, - content: 'max', - location: rangeFrom(5, 3, 5, 5), - info: {} - }, - rhs: { - type: RType.Symbol, - lexeme: 'a', - namespace: undefined, - content: 'a', - location: rangeFrom(5, 11, 5, 11), - info: {} - }, + { + type: RType.BinaryOp, + lexeme: '->', + operator: '->', + location: rangeFrom(6, 5, 6, 6), + info: {}, + lhs: { + type: RType.Symbol, + lexeme: 'i', + namespace: undefined, + content: 'i', + location: rangeFrom(6, 3, 6, 3), + info: {} }, - { - type: RType.BinaryOp, - flavor: 'assignment', - lexeme: '->', - operator: '->', - location: rangeFrom(6, 5, 6, 6), - info: {}, - lhs: { - type: RType.Symbol, - lexeme: 'i', - namespace: undefined, - content: 'i', - location: rangeFrom(6, 3, 6, 3), - info: {} - }, - rhs: { - type: RType.Number, - lexeme: '2', - content: numVal(2), - location: rangeFrom(6, 7, 6, 7), - info: {} - }, + rhs: { + type: RType.Number, + lexeme: '2', + content: numVal(2), + location: rangeFrom(6, 7, 6, 7), + info: {} }, - ], - }, - otherwise: ensureExpressionList({ + }, + ], + }, + otherwise: { + type: RType.ExpressionList, + location: undefined, + lexeme: undefined, + info: {}, + grouping: [{ + type: RType.Symbol, + lexeme: '{', + location: rangeFrom(7, 8, 7, 8), + content: '{', + info: {}, + namespace: undefined + }, { + type: RType.Symbol, + lexeme: '}', + location: rangeFrom(9, 1, 9, 1), + content: '}', + info: {}, + namespace: undefined + }], + children: [{ type: RType.BinaryOp, - flavor: 'assignment', lexeme: '->>', operator: '->>', location: rangeFrom(8, 5, 8, 7), @@ -174,19 +202,20 @@ max location: rangeFrom(8, 9, 8, 11), info: {} }, - }), + }] }, - { - type: RType.Symbol, - lexeme: 'max', - content: 'max', - namespace: undefined, - location: rangeFrom(10, 1, 10, 3), - info: {} - } - ), { - ignoreAdditionalTokens: true + }, + { + type: RType.Symbol, + lexeme: 'max', + content: 'max', + namespace: undefined, + location: rangeFrom(10, 1, 10, 3), + info: {} } + ), { + 
ignoreAdditionalTokens: true + } ) }) }) diff --git a/test/functionality/r-bridge/lang/ast/parse-symbols.ts b/test/functionality/r-bridge/lang/ast/parse-symbols.ts index b207127fa2..57d9d03141 100644 --- a/test/functionality/r-bridge/lang/ast/parse-symbols.ts +++ b/test/functionality/r-bridge/lang/ast/parse-symbols.ts @@ -1,32 +1,39 @@ import { assertAst, withShell } from '../../../_helper/shell' import { exprList } from '../../../_helper/ast-builder' import { rangeFrom } from '../../../../../src/util/range' -import { RType } from '../../../../../src/r-bridge' +import { RType } from '../../../../../src' +import { label } from '../../../_helper/label' describe('Parse symbols', withShell(shell => { - assertAst('Simple Symbol', shell, 'a', exprList({ - type: RType.Symbol, - location: rangeFrom(1, 1, 1, 1), - namespace: undefined, - lexeme: 'a', - content: 'a', - info: {} - })) - assertAst('With Namespace', shell, 'a::b', exprList({ - type: RType.Symbol, - location: rangeFrom(1, 4, 1, 4), - namespace: 'a', - lexeme: 'b', - content: 'b', - info: {} - })) - assertAst('With Quotes and Namespace', shell, 'a::"b"', exprList({ - type: RType.Symbol, - location: rangeFrom(1, 4, 1, 6), - namespace: 'a', - lexeme: '"b"', - content: '"b"', - info: {} - })) + assertAst(label('Simple Symbol', ['name-normal']), + shell, 'a', exprList({ + type: RType.Symbol, + location: rangeFrom(1, 1, 1, 1), + namespace: undefined, + lexeme: 'a', + content: 'a', + info: {} + }) + ) + assertAst(label('With Namespace', ['name-normal', 'accessing-exported-names']), + shell, 'a::b', exprList({ + type: RType.Symbol, + location: rangeFrom(1, 4, 1, 4), + namespace: 'a', + lexeme: 'b', + content: 'b', + info: {} + }) + ) + assertAst(label('With Quotes and Namespace', ['name-normal', 'name-quoted', 'accessing-exported-names']), + shell, 'a::"b"', exprList({ + type: RType.Symbol, + location: rangeFrom(1, 4, 1, 6), + namespace: 'a', + lexeme: '"b"', + content: '"b"', + info: {} + }) + ) })) diff --git 
a/test/functionality/r-bridge/lang/ast/parse-values.ts b/test/functionality/r-bridge/lang/ast/parse-values.ts index b00cda911d..5cc0e04cec 100644 --- a/test/functionality/r-bridge/lang/ast/parse-values.ts +++ b/test/functionality/r-bridge/lang/ast/parse-values.ts @@ -1,9 +1,5 @@ import { assertAst, withShell } from '../../../_helper/shell' -import { - RNumberPool, - RStringPool, - RSymbolPool, -} from '../../../_helper/provider' +import { RNumberPool, RStringPool, RSymbolPool } from '../../../_helper/provider' import { exprList } from '../../../_helper/ast-builder' import { rangeFrom } from '../../../../../src/util/range' import { retrieveParseDataFromRCode, RType } from '../../../../../src' @@ -11,6 +7,7 @@ import chai, { assert } from 'chai' import chaiAsPromised from 'chai-as-promised' import { MIN_VERSION_RAW_STABLE } from '../../../../../src/r-bridge/lang-4.x/ast/model/versions' import { prepareParsedData } from '../../../../../src/r-bridge/lang-4.x/ast/parser/json/format' +import { label } from '../../../_helper/label' chai.use(chaiAsPromised) describe('CSV parsing', withShell(shell => { @@ -19,7 +16,7 @@ describe('CSV parsing', withShell(shell => { request: 'text', content: 'x <- 1' }, shell) - assert.equal(code, '[[1,1,1,6,7,0,"expr",false,"x <- 1"],[1,1,1,1,1,3,"SYMBOL",true,"x"],[1,1,1,1,3,7,"expr",false,"x"],[1,3,1,4,2,7,"LEFT_ASSIGN",true,"<-"],[1,6,1,6,4,5,"NUM_CONST",true,"1"],[1,6,1,6,5,7,"expr",false,"1"]]') + assert.equal(code, '[1,1,1,6,7,0,"expr",false,"x <- 1"],[1,1,1,1,1,3,"SYMBOL",true,"x"],[1,1,1,1,3,7,"expr",false,"x"],[1,3,1,4,2,7,"LEFT_ASSIGN",true,"<-"],[1,6,1,6,4,5,"NUM_CONST",true,"1"],[1,6,1,6,5,7,"expr",false,"1"]') }) it('to object', async() => { @@ -28,23 +25,34 @@ describe('CSV parsing', withShell(shell => { content: 'x <- 1' }, shell) const parsed = prepareParsedData(code) - const one = '{"line1":1,"col1":1,"line2":1,"col2":1,"id":1,"parent":3,"token":"SYMBOL","terminal":true,"text":"x"}' - const two = 
'{"line1":1,"col1":3,"line2":1,"col2":4,"id":2,"parent":7,"token":"LEFT_ASSIGN","terminal":true,"text":"<-"}' - const three = `{"line1":1,"col1":1,"line2":1,"col2":1,"id":3,"parent":7,"token":"expr","terminal":false,"text":"x","children":[${one}]}` - const four = '{"line1":1,"col1":6,"line2":1,"col2":6,"id":4,"parent":5,"token":"NUM_CONST","terminal":true,"text":"1"}' - const five = `{"line1":1,"col1":6,"line2":1,"col2":6,"id":5,"parent":7,"token":"expr","terminal":false,"text":"1","children":[${four}]}` - assert.deepEqual(Object.fromEntries(parsed), JSON.parse(`{"1":${one},"2":${two},"3":${three},"4":${four},"5":${five},"7":{"line1":1,"col1":1,"line2":1,"col2":6,"id":7,"parent":0,"token":"expr","terminal":false,"text":"x <- 1","children":[${three},${two},${five}]}}`)) + const one = { 'line1': 1,'col1': 1,'line2': 1,'col2': 1,'id': 1,'parent': 3,'token': 'SYMBOL','terminal': true,'text': 'x' } + const two = { 'line1': 1,'col1': 3,'line2': 1,'col2': 4,'id': 2,'parent': 7,'token': 'LEFT_ASSIGN','terminal': true,'text': '<-' } + const three = { 'line1': 1,'col1': 1,'line2': 1,'col2': 1,'id': 3,'parent': 7,'token': 'expr','terminal': false,'text': 'x','children': [one] } + const four = { 'line1': 1,'col1': 6,'line2': 1,'col2': 6,'id': 4,'parent': 5,'token': 'NUM_CONST','terminal': true,'text': '1' } + const five = { 'line1': 1,'col1': 6,'line2': 1,'col2': 6,'id': 5,'parent': 7,'token': 'expr','terminal': false,'text': '1','children': [four] } + assert.deepEqual(parsed, [{ 'line1': 1,'col1': 1,'line2': 1,'col2': 6,'id': 7,'parent': 0,'token': 'expr','terminal': false,'text': 'x <- 1','children': [three,two,five] }]) + }) + + + it('multiline to object', async() => { + const code = await retrieveParseDataFromRCode({ + request: 'text', + content: '5\nb' + }, shell) + const parsed = prepareParsedData(code) + const one = { 'line1': 1,'col1': 1,'line2': 1,'col2': 1,'id': 1,'parent': 2,'token': 'NUM_CONST','terminal': true,'text': '5' } + const exprOne = { 'line1': 1,'col1': 
1,'line2': 1,'col2': 1,'id': 2,'parent': 0,'token': 'expr','terminal': false,'text': '5','children': [one] } + const two = { 'line1': 2,'col1': 1,'line2': 2,'col2': 1,'id': 6,'parent': 8,'token': 'SYMBOL','terminal': true,'text': 'b' } + const exprTwo = { 'line1': 2,'col1': 1,'line2': 2,'col2': 1,'id': 8,'parent': 0,'token': 'expr','terminal': false,'text': 'b','children': [two] } + assert.deepEqual(parsed, [exprOne, exprTwo]) }) })) describe('Constant Parsing', withShell(shell => { describe('parse empty', () => { - assertAst( - 'nothing', - shell, - '', - exprList() + assertAst(label('nothing', []), + shell, '', exprList() ) }) describe('parse single', () => { @@ -57,16 +65,13 @@ describe('Constant Parsing', describe('numbers', () => { for(const number of RNumberPool) { const range = rangeFrom(1, 1, 1, number.str.length) - assertAst( - number.str, - shell, - number.str, - exprList({ + assertAst(label(number.str, ['numbers']), + shell, number.str, exprList({ type: RType.Number, location: range, lexeme: number.str, content: number.val, - info: {} + info: { } }) ) } @@ -74,11 +79,9 @@ describe('Constant Parsing', describe('strings', () => { for(const string of RStringPool) { const range = rangeFrom(1, 1, 1, string.str.length) - assertAst( - string.str, - shell, - string.str, - exprList({ + const raw = string.str.startsWith('r') || string.str.startsWith('R') + assertAst(label(string.str, ['strings', ...(raw ? ['raw-strings' as const] : [])]), + shell, string.str, exprList({ type: RType.String, location: range, lexeme: string.str, @@ -87,24 +90,18 @@ describe('Constant Parsing', }), { // just a hackey way to not outright flag all - minRVersion: string.str.startsWith('r') || string.str.startsWith('R') ? MIN_VERSION_RAW_STABLE : undefined + minRVersion: raw ? 
MIN_VERSION_RAW_STABLE : undefined } ) } }) - describe('symbols', () => { + describe('Symbols', () => { for(const symbol of RSymbolPool) { - const range = rangeFrom( - 1, - symbol.symbolStart, - 1, - symbol.symbolStart + symbol.val.length - 1 - ) - assertAst( - symbol.str, - shell, - symbol.str, - exprList({ + const range = rangeFrom(1, symbol.symbolStart, 1, symbol.symbolStart + symbol.val.length - 1) + const exported = symbol.namespace !== undefined + const mapped = exported ? [symbol.internal ? 'accessing-internal-names' as const : 'accessing-exported-names' as const] : [] + assertAst(label(symbol.str, ['name-normal', ...mapped]), + shell, symbol.str, exprList({ type: RType.Symbol, namespace: symbol.namespace, location: range, @@ -117,11 +114,8 @@ describe('Constant Parsing', }) describe('logical', () => { for(const [lexeme, content] of [['TRUE', true], ['FALSE', false]] as const) { - assertAst( - `${lexeme} as ${JSON.stringify(content)}`, - shell, - lexeme, - exprList({ + assertAst(label(`${lexeme} as ${JSON.stringify(content)}`, ['logical']), + shell, lexeme, exprList({ type: RType.Logical, location: rangeFrom(1, 1, 1, lexeme.length), lexeme, @@ -132,10 +126,8 @@ describe('Constant Parsing', } }) describe('comments', () => { - assertAst( - 'simple line comment', - shell, - '# Hello World', + assertAst(label('simple line comment', ['comments']), + shell, '# Hello World', exprList({ type: RType.Comment, location: rangeFrom(1, 1, 1, 13), diff --git a/test/functionality/r-bridge/lang/values.ts b/test/functionality/r-bridge/lang/values.ts index 5e67955535..49e58ddf2a 100644 --- a/test/functionality/r-bridge/lang/values.ts +++ b/test/functionality/r-bridge/lang/values.ts @@ -1,6 +1,6 @@ import { it } from 'mocha' import { assert } from 'chai' -import { boolean2ts, isBoolean, number2ts, string2ts, ts2r } from '../../../../src/r-bridge' +import { boolean2ts, isBoolean, number2ts, string2ts, ts2r } from '../../../../src' import { RNumberPool, RStringPool } from 
'../../_helper/provider' describe('Bidirectional Value Translation', () => { diff --git a/test/functionality/r-bridge/processing.spec.ts b/test/functionality/r-bridge/processing.spec.ts index 61b89d6176..97c346fd78 100644 --- a/test/functionality/r-bridge/processing.spec.ts +++ b/test/functionality/r-bridge/processing.spec.ts @@ -1,30 +1,26 @@ import { assertDecoratedAst, retrieveNormalizedAst, withShell } from '../_helper/shell' import { numVal } from '../_helper/ast-builder' import { rangeFrom } from '../../../src/util/range' -import type { - RNodeWithParent, - NodeId } from '../../../src/r-bridge' -import { - RType, - decorateAst, - collectAllIds, RoleInParent -} from '../../../src/r-bridge' +import type { RNodeWithParent, NodeId } from '../../../src' +import { RType, decorateAst, collectAllIds, RoleInParent } from '../../../src' import { assert } from 'chai' describe('Assign unique Ids and Parents', withShell((shell) => { - describe('Testing deterministic counting Id assignment', () => { + describe('Testing Deterministic Counting of Id Assignment', () => { const assertDecorated = (name: string, input: string, expected: RNodeWithParent): void => { assertDecoratedAst(name, shell, input, expected) } // decided to test with ast parsing, as we are dependent on these changes in reality describe('Single nodes (leafs)', () => { - const exprList = (...children: RNodeWithParent[]): RNodeWithParent => ({ - type: RType.ExpressionList, - lexeme: undefined, - info: { + const exprList = (...children: readonly RNodeWithParent[]): RNodeWithParent => ({ + type: RType.ExpressionList, + lexeme: undefined, + grouping: undefined, + info: { parent: undefined, id: '1', index: 0, + depth: 0, role: RoleInParent.Root }, children, @@ -41,6 +37,7 @@ describe('Assign unique Ids and Parents', withShell((shell) => { info: { parent: '1', id: '0', + depth: 1, role: RoleInParent.ExpressionListChild, index: 0, }, @@ -55,6 +52,7 @@ describe('Assign unique Ids and Parents', withShell((shell) => { 
info: { parent: '1', id: '0', + depth: 1, role: RoleInParent.ExpressionListChild, index: 0 }, @@ -69,6 +67,7 @@ describe('Assign unique Ids and Parents', withShell((shell) => { info: { parent: '1', id: '0', + depth: 1, role: RoleInParent.ExpressionListChild, index: 0 }, @@ -84,6 +83,7 @@ describe('Assign unique Ids and Parents', withShell((shell) => { info: { parent: '1', id: '0', + depth: 1, role: RoleInParent.ExpressionListChild, index: 0 }, @@ -91,7 +91,7 @@ describe('Assign unique Ids and Parents', withShell((shell) => { ) }) }) - describe('Collect all íds in ast', () => { + describe('Collect all Ids in AST', () => { function assertIds(name: string, input: string, expected: Set, stop?: (node: RNodeWithParent) => boolean) { it(name, async() => { const baseAst = await retrieveNormalizedAst(shell, input) @@ -100,11 +100,11 @@ describe('Assign unique Ids and Parents', withShell((shell) => { assert.deepStrictEqual(ids, expected, `Ids do not match for input ${input}`) }) } - assertIds('Without stop', 'x <- 2', new Set(['0', '1', '2', '3'])) - assertIds('Stop one', 'x <- 2', new Set(['0', '2', '3']), n => n.type === RType.Number) - assertIds('Multiple statements', 'x <- 2; if(TRUE) { a <- 4 }', new Set(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])) + assertIds('Without stop', 'x <- 2', new Set([0, 1, 2, 3])) + assertIds('Stop one', 'x <- 2', new Set([0, 2, 3]), n => n.type === RType.Number) + assertIds('Multiple statements', 'x <- 2; if(TRUE) { a <- 4 }', new Set([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])) // if, TRUE, [when] - assertIds('Multiple statements blocking binary ops', 'x <- 2; if(TRUE) { a <- 4 }', new Set(['3', '7', '8', '9']), n => n.type === RType.BinaryOp) + assertIds('Multiple statements blocking binary ops', 'x <- 2; if(TRUE) { a <- 4 }', new Set([3, 4, 5, 9, 10, 11]), n => n.type === RType.BinaryOp) }) }) ) diff --git a/test/functionality/r-bridge/r-bridge.spec.ts b/test/functionality/r-bridge/r-bridge.spec.ts index 9ce5ca6de8..708ed9b49f 100644 
--- a/test/functionality/r-bridge/r-bridge.spec.ts +++ b/test/functionality/r-bridge/r-bridge.spec.ts @@ -22,7 +22,4 @@ describe('R-Bridge', () => { require('./lang/ast/parse-snippets') require('./lang/ast/parse-directives') }) - describe('Parser Hooks', () => { - require('./lang/ast/parse-hooks') - }) }) diff --git a/test/functionality/r-bridge/sessions.ts b/test/functionality/r-bridge/sessions.ts index 5f537d5e35..5214d90bf7 100644 --- a/test/functionality/r-bridge/sessions.ts +++ b/test/functionality/r-bridge/sessions.ts @@ -1,20 +1,19 @@ import chai, { assert } from 'chai' -import { testWithShell } from '../_helper/shell' +import { testWithShell, withShell } from '../_helper/shell' import chaiAsPromised from 'chai-as-promised' import semver from 'semver/preload' import { guard } from '../../../src/util/assert' chai.use(chaiAsPromised) /** here we use testWithShell to get a fresh shell within each call */ -describe('RShell sessions', function() { - this.slow('500ms') // some respect for the r shell :/ - testWithShell('test that we can create a connection to R', shell => { +describe('RShell sessions', withShell(shell => { + it('test that we can create a connection to R', () => { assert.doesNotThrow(() => { shell.clearEnvironment() }) }) describe('test the version of R', () => { - testWithShell('query the installed version of R', async shell => { + it('query the installed version of R', async() => { const version = await shell.usedRVersion() guard(version !== null, 'we should be able to retrieve the version of R') assert.isNotNull(semver.valid(version), `the version ${JSON.stringify(version)} should be a valid semver`) @@ -24,14 +23,14 @@ describe('RShell sessions', function() { describe('let R make an addition', () => { [true, false].forEach(trimOutput => { - testWithShell(`let R make an addition (${trimOutput ? 'with' : 'without'} trimming)`, async shell => { + it(`let R make an addition (${trimOutput ? 
'with' : 'without'} trimming)`, async() => { const lines = await shell.sendCommandWithOutput('1 + 1', { automaticallyTrimOutput: trimOutput }) assert.equal(lines.length, 1) assert.equal(lines[0], '[1] 2') }) }) }) - testWithShell('keep context of previous commands', async shell => { + it('keep context of previous commands', async() => { shell.sendCommand('a <- 1 + 1') const lines = await shell.sendCommandWithOutput('a') assert.equal(lines.length, 1) @@ -47,21 +46,11 @@ describe('RShell sessions', function() { }) ) }) - testWithShell('clear environment should remove variable information', async shell => { - shell.continueOnError() // we will produce an error! - shell.sendCommand('options(warn=-1); invisible(Sys.setlocale("LC_MESSAGES", \'en_GB.UTF-8\'))') - shell.sendCommand('a <- 1 + 1') - shell.clearEnvironment() - await shell.sendCommandWithOutput('a', { from: 'stderr' }).then(lines => { - // just await an error - assert.match(lines.join('\n'), /^.*Error.*a/) - }) - }) - testWithShell('send multiple commands', async shell => { + it('send multiple commands', async() => { shell.sendCommands('a <- 1', 'b <- 2', 'c <- a + b') const lines = await shell.sendCommandWithOutput('c') assert.equal(lines.length, 1) assert.equal(lines[0], '[1] 3') }) -}) +})) diff --git a/test/functionality/slicing/reconstruct/simple-tests.ts b/test/functionality/slicing/reconstruct/simple-tests.ts index 38cd78a72e..555f2af0b5 100644 --- a/test/functionality/slicing/reconstruct/simple-tests.ts +++ b/test/functionality/slicing/reconstruct/simple-tests.ts @@ -1,65 +1,68 @@ import { assertReconstructed, withShell } from '../../_helper/shell' +import type { NodeId } from '../../../../src' +import { OperatorDatabase } from '../../../../src' +import { label } from '../../_helper/label' +import type { SupportedFlowrCapabilityId } from '../../../../src/r-bridge/data' describe('Simple', withShell(shell => { describe('Constant assignments', () => { - for(const code of [ - 'x <- 5', - 'x <- 5; y <- 9', 
- '{ x <- 5 }', - '{ x <- 5; y <- 9 }' - ]) { - assertReconstructed(code, shell, code, '0', 'x <- 5') + for(const [id, code, caps] of [ + [0, 'x <- 5', ['name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities]], + [0, 'x <- 5; y <- 9', ['name-normal', 'numbers', 'semicolons', ...OperatorDatabase['<-'].capabilities]], + [2, '{ x <- 5 }', ['grouping', 'name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities]], + [2, '{ x <- 5; y <- 9 }', ['grouping', 'name-normal', 'numbers', 'semicolons', ...OperatorDatabase['<-'].capabilities]], + ] as [number, string, SupportedFlowrCapabilityId[]][]){ + assertReconstructed(label(code, caps), shell, code, id, 'x <- 5') } }) describe('Nested Assignments', () => { - for(const [code, id, expected] of [ - ['12 + (supi <- 42)', '0', '12 + (supi <- 42)' ], - ['y <- x <- 42', '1', 'x <- 42' ], - ['y <- x <- 42', '0', 'y <- x <- 42' ], - // we are not smart enough right now to see, that the write is constant. - ['for (i in 1:20) { x <- 5 }', '4', 'for(i in 1:20) x <- 5' ] - ]) { - assertReconstructed(code, shell, code, id, expected) + for(const [code, id, expected, caps] of [ + ['12 + (supi <- 42)', 0, '12 + (supi <- 42)', ['grouping', 'name-normal', ...OperatorDatabase['<-'].capabilities, ...OperatorDatabase['+'].capabilities]], + ['y <- x <- 42', 1, 'x <- 42', ['name-normal', 'numbers', 'return-value-of-assignments', ...OperatorDatabase['<-'].capabilities, 'precedence'] ], + ['y <- x <- 42', 0, 'y <- x <- 42', ['name-normal', 'numbers', 'return-value-of-assignments', ...OperatorDatabase['<-'].capabilities, 'precedence'] ], + ['for (i in 1:20) { x <- 5 }', 6, 'x <- 5', ['for-loop', 'name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities] ] + ] as [string, number, string, SupportedFlowrCapabilityId[]][]) { + assertReconstructed(label(code, caps), shell, code, id, expected) } }) describe('Access', () => { - for(const [code, id, expected] of [ - ['a[3]', '0', 'a[3]' ], - ['a[x]', '1', 'x' ] - ]) { - 
assertReconstructed(code, shell, code, id, expected) + for(const [code, id, expected, caps] of [ + /* we are interested in 'a' not in the result of the access*/ + ['a[3]', 0, 'a', ['single-bracket-access', 'numbers', 'name-normal'] ], + ['a[x]', 1, 'x', ['single-bracket-access', 'name-normal'] ] + ] as [string, number, string, SupportedFlowrCapabilityId[]][]) { + assertReconstructed(label(code, caps), shell, code, id, expected) } }) describe('Loops', () => { describe('repeat', () => { - const pool: [string, string | string[], string][] = [ - ['repeat { x }', '0', 'repeat x'], - ['repeat { x <- 5; y <- 9 }', '0', 'repeat x <- 5'], - ['repeat { x <- 5; y <- 9 }', ['0', '1', '4'], 'repeat {\n x <- 5\n 9\n}'] + const pool: [string, NodeId | NodeId[], string, SupportedFlowrCapabilityId[]][] = [ + ['repeat { x }', 2, 'x', ['repeat-loop', 'name-normal']], + ['repeat { x <- 5; y <- 9 }', 2, 'x <- 5', ['repeat-loop', 'name-normal', ...OperatorDatabase['<-'].capabilities, 'semicolons', 'numbers']], + ['repeat { x <- 5; y <- 9 }', [2, 4, 6], 'x <- 5\n9', ['repeat-loop', 'name-normal', ...OperatorDatabase['<-'].capabilities, 'semicolons', 'numbers']] ] - for(const [code, id, expected] of pool) { - assertReconstructed(code, shell, code, id, expected) + for(const [code, id, expected, caps] of pool) { + assertReconstructed(label(code, caps), shell, code, id, expected) } }) describe('while', () => { - const pool: [string, string | string[], string][] = [ - ['while(TRUE) { x }', '1', 'while(TRUE) x'], - ['while(TRUE) { x <- 5 }', '1', 'while(TRUE) x <- 5'], - ['while(TRUE) { x <- 5; y <- 9 }', '1', 'while(TRUE) x <- 5'], - ['while(TRUE) { x <- 5; y <- 9 }', '0', 'while(TRUE) {}'], - ['while(TRUE) { x <- 5; y <- 9 }', ['0', '1'], 'while(TRUE) x <- 5'], - ['while(TRUE) { x <- 5; y <- 9 }', ['0', '1', '2'], 'while(TRUE) x <- 5'], - ['while(TRUE) { x <- 5; y <- 9 }', ['0', '4'], 'while(TRUE) y <- 9'], - ['while(TRUE) { x <- 5; y <- 9 }', ['0', '1', '4'], 'while(TRUE) {\n x <- 5\n y <- 
9\n}'], - ['while(x + 2 > 3) { x <- 0 }', ['0'], 'while(x + 2 > 3) {}'], - ['while(x + 2 > 3) { x <- 0 }', ['5'], 'while(x + 2 > 3) x <- 0'], - ['while(x + 2 > 3) { x <- 0 }', ['0', '5'], 'while(x + 2 > 3) x <- 0'] + const fiveNineCaps: SupportedFlowrCapabilityId[] = ['while-loop', 'logical', 'name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'semicolons'] + const pool: [string, NodeId | NodeId[], string, SupportedFlowrCapabilityId[]][] = [ + ['while(TRUE) { x }', 3, 'x', ['while-loop', 'logical', 'name-normal']], + ['while(TRUE) { x <- 5 }', 3, 'x <- 5', ['while-loop', 'logical', 'name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities]], + ['while(TRUE) { x <- 5; y <- 9 }', 3, 'x <- 5', fiveNineCaps], + ['while(TRUE) { x <- 5; y <- 9 }', [10, 3], 'while(TRUE) x <- 5', fiveNineCaps], + ['while(TRUE) { x <- 5; y <- 9 }', [10, 3, 5], 'while(TRUE) x <- 5', fiveNineCaps], + ['while(TRUE) { x <- 5; y <- 9 }', [10, 6], 'while(TRUE) y <- 9', fiveNineCaps], + ['while(TRUE) { x <- 5; y <- 9 }', [3, 4, 6], 'x <- 5\ny <- 9', fiveNineCaps], + ['while(x + 2 > 3) { x <- 0 }', [7], 'x <- 0', ['while-loop', 'binary-operator', 'infix-calls', ...OperatorDatabase['+'].capabilities, 'name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers']], + ['while(x + 2 > 3) { x <- 0 }', [0, 7], 'while(x + 2 > 3) x <- 0', ['while-loop', 'binary-operator', 'infix-calls', ...OperatorDatabase['+'].capabilities, 'name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers']] ] - for(const [code, id, expected] of pool) { - assertReconstructed(code, shell, code, id, expected) + for(const [code, id, expected, caps] of pool) { + assertReconstructed(label(code, caps), shell, code, id, expected) } }) @@ -71,38 +74,41 @@ describe('Simple', withShell(shell => { 12 -> x } ` - const pool: [string, string | string[], string][] = [ - [largeFor, '0', 'for(i in 1:20) {}'], - [largeFor, '4', 'for(i in 1:20) y <- 9'], - [largeFor, ['0', '4'], 'for(i in 1:20) y <- 9'], - [largeFor, 
['0', '4', '7'], `for(i in 1:20) { - y <- 9 - x <- 5 -}`], - [largeFor, ['0', '4', '10'], `for(i in 1:20) { + const caps: SupportedFlowrCapabilityId[] = ['for-loop', 'name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities, ...OperatorDatabase['->'].capabilities, 'newlines'] + const pool: [string, NodeId | NodeId[], string][] = [ + [largeFor, 0, 'for(i in 1:20) {}'], + [largeFor, 6, 'y <- 9'], + [largeFor, [6, 16], 'for(i in 1:20) y <- 9'], + [largeFor, [6, 9], 'y <- 9\nx <- 5'], + [largeFor, [6, 12, 16], `for(i in 1:20) { y <- 9 12 -> x }`], ] for(const [code, id, expected] of pool) { - assertReconstructed(`${JSON.stringify(id)}: ${code}`, shell, code, id, expected) + assertReconstructed(label(`${JSON.stringify(id)}: ${code}`, caps), shell, code, id, expected) } }) }) describe('Failures in practice', () => { - assertReconstructed('Reconstruct expression list in call', shell, ` + assertReconstructed(label('Reconstruct expression list in call', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'unnamed-arguments', 'call-normal', 'newlines']), shell, ` a <- foo({ a <- b() c <- 3 - })`, '0', `a <- foo({ + })`, 0, `a <- foo({ a <- b() c <- 3 })`) - assertReconstructed('Reconstruct access in pipe', shell, ` + + const caps: SupportedFlowrCapabilityId[] = ['name-normal', ...OperatorDatabase['<-'].capabilities, 'double-bracket-access', 'numbers', 'infix-calls', 'binary-operator', 'call-normal', 'newlines', 'unnamed-arguments', 'precedence'] + assertReconstructed(label('Reconstruct access in pipe (variable)', caps), shell, ` +ls <- x[[1]] %>% st_cast() +class(ls)`, 2, 'x') + assertReconstructed(label('Reconstruct access in pipe (access)', caps), shell, ` ls <- x[[1]] %>% st_cast() -class(ls)`, '2', 'x[[1]]') +class(ls)`, 13, 'class(ls)') }) })) diff --git a/test/functionality/slicing/slicing-criterion/collect-all-tests.ts b/test/functionality/slicing/slicing-criterion/collect-all-tests.ts index a036286edc..82b188cb96 100644 --- 
a/test/functionality/slicing/slicing-criterion/collect-all-tests.ts +++ b/test/functionality/slicing/slicing-criterion/collect-all-tests.ts @@ -8,32 +8,38 @@ import { convertAllSlicingCriteriaToIds } from '../../../../src/slicing' import type { RShell } from '../../../../src/r-bridge' -import { decorateAst } from '../../../../src/r-bridge' +import { OperatorDatabase , decorateAst } from '../../../../src/r-bridge' + import { retrieveNormalizedAst, withShell } from '../../_helper/shell' import { assert } from 'chai' +import { normalizeIdToNumberIfPossible } from '../../../../src/r-bridge/lang-4.x/ast/model/processing/node-id' +import type { TestLabel } from '../../_helper/label' +import { label , decorateLabelContext } from '../../_helper/label' +import type { SupportedFlowrCapabilityId } from '../../../../src/r-bridge/data' + -function assertRetrievedIdsWith(shell: RShell, name: string, input: string, filter: SlicingCriteriaFilter, ...expected: SlicingCriteria[]) { - return it(name, async() => { +function assertRetrievedIdsWith(shell: RShell, name: string | TestLabel, input: string, filter: SlicingCriteriaFilter, ...expected: SlicingCriteria[]) { + return it(decorateLabelContext(name, ['slice']), async() => { const ast = await retrieveNormalizedAst(shell, input) const decorated = decorateAst(ast) const got = [...collectAllSlicingCriteria(decorated.ast, filter)] .flatMap(criteria => convertAllSlicingCriteriaToIds(criteria, decorated)) - .map(m => ({ id: m.id, name: decorated.idMap.get(m.id)?.lexeme })) + .map(m => ({ id: normalizeIdToNumberIfPossible(m.id), name: decorated.idMap.get(normalizeIdToNumberIfPossible(m.id))?.lexeme })) const expectedMapped = expected .flatMap(criteria => convertAllSlicingCriteriaToIds(criteria, decorated)) - assert.deepStrictEqual(got, expectedMapped.map(m => ({ id: m.id, name: decorated.idMap.get(m.id)?.lexeme })), `mapped: ${JSON.stringify(expectedMapped)}`) + assert.deepStrictEqual(got, expectedMapped.map(m => ({ id: 
normalizeIdToNumberIfPossible(m.id), name: decorated.idMap.get(normalizeIdToNumberIfPossible(m.id))?.lexeme })), `mapped: ${JSON.stringify(expectedMapped)}`) }) } describe('Retrieve all slicing locations', withShell(shell => { describe('Test the default all variables filter', () => { - function test(input: string, ...expected: SlicingCriteria[]) { - assertRetrievedIdsWith(shell, `Retrieve all variables in ${JSON.stringify(input)}`, input, DefaultAllVariablesFilter, ...expected) + function test(input: string, caps: SupportedFlowrCapabilityId[], ...expected: SlicingCriteria[]) { + assertRetrievedIdsWith(shell, label(`Retrieve all variables in ${JSON.stringify(input)}`, caps), input, DefaultAllVariablesFilter, ...expected) } - test('x <- 1', [ '1@x' ]) - test('x <- 1\ny <- 2', [ '1@x' ], [ '2@y' ]) - test('library(foo)', [ ]) // here, foo is not a variable but used as the library name + test('x <- 1', [...OperatorDatabase['<-'].capabilities, 'name-normal', 'numbers'], [ '1@x' ]) + test('x <- 1\ny <- 2', [...OperatorDatabase['<-'].capabilities, 'name-normal', 'numbers', 'newlines'], [ '1@x' ], [ '2@y' ]) + test('library(foo)', ['unnamed-arguments', 'name-normal'], [ ]) // here, foo is not a variable but used as the library name test(`a <- 52 foo(a=3,b<-2,c=4) if(TRUE) { @@ -45,12 +51,14 @@ if(TRUE) { } a - 1 -> a } -foo(5)`, [ '1@a' ], [ '2@b' ], [ '4@a' ], [ '5:5' ], [ '5:9' ], [ '7@foo' ], [ '8@x' ], [ '10:3' ], [ '10:12' ]) +foo(5)`, [...OperatorDatabase['<-'].capabilities, ...OperatorDatabase['='].capabilities, ...OperatorDatabase['-'].capabilities, ...OperatorDatabase['<<-'].capabilities, ...OperatorDatabase['->'].capabilities, ...OperatorDatabase['+'].capabilities, ...OperatorDatabase['>'].capabilities, 'name-normal', 'numbers', 'newlines', 'if', 'while-loop', 'logical', 'named-arguments', 'side-effects-in-argument', 'formals-named', 'implicit-return'], + [ '1@a' ], [ '2@b' ], [ '4@a' ], [ '5:5' ], [ '5:9' ], [ '7@foo' ], [ '8@x' ], [ '10:3' ], [ '10:12' ]) 
test(`x = NULL u <<- function(a = NULL, b = NA, c, d=7, e=x, f=TRUE, g=FALSE, ...) { g <- 12 * NaN - Inf h <- function(x) { x + 1 } return(h(a + b)) -}`, [ '1@x' ], [ '2@u' ], ['2@x'], [ '3@g' ], [ '4@h' ], [ '4:22' ], [ '5@a' ], [ '5@b' ]) +}`,[...OperatorDatabase['<<-'].capabilities, ...OperatorDatabase['='].capabilities, 'name-normal', 'inf-and-nan', 'numbers', 'null', 'newlines', 'formals-default', 'formals-named', 'unnamed-arguments', ...OperatorDatabase['+'].capabilities, 'implicit-return', 'return'], + [ '1@x' ], [ '2@u' ], ['2@x'], [ '3@g' ], [ '4@h' ], [ '4:22' ], [ '5@a' ], [ '5@b' ]) }) })) diff --git a/test/functionality/slicing/static-program-slices/calls-tests.ts b/test/functionality/slicing/static-program-slices/calls-tests.ts index 8a746ac4f3..09e77fa0c2 100644 --- a/test/functionality/slicing/static-program-slices/calls-tests.ts +++ b/test/functionality/slicing/static-program-slices/calls-tests.ts @@ -1,4 +1,7 @@ import { assertSliced, withShell } from '../../_helper/shell' +import { label } from '../../_helper/label' +import type { SupportedFlowrCapabilityId } from '../../../../src/r-bridge/data' +import { OperatorDatabase } from '../../../../src' describe('Calls', withShell(shell => { describe('Simple Calls', () => { @@ -6,27 +9,36 @@ describe('Calls', withShell(shell => { a <- function(x) { x } a(i)` for(const criterion of ['3:1', '3@a'] as const) { - assertSliced(JSON.stringify(code), shell, code, [criterion], code) + assertSliced(label(JSON.stringify(code), ['function-definitions', 'resolve-arguments', 'formals-named', 'name-normal', 'call-normal', ...OperatorDatabase['<-'].capabilities, 'unnamed-arguments']), + shell, code, [criterion], code + ) } + const constCapabilities: SupportedFlowrCapabilityId[] = ['function-definitions', 'resolve-arguments', 'formals-named', 'name-normal', 'numbers', 'call-normal', ...OperatorDatabase['<-'].capabilities, 'unnamed-arguments', 'implicit-return'] const constFunction = `i <- 4 a <- function(x) { x <- 2; 1 
} a(i)` - assertSliced('Function call with constant function', shell, constFunction, ['3:1'], `i <- 4 -a <- function(x) { 1 } + /* actually, `i` does not have to be defined, as it is _not used_ by the function, so we do not have to include `i <- 4` */ + assertSliced(label('Function call with constant function', constCapabilities), + shell, constFunction, ['3:1'], `a <- function(x) { 1 } a(i)`) - assertSliced('Slice function definition', shell, constFunction, ['2@a'], 'a <- function(x) { }') - assertSliced('Slice within function', shell, constFunction, ['2:20'], 'x <- 2') - assertSliced('Multiple unknown calls', shell, ` + /* nothing of the function-content is required */ + assertSliced(label('Slice function definition', constCapabilities), + shell, constFunction, ['2@a'], 'a <- function(x) { }') + assertSliced(label('Slice within function', constCapabilities), shell, constFunction, ['2:20'], 'x <- 2') + assertSliced(label('Multiple unknown calls', ['name-normal', 'resolve-arguments','unnamed-arguments', 'numbers', 'call-normal', 'newlines']), + shell, ` foo(x, y) foo(x, 3) `, ['3@foo'], 'foo(x, 3)') - assertSliced('Multiple unknown calls sharing known def', shell, ` + assertSliced(label('Multiple unknown calls sharing known def', ['name-normal', 'resolve-arguments','formals-named', 'unnamed-arguments', 'implicit-return', 'numbers', 'call-normal', 'newlines']), + shell, ` x. <- function (x) { x } foo(x, x.(y)) foo(x, x.(3)) `, ['4@foo'], `x. <- function(x) { x } foo(x, x.(3))`) - assertSliced('Using ...', shell, ` + assertSliced(label('Using ...', ['name-normal', 'resolve-arguments', 'unnamed-arguments', 'formals-dot-dot-dot', 'formals-named', 'implicit-return', 'call-normal', ...OperatorDatabase['<-'].capabilities, 'newlines', 'numbers']), + shell, ` f1 <- function (a,b) { c } f2 <- function (...) { f1(...) 
} x <- 3 @@ -45,7 +57,8 @@ f2(1,x)`) a <- function(x) { x + i } a(4)` for(const criterion of ['3:1', '3@a'] as const) { - assertSliced('Must include read', shell, code, [criterion], code) + assertSliced(label('Must include read', ['name-normal', 'resolve-arguments', 'unnamed-arguments', 'formals-named', 'implicit-return', 'call-normal', ...OperatorDatabase['<-'].capabilities, 'newlines', 'binary-operator', 'infix-calls', ...OperatorDatabase['+'].capabilities, 'numbers']), + shell, code, [criterion], code) } }) describe('Read variable defined after', () => { @@ -53,7 +66,8 @@ a(4)` i <- 4 a(5)` for(const criterion of ['3:1', '3@a'] as const) { - assertSliced('Must include read', shell, code, [criterion], code) + assertSliced(label('Must include read', ['name-normal', 'resolve-arguments', 'unnamed-arguments', 'formals-named', 'implicit-return', 'call-normal', ...OperatorDatabase['<-'].capabilities, 'newlines', 'binary-operator', 'infix-calls', ...OperatorDatabase['+'].capabilities, 'numbers']), + shell, code, [criterion], code) } }) describe('Read variable defined before and after', () => { @@ -62,7 +76,7 @@ a <- function(x) { x + i } i <- 4 a(5)` for(const criterion of ['4:1', '4@a'] as const) { - assertSliced('Only keep second definition', shell, code, [criterion], `a <- function(x) { x + i } + assertSliced(label('Only keep second definition', ['name-normal', 'resolve-arguments', 'unnamed-arguments', 'formals-named', 'implicit-return', 'call-normal', ...OperatorDatabase['<-'].capabilities, 'newlines', 'binary-operator', 'infix-calls', ...OperatorDatabase['+'].capabilities, 'numbers']), shell, code, [criterion], `a <- function(x) { x + i } i <- 4 a(5)`) } @@ -73,21 +87,26 @@ a(5)`) x <- 2 a() b()` - assertSliced('Include only b-definition', shell, code, ['3@a'], `a <- b <- function() { x } + const caps: SupportedFlowrCapabilityId[] = ['name-normal', 'normal-definition', 'implicit-return', 'call-normal', ...OperatorDatabase['<-'].capabilities, 'newlines', 
'binary-operator', 'infix-calls', 'numbers', 'return-value-of-assignments', 'precedence'] + assertSliced(label('Include only b-definition', caps), + shell, code, ['3@a'], `a <- b <- function() { x } x <- 2 a()`) - assertSliced('Include only b-definition', shell, code, ['4@b'], `b <- function() { x } + assertSliced(label('Include only b-definition', caps), + shell, code, ['4@b'], `b <- function() { x } x <- 2 b()`) }) describe('Functions with named arguments', () => { const code = `a <- function(x=4) { x } a(x = 3)` - assertSliced('Must include function definition', shell, code, ['2@a'], code) + assertSliced(label('Must include function definition', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'formals-default', 'implicit-return', 'newlines', 'named-arguments','resolve-arguments', 'numbers']), + shell, code, ['2@a'], code) - assertSliced('Must work for same named arguments too', shell, 'a <- 3\nb <- foo(a=a)', ['2@b'], 'a <- 3\nb <- foo(a=a)') + assertSliced(label('Must work for same named arguments too', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'named-arguments', 'newlines']), + shell, 'a <- 3\nb <- foo(a=a)', ['2@b'], 'a <- 3\nb <- foo(a=a)') - assertSliced('Must work for same named arguments nested', shell, ` + assertSliced(label('Must work for same named arguments nested', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'formals-default', 'named-arguments', 'accessing-exported-names', 'implicit-return', 'newlines', 'strings']), shell, ` f <- function(some_variable="hello") { result <- some::other(some_variable=some_variable) result @@ -101,7 +120,8 @@ f <- function(some_variable="hello") { const lateCode = `f <- function(a=b, m=3) { b <- 1; a; b <- 5; a + 1 } f() ` - assertSliced('Late bindings of parameter in body', shell, lateCode, ['2@f'], `f <- function(a=b, m=3) { + assertSliced(label('Late bindings of parameter in body', ['name-normal', 'formals-promises', 'resolve-arguments', 
...OperatorDatabase['<-'].capabilities, 'formals-default', 'numbers', 'implicit-return', 'binary-operator', 'infix-calls', ...OperatorDatabase['+'].capabilities, 'call-normal', 'semicolons']), + shell, lateCode, ['2@f'], `f <- function(a=b, m=3) { b <- 1 a + 1 } @@ -109,24 +129,31 @@ f()`) const lateCodeB = `f <- function(a=b, b=3) { b <- 1; a; b <- 5; a + 1 } f() ` - assertSliced('Late bindings of parameter in parameters', shell, lateCodeB, ['2@f'], `f <- function(a=b, b=3) { a + 1 } + assertSliced(label('Late bindings of parameter in parameters', ['name-normal', 'formals-promises', 'resolve-arguments', ...OperatorDatabase['<-'].capabilities, 'formals-default', 'newlines','binary-operator', 'infix-calls', 'numbers', 'call-normal', ...OperatorDatabase['+'].capabilities, 'semicolons']), + shell, lateCodeB, ['2@f'], `f <- function(a=b, b=3) { a + 1 } f()`) - assertSliced('Parameters binding context', shell, `f <- function(a=y) { a } + assertSliced(label('Parameters binding context', ['name-normal', 'formals-promises', 'resolve-arguments', ...OperatorDatabase['<-'].capabilities, 'formals-default', 'implicit-return', 'newlines', 'numbers', 'call-normal']), + shell, `f <- function(a=y) { a } a <- 5 y <- 3 y <- 4 f()`, ['5@f'], `f <- function(a=y) { a } y <- 4 f()`) + + assertSliced(label('Named argument collides with variable', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'unnamed-arguments', 'named-arguments', 'newlines']), shell, 'x <- 100\nlist(123, x = 200, 234)\nprint(x)', + ['3@x'], 'x <- 100\nprint(x)') }) describe('Functions with nested definitions', () => { describe('Simple Function pass with return', () => { const code = `a <- function() { a <- 2; return(function() { 1 }) } b <- a() b()` - assertSliced('Must include outer function', shell, code, ['2@a'], `a <- function() { return(function() { 1 }) } + assertSliced(label('Must include outer function', ['name-normal', 'closures', ...OperatorDatabase['<-'].capabilities, 'normal-definition', 
'numbers', 'return', 'implicit-return', 'call-normal', 'newlines', 'semicolons']), + shell, code, ['2@a'], `a <- function() { return(function() { 1 }) } a()`) - assertSliced('Must include linked function', shell, code, ['3@b'], `a <- function() { return(function() { 1 }) } + assertSliced(label('Must include linked function', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'normal-definition', 'return', 'implicit-return', 'numbers', 'newlines', 'call-normal']), + shell, code, ['3@b'], `a <- function() { return(function() { 1 }) } b <- a() b()`) }) @@ -137,12 +164,13 @@ y <- 5 z <- 5 u <- a() u()` - assertSliced('Must include function shell', shell, code, ['5@a'], `a <- function() { + assertSliced(label('Must include function shell', ['name-normal', 'closures', ...OperatorDatabase['<-'].capabilities, 'normal-definition', 'implicit-return', 'numbers', 'binary-operator', 'infix-calls', ...OperatorDatabase['+'].capabilities, 'return', 'newlines', 'call-normal', 'semicolons']), + shell, code, ['5@a'], `a <- function() { x <- function() { } return(x) } a()`) - assertSliced('Must include function shell on call', shell, code, ['6@u'], `a <- function() { + assertSliced(label('Must include function shell on call', ['name-normal', 'closures', ...OperatorDatabase['<-'].capabilities, 'normal-definition', 'newlines', 'return', 'call-normal']), shell, code, ['6@u'], `a <- function() { x <- function() { z + y } y <- 12 return(x) @@ -152,8 +180,9 @@ u <- a() u()`) }) }) - describe('Anonymous functions', () => { - assertSliced('Keep anonymous', shell, ` + describe('Anonymous Functions', () => { + assertSliced(label('keep anonymous', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'normal-definition', 'binary-operator', 'infix-calls', ...OperatorDatabase['-'].capabilities, 'implicit-return', 'call-anonymous', 'unnamed-arguments']), + shell, ` x <- (function() { x <- 4 x - 5 @@ -163,50 +192,53 @@ cat(x) `, ['7@x'], `x <- (function() { 3 })() cat(x)`) }) - 
describe('Higher-order functions', () => { + describe('Higher-order Functions', () => { const code = `a <- function() { x <- 3; i } i <- 4 b <- function(f) { i <- 5; f() } b(a)` - assertSliced('Only i, not bound in context', shell, code, ['1@i'], 'i') - assertSliced('Slice of b is independent', shell, code, ['3@b'], 'b <- function(f) { }') - assertSliced('Slice of b-call uses function', shell, code, ['4@b'], `a <- function() { i } + const caps: SupportedFlowrCapabilityId[] = ['name-normal', 'resolve-arguments', ...OperatorDatabase['<-'].capabilities, 'normal-definition', 'implicit-return', 'newlines', 'numbers', 'formals-named', 'call-normal', 'unnamed-arguments'] + assertSliced(label('Only i, not bound in context', caps), shell, code, ['1@i'], 'i') + assertSliced(label('Slice of b is independent', caps), shell, code, ['3@b'], 'b <- function(f) { }') + assertSliced(label('Slice of b-call uses function', caps), shell, code, ['4@b'], `a <- function() { i } b <- function(f) { i <- 5 f() } b(a)`) - assertSliced('Directly call returned function', shell, `m <- 12 + assertSliced(label('Directly call returned function', ['name-normal', 'closures', 'resolve-arguments', ...OperatorDatabase['<-'].capabilities, 'formals-named', 'normal-definition', 'implicit-return', 'return', 'unnamed-arguments', 'newlines', 'numbers', 'call-normal']), + shell, `m <- 12 a <- function(x) { b <- function() { function() { x } } return(b()) } -a(m)()`, ['$25' /* we can't directly slice the second call as the "a" name would take the inner call */], `m <- 12 +res <- a(m)()`, ['6@res'], `m <- 12 a <- function(x) { b <- function() { function() { x } } return(b()) } -a(m)()`) - assertSliced('Higher order anonymous function', shell, `a <- function(b) { +res <- a(m)()`) + assertSliced(label('Higher order anonymous function', ['name-normal', 'resolve-arguments', 'closures', ...OperatorDatabase['<-'].capabilities, 'formals-named', 'implicit-return', 'normal-definition', 'call-anonymous', 
'binary-operator', 'infix-calls', ...OperatorDatabase['+'].capabilities, 'newlines', 'precedence']), + shell, `a <- function(b) { b } x <- a(function() 2 + 3)() + a(function() 7)()`, ['4@x'], `a <- function(b) { b } x <- a(function() 2 + 3)() + a(function() 7)()`) }) describe('Side-Effects', () => { - assertSliced('Important Side-Effect', shell, `x <- 2 + assertSliced(label('Important Side-Effect', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'normal-definition', ...OperatorDatabase['<<-'].capabilities, 'side-effects-in-function-call', 'implicit-return', 'call-normal', 'unnamed-arguments', 'newlines', 'precedence']), shell, `x <- 2 f <- function() { x <<- 3 } f() cat(x) - `, ['4@x'], `f <- function() { x <<- 3 } + `, ['4@x'], `f <- function() x <<- 3 f() cat(x)`) - assertSliced('Unimportant Side-Effect', shell, `f <- function() { y <<- 3 } + assertSliced(label('Unimportant Side-Effect', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', ...OperatorDatabase['<<-'].capabilities, 'normal-definition', 'implicit-return', 'side-effects-in-function-call', 'call-normal', 'unnamed-arguments', 'newlines']), shell, `f <- function() { y <<- 3 } f() cat(x) `, ['3@x'], 'cat(x)') - assertSliced('Nested Side-Effect For Last', shell, `f <- function() { + assertSliced(label('Nested Side-Effect For Last', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'normal-definition', 'newlines', 'implicit-return', 'numbers', 'call-normal', 'side-effects-in-function-call']), shell, `f <- function() { a <- function() { x } x <- 3 a() @@ -222,7 +254,7 @@ b <- f() b <- f()`) // that it contains x <- 2 is an error in the current implementation as this happens due to the 'reads' edge from the closure linking // however, this read edge should not apply when the call happens within the same scope - assertSliced('Nested Side-Effect For First', shell, `f <- function() { + assertSliced(label('Nested Side-Effect For First', ['name-normal', 
...OperatorDatabase['<-'].capabilities, 'normal-definition', 'implicit-return', 'numbers', 'call-normal', 'newlines', 'side-effects-in-function-call']), shell, `f <- function() { a <- function() { x } x <- 3 b <- a() @@ -239,11 +271,31 @@ b <- f() b } b <- f()`) + }) + describe('Early return of function', () => { + const code = `x <- (function() { + g <- function() { y } + y <- 5 + if(z) + return(g) + y <- 3 + g +})() +res <- x()` + assertSliced(label('Double return points', ['name-normal', 'closures', ...OperatorDatabase['<-'].capabilities, 'call-anonymous', 'normal-definition', 'implicit-return', 'numbers', 'if', 'return', 'implicit-return', 'call-normal', 'newlines']), shell, code, ['9@res'], ` +x <- (function() { + g <- function() { y } + y <- 5 + if(z) return(g) + y <- 3 + g + })() +res <- x()`.trim()) }) describe('Recursive functions', () => { const code = `f <- function() { f() } f()` - assertSliced('Endless recursion', shell, code, ['2@f'], code) + assertSliced(label('Endless recursion', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'normal-definition', 'implicit-return', 'call-normal', 'newlines']), shell, code, ['2@f'], code) }) describe('Uninteresting calls', () => { const code = ` @@ -251,8 +303,8 @@ a <- list(1,2,3,4) a[3] print(a[2]) ` - assertSliced('Must include function shell', shell, code, ['3@a'], `a <- list(1,2,3,4) -a[3]`) + assertSliced(label('Must include function shell', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'unnamed-arguments', 'single-bracket-access', 'newlines']), shell, code, ['3@a'], `a <- list(1,2,3,4) +a`) }) describe('Global vs. 
local definitions', () => { const localCode = ` @@ -260,9 +312,10 @@ a <- function() { x = x + 5; cat(x) } x <- 3 a() cat(x)` - assertSliced('Local redefinition has no effect', shell, localCode, ['5@x'], `x <- 3 + const localCaps: SupportedFlowrCapabilityId[] = ['name-normal', 'lexicographic-scope', 'normal-definition', ...OperatorDatabase['='].capabilities, 'binary-operator', 'infix-calls', ...OperatorDatabase['+'].capabilities, 'semicolons', 'unnamed-arguments', 'newlines', 'call-normal', 'numbers', 'precedence'] + assertSliced(label('Local redefinition has no effect', localCaps), shell, localCode, ['5@x'], `x <- 3 cat(x)`) - assertSliced('Local redefinition must be kept as part of call', shell, localCode, ['4@a'], `a <- function() { + assertSliced(label('Local redefinition must be kept as part of call', localCaps), shell, localCode, ['4@a'], `a <- function() { x = x + 5 cat(x) } @@ -273,7 +326,7 @@ a <- function() { x <<- x + 5; cat(x) } x <- 3 a() cat(x)` - assertSliced('But the global redefinition remains', shell, globalCode, ['5@x'], `a <- function() { x <<- x + 5 } + assertSliced(label('But the global redefinition remains', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'normal-definition', 'implicit-return', 'side-effects-in-function-call', 'return-value-of-assignments', 'newlines', 'call-normal', 'unnamed-arguments', 'precedence']), shell, globalCode, ['5@x'], `a <- function() x <<- x + 5 x <- 3 a() cat(x)`) @@ -282,37 +335,34 @@ a <- function() { x <<- 5; cat(x) } x <- 3 a() cat(x)` - assertSliced('The local assignment is only needed if the global reads', shell, globalCodeWithoutLocal, ['5@x'], `a <- function() { x <<- 5 } + assertSliced(label('The local assignment is only needed if the global reads', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'function-definitions', ...OperatorDatabase['<<-'].capabilities, 'numbers', 'newlines', 'call-normal', 'unnamed-arguments', 'precedence']), shell, globalCodeWithoutLocal, ['5@x'], `a 
<- function() x <<- 5 a() cat(x)`) - assertSliced('Must work with nested globals', shell, `a <- function() { function(b) { x <<- b } } + assertSliced(label('Must work with nested globals', ['name-normal', 'resolve-arguments', 'lexicographic-scope', ...OperatorDatabase['<-'].capabilities, 'normal-definition', 'formals-named', 'side-effects-in-function-call', 'return-value-of-assignments', 'newlines', 'numbers', 'call-normal', 'unnamed-arguments', 'precedence']), + shell, `a <- function() { function(b) x <<- b } y <- 5 x <- 2 a()(y) -cat(x)`, ['5@x'], `a <- function() { function(b) { x <<- b } } +cat(x)`, ['5@x'], `a <- function() { function(b) x <<- b } y <- 5 a()(y) cat(x)`) - assertSliced('Must work with nested globals and known assignments not-happening', shell, `a <- function() { function(b) { if(FALSE) { x <<- b } } } + assertSliced(label('Must work with nested globals and known assignments not-happening', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'normal-definition', 'formals-named', 'if', 'logical', ...OperatorDatabase['<<-'].capabilities, 'return-value-of-assignments', 'resolve-arguments', 'implicit-return', 'newlines', 'call-normal', 'unnamed-arguments']), + shell, `a <- function() { function(b) { if(FALSE) { x <<- b } } } y <- 5 x <- 2 a()(y) cat(x)`, ['5@x'], `x <- 2 cat(x)`) - assertSliced('Must work with nested globals and maybe assignments', shell, `a <- function() { function(b) { if(runif() > .5) { x <<- b } } } + assertSliced(label('Must work with nested globals and maybe assignments', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'normal-definition', 'formals-named', 'if', 'call-normal', ...OperatorDatabase['>'].capabilities, 'numbers', ...OperatorDatabase['<<-'].capabilities, 'return-value-of-assignments', 'resolve-arguments', 'lexicographic-scope', 'newlines', 'unnamed-arguments', 'closures']), + shell, `a <- function() { function(b) { if(runif() > .5) { x <<- b } } } y <- 5 x <- 2 a()(y) -cat(x)`, ['5@x'], `a <- function() 
{ - function(b) { - if(runif() > .5) { - x <<- b - } - } - } +cat(x)`, ['5@x'], `a <- function() { function(b) if(runif() > .5) { x <<- b } } y <- 5 x <- 2 a()(y) @@ -328,16 +378,17 @@ a <- function() { x <- 3; 5 } a() \`a\`() ` - assertSliced('Must link with string/string', shell, code, ['3@\'a\''], `'a' <- function() { 4 } + const caps: SupportedFlowrCapabilityId[] = ['name-quoted', 'name-escaped', ...OperatorDatabase['<-'].capabilities, 'normal-definition', 'name-normal', 'numbers', 'semicolons', 'implicit-return', 'call-normal', 'newlines', 'name-escaped'] + assertSliced(label('Must link with string/string', caps), shell, code, ['3@\'a\''], `'a' <- function() { 4 } 'a'()`) - assertSliced('Must link with string/no-string', shell, code, ['4@a'], `'a' <- function() { 4 } + assertSliced(label('Must link with string/no-string', caps), shell, code, ['4@a'], `'a' <- function() { 4 } a()`) - assertSliced('Must link with no-string/string', shell, code, ['6@\'a\''], `a <- function() { 5 } + assertSliced(label('Must link with no-string/string', caps), shell, code, ['6@\'a\''], `a <- function() { 5 } 'a'()`) // the common case: - assertSliced('Must link with no-string/no-string', shell, code, ['7@a'], `a <- function() { 5 } + assertSliced(label('Must link with no-string/no-string', caps), shell, code, ['7@a'], `a <- function() { 5 } a()`) - assertSliced('Try with special backticks', shell, code, ['8@`a`'], `a <- function() { 5 } + assertSliced(label('Try with special backticks', caps), shell, code, ['8@`a`'], `a <- function() { 5 } \`a\`()`) }) describe('Using own infix operators', () => { @@ -351,11 +402,13 @@ a()`) cat(3 %a% 4) cat(4 %b% 5) ` - assertSliced('Must link with backticks', shell, code, ['8:7'], `\`%a%\` <- function(x, y) { x + y } + const caps: SupportedFlowrCapabilityId[] = ['name-escaped', 'resolve-arguments', 'name-quoted', 'infix-calls', 'formals-named', 'implicit-return', 'newlines', 'unnamed-arguments', 'special-operator'] + assertSliced(label('Must 
link with backticks', caps), shell, code, ['8:7'], `\`%a%\` <- function(x, y) { x + y } cat(3 %a% 4)`) - assertSliced('Must link with backticks', shell, code, ['9:7'], `'%b%' <- function(x, y) { x * y } + assertSliced(label('Must link with backticks', caps), shell, code, ['9:7'], `'%b%' <- function(x, y) { x * y } cat(4 %b% 5)`) - assertSliced('Must work with assigned custom pipes too', shell, 'a <- b %>% c %>% d', ['1@a'], 'a <- b %>% c %>% d') + assertSliced(label('Must work with assigned custom pipes too', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'infix-calls', 'numbers', 'special-operator', 'precedence']), + shell, 'a <- b %>% c %>% d', ['1@a'], 'a <- b %>% c %>% d') }) describe('Using own alias infix operators', () => { const code = ` @@ -363,7 +416,8 @@ cat(4 %b% 5)`) "%a%" <- pkg::"%a%" cat(4 %a% 5) ` - assertSliced('Must link alias but not namespace origin', shell, code, ['4:1'], `"%a%" <- pkg::"%a%" + assertSliced(label('Must link alias but not namespace origin', ['name-quoted', ...OperatorDatabase['<-'].capabilities, 'formals-named', 'implicit-return', 'infix-calls', 'special-operator', 'accessing-exported-names', 'newlines', 'unnamed-arguments']), + shell, code, ['4:1'], `"%a%" <- pkg::"%a%" cat(4 %a% 5)`) }) describe('Using own alias infix operators with namespace', () => { @@ -372,8 +426,21 @@ pkg::"%a%" <- function(x, y) { x + y } "%a%" <- pkg::"%a%" cat(4 %a% 5) ` - assertSliced('Must link alias with namespace', shell, code, ['4:1'], `pkg::"%a%" <- function(x, y) { x + y } + assertSliced(label('must link alias with namespace', ['accessing-exported-names', 'resolve-arguments', 'name-quoted', ...OperatorDatabase['<-'].capabilities, 'formals-named', 'implicit-return', 'binary-operator', 'infix-calls', ...OperatorDatabase['+'].capabilities, 'special-operator', 'unnamed-arguments']), + shell, code, ['4:1'], `pkg::"%a%" <- function(x, y) { x + y } "%a%" <- pkg::"%a%" cat(4 %a% 5)`) }) + describe('Quotation', () => { + 
assertSliced(label('quote does not reference variables', ['name-normal','newlines', ...OperatorDatabase['<-'].capabilities, 'built-in-quoting' ]), + shell, 'x <- 3\ny <- quote(x)', ['2@y'], 'y <- quote(x)') + }) + describe('Redefine built-ins', () => { + assertSliced(label('redefining assignments should work', ['name-quoted', 'name-normal', 'precedence', 'numbers', ...OperatorDatabase['<-'].capabilities, ...OperatorDatabase['='].capabilities, 'redefinition-of-built-in-functions-primitives']), + shell, 'x <- 1\n`<-`<-`*`\nx <- 3\ny = x', ['4@y'], 'x <- 1\ny = x') + }) + describe('Switch', () => { + assertSliced(label('Switch with named arguments', ['switch', ...OperatorDatabase['<-'].capabilities, 'numbers', 'strings', 'named-arguments', 'unnamed-arguments', 'switch', 'function-calls' ]), + shell, 'x <- switch("a", a=1, b=2, c=3)', ['1@x'], 'x <- switch("a", a=1, b=2, c=3)') + }) })) diff --git a/test/functionality/slicing/static-program-slices/simple-tests.ts b/test/functionality/slicing/static-program-slices/simple-tests.ts index 4487e1f023..a56b5860c3 100644 --- a/test/functionality/slicing/static-program-slices/simple-tests.ts +++ b/test/functionality/slicing/static-program-slices/simple-tests.ts @@ -1,24 +1,34 @@ import { assertSliced, withShell } from '../../_helper/shell' +import { label } from '../../_helper/label' +import type { SupportedFlowrCapabilityId } from '../../../../src/r-bridge/data' +import { OperatorDatabase } from '../../../../src' describe('Simple', withShell(shell => { describe('Constant assignments', () => { for(const i of [1, 2, 3]) { - assertSliced(`x <- [${i}]`, shell, 'x <- 1\nx <- 2\nx <- 3', [`${i}:1`], `x <- ${i}`) + assertSliced(label(`slice constant assignment ${i}`, ['name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities, 'newlines']), + shell, 'x <- 1\nx <- 2\nx <- 3', [`${i}:1`], `x <- ${i}` + ) } }) describe('Constant conditionals', () => { - assertSliced('if(TRUE)', shell, 'if(TRUE) { x <- 3 } else { x <- 4}\nx', 
['2@x'], 'if(TRUE) {\n x <- 3\n}\nx') - assertSliced('if(FALSE)', shell, 'if(FALSE) { x <- 3 } else { x <- 4}\nx', ['2@x'], 'if(FALSE) { } else {\n x <- 4\n}\nx') + assertSliced(label('if(TRUE)', ['name-normal', 'logical', 'numbers', ...OperatorDatabase['<-'].capabilities, 'newlines', 'if']), + shell, 'if(TRUE) { x <- 3 } else { x <- 4 }\nx', ['2@x'], 'x <- 3\nx' + ) + assertSliced(label('if(FALSE)', ['name-normal', 'logical', 'numbers', ...OperatorDatabase['<-'].capabilities, 'newlines', 'if']), + shell, 'if(FALSE) { x <- 3 } else { x <- 4 }\nx', ['2@x'], 'x <- 4\nx') }) describe('Independent Control-Flow', () => { - assertSliced('For-Loop', shell, ` + assertSliced(label('For-Loop', ['name-normal', 'for-loop', 'newlines', 'unnamed-arguments', 'numbers', 'built-in-sequencing', ...OperatorDatabase['<-'].capabilities, 'function-calls', ...OperatorDatabase['*'].capabilities, 'precedence']), + shell, ` x <- 1 for(i in 1:10) { x <- x * 2 } cat(x) `, ['6@x'], 'x <- 1\nfor(i in 1:10) x <- x * 2\ncat(x)') - assertSliced('While-Loop', shell, ` + assertSliced(label('While-Loop', ['name-normal', 'while-loop', 'newlines', 'numbers', 'unnamed-arguments', ...OperatorDatabase['<-'].capabilities, 'function-calls', ...OperatorDatabase['*'].capabilities, 'precedence']), + shell, ` x <- 1 while(i > 3) { x <- x * 2 @@ -26,20 +36,19 @@ while(i > 3) { cat(x) `, ['6@x'], 'x <- 1\nwhile(i > 3) x <- x * 2\ncat(x)') - // urgh that is fragile - assertSliced('If-Then', shell, ` + assertSliced(label('if-then', ['name-normal', 'if', 'newlines', 'numbers', 'unnamed-arguments', ...OperatorDatabase['<-'].capabilities, 'function-calls', ...OperatorDatabase['*'].capabilities, 'precedence']), + shell, ` x <- 1 if(i > 3) { x <- x * 2 } cat(x) `, ['6@x'], `x <- 1 -if(i > 3) { - x <- x * 2 -} +if(i > 3) { x <- x * 2 } cat(x)`) - assertSliced('Independent If-Then with extra requirements', shell, ` + assertSliced(label('independent if-then with extra requirements', ['name-normal', 'if', 'newlines', 
'unnamed-arguments', 'numbers', ...OperatorDatabase['<-'].capabilities, 'function-calls', ...OperatorDatabase['*'].capabilities, 'precedence']), + shell, ` x <- 1 i <- 3 if(i > 3) { @@ -48,16 +57,19 @@ if(i > 3) { cat(x) `, ['7@x'], `x <- 1 i <- 3 -if(i > 3) { - x <- x * 2 -} +if(i > 3) { x <- x * 2 } cat(x)`) }) describe('Access', () => { - assertSliced('Constant', shell, 'a <- 4\na <- list(1,2)\na[3]', ['3@a'], 'a <- list(1,2)\na[3]') - assertSliced('Variable', shell, 'i <- 4\na <- list(1,2)\na[i]', ['3@a'], 'i <- 4\na <- list(1,2)\na[i]') - assertSliced('Subset Sequence', shell, 'i <- 4\na <- list(1,2)\na[1:i,]', ['3@a'], 'i <- 4\na <- list(1,2)\na[1:i,]') - describe('definitions', () => { + assertSliced(label('constant', ['name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities, 'newlines', 'unnamed-arguments', 'single-bracket-access']), + shell, 'a <- 4\na <- list(1,2)\na[3]', ['3@a'], 'a <- list(1,2)\na') + assertSliced(label('variable', ['name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities, 'newlines', 'unnamed-arguments', 'single-bracket-access']), + shell, 'i <- 4\na <- list(1,2)\nb <- a[i]', ['3@b'], 'i <- 4\na <- list(1,2)\nb <- a[i]') + assertSliced(label('subset sequence', ['name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities, 'newlines', 'unnamed-arguments', 'built-in-sequencing', 'empty-arguments', 'single-bracket-access', 'subsetting']), + shell, 'i <- 4\na <- list(1,2)\n b <- a[1:i,]', ['3@b'], 'i <- 4\na <- list(1,2)\nb <- a[1:i,]') + assertSliced(label('range assignment', ['name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities, 'newlines', 'unnamed-arguments', 'built-in-sequencing', 'empty-arguments', 'single-bracket-access', 'subsetting', 'range-assignment']), + shell, 'a <- 1:10\na[1:5] <- 3\na', ['3@a'], 'a <- 1 : 10\na[1:5] <- 3\na') + describe('Definitions', () => { describe('[[', () => { const code = ` a <- list(1,2) @@ -68,11 +80,13 @@ cat(a) a <- list(3,4) cat(a) ` - assertSliced('Repeated named 
access and definition', shell, code, ['6@a'], `a <- list(1,2) + assertSliced(label('Repeated named access and definition', ['name-normal', 'numbers', 'double-bracket-access', 'unnamed-arguments', 'function-calls', ...OperatorDatabase['<-'].capabilities, 'newlines', 'unnamed-arguments']), + shell, code, ['6@a'], `a <- list(1,2) a[[1]] = 2 a[[2]] = 3 cat(a)`) - assertSliced('Full redefinitions still apply', shell, code, ['8@a'], `a <- list(3,4) + assertSliced(label('Full redefinitions still apply', ['name-normal', 'numbers', 'double-bracket-access', 'unnamed-arguments', 'function-calls', ...OperatorDatabase['<-'].capabilities, 'newlines', 'unnamed-arguments']), + shell, code, ['8@a'], `a <- list(3,4) cat(a)`) }) describe('$', () => { @@ -85,22 +99,26 @@ cat(a) a <- list(a=3,b=4) cat(a) ` - assertSliced('Repeated named access and definition', shell, codeB, ['6@a'], `a <- list(a=1,b=2) + assertSliced(label('Repeated named access and definition', ['name-normal', 'function-calls', 'named-arguments', 'unnamed-arguments', 'dollar-access', ...OperatorDatabase['<-'].capabilities, 'numbers']), + shell, codeB, ['6@a'], `a <- list(a=1,b=2) a$a = 2 a$b = 3 cat(a)`) - assertSliced('Full redefinitions still apply', shell, codeB, ['8@a'], `a <- list(a=3,b=4) + assertSliced(label('Full redefinitions still apply', ['name-normal', 'function-calls', 'named-arguments', 'unnamed-arguments', 'dollar-access', ...OperatorDatabase['<-'].capabilities, 'numbers']), + shell, codeB, ['8@a'], `a <- list(a=3,b=4) cat(a)`) }) }) }) describe('With directives', () => { - assertSliced('Single directive', shell, ` + assertSliced(label('Single directive', ['name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities, 'newlines', 'unnamed-arguments', 'comments']), + shell, ` #line 42 "foo.R" a <- 5 `, ['3@a'], 'a <- 5') }) describe('The classic', () => { + const capabilities: SupportedFlowrCapabilityId[] = ['name-normal', 'numbers', ...OperatorDatabase['<-'].capabilities, 'call-normal', 'newlines', 
'unnamed-arguments', 'for-loop', ...OperatorDatabase['+'].capabilities, ...OperatorDatabase['*'].capabilities, 'built-in-sequencing', 'strings', 'precedence'] const code = ` sum <- 0 product <- 1 @@ -116,33 +134,38 @@ cat("Sum:", sum, "\\n") cat("Product:", product, "\\n") ` - assertSliced('Sum lhs in for', shell, code, ['8:3'], + assertSliced(label('Sum lhs in for', capabilities), + shell, code, ['8:3'], `sum <- 0 w <- 7 N <- 10 for(i in 1:(N-1)) sum <- sum + i + w` ) - assertSliced('Sum rhs in for', shell, code, ['8:10'], + assertSliced(label('Sum rhs in for', capabilities), + shell, code, ['8:10'], `sum <- 0 w <- 7 N <- 10 for(i in 1:(N-1)) sum <- sum + i + w` ) - assertSliced('Product lhs in for', shell, code, ['9:3'], + assertSliced(label('Product lhs in for', capabilities), + shell, code, ['9:3'], `product <- 1 N <- 10 for(i in 1:(N-1)) product <- product * i` ) - assertSliced('Product rhs in for', shell, code, ['9:14'], + assertSliced(label('Product rhs in for', capabilities), + shell, code, ['9:14'], `product <- 1 N <- 10 for(i in 1:(N-1)) product <- product * i` ) - assertSliced('Sum in call', shell, code, ['12:13'], + assertSliced(label('Sum in call', capabilities), + shell, code, ['12:13'], `sum <- 0 w <- 7 N <- 10 @@ -150,14 +173,16 @@ for(i in 1:(N-1)) sum <- sum + i + w cat("Sum:", sum, "\\n")` ) - assertSliced('Product in call', shell, code, ['13:17'], + assertSliced(label('Product in call', capabilities), + shell, code, ['13:17'], `product <- 1 N <- 10 for(i in 1:(N-1)) product <- product * i cat("Product:", product, "\\n")` ) - assertSliced('Top by name', shell, code, ['2@sum'], + assertSliced(label('Top by name', capabilities), + shell, code, ['2@sum'], 'sum <- 0' ) diff --git a/test/functionality/util/control-flow-graph-tests.ts b/test/functionality/util/control-flow-graph-tests.ts index 7838118285..3b6a30938f 100644 --- a/test/functionality/util/control-flow-graph-tests.ts +++ b/test/functionality/util/control-flow-graph-tests.ts @@ -10,10 
+10,16 @@ import { equalCfg, extractCFG } from '../../../src/util/cfg/cfg' -import { SteppingSlicer } from '../../../src/core' +import type { NodeId } from '../../../src' import { requestFromInput, RFalse, RTrue, RType } from '../../../src/r-bridge' import { defaultQuadIdGenerator } from '../../../src/util/quads' import { cfgToMermaidUrl } from '../../../src/util/mermaid' +import { SteppingSlicer } from '../../../src/core/stepping-slicer' +import { normalizeIdToNumberIfPossible } from '../../../src/r-bridge/lang-4.x/ast/model/processing/node-id' + +function normAllIds(ids: NodeId[]): NodeId[] { + return ids.map(normalizeIdToNumberIfPossible) +} describe('Control Flow Graph', withShell(shell => { function assertCfg(code: string, partialExpected: Partial) { @@ -28,11 +34,11 @@ describe('Control Flow Graph', withShell(shell => { const cfg = extractCFG(result.normalize) try { - assert.deepStrictEqual(cfg.entryPoints, expected.entryPoints, 'entry points differ') - assert.deepStrictEqual(cfg.exitPoints, expected.exitPoints, 'exit points differ') - assert.deepStrictEqual(cfg.breaks, expected.breaks, 'breaks differ') - assert.deepStrictEqual(cfg.nexts, expected.nexts, 'nexts differ') - assert.deepStrictEqual(cfg.returns, expected.returns, 'returns differ') + assert.deepStrictEqual(normAllIds(cfg.entryPoints), normAllIds(expected.entryPoints), 'entry points differ') + assert.deepStrictEqual(normAllIds(cfg.exitPoints), normAllIds(expected.exitPoints), 'exit points differ') + assert.deepStrictEqual(normAllIds(cfg.breaks), normAllIds(expected.breaks), 'breaks differ') + assert.deepStrictEqual(normAllIds(cfg.nexts), normAllIds(expected.nexts), 'nexts differ') + assert.deepStrictEqual(normAllIds(cfg.returns), normAllIds(expected.returns), 'returns differ') assert.isTrue(equalCfg(cfg.graph, expected.graph), 'graphs differ') } catch(e: unknown) { console.error(`expected: ${cfgToMermaidUrl(expected, result.normalize)}`) @@ -46,67 +52,67 @@ describe('Control Flow Graph', 
withShell(shell => { entryPoints: [ '3' ], exitPoints: [ '3-exit' ], graph: new ControlFlowGraph() - .addVertex({ id: '0', name: RType.Logical, type: CfgVertexType.Expression }) - .addVertex({ id: '1', name: RType.Number, type: CfgVertexType.Expression }) - .addVertex({ id: '3', name: RType.IfThenElse, type: CfgVertexType.Statement }) + .addVertex({ id: 0, name: RType.Logical, type: CfgVertexType.Expression }) + .addVertex({ id: 1, name: RType.Number, type: CfgVertexType.Expression }) + .addVertex({ id: 3, name: RType.IfThenElse, type: CfgVertexType.Statement }) .addVertex({ id: '3-exit', name: 'if-exit', type: CfgVertexType.EndMarker }) - .addEdge('0', '3', { label: 'FD' }) - .addEdge('1', '0', { label: 'CD', when: RTrue }) - .addEdge('3-exit', '1', { label: 'FD' }) - .addEdge('3-exit', '0', { label: 'CD', when: RFalse }) + .addEdge(0, 3, { label: 'FD' }) + .addEdge(1, 0, { label: 'CD', when: RTrue }) + .addEdge('3-exit', 1, { label: 'FD' }) + .addEdge('3-exit', 0, { label: 'CD', when: RFalse }) }) assertCfg('2 + 3', { entryPoints: [ '2' ], exitPoints: [ '2-exit' ], graph: new ControlFlowGraph() - .addVertex({ id: '0', name: RType.Number, type: CfgVertexType.Expression }) - .addVertex({ id: '1', name: RType.Number, type: CfgVertexType.Expression }) - .addVertex({ id: '2', name: RType.BinaryOp, type: CfgVertexType.Expression }) + .addVertex({ id: 0, name: RType.Number, type: CfgVertexType.Expression }) + .addVertex({ id: 1, name: RType.Number, type: CfgVertexType.Expression }) + .addVertex({ id: 2, name: RType.BinaryOp, type: CfgVertexType.Expression }) .addVertex({ id: '2-exit', name: 'binOp-exit', type: CfgVertexType.EndMarker }) - .addEdge('0', '2', { label: 'FD' }) - .addEdge('1', '0', { label: 'FD' }) - .addEdge('2-exit', '1', { label: 'FD' }) + .addEdge(0, 2, { label: 'FD' }) + .addEdge(1, 0, { label: 'FD' }) + .addEdge('2-exit', 1, { label: 'FD' }) }) assertCfg('f(2 + 3, x=3)', { entryPoints: [ '8' ], exitPoints: [ '8-exit' ], graph: new ControlFlowGraph() - 
.addVertex({ id: '0', name: RType.Symbol, type: CfgVertexType.Expression }) - .addVertex({ id: '8', name: RType.FunctionCall, type: CfgVertexType.Statement }) + .addVertex({ id: 0, name: RType.Symbol, type: CfgVertexType.Expression }) + .addVertex({ id: 8, name: RType.FunctionCall, type: CfgVertexType.Statement }) .addVertex({ id: '8-name', name: 'call-name', type: CfgVertexType.MidMarker }) .addVertex({ id: '8-exit', name: 'call-exit', type: CfgVertexType.EndMarker }) - .addVertex({ id: '4', name: RType.Argument, type: CfgVertexType.Expression }) + .addVertex({ id: 4, name: RType.Argument, type: CfgVertexType.Expression }) .addVertex({ id: '4-before-value', name: 'before-value', type: CfgVertexType.MidMarker }) - .addVertex({ id: '1', name: RType.Number, type: CfgVertexType.Expression }) - .addVertex({ id: '2', name: RType.Number, type: CfgVertexType.Expression }) - .addVertex({ id: '3', name: RType.BinaryOp, type: CfgVertexType.Expression }) + .addVertex({ id: 1, name: RType.Number, type: CfgVertexType.Expression }) + .addVertex({ id: 2, name: RType.Number, type: CfgVertexType.Expression }) + .addVertex({ id: 3, name: RType.BinaryOp, type: CfgVertexType.Expression }) .addVertex({ id: '3-exit', name: 'binOp-exit', type: CfgVertexType.EndMarker }) .addVertex({ id: '4-exit', name: 'exit', type: CfgVertexType.EndMarker }) - .addVertex({ id: '7', name: RType.Argument, type: CfgVertexType.Expression }) - .addVertex({ id: '5', name: RType.Symbol, type: CfgVertexType.Expression }) + .addVertex({ id: 7, name: RType.Argument, type: CfgVertexType.Expression }) + .addVertex({ id: 5, name: RType.Symbol, type: CfgVertexType.Expression }) .addVertex({ id: '7-before-value', name: 'before-value', type: CfgVertexType.MidMarker }) - .addVertex({ id: '6', name: RType.Number, type: CfgVertexType.Expression }) + .addVertex({ id: 6, name: RType.Number, type: CfgVertexType.Expression }) .addVertex({ id: '7-exit', name: 'exit', type: CfgVertexType.EndMarker }) - .addEdge('0', '8', { 
label: 'FD' }) - .addEdge('8-name', '0', { label: 'FD' }) - .addEdge('4', '8-name', { label: 'FD' }) - .addEdge('4-before-value', '4', { label: 'FD' }) - .addEdge('3', '4-before-value', { label: 'FD' }) - .addEdge('1', '3', { label: 'FD' }) - .addEdge('2', '1', { label: 'FD' }) - .addEdge('3-exit', '2', { label: 'FD' }) + .addEdge(0, 8, { label: 'FD' }) + .addEdge('8-name', 0, { label: 'FD' }) + .addEdge(4, '8-name', { label: 'FD' }) + .addEdge('4-before-value', 4, { label: 'FD' }) + .addEdge(3, '4-before-value', { label: 'FD' }) + .addEdge(1, 3, { label: 'FD' }) + .addEdge(2, 1, { label: 'FD' }) + .addEdge('3-exit', 2, { label: 'FD' }) .addEdge('4-exit', '3-exit', { label: 'FD' }) - .addEdge('7', '4-exit', { label: 'FD' }) - .addEdge('5', '7', { label: 'FD' }) - .addEdge('7-before-value', '5', { label: 'FD' }) - .addEdge('6', '7-before-value', { label: 'FD' }) - .addEdge('7-exit', '6', { label: 'FD' }) + .addEdge(7, '4-exit', { label: 'FD' }) + .addEdge(5, 7, { label: 'FD' }) + .addEdge('7-before-value', 5, { label: 'FD' }) + .addEdge(6, '7-before-value', { label: 'FD' }) + .addEdge('7-exit', 6, { label: 'FD' }) .addEdge('8-exit', '7-exit', { label: 'FD' }) }) @@ -123,13 +129,13 @@ describe('Control Flow Graph', withShell(shell => { const content = cfg2quads(cfg, { context, domain, getId: defaultQuadIdGenerator() }) - assert.strictEqual(content, `<${domain}${context}/0> <${domain}rootIds> "3" <${context}> . + assert.strictEqual(content, `<${domain}${context}/0> <${domain}rootIds> "3"^^ <${context}> . <${domain}${context}/0> <${domain}rootIds> "3-exit" <${context}> . -<${domain}${context}/0> <${domain}rootIds> "0" <${context}> . -<${domain}${context}/0> <${domain}rootIds> "1" <${context}> . +<${domain}${context}/0> <${domain}rootIds> "0"^^ <${context}> . +<${domain}${context}/0> <${domain}rootIds> "1"^^ <${context}> . <${domain}${context}/0> <${domain}vertices> <${domain}${context}/1> <${context}> . 
<${domain}${context}/1> <${domain}next> <${domain}${context}/2> <${context}> . -<${domain}${context}/1> <${domain}id> "3" <${context}> . +<${domain}${context}/1> <${domain}id> "3"^^ <${context}> . <${domain}${context}/1> <${domain}name> "RIfThenElse" <${context}> . <${domain}${context}/0> <${domain}vertices> <${domain}${context}/2> <${context}> . <${domain}${context}/2> <${domain}next> <${domain}${context}/3> <${context}> . @@ -137,33 +143,33 @@ describe('Control Flow Graph', withShell(shell => { <${domain}${context}/2> <${domain}name> "if-exit" <${context}> . <${domain}${context}/0> <${domain}vertices> <${domain}${context}/3> <${context}> . <${domain}${context}/3> <${domain}next> <${domain}${context}/4> <${context}> . -<${domain}${context}/3> <${domain}id> "0" <${context}> . +<${domain}${context}/3> <${domain}id> "0"^^ <${context}> . <${domain}${context}/3> <${domain}name> "RLogical" <${context}> . <${domain}${context}/0> <${domain}vertices> <${domain}${context}/4> <${context}> . -<${domain}${context}/4> <${domain}id> "1" <${context}> . +<${domain}${context}/4> <${domain}id> "1"^^ <${context}> . <${domain}${context}/4> <${domain}name> "RNumber" <${context}> . <${domain}${context}/0> <${domain}edges> <${domain}${context}/5> <${context}> . <${domain}${context}/5> <${domain}next> <${domain}${context}/6> <${context}> . -<${domain}${context}/5> <${domain}from> "1" <${context}> . -<${domain}${context}/5> <${domain}to> "0" <${context}> . +<${domain}${context}/5> <${domain}from> "1"^^ <${context}> . +<${domain}${context}/5> <${domain}to> "0"^^ <${context}> . <${domain}${context}/5> <${domain}type> "CD" <${context}> . <${domain}${context}/5> <${domain}when> "TRUE" <${context}> . <${domain}${context}/0> <${domain}edges> <${domain}${context}/6> <${context}> . <${domain}${context}/6> <${domain}next> <${domain}${context}/7> <${context}> . -<${domain}${context}/6> <${domain}from> "0" <${context}> . -<${domain}${context}/6> <${domain}to> "3" <${context}> . 
+<${domain}${context}/6> <${domain}from> "0"^^ <${context}> . +<${domain}${context}/6> <${domain}to> "3"^^ <${context}> . <${domain}${context}/6> <${domain}type> "FD" <${context}> . <${domain}${context}/0> <${domain}edges> <${domain}${context}/7> <${context}> . <${domain}${context}/7> <${domain}next> <${domain}${context}/8> <${context}> . <${domain}${context}/7> <${domain}from> "3-exit" <${context}> . -<${domain}${context}/7> <${domain}to> "1" <${context}> . +<${domain}${context}/7> <${domain}to> "1"^^ <${context}> . <${domain}${context}/7> <${domain}type> "FD" <${context}> . <${domain}${context}/0> <${domain}edges> <${domain}${context}/8> <${context}> . <${domain}${context}/8> <${domain}from> "3-exit" <${context}> . -<${domain}${context}/8> <${domain}to> "0" <${context}> . +<${domain}${context}/8> <${domain}to> "0"^^ <${context}> . <${domain}${context}/8> <${domain}type> "CD" <${context}> . <${domain}${context}/8> <${domain}when> "FALSE" <${context}> . -<${domain}${context}/0> <${domain}entryPoints> "3" <${context}> . +<${domain}${context}/0> <${domain}entryPoints> "3"^^ <${context}> . <${domain}${context}/0> <${domain}exitPoints> "3-exit" <${context}> . 
`) }) diff --git a/test/functionality/util/quads-tests.ts b/test/functionality/util/quads-tests.ts index 9a16d62512..e4a8ade5bd 100644 --- a/test/functionality/util/quads-tests.ts +++ b/test/functionality/util/quads-tests.ts @@ -1,9 +1,9 @@ import { retrieveNormalizedAst, withShell } from '../_helper/shell' -import { decorateAst, requestFromInput, RType } from '../../../src/r-bridge' +import { decorateAst, requestFromInput } from '../../../src' import { defaultQuadIdGenerator, serialize2quads } from '../../../src/util/quads' import { assert } from 'chai' -import { SteppingSlicer } from '../../../src/core' import { dataflowGraphToQuads } from '../../../src/core/print/dataflow-printer' +import { SteppingSlicer } from '../../../src/core/stepping-slicer' describe('Quads', withShell(shell => { const context = 'test' @@ -20,19 +20,16 @@ describe('Quads', withShell(shell => { const idPrefix = `${domain}${context}/` // ids are deterministic, so we can compare the quads await compareQuadsCfg('1', ` -<${idPrefix}0> <${domain}type> "${RType.ExpressionList}" <${context}> . +<${idPrefix}0> <${domain}type> "RExpressionList" <${context}> . <${idPrefix}0> <${domain}children> <${idPrefix}1> <${context}> . -<${idPrefix}1> <${domain}location> <${idPrefix}2> <${context}> . -<${idPrefix}2> <${domain}start> <${idPrefix}3> <${context}> . -<${idPrefix}3> <${domain}line> "1"^^ <${context}> . -<${idPrefix}3> <${domain}column> "1"^^ <${context}> . -<${idPrefix}2> <${domain}end> <${idPrefix}4> <${context}> . -<${idPrefix}4> <${domain}line> "1"^^ <${context}> . -<${idPrefix}4> <${domain}column> "1"^^ <${context}> . +<${idPrefix}1> <${domain}location> "1"^^ <${context}> . +<${idPrefix}1> <${domain}location> "1"^^ <${context}> . +<${idPrefix}1> <${domain}location> "1"^^ <${context}> . +<${idPrefix}1> <${domain}location> "1"^^ <${context}> . <${idPrefix}1> <${domain}lexeme> "1" <${context}> . -<${idPrefix}1> <${domain}type> "${RType.Number}" <${context}> . 
-<${idPrefix}1> <${domain}content> <${idPrefix}5> <${context}> . -<${idPrefix}5> <${domain}num> "1"^^ <${context}> . +<${idPrefix}1> <${domain}type> "RNumber" <${context}> . +<${idPrefix}1> <${domain}content> <${idPrefix}2> <${context}> . +<${idPrefix}2> <${domain}num> "1"^^ <${context}> . `) }) @@ -51,52 +48,32 @@ describe('Quads', withShell(shell => { const idPrefix = `${domain}${context}/` // ids are deterministic, so we can compare the quads await compareQuadsDfg('foo(x)', ` -<${idPrefix}0> <${domain}rootIds> "1" <${context}> . -<${idPrefix}0> <${domain}rootIds> "2" <${context}> . -<${idPrefix}0> <${domain}rootIds> "3" <${context}> . +<${idPrefix}0> <${domain}rootIds> "1"^^ <${context}> . +<${idPrefix}0> <${domain}rootIds> "3"^^ <${context}> . <${idPrefix}0> <${domain}vertices> <${idPrefix}1> <${context}> . <${idPrefix}1> <${domain}next> <${idPrefix}2> <${context}> . <${idPrefix}1> <${domain}tag> "use" <${context}> . -<${idPrefix}1> <${domain}id> "1" <${context}> . +<${idPrefix}1> <${domain}id> "1"^^ <${context}> . <${idPrefix}1> <${domain}name> "x" <${context}> . +<${idPrefix}1> <${domain}when> "always" <${context}> . <${idPrefix}1> <${domain}environment> <${idPrefix}3> <${context}> . <${idPrefix}3> <${domain}current> <${idPrefix}4> <${context}> . <${idPrefix}3> <${domain}level> "0"^^ <${context}> . -<${idPrefix}1> <${domain}when> "always" <${context}> . <${idPrefix}0> <${domain}vertices> <${idPrefix}2> <${context}> . -<${idPrefix}2> <${domain}next> <${idPrefix}5> <${context}> . -<${idPrefix}2> <${domain}tag> "use" <${context}> . -<${idPrefix}2> <${domain}id> "2" <${context}> . -<${idPrefix}2> <${domain}name> "unnamed-argument-2" <${context}> . -<${idPrefix}2> <${domain}environment> <${idPrefix}6> <${context}> . -<${idPrefix}6> <${domain}current> <${idPrefix}7> <${context}> . -<${idPrefix}6> <${domain}level> "0"^^ <${context}> . +<${idPrefix}2> <${domain}tag> "function-call" <${context}> . +<${idPrefix}2> <${domain}id> "3"^^ <${context}> . 
+<${idPrefix}2> <${domain}name> "foo" <${context}> . +<${idPrefix}2> <${domain}environment> <${idPrefix}5> <${context}> . +<${idPrefix}5> <${domain}current> <${idPrefix}6> <${context}> . +<${idPrefix}5> <${domain}level> "0"^^ <${context}> . +<${idPrefix}2> <${domain}onlyBuiltin> "false"^^ <${context}> . +<${idPrefix}2> <${domain}args> <${idPrefix}7> <${context}> . +<${idPrefix}7> <${domain}nodeId> "1"^^ <${context}> . <${idPrefix}2> <${domain}when> "always" <${context}> . -<${idPrefix}0> <${domain}vertices> <${idPrefix}5> <${context}> . -<${idPrefix}5> <${domain}tag> "function-call" <${context}> . -<${idPrefix}5> <${domain}id> "3" <${context}> . -<${idPrefix}5> <${domain}name> "foo" <${context}> . -<${idPrefix}5> <${domain}environment> <${idPrefix}8> <${context}> . -<${idPrefix}8> <${domain}current> <${idPrefix}9> <${context}> . -<${idPrefix}8> <${domain}level> "0"^^ <${context}> . -<${idPrefix}5> <${domain}when> "always" <${context}> . -<${idPrefix}5> <${domain}scope> "local" <${context}> . -<${idPrefix}5> <${domain}args> <${idPrefix}10> <${context}> . -<${idPrefix}10> <${domain}name> "unnamed-argument-2" <${context}> . -<${idPrefix}10> <${domain}scope> "local" <${context}> . -<${idPrefix}10> <${domain}nodeId> "2" <${context}> . -<${idPrefix}10> <${domain}used> "always" <${context}> . -<${idPrefix}0> <${domain}edges> <${idPrefix}11> <${context}> . -<${idPrefix}11> <${domain}next> <${idPrefix}12> <${context}> . -<${idPrefix}11> <${domain}from> "2" <${context}> . -<${idPrefix}11> <${domain}to> "1" <${context}> . -<${idPrefix}11> <${domain}type> "reads" <${context}> . -<${idPrefix}11> <${domain}when> "always" <${context}> . -<${idPrefix}0> <${domain}edges> <${idPrefix}12> <${context}> . -<${idPrefix}12> <${domain}from> "3" <${context}> . -<${idPrefix}12> <${domain}to> "2" <${context}> . -<${idPrefix}12> <${domain}type> "argument" <${context}> . -<${idPrefix}12> <${domain}when> "always" <${context}> . +<${idPrefix}0> <${domain}edges> <${idPrefix}8> <${context}> . 
+<${idPrefix}8> <${domain}from> "3"^^ <${context}> . +<${idPrefix}8> <${domain}to> "1"^^ <${context}> . +<${idPrefix}8> <${domain}type> "argument" <${context}> . `) }) })) diff --git a/test/functionality/util/random-tests.ts b/test/functionality/util/random-tests.ts index 524238595e..23312fcdd0 100644 --- a/test/functionality/util/random-tests.ts +++ b/test/functionality/util/random-tests.ts @@ -51,7 +51,7 @@ describe('Random', () => { ) } }) - it('Floating Point', function() { + it('floating point', function() { for(const length of [2.3, 42.42, Math.PI]) { assert.throws( () => randomString(length), diff --git a/test/functionality/util/range-tests.ts b/test/functionality/util/range-tests.ts index fedec3488a..6ac1c61c35 100644 --- a/test/functionality/util/range-tests.ts +++ b/test/functionality/util/range-tests.ts @@ -21,10 +21,7 @@ describe('Range', () => { for(const endColumn of pool) { assert.deepStrictEqual( rangeFrom(startLine, startColumn, endLine, endColumn), - { - start: { line: startLine, column: startColumn }, - end: { line: endLine, column: endColumn }, - }, + [startLine, startColumn, endLine, endColumn], 'with numbers' ) assert.deepStrictEqual( @@ -34,10 +31,7 @@ describe('Range', () => { `${endLine}`, `${endColumn}` ), - { - start: { line: startLine, column: startColumn }, - end: { line: endLine, column: endColumn }, - }, + [startLine, startColumn, endLine, endColumn], 'with strings' ) } @@ -95,10 +89,10 @@ describe('Range', () => { ) } - const assertIndependentOfOrder = ( + function assertIndependentOfOrder( expected: SourceRange, ...a: SourceRange[] - ): void => { + ): void { for(const permutation of allPermutations(a)) { assertMerged(expected, ...permutation) } diff --git a/tsconfig.json b/tsconfig.json index e799db8d72..7f9386d8ba 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -10,7 +10,8 @@ "skipLibCheck": true, "sourceMap": true, "outDir": "./dist/", - "strict": true + "strict": true, + "alwaysStrict": true }, "lib": [ "esnext", "dom" ], 
"exclude": [ diff --git a/wiki/Interface.md b/wiki/Interface.md index 7ead583366..e29c0e8bf0 100644 --- a/wiki/Interface.md +++ b/wiki/Interface.md @@ -18,7 +18,8 @@ Although far from being as detailed as the in-depth explanation of [*flowR*](htt - [Interfacing With the File System](#interfacing-with-the-file-system) - [⚒️ Writing Code](#️-writing-code) - [Interfacing With R by Using The `RShell`](#interfacing-with-r-by-using-the-rshell) - - [Slicing With The `SteppingSlicer`](#slicing-with-the-steppingslicer) + - [The Pipeline Executor](#the-pipeline-executor) + - [(Deprecated) Slicing With The `SteppingSlicer`](#deprecated-slicing-with-the-steppingslicer) - [Understanding the Steps](#understanding-the-steps) - [Benchmark the Slicer With The `BenchmarkSlicer`](#benchmark-the-slicer-with-the-benchmarkslicer) - [Augmenting the Normalization](#augmenting-the-normalization) @@ -957,15 +958,41 @@ With a shell object (let's call it `shell`), you can execute R code by using `RS Besides that, the command `RShell::tryToInjectHomeLibPath` may be of interest, as it enables all libraries available on the host system. -### Slicing With The `SteppingSlicer` +### The Pipeline Executor -The main class that represents *flowR*'s slicing is the `SteppingSlicer` class. With *flowR*, this allows you to slice code like this: +Once, in the beginning, *flowR* was meant to produce a dataflow graph merely to provide *program slices*. However, with continuous extensions the dataflow graph repeatedly proofs to be the interesting part. +With this, we restructured *flowR*'s *hardcoded* pipeline to be +far more flexible. Now, it can be theoretically extended or replaced with arbitrary steps, optional steps, and, what we call 'decorations' of these steps. 
In short, if you still "just want to slice", you can do it like this: ```typescript -const shell = new RShell() +const slicer = new PipelineExecutor(DEFAULT_SLICING_PIPELINE, { + shell: new RShell(), + request: requestFromInput('x <- 1\nx + 1'), + criterion: ['2@x'] +}) +const slice = await slicer.allRemainingSteps() +// console.log(slice.reconstruct.code) +``` +If you compare this, with what you would have done with the [old `SteppingSlicer`](#deprecated-slicing-with-the-steppingslicer) this essentially just requires you to replace the `SteppingSlicer` with the `PipelineExecutor` and to pass the `DEFAULT_SLICING_PIPELINE` as the first argument. +Similarly, the new `PipelineExecutor`... + +1. allows to investigate the results of all intermediate steps +2. can be executed step-by-step +3. can repeat steps (e.g., to calculate multiple slices on the same input) + +See the [documentation](https://code-inspect.github.io/flowr/doc/classes/src_core_pipeline-executor.PipelineExecutor.html) for more information. + +### (Deprecated) Slicing With The `SteppingSlicer` + +> 💡 Information\ +> Please note, that the `SteppingSlicer` has been deprecated with the *Dataflow v2* update, in favor of a far more general `PipelineExecutor` (which now backs the `SteppingSlicer` using a custom legacy-`Pipeline` to ensure that it behaves similar). + +The main class that represents *flowR*'s slicing is the `SteppingSlicer` class. With *flowR*, this allows you to slice code like this: + +```typescript const stepper = new SteppingSlicer({ - shell: shell, + shell: new RShell(), request: requestFromInput('x <- 1\nx + 1'), criterion: ['2@x'] }) @@ -994,7 +1021,7 @@ See the _documentation_ for more. The definition of all steps happens in [src/core/steps.ts](https://github.com/Code-Inspect/flowr/blob/main/src/core/steps.ts). Investigating the file provides you an overview of the slicing phases, as well as the functions that are called to perform the respective step. 
-The [`SteppingSlicer`](https://github.com/Code-Inspect/flowr/blob/main/src/core/slicer.ts) simply glues them together and passes the results of one step to the next. +The [`SteppingSlicer`](https://github.com/Code-Inspect/flowr/blob/main/src/core/stepping-slicer.ts) simply glues them together and passes the results of one step to the next. If you are interested in the type magic associated with the stepping slicers output type, refer to [src/core/output.ts](https://github.com/Code-Inspect/flowr/blob/main/src/core/output.ts). If you add a new step, make sure to modify all of these locations accordingly. diff --git a/wiki/Linting and Testing.md b/wiki/Linting and Testing.md index d591a03070..fdf891c008 100644 --- a/wiki/Linting and Testing.md +++ b/wiki/Linting and Testing.md @@ -126,7 +126,7 @@ We explain the most important workflows in the following: - running the [linter](#linting) and reporting its results - deploying the documentation to [GitHub Pages](https://code-inspect.github.io/flowr/doc/) - [release.yaml](../.github/workflows/release.yaml) is responsible for creating a new release, only to be run by repository owners. Furthermore, it adds the new docker image to [docker hub](https://hub.docker.com/r/eagleoutice/flowr). -- [check-broken-links.yaml](../.github/workflows/check-broken-links.yaml) repeatedly tests that all links are not dead! +- [broken-links-and-wiki.yaml](../.github/workflows/broken-links-and-wiki.yaml) repeatedly tests that all links are not dead! ## Linting