diff --git a/.githooks/commit-msg b/.githooks/commit-msg index 5ec8c63d1f..ddf8b80e68 100755 --- a/.githooks/commit-msg +++ b/.githooks/commit-msg @@ -4,9 +4,19 @@ message_file="$1" message=$(cat "$message_file") # Define the regular expression pattern -regex="^((\[(no|skip) ci\] )?(((feat|tests?|lint|refactor|ci|git|special|doc|typo|log|ts|fix|wip|docker|dep)(-fix|-fail)?(, ?)?)+: |Merge (remote-tracking )?branch|Auto-merging).+|\[release:(patch|minor|major)\] .+)" +regex="^((\[(no|skip) ci\] )?" # allow to skip ci steps if required +regex="$regex(Merge (remote-tracking )?branch|Auto-merging|" # allow merge commit messages + regex="$regex(" # allow multiple types + regex="$regex(feat|tests?|lint|refactor|ci|git|special|doc|typo|ts|fix|wip|docker|dep)" # all valid types + regex="$regex(-fix|-fail)?" # optional fail suffix + regex="$regex(\([^)]+\))?" # optional scope + regex="$regex(, ?)?" # optional comma between types + regex="$regex)+: " # at least one type is required +regex="$regex)" # allow arbitrary message (no end marker) +regex="$regex|\[release:(patch|minor|major)\] .+)" # alternatively, allow release commits (only to be done on main) if ! echo "$message" | grep -qE "$regex"; then - echo "[POLICY] Your message is not formatted correctly. Respect the regex: '$regex'!" + echo "[POLICY] Your message is not formatted correctly. Please respect the style defined in '.github/CONTRIBUTING.md'." + printf "[POLICY] Your message was (ignoring git comments):\n\n%s\n" "$(echo "$message" | grep -vE "^#")" exit 1 fi diff --git a/.githooks/pre-push b/.githooks/pre-push index e8a9974cbd..ccb1dfe394 100755 --- a/.githooks/pre-push +++ b/.githooks/pre-push @@ -2,9 +2,42 @@ set -eu -if ! [ -x "$(command -v npm)" ]; then - echo 'Error: npm not found. Make it available to the host shell (e.g., with "nvm use --lts").' - exit 2 + +find_npm_linux() { + export NPM_CMD="npm" + + if ! (type $NPM_CMD >> /dev/null); then + echo "npm not found, trying to make it available using nvm..." 
+ if type nvm >> /dev/null; then + echo "nvm found, using it to install the latest lts node" + nvm use --lts + else + echo "nvm not found, trying to make it available using the nvm.sh" + # try to make it available based on https://github.com/typicode/husky/issues/912#issuecomment-817522060 + export NVM_DIR="$HOME/.nvm/nvm.sh" + . "$(dirname $NVM_DIR)/nvm.sh" + + export NVM_DIR="$HOME/.nvm" + a=$(nvm ls --no-colors | grep 'node') + v=$(echo "$a" | sed -E 's/.*\(-> ([^ ]+).*/\1/') + + export PATH="$NVM_DIR/versions/node/$v/bin:$PATH" + + if ! (type $NPM_CMD >> /dev/null); then + echo "no variant of npm or nvm found, trying to use the npm.cmd" + export NPM_CMD="npm.cmd" + fi + fi + fi +} + +if [ -z "${OSTYPE+x}" ]; then + find_npm_linux +else + case "$OSTYPE" in + msys*) export NPM_CMD="npm.cmd";; + *) find_npm_linux ;; + esac fi @@ -37,7 +70,7 @@ if [ -n "$(git status --porcelain)" ]; then fi echo "Linting project (local mode)..." -npm run lint-local +$NPM_CMD run lint-local # shellcheck disable=SC2124 # we want the argument splitting diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 3bf15ef39e..b7410fe2a5 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -33,8 +33,8 @@ If you have any questions, refer to the [wiki](https://github.com/Code-Inspect/f ## Commit Messages -We structure our commit messages (enforced by our git-hooks) using the format `: `. -Currently, the following types are at your disposal (more may have been or are still available, but please restrict yourself to the following): +We structure our commit messages (enforced by our git-hooks) using the format `(): ` (with `()` being optional). +Currently, the following `` are at your disposal (more may have been or are still available, but please restrict yourself to the following): | name | description | @@ -49,7 +49,6 @@ Currently, the following types are at your disposal (more may have been or are s | `lint` | Adapted or updated linter-issues. 
| | `doc` | Updated the documentation of *flowR*. | | `typo` | Dealt with a small typo/a grammatical mistake. | -| `log` | Improved or updated the logging of *flowR*. | | `ts` | Performed something typescript-specific (e.g., reconfigured the `tsconfig.json`). | | `wip` | *Use this only in combination with another type*. It marks the commit to be unfinished. | | `special` | *Use this only if none of the other categories apply*. Explain the details in your commit message. | @@ -65,9 +64,12 @@ Although you can give the same type repeatedly - if you think you should, please With this, the artificial message -> `feat, test-fix: Support for branching in dataflow, fixed branching-test` +> `feat, test-fix: Support for branching in dataflow, fixed branching test` represents the addition of a new feature and the fix of a corresponding test. +With scopes, it could look like this: + +> `feat, test-fix(dataflow): Support branching & fixed branching test` To skip the `ci`, you can prefix the commit message with `[skip ci]`. 
diff --git a/package.json b/package.json index 081db72a22..9835e2d1bc 100644 --- a/package.json +++ b/package.json @@ -18,7 +18,7 @@ "lint": "npm run license-compat -- --summary && eslint src/ test/", "license-compat": "license-checker --onlyAllow 'MIT;MIT OR X11;GPLv2;LGPL;GNUGPL;ISC;Apache-2.0;FreeBSD;BSD-2-Clause;clearbsd;ModifiedBSD;BSD-3-Clause;Python-2.0;Unlicense;WTFPL;CC-BY-4.0;CC-BY-3.0;CC0-1.0;0BSD'", "doc": "typedoc", - "test": "nyc --no-clean mocha --require ts-node/register --timeout 60000 \"test/**/*.spec.ts\"", + "test": "nyc --source-map --produce-source-map mocha --require ts-node/register --timeout 60000 \"test/**/*.spec.ts\"", "performance-test": "func() { cd test/performance/ && bash run-all-suites.sh $1 $2; cd ../../; }; func", "test-full": "npm run test -- --test-installation" }, @@ -29,14 +29,13 @@ "all": true, "per-file": true, "check-coverage": false, - "skip-full": false, + "skip-full": true, "lines": 70, "extension": [ ".ts" ], "include": "src/**/*.ts", "reporter": [ - "html", "text", "lcov", "cobertura" @@ -173,7 +172,7 @@ "check-file/filename-naming-convention": [ "error", { - "**/*.ts": "?([A-Z])+([a-z])*((-|.)?([A-Z])+([a-z]))" + "**/*.ts": "?([0-9]+-)?([A-Z])+([a-z])*((-|.)?([A-Z])+([a-z]))" } ], "check-file/folder-match-with-fex": [ diff --git a/src/benchmark/slicer.ts b/src/benchmark/benchmark-slicer.ts similarity index 100% rename from src/benchmark/slicer.ts rename to src/benchmark/benchmark-slicer.ts diff --git a/src/benchmark/index.ts b/src/benchmark/index.ts index 78a6cd9673..86fb3719b1 100644 --- a/src/benchmark/index.ts +++ b/src/benchmark/index.ts @@ -1,3 +1,3 @@ export * from './stats' -export * from './slicer' +export * from './benchmark-slicer' export * from './stopwatch' diff --git a/src/cli/repl/core.ts b/src/cli/repl/core.ts index 0cbaf159d9..d3076279c5 100644 --- a/src/cli/repl/core.ts +++ b/src/cli/repl/core.ts @@ -3,7 +3,7 @@ * * @module */ -import { RShell } from '../../r-bridge' +import { RShell, 
RShellReviveOptions } from '../../r-bridge' import readline from 'readline/promises' import { bold } from '../../statistics' import { prompt } from './prompt' @@ -76,7 +76,7 @@ export async function replProcessAnswer(output: ReplOutput, expr: string, shell: * For the execution, this function makes use of {@link replProcessAnswer} * */ -export async function repl(shell = new RShell({ revive: 'always' }), rl = readline.createInterface(DEFAULT_REPL_READLINE_CONFIGURATION), output = standardReplOutput) { +export async function repl(shell = new RShell({ revive: RShellReviveOptions.Always }), rl = readline.createInterface(DEFAULT_REPL_READLINE_CONFIGURATION), output = standardReplOutput) { // the incredible repl :D, we kill it with ':quit' // eslint-disable-next-line no-constant-condition,@typescript-eslint/no-unnecessary-condition diff --git a/src/cli/repl/server/connection.ts b/src/cli/repl/server/connection.ts index 47c6cb6741..a623c7be35 100644 --- a/src/cli/repl/server/connection.ts +++ b/src/cli/repl/server/connection.ts @@ -1,4 +1,4 @@ -import { LAST_STEP, printStepResult, SteppingSlicer, StepResults, STEPS_PER_SLICE } from '../../../core' +import { LAST_STEP, SteppingSlicer, StepResults, STEPS_PER_SLICE } from '../../../core' import { DEFAULT_XML_PARSER_CONFIG, NormalizedAst, RShell, XmlParserConfig } from '../../../r-bridge' import { sendMessage } from './send' import { answerForValidationError, validateBaseMessageFormat, validateMessage } from './validate' @@ -24,8 +24,11 @@ import { cfg2quads, ControlFlowInformation, extractCFG } from '../../../util/cfg import { defaultQuadIdGenerator, QuadSerializationConfiguration } from '../../../util/quads' import { deepMergeObject } from '../../../util/objects' import { LogLevel } from '../../../util/log' -import { StepOutputFormat } from '../../../core/print/print' +import { printStepResult, StepOutputFormat } from '../../../core/print/print' import { DataflowInformation } from '../../../dataflow/internal/info' +import { 
PARSE_WITH_R_SHELL_STEP } from '../../../core/steps/all/core/00-parse' +import { NORMALIZE } from '../../../core/steps/all/core/10-normalize' +import { LEGACY_STATIC_DATAFLOW } from '../../../core/steps/all/core/20-dataflow' /** * Each connection handles a single client, answering to its requests. @@ -134,9 +137,9 @@ export class FlowRServerConnection { id: message.id, cfg: cfg ? cfg2quads(cfg, config()) : undefined, results: { - parse: await printStepResult('parse', results.parse as string, StepOutputFormat.RdfQuads, config(), parseConfig), - normalize: await printStepResult('normalize', results.normalize as NormalizedAst, StepOutputFormat.RdfQuads, config()), - dataflow: await printStepResult('dataflow', results.dataflow as DataflowInformation, StepOutputFormat.RdfQuads, config()), + parse: await printStepResult(PARSE_WITH_R_SHELL_STEP, results.parse as string, StepOutputFormat.RdfQuads, config(), parseConfig), + normalize: await printStepResult(NORMALIZE, results.normalize as NormalizedAst, StepOutputFormat.RdfQuads, config()), + dataflow: await printStepResult(LEGACY_STATIC_DATAFLOW, results.dataflow as DataflowInformation, StepOutputFormat.RdfQuads, config()), } }) } else { diff --git a/src/cli/statistics-helper-app.ts b/src/cli/statistics-helper-app.ts index f7797b0ef1..8b483f37c7 100644 --- a/src/cli/statistics-helper-app.ts +++ b/src/cli/statistics-helper-app.ts @@ -12,9 +12,11 @@ import { create } from 'tar' import fs from 'fs' import { guard } from '../util/assert' import { retrieveArchiveName } from './common/features' -import { printStepResult } from '../core' -import { StepOutputFormat } from '../core/print/print' +import { printStepResult, StepOutputFormat } from '../core/print/print' import { date2string } from '../util/time' +import { PARSE_WITH_R_SHELL_STEP } from '../core/steps/all/core/00-parse' +import { NORMALIZE } from '../core/steps/all/core/10-normalize' +import { LEGACY_STATIC_DATAFLOW } from '../core/steps/all/core/20-dataflow' // apps 
should never depend on other apps when forking (otherwise, they are "run" on load :/) @@ -89,9 +91,9 @@ async function getStatsForSingleFile() { if(options['dump-json']) { const [, output] = [...stats.outputs.entries()][0] const cfg = extractCFG(output.normalize) - statisticsFileProvider.append('output-json', 'parse', await printStepResult('parse', output.parse, StepOutputFormat.Json)) - statisticsFileProvider.append('output-json', 'normalize', await printStepResult('normalize', output.normalize, StepOutputFormat.Json)) - statisticsFileProvider.append('output-json', 'dataflow', await printStepResult('dataflow', output.dataflow, StepOutputFormat.Json)) + statisticsFileProvider.append('output-json', 'parse', await printStepResult(PARSE_WITH_R_SHELL_STEP, output.parse, StepOutputFormat.Json)) + statisticsFileProvider.append('output-json', 'normalize', await printStepResult(NORMALIZE, output.normalize, StepOutputFormat.Json)) + statisticsFileProvider.append('output-json', 'dataflow', await printStepResult(LEGACY_STATIC_DATAFLOW, output.dataflow, StepOutputFormat.Json)) statisticsFileProvider.append('output-json', 'cfg', JSON.stringify(cfg, jsonReplacer)) } diff --git a/src/core/index.ts b/src/core/index.ts index 84b443c457..b939e826fb 100644 --- a/src/core/index.ts +++ b/src/core/index.ts @@ -1,4 +1,4 @@ -export { SteppingSlicer } from './slicer' -export * from './steps' -export * from './input' -export * from './output' +export { SteppingSlicer } from './stepping-slicer' +export * from './steps/steps' +export * from './steps/input' +export * from './steps/output' diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts new file mode 100644 index 0000000000..7bb6d3ae4d --- /dev/null +++ b/src/core/pipeline-executor.ts @@ -0,0 +1,255 @@ +import { PipelineStepName, PipelineStepStage } from './steps' +import { guard } from '../util/assert' +import { + Pipeline, + PipelineInput, + PipelineOutput, + PipelinePerRequestInput, + PipelineStepNames, + 
PipelineStepOutputWithName +} from './steps/pipeline' + +/** + * The pipeline executor allows to execute arbitrary {@link Pipeline|pipelines} in a step-by-step fashion. + * If you are not yet in the possession of a {@link Pipeline|pipeline}, you can use the {@link createPipeline} function + * to create one for yourself, based on the steps that you want to execute. + * + * Those steps are split into two phases or "stages" (which is the name that we will use in the following), represented + * by the {@link PipelineStepStage} type. These allow us to separate things that have to be done + * once per-file, e.g., actually parsing the AST, from those, that we need to repeat 'once per request' (whatever this + * request may be). In other words, what can be cached between operations and what can not. + * + * Furthermore, this executor follows an iterable fashion to be *as flexible as possible* + * (e.g., to be instrumented with measurements). So, you can use the pipeline executor like this: + * + * ```ts + * const stepper = new PipelineExecutor( ... ) + * while(stepper.hasNextStep()) { + * await stepper.nextStep() + * } + * + * stepper.switchToRequestStage() + * + * while(stepper.hasNextStep()) { + * await stepper.nextStep() + * } + * + * const result = stepper.getResults() + * ``` + * + * Of course, you might think, that this is rather overkill if you simply want to receive the result. + * And this is true. Therefore, if you do not want to perform some kind of magic in-between steps, you can use the + * **{@link allRemainingSteps}** function like this: + * + * ```ts + * const stepper = new PipelineExecutor( ... ) + * const result = await stepper.allRemainingSteps() + * ``` + * + * As the name suggest, you can combine this name with previous calls to {@link nextStep} to only execute the remaining + * steps in case, for whatever reason you only want to instrument some steps. 
+ * + * By default, the {@link PipelineExecutor} does not offer an automatic way to repeat requests (mostly to prevent accidental errors). + * However, you can use the + * **{@link updateRequest}** function to reset the request steps and re-execute them for a new request. This allows something like the following: + * + * ```ts + * const stepper = new PipelineExecutor( ... ) + * const result = await stepper.allRemainingSteps() + * + * stepper.updateRequest( ... ) + * const result2 = await stepper.allRemainingSteps() + * ``` + * + * **Example - Slicing With the Pipeline Executor**: + * + * Suppose, you want to... you know _slice_ a file (which was, at one point the origin of flowR), then you can + * either create a pipeline yourself with the respective steps, or you can use the {@link DEFAULT_SLICING_PIPELINE} (and friends). + * With it, slicing essentially becomes 'easy-as-pie': + * + * ```ts + * const slicer = new PipelineExecutor(DEFAULT_SLICING_PIPELINE, { + * shell: new RShell(), + * // of course, the criterion and request given here are just examples, you can use whatever you want to slice! + * criterion: ['2@b'], + * request: requestFromInput('b <- 3; x <- 5\ncat(b)'), + * }) + * const result = await slicer.allRemainingSteps() + * ``` + * + * But now, we want to slice for `x` in the first line as well! We can do that by adding: + * + * ```ts + * stepper.updateRequest({ criterion: ['1@x'] }) + * const result2 = await stepper.allRemainingSteps() + * ``` + * + * @note Even though using the pipeline executor introduces a small performance overhead, we consider + * it to be the baseline for performance benchmarking. It may very well be possible to squeeze out a little bit more by + * directly constructing the steps in the right order. However, we consider this to be negligible when compared with the time required + * for, for example, the dataflow analysis of larger files. 
+ * + * @see PipelineExecutor#allRemainingSteps + * @see PipelineExecutor#nextStep + */ +export class PipelineExecutor

{ + private readonly pipeline: P + private readonly length: number + + private input: PipelineInput

+ private output: PipelineOutput

= {} as PipelineOutput

+ private currentExecutionStage = PipelineStepStage.OncePerFile + private stepCounter = 0 + + /** + * Construct a new pipeline executor. + * The required additional input is specified by the {@link IPipelineStep#requiredInput|required input configuration} of each step in the `pipeline`. + * + * @param pipeline - The {@link Pipeline} to execute, probably created with {@link createPipeline}. + * @param input - External {@link PipelineInput|configuration and input} required to execute the given pipeline. + */ + constructor(pipeline: P, input: PipelineInput

) { + this.pipeline = pipeline + this.length = pipeline.order.length + this.input = input + } + + /** + * Retrieve the current {@link PipelineStepStage|stage} the pipeline executor is in. + * + * @see currentExecutionStage + * @see switchToRequestStage + * @see PipelineStepStage + */ + public getCurrentStage(): PipelineStepStage { + return this.currentExecutionStage + } + + /** + * Switch to the next {@link PipelineStepStage|stage} of the pipeline executor. + * + * This will fail if either a step change is currently not valid (as not all steps have been executed), + * or if there is no next stage (i.e., the pipeline is already completed or in the last stage). + * + * @see PipelineExecutor + * @see getCurrentStage + */ + public switchToRequestStage(): void { + guard(this.stepCounter === this.pipeline.firstStepPerRequest, 'First need to complete all steps before switching') + guard(this.currentExecutionStage === PipelineStepStage.OncePerFile, 'Cannot switch to next stage, already in per-request stage.') + this.currentExecutionStage = PipelineStepStage.OncePerRequest + } + + + public getResults(intermediate?:false): PipelineOutput

+ public getResults(intermediate: true): Partial> + public getResults(intermediate: boolean): PipelineOutput

| Partial> + /** + * Returns the results of the pipeline. + * + * @param intermediate - Normally you can only receive the results *after* the stepper completed the step of interested. + * However, if you pass `true` to this parameter, you can also receive the results *before* the {@link PipelineExecutor|pipeline executor} + * completed, although the typing system then can not guarantee which of the steps have already happened. + */ + public getResults(intermediate = false): PipelineOutput

| Partial> { + guard(intermediate || this.stepCounter >= this.length, 'Without the intermediate flag, the pipeline must be completed before providing access to the results.') + return this.output + } + + /** + * Returns true only if + * 1) there are more {@link IPipelineStep|steps} to-do for the current {@link PipelineStepStage|stage} and + * 2) we have not yet reached the end of the {@link Pipeline|pipeline}. + */ + public hasNextStep(): boolean { + return (this.stepCounter < this.length && this.currentExecutionStage !== PipelineStepStage.OncePerFile) + || this.stepCounter < this.pipeline.firstStepPerRequest + } + + /** + * Execute the next {@link IPipelineStep|step} and return the name of the {@link IPipelineStep|step} that was executed, + * so you can guard if the {@link IPipelineStep|step} differs from what you are interested in. + * Furthermore, it returns the {@link IPipelineStep|step's} result. + * + * @param expectedStepName - A safeguard if you want to retrieve the result. + * If given, it causes the execution to fail if the next step is not the one you expect. + * + * _Without `expectedStepName`, please refrain from accessing the result, as you have no safeguards if the pipeline changes._ + */ + public async nextStep(expectedStepName?: PassedName): Promise<{ + name: typeof expectedStepName extends undefined ? PipelineStepName : PassedName + result: typeof expectedStepName extends undefined ? unknown : PipelineStepOutputWithName + }> { + const [step, result] = this._doNextStep(expectedStepName) + const awaitedResult = await result + + this.output[step as PipelineStepNames

] = awaitedResult + this.stepCounter++ + + return { name: step as PassedName, result: awaitedResult } + } + + private _doNextStep(expectedStepName: Readonly): [ + step: PipelineStepName, + result: Promise> + ] { + const step = this.pipeline.steps.get(this.pipeline.order[this.stepCounter]) + guard(step !== undefined, () => `Cannot execute next step, step ${this.pipeline.order[this.stepCounter]} does not exist.`) + + if(expectedStepName !== undefined) { + guard(step.name === expectedStepName, () => `Cannot execute next step, expected step ${JSON.stringify(expectedStepName)} but got ${step.name}.`) + } + + return [step.name, step.processor(this.output, this.input) as unknown as PipelineStepOutputWithName] + } + + /** + * This only makes sense if you have already run a request and want to re-use the per-file results for a new one. + * (or if for whatever reason you did not pass information for the pipeline with the constructor). + * + * @param newRequestData - Data for the new request + */ + public updateRequest(newRequestData: PipelinePerRequestInput

): void { + const requestStep = this.pipeline.firstStepPerRequest + guard(this.stepCounter >= requestStep, 'Cannot reset request prior to once-per-request stage') + this.input = { + ...(this.input as object), + ...newRequestData + } as PipelineInput

+ this.stepCounter = requestStep + // clear the results for all steps with an index >= firstStepPerRequest, this is more of a sanity check + for(let i = requestStep; i < this.length; i++) { + this.output[this.pipeline.order[i] as PipelineStepNames

] = undefined as unknown as PipelineStepOutputWithName + } + } + + public async allRemainingSteps(canSwitchStage: false): Promise>> + public async allRemainingSteps(canSwitchStage?: true): Promise> + public async allRemainingSteps(canSwitchStage: boolean): Promise | Partial>> + /** + * Execute all remaining steps and automatically call {@link switchToRequestStage} if necessary. + * @param canSwitchStage - If true, automatically switch to the request stage if necessary + * (i.e., this is what you want if you have never executed {@link nextStep} and you want to execute *all* steps). + * However, passing false allows you to only execute the steps of the 'once-per-file' stage (i.e., the steps that can be cached). + * + * @note There is a small type difference if you pass 'false' and already have manually switched to the 'once-per-request' stage. + * Because now, the results of these steps are no longer part of the result type (although they are still included). + * In such a case, you may be better off with simply passing 'true' as the function will detect that the stage is already switched. + * We could solve this type problem by separating the {@link PipelineExecutor} class into two for each stage, + * but this would break the improved readability and unified handling of the executor that I wanted to achieve with this class. + */ + public async allRemainingSteps(canSwitchStage = true): Promise | Partial>> { + while(this.hasNextStep()) { + await this.nextStep() + } + + if(canSwitchStage && this.stepCounter < this.length && this.currentExecutionStage === PipelineStepStage.OncePerFile) { + this.switchToRequestStage() + while(this.hasNextStep()) { + await this.nextStep() + } + } + + return this.stepCounter < this.length ? 
this.getResults(true) : this.getResults() + } +} diff --git a/src/core/print/print.ts b/src/core/print/print.ts index 86923338ef..3d52d44183 100644 --- a/src/core/print/print.ts +++ b/src/core/print/print.ts @@ -1,4 +1,6 @@ -import { StepFunction } from '../steps' +import { IPipelineStep, StepProcessingFunction } from '../steps' +import { TailOfArray } from '../../util/arrays' +import { guard } from '../../util/assert' /** * Defines the output format of a step that you are interested in. @@ -39,7 +41,7 @@ export const enum StepOutputFormat { /** * Helper function to support the {@link Internal} format, as it is simply returning the input. * - * @see IStepPrinter + * @see IPipelineStepPrinter */ export function internalPrinter(input: Input): Input { return input @@ -47,8 +49,28 @@ export function internalPrinter(input: Input): Input { /** * A mapping function that maps the result of a step (i.e., the dataflow graph) - * to another representation (linked by {@link StepOutputFormat} in an {@link IStep}). + * to another representation (linked by {@link StepOutputFormat} in an {@link IPipelineStep}). + * + * For the internal format, refer to {@link InternalStepPrinter} as a shorthand. */ -export type IStepPrinter = +export type IPipelineStepPrinter = Format extends StepOutputFormat.Internal ? (input: Awaited>) => Awaited> : (input: Awaited>, ...additional: AdditionalInput) => Promise | string + +export type InternalStepPrinter = IPipelineStepPrinter + +/** + * For a `step` of the given name, which returned the given `data`. Convert that data into the given `format`. + * Depending on your step and the format this may require `additional` inputs. 
+ */ +export function printStepResult< + Step extends IPipelineStep, + Processor extends Step['processor'], + Format extends Exclude & number, + Printer extends Step['printer'][Format], + AdditionalInput extends TailOfArray>, +>(step: Step, data: Awaited>, format: Format, ...additional: AdditionalInput): Promise { + const printer = step.printer[format] as IPipelineStepPrinter | undefined + guard(printer !== undefined, `printer for ${step.name} does not support ${String(format)}`) + return printer(data, ...additional) as Promise +} diff --git a/src/core/slicer.ts b/src/core/stepping-slicer.ts similarity index 54% rename from src/core/slicer.ts rename to src/core/stepping-slicer.ts index 375a6b5ed6..0b3c88f2c7 100644 --- a/src/core/slicer.ts +++ b/src/core/stepping-slicer.ts @@ -1,23 +1,32 @@ import { - NormalizedAst, IdGenerator, - NoInfo, - RParseRequest, - RShell, - XmlParserHooks -} from '../r-bridge' -import { - executeSingleSubStep, LAST_PER_FILE_STEP, LAST_STEP, - StepRequired, STEPS, - STEPS_PER_FILE, + LAST_PER_FILE_STEP, LAST_STEP, STEPS_PER_SLICE, - StepName, StepResult + SteppingSlicerInput, + StepResults, + StepName, PipelineStepStage, PipelineStepName } from './steps' -import { guard } from '../util/assert' -import { SliceResult, SlicingCriteria } from '../slicing' -import { DeepPartial } from 'ts-essentials' -import { SteppingSlicerInput } from './input' -import { StepResults } from './output' -import { DataflowInformation } from '../dataflow/internal/info' +import { SlicingCriteria } from '../slicing' +import { createPipeline, Pipeline, PipelineOutput, PipelineStepOutputWithName } from './steps/pipeline' +import { PARSE_WITH_R_SHELL_STEP } from './steps/all/core/00-parse' +import { NORMALIZE } from './steps/all/core/10-normalize' +import { LEGACY_STATIC_DATAFLOW } from './steps/all/core/20-dataflow' +import { STATIC_SLICE } from './steps/all/static-slicing/30-slice' +import { NAIVE_RECONSTRUCT } from './steps/all/static-slicing/40-reconstruct' +import 
{ PipelineExecutor } from './pipeline-executor' + +const legacyPipelines = { + // brrh, but who cares, it is legacy! + 'parse': createPipeline(PARSE_WITH_R_SHELL_STEP), + 'normalize': createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE), + 'dataflow': createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, LEGACY_STATIC_DATAFLOW), + 'slice': createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, LEGACY_STATIC_DATAFLOW, STATIC_SLICE), + 'reconstruct': createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, LEGACY_STATIC_DATAFLOW, STATIC_SLICE, NAIVE_RECONSTRUCT) +} +type LegacyPipelineType = typeof legacyPipelines[InterestedIn] + +function getLegacyPipeline(interestedIn: StepName): Pipeline { + return legacyPipelines[interestedIn] +} /** * This is ultimately the root of flowR's static slicing procedure. @@ -76,46 +85,25 @@ import { DataflowInformation } from '../dataflow/internal/info' * for, for example, the dataflow analysis. * * @see retrieveResultOfStep - * @see SteppingSlicer#doNextStep * @see StepName */ -export class SteppingSlicer { - public static readonly maximumNumberOfStepsPerFile = Object.keys(STEPS_PER_FILE).length - public static readonly maximumNumberOfStepsPerSlice = SteppingSlicer.maximumNumberOfStepsPerFile + Object.keys(STEPS_PER_SLICE).length - - private readonly shell: RShell - private readonly stepOfInterest: InterestedIn - private readonly request: RParseRequest - private readonly hooks?: DeepPartial - private readonly getId?: IdGenerator - - private criterion?: SlicingCriteria - - private results = {} as Record - - private stage: StepRequired = 'once-per-file' - private stepCounter = 0 - private reachedWanted = false +export class SteppingSlicer { + private executor: PipelineExecutor> /** * Create a new stepping slicer. For more details on the arguments please see {@link SteppingSlicerInput}. 
*/ constructor(input: SteppingSlicerInput) { - this.shell = input.shell - this.request = input.request - this.hooks = input.hooks - this.getId = input.getId - this.stepOfInterest = (input.stepOfInterest ?? LAST_STEP) as InterestedIn - this.criterion = input.criterion + this.executor = new PipelineExecutor(getLegacyPipeline(input.stepOfInterest ?? LAST_STEP), input) as PipelineExecutor> } /** * Retrieve the current stage the stepping slicer is in. - * @see StepRequired + * @see PipelineStepStage * @see switchToSliceStage */ - public getCurrentStage(): StepRequired { - return this.stage + public getCurrentStage(): PipelineStepStage { + return this.executor.getCurrentStage() } /** @@ -124,14 +112,12 @@ export class SteppingSlicer - public getResults(intermediate: true): Partial> + public getResults(intermediate?:false): PipelineOutput> + public getResults(intermediate: true): Partial>> /** * Returns the result of the step of interest, as well as the results of all steps before it. * @@ -139,19 +125,15 @@ export class SteppingSlicer | Partial> { - guard(intermediate || this.reachedWanted, 'Before reading the results, we need to reach the step we are interested in') - return this.results as StepResults + public getResults(intermediate = false): PipelineOutput> | Partial>> { + return this.executor.getResults(intermediate) } /** * Returns true only if 1) there are more steps to-do for the current stage and 2) we have not yet reached the step we are interested in */ public hasNextStep(): boolean { - return !this.reachedWanted && (this.stage === 'once-per-file' ? - this.stepCounter < SteppingSlicer.maximumNumberOfStepsPerFile - : this.stepCounter < SteppingSlicer.maximumNumberOfStepsPerSlice - ) + return this.executor.hasNextStep() } /** @@ -162,65 +144,11 @@ export class SteppingSlicer(expectedStepName?: PassedName): Promise<{ - name: typeof expectedStepName extends undefined ? StepName : PassedName - result: typeof expectedStepName extends undefined ? 
unknown : StepResult> + public async nextStep(expectedStepName?: PassedName): Promise<{ + name: typeof expectedStepName extends undefined ? PipelineStepName : PassedName + result: typeof expectedStepName extends undefined ? unknown : PipelineStepOutputWithName, Exclude> }> { - guard(this.hasNextStep(), 'No more steps to do') - - const guardStep = this.getGuardStep(expectedStepName) - - const { step, result } = await this.doNextStep(guardStep) - - this.results[step] = result - this.stepCounter += 1 - if(this.stepOfInterest === step) { - this.reachedWanted = true - } - - return { name: step as PassedName, result: result as StepResult } - } - - private getGuardStep(expectedStepName: StepName | undefined) { - return expectedStepName === undefined ? - (name: K): K => name - : - (name: K): K => { - guard(expectedStepName === name, `Expected step ${expectedStepName} but got ${name}`) - return name - } - } - - private async doNextStep(guardStep: (name: K) => K) { - let step: StepName - let result: unknown - - switch(this.stepCounter) { - case 0: - step = guardStep('parse') - result = await executeSingleSubStep(step, this.request, this.shell) - break - case 1: - step = guardStep('normalize') - result = await executeSingleSubStep(step, this.results.parse as string, await this.shell.tokenMap(), this.hooks, this.getId) - break - case 2: - step = guardStep('dataflow') - result = executeSingleSubStep(step, this.results.normalize as NormalizedAst) - break - case 3: - guard(this.criterion !== undefined, 'Cannot decode criteria without a criterion') - step = guardStep('slice') - result = executeSingleSubStep(step, (this.results.dataflow as DataflowInformation).graph, this.results.normalize as NormalizedAst, this.criterion) - break - case 4: - step = guardStep('reconstruct') - result = executeSingleSubStep(step, this.results.normalize as NormalizedAst, (this.results.slice as SliceResult).result) - break - default: - throw new Error(`Unknown step ${this.stepCounter}, reaching this 
should not happen!`) - } - return { step, result } + return this.executor.nextStep(expectedStepName) } /** @@ -230,14 +158,8 @@ export class SteppingSlicer= SteppingSlicer.maximumNumberOfStepsPerFile , 'Cannot reset slice prior to once-per-slice stage') - this.criterion = newCriterion - this.stepCounter = SteppingSlicer.maximumNumberOfStepsPerFile - this.results.slice = undefined - this.results.reconstruct = undefined - if(this.stepOfInterest === 'slice' || this.stepOfInterest === 'reconstruct') { - this.reachedWanted = false - } + // @ts-expect-error -- it is legacy + this.executor.updateRequest({ criterion: newCriterion }) } public async allRemainingSteps(canSwitchStage: false): Promise>> @@ -255,15 +177,6 @@ export class SteppingSlicer | Partial>> { - while(this.hasNextStep()) { - await this.nextStep() - } - if(canSwitchStage && !this.reachedWanted && this.stage === 'once-per-file') { - this.switchToSliceStage() - while(this.hasNextStep()) { - await this.nextStep() - } - } - return this.reachedWanted ? this.getResults() : this.getResults(true) + return this.executor.allRemainingSteps(canSwitchStage) } } diff --git a/src/core/steps.ts b/src/core/steps.ts deleted file mode 100644 index 0fbb1e88e3..0000000000 --- a/src/core/steps.ts +++ /dev/null @@ -1,158 +0,0 @@ -/** - * This file defines *all* steps of the slicing process and the data they require. - * - * Note, that the order of elements here also describes the *desired* order of their desired execution for readability. - * However, it is the {@link SteppingSlicer} which controls the order of execution and the steps required to achieve a given result. - * - * If you add a new step, you have to (at least) update the {@link SteppingSlicer} as well as the corresponding type predicate {@link SteppingSlicerInput}. - * Furthermore, if your step is the new *last* step, please update {@link LAST_STEP}. - * - * Please note that the combination of `satisfies` and `as` seems to be required. 
- * With `satisfies` we make sure that the respective element has all the keys it requires, and the `as` force the type to be exactly the given one - * - * @module - */ - -import { MergeableRecord } from '../util/objects' -import { - normalize, - retrieveXmlFromRCode -} from '../r-bridge' -import { produceDataFlowGraph } from '../dataflow' -import { reconstructToCode, staticSlicing } from '../slicing' -import { internalPrinter, IStepPrinter, StepOutputFormat } from './print/print' -import { - normalizedAstToJson, - normalizedAstToQuads, - printNormalizedAstToMermaid, - printNormalizedAstToMermaidUrl -} from './print/normalize-printer' -import { guard } from '../util/assert' -import { - dataflowGraphToJson, - dataflowGraphToMermaid, - dataflowGraphToMermaidUrl, - dataflowGraphToQuads -} from './print/dataflow-printer' -import { parseToQuads } from './print/parse-printer' - -/** - * This represents close a function that we know completely nothing about. - * Nevertheless, this is the basis of what a step processor should look like. - */ -export type StepFunction = (...args: never[]) => unknown -/** - * This represents the required execution frequency of a step. - */ -export type StepRequired = 'once-per-file' | 'once-per-slice' - - -/** - * Defines what is to be known of a single step in the slicing process. 
- */ -export interface IStep< - Fn extends StepFunction, -> extends MergeableRecord { - /** Human-readable description of this step */ - description: string - /** The main processor that essentially performs the logic of this step */ - processor: (...input: Parameters) => ReturnType - /* does this step has to be repeated for each new slice or can it be performed only once in the initialization */ - required: StepRequired - printer: { - [K in StepOutputFormat]?: IStepPrinter - } & { - // we always want to have the internal printer - [StepOutputFormat.Internal]: IStepPrinter - } -} - - -export const STEPS_PER_FILE = { - 'parse': { - description: 'Parse the given R code into an AST', - processor: retrieveXmlFromRCode, - required: 'once-per-file', - printer: { - [StepOutputFormat.Internal]: internalPrinter, - [StepOutputFormat.Json]: text => text, - [StepOutputFormat.RdfQuads]: parseToQuads - } - } satisfies IStep, - 'normalize': { - description: 'Normalize the AST to flowR\'s AST (first step of the normalization)', - processor: normalize, - required: 'once-per-file', - printer: { - [StepOutputFormat.Internal]: internalPrinter, - [StepOutputFormat.Json]: normalizedAstToJson, - [StepOutputFormat.RdfQuads]: normalizedAstToQuads, - [StepOutputFormat.Mermaid]: printNormalizedAstToMermaid, - [StepOutputFormat.MermaidUrl]: printNormalizedAstToMermaidUrl - } - } satisfies IStep, - 'dataflow': { - description: 'Construct the dataflow graph', - processor: produceDataFlowGraph, - required: 'once-per-file', - printer: { - [StepOutputFormat.Internal]: internalPrinter, - [StepOutputFormat.Json]: dataflowGraphToJson, - [StepOutputFormat.RdfQuads]: dataflowGraphToQuads, - [StepOutputFormat.Mermaid]: dataflowGraphToMermaid, - [StepOutputFormat.MermaidUrl]: dataflowGraphToMermaidUrl - } - } satisfies IStep -} as const - -export const STEPS_PER_SLICE = { - 'slice': { - description: 'Calculate the actual static slice from the dataflow graph and the given slicing criteria', - processor: 
staticSlicing, - required: 'once-per-slice', - printer: { - [StepOutputFormat.Internal]: internalPrinter - } - } satisfies IStep, - 'reconstruct': { - description: 'Reconstruct R code from the static slice', - processor: reconstructToCode, - required: 'once-per-slice', - printer: { - [StepOutputFormat.Internal]: internalPrinter - } - } satisfies IStep -} as const - -export const STEPS = { ...STEPS_PER_FILE, ...STEPS_PER_SLICE } as const -export const LAST_PER_FILE_STEP = 'dataflow' as const -export const LAST_STEP = 'reconstruct' as const - -export type StepName = keyof typeof STEPS -export type Step = typeof STEPS[Name] -export type StepProcessor = Step['processor'] -export type StepResult = Awaited>> - -export function executeSingleSubStep>(subStep: Name, ...input: Parameters): ReturnType { - // @ts-expect-error - this is safe, as we know that the function arguments are correct by 'satisfies', this saves an explicit cast with 'as' - return STEPS[subStep].processor(...input as unknown as never[]) as ReturnType -} - -type Tail = T extends [infer _, ...infer Rest] ? Rest : never; - -/** - * For a `step` of the given name, which returned the given `data`. Convert that data into the given `format`. - * Depending on your step and the format this may require `additional` inputs. 
- */ -export function printStepResult< - Name extends StepName, - Processor extends StepProcessor, - Format extends Exclude & number, - Printer extends (typeof STEPS)[Name]['printer'][Format], - AdditionalInput extends Tail>, ->(step: Name, data: Awaited>, format: Format, ...additional: AdditionalInput): Promise { - const base = STEPS[step].printer - const printer = base[format as keyof typeof base] as IStepPrinter, Format, AdditionalInput> | undefined - guard(printer !== undefined, `printer for ${step} does not support ${String(format)}`) - return printer(data, ...additional) as Promise -} diff --git a/src/core/steps/all/core/00-parse.ts b/src/core/steps/all/core/00-parse.ts new file mode 100644 index 0000000000..8e5f31b8f2 --- /dev/null +++ b/src/core/steps/all/core/00-parse.ts @@ -0,0 +1,31 @@ +import { internalPrinter, StepOutputFormat } from '../../../print/print' +import { parseToQuads } from '../../../print/parse-printer' +import { IPipelineStep, PipelineStepStage } from '../../step' +import { retrieveXmlFromRCode, RParseRequest, RShell } from '../../../../r-bridge' +import { DeepReadonly } from 'ts-essentials' + +export interface ParseRequiredInput { + /** This is the {@link RShell} connection to be used to obtain the original parses AST of the R code */ + readonly shell: RShell + /** The request which essentially indicates the input to extract the AST from */ + readonly request: RParseRequest +} + +function processor(_results: unknown, input: Partial) { + return retrieveXmlFromRCode(input.request as RParseRequest, input.shell as RShell) +} + +export const PARSE_WITH_R_SHELL_STEP = { + name: 'parse', + description: 'Parse the given R code into an AST', + processor, + executed: PipelineStepStage.OncePerFile, + printer: { + [StepOutputFormat.Internal]: internalPrinter, + [StepOutputFormat.Json]: text => text, + [StepOutputFormat.RdfQuads]: parseToQuads + }, + dependencies: [], + requiredInput: undefined as unknown as ParseRequiredInput +} as const satisfies 
DeepReadonly< +IPipelineStep<'parse', typeof processor>> diff --git a/src/core/steps/all/core/10-normalize.ts b/src/core/steps/all/core/10-normalize.ts new file mode 100644 index 0000000000..888775fedb --- /dev/null +++ b/src/core/steps/all/core/10-normalize.ts @@ -0,0 +1,44 @@ +import { + IdGenerator, + NoInfo, + normalize, + RShell, + XmlParserHooks +} from '../../../../r-bridge' +import { internalPrinter, StepOutputFormat } from '../../../print/print' +import { + normalizedAstToJson, + normalizedAstToQuads, + printNormalizedAstToMermaid, + printNormalizedAstToMermaidUrl +} from '../../../print/normalize-printer' +import { IPipelineStep, PipelineStepStage } from '../../step' +import { DeepPartial, DeepReadonly } from 'ts-essentials' +import { ParseRequiredInput } from './00-parse' + +export interface NormalizeRequiredInput extends ParseRequiredInput { + /** These hooks only make sense if you at least want to normalize the parsed R AST. They can augment the normalization process */ + readonly hooks?: DeepPartial, + /** This id generator is only necessary if you want to retrieve a dataflow from the parsed R AST, it determines the id generator to use and by default uses the {@link deterministicCountingIdGenerator}*/ + readonly getId?: IdGenerator +} + +async function processor(results: { parse?: string }, input: Partial) { + return normalize(results.parse as string, await (input.shell as RShell).tokenMap(), input.hooks, input.getId) +} + +export const NORMALIZE = { + name: 'normalize', + description: 'Normalize the AST to flowR\'s AST (first step of the normalization)', + processor, + executed: PipelineStepStage.OncePerFile, + printer: { + [StepOutputFormat.Internal]: internalPrinter, + [StepOutputFormat.Json]: normalizedAstToJson, + [StepOutputFormat.RdfQuads]: normalizedAstToQuads, + [StepOutputFormat.Mermaid]: printNormalizedAstToMermaid, + [StepOutputFormat.MermaidUrl]: printNormalizedAstToMermaidUrl + }, + dependencies: [ 'parse' ], + requiredInput: undefined 
as unknown as NormalizeRequiredInput +} as const satisfies DeepReadonly> diff --git a/src/core/steps/all/core/20-dataflow.ts b/src/core/steps/all/core/20-dataflow.ts new file mode 100644 index 0000000000..f3d8eb3ac3 --- /dev/null +++ b/src/core/steps/all/core/20-dataflow.ts @@ -0,0 +1,31 @@ +import { internalPrinter, StepOutputFormat } from '../../../print/print' +import { IPipelineStep, PipelineStepStage } from '../../step' +import { produceDataFlowGraph } from '../../../../dataflow' +import { + dataflowGraphToJson, + dataflowGraphToMermaid, + dataflowGraphToMermaidUrl, + dataflowGraphToQuads +} from '../../../print/dataflow-printer' +import { DeepReadonly } from 'ts-essentials' +import { NormalizedAst } from '../../../../r-bridge' + +function processor(results: { normalize?: NormalizedAst }) { + return produceDataFlowGraph(results.normalize as NormalizedAst) +} + +export const LEGACY_STATIC_DATAFLOW = { + name: 'dataflow', + description: 'Construct the dataflow graph', + processor, + executed: PipelineStepStage.OncePerFile, + printer: { + [StepOutputFormat.Internal]: internalPrinter, + [StepOutputFormat.Json]: dataflowGraphToJson, + [StepOutputFormat.RdfQuads]: dataflowGraphToQuads, + [StepOutputFormat.Mermaid]: dataflowGraphToMermaid, + [StepOutputFormat.MermaidUrl]: dataflowGraphToMermaidUrl + }, + dependencies: [ 'normalize' ], + requiredInput: {} +} as const satisfies DeepReadonly> diff --git a/src/core/steps/all/static-slicing/30-slice.ts b/src/core/steps/all/static-slicing/30-slice.ts new file mode 100644 index 0000000000..3d314411c2 --- /dev/null +++ b/src/core/steps/all/static-slicing/30-slice.ts @@ -0,0 +1,30 @@ +import { internalPrinter, StepOutputFormat } from '../../../print/print' +import { IPipelineStep, PipelineStepStage } from '../../step' +import { SlicingCriteria, staticSlicing } from '../../../../slicing' +import { DeepReadonly } from 'ts-essentials' +import { NormalizeRequiredInput } from '../core/10-normalize' +import { DataflowInformation } 
from '../../../../dataflow/internal/info' +import { NormalizedAst } from '../../../../r-bridge' + +export interface SliceRequiredInput extends NormalizeRequiredInput { + /** The slicing criterion is only of interest if you actually want to slice the R code */ + readonly criterion: SlicingCriteria, + /** How many re-visits of the same node are ok? */ + readonly threshold?: number +} + +function processor(results: { dataflow?: DataflowInformation, normalize?: NormalizedAst }, input: Partial) { + return staticSlicing((results.dataflow as DataflowInformation).graph, results.normalize as NormalizedAst, input.criterion as SlicingCriteria, input.threshold) +} + +export const STATIC_SLICE = { + name: 'slice', + description: 'Calculate the actual static slice from the dataflow graph and the given slicing criteria', + processor, + executed: PipelineStepStage.OncePerRequest, + printer: { + [StepOutputFormat.Internal]: internalPrinter + }, + dependencies: [ 'dataflow' ], + requiredInput: undefined as unknown as SliceRequiredInput +} as const satisfies DeepReadonly> diff --git a/src/core/steps/all/static-slicing/40-reconstruct.ts b/src/core/steps/all/static-slicing/40-reconstruct.ts new file mode 100644 index 0000000000..e1c1cf6911 --- /dev/null +++ b/src/core/steps/all/static-slicing/40-reconstruct.ts @@ -0,0 +1,26 @@ +import { internalPrinter, StepOutputFormat } from '../../../print/print' +import { IPipelineStep, PipelineStepStage } from '../../step' +import { AutoSelectPredicate, reconstructToCode, SliceResult } from '../../../../slicing' +import { DeepReadonly } from 'ts-essentials' +import { NormalizedAst } from '../../../../r-bridge' +import { SliceRequiredInput } from './30-slice' + +export interface ReconstructRequiredInput extends SliceRequiredInput { + autoSelectIf?: AutoSelectPredicate +} + +function processor(results: { normalize?: NormalizedAst, slice?: SliceResult }, input: Partial) { + return reconstructToCode(results.normalize as NormalizedAst, (results.slice 
as SliceResult).result, input.autoSelectIf) +} + +export const NAIVE_RECONSTRUCT = { + name: 'reconstruct', + description: 'Reconstruct R code from the static slice', + processor, + executed: PipelineStepStage.OncePerRequest, + printer: { + [StepOutputFormat.Internal]: internalPrinter + }, + dependencies: [ 'slice' ], + requiredInput: undefined as unknown as ReconstructRequiredInput +} as const satisfies DeepReadonly> diff --git a/src/core/steps/index.ts b/src/core/steps/index.ts new file mode 100644 index 0000000000..7fcfa6e800 --- /dev/null +++ b/src/core/steps/index.ts @@ -0,0 +1,4 @@ +export * from './output' +export * from './step' +export * from './steps' +export * from './input' diff --git a/src/core/input.ts b/src/core/steps/input.ts similarity index 94% rename from src/core/input.ts rename to src/core/steps/input.ts index 59ac663dfd..233beba510 100644 --- a/src/core/input.ts +++ b/src/core/steps/input.ts @@ -1,7 +1,7 @@ -import { MergeableRecord } from '../util/objects' -import { IdGenerator, NoInfo, RParseRequest, RShell, XmlParserHooks } from '../r-bridge' +import { MergeableRecord } from '../../util/objects' +import { IdGenerator, NoInfo, RParseRequest, RShell, XmlParserHooks } from '../../r-bridge' import { DeepPartial } from 'ts-essentials' -import { AutoSelectPredicate, SlicingCriteria } from '../slicing' +import { AutoSelectPredicate, SlicingCriteria } from '../../slicing' import { STEPS_PER_SLICE, StepName, STEPS_PER_FILE } from './steps' /** diff --git a/src/core/output.ts b/src/core/steps/output.ts similarity index 100% rename from src/core/output.ts rename to src/core/steps/output.ts diff --git a/src/core/steps/pipeline/create.ts b/src/core/steps/pipeline/create.ts new file mode 100644 index 0000000000..ab1d4b80c3 --- /dev/null +++ b/src/core/steps/pipeline/create.ts @@ -0,0 +1,145 @@ +import { IPipelineStep, PipelineStepName, PipelineStepStage } from '../step' +import { InvalidPipelineError } from './invalid-pipeline-error' +import { Pipeline } 
from './pipeline' +import { jsonReplacer } from '../../../util/json' +import { partitionArray } from '../../../util/arrays' + +/** + * Given a set of {@link IPipelineStep|steps} with their dependencies, this function verifies all requirements of {@link createPipeline}. + */ +export function verifyAndBuildPipeline(steps: readonly IPipelineStep[]): Pipeline { + if(steps.length === 0) { + throw new InvalidPipelineError('0) Pipeline is empty') + } + + const [perFileSteps, perRequestSteps] = partitionArray(steps, s => s.executed === PipelineStepStage.OncePerFile) + + // we construct a map linking each name to its respective step + const perFileStepMap = new Map() + const initsPerFile: PipelineStepName[] = [] + const visited = new Set() + + // we start by working on the per-file steps + initializeSteps(perFileSteps, perFileStepMap, initsPerFile, visited) + // first, we sort the per-file steps + const sortedPerFile = topologicalSort(initsPerFile, perFileStepMap, visited) + validateStepOutput(sortedPerFile, perFileStepMap, steps) + + const perRequestStepMap = new Map(perFileStepMap) + // we track all elements without dependencies, i.e., those that start the pipeline + const initsPerRequest: PipelineStepName[] = [] + + // now, we do the same for the per-request steps, keeping the per-file steps known + initializeSteps(perRequestSteps, perRequestStepMap, initsPerRequest, visited) + + const sortedPerRequest = topologicalSort(initsPerRequest, perRequestStepMap, visited) + const sorted = [...sortedPerFile, ...sortedPerRequest] + validateStepOutput(sorted, perRequestStepMap, steps) + + return { + steps: perRequestStepMap, + order: sorted, + firstStepPerRequest: sortedPerFile.length + } +} + +function validateStepOutput(sorted: PipelineStepName[], stepMap: Map, steps: readonly IPipelineStep[]) { + if(sorted.length !== stepMap.size) { + // check if any of the dependencies in the map are invalid + checkForInvalidDependency(steps, stepMap) + // otherwise, we assume a cycle + throw 
new InvalidPipelineError(`3) Pipeline contains at least one cycle; sorted: ${JSON.stringify(sorted)}, steps: ${JSON.stringify([...stepMap.keys()])}`) + } +} + +function allDependenciesAreVisited(step: IPipelineStep, visited: ReadonlySet) { + return step.dependencies.every(d => visited.has(d)) +} + +function handleStep(step: IPipelineStep, init: PipelineStepName, visited: Set, sorted: PipelineStepName[], elem: PipelineStepName, decoratorsOfLastOthers: Set, inits: PipelineStepName[]) { + if(step.decorates === init) { + if(allDependenciesAreVisited(step, visited)) { + sorted.push(elem) + visited.add(elem) + } else { + decoratorsOfLastOthers.add(elem) + } + } else if(step.decorates === undefined && allDependenciesAreVisited(step, visited)) { + inits.push(elem) + } +} + +function topologicalSort(inits: PipelineStepName[], stepMap: Map, visited: Set) { + const sorted: PipelineStepName[] = [] + + while(inits.length > 0) { + const init = inits.pop() as PipelineStepName + sorted.push(init) + visited.add(init) + + // these decorators still have dependencies open; we have to check if they can be satisfied by the other steps to add + const decoratorsOfLastOthers = new Set() + for(const [elem, step] of stepMap.entries()) { + if(visited.has(elem)) { + continue + } + handleStep(step, init, visited, sorted, elem, decoratorsOfLastOthers, inits) + } + + // for the other decorators we have to cycle until we find a solution, or know, that no solution exists + topologicallyInsertDecoratorElements(decoratorsOfLastOthers, stepMap, visited, sorted) + } + return sorted +} + +function topologicallyInsertDecoratorElements(decoratorsOfLastOthers: Set, stepMap: Map, visited: Set, sorted: PipelineStepName[]) { + if(decoratorsOfLastOthers.size === 0) { + return + } + + let changed = true + while(changed) { + changed = false + for(const elem of [...decoratorsOfLastOthers]) { + const step = stepMap.get(elem) as IPipelineStep + if(allDependenciesAreVisited(step, visited)) { + 
decoratorsOfLastOthers.delete(elem) + sorted.push(elem) + visited.add(elem) + changed = true + } + } + } + if(decoratorsOfLastOthers.size > 0) { + throw new InvalidPipelineError(`5) Pipeline contains at least one decoration cycle: ${JSON.stringify(decoratorsOfLastOthers, jsonReplacer)}`) + } +} + +function checkForInvalidDependency(steps: readonly IPipelineStep[], stepMap: Map) { + for(const step of steps) { + for(const dep of step.dependencies) { + if(!stepMap.has(dep)) { + throw new InvalidPipelineError(`2) Step "${step.name}" depends on step "${dep}" which does not exist`) + } + } + if(step.decorates && !stepMap.has(step.decorates)) { + throw new InvalidPipelineError(`4) Step "${step.name}" decorates step "${step.decorates}" which does not exist`) + } + } +} + +function initializeSteps(steps: readonly IPipelineStep[], stepMap: Map, inits: PipelineStepName[], visited: ReadonlySet) { + for(const step of steps) { + const name = step.name + // if the name is already in the map we have a duplicate + if(stepMap.has(name)) { + throw new InvalidPipelineError(`1) Step name "${name}" is not unique in the pipeline`) + } + stepMap.set(name, step) + // only steps that have no dependencies and do not decorate others can be initial steps + if(allDependenciesAreVisited(step, visited) && (step.decorates === undefined || visited.has(step.decorates))) { + inits.push(name) + } + } +} + diff --git a/src/core/steps/pipeline/default.ts b/src/core/steps/pipeline/default.ts new file mode 100644 index 0000000000..c33c9aa9f8 --- /dev/null +++ b/src/core/steps/pipeline/default.ts @@ -0,0 +1,11 @@ +/** + * Contains the default pipeline for working with flowr + */ +import { createPipeline } from './pipeline' +import { PARSE_WITH_R_SHELL_STEP } from '../all/core/00-parse' +import { NORMALIZE } from '../all/core/10-normalize' +import { LEGACY_STATIC_DATAFLOW } from '../all/core/20-dataflow' +import { STATIC_SLICE } from '../all/static-slicing/30-slice' +import { NAIVE_RECONSTRUCT } from 
'../all/static-slicing/40-reconstruct' + +export const DEFAULT_SLICING_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, LEGACY_STATIC_DATAFLOW, STATIC_SLICE, NAIVE_RECONSTRUCT) diff --git a/src/core/steps/pipeline/index.ts b/src/core/steps/pipeline/index.ts new file mode 100644 index 0000000000..38bb83622a --- /dev/null +++ b/src/core/steps/pipeline/index.ts @@ -0,0 +1,3 @@ +export * from './pipeline' +export * from './invalid-pipeline-error' +export * from './default' diff --git a/src/core/steps/pipeline/invalid-pipeline-error.ts b/src/core/steps/pipeline/invalid-pipeline-error.ts new file mode 100644 index 0000000000..a673273528 --- /dev/null +++ b/src/core/steps/pipeline/invalid-pipeline-error.ts @@ -0,0 +1,9 @@ +/** + * Thrown if for whatever reason, the pipeline is invalid. + */ +export class InvalidPipelineError extends Error { + constructor(message: string) { + super(message) + this.name = 'InvalidPipelineError' + } +} diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts new file mode 100644 index 0000000000..2b8d4afdeb --- /dev/null +++ b/src/core/steps/pipeline/pipeline.ts @@ -0,0 +1,71 @@ +import { IPipelineStep, PipelineStepName, PipelineStepStage } from '../step' +import { verifyAndBuildPipeline } from './create' +import { DeepReadonly, UnionToIntersection } from 'ts-essentials' + +/** + * A pipeline is a collection of {@link Pipeline#steps|steps} that are executed in a certain {@link Pipeline#order|order}. + * It is to be created {@link createPipeline}. + * + * If you want to get the type of all steps in the pipeline (given they are created canonically using const step names), refer to {@link PipelineStepNames}. + */ +export interface Pipeline { + readonly steps: ReadonlyMap> + readonly order: readonly T['name'][] + /** + * In the order, this is the index of the first step that + * is executed {@link PipelineStepStage#OncePerRequest|once per request}. 
+ * If it is "out of bounds" (i.e., the number of steps), all steps are executed {@link PipelineStepStage#OncePerFile|once per file}. + */ + readonly firstStepPerRequest: number +} + +/** + * Returns the types of all step names in the given pipeline. + * + * @see Pipeline for details + */ +export type PipelineStepNames

= PipelineStep

['name'] +export type PipelineStep

= P extends Pipeline ? U : never + +export type PipelineStepWithName

= P extends Pipeline ? U extends IPipelineStep ? U : never : never +export type PipelineStepProcessorWithName

= PipelineStepWithName['processor'] +export type PipelineStepPrintersWithName

= PipelineStepWithName['printer'] +export type PipelineStepOutputWithName

= Awaited>> + + +export type PipelineInput

= UnionToIntersection['requiredInput']> + +/** + * Only gets the union of 'requiredInput' of those PipelineSteps which have a 'execute' field of type 'OncePerRequest'. + * In other words, information that you may want to change for another request (e.g., another slice) with the same file. + */ +export type PipelinePerRequestInput

= { + [K in PipelineStepNames

]: PipelineStep

['executed'] extends PipelineStepStage.OncePerFile ? never : PipelineStepWithName['requiredInput'] +}[PipelineStepNames

] + +export type PipelineOutput

= { + [K in PipelineStepNames

]: PipelineStepOutputWithName +} + +/** + * Creates a {@link Pipeline|pipeline} from a given collection of {@link IPipelineStep|steps}. + * In order to be valid, the collection of {@link IPipelineStep|steps} must satisfy the following set of constraints + * (which should be logical, when you consider what a pipeline should accomplish): + * + * 0) the collection of {@link IPipelineStep|steps} is not empty + * 1) all {@link IPipelineStepOrder#name|names} of {@link IPipelineStep|steps} are unique for the given pipeline + * 2) all {@link IPipelineStepOrder#dependencies|dependencies} of all {@link IPipelineStep|steps} are exist + * 3) there are no cycles in the dependency graph + * 4) the target of a {@link IPipelineStepOrder#decorates|step's decoration} exists + * 5) if a {@link IPipelineStepOrder#decorates|decoration} applies, all of its {@link IPipelineStepOrder#dependencies|dependencies} are already in the pipeline + * 6) in the resulting {@link Pipeline|pipeline}, there is a strict cut between {@link IPipelineStep|steps} that are executed + * {@link PipelineStepStage#OncePerFile|once per file} and {@link PipelineStepStage#OncePerRequest|once per request}. + * + * @returns The function will try to order your collection steps so that all the constraints hold. + * If it succeeds it will return the resulting {@link Pipeline|pipeline}, otherwise it will throw an {@link InvalidPipelineError}. + * + * @throws InvalidPipelineError If any of the constraints listed above are not satisfied. + */ +export function createPipeline(...steps: T): Pipeline { + return verifyAndBuildPipeline(steps) +} diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts new file mode 100644 index 0000000000..d416870151 --- /dev/null +++ b/src/core/steps/step.ts @@ -0,0 +1,97 @@ +/** + * Defines the {@link IPipelineStep} interface which specifies all data available for a single step. 
+ * + * @module + */ + +import { MergeableRecord } from '../../util/objects' +import { InternalStepPrinter, IPipelineStepPrinter, StepOutputFormat } from '../print/print' + +/** + * This represents the format of a step processor which retrieves two things: + * + * 1) the input configuration as passed to the {@link PipelineExecutor}. + * 2) the output produced by the previous steps. + * + * Please be aware, that if the respective information is available is not ensured by the type system but rather + * ensured at runtime by your dependencies. If you want to make sure, that the information is present, + * list all steps that you require as your {@link IPipelineStepOrder#dependencies|dependencies}, even if they would be + * already covered transitively. + */ +export type StepProcessingFunction = + (results: Record, input: Record) => unknown +/** + * This represents the required execution frequency of a step. + */ +export const enum PipelineStepStage { + /** This step has to be executed once per file */ + OncePerFile, + /** This step has to be executed once per request (e.g., slice for a given variable) */ + OncePerRequest +} + +export type PipelineStepName = string & { __brand?: 'StepName' } + +/** + * Contains the data to specify the order of {@link IPipelineStep|steps} in a pipeline. + */ +export interface IPipelineStepOrder< + Name extends PipelineStepName = PipelineStepName, +> { + /** + * Name of the respective step, it does not have to be unique in general but only unique per-pipeline. + * In other words, you can have multiple steps with a name like `parse` as long as you use only one of them in a given pipeline. + * This is, because these names are required in the {@link IPipelineStep#dependencies} field to refer to other steps this one relies on. + */ + readonly name: Name + /** + * Give the names of other steps this one requires to be completed as a prerequisite (e.g., to gain access to their input). 
+ * Does not have to be transitive, this will be checked by the scheduler of the pipeline. + */ + readonly dependencies: readonly PipelineStepName[] + /* does this step has to be repeated for each new request or can it be performed only once in the initialization */ + readonly executed: PipelineStepStage + /** + * This is similar to {@link dependencies}, but is used to say that a given step _decorates_ another one. + * This imbues two requirements: + * The step must take the output of the decorated step as input, and produce the same output as the decorated step. + * + * If so, it is ensured that _this_ step is executed _after_ the step it decorates, but before any step that depends on it. + */ + readonly decorates?: PipelineStepName +} + +/** + * Defines what is to be known of a single step in a pipeline. + * It wraps around a single {@link IPipelineStep#processor|processor} function, providing additional information. + * Steps will be executed synchronously, in-sequence, based on their {@link IPipelineStep#dependencies|dependencies}. + */ +export interface IPipelineStep< + Name extends PipelineStepName = PipelineStepName, + // eslint-disable-next-line -- by default, we assume nothing about the function shape + Fn extends StepProcessingFunction = (...args: any[]) => any, +> extends MergeableRecord, IPipelineStepOrder { + /** Human-readable description of this step */ + readonly description: string + /** The main processor that essentially performs the logic of this step */ + readonly processor: (...input: Parameters) => ReturnType + /** + * How to visualize the results of the respective step to the user? + */ + readonly printer: { + [K in StepOutputFormat]?: IPipelineStepPrinter + } & { + // we always want to have the internal printer + [StepOutputFormat.Internal]: InternalStepPrinter + } + /** + * Input configuration required to perform the respective steps. + * Required inputs of dependencies do not have to, but can be repeated. + *

+ * Use the pattern `undefined as unknown as T` to indicate that the value is required but not provided. + */ + readonly requiredInput: object +} + + + diff --git a/src/core/steps/steps.ts b/src/core/steps/steps.ts new file mode 100644 index 0000000000..758a214b1d --- /dev/null +++ b/src/core/steps/steps.ts @@ -0,0 +1,41 @@ +/** + * This file defines *all* steps of the slicing process and the data they require. + * + * Note, that the order of elements here also describes the *desired* order of their desired execution for readability. + * However, it is the {@link SteppingSlicer} which controls the order of execution and the steps required to achieve a given result. + * + * If you add a new step, you have to (at least) update the {@link SteppingSlicer} as well as the corresponding type predicate {@link SteppingSlicerInput}. + * Furthermore, if your step is the new *last* step, please update {@link LAST_STEP}. + * + * Please note that the combination of `satisfies` and `as` seems to be required. 
+ * With `satisfies` we make sure that the respective element has all the keys it requires, and the `as` force the type to be exactly the given one + * + * @module + */ + +import { PARSE_WITH_R_SHELL_STEP } from './all/core/00-parse' +import { NORMALIZE } from './all/core/10-normalize' +import { LEGACY_STATIC_DATAFLOW } from './all/core/20-dataflow' +import { STATIC_SLICE } from './all/static-slicing/30-slice' +import { NAIVE_RECONSTRUCT } from './all/static-slicing/40-reconstruct' + + +export const STEPS_PER_FILE = { + 'parse': PARSE_WITH_R_SHELL_STEP, + 'normalize': NORMALIZE, + 'dataflow': LEGACY_STATIC_DATAFLOW +} as const + +export const STEPS_PER_SLICE = { + 'slice': STATIC_SLICE, + 'reconstruct': NAIVE_RECONSTRUCT +} as const + +export const STEPS = { ...STEPS_PER_FILE, ...STEPS_PER_SLICE } as const +export const LAST_PER_FILE_STEP = 'dataflow' as const +export const LAST_STEP = 'reconstruct' as const + +export type StepName = keyof typeof STEPS +export type Step = typeof STEPS[Name] +export type StepProcessor = Step['processor'] +export type StepResult = Awaited>> diff --git a/src/flowr.ts b/src/flowr.ts index 201c8fcf94..bb3a1d6d88 100644 --- a/src/flowr.ts +++ b/src/flowr.ts @@ -5,7 +5,7 @@ * Otherwise, it will start a REPL that can call these scripts and return their results repeatedly. */ import { log, LogLevel } from './util/log' -import { RShell } from './r-bridge' +import { RShell, RShellReviveOptions } from './r-bridge' import commandLineUsage, { OptionDefinition } from 'command-line-usage' import commandLineArgs from 'command-line-args' import { guard } from './util/assert' @@ -79,7 +79,7 @@ if(options['no-ansi']) { async function retrieveShell(): Promise { // we keep an active shell session to allow other parse investigations :) const shell = new RShell({ - revive: 'always', + revive: RShellReviveOptions.Always, onRevive: (code, signal) => { const signalText = signal == null ? 
'' : ` and signal ${signal}` console.log(formatter.format(`R process exited with code ${code}${signalText}. Restarting...`, { color: Colors.Magenta, effect: ColorEffect.Foreground })) diff --git a/src/r-bridge/lang-4.x/values.ts b/src/r-bridge/lang-4.x/values.ts index 57f24bb3af..720dc1f916 100644 --- a/src/r-bridge/lang-4.x/values.ts +++ b/src/r-bridge/lang-4.x/values.ts @@ -11,9 +11,7 @@ class ValueConversionError extends Error { * transforms a value to something R can understand (e.g., booleans to TRUE/FALSE) */ export function ts2r(value: T): string { - if(typeof value === 'undefined') { - return 'NA' - } else if(typeof value === 'string') { + if(typeof value === 'string') { return JSON.stringify(value) } else if(typeof value === 'number') { return value.toString() @@ -21,6 +19,8 @@ export function ts2r(value: T): string { return value ? 'TRUE' : 'FALSE' } else if(value === null) { return 'NULL' + } else if(typeof value === 'undefined') { + return 'NA' } else if(Array.isArray(value)) { return `c(${value.map(ts2r).join(', ')})` } else if(typeof value === 'object') { diff --git a/src/r-bridge/shell.ts b/src/r-bridge/shell.ts index bce99de8c6..399a22d69c 100644 --- a/src/r-bridge/shell.ts +++ b/src/r-bridge/shell.ts @@ -1,4 +1,4 @@ -import { type ChildProcessWithoutNullStreams, spawn } from 'child_process' +import { type ChildProcessWithoutNullStreams, spawn } from 'node:child_process' import { deepMergeObject, type MergeableRecord } from '../util/objects' import { type ILogObj, type Logger } from 'tslog' import * as readline from 'node:readline' @@ -9,7 +9,7 @@ import semver from 'semver/preload' import { getPlatform } from '../util/os' import fs from 'fs' import { removeTokenMapQuotationMarks, TokenMap } from './retriever' -import { DeepWritable } from 'ts-essentials' +import { DeepReadonly, DeepWritable } from 'ts-essentials' export type OutputStreamSelector = 'stdout' | 'stderr' | 'both'; @@ -61,6 +61,12 @@ export const DEFAULT_OUTPUT_COLLECTOR_CONFIGURATION: 
OutputCollectorConfiguratio errorStopsWaiting: true } +export const enum RShellReviveOptions { + Never, + OnError, + Always +} + export interface RShellSessionOptions extends MergeableRecord { /** The path to the R executable, can be only the executable if it is to be found on the PATH. */ readonly pathToRExecutable: string @@ -70,10 +76,10 @@ export interface RShellSessionOptions extends MergeableRecord { readonly cwd: string /** The character to use to mark the end of a line. Is probably always `\n` (even on windows). */ readonly eol: string - /** The environment variables available in the R session. */ - readonly env: NodeJS.ProcessEnv + /** The environment variables available in the R session (undefined uses the child-process default). */ + readonly env: NodeJS.ProcessEnv | undefined /** If set, the R session will be restarted if it exits due to an error */ - readonly revive: 'never' | 'on-error' | 'always' + readonly revive: RShellReviveOptions /** Called when the R session is restarted, this makes only sense if `revive` is not set to `'never'` */ readonly onRevive: (code: number, signal: string | null) => void /** The path to the library directory, use undefined to let R figure that out for itself */ @@ -93,10 +99,10 @@ export const DEFAULT_R_SHELL_OPTIONS: RShellOptions = { pathToRExecutable: getPlatform() === 'windows' ? 'R.exe' : 'R', commandLineOptions: ['--vanilla', '--quiet', '--no-echo', '--no-save'], cwd: process.cwd(), - env: process.env, + env: undefined, eol: '\n', homeLibPath: getPlatform() === 'windows' ? 
undefined : '~/.r-libs', - revive: 'never', + revive: RShellReviveOptions.Never, onRevive: () => { /* do nothing */ } } as const @@ -118,7 +124,7 @@ export class RShell { private tempDirs = new Set() public constructor(options?: Partial) { - this.options = deepMergeObject(DEFAULT_R_SHELL_OPTIONS, options) + this.options = { ...DEFAULT_R_SHELL_OPTIONS, ...options } this.log = log.getSubLogger({ name: this.options.sessionName }) this.session = new RShellSession(this.options, this.log) @@ -126,12 +132,12 @@ export class RShell { } private revive() { - if(this.options.revive === 'never') { + if(this.options.revive === RShellReviveOptions.Never) { return } this.session.onExit((code, signal) => { - if(this.options.revive === 'always' || (this.options.revive === 'on-error' && code !== 0)) { + if(this.options.revive === RShellReviveOptions.Always || (this.options.revive === RShellReviveOptions.OnError && code !== 0)) { this.log.warn(`R session exited with code ${code}, reviving!`) this.options.onRevive(code, signal) this.session = new RShellSession(this.options, this.log) @@ -363,28 +369,36 @@ class RShellSession { private readonly bareSession: ChildProcessWithoutNullStreams private readonly sessionStdOut: readline.Interface private readonly sessionStdErr: readline.Interface - private readonly options: RShellSessionOptions + private readonly options: DeepReadonly private readonly log: Logger private collectionTimeout: NodeJS.Timeout | undefined - public constructor(options: RShellSessionOptions, log: Logger) { + public constructor(options: DeepReadonly, log: Logger) { this.bareSession = spawn(options.pathToRExecutable, options.commandLineOptions, { env: options.env, cwd: options.cwd, windowsHide: true }) - this.sessionStdOut = readline.createInterface({ - input: this.bareSession.stdout, - terminal: false - }) - this.sessionStdErr = readline.createInterface({ - input: this.bareSession.stderr, - terminal: false - }) - this.onExit(() => { this.end() }) + + this.sessionStdOut 
= readline.createInterface({ input: this.bareSession.stdout }) + this.sessionStdErr = readline.createInterface({ input: this.bareSession.stderr }) + + this.onExit(() => this.end()) this.options = options this.log = log - this.setupRSessionLoggers() + + if(log.settings.minLevel >= LogLevel.Trace) { + this.bareSession.stdout.on('data', (data: Buffer) => { + log.trace(`< ${data.toString()}`) + }) + this.bareSession.on('close', (code: number) => { + log.trace(`session exited with code ${code}`) + }) + } + + this.bareSession.stderr.on('data', (data: string) => { + log.warn(`< ${data}`) + }) } public write(data: string): void { @@ -467,20 +481,6 @@ class RShellSession { return killResult } - private setupRSessionLoggers(): void { - if(this.log.settings.minLevel >= LogLevel.Trace) { - this.bareSession.stdout.on('data', (data: Buffer) => { - this.log.trace(`< ${data.toString()}`) - }) - this.bareSession.on('close', (code: number) => { - this.log.trace(`session exited with code ${code}`) - }) - } - this.bareSession.stderr.on('data', (data: string) => { - this.log.warn(`< ${data}`) - }) - } - public onExit(callback: (code: number, signal: string | null) => void): void { this.bareSession.on('exit', callback) this.bareSession.stdin.on('error', callback) diff --git a/src/slicing/static/static-slicer.ts b/src/slicing/static/static-slicer.ts index 7cb0ba759c..36616d4303 100644 --- a/src/slicing/static/static-slicer.ts +++ b/src/slicing/static/static-slicer.ts @@ -20,7 +20,6 @@ import { log } from '../../util/log' import { getAllLinkedFunctionDefinitions } from '../../dataflow/internal/linker' import { overwriteEnvironments, pushLocalEnvironment, resolveByName } from '../../dataflow/environments' import objectHash from 'object-hash' -import { DefaultMap } from '../../util/defaultmap' import { LocalScope } from '../../dataflow/environments/scopes' import { convertAllSlicingCriteriaToIds, DecodedCriteria, SlicingCriteria } from '../criterion' @@ -74,9 +73,9 @@ export interface 
SliceResult { class VisitingQueue { private readonly threshold: number - private timesHitThreshold = 0 - private seen = new Map() - private idThreshold = new DefaultMap(() => 0) + private timesHitThreshold = 0 + private seen = new Map() + private idThreshold = new Map() private queue: NodeToSlice[] = [] constructor(threshold: number) { @@ -84,7 +83,8 @@ class VisitingQueue { } public add(target: NodeId, env: REnvironmentInformation, envFingerprint: string, onlyForSideEffects: boolean): void { - const idCounter = this.idThreshold.get(target) + const idCounter = this.idThreshold.get(target) ?? 0 + if(idCounter > this.threshold) { slicerLogger.warn(`id: ${target} has been visited ${idCounter} times, skipping`) this.timesHitThreshold++ @@ -101,11 +101,11 @@ class VisitingQueue { } } - public next(): NodeToSlice | undefined { - return this.queue.pop() + public next(): NodeToSlice { + return this.queue.pop() as NodeToSlice } - public has(): boolean { + public nonEmpty(): boolean { return this.queue.length > 0 } @@ -121,7 +121,7 @@ class VisitingQueue { /** * This returns the ids to include in the slice, when slicing with the given seed id's (must be at least one). *

- * The returned ids can be used to {@link reconstructToCode | reconstruct the slice to R code}. + * The returned ids can be used to {@link reconstructToCode|reconstruct the slice to R code}. */ export function staticSlicing(dataflowGraph: DataflowGraph, ast: NormalizedAst, criteria: SlicingCriteria, threshold = 75): Readonly { guard(criteria.length > 0, 'must have at least one seed id to calculate slice') @@ -133,48 +133,47 @@ export function staticSlicing(dataflowGraph: DataflowGraph, ast: NormalizedAst, // every node ships the call environment which registers the calling environment { - const basePrint = envFingerprint(initializeCleanEnvironments()) + const emptyEnv = initializeCleanEnvironments() + const basePrint = envFingerprint(emptyEnv) for(const startId of decodedCriteria) { - queue.add(startId.id, initializeCleanEnvironments(), basePrint, false) + queue.add(startId.id, emptyEnv, basePrint, false) } } - while(queue.has()) { + while(queue.nonEmpty()) { const current = queue.next() - if(current === undefined) { - continue - } - - const baseEnvFingerprint = envFingerprint(current.baseEnvironment) + const baseEnvironment = current.baseEnvironment + const baseEnvFingerprint = envFingerprint(baseEnvironment) const currentInfo = dataflowGraph.get(current.id, true) - // slicerLogger.trace(`visiting id: ${current.id} with name: ${currentInfo?.[0].name ?? 
''}`) if(currentInfo === undefined) { slicerLogger.warn(`id: ${current.id} must be in graph but can not be found, keep in slice to be sure`) continue } - if(currentInfo[0].tag === 'function-call' && !current.onlyForSideEffects) { + const [currentVertex, currentEdges] = currentInfo + + if(currentVertex.tag === 'function-call' && !current.onlyForSideEffects) { slicerLogger.trace(`${current.id} is a function call`) - sliceForCall(current, idMap, currentInfo[0], dataflowGraph, queue) + sliceForCall(current, idMap, currentVertex, dataflowGraph, queue) } const currentNode = idMap.get(current.id) guard(currentNode !== undefined, () => `id: ${current.id} must be in dataflowIdMap is not in ${graphToMermaidUrl(dataflowGraph, idMap)}`) - for(const [target, edge] of currentInfo[1]) { + for(const [target, edge] of currentEdges) { if(edge.types.has(EdgeType.SideEffectOnCall)) { - queue.add(target, current.baseEnvironment, baseEnvFingerprint, true) + queue.add(target, baseEnvironment, baseEnvFingerprint, true) } if(edge.types.has(EdgeType.Reads) || edge.types.has(EdgeType.DefinedBy) || edge.types.has(EdgeType.Argument) || edge.types.has(EdgeType.Calls) || edge.types.has(EdgeType.Relates) || edge.types.has(EdgeType.DefinesOnCall)) { - queue.add(target, current.baseEnvironment, baseEnvFingerprint, false) + queue.add(target, baseEnvironment, baseEnvFingerprint, false) } } - for(const controlFlowDependency of addControlDependencies(currentInfo[0].id, idMap)) { - queue.add(controlFlowDependency, current.baseEnvironment, baseEnvFingerprint, false) + for(const controlFlowDependency of addControlDependencies(currentVertex.id, idMap)) { + queue.add(controlFlowDependency, baseEnvironment, baseEnvFingerprint, false) } } diff --git a/src/util/arrays.ts b/src/util/arrays.ts index 30ada7a76c..56e85287c8 100644 --- a/src/util/arrays.ts +++ b/src/util/arrays.ts @@ -1,5 +1,10 @@ import { guard } from './assert' +/** + * Returns the tail of an array (all elements except the first one). 
+ */ +export type TailOfArray = T extends [infer _, ...infer Rest] ? Rest : never; + /** * Splits the array every time the given predicate fires. * The element the split appears on will not be included! @@ -35,6 +40,23 @@ export function splitArrayOn(arr: T[], predicate: (elem: T) => boolean): T[][ return result } +/** + * Returns a tuple of two arrays, where the first one contains all elements for which the predicate returned true, + * and the second one contains all elements for which the predicate returned false. + */ +export function partitionArray(arr: readonly T[], predicate: (elem: T) => boolean): [T[], T[]] { + const left: T[] = [] + const right: T[] = [] + for(const elem of arr) { + if(predicate(elem)) { + left.push(elem) + } else { + right.push(elem) + } + } + return [left, right] +} + /** * Generate all permutations of the given array using Heap's algorithm (with its non-recursive variant). * diff --git a/test/functionality/_helper/shell.ts b/test/functionality/_helper/shell.ts index 6574f53624..9d7e3daffb 100644 --- a/test/functionality/_helper/shell.ts +++ b/test/functionality/_helper/shell.ts @@ -10,7 +10,7 @@ import { RExpressionList, RNode, RNodeWithParent, - RShell, + RShell, ts2r, XmlParserHooks } from '../../../src/r-bridge' import { assert } from 'chai' @@ -18,7 +18,8 @@ import { DataflowGraph, diffGraphsToMermaidUrl, graphToMermaidUrl } from '../../ import { SlicingCriteria } from '../../../src/slicing' import { testRequiresRVersion } from './version' import { deepMergeObject, MergeableRecord } from '../../../src/util/objects' -import { executeSingleSubStep, LAST_STEP, SteppingSlicer } from '../../../src/core' +import { LAST_STEP, SteppingSlicer } from '../../../src/core' +import { NAIVE_RECONSTRUCT } from '../../../src/core/steps/all/static-slicing/40-reconstruct' export const testWithShell = (msg: string, fn: (shell: RShell, test: Mocha.Context) => void | Promise): Mocha.Test => { return it(msg, async function(): Promise { @@ -41,15 +42,22 @@ 
export const testWithShell = (msg: string, fn: (shell: RShell, test: Mocha.Conte export function withShell(fn: (shell: RShell) => void, packages: string[] = ['xmlparsedata']): () => void { return function() { const shell = new RShell() + // this way we probably do not have to reinstall even if we launch from WebStorm - before(async function() { + before('setup shell', async function() { this.timeout('15min') shell.tryToInjectHomeLibPath() + let network = false for(const pkg of packages) { if(!await shell.isPackageInstalled(pkg)) { - await testRequiresNetworkConnection(this) + if(!network) { + await testRequiresNetworkConnection(this) + } + network = true + await shell.ensurePackageInstalled(pkg, true) + } else { + shell.sendCommand(`library(${ts2r(pkg)})`) } - await shell.ensurePackageInstalled(pkg, true) } }) fn(shell) @@ -188,7 +196,14 @@ export function assertReconstructed(name: string, shell: RShell, input: string, request: requestFromInput(input), shell }).allRemainingSteps() - const reconstructed = executeSingleSubStep('reconstruct', result.normalize, new Set(selectedIds)) + const reconstructed = NAIVE_RECONSTRUCT.processor({ + normalize: result.normalize, + slice: { + decodedCriteria: [], + timesHitThreshold: 0, + result: new Set(selectedIds) + } + }, {}) assert.strictEqual(reconstructed.code, expected, `got: ${reconstructed.code}, vs. 
expected: ${expected}, for input ${input} (ids: ${printIdMapping(selectedIds, result.normalize.idMap)})`) }) } @@ -204,7 +219,6 @@ export function assertSliced(name: string, shell: RShell, input: string, criteri criterion: criteria, }).allRemainingSteps() - try { assert.strictEqual( result.reconstruct.code, expected, diff --git a/test/functionality/dataflow/dataflow.spec.ts b/test/functionality/dataflow/dataflow.spec.ts index 2225b867d6..b13e6d891d 100644 --- a/test/functionality/dataflow/dataflow.spec.ts +++ b/test/functionality/dataflow/dataflow.spec.ts @@ -10,6 +10,5 @@ describe('Dataflow', () => { requireAllTestsInFolder(path.join(__dirname, 'graph')) ) - require('./processing-of-elements/processing-of-elements') }) diff --git a/test/functionality/pipelines/create/create-tests.ts b/test/functionality/pipelines/create/create-tests.ts new file mode 100644 index 0000000000..4a907fd23d --- /dev/null +++ b/test/functionality/pipelines/create/create-tests.ts @@ -0,0 +1,164 @@ +import { createPipeline } from '../../../../src/core/steps/pipeline' +import { IPipelineStep, PipelineStepName } from '../../../../src/core/steps' +import { expect } from 'chai' +import { PARSE_WITH_R_SHELL_STEP } from '../../../../src/core/steps/all/core/00-parse' +import { allPermutations } from '../../../../src/util/arrays' +import { NORMALIZE } from '../../../../src/core/steps/all/core/10-normalize' +import { LEGACY_STATIC_DATAFLOW } from '../../../../src/core/steps/all/core/20-dataflow' +import { STATIC_SLICE } from '../../../../src/core/steps/all/static-slicing/30-slice' +import { NAIVE_RECONSTRUCT } from '../../../../src/core/steps/all/static-slicing/40-reconstruct' + +describe('Create Pipeline (includes dependency checks)', () => { + describe('error-cases', () => { + function negative(name: string, rawSteps: IPipelineStep[], message: string | RegExp) { + it(`${name} (all permutations)`, () => { + for(const steps of allPermutations(rawSteps)) { + expect(() => 
createPipeline(...steps)).to.throw(message) + } + }) + } + describe('without decorators', () => { + negative('should throw on empty input', [], /empty/) + negative('should throw on duplicate names', + [PARSE_WITH_R_SHELL_STEP, PARSE_WITH_R_SHELL_STEP], /duplicate|not unique/) + negative('should throw on invalid dependencies', + [PARSE_WITH_R_SHELL_STEP, { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + dependencies: ['foo'] + }], /invalid dependency|not exist/) + negative('should throw on cycles', + [PARSE_WITH_R_SHELL_STEP, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v1', + dependencies: ['parse-v2'] + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + dependencies: ['parse-v1'] + } + ], /cycle/) + }) + describe('with decorators', () => { + negative('should throw on decoration cycles', + [PARSE_WITH_R_SHELL_STEP, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v1', + decorates: 'parse', + dependencies: ['parse-v2'] + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + decorates: 'parse', + dependencies: ['parse-v1'] + } + ], /decoration cycle/) + negative('decorate non-existing step', + [{ + ...PARSE_WITH_R_SHELL_STEP, + decorates: 'foo' + }], /decorates.+not exist/) + }) + }) + describe('default behavior', () => { + function positive(name: string, rawSteps: IPipelineStep[], expected: PipelineStepName[], indexOfFirstPerFile: number = expected.length) { + it(`${name} (all permutations)`, () => { + for(const steps of allPermutations(rawSteps)) { + const pipeline = createPipeline(...steps) + expect([...pipeline.steps.keys()]).to.have.members(expected, `should have the correct keys for ${JSON.stringify(steps)}`) + expect(pipeline.order).to.have.ordered.members(expected, `should have the correct keys for ${JSON.stringify(steps)}`) + expect(pipeline.firstStepPerRequest).to.equal(indexOfFirstPerFile, `should have the correct firstStepPerRequest for ${JSON.stringify(steps)}`) + } + }) + } + + describe('without decorators', () => { + 
positive('should work on a single step', [PARSE_WITH_R_SHELL_STEP], ['parse']) + positive('should work on a single step with dependencies', [ + PARSE_WITH_R_SHELL_STEP, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + dependencies: ['parse'] + } + ], ['parse', 'parse-v2']) + // they will be shuffled in all permutations + positive('default pipeline', [ + PARSE_WITH_R_SHELL_STEP, + NORMALIZE, + LEGACY_STATIC_DATAFLOW, + STATIC_SLICE, + NAIVE_RECONSTRUCT + ], ['parse', 'normalize', 'dataflow', 'slice', 'reconstruct'], 3) + }) + describe('with decorators', () => { + positive('simple decorator on first step', [ + PARSE_WITH_R_SHELL_STEP, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + dependencies: [], + decorates: 'parse', + } + ], ['parse', 'parse-v2'], 2) + positive('decorators can depend on each other', [ + PARSE_WITH_R_SHELL_STEP, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + decorates: 'parse', + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v3', + dependencies: ['parse-v2'], + decorates: 'parse', + } + ], ['parse', 'parse-v2', 'parse-v3']) + positive('not the first, and multiple decorators', [ + PARSE_WITH_R_SHELL_STEP, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + dependencies: ['parse'], + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v3', + decorates: 'parse-v2', + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v4', + dependencies: ['parse-v2'] + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v6', + dependencies: ['parse-v4'] + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v5', + decorates: 'parse-v6', + } + ], ['parse', 'parse-v2', 'parse-v3', 'parse-v4', 'parse-v6', 'parse-v5']) + positive('default pipeline with dataflow decoration', [ + PARSE_WITH_R_SHELL_STEP, + NORMALIZE, + LEGACY_STATIC_DATAFLOW, + { + ...LEGACY_STATIC_DATAFLOW, + name: 'dataflow-decorator', + decorates: 'dataflow' + }, + STATIC_SLICE, + NAIVE_RECONSTRUCT + ], ['parse', 'normalize', 'dataflow', 'dataflow-decorator', 'slice', 
'reconstruct'], 4) + }) + }) +}) diff --git a/test/functionality/pipelines/pipelines.spec.ts b/test/functionality/pipelines/pipelines.spec.ts new file mode 100644 index 0000000000..c929e370e2 --- /dev/null +++ b/test/functionality/pipelines/pipelines.spec.ts @@ -0,0 +1,6 @@ +import { requireAllTestsInFolder } from '../_helper/collect-tests' +import path from 'node:path' + +describe('Pipelines', () => { + describe('create', () => requireAllTestsInFolder(path.join(__dirname, 'create'))) +}) diff --git a/test/functionality/r-bridge/lang/ast/parse-function-call.ts b/test/functionality/r-bridge/lang/ast/parse-function-call.ts index 219c575ec6..537eb674c7 100644 --- a/test/functionality/r-bridge/lang/ast/parse-function-call.ts +++ b/test/functionality/r-bridge/lang/ast/parse-function-call.ts @@ -4,7 +4,7 @@ import { rangeFrom } from '../../../../../src/util/range' import { RType } from '../../../../../src/r-bridge' import { ensureExpressionList } from '../../../../../src/r-bridge/lang-4.x/ast/parser/xml/internal' -describe('Parse function calls', withShell((shell) => { +describe('Parse function calls', withShell(shell => { describe('functions without arguments', () => { assertAst( 'f()', diff --git a/tsconfig.json b/tsconfig.json index e813248d40..9e628fc863 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -9,7 +9,8 @@ "skipLibCheck": true, "sourceMap": true, "outDir": "./dist/", - "strict": true + "strict": true, + "alwaysStrict": true }, "lib": [ "esnext", "dom" ], "exclude": [ diff --git a/wiki/Interface.md b/wiki/Interface.md index 8b03e1a448..fb2470b997 100644 --- a/wiki/Interface.md +++ b/wiki/Interface.md @@ -18,7 +18,8 @@ Although far from being as detailed as the in-depth explanation of [*flowR*](htt - [Interfacing With the File System](#interfacing-with-the-file-system) - [⚒️ Writing Code](#️-writing-code) - [Interfacing With R by Using The `RShell`](#interfacing-with-r-by-using-the-rshell) - - [Slicing With The 
`SteppingSlicer`](#slicing-with-the-steppingslicer) + - [The Pipeline Executor](#the-pipeline-executor) + - [(Deprecated) Slicing With The `SteppingSlicer`](#deprecated-slicing-with-the-steppingslicer) - [Understanding the Steps](#understanding-the-steps) - [Benchmark the Slicer With The `BenchmarkSlicer`](#benchmark-the-slicer-with-the-benchmarkslicer) - [Augmenting the Normalization](#augmenting-the-normalization) @@ -953,16 +954,41 @@ With a shell object (let's call it `shell`), you can execute R code by using `RS Besides that, the command `RShell::tryToInjectHomeLibPath` may be of interest, as it enables all libraries available on the host system. +### The Pipeline Executor -### Slicing With The `SteppingSlicer` +Once, in the beginning, *flowR* was meant to produce a dataflow graph merely to provide *program slices*. However, with continuous extensions the dataflow graph repeatedly proofs to be the interesting part. +With this, we restructured *flowR*'s *hardcoded* pipeline to be +far more flexible. Now, it can be theoretically extended or replaced with arbitrary steps, optional steps, and, what we call 'decorations' of these steps. In short, if you still "just want to slice", you can do it like this: + +```typescript +const slicer = new PipelineExecutor(DEFAULT_SLICING_PIPELINE, { + shell: new RShell(), + request: requestFromInput('x <- 1\nx + 1'), + criterion: ['2@x'] +}) +const slice = await slicer.allRemainingSteps() +// console.log(slice.reconstruct.code) +``` + +If you compare this, with what you would have done with the [old `SteppingSlicer`](#deprecated-slicing-with-the-steppingslicer) this essentially just requires you to replace the `SteppingSlicer` with the `PipelineExecutor` and to pass the `DEFAULT_SLICING_PIPELINE` as the first argument. +Similarly, the new `PipelineExecutor`... + +1. allows to investigate the results of all intermediate steps +2. can be executed step-by-step +3. 
can repeat steps (e.g., to calculate multiple slices on the same input) + +See the [documentation](https://code-inspect.github.io/flowr/doc/classes/src_core_pipeline-executor.PipelineExecutor.html) for more information. + +### (Deprecated) Slicing With The `SteppingSlicer` + +> 💡 Information\ +> Please note, that the `SteppingSlicer` has been deprecated with the *Dataflow v2* update, in favor of a far more general `PipelineExecutor` (which now backs the `SteppingSlicer` using a custom legacy-`Pipeline` to ensure that it behaves similar). The main class that represents *flowR*'s slicing is the [`SteppingSlicer`](https://code-inspect.github.io/flowr/doc/classes/src_core_slicer.SteppingSlicer.html) class. With *flowR*, this allows you to slice code like this: ```typescript -const shell = new RShell() - const stepper = new SteppingSlicer({ - shell: shell, + shell: new RShell(), request: requestFromInput('x <- 1\nx + 1'), criterion: ['2@x'] }) @@ -985,13 +1011,13 @@ Besides slicing, the stepping slicer: 2. can be executed step-by-step 3. can be told to stop after a given step -See the [documentation](https://code-inspect.github.io/flowr/doc/classes/src_core_slicer.SteppingSlicer.html) for more. +See the [documentation](https://code-inspect.github.io/flowr/doc/classes/src_core_stepping-slicer.SteppingSlicer.html) for more. #### Understanding the Steps The definition of all steps happens in [src/core/steps.ts](https://github.com/Code-Inspect/flowr/blob/main/src/core/steps.ts). Investigating the file provides you an overview of the slicing phases, as well as the functions that are called to perform the respective step. -The [`SteppingSlicer`](https://github.com/Code-Inspect/flowr/blob/main/src/core/slicer.ts) simply glues them together and passes the results of one step to the next. +The [`SteppingSlicer`](https://github.com/Code-Inspect/flowr/blob/main/src/core/stepping-slicer.ts) simply glues them together and passes the results of one step to the next. 
If you are interested in the type magic associated with the stepping slicer's output type, refer to [src/core/output.ts](https://github.com/Code-Inspect/flowr/blob/main/src/core/output.ts). If you add a new step, make sure to modify all of these locations accordingly.