From 26231d797f84643dc9281636c446ea07784e9837 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 22 Nov 2023 20:15:34 +0100 Subject: [PATCH 001/104] wip, feat: outsourcing the default steps to allow a more sophisticated step processing --- package.json | 2 +- src/core/index.ts | 6 +-- src/core/print/print.ts | 4 ++ src/core/slicer.ts | 4 +- src/core/steps/all/00-parse.ts | 18 ++++++++ src/core/steps/all/10-normalize.ts | 24 +++++++++++ src/core/steps/all/20-dataflow.ts | 24 +++++++++++ src/core/steps/all/30-slice.ts | 14 +++++++ src/core/steps/all/40-reconstruct.ts | 14 +++++++ src/core/steps/index.ts | 4 ++ src/core/{ => steps}/input.ts | 6 +-- src/core/{ => steps}/output.ts | 0 src/core/steps/step.ts | 61 ++++++++++++++++++++++++++++ src/core/steps/steps-provider.ts | 14 +++++++ src/core/{ => steps}/steps.ts | 49 ++++------------------ 15 files changed, 195 insertions(+), 49 deletions(-) create mode 100644 src/core/steps/all/00-parse.ts create mode 100644 src/core/steps/all/10-normalize.ts create mode 100644 src/core/steps/all/20-dataflow.ts create mode 100644 src/core/steps/all/30-slice.ts create mode 100644 src/core/steps/all/40-reconstruct.ts create mode 100644 src/core/steps/index.ts rename src/core/{ => steps}/input.ts (94%) rename src/core/{ => steps}/output.ts (100%) create mode 100644 src/core/steps/step.ts create mode 100644 src/core/steps/steps-provider.ts rename src/core/{ => steps}/steps.ts (77%) diff --git a/package.json b/package.json index 081db72a22..6a4b7693ca 100644 --- a/package.json +++ b/package.json @@ -173,7 +173,7 @@ "check-file/filename-naming-convention": [ "error", { - "**/*.ts": "?([A-Z])+([a-z])*((-|.)?([A-Z])+([a-z]))" + "**/*.ts": "([0-9]+-)?([A-Z])+([a-z])*((-|.)?([A-Z])+([a-z]))" } ], "check-file/folder-match-with-fex": [ diff --git a/src/core/index.ts b/src/core/index.ts index 84b443c457..9791bf0539 100644 --- a/src/core/index.ts +++ b/src/core/index.ts @@ -1,4 +1,4 @@ export { SteppingSlicer } from './slicer' -export * 
from './steps' -export * from './input' -export * from './output' +export * from './steps/steps' +export * from './steps/input' +export * from './steps/output' diff --git a/src/core/print/print.ts b/src/core/print/print.ts index 86923338ef..824e7f29e0 100644 --- a/src/core/print/print.ts +++ b/src/core/print/print.ts @@ -48,7 +48,11 @@ export function internalPrinter(input: Input): Input { /** * A mapping function that maps the result of a step (i.e., the dataflow graph) * to another representation (linked by {@link StepOutputFormat} in an {@link IStep}). + * + * For the internal format, refer to {@link InternalStepPrinter} as a shorthand. */ export type IStepPrinter = Format extends StepOutputFormat.Internal ? (input: Awaited>) => Awaited> : (input: Awaited>, ...additional: AdditionalInput) => Promise | string + +export type InternalStepPrinter = IStepPrinter diff --git a/src/core/slicer.ts b/src/core/slicer.ts index 375a6b5ed6..68c42b54d7 100644 --- a/src/core/slicer.ts +++ b/src/core/slicer.ts @@ -10,13 +10,13 @@ import { StepRequired, STEPS, STEPS_PER_FILE, STEPS_PER_SLICE, + SteppingSlicerInput, + StepResults, StepName, StepResult } from './steps' import { guard } from '../util/assert' import { SliceResult, SlicingCriteria } from '../slicing' import { DeepPartial } from 'ts-essentials' -import { SteppingSlicerInput } from './input' -import { StepResults } from './output' import { DataflowInformation } from '../dataflow/internal/info' /** diff --git a/src/core/steps/all/00-parse.ts b/src/core/steps/all/00-parse.ts new file mode 100644 index 0000000000..f0ee2eca1c --- /dev/null +++ b/src/core/steps/all/00-parse.ts @@ -0,0 +1,18 @@ +import { internalPrinter, StepOutputFormat } from '../../print/print' +import { parseToQuads } from '../../print/parse-printer' +import { IStep } from '../step' +import { retrieveXmlFromRCode } from '../../../r-bridge' + + +export const PARSE_WITH_R_SHELL_STEP = { + name: 'parse', + description: 'Parse the given R code into an AST', + 
processor: retrieveXmlFromRCode, + required: 'once-per-file', + printer: { + [StepOutputFormat.Internal]: internalPrinter, + [StepOutputFormat.Json]: text => text, + [StepOutputFormat.RdfQuads]: parseToQuads + }, + dependencies: [] +} satisfies IStep diff --git a/src/core/steps/all/10-normalize.ts b/src/core/steps/all/10-normalize.ts new file mode 100644 index 0000000000..a0f6bc167f --- /dev/null +++ b/src/core/steps/all/10-normalize.ts @@ -0,0 +1,24 @@ +import { normalize } from '../../../r-bridge' +import { internalPrinter, StepOutputFormat } from '../../print/print' +import { + normalizedAstToJson, + normalizedAstToQuads, + printNormalizedAstToMermaid, + printNormalizedAstToMermaidUrl +} from '../../print/normalize-printer' +import { IStep } from '../step' + +export const NORMALIZE = { + name: 'normalize', + description: 'Normalize the AST to flowR\'s AST (first step of the normalization)', + processor: normalize, + required: 'once-per-file', + printer: { + [StepOutputFormat.Internal]: internalPrinter, + [StepOutputFormat.Json]: normalizedAstToJson, + [StepOutputFormat.RdfQuads]: normalizedAstToQuads, + [StepOutputFormat.Mermaid]: printNormalizedAstToMermaid, + [StepOutputFormat.MermaidUrl]: printNormalizedAstToMermaidUrl + }, + dependencies: [ 'parse' ] +} satisfies IStep diff --git a/src/core/steps/all/20-dataflow.ts b/src/core/steps/all/20-dataflow.ts new file mode 100644 index 0000000000..7752512498 --- /dev/null +++ b/src/core/steps/all/20-dataflow.ts @@ -0,0 +1,24 @@ +import { internalPrinter, StepOutputFormat } from '../../print/print' +import { IStep } from '../step' +import { produceDataFlowGraph } from '../../../dataflow' +import { + dataflowGraphToJson, + dataflowGraphToMermaid, + dataflowGraphToMermaidUrl, + dataflowGraphToQuads +} from '../../print/dataflow-printer' + +export const LEGACY_STATIC_DATAFLOW = { + name: 'dataflow', + description: 'Construct the dataflow graph', + processor: produceDataFlowGraph, + required: 'once-per-file', + printer: { 
+ [StepOutputFormat.Internal]: internalPrinter, + [StepOutputFormat.Json]: dataflowGraphToJson, + [StepOutputFormat.RdfQuads]: dataflowGraphToQuads, + [StepOutputFormat.Mermaid]: dataflowGraphToMermaid, + [StepOutputFormat.MermaidUrl]: dataflowGraphToMermaidUrl + }, + dependencies: [ 'normalize' ] +} satisfies IStep diff --git a/src/core/steps/all/30-slice.ts b/src/core/steps/all/30-slice.ts new file mode 100644 index 0000000000..8173b7300c --- /dev/null +++ b/src/core/steps/all/30-slice.ts @@ -0,0 +1,14 @@ +import { internalPrinter, StepOutputFormat } from '../../print/print' +import { IStep } from '../step' +import { staticSlicing } from '../../../slicing' + +export const STATIC_SLICE = { + name: 'slice', + description: 'Calculate the actual static slice from the dataflow graph and the given slicing criteria', + processor: staticSlicing, + required: 'once-per-slice', + printer: { + [StepOutputFormat.Internal]: internalPrinter + }, + dependencies: [ 'dataflow' ] +} satisfies IStep diff --git a/src/core/steps/all/40-reconstruct.ts b/src/core/steps/all/40-reconstruct.ts new file mode 100644 index 0000000000..0ca126383d --- /dev/null +++ b/src/core/steps/all/40-reconstruct.ts @@ -0,0 +1,14 @@ +import { internalPrinter, StepOutputFormat } from '../../print/print' +import { IStep } from '../step' +import { reconstructToCode } from '../../../slicing' + +export const NAIVE_RECONSTRUCT = { + name: 'reconstruct', + description: 'Reconstruct R code from the static slice', + processor: reconstructToCode, + required: 'once-per-slice', + printer: { + [StepOutputFormat.Internal]: internalPrinter + }, + dependencies: [ 'slice' ] +} satisfies IStep diff --git a/src/core/steps/index.ts b/src/core/steps/index.ts new file mode 100644 index 0000000000..7fcfa6e800 --- /dev/null +++ b/src/core/steps/index.ts @@ -0,0 +1,4 @@ +export * from './output' +export * from './step' +export * from './steps' +export * from './input' diff --git a/src/core/input.ts b/src/core/steps/input.ts 
similarity index 94% rename from src/core/input.ts rename to src/core/steps/input.ts index 59ac663dfd..233beba510 100644 --- a/src/core/input.ts +++ b/src/core/steps/input.ts @@ -1,7 +1,7 @@ -import { MergeableRecord } from '../util/objects' -import { IdGenerator, NoInfo, RParseRequest, RShell, XmlParserHooks } from '../r-bridge' +import { MergeableRecord } from '../../util/objects' +import { IdGenerator, NoInfo, RParseRequest, RShell, XmlParserHooks } from '../../r-bridge' import { DeepPartial } from 'ts-essentials' -import { AutoSelectPredicate, SlicingCriteria } from '../slicing' +import { AutoSelectPredicate, SlicingCriteria } from '../../slicing' import { STEPS_PER_SLICE, StepName, STEPS_PER_FILE } from './steps' /** diff --git a/src/core/output.ts b/src/core/steps/output.ts similarity index 100% rename from src/core/output.ts rename to src/core/steps/output.ts diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts new file mode 100644 index 0000000000..f99fa530f1 --- /dev/null +++ b/src/core/steps/step.ts @@ -0,0 +1,61 @@ +/** + * Defines the {@link IStep} interface which specifies all data available for a single step. + * + * @module + */ + +import { MergeableRecord } from '../../util/objects' +import { InternalStepPrinter, IStepPrinter, StepOutputFormat } from '../print/print' + + +/** + * This represents close a function that we know completely nothing about. + * Nevertheless, this is the basis of what a step processor should look like. + */ +export type StepFunction = (...args: never[]) => unknown +/** + * This represents the required execution frequency of a step. + */ +export type StepRequired = 'once-per-file' | 'once-per-slice' + + +export type StepName = string & { __brand?: 'StepName' } + +/** + * Defines what is to be known of a single step in the slicing process. + * It wraps around a single {@link IStep#processor|processor} function, providing additional information. 
+ * Steps will be executed synchronously, in-sequence, based on their {@link IStep#dependencies|dependencies}. + */ +export interface IStep< + Fn extends StepFunction, +> extends MergeableRecord { + /** + * Name of the respective step, it does not have to be unique in general but only unique per-pipeline. + * In other words, you can have multiple steps with a name like `parse` as long as you use only one of them in a given pipeline. + * This is, because these names are required in the {@link IStep#dependencies} field to refer to other steps this one relies on. + */ + name: StepName + /** Human-readable description of this step */ + description: string + /** The main processor that essentially performs the logic of this step */ + processor: (...input: Parameters) => ReturnType + /* does this step has to be repeated for each new slice or can it be performed only once in the initialization */ + required: StepRequired + /** + * How to visualize the results of the respective step to the user? + */ + printer: { + [K in StepOutputFormat]?: IStepPrinter + } & { + // we always want to have the internal printer + [StepOutputFormat.Internal]: InternalStepPrinter + } + /** + * Give the names of other steps this one requires to be completed as a prerequisite (e.g., to gain access to their input). + * Does not have to be transitive, this will be checked by the scheduler of the pipeline. + */ + dependencies: StepName[] +} + + + diff --git a/src/core/steps/steps-provider.ts b/src/core/steps/steps-provider.ts new file mode 100644 index 0000000000..96d3c2da89 --- /dev/null +++ b/src/core/steps/steps-provider.ts @@ -0,0 +1,14 @@ +/** + * Defines a factor interface which allows to retrieve steps based on a configuration. + * It extends on the single steps provided by flowr, with the hopes of keeping the interface the same. 
+ * + * @module + */ +import { MergeableRecord } from '../../util/objects' +import { IStep } from './step' + + +export interface StepsConfiguration extends MergeableRecord { + readonly name: string +} + diff --git a/src/core/steps.ts b/src/core/steps/steps.ts similarity index 77% rename from src/core/steps.ts rename to src/core/steps/steps.ts index 0fbb1e88e3..2bfc1111aa 100644 --- a/src/core/steps.ts +++ b/src/core/steps/steps.ts @@ -13,59 +13,28 @@ * @module */ -import { MergeableRecord } from '../util/objects' import { normalize, retrieveXmlFromRCode -} from '../r-bridge' -import { produceDataFlowGraph } from '../dataflow' -import { reconstructToCode, staticSlicing } from '../slicing' -import { internalPrinter, IStepPrinter, StepOutputFormat } from './print/print' +} from '../../r-bridge' +import { produceDataFlowGraph } from '../../dataflow' +import { reconstructToCode, staticSlicing } from '../../slicing' +import { internalPrinter, IStepPrinter, StepOutputFormat } from '../print/print' import { normalizedAstToJson, normalizedAstToQuads, printNormalizedAstToMermaid, printNormalizedAstToMermaidUrl -} from './print/normalize-printer' -import { guard } from '../util/assert' +} from '../print/normalize-printer' +import { guard } from '../../util/assert' import { dataflowGraphToJson, dataflowGraphToMermaid, dataflowGraphToMermaidUrl, dataflowGraphToQuads -} from './print/dataflow-printer' -import { parseToQuads } from './print/parse-printer' - -/** - * This represents close a function that we know completely nothing about. - * Nevertheless, this is the basis of what a step processor should look like. - */ -export type StepFunction = (...args: never[]) => unknown -/** - * This represents the required execution frequency of a step. - */ -export type StepRequired = 'once-per-file' | 'once-per-slice' - - -/** - * Defines what is to be known of a single step in the slicing process. 
- */ -export interface IStep< - Fn extends StepFunction, -> extends MergeableRecord { - /** Human-readable description of this step */ - description: string - /** The main processor that essentially performs the logic of this step */ - processor: (...input: Parameters) => ReturnType - /* does this step has to be repeated for each new slice or can it be performed only once in the initialization */ - required: StepRequired - printer: { - [K in StepOutputFormat]?: IStepPrinter - } & { - // we always want to have the internal printer - [StepOutputFormat.Internal]: IStepPrinter - } -} +} from '../print/dataflow-printer' +import { parseToQuads } from '../print/parse-printer' +import { IStep } from './step' export const STEPS_PER_FILE = { From dbf66139dde19e8ab91f568efbe88e375cd239e5 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 22 Nov 2023 21:12:18 +0100 Subject: [PATCH 002/104] feat, wip: basic pipeline verification --- package.json | 2 +- src/core/steps/pipeline.ts | 7 ++ src/core/steps/pipeline/dependency-checker.ts | 79 +++++++++++++++++++ .../steps/pipeline/invalid-pipeline-error.ts | 6 ++ src/core/steps/pipeline/pipeline.ts | 6 ++ src/core/steps/step.ts | 2 +- 6 files changed, 100 insertions(+), 2 deletions(-) create mode 100644 src/core/steps/pipeline.ts create mode 100644 src/core/steps/pipeline/dependency-checker.ts create mode 100644 src/core/steps/pipeline/invalid-pipeline-error.ts create mode 100644 src/core/steps/pipeline/pipeline.ts diff --git a/package.json b/package.json index 6a4b7693ca..145832767c 100644 --- a/package.json +++ b/package.json @@ -173,7 +173,7 @@ "check-file/filename-naming-convention": [ "error", { - "**/*.ts": "([0-9]+-)?([A-Z])+([a-z])*((-|.)?([A-Z])+([a-z]))" + "**/*.ts": "?([0-9]+-)?([A-Z])+([a-z])*((-|.)?([A-Z])+([a-z]))" } ], "check-file/folder-match-with-fex": [ diff --git a/src/core/steps/pipeline.ts b/src/core/steps/pipeline.ts new file mode 100644 index 0000000000..7f8a365dc6 --- /dev/null +++ 
b/src/core/steps/pipeline.ts @@ -0,0 +1,7 @@ +import { IStep } from './step' + +/** + * A pipeline describes a sequence of steps that are to be executed in order. + */ +// TODO: stepping slicer is repsonsible! +export type IPipeline = IStep[] diff --git a/src/core/steps/pipeline/dependency-checker.ts b/src/core/steps/pipeline/dependency-checker.ts new file mode 100644 index 0000000000..f7283a2ed1 --- /dev/null +++ b/src/core/steps/pipeline/dependency-checker.ts @@ -0,0 +1,79 @@ +import { IStep, StepName } from '../step' +import { InvalidPipelineError } from './invalid-pipeline-error' +import { Pipeline } from './pipeline' + + +/** + * Given a set of {@link IStep|steps} with their dependencies, this function verifies that + * 1) all names of steps are unique for the given pipeline + * 2) all dependencies of steps are valid (i.e., refer to existing steps) + * 3) there are no cycles in the dependency graph + * If successful, it returns the topologically sorted list of steps in order of desired execution. + */ +export function verifyPipeline(steps: IStep[]): Pipeline { + // we construct a map linking each name to its respective step + const stepMap = new Map() + // we track all elements without dependencies, i.e. 
those that start the pipeline + const inits: StepName[] = [] + initializeSteps(steps, stepMap, inits) + + if(inits.length === 0) { + throw new InvalidPipelineError('Pipeline has no initial steps (i.e., it contains no step without dependencies)') + } + const sorted = topoSort(inits, stepMap) + + if(sorted.length !== stepMap.size) { + // check if any of the dependencies in the map are invalid + checkForInvalidDependency(steps, stepMap) + // otherwise, we assume a cycle + throw new InvalidPipelineError('Pipeline contains at least one cycle') + } + + return { + steps: stepMap, + order: sorted + } +} + +function topoSort(inits: StepName[], stepMap: Map) { + // now, we topo-sort the steps + const sorted: StepName[] = [] + const visited = new Set() + while(inits.length > 0) { + const init = inits.pop() as StepName + sorted.push(init) + visited.add(init) + // TODO: improve this check, maybe really remove? + for(const [key, step] of stepMap.entries()) { + if(!visited.has(key) && step.dependencies.filter(dep => !visited.has(dep)).length === 0) { + inits.push(key) + } + } + } + return sorted +} + +function checkForInvalidDependency(steps: IStep[], stepMap: Map) { + for(const step of steps) { + for(const dep of step.dependencies) { + if(!stepMap.has(dep)) { + throw new InvalidPipelineError(`Step "${step.name}" depends on step "${dep}" which does not exist`) + } + } + } +} + +function initializeSteps(steps: IStep[], stepMap: Map, inits: StepName[]) { + for(const step of steps) { + const name = step.name + // if the name is already in the map we have a duplicate + if(stepMap.has(name)) { + throw new InvalidPipelineError(`Step name "${name}" is not unique in the pipeline`) + } + stepMap.set(name, step) + if(step.dependencies.length === 0) { + inits.push(name) + } + } +} + diff --git a/src/core/steps/pipeline/invalid-pipeline-error.ts b/src/core/steps/pipeline/invalid-pipeline-error.ts new file mode 100644 index 0000000000..a570178774 --- /dev/null +++ 
b/src/core/steps/pipeline/invalid-pipeline-error.ts @@ -0,0 +1,6 @@ +export class InvalidPipelineError extends Error { + constructor(message: string) { + super(message) + this.name = 'InvalidPipelineError' + } +} diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts new file mode 100644 index 0000000000..e01f78b596 --- /dev/null +++ b/src/core/steps/pipeline/pipeline.ts @@ -0,0 +1,6 @@ +import { IStep, StepName } from '../step' + +export interface Pipeline { + readonly steps: ReadonlyMap + readonly order: StepName[] +} diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index f99fa530f1..0d5a2a9f47 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -27,7 +27,7 @@ export type StepName = string & { __brand?: 'StepName' } * Steps will be executed synchronously, in-sequence, based on their {@link IStep#dependencies|dependencies}. */ export interface IStep< - Fn extends StepFunction, + Fn extends StepFunction = StepFunction, > extends MergeableRecord { /** * Name of the respective step, it does not have to be unique in general but only unique per-pipeline. 
From c664b070c1a068cc84a1adcd8bfc32d5057ecf9b Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 22 Nov 2023 21:27:46 +0100 Subject: [PATCH 003/104] feat: basic pipeline dependency checking --- src/core/slicer.ts | 2 +- src/core/steps/pipeline/dependency-checker.ts | 18 ++++++++--------- src/core/steps/pipeline/index.ts | 3 +++ src/core/steps/pipeline/pipeline.ts | 6 +++--- src/core/steps/step.ts | 6 +++--- src/core/steps/steps.ts | 20 ++++++++++++++----- 6 files changed, 34 insertions(+), 21 deletions(-) create mode 100644 src/core/steps/pipeline/index.ts diff --git a/src/core/slicer.ts b/src/core/slicer.ts index 68c42b54d7..5154ee76c6 100644 --- a/src/core/slicer.ts +++ b/src/core/slicer.ts @@ -12,7 +12,7 @@ import { STEPS_PER_SLICE, SteppingSlicerInput, StepResults, - StepName, StepResult + StepResult, StepName } from './steps' import { guard } from '../util/assert' import { SliceResult, SlicingCriteria } from '../slicing' diff --git a/src/core/steps/pipeline/dependency-checker.ts b/src/core/steps/pipeline/dependency-checker.ts index f7283a2ed1..ea9f4537a0 100644 --- a/src/core/steps/pipeline/dependency-checker.ts +++ b/src/core/steps/pipeline/dependency-checker.ts @@ -1,4 +1,4 @@ -import { IStep, StepName } from '../step' +import { IStep, NameOfStep } from '../step' import { InvalidPipelineError } from './invalid-pipeline-error' import { Pipeline } from './pipeline' @@ -12,9 +12,9 @@ import { Pipeline } from './pipeline' */ export function verifyPipeline(steps: IStep[]): Pipeline { // we construct a map linking each name to its respective step - const stepMap = new Map() + const stepMap = new Map() // we track all elements without dependencies, i.e. 
those that start the pipeline - const inits: StepName[] = [] + const inits: NameOfStep[] = [] initializeSteps(steps, stepMap, inits) if(inits.length === 0) { @@ -35,12 +35,12 @@ export function verifyPipeline(steps: IStep[]): Pipeline { } } -function topoSort(inits: StepName[], stepMap: Map) { +function topoSort(inits: NameOfStep[], stepMap: Map) { // now, we topo-sort the steps - const sorted: StepName[] = [] - const visited = new Set() + const sorted: NameOfStep[] = [] + const visited = new Set() while(inits.length > 0) { - const init = inits.pop() as StepName + const init = inits.pop() as NameOfStep sorted.push(init) visited.add(init) // TODO: improve this check, maybe really remove? @@ -53,7 +53,7 @@ function topoSort(inits: StepName[], stepMap: Map) { return sorted } -function checkForInvalidDependency(steps: IStep[], stepMap: Map) { +function checkForInvalidDependency(steps: IStep[], stepMap: Map) { for(const step of steps) { for(const dep of step.dependencies) { if(!stepMap.has(dep)) { @@ -63,7 +63,7 @@ function checkForInvalidDependency(steps: IStep[], stepMap: Map } } -function initializeSteps(steps: IStep[], stepMap: Map, inits: StepName[]) { +function initializeSteps(steps: IStep[], stepMap: Map, inits: NameOfStep[]) { for(const step of steps) { const name = step.name // if the name is already in the map we have a duplicate diff --git a/src/core/steps/pipeline/index.ts b/src/core/steps/pipeline/index.ts new file mode 100644 index 0000000000..ee8629ad4f --- /dev/null +++ b/src/core/steps/pipeline/index.ts @@ -0,0 +1,3 @@ +export * from './pipeline' +export * from './dependency-checker' +export * from './invalid-pipeline-error' diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index e01f78b596..355e165d36 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -1,6 +1,6 @@ -import { IStep, StepName } from '../step' +import { IStep, NameOfStep } from '../step' export interface 
Pipeline { - readonly steps: ReadonlyMap - readonly order: StepName[] + readonly steps: ReadonlyMap + readonly order: NameOfStep[] } diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index 0d5a2a9f47..760c2d07ce 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -19,7 +19,7 @@ export type StepFunction = (...args: never[]) => unknown export type StepRequired = 'once-per-file' | 'once-per-slice' -export type StepName = string & { __brand?: 'StepName' } +export type NameOfStep = string & { __brand?: 'StepName' } /** * Defines what is to be known of a single step in the slicing process. @@ -34,7 +34,7 @@ export interface IStep< * In other words, you can have multiple steps with a name like `parse` as long as you use only one of them in a given pipeline. * This is, because these names are required in the {@link IStep#dependencies} field to refer to other steps this one relies on. */ - name: StepName + name: NameOfStep /** Human-readable description of this step */ description: string /** The main processor that essentially performs the logic of this step */ @@ -54,7 +54,7 @@ export interface IStep< * Give the names of other steps this one requires to be completed as a prerequisite (e.g., to gain access to their input). * Does not have to be transitive, this will be checked by the scheduler of the pipeline. 
*/ - dependencies: StepName[] + dependencies: NameOfStep[] } diff --git a/src/core/steps/steps.ts b/src/core/steps/steps.ts index 2bfc1111aa..b668080358 100644 --- a/src/core/steps/steps.ts +++ b/src/core/steps/steps.ts @@ -39,6 +39,7 @@ import { IStep } from './step' export const STEPS_PER_FILE = { 'parse': { + name: 'parse', description: 'Parse the given R code into an AST', processor: retrieveXmlFromRCode, required: 'once-per-file', @@ -46,9 +47,11 @@ export const STEPS_PER_FILE = { [StepOutputFormat.Internal]: internalPrinter, [StepOutputFormat.Json]: text => text, [StepOutputFormat.RdfQuads]: parseToQuads - } + }, + dependencies: [] } satisfies IStep, 'normalize': { + name: 'normalize', description: 'Normalize the AST to flowR\'s AST (first step of the normalization)', processor: normalize, required: 'once-per-file', @@ -58,9 +61,11 @@ export const STEPS_PER_FILE = { [StepOutputFormat.RdfQuads]: normalizedAstToQuads, [StepOutputFormat.Mermaid]: printNormalizedAstToMermaid, [StepOutputFormat.MermaidUrl]: printNormalizedAstToMermaidUrl - } + }, + dependencies: [] } satisfies IStep, 'dataflow': { + name: 'dataflow', description: 'Construct the dataflow graph', processor: produceDataFlowGraph, required: 'once-per-file', @@ -70,26 +75,31 @@ export const STEPS_PER_FILE = { [StepOutputFormat.RdfQuads]: dataflowGraphToQuads, [StepOutputFormat.Mermaid]: dataflowGraphToMermaid, [StepOutputFormat.MermaidUrl]: dataflowGraphToMermaidUrl - } + }, + dependencies: [] } satisfies IStep } as const export const STEPS_PER_SLICE = { 'slice': { + name: 'slice', description: 'Calculate the actual static slice from the dataflow graph and the given slicing criteria', processor: staticSlicing, required: 'once-per-slice', printer: { [StepOutputFormat.Internal]: internalPrinter - } + }, + dependencies: [ ] } satisfies IStep, 'reconstruct': { + name: 'reconstruct', description: 'Reconstruct R code from the static slice', processor: reconstructToCode, required: 'once-per-slice', printer: { 
[StepOutputFormat.Internal]: internalPrinter - } + }, + dependencies: [ ] } satisfies IStep } as const From 8a8ef0b0544a35842ae0862c89d4d5ed4a9897c1 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 22 Nov 2023 22:59:34 +0100 Subject: [PATCH 004/104] test: test error cases of topo-sort --- src/core/steps/pipeline.ts | 7 ------ src/core/steps/pipeline/dependency-checker.ts | 4 ++++ src/core/steps/pipeline/index.ts | 1 - src/core/steps/pipeline/pipeline.ts | 5 ++++ src/core/steps/step.ts | 3 ++- .../create/dependency-check-tests.ts | 24 +++++++++++++++++++ .../functionality/pipelines/pipelines.spec.ts | 6 +++++ 7 files changed, 41 insertions(+), 9 deletions(-) delete mode 100644 src/core/steps/pipeline.ts create mode 100644 test/functionality/pipelines/create/dependency-check-tests.ts create mode 100644 test/functionality/pipelines/pipelines.spec.ts diff --git a/src/core/steps/pipeline.ts b/src/core/steps/pipeline.ts deleted file mode 100644 index 7f8a365dc6..0000000000 --- a/src/core/steps/pipeline.ts +++ /dev/null @@ -1,7 +0,0 @@ -import { IStep } from './step' - -/** - * A pipeline describes a sequence of steps that are to be executed in order. - */ -// TODO: stepping slicer is repsonsible! -export type IPipeline = IStep[] diff --git a/src/core/steps/pipeline/dependency-checker.ts b/src/core/steps/pipeline/dependency-checker.ts index ea9f4537a0..590a41e95c 100644 --- a/src/core/steps/pipeline/dependency-checker.ts +++ b/src/core/steps/pipeline/dependency-checker.ts @@ -11,6 +11,10 @@ import { Pipeline } from './pipeline' * If successful, it returns the topologically sorted list of steps in order of desired execution. */ export function verifyPipeline(steps: IStep[]): Pipeline { + if(steps.length === 0) { + throw new InvalidPipelineError('Pipeline is empty') + } + // we construct a map linking each name to its respective step const stepMap = new Map() // we track all elements without dependencies, i.e. 
those that start the pipeline diff --git a/src/core/steps/pipeline/index.ts b/src/core/steps/pipeline/index.ts index ee8629ad4f..7a50feab00 100644 --- a/src/core/steps/pipeline/index.ts +++ b/src/core/steps/pipeline/index.ts @@ -1,3 +1,2 @@ export * from './pipeline' -export * from './dependency-checker' export * from './invalid-pipeline-error' diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index 355e165d36..c952dba07d 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -1,6 +1,11 @@ import { IStep, NameOfStep } from '../step' +import { verifyPipeline } from './dependency-checker' export interface Pipeline { readonly steps: ReadonlyMap readonly order: NameOfStep[] } + +export function createPipeline(steps: IStep[]): Pipeline { + return verifyPipeline(steps) +} diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index 760c2d07ce..8a395e92d9 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -27,7 +27,8 @@ export type NameOfStep = string & { __brand?: 'StepName' } * Steps will be executed synchronously, in-sequence, based on their {@link IStep#dependencies|dependencies}. */ export interface IStep< - Fn extends StepFunction = StepFunction, + // eslint-disable-next-line -- by default, we assume nothing about the function shape + Fn extends StepFunction = (...args: any[]) => any, > extends MergeableRecord { /** * Name of the respective step, it does not have to be unique in general but only unique per-pipeline. 
diff --git a/test/functionality/pipelines/create/dependency-check-tests.ts b/test/functionality/pipelines/create/dependency-check-tests.ts new file mode 100644 index 0000000000..a8f66b666e --- /dev/null +++ b/test/functionality/pipelines/create/dependency-check-tests.ts @@ -0,0 +1,24 @@ +import { createPipeline } from '../../../../src/core/steps/pipeline' +import { IStep } from '../../../../src/core/steps' +import { expect } from 'chai' +import { PARSE_WITH_R_SHELL_STEP } from '../../../../src/core/steps/all/00-parse' + +describe('dependency check', () => { + describe('error-cases', () => { + function negative(name: string, steps: IStep[], message: string | RegExp) { + it(name, () => { + expect(() => createPipeline(steps)).to.throw(message) + }) + } + negative('should throw on empty input', [], /empty/) + negative('should throw on duplicate names', + [PARSE_WITH_R_SHELL_STEP, PARSE_WITH_R_SHELL_STEP], /duplicate|not unique/) + negative('should throw on invalid dependencies', + [PARSE_WITH_R_SHELL_STEP, { ...PARSE_WITH_R_SHELL_STEP, name: 'parse-v2', dependencies: ['foo'] }], /invalid dependency|not exist/) + negative('should throw on cycles', + [PARSE_WITH_R_SHELL_STEP, + { ...PARSE_WITH_R_SHELL_STEP, name: 'parse-v1', dependencies: ['parse-v2'] }, + { ...PARSE_WITH_R_SHELL_STEP, name: 'parse-v2', dependencies: ['parse-v1'] } + ], /cycle/) + }) +}) diff --git a/test/functionality/pipelines/pipelines.spec.ts b/test/functionality/pipelines/pipelines.spec.ts new file mode 100644 index 0000000000..c929e370e2 --- /dev/null +++ b/test/functionality/pipelines/pipelines.spec.ts @@ -0,0 +1,6 @@ +import { requireAllTestsInFolder } from '../_helper/collect-tests' +import path from 'node:path' + +describe('Pipelines', () => { + describe('create', () => requireAllTestsInFolder(path.join(__dirname, 'create'))) +}) From 591396e926e4f7ff639a3ca77fa3c64754515174 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 22 Nov 2023 23:00:50 +0100 Subject: [PATCH 005/104] test: simple 
dependency ordering in pipeline --- .../pipelines/create/dependency-check-tests.ts | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/test/functionality/pipelines/create/dependency-check-tests.ts b/test/functionality/pipelines/create/dependency-check-tests.ts index a8f66b666e..5e8477ea52 100644 --- a/test/functionality/pipelines/create/dependency-check-tests.ts +++ b/test/functionality/pipelines/create/dependency-check-tests.ts @@ -1,5 +1,5 @@ import { createPipeline } from '../../../../src/core/steps/pipeline' -import { IStep } from '../../../../src/core/steps' +import { IStep, NameOfStep } from '../../../../src/core/steps' import { expect } from 'chai' import { PARSE_WITH_R_SHELL_STEP } from '../../../../src/core/steps/all/00-parse' @@ -21,4 +21,19 @@ describe('dependency check', () => { { ...PARSE_WITH_R_SHELL_STEP, name: 'parse-v2', dependencies: ['parse-v1'] } ], /cycle/) }) + describe('default behavior', () => { + function positive(name: string, steps: IStep[], expected: NameOfStep[]) { + it(name, () => { + const pipeline = createPipeline(steps) + expect([...pipeline.steps.keys()]).to.have.members(expected) + expect(pipeline.order).to.have.ordered.members(expected) + }) + } + + positive('should work on a single step', [PARSE_WITH_R_SHELL_STEP], ['parse']) + positive('should work on a single step with dependencies', [ + PARSE_WITH_R_SHELL_STEP, + { ...PARSE_WITH_R_SHELL_STEP, name: 'parse-v2', dependencies: ['parse'] } + ], ['parse', 'parse-v2']) + }) }) From 4c79024479ece7348825de35c2645facc90f6b44 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 22 Nov 2023 23:02:02 +0100 Subject: [PATCH 006/104] test: improve checking of all permutations with toposort --- .../pipelines/create/dependency-check-tests.ts | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/test/functionality/pipelines/create/dependency-check-tests.ts b/test/functionality/pipelines/create/dependency-check-tests.ts index 
5e8477ea52..1ffbf1ca6f 100644 --- a/test/functionality/pipelines/create/dependency-check-tests.ts +++ b/test/functionality/pipelines/create/dependency-check-tests.ts @@ -2,6 +2,7 @@ import { createPipeline } from '../../../../src/core/steps/pipeline' import { IStep, NameOfStep } from '../../../../src/core/steps' import { expect } from 'chai' import { PARSE_WITH_R_SHELL_STEP } from '../../../../src/core/steps/all/00-parse' +import { allPermutations } from '../../../../src/util/arrays' describe('dependency check', () => { describe('error-cases', () => { @@ -22,11 +23,14 @@ describe('dependency check', () => { ], /cycle/) }) describe('default behavior', () => { - function positive(name: string, steps: IStep[], expected: NameOfStep[]) { + function positive(name: string, rawSteps: IStep[], expected: NameOfStep[]) { it(name, () => { - const pipeline = createPipeline(steps) - expect([...pipeline.steps.keys()]).to.have.members(expected) - expect(pipeline.order).to.have.ordered.members(expected) + // try all permutations + for(const steps of allPermutations(rawSteps)) { + const pipeline = createPipeline(steps) + expect([...pipeline.steps.keys()]).to.have.members(expected, `should have the correct keys for ${JSON.stringify(steps)}`) + expect(pipeline.order).to.have.ordered.members(expected, `should have the correct keys for ${JSON.stringify(steps)}`) + } }) } From 0717aadb243f974a15b371aa7700a10a35b2e2af Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 22 Nov 2023 23:03:42 +0100 Subject: [PATCH 007/104] test: add default pipeline to the permutation tests --- .../pipelines/create/dependency-check-tests.ts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/functionality/pipelines/create/dependency-check-tests.ts b/test/functionality/pipelines/create/dependency-check-tests.ts index 1ffbf1ca6f..ec9d1d883b 100644 --- a/test/functionality/pipelines/create/dependency-check-tests.ts +++ b/test/functionality/pipelines/create/dependency-check-tests.ts @@ -3,6
+3,10 @@ import { IStep, NameOfStep } from '../../../../src/core/steps' import { expect } from 'chai' import { PARSE_WITH_R_SHELL_STEP } from '../../../../src/core/steps/all/00-parse' import { allPermutations } from '../../../../src/util/arrays' +import { NORMALIZE } from '../../../../src/core/steps/all/10-normalize' +import { LEGACY_STATIC_DATAFLOW } from '../../../../src/core/steps/all/20-dataflow' +import { STATIC_SLICE } from '../../../../src/core/steps/all/30-slice' +import { NAIVE_RECONSTRUCT } from '../../../../src/core/steps/all/40-reconstruct' describe('dependency check', () => { describe('error-cases', () => { @@ -39,5 +43,13 @@ describe('dependency check', () => { PARSE_WITH_R_SHELL_STEP, { ...PARSE_WITH_R_SHELL_STEP, name: 'parse-v2', dependencies: ['parse'] } ], ['parse', 'parse-v2']) + // they will be shuffled in all permutations + positive('default pipeline', [ + PARSE_WITH_R_SHELL_STEP, + NORMALIZE, + LEGACY_STATIC_DATAFLOW, + STATIC_SLICE, + NAIVE_RECONSTRUCT + ], ['parse', 'normalize', 'dataflow', 'slice', 'reconstruct']) }) }) From 970abdf2d6ceaa08fca643765ff622f29a4d0fd0 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 22 Nov 2023 23:04:42 +0100 Subject: [PATCH 008/104] lint-fix: remove unnecessary input --- src/core/steps/steps-provider.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/core/steps/steps-provider.ts b/src/core/steps/steps-provider.ts index 96d3c2da89..6f189e481e 100644 --- a/src/core/steps/steps-provider.ts +++ b/src/core/steps/steps-provider.ts @@ -5,7 +5,6 @@ * @module */ import { MergeableRecord } from '../../util/objects' -import { IStep } from './step' export interface StepsConfiguration extends MergeableRecord { From ba242dfa46725b75d50b8efecd1b13c12b493e5e Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 11:01:31 +0100 Subject: [PATCH 009/104] refactor: improve names involved in the pipeline construction --- src/core/steps/pipeline/dependency-checker.ts | 7 ++++--- 
src/core/steps/pipeline/invalid-pipeline-error.ts | 3 +++ src/core/steps/pipeline/pipeline.ts | 4 ++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/core/steps/pipeline/dependency-checker.ts b/src/core/steps/pipeline/dependency-checker.ts index 590a41e95c..537ef2a48a 100644 --- a/src/core/steps/pipeline/dependency-checker.ts +++ b/src/core/steps/pipeline/dependency-checker.ts @@ -9,8 +9,9 @@ import { Pipeline } from './pipeline' * 2) all dependencies of steps are valid (i.e., refer to existing steps) * 3) there are no cycles in the dependency graph * If successful, it returns the topologically sorted list of steps in order of desired execution. + * @throws InvalidPipelineError if any of the above conditions are not met */ -export function verifyPipeline(steps: IStep[]): Pipeline { +export function verifyAndBuildPipeline(steps: IStep[]): Pipeline { if(steps.length === 0) { throw new InvalidPipelineError('Pipeline is empty') } @@ -24,7 +25,7 @@ export function verifyPipeline(steps: IStep[]): Pipeline { if(inits.length === 0) { throw new InvalidPipelineError('Pipeline has no initial steps (i.e., it contains no step without dependencies)') } - const sorted = topoSort(inits, stepMap) + const sorted = topologicalSort(inits, stepMap) if(sorted.length !== stepMap.size) { // check if any of the dependencies in the map are invalid @@ -39,7 +40,7 @@ export function verifyPipeline(steps: IStep[]): Pipeline { } } -function topoSort(inits: NameOfStep[], stepMap: Map) { +function topologicalSort(inits: NameOfStep[], stepMap: Map) { // now, we topo-sort the steps const sorted: NameOfStep[] = [] const visited = new Set() diff --git a/src/core/steps/pipeline/invalid-pipeline-error.ts b/src/core/steps/pipeline/invalid-pipeline-error.ts index a570178774..a673273528 100644 --- a/src/core/steps/pipeline/invalid-pipeline-error.ts +++ b/src/core/steps/pipeline/invalid-pipeline-error.ts @@ -1,3 +1,6 @@ +/** + * Thrown if for whatever reason, the pipeline is invalid. 
+ */ export class InvalidPipelineError extends Error { constructor(message: string) { super(message) diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index c952dba07d..d3c17da027 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -1,5 +1,5 @@ import { IStep, NameOfStep } from '../step' -import { verifyPipeline } from './dependency-checker' +import { verifyAndBuildPipeline } from './dependency-checker' export interface Pipeline { readonly steps: ReadonlyMap @@ -7,5 +7,5 @@ export interface Pipeline { } export function createPipeline(steps: IStep[]): Pipeline { - return verifyPipeline(steps) + return verifyAndBuildPipeline(steps) } From 5837668eb94b7beb044a716f7afa8835e4794d56 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 11:04:18 +0100 Subject: [PATCH 010/104] doc: document the pipeline interface and friends --- src/core/steps/pipeline/pipeline.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index d3c17da027..e764f94a9b 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -1,11 +1,19 @@ import { IStep, NameOfStep } from '../step' import { verifyAndBuildPipeline } from './dependency-checker' +/** + * A pipeline is a collection of {@link Pipeline#steps|steps} that are executed in a certain {@link Pipeline#order|order}. + * It is to be created {@link createPipeline}. + */ export interface Pipeline { readonly steps: ReadonlyMap readonly order: NameOfStep[] } +/** + * Creates a pipeline from the given steps. + * Refer to {@link verifyAndBuildPipeline} for details and constraints on the steps. 
+ */ export function createPipeline(steps: IStep[]): Pipeline { return verifyAndBuildPipeline(steps) } From 530671834e90d7948b1952d8f8ddfa708bc1cc4b Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 11:33:53 +0100 Subject: [PATCH 011/104] Allow scope declarations in structured commit messages (#512) * git, doc: document new optional scope and much more readable regex in commit-msg * feat(commit-msg): print out message even if it is invalid * feat(commit-msg): print commit message even if it is incorrect * doc: refer to the contributing md instead of just printing the regex if the git commit message is wrong * refactor(commit-msg): slightly reformat output * refactor(commit-msg): remove the `log` type * doc(contributing): document new scopes for commit messages --- .githooks/commit-msg | 14 ++++++++++++-- .github/CONTRIBUTING.md | 10 ++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/.githooks/commit-msg b/.githooks/commit-msg index 5ec8c63d1f..ddf8b80e68 100755 --- a/.githooks/commit-msg +++ b/.githooks/commit-msg @@ -4,9 +4,19 @@ message_file="$1" message=$(cat "$message_file") # Define the regular expression pattern -regex="^((\[(no|skip) ci\] )?(((feat|tests?|lint|refactor|ci|git|special|doc|typo|log|ts|fix|wip|docker|dep)(-fix|-fail)?(, ?)?)+: |Merge (remote-tracking )?branch|Auto-merging).+|\[release:(patch|minor|major)\] .+)" +regex="^((\[(no|skip) ci\] )?" # allow to skip ci steps if required +regex="$regex(Merge (remote-tracking )?branch|Auto-merging|" # allow merge commit messages + regex="$regex(" # allow multiple types + regex="$regex(feat|tests?|lint|refactor|ci|git|special|doc|typo|ts|fix|wip|docker|dep)" # all valid types + regex="$regex(-fix|-fail)?" # optional fail suffix + regex="$regex(\([^)]+\))?" # optional scope + regex="$regex(, ?)?"
# optional comma between types + regex="$regex)+: " # at least one type is required +regex="$regex)" # allow arbitrary message (no end marker) +regex="$regex|\[release:(patch|minor|major)\] .+)" # alternatively, allow release commits (only to be done on main) if ! echo "$message" | grep -qE "$regex"; then - echo "[POLICY] Your message is not formatted correctly. Respect the regex: '$regex'!" + echo "[POLICY] Your message is not formatted correctly. Please respect the style defined in '.github/CONTRIBUTING.md'." + printf "[POLICY] Your message was (ignoring git comments):\n\n%s\n" "$(echo "$message" | grep -vE "^#")" exit 1 fi diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 3bf15ef39e..b7410fe2a5 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -33,8 +33,8 @@ If you have any questions, refer to the [wiki](https://github.com/Code-Inspect/f ## Commit Messages -We structure our commit messages (enforced by our git-hooks) using the format `: `. -Currently, the following types are at your disposal (more may have been or are still available, but please restrict yourself to the following): +We structure our commit messages (enforced by our git-hooks) using the format `(): ` (with `()` being optional). +Currently, the following `` are at your disposal (more may have been or are still available, but please restrict yourself to the following): | name | description | @@ -49,7 +49,6 @@ Currently, the following types are at your disposal (more may have been or are s | `lint` | Adapted or updated linter-issues. | | `doc` | Updated the documentation of *flowR*. | | `typo` | Dealt with a small typo/a grammatical mistake. | -| `log` | Improved or updated the logging of *flowR*. | | `ts` | Performed something typescript-specific (e.g., reconfigured the `tsconfig.json`). | | `wip` | *Use this only in combination with another type*. It marks the commit to be unfinished. | | `special` | *Use this only if none of the other categories apply*. 
Explain the details in your commit message. | @@ -65,9 +64,12 @@ Although you can give the same type repeatedly - if you think you should, please With this, the artificial message -> `feat, test-fix: Support for branching in dataflow, fixed branching-test` +> `feat, test-fix: Support for branching in dataflow, fixed branching test` represents the addition of a new feature and the fix of a corresponding test. +With scopes, it could look like this: + +> `feat, test-fix(dataflow): Support branching & fixed branching test` To skip the `ci`, you can prefix the commit message with `[skip ci]`. From fc9d0ca516ebf9b274076a561d4fa7059f17539c Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 12:04:23 +0100 Subject: [PATCH 012/104] feat, wip(pipeline): working on `decoration` for dependencies in step order --- src/core/steps/step.ts | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index 8a395e92d9..174e41dfbf 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -21,6 +21,25 @@ export type StepRequired = 'once-per-file' | 'once-per-slice' export type NameOfStep = string & { __brand?: 'StepName' } +/** + * Contains the data to specify the order of {@link IStep|steps} in a pipeline. + */ +export interface IStepOrder { + /** + * Give the names of other steps this one requires to be completed as a prerequisite (e.g., to gain access to their input). + * Does not have to be transitive, this will be checked by the scheduler of the pipeline. + */ + dependencies: NameOfStep[] + /** + * This is similar to {@link dependencies}, but is used to say that a given step _decorates_ another one. + * This imbues two requirements: + * The step must take the output of the decorated step as input, and produce the same output as the decorated step. + * + * If so, it is ensured that _this_ step is executed _after_ the step it decorates, but before any step that depends on it. 
+ */ + decorates: NameOfStep +} + /** * Defines what is to be known of a single step in the slicing process. * It wraps around a single {@link IStep#processor|processor} function, providing additional information. @@ -29,7 +48,7 @@ export type NameOfStep = string & { __brand?: 'StepName' } export interface IStep< // eslint-disable-next-line -- by default, we assume nothing about the function shape Fn extends StepFunction = (...args: any[]) => any, -> extends MergeableRecord { +> extends MergeableRecord, IStepOrder { /** * Name of the respective step, it does not have to be unique in general but only unique per-pipeline. * In other words, you can have multiple steps with a name like `parse` as long as you use only one of them in a given pipeline. @@ -51,11 +70,6 @@ export interface IStep< // we always want to have the internal printer [StepOutputFormat.Internal]: InternalStepPrinter } - /** - * Give the names of other steps this one requires to be completed as a prerequisite (e.g., to gain access to their input). - * Does not have to be transitive, this will be checked by the scheduler of the pipeline. - */ - dependencies: NameOfStep[] } From 4134d8fc7bd936db4d3a2f15615e8373a960af45 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 12:04:57 +0100 Subject: [PATCH 013/104] refactor(pipeline): optional decorates step, and name as part of the step order --- src/core/steps/step.ts | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index 174e41dfbf..c30068ec47 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -25,6 +25,12 @@ export type NameOfStep = string & { __brand?: 'StepName' } * Contains the data to specify the order of {@link IStep|steps} in a pipeline. */ export interface IStepOrder { + /** + * Name of the respective step, it does not have to be unique in general but only unique per-pipeline. 
+ * In other words, you can have multiple steps with a name like `parse` as long as you use only one of them in a given pipeline. + * This is, because these names are required in the {@link IStep#dependencies} field to refer to other steps this one relies on. + */ + name: NameOfStep /** * Give the names of other steps this one requires to be completed as a prerequisite (e.g., to gain access to their input). * Does not have to be transitive, this will be checked by the scheduler of the pipeline. @@ -37,7 +43,7 @@ export interface IStepOrder { * * If so, it is ensured that _this_ step is executed _after_ the step it decorates, but before any step that depends on it. */ - decorates: NameOfStep + decorates?: NameOfStep } /** @@ -49,12 +55,6 @@ export interface IStep< // eslint-disable-next-line -- by default, we assume nothing about the function shape Fn extends StepFunction = (...args: any[]) => any, > extends MergeableRecord, IStepOrder { - /** - * Name of the respective step, it does not have to be unique in general but only unique per-pipeline. - * In other words, you can have multiple steps with a name like `parse` as long as you use only one of them in a given pipeline. - * This is, because these names are required in the {@link IStep#dependencies} field to refer to other steps this one relies on. 
- */ - name: NameOfStep /** Human-readable description of this step */ description: string /** The main processor that essentially performs the logic of this step */ From e22fca137f4533a213e15cb00d9f1010cc914b8e Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 12:14:22 +0100 Subject: [PATCH 014/104] refactor(pipeline-dep): add rule invalidation numbers to the dependency checker --- src/core/steps/pipeline/dependency-checker.ts | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/core/steps/pipeline/dependency-checker.ts b/src/core/steps/pipeline/dependency-checker.ts index 537ef2a48a..a130a9cc1a 100644 --- a/src/core/steps/pipeline/dependency-checker.ts +++ b/src/core/steps/pipeline/dependency-checker.ts @@ -5,25 +5,28 @@ import { Pipeline } from './pipeline' /** * Given a set of {@link IStep|steps} with their dependencies, this function verifies that + * 0) the pipeline is not empty * 1) all names of steps are unique for the given pipeline - * 2) all dependencies of steps are valid (i.e., refer to existing steps) + * 2) all {@link IStepOrder#dependencies|dependencies} of steps are valid (i.e., refer to existing steps) * 3) there are no cycles in the dependency graph + * 4) the target of a {@link IStepOrder#decorates|decoration} exists + * 5) the target of a {@link IStepOrder#decorates|decoration} is not part of the {@link IStepOrder#dependencies|dependencies} * If successful, it returns the topologically sorted list of steps in order of desired execution. * @throws InvalidPipelineError if any of the above conditions are not met */ export function verifyAndBuildPipeline(steps: IStep[]): Pipeline { if(steps.length === 0) { - throw new InvalidPipelineError('Pipeline is empty') + throw new InvalidPipelineError('0) Pipeline is empty') } // we construct a map linking each name to its respective step const stepMap = new Map() - // we track all elements without dependencies, i.e. 
those that start the pipeline + // we track all elements without dependencies, i.e., those that start the pipeline const inits: NameOfStep[] = [] initializeSteps(steps, stepMap, inits) if(inits.length === 0) { - throw new InvalidPipelineError('Pipeline has no initial steps (i.e., it contains no step without dependencies)') + throw new InvalidPipelineError('3) Pipeline has no initial steps (i.e., it contains no step without dependencies)') } const sorted = topologicalSort(inits, stepMap) @@ -31,7 +34,7 @@ export function verifyAndBuildPipeline(steps: IStep[]): Pipeline { // check if any of the dependencies in the map are invalid checkForInvalidDependency(steps, stepMap) // otherwise, we assume a cycle - throw new InvalidPipelineError('Pipeline contains at least one cycle') + throw new InvalidPipelineError('3) Pipeline contains at least one cycle') } return { @@ -62,9 +65,12 @@ function checkForInvalidDependency(steps: IStep[], stepMap: Map, inits: const name = step.name // if the name is already in the map we have a duplicate if(stepMap.has(name)) { - throw new InvalidPipelineError(`Step name "${name}" is not unique in the pipeline`) + throw new InvalidPipelineError(`1) Step name "${name}" is not unique in the pipeline`) } stepMap.set(name, step) if(step.dependencies.length === 0) { From 6689b400890b7ae31ec84d8771027587f63affb9 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 12:24:58 +0100 Subject: [PATCH 015/104] feat(pipeline): topo-sort now has borderline support for decoration step --- src/core/steps/pipeline/dependency-checker.ts | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/src/core/steps/pipeline/dependency-checker.ts b/src/core/steps/pipeline/dependency-checker.ts index a130a9cc1a..94b7a34708 100644 --- a/src/core/steps/pipeline/dependency-checker.ts +++ b/src/core/steps/pipeline/dependency-checker.ts @@ -10,7 +10,7 @@ import { Pipeline } from './pipeline' * 2) all {@link 
IStepOrder#dependencies|dependencies} of steps are valid (i.e., refer to existing steps) * 3) there are no cycles in the dependency graph * 4) the target of a {@link IStepOrder#decorates|decoration} exists - * 5) the target of a {@link IStepOrder#decorates|decoration} is not part of the {@link IStepOrder#dependencies|dependencies} + * 5) if a decoration applies, all of its dependencies are already in the pipeline * If successful, it returns the topologically sorted list of steps in order of desired execution. * @throws InvalidPipelineError if any of the above conditions are not met */ @@ -46,15 +46,28 @@ export function verifyAndBuildPipeline(steps: IStep[]): Pipeline { function topologicalSort(inits: NameOfStep[], stepMap: Map) { // now, we topo-sort the steps const sorted: NameOfStep[] = [] - const visited = new Set() + const unvisited = new Set(stepMap.keys()) while(inits.length > 0) { const init = inits.pop() as NameOfStep sorted.push(init) - visited.add(init) - // TODO: improve this check, maybe really remove? 
- for(const [key, step] of stepMap.entries()) { - if(!visited.has(key) && step.dependencies.filter(dep => !visited.has(dep)).length === 0) { - inits.push(key) + unvisited.delete(init) + for(const elem of unvisited) { + const step = stepMap.get(elem) as IStep + // we should do that better, for now we do not assume that many dependencies + const hasUnsatisfiedDependencies = step.dependencies.some(dep => unvisited.has(dep)) + + const last = sorted[sorted.length - 1] + // if the step decorates the last step in the sorted list, we can add it to the list, but only if all its dependencies are already in the list + if(step.decorates === last) { + // if dependencies are still missing, we cannot add it to the list and fail TODO: if not all of its dependencies which remain decorate the last step + // TODO: we currently do not allow decorations to be dependent on each other for the same step + if(hasUnsatisfiedDependencies) { + throw new InvalidPipelineError(`5) Step "${step.name}" decorates step "${step.decorates}" but not all of its dependencies are satisfied`) + } + sorted.push(elem) + unvisited.delete(elem) + } else if(hasUnsatisfiedDependencies) { + inits.push(elem) } } } From 9ca01026a0b22ac80ac88f9c3d5b6266fd54c4ea Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 12:25:50 +0100 Subject: [PATCH 016/104] refactor(pipeline): hoist loop invariant `last` --- src/core/steps/pipeline/dependency-checker.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/steps/pipeline/dependency-checker.ts b/src/core/steps/pipeline/dependency-checker.ts index 94b7a34708..14277bdc18 100644 --- a/src/core/steps/pipeline/dependency-checker.ts +++ b/src/core/steps/pipeline/dependency-checker.ts @@ -51,12 +51,13 @@ function topologicalSort(inits: NameOfStep[], stepMap: Map) { const init = inits.pop() as NameOfStep sorted.push(init) unvisited.delete(init) + const last = sorted[sorted.length - 1] + for(const elem of unvisited) { const step = 
stepMap.get(elem) as IStep // we should do that better, for now we do not assume that many dependencies const hasUnsatisfiedDependencies = step.dependencies.some(dep => unvisited.has(dep)) - const last = sorted[sorted.length - 1] // if the step decorates the last step in the sorted list, we can add it to the list, but only if all its dependencies are already in the list if(step.decorates === last) { // if dependencies are still missing, we cannot add it to the list and fail TODO: if not all of its dependencies which remain decorate the last step From 1e89a9920c2d4239e0225a0bbcf2fbdb18d10abb Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 12:53:54 +0100 Subject: [PATCH 017/104] feat(pipeline): toposort support for decorator elements --- src/core/steps/pipeline/dependency-checker.ts | 70 +++++++++++++++---- 1 file changed, 57 insertions(+), 13 deletions(-) diff --git a/src/core/steps/pipeline/dependency-checker.ts b/src/core/steps/pipeline/dependency-checker.ts index 14277bdc18..485ebc5adc 100644 --- a/src/core/steps/pipeline/dependency-checker.ts +++ b/src/core/steps/pipeline/dependency-checker.ts @@ -43,36 +43,80 @@ export function verifyAndBuildPipeline(steps: IStep[]): Pipeline { } } +function initializeUnvisited(stepMap: Map, inits: NameOfStep[]) { + const unvisited = new Set(stepMap.keys()) + for(const init of inits) { + unvisited.delete(init) + } + return unvisited +} + + function topologicalSort(inits: NameOfStep[], stepMap: Map) { // now, we topo-sort the steps const sorted: NameOfStep[] = [] - const unvisited = new Set(stepMap.keys()) + // we subsequently remove every step that we visit to improve the iteration over all remaining elements to test + const unvisited = initializeUnvisited(stepMap, inits) + while(inits.length > 0) { const init = inits.pop() as NameOfStep sorted.push(init) - unvisited.delete(init) const last = sorted[sorted.length - 1] + // we need to sort decorators as well, but only if they have unsatisfied dependencies
that are decorating the same step + const decoratorsOfLastInits = [] + // these decorators still have dependencies open; we have to check if they can be satisfied by the other steps to add + const decoratorsOfLastOthers = new Set() + // conventional topo-sort elements that now no longer have unsatisfied dependencies + const otherInits = [] + for(const elem of unvisited) { const step = stepMap.get(elem) as IStep - // we should do that better, for now we do not assume that many dependencies - const hasUnsatisfiedDependencies = step.dependencies.some(dep => unvisited.has(dep)) - - // if the step decorates the last step in the sorted list, we can add it to the list, but only if all its dependencies are already in the list + const hasUnvisitedDeps = step.dependencies.some(d => unvisited.has(d)) if(step.decorates === last) { - // if dependencies are still missing, we cannot add it to the list and fail TODO: if not all of its dependencies which remain decorate the last step - // TODO: we currently do not allow decorations to be dependent on each other for the same step - if(hasUnsatisfiedDependencies) { - throw new InvalidPipelineError(`5) Step "${step.name}" decorates step "${step.decorates}" but not all of its dependencies are satisfied`) + unvisited.delete(elem) + if(hasUnvisitedDeps) { + decoratorsOfLastOthers.add(elem) + } else { + unvisited.delete(elem) + decoratorsOfLastInits.push(elem) } - sorted.push(elem) + } else if(hasUnvisitedDeps) { + otherInits.push(elem) + } + } + // we can add all decorators with satisfied dependencies + inits.push(...decoratorsOfLastInits) + + // for the other decorators we have to cycle until we find a solution, or know, that no solution exists + topologicallyInsertDecoratorElements(decoratorsOfLastOthers, stepMap, unvisited, inits) + + for(const elem of otherInits) { + unvisited.delete(elem) + inits.push(elem) + } + } + return sorted +} + +function topologicallyInsertDecoratorElements(decoratorsOfLastOthers: Set, stepMap: Map, 
unvisited: Set, inits: NameOfStep[]) { + let changed = true + while(changed) { + changed = false + for(const elem of [...decoratorsOfLastOthers]) { + const step = stepMap.get(elem) as IStep + const hasUnvisitedDeps = step.dependencies.some(d => unvisited.has(d)) + if(!hasUnvisitedDeps) { unvisited.delete(elem) - } else if(hasUnsatisfiedDependencies) { + decoratorsOfLastOthers.delete(elem) inits.push(elem) + changed = true } } } - return sorted + if(decoratorsOfLastOthers.size > 0) { + throw new InvalidPipelineError(`5) Pipeline contains at least one decoration cycle: ${JSON.stringify(decoratorsOfLastOthers)}`) + } } function checkForInvalidDependency(steps: IStep[], stepMap: Map) { From 3f1afc6b84e550e6be0c962372261abb687cb29c Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 12:59:54 +0100 Subject: [PATCH 018/104] refactor, fix(pipeline): improve init handling of decorator steps and cleanup decoration toposort --- src/core/steps/pipeline/dependency-checker.ts | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/core/steps/pipeline/dependency-checker.ts b/src/core/steps/pipeline/dependency-checker.ts index 485ebc5adc..9869ab6bc9 100644 --- a/src/core/steps/pipeline/dependency-checker.ts +++ b/src/core/steps/pipeline/dependency-checker.ts @@ -43,6 +43,9 @@ export function verifyAndBuildPipeline(steps: IStep[]): Pipeline { } } +/** + * Keep track of all steps that are not already part of the `inits` list. 
+ */ function initializeUnvisited(stepMap: Map, inits: NameOfStep[]) { const unvisited = new Set(stepMap.keys()) for(const init of inits) { @@ -53,18 +56,15 @@ function initializeUnvisited(stepMap: Map, inits: NameOfStep[ function topologicalSort(inits: NameOfStep[], stepMap: Map) { - // now, we topo-sort the steps const sorted: NameOfStep[] = [] + // we subsequently remove every step that we visit to improve the iteration over all remaining elements to test const unvisited = initializeUnvisited(stepMap, inits) while(inits.length > 0) { const init = inits.pop() as NameOfStep sorted.push(init) - const last = sorted[sorted.length - 1] - // we need to sort decorators as well, but only if they have unsatisfied dependencies that are decorating the same step - const decoratorsOfLastInits = [] // these decorators still have dependencies open; we have to check if they can be satisfied by the other steps to add const decoratorsOfLastOthers = new Set() // conventional topo-sort elements that now no longer have unsatisfied dependencies @@ -73,20 +73,18 @@ function topologicalSort(inits: NameOfStep[], stepMap: Map) { for(const elem of unvisited) { const step = stepMap.get(elem) as IStep const hasUnvisitedDeps = step.dependencies.some(d => unvisited.has(d)) - if(step.decorates === last) { + if(step.decorates === init) { unvisited.delete(elem) if(hasUnvisitedDeps) { decoratorsOfLastOthers.add(elem) } else { unvisited.delete(elem) - decoratorsOfLastInits.push(elem) + inits.push(elem) } - } else if(hasUnvisitedDeps) { + } else if(!hasUnvisitedDeps) { otherInits.push(elem) } } - // we can add all decorators with satisfied dependencies - inits.push(...decoratorsOfLastInits) // for the other decorators we have to cycle until we find a solution, or know, that no solution exists topologicallyInsertDecoratorElements(decoratorsOfLastOthers, stepMap, unvisited, inits) From 57b0e744d415d90b76ee7b38f97a5bb1937a42a9 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 
13:07:59 +0100 Subject: [PATCH 019/104] test-fix(pipeline): check invalid pipeline before starting to run the toposort --- src/core/steps/pipeline/dependency-checker.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/steps/pipeline/dependency-checker.ts b/src/core/steps/pipeline/dependency-checker.ts index 9869ab6bc9..b809668441 100644 --- a/src/core/steps/pipeline/dependency-checker.ts +++ b/src/core/steps/pipeline/dependency-checker.ts @@ -28,11 +28,11 @@ export function verifyAndBuildPipeline(steps: IStep[]): Pipeline { if(inits.length === 0) { throw new InvalidPipelineError('3) Pipeline has no initial steps (i.e., it contains no step without dependencies)') } - const sorted = topologicalSort(inits, stepMap) + // check if any of the dependencies in the map are invalid + checkForInvalidDependency(steps, stepMap) + const sorted = topologicalSort(inits, stepMap) if(sorted.length !== stepMap.size) { - // check if any of the dependencies in the map are invalid - checkForInvalidDependency(steps, stepMap) // otherwise, we assume a cycle throw new InvalidPipelineError('3) Pipeline contains at least one cycle') } @@ -67,7 +67,7 @@ function topologicalSort(inits: NameOfStep[], stepMap: Map) { // these decorators still have dependencies open; we have to check if they can be satisfied by the other steps to add const decoratorsOfLastOthers = new Set() - // conventional topo-sort elements that now no longer have unsatisfied dependencies + // conventional topological-sort elements that now no longer have unsatisfied dependencies const otherInits = [] for(const elem of unvisited) { From 4afcaee1c1a239776dd448d0555697134a09fb6d Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 13:20:57 +0100 Subject: [PATCH 020/104] refactor(pipeline): clean up handling of visited elements --- src/core/steps/pipeline/dependency-checker.ts | 56 ++++++++----------- 1 file changed, 22 insertions(+), 34 deletions(-) diff --git 
a/src/core/steps/pipeline/dependency-checker.ts b/src/core/steps/pipeline/dependency-checker.ts index b809668441..cc6724ce59 100644 --- a/src/core/steps/pipeline/dependency-checker.ts +++ b/src/core/steps/pipeline/dependency-checker.ts @@ -28,11 +28,11 @@ export function verifyAndBuildPipeline(steps: IStep[]): Pipeline { if(inits.length === 0) { throw new InvalidPipelineError('3) Pipeline has no initial steps (i.e., it contains no step without dependencies)') } - // check if any of the dependencies in the map are invalid - checkForInvalidDependency(steps, stepMap) const sorted = topologicalSort(inits, stepMap) if(sorted.length !== stepMap.size) { + // check if any of the dependencies in the map are invalid + checkForInvalidDependency(steps, stepMap) // otherwise, we assume a cycle throw new InvalidPipelineError('3) Pipeline contains at least one cycle') } @@ -43,69 +43,57 @@ export function verifyAndBuildPipeline(steps: IStep[]): Pipeline { } } -/** - * Keep track of all steps that are not already part of the `inits` list. 
- */ -function initializeUnvisited(stepMap: Map, inits: NameOfStep[]) { - const unvisited = new Set(stepMap.keys()) - for(const init of inits) { - unvisited.delete(init) - } - return unvisited -} - function topologicalSort(inits: NameOfStep[], stepMap: Map) { const sorted: NameOfStep[] = [] - - // we subsequently remove every step that we visit to improve the iteration over all remaining elements to test - const unvisited = initializeUnvisited(stepMap, inits) + const visited = new Set() while(inits.length > 0) { const init = inits.pop() as NameOfStep sorted.push(init) + visited.add(init) // these decorators still have dependencies open; we have to check if they can be satisfied by the other steps to add const decoratorsOfLastOthers = new Set() // conventional topological-sort elements that now no longer have unsatisfied dependencies const otherInits = [] - for(const elem of unvisited) { - const step = stepMap.get(elem) as IStep - const hasUnvisitedDeps = step.dependencies.some(d => unvisited.has(d)) + for(const [elem, step] of stepMap.entries()) { + if(visited.has(elem)) { + continue + } + const allDepsSatisfied = step.dependencies.every(d => visited.has(d)) if(step.decorates === init) { - unvisited.delete(elem) - if(hasUnvisitedDeps) { - decoratorsOfLastOthers.add(elem) - } else { - unvisited.delete(elem) + if(allDepsSatisfied) { inits.push(elem) + } else { + decoratorsOfLastOthers.add(elem) } - } else if(!hasUnvisitedDeps) { + } else if(allDepsSatisfied) { otherInits.push(elem) } } // for the other decorators we have to cycle until we find a solution, or know, that no solution exists - topologicallyInsertDecoratorElements(decoratorsOfLastOthers, stepMap, unvisited, inits) + topologicallyInsertDecoratorElements(decoratorsOfLastOthers, stepMap, visited, inits) - for(const elem of otherInits) { - unvisited.delete(elem) - inits.push(elem) - } + inits.push(...otherInits) } return sorted } -function topologicallyInsertDecoratorElements(decoratorsOfLastOthers: Set, 
stepMap: Map, unvisited: Set, inits: NameOfStep[]) { +function topologicallyInsertDecoratorElements(decoratorsOfLastOthers: Set, stepMap: Map, visited: Set, inits: NameOfStep[]) { + if(decoratorsOfLastOthers.size === 0) { + return + } + let changed = true while(changed) { changed = false for(const elem of [...decoratorsOfLastOthers]) { const step = stepMap.get(elem) as IStep - const hasUnvisitedDeps = step.dependencies.some(d => unvisited.has(d)) - if(!hasUnvisitedDeps) { - unvisited.delete(elem) + const allDepsSatisfied = step.dependencies.every(d => visited.has(d)) + if(allDepsSatisfied) { decoratorsOfLastOthers.delete(elem) inits.push(elem) changed = true From ce8b79a2649124ec25f825b68bab5a83c940aca9 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 13:32:01 +0100 Subject: [PATCH 021/104] refactor, test-fix(pipeline): decorating steps can never be inits --- src/core/steps/pipeline/dependency-checker.ts | 6 +- .../create/dependency-check-tests.ts | 57 ++++++++++++++----- 2 files changed, 48 insertions(+), 15 deletions(-) diff --git a/src/core/steps/pipeline/dependency-checker.ts b/src/core/steps/pipeline/dependency-checker.ts index cc6724ce59..12ff2f5345 100644 --- a/src/core/steps/pipeline/dependency-checker.ts +++ b/src/core/steps/pipeline/dependency-checker.ts @@ -1,6 +1,7 @@ import { IStep, NameOfStep } from '../step' import { InvalidPipelineError } from './invalid-pipeline-error' import { Pipeline } from './pipeline' +import { jsonReplacer } from '../../../util/json' /** @@ -101,7 +102,7 @@ function topologicallyInsertDecoratorElements(decoratorsOfLastOthers: Set 0) { - throw new InvalidPipelineError(`5) Pipeline contains at least one decoration cycle: ${JSON.stringify(decoratorsOfLastOthers)}`) + throw new InvalidPipelineError(`5) Pipeline contains at least one decoration cycle: ${JSON.stringify(decoratorsOfLastOthers, jsonReplacer)}`) } } @@ -126,7 +127,8 @@ function initializeSteps(steps: IStep[], stepMap: Map, inits: throw new 
InvalidPipelineError(`1) Step name "${name}" is not unique in the pipeline`) } stepMap.set(name, step) - if(step.dependencies.length === 0) { + // only steps that have no dependencies and do not decorate others can be initial steps + if(step.dependencies.length === 0 && step.decorates === undefined) { inits.push(name) } } diff --git a/test/functionality/pipelines/create/dependency-check-tests.ts b/test/functionality/pipelines/create/dependency-check-tests.ts index ec9d1d883b..30ad821086 100644 --- a/test/functionality/pipelines/create/dependency-check-tests.ts +++ b/test/functionality/pipelines/create/dependency-check-tests.ts @@ -38,18 +38,49 @@ describe('dependency check', () => { }) } - positive('should work on a single step', [PARSE_WITH_R_SHELL_STEP], ['parse']) - positive('should work on a single step with dependencies', [ - PARSE_WITH_R_SHELL_STEP, - { ...PARSE_WITH_R_SHELL_STEP, name: 'parse-v2', dependencies: ['parse'] } - ], ['parse', 'parse-v2']) - // they will be shuffled in all permutations - positive('default pipeline', [ - PARSE_WITH_R_SHELL_STEP, - NORMALIZE, - LEGACY_STATIC_DATAFLOW, - STATIC_SLICE, - NAIVE_RECONSTRUCT - ], ['parse', 'normalize', 'dataflow', 'slice', 'reconstruct']) + describe('without decorators', () => { + positive('should work on a single step', [PARSE_WITH_R_SHELL_STEP], ['parse']) + positive('should work on a single step with dependencies', [ + PARSE_WITH_R_SHELL_STEP, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + dependencies: ['parse'] + } + ], ['parse', 'parse-v2']) + // they will be shuffled in all permutations + positive('default pipeline', [ + PARSE_WITH_R_SHELL_STEP, + NORMALIZE, + LEGACY_STATIC_DATAFLOW, + STATIC_SLICE, + NAIVE_RECONSTRUCT + ], ['parse', 'normalize', 'dataflow', 'slice', 'reconstruct']) + }) + describe('with decorators', () => { + positive('simple decorator on first step', [ + PARSE_WITH_R_SHELL_STEP, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + dependencies: [], + decorates: 
'parse', + } + ], ['parse', 'parse-v2']) + positive('decorators can depend on each other', [ + PARSE_WITH_R_SHELL_STEP, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + decorates: 'parse', + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v3', + dependencies: ['parse-v2'], + decorates: 'parse', + } + ], ['parse', 'parse-v2', 'parse-v3']) + }) }) }) From c07efbec396f076febb6ddf4156b0135c14502bd Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 13:47:39 +0100 Subject: [PATCH 022/104] refactor, test-fix(pipeline): support multiple decorators and further improve handling --- src/core/steps/pipeline/dependency-checker.ts | 30 +++++++++---------- .../create/dependency-check-tests.ts | 28 +++++++++++++++++ 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/src/core/steps/pipeline/dependency-checker.ts b/src/core/steps/pipeline/dependency-checker.ts index 12ff2f5345..95698e8b83 100644 --- a/src/core/steps/pipeline/dependency-checker.ts +++ b/src/core/steps/pipeline/dependency-checker.ts @@ -35,7 +35,7 @@ export function verifyAndBuildPipeline(steps: IStep[]): Pipeline { // check if any of the dependencies in the map are invalid checkForInvalidDependency(steps, stepMap) // otherwise, we assume a cycle - throw new InvalidPipelineError('3) Pipeline contains at least one cycle') + throw new InvalidPipelineError(`3) Pipeline contains at least one cycle; sorted: ${JSON.stringify(sorted)}, steps: ${JSON.stringify([...stepMap.keys()])}`) } return { @@ -44,6 +44,9 @@ export function verifyAndBuildPipeline(steps: IStep[]): Pipeline { } } +function allDependenciesAreVisited(step: IStep<(...args: any[]) => any>, visited: Set) { + return step.dependencies.every(d => visited.has(d)) +} function topologicalSort(inits: NameOfStep[], stepMap: Map) { const sorted: NameOfStep[] = [] @@ -56,34 +59,29 @@ function topologicalSort(inits: NameOfStep[], stepMap: Map) { // these decorators still have dependencies open; we have to check if they can be 
satisfied by the other steps to add const decoratorsOfLastOthers = new Set() - // conventional topological-sort elements that now no longer have unsatisfied dependencies - const otherInits = [] - for(const [elem, step] of stepMap.entries()) { if(visited.has(elem)) { continue } - const allDepsSatisfied = step.dependencies.every(d => visited.has(d)) if(step.decorates === init) { - if(allDepsSatisfied) { - inits.push(elem) + if(allDependenciesAreVisited(step, visited)) { + sorted.push(elem) + visited.add(elem) } else { decoratorsOfLastOthers.add(elem) } - } else if(allDepsSatisfied) { - otherInits.push(elem) + } else if(step.decorates === undefined && allDependenciesAreVisited(step, visited)) { + inits.push(elem) } } // for the other decorators we have to cycle until we find a solution, or know, that no solution exists - topologicallyInsertDecoratorElements(decoratorsOfLastOthers, stepMap, visited, inits) - - inits.push(...otherInits) + topologicallyInsertDecoratorElements(decoratorsOfLastOthers, stepMap, visited, sorted) } return sorted } -function topologicallyInsertDecoratorElements(decoratorsOfLastOthers: Set, stepMap: Map, visited: Set, inits: NameOfStep[]) { +function topologicallyInsertDecoratorElements(decoratorsOfLastOthers: Set, stepMap: Map, visited: Set, sorted: NameOfStep[]) { if(decoratorsOfLastOthers.size === 0) { return } @@ -93,10 +91,10 @@ function topologicallyInsertDecoratorElements(decoratorsOfLastOthers: Set visited.has(d)) - if(allDepsSatisfied) { + if(allDependenciesAreVisited(step, visited)) { decoratorsOfLastOthers.delete(elem) - inits.push(elem) + sorted.push(elem) + visited.add(elem) changed = true } } diff --git a/test/functionality/pipelines/create/dependency-check-tests.ts b/test/functionality/pipelines/create/dependency-check-tests.ts index 30ad821086..1603105872 100644 --- a/test/functionality/pipelines/create/dependency-check-tests.ts +++ b/test/functionality/pipelines/create/dependency-check-tests.ts @@ -81,6 +81,34 @@ 
describe('dependency check', () => { decorates: 'parse', } ], ['parse', 'parse-v2', 'parse-v3']) + positive('not the first, and multiple decorators', [ + PARSE_WITH_R_SHELL_STEP, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + dependencies: ['parse'], + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v3', + decorates: 'parse-v2', + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v4', + dependencies: ['parse-v2'] + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v6', + dependencies: ['parse-v4'] + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v5', + decorates: 'parse-v6', + } + ], ['parse', 'parse-v2', 'parse-v3', 'parse-v4', 'parse-v6', 'parse-v5']) }) }) }) From 372d53a07999c0576806a662278403eca38fae05 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 13:49:33 +0100 Subject: [PATCH 023/104] refactor, test(pipeline): clarify that we test all permutations --- .../create/dependency-check-tests.ts | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/test/functionality/pipelines/create/dependency-check-tests.ts b/test/functionality/pipelines/create/dependency-check-tests.ts index 1603105872..0a659476a6 100644 --- a/test/functionality/pipelines/create/dependency-check-tests.ts +++ b/test/functionality/pipelines/create/dependency-check-tests.ts @@ -10,9 +10,11 @@ import { NAIVE_RECONSTRUCT } from '../../../../src/core/steps/all/40-reconstruct describe('dependency check', () => { describe('error-cases', () => { - function negative(name: string, steps: IStep[], message: string | RegExp) { - it(name, () => { - expect(() => createPipeline(steps)).to.throw(message) + function negative(name: string, rawSteps: IStep[], message: string | RegExp) { + it(`${name} (all permutations)`, () => { + for(const steps of allPermutations(rawSteps)) { + expect(() => createPipeline(steps)).to.throw(message) + } }) } negative('should throw on empty input', [], /empty/) @@ -28,8 +30,7 @@ describe('dependency check', 
() => { }) describe('default behavior', () => { function positive(name: string, rawSteps: IStep[], expected: NameOfStep[]) { - it(name, () => { - // try all permutations + it(`${name} (all permutations)`, () => { for(const steps of allPermutations(rawSteps)) { const pipeline = createPipeline(steps) expect([...pipeline.steps.keys()]).to.have.members(expected, `should have the correct keys for ${JSON.stringify(steps)}`) @@ -109,6 +110,18 @@ describe('dependency check', () => { decorates: 'parse-v6', } ], ['parse', 'parse-v2', 'parse-v3', 'parse-v4', 'parse-v6', 'parse-v5']) + positive('default pipeline with dataflow decoration', [ + PARSE_WITH_R_SHELL_STEP, + NORMALIZE, + LEGACY_STATIC_DATAFLOW, + { + ...LEGACY_STATIC_DATAFLOW, + name: 'dataflow-decorator', + decorates: 'dataflow' + }, + STATIC_SLICE, + NAIVE_RECONSTRUCT + ], ['parse', 'normalize', 'dataflow', 'dataflow-decorator', 'slice', 'reconstruct']) }) }) }) From 2149b0f5464f365860bb8161fb67956d7c0af05c Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 13:51:36 +0100 Subject: [PATCH 024/104] test(pipeline): add a test case to specifically guard againts missing initial steps --- .../functionality/pipelines/create/dependency-check-tests.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/functionality/pipelines/create/dependency-check-tests.ts b/test/functionality/pipelines/create/dependency-check-tests.ts index 0a659476a6..b98f40be35 100644 --- a/test/functionality/pipelines/create/dependency-check-tests.ts +++ b/test/functionality/pipelines/create/dependency-check-tests.ts @@ -22,6 +22,11 @@ describe('dependency check', () => { [PARSE_WITH_R_SHELL_STEP, PARSE_WITH_R_SHELL_STEP], /duplicate|not unique/) negative('should throw on invalid dependencies', [PARSE_WITH_R_SHELL_STEP, { ...PARSE_WITH_R_SHELL_STEP, name: 'parse-v2', dependencies: ['foo'] }], /invalid dependency|not exist/) + negative('no initial steps', + [ + { ...PARSE_WITH_R_SHELL_STEP, name: 'parse-v1', dependencies: 
['parse-v2'] }, + { ...PARSE_WITH_R_SHELL_STEP, name: 'parse-v2', dependencies: ['parse-v1'] } + ], /no initial/) negative('should throw on cycles', [PARSE_WITH_R_SHELL_STEP, { ...PARSE_WITH_R_SHELL_STEP, name: 'parse-v1', dependencies: ['parse-v2'] }, From fa891aaa22c1ad6ee7f4c26c1ebd1a1b13814440 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 13:53:38 +0100 Subject: [PATCH 025/104] refactor(pipeline): remove initial step guards --- src/core/steps/pipeline/dependency-checker.ts | 4 ---- .../functionality/pipelines/create/dependency-check-tests.ts | 5 ----- 2 files changed, 9 deletions(-) diff --git a/src/core/steps/pipeline/dependency-checker.ts b/src/core/steps/pipeline/dependency-checker.ts index 95698e8b83..0e0f83165d 100644 --- a/src/core/steps/pipeline/dependency-checker.ts +++ b/src/core/steps/pipeline/dependency-checker.ts @@ -26,10 +26,6 @@ export function verifyAndBuildPipeline(steps: IStep[]): Pipeline { const inits: NameOfStep[] = [] initializeSteps(steps, stepMap, inits) - if(inits.length === 0) { - throw new InvalidPipelineError('3) Pipeline has no initial steps (i.e., it contains no step without dependencies)') - } - const sorted = topologicalSort(inits, stepMap) if(sorted.length !== stepMap.size) { // check if any of the dependencies in the map are invalid diff --git a/test/functionality/pipelines/create/dependency-check-tests.ts b/test/functionality/pipelines/create/dependency-check-tests.ts index b98f40be35..0a659476a6 100644 --- a/test/functionality/pipelines/create/dependency-check-tests.ts +++ b/test/functionality/pipelines/create/dependency-check-tests.ts @@ -22,11 +22,6 @@ describe('dependency check', () => { [PARSE_WITH_R_SHELL_STEP, PARSE_WITH_R_SHELL_STEP], /duplicate|not unique/) negative('should throw on invalid dependencies', [PARSE_WITH_R_SHELL_STEP, { ...PARSE_WITH_R_SHELL_STEP, name: 'parse-v2', dependencies: ['foo'] }], /invalid dependency|not exist/) - negative('no initial steps', - [ - { 
...PARSE_WITH_R_SHELL_STEP, name: 'parse-v1', dependencies: ['parse-v2'] }, - { ...PARSE_WITH_R_SHELL_STEP, name: 'parse-v2', dependencies: ['parse-v1'] } - ], /no initial/) negative('should throw on cycles', [PARSE_WITH_R_SHELL_STEP, { ...PARSE_WITH_R_SHELL_STEP, name: 'parse-v1', dependencies: ['parse-v2'] }, From 5918e5df237a8f2fd2b2f13a2d50260cd663fde9 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 14:29:55 +0100 Subject: [PATCH 026/104] Robustify Git-Hooks for use with GitHub Desktop (#499) git: try to make pre-push hook more robust with sourcing npm --- .githooks/pre-push | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/.githooks/pre-push b/.githooks/pre-push index e8a9974cbd..111e89281c 100755 --- a/.githooks/pre-push +++ b/.githooks/pre-push @@ -2,9 +2,30 @@ set -eu -if ! [ -x "$(command -v npm)" ]; then - echo 'Error: npm not found. Make it available to the host shell (e.g., with "nvm use --lts").' - exit 2 +NPM_CMD="npm" + +if ! (type $NPM_CMD >> /dev/null); then + echo "npm not found, trying to make it available using nvm..." + if type nvm >> /dev/null; then + echo "nvm found, using it to install the latest lts node" + nvm use --lts + else + echo "nvm not found, trying to make it available using the nvm.sh" + # try to make it available based on https://github.com/typicode/husky/issues/912#issuecomment-817522060 + export NVM_DIR="$HOME/.nvm/nvm.sh" + . "$(dirname $NVM_DIR)/nvm.sh" + + export NVM_DIR="$HOME/.nvm" + a=$(nvm ls --no-colors | grep 'node') + v=$(echo "$a" | sed -E 's/.*\(-> ([^ ]+).*/\1/') + + export PATH="$NVM_DIR/versions/node/$v/bin:$PATH" + + if ! (type $NPM_CMD >> /dev/null); then + echo "no variant of npm or nvm found, trying to use the npm.cmd" + NPM_CMD="npm.cmd" + fi + fi fi @@ -37,7 +58,7 @@ if [ -n "$(git status --porcelain)" ]; then fi echo "Linting project (local mode)..." 
-npm run lint-local +$NPM_CMD run lint-local # shellcheck disable=SC2124 # we want the argument splitting From b0d17cc7929ea75d773b2a6daad86315c9e4a082 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 14:41:53 +0100 Subject: [PATCH 027/104] refactor(pipeline): remove redundant generic type --- src/core/steps/pipeline/dependency-checker.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/steps/pipeline/dependency-checker.ts b/src/core/steps/pipeline/dependency-checker.ts index 0e0f83165d..fae8eb7cb1 100644 --- a/src/core/steps/pipeline/dependency-checker.ts +++ b/src/core/steps/pipeline/dependency-checker.ts @@ -40,7 +40,7 @@ export function verifyAndBuildPipeline(steps: IStep[]): Pipeline { } } -function allDependenciesAreVisited(step: IStep<(...args: any[]) => any>, visited: Set) { +function allDependenciesAreVisited(step: IStep, visited: Set) { return step.dependencies.every(d => visited.has(d)) } From f86a20daaa6cf1cb9cb422ae4489987c72dd43c8 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 15:18:45 +0100 Subject: [PATCH 028/104] Improve pre-push hook with GitHub Desktop by detecting windows (#515) git: improve on pre-push hook with windows fallback --- .githooks/pre-push | 56 ++++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/.githooks/pre-push b/.githooks/pre-push index 111e89281c..ccb1dfe394 100755 --- a/.githooks/pre-push +++ b/.githooks/pre-push @@ -2,30 +2,42 @@ set -eu -NPM_CMD="npm" - -if ! (type $NPM_CMD >> /dev/null); then - echo "npm not found, trying to make it available using nvm..." - if type nvm >> /dev/null; then - echo "nvm found, using it to install the latest lts node" - nvm use --lts - else - echo "nvm not found, trying to make it available using the nvm.sh" - # try to make it available based on https://github.com/typicode/husky/issues/912#issuecomment-817522060 - export NVM_DIR="$HOME/.nvm/nvm.sh" - . 
"$(dirname $NVM_DIR)/nvm.sh" - - export NVM_DIR="$HOME/.nvm" - a=$(nvm ls --no-colors | grep 'node') - v=$(echo "$a" | sed -E 's/.*\(-> ([^ ]+).*/\1/') - - export PATH="$NVM_DIR/versions/node/$v/bin:$PATH" - - if ! (type $NPM_CMD >> /dev/null); then - echo "no variant of npm or nvm found, trying to use the npm.cmd" - NPM_CMD="npm.cmd" + +find_npm_linux() { + export NPM_CMD="npm" + + if ! (type $NPM_CMD >> /dev/null); then + echo "npm not found, trying to make it available using nvm..." + if type nvm >> /dev/null; then + echo "nvm found, using it to install the latest lts node" + nvm use --lts + else + echo "nvm not found, trying to make it available using the nvm.sh" + # try to make it available based on https://github.com/typicode/husky/issues/912#issuecomment-817522060 + export NVM_DIR="$HOME/.nvm/nvm.sh" + . "$(dirname $NVM_DIR)/nvm.sh" + + export NVM_DIR="$HOME/.nvm" + a=$(nvm ls --no-colors | grep 'node') + v=$(echo "$a" | sed -E 's/.*\(-> ([^ ]+).*/\1/') + + export PATH="$NVM_DIR/versions/node/$v/bin:$PATH" + + if ! 
(type $NPM_CMD >> /dev/null); then + echo "no variant of npm or nvm found, trying to use the npm.cmd" + export NPM_CMD="npm.cmd" + fi fi fi +} + +if [ -z "${OSTYPE+x}" ]; then + find_npm_linux +else + case "$OSTYPE" in + msys*) export NPM_CMD="npm.cmd";; + *) find_npm_linux ;; + esac fi From 42f8b83f7bcbd844b841f5b62a8119dd35480205 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 15:49:25 +0100 Subject: [PATCH 029/104] refactor, fix(test-coverage): hopefully source maps work now --- package.json | 2 +- tsconfig.json | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 145832767c..38f5e0c037 100644 --- a/package.json +++ b/package.json @@ -18,7 +18,7 @@ "lint": "npm run license-compat -- --summary && eslint src/ test/", "license-compat": "license-checker --onlyAllow 'MIT;MIT OR X11;GPLv2;LGPL;GNUGPL;ISC;Apache-2.0;FreeBSD;BSD-2-Clause;clearbsd;ModifiedBSD;BSD-3-Clause;Python-2.0;Unlicense;WTFPL;CC-BY-4.0;CC-BY-3.0;CC0-1.0;0BSD'", "doc": "typedoc", - "test": "nyc --no-clean mocha --require ts-node/register --timeout 60000 \"test/**/*.spec.ts\"", + "test": "nyc --source-map --produce-source-map --cache false mocha --require ts-node/register --timeout 60000 \"test/**/*.spec.ts\"", "performance-test": "func() { cd test/performance/ && bash run-all-suites.sh $1 $2; cd ../../; }; func", "test-full": "npm run test -- --test-installation" }, diff --git a/tsconfig.json b/tsconfig.json index e813248d40..37694b70d3 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -8,8 +8,10 @@ "moduleResolution": "node", "skipLibCheck": true, "sourceMap": true, + "inlineSourceMap": true, "outDir": "./dist/", - "strict": true + "strict": true, + "alwaysStrict": true }, "lib": [ "esnext", "dom" ], "exclude": [ From 4b30aa272dd8a2e9c59cad91803246ea2d7d69cb Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 15:50:23 +0100 Subject: [PATCH 030/104] refactor(test-coverage): remove inline source map from the 
tsconfig --- tsconfig.json | 1 - 1 file changed, 1 deletion(-) diff --git a/tsconfig.json b/tsconfig.json index 37694b70d3..9e628fc863 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -8,7 +8,6 @@ "moduleResolution": "node", "skipLibCheck": true, "sourceMap": true, - "inlineSourceMap": true, "outDir": "./dist/", "strict": true, "alwaysStrict": true From 7f20772cba2fcab518fa2cb55176c088f6c1adc8 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 15:57:35 +0100 Subject: [PATCH 031/104] test(pipeline): test decoration cycles --- .../create/dependency-check-tests.ts | 51 +++++++++++++++---- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/test/functionality/pipelines/create/dependency-check-tests.ts b/test/functionality/pipelines/create/dependency-check-tests.ts index 0a659476a6..e309c8cb89 100644 --- a/test/functionality/pipelines/create/dependency-check-tests.ts +++ b/test/functionality/pipelines/create/dependency-check-tests.ts @@ -17,16 +17,47 @@ describe('dependency check', () => { } }) } - negative('should throw on empty input', [], /empty/) - negative('should throw on duplicate names', - [PARSE_WITH_R_SHELL_STEP, PARSE_WITH_R_SHELL_STEP], /duplicate|not unique/) - negative('should throw on invalid dependencies', - [PARSE_WITH_R_SHELL_STEP, { ...PARSE_WITH_R_SHELL_STEP, name: 'parse-v2', dependencies: ['foo'] }], /invalid dependency|not exist/) - negative('should throw on cycles', - [PARSE_WITH_R_SHELL_STEP, - { ...PARSE_WITH_R_SHELL_STEP, name: 'parse-v1', dependencies: ['parse-v2'] }, - { ...PARSE_WITH_R_SHELL_STEP, name: 'parse-v2', dependencies: ['parse-v1'] } - ], /cycle/) + describe('without decorators', () => { + negative('should throw on empty input', [], /empty/) + negative('should throw on duplicate names', + [PARSE_WITH_R_SHELL_STEP, PARSE_WITH_R_SHELL_STEP], /duplicate|not unique/) + negative('should throw on invalid dependencies', + [PARSE_WITH_R_SHELL_STEP, { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + 
dependencies: ['foo'] + }], /invalid dependency|not exist/) + negative('should throw on cycles', + [PARSE_WITH_R_SHELL_STEP, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v1', + dependencies: ['parse-v2'] + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + dependencies: ['parse-v1'] + } + ], /cycle/) + }) + describe('with decorators', () => { + negative('should throw on decoration cycles', + [PARSE_WITH_R_SHELL_STEP, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v1', + decorates: 'parse', + dependencies: ['parse-v2'] + }, + { + ...PARSE_WITH_R_SHELL_STEP, + name: 'parse-v2', + decorates: 'parse', + dependencies: ['parse-v1'] + } + ], /decoration cycle/) + }) }) describe('default behavior', () => { function positive(name: string, rawSteps: IStep[], expected: NameOfStep[]) { From 6f246e5649c5ad00ed33f6f3aedf4d4637072b4e Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 16:02:09 +0100 Subject: [PATCH 032/104] test(pipeline): test errors for non-existing decoration steps --- .../functionality/pipelines/create/dependency-check-tests.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/functionality/pipelines/create/dependency-check-tests.ts b/test/functionality/pipelines/create/dependency-check-tests.ts index e309c8cb89..a2722981bc 100644 --- a/test/functionality/pipelines/create/dependency-check-tests.ts +++ b/test/functionality/pipelines/create/dependency-check-tests.ts @@ -57,6 +57,11 @@ describe('dependency check', () => { dependencies: ['parse-v1'] } ], /decoration cycle/) + negative('decorate non-existing step', + [{ + ...PARSE_WITH_R_SHELL_STEP, + decorates: 'foo' + }], /decorates.+not exist/) }) }) describe('default behavior', () => { From 941c7ea3af1aef940f04bd52c3f1a4b0eeec685e Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 16:05:37 +0100 Subject: [PATCH 033/104] refactor(pipeline): minor complexity reduction for the toposort --- src/core/steps/pipeline/dependency-checker.ts | 24 
+++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/core/steps/pipeline/dependency-checker.ts b/src/core/steps/pipeline/dependency-checker.ts index fae8eb7cb1..c563416576 100644 --- a/src/core/steps/pipeline/dependency-checker.ts +++ b/src/core/steps/pipeline/dependency-checker.ts @@ -44,6 +44,19 @@ function allDependenciesAreVisited(step: IStep, visited: Set) { return step.dependencies.every(d => visited.has(d)) } +function handleStep(step: IStep, init: NameOfStep, visited: Set, sorted: NameOfStep[], elem: NameOfStep, decoratorsOfLastOthers: Set, inits: NameOfStep[]) { + if(step.decorates === init) { + if(allDependenciesAreVisited(step, visited)) { + sorted.push(elem) + visited.add(elem) + } else { + decoratorsOfLastOthers.add(elem) + } + } else if(step.decorates === undefined && allDependenciesAreVisited(step, visited)) { + inits.push(elem) + } +} + function topologicalSort(inits: NameOfStep[], stepMap: Map) { const sorted: NameOfStep[] = [] const visited = new Set() @@ -59,16 +72,7 @@ function topologicalSort(inits: NameOfStep[], stepMap: Map) { if(visited.has(elem)) { continue } - if(step.decorates === init) { - if(allDependenciesAreVisited(step, visited)) { - sorted.push(elem) - visited.add(elem) - } else { - decoratorsOfLastOthers.add(elem) - } - } else if(step.decorates === undefined && allDependenciesAreVisited(step, visited)) { - inits.push(elem) - } + handleStep(step, init, visited, sorted, elem, decoratorsOfLastOthers, inits) } // for the other decorators we have to cycle until we find a solution, or know, that no solution exists From 616413b0b4ca2a708e7bb7b2b01ba6a5aa31a3a2 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 16:34:43 +0100 Subject: [PATCH 034/104] refactor(pipeline): elevate step names in a pipeline --- src/core/steps/all/00-parse.ts | 3 ++- src/core/steps/all/10-normalize.ts | 5 +++-- src/core/steps/all/20-dataflow.ts | 4 +++- src/core/steps/all/30-slice.ts | 3 ++- 
src/core/steps/all/40-reconstruct.ts | 3 ++- src/core/steps/pipeline/dependency-checker.ts | 6 ++--- src/core/steps/pipeline/pipeline.ts | 6 ++--- src/core/steps/print.ts | 22 +++++++++++++++++++ src/core/steps/step.ts | 18 +++++++-------- src/core/steps/steps.ts | 19 ---------------- src/util/arrays.ts | 5 +++++ .../create/dependency-check-tests.ts | 6 ++--- 12 files changed, 57 insertions(+), 43 deletions(-) create mode 100644 src/core/steps/print.ts diff --git a/src/core/steps/all/00-parse.ts b/src/core/steps/all/00-parse.ts index f0ee2eca1c..29893d7283 100644 --- a/src/core/steps/all/00-parse.ts +++ b/src/core/steps/all/00-parse.ts @@ -2,6 +2,7 @@ import { internalPrinter, StepOutputFormat } from '../../print/print' import { parseToQuads } from '../../print/parse-printer' import { IStep } from '../step' import { retrieveXmlFromRCode } from '../../../r-bridge' +import { DeepReadonly } from 'ts-essentials' export const PARSE_WITH_R_SHELL_STEP = { @@ -15,4 +16,4 @@ export const PARSE_WITH_R_SHELL_STEP = { [StepOutputFormat.RdfQuads]: parseToQuads }, dependencies: [] -} satisfies IStep +} as const satisfies DeepReadonly> diff --git a/src/core/steps/all/10-normalize.ts b/src/core/steps/all/10-normalize.ts index a0f6bc167f..db1c865c90 100644 --- a/src/core/steps/all/10-normalize.ts +++ b/src/core/steps/all/10-normalize.ts @@ -1,4 +1,4 @@ -import { normalize } from '../../../r-bridge' +import { normalize, retrieveXmlFromRCode } from '../../../r-bridge' import { internalPrinter, StepOutputFormat } from '../../print/print' import { normalizedAstToJson, @@ -7,6 +7,7 @@ import { printNormalizedAstToMermaidUrl } from '../../print/normalize-printer' import { IStep } from '../step' +import { DeepReadonly } from 'ts-essentials' export const NORMALIZE = { name: 'normalize', @@ -21,4 +22,4 @@ export const NORMALIZE = { [StepOutputFormat.MermaidUrl]: printNormalizedAstToMermaidUrl }, dependencies: [ 'parse' ] -} satisfies IStep +} as const satisfies DeepReadonly> diff --git 
a/src/core/steps/all/20-dataflow.ts b/src/core/steps/all/20-dataflow.ts index 7752512498..0ac3bf2638 100644 --- a/src/core/steps/all/20-dataflow.ts +++ b/src/core/steps/all/20-dataflow.ts @@ -7,6 +7,8 @@ import { dataflowGraphToMermaidUrl, dataflowGraphToQuads } from '../../print/dataflow-printer' +import { DeepReadonly } from 'ts-essentials' +import { normalize } from '../../../r-bridge' export const LEGACY_STATIC_DATAFLOW = { name: 'dataflow', @@ -21,4 +23,4 @@ export const LEGACY_STATIC_DATAFLOW = { [StepOutputFormat.MermaidUrl]: dataflowGraphToMermaidUrl }, dependencies: [ 'normalize' ] -} satisfies IStep +} as const satisfies DeepReadonly> diff --git a/src/core/steps/all/30-slice.ts b/src/core/steps/all/30-slice.ts index 8173b7300c..0a24a7f256 100644 --- a/src/core/steps/all/30-slice.ts +++ b/src/core/steps/all/30-slice.ts @@ -1,6 +1,7 @@ import { internalPrinter, StepOutputFormat } from '../../print/print' import { IStep } from '../step' import { staticSlicing } from '../../../slicing' +import { DeepReadonly } from 'ts-essentials' export const STATIC_SLICE = { name: 'slice', @@ -11,4 +12,4 @@ export const STATIC_SLICE = { [StepOutputFormat.Internal]: internalPrinter }, dependencies: [ 'dataflow' ] -} satisfies IStep +} as const satisfies DeepReadonly> diff --git a/src/core/steps/all/40-reconstruct.ts b/src/core/steps/all/40-reconstruct.ts index 0ca126383d..093d91159a 100644 --- a/src/core/steps/all/40-reconstruct.ts +++ b/src/core/steps/all/40-reconstruct.ts @@ -1,6 +1,7 @@ import { internalPrinter, StepOutputFormat } from '../../print/print' import { IStep } from '../step' import { reconstructToCode } from '../../../slicing' +import { DeepReadonly } from 'ts-essentials' export const NAIVE_RECONSTRUCT = { name: 'reconstruct', @@ -11,4 +12,4 @@ export const NAIVE_RECONSTRUCT = { [StepOutputFormat.Internal]: internalPrinter }, dependencies: [ 'slice' ] -} satisfies IStep +} as const satisfies DeepReadonly> diff --git 
a/src/core/steps/pipeline/dependency-checker.ts b/src/core/steps/pipeline/dependency-checker.ts index c563416576..13763d5f84 100644 --- a/src/core/steps/pipeline/dependency-checker.ts +++ b/src/core/steps/pipeline/dependency-checker.ts @@ -15,7 +15,7 @@ import { jsonReplacer } from '../../../util/json' * If successful, it returns the topologically sorted list of steps in order of desired execution. * @throws InvalidPipelineError if any of the above conditions are not met */ -export function verifyAndBuildPipeline(steps: IStep[]): Pipeline { +export function verifyAndBuildPipeline(steps: readonly IStep[]): Pipeline { if(steps.length === 0) { throw new InvalidPipelineError('0) Pipeline is empty') } @@ -104,7 +104,7 @@ function topologicallyInsertDecoratorElements(decoratorsOfLastOthers: Set) { +function checkForInvalidDependency(steps: readonly IStep[], stepMap: Map) { for(const step of steps) { for(const dep of step.dependencies) { if(!stepMap.has(dep)) { @@ -117,7 +117,7 @@ function checkForInvalidDependency(steps: IStep[], stepMap: Map, inits: NameOfStep[]) { +function initializeSteps(steps: readonly IStep[], stepMap: Map, inits: NameOfStep[]) { for(const step of steps) { const name = step.name // if the name is already in the map we have a duplicate diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index e764f94a9b..6c43e83b08 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -5,15 +5,15 @@ import { verifyAndBuildPipeline } from './dependency-checker' * A pipeline is a collection of {@link Pipeline#steps|steps} that are executed in a certain {@link Pipeline#order|order}. * It is to be created {@link createPipeline}. */ -export interface Pipeline { +export interface Pipeline { readonly steps: ReadonlyMap - readonly order: NameOfStep[] + readonly order: T } /** * Creates a pipeline from the given steps. * Refer to {@link verifyAndBuildPipeline} for details and constraints on the steps. 
*/ -export function createPipeline(steps: IStep[]): Pipeline { +export function createPipeline(...steps: T): Pipeline { return verifyAndBuildPipeline(steps) } diff --git a/src/core/steps/print.ts b/src/core/steps/print.ts new file mode 100644 index 0000000000..839eee7566 --- /dev/null +++ b/src/core/steps/print.ts @@ -0,0 +1,22 @@ +import { IStepPrinter, StepOutputFormat } from '../print/print' +import { guard } from '../../util/assert' +import { StepName, StepProcessor, STEPS } from './steps' +import { TailOfArray } from '../../util/arrays' + + +/** + * For a `step` of the given name, which returned the given `data`. Convert that data into the given `format`. + * Depending on your step and the format this may require `additional` inputs. + */ +export function printStepResult< + Name extends StepName, + Processor extends StepProcessor, + Format extends Exclude & number, + Printer extends (typeof STEPS)[Name]['printer'][Format], + AdditionalInput extends TailOfArray>, +>(step: Name, data: Awaited>, format: Format, ...additional: AdditionalInput): Promise { + const base = STEPS[step].printer + const printer = base[format as keyof typeof base] as IStepPrinter, Format, AdditionalInput> | undefined + guard(printer !== undefined, `printer for ${step} does not support ${String(format)}`) + return printer(data, ...additional) as Promise +} diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index c30068ec47..c40217d35c 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -30,12 +30,12 @@ export interface IStepOrder { * In other words, you can have multiple steps with a name like `parse` as long as you use only one of them in a given pipeline. * This is, because these names are required in the {@link IStep#dependencies} field to refer to other steps this one relies on. */ - name: NameOfStep + readonly name: string /** * Give the names of other steps this one requires to be completed as a prerequisite (e.g., to gain access to their input). 
* Does not have to be transitive, this will be checked by the scheduler of the pipeline. */ - dependencies: NameOfStep[] + readonly dependencies: readonly NameOfStep[] /** * This is similar to {@link dependencies}, but is used to say that a given step _decorates_ another one. * This imbues two requirements: @@ -43,7 +43,7 @@ export interface IStepOrder { * * If so, it is ensured that _this_ step is executed _after_ the step it decorates, but before any step that depends on it. */ - decorates?: NameOfStep + readonly decorates?: NameOfStep } /** @@ -56,19 +56,19 @@ export interface IStep< Fn extends StepFunction = (...args: any[]) => any, > extends MergeableRecord, IStepOrder { /** Human-readable description of this step */ - description: string + readonly description: string /** The main processor that essentially performs the logic of this step */ - processor: (...input: Parameters) => ReturnType + readonly processor: (...input: Parameters) => ReturnType /* does this step has to be repeated for each new slice or can it be performed only once in the initialization */ - required: StepRequired + readonly required: StepRequired /** * How to visualize the results of the respective step to the user? */ - printer: { - [K in StepOutputFormat]?: IStepPrinter + readonly printer: { + [K in StepOutputFormat]?: Readonly> } & { // we always want to have the internal printer - [StepOutputFormat.Internal]: InternalStepPrinter + [StepOutputFormat.Internal]: Readonly> } } diff --git a/src/core/steps/steps.ts b/src/core/steps/steps.ts index b668080358..9efd67618f 100644 --- a/src/core/steps/steps.ts +++ b/src/core/steps/steps.ts @@ -116,22 +116,3 @@ export function executeSingleSubStep } - -type Tail = T extends [infer _, ...infer Rest] ? Rest : never; - -/** - * For a `step` of the given name, which returned the given `data`. Convert that data into the given `format`. - * Depending on your step and the format this may require `additional` inputs. 
- */ -export function printStepResult< - Name extends StepName, - Processor extends StepProcessor, - Format extends Exclude & number, - Printer extends (typeof STEPS)[Name]['printer'][Format], - AdditionalInput extends Tail>, ->(step: Name, data: Awaited>, format: Format, ...additional: AdditionalInput): Promise { - const base = STEPS[step].printer - const printer = base[format as keyof typeof base] as IStepPrinter, Format, AdditionalInput> | undefined - guard(printer !== undefined, `printer for ${step} does not support ${String(format)}`) - return printer(data, ...additional) as Promise -} diff --git a/src/util/arrays.ts b/src/util/arrays.ts index 30ada7a76c..59ed4bbd68 100644 --- a/src/util/arrays.ts +++ b/src/util/arrays.ts @@ -1,5 +1,10 @@ import { guard } from './assert' +/** + * Returns the tail of an array (all elements except the first one). + */ +export type TailOfArray = T extends [infer _, ...infer Rest] ? Rest : never; + /** * Splits the array every time the given predicate fires. * The element the split appears on will not be included! 
diff --git a/test/functionality/pipelines/create/dependency-check-tests.ts b/test/functionality/pipelines/create/dependency-check-tests.ts index a2722981bc..b4005de097 100644 --- a/test/functionality/pipelines/create/dependency-check-tests.ts +++ b/test/functionality/pipelines/create/dependency-check-tests.ts @@ -1,4 +1,4 @@ -import { createPipeline } from '../../../../src/core/steps/pipeline' +import { createPipeline, Pipeline } from '../../../../src/core/steps/pipeline' import { IStep, NameOfStep } from '../../../../src/core/steps' import { expect } from 'chai' import { PARSE_WITH_R_SHELL_STEP } from '../../../../src/core/steps/all/00-parse' @@ -13,7 +13,7 @@ describe('dependency check', () => { function negative(name: string, rawSteps: IStep[], message: string | RegExp) { it(`${name} (all permutations)`, () => { for(const steps of allPermutations(rawSteps)) { - expect(() => createPipeline(steps)).to.throw(message) + expect(() => createPipeline(...steps)).to.throw(message) } }) } @@ -68,7 +68,7 @@ describe('dependency check', () => { function positive(name: string, rawSteps: IStep[], expected: NameOfStep[]) { it(`${name} (all permutations)`, () => { for(const steps of allPermutations(rawSteps)) { - const pipeline = createPipeline(steps) + const pipeline = createPipeline(...steps) expect([...pipeline.steps.keys()]).to.have.members(expected, `should have the correct keys for ${JSON.stringify(steps)}`) expect(pipeline.order).to.have.ordered.members(expected, `should have the correct keys for ${JSON.stringify(steps)}`) } From 338dbb229ab5608db201e52f44538dbf7fa87b72 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 16:36:51 +0100 Subject: [PATCH 035/104] refactor(pipeline): allow to acess step types --- src/core/steps/pipeline/pipeline.ts | 9 +++++++++ .../pipelines/create/dependency-check-tests.ts | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index 
6c43e83b08..7f3d8ace1c 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -4,12 +4,21 @@ import { verifyAndBuildPipeline } from './dependency-checker' /** * A pipeline is a collection of {@link Pipeline#steps|steps} that are executed in a certain {@link Pipeline#order|order}. * It is to be created {@link createPipeline}. + * + * If you want to get the type of all steps in the pipeline (given they are created canonically using const step names), refer to {@link PipelineStepTypes}. */ export interface Pipeline { readonly steps: ReadonlyMap readonly order: T } +/** + * Returns the types of all steps in the given pipeline. + * + * @see Pipeline for details + */ +export type PipelineStepTypes = T extends Pipeline ? U[0] : never + /** * Creates a pipeline from the given steps. * Refer to {@link verifyAndBuildPipeline} for details and constraints on the steps. diff --git a/test/functionality/pipelines/create/dependency-check-tests.ts b/test/functionality/pipelines/create/dependency-check-tests.ts index b4005de097..418e66a6ff 100644 --- a/test/functionality/pipelines/create/dependency-check-tests.ts +++ b/test/functionality/pipelines/create/dependency-check-tests.ts @@ -1,4 +1,4 @@ -import { createPipeline, Pipeline } from '../../../../src/core/steps/pipeline' +import { createPipeline, Pipeline, PipelineStepTypes } from '../../../../src/core/steps/pipeline' import { IStep, NameOfStep } from '../../../../src/core/steps' import { expect } from 'chai' import { PARSE_WITH_R_SHELL_STEP } from '../../../../src/core/steps/all/00-parse' From c467a645ec1e47fdaf35ccd7aedeab55343c0e9b Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 16:50:18 +0100 Subject: [PATCH 036/104] feat(pipeline): migrate type helpers --- src/core/steps/pipeline/pipeline.ts | 20 +++++++++++++------ src/core/steps/step.ts | 7 ++++--- src/core/steps/steps.ts | 3 +-- .../create/dependency-check-tests.ts | 3 ++- 4 files changed, 21 insertions(+), 12 
deletions(-) diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index 7f3d8ace1c..b3558e6fae 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -1,28 +1,36 @@ import { IStep, NameOfStep } from '../step' import { verifyAndBuildPipeline } from './dependency-checker' +import { StepName } from '../steps' /** * A pipeline is a collection of {@link Pipeline#steps|steps} that are executed in a certain {@link Pipeline#order|order}. * It is to be created {@link createPipeline}. * - * If you want to get the type of all steps in the pipeline (given they are created canonically using const step names), refer to {@link PipelineStepTypes}. + * If you want to get the type of all steps in the pipeline (given they are created canonically using const step names), refer to {@link PipelineStepNames}. + * + * TODO: group this for per-file and per-request steps/stages in general with arbitrary names? */ -export interface Pipeline { +export interface Pipeline { readonly steps: ReadonlyMap - readonly order: T + readonly order: T['name'][] } /** - * Returns the types of all steps in the given pipeline. + * Returns the types of all step names in the given pipeline. * * @see Pipeline for details */ -export type PipelineStepTypes = T extends Pipeline ? U[0] : never +export type PipelineStepNames

= PipelineStep

['name'] +export type PipelineStep

= P extends Pipeline ? U : never + +export type PipelineStepWithName

= P extends Pipeline ? U extends IStep ? U : never : never +export type PipelineStepProcessorWithName

= PipelineStepWithName['processor'] +export type PipelineStepResultWithName

= Awaited>> /** * Creates a pipeline from the given steps. * Refer to {@link verifyAndBuildPipeline} for details and constraints on the steps. */ -export function createPipeline(...steps: T): Pipeline { +export function createPipeline(...steps: T): Pipeline { return verifyAndBuildPipeline(steps) } diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index c40217d35c..7c99a23e0a 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -24,13 +24,13 @@ export type NameOfStep = string & { __brand?: 'StepName' } /** * Contains the data to specify the order of {@link IStep|steps} in a pipeline. */ -export interface IStepOrder { +export interface IStepOrder { /** * Name of the respective step, it does not have to be unique in general but only unique per-pipeline. * In other words, you can have multiple steps with a name like `parse` as long as you use only one of them in a given pipeline. * This is, because these names are required in the {@link IStep#dependencies} field to refer to other steps this one relies on. */ - readonly name: string + readonly name: Name /** * Give the names of other steps this one requires to be completed as a prerequisite (e.g., to gain access to their input). * Does not have to be transitive, this will be checked by the scheduler of the pipeline. @@ -52,9 +52,10 @@ export interface IStepOrder { * Steps will be executed synchronously, in-sequence, based on their {@link IStep#dependencies|dependencies}. 
*/ export interface IStep< + Name extends NameOfStep = NameOfStep, // eslint-disable-next-line -- by default, we assume nothing about the function shape Fn extends StepFunction = (...args: any[]) => any, -> extends MergeableRecord, IStepOrder { +> extends MergeableRecord, IStepOrder { /** Human-readable description of this step */ readonly description: string /** The main processor that essentially performs the logic of this step */ diff --git a/src/core/steps/steps.ts b/src/core/steps/steps.ts index 9efd67618f..250732c514 100644 --- a/src/core/steps/steps.ts +++ b/src/core/steps/steps.ts @@ -19,14 +19,13 @@ import { } from '../../r-bridge' import { produceDataFlowGraph } from '../../dataflow' import { reconstructToCode, staticSlicing } from '../../slicing' -import { internalPrinter, IStepPrinter, StepOutputFormat } from '../print/print' +import { internalPrinter, StepOutputFormat } from '../print/print' import { normalizedAstToJson, normalizedAstToQuads, printNormalizedAstToMermaid, printNormalizedAstToMermaidUrl } from '../print/normalize-printer' -import { guard } from '../../util/assert' import { dataflowGraphToJson, dataflowGraphToMermaid, diff --git a/test/functionality/pipelines/create/dependency-check-tests.ts b/test/functionality/pipelines/create/dependency-check-tests.ts index 418e66a6ff..3ba124caa4 100644 --- a/test/functionality/pipelines/create/dependency-check-tests.ts +++ b/test/functionality/pipelines/create/dependency-check-tests.ts @@ -1,4 +1,4 @@ -import { createPipeline, Pipeline, PipelineStepTypes } from '../../../../src/core/steps/pipeline' +import { createPipeline, Pipeline, PipelineStepNames } from '../../../../src/core/steps/pipeline' import { IStep, NameOfStep } from '../../../../src/core/steps' import { expect } from 'chai' import { PARSE_WITH_R_SHELL_STEP } from '../../../../src/core/steps/all/00-parse' @@ -74,6 +74,7 @@ describe('dependency check', () => { } }) } + const pipeline = createPipeline(PARSE_WITH_R_SHELL_STEP, STATIC_SLICE) 
describe('without decorators', () => { positive('should work on a single step', [PARSE_WITH_R_SHELL_STEP], ['parse']) From 9b3040291054721c5ea10ca53abc795bd2052d92 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 17:05:56 +0100 Subject: [PATCH 037/104] refactor(pipeline): improve the pipeline printer --- src/cli/repl/server/connection.ts | 13 +++++++++---- src/cli/statistics-helper-app.ts | 11 +++++++---- src/core/steps/all/00-parse.ts | 2 +- src/core/steps/all/10-normalize.ts | 2 +- src/core/steps/all/20-dataflow.ts | 2 +- src/core/steps/all/30-slice.ts | 2 +- src/core/steps/all/40-reconstruct.ts | 2 +- src/core/steps/pipeline/default.ts | 11 +++++++++++ src/core/steps/pipeline/pipeline.ts | 7 ++++--- src/core/steps/print.ts | 17 ++++++++--------- src/core/steps/step.ts | 4 ++-- src/core/steps/steps.ts | 10 +++++----- 12 files changed, 51 insertions(+), 32 deletions(-) create mode 100644 src/core/steps/pipeline/default.ts diff --git a/src/cli/repl/server/connection.ts b/src/cli/repl/server/connection.ts index 47c6cb6741..257967eb52 100644 --- a/src/cli/repl/server/connection.ts +++ b/src/cli/repl/server/connection.ts @@ -1,4 +1,4 @@ -import { LAST_STEP, printStepResult, SteppingSlicer, StepResults, STEPS_PER_SLICE } from '../../../core' +import { LAST_STEP, SteppingSlicer, StepResults, STEPS_PER_SLICE } from '../../../core' import { DEFAULT_XML_PARSER_CONFIG, NormalizedAst, RShell, XmlParserConfig } from '../../../r-bridge' import { sendMessage } from './send' import { answerForValidationError, validateBaseMessageFormat, validateMessage } from './validate' @@ -26,6 +26,10 @@ import { deepMergeObject } from '../../../util/objects' import { LogLevel } from '../../../util/log' import { StepOutputFormat } from '../../../core/print/print' import { DataflowInformation } from '../../../dataflow/internal/info' +import { printStepResult } from '../../../core/steps/print' +import { PARSE_WITH_R_SHELL_STEP } from '../../../core/steps/all/00-parse' +import 
{ NORMALIZE } from '../../../core/steps/all/10-normalize' +import { LEGACY_STATIC_DATAFLOW } from '../../../core/steps/all/20-dataflow' /** * Each connection handles a single client, answering to its requests. @@ -134,9 +138,10 @@ export class FlowRServerConnection { id: message.id, cfg: cfg ? cfg2quads(cfg, config()) : undefined, results: { - parse: await printStepResult('parse', results.parse as string, StepOutputFormat.RdfQuads, config(), parseConfig), - normalize: await printStepResult('normalize', results.normalize as NormalizedAst, StepOutputFormat.RdfQuads, config()), - dataflow: await printStepResult('dataflow', results.dataflow as DataflowInformation, StepOutputFormat.RdfQuads, config()), + // TODO: migrate to steps used in pipeline + parse: await printStepResult(PARSE_WITH_R_SHELL_STEP, results.parse as string, StepOutputFormat.RdfQuads, config(), parseConfig), + normalize: await printStepResult(NORMALIZE, results.normalize as NormalizedAst, StepOutputFormat.RdfQuads, config()), + dataflow: await printStepResult(LEGACY_STATIC_DATAFLOW, results.dataflow as DataflowInformation, StepOutputFormat.RdfQuads, config()), } }) } else { diff --git a/src/cli/statistics-helper-app.ts b/src/cli/statistics-helper-app.ts index f7797b0ef1..df73bce6da 100644 --- a/src/cli/statistics-helper-app.ts +++ b/src/cli/statistics-helper-app.ts @@ -12,9 +12,12 @@ import { create } from 'tar' import fs from 'fs' import { guard } from '../util/assert' import { retrieveArchiveName } from './common/features' -import { printStepResult } from '../core' import { StepOutputFormat } from '../core/print/print' import { date2string } from '../util/time' +import { printStepResult } from '../core/steps/print' +import { PARSE_WITH_R_SHELL_STEP } from '../core/steps/all/00-parse' +import { NORMALIZE } from '../core/steps/all/10-normalize' +import { LEGACY_STATIC_DATAFLOW } from '../core/steps/all/20-dataflow' // apps should never depend on other apps when forking (otherwise, they are "run" on 
load :/) @@ -89,9 +92,9 @@ async function getStatsForSingleFile() { if(options['dump-json']) { const [, output] = [...stats.outputs.entries()][0] const cfg = extractCFG(output.normalize) - statisticsFileProvider.append('output-json', 'parse', await printStepResult('parse', output.parse, StepOutputFormat.Json)) - statisticsFileProvider.append('output-json', 'normalize', await printStepResult('normalize', output.normalize, StepOutputFormat.Json)) - statisticsFileProvider.append('output-json', 'dataflow', await printStepResult('dataflow', output.dataflow, StepOutputFormat.Json)) + statisticsFileProvider.append('output-json', 'parse', await printStepResult(PARSE_WITH_R_SHELL_STEP, output.parse, StepOutputFormat.Json)) + statisticsFileProvider.append('output-json', 'normalize', await printStepResult(NORMALIZE, output.normalize, StepOutputFormat.Json)) + statisticsFileProvider.append('output-json', 'dataflow', await printStepResult(LEGACY_STATIC_DATAFLOW, output.dataflow, StepOutputFormat.Json)) statisticsFileProvider.append('output-json', 'cfg', JSON.stringify(cfg, jsonReplacer)) } diff --git a/src/core/steps/all/00-parse.ts b/src/core/steps/all/00-parse.ts index 29893d7283..1a83075517 100644 --- a/src/core/steps/all/00-parse.ts +++ b/src/core/steps/all/00-parse.ts @@ -16,4 +16,4 @@ export const PARSE_WITH_R_SHELL_STEP = { [StepOutputFormat.RdfQuads]: parseToQuads }, dependencies: [] -} as const satisfies DeepReadonly> +} as const satisfies DeepReadonly> diff --git a/src/core/steps/all/10-normalize.ts b/src/core/steps/all/10-normalize.ts index db1c865c90..6410c89a98 100644 --- a/src/core/steps/all/10-normalize.ts +++ b/src/core/steps/all/10-normalize.ts @@ -22,4 +22,4 @@ export const NORMALIZE = { [StepOutputFormat.MermaidUrl]: printNormalizedAstToMermaidUrl }, dependencies: [ 'parse' ] -} as const satisfies DeepReadonly> +} as const satisfies DeepReadonly> diff --git a/src/core/steps/all/20-dataflow.ts b/src/core/steps/all/20-dataflow.ts index 0ac3bf2638..f42ea1fd44 
100644 --- a/src/core/steps/all/20-dataflow.ts +++ b/src/core/steps/all/20-dataflow.ts @@ -23,4 +23,4 @@ export const LEGACY_STATIC_DATAFLOW = { [StepOutputFormat.MermaidUrl]: dataflowGraphToMermaidUrl }, dependencies: [ 'normalize' ] -} as const satisfies DeepReadonly> +} as const satisfies DeepReadonly> diff --git a/src/core/steps/all/30-slice.ts b/src/core/steps/all/30-slice.ts index 0a24a7f256..4f9310b7e4 100644 --- a/src/core/steps/all/30-slice.ts +++ b/src/core/steps/all/30-slice.ts @@ -12,4 +12,4 @@ export const STATIC_SLICE = { [StepOutputFormat.Internal]: internalPrinter }, dependencies: [ 'dataflow' ] -} as const satisfies DeepReadonly> +} as const satisfies DeepReadonly> diff --git a/src/core/steps/all/40-reconstruct.ts b/src/core/steps/all/40-reconstruct.ts index 093d91159a..c200b3a469 100644 --- a/src/core/steps/all/40-reconstruct.ts +++ b/src/core/steps/all/40-reconstruct.ts @@ -12,4 +12,4 @@ export const NAIVE_RECONSTRUCT = { [StepOutputFormat.Internal]: internalPrinter }, dependencies: [ 'slice' ] -} as const satisfies DeepReadonly> +} as const satisfies DeepReadonly> diff --git a/src/core/steps/pipeline/default.ts b/src/core/steps/pipeline/default.ts new file mode 100644 index 0000000000..08772fbde5 --- /dev/null +++ b/src/core/steps/pipeline/default.ts @@ -0,0 +1,11 @@ +/** + * Contains the default pipeline for working with flowr + */ +import { createPipeline } from './pipeline' +import { PARSE_WITH_R_SHELL_STEP } from '../all/00-parse' +import { NORMALIZE } from '../all/10-normalize' +import { LEGACY_STATIC_DATAFLOW } from '../all/20-dataflow' +import { STATIC_SLICE } from '../all/30-slice' +import { NAIVE_RECONSTRUCT } from '../all/40-reconstruct' + +export const DEFAULT_SLICING_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, LEGACY_STATIC_DATAFLOW, STATIC_SLICE, NAIVE_RECONSTRUCT) diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index b3558e6fae..16385242ec 100644 --- 
a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -23,9 +23,10 @@ export interface Pipeline { export type PipelineStepNames

= PipelineStep

['name'] export type PipelineStep

= P extends Pipeline ? U : never -export type PipelineStepWithName

= P extends Pipeline ? U extends IStep ? U : never : never -export type PipelineStepProcessorWithName

= PipelineStepWithName['processor'] -export type PipelineStepResultWithName

= Awaited>> +export type PipelineStepWithName

= P extends Pipeline ? U extends IStep ? U : never : never +export type PipelineStepProcessorWithName

= PipelineStepWithName['processor'] +export type PipelineStepPrintersWithName

= PipelineStepWithName['printer'] +export type PipelineStepResultWithName

= Awaited>> /** * Creates a pipeline from the given steps. diff --git a/src/core/steps/print.ts b/src/core/steps/print.ts index 839eee7566..3cdfe56b13 100644 --- a/src/core/steps/print.ts +++ b/src/core/steps/print.ts @@ -1,7 +1,7 @@ import { IStepPrinter, StepOutputFormat } from '../print/print' import { guard } from '../../util/assert' -import { StepName, StepProcessor, STEPS } from './steps' import { TailOfArray } from '../../util/arrays' +import { IStep } from './step' /** @@ -9,14 +9,13 @@ import { TailOfArray } from '../../util/arrays' * Depending on your step and the format this may require `additional` inputs. */ export function printStepResult< - Name extends StepName, - Processor extends StepProcessor, - Format extends Exclude & number, - Printer extends (typeof STEPS)[Name]['printer'][Format], + Step extends IStep, + Processor extends Step['processor'], + Format extends Exclude & number, + Printer extends Step['printer'][Format], AdditionalInput extends TailOfArray>, ->(step: Name, data: Awaited>, format: Format, ...additional: AdditionalInput): Promise { - const base = STEPS[step].printer - const printer = base[format as keyof typeof base] as IStepPrinter, Format, AdditionalInput> | undefined - guard(printer !== undefined, `printer for ${step} does not support ${String(format)}`) +>(step: Step, data: Awaited>, format: Format, ...additional: AdditionalInput): Promise { + const printer = step.printer[format] as IStepPrinter | undefined + guard(printer !== undefined, `printer for ${step.name} does not support ${String(format)}`) return printer(data, ...additional) as Promise } diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index 7c99a23e0a..30831695a6 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -66,10 +66,10 @@ export interface IStep< * How to visualize the results of the respective step to the user? 
*/ readonly printer: { - [K in StepOutputFormat]?: Readonly> + [K in StepOutputFormat]?: IStepPrinter } & { // we always want to have the internal printer - [StepOutputFormat.Internal]: Readonly> + [StepOutputFormat.Internal]: InternalStepPrinter } } diff --git a/src/core/steps/steps.ts b/src/core/steps/steps.ts index 250732c514..ecb4b99cfc 100644 --- a/src/core/steps/steps.ts +++ b/src/core/steps/steps.ts @@ -48,7 +48,7 @@ export const STEPS_PER_FILE = { [StepOutputFormat.RdfQuads]: parseToQuads }, dependencies: [] - } satisfies IStep, + } satisfies IStep<'parse', typeof retrieveXmlFromRCode>, 'normalize': { name: 'normalize', description: 'Normalize the AST to flowR\'s AST (first step of the normalization)', @@ -62,7 +62,7 @@ export const STEPS_PER_FILE = { [StepOutputFormat.MermaidUrl]: printNormalizedAstToMermaidUrl }, dependencies: [] - } satisfies IStep, + } satisfies IStep<'normalize', typeof normalize>, 'dataflow': { name: 'dataflow', description: 'Construct the dataflow graph', @@ -76,7 +76,7 @@ export const STEPS_PER_FILE = { [StepOutputFormat.MermaidUrl]: dataflowGraphToMermaidUrl }, dependencies: [] - } satisfies IStep + } satisfies IStep<'dataflow', typeof produceDataFlowGraph> } as const export const STEPS_PER_SLICE = { @@ -89,7 +89,7 @@ export const STEPS_PER_SLICE = { [StepOutputFormat.Internal]: internalPrinter }, dependencies: [ ] - } satisfies IStep, + } satisfies IStep<'slice', typeof staticSlicing>, 'reconstruct': { name: 'reconstruct', description: 'Reconstruct R code from the static slice', @@ -99,7 +99,7 @@ export const STEPS_PER_SLICE = { [StepOutputFormat.Internal]: internalPrinter }, dependencies: [ ] - } satisfies IStep + } satisfies IStep<'reconstruct', typeof reconstructToCode> } as const export const STEPS = { ...STEPS_PER_FILE, ...STEPS_PER_SLICE } as const From 03480c573948b9d816d19f9757da8131490c581e Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 17:09:34 +0100 Subject: [PATCH 038/104] wip(pipeline): takeover 
to port in- and output --- src/core/steps/step.ts | 2 +- src/core/steps/steps-provider.ts | 13 ------------- 2 files changed, 1 insertion(+), 14 deletions(-) delete mode 100644 src/core/steps/steps-provider.ts diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index 30831695a6..d787c51e2b 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -18,7 +18,7 @@ export type StepFunction = (...args: never[]) => unknown */ export type StepRequired = 'once-per-file' | 'once-per-slice' - +// TODO: rename to StepName export type NameOfStep = string & { __brand?: 'StepName' } /** diff --git a/src/core/steps/steps-provider.ts b/src/core/steps/steps-provider.ts deleted file mode 100644 index 6f189e481e..0000000000 --- a/src/core/steps/steps-provider.ts +++ /dev/null @@ -1,13 +0,0 @@ -/** - * Defines a factor interface which allows to retrieve steps based on a configuration. - * It extends on the single steps provided by flowr, with the hopes of keeping the interface the same. - * - * @module - */ -import { MergeableRecord } from '../../util/objects' - - -export interface StepsConfiguration extends MergeableRecord { - readonly name: string -} - From b3b5d25df9440e6fca3e24c3f00a9efbf739ad4a Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 18:26:39 +0100 Subject: [PATCH 039/104] test-fix(pipeline): remove example --- test/functionality/pipelines/create/dependency-check-tests.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/test/functionality/pipelines/create/dependency-check-tests.ts b/test/functionality/pipelines/create/dependency-check-tests.ts index 3ba124caa4..10479e513d 100644 --- a/test/functionality/pipelines/create/dependency-check-tests.ts +++ b/test/functionality/pipelines/create/dependency-check-tests.ts @@ -74,7 +74,6 @@ describe('dependency check', () => { } }) } - const pipeline = createPipeline(PARSE_WITH_R_SHELL_STEP, STATIC_SLICE) describe('without decorators', () => { positive('should work on a single step', 
[PARSE_WITH_R_SHELL_STEP], ['parse']) From b68898210ff796c27f6ad93263ebd3a409a33faf Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 19:55:50 +0100 Subject: [PATCH 040/104] feat(steps): add required input per step --- src/core/steps/all/00-parse.ts | 10 ++++++++-- src/core/steps/all/10-normalize.ts | 12 +++++++++--- src/core/steps/all/20-dataflow.ts | 1 - src/core/steps/all/30-slice.ts | 8 ++++++-- src/core/steps/all/40-reconstruct.ts | 8 ++++++-- src/core/steps/pipeline/pipeline.ts | 1 - src/core/steps/step.ts | 7 +++++++ 7 files changed, 36 insertions(+), 11 deletions(-) diff --git a/src/core/steps/all/00-parse.ts b/src/core/steps/all/00-parse.ts index 1a83075517..4fc4d4c7ee 100644 --- a/src/core/steps/all/00-parse.ts +++ b/src/core/steps/all/00-parse.ts @@ -1,7 +1,7 @@ import { internalPrinter, StepOutputFormat } from '../../print/print' import { parseToQuads } from '../../print/parse-printer' import { IStep } from '../step' -import { retrieveXmlFromRCode } from '../../../r-bridge' +import { retrieveXmlFromRCode, RParseRequest, RShell } from '../../../r-bridge' import { DeepReadonly } from 'ts-essentials' @@ -15,5 +15,11 @@ export const PARSE_WITH_R_SHELL_STEP = { [StepOutputFormat.Json]: text => text, [StepOutputFormat.RdfQuads]: parseToQuads }, - dependencies: [] + dependencies: [], + requiredInput: { + /** This is the {@link RShell} connection to be used to obtain the original parses AST of the R code */ + shell: undefined as unknown as RShell, + /** The request which essentially indicates the input to extract the AST from */ + request: undefined as unknown as RParseRequest + } } as const satisfies DeepReadonly> diff --git a/src/core/steps/all/10-normalize.ts b/src/core/steps/all/10-normalize.ts index 6410c89a98..34b7d6485d 100644 --- a/src/core/steps/all/10-normalize.ts +++ b/src/core/steps/all/10-normalize.ts @@ -1,4 +1,4 @@ -import { normalize, retrieveXmlFromRCode } from '../../../r-bridge' +import { IdGenerator, NoInfo, normalize, 
XmlParserHooks } from '../../../r-bridge' import { internalPrinter, StepOutputFormat } from '../../print/print' import { normalizedAstToJson, @@ -7,7 +7,7 @@ import { printNormalizedAstToMermaidUrl } from '../../print/normalize-printer' import { IStep } from '../step' -import { DeepReadonly } from 'ts-essentials' +import { DeepPartial, DeepReadonly } from 'ts-essentials' export const NORMALIZE = { name: 'normalize', @@ -21,5 +21,11 @@ export const NORMALIZE = { [StepOutputFormat.Mermaid]: printNormalizedAstToMermaid, [StepOutputFormat.MermaidUrl]: printNormalizedAstToMermaidUrl }, - dependencies: [ 'parse' ] + dependencies: [ 'parse' ], + requiredInput: { + /** These hooks only make sense if you at least want to normalize the parsed R AST. They can augment the normalization process */ + hooks: undefined as unknown as DeepPartial, + /** This id generator is only necessary if you want to retrieve a dataflow from the parsed R AST, it determines the id generator to use and by default uses the {@link deterministicCountingIdGenerator}*/ + getId: undefined as unknown as IdGenerator + } } as const satisfies DeepReadonly> diff --git a/src/core/steps/all/20-dataflow.ts b/src/core/steps/all/20-dataflow.ts index f42ea1fd44..21415d2d56 100644 --- a/src/core/steps/all/20-dataflow.ts +++ b/src/core/steps/all/20-dataflow.ts @@ -8,7 +8,6 @@ import { dataflowGraphToQuads } from '../../print/dataflow-printer' import { DeepReadonly } from 'ts-essentials' -import { normalize } from '../../../r-bridge' export const LEGACY_STATIC_DATAFLOW = { name: 'dataflow', diff --git a/src/core/steps/all/30-slice.ts b/src/core/steps/all/30-slice.ts index 4f9310b7e4..a372764709 100644 --- a/src/core/steps/all/30-slice.ts +++ b/src/core/steps/all/30-slice.ts @@ -1,6 +1,6 @@ import { internalPrinter, StepOutputFormat } from '../../print/print' import { IStep } from '../step' -import { staticSlicing } from '../../../slicing' +import { SlicingCriteria, staticSlicing } from '../../../slicing' import { 
DeepReadonly } from 'ts-essentials' export const STATIC_SLICE = { @@ -11,5 +11,9 @@ export const STATIC_SLICE = { printer: { [StepOutputFormat.Internal]: internalPrinter }, - dependencies: [ 'dataflow' ] + dependencies: [ 'dataflow' ], + requiredInput: { + /** The slicing criterion is only of interest if you actually want to slice the R code */ + criterion: undefined as unknown as SlicingCriteria + } } as const satisfies DeepReadonly> diff --git a/src/core/steps/all/40-reconstruct.ts b/src/core/steps/all/40-reconstruct.ts index c200b3a469..3b39c485d5 100644 --- a/src/core/steps/all/40-reconstruct.ts +++ b/src/core/steps/all/40-reconstruct.ts @@ -1,6 +1,6 @@ import { internalPrinter, StepOutputFormat } from '../../print/print' import { IStep } from '../step' -import { reconstructToCode } from '../../../slicing' +import { AutoSelectPredicate, reconstructToCode } from '../../../slicing' import { DeepReadonly } from 'ts-essentials' export const NAIVE_RECONSTRUCT = { @@ -11,5 +11,9 @@ export const NAIVE_RECONSTRUCT = { printer: { [StepOutputFormat.Internal]: internalPrinter }, - dependencies: [ 'slice' ] + dependencies: [ 'slice' ], + requiredInput: { + /** If you want to auto-select something in the reconstruction add it here, otherwise, it will use the default defined alongside {@link reconstructToCode}*/ + autoSelectIf: undefined as unknown as AutoSelectPredicate + } } as const satisfies DeepReadonly> diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index 16385242ec..634da2dfff 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -1,6 +1,5 @@ import { IStep, NameOfStep } from '../step' import { verifyAndBuildPipeline } from './dependency-checker' -import { StepName } from '../steps' /** * A pipeline is a collection of {@link Pipeline#steps|steps} that are executed in a certain {@link Pipeline#order|order}. 
diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index d787c51e2b..7266f6b718 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -71,6 +71,13 @@ export interface IStep< // we always want to have the internal printer [StepOutputFormat.Internal]: InternalStepPrinter } + /** + * Input configuration required to perform the respective steps. + * Required inputs of dependencies do not have to be repeated. + *

+ * Use the pattern `undefined as unknown as T` to indicate that the value is required but not provided. + */ + readonly requiredInput?: Record } From 024aebc5481ec83305f6344fb4f80502f46b9300 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 20:06:46 +0100 Subject: [PATCH 041/104] feat(pipeline): collect pipeline output --- src/core/steps/pipeline/pipeline.ts | 2 ++ test/functionality/pipelines/create/dependency-check-tests.ts | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index 634da2dfff..2f37580f6e 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -27,6 +27,8 @@ export type PipelineStepProcessorWithName

= PipelineStepWithName['printer'] export type PipelineStepResultWithName

= Awaited>> +export type PipelineInput

= PipelineStep

['requiredInput'] + /** * Creates a pipeline from the given steps. * Refer to {@link verifyAndBuildPipeline} for details and constraints on the steps. diff --git a/test/functionality/pipelines/create/dependency-check-tests.ts b/test/functionality/pipelines/create/dependency-check-tests.ts index 10479e513d..96687142ea 100644 --- a/test/functionality/pipelines/create/dependency-check-tests.ts +++ b/test/functionality/pipelines/create/dependency-check-tests.ts @@ -1,4 +1,4 @@ -import { createPipeline, Pipeline, PipelineStepNames } from '../../../../src/core/steps/pipeline' +import { createPipeline, Pipeline, PipelineInput, PipelineStepNames } from '../../../../src/core/steps/pipeline' import { IStep, NameOfStep } from '../../../../src/core/steps' import { expect } from 'chai' import { PARSE_WITH_R_SHELL_STEP } from '../../../../src/core/steps/all/00-parse' From f03acfefe4746ee3dec53c9694cd5df27235ae08 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 20:16:36 +0100 Subject: [PATCH 042/104] refactor(pipeline): propose pipeline output --- src/core/steps/pipeline/pipeline.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index 2f37580f6e..e9d74f8d52 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -28,6 +28,9 @@ export type PipelineStepPrintersWithName

= Awaited>> export type PipelineInput

= PipelineStep

['requiredInput'] +export type PipelineOutput

= { + [K in PipelineStepNames

]: PipelineStepResultWithName +} /** * Creates a pipeline from the given steps. From beaa75b4853a6dc933850899cba4172bcf5ec72f Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 20:38:20 +0100 Subject: [PATCH 043/104] refactor(phases): now talk about when a step is to be executed --- src/core/pipeline-executor.ts | 254 +++++++++++++++++++++++++++ src/core/steps/all/00-parse.ts | 4 +- src/core/steps/all/10-normalize.ts | 4 +- src/core/steps/all/20-dataflow.ts | 4 +- src/core/steps/all/30-slice.ts | 4 +- src/core/steps/all/40-reconstruct.ts | 4 +- src/core/steps/step.ts | 10 +- src/core/steps/steps.ts | 12 +- 8 files changed, 278 insertions(+), 18 deletions(-) create mode 100644 src/core/pipeline-executor.ts diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts new file mode 100644 index 0000000000..c09547cb62 --- /dev/null +++ b/src/core/pipeline-executor.ts @@ -0,0 +1,254 @@ +/* +import { + NormalizedAst, + NoInfo, +} from '../r-bridge' +import { + executeSingleSubStep, LAST_PER_FILE_STEP, LAST_STEP, + StepRequired, STEPS, + STEPS_PER_FILE, + STEPS_PER_SLICE, + SteppingSlicerInput, + StepResults, + StepResult, StepName +} from './steps' +import { guard } from '../util/assert' +import { SliceResult, SlicingCriteria } from '../slicing' +import { DeepPartial } from 'ts-essentials' +import { DataflowInformation } from '../dataflow/internal/info' +import { Pipeline, PipelineInput, PipelineOutput } from './steps/pipeline' + +/!** + * TODO: This is ultimately the root of flowR's static slicing procedure. + * It clearly defines the steps that are to be executed and splits them into two stages. + * - `once-per-file`: for steps that are executed once per file. These can be performed *without* the knowledge of a slicing criteria, + * and they can be cached and re-used if you want to slice the same file multiple times. + * - `once-per-slice`: for steps that are executed once per slice. These can only be performed *with* a slicing criteria. 
+ * + * Furthermore, this stepper follows an iterable fashion to be *as flexible as possible* (e.g., to be instrumented with measurements). + * So, you can use the stepping slicer like this: + * + * ```ts + * const slicer = new SteppingSlicer({ ... }) + * while(slicer.hasNextStep()) { + * await slicer.nextStep() + * } + * + * slicer.switchToSliceStage() + * + * while(slicer.hasNextStep()) { + * await slicer.nextStep() + * } + * + * const result = slicer.getResults() + * ``` + * + * Of course, you might think, that this is rather overkill if you simply want to receive the slice of a given input source or in general + * the result of any step. And this is true. Therefore, if you do not want to perform some kind of magic in-between steps, you can use the + * **{@link allRemainingSteps}** function like this: + * + * ```ts + * const slicer = new SteppingSlicer({ ... }) + * const result = await slicer.allRemainingSteps() + * ``` + * + * As the name suggest, you can combine this name with previous calls to {@link nextStep} to only execute the remaining steps. + * + * Giving the **step of interest** allows you to declare the maximum step to execute. + * So, if you pass `dataflow` as the step of interest, the stepping slicer will stop after the dataflow step. + * If you do not pass a step, the stepping slicer will execute all steps. + * + * By default, the {@link PipelineExecutor} does not offer an automatic way to repeat the per-slice steps for multiple slices (this is mostly to prevent accidental errors). + * However, you can use the **{@link updateCriterion}** function to reset the per-slice steps and re-execute them for a new slice. This allows something like the following: + * + * ```ts + * const slicer = new SteppingSlicer({ ... }) + * const result = await slicer.allRemainingSteps() + * + * slicer.updateCriterion(...) 
+ * const result2 = await slicer.allRemainingSteps() + * ``` + * + * @note Even though, using the stepping slicer introduces some performance overhead, we consider + * it to be the baseline for performance benchmarking. It may very well be possible to squeeze out some more performance by + * directly constructing the steps in the right order. However, we consider this to be negligible when compared with the time required + * for, for example, the dataflow analysis. + * + * @see retrieveResultOfStep + * @see PipelineExecutor#doNextStep + * @see StepName + *!/ +export class PipelineExecutor

{ + // TODO: handle per-file and per-slice + + private readonly input: PipelineInput

+ private output = {} as PipelineOutput

+ + private stepCounter = 0 + + /!** + * Create a new stepping slicer. For more details on the arguments please see {@link SteppingSlicerInput}. + *!/ + constructor(input: PipelineInput

) { + this.input = input + } + + /!** + * Retrieve the current stage the stepping slicer is in. + * @see StepRequired + * @see switchToSliceStage + *!/ + public getCurrentStage(): StepRequired { + return this.stage + } + + /!** + * Switch to the next stage of the stepping slicer. + * @see PipelineExecutor + * @see getCurrentStage + *!/ + public switchToSliceStage(): void { + guard(this.stepCounter === PipelineExecutor.maximumNumberOfStepsPerFile, 'First need to complete all steps before switching') + guard(this.stage === 'once-per-file', 'Cannot switch to next stage, already in once-per-slice stage') + this.stage = 'once-per-slice' + } + + + public getResults(intermediate?:false): StepResults + public getResults(intermediate: true): Partial> + /!** + * Returns the result of the step of interest, as well as the results of all steps before it. + * + * @param intermediate - normally you can only receive the results *after* the stepper completed the step of interested. + * However, if you pass `true` to this parameter, you can also receive the results *before* the step of interest, + * although the typing system then can not guarantee which of the steps have already happened. + *!/ + public getResults(intermediate = false): StepResults | Partial> { + guard(intermediate || this.reachedWanted, 'Before reading the results, we need to reach the step we are interested in') + return this.results as StepResults + } + + /!** + * Returns true only if 1) there are more steps to-do for the current stage and 2) we have not yet reached the step we are interested in + *!/ + public hasNextStep(): boolean { + return !this.reachedWanted && (this.stage === 'once-per-file' ? 
+ this.stepCounter < PipelineExecutor.maximumNumberOfStepsPerFile + : this.stepCounter < PipelineExecutor.maximumNumberOfStepsPerSlice + ) + } + + /!** + * Execute the next step (guarded with {@link hasNextStep}) and return the name of the step that was executed, so you can guard if the step differs from what you are interested in. + * Furthermore, it returns the step's result. + * + * The `step` parameter is a safeguard if you want to retrieve the result. + * If given, it causes the execution to fail if the next step is not the one you expect. + * *Without step, please refrain from accessing the result.* + *!/ + public async nextStep(expectedStepName?: PassedName): Promise<{ + name: typeof expectedStepName extends undefined ? StepName : PassedName + result: typeof expectedStepName extends undefined ? unknown : StepResult> + }> { + guard(this.hasNextStep(), 'No more steps to do') + + const guardStep = this.getGuardStep(expectedStepName) + + const { step, result } = await this.doNextStep(guardStep) + + this.results[step] = result + this.stepCounter += 1 + if(this.stepOfInterest === step) { + this.reachedWanted = true + } + + return { name: step as PassedName, result: result as StepResult } + } + + private getGuardStep(expectedStepName: StepName | undefined) { + return expectedStepName === undefined ? 
+ (name: K): K => name + : + (name: K): K => { + guard(expectedStepName === name, `Expected step ${expectedStepName} but got ${name}`) + return name + } + } + + private async doNextStep(guardStep: (name: K) => K) { + let step: StepName + let result: unknown + + switch(this.stepCounter) { + case 0: + step = guardStep('parse') + result = await executeSingleSubStep(step, this.request, this.shell) + break + case 1: + step = guardStep('normalize') + result = await executeSingleSubStep(step, this.results.parse as string, await this.shell.tokenMap(), this.hooks, this.getId) + break + case 2: + step = guardStep('dataflow') + result = executeSingleSubStep(step, this.results.normalize as NormalizedAst) + break + case 3: + guard(this.criterion !== undefined, 'Cannot decode criteria without a criterion') + step = guardStep('slice') + result = executeSingleSubStep(step, (this.results.dataflow as DataflowInformation).graph, this.results.normalize as NormalizedAst, this.criterion) + break + case 4: + step = guardStep('reconstruct') + result = executeSingleSubStep(step, this.results.normalize as NormalizedAst, (this.results.slice as SliceResult).result) + break + default: + throw new Error(`Unknown step ${this.stepCounter}, reaching this should not happen!`) + } + return { step, result } + } + + /!** + * This only makes sense if you have already sliced a file (e.g., by running up to the `slice` step) and want to do so again while caching the results. + * Or if for whatever reason you did not pass a criterion with the constructor. 
+ * + * @param newCriterion - the new slicing criterion to use for the next slice + *!/ + public updateCriterion(newCriterion: SlicingCriteria): void { + guard(this.stepCounter >= PipelineExecutor.maximumNumberOfStepsPerFile , 'Cannot reset slice prior to once-per-slice stage') + this.criterion = newCriterion + this.stepCounter = PipelineExecutor.maximumNumberOfStepsPerFile + this.results.slice = undefined + this.results.reconstruct = undefined + if(this.stepOfInterest === 'slice' || this.stepOfInterest === 'reconstruct') { + this.reachedWanted = false + } + } + + public async allRemainingSteps(canSwitchStage: false): Promise>> + public async allRemainingSteps(canSwitchStage?: true): Promise> + /!** + * Execute all remaining steps and automatically call {@link switchToSliceStage} if necessary. + * @param canSwitchStage - if true, automatically switch to the slice stage if necessary + * (i.e., this is what you want if you have never executed {@link nextStep} and you want to execute *all* steps). + * However, passing false allows you to only execute the steps of the 'once-per-file' stage (i.e., the steps that can be cached). + * + * @note There is a small type difference if you pass 'false' and already have manually switched to the 'once-per-slice' stage. + * Because now, the results of these steps are no longer part of the result type (although they are still included). + * In such a case, you may be better off with simply passing 'true' as the function will detect that the stage is already switched. + * We could solve this type problem by separating the SteppingSlicer class into two for each stage, but this would break the improved readability and unified handling + * of the slicer that I wanted to achieve with this class. 
+ *!/ + public async allRemainingSteps(canSwitchStage = true): Promise | Partial>> { + while(this.hasNextStep()) { + await this.nextStep() + } + if(canSwitchStage && !this.reachedWanted && this.stage === 'once-per-file') { + this.switchToSliceStage() + while(this.hasNextStep()) { + await this.nextStep() + } + } + return this.reachedWanted ? this.getResults() : this.getResults(true) + } +} +*/ diff --git a/src/core/steps/all/00-parse.ts b/src/core/steps/all/00-parse.ts index 4fc4d4c7ee..25716dece9 100644 --- a/src/core/steps/all/00-parse.ts +++ b/src/core/steps/all/00-parse.ts @@ -1,6 +1,6 @@ import { internalPrinter, StepOutputFormat } from '../../print/print' import { parseToQuads } from '../../print/parse-printer' -import { IStep } from '../step' +import { IStep, StepHasToBeExecuted } from '../step' import { retrieveXmlFromRCode, RParseRequest, RShell } from '../../../r-bridge' import { DeepReadonly } from 'ts-essentials' @@ -9,7 +9,7 @@ export const PARSE_WITH_R_SHELL_STEP = { name: 'parse', description: 'Parse the given R code into an AST', processor: retrieveXmlFromRCode, - required: 'once-per-file', + executed: StepHasToBeExecuted.OncePerFile, printer: { [StepOutputFormat.Internal]: internalPrinter, [StepOutputFormat.Json]: text => text, diff --git a/src/core/steps/all/10-normalize.ts b/src/core/steps/all/10-normalize.ts index 34b7d6485d..d852e579ba 100644 --- a/src/core/steps/all/10-normalize.ts +++ b/src/core/steps/all/10-normalize.ts @@ -6,14 +6,14 @@ import { printNormalizedAstToMermaid, printNormalizedAstToMermaidUrl } from '../../print/normalize-printer' -import { IStep } from '../step' +import { IStep, StepHasToBeExecuted } from '../step' import { DeepPartial, DeepReadonly } from 'ts-essentials' export const NORMALIZE = { name: 'normalize', description: 'Normalize the AST to flowR\'s AST (first step of the normalization)', processor: normalize, - required: 'once-per-file', + executed: StepHasToBeExecuted.OncePerFile, printer: { 
[StepOutputFormat.Internal]: internalPrinter, [StepOutputFormat.Json]: normalizedAstToJson, diff --git a/src/core/steps/all/20-dataflow.ts b/src/core/steps/all/20-dataflow.ts index 21415d2d56..f840bcc491 100644 --- a/src/core/steps/all/20-dataflow.ts +++ b/src/core/steps/all/20-dataflow.ts @@ -1,5 +1,5 @@ import { internalPrinter, StepOutputFormat } from '../../print/print' -import { IStep } from '../step' +import { IStep, StepHasToBeExecuted } from '../step' import { produceDataFlowGraph } from '../../../dataflow' import { dataflowGraphToJson, @@ -13,7 +13,7 @@ export const LEGACY_STATIC_DATAFLOW = { name: 'dataflow', description: 'Construct the dataflow graph', processor: produceDataFlowGraph, - required: 'once-per-file', + executed: StepHasToBeExecuted.OncePerFile, printer: { [StepOutputFormat.Internal]: internalPrinter, [StepOutputFormat.Json]: dataflowGraphToJson, diff --git a/src/core/steps/all/30-slice.ts b/src/core/steps/all/30-slice.ts index a372764709..bf181b7614 100644 --- a/src/core/steps/all/30-slice.ts +++ b/src/core/steps/all/30-slice.ts @@ -1,5 +1,5 @@ import { internalPrinter, StepOutputFormat } from '../../print/print' -import { IStep } from '../step' +import { IStep, StepHasToBeExecuted } from '../step' import { SlicingCriteria, staticSlicing } from '../../../slicing' import { DeepReadonly } from 'ts-essentials' @@ -7,7 +7,7 @@ export const STATIC_SLICE = { name: 'slice', description: 'Calculate the actual static slice from the dataflow graph and the given slicing criteria', processor: staticSlicing, - required: 'once-per-slice', + executed: StepHasToBeExecuted.OncePerRequest, printer: { [StepOutputFormat.Internal]: internalPrinter }, diff --git a/src/core/steps/all/40-reconstruct.ts b/src/core/steps/all/40-reconstruct.ts index 3b39c485d5..8c31049fa7 100644 --- a/src/core/steps/all/40-reconstruct.ts +++ b/src/core/steps/all/40-reconstruct.ts @@ -1,5 +1,5 @@ import { internalPrinter, StepOutputFormat } from '../../print/print' -import { IStep } 
from '../step' +import { IStep, StepHasToBeExecuted } from '../step' import { AutoSelectPredicate, reconstructToCode } from '../../../slicing' import { DeepReadonly } from 'ts-essentials' @@ -7,7 +7,7 @@ export const NAIVE_RECONSTRUCT = { name: 'reconstruct', description: 'Reconstruct R code from the static slice', processor: reconstructToCode, - required: 'once-per-slice', + executed: StepHasToBeExecuted.OncePerRequest, printer: { [StepOutputFormat.Internal]: internalPrinter }, diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index 7266f6b718..cd94abd23f 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -16,6 +16,12 @@ export type StepFunction = (...args: never[]) => unknown /** * This represents the required execution frequency of a step. */ +export const enum StepHasToBeExecuted { + /** This step has to be executed once per file */ + OncePerFile, + /** This step has to be executed once per request (e.g., slice for a given variable) */ + OncePerRequest +} export type StepRequired = 'once-per-file' | 'once-per-slice' // TODO: rename to StepName @@ -60,8 +66,8 @@ export interface IStep< readonly description: string /** The main processor that essentially performs the logic of this step */ readonly processor: (...input: Parameters) => ReturnType - /* does this step has to be repeated for each new slice or can it be performed only once in the initialization */ - readonly required: StepRequired + /* does this step has to be repeated for each new request or can it be performed only once in the initialization */ + readonly executed: StepHasToBeExecuted /** * How to visualize the results of the respective step to the user? 
*/ diff --git a/src/core/steps/steps.ts b/src/core/steps/steps.ts index ecb4b99cfc..0dcff8b176 100644 --- a/src/core/steps/steps.ts +++ b/src/core/steps/steps.ts @@ -33,7 +33,7 @@ import { dataflowGraphToQuads } from '../print/dataflow-printer' import { parseToQuads } from '../print/parse-printer' -import { IStep } from './step' +import { IStep, StepHasToBeExecuted } from './step' export const STEPS_PER_FILE = { @@ -41,7 +41,7 @@ export const STEPS_PER_FILE = { name: 'parse', description: 'Parse the given R code into an AST', processor: retrieveXmlFromRCode, - required: 'once-per-file', + executed: StepHasToBeExecuted.OncePerFile, printer: { [StepOutputFormat.Internal]: internalPrinter, [StepOutputFormat.Json]: text => text, @@ -53,7 +53,7 @@ export const STEPS_PER_FILE = { name: 'normalize', description: 'Normalize the AST to flowR\'s AST (first step of the normalization)', processor: normalize, - required: 'once-per-file', + executed: StepHasToBeExecuted.OncePerFile, printer: { [StepOutputFormat.Internal]: internalPrinter, [StepOutputFormat.Json]: normalizedAstToJson, @@ -67,7 +67,7 @@ export const STEPS_PER_FILE = { name: 'dataflow', description: 'Construct the dataflow graph', processor: produceDataFlowGraph, - required: 'once-per-file', + executed: StepHasToBeExecuted.OncePerFile, printer: { [StepOutputFormat.Internal]: internalPrinter, [StepOutputFormat.Json]: dataflowGraphToJson, @@ -84,7 +84,7 @@ export const STEPS_PER_SLICE = { name: 'slice', description: 'Calculate the actual static slice from the dataflow graph and the given slicing criteria', processor: staticSlicing, - required: 'once-per-slice', + executed: StepHasToBeExecuted.OncePerRequest, printer: { [StepOutputFormat.Internal]: internalPrinter }, @@ -94,7 +94,7 @@ export const STEPS_PER_SLICE = { name: 'reconstruct', description: 'Reconstruct R code from the static slice', processor: reconstructToCode, - required: 'once-per-slice', + executed: StepHasToBeExecuted.OncePerRequest, printer: { 
[StepOutputFormat.Internal]: internalPrinter }, From 96d58de7489b8350af848e9d472de0896eaa4196 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 20:41:50 +0100 Subject: [PATCH 044/104] refactor(pipeline): rename pipeline creation file from dependency check to `create-tests` --- src/core/steps/pipeline/{dependency-checker.ts => create.ts} | 0 src/core/steps/pipeline/pipeline.ts | 2 +- .../create/{dependency-check-tests.ts => create-tests.ts} | 4 ++-- 3 files changed, 3 insertions(+), 3 deletions(-) rename src/core/steps/pipeline/{dependency-checker.ts => create.ts} (100%) rename test/functionality/pipelines/create/{dependency-check-tests.ts => create-tests.ts} (97%) diff --git a/src/core/steps/pipeline/dependency-checker.ts b/src/core/steps/pipeline/create.ts similarity index 100% rename from src/core/steps/pipeline/dependency-checker.ts rename to src/core/steps/pipeline/create.ts diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index e9d74f8d52..17ae879e50 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -1,5 +1,5 @@ import { IStep, NameOfStep } from '../step' -import { verifyAndBuildPipeline } from './dependency-checker' +import { verifyAndBuildPipeline } from './create' /** * A pipeline is a collection of {@link Pipeline#steps|steps} that are executed in a certain {@link Pipeline#order|order}. 
diff --git a/test/functionality/pipelines/create/dependency-check-tests.ts b/test/functionality/pipelines/create/create-tests.ts similarity index 97% rename from test/functionality/pipelines/create/dependency-check-tests.ts rename to test/functionality/pipelines/create/create-tests.ts index 96687142ea..460bd1c0ac 100644 --- a/test/functionality/pipelines/create/dependency-check-tests.ts +++ b/test/functionality/pipelines/create/create-tests.ts @@ -1,4 +1,4 @@ -import { createPipeline, Pipeline, PipelineInput, PipelineStepNames } from '../../../../src/core/steps/pipeline' +import { createPipeline } from '../../../../src/core/steps/pipeline' import { IStep, NameOfStep } from '../../../../src/core/steps' import { expect } from 'chai' import { PARSE_WITH_R_SHELL_STEP } from '../../../../src/core/steps/all/00-parse' @@ -8,7 +8,7 @@ import { LEGACY_STATIC_DATAFLOW } from '../../../../src/core/steps/all/20-datafl import { STATIC_SLICE } from '../../../../src/core/steps/all/30-slice' import { NAIVE_RECONSTRUCT } from '../../../../src/core/steps/all/40-reconstruct' -describe('dependency check', () => { +describe('Create Pipeline (includes dependency checks)', () => { describe('error-cases', () => { function negative(name: string, rawSteps: IStep[], message: string | RegExp) { it(`${name} (all permutations)`, () => { From 0e8f2cc18aaf195a1722c21c4d8da6e158f47d51 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 21:24:34 +0100 Subject: [PATCH 045/104] refactor, wip(pipeline): working on execution-state aware pipeline creation --- src/core/steps/pipeline/create.ts | 57 +++++++++++++------ src/core/steps/pipeline/pipeline.ts | 12 ++-- src/core/steps/step.ts | 4 +- .../pipelines/create/create-tests.ts | 3 +- 4 files changed, 51 insertions(+), 25 deletions(-) diff --git a/src/core/steps/pipeline/create.ts b/src/core/steps/pipeline/create.ts index 13763d5f84..9b55c4966f 100644 --- a/src/core/steps/pipeline/create.ts +++ b/src/core/steps/pipeline/create.ts @@ 
-1,17 +1,17 @@ -import { IStep, NameOfStep } from '../step' +import { IStep, NameOfStep, StepHasToBeExecuted } from '../step' import { InvalidPipelineError } from './invalid-pipeline-error' import { Pipeline } from './pipeline' import { jsonReplacer } from '../../../util/json' - /** * Given a set of {@link IStep|steps} with their dependencies, this function verifies that * 0) the pipeline is not empty * 1) all names of steps are unique for the given pipeline - * 2) all {@link IStepOrder#dependencies|dependencies} of steps are valid (i.e., refer to existing steps) + * 2) all {@link IStepOrder#dependencies|dependencies} of all steps are exist * 3) there are no cycles in the dependency graph * 4) the target of a {@link IStepOrder#decorates|decoration} exists * 5) if a decoration applies, all of its dependencies are already in the pipeline + * 6) in the resulting pipeline, there is a strict cut between steps that are executed once per file and once per request * If successful, it returns the topologically sorted list of steps in order of desired execution. 
* @throws InvalidPipelineError if any of the above conditions are not met */ @@ -21,23 +21,37 @@ export function verifyAndBuildPipeline(steps: readonly IStep[]): Pipeline { } // we construct a map linking each name to its respective step - const stepMap = new Map() + const perFileStepMap = new Map() + const perRequestStepMap = new Map() // we track all elements without dependencies, i.e., those that start the pipeline - const inits: NameOfStep[] = [] - initializeSteps(steps, stepMap, inits) + const initsPerFile: NameOfStep[] = [] + const initsPerRequest: NameOfStep[] = [] + initializeSteps(steps, perFileStepMap, perRequestStepMap, initsPerFile, initsPerRequest) + + // first, we sort the per-file steps + const visited = new Set() + const sortedPerFile = topologicalSort(initsPerFile, perFileStepMap, visited) + validateMaps(sortedPerFile, perFileStepMap, steps) + + const allStepsMap = new Map([...perFileStepMap, ...perRequestStepMap]) + const sortedPerRequest = topologicalSort(initsPerRequest, allStepsMap, visited) + console.log(initsPerRequest, 'sortedPerRequest', sortedPerRequest) + validateMaps(sortedPerRequest, perRequestStepMap, steps) + + return { + steps: allStepsMap, + order: [...sortedPerFile, ...sortedPerRequest], + firstStepPerRequest: sortedPerRequest.length === 0 ? 
undefined : sortedPerFile.length + } +} - const sorted = topologicalSort(inits, stepMap) +function validateMaps(sorted: NameOfStep[], stepMap: Map, steps: readonly IStep[]) { if(sorted.length !== stepMap.size) { // check if any of the dependencies in the map are invalid checkForInvalidDependency(steps, stepMap) // otherwise, we assume a cycle throw new InvalidPipelineError(`3) Pipeline contains at least one cycle; sorted: ${JSON.stringify(sorted)}, steps: ${JSON.stringify([...stepMap.keys()])}`) } - - return { - steps: stepMap, - order: sorted - } } function allDependenciesAreVisited(step: IStep, visited: Set) { @@ -57,9 +71,8 @@ function handleStep(step: IStep, init: NameOfStep, visited: Set, sor } } -function topologicalSort(inits: NameOfStep[], stepMap: Map) { +function topologicalSort(inits: NameOfStep[], stepMap: Map, visited: Set) { const sorted: NameOfStep[] = [] - const visited = new Set() while(inits.length > 0) { const init = inits.pop() as NameOfStep @@ -117,17 +130,25 @@ function checkForInvalidDependency(steps: readonly IStep[], stepMap: Map, inits: NameOfStep[]) { +function initializeSteps(steps: readonly IStep[], stepMap: Map, perRequest: Map, initsPerFile: NameOfStep[], initsPerRequest: NameOfStep[]) { for(const step of steps) { const name = step.name // if the name is already in the map we have a duplicate - if(stepMap.has(name)) { + if(perFile.has(name) || perRequest.has(name)) { throw new InvalidPipelineError(`1) Step name "${name}" is not unique in the pipeline`) } - stepMap.set(name, step) + if(step.executed === StepHasToBeExecuted.OncePerFile) { + perFile.set(name, step) + } else { + perRequest.set(name, step) + } // only steps that have no dependencies and do not decorate others can be initial steps if(step.dependencies.length === 0 && step.decorates === undefined) { - inits.push(name) + if(step.executed === StepHasToBeExecuted.OncePerFile) { + initsPerFile.push(name) + } else { + initsPerRequest.push(name) + } } } } diff --git 
a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index 17ae879e50..801d33fd24 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -6,12 +6,16 @@ import { verifyAndBuildPipeline } from './create' * It is to be created {@link createPipeline}. * * If you want to get the type of all steps in the pipeline (given they are created canonically using const step names), refer to {@link PipelineStepNames}. - * - * TODO: group this for per-file and per-request steps/stages in general with arbitrary names? */ export interface Pipeline { - readonly steps: ReadonlyMap - readonly order: T['name'][] + readonly steps: ReadonlyMap + readonly order: T['name'][] + /** + * In the order, this is the index of the first step that + * is executed {@link StepHasToBeExecuted#OncePerRequest|once per request}. + * If undefined, all steps are executed {@link StepHasToBeExecuted#OncePerFile|once per file}. + */ + readonly firstStepPerRequest: number | undefined } /** diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index cd94abd23f..92218c876c 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -42,6 +42,8 @@ export interface IStepOrder { * Does not have to be transitive, this will be checked by the scheduler of the pipeline. */ readonly dependencies: readonly NameOfStep[] + /* does this step has to be repeated for each new request or can it be performed only once in the initialization */ + readonly executed: StepHasToBeExecuted /** * This is similar to {@link dependencies}, but is used to say that a given step _decorates_ another one. 
* This imbues two requirements: @@ -66,8 +68,6 @@ export interface IStep< readonly description: string /** The main processor that essentially performs the logic of this step */ readonly processor: (...input: Parameters) => ReturnType - /* does this step has to be repeated for each new request or can it be performed only once in the initialization */ - readonly executed: StepHasToBeExecuted /** * How to visualize the results of the respective step to the user? */ diff --git a/test/functionality/pipelines/create/create-tests.ts b/test/functionality/pipelines/create/create-tests.ts index 460bd1c0ac..0a0cad40cb 100644 --- a/test/functionality/pipelines/create/create-tests.ts +++ b/test/functionality/pipelines/create/create-tests.ts @@ -65,12 +65,13 @@ describe('Create Pipeline (includes dependency checks)', () => { }) }) describe('default behavior', () => { - function positive(name: string, rawSteps: IStep[], expected: NameOfStep[]) { + function positive(name: string, rawSteps: IStep[], expected: NameOfStep[], indexOfFirstPerFile: number | undefined = undefined) { it(`${name} (all permutations)`, () => { for(const steps of allPermutations(rawSteps)) { const pipeline = createPipeline(...steps) expect([...pipeline.steps.keys()]).to.have.members(expected, `should have the correct keys for ${JSON.stringify(steps)}`) expect(pipeline.order).to.have.ordered.members(expected, `should have the correct keys for ${JSON.stringify(steps)}`) + expect(pipeline.firstStepPerRequest).to.equal(indexOfFirstPerFile, `should have the correct firstStepPerRequest for ${JSON.stringify(steps)}`) } }) } From 195077ea103413f2860eaf6daff37b9a1dd856a3 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 21:36:56 +0100 Subject: [PATCH 046/104] feat(pipeline-create): rudimentary support for execution stages --- src/core/steps/pipeline/create.ts | 38 +++++++++---------- src/util/arrays.ts | 17 +++++++++ .../pipelines/create/create-tests.ts | 4 +- 3 files changed, 38 insertions(+), 21 
deletions(-) diff --git a/src/core/steps/pipeline/create.ts b/src/core/steps/pipeline/create.ts index 9b55c4966f..f9d328aad4 100644 --- a/src/core/steps/pipeline/create.ts +++ b/src/core/steps/pipeline/create.ts @@ -2,6 +2,7 @@ import { IStep, NameOfStep, StepHasToBeExecuted } from '../step' import { InvalidPipelineError } from './invalid-pipeline-error' import { Pipeline } from './pipeline' import { jsonReplacer } from '../../../util/json' +import { partitionArray } from '../../../util/arrays' /** * Given a set of {@link IStep|steps} with their dependencies, this function verifies that @@ -20,22 +21,29 @@ export function verifyAndBuildPipeline(steps: readonly IStep[]): Pipeline { throw new InvalidPipelineError('0) Pipeline is empty') } + const [perFileSteps, perRequestSteps] = partitionArray(steps, s => s.executed === StepHasToBeExecuted.OncePerFile) + // we construct a map linking each name to its respective step const perFileStepMap = new Map() - const perRequestStepMap = new Map() - // we track all elements without dependencies, i.e., those that start the pipeline const initsPerFile: NameOfStep[] = [] - const initsPerRequest: NameOfStep[] = [] - initializeSteps(steps, perFileStepMap, perRequestStepMap, initsPerFile, initsPerRequest) + const visited = new Set() + // we start by working on the per-file steps + initializeSteps(perFileSteps, perFileStepMap, initsPerFile, visited) // first, we sort the per-file steps - const visited = new Set() const sortedPerFile = topologicalSort(initsPerFile, perFileStepMap, visited) validateMaps(sortedPerFile, perFileStepMap, steps) + const perRequestStepMap = new Map() + // we track all elements without dependencies, i.e., those that start the pipeline + const initsPerRequest: NameOfStep[] = [] + + // now, we do the same for the per-request steps, keeping the per-file steps known + initializeSteps(perRequestSteps, perRequestStepMap, initsPerRequest, visited) + const allStepsMap = new Map([...perFileStepMap, 
...perRequestStepMap]) const sortedPerRequest = topologicalSort(initsPerRequest, allStepsMap, visited) - console.log(initsPerRequest, 'sortedPerRequest', sortedPerRequest) + validateMaps(sortedPerRequest, perRequestStepMap, steps) return { @@ -130,25 +138,17 @@ function checkForInvalidDependency(steps: readonly IStep[], stepMap: Map, perRequest: Map, initsPerFile: NameOfStep[], initsPerRequest: NameOfStep[]) { +function initializeSteps(steps: readonly IStep[], stepMap: Map, inits: NameOfStep[], visited: Set) { for(const step of steps) { const name = step.name // if the name is already in the map we have a duplicate - if(perFile.has(name) || perRequest.has(name)) { + if(stepMap.has(name)) { throw new InvalidPipelineError(`1) Step name "${name}" is not unique in the pipeline`) } - if(step.executed === StepHasToBeExecuted.OncePerFile) { - perFile.set(name, step) - } else { - perRequest.set(name, step) - } + stepMap.set(name, step) // only steps that have no dependencies and do not decorate others can be initial steps - if(step.dependencies.length === 0 && step.decorates === undefined) { - if(step.executed === StepHasToBeExecuted.OncePerFile) { - initsPerFile.push(name) - } else { - initsPerRequest.push(name) - } + if(allDependenciesAreVisited(step, visited) && (step.decorates === undefined || visited.has(step.decorates))) { + inits.push(name) } } } diff --git a/src/util/arrays.ts b/src/util/arrays.ts index 59ed4bbd68..56e85287c8 100644 --- a/src/util/arrays.ts +++ b/src/util/arrays.ts @@ -40,6 +40,23 @@ export function splitArrayOn(arr: T[], predicate: (elem: T) => boolean): T[][ return result } +/** + * Returns a tuple of two arrays, where the first one contains all elements for which the predicate returned true, + * and the second one contains all elements for which the predicate returned false. 
+ */ +export function partitionArray(arr: readonly T[], predicate: (elem: T) => boolean): [T[], T[]] { + const left: T[] = [] + const right: T[] = [] + for(const elem of arr) { + if(predicate(elem)) { + left.push(elem) + } else { + right.push(elem) + } + } + return [left, right] +} + /** * Generate all permutations of the given array using Heap's algorithm (with its non-recursive variant). * diff --git a/test/functionality/pipelines/create/create-tests.ts b/test/functionality/pipelines/create/create-tests.ts index 0a0cad40cb..dd2a342223 100644 --- a/test/functionality/pipelines/create/create-tests.ts +++ b/test/functionality/pipelines/create/create-tests.ts @@ -93,7 +93,7 @@ describe('Create Pipeline (includes dependency checks)', () => { LEGACY_STATIC_DATAFLOW, STATIC_SLICE, NAIVE_RECONSTRUCT - ], ['parse', 'normalize', 'dataflow', 'slice', 'reconstruct']) + ], ['parse', 'normalize', 'dataflow', 'slice', 'reconstruct'], 3) }) describe('with decorators', () => { positive('simple decorator on first step', [ @@ -158,7 +158,7 @@ describe('Create Pipeline (includes dependency checks)', () => { }, STATIC_SLICE, NAIVE_RECONSTRUCT - ], ['parse', 'normalize', 'dataflow', 'dataflow-decorator', 'slice', 'reconstruct']) + ], ['parse', 'normalize', 'dataflow', 'dataflow-decorator', 'slice', 'reconstruct'], 4) }) }) }) From bb36ae993253dbfe14e60caba9de6c07870f7b80 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 21:39:52 +0100 Subject: [PATCH 047/104] refactor(pipeline-creation): minor optimizing touches to pipeline optimizations --- src/core/steps/pipeline/create.ts | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/core/steps/pipeline/create.ts b/src/core/steps/pipeline/create.ts index f9d328aad4..1eb7712f9c 100644 --- a/src/core/steps/pipeline/create.ts +++ b/src/core/steps/pipeline/create.ts @@ -32,28 +32,27 @@ export function verifyAndBuildPipeline(steps: readonly IStep[]): Pipeline { 
initializeSteps(perFileSteps, perFileStepMap, initsPerFile, visited) // first, we sort the per-file steps const sortedPerFile = topologicalSort(initsPerFile, perFileStepMap, visited) - validateMaps(sortedPerFile, perFileStepMap, steps) + validateStepOutput(sortedPerFile, perFileStepMap, steps) - const perRequestStepMap = new Map() + const perRequestStepMap = new Map(perFileStepMap) // we track all elements without dependencies, i.e., those that start the pipeline const initsPerRequest: NameOfStep[] = [] // now, we do the same for the per-request steps, keeping the per-file steps known initializeSteps(perRequestSteps, perRequestStepMap, initsPerRequest, visited) - const allStepsMap = new Map([...perFileStepMap, ...perRequestStepMap]) - const sortedPerRequest = topologicalSort(initsPerRequest, allStepsMap, visited) - - validateMaps(sortedPerRequest, perRequestStepMap, steps) + const sortedPerRequest = topologicalSort(initsPerRequest, perRequestStepMap, visited) + const sorted = [...sortedPerFile, ...sortedPerRequest] + validateStepOutput(sorted, perRequestStepMap, steps) return { - steps: allStepsMap, - order: [...sortedPerFile, ...sortedPerRequest], + steps: perRequestStepMap, + order: sorted, firstStepPerRequest: sortedPerRequest.length === 0 ? 
undefined : sortedPerFile.length } } -function validateMaps(sorted: NameOfStep[], stepMap: Map, steps: readonly IStep[]) { +function validateStepOutput(sorted: NameOfStep[], stepMap: Map, steps: readonly IStep[]) { if(sorted.length !== stepMap.size) { // check if any of the dependencies in the map are invalid checkForInvalidDependency(steps, stepMap) @@ -62,7 +61,7 @@ function validateMaps(sorted: NameOfStep[], stepMap: Map, ste } } -function allDependenciesAreVisited(step: IStep, visited: Set) { +function allDependenciesAreVisited(step: IStep, visited: ReadonlySet) { return step.dependencies.every(d => visited.has(d)) } @@ -138,7 +137,7 @@ function checkForInvalidDependency(steps: readonly IStep[], stepMap: Map, inits: NameOfStep[], visited: Set) { +function initializeSteps(steps: readonly IStep[], stepMap: Map, inits: NameOfStep[], visited: ReadonlySet) { for(const step of steps) { const name = step.name // if the name is already in the map we have a duplicate From a7908ea20f6d27f1f0284653323c7a2020a6c7d8 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 21:55:33 +0100 Subject: [PATCH 048/104] refactor, wip(pipeline-executor): working on migrating the slicer semantics to the pipeline semantics --- src/core/pipeline-executor.ts | 105 ++++++++++++---------------- src/core/steps/pipeline/pipeline.ts | 2 +- 2 files changed, 47 insertions(+), 60 deletions(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index c09547cb62..6af5bc0f5d 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -1,24 +1,11 @@ -/* -import { - NormalizedAst, - NoInfo, -} from '../r-bridge' -import { - executeSingleSubStep, LAST_PER_FILE_STEP, LAST_STEP, - StepRequired, STEPS, - STEPS_PER_FILE, - STEPS_PER_SLICE, - SteppingSlicerInput, - StepResults, - StepResult, StepName -} from './steps' +import { NoInfo, NormalizedAst } from '../r-bridge' +import { executeSingleSubStep, StepHasToBeExecuted, StepName, StepResult, 
StepResults, STEPS_PER_SLICE } from './steps' import { guard } from '../util/assert' import { SliceResult, SlicingCriteria } from '../slicing' -import { DeepPartial } from 'ts-essentials' import { DataflowInformation } from '../dataflow/internal/info' import { Pipeline, PipelineInput, PipelineOutput } from './steps/pipeline' -/!** +/** * TODO: This is ultimately the root of flowR's static slicing procedure. * It clearly defines the steps that are to be executed and splits them into two stages. * - `once-per-file`: for steps that are executed once per file. These can be performed *without* the knowledge of a slicing criteria, @@ -77,75 +64,76 @@ import { Pipeline, PipelineInput, PipelineOutput } from './steps/pipeline' * @see retrieveResultOfStep * @see PipelineExecutor#doNextStep * @see StepName - *!/ + */ export class PipelineExecutor

{ - // TODO: handle per-file and per-slice - - private readonly input: PipelineInput

- private output = {} as PipelineOutput

+ private readonly pipeline: P + private readonly input: PipelineInput

+ private output: PipelineOutput

= {} as PipelineOutput

+ private currentExecutionStage = StepHasToBeExecuted.OncePerFile private stepCounter = 0 - /!** + /** * Create a new stepping slicer. For more details on the arguments please see {@link SteppingSlicerInput}. - *!/ - constructor(input: PipelineInput

) { + */ + constructor(pipeline: P, input: PipelineInput

) { + this.pipeline = pipeline this.input = input } - /!** - * Retrieve the current stage the stepping slicer is in. - * @see StepRequired - * @see switchToSliceStage - *!/ - public getCurrentStage(): StepRequired { - return this.stage + /** + * Retrieve the current stage the pipeline executor is in. + * @see currentExecutionStage + * @see switchToRequestStage + */ + public getCurrentStage(): StepHasToBeExecuted { + return this.currentExecutionStage } - /!** + /** * Switch to the next stage of the stepping slicer. * @see PipelineExecutor * @see getCurrentStage - *!/ - public switchToSliceStage(): void { - guard(this.stepCounter === PipelineExecutor.maximumNumberOfStepsPerFile, 'First need to complete all steps before switching') - guard(this.stage === 'once-per-file', 'Cannot switch to next stage, already in once-per-slice stage') - this.stage = 'once-per-slice' + */ + public switchToRequestStage(): void { + guard(this.pipeline.firstStepPerRequest === undefined || this.stepCounter === this.pipeline.firstStepPerRequest, 'First need to complete all steps before switching') + guard(this.currentExecutionStage === StepHasToBeExecuted.OncePerFile, 'Cannot switch to next stage, already in per-request stage.') + this.currentExecutionStage = StepHasToBeExecuted.OncePerRequest } - public getResults(intermediate?:false): StepResults - public getResults(intermediate: true): Partial> - /!** - * Returns the result of the step of interest, as well as the results of all steps before it. + public getResults(intermediate?:false): PipelineOutput

+ public getResults(intermediate: true): Partial> + /** + * Returns the results of the pipeline. * * @param intermediate - normally you can only receive the results *after* the stepper completed the step of interested. - * However, if you pass `true` to this parameter, you can also receive the results *before* the step of interest, + * However, if you pass `true` to this parameter, you can also receive the results *before* the pipeline completed, * although the typing system then can not guarantee which of the steps have already happened. - *!/ - public getResults(intermediate = false): StepResults | Partial> { - guard(intermediate || this.reachedWanted, 'Before reading the results, we need to reach the step we are interested in') - return this.results as StepResults + */ + public getResults(intermediate = false): PipelineOutput

| Partial> { + guard(intermediate || this.stepCounter >= this.pipeline.order.length, 'Without the intermediate flag, the pipeline must be completed before providing access to the results.') + return this.output } - /!** - * Returns true only if 1) there are more steps to-do for the current stage and 2) we have not yet reached the step we are interested in - *!/ + /** + * Returns true only if 1) there are more steps to-do for the current stage and 2) we have not yet reached the end of the pipeline. + */ public hasNextStep(): boolean { - return !this.reachedWanted && (this.stage === 'once-per-file' ? - this.stepCounter < PipelineExecutor.maximumNumberOfStepsPerFile - : this.stepCounter < PipelineExecutor.maximumNumberOfStepsPerSlice + return this.stepCounter < this.pipeline.order.length && ( + this.currentExecutionStage !== StepHasToBeExecuted.OncePerFile || + this.stepCounter < (this.pipeline.firstStepPerRequest ?? this.pipeline.order.length) ) } - /!** + /** * Execute the next step (guarded with {@link hasNextStep}) and return the name of the step that was executed, so you can guard if the step differs from what you are interested in. * Furthermore, it returns the step's result. * * The `step` parameter is a safeguard if you want to retrieve the result. * If given, it causes the execution to fail if the next step is not the one you expect. * *Without step, please refrain from accessing the result.* - *!/ + */ public async nextStep(expectedStepName?: PassedName): Promise<{ name: typeof expectedStepName extends undefined ? StepName : PassedName result: typeof expectedStepName extends undefined ? unknown : StepResult> @@ -207,12 +195,12 @@ export class PipelineExecutor

{ return { step, result } } - /!** + /** * This only makes sense if you have already sliced a file (e.g., by running up to the `slice` step) and want to do so again while caching the results. * Or if for whatever reason you did not pass a criterion with the constructor. * * @param newCriterion - the new slicing criterion to use for the next slice - *!/ + */ public updateCriterion(newCriterion: SlicingCriteria): void { guard(this.stepCounter >= PipelineExecutor.maximumNumberOfStepsPerFile , 'Cannot reset slice prior to once-per-slice stage') this.criterion = newCriterion @@ -226,7 +214,7 @@ export class PipelineExecutor

{ public async allRemainingSteps(canSwitchStage: false): Promise>> public async allRemainingSteps(canSwitchStage?: true): Promise> - /!** + /** * Execute all remaining steps and automatically call {@link switchToSliceStage} if necessary. * @param canSwitchStage - if true, automatically switch to the slice stage if necessary * (i.e., this is what you want if you have never executed {@link nextStep} and you want to execute *all* steps). @@ -237,7 +225,7 @@ export class PipelineExecutor

{ * In such a case, you may be better off with simply passing 'true' as the function will detect that the stage is already switched. * We could solve this type problem by separating the SteppingSlicer class into two for each stage, but this would break the improved readability and unified handling * of the slicer that I wanted to achieve with this class. - *!/ + */ public async allRemainingSteps(canSwitchStage = true): Promise | Partial>> { while(this.hasNextStep()) { await this.nextStep() @@ -251,4 +239,3 @@ export class PipelineExecutor

{ return this.reachedWanted ? this.getResults() : this.getResults(true) } } -*/ diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index 801d33fd24..62b872eb59 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -9,7 +9,7 @@ import { verifyAndBuildPipeline } from './create' */ export interface Pipeline { readonly steps: ReadonlyMap - readonly order: T['name'][] + readonly order: readonly T['name'][] /** * In the order, this is the index of the first step that * is executed {@link StepHasToBeExecuted#OncePerRequest|once per request}. From d10f614dbc83e058b7e9f6ed18c753dba3d27886 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 22:06:19 +0100 Subject: [PATCH 049/104] refactor, wip(pipeline-executor): migrate `nextStep` --- src/core/pipeline-executor.ts | 55 ++++++++++++++++++----------- src/core/steps/pipeline/pipeline.ts | 4 +-- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index 6af5bc0f5d..7f32426476 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -1,9 +1,23 @@ import { NoInfo, NormalizedAst } from '../r-bridge' -import { executeSingleSubStep, StepHasToBeExecuted, StepName, StepResult, StepResults, STEPS_PER_SLICE } from './steps' +import { + executeSingleSubStep, + NameOfStep, + StepHasToBeExecuted, + StepName, + StepResult, + StepResults, + STEPS_PER_SLICE +} from './steps' import { guard } from '../util/assert' import { SliceResult, SlicingCriteria } from '../slicing' import { DataflowInformation } from '../dataflow/internal/info' -import { Pipeline, PipelineInput, PipelineOutput } from './steps/pipeline' +import { + Pipeline, + PipelineInput, + PipelineOutput, + PipelineStepNames, + PipelineStepOutputWithName +} from './steps/pipeline' /** * TODO: This is ultimately the root of flowR's static slicing procedure. 
@@ -130,41 +144,42 @@ export class PipelineExecutor

{ * Execute the next step (guarded with {@link hasNextStep}) and return the name of the step that was executed, so you can guard if the step differs from what you are interested in. * Furthermore, it returns the step's result. * - * The `step` parameter is a safeguard if you want to retrieve the result. - * If given, it causes the execution to fail if the next step is not the one you expect. - * *Without step, please refrain from accessing the result.* + * @param expectedStepName - A safeguard if you want to retrieve the result. + * If given, it causes the execution to fail if the next step is not the one you expect. + * + * *Without `expectedStepName`, please refrain from accessing the result, as you have no safeguards if the pipeline changes.* */ - public async nextStep(expectedStepName?: PassedName): Promise<{ - name: typeof expectedStepName extends undefined ? StepName : PassedName - result: typeof expectedStepName extends undefined ? unknown : StepResult> + public async nextStep(expectedStepName?: PassedName): Promise<{ + name: typeof expectedStepName extends undefined ? NameOfStep : PassedName + result: typeof expectedStepName extends undefined ? unknown : PipelineStepOutputWithName }> { - guard(this.hasNextStep(), 'No more steps to do') + guard(this.hasNextStep(), 'No more steps to do in the pipeline.') const guardStep = this.getGuardStep(expectedStepName) const { step, result } = await this.doNextStep(guardStep) - this.results[step] = result + this.output[step as PipelineStepNames

] = result this.stepCounter += 1 - if(this.stepOfInterest === step) { - this.reachedWanted = true - } - return { name: step as PassedName, result: result as StepResult } + return { name: step as PassedName, result } } - private getGuardStep(expectedStepName: StepName | undefined) { + private getGuardStep(expectedStepName: NameOfStep | undefined) { return expectedStepName === undefined ? - (name: K): K => name + (name: K): K => name : - (name: K): K => { - guard(expectedStepName === name, `Expected step ${expectedStepName} but got ${name}`) + (name: K): K => { + guard(expectedStepName === name, `Expected step ${expectedStepName} but got ${String(name)}`) return name } } - private async doNextStep(guardStep: (name: K) => K) { - let step: StepName + private async doNextStep(guardStep: (name: K) => K): Promise<{ + step: NameOfStep, + result: PipelineStepOutputWithName + }> { + let step: NameOfStep let result: unknown switch(this.stepCounter) { diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index 62b872eb59..474e76e4d5 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -29,11 +29,11 @@ export type PipelineStep

= P extends Pipeline ? U : export type PipelineStepWithName

= P extends Pipeline ? U extends IStep ? U : never : never export type PipelineStepProcessorWithName

= PipelineStepWithName['processor'] export type PipelineStepPrintersWithName

= PipelineStepWithName['printer'] -export type PipelineStepResultWithName

= Awaited>> +export type PipelineStepOutputWithName

= Awaited>> export type PipelineInput

= PipelineStep

['requiredInput'] export type PipelineOutput

= { - [K in PipelineStepNames

]: PipelineStepResultWithName + [K in PipelineStepNames

]: PipelineStepOutputWithName } /** From 49064cc0520b97950d0d332676e257530faa65f0 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 22:11:55 +0100 Subject: [PATCH 050/104] refactor, wip(pipeline): we need to unify the design of a pipeline step with regards to its input arguments --- src/core/pipeline-executor.ts | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index 7f32426476..49b6f53d57 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -179,34 +179,14 @@ export class PipelineExecutor

{ step: NameOfStep, result: PipelineStepOutputWithName }> { - let step: NameOfStep + guard(this.stepCounter >= 0 && this.stepCounter < this.pipeline.order.length, `Cannot execute next step, already reached end of pipeline or unexpected index (${this.stepCounter}).`) + const step = this.pipeline.steps.get(this.pipeline.order[this.stepCounter]) + guard(step !== undefined, `Cannot execute next step, step ${this.pipeline.order[this.stepCounter]} does not exist.`) let result: unknown - switch(this.stepCounter) { - case 0: - step = guardStep('parse') - result = await executeSingleSubStep(step, this.request, this.shell) - break - case 1: - step = guardStep('normalize') - result = await executeSingleSubStep(step, this.results.parse as string, await this.shell.tokenMap(), this.hooks, this.getId) - break - case 2: - step = guardStep('dataflow') - result = executeSingleSubStep(step, this.results.normalize as NormalizedAst) - break - case 3: - guard(this.criterion !== undefined, 'Cannot decode criteria without a criterion') - step = guardStep('slice') - result = executeSingleSubStep(step, (this.results.dataflow as DataflowInformation).graph, this.results.normalize as NormalizedAst, this.criterion) - break - case 4: - step = guardStep('reconstruct') - result = executeSingleSubStep(step, this.results.normalize as NormalizedAst, (this.results.slice as SliceResult).result) - break - default: - throw new Error(`Unknown step ${this.stepCounter}, reaching this should not happen!`) - } + guardStep(step.name) + result = await executeSingleSubStep(step, this.request, this.shell) + return { step, result } } From 44c03476cd44578ec5b83f3b0fc538f803562d84 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 22:13:36 +0100 Subject: [PATCH 051/104] refactor(pipeline-print): move the pipeline print function to the correct file --- src/cli/repl/server/connection.ts | 3 +-- src/cli/statistics-helper-app.ts | 3 +-- src/core/pipeline-executor.ts | 42 +++++++++++++++---------------- 
src/core/print/print.ts | 20 ++++++++++++++- src/core/steps/print.ts | 21 ---------------- 5 files changed, 42 insertions(+), 47 deletions(-) delete mode 100644 src/core/steps/print.ts diff --git a/src/cli/repl/server/connection.ts b/src/cli/repl/server/connection.ts index 257967eb52..4331807471 100644 --- a/src/cli/repl/server/connection.ts +++ b/src/cli/repl/server/connection.ts @@ -24,9 +24,8 @@ import { cfg2quads, ControlFlowInformation, extractCFG } from '../../../util/cfg import { defaultQuadIdGenerator, QuadSerializationConfiguration } from '../../../util/quads' import { deepMergeObject } from '../../../util/objects' import { LogLevel } from '../../../util/log' -import { StepOutputFormat } from '../../../core/print/print' +import { printStepResult, StepOutputFormat } from '../../../core/print/print' import { DataflowInformation } from '../../../dataflow/internal/info' -import { printStepResult } from '../../../core/steps/print' import { PARSE_WITH_R_SHELL_STEP } from '../../../core/steps/all/00-parse' import { NORMALIZE } from '../../../core/steps/all/10-normalize' import { LEGACY_STATIC_DATAFLOW } from '../../../core/steps/all/20-dataflow' diff --git a/src/cli/statistics-helper-app.ts b/src/cli/statistics-helper-app.ts index df73bce6da..ddc723620b 100644 --- a/src/cli/statistics-helper-app.ts +++ b/src/cli/statistics-helper-app.ts @@ -12,9 +12,8 @@ import { create } from 'tar' import fs from 'fs' import { guard } from '../util/assert' import { retrieveArchiveName } from './common/features' -import { StepOutputFormat } from '../core/print/print' +import { printStepResult, StepOutputFormat } from '../core/print/print' import { date2string } from '../util/time' -import { printStepResult } from '../core/steps/print' import { PARSE_WITH_R_SHELL_STEP } from '../core/steps/all/00-parse' import { NORMALIZE } from '../core/steps/all/10-normalize' import { LEGACY_STATIC_DATAFLOW } from '../core/steps/all/20-dataflow' diff --git a/src/core/pipeline-executor.ts 
b/src/core/pipeline-executor.ts index 49b6f53d57..9051f1c61b 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -1,11 +1,10 @@ -import { NoInfo, NormalizedAst } from '../r-bridge' +/* import { executeSingleSubStep, NameOfStep, StepHasToBeExecuted, StepName, - StepResult, - StepResults, + StepResults, STEPS_PER_SLICE } from './steps' import { guard } from '../util/assert' @@ -19,7 +18,7 @@ import { PipelineStepOutputWithName } from './steps/pipeline' -/** +/!** * TODO: This is ultimately the root of flowR's static slicing procedure. * It clearly defines the steps that are to be executed and splits them into two stages. * - `once-per-file`: for steps that are executed once per file. These can be performed *without* the knowledge of a slicing criteria, @@ -78,7 +77,7 @@ import { * @see retrieveResultOfStep * @see PipelineExecutor#doNextStep * @see StepName - */ + *!/ export class PipelineExecutor

{ private readonly pipeline: P private readonly input: PipelineInput

@@ -87,28 +86,28 @@ export class PipelineExecutor

{ private currentExecutionStage = StepHasToBeExecuted.OncePerFile private stepCounter = 0 - /** + /!** * Create a new stepping slicer. For more details on the arguments please see {@link SteppingSlicerInput}. - */ + *!/ constructor(pipeline: P, input: PipelineInput

) { this.pipeline = pipeline this.input = input } - /** + /!** * Retrieve the current stage the pipeline executor is in. * @see currentExecutionStage * @see switchToRequestStage - */ + *!/ public getCurrentStage(): StepHasToBeExecuted { return this.currentExecutionStage } - /** + /!** * Switch to the next stage of the stepping slicer. * @see PipelineExecutor * @see getCurrentStage - */ + *!/ public switchToRequestStage(): void { guard(this.pipeline.firstStepPerRequest === undefined || this.stepCounter === this.pipeline.firstStepPerRequest, 'First need to complete all steps before switching') guard(this.currentExecutionStage === StepHasToBeExecuted.OncePerFile, 'Cannot switch to next stage, already in per-request stage.') @@ -118,21 +117,21 @@ export class PipelineExecutor

{ public getResults(intermediate?:false): PipelineOutput

public getResults(intermediate: true): Partial> - /** + /!** * Returns the results of the pipeline. * * @param intermediate - normally you can only receive the results *after* the stepper completed the step of interested. * However, if you pass `true` to this parameter, you can also receive the results *before* the pipeline completed, * although the typing system then can not guarantee which of the steps have already happened. - */ + *!/ public getResults(intermediate = false): PipelineOutput

| Partial> { guard(intermediate || this.stepCounter >= this.pipeline.order.length, 'Without the intermediate flag, the pipeline must be completed before providing access to the results.') return this.output } - /** + /!** * Returns true only if 1) there are more steps to-do for the current stage and 2) we have not yet reached the end of the pipeline. - */ + *!/ public hasNextStep(): boolean { return this.stepCounter < this.pipeline.order.length && ( this.currentExecutionStage !== StepHasToBeExecuted.OncePerFile || @@ -140,7 +139,7 @@ export class PipelineExecutor

{ ) } - /** + /!** * Execute the next step (guarded with {@link hasNextStep}) and return the name of the step that was executed, so you can guard if the step differs from what you are interested in. * Furthermore, it returns the step's result. * @@ -148,7 +147,7 @@ export class PipelineExecutor

{ * If given, it causes the execution to fail if the next step is not the one you expect. * * *Without `expectedStepName`, please refrain from accessing the result, as you have no safeguards if the pipeline changes.* - */ + *!/ public async nextStep(expectedStepName?: PassedName): Promise<{ name: typeof expectedStepName extends undefined ? NameOfStep : PassedName result: typeof expectedStepName extends undefined ? unknown : PipelineStepOutputWithName @@ -190,12 +189,12 @@ export class PipelineExecutor

{ return { step, result } } - /** + /!** * This only makes sense if you have already sliced a file (e.g., by running up to the `slice` step) and want to do so again while caching the results. * Or if for whatever reason you did not pass a criterion with the constructor. * * @param newCriterion - the new slicing criterion to use for the next slice - */ + *!/ public updateCriterion(newCriterion: SlicingCriteria): void { guard(this.stepCounter >= PipelineExecutor.maximumNumberOfStepsPerFile , 'Cannot reset slice prior to once-per-slice stage') this.criterion = newCriterion @@ -209,7 +208,7 @@ export class PipelineExecutor

{ public async allRemainingSteps(canSwitchStage: false): Promise>> public async allRemainingSteps(canSwitchStage?: true): Promise> - /** + /!** * Execute all remaining steps and automatically call {@link switchToSliceStage} if necessary. * @param canSwitchStage - if true, automatically switch to the slice stage if necessary * (i.e., this is what you want if you have never executed {@link nextStep} and you want to execute *all* steps). @@ -220,7 +219,7 @@ export class PipelineExecutor

{ * In such a case, you may be better off with simply passing 'true' as the function will detect that the stage is already switched. * We could solve this type problem by separating the SteppingSlicer class into two for each stage, but this would break the improved readability and unified handling * of the slicer that I wanted to achieve with this class. - */ + *!/ public async allRemainingSteps(canSwitchStage = true): Promise | Partial>> { while(this.hasNextStep()) { await this.nextStep() @@ -234,3 +233,4 @@ export class PipelineExecutor

{ return this.reachedWanted ? this.getResults() : this.getResults(true) } } +*/ diff --git a/src/core/print/print.ts b/src/core/print/print.ts index 824e7f29e0..9bec882877 100644 --- a/src/core/print/print.ts +++ b/src/core/print/print.ts @@ -1,4 +1,6 @@ -import { StepFunction } from '../steps' +import { IStep, StepFunction } from '../steps' +import { TailOfArray } from '../../util/arrays' +import { guard } from '../../util/assert' /** * Defines the output format of a step that you are interested in. @@ -56,3 +58,19 @@ export type IStepPrinter>, ...additional: AdditionalInput) => Promise | string export type InternalStepPrinter = IStepPrinter + +/** + * For a `step` of the given name, which returned the given `data`. Convert that data into the given `format`. + * Depending on your step and the format this may require `additional` inputs. + */ +export function printStepResult< + Step extends IStep, + Processor extends Step['processor'], + Format extends Exclude & number, + Printer extends Step['printer'][Format], + AdditionalInput extends TailOfArray>, +>(step: Step, data: Awaited>, format: Format, ...additional: AdditionalInput): Promise { + const printer = step.printer[format] as IStepPrinter | undefined + guard(printer !== undefined, `printer for ${step.name} does not support ${String(format)}`) + return printer(data, ...additional) as Promise +} diff --git a/src/core/steps/print.ts b/src/core/steps/print.ts deleted file mode 100644 index 3cdfe56b13..0000000000 --- a/src/core/steps/print.ts +++ /dev/null @@ -1,21 +0,0 @@ -import { IStepPrinter, StepOutputFormat } from '../print/print' -import { guard } from '../../util/assert' -import { TailOfArray } from '../../util/arrays' -import { IStep } from './step' - - -/** - * For a `step` of the given name, which returned the given `data`. Convert that data into the given `format`. - * Depending on your step and the format this may require `additional` inputs. 
- */ -export function printStepResult< - Step extends IStep, - Processor extends Step['processor'], - Format extends Exclude & number, - Printer extends Step['printer'][Format], - AdditionalInput extends TailOfArray>, ->(step: Step, data: Awaited>, format: Format, ...additional: AdditionalInput): Promise { - const printer = step.printer[format] as IStepPrinter | undefined - guard(printer !== undefined, `printer for ${step.name} does not support ${String(format)}`) - return printer(data, ...additional) as Promise -} From 246e95ed027c071d276466ed41d37720415e86ec Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 22:43:06 +0100 Subject: [PATCH 052/104] refactor, wip(pipeline-steps): start to port new common processor style --- src/core/print/print.ts | 6 +++--- src/core/steps/all/00-parse.ts | 25 +++++++++++++++---------- src/core/steps/step.ts | 31 ++++++++++++++++++++++--------- 3 files changed, 40 insertions(+), 22 deletions(-) diff --git a/src/core/print/print.ts b/src/core/print/print.ts index 9bec882877..f99e842aee 100644 --- a/src/core/print/print.ts +++ b/src/core/print/print.ts @@ -1,4 +1,4 @@ -import { IStep, StepFunction } from '../steps' +import { IStep, StepProcessingFunction } from '../steps' import { TailOfArray } from '../../util/arrays' import { guard } from '../../util/assert' @@ -53,11 +53,11 @@ export function internalPrinter(input: Input): Input { * * For the internal format, refer to {@link InternalStepPrinter} as a shorthand. */ -export type IStepPrinter = +export type IStepPrinter = Format extends StepOutputFormat.Internal ? (input: Awaited>) => Awaited> : (input: Awaited>, ...additional: AdditionalInput) => Promise | string -export type InternalStepPrinter = IStepPrinter +export type InternalStepPrinter = IStepPrinter /** * For a `step` of the given name, which returned the given `data`. Convert that data into the given `format`. 
diff --git a/src/core/steps/all/00-parse.ts b/src/core/steps/all/00-parse.ts index 25716dece9..89c2ff639d 100644 --- a/src/core/steps/all/00-parse.ts +++ b/src/core/steps/all/00-parse.ts @@ -3,23 +3,28 @@ import { parseToQuads } from '../../print/parse-printer' import { IStep, StepHasToBeExecuted } from '../step' import { retrieveXmlFromRCode, RParseRequest, RShell } from '../../../r-bridge' import { DeepReadonly } from 'ts-essentials' +import { guard } from '../../../util/assert' +const ParseRequiredInput = { + /** This is the {@link RShell} connection to be used to obtain the original parses AST of the R code */ + shell: undefined as unknown as RShell, + /** The request which essentially indicates the input to extract the AST from */ + request: undefined as unknown as RParseRequest +} as const export const PARSE_WITH_R_SHELL_STEP = { name: 'parse', description: 'Parse the given R code into an AST', - processor: retrieveXmlFromRCode, - executed: StepHasToBeExecuted.OncePerFile, - printer: { + processor: (results: object, input: Partial) => { + guard(input.request !== undefined && input.shell !== undefined, 'Required input not provided') + return retrieveXmlFromRCode(input.request, input.shell) + }, + executed: StepHasToBeExecuted.OncePerFile, + printer: { [StepOutputFormat.Internal]: internalPrinter, [StepOutputFormat.Json]: text => text, [StepOutputFormat.RdfQuads]: parseToQuads }, dependencies: [], - requiredInput: { - /** This is the {@link RShell} connection to be used to obtain the original parses AST of the R code */ - shell: undefined as unknown as RShell, - /** The request which essentially indicates the input to extract the AST from */ - request: undefined as unknown as RParseRequest - } -} as const satisfies DeepReadonly> + requiredInput: ParseRequiredInput +} as const satisfies DeepReadonly) => ReturnType>> diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index 92218c876c..baa3a3d59d 100644 --- a/src/core/steps/step.ts +++ 
b/src/core/steps/step.ts @@ -7,12 +7,21 @@ import { MergeableRecord } from '../../util/objects' import { InternalStepPrinter, IStepPrinter, StepOutputFormat } from '../print/print' - /** - * This represents close a function that we know completely nothing about. - * Nevertheless, this is the basis of what a step processor should look like. + * This represents the format of a step processor which retrieves two things: + * + * 1) the input configuration as passed to the {@link PipelineExecutor}. + * 2) the output produced by the previous steps. + * + * Please be aware, that if the respective information is available is not ensured by the type system but rather + * ensured at runtime by your dependencies. If you want to make sure, that the information is present, + * list all steps that you require as your {@link IStepOrder#dependencies|dependencies}, even if they would be + * already covered transitively. + * + * TODO: we could use prototypic cores for each step name */ -export type StepFunction = (...args: never[]) => unknown +export type StepProcessingFunction = + (results: Record, input: Record) => unknown /** * This represents the required execution frequency of a step. */ @@ -30,7 +39,10 @@ export type NameOfStep = string & { __brand?: 'StepName' } /** * Contains the data to specify the order of {@link IStep|steps} in a pipeline. */ -export interface IStepOrder { +export interface IStepOrder< + Name extends NameOfStep = NameOfStep, + Dependencies extends readonly NameOfStep[] = readonly NameOfStep[] +> { /** * Name of the respective step, it does not have to be unique in general but only unique per-pipeline. * In other words, you can have multiple steps with a name like `parse` as long as you use only one of them in a given pipeline. @@ -41,7 +53,7 @@ export interface IStepOrder { * Give the names of other steps this one requires to be completed as a prerequisite (e.g., to gain access to their input). 
* Does not have to be transitive, this will be checked by the scheduler of the pipeline. */ - readonly dependencies: readonly NameOfStep[] + readonly dependencies: Dependencies /* does this step has to be repeated for each new request or can it be performed only once in the initialization */ readonly executed: StepHasToBeExecuted /** @@ -55,15 +67,16 @@ export interface IStepOrder { } /** - * Defines what is to be known of a single step in the slicing process. + * Defines what is to be known of a single step in a pipeline. * It wraps around a single {@link IStep#processor|processor} function, providing additional information. * Steps will be executed synchronously, in-sequence, based on their {@link IStep#dependencies|dependencies}. */ export interface IStep< Name extends NameOfStep = NameOfStep, + Dependencies extends readonly NameOfStep[] = readonly NameOfStep[], // eslint-disable-next-line -- by default, we assume nothing about the function shape - Fn extends StepFunction = (...args: any[]) => any, -> extends MergeableRecord, IStepOrder { + Fn extends StepProcessingFunction = (...args: any[]) => any, +> extends MergeableRecord, IStepOrder { /** Human-readable description of this step */ readonly description: string /** The main processor that essentially performs the logic of this step */ From cff01af69a66e873975903be953d84869eb9fd7d Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 23:08:45 +0100 Subject: [PATCH 053/104] refactor, wip(pipeline-steps): basic type unidifications for step types --- src/core/steps/all/00-parse.ts | 6 ++--- src/core/steps/all/10-normalize.ts | 35 +++++++++++++++++++--------- src/core/steps/all/20-dataflow.ts | 13 +++++++---- src/core/steps/all/30-slice.ts | 29 ++++++++++++++++------- src/core/steps/all/40-reconstruct.ts | 27 ++++++++++++++------- src/core/steps/step.ts | 8 +++---- 6 files changed, 78 insertions(+), 40 deletions(-) diff --git a/src/core/steps/all/00-parse.ts b/src/core/steps/all/00-parse.ts index 
89c2ff639d..6074dafc61 100644 --- a/src/core/steps/all/00-parse.ts +++ b/src/core/steps/all/00-parse.ts @@ -5,7 +5,7 @@ import { retrieveXmlFromRCode, RParseRequest, RShell } from '../../../r-bridge' import { DeepReadonly } from 'ts-essentials' import { guard } from '../../../util/assert' -const ParseRequiredInput = { +export const ParseRequiredInput = { /** This is the {@link RShell} connection to be used to obtain the original parses AST of the R code */ shell: undefined as unknown as RShell, /** The request which essentially indicates the input to extract the AST from */ @@ -15,7 +15,7 @@ const ParseRequiredInput = { export const PARSE_WITH_R_SHELL_STEP = { name: 'parse', description: 'Parse the given R code into an AST', - processor: (results: object, input: Partial) => { + processor: (_results: object, input: Partial) => { guard(input.request !== undefined && input.shell !== undefined, 'Required input not provided') return retrieveXmlFromRCode(input.request, input.shell) }, @@ -27,4 +27,4 @@ export const PARSE_WITH_R_SHELL_STEP = { }, dependencies: [], requiredInput: ParseRequiredInput -} as const satisfies DeepReadonly) => ReturnType>> +} as const satisfies DeepReadonly) => ReturnType>> diff --git a/src/core/steps/all/10-normalize.ts b/src/core/steps/all/10-normalize.ts index d852e579ba..c0bcbfd536 100644 --- a/src/core/steps/all/10-normalize.ts +++ b/src/core/steps/all/10-normalize.ts @@ -1,4 +1,9 @@ -import { IdGenerator, NoInfo, normalize, XmlParserHooks } from '../../../r-bridge' +import { + IdGenerator, + NoInfo, + normalize, + XmlParserHooks +} from '../../../r-bridge' import { internalPrinter, StepOutputFormat } from '../../print/print' import { normalizedAstToJson, @@ -8,13 +13,26 @@ import { } from '../../print/normalize-printer' import { IStep, StepHasToBeExecuted } from '../step' import { DeepPartial, DeepReadonly } from 'ts-essentials' +import { ParseRequiredInput } from './00-parse' +import { guard } from '../../../util/assert' + +export const 
NormalizeRequiredInput = { + ...ParseRequiredInput, + /** These hooks only make sense if you at least want to normalize the parsed R AST. They can augment the normalization process */ + hooks: undefined as unknown as DeepPartial, + /** This id generator is only necessary if you want to retrieve a dataflow from the parsed R AST, it determines the id generator to use and by default uses the {@link deterministicCountingIdGenerator}*/ + getId: undefined as unknown as IdGenerator +} as const export const NORMALIZE = { name: 'normalize', description: 'Normalize the AST to flowR\'s AST (first step of the normalization)', - processor: normalize, - executed: StepHasToBeExecuted.OncePerFile, - printer: { + processor: async(results: { parse?: string }, input: Partial) => { + guard(results.parse !== undefined && input.request !== undefined && input.shell !== undefined, 'Required input not provided') + return normalize(results.parse, await input.shell.tokenMap(), input.hooks, input.getId) + }, + executed: StepHasToBeExecuted.OncePerFile, + printer: { [StepOutputFormat.Internal]: internalPrinter, [StepOutputFormat.Json]: normalizedAstToJson, [StepOutputFormat.RdfQuads]: normalizedAstToQuads, @@ -22,10 +40,5 @@ export const NORMALIZE = { [StepOutputFormat.MermaidUrl]: printNormalizedAstToMermaidUrl }, dependencies: [ 'parse' ], - requiredInput: { - /** These hooks only make sense if you at least want to normalize the parsed R AST. 
They can augment the normalization process */ - hooks: undefined as unknown as DeepPartial, - /** This id generator is only necessary if you want to retrieve a dataflow from the parsed R AST, it determines the id generator to use and by default uses the {@link deterministicCountingIdGenerator}*/ - getId: undefined as unknown as IdGenerator - } -} as const satisfies DeepReadonly> + requiredInput: NormalizeRequiredInput +} as const satisfies DeepReadonly) => ReturnType>> diff --git a/src/core/steps/all/20-dataflow.ts b/src/core/steps/all/20-dataflow.ts index f840bcc491..05791370cf 100644 --- a/src/core/steps/all/20-dataflow.ts +++ b/src/core/steps/all/20-dataflow.ts @@ -8,13 +8,18 @@ import { dataflowGraphToQuads } from '../../print/dataflow-printer' import { DeepReadonly } from 'ts-essentials' +import { NormalizedAst } from '../../../r-bridge' +import { guard } from '../../../util/assert' export const LEGACY_STATIC_DATAFLOW = { name: 'dataflow', description: 'Construct the dataflow graph', - processor: produceDataFlowGraph, - executed: StepHasToBeExecuted.OncePerFile, - printer: { + processor: (results: { normalize?: NormalizedAst }) => { + guard(results.normalize !== undefined, 'Required input not provided') + return produceDataFlowGraph(results.normalize) + }, + executed: StepHasToBeExecuted.OncePerFile, + printer: { [StepOutputFormat.Internal]: internalPrinter, [StepOutputFormat.Json]: dataflowGraphToJson, [StepOutputFormat.RdfQuads]: dataflowGraphToQuads, @@ -22,4 +27,4 @@ export const LEGACY_STATIC_DATAFLOW = { [StepOutputFormat.MermaidUrl]: dataflowGraphToMermaidUrl }, dependencies: [ 'normalize' ] -} as const satisfies DeepReadonly> +} as const satisfies DeepReadonly ReturnType>> diff --git a/src/core/steps/all/30-slice.ts b/src/core/steps/all/30-slice.ts index bf181b7614..722dced79f 100644 --- a/src/core/steps/all/30-slice.ts +++ b/src/core/steps/all/30-slice.ts @@ -2,18 +2,31 @@ import { internalPrinter, StepOutputFormat } from '../../print/print' import { 
IStep, StepHasToBeExecuted } from '../step' import { SlicingCriteria, staticSlicing } from '../../../slicing' import { DeepReadonly } from 'ts-essentials' +import { NormalizeRequiredInput } from './10-normalize' +import { DataflowInformation } from '../../../dataflow/internal/info' +import { NormalizedAst } from '../../../r-bridge' +import { guard } from '../../../util/assert' + +export const SliceRequiredInput = { + ...NormalizeRequiredInput, + /** The slicing criterion is only of interest if you actually want to slice the R code */ + criterion: undefined as unknown as SlicingCriteria, + /** How many re-visits of the same node are ok? */ + threshold: 75 +} as const + export const STATIC_SLICE = { name: 'slice', description: 'Calculate the actual static slice from the dataflow graph and the given slicing criteria', - processor: staticSlicing, - executed: StepHasToBeExecuted.OncePerRequest, - printer: { + processor: (results: { dataflow?: DataflowInformation, normalize?: NormalizedAst }, input: Partial) => { + guard(results.dataflow !== undefined && results.normalize !== undefined && input.criterion !== undefined && input.threshold !== undefined, 'Required input not provided') + return staticSlicing(results.dataflow.graph, results.normalize, input.criterion, input.threshold) + }, + executed: StepHasToBeExecuted.OncePerRequest, + printer: { [StepOutputFormat.Internal]: internalPrinter }, dependencies: [ 'dataflow' ], - requiredInput: { - /** The slicing criterion is only of interest if you actually want to slice the R code */ - criterion: undefined as unknown as SlicingCriteria - } -} as const satisfies DeepReadonly> + requiredInput: SliceRequiredInput +} as const satisfies DeepReadonly) => ReturnType>> diff --git a/src/core/steps/all/40-reconstruct.ts b/src/core/steps/all/40-reconstruct.ts index 8c31049fa7..e98385ea43 100644 --- a/src/core/steps/all/40-reconstruct.ts +++ b/src/core/steps/all/40-reconstruct.ts @@ -1,19 +1,28 @@ import { internalPrinter, 
StepOutputFormat } from '../../print/print' import { IStep, StepHasToBeExecuted } from '../step' -import { AutoSelectPredicate, reconstructToCode } from '../../../slicing' +import { AutoSelectPredicate, reconstructToCode, SliceResult } from '../../../slicing' import { DeepReadonly } from 'ts-essentials' +import { NormalizedAst } from '../../../r-bridge' +import { SliceRequiredInput } from './30-slice' +import { guard } from '../../../util/assert' + +export const ReconstructRequiredInput = { + ...SliceRequiredInput, + /** If you want to auto-select something in the reconstruction add it here, otherwise, it will use the default defined alongside {@link reconstructToCode}*/ + autoSelectIf: undefined as unknown as AutoSelectPredicate +} as const export const NAIVE_RECONSTRUCT = { name: 'reconstruct', description: 'Reconstruct R code from the static slice', - processor: reconstructToCode, - executed: StepHasToBeExecuted.OncePerRequest, - printer: { + processor: (results: { normalize?: NormalizedAst, slice?: SliceResult }, input: Partial) => { + guard(results.normalize !== undefined && results.slice !== undefined && input.autoSelectIf !== undefined, 'Required input not provided') + return reconstructToCode(results.normalize, results.slice.result, input.autoSelectIf) + }, + executed: StepHasToBeExecuted.OncePerRequest, + printer: { [StepOutputFormat.Internal]: internalPrinter }, dependencies: [ 'slice' ], - requiredInput: { - /** If you want to auto-select something in the reconstruction add it here, otherwise, it will use the default defined alongside {@link reconstructToCode}*/ - autoSelectIf: undefined as unknown as AutoSelectPredicate - } -} as const satisfies DeepReadonly> + requiredInput: ReconstructRequiredInput +} as const satisfies DeepReadonly) => ReturnType>> diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index baa3a3d59d..fd7706d871 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -41,7 +41,6 @@ export type NameOfStep = string 
& { __brand?: 'StepName' } */ export interface IStepOrder< Name extends NameOfStep = NameOfStep, - Dependencies extends readonly NameOfStep[] = readonly NameOfStep[] > { /** * Name of the respective step, it does not have to be unique in general but only unique per-pipeline. @@ -53,7 +52,7 @@ export interface IStepOrder< * Give the names of other steps this one requires to be completed as a prerequisite (e.g., to gain access to their input). * Does not have to be transitive, this will be checked by the scheduler of the pipeline. */ - readonly dependencies: Dependencies + readonly dependencies: NameOfStep[] /* does this step has to be repeated for each new request or can it be performed only once in the initialization */ readonly executed: StepHasToBeExecuted /** @@ -73,10 +72,9 @@ export interface IStepOrder< */ export interface IStep< Name extends NameOfStep = NameOfStep, - Dependencies extends readonly NameOfStep[] = readonly NameOfStep[], // eslint-disable-next-line -- by default, we assume nothing about the function shape Fn extends StepProcessingFunction = (...args: any[]) => any, -> extends MergeableRecord, IStepOrder { +> extends MergeableRecord, IStepOrder { /** Human-readable description of this step */ readonly description: string /** The main processor that essentially performs the logic of this step */ @@ -92,7 +90,7 @@ export interface IStep< } /** * Input configuration required to perform the respective steps. - * Required inputs of dependencies do not have to be repeated. + * Required inputs of dependencies do not have to, but can be repeated. *

* Use the pattern `undefined as unknown as T` to indicate that the value is required but not provided. */ From 511e2552abc0d32611cfd51c43f28e41c363845f Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 23:11:23 +0100 Subject: [PATCH 054/104] refactor(steps): reuse step definitions in legacy const step structure --- src/core/steps/step.ts | 2 +- src/core/steps/steps.ts | 70 ++++++----------------------------------- 2 files changed, 11 insertions(+), 61 deletions(-) diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index fd7706d871..aa21dd4c74 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -52,7 +52,7 @@ export interface IStepOrder< * Give the names of other steps this one requires to be completed as a prerequisite (e.g., to gain access to their input). * Does not have to be transitive, this will be checked by the scheduler of the pipeline. */ - readonly dependencies: NameOfStep[] + readonly dependencies: readonly NameOfStep[] /* does this step has to be repeated for each new request or can it be performed only once in the initialization */ readonly executed: StepHasToBeExecuted /** diff --git a/src/core/steps/steps.ts b/src/core/steps/steps.ts index 0dcff8b176..c22d205aaa 100644 --- a/src/core/steps/steps.ts +++ b/src/core/steps/steps.ts @@ -34,72 +34,22 @@ import { } from '../print/dataflow-printer' import { parseToQuads } from '../print/parse-printer' import { IStep, StepHasToBeExecuted } from './step' +import { PARSE_WITH_R_SHELL_STEP } from './all/00-parse' +import { NORMALIZE } from './all/10-normalize' +import { LEGACY_STATIC_DATAFLOW } from './all/20-dataflow' +import { STATIC_SLICE } from './all/30-slice' +import { NAIVE_RECONSTRUCT } from './all/40-reconstruct' export const STEPS_PER_FILE = { - 'parse': { - name: 'parse', - description: 'Parse the given R code into an AST', - processor: retrieveXmlFromRCode, - executed: StepHasToBeExecuted.OncePerFile, - printer: { - [StepOutputFormat.Internal]: 
internalPrinter, - [StepOutputFormat.Json]: text => text, - [StepOutputFormat.RdfQuads]: parseToQuads - }, - dependencies: [] - } satisfies IStep<'parse', typeof retrieveXmlFromRCode>, - 'normalize': { - name: 'normalize', - description: 'Normalize the AST to flowR\'s AST (first step of the normalization)', - processor: normalize, - executed: StepHasToBeExecuted.OncePerFile, - printer: { - [StepOutputFormat.Internal]: internalPrinter, - [StepOutputFormat.Json]: normalizedAstToJson, - [StepOutputFormat.RdfQuads]: normalizedAstToQuads, - [StepOutputFormat.Mermaid]: printNormalizedAstToMermaid, - [StepOutputFormat.MermaidUrl]: printNormalizedAstToMermaidUrl - }, - dependencies: [] - } satisfies IStep<'normalize', typeof normalize>, - 'dataflow': { - name: 'dataflow', - description: 'Construct the dataflow graph', - processor: produceDataFlowGraph, - executed: StepHasToBeExecuted.OncePerFile, - printer: { - [StepOutputFormat.Internal]: internalPrinter, - [StepOutputFormat.Json]: dataflowGraphToJson, - [StepOutputFormat.RdfQuads]: dataflowGraphToQuads, - [StepOutputFormat.Mermaid]: dataflowGraphToMermaid, - [StepOutputFormat.MermaidUrl]: dataflowGraphToMermaidUrl - }, - dependencies: [] - } satisfies IStep<'dataflow', typeof produceDataFlowGraph> + 'parse': PARSE_WITH_R_SHELL_STEP, + 'normalize': NORMALIZE, + 'dataflow': LEGACY_STATIC_DATAFLOW } as const export const STEPS_PER_SLICE = { - 'slice': { - name: 'slice', - description: 'Calculate the actual static slice from the dataflow graph and the given slicing criteria', - processor: staticSlicing, - executed: StepHasToBeExecuted.OncePerRequest, - printer: { - [StepOutputFormat.Internal]: internalPrinter - }, - dependencies: [ ] - } satisfies IStep<'slice', typeof staticSlicing>, - 'reconstruct': { - name: 'reconstruct', - description: 'Reconstruct R code from the static slice', - processor: reconstructToCode, - executed: StepHasToBeExecuted.OncePerRequest, - printer: { - [StepOutputFormat.Internal]: internalPrinter - 
}, - dependencies: [ ] - } satisfies IStep<'reconstruct', typeof reconstructToCode> + 'slice': STATIC_SLICE, + 'reconstruct': NAIVE_RECONSTRUCT } as const export const STEPS = { ...STEPS_PER_FILE, ...STEPS_PER_SLICE } as const From 73ab0dafedeb858ec3fd74e4602461071c495e26 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 23:19:13 +0100 Subject: [PATCH 055/104] refactor(stepping-slicer): migrate the stepping slicer glue to our new format --- src/core/slicer.ts | 24 +++++++++++++++++++----- src/core/steps/all/40-reconstruct.ts | 4 ++-- test/functionality/_helper/shell.ts | 12 ++++++++++-- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/core/slicer.ts b/src/core/slicer.ts index 5154ee76c6..4d6de06d69 100644 --- a/src/core/slicer.ts +++ b/src/core/slicer.ts @@ -198,24 +198,38 @@ export class SteppingSlicer, (this.results.slice as SliceResult).result) + result = executeSingleSubStep(step, { + normalize: this.results.normalize as NormalizedAst, + slice: this.results.slice as SliceResult + }, {}) break default: throw new Error(`Unknown step ${this.stepCounter}, reaching this should not happen!`) diff --git a/src/core/steps/all/40-reconstruct.ts b/src/core/steps/all/40-reconstruct.ts index e98385ea43..3ad6ac037a 100644 --- a/src/core/steps/all/40-reconstruct.ts +++ b/src/core/steps/all/40-reconstruct.ts @@ -1,6 +1,6 @@ import { internalPrinter, StepOutputFormat } from '../../print/print' import { IStep, StepHasToBeExecuted } from '../step' -import { AutoSelectPredicate, reconstructToCode, SliceResult } from '../../../slicing' +import { autoSelectLibrary, AutoSelectPredicate, reconstructToCode, SliceResult } from '../../../slicing' import { DeepReadonly } from 'ts-essentials' import { NormalizedAst } from '../../../r-bridge' import { SliceRequiredInput } from './30-slice' @@ -9,7 +9,7 @@ import { guard } from '../../../util/assert' export const ReconstructRequiredInput = { ...SliceRequiredInput, /** If you want to auto-select something 
in the reconstruction add it here, otherwise, it will use the default defined alongside {@link reconstructToCode}*/ - autoSelectIf: undefined as unknown as AutoSelectPredicate + autoSelectIf: autoSelectLibrary as AutoSelectPredicate } as const export const NAIVE_RECONSTRUCT = { diff --git a/test/functionality/_helper/shell.ts b/test/functionality/_helper/shell.ts index 6574f53624..19d94dcda4 100644 --- a/test/functionality/_helper/shell.ts +++ b/test/functionality/_helper/shell.ts @@ -18,7 +18,8 @@ import { DataflowGraph, diffGraphsToMermaidUrl, graphToMermaidUrl } from '../../ import { SlicingCriteria } from '../../../src/slicing' import { testRequiresRVersion } from './version' import { deepMergeObject, MergeableRecord } from '../../../src/util/objects' -import { executeSingleSubStep, LAST_STEP, SteppingSlicer } from '../../../src/core' +import { LAST_STEP, SteppingSlicer } from '../../../src/core' +import { NAIVE_RECONSTRUCT } from '../../../src/core/steps/all/40-reconstruct' export const testWithShell = (msg: string, fn: (shell: RShell, test: Mocha.Context) => void | Promise): Mocha.Test => { return it(msg, async function(): Promise { @@ -188,7 +189,14 @@ export function assertReconstructed(name: string, shell: RShell, input: string, request: requestFromInput(input), shell }).allRemainingSteps() - const reconstructed = executeSingleSubStep('reconstruct', result.normalize, new Set(selectedIds)) + const reconstructed = NAIVE_RECONSTRUCT.processor({ + normalize: result.normalize, + slice: { + decodedCriteria: [], + timesHitThreshold: 0, + result: new Set(selectedIds) + } + }, {}) assert.strictEqual(reconstructed.code, expected, `got: ${reconstructed.code}, vs. 
expected: ${expected}, for input ${input} (ids: ${printIdMapping(selectedIds, result.normalize.idMap)})`) }) } From 8673956d10f72f4c10a3e1da398cccd6cb6ea5d3 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 23:26:42 +0100 Subject: [PATCH 056/104] test-fix: update the stepping slicer to work with the new step passing style :3 --- src/core/steps/all/10-normalize.ts | 2 +- src/core/steps/all/30-slice.ts | 4 ++-- src/core/steps/all/40-reconstruct.ts | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/steps/all/10-normalize.ts b/src/core/steps/all/10-normalize.ts index c0bcbfd536..9d9c25bda5 100644 --- a/src/core/steps/all/10-normalize.ts +++ b/src/core/steps/all/10-normalize.ts @@ -28,7 +28,7 @@ export const NORMALIZE = { name: 'normalize', description: 'Normalize the AST to flowR\'s AST (first step of the normalization)', processor: async(results: { parse?: string }, input: Partial) => { - guard(results.parse !== undefined && input.request !== undefined && input.shell !== undefined, 'Required input not provided') + guard(results.parse !== undefined && input.shell !== undefined, 'Required input not provided') return normalize(results.parse, await input.shell.tokenMap(), input.hooks, input.getId) }, executed: StepHasToBeExecuted.OncePerFile, diff --git a/src/core/steps/all/30-slice.ts b/src/core/steps/all/30-slice.ts index 722dced79f..98fb5e3ef8 100644 --- a/src/core/steps/all/30-slice.ts +++ b/src/core/steps/all/30-slice.ts @@ -11,7 +11,7 @@ export const SliceRequiredInput = { ...NormalizeRequiredInput, /** The slicing criterion is only of interest if you actually want to slice the R code */ criterion: undefined as unknown as SlicingCriteria, - /** How many re-visits of the same node are ok? */ + /** How many re-visits of the same node are ok? TODO: use default? 
*/ threshold: 75 } as const @@ -20,7 +20,7 @@ export const STATIC_SLICE = { name: 'slice', description: 'Calculate the actual static slice from the dataflow graph and the given slicing criteria', processor: (results: { dataflow?: DataflowInformation, normalize?: NormalizedAst }, input: Partial) => { - guard(results.dataflow !== undefined && results.normalize !== undefined && input.criterion !== undefined && input.threshold !== undefined, 'Required input not provided') + guard(results.dataflow !== undefined && results.normalize !== undefined && input.criterion !== undefined, 'Required input not provided') return staticSlicing(results.dataflow.graph, results.normalize, input.criterion, input.threshold) }, executed: StepHasToBeExecuted.OncePerRequest, diff --git a/src/core/steps/all/40-reconstruct.ts b/src/core/steps/all/40-reconstruct.ts index 3ad6ac037a..c8c2d901fc 100644 --- a/src/core/steps/all/40-reconstruct.ts +++ b/src/core/steps/all/40-reconstruct.ts @@ -16,7 +16,7 @@ export const NAIVE_RECONSTRUCT = { name: 'reconstruct', description: 'Reconstruct R code from the static slice', processor: (results: { normalize?: NormalizedAst, slice?: SliceResult }, input: Partial) => { - guard(results.normalize !== undefined && results.slice !== undefined && input.autoSelectIf !== undefined, 'Required input not provided') + guard(results.normalize !== undefined && results.slice !== undefined, 'Required input not provided') return reconstructToCode(results.normalize, results.slice.result, input.autoSelectIf) }, executed: StepHasToBeExecuted.OncePerRequest, From c2a30f70c1fad315a7a6ca30c57e8e30ab551347 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 23:32:58 +0100 Subject: [PATCH 057/104] refactor(steps): defer guards for step processors -> if we are wired wrongly, we have more problems --- src/core/steps/all/00-parse.ts | 10 +++------- src/core/steps/all/10-normalize.ts | 11 ++++------- src/core/steps/all/20-dataflow.ts | 10 +++------- 
src/core/steps/all/30-slice.ts | 10 +++------- src/core/steps/all/40-reconstruct.ts | 10 +++------- 5 files changed, 16 insertions(+), 35 deletions(-) diff --git a/src/core/steps/all/00-parse.ts b/src/core/steps/all/00-parse.ts index 6074dafc61..6cace2f9b4 100644 --- a/src/core/steps/all/00-parse.ts +++ b/src/core/steps/all/00-parse.ts @@ -3,7 +3,6 @@ import { parseToQuads } from '../../print/parse-printer' import { IStep, StepHasToBeExecuted } from '../step' import { retrieveXmlFromRCode, RParseRequest, RShell } from '../../../r-bridge' import { DeepReadonly } from 'ts-essentials' -import { guard } from '../../../util/assert' export const ParseRequiredInput = { /** This is the {@link RShell} connection to be used to obtain the original parses AST of the R code */ @@ -15,12 +14,9 @@ export const ParseRequiredInput = { export const PARSE_WITH_R_SHELL_STEP = { name: 'parse', description: 'Parse the given R code into an AST', - processor: (_results: object, input: Partial) => { - guard(input.request !== undefined && input.shell !== undefined, 'Required input not provided') - return retrieveXmlFromRCode(input.request, input.shell) - }, - executed: StepHasToBeExecuted.OncePerFile, - printer: { + processor: (_results: object, input: Partial) => retrieveXmlFromRCode(input.request as RParseRequest, input.shell as RShell), + executed: StepHasToBeExecuted.OncePerFile, + printer: { [StepOutputFormat.Internal]: internalPrinter, [StepOutputFormat.Json]: text => text, [StepOutputFormat.RdfQuads]: parseToQuads diff --git a/src/core/steps/all/10-normalize.ts b/src/core/steps/all/10-normalize.ts index 9d9c25bda5..8801a7a0fb 100644 --- a/src/core/steps/all/10-normalize.ts +++ b/src/core/steps/all/10-normalize.ts @@ -1,7 +1,7 @@ import { IdGenerator, NoInfo, - normalize, + normalize, RParseRequest, RShell, XmlParserHooks } from '../../../r-bridge' import { internalPrinter, StepOutputFormat } from '../../print/print' @@ -27,12 +27,9 @@ export const NormalizeRequiredInput = { export 
const NORMALIZE = { name: 'normalize', description: 'Normalize the AST to flowR\'s AST (first step of the normalization)', - processor: async(results: { parse?: string }, input: Partial) => { - guard(results.parse !== undefined && input.shell !== undefined, 'Required input not provided') - return normalize(results.parse, await input.shell.tokenMap(), input.hooks, input.getId) - }, - executed: StepHasToBeExecuted.OncePerFile, - printer: { + processor: async(results: { parse?: string }, input: Partial) => normalize(results.parse as string, await (input.shell as RShell).tokenMap(), input.hooks, input.getId), + executed: StepHasToBeExecuted.OncePerFile, + printer: { [StepOutputFormat.Internal]: internalPrinter, [StepOutputFormat.Json]: normalizedAstToJson, [StepOutputFormat.RdfQuads]: normalizedAstToQuads, diff --git a/src/core/steps/all/20-dataflow.ts b/src/core/steps/all/20-dataflow.ts index 05791370cf..a440863cdb 100644 --- a/src/core/steps/all/20-dataflow.ts +++ b/src/core/steps/all/20-dataflow.ts @@ -9,17 +9,13 @@ import { } from '../../print/dataflow-printer' import { DeepReadonly } from 'ts-essentials' import { NormalizedAst } from '../../../r-bridge' -import { guard } from '../../../util/assert' export const LEGACY_STATIC_DATAFLOW = { name: 'dataflow', description: 'Construct the dataflow graph', - processor: (results: { normalize?: NormalizedAst }) => { - guard(results.normalize !== undefined, 'Required input not provided') - return produceDataFlowGraph(results.normalize) - }, - executed: StepHasToBeExecuted.OncePerFile, - printer: { + processor: (results: { normalize?: NormalizedAst }) => produceDataFlowGraph(results.normalize as NormalizedAst), + executed: StepHasToBeExecuted.OncePerFile, + printer: { [StepOutputFormat.Internal]: internalPrinter, [StepOutputFormat.Json]: dataflowGraphToJson, [StepOutputFormat.RdfQuads]: dataflowGraphToQuads, diff --git a/src/core/steps/all/30-slice.ts b/src/core/steps/all/30-slice.ts index 98fb5e3ef8..ea23231ac4 100644 --- 
a/src/core/steps/all/30-slice.ts +++ b/src/core/steps/all/30-slice.ts @@ -5,7 +5,6 @@ import { DeepReadonly } from 'ts-essentials' import { NormalizeRequiredInput } from './10-normalize' import { DataflowInformation } from '../../../dataflow/internal/info' import { NormalizedAst } from '../../../r-bridge' -import { guard } from '../../../util/assert' export const SliceRequiredInput = { ...NormalizeRequiredInput, @@ -19,12 +18,9 @@ export const SliceRequiredInput = { export const STATIC_SLICE = { name: 'slice', description: 'Calculate the actual static slice from the dataflow graph and the given slicing criteria', - processor: (results: { dataflow?: DataflowInformation, normalize?: NormalizedAst }, input: Partial) => { - guard(results.dataflow !== undefined && results.normalize !== undefined && input.criterion !== undefined, 'Required input not provided') - return staticSlicing(results.dataflow.graph, results.normalize, input.criterion, input.threshold) - }, - executed: StepHasToBeExecuted.OncePerRequest, - printer: { + processor: (results: { dataflow?: DataflowInformation, normalize?: NormalizedAst }, input: Partial) => staticSlicing((results.dataflow as DataflowInformation).graph, results.normalize as NormalizedAst, input.criterion as SlicingCriteria, input.threshold), + executed: StepHasToBeExecuted.OncePerRequest, + printer: { [StepOutputFormat.Internal]: internalPrinter }, dependencies: [ 'dataflow' ], diff --git a/src/core/steps/all/40-reconstruct.ts b/src/core/steps/all/40-reconstruct.ts index c8c2d901fc..965c663279 100644 --- a/src/core/steps/all/40-reconstruct.ts +++ b/src/core/steps/all/40-reconstruct.ts @@ -4,7 +4,6 @@ import { autoSelectLibrary, AutoSelectPredicate, reconstructToCode, SliceResult import { DeepReadonly } from 'ts-essentials' import { NormalizedAst } from '../../../r-bridge' import { SliceRequiredInput } from './30-slice' -import { guard } from '../../../util/assert' export const ReconstructRequiredInput = { ...SliceRequiredInput, @@ 
-15,12 +14,9 @@ export const ReconstructRequiredInput = { export const NAIVE_RECONSTRUCT = { name: 'reconstruct', description: 'Reconstruct R code from the static slice', - processor: (results: { normalize?: NormalizedAst, slice?: SliceResult }, input: Partial) => { - guard(results.normalize !== undefined && results.slice !== undefined, 'Required input not provided') - return reconstructToCode(results.normalize, results.slice.result, input.autoSelectIf) - }, - executed: StepHasToBeExecuted.OncePerRequest, - printer: { + processor: (results: { normalize?: NormalizedAst, slice?: SliceResult }, input: Partial) => reconstructToCode(results.normalize as NormalizedAst, (results.slice as SliceResult).result, input.autoSelectIf), + executed: StepHasToBeExecuted.OncePerRequest, + printer: { [StepOutputFormat.Internal]: internalPrinter }, dependencies: [ 'slice' ], From 78809c1d94ab69be157b2cd6c679fc6e454a5848 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 24 Nov 2023 23:40:13 +0100 Subject: [PATCH 058/104] refactor(pipeline-executor): a little bit of unsafe magic behind `doNextStep` --- src/core/pipeline-executor.ts | 45 ++++++++++++++++------------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index 9051f1c61b..1d3cdecfcf 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -1,10 +1,9 @@ -/* import { executeSingleSubStep, NameOfStep, StepHasToBeExecuted, StepName, - StepResults, + StepResults, STEPS_PER_SLICE } from './steps' import { guard } from '../util/assert' @@ -18,7 +17,7 @@ import { PipelineStepOutputWithName } from './steps/pipeline' -/!** +/** * TODO: This is ultimately the root of flowR's static slicing procedure. * It clearly defines the steps that are to be executed and splits them into two stages. * - `once-per-file`: for steps that are executed once per file. 
These can be performed *without* the knowledge of a slicing criteria, @@ -77,7 +76,7 @@ import { * @see retrieveResultOfStep * @see PipelineExecutor#doNextStep * @see StepName - *!/ + */ export class PipelineExecutor

{ private readonly pipeline: P private readonly input: PipelineInput

@@ -86,28 +85,28 @@ export class PipelineExecutor

{ private currentExecutionStage = StepHasToBeExecuted.OncePerFile private stepCounter = 0 - /!** + /** * Create a new stepping slicer. For more details on the arguments please see {@link SteppingSlicerInput}. - *!/ + */ constructor(pipeline: P, input: PipelineInput

) { this.pipeline = pipeline this.input = input } - /!** + /** * Retrieve the current stage the pipeline executor is in. * @see currentExecutionStage * @see switchToRequestStage - *!/ + */ public getCurrentStage(): StepHasToBeExecuted { return this.currentExecutionStage } - /!** + /** * Switch to the next stage of the stepping slicer. * @see PipelineExecutor * @see getCurrentStage - *!/ + */ public switchToRequestStage(): void { guard(this.pipeline.firstStepPerRequest === undefined || this.stepCounter === this.pipeline.firstStepPerRequest, 'First need to complete all steps before switching') guard(this.currentExecutionStage === StepHasToBeExecuted.OncePerFile, 'Cannot switch to next stage, already in per-request stage.') @@ -117,21 +116,21 @@ export class PipelineExecutor

{ public getResults(intermediate?:false): PipelineOutput

public getResults(intermediate: true): Partial> - /!** + /** * Returns the results of the pipeline. * * @param intermediate - normally you can only receive the results *after* the stepper completed the step of interested. * However, if you pass `true` to this parameter, you can also receive the results *before* the pipeline completed, * although the typing system then can not guarantee which of the steps have already happened. - *!/ + */ public getResults(intermediate = false): PipelineOutput

| Partial> { guard(intermediate || this.stepCounter >= this.pipeline.order.length, 'Without the intermediate flag, the pipeline must be completed before providing access to the results.') return this.output } - /!** + /** * Returns true only if 1) there are more steps to-do for the current stage and 2) we have not yet reached the end of the pipeline. - *!/ + */ public hasNextStep(): boolean { return this.stepCounter < this.pipeline.order.length && ( this.currentExecutionStage !== StepHasToBeExecuted.OncePerFile || @@ -139,7 +138,7 @@ export class PipelineExecutor

{ ) } - /!** + /** * Execute the next step (guarded with {@link hasNextStep}) and return the name of the step that was executed, so you can guard if the step differs from what you are interested in. * Furthermore, it returns the step's result. * @@ -147,7 +146,7 @@ export class PipelineExecutor

{ * If given, it causes the execution to fail if the next step is not the one you expect. * * *Without `expectedStepName`, please refrain from accessing the result, as you have no safeguards if the pipeline changes.* - *!/ + */ public async nextStep(expectedStepName?: PassedName): Promise<{ name: typeof expectedStepName extends undefined ? NameOfStep : PassedName result: typeof expectedStepName extends undefined ? unknown : PipelineStepOutputWithName @@ -181,20 +180,19 @@ export class PipelineExecutor

{ guard(this.stepCounter >= 0 && this.stepCounter < this.pipeline.order.length, `Cannot execute next step, already reached end of pipeline or unexpected index (${this.stepCounter}).`) const step = this.pipeline.steps.get(this.pipeline.order[this.stepCounter]) guard(step !== undefined, `Cannot execute next step, step ${this.pipeline.order[this.stepCounter]} does not exist.`) - let result: unknown guardStep(step.name) - result = await executeSingleSubStep(step, this.request, this.shell) + const result = await step.processor(this.output, this.input) as unknown - return { step, result } + return { step: step.name, result: result as PipelineStepOutputWithName } } - /!** + /** * This only makes sense if you have already sliced a file (e.g., by running up to the `slice` step) and want to do so again while caching the results. * Or if for whatever reason you did not pass a criterion with the constructor. * * @param newCriterion - the new slicing criterion to use for the next slice - *!/ + */ public updateCriterion(newCriterion: SlicingCriteria): void { guard(this.stepCounter >= PipelineExecutor.maximumNumberOfStepsPerFile , 'Cannot reset slice prior to once-per-slice stage') this.criterion = newCriterion @@ -208,7 +206,7 @@ export class PipelineExecutor

{ public async allRemainingSteps(canSwitchStage: false): Promise>> public async allRemainingSteps(canSwitchStage?: true): Promise> - /!** + /** * Execute all remaining steps and automatically call {@link switchToSliceStage} if necessary. * @param canSwitchStage - if true, automatically switch to the slice stage if necessary * (i.e., this is what you want if you have never executed {@link nextStep} and you want to execute *all* steps). @@ -219,7 +217,7 @@ export class PipelineExecutor

{ * In such a case, you may be better off with simply passing 'true' as the function will detect that the stage is already switched. * We could solve this type problem by separating the SteppingSlicer class into two for each stage, but this would break the improved readability and unified handling * of the slicer that I wanted to achieve with this class. - *!/ + */ public async allRemainingSteps(canSwitchStage = true): Promise | Partial>> { while(this.hasNextStep()) { await this.nextStep() @@ -233,4 +231,3 @@ export class PipelineExecutor

{ return this.reachedWanted ? this.getResults() : this.getResults(true) } } -*/ From bd5a551df799df392f3edac1f856791dc9b9e464 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 25 Nov 2023 00:06:36 +0100 Subject: [PATCH 059/104] feat(pipeline-executor): at least the syntax is no longer a problem :D --- src/core/pipeline-executor.ts | 62 +++++++++++++---------------- src/core/steps/pipeline/create.ts | 2 +- src/core/steps/pipeline/pipeline.ts | 16 ++++++-- 3 files changed, 42 insertions(+), 38 deletions(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index 1d3cdecfcf..ce47fbbd0c 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -1,18 +1,10 @@ -import { - executeSingleSubStep, - NameOfStep, - StepHasToBeExecuted, - StepName, - StepResults, - STEPS_PER_SLICE -} from './steps' +import { NameOfStep, StepHasToBeExecuted } from './steps' import { guard } from '../util/assert' -import { SliceResult, SlicingCriteria } from '../slicing' -import { DataflowInformation } from '../dataflow/internal/info' import { Pipeline, PipelineInput, PipelineOutput, + PipelinePerRequestInput, PipelineStepNames, PipelineStepOutputWithName } from './steps/pipeline' @@ -79,7 +71,7 @@ import { */ export class PipelineExecutor

{ private readonly pipeline: P - private readonly input: PipelineInput

+ private input: PipelineInput

private output: PipelineOutput

= {} as PipelineOutput

private currentExecutionStage = StepHasToBeExecuted.OncePerFile @@ -108,7 +100,7 @@ export class PipelineExecutor

{ * @see getCurrentStage */ public switchToRequestStage(): void { - guard(this.pipeline.firstStepPerRequest === undefined || this.stepCounter === this.pipeline.firstStepPerRequest, 'First need to complete all steps before switching') + guard(this.stepCounter === this.pipeline.firstStepPerRequest, 'First need to complete all steps before switching') guard(this.currentExecutionStage === StepHasToBeExecuted.OncePerFile, 'Cannot switch to next stage, already in per-request stage.') this.currentExecutionStage = StepHasToBeExecuted.OncePerRequest } @@ -134,7 +126,7 @@ export class PipelineExecutor

{ public hasNextStep(): boolean { return this.stepCounter < this.pipeline.order.length && ( this.currentExecutionStage !== StepHasToBeExecuted.OncePerFile || - this.stepCounter < (this.pipeline.firstStepPerRequest ?? this.pipeline.order.length) + this.stepCounter < this.pipeline.firstStepPerRequest ) } @@ -188,46 +180,48 @@ export class PipelineExecutor

{ } /** - * This only makes sense if you have already sliced a file (e.g., by running up to the `slice` step) and want to do so again while caching the results. - * Or if for whatever reason you did not pass a criterion with the constructor. + * This only makes sense if you have already run a request and want to re-use the per-file results for a new one. + * (or if for whatever reason you did not pass information for the pipeline with the constructor). * - * @param newCriterion - the new slicing criterion to use for the next slice + * @param newRequestData - data for the new request */ - public updateCriterion(newCriterion: SlicingCriteria): void { - guard(this.stepCounter >= PipelineExecutor.maximumNumberOfStepsPerFile , 'Cannot reset slice prior to once-per-slice stage') - this.criterion = newCriterion - this.stepCounter = PipelineExecutor.maximumNumberOfStepsPerFile - this.results.slice = undefined - this.results.reconstruct = undefined - if(this.stepOfInterest === 'slice' || this.stepOfInterest === 'reconstruct') { - this.reachedWanted = false + public updateCriterion(newRequestData: PipelinePerRequestInput

): void { + guard(this.stepCounter >= this.pipeline.firstStepPerRequest, 'Cannot reset slice prior to once-per-slice stage') + this.input = { + ...this.input, + ...newRequestData + } + this.stepCounter = this.pipeline.firstStepPerRequest + // clear the results for all steps with an index >= firstStepPerRequest, this is more of a sanity check + for(let i = this.pipeline.firstStepPerRequest; i < this.pipeline.order.length; i++) { + this.output[this.pipeline.order[i] as PipelineStepNames

] = undefined as unknown as PipelineStepOutputWithName } } - public async allRemainingSteps(canSwitchStage: false): Promise>> - public async allRemainingSteps(canSwitchStage?: true): Promise> + public async allRemainingSteps(canSwitchStage: false): Promise>> + public async allRemainingSteps(canSwitchStage?: true): Promise> /** * Execute all remaining steps and automatically call {@link switchToSliceStage} if necessary. - * @param canSwitchStage - if true, automatically switch to the slice stage if necessary + * @param canSwitchStage - if true, automatically switch to the request stage if necessary * (i.e., this is what you want if you have never executed {@link nextStep} and you want to execute *all* steps). * However, passing false allows you to only execute the steps of the 'once-per-file' stage (i.e., the steps that can be cached). * - * @note There is a small type difference if you pass 'false' and already have manually switched to the 'once-per-slice' stage. + * @note There is a small type difference if you pass 'false' and already have manually switched to the 'once-per-request' stage. * Because now, the results of these steps are no longer part of the result type (although they are still included). * In such a case, you may be better off with simply passing 'true' as the function will detect that the stage is already switched. - * We could solve this type problem by separating the SteppingSlicer class into two for each stage, but this would break the improved readability and unified handling - * of the slicer that I wanted to achieve with this class. + * We could solve this type problem by separating the PipelineExecutor class into two for each stage, but this would break the improved readability and unified handling + * of the executor that I wanted to achieve with this class. 
*/ - public async allRemainingSteps(canSwitchStage = true): Promise | Partial>> { + public async allRemainingSteps(canSwitchStage = true): Promise | Partial>> { while(this.hasNextStep()) { await this.nextStep() } - if(canSwitchStage && !this.reachedWanted && this.stage === 'once-per-file') { - this.switchToSliceStage() + if(canSwitchStage && this.hasNextStep() && this.currentExecutionStage === StepHasToBeExecuted.OncePerFile) { + this.switchToRequestStage() while(this.hasNextStep()) { await this.nextStep() } } - return this.reachedWanted ? this.getResults() : this.getResults(true) + return this.hasNextStep() ? this.getResults(true) : this.getResults() } } diff --git a/src/core/steps/pipeline/create.ts b/src/core/steps/pipeline/create.ts index 1eb7712f9c..c1df64dbea 100644 --- a/src/core/steps/pipeline/create.ts +++ b/src/core/steps/pipeline/create.ts @@ -48,7 +48,7 @@ export function verifyAndBuildPipeline(steps: readonly IStep[]): Pipeline { return { steps: perRequestStepMap, order: sorted, - firstStepPerRequest: sortedPerRequest.length === 0 ? undefined : sortedPerFile.length + firstStepPerRequest: sortedPerFile.length } } diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index 474e76e4d5..44c68ce93f 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -1,5 +1,6 @@ -import { IStep, NameOfStep } from '../step' +import { IStep, NameOfStep, StepHasToBeExecuted } from '../step' import { verifyAndBuildPipeline } from './create' +import { satisfies } from 'semver' /** * A pipeline is a collection of {@link Pipeline#steps|steps} that are executed in a certain {@link Pipeline#order|order}. @@ -13,9 +14,9 @@ export interface Pipeline { /** * In the order, this is the index of the first step that * is executed {@link StepHasToBeExecuted#OncePerRequest|once per request}. - * If undefined, all steps are executed {@link StepHasToBeExecuted#OncePerFile|once per file}. 
+ * If it is "out of bounds" (i.e., the number of steps), all steps are executed {@link StepHasToBeExecuted#OncePerFile|once per file}. */ - readonly firstStepPerRequest: number | undefined + readonly firstStepPerRequest: number } /** @@ -32,6 +33,15 @@ export type PipelineStepPrintersWithName

= Awaited>> export type PipelineInput

= PipelineStep

['requiredInput'] + +/** + * Only gets the union of 'requiredInput' of those PipelineSteps which have a 'execute' field of type 'OncePerRequest'. + * In other words, information that you may want to change for another request (e.g., another slice) with the same file. + */ +export type PipelinePerRequestInput

= { + [K in PipelineStepNames

]: PipelineStep

['executed'] extends StepHasToBeExecuted.OncePerRequest ? PipelineStepWithName['requiredInput'] : never +}[PipelineStepNames

] + export type PipelineOutput

= { [K in PipelineStepNames

]: PipelineStepOutputWithName } From 4393f4244b9b1f3c62e693472a018a4a31fb11bc Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 25 Nov 2023 00:10:03 +0100 Subject: [PATCH 060/104] test-fix(pipeline-create): fix correct index for start of per-request steps --- test/functionality/pipelines/create/create-tests.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/functionality/pipelines/create/create-tests.ts b/test/functionality/pipelines/create/create-tests.ts index dd2a342223..a47ef53958 100644 --- a/test/functionality/pipelines/create/create-tests.ts +++ b/test/functionality/pipelines/create/create-tests.ts @@ -65,7 +65,7 @@ describe('Create Pipeline (includes dependency checks)', () => { }) }) describe('default behavior', () => { - function positive(name: string, rawSteps: IStep[], expected: NameOfStep[], indexOfFirstPerFile: number | undefined = undefined) { + function positive(name: string, rawSteps: IStep[], expected: NameOfStep[], indexOfFirstPerFile: number = expected.length) { it(`${name} (all permutations)`, () => { for(const steps of allPermutations(rawSteps)) { const pipeline = createPipeline(...steps) @@ -104,7 +104,7 @@ describe('Create Pipeline (includes dependency checks)', () => { dependencies: [], decorates: 'parse', } - ], ['parse', 'parse-v2']) + ], ['parse', 'parse-v2'], 2) positive('decorators can depend on each other', [ PARSE_WITH_R_SHELL_STEP, { From f1e97f72e5fd6e25771bf0dbeae3d4992186a32a Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 25 Nov 2023 00:10:56 +0100 Subject: [PATCH 061/104] refactor(static-slicing): move static slicing steps accordingly --- src/core/steps/all/{ => static-slicing}/30-slice.ts | 12 ++++++------ .../steps/all/{ => static-slicing}/40-reconstruct.ts | 8 ++++---- src/core/steps/pipeline/default.ts | 4 ++-- src/core/steps/steps.ts | 4 ++-- test/functionality/_helper/shell.ts | 2 +- test/functionality/pipelines/create/create-tests.ts | 4 ++-- 6 files changed, 17 insertions(+), 
17 deletions(-) rename src/core/steps/all/{ => static-slicing}/30-slice.ts (75%) rename src/core/steps/all/{ => static-slicing}/40-reconstruct.ts (84%) diff --git a/src/core/steps/all/30-slice.ts b/src/core/steps/all/static-slicing/30-slice.ts similarity index 75% rename from src/core/steps/all/30-slice.ts rename to src/core/steps/all/static-slicing/30-slice.ts index ea23231ac4..c6e6709100 100644 --- a/src/core/steps/all/30-slice.ts +++ b/src/core/steps/all/static-slicing/30-slice.ts @@ -1,10 +1,10 @@ -import { internalPrinter, StepOutputFormat } from '../../print/print' -import { IStep, StepHasToBeExecuted } from '../step' -import { SlicingCriteria, staticSlicing } from '../../../slicing' +import { internalPrinter, StepOutputFormat } from '../../../print/print' +import { IStep, StepHasToBeExecuted } from '../../step' +import { SlicingCriteria, staticSlicing } from '../../../../slicing' import { DeepReadonly } from 'ts-essentials' -import { NormalizeRequiredInput } from './10-normalize' -import { DataflowInformation } from '../../../dataflow/internal/info' -import { NormalizedAst } from '../../../r-bridge' +import { NormalizeRequiredInput } from '../10-normalize' +import { DataflowInformation } from '../../../../dataflow/internal/info' +import { NormalizedAst } from '../../../../r-bridge' export const SliceRequiredInput = { ...NormalizeRequiredInput, diff --git a/src/core/steps/all/40-reconstruct.ts b/src/core/steps/all/static-slicing/40-reconstruct.ts similarity index 84% rename from src/core/steps/all/40-reconstruct.ts rename to src/core/steps/all/static-slicing/40-reconstruct.ts index 965c663279..6bd7e0a4a5 100644 --- a/src/core/steps/all/40-reconstruct.ts +++ b/src/core/steps/all/static-slicing/40-reconstruct.ts @@ -1,8 +1,8 @@ -import { internalPrinter, StepOutputFormat } from '../../print/print' -import { IStep, StepHasToBeExecuted } from '../step' -import { autoSelectLibrary, AutoSelectPredicate, reconstructToCode, SliceResult } from '../../../slicing' 
+import { internalPrinter, StepOutputFormat } from '../../../print/print' +import { IStep, StepHasToBeExecuted } from '../../step' +import { autoSelectLibrary, AutoSelectPredicate, reconstructToCode, SliceResult } from '../../../../slicing' import { DeepReadonly } from 'ts-essentials' -import { NormalizedAst } from '../../../r-bridge' +import { NormalizedAst } from '../../../../r-bridge' import { SliceRequiredInput } from './30-slice' export const ReconstructRequiredInput = { diff --git a/src/core/steps/pipeline/default.ts b/src/core/steps/pipeline/default.ts index 08772fbde5..2e233bd189 100644 --- a/src/core/steps/pipeline/default.ts +++ b/src/core/steps/pipeline/default.ts @@ -5,7 +5,7 @@ import { createPipeline } from './pipeline' import { PARSE_WITH_R_SHELL_STEP } from '../all/00-parse' import { NORMALIZE } from '../all/10-normalize' import { LEGACY_STATIC_DATAFLOW } from '../all/20-dataflow' -import { STATIC_SLICE } from '../all/30-slice' -import { NAIVE_RECONSTRUCT } from '../all/40-reconstruct' +import { STATIC_SLICE } from '../all/static-slicing/30-slice' +import { NAIVE_RECONSTRUCT } from '../all/static-slicing/40-reconstruct' export const DEFAULT_SLICING_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, LEGACY_STATIC_DATAFLOW, STATIC_SLICE, NAIVE_RECONSTRUCT) diff --git a/src/core/steps/steps.ts b/src/core/steps/steps.ts index c22d205aaa..03cfcb3a36 100644 --- a/src/core/steps/steps.ts +++ b/src/core/steps/steps.ts @@ -37,8 +37,8 @@ import { IStep, StepHasToBeExecuted } from './step' import { PARSE_WITH_R_SHELL_STEP } from './all/00-parse' import { NORMALIZE } from './all/10-normalize' import { LEGACY_STATIC_DATAFLOW } from './all/20-dataflow' -import { STATIC_SLICE } from './all/30-slice' -import { NAIVE_RECONSTRUCT } from './all/40-reconstruct' +import { STATIC_SLICE } from './all/static-slicing/30-slice' +import { NAIVE_RECONSTRUCT } from './all/static-slicing/40-reconstruct' export const STEPS_PER_FILE = { diff --git 
a/test/functionality/_helper/shell.ts b/test/functionality/_helper/shell.ts index 19d94dcda4..5d9919bbcd 100644 --- a/test/functionality/_helper/shell.ts +++ b/test/functionality/_helper/shell.ts @@ -19,7 +19,7 @@ import { SlicingCriteria } from '../../../src/slicing' import { testRequiresRVersion } from './version' import { deepMergeObject, MergeableRecord } from '../../../src/util/objects' import { LAST_STEP, SteppingSlicer } from '../../../src/core' -import { NAIVE_RECONSTRUCT } from '../../../src/core/steps/all/40-reconstruct' +import { NAIVE_RECONSTRUCT } from '../../../src/core/steps/all/static-slicing/40-reconstruct' export const testWithShell = (msg: string, fn: (shell: RShell, test: Mocha.Context) => void | Promise): Mocha.Test => { return it(msg, async function(): Promise { diff --git a/test/functionality/pipelines/create/create-tests.ts b/test/functionality/pipelines/create/create-tests.ts index a47ef53958..3c9edddb24 100644 --- a/test/functionality/pipelines/create/create-tests.ts +++ b/test/functionality/pipelines/create/create-tests.ts @@ -5,8 +5,8 @@ import { PARSE_WITH_R_SHELL_STEP } from '../../../../src/core/steps/all/00-parse import { allPermutations } from '../../../../src/util/arrays' import { NORMALIZE } from '../../../../src/core/steps/all/10-normalize' import { LEGACY_STATIC_DATAFLOW } from '../../../../src/core/steps/all/20-dataflow' -import { STATIC_SLICE } from '../../../../src/core/steps/all/30-slice' -import { NAIVE_RECONSTRUCT } from '../../../../src/core/steps/all/40-reconstruct' +import { STATIC_SLICE } from '../../../../src/core/steps/all/static-slicing/30-slice' +import { NAIVE_RECONSTRUCT } from '../../../../src/core/steps/all/static-slicing/40-reconstruct' describe('Create Pipeline (includes dependency checks)', () => { describe('error-cases', () => { From c48a13265194e9e90f4907e5e337f4ac9b43c099 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 25 Nov 2023 00:11:33 +0100 Subject: [PATCH 062/104] refactor(pipeline-core): 
relocate core steps as well --- src/cli/repl/server/connection.ts | 6 +++--- src/cli/statistics-helper-app.ts | 6 +++--- src/core/steps/all/{ => core}/00-parse.ts | 8 ++++---- src/core/steps/all/{ => core}/10-normalize.ts | 10 +++++----- src/core/steps/all/{ => core}/20-dataflow.ts | 10 +++++----- src/core/steps/all/static-slicing/30-slice.ts | 2 +- src/core/steps/pipeline/default.ts | 6 +++--- src/core/steps/steps.ts | 6 +++--- test/functionality/pipelines/create/create-tests.ts | 6 +++--- 9 files changed, 30 insertions(+), 30 deletions(-) rename src/core/steps/all/{ => core}/00-parse.ts (84%) rename src/core/steps/all/{ => core}/10-normalize.ts (87%) rename src/core/steps/all/{ => core}/20-dataflow.ts (75%) diff --git a/src/cli/repl/server/connection.ts b/src/cli/repl/server/connection.ts index 4331807471..2b7fefea5d 100644 --- a/src/cli/repl/server/connection.ts +++ b/src/cli/repl/server/connection.ts @@ -26,9 +26,9 @@ import { deepMergeObject } from '../../../util/objects' import { LogLevel } from '../../../util/log' import { printStepResult, StepOutputFormat } from '../../../core/print/print' import { DataflowInformation } from '../../../dataflow/internal/info' -import { PARSE_WITH_R_SHELL_STEP } from '../../../core/steps/all/00-parse' -import { NORMALIZE } from '../../../core/steps/all/10-normalize' -import { LEGACY_STATIC_DATAFLOW } from '../../../core/steps/all/20-dataflow' +import { PARSE_WITH_R_SHELL_STEP } from '../../../core/steps/all/core/00-parse' +import { NORMALIZE } from '../../../core/steps/all/core/10-normalize' +import { LEGACY_STATIC_DATAFLOW } from '../../../core/steps/all/core/20-dataflow' /** * Each connection handles a single client, answering to its requests. 
diff --git a/src/cli/statistics-helper-app.ts b/src/cli/statistics-helper-app.ts index ddc723620b..8b483f37c7 100644 --- a/src/cli/statistics-helper-app.ts +++ b/src/cli/statistics-helper-app.ts @@ -14,9 +14,9 @@ import { guard } from '../util/assert' import { retrieveArchiveName } from './common/features' import { printStepResult, StepOutputFormat } from '../core/print/print' import { date2string } from '../util/time' -import { PARSE_WITH_R_SHELL_STEP } from '../core/steps/all/00-parse' -import { NORMALIZE } from '../core/steps/all/10-normalize' -import { LEGACY_STATIC_DATAFLOW } from '../core/steps/all/20-dataflow' +import { PARSE_WITH_R_SHELL_STEP } from '../core/steps/all/core/00-parse' +import { NORMALIZE } from '../core/steps/all/core/10-normalize' +import { LEGACY_STATIC_DATAFLOW } from '../core/steps/all/core/20-dataflow' // apps should never depend on other apps when forking (otherwise, they are "run" on load :/) diff --git a/src/core/steps/all/00-parse.ts b/src/core/steps/all/core/00-parse.ts similarity index 84% rename from src/core/steps/all/00-parse.ts rename to src/core/steps/all/core/00-parse.ts index 6cace2f9b4..6297522e90 100644 --- a/src/core/steps/all/00-parse.ts +++ b/src/core/steps/all/core/00-parse.ts @@ -1,7 +1,7 @@ -import { internalPrinter, StepOutputFormat } from '../../print/print' -import { parseToQuads } from '../../print/parse-printer' -import { IStep, StepHasToBeExecuted } from '../step' -import { retrieveXmlFromRCode, RParseRequest, RShell } from '../../../r-bridge' +import { internalPrinter, StepOutputFormat } from '../../../print/print' +import { parseToQuads } from '../../../print/parse-printer' +import { IStep, StepHasToBeExecuted } from '../../step' +import { retrieveXmlFromRCode, RParseRequest, RShell } from '../../../../r-bridge' import { DeepReadonly } from 'ts-essentials' export const ParseRequiredInput = { diff --git a/src/core/steps/all/10-normalize.ts b/src/core/steps/all/core/10-normalize.ts similarity index 87% rename 
from src/core/steps/all/10-normalize.ts rename to src/core/steps/all/core/10-normalize.ts index 8801a7a0fb..e6d883909a 100644 --- a/src/core/steps/all/10-normalize.ts +++ b/src/core/steps/all/core/10-normalize.ts @@ -3,18 +3,18 @@ import { NoInfo, normalize, RParseRequest, RShell, XmlParserHooks -} from '../../../r-bridge' -import { internalPrinter, StepOutputFormat } from '../../print/print' +} from '../../../../r-bridge' +import { internalPrinter, StepOutputFormat } from '../../../print/print' import { normalizedAstToJson, normalizedAstToQuads, printNormalizedAstToMermaid, printNormalizedAstToMermaidUrl -} from '../../print/normalize-printer' -import { IStep, StepHasToBeExecuted } from '../step' +} from '../../../print/normalize-printer' +import { IStep, StepHasToBeExecuted } from '../../step' import { DeepPartial, DeepReadonly } from 'ts-essentials' import { ParseRequiredInput } from './00-parse' -import { guard } from '../../../util/assert' +import { guard } from '../../../../util/assert' export const NormalizeRequiredInput = { ...ParseRequiredInput, diff --git a/src/core/steps/all/20-dataflow.ts b/src/core/steps/all/core/20-dataflow.ts similarity index 75% rename from src/core/steps/all/20-dataflow.ts rename to src/core/steps/all/core/20-dataflow.ts index a440863cdb..643688b681 100644 --- a/src/core/steps/all/20-dataflow.ts +++ b/src/core/steps/all/core/20-dataflow.ts @@ -1,14 +1,14 @@ -import { internalPrinter, StepOutputFormat } from '../../print/print' -import { IStep, StepHasToBeExecuted } from '../step' -import { produceDataFlowGraph } from '../../../dataflow' +import { internalPrinter, StepOutputFormat } from '../../../print/print' +import { IStep, StepHasToBeExecuted } from '../../step' +import { produceDataFlowGraph } from '../../../../dataflow' import { dataflowGraphToJson, dataflowGraphToMermaid, dataflowGraphToMermaidUrl, dataflowGraphToQuads -} from '../../print/dataflow-printer' +} from '../../../print/dataflow-printer' import { DeepReadonly } 
from 'ts-essentials' -import { NormalizedAst } from '../../../r-bridge' +import { NormalizedAst } from '../../../../r-bridge' export const LEGACY_STATIC_DATAFLOW = { name: 'dataflow', diff --git a/src/core/steps/all/static-slicing/30-slice.ts b/src/core/steps/all/static-slicing/30-slice.ts index c6e6709100..ae3adbc420 100644 --- a/src/core/steps/all/static-slicing/30-slice.ts +++ b/src/core/steps/all/static-slicing/30-slice.ts @@ -2,7 +2,7 @@ import { internalPrinter, StepOutputFormat } from '../../../print/print' import { IStep, StepHasToBeExecuted } from '../../step' import { SlicingCriteria, staticSlicing } from '../../../../slicing' import { DeepReadonly } from 'ts-essentials' -import { NormalizeRequiredInput } from '../10-normalize' +import { NormalizeRequiredInput } from '../core/10-normalize' import { DataflowInformation } from '../../../../dataflow/internal/info' import { NormalizedAst } from '../../../../r-bridge' diff --git a/src/core/steps/pipeline/default.ts b/src/core/steps/pipeline/default.ts index 2e233bd189..c33c9aa9f8 100644 --- a/src/core/steps/pipeline/default.ts +++ b/src/core/steps/pipeline/default.ts @@ -2,9 +2,9 @@ * Contains the default pipeline for working with flowr */ import { createPipeline } from './pipeline' -import { PARSE_WITH_R_SHELL_STEP } from '../all/00-parse' -import { NORMALIZE } from '../all/10-normalize' -import { LEGACY_STATIC_DATAFLOW } from '../all/20-dataflow' +import { PARSE_WITH_R_SHELL_STEP } from '../all/core/00-parse' +import { NORMALIZE } from '../all/core/10-normalize' +import { LEGACY_STATIC_DATAFLOW } from '../all/core/20-dataflow' import { STATIC_SLICE } from '../all/static-slicing/30-slice' import { NAIVE_RECONSTRUCT } from '../all/static-slicing/40-reconstruct' diff --git a/src/core/steps/steps.ts b/src/core/steps/steps.ts index 03cfcb3a36..e649c58ea3 100644 --- a/src/core/steps/steps.ts +++ b/src/core/steps/steps.ts @@ -34,9 +34,9 @@ import { } from '../print/dataflow-printer' import { parseToQuads } from 
'../print/parse-printer' import { IStep, StepHasToBeExecuted } from './step' -import { PARSE_WITH_R_SHELL_STEP } from './all/00-parse' -import { NORMALIZE } from './all/10-normalize' -import { LEGACY_STATIC_DATAFLOW } from './all/20-dataflow' +import { PARSE_WITH_R_SHELL_STEP } from './all/core/00-parse' +import { NORMALIZE } from './all/core/10-normalize' +import { LEGACY_STATIC_DATAFLOW } from './all/core/20-dataflow' import { STATIC_SLICE } from './all/static-slicing/30-slice' import { NAIVE_RECONSTRUCT } from './all/static-slicing/40-reconstruct' diff --git a/test/functionality/pipelines/create/create-tests.ts b/test/functionality/pipelines/create/create-tests.ts index 3c9edddb24..ddee7f5798 100644 --- a/test/functionality/pipelines/create/create-tests.ts +++ b/test/functionality/pipelines/create/create-tests.ts @@ -1,10 +1,10 @@ import { createPipeline } from '../../../../src/core/steps/pipeline' import { IStep, NameOfStep } from '../../../../src/core/steps' import { expect } from 'chai' -import { PARSE_WITH_R_SHELL_STEP } from '../../../../src/core/steps/all/00-parse' +import { PARSE_WITH_R_SHELL_STEP } from '../../../../src/core/steps/all/core/00-parse' import { allPermutations } from '../../../../src/util/arrays' -import { NORMALIZE } from '../../../../src/core/steps/all/10-normalize' -import { LEGACY_STATIC_DATAFLOW } from '../../../../src/core/steps/all/20-dataflow' +import { NORMALIZE } from '../../../../src/core/steps/all/core/10-normalize' +import { LEGACY_STATIC_DATAFLOW } from '../../../../src/core/steps/all/core/20-dataflow' import { STATIC_SLICE } from '../../../../src/core/steps/all/static-slicing/30-slice' import { NAIVE_RECONSTRUCT } from '../../../../src/core/steps/all/static-slicing/40-reconstruct' From 684604816c1ab622572073a09de4c066666f7ce1 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 25 Nov 2023 00:13:36 +0100 Subject: [PATCH 063/104] refactor(stepping-slicer): update filename from `slicer` to `stepping-slicer` to reflect the 
content better --- src/core/index.ts | 2 +- src/core/{slicer.ts => stepping-slicer.ts} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/core/{slicer.ts => stepping-slicer.ts} (100%) diff --git a/src/core/index.ts b/src/core/index.ts index 9791bf0539..b939e826fb 100644 --- a/src/core/index.ts +++ b/src/core/index.ts @@ -1,4 +1,4 @@ -export { SteppingSlicer } from './slicer' +export { SteppingSlicer } from './stepping-slicer' export * from './steps/steps' export * from './steps/input' export * from './steps/output' diff --git a/src/core/slicer.ts b/src/core/stepping-slicer.ts similarity index 100% rename from src/core/slicer.ts rename to src/core/stepping-slicer.ts From dda10a789b72321add9ca66313c6d5788fe5a1cc Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 25 Nov 2023 00:26:32 +0100 Subject: [PATCH 064/104] refactor, wip(stepping-slicer): migrate stepping slicer to use the pipeline executor --- src/core/pipeline-executor.ts | 11 +- src/core/stepping-slicer.ts | 176 ++++++---------------------- src/core/steps/pipeline/pipeline.ts | 2 +- src/core/steps/step.ts | 1 - 4 files changed, 46 insertions(+), 144 deletions(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index ce47fbbd0c..415f5c5bf6 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -66,7 +66,7 @@ import { * for, for example, the dataflow analysis. * * @see retrieveResultOfStep - * @see PipelineExecutor#doNextStep + * @see PipelineExecutor#_doNextStep * @see StepName */ export class PipelineExecutor

{ @@ -108,6 +108,7 @@ export class PipelineExecutor

{ public getResults(intermediate?:false): PipelineOutput

public getResults(intermediate: true): Partial> + public getResults(intermediate: boolean): PipelineOutput

| Partial> /** * Returns the results of the pipeline. * @@ -147,7 +148,7 @@ export class PipelineExecutor

{ const guardStep = this.getGuardStep(expectedStepName) - const { step, result } = await this.doNextStep(guardStep) + const { step, result } = await this._doNextStep(guardStep) this.output[step as PipelineStepNames

] = result this.stepCounter += 1 @@ -165,7 +166,8 @@ export class PipelineExecutor

{ } } - private async doNextStep(guardStep: (name: K) => K): Promise<{ + // TODO: make it private after the stepping slicer is removed + public async _doNextStep(guardStep: (name: K) => K): Promise<{ step: NameOfStep, result: PipelineStepOutputWithName }> { @@ -185,7 +187,7 @@ export class PipelineExecutor

{ * * @param newRequestData - data for the new request */ - public updateCriterion(newRequestData: PipelinePerRequestInput

): void { + public updateRequest(newRequestData: PipelinePerRequestInput

): void { guard(this.stepCounter >= this.pipeline.firstStepPerRequest, 'Cannot reset slice prior to once-per-slice stage') this.input = { ...this.input, @@ -200,6 +202,7 @@ export class PipelineExecutor

{ public async allRemainingSteps(canSwitchStage: false): Promise>> public async allRemainingSteps(canSwitchStage?: true): Promise> + public async allRemainingSteps(canSwitchStage: boolean): Promise | Partial>> /** * Execute all remaining steps and automatically call {@link switchToSliceStage} if necessary. * @param canSwitchStage - if true, automatically switch to the request stage if necessary diff --git a/src/core/stepping-slicer.ts b/src/core/stepping-slicer.ts index 4d6de06d69..a89c88e11c 100644 --- a/src/core/stepping-slicer.ts +++ b/src/core/stepping-slicer.ts @@ -1,23 +1,26 @@ import { - NormalizedAst, IdGenerator, - NoInfo, - RParseRequest, - RShell, - XmlParserHooks -} from '../r-bridge' -import { - executeSingleSubStep, LAST_PER_FILE_STEP, LAST_STEP, - StepRequired, STEPS, - STEPS_PER_FILE, + LAST_PER_FILE_STEP, LAST_STEP, STEPS_PER_SLICE, SteppingSlicerInput, StepResults, - StepResult, StepName + StepName, StepHasToBeExecuted, NameOfStep } from './steps' -import { guard } from '../util/assert' -import { SliceResult, SlicingCriteria } from '../slicing' -import { DeepPartial } from 'ts-essentials' -import { DataflowInformation } from '../dataflow/internal/info' +import { SlicingCriteria } from '../slicing' +import { createPipeline, PipelineOutput, PipelineStepOutputWithName } from './steps/pipeline' +import { PARSE_WITH_R_SHELL_STEP } from './steps/all/core/00-parse' +import { NORMALIZE } from './steps/all/core/10-normalize' +import { LEGACY_STATIC_DATAFLOW } from './steps/all/core/20-dataflow' +import { STATIC_SLICE } from './steps/all/static-slicing/30-slice' +import { NAIVE_RECONSTRUCT } from './steps/all/static-slicing/40-reconstruct' +import { PipelineExecutor } from './pipeline-executor' + +const SteppingSlicerLegacyPipeline = createPipeline( + PARSE_WITH_R_SHELL_STEP, + NORMALIZE, + LEGACY_STATIC_DATAFLOW, + STATIC_SLICE, + NAIVE_RECONSTRUCT +) /** * This is ultimately the root of flowR's static slicing procedure. 
@@ -80,42 +83,25 @@ import { DataflowInformation } from '../dataflow/internal/info' * @see StepName */ export class SteppingSlicer { - public static readonly maximumNumberOfStepsPerFile = Object.keys(STEPS_PER_FILE).length - public static readonly maximumNumberOfStepsPerSlice = SteppingSlicer.maximumNumberOfStepsPerFile + Object.keys(STEPS_PER_SLICE).length - - private readonly shell: RShell private readonly stepOfInterest: InterestedIn - private readonly request: RParseRequest - private readonly hooks?: DeepPartial - private readonly getId?: IdGenerator - - private criterion?: SlicingCriteria - - private results = {} as Record - - private stage: StepRequired = 'once-per-file' - private stepCounter = 0 - private reachedWanted = false + private executor: PipelineExecutor /** * Create a new stepping slicer. For more details on the arguments please see {@link SteppingSlicerInput}. */ constructor(input: SteppingSlicerInput) { - this.shell = input.shell - this.request = input.request - this.hooks = input.hooks - this.getId = input.getId + // TODO: subset pipeline based on interested in + this.executor = new PipelineExecutor(SteppingSlicerLegacyPipeline, input) this.stepOfInterest = (input.stepOfInterest ?? LAST_STEP) as InterestedIn - this.criterion = input.criterion } /** * Retrieve the current stage the stepping slicer is in. - * @see StepRequired + * @see StepHasToBeExecuted * @see switchToSliceStage */ - public getCurrentStage(): StepRequired { - return this.stage + public getCurrentStage(): StepHasToBeExecuted { + return this.executor.getCurrentStage() } /** @@ -124,14 +110,12 @@ export class SteppingSlicer - public getResults(intermediate: true): Partial> + public getResults(intermediate?:false): PipelineOutput + public getResults(intermediate: true): Partial> /** * Returns the result of the step of interest, as well as the results of all steps before it. 
* @@ -139,19 +123,15 @@ export class SteppingSlicer | Partial> { - guard(intermediate || this.reachedWanted, 'Before reading the results, we need to reach the step we are interested in') - return this.results as StepResults + public getResults(intermediate = false): PipelineOutput | Partial> { + return this.executor.getResults(intermediate) } /** * Returns true only if 1) there are more steps to-do for the current stage and 2) we have not yet reached the step we are interested in */ public hasNextStep(): boolean { - return !this.reachedWanted && (this.stage === 'once-per-file' ? - this.stepCounter < SteppingSlicer.maximumNumberOfStepsPerFile - : this.stepCounter < SteppingSlicer.maximumNumberOfStepsPerSlice - ) + return this.executor.hasNextStep() } /** @@ -162,79 +142,15 @@ export class SteppingSlicer(expectedStepName?: PassedName): Promise<{ - name: typeof expectedStepName extends undefined ? StepName : PassedName - result: typeof expectedStepName extends undefined ? unknown : StepResult> + public async nextStep(expectedStepName?: PassedName): Promise<{ + name: typeof expectedStepName extends undefined ? NameOfStep : PassedName + result: typeof expectedStepName extends undefined ? unknown : PipelineStepOutputWithName> }> { - guard(this.hasNextStep(), 'No more steps to do') - - const guardStep = this.getGuardStep(expectedStepName) - - const { step, result } = await this.doNextStep(guardStep) - - this.results[step] = result - this.stepCounter += 1 - if(this.stepOfInterest === step) { - this.reachedWanted = true - } - - return { name: step as PassedName, result: result as StepResult } + return this.executor.nextStep(expectedStepName) } - private getGuardStep(expectedStepName: StepName | undefined) { - return expectedStepName === undefined ? 
- (name: K): K => name - : - (name: K): K => { - guard(expectedStepName === name, `Expected step ${expectedStepName} but got ${name}`) - return name - } - } - - private async doNextStep(guardStep: (name: K) => K) { - let step: StepName - let result: unknown - - switch(this.stepCounter) { - case 0: - step = guardStep('parse') - result = await executeSingleSubStep(step, {}, { request: this.request, shell: this.shell }) - break - case 1: - step = guardStep('normalize') - result = await executeSingleSubStep(step, { - parse: this.results.parse as string - }, { - shell: this.shell, - hooks: this.hooks, - getId: this.getId - }) - break - case 2: - step = guardStep('dataflow') - result = executeSingleSubStep(step, { normalize: this.results.normalize as NormalizedAst }) - break - case 3: - guard(this.criterion !== undefined, 'Cannot decode criteria without a criterion') - step = guardStep('slice') - result = executeSingleSubStep(step, { - dataflow: this.results.dataflow as DataflowInformation, - normalize: this.results.normalize as NormalizedAst - }, { - criterion: this.criterion - }) - break - case 4: - step = guardStep('reconstruct') - result = executeSingleSubStep(step, { - normalize: this.results.normalize as NormalizedAst, - slice: this.results.slice as SliceResult - }, {}) - break - default: - throw new Error(`Unknown step ${this.stepCounter}, reaching this should not happen!`) - } - return { step, result } + private async doNextStep(guardStep: (name: K) => K) { + return this.executor._doNextStep(guardStep) } /** @@ -244,14 +160,7 @@ export class SteppingSlicer= SteppingSlicer.maximumNumberOfStepsPerFile , 'Cannot reset slice prior to once-per-slice stage') - this.criterion = newCriterion - this.stepCounter = SteppingSlicer.maximumNumberOfStepsPerFile - this.results.slice = undefined - this.results.reconstruct = undefined - if(this.stepOfInterest === 'slice' || this.stepOfInterest === 'reconstruct') { - this.reachedWanted = false - } + 
this.executor.updateRequest(newCriterion) } public async allRemainingSteps(canSwitchStage: false): Promise>> @@ -269,15 +178,6 @@ export class SteppingSlicer | Partial>> { - while(this.hasNextStep()) { - await this.nextStep() - } - if(canSwitchStage && !this.reachedWanted && this.stage === 'once-per-file') { - this.switchToSliceStage() - while(this.hasNextStep()) { - await this.nextStep() - } - } - return this.reachedWanted ? this.getResults() : this.getResults(true) + return this.executor.allRemainingSteps(canSwitchStage) } } diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index 44c68ce93f..7810c9bb26 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -39,7 +39,7 @@ export type PipelineInput

= PipelineStep

['requiredInput'] * In other words, information that you may want to change for another request (e.g., another slice) with the same file. */ export type PipelinePerRequestInput

= { - [K in PipelineStepNames

]: PipelineStep

['executed'] extends StepHasToBeExecuted.OncePerRequest ? PipelineStepWithName['requiredInput'] : never + [K in PipelineStepNames

]: PipelineStep

['executed'] extends StepHasToBeExecuted.OncePerFile ? never : PipelineStepWithName['requiredInput'] }[PipelineStepNames

] export type PipelineOutput

= { diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index aa21dd4c74..ac1201a4d0 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -31,7 +31,6 @@ export const enum StepHasToBeExecuted { /** This step has to be executed once per request (e.g., slice for a given variable) */ OncePerRequest } -export type StepRequired = 'once-per-file' | 'once-per-slice' // TODO: rename to StepName export type NameOfStep = string & { __brand?: 'StepName' } From 87bf0f32393c2e1f2784d82c19dfa4351aa9a8ee Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 25 Nov 2023 01:33:13 +0100 Subject: [PATCH 065/104] test-fix(stepping-slicer): use intermediate results if not skipping stage --- src/core/pipeline-executor.ts | 7 ++- src/core/stepping-slicer.ts | 54 ++++++++++++------- src/core/steps/all/static-slicing/30-slice.ts | 7 +-- 3 files changed, 43 insertions(+), 25 deletions(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index 415f5c5bf6..3f255868a9 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -125,10 +125,9 @@ export class PipelineExecutor

{ * Returns true only if 1) there are more steps to-do for the current stage and 2) we have not yet reached the end of the pipeline. */ public hasNextStep(): boolean { - return this.stepCounter < this.pipeline.order.length && ( - this.currentExecutionStage !== StepHasToBeExecuted.OncePerFile || + return (this.stepCounter < this.pipeline.order.length && + this.currentExecutionStage !== StepHasToBeExecuted.OncePerFile) || this.stepCounter < this.pipeline.firstStepPerRequest - ) } /** @@ -225,6 +224,6 @@ export class PipelineExecutor

{ await this.nextStep() } } - return this.hasNextStep() ? this.getResults(true) : this.getResults() + return this.stepCounter < this.pipeline.steps.size ? this.getResults(true) : this.getResults() } } diff --git a/src/core/stepping-slicer.ts b/src/core/stepping-slicer.ts index a89c88e11c..4e042aa55c 100644 --- a/src/core/stepping-slicer.ts +++ b/src/core/stepping-slicer.ts @@ -6,21 +6,40 @@ import { StepName, StepHasToBeExecuted, NameOfStep } from './steps' import { SlicingCriteria } from '../slicing' -import { createPipeline, PipelineOutput, PipelineStepOutputWithName } from './steps/pipeline' +import { createPipeline, Pipeline, PipelineOutput, PipelineStepOutputWithName } from './steps/pipeline' import { PARSE_WITH_R_SHELL_STEP } from './steps/all/core/00-parse' import { NORMALIZE } from './steps/all/core/10-normalize' import { LEGACY_STATIC_DATAFLOW } from './steps/all/core/20-dataflow' import { STATIC_SLICE } from './steps/all/static-slicing/30-slice' import { NAIVE_RECONSTRUCT } from './steps/all/static-slicing/40-reconstruct' import { PipelineExecutor } from './pipeline-executor' +import { assertUnreachable } from '../util/assert' -const SteppingSlicerLegacyPipeline = createPipeline( - PARSE_WITH_R_SHELL_STEP, - NORMALIZE, - LEGACY_STATIC_DATAFLOW, - STATIC_SLICE, - NAIVE_RECONSTRUCT -) +type LegacyPipelineType = + Pipeline + +function getLegacyPipeline(interestedIn: StepName): Pipeline { + // brrh, but who cares, it is legacy! 
+ switch(interestedIn) { + case 'parse': + return createPipeline(PARSE_WITH_R_SHELL_STEP) + case 'normalize': + return createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE) + case 'dataflow': + return createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, LEGACY_STATIC_DATAFLOW) + case 'slice': + return createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, LEGACY_STATIC_DATAFLOW, STATIC_SLICE) + case 'reconstruct': + return createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, LEGACY_STATIC_DATAFLOW, STATIC_SLICE, NAIVE_RECONSTRUCT) + default: + assertUnreachable(interestedIn) + } +} /** * This is ultimately the root of flowR's static slicing procedure. @@ -82,17 +101,15 @@ const SteppingSlicerLegacyPipeline = createPipeline( * @see SteppingSlicer#doNextStep * @see StepName */ -export class SteppingSlicer { - private readonly stepOfInterest: InterestedIn - private executor: PipelineExecutor +export class SteppingSlicer { + private executor: PipelineExecutor> /** * Create a new stepping slicer. For more details on the arguments please see {@link SteppingSlicerInput}. */ constructor(input: SteppingSlicerInput) { // TODO: subset pipeline based on interested in - this.executor = new PipelineExecutor(SteppingSlicerLegacyPipeline, input) - this.stepOfInterest = (input.stepOfInterest ?? LAST_STEP) as InterestedIn + this.executor = new PipelineExecutor(getLegacyPipeline(input.stepOfInterest ?? LAST_STEP), input) as PipelineExecutor> } /** @@ -114,8 +131,8 @@ export class SteppingSlicer - public getResults(intermediate: true): Partial> + public getResults(intermediate?:false): PipelineOutput> + public getResults(intermediate: true): Partial>> /** * Returns the result of the step of interest, as well as the results of all steps before it. 
* @@ -123,7 +140,7 @@ export class SteppingSlicer | Partial> { + public getResults(intermediate = false): PipelineOutput> | Partial>> { return this.executor.getResults(intermediate) } @@ -144,7 +161,7 @@ export class SteppingSlicer(expectedStepName?: PassedName): Promise<{ name: typeof expectedStepName extends undefined ? NameOfStep : PassedName - result: typeof expectedStepName extends undefined ? unknown : PipelineStepOutputWithName> + result: typeof expectedStepName extends undefined ? unknown : PipelineStepOutputWithName, Exclude> }> { return this.executor.nextStep(expectedStepName) } @@ -160,7 +177,8 @@ export class SteppingSlicer>> diff --git a/src/core/steps/all/static-slicing/30-slice.ts b/src/core/steps/all/static-slicing/30-slice.ts index ae3adbc420..29121a3cc7 100644 --- a/src/core/steps/all/static-slicing/30-slice.ts +++ b/src/core/steps/all/static-slicing/30-slice.ts @@ -18,9 +18,10 @@ export const SliceRequiredInput = { export const STATIC_SLICE = { name: 'slice', description: 'Calculate the actual static slice from the dataflow graph and the given slicing criteria', - processor: (results: { dataflow?: DataflowInformation, normalize?: NormalizedAst }, input: Partial) => staticSlicing((results.dataflow as DataflowInformation).graph, results.normalize as NormalizedAst, input.criterion as SlicingCriteria, input.threshold), - executed: StepHasToBeExecuted.OncePerRequest, - printer: { + processor: (results: { dataflow?: DataflowInformation, normalize?: NormalizedAst }, input: Partial) => + staticSlicing((results.dataflow as DataflowInformation).graph, results.normalize as NormalizedAst, input.criterion as SlicingCriteria, input.threshold), + executed: StepHasToBeExecuted.OncePerRequest, + printer: { [StepOutputFormat.Internal]: internalPrinter }, dependencies: [ 'dataflow' ], From 4b906f29d6593bdcae049faafa97fe63e221dbe1 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 25 Nov 2023 01:35:54 +0100 Subject: [PATCH 066/104] feat-fix(pipeline-executor): 
do per-request steps not relying on the done flag --- src/core/pipeline-executor.ts | 2 +- test/functionality/_helper/shell.ts | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index 3f255868a9..ce89150e29 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -218,7 +218,7 @@ export class PipelineExecutor

{ while(this.hasNextStep()) { await this.nextStep() } - if(canSwitchStage && this.hasNextStep() && this.currentExecutionStage === StepHasToBeExecuted.OncePerFile) { + if(canSwitchStage && this.stepCounter < this.pipeline.steps.size && this.currentExecutionStage === StepHasToBeExecuted.OncePerFile) { this.switchToRequestStage() while(this.hasNextStep()) { await this.nextStep() diff --git a/test/functionality/_helper/shell.ts b/test/functionality/_helper/shell.ts index 5d9919bbcd..5fe9b9d2fb 100644 --- a/test/functionality/_helper/shell.ts +++ b/test/functionality/_helper/shell.ts @@ -213,6 +213,7 @@ export function assertSliced(name: string, shell: RShell, input: string, criteri }).allRemainingSteps() + console.log(Object.keys(result), result.reconstruct) try { assert.strictEqual( result.reconstruct.code, expected, From 0de5024a76372a21f2da4191f29d9ab328684e0d Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 25 Nov 2023 01:36:52 +0100 Subject: [PATCH 067/104] refactor, test: remove console-log --- test/functionality/_helper/shell.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/functionality/_helper/shell.ts b/test/functionality/_helper/shell.ts index 5fe9b9d2fb..c4f11910ac 100644 --- a/test/functionality/_helper/shell.ts +++ b/test/functionality/_helper/shell.ts @@ -212,8 +212,6 @@ export function assertSliced(name: string, shell: RShell, input: string, criteri criterion: criteria, }).allRemainingSteps() - - console.log(Object.keys(result), result.reconstruct) try { assert.strictEqual( result.reconstruct.code, expected, From fb4e077cb77a178a882adf44b2fb57ef08394e4d Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 25 Nov 2023 01:39:54 +0100 Subject: [PATCH 068/104] refactor(stepping-slicer): clean up legacy pipeline structure --- src/core/stepping-slicer.ts | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/src/core/stepping-slicer.ts b/src/core/stepping-slicer.ts index 
4e042aa55c..4c12d64307 100644 --- a/src/core/stepping-slicer.ts +++ b/src/core/stepping-slicer.ts @@ -13,32 +13,19 @@ import { LEGACY_STATIC_DATAFLOW } from './steps/all/core/20-dataflow' import { STATIC_SLICE } from './steps/all/static-slicing/30-slice' import { NAIVE_RECONSTRUCT } from './steps/all/static-slicing/40-reconstruct' import { PipelineExecutor } from './pipeline-executor' -import { assertUnreachable } from '../util/assert' -type LegacyPipelineType = - Pipeline +const legacyPipelines = { + // brrh, but who cares, it is legacy! + 'parse': createPipeline(PARSE_WITH_R_SHELL_STEP), + 'normalize': createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE), + 'dataflow': createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, LEGACY_STATIC_DATAFLOW), + 'slice': createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, LEGACY_STATIC_DATAFLOW, STATIC_SLICE), + 'reconstruct': createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, LEGACY_STATIC_DATAFLOW, STATIC_SLICE, NAIVE_RECONSTRUCT) +} +type LegacyPipelineType = typeof legacyPipelines[InterestedIn] function getLegacyPipeline(interestedIn: StepName): Pipeline { - // brrh, but who cares, it is legacy! 
- switch(interestedIn) { - case 'parse': - return createPipeline(PARSE_WITH_R_SHELL_STEP) - case 'normalize': - return createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE) - case 'dataflow': - return createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, LEGACY_STATIC_DATAFLOW) - case 'slice': - return createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, LEGACY_STATIC_DATAFLOW, STATIC_SLICE) - case 'reconstruct': - return createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, LEGACY_STATIC_DATAFLOW, STATIC_SLICE, NAIVE_RECONSTRUCT) - default: - assertUnreachable(interestedIn) - } + return legacyPipelines[interestedIn] } /** From f26b5303c7d488818b11f42d0876b9155058d47b Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 25 Nov 2023 01:41:58 +0100 Subject: [PATCH 069/104] lint-fix: deal with unused imports and functions linter errors --- src/core/steps/all/core/10-normalize.ts | 3 +-- src/core/steps/pipeline/pipeline.ts | 1 - src/core/steps/steps.ts | 26 ------------------------- 3 files changed, 1 insertion(+), 29 deletions(-) diff --git a/src/core/steps/all/core/10-normalize.ts b/src/core/steps/all/core/10-normalize.ts index e6d883909a..098a2eec52 100644 --- a/src/core/steps/all/core/10-normalize.ts +++ b/src/core/steps/all/core/10-normalize.ts @@ -1,7 +1,7 @@ import { IdGenerator, NoInfo, - normalize, RParseRequest, RShell, + normalize, RShell, XmlParserHooks } from '../../../../r-bridge' import { internalPrinter, StepOutputFormat } from '../../../print/print' @@ -14,7 +14,6 @@ import { import { IStep, StepHasToBeExecuted } from '../../step' import { DeepPartial, DeepReadonly } from 'ts-essentials' import { ParseRequiredInput } from './00-parse' -import { guard } from '../../../../util/assert' export const NormalizeRequiredInput = { ...ParseRequiredInput, diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index 7810c9bb26..a6c1ca224b 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -1,6 
+1,5 @@ import { IStep, NameOfStep, StepHasToBeExecuted } from '../step' import { verifyAndBuildPipeline } from './create' -import { satisfies } from 'semver' /** * A pipeline is a collection of {@link Pipeline#steps|steps} that are executed in a certain {@link Pipeline#order|order}. diff --git a/src/core/steps/steps.ts b/src/core/steps/steps.ts index e649c58ea3..758a214b1d 100644 --- a/src/core/steps/steps.ts +++ b/src/core/steps/steps.ts @@ -13,27 +13,6 @@ * @module */ -import { - normalize, - retrieveXmlFromRCode -} from '../../r-bridge' -import { produceDataFlowGraph } from '../../dataflow' -import { reconstructToCode, staticSlicing } from '../../slicing' -import { internalPrinter, StepOutputFormat } from '../print/print' -import { - normalizedAstToJson, - normalizedAstToQuads, - printNormalizedAstToMermaid, - printNormalizedAstToMermaidUrl -} from '../print/normalize-printer' -import { - dataflowGraphToJson, - dataflowGraphToMermaid, - dataflowGraphToMermaidUrl, - dataflowGraphToQuads -} from '../print/dataflow-printer' -import { parseToQuads } from '../print/parse-printer' -import { IStep, StepHasToBeExecuted } from './step' import { PARSE_WITH_R_SHELL_STEP } from './all/core/00-parse' import { NORMALIZE } from './all/core/10-normalize' import { LEGACY_STATIC_DATAFLOW } from './all/core/20-dataflow' @@ -60,8 +39,3 @@ export type StepName = keyof typeof STEPS export type Step = typeof STEPS[Name] export type StepProcessor = Step['processor'] export type StepResult = Awaited>> - -export function executeSingleSubStep>(subStep: Name, ...input: Parameters): ReturnType { - // @ts-expect-error - this is safe, as we know that the function arguments are correct by 'satisfies', this saves an explicit cast with 'as' - return STEPS[subStep].processor(...input as unknown as never[]) as ReturnType -} From 53e7e78ea0073023072707afd60771d0d79ec5c9 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 16:55:10 +0100 Subject: [PATCH 070/104] doc(pipeline): 
improve documentation of `createPipeline` and friends --- src/core/pipeline-executor.ts | 6 +++- src/core/print/print.ts | 14 ++++---- src/core/steps/all/core/00-parse.ts | 4 +-- src/core/steps/all/core/10-normalize.ts | 4 +-- src/core/steps/all/core/20-dataflow.ts | 4 +-- src/core/steps/all/static-slicing/30-slice.ts | 4 +-- .../all/static-slicing/40-reconstruct.ts | 4 +-- src/core/steps/pipeline/create.ts | 35 +++++++------------ src/core/steps/pipeline/pipeline.ts | 29 +++++++++++---- src/core/steps/step.ts | 22 ++++++------ .../pipelines/create/create-tests.ts | 6 ++-- 11 files changed, 71 insertions(+), 61 deletions(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index ce89150e29..205000d185 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -78,7 +78,11 @@ export class PipelineExecutor

{ private stepCounter = 0 /** - * Create a new stepping slicer. For more details on the arguments please see {@link SteppingSlicerInput}. + * Create a new pipeline executor. The required additional input is specified by the {@link IPipelineStep#requiredInput|required input configuration} + * of each step in the `pipeline`. + * + * @param pipeline - The {@link Pipeline} to execute, probably created with {@link createPipeline}. + * @param input - External {@link PipelineInput|configuration and input} required to execute the given pipeline. */ constructor(pipeline: P, input: PipelineInput

) { this.pipeline = pipeline diff --git a/src/core/print/print.ts b/src/core/print/print.ts index f99e842aee..3d52d44183 100644 --- a/src/core/print/print.ts +++ b/src/core/print/print.ts @@ -1,4 +1,4 @@ -import { IStep, StepProcessingFunction } from '../steps' +import { IPipelineStep, StepProcessingFunction } from '../steps' import { TailOfArray } from '../../util/arrays' import { guard } from '../../util/assert' @@ -41,7 +41,7 @@ export const enum StepOutputFormat { /** * Helper function to support the {@link Internal} format, as it is simply returning the input. * - * @see IStepPrinter + * @see IPipelineStepPrinter */ export function internalPrinter(input: Input): Input { return input @@ -49,28 +49,28 @@ export function internalPrinter(input: Input): Input { /** * A mapping function that maps the result of a step (i.e., the dataflow graph) - * to another representation (linked by {@link StepOutputFormat} in an {@link IStep}). + * to another representation (linked by {@link StepOutputFormat} in an {@link IPipelineStep}). * * For the internal format, refer to {@link InternalStepPrinter} as a shorthand. */ -export type IStepPrinter = +export type IPipelineStepPrinter = Format extends StepOutputFormat.Internal ? (input: Awaited>) => Awaited> : (input: Awaited>, ...additional: AdditionalInput) => Promise | string -export type InternalStepPrinter = IStepPrinter +export type InternalStepPrinter = IPipelineStepPrinter /** * For a `step` of the given name, which returned the given `data`. Convert that data into the given `format`. * Depending on your step and the format this may require `additional` inputs. 
*/ export function printStepResult< - Step extends IStep, + Step extends IPipelineStep, Processor extends Step['processor'], Format extends Exclude & number, Printer extends Step['printer'][Format], AdditionalInput extends TailOfArray>, >(step: Step, data: Awaited>, format: Format, ...additional: AdditionalInput): Promise { - const printer = step.printer[format] as IStepPrinter | undefined + const printer = step.printer[format] as IPipelineStepPrinter | undefined guard(printer !== undefined, `printer for ${step.name} does not support ${String(format)}`) return printer(data, ...additional) as Promise } diff --git a/src/core/steps/all/core/00-parse.ts b/src/core/steps/all/core/00-parse.ts index 6297522e90..09eae798bf 100644 --- a/src/core/steps/all/core/00-parse.ts +++ b/src/core/steps/all/core/00-parse.ts @@ -1,6 +1,6 @@ import { internalPrinter, StepOutputFormat } from '../../../print/print' import { parseToQuads } from '../../../print/parse-printer' -import { IStep, StepHasToBeExecuted } from '../../step' +import { IPipelineStep, StepHasToBeExecuted } from '../../step' import { retrieveXmlFromRCode, RParseRequest, RShell } from '../../../../r-bridge' import { DeepReadonly } from 'ts-essentials' @@ -23,4 +23,4 @@ export const PARSE_WITH_R_SHELL_STEP = { }, dependencies: [], requiredInput: ParseRequiredInput -} as const satisfies DeepReadonly) => ReturnType>> +} as const satisfies DeepReadonly) => ReturnType>> diff --git a/src/core/steps/all/core/10-normalize.ts b/src/core/steps/all/core/10-normalize.ts index 098a2eec52..e7eb142a82 100644 --- a/src/core/steps/all/core/10-normalize.ts +++ b/src/core/steps/all/core/10-normalize.ts @@ -11,7 +11,7 @@ import { printNormalizedAstToMermaid, printNormalizedAstToMermaidUrl } from '../../../print/normalize-printer' -import { IStep, StepHasToBeExecuted } from '../../step' +import { IPipelineStep, StepHasToBeExecuted } from '../../step' import { DeepPartial, DeepReadonly } from 'ts-essentials' import { ParseRequiredInput } from 
'./00-parse' @@ -37,4 +37,4 @@ export const NORMALIZE = { }, dependencies: [ 'parse' ], requiredInput: NormalizeRequiredInput -} as const satisfies DeepReadonly) => ReturnType>> +} as const satisfies DeepReadonly) => ReturnType>> diff --git a/src/core/steps/all/core/20-dataflow.ts b/src/core/steps/all/core/20-dataflow.ts index 643688b681..320b13d2d7 100644 --- a/src/core/steps/all/core/20-dataflow.ts +++ b/src/core/steps/all/core/20-dataflow.ts @@ -1,5 +1,5 @@ import { internalPrinter, StepOutputFormat } from '../../../print/print' -import { IStep, StepHasToBeExecuted } from '../../step' +import { IPipelineStep, StepHasToBeExecuted } from '../../step' import { produceDataFlowGraph } from '../../../../dataflow' import { dataflowGraphToJson, @@ -23,4 +23,4 @@ export const LEGACY_STATIC_DATAFLOW = { [StepOutputFormat.MermaidUrl]: dataflowGraphToMermaidUrl }, dependencies: [ 'normalize' ] -} as const satisfies DeepReadonly ReturnType>> +} as const satisfies DeepReadonly ReturnType>> diff --git a/src/core/steps/all/static-slicing/30-slice.ts b/src/core/steps/all/static-slicing/30-slice.ts index 29121a3cc7..9e1995ed66 100644 --- a/src/core/steps/all/static-slicing/30-slice.ts +++ b/src/core/steps/all/static-slicing/30-slice.ts @@ -1,5 +1,5 @@ import { internalPrinter, StepOutputFormat } from '../../../print/print' -import { IStep, StepHasToBeExecuted } from '../../step' +import { IPipelineStep, StepHasToBeExecuted } from '../../step' import { SlicingCriteria, staticSlicing } from '../../../../slicing' import { DeepReadonly } from 'ts-essentials' import { NormalizeRequiredInput } from '../core/10-normalize' @@ -26,4 +26,4 @@ export const STATIC_SLICE = { }, dependencies: [ 'dataflow' ], requiredInput: SliceRequiredInput -} as const satisfies DeepReadonly) => ReturnType>> +} as const satisfies DeepReadonly) => ReturnType>> diff --git a/src/core/steps/all/static-slicing/40-reconstruct.ts b/src/core/steps/all/static-slicing/40-reconstruct.ts index 6bd7e0a4a5..b94705ea6d 
100644 --- a/src/core/steps/all/static-slicing/40-reconstruct.ts +++ b/src/core/steps/all/static-slicing/40-reconstruct.ts @@ -1,5 +1,5 @@ import { internalPrinter, StepOutputFormat } from '../../../print/print' -import { IStep, StepHasToBeExecuted } from '../../step' +import { IPipelineStep, StepHasToBeExecuted } from '../../step' import { autoSelectLibrary, AutoSelectPredicate, reconstructToCode, SliceResult } from '../../../../slicing' import { DeepReadonly } from 'ts-essentials' import { NormalizedAst } from '../../../../r-bridge' @@ -21,4 +21,4 @@ export const NAIVE_RECONSTRUCT = { }, dependencies: [ 'slice' ], requiredInput: ReconstructRequiredInput -} as const satisfies DeepReadonly) => ReturnType>> +} as const satisfies DeepReadonly) => ReturnType>> diff --git a/src/core/steps/pipeline/create.ts b/src/core/steps/pipeline/create.ts index c1df64dbea..719fe6ec26 100644 --- a/src/core/steps/pipeline/create.ts +++ b/src/core/steps/pipeline/create.ts @@ -1,22 +1,13 @@ -import { IStep, NameOfStep, StepHasToBeExecuted } from '../step' +import { IPipelineStep, NameOfStep, StepHasToBeExecuted } from '../step' import { InvalidPipelineError } from './invalid-pipeline-error' import { Pipeline } from './pipeline' import { jsonReplacer } from '../../../util/json' import { partitionArray } from '../../../util/arrays' /** - * Given a set of {@link IStep|steps} with their dependencies, this function verifies that - * 0) the pipeline is not empty - * 1) all names of steps are unique for the given pipeline - * 2) all {@link IStepOrder#dependencies|dependencies} of all steps are exist - * 3) there are no cycles in the dependency graph - * 4) the target of a {@link IStepOrder#decorates|decoration} exists - * 5) if a decoration applies, all of its dependencies are already in the pipeline - * 6) in the resulting pipeline, there is a strict cut between steps that are executed once per file and once per request - * If successful, it returns the topologically sorted list of steps in 
order of desired execution. - * @throws InvalidPipelineError if any of the above conditions are not met + * Given a set of {@link IPipelineStep|steps} with their dependencies, this function verifies all requirements of {@link createPipeline}. */ -export function verifyAndBuildPipeline(steps: readonly IStep[]): Pipeline { +export function verifyAndBuildPipeline(steps: readonly IPipelineStep[]): Pipeline { if(steps.length === 0) { throw new InvalidPipelineError('0) Pipeline is empty') } @@ -24,7 +15,7 @@ export function verifyAndBuildPipeline(steps: readonly IStep[]): Pipeline { const [perFileSteps, perRequestSteps] = partitionArray(steps, s => s.executed === StepHasToBeExecuted.OncePerFile) // we construct a map linking each name to its respective step - const perFileStepMap = new Map() + const perFileStepMap = new Map() const initsPerFile: NameOfStep[] = [] const visited = new Set() @@ -34,7 +25,7 @@ export function verifyAndBuildPipeline(steps: readonly IStep[]): Pipeline { const sortedPerFile = topologicalSort(initsPerFile, perFileStepMap, visited) validateStepOutput(sortedPerFile, perFileStepMap, steps) - const perRequestStepMap = new Map(perFileStepMap) + const perRequestStepMap = new Map(perFileStepMap) // we track all elements without dependencies, i.e., those that start the pipeline const initsPerRequest: NameOfStep[] = [] @@ -52,7 +43,7 @@ export function verifyAndBuildPipeline(steps: readonly IStep[]): Pipeline { } } -function validateStepOutput(sorted: NameOfStep[], stepMap: Map, steps: readonly IStep[]) { +function validateStepOutput(sorted: NameOfStep[], stepMap: Map, steps: readonly IPipelineStep[]) { if(sorted.length !== stepMap.size) { // check if any of the dependencies in the map are invalid checkForInvalidDependency(steps, stepMap) @@ -61,11 +52,11 @@ function validateStepOutput(sorted: NameOfStep[], stepMap: Map) { +function allDependenciesAreVisited(step: IPipelineStep, visited: ReadonlySet) { return step.dependencies.every(d => visited.has(d)) 
} -function handleStep(step: IStep, init: NameOfStep, visited: Set, sorted: NameOfStep[], elem: NameOfStep, decoratorsOfLastOthers: Set, inits: NameOfStep[]) { +function handleStep(step: IPipelineStep, init: NameOfStep, visited: Set, sorted: NameOfStep[], elem: NameOfStep, decoratorsOfLastOthers: Set, inits: NameOfStep[]) { if(step.decorates === init) { if(allDependenciesAreVisited(step, visited)) { sorted.push(elem) @@ -78,7 +69,7 @@ function handleStep(step: IStep, init: NameOfStep, visited: Set, sor } } -function topologicalSort(inits: NameOfStep[], stepMap: Map, visited: Set) { +function topologicalSort(inits: NameOfStep[], stepMap: Map, visited: Set) { const sorted: NameOfStep[] = [] while(inits.length > 0) { @@ -101,7 +92,7 @@ function topologicalSort(inits: NameOfStep[], stepMap: Map, v return sorted } -function topologicallyInsertDecoratorElements(decoratorsOfLastOthers: Set, stepMap: Map, visited: Set, sorted: NameOfStep[]) { +function topologicallyInsertDecoratorElements(decoratorsOfLastOthers: Set, stepMap: Map, visited: Set, sorted: NameOfStep[]) { if(decoratorsOfLastOthers.size === 0) { return } @@ -110,7 +101,7 @@ function topologicallyInsertDecoratorElements(decoratorsOfLastOthers: Set) { +function checkForInvalidDependency(steps: readonly IPipelineStep[], stepMap: Map) { for(const step of steps) { for(const dep of step.dependencies) { if(!stepMap.has(dep)) { @@ -137,7 +128,7 @@ function checkForInvalidDependency(steps: readonly IStep[], stepMap: Map, inits: NameOfStep[], visited: ReadonlySet) { +function initializeSteps(steps: readonly IPipelineStep[], stepMap: Map, inits: NameOfStep[], visited: ReadonlySet) { for(const step of steps) { const name = step.name // if the name is already in the map we have a duplicate diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index a6c1ca224b..0adf56b863 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -1,4 +1,4 @@ -import { 
IStep, NameOfStep, StepHasToBeExecuted } from '../step' +import { IPipelineStep, NameOfStep, StepHasToBeExecuted } from '../step' import { verifyAndBuildPipeline } from './create' /** @@ -7,8 +7,8 @@ import { verifyAndBuildPipeline } from './create' * * If you want to get the type of all steps in the pipeline (given they are created canonically using const step names), refer to {@link PipelineStepNames}. */ -export interface Pipeline { - readonly steps: ReadonlyMap +export interface Pipeline { + readonly steps: ReadonlyMap readonly order: readonly T['name'][] /** * In the order, this is the index of the first step that @@ -26,7 +26,7 @@ export interface Pipeline { export type PipelineStepNames

= PipelineStep

['name'] export type PipelineStep

= P extends Pipeline ? U : never -export type PipelineStepWithName

= P extends Pipeline ? U extends IStep ? U : never : never +export type PipelineStepWithName

= P extends Pipeline ? U extends IPipelineStep ? U : never : never export type PipelineStepProcessorWithName

= PipelineStepWithName['processor'] export type PipelineStepPrintersWithName

= PipelineStepWithName['printer'] export type PipelineStepOutputWithName

= Awaited>> @@ -46,9 +46,24 @@ export type PipelineOutput

= { } /** - * Creates a pipeline from the given steps. - * Refer to {@link verifyAndBuildPipeline} for details and constraints on the steps. + * Creates a {@link Pipeline|pipeline} from a given collection of {@link IPipelineStep|steps}. + * In order to be valid, the collection of {@link IPipelineStep|steps} must satisfy the following set of constraints + * (which should be logical, when you consider what a pipeline should accomplish): + * + * 0) the collection of {@link IPipelineStep|steps} is not empty + * 1) all {@link IPipelineStepOrder#name|names} of {@link IPipelineStep|steps} are unique for the given pipeline + * 2) all {@link IPipelineStepOrder#dependencies|dependencies} of all {@link IPipelineStep|steps} exist + * 3) there are no cycles in the dependency graph + * 4) the target of a {@link IPipelineStepOrder#decorates|step's decoration} exists + * 5) if a {@link IPipelineStepOrder#decorates|decoration} applies, all of its {@link IPipelineStepOrder#dependencies|dependencies} are already in the pipeline + * 6) in the resulting {@link Pipeline|pipeline}, there is a strict cut between {@link IPipelineStep|steps} that are executed + * {@link StepHasToBeExecuted#OncePerFile|once per file} and {@link StepHasToBeExecuted#OncePerRequest|once per request}. + * + * @returns The function will try to order your collection of steps so that all the constraints hold. + * If it succeeds it will return the resulting {@link Pipeline|pipeline}, otherwise it will throw an {@link InvalidPipelineError}. + * + * @throws InvalidPipelineError If any of the constraints listed above are not satisfied. 
*/ -export function createPipeline(...steps: T): Pipeline { +export function createPipeline(...steps: T): Pipeline { return verifyAndBuildPipeline(steps) } diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index ac1201a4d0..b5e2210c2f 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -1,11 +1,11 @@ /** - * Defines the {@link IStep} interface which specifies all data available for a single step. + * Defines the {@link IPipelineStep} interface which specifies all data available for a single step. * * @module */ import { MergeableRecord } from '../../util/objects' -import { InternalStepPrinter, IStepPrinter, StepOutputFormat } from '../print/print' +import { InternalStepPrinter, IPipelineStepPrinter, StepOutputFormat } from '../print/print' /** * This represents the format of a step processor which retrieves two things: @@ -15,7 +15,7 @@ import { InternalStepPrinter, IStepPrinter, StepOutputFormat } from '../print/pr * * Please be aware, that if the respective information is available is not ensured by the type system but rather * ensured at runtime by your dependencies. If you want to make sure, that the information is present, - * list all steps that you require as your {@link IStepOrder#dependencies|dependencies}, even if they would be + * list all steps that you require as your {@link IPipelineStepOrder#dependencies|dependencies}, even if they would be * already covered transitively. * * TODO: we could use prototypic cores for each step name @@ -36,15 +36,15 @@ export const enum StepHasToBeExecuted { export type NameOfStep = string & { __brand?: 'StepName' } /** - * Contains the data to specify the order of {@link IStep|steps} in a pipeline. + * Contains the data to specify the order of {@link IPipelineStep|steps} in a pipeline. 
*/ -export interface IStepOrder< +export interface IPipelineStepOrder< Name extends NameOfStep = NameOfStep, > { /** * Name of the respective step, it does not have to be unique in general but only unique per-pipeline. * In other words, you can have multiple steps with a name like `parse` as long as you use only one of them in a given pipeline. - * This is, because these names are required in the {@link IStep#dependencies} field to refer to other steps this one relies on. + * This is, because these names are required in the {@link IPipelineStep#dependencies} field to refer to other steps this one relies on. */ readonly name: Name /** @@ -66,14 +66,14 @@ export interface IStepOrder< /** * Defines what is to be known of a single step in a pipeline. - * It wraps around a single {@link IStep#processor|processor} function, providing additional information. - * Steps will be executed synchronously, in-sequence, based on their {@link IStep#dependencies|dependencies}. + * It wraps around a single {@link IPipelineStep#processor|processor} function, providing additional information. + * Steps will be executed synchronously, in-sequence, based on their {@link IPipelineStep#dependencies|dependencies}. */ -export interface IStep< +export interface IPipelineStep< Name extends NameOfStep = NameOfStep, // eslint-disable-next-line -- by default, we assume nothing about the function shape Fn extends StepProcessingFunction = (...args: any[]) => any, -> extends MergeableRecord, IStepOrder { +> extends MergeableRecord, IPipelineStepOrder { /** Human-readable description of this step */ readonly description: string /** The main processor that essentially performs the logic of this step */ @@ -82,7 +82,7 @@ export interface IStep< * How to visualize the results of the respective step to the user? 
*/ readonly printer: { - [K in StepOutputFormat]?: IStepPrinter + [K in StepOutputFormat]?: IPipelineStepPrinter } & { // we always want to have the internal printer [StepOutputFormat.Internal]: InternalStepPrinter diff --git a/test/functionality/pipelines/create/create-tests.ts b/test/functionality/pipelines/create/create-tests.ts index ddee7f5798..2b31c7b234 100644 --- a/test/functionality/pipelines/create/create-tests.ts +++ b/test/functionality/pipelines/create/create-tests.ts @@ -1,5 +1,5 @@ import { createPipeline } from '../../../../src/core/steps/pipeline' -import { IStep, NameOfStep } from '../../../../src/core/steps' +import { IPipelineStep, NameOfStep } from '../../../../src/core/steps' import { expect } from 'chai' import { PARSE_WITH_R_SHELL_STEP } from '../../../../src/core/steps/all/core/00-parse' import { allPermutations } from '../../../../src/util/arrays' @@ -10,7 +10,7 @@ import { NAIVE_RECONSTRUCT } from '../../../../src/core/steps/all/static-slicing describe('Create Pipeline (includes dependency checks)', () => { describe('error-cases', () => { - function negative(name: string, rawSteps: IStep[], message: string | RegExp) { + function negative(name: string, rawSteps: IPipelineStep[], message: string | RegExp) { it(`${name} (all permutations)`, () => { for(const steps of allPermutations(rawSteps)) { expect(() => createPipeline(...steps)).to.throw(message) @@ -65,7 +65,7 @@ describe('Create Pipeline (includes dependency checks)', () => { }) }) describe('default behavior', () => { - function positive(name: string, rawSteps: IStep[], expected: NameOfStep[], indexOfFirstPerFile: number = expected.length) { + function positive(name: string, rawSteps: IPipelineStep[], expected: NameOfStep[], indexOfFirstPerFile: number = expected.length) { it(`${name} (all permutations)`, () => { for(const steps of allPermutations(rawSteps)) { const pipeline = createPipeline(...steps) From 07a7b3859a47f67cbfd2c60f86a3e4fa6f87047d Mon Sep 17 00:00:00 2001 From: Florian 
Sihler Date: Wed, 29 Nov 2023 16:58:51 +0100 Subject: [PATCH 071/104] refactor(pipeline): a pipeline is no (deep) readonly --- src/core/pipeline-executor.ts | 11 ++++++----- src/core/steps/pipeline/pipeline.ts | 3 ++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index 205000d185..d97102f786 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -71,15 +71,15 @@ import { */ export class PipelineExecutor

{ private readonly pipeline: P - private input: PipelineInput

- private output: PipelineOutput

= {} as PipelineOutput

+ private input: PipelineInput

+ private output: PipelineOutput

= {} as PipelineOutput

private currentExecutionStage = StepHasToBeExecuted.OncePerFile private stepCounter = 0 /** - * Create a new pipeline executor. The required additional input is specified by the {@link IPipelineStep#requiredInput|required input configuration} - * of each step in the `pipeline`. + * Construct a new pipeline executor. + * The required additional input is specified by the {@link IPipelineStep#requiredInput|required input configuration} of each step in the `pipeline`. * * @param pipeline - The {@link Pipeline} to execute, probably created with {@link createPipeline}. * @param input - External {@link PipelineInput|configuration and input} required to execute the given pipeline. @@ -90,7 +90,8 @@ export class PipelineExecutor

{ } /** - * Retrieve the current stage the pipeline executor is in. + * Retrieve the current {@link StepHasToBeExecuted|stage} the pipeline executor is in. + * * @see currentExecutionStage * @see switchToRequestStage */ diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index 0adf56b863..795546891f 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -1,5 +1,6 @@ import { IPipelineStep, NameOfStep, StepHasToBeExecuted } from '../step' import { verifyAndBuildPipeline } from './create' +import { DeepReadonly } from 'ts-essentials' /** * A pipeline is a collection of {@link Pipeline#steps|steps} that are executed in a certain {@link Pipeline#order|order}. @@ -8,7 +9,7 @@ import { verifyAndBuildPipeline } from './create' * If you want to get the type of all steps in the pipeline (given they are created canonically using const step names), refer to {@link PipelineStepNames}. */ export interface Pipeline { - readonly steps: ReadonlyMap + readonly steps: ReadonlyMap> readonly order: readonly T['name'][] /** * In the order, this is the index of the first step that From 01495c5499165afff9695bf1bc4ae4ff3bcf2733 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 17:22:45 +0100 Subject: [PATCH 072/104] refactor(pipeline-executor): improve and reduce some unnecessary checks --- src/core/pipeline-executor.ts | 39 +++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index d97102f786..f938512768 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -71,6 +71,7 @@ import { */ export class PipelineExecutor

{ private readonly pipeline: P + private readonly length: number private input: PipelineInput

private output: PipelineOutput

= {} as PipelineOutput

@@ -86,6 +87,7 @@ export class PipelineExecutor

{ */ constructor(pipeline: P, input: PipelineInput

) { this.pipeline = pipeline + this.length = pipeline.order.length this.input = input } @@ -94,13 +96,18 @@ export class PipelineExecutor

{ * * @see currentExecutionStage * @see switchToRequestStage + * @see StepHasToBeExecuted */ public getCurrentStage(): StepHasToBeExecuted { return this.currentExecutionStage } /** - * Switch to the next stage of the stepping slicer. + * Switch to the next {@link StepHasToBeExecuted|stage} of the pipeline executor. + * + * This will fail if either a stage change is currently not valid (as not all steps have been executed), + * or if there is no next stage (i.e., the pipeline is already completed or in the last stage). + * * @see PipelineExecutor * @see getCurrentStage */ @@ -136,7 +143,8 @@ export class PipelineExecutor

{ } /** - * Execute the next step (guarded with {@link hasNextStep}) and return the name of the step that was executed, so you can guard if the step differs from what you are interested in. + * Execute the next step and return the name of the step that was executed, + * so you can guard if the step differs from what you are interested in. * Furthermore, it returns the step's result. * * @param expectedStepName - A safeguard if you want to retrieve the result. @@ -148,8 +156,6 @@ export class PipelineExecutor

{ name: typeof expectedStepName extends undefined ? NameOfStep : PassedName result: typeof expectedStepName extends undefined ? unknown : PipelineStepOutputWithName }> { - guard(this.hasNextStep(), 'No more steps to do in the pipeline.') - const guardStep = this.getGuardStep(expectedStepName) const { step, result } = await this._doNextStep(guardStep) @@ -175,7 +181,7 @@ export class PipelineExecutor

{ step: NameOfStep, result: PipelineStepOutputWithName }> { - guard(this.stepCounter >= 0 && this.stepCounter < this.pipeline.order.length, `Cannot execute next step, already reached end of pipeline or unexpected index (${this.stepCounter}).`) + guard(this.stepCounter >= 0 && this.stepCounter < this.length, `Cannot execute next step, already reached end of pipeline or unexpected index (${this.stepCounter}).`) const step = this.pipeline.steps.get(this.pipeline.order[this.stepCounter]) guard(step !== undefined, `Cannot execute next step, step ${this.pipeline.order[this.stepCounter]} does not exist.`) @@ -189,17 +195,18 @@ export class PipelineExecutor

{ * This only makes sense if you have already run a request and want to re-use the per-file results for a new one. * (or if for whatever reason you did not pass information for the pipeline with the constructor). * - * @param newRequestData - data for the new request + * @param newRequestData - Data for the new request */ public updateRequest(newRequestData: PipelinePerRequestInput

): void { - guard(this.stepCounter >= this.pipeline.firstStepPerRequest, 'Cannot reset slice prior to once-per-slice stage') + const requestStep = this.pipeline.firstStepPerRequest + guard(this.stepCounter >= requestStep, 'Cannot reset slice prior to once-per-slice stage') this.input = { ...this.input, ...newRequestData } - this.stepCounter = this.pipeline.firstStepPerRequest + this.stepCounter = requestStep // clear the results for all steps with an index >= firstStepPerRequest, this is more of a sanity check - for(let i = this.pipeline.firstStepPerRequest; i < this.pipeline.order.length; i++) { + for(let i = requestStep; i < this.length; i++) { this.output[this.pipeline.order[i] as PipelineStepNames

] = undefined as unknown as PipelineStepOutputWithName } } @@ -208,27 +215,29 @@ export class PipelineExecutor

{ public async allRemainingSteps(canSwitchStage?: true): Promise> public async allRemainingSteps(canSwitchStage: boolean): Promise | Partial>> /** - * Execute all remaining steps and automatically call {@link switchToSliceStage} if necessary. - * @param canSwitchStage - if true, automatically switch to the request stage if necessary + * Execute all remaining steps and automatically call {@link switchToRequestStage} if necessary. + * @param canSwitchStage - If true, automatically switch to the request stage if necessary * (i.e., this is what you want if you have never executed {@link nextStep} and you want to execute *all* steps). * However, passing false allows you to only execute the steps of the 'once-per-file' stage (i.e., the steps that can be cached). * * @note There is a small type difference if you pass 'false' and already have manually switched to the 'once-per-request' stage. * Because now, the results of these steps are no longer part of the result type (although they are still included). * In such a case, you may be better off with simply passing 'true' as the function will detect that the stage is already switched. - * We could solve this type problem by separating the PipelineExecutor class into two for each stage, but this would break the improved readability and unified handling - * of the executor that I wanted to achieve with this class. + * We could solve this type problem by separating the {@link PipelineExecutor} class into two for each stage, + * but this would break the improved readability and unified handling of the executor that I wanted to achieve with this class. 
*/ public async allRemainingSteps(canSwitchStage = true): Promise | Partial>> { while(this.hasNextStep()) { await this.nextStep() } - if(canSwitchStage && this.stepCounter < this.pipeline.steps.size && this.currentExecutionStage === StepHasToBeExecuted.OncePerFile) { + + if(canSwitchStage && this.stepCounter < this.length && this.currentExecutionStage === StepHasToBeExecuted.OncePerFile) { this.switchToRequestStage() while(this.hasNextStep()) { await this.nextStep() } } - return this.stepCounter < this.pipeline.steps.size ? this.getResults(true) : this.getResults() + + return this.stepCounter < this.length ? this.getResults(true) : this.getResults() } } From e637c00ff6c5470107842ff993a074fc7343a2df Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 17:27:08 +0100 Subject: [PATCH 073/104] refactor, wip(pipeline-executor): inline guard --- src/core/pipeline-executor.ts | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index f938512768..f7398a7dea 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -124,7 +124,7 @@ export class PipelineExecutor

{ /** * Returns the results of the pipeline. * - * @param intermediate - normally you can only receive the results *after* the stepper completed the step of interested. + * @param intermediate - Normally you can only receive the results *after* the stepper completed the step of interest. * However, if you pass `true` to this parameter, you can also receive the results *before* the pipeline completed, * although the typing system then can not guarantee which of the steps have already happened. */ @@ -156,9 +156,7 @@ export class PipelineExecutor

{ name: typeof expectedStepName extends undefined ? NameOfStep : PassedName result: typeof expectedStepName extends undefined ? unknown : PipelineStepOutputWithName }> { - const guardStep = this.getGuardStep(expectedStepName) - - const { step, result } = await this._doNextStep(guardStep) + const { step, result } = await this._doNextStep(expectedStepName) this.output[step as PipelineStepNames

] = result this.stepCounter += 1 @@ -166,18 +164,8 @@ export class PipelineExecutor

{ return { name: step as PassedName, result } } - private getGuardStep(expectedStepName: NameOfStep | undefined) { - return expectedStepName === undefined ? - (name: K): K => name - : - (name: K): K => { - guard(expectedStepName === name, `Expected step ${expectedStepName} but got ${String(name)}`) - return name - } - } - // TODO: make it private after the stepping slicer is removed - public async _doNextStep(guardStep: (name: K) => K): Promise<{ + public async _doNextStep(expectedStepName: Readonly): Promise<{ step: NameOfStep, result: PipelineStepOutputWithName }> { @@ -185,7 +173,9 @@ export class PipelineExecutor

{ const step = this.pipeline.steps.get(this.pipeline.order[this.stepCounter]) guard(step !== undefined, `Cannot execute next step, step ${this.pipeline.order[this.stepCounter]} does not exist.`) - guardStep(step.name) + if(expectedStepName !== undefined) { + guard(step.name === expectedStepName, () => `Cannot execute next step, expected step ${JSON.stringify(expectedStepName)} but got ${step.name}.`) + } const result = await step.processor(this.output, this.input) as unknown return { step: step.name, result: result as PipelineStepOutputWithName } From 64097027c69b7dae518660e130b9fe05371a2775 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 17:27:32 +0100 Subject: [PATCH 074/104] refactor(stepping-slicer): clean up doNextStep --- src/core/stepping-slicer.ts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/core/stepping-slicer.ts b/src/core/stepping-slicer.ts index 4c12d64307..9aed3c16aa 100644 --- a/src/core/stepping-slicer.ts +++ b/src/core/stepping-slicer.ts @@ -85,7 +85,6 @@ function getLegacyPipeline(interestedIn: StepName): Pipeline { * for, for example, the dataflow analysis. * * @see retrieveResultOfStep - * @see SteppingSlicer#doNextStep * @see StepName */ export class SteppingSlicer { @@ -153,10 +152,6 @@ export class SteppingSlicer { return this.executor.nextStep(expectedStepName) } - private async doNextStep(guardStep: (name: K) => K) { - return this.executor._doNextStep(guardStep) - } - /** * This only makes sense if you have already sliced a file (e.g., by running up to the `slice` step) and want to do so again while caching the results. * Or if for whatever reason you did not pass a criterion with the constructor. 
From 2f927d703ec6bcadeced10211dee70baea789f9b Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 17:28:13 +0100 Subject: [PATCH 075/104] doc(stepping-slicer): remove old todo --- src/core/stepping-slicer.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/core/stepping-slicer.ts b/src/core/stepping-slicer.ts index 9aed3c16aa..b7393c9fbf 100644 --- a/src/core/stepping-slicer.ts +++ b/src/core/stepping-slicer.ts @@ -94,7 +94,6 @@ export class SteppingSlicer { * Create a new stepping slicer. For more details on the arguments please see {@link SteppingSlicerInput}. */ constructor(input: SteppingSlicerInput) { - // TODO: subset pipeline based on interested in this.executor = new PipelineExecutor(getLegacyPipeline(input.stepOfInterest ?? LAST_STEP), input) as PipelineExecutor> } From e3a3fe96c66dbf86e1f1567e9243480e94d9dc6b Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 17:30:30 +0100 Subject: [PATCH 076/104] refactor(pipeline-executor): remove another unnecessary step --- src/core/pipeline-executor.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index f7398a7dea..3c530688e8 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -159,7 +159,7 @@ export class PipelineExecutor

{ const { step, result } = await this._doNextStep(expectedStepName) this.output[step as PipelineStepNames

] = result - this.stepCounter += 1 + this.stepCounter++ return { name: step as PassedName, result } } @@ -169,7 +169,6 @@ export class PipelineExecutor

{ step: NameOfStep, result: PipelineStepOutputWithName }> { - guard(this.stepCounter >= 0 && this.stepCounter < this.length, `Cannot execute next step, already reached end of pipeline or unexpected index (${this.stepCounter}).`) const step = this.pipeline.steps.get(this.pipeline.order[this.stepCounter]) guard(step !== undefined, `Cannot execute next step, step ${this.pipeline.order[this.stepCounter]} does not exist.`) From 8b5ca6ccdb5ffb820410231c71509325b9489d06 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 17:34:33 +0100 Subject: [PATCH 077/104] refactor(pipeline-executor): one-step unwrap of async in pipeline-executor --- src/core/pipeline-executor.ts | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index 3c530688e8..bbf26342ef 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -156,28 +156,27 @@ export class PipelineExecutor

{ name: typeof expectedStepName extends undefined ? NameOfStep : PassedName result: typeof expectedStepName extends undefined ? unknown : PipelineStepOutputWithName }> { - const { step, result } = await this._doNextStep(expectedStepName) + const [step, result] = this._doNextStep(expectedStepName) + const awaitedResult = await result - this.output[step as PipelineStepNames

] = result + this.output[step as PipelineStepNames

] = awaitedResult this.stepCounter++ - return { name: step as PassedName, result } + return { name: step as PassedName, result: awaitedResult } } - // TODO: make it private after the stepping slicer is removed - public async _doNextStep(expectedStepName: Readonly): Promise<{ + private _doNextStep(expectedStepName: Readonly): [ step: NameOfStep, - result: PipelineStepOutputWithName - }> { + result: Promise> + ] { const step = this.pipeline.steps.get(this.pipeline.order[this.stepCounter]) guard(step !== undefined, `Cannot execute next step, step ${this.pipeline.order[this.stepCounter]} does not exist.`) if(expectedStepName !== undefined) { guard(step.name === expectedStepName, () => `Cannot execute next step, expected step ${JSON.stringify(expectedStepName)} but got ${step.name}.`) } - const result = await step.processor(this.output, this.input) as unknown - return { step: step.name, result: result as PipelineStepOutputWithName } + return [step.name, step.processor(this.output, this.input) as unknown as PipelineStepOutputWithName] } /** From 5374d8763012b10c42e8fb60407b49d929cee696 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 17:38:58 +0100 Subject: [PATCH 078/104] refactor(static-slice): hoist empty env when initializing visiting queue of the static slicer --- src/slicing/static/static-slicer.ts | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/slicing/static/static-slicer.ts b/src/slicing/static/static-slicer.ts index 7cb0ba759c..3156bab877 100644 --- a/src/slicing/static/static-slicer.ts +++ b/src/slicing/static/static-slicer.ts @@ -74,9 +74,9 @@ export interface SliceResult { class VisitingQueue { private readonly threshold: number - private timesHitThreshold = 0 - private seen = new Map() - private idThreshold = new DefaultMap(() => 0) + private timesHitThreshold = 0 + private seen = new Map() + private idThreshold = new DefaultMap(() => 0) private queue: NodeToSlice[] = [] constructor(threshold: number) { 
@@ -85,6 +85,7 @@ class VisitingQueue { public add(target: NodeId, env: REnvironmentInformation, envFingerprint: string, onlyForSideEffects: boolean): void { const idCounter = this.idThreshold.get(target) + if(idCounter > this.threshold) { slicerLogger.warn(`id: ${target} has been visited ${idCounter} times, skipping`) this.timesHitThreshold++ @@ -133,9 +134,10 @@ export function staticSlicing(dataflowGraph: DataflowGraph, ast: NormalizedAst, // every node ships the call environment which registers the calling environment { - const basePrint = envFingerprint(initializeCleanEnvironments()) + const emptyEnv = initializeCleanEnvironments() + const basePrint = envFingerprint(emptyEnv) for(const startId of decodedCriteria) { - queue.add(startId.id, initializeCleanEnvironments(), basePrint, false) + queue.add(startId.id, emptyEnv, basePrint, false) } } From 7f98544b85e7a2ab7840666b90d4c04b16758ceb Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 17:44:12 +0100 Subject: [PATCH 079/104] refactor(static-slicer): clean up redundant checks and access-operations --- src/slicing/static/static-slicer.ts | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/slicing/static/static-slicer.ts b/src/slicing/static/static-slicer.ts index 3156bab877..af1c5bf5fd 100644 --- a/src/slicing/static/static-slicer.ts +++ b/src/slicing/static/static-slicer.ts @@ -102,11 +102,11 @@ class VisitingQueue { } } - public next(): NodeToSlice | undefined { - return this.queue.pop() + public next(): NodeToSlice { + return this.queue.pop() as NodeToSlice } - public has(): boolean { + public nonEmpty(): boolean { return this.queue.length > 0 } @@ -122,7 +122,7 @@ class VisitingQueue { /** * This returns the ids to include in the slice, when slicing with the given seed id's (must be at least one). *

- * The returned ids can be used to {@link reconstructToCode | reconstruct the slice to R code}. + * The returned ids can be used to {@link reconstructToCode|reconstruct the slice to R code}. */ export function staticSlicing(dataflowGraph: DataflowGraph, ast: NormalizedAst, criteria: SlicingCriteria, threshold = 75): Readonly { guard(criteria.length > 0, 'must have at least one seed id to calculate slice') @@ -142,32 +142,29 @@ export function staticSlicing(dataflowGraph: DataflowGraph, ast: NormalizedAst, } - while(queue.has()) { + while(queue.nonEmpty()) { const current = queue.next() - if(current === undefined) { - continue - } - const baseEnvFingerprint = envFingerprint(current.baseEnvironment) const currentInfo = dataflowGraph.get(current.id, true) - // slicerLogger.trace(`visiting id: ${current.id} with name: ${currentInfo?.[0].name ?? ''}`) if(currentInfo === undefined) { slicerLogger.warn(`id: ${current.id} must be in graph but can not be found, keep in slice to be sure`) continue } - if(currentInfo[0].tag === 'function-call' && !current.onlyForSideEffects) { + const [currentVertex, currentEdges] = currentInfo + + if(currentVertex.tag === 'function-call' && !current.onlyForSideEffects) { slicerLogger.trace(`${current.id} is a function call`) - sliceForCall(current, idMap, currentInfo[0], dataflowGraph, queue) + sliceForCall(current, idMap, currentVertex, dataflowGraph, queue) } const currentNode = idMap.get(current.id) guard(currentNode !== undefined, () => `id: ${current.id} must be in dataflowIdMap is not in ${graphToMermaidUrl(dataflowGraph, idMap)}`) - for(const [target, edge] of currentInfo[1]) { + for(const [target, edge] of currentEdges) { if(edge.types.has(EdgeType.SideEffectOnCall)) { queue.add(target, current.baseEnvironment, baseEnvFingerprint, true) } @@ -175,7 +172,7 @@ export function staticSlicing(dataflowGraph: DataflowGraph, ast: NormalizedAst, queue.add(target, current.baseEnvironment, baseEnvFingerprint, false) } } - for(const 
controlFlowDependency of addControlDependencies(currentInfo[0].id, idMap)) { + for(const controlFlowDependency of addControlDependencies(currentVertex.id, idMap)) { queue.add(controlFlowDependency, current.baseEnvironment, baseEnvFingerprint, false) } } From 8bf74eb7148adf8d9f9eee70a60892f1686e29ee Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 17:47:34 +0100 Subject: [PATCH 080/104] refactor(static-slicer): do not use default map but directly embed semantics in static slicing --- src/slicing/static/static-slicer.ts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/slicing/static/static-slicer.ts b/src/slicing/static/static-slicer.ts index af1c5bf5fd..de1ed27e5c 100644 --- a/src/slicing/static/static-slicer.ts +++ b/src/slicing/static/static-slicer.ts @@ -20,7 +20,6 @@ import { log } from '../../util/log' import { getAllLinkedFunctionDefinitions } from '../../dataflow/internal/linker' import { overwriteEnvironments, pushLocalEnvironment, resolveByName } from '../../dataflow/environments' import objectHash from 'object-hash' -import { DefaultMap } from '../../util/defaultmap' import { LocalScope } from '../../dataflow/environments/scopes' import { convertAllSlicingCriteriaToIds, DecodedCriteria, SlicingCriteria } from '../criterion' @@ -76,7 +75,7 @@ class VisitingQueue { private readonly threshold: number private timesHitThreshold = 0 private seen = new Map() - private idThreshold = new DefaultMap(() => 0) + private idThreshold = new Map() private queue: NodeToSlice[] = [] constructor(threshold: number) { @@ -84,7 +83,7 @@ class VisitingQueue { } public add(target: NodeId, env: REnvironmentInformation, envFingerprint: string, onlyForSideEffects: boolean): void { - const idCounter = this.idThreshold.get(target) + const idCounter = this.idThreshold.get(target) ?? 
0 if(idCounter > this.threshold) { slicerLogger.warn(`id: ${target} has been visited ${idCounter} times, skipping`) From 6aeeda0c7806a0c9a8df1bea18c48e0843f89878 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 17:55:39 +0100 Subject: [PATCH 081/104] refactor(static-slicer): collect `baseEnvironment` --- src/slicing/static/static-slicer.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/slicing/static/static-slicer.ts b/src/slicing/static/static-slicer.ts index de1ed27e5c..36616d4303 100644 --- a/src/slicing/static/static-slicer.ts +++ b/src/slicing/static/static-slicer.ts @@ -144,7 +144,8 @@ export function staticSlicing(dataflowGraph: DataflowGraph, ast: NormalizedAst, while(queue.nonEmpty()) { const current = queue.next() - const baseEnvFingerprint = envFingerprint(current.baseEnvironment) + const baseEnvironment = current.baseEnvironment + const baseEnvFingerprint = envFingerprint(baseEnvironment) const currentInfo = dataflowGraph.get(current.id, true) @@ -165,14 +166,14 @@ export function staticSlicing(dataflowGraph: DataflowGraph, ast: NormalizedAst, for(const [target, edge] of currentEdges) { if(edge.types.has(EdgeType.SideEffectOnCall)) { - queue.add(target, current.baseEnvironment, baseEnvFingerprint, true) + queue.add(target, baseEnvironment, baseEnvFingerprint, true) } if(edge.types.has(EdgeType.Reads) || edge.types.has(EdgeType.DefinedBy) || edge.types.has(EdgeType.Argument) || edge.types.has(EdgeType.Calls) || edge.types.has(EdgeType.Relates) || edge.types.has(EdgeType.DefinesOnCall)) { - queue.add(target, current.baseEnvironment, baseEnvFingerprint, false) + queue.add(target, baseEnvironment, baseEnvFingerprint, false) } } for(const controlFlowDependency of addControlDependencies(currentVertex.id, idMap)) { - queue.add(controlFlowDependency, current.baseEnvironment, baseEnvFingerprint, false) + queue.add(controlFlowDependency, baseEnvironment, baseEnvFingerprint, false) } } From 
5f9b190870a101428fc4693764fb3034d20fb905 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 17:57:08 +0100 Subject: [PATCH 082/104] refactor(benchmark-slicer): rename the hosting file to better reflect what is in it --- src/benchmark/{slicer.ts => benchmark-slicer.ts} | 0 src/benchmark/index.ts | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename src/benchmark/{slicer.ts => benchmark-slicer.ts} (100%) diff --git a/src/benchmark/slicer.ts b/src/benchmark/benchmark-slicer.ts similarity index 100% rename from src/benchmark/slicer.ts rename to src/benchmark/benchmark-slicer.ts diff --git a/src/benchmark/index.ts b/src/benchmark/index.ts index 78a6cd9673..86fb3719b1 100644 --- a/src/benchmark/index.ts +++ b/src/benchmark/index.ts @@ -1,3 +1,3 @@ export * from './stats' -export * from './slicer' +export * from './benchmark-slicer' export * from './stopwatch' From a92674cde0d8f6d6559581aedbf5466dcaeed867 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 17:59:32 +0100 Subject: [PATCH 083/104] refactor(pipeline): update the index file to include the default pipelines provider --- src/core/steps/pipeline/index.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/steps/pipeline/index.ts b/src/core/steps/pipeline/index.ts index 7a50feab00..38bb83622a 100644 --- a/src/core/steps/pipeline/index.ts +++ b/src/core/steps/pipeline/index.ts @@ -1,2 +1,3 @@ export * from './pipeline' export * from './invalid-pipeline-error' +export * from './default' From 8bcec1f345ac0c4171405012915c6e3e9fea9cd0 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 18:03:29 +0100 Subject: [PATCH 084/104] refactor(shell): options will now no longer use a deep merge as it seems to be a misplaced performance option --- src/r-bridge/shell.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r-bridge/shell.ts b/src/r-bridge/shell.ts index bce99de8c6..d29c0b1b24 100644 --- a/src/r-bridge/shell.ts +++ 
b/src/r-bridge/shell.ts @@ -118,7 +118,7 @@ export class RShell { private tempDirs = new Set() public constructor(options?: Partial) { - this.options = deepMergeObject(DEFAULT_R_SHELL_OPTIONS, options) + this.options = { ...DEFAULT_R_SHELL_OPTIONS, ...options } this.log = log.getSubLogger({ name: this.options.sessionName }) this.session = new RShellSession(this.options, this.log) From 6a5037b94ad6392c23fcef9d424eae50bed54794 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 18:09:04 +0100 Subject: [PATCH 085/104] refactor: RShells now use a const enum for revive options --- src/cli/repl/core.ts | 4 ++-- src/flowr.ts | 4 ++-- src/r-bridge/shell.ts | 14 ++++++++++---- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/cli/repl/core.ts b/src/cli/repl/core.ts index 0cbaf159d9..d3076279c5 100644 --- a/src/cli/repl/core.ts +++ b/src/cli/repl/core.ts @@ -3,7 +3,7 @@ * * @module */ -import { RShell } from '../../r-bridge' +import { RShell, RShellReviveOptions } from '../../r-bridge' import readline from 'readline/promises' import { bold } from '../../statistics' import { prompt } from './prompt' @@ -76,7 +76,7 @@ export async function replProcessAnswer(output: ReplOutput, expr: string, shell: * For the execution, this function makes use of {@link replProcessAnswer} * */ -export async function repl(shell = new RShell({ revive: 'always' }), rl = readline.createInterface(DEFAULT_REPL_READLINE_CONFIGURATION), output = standardReplOutput) { +export async function repl(shell = new RShell({ revive: RShellReviveOptions.Always }), rl = readline.createInterface(DEFAULT_REPL_READLINE_CONFIGURATION), output = standardReplOutput) { // the incredible repl :D, we kill it with ':quit' // eslint-disable-next-line no-constant-condition,@typescript-eslint/no-unnecessary-condition diff --git a/src/flowr.ts b/src/flowr.ts index 201c8fcf94..bb3a1d6d88 100644 --- a/src/flowr.ts +++ b/src/flowr.ts @@ -5,7 +5,7 @@ * Otherwise, it will start a REPL that can call 
these scripts and return their results repeatedly. */ import { log, LogLevel } from './util/log' -import { RShell } from './r-bridge' +import { RShell, RShellReviveOptions } from './r-bridge' import commandLineUsage, { OptionDefinition } from 'command-line-usage' import commandLineArgs from 'command-line-args' import { guard } from './util/assert' @@ -79,7 +79,7 @@ if(options['no-ansi']) { async function retrieveShell(): Promise { // we keep an active shell session to allow other parse investigations :) const shell = new RShell({ - revive: 'always', + revive: RShellReviveOptions.Always, onRevive: (code, signal) => { const signalText = signal == null ? '' : ` and signal ${signal}` console.log(formatter.format(`R process exited with code ${code}${signalText}. Restarting...`, { color: Colors.Magenta, effect: ColorEffect.Foreground })) diff --git a/src/r-bridge/shell.ts b/src/r-bridge/shell.ts index d29c0b1b24..fd0c90dbfd 100644 --- a/src/r-bridge/shell.ts +++ b/src/r-bridge/shell.ts @@ -61,6 +61,12 @@ export const DEFAULT_OUTPUT_COLLECTOR_CONFIGURATION: OutputCollectorConfiguratio errorStopsWaiting: true } +export const enum RShellReviveOptions { + Never, + OnError, + Always +} + export interface RShellSessionOptions extends MergeableRecord { /** The path to the R executable, can be only the executable if it is to be found on the PATH. */ readonly pathToRExecutable: string @@ -73,7 +79,7 @@ export interface RShellSessionOptions extends MergeableRecord { /** The environment variables available in the R session. 
*/ readonly env: NodeJS.ProcessEnv /** If set, the R session will be restarted if it exits due to an error */ - readonly revive: 'never' | 'on-error' | 'always' + readonly revive: RShellReviveOptions /** Called when the R session is restarted, this makes only sense if `revive` is not set to `'never'` */ readonly onRevive: (code: number, signal: string | null) => void /** The path to the library directory, use undefined to let R figure that out for itself */ @@ -96,7 +102,7 @@ export const DEFAULT_R_SHELL_OPTIONS: RShellOptions = { env: process.env, eol: '\n', homeLibPath: getPlatform() === 'windows' ? undefined : '~/.r-libs', - revive: 'never', + revive: RShellReviveOptions.Never, onRevive: () => { /* do nothing */ } } as const @@ -126,12 +132,12 @@ export class RShell { } private revive() { - if(this.options.revive === 'never') { + if(this.options.revive === RShellReviveOptions.Never) { return } this.session.onExit((code, signal) => { - if(this.options.revive === 'always' || (this.options.revive === 'on-error' && code !== 0)) { + if(this.options.revive === RShellReviveOptions.Always || (this.options.revive === RShellReviveOptions.OnError && code !== 0)) { this.log.warn(`R session exited with code ${code}, reviving!`) this.options.onRevive(code, signal) this.session = new RShellSession(this.options, this.log) From 6f814b05c616459d238c4eb2d1a82f5e6a11527f Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 18:24:21 +0100 Subject: [PATCH 086/104] refactor(shell): try to speedup R-Session startup time --- src/r-bridge/shell.ts | 48 +++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/src/r-bridge/shell.ts b/src/r-bridge/shell.ts index fd0c90dbfd..9bac7cc01c 100644 --- a/src/r-bridge/shell.ts +++ b/src/r-bridge/shell.ts @@ -9,7 +9,7 @@ import semver from 'semver/preload' import { getPlatform } from '../util/os' import fs from 'fs' import { removeTokenMapQuotationMarks, TokenMap } from './retriever' 
-import { DeepWritable } from 'ts-essentials' +import { DeepReadonly, DeepWritable } from 'ts-essentials' export type OutputStreamSelector = 'stdout' | 'stderr' | 'both'; @@ -369,28 +369,36 @@ class RShellSession { private readonly bareSession: ChildProcessWithoutNullStreams private readonly sessionStdOut: readline.Interface private readonly sessionStdErr: readline.Interface - private readonly options: RShellSessionOptions + private readonly options: DeepReadonly private readonly log: Logger private collectionTimeout: NodeJS.Timeout | undefined - public constructor(options: RShellSessionOptions, log: Logger) { + public constructor(options: DeepReadonly, log: Logger) { this.bareSession = spawn(options.pathToRExecutable, options.commandLineOptions, { env: options.env, cwd: options.cwd, windowsHide: true }) - this.sessionStdOut = readline.createInterface({ - input: this.bareSession.stdout, - terminal: false - }) - this.sessionStdErr = readline.createInterface({ - input: this.bareSession.stderr, - terminal: false - }) - this.onExit(() => { this.end() }) + + this.sessionStdOut = readline.createInterface({ input: this.bareSession.stdout }) + this.sessionStdErr = readline.createInterface({ input: this.bareSession.stderr }) + + this.onExit(() => this.end()) this.options = options this.log = log - this.setupRSessionLoggers() + + if(log.settings.minLevel >= LogLevel.Trace) { + this.bareSession.stdout.on('data', (data: Buffer) => { + log.trace(`< ${data.toString()}`) + }) + this.bareSession.on('close', (code: number) => { + log.trace(`session exited with code ${code}`) + }) + } + + this.bareSession.stderr.on('data', (data: string) => { + log.warn(`< ${data}`) + }) } public write(data: string): void { @@ -473,20 +481,6 @@ class RShellSession { return killResult } - private setupRSessionLoggers(): void { - if(this.log.settings.minLevel >= LogLevel.Trace) { - this.bareSession.stdout.on('data', (data: Buffer) => { - this.log.trace(`< ${data.toString()}`) - }) - 
this.bareSession.on('close', (code: number) => { - this.log.trace(`session exited with code ${code}`) - }) - } - this.bareSession.stderr.on('data', (data: string) => { - this.log.warn(`< ${data}`) - }) - } - public onExit(callback: (code: number, signal: string | null) => void): void { this.bareSession.on('exit', callback) this.bareSession.stdin.on('error', callback) From 952019f057d67d85bed2125659180eb0ff014ff9 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 18:49:59 +0100 Subject: [PATCH 087/104] refactor(test): shell pkg loading scheme for packages --- src/r-bridge/lang-4.x/values.ts | 6 +++--- src/r-bridge/shell.ts | 8 ++++---- test/functionality/_helper/shell.ts | 15 +++++++++++---- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/r-bridge/lang-4.x/values.ts b/src/r-bridge/lang-4.x/values.ts index 57f24bb3af..720dc1f916 100644 --- a/src/r-bridge/lang-4.x/values.ts +++ b/src/r-bridge/lang-4.x/values.ts @@ -11,9 +11,7 @@ class ValueConversionError extends Error { * transforms a value to something R can understand (e.g., booleans to TRUE/FALSE) */ export function ts2r(value: T): string { - if(typeof value === 'undefined') { - return 'NA' - } else if(typeof value === 'string') { + if(typeof value === 'string') { return JSON.stringify(value) } else if(typeof value === 'number') { return value.toString() @@ -21,6 +19,8 @@ export function ts2r(value: T): string { return value ? 
'TRUE' : 'FALSE' } else if(value === null) { return 'NULL' + } else if(typeof value === 'undefined') { + return 'NA' } else if(Array.isArray(value)) { return `c(${value.map(ts2r).join(', ')})` } else if(typeof value === 'object') { diff --git a/src/r-bridge/shell.ts b/src/r-bridge/shell.ts index 9bac7cc01c..399a22d69c 100644 --- a/src/r-bridge/shell.ts +++ b/src/r-bridge/shell.ts @@ -1,4 +1,4 @@ -import { type ChildProcessWithoutNullStreams, spawn } from 'child_process' +import { type ChildProcessWithoutNullStreams, spawn } from 'node:child_process' import { deepMergeObject, type MergeableRecord } from '../util/objects' import { type ILogObj, type Logger } from 'tslog' import * as readline from 'node:readline' @@ -76,8 +76,8 @@ export interface RShellSessionOptions extends MergeableRecord { readonly cwd: string /** The character to use to mark the end of a line. Is probably always `\n` (even on windows). */ readonly eol: string - /** The environment variables available in the R session. */ - readonly env: NodeJS.ProcessEnv + /** The environment variables available in the R session (undefined uses the child-process default). */ + readonly env: NodeJS.ProcessEnv | undefined /** If set, the R session will be restarted if it exits due to an error */ readonly revive: RShellReviveOptions /** Called when the R session is restarted, this makes only sense if `revive` is not set to `'never'` */ @@ -99,7 +99,7 @@ export const DEFAULT_R_SHELL_OPTIONS: RShellOptions = { pathToRExecutable: getPlatform() === 'windows' ? 'R.exe' : 'R', commandLineOptions: ['--vanilla', '--quiet', '--no-echo', '--no-save'], cwd: process.cwd(), - env: process.env, + env: undefined, eol: '\n', homeLibPath: getPlatform() === 'windows' ? 
undefined : '~/.r-libs', revive: RShellReviveOptions.Never, diff --git a/test/functionality/_helper/shell.ts b/test/functionality/_helper/shell.ts index c4f11910ac..9d7e3daffb 100644 --- a/test/functionality/_helper/shell.ts +++ b/test/functionality/_helper/shell.ts @@ -10,7 +10,7 @@ import { RExpressionList, RNode, RNodeWithParent, - RShell, + RShell, ts2r, XmlParserHooks } from '../../../src/r-bridge' import { assert } from 'chai' @@ -42,15 +42,22 @@ export const testWithShell = (msg: string, fn: (shell: RShell, test: Mocha.Conte export function withShell(fn: (shell: RShell) => void, packages: string[] = ['xmlparsedata']): () => void { return function() { const shell = new RShell() + // this way we probably do not have to reinstall even if we launch from WebStorm - before(async function() { + before('setup shell', async function() { this.timeout('15min') shell.tryToInjectHomeLibPath() + let network = false for(const pkg of packages) { if(!await shell.isPackageInstalled(pkg)) { - await testRequiresNetworkConnection(this) + if(!network) { + await testRequiresNetworkConnection(this) + } + network = true + await shell.ensurePackageInstalled(pkg, true) + } else { + shell.sendCommand(`library(${ts2r(pkg)})`) } - await shell.ensurePackageInstalled(pkg, true) } }) fn(shell) From 9d919dac3d323c2aa472fe0da8f53b644d5930b1 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 18:56:59 +0100 Subject: [PATCH 088/104] refactor(shell, test): tryyyy to host shell --- test/functionality/_helper/shell.ts | 49 ++++++++++++++++------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/test/functionality/_helper/shell.ts b/test/functionality/_helper/shell.ts index 9d7e3daffb..fffb4d4d86 100644 --- a/test/functionality/_helper/shell.ts +++ b/test/functionality/_helper/shell.ts @@ -20,6 +20,7 @@ import { testRequiresRVersion } from './version' import { deepMergeObject, MergeableRecord } from '../../../src/util/objects' import { LAST_STEP, SteppingSlicer 
} from '../../../src/core' import { NAIVE_RECONSTRUCT } from '../../../src/core/steps/all/static-slicing/40-reconstruct' +import { italic } from '../../../src/statistics' export const testWithShell = (msg: string, fn: (shell: RShell, test: Mocha.Context) => void | Promise): Mocha.Test => { return it(msg, async function(): Promise { @@ -34,36 +35,42 @@ export const testWithShell = (msg: string, fn: (shell: RShell, test: Mocha.Conte }) } +const shell = new RShell() +const end = () => { + shell.close() +} +process.on('SIGINT', end) +process.on('SIGTERM', end) + +after(() => { + end() +}) + /** * produces a shell session for you, can be used within a `describe` block * @param fn - function to use the shell * @param packages - packages to be ensured when the shell is created */ export function withShell(fn: (shell: RShell) => void, packages: string[] = ['xmlparsedata']): () => void { - return function() { - const shell = new RShell() - - // this way we probably do not have to reinstall even if we launch from WebStorm - before('setup shell', async function() { - this.timeout('15min') - shell.tryToInjectHomeLibPath() - let network = false - for(const pkg of packages) { - if(!await shell.isPackageInstalled(pkg)) { - if(!network) { - await testRequiresNetworkConnection(this) - } - network = true - await shell.ensurePackageInstalled(pkg, true) - } else { - shell.sendCommand(`library(${ts2r(pkg)})`) + // this way we probably do not have to reinstall even if we launch from WebStorm + before('setup shell', async function() { + this.timeout('15min') + shell.tryToInjectHomeLibPath() + let network = false + for(const pkg of packages) { + if(!await shell.isPackageInstalled(pkg)) { + if(!network) { + await testRequiresNetworkConnection(this) } + network = true + await shell.ensurePackageInstalled(pkg, true) + } else { + shell.sendCommand(`library(${ts2r(pkg)})`) } - }) + } + }) + return function() { fn(shell) - after(() => { - shell.close() - }) } } From 
cc36f19dc1907d10e5f435ba49312cd63d11db42 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 18:58:09 +0100 Subject: [PATCH 089/104] Revert "refactor(shell, test): tryyyy to host shell" This reverts commit 9d919dac3d323c2aa472fe0da8f53b644d5930b1. --- test/functionality/_helper/shell.ts | 49 +++++++++++++---------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/test/functionality/_helper/shell.ts b/test/functionality/_helper/shell.ts index fffb4d4d86..9d7e3daffb 100644 --- a/test/functionality/_helper/shell.ts +++ b/test/functionality/_helper/shell.ts @@ -20,7 +20,6 @@ import { testRequiresRVersion } from './version' import { deepMergeObject, MergeableRecord } from '../../../src/util/objects' import { LAST_STEP, SteppingSlicer } from '../../../src/core' import { NAIVE_RECONSTRUCT } from '../../../src/core/steps/all/static-slicing/40-reconstruct' -import { italic } from '../../../src/statistics' export const testWithShell = (msg: string, fn: (shell: RShell, test: Mocha.Context) => void | Promise): Mocha.Test => { return it(msg, async function(): Promise { @@ -35,42 +34,36 @@ export const testWithShell = (msg: string, fn: (shell: RShell, test: Mocha.Conte }) } -const shell = new RShell() -const end = () => { - shell.close() -} -process.on('SIGINT', end) -process.on('SIGTERM', end) - -after(() => { - end() -}) - /** * produces a shell session for you, can be used within a `describe` block * @param fn - function to use the shell * @param packages - packages to be ensured when the shell is created */ export function withShell(fn: (shell: RShell) => void, packages: string[] = ['xmlparsedata']): () => void { - // this way we probably do not have to reinstall even if we launch from WebStorm - before('setup shell', async function() { - this.timeout('15min') - shell.tryToInjectHomeLibPath() - let network = false - for(const pkg of packages) { - if(!await shell.isPackageInstalled(pkg)) { - if(!network) { - await 
testRequiresNetworkConnection(this) + return function() { + const shell = new RShell() + + // this way we probably do not have to reinstall even if we launch from WebStorm + before('setup shell', async function() { + this.timeout('15min') + shell.tryToInjectHomeLibPath() + let network = false + for(const pkg of packages) { + if(!await shell.isPackageInstalled(pkg)) { + if(!network) { + await testRequiresNetworkConnection(this) + } + network = true + await shell.ensurePackageInstalled(pkg, true) + } else { + shell.sendCommand(`library(${ts2r(pkg)})`) } - network = true - await shell.ensurePackageInstalled(pkg, true) - } else { - shell.sendCommand(`library(${ts2r(pkg)})`) } - } - }) - return function() { + }) fn(shell) + after(() => { + shell.close() + }) } } From d153b32916f1f65aae076f942d7bbb42fed66432 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 19:05:28 +0100 Subject: [PATCH 090/104] refactor(pipeline-executor): clean up pipeline executor --- src/core/pipeline-executor.ts | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index bbf26342ef..ec7d238e9e 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -125,27 +125,28 @@ export class PipelineExecutor

{ * Returns the results of the pipeline. * * @param intermediate - Normally you can only receive the results *after* the stepper completed the step of interested. - * However, if you pass `true` to this parameter, you can also receive the results *before* the pipeline completed, - * although the typing system then can not guarantee which of the steps have already happened. + * However, if you pass `true` to this parameter, you can also receive the results *before* the {@link PipelineExecutor|pipeline executor} + * completed, although the typing system then can not guarantee which of the steps have already happened. */ public getResults(intermediate = false): PipelineOutput

| Partial> { - guard(intermediate || this.stepCounter >= this.pipeline.order.length, 'Without the intermediate flag, the pipeline must be completed before providing access to the results.') + guard(intermediate || this.stepCounter >= this.length, 'Without the intermediate flag, the pipeline must be completed before providing access to the results.') return this.output } /** - * Returns true only if 1) there are more steps to-do for the current stage and 2) we have not yet reached the end of the pipeline. + * Returns true only if + * 1) there are more {@link IPipelineStep|steps} to-do for the current {@link StepHasToBeExecuted|stage} and + * 2) we have not yet reached the end of the {@link Pipeline|pipeline}. */ public hasNextStep(): boolean { - return (this.stepCounter < this.pipeline.order.length && - this.currentExecutionStage !== StepHasToBeExecuted.OncePerFile) || - this.stepCounter < this.pipeline.firstStepPerRequest + return (this.stepCounter < this.length && this.currentExecutionStage !== StepHasToBeExecuted.OncePerFile) + || this.stepCounter < this.pipeline.firstStepPerRequest } /** - * Execute the next step and return the name of the step that was executed, - * so you can guard if the step differs from what you are interested in. - * Furthermore, it returns the step's result. + * Execute the next {@link IPipelineStep|step} and return the name of the {@link IPipelineStep|step} that was executed, + * so you can guard if the {@link IPipelineStep|step} differs from what you are interested in. + * Furthermore, it returns the {@link IPipelineStep|step's} result. * * @param expectedStepName - A safeguard if you want to retrieve the result. * If given, it causes the execution to fail if the next step is not the one you expect. 
From d24bfea6cf1f650ff76e501f8f3b3b5c1d1777d2 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 19:06:33 +0100 Subject: [PATCH 091/104] refactor(meta): allow nyc to cache again --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 38f5e0c037..089cede3aa 100644 --- a/package.json +++ b/package.json @@ -18,7 +18,7 @@ "lint": "npm run license-compat -- --summary && eslint src/ test/", "license-compat": "license-checker --onlyAllow 'MIT;MIT OR X11;GPLv2;LGPL;GNUGPL;ISC;Apache-2.0;FreeBSD;BSD-2-Clause;clearbsd;ModifiedBSD;BSD-3-Clause;Python-2.0;Unlicense;WTFPL;CC-BY-4.0;CC-BY-3.0;CC0-1.0;0BSD'", "doc": "typedoc", - "test": "nyc --source-map --produce-source-map --cache false mocha --require ts-node/register --timeout 60000 \"test/**/*.spec.ts\"", + "test": "nyc --source-map --produce-source-map mocha --require ts-node/register --timeout 60000 \"test/**/*.spec.ts\"", "performance-test": "func() { cd test/performance/ && bash run-all-suites.sh $1 $2; cd ../../; }; func", "test-full": "npm run test -- --test-installation" }, From c8e7eb338c1b2d8b7e276ea92c6c42740259e050 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 19:08:07 +0100 Subject: [PATCH 092/104] refactor(meta): clean up nyc reporters --- package.json | 1 - 1 file changed, 1 deletion(-) diff --git a/package.json b/package.json index 089cede3aa..f6bd6a55df 100644 --- a/package.json +++ b/package.json @@ -36,7 +36,6 @@ ], "include": "src/**/*.ts", "reporter": [ - "html", "text", "lcov", "cobertura" From 81cc25433191817dd54d2246d25260eba76b15b3 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 19:08:34 +0100 Subject: [PATCH 093/104] refactor(nyc): skip fully covered --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index f6bd6a55df..9835e2d1bc 100644 --- a/package.json +++ b/package.json @@ -29,7 +29,7 @@ "all": true, "per-file": true, 
"check-coverage": false, - "skip-full": false, + "skip-full": true, "lines": 70, "extension": [ ".ts" From a21502efd43a7a0ea273fb9aa453fec64eb771a0 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 19:11:09 +0100 Subject: [PATCH 094/104] refactor(test): remove unnecessary parenthesis --- test/functionality/r-bridge/lang/ast/parse-function-call.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/functionality/r-bridge/lang/ast/parse-function-call.ts b/test/functionality/r-bridge/lang/ast/parse-function-call.ts index 219c575ec6..537eb674c7 100644 --- a/test/functionality/r-bridge/lang/ast/parse-function-call.ts +++ b/test/functionality/r-bridge/lang/ast/parse-function-call.ts @@ -4,7 +4,7 @@ import { rangeFrom } from '../../../../../src/util/range' import { RType } from '../../../../../src/r-bridge' import { ensureExpressionList } from '../../../../../src/r-bridge/lang-4.x/ast/parser/xml/internal' -describe('Parse function calls', withShell((shell) => { +describe('Parse function calls', withShell(shell => { describe('functions without arguments', () => { assertAst( 'f()', From a1b07d120012fc56b188b60ae34c4ac786af2662 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 20:27:30 +0100 Subject: [PATCH 095/104] refactor, wip(pipeline): improve slicing input handler --- src/core/pipeline-executor.ts | 47 ++++++++++--------- src/core/stepping-slicer.ts | 6 +-- src/core/steps/all/core/00-parse.ts | 19 ++++---- src/core/steps/all/core/10-normalize.ts | 19 ++++---- src/core/steps/all/core/20-dataflow.ts | 7 +-- src/core/steps/all/static-slicing/30-slice.ts | 20 ++++---- .../all/static-slicing/40-reconstruct.ts | 18 ++++--- src/core/steps/pipeline/create.ts | 4 +- src/core/steps/pipeline/pipeline.ts | 18 ++++--- src/core/steps/step.ts | 8 ++-- test/functionality/dataflow/dataflow.spec.ts | 25 ++++++++++ 11 files changed, 111 insertions(+), 80 deletions(-) diff --git a/src/core/pipeline-executor.ts 
b/src/core/pipeline-executor.ts index ec7d238e9e..523a7cf937 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -1,4 +1,4 @@ -import { NameOfStep, StepHasToBeExecuted } from './steps' +import { NameOfStep, PipelineStepStage } from './steps' import { guard } from '../util/assert' import { Pipeline, @@ -10,17 +10,20 @@ import { } from './steps/pipeline' /** - * TODO: This is ultimately the root of flowR's static slicing procedure. - * It clearly defines the steps that are to be executed and splits them into two stages. - * - `once-per-file`: for steps that are executed once per file. These can be performed *without* the knowledge of a slicing criteria, - * and they can be cached and re-used if you want to slice the same file multiple times. - * - `once-per-slice`: for steps that are executed once per slice. These can only be performed *with* a slicing criteria. + * The pipeline executor allows to execute arbitrary {@link Pipeline|pipelines} in a step-by-step fashion. + * If you are not yet in the possession of a {@link Pipeline|pipeline}, you can use the {@link createPipeline} function + * to create one for yourself, based on the steps that you want to execute. * - * Furthermore, this stepper follows an iterable fashion to be *as flexible as possible* (e.g., to be instrumented with measurements). - * So, you can use the stepping slicer like this: + * Those steps are split into two phases or "stages" (which is the name that we will use in the following), represented + * by the {@link PipelineStepStage} type. These allow us to separate things that have to be done + * once per-file, e.g., actually parsing the AST, from those, that we need to repeat 'once per request' (whatever this + * request may be). In other words, what can be cached between operations and what can not. + * + * Furthermore, this executor follows an iterable fashion to be *as flexible as possible* + * (e.g., to be instrumented with measurements). 
So, you can use the pipeline executor like this: * * ```ts - * const slicer = new SteppingSlicer({ ... }) + * const slicer = new PipelineExecutor({ ... }) * while(slicer.hasNextStep()) { * await slicer.nextStep() * } @@ -75,7 +78,7 @@ export class PipelineExecutor

{ private input: PipelineInput

private output: PipelineOutput

= {} as PipelineOutput

- private currentExecutionStage = StepHasToBeExecuted.OncePerFile + private currentExecutionStage = PipelineStepStage.OncePerFile private stepCounter = 0 /** @@ -92,18 +95,18 @@ export class PipelineExecutor

{ } /** - * Retrieve the current {@link StepHasToBeExecuted|stage} the pipeline executor is in. + * Retrieve the current {@link PipelineStepStage|stage} the pipeline executor is in. * * @see currentExecutionStage * @see switchToRequestStage - * @see StepHasToBeExecuted + * @see PipelineStepStage */ - public getCurrentStage(): StepHasToBeExecuted { + public getCurrentStage(): PipelineStepStage { return this.currentExecutionStage } /** - * Switch to the next {@link StepHasToBeExecuted|stage} of the pipeline executor. + * Switch to the next {@link PipelineStepStage|stage} of the pipeline executor. * * This will fail if either a step change is currently not valid (as not all steps have been executed), * or if there is no next stage (i.e., the pipeline is already completed or in the last stage). @@ -113,8 +116,8 @@ export class PipelineExecutor

{ */ public switchToRequestStage(): void { guard(this.stepCounter === this.pipeline.firstStepPerRequest, 'First need to complete all steps before switching') - guard(this.currentExecutionStage === StepHasToBeExecuted.OncePerFile, 'Cannot switch to next stage, already in per-request stage.') - this.currentExecutionStage = StepHasToBeExecuted.OncePerRequest + guard(this.currentExecutionStage === PipelineStepStage.OncePerFile, 'Cannot switch to next stage, already in per-request stage.') + this.currentExecutionStage = PipelineStepStage.OncePerRequest } @@ -135,11 +138,11 @@ export class PipelineExecutor

{ /** * Returns true only if - * 1) there are more {@link IPipelineStep|steps} to-do for the current {@link StepHasToBeExecuted|stage} and + * 1) there are more {@link IPipelineStep|steps} to-do for the current {@link PipelineStepStage|stage} and * 2) we have not yet reached the end of the {@link Pipeline|pipeline}. */ public hasNextStep(): boolean { - return (this.stepCounter < this.length && this.currentExecutionStage !== StepHasToBeExecuted.OncePerFile) + return (this.stepCounter < this.length && this.currentExecutionStage !== PipelineStepStage.OncePerFile) || this.stepCounter < this.pipeline.firstStepPerRequest } @@ -151,7 +154,7 @@ export class PipelineExecutor

{ * @param expectedStepName - A safeguard if you want to retrieve the result. * If given, it causes the execution to fail if the next step is not the one you expect. * - * *Without `expectedStepName`, please refrain from accessing the result, as you have no safeguards if the pipeline changes.* + * _Without `expectedStepName`, please refrain from accessing the result, as you have no safeguards if the pipeline changes._ */ public async nextStep(expectedStepName?: PassedName): Promise<{ name: typeof expectedStepName extends undefined ? NameOfStep : PassedName @@ -190,9 +193,9 @@ export class PipelineExecutor

{ const requestStep = this.pipeline.firstStepPerRequest guard(this.stepCounter >= requestStep, 'Cannot reset slice prior to once-per-slice stage') this.input = { - ...this.input, + ...(this.input as object), ...newRequestData - } + } as PipelineInput

this.stepCounter = requestStep // clear the results for all steps with an index >= firstStepPerRequest, this is more of a sanity check for(let i = requestStep; i < this.length; i++) { @@ -220,7 +223,7 @@ export class PipelineExecutor

{ await this.nextStep() } - if(canSwitchStage && this.stepCounter < this.length && this.currentExecutionStage === StepHasToBeExecuted.OncePerFile) { + if(canSwitchStage && this.stepCounter < this.length && this.currentExecutionStage === PipelineStepStage.OncePerFile) { this.switchToRequestStage() while(this.hasNextStep()) { await this.nextStep() diff --git a/src/core/stepping-slicer.ts b/src/core/stepping-slicer.ts index b7393c9fbf..bcfafbcef8 100644 --- a/src/core/stepping-slicer.ts +++ b/src/core/stepping-slicer.ts @@ -3,7 +3,7 @@ import { STEPS_PER_SLICE, SteppingSlicerInput, StepResults, - StepName, StepHasToBeExecuted, NameOfStep + StepName, PipelineStepStage, NameOfStep } from './steps' import { SlicingCriteria } from '../slicing' import { createPipeline, Pipeline, PipelineOutput, PipelineStepOutputWithName } from './steps/pipeline' @@ -99,10 +99,10 @@ export class SteppingSlicer { /** * Retrieve the current stage the stepping slicer is in. - * @see StepHasToBeExecuted + * @see PipelineStepStage * @see switchToSliceStage */ - public getCurrentStage(): StepHasToBeExecuted { + public getCurrentStage(): PipelineStepStage { return this.executor.getCurrentStage() } diff --git a/src/core/steps/all/core/00-parse.ts b/src/core/steps/all/core/00-parse.ts index 09eae798bf..12d0790242 100644 --- a/src/core/steps/all/core/00-parse.ts +++ b/src/core/steps/all/core/00-parse.ts @@ -1,26 +1,27 @@ import { internalPrinter, StepOutputFormat } from '../../../print/print' import { parseToQuads } from '../../../print/parse-printer' -import { IPipelineStep, StepHasToBeExecuted } from '../../step' +import { IPipelineStep, PipelineStepStage } from '../../step' import { retrieveXmlFromRCode, RParseRequest, RShell } from '../../../../r-bridge' import { DeepReadonly } from 'ts-essentials' -export const ParseRequiredInput = { +export interface ParseRequiredInput { /** This is the {@link RShell} connection to be used to obtain the original parses AST of the R code */ - shell: undefined 
as unknown as RShell, + readonly shell: RShell /** The request which essentially indicates the input to extract the AST from */ - request: undefined as unknown as RParseRequest -} as const + readonly request: RParseRequest +} export const PARSE_WITH_R_SHELL_STEP = { name: 'parse', description: 'Parse the given R code into an AST', - processor: (_results: object, input: Partial) => retrieveXmlFromRCode(input.request as RParseRequest, input.shell as RShell), - executed: StepHasToBeExecuted.OncePerFile, + processor: (_results: object, input: Partial) => retrieveXmlFromRCode(input.request as RParseRequest, input.shell as RShell), + executed: PipelineStepStage.OncePerFile, printer: { [StepOutputFormat.Internal]: internalPrinter, [StepOutputFormat.Json]: text => text, [StepOutputFormat.RdfQuads]: parseToQuads }, dependencies: [], - requiredInput: ParseRequiredInput -} as const satisfies DeepReadonly) => ReturnType>> + requiredInput: undefined as unknown as ParseRequiredInput +} as const satisfies DeepReadonly< + IPipelineStep<'parse', (results: object, input: Partial) => ReturnType>> diff --git a/src/core/steps/all/core/10-normalize.ts b/src/core/steps/all/core/10-normalize.ts index e7eb142a82..1f58469f6d 100644 --- a/src/core/steps/all/core/10-normalize.ts +++ b/src/core/steps/all/core/10-normalize.ts @@ -11,23 +11,22 @@ import { printNormalizedAstToMermaid, printNormalizedAstToMermaidUrl } from '../../../print/normalize-printer' -import { IPipelineStep, StepHasToBeExecuted } from '../../step' +import { IPipelineStep, PipelineStepStage } from '../../step' import { DeepPartial, DeepReadonly } from 'ts-essentials' import { ParseRequiredInput } from './00-parse' -export const NormalizeRequiredInput = { - ...ParseRequiredInput, +export interface NormalizeRequiredInput extends ParseRequiredInput { /** These hooks only make sense if you at least want to normalize the parsed R AST. 
They can augment the normalization process */ - hooks: undefined as unknown as DeepPartial, + readonly hooks?: DeepPartial, /** This id generator is only necessary if you want to retrieve a dataflow from the parsed R AST, it determines the id generator to use and by default uses the {@link deterministicCountingIdGenerator}*/ - getId: undefined as unknown as IdGenerator -} as const + readonly getId?: IdGenerator +} export const NORMALIZE = { name: 'normalize', description: 'Normalize the AST to flowR\'s AST (first step of the normalization)', - processor: async(results: { parse?: string }, input: Partial) => normalize(results.parse as string, await (input.shell as RShell).tokenMap(), input.hooks, input.getId), - executed: StepHasToBeExecuted.OncePerFile, + processor: async(results: { parse?: string }, input: Partial) => normalize(results.parse as string, await (input.shell as RShell).tokenMap(), input.hooks, input.getId), + executed: PipelineStepStage.OncePerFile, printer: { [StepOutputFormat.Internal]: internalPrinter, [StepOutputFormat.Json]: normalizedAstToJson, @@ -36,5 +35,5 @@ export const NORMALIZE = { [StepOutputFormat.MermaidUrl]: printNormalizedAstToMermaidUrl }, dependencies: [ 'parse' ], - requiredInput: NormalizeRequiredInput -} as const satisfies DeepReadonly) => ReturnType>> + requiredInput: undefined as unknown as NormalizeRequiredInput +} as const satisfies DeepReadonly) => ReturnType>> diff --git a/src/core/steps/all/core/20-dataflow.ts b/src/core/steps/all/core/20-dataflow.ts index 320b13d2d7..dcc3737a86 100644 --- a/src/core/steps/all/core/20-dataflow.ts +++ b/src/core/steps/all/core/20-dataflow.ts @@ -1,5 +1,5 @@ import { internalPrinter, StepOutputFormat } from '../../../print/print' -import { IPipelineStep, StepHasToBeExecuted } from '../../step' +import { IPipelineStep, PipelineStepStage } from '../../step' import { produceDataFlowGraph } from '../../../../dataflow' import { dataflowGraphToJson, @@ -14,7 +14,7 @@ export const 
LEGACY_STATIC_DATAFLOW = { name: 'dataflow', description: 'Construct the dataflow graph', processor: (results: { normalize?: NormalizedAst }) => produceDataFlowGraph(results.normalize as NormalizedAst), - executed: StepHasToBeExecuted.OncePerFile, + executed: PipelineStepStage.OncePerFile, printer: { [StepOutputFormat.Internal]: internalPrinter, [StepOutputFormat.Json]: dataflowGraphToJson, @@ -22,5 +22,6 @@ export const LEGACY_STATIC_DATAFLOW = { [StepOutputFormat.Mermaid]: dataflowGraphToMermaid, [StepOutputFormat.MermaidUrl]: dataflowGraphToMermaidUrl }, - dependencies: [ 'normalize' ] + dependencies: [ 'normalize' ], + requiredInput: {} } as const satisfies DeepReadonly ReturnType>> diff --git a/src/core/steps/all/static-slicing/30-slice.ts b/src/core/steps/all/static-slicing/30-slice.ts index 9e1995ed66..2e14ecd6b0 100644 --- a/src/core/steps/all/static-slicing/30-slice.ts +++ b/src/core/steps/all/static-slicing/30-slice.ts @@ -1,29 +1,27 @@ import { internalPrinter, StepOutputFormat } from '../../../print/print' -import { IPipelineStep, StepHasToBeExecuted } from '../../step' +import { IPipelineStep, PipelineStepStage } from '../../step' import { SlicingCriteria, staticSlicing } from '../../../../slicing' import { DeepReadonly } from 'ts-essentials' import { NormalizeRequiredInput } from '../core/10-normalize' import { DataflowInformation } from '../../../../dataflow/internal/info' import { NormalizedAst } from '../../../../r-bridge' -export const SliceRequiredInput = { - ...NormalizeRequiredInput, +export interface SliceRequiredInput extends NormalizeRequiredInput { /** The slicing criterion is only of interest if you actually want to slice the R code */ - criterion: undefined as unknown as SlicingCriteria, + readonly criterion: SlicingCriteria, /** How many re-visits of the same node are ok? TODO: use default? 
*/ - threshold: 75 -} as const - + readonly threshold?: number +} export const STATIC_SLICE = { name: 'slice', description: 'Calculate the actual static slice from the dataflow graph and the given slicing criteria', - processor: (results: { dataflow?: DataflowInformation, normalize?: NormalizedAst }, input: Partial) => + processor: (results: { dataflow?: DataflowInformation, normalize?: NormalizedAst }, input: Partial) => staticSlicing((results.dataflow as DataflowInformation).graph, results.normalize as NormalizedAst, input.criterion as SlicingCriteria, input.threshold), - executed: StepHasToBeExecuted.OncePerRequest, + executed: PipelineStepStage.OncePerRequest, printer: { [StepOutputFormat.Internal]: internalPrinter }, dependencies: [ 'dataflow' ], - requiredInput: SliceRequiredInput -} as const satisfies DeepReadonly) => ReturnType>> + requiredInput: undefined as unknown as SliceRequiredInput +} as const satisfies DeepReadonly) => ReturnType>> diff --git a/src/core/steps/all/static-slicing/40-reconstruct.ts b/src/core/steps/all/static-slicing/40-reconstruct.ts index b94705ea6d..eafde8ac76 100644 --- a/src/core/steps/all/static-slicing/40-reconstruct.ts +++ b/src/core/steps/all/static-slicing/40-reconstruct.ts @@ -1,24 +1,22 @@ import { internalPrinter, StepOutputFormat } from '../../../print/print' -import { IPipelineStep, StepHasToBeExecuted } from '../../step' +import { IPipelineStep, PipelineStepStage } from '../../step' import { autoSelectLibrary, AutoSelectPredicate, reconstructToCode, SliceResult } from '../../../../slicing' import { DeepReadonly } from 'ts-essentials' import { NormalizedAst } from '../../../../r-bridge' import { SliceRequiredInput } from './30-slice' -export const ReconstructRequiredInput = { - ...SliceRequiredInput, - /** If you want to auto-select something in the reconstruction add it here, otherwise, it will use the default defined alongside {@link reconstructToCode}*/ - autoSelectIf: autoSelectLibrary as AutoSelectPredicate -} as 
const +export interface ReconstructRequiredInput extends SliceRequiredInput { + autoSelectIf?: AutoSelectPredicate +} export const NAIVE_RECONSTRUCT = { name: 'reconstruct', description: 'Reconstruct R code from the static slice', - processor: (results: { normalize?: NormalizedAst, slice?: SliceResult }, input: Partial) => reconstructToCode(results.normalize as NormalizedAst, (results.slice as SliceResult).result, input.autoSelectIf), - executed: StepHasToBeExecuted.OncePerRequest, + processor: (results: { normalize?: NormalizedAst, slice?: SliceResult }, input: Partial) => reconstructToCode(results.normalize as NormalizedAst, (results.slice as SliceResult).result, input.autoSelectIf), + executed: PipelineStepStage.OncePerRequest, printer: { [StepOutputFormat.Internal]: internalPrinter }, dependencies: [ 'slice' ], - requiredInput: ReconstructRequiredInput -} as const satisfies DeepReadonly) => ReturnType>> + requiredInput: undefined as unknown as ReconstructRequiredInput +} as const satisfies DeepReadonly) => ReturnType>> diff --git a/src/core/steps/pipeline/create.ts b/src/core/steps/pipeline/create.ts index 719fe6ec26..1fb21d7fc8 100644 --- a/src/core/steps/pipeline/create.ts +++ b/src/core/steps/pipeline/create.ts @@ -1,4 +1,4 @@ -import { IPipelineStep, NameOfStep, StepHasToBeExecuted } from '../step' +import { IPipelineStep, NameOfStep, PipelineStepStage } from '../step' import { InvalidPipelineError } from './invalid-pipeline-error' import { Pipeline } from './pipeline' import { jsonReplacer } from '../../../util/json' @@ -12,7 +12,7 @@ export function verifyAndBuildPipeline(steps: readonly IPipelineStep[]): Pipelin throw new InvalidPipelineError('0) Pipeline is empty') } - const [perFileSteps, perRequestSteps] = partitionArray(steps, s => s.executed === StepHasToBeExecuted.OncePerFile) + const [perFileSteps, perRequestSteps] = partitionArray(steps, s => s.executed === PipelineStepStage.OncePerFile) // we construct a map linking each name to its respective 
step const perFileStepMap = new Map() diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index 795546891f..a0c472958b 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -1,6 +1,7 @@ -import { IPipelineStep, NameOfStep, StepHasToBeExecuted } from '../step' +import { IPipelineStep, NameOfStep, PipelineStepStage } from '../step' import { verifyAndBuildPipeline } from './create' -import { DeepReadonly } from 'ts-essentials' +import { DeepReadonly, UnionToIntersection } from 'ts-essentials' +import { DEFAULT_SLICING_PIPELINE } from './default' /** * A pipeline is a collection of {@link Pipeline#steps|steps} that are executed in a certain {@link Pipeline#order|order}. @@ -13,8 +14,8 @@ export interface Pipeline { readonly order: readonly T['name'][] /** * In the order, this is the index of the first step that - * is executed {@link StepHasToBeExecuted#OncePerRequest|once per request}. - * If it is "out of bounds" (i.e., the number of steps), all steps are executed {@link StepHasToBeExecuted#OncePerFile|once per file}. + * is executed {@link PipelineStepStage#OncePerRequest|once per request}. + * If it is "out of bounds" (i.e., the number of steps), all steps are executed {@link PipelineStepStage#OncePerFile|once per file}. */ readonly firstStepPerRequest: number } @@ -32,14 +33,17 @@ export type PipelineStepProcessorWithName

= PipelineStepWithName['printer'] export type PipelineStepOutputWithName

= Awaited>> -export type PipelineInput

= PipelineStep

['requiredInput'] + +export type PipelineInput

= UnionToIntersection['requiredInput']> + +type T = PipelineInput /** * Only gets the union of 'requiredInput' of those PipelineSteps which have a 'execute' field of type 'OncePerRequest'. * In other words, information that you may want to change for another request (e.g., another slice) with the same file. */ export type PipelinePerRequestInput

= { - [K in PipelineStepNames

]: PipelineStep

['executed'] extends StepHasToBeExecuted.OncePerFile ? never : PipelineStepWithName['requiredInput'] + [K in PipelineStepNames

]: PipelineStep

['executed'] extends PipelineStepStage.OncePerFile ? never : PipelineStepWithName['requiredInput'] }[PipelineStepNames

] export type PipelineOutput

= { @@ -58,7 +62,7 @@ export type PipelineOutput

= { * 4) the target of a {@link IPipelineStepOrder#decorates|step's decoration} exists * 5) if a {@link IPipelineStepOrder#decorates|decoration} applies, all of its {@link IPipelineStepOrder#dependencies|dependencies} are already in the pipeline * 6) in the resulting {@link Pipeline|pipeline}, there is a strict cut between {@link IPipelineStep|steps} that are executed - * {@link StepHasToBeExecuted#OncePerFile|once per file} and {@link StepHasToBeExecuted#OncePerRequest|once per request}. + * {@link PipelineStepStage#OncePerFile|once per file} and {@link PipelineStepStage#OncePerRequest|once per request}. * * @returns The function will try to order your collection steps so that all the constraints hold. * If it succeeds it will return the resulting {@link Pipeline|pipeline}, otherwise it will throw an {@link InvalidPipelineError}. diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index b5e2210c2f..87716069cb 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -25,7 +25,7 @@ export type StepProcessingFunction = /** * This represents the required execution frequency of a step. */ -export const enum StepHasToBeExecuted { +export const enum PipelineStepStage { /** This step has to be executed once per file */ OncePerFile, /** This step has to be executed once per request (e.g., slice for a given variable) */ @@ -53,7 +53,7 @@ export interface IPipelineStepOrder< */ readonly dependencies: readonly NameOfStep[] /* does this step has to be repeated for each new request or can it be performed only once in the initialization */ - readonly executed: StepHasToBeExecuted + readonly executed: PipelineStepStage /** * This is similar to {@link dependencies}, but is used to say that a given step _decorates_ another one. * This imbues two requirements: @@ -92,8 +92,10 @@ export interface IPipelineStep< * Required inputs of dependencies do not have to, but can be repeated. *

* Use the pattern `undefined as unknown as T` to indicate that the value is required but not provided. + * + * TODO: respect default values. */ - readonly requiredInput?: Record + readonly requiredInput: object } diff --git a/test/functionality/dataflow/dataflow.spec.ts b/test/functionality/dataflow/dataflow.spec.ts index 2225b867d6..62127d83ab 100644 --- a/test/functionality/dataflow/dataflow.spec.ts +++ b/test/functionality/dataflow/dataflow.spec.ts @@ -1,5 +1,9 @@ import { requireAllTestsInFolder } from '../_helper/collect-tests' import path from 'path' +import { PipelineExecutor } from '../../../src/core/pipeline-executor' +import { DEFAULT_SLICING_PIPELINE } from '../../../src/core/steps/pipeline' +import { withShell } from '../_helper/shell' +import { requestFromInput } from '../../../src/r-bridge' describe('Dataflow', () => { describe('Environments', () => @@ -10,6 +14,27 @@ describe('Dataflow', () => { requireAllTestsInFolder(path.join(__dirname, 'graph')) ) + describe('x', withShell(shell => { + it('foo', async() => { + const stepper = new PipelineExecutor(DEFAULT_SLICING_PIPELINE, { + shell, + criterion: ['2@b'], + request: requestFromInput('b <- 3\ncat(b)'), + }) + + while(stepper.hasNextStep()) { + await stepper.nextStep() + } + + stepper.switchToRequestStage() + + while(stepper.hasNextStep()) { + await stepper.nextStep() + } + + console.log(stepper.getResults()) + }) + })) require('./processing-of-elements/processing-of-elements') }) From bbf882ae3e3bb5d75e454ce2ce3449a0b572f48f Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 20:49:24 +0100 Subject: [PATCH 096/104] doc(pipeline-executor): rewrite the main doc for the pipline executor in the code --- src/core/pipeline-executor.ts | 74 +++++++++++++------- test/functionality/dataflow/dataflow.spec.ts | 17 ++++- 2 files changed, 62 insertions(+), 29 deletions(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index 523a7cf937..1474b61b20 100644 --- 
a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -23,54 +23,74 @@ import { * (e.g., to be instrumented with measurements). So, you can use the pipeline executor like this: * * ```ts - * const slicer = new PipelineExecutor({ ... }) - * while(slicer.hasNextStep()) { - * await slicer.nextStep() + * const stepper = new PipelineExecutor( ... ) + * while(stepper.hasNextStep()) { + * await stepper.nextStep() * } * - * slicer.switchToSliceStage() + * stepper.switchToRequestStage() * - * while(slicer.hasNextStep()) { - * await slicer.nextStep() + * while(stepper.hasNextStep()) { + * await stepper.nextStep() * } * - * const result = slicer.getResults() + * const result = stepper.getResults() * ``` * - * Of course, you might think, that this is rather overkill if you simply want to receive the slice of a given input source or in general - * the result of any step. And this is true. Therefore, if you do not want to perform some kind of magic in-between steps, you can use the + * Of course, you might think, that this is rather overkill if you simply want to receive the result. + * And this is true. Therefore, if you do not want to perform some kind of magic in-between steps, you can use the * **{@link allRemainingSteps}** function like this: * * ```ts - * const slicer = new SteppingSlicer({ ... }) - * const result = await slicer.allRemainingSteps() + * const stepper = new PipelineExecutor( ... ) + * const result = await stepper.allRemainingSteps() * ``` * - * As the name suggest, you can combine this name with previous calls to {@link nextStep} to only execute the remaining steps. + * As the name suggest, you can combine this name with previous calls to {@link nextStep} to only execute the remaining + * steps in case, for whatever reason you only want to instrument some steps. + * + * By default, the {@link PipelineExecutor} does not offer an automatic way to repeat requests (mostly to prevent accidental errors). 
+ * However, you can use the + * **{@link updateRequest}** function to reset the request steps and re-execute them for a new request. This allows something like the following: * - * Giving the **step of interest** allows you to declare the maximum step to execute. - * So, if you pass `dataflow` as the step of interest, the stepping slicer will stop after the dataflow step. - * If you do not pass a step, the stepping slicer will execute all steps. + * ```ts + * const stepper = new PipelineExecutor( ... ) + * const result = await stepper.allRemainingSteps() + * + * stepper.updateRequest( ... ) + * const result2 = await stepper.allRemainingSteps() + * ``` * - * By default, the {@link PipelineExecutor} does not offer an automatic way to repeat the per-slice steps for multiple slices (this is mostly to prevent accidental errors). - * However, you can use the **{@link updateCriterion}** function to reset the per-slice steps and re-execute them for a new slice. This allows something like the following: + * **Example - Slicing With the Pipeline Executor**: + * + * Suppose, you want to... you know _slice_ a file (which was, at one point the origin of flowR), then you can + * either create a pipeline yourself with the respective steps, or you can use the {@link DEFAULT_SLICING_PIPELINE} (and friends). + * With it, slicing essentially becomes 'easy-as-pie': * * ```ts - * const slicer = new SteppingSlicer({ ... }) + * const slicer = new PipelineExecutor(DEFAULT_SLICING_PIPELINE, { + * shell, + * // of course, the criterion and request given here are just examples, you can use whatever you want to slice! + * criterion: ['2@b'], + * request: requestFromInput('b <- 3; x <- 5\ncat(b)'), + * }) * const result = await slicer.allRemainingSteps() + * ``` * - * slicer.updateCriterion(...) - * const result2 = await slicer.allRemainingSteps() + * But now, we want to slice for `x` in the first line as well! 
We can do that by adding: + * + * ```ts + * stepper.updateRequest({ criterion: ['1@x'] }) + * const result2 = await stepper.allRemainingSteps() * ``` * - * @note Even though, using the stepping slicer introduces some performance overhead, we consider - * it to be the baseline for performance benchmarking. It may very well be possible to squeeze out some more performance by + * @note Even though using the pipeline executor introduces a small performance overhead, we consider + * it to be the baseline for performance benchmarking. It may very well be possible to squeeze out a little bit more by * directly constructing the steps in the right order. However, we consider this to be negligible when compared with the time required - * for, for example, the dataflow analysis. + * for, for example, the dataflow analysis of larger files. * - * @see retrieveResultOfStep - * @see PipelineExecutor#_doNextStep - * @see StepName + * @see PipelineExecutor#allRemainingSteps + * @see PipelineExecutor#nextStep */ export class PipelineExecutor

{ private readonly pipeline: P @@ -191,7 +211,7 @@ export class PipelineExecutor

{ */ public updateRequest(newRequestData: PipelinePerRequestInput

): void { const requestStep = this.pipeline.firstStepPerRequest - guard(this.stepCounter >= requestStep, 'Cannot reset slice prior to once-per-slice stage') + guard(this.stepCounter >= requestStep, 'Cannot reset request prior to once-per-request stage') this.input = { ...(this.input as object), ...newRequestData diff --git a/test/functionality/dataflow/dataflow.spec.ts b/test/functionality/dataflow/dataflow.spec.ts index 62127d83ab..1fdf76bcfb 100644 --- a/test/functionality/dataflow/dataflow.spec.ts +++ b/test/functionality/dataflow/dataflow.spec.ts @@ -19,7 +19,7 @@ describe('Dataflow', () => { const stepper = new PipelineExecutor(DEFAULT_SLICING_PIPELINE, { shell, criterion: ['2@b'], - request: requestFromInput('b <- 3\ncat(b)'), + request: requestFromInput('b <- 3; x <- 5\ncat(b)'), }) while(stepper.hasNextStep()) { @@ -32,7 +32,20 @@ describe('Dataflow', () => { await stepper.nextStep() } - console.log(stepper.getResults()) + const result = stepper.getResults() + console.log(result) + }) + it('bar', async() => { + const stepper = new PipelineExecutor(DEFAULT_SLICING_PIPELINE, { + shell, + criterion: ['2@b'], + request: requestFromInput('b <- 3; x <- 5\ncat(b)'), + }) + console.log(await stepper.allRemainingSteps()) + stepper.updateRequest({ + criterion: ['1@x'] + }) + console.log(await stepper.allRemainingSteps()) }) })) From 3459f17feab4fc34b3d3a8812219ac599d800fd0 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 20:53:03 +0100 Subject: [PATCH 097/104] refactor(pipeline-executor): make guard message a supplier to avoid the string construction if it is not needed --- src/core/pipeline-executor.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index 1474b61b20..eb0f384abc 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -194,7 +194,7 @@ export class PipelineExecutor

{ result: Promise> ] { const step = this.pipeline.steps.get(this.pipeline.order[this.stepCounter]) - guard(step !== undefined, `Cannot execute next step, step ${this.pipeline.order[this.stepCounter]} does not exist.`) + guard(step !== undefined, () => `Cannot execute next step, step ${this.pipeline.order[this.stepCounter]} does not exist.`) if(expectedStepName !== undefined) { guard(step.name === expectedStepName, () => `Cannot execute next step, expected step ${JSON.stringify(expectedStepName)} but got ${step.name}.`) From ee6211c0087ed4887d29b19da7a544609184e34f Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 21:01:11 +0100 Subject: [PATCH 098/104] doc(wiki, interface): deprecation information for SteppingSlicer --- wiki/Interface.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/wiki/Interface.md b/wiki/Interface.md index 8b03e1a448..7d8a560d01 100644 --- a/wiki/Interface.md +++ b/wiki/Interface.md @@ -18,7 +18,7 @@ Although far from being as detailed as the in-depth explanation of [*flowR*](htt - [Interfacing With the File System](#interfacing-with-the-file-system) - [⚒️ Writing Code](#️-writing-code) - [Interfacing With R by Using The `RShell`](#interfacing-with-r-by-using-the-rshell) - - [Slicing With The `SteppingSlicer`](#slicing-with-the-steppingslicer) + - [(Deprecated) Slicing With The `SteppingSlicer`](#deprecated-slicing-with-the-steppingslicer) - [Understanding the Steps](#understanding-the-steps) - [Benchmark the Slicer With The `BenchmarkSlicer`](#benchmark-the-slicer-with-the-benchmarkslicer) - [Augmenting the Normalization](#augmenting-the-normalization) @@ -954,7 +954,11 @@ With a shell object (let's call it `shell`), you can execute R code by using `RS Besides that, the command `RShell::tryToInjectHomeLibPath` may be of interest, as it enables all libraries available on the host system. 
-### Slicing With The `SteppingSlicer` + +### (Deprecated) Slicing With The `SteppingSlicer` + +> 💡 Information\ +> Please note, that the `SteppingSlicer` has been deprecated with the *Dataflow v2* update, in favor of a far more general `PipelineExecutor` (which now backs the `SteppingSlicer` using a custom legacy-`Pipeline` to ensure that it behaves similar). The main class that represents *flowR*'s slicing is the [`SteppingSlicer`](https://code-inspect.github.io/flowr/doc/classes/src_core_slicer.SteppingSlicer.html) class. With *flowR*, this allows you to slice code like this: From 15c1c12111b9dfecad390012ff8f0f8266d89526 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 21:10:45 +0100 Subject: [PATCH 099/104] refactor(test): remove dummy test/tests --- src/core/pipeline-executor.ts | 2 +- test/functionality/dataflow/dataflow.spec.ts | 39 -------------------- 2 files changed, 1 insertion(+), 40 deletions(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index eb0f384abc..4fcfa73cf3 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -69,7 +69,7 @@ import { * * ```ts * const slicer = new PipelineExecutor(DEFAULT_SLICING_PIPELINE, { - * shell, + * shell: new RShell(), * // of course, the criterion and request given here are just examples, you can use whatever you want to slice! 
* criterion: ['2@b'], * request: requestFromInput('b <- 3; x <- 5\ncat(b)'), diff --git a/test/functionality/dataflow/dataflow.spec.ts b/test/functionality/dataflow/dataflow.spec.ts index 1fdf76bcfb..b13e6d891d 100644 --- a/test/functionality/dataflow/dataflow.spec.ts +++ b/test/functionality/dataflow/dataflow.spec.ts @@ -1,9 +1,5 @@ import { requireAllTestsInFolder } from '../_helper/collect-tests' import path from 'path' -import { PipelineExecutor } from '../../../src/core/pipeline-executor' -import { DEFAULT_SLICING_PIPELINE } from '../../../src/core/steps/pipeline' -import { withShell } from '../_helper/shell' -import { requestFromInput } from '../../../src/r-bridge' describe('Dataflow', () => { describe('Environments', () => @@ -14,40 +10,5 @@ describe('Dataflow', () => { requireAllTestsInFolder(path.join(__dirname, 'graph')) ) - describe('x', withShell(shell => { - it('foo', async() => { - const stepper = new PipelineExecutor(DEFAULT_SLICING_PIPELINE, { - shell, - criterion: ['2@b'], - request: requestFromInput('b <- 3; x <- 5\ncat(b)'), - }) - - while(stepper.hasNextStep()) { - await stepper.nextStep() - } - - stepper.switchToRequestStage() - - while(stepper.hasNextStep()) { - await stepper.nextStep() - } - - const result = stepper.getResults() - console.log(result) - }) - it('bar', async() => { - const stepper = new PipelineExecutor(DEFAULT_SLICING_PIPELINE, { - shell, - criterion: ['2@b'], - request: requestFromInput('b <- 3; x <- 5\ncat(b)'), - }) - console.log(await stepper.allRemainingSteps()) - stepper.updateRequest({ - criterion: ['1@x'] - }) - console.log(await stepper.allRemainingSteps()) - }) - })) - require('./processing-of-elements/processing-of-elements') }) From 8bf1f67c189799ecfab3e6a7f597f89bb6b0ffe1 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 21:17:55 +0100 Subject: [PATCH 100/104] doc(wiki, interface): rudimentary pipeline-executor explanation --- wiki/Interface.md | 32 +++++++++++++++++++++++++++----- 1 file 
changed, 27 insertions(+), 5 deletions(-) diff --git a/wiki/Interface.md b/wiki/Interface.md index 7d8a560d01..fb2470b997 100644 --- a/wiki/Interface.md +++ b/wiki/Interface.md @@ -18,6 +18,7 @@ Although far from being as detailed as the in-depth explanation of [*flowR*](htt - [Interfacing With the File System](#interfacing-with-the-file-system) - [⚒️ Writing Code](#️-writing-code) - [Interfacing With R by Using The `RShell`](#interfacing-with-r-by-using-the-rshell) + - [The Pipeline Executor](#the-pipeline-executor) - [(Deprecated) Slicing With The `SteppingSlicer`](#deprecated-slicing-with-the-steppingslicer) - [Understanding the Steps](#understanding-the-steps) - [Benchmark the Slicer With The `BenchmarkSlicer`](#benchmark-the-slicer-with-the-benchmarkslicer) @@ -953,7 +954,30 @@ With a shell object (let's call it `shell`), you can execute R code by using `RS Besides that, the command `RShell::tryToInjectHomeLibPath` may be of interest, as it enables all libraries available on the host system. +### The Pipeline Executor +Once, in the beginning, *flowR* was meant to produce a dataflow graph merely to provide *program slices*. However, with continuous extensions the dataflow graph repeatedly proofs to be the interesting part. +With this, we restructured *flowR*'s *hardcoded* pipeline to be +far more flexible. Now, it can be theoretically extended or replaced with arbitrary steps, optional steps, and, what we call 'decorations' of these steps. 
In short, if you still "just want to slice", you can do it like this: + +```typescript +const slicer = new PipelineExecutor(DEFAULT_SLICING_PIPELINE, { + shell: new RShell(), + request: requestFromInput('x <- 1\nx + 1'), + criterion: ['2@x'] +}) +const slice = await slicer.allRemainingSteps() +// console.log(slice.reconstruct.code) +``` + +If you compare this, with what you would have done with the [old `SteppingSlicer`](#deprecated-slicing-with-the-steppingslicer) this essentially just requires you to replace the `SteppingSlicer` with the `PipelineExecutor` and to pass the `DEFAULT_SLICING_PIPELINE` as the first argument. +Similarly, the new `PipelineExecutor`... + +1. allows to investigate the results of all intermediate steps +2. can be executed step-by-step +3. can repeat steps (e.g., to calculate multiple slices on the same input) + +See the [documentation](https://code-inspect.github.io/flowr/doc/classes/src_core_pipeline-executor.PipelineExecutor.html) for more information. ### (Deprecated) Slicing With The `SteppingSlicer` @@ -963,10 +987,8 @@ Besides that, the command `RShell::tryToInjectHomeLibPath` may be of interest, a The main class that represents *flowR*'s slicing is the [`SteppingSlicer`](https://code-inspect.github.io/flowr/doc/classes/src_core_slicer.SteppingSlicer.html) class. With *flowR*, this allows you to slice code like this: ```typescript -const shell = new RShell() - const stepper = new SteppingSlicer({ - shell: shell, + shell: new RShell(), request: requestFromInput('x <- 1\nx + 1'), criterion: ['2@x'] }) @@ -989,13 +1011,13 @@ Besides slicing, the stepping slicer: 2. can be executed step-by-step 3. can be told to stop after a given step -See the [documentation](https://code-inspect.github.io/flowr/doc/classes/src_core_slicer.SteppingSlicer.html) for more. +See the [documentation](https://code-inspect.github.io/flowr/doc/classes/src_core_stepping-slicer.SteppingSlicer.html) for more. 
#### Understanding the Steps The definition of all steps happens in [src/core/steps.ts](https://github.com/Code-Inspect/flowr/blob/main/src/core/steps.ts). Investigating the file provides you an overview of the slicing phases, as well as the functions that are called to perform the respective step. -The [`SteppingSlicer`](https://github.com/Code-Inspect/flowr/blob/main/src/core/slicer.ts) simply glues them together and passes the results of one step to the next. +The [`SteppingSlicer`](https://github.com/Code-Inspect/flowr/blob/main/src/core/stepping-slicer.ts) simply glues them together and passes the results of one step to the next. If you are interested in the type magic associated with the stepping slicers output type, refer to [src/core/output.ts](https://github.com/Code-Inspect/flowr/blob/main/src/core/output.ts). If you add a new step, make sure to modify all of these locations accordingly. From 48e47478728b35af7379fa780b2e4d9d27b39565 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 21:28:24 +0100 Subject: [PATCH 101/104] refactor(steps): clean up processor types --- src/core/steps/all/core/00-parse.ts | 8 ++++++-- src/core/steps/all/core/10-normalize.ts | 11 ++++++++--- src/core/steps/all/core/20-dataflow.ts | 8 ++++++-- src/core/steps/all/static-slicing/30-slice.ts | 17 ++++++++++------- .../steps/all/static-slicing/40-reconstruct.ts | 10 +++++++--- 5 files changed, 37 insertions(+), 17 deletions(-) diff --git a/src/core/steps/all/core/00-parse.ts b/src/core/steps/all/core/00-parse.ts index 12d0790242..8e5f31b8f2 100644 --- a/src/core/steps/all/core/00-parse.ts +++ b/src/core/steps/all/core/00-parse.ts @@ -11,10 +11,14 @@ export interface ParseRequiredInput { readonly request: RParseRequest } +function processor(_results: unknown, input: Partial) { + return retrieveXmlFromRCode(input.request as RParseRequest, input.shell as RShell) +} + export const PARSE_WITH_R_SHELL_STEP = { name: 'parse', description: 'Parse the given R code into an 
AST', - processor: (_results: object, input: Partial) => retrieveXmlFromRCode(input.request as RParseRequest, input.shell as RShell), + processor, executed: PipelineStepStage.OncePerFile, printer: { [StepOutputFormat.Internal]: internalPrinter, @@ -24,4 +28,4 @@ export const PARSE_WITH_R_SHELL_STEP = { dependencies: [], requiredInput: undefined as unknown as ParseRequiredInput } as const satisfies DeepReadonly< - IPipelineStep<'parse', (results: object, input: Partial) => ReturnType>> +IPipelineStep<'parse', typeof processor>> diff --git a/src/core/steps/all/core/10-normalize.ts b/src/core/steps/all/core/10-normalize.ts index 1f58469f6d..888775fedb 100644 --- a/src/core/steps/all/core/10-normalize.ts +++ b/src/core/steps/all/core/10-normalize.ts @@ -1,7 +1,8 @@ import { IdGenerator, NoInfo, - normalize, RShell, + normalize, + RShell, XmlParserHooks } from '../../../../r-bridge' import { internalPrinter, StepOutputFormat } from '../../../print/print' @@ -22,10 +23,14 @@ export interface NormalizeRequiredInput extends ParseRequiredInput { readonly getId?: IdGenerator } +async function processor(results: { parse?: string }, input: Partial) { + return normalize(results.parse as string, await (input.shell as RShell).tokenMap(), input.hooks, input.getId) +} + export const NORMALIZE = { name: 'normalize', description: 'Normalize the AST to flowR\'s AST (first step of the normalization)', - processor: async(results: { parse?: string }, input: Partial) => normalize(results.parse as string, await (input.shell as RShell).tokenMap(), input.hooks, input.getId), + processor, executed: PipelineStepStage.OncePerFile, printer: { [StepOutputFormat.Internal]: internalPrinter, @@ -36,4 +41,4 @@ export const NORMALIZE = { }, dependencies: [ 'parse' ], requiredInput: undefined as unknown as NormalizeRequiredInput -} as const satisfies DeepReadonly) => ReturnType>> +} as const satisfies DeepReadonly> diff --git a/src/core/steps/all/core/20-dataflow.ts 
b/src/core/steps/all/core/20-dataflow.ts index dcc3737a86..f3d8eb3ac3 100644 --- a/src/core/steps/all/core/20-dataflow.ts +++ b/src/core/steps/all/core/20-dataflow.ts @@ -10,10 +10,14 @@ import { import { DeepReadonly } from 'ts-essentials' import { NormalizedAst } from '../../../../r-bridge' +function processor(results: { normalize?: NormalizedAst }) { + return produceDataFlowGraph(results.normalize as NormalizedAst) +} + export const LEGACY_STATIC_DATAFLOW = { name: 'dataflow', description: 'Construct the dataflow graph', - processor: (results: { normalize?: NormalizedAst }) => produceDataFlowGraph(results.normalize as NormalizedAst), + processor, executed: PipelineStepStage.OncePerFile, printer: { [StepOutputFormat.Internal]: internalPrinter, @@ -24,4 +28,4 @@ export const LEGACY_STATIC_DATAFLOW = { }, dependencies: [ 'normalize' ], requiredInput: {} -} as const satisfies DeepReadonly ReturnType>> +} as const satisfies DeepReadonly> diff --git a/src/core/steps/all/static-slicing/30-slice.ts b/src/core/steps/all/static-slicing/30-slice.ts index 2e14ecd6b0..3d314411c2 100644 --- a/src/core/steps/all/static-slicing/30-slice.ts +++ b/src/core/steps/all/static-slicing/30-slice.ts @@ -8,20 +8,23 @@ import { NormalizedAst } from '../../../../r-bridge' export interface SliceRequiredInput extends NormalizeRequiredInput { /** The slicing criterion is only of interest if you actually want to slice the R code */ - readonly criterion: SlicingCriteria, - /** How many re-visits of the same node are ok? TODO: use default? */ + readonly criterion: SlicingCriteria, + /** How many re-visits of the same node are ok? 
*/ readonly threshold?: number } +function processor(results: { dataflow?: DataflowInformation, normalize?: NormalizedAst }, input: Partial) { + return staticSlicing((results.dataflow as DataflowInformation).graph, results.normalize as NormalizedAst, input.criterion as SlicingCriteria, input.threshold) +} + export const STATIC_SLICE = { name: 'slice', description: 'Calculate the actual static slice from the dataflow graph and the given slicing criteria', - processor: (results: { dataflow?: DataflowInformation, normalize?: NormalizedAst }, input: Partial) => - staticSlicing((results.dataflow as DataflowInformation).graph, results.normalize as NormalizedAst, input.criterion as SlicingCriteria, input.threshold), - executed: PipelineStepStage.OncePerRequest, - printer: { + processor, + executed: PipelineStepStage.OncePerRequest, + printer: { [StepOutputFormat.Internal]: internalPrinter }, dependencies: [ 'dataflow' ], requiredInput: undefined as unknown as SliceRequiredInput -} as const satisfies DeepReadonly) => ReturnType>> +} as const satisfies DeepReadonly> diff --git a/src/core/steps/all/static-slicing/40-reconstruct.ts b/src/core/steps/all/static-slicing/40-reconstruct.ts index eafde8ac76..e1c1cf6911 100644 --- a/src/core/steps/all/static-slicing/40-reconstruct.ts +++ b/src/core/steps/all/static-slicing/40-reconstruct.ts @@ -1,6 +1,6 @@ import { internalPrinter, StepOutputFormat } from '../../../print/print' import { IPipelineStep, PipelineStepStage } from '../../step' -import { autoSelectLibrary, AutoSelectPredicate, reconstructToCode, SliceResult } from '../../../../slicing' +import { AutoSelectPredicate, reconstructToCode, SliceResult } from '../../../../slicing' import { DeepReadonly } from 'ts-essentials' import { NormalizedAst } from '../../../../r-bridge' import { SliceRequiredInput } from './30-slice' @@ -9,14 +9,18 @@ export interface ReconstructRequiredInput extends SliceRequiredInput { autoSelectIf?: AutoSelectPredicate } +function processor(results: { 
normalize?: NormalizedAst, slice?: SliceResult }, input: Partial) { + return reconstructToCode(results.normalize as NormalizedAst, (results.slice as SliceResult).result, input.autoSelectIf) +} + export const NAIVE_RECONSTRUCT = { name: 'reconstruct', description: 'Reconstruct R code from the static slice', - processor: (results: { normalize?: NormalizedAst, slice?: SliceResult }, input: Partial) => reconstructToCode(results.normalize as NormalizedAst, (results.slice as SliceResult).result, input.autoSelectIf), + processor, executed: PipelineStepStage.OncePerRequest, printer: { [StepOutputFormat.Internal]: internalPrinter }, dependencies: [ 'slice' ], requiredInput: undefined as unknown as ReconstructRequiredInput -} as const satisfies DeepReadonly) => ReturnType>> +} as const satisfies DeepReadonly> From f92353c6278c2f7c906e6c53aadf100e9250f01d Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 21:30:08 +0100 Subject: [PATCH 102/104] lint-fix(pipeline): remove unused type helper i used to experiment --- src/core/steps/pipeline/pipeline.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index a0c472958b..dbe08e1ecb 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -1,7 +1,6 @@ import { IPipelineStep, NameOfStep, PipelineStepStage } from '../step' import { verifyAndBuildPipeline } from './create' import { DeepReadonly, UnionToIntersection } from 'ts-essentials' -import { DEFAULT_SLICING_PIPELINE } from './default' /** * A pipeline is a collection of {@link Pipeline#steps|steps} that are executed in a certain {@link Pipeline#order|order}. @@ -36,8 +35,6 @@ export type PipelineStepOutputWithName

= UnionToIntersection['requiredInput']> -type T = PipelineInput - /** * Only gets the union of 'requiredInput' of those PipelineSteps which have a 'execute' field of type 'OncePerRequest'. * In other words, information that you may want to change for another request (e.g., another slice) with the same file. From 4c46edbdba34e96dc4b69e08c52fce48980157f0 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 21:32:53 +0100 Subject: [PATCH 103/104] lint-fix: remove some of the i-won't do them now todos --- src/cli/repl/server/connection.ts | 1 - src/core/steps/step.ts | 5 ----- 2 files changed, 6 deletions(-) diff --git a/src/cli/repl/server/connection.ts b/src/cli/repl/server/connection.ts index 2b7fefea5d..a623c7be35 100644 --- a/src/cli/repl/server/connection.ts +++ b/src/cli/repl/server/connection.ts @@ -137,7 +137,6 @@ export class FlowRServerConnection { id: message.id, cfg: cfg ? cfg2quads(cfg, config()) : undefined, results: { - // TODO: migrate to steps used in pipeline parse: await printStepResult(PARSE_WITH_R_SHELL_STEP, results.parse as string, StepOutputFormat.RdfQuads, config(), parseConfig), normalize: await printStepResult(NORMALIZE, results.normalize as NormalizedAst, StepOutputFormat.RdfQuads, config()), dataflow: await printStepResult(LEGACY_STATIC_DATAFLOW, results.dataflow as DataflowInformation, StepOutputFormat.RdfQuads, config()), diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index 87716069cb..a465d47acc 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -17,8 +17,6 @@ import { InternalStepPrinter, IPipelineStepPrinter, StepOutputFormat } from '../ * ensured at runtime by your dependencies. If you want to make sure, that the information is present, * list all steps that you require as your {@link IPipelineStepOrder#dependencies|dependencies}, even if they would be * already covered transitively. 
- * - * TODO: we could use prototypic cores for each step name */ export type StepProcessingFunction = (results: Record, input: Record) => unknown @@ -32,7 +30,6 @@ export const enum PipelineStepStage { OncePerRequest } -// TODO: rename to StepName export type NameOfStep = string & { __brand?: 'StepName' } /** @@ -92,8 +89,6 @@ export interface IPipelineStep< * Required inputs of dependencies do not have to, but can be repeated. *

* Use the pattern `undefined as unknown as T` to indicate that the value is required but not provided. - * - * TODO: respect default values. */ readonly requiredInput: object } From 63e7595d4737ad4c8c1e27a2ccd9fe9c4e53af31 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 29 Nov 2023 21:35:21 +0100 Subject: [PATCH 104/104] refactor: rename `NameOfStep` to `PipelineStepName` --- src/core/pipeline-executor.ts | 16 +++++----- src/core/stepping-slicer.ts | 6 ++-- src/core/steps/pipeline/create.ts | 32 +++++++++---------- src/core/steps/pipeline/pipeline.ts | 12 +++---- src/core/steps/step.ts | 10 +++--- .../pipelines/create/create-tests.ts | 4 +-- 6 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index 4fcfa73cf3..7bb6d3ae4d 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -1,4 +1,4 @@ -import { NameOfStep, PipelineStepStage } from './steps' +import { PipelineStepName, PipelineStepStage } from './steps' import { guard } from '../util/assert' import { Pipeline, @@ -176,8 +176,8 @@ export class PipelineExecutor

{ * * _Without `expectedStepName`, please refrain from accessing the result, as you have no safeguards if the pipeline changes._ */ - public async nextStep(expectedStepName?: PassedName): Promise<{ - name: typeof expectedStepName extends undefined ? NameOfStep : PassedName + public async nextStep(expectedStepName?: PassedName): Promise<{ + name: typeof expectedStepName extends undefined ? PipelineStepName : PassedName result: typeof expectedStepName extends undefined ? unknown : PipelineStepOutputWithName }> { const [step, result] = this._doNextStep(expectedStepName) @@ -189,9 +189,9 @@ export class PipelineExecutor

{ return { name: step as PassedName, result: awaitedResult } } - private _doNextStep(expectedStepName: Readonly): [ - step: NameOfStep, - result: Promise> + private _doNextStep(expectedStepName: Readonly): [ + step: PipelineStepName, + result: Promise> ] { const step = this.pipeline.steps.get(this.pipeline.order[this.stepCounter]) guard(step !== undefined, () => `Cannot execute next step, step ${this.pipeline.order[this.stepCounter]} does not exist.`) @@ -200,7 +200,7 @@ export class PipelineExecutor

{ guard(step.name === expectedStepName, () => `Cannot execute next step, expected step ${JSON.stringify(expectedStepName)} but got ${step.name}.`) } - return [step.name, step.processor(this.output, this.input) as unknown as PipelineStepOutputWithName] + return [step.name, step.processor(this.output, this.input) as unknown as PipelineStepOutputWithName] } /** @@ -219,7 +219,7 @@ export class PipelineExecutor

{ this.stepCounter = requestStep // clear the results for all steps with an index >= firstStepPerRequest, this is more of a sanity check for(let i = requestStep; i < this.length; i++) { - this.output[this.pipeline.order[i] as PipelineStepNames

] = undefined as unknown as PipelineStepOutputWithName + this.output[this.pipeline.order[i] as PipelineStepNames

] = undefined as unknown as PipelineStepOutputWithName } } diff --git a/src/core/stepping-slicer.ts b/src/core/stepping-slicer.ts index bcfafbcef8..0b3c88f2c7 100644 --- a/src/core/stepping-slicer.ts +++ b/src/core/stepping-slicer.ts @@ -3,7 +3,7 @@ import { STEPS_PER_SLICE, SteppingSlicerInput, StepResults, - StepName, PipelineStepStage, NameOfStep + StepName, PipelineStepStage, PipelineStepName } from './steps' import { SlicingCriteria } from '../slicing' import { createPipeline, Pipeline, PipelineOutput, PipelineStepOutputWithName } from './steps/pipeline' @@ -144,8 +144,8 @@ export class SteppingSlicer { * If given, it causes the execution to fail if the next step is not the one you expect. * *Without step, please refrain from accessing the result.* */ - public async nextStep(expectedStepName?: PassedName): Promise<{ - name: typeof expectedStepName extends undefined ? NameOfStep : PassedName + public async nextStep(expectedStepName?: PassedName): Promise<{ + name: typeof expectedStepName extends undefined ? PipelineStepName : PassedName result: typeof expectedStepName extends undefined ? 
unknown : PipelineStepOutputWithName, Exclude> }> { return this.executor.nextStep(expectedStepName) diff --git a/src/core/steps/pipeline/create.ts b/src/core/steps/pipeline/create.ts index 1fb21d7fc8..ab1d4b80c3 100644 --- a/src/core/steps/pipeline/create.ts +++ b/src/core/steps/pipeline/create.ts @@ -1,4 +1,4 @@ -import { IPipelineStep, NameOfStep, PipelineStepStage } from '../step' +import { IPipelineStep, PipelineStepName, PipelineStepStage } from '../step' import { InvalidPipelineError } from './invalid-pipeline-error' import { Pipeline } from './pipeline' import { jsonReplacer } from '../../../util/json' @@ -15,9 +15,9 @@ export function verifyAndBuildPipeline(steps: readonly IPipelineStep[]): Pipelin const [perFileSteps, perRequestSteps] = partitionArray(steps, s => s.executed === PipelineStepStage.OncePerFile) // we construct a map linking each name to its respective step - const perFileStepMap = new Map() - const initsPerFile: NameOfStep[] = [] - const visited = new Set() + const perFileStepMap = new Map() + const initsPerFile: PipelineStepName[] = [] + const visited = new Set() // we start by working on the per-file steps initializeSteps(perFileSteps, perFileStepMap, initsPerFile, visited) @@ -25,9 +25,9 @@ export function verifyAndBuildPipeline(steps: readonly IPipelineStep[]): Pipelin const sortedPerFile = topologicalSort(initsPerFile, perFileStepMap, visited) validateStepOutput(sortedPerFile, perFileStepMap, steps) - const perRequestStepMap = new Map(perFileStepMap) + const perRequestStepMap = new Map(perFileStepMap) // we track all elements without dependencies, i.e., those that start the pipeline - const initsPerRequest: NameOfStep[] = [] + const initsPerRequest: PipelineStepName[] = [] // now, we do the same for the per-request steps, keeping the per-file steps known initializeSteps(perRequestSteps, perRequestStepMap, initsPerRequest, visited) @@ -43,7 +43,7 @@ export function verifyAndBuildPipeline(steps: readonly IPipelineStep[]): Pipelin } } 
-function validateStepOutput(sorted: NameOfStep[], stepMap: Map, steps: readonly IPipelineStep[]) { +function validateStepOutput(sorted: PipelineStepName[], stepMap: Map, steps: readonly IPipelineStep[]) { if(sorted.length !== stepMap.size) { // check if any of the dependencies in the map are invalid checkForInvalidDependency(steps, stepMap) @@ -52,11 +52,11 @@ function validateStepOutput(sorted: NameOfStep[], stepMap: Map) { +function allDependenciesAreVisited(step: IPipelineStep, visited: ReadonlySet) { return step.dependencies.every(d => visited.has(d)) } -function handleStep(step: IPipelineStep, init: NameOfStep, visited: Set, sorted: NameOfStep[], elem: NameOfStep, decoratorsOfLastOthers: Set, inits: NameOfStep[]) { +function handleStep(step: IPipelineStep, init: PipelineStepName, visited: Set, sorted: PipelineStepName[], elem: PipelineStepName, decoratorsOfLastOthers: Set, inits: PipelineStepName[]) { if(step.decorates === init) { if(allDependenciesAreVisited(step, visited)) { sorted.push(elem) @@ -69,16 +69,16 @@ function handleStep(step: IPipelineStep, init: NameOfStep, visited: Set, visited: Set) { - const sorted: NameOfStep[] = [] +function topologicalSort(inits: PipelineStepName[], stepMap: Map, visited: Set) { + const sorted: PipelineStepName[] = [] while(inits.length > 0) { - const init = inits.pop() as NameOfStep + const init = inits.pop() as PipelineStepName sorted.push(init) visited.add(init) // these decorators still have dependencies open; we have to check if they can be satisfied by the other steps to add - const decoratorsOfLastOthers = new Set() + const decoratorsOfLastOthers = new Set() for(const [elem, step] of stepMap.entries()) { if(visited.has(elem)) { continue @@ -92,7 +92,7 @@ function topologicalSort(inits: NameOfStep[], stepMap: Map, stepMap: Map, visited: Set, sorted: NameOfStep[]) { +function topologicallyInsertDecoratorElements(decoratorsOfLastOthers: Set, stepMap: Map, visited: Set, sorted: PipelineStepName[]) { 
if(decoratorsOfLastOthers.size === 0) { return } @@ -115,7 +115,7 @@ function topologicallyInsertDecoratorElements(decoratorsOfLastOthers: Set) { +function checkForInvalidDependency(steps: readonly IPipelineStep[], stepMap: Map) { for(const step of steps) { for(const dep of step.dependencies) { if(!stepMap.has(dep)) { @@ -128,7 +128,7 @@ function checkForInvalidDependency(steps: readonly IPipelineStep[], stepMap: Map } } -function initializeSteps(steps: readonly IPipelineStep[], stepMap: Map, inits: NameOfStep[], visited: ReadonlySet) { +function initializeSteps(steps: readonly IPipelineStep[], stepMap: Map, inits: PipelineStepName[], visited: ReadonlySet) { for(const step of steps) { const name = step.name // if the name is already in the map we have a duplicate diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index dbe08e1ecb..2b8d4afdeb 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -1,4 +1,4 @@ -import { IPipelineStep, NameOfStep, PipelineStepStage } from '../step' +import { IPipelineStep, PipelineStepName, PipelineStepStage } from '../step' import { verifyAndBuildPipeline } from './create' import { DeepReadonly, UnionToIntersection } from 'ts-essentials' @@ -9,7 +9,7 @@ import { DeepReadonly, UnionToIntersection } from 'ts-essentials' * If you want to get the type of all steps in the pipeline (given they are created canonically using const step names), refer to {@link PipelineStepNames}. */ export interface Pipeline { - readonly steps: ReadonlyMap> + readonly steps: ReadonlyMap> readonly order: readonly T['name'][] /** * In the order, this is the index of the first step that @@ -27,10 +27,10 @@ export interface Pipeline { export type PipelineStepNames

= PipelineStep

['name'] export type PipelineStep

= P extends Pipeline ? U : never -export type PipelineStepWithName

= P extends Pipeline ? U extends IPipelineStep ? U : never : never -export type PipelineStepProcessorWithName

= PipelineStepWithName['processor'] -export type PipelineStepPrintersWithName

= PipelineStepWithName['printer'] -export type PipelineStepOutputWithName

= Awaited>> +export type PipelineStepWithName

= P extends Pipeline ? U extends IPipelineStep ? U : never : never +export type PipelineStepProcessorWithName

= PipelineStepWithName['processor'] +export type PipelineStepPrintersWithName

= PipelineStepWithName['printer'] +export type PipelineStepOutputWithName

= Awaited>> export type PipelineInput

= UnionToIntersection['requiredInput']> diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index a465d47acc..d416870151 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -30,13 +30,13 @@ export const enum PipelineStepStage { OncePerRequest } -export type NameOfStep = string & { __brand?: 'StepName' } +export type PipelineStepName = string & { __brand?: 'StepName' } /** * Contains the data to specify the order of {@link IPipelineStep|steps} in a pipeline. */ export interface IPipelineStepOrder< - Name extends NameOfStep = NameOfStep, + Name extends PipelineStepName = PipelineStepName, > { /** * Name of the respective step, it does not have to be unique in general but only unique per-pipeline. @@ -48,7 +48,7 @@ export interface IPipelineStepOrder< * Give the names of other steps this one requires to be completed as a prerequisite (e.g., to gain access to their input). * Does not have to be transitive, this will be checked by the scheduler of the pipeline. */ - readonly dependencies: readonly NameOfStep[] + readonly dependencies: readonly PipelineStepName[] /* does this step has to be repeated for each new request or can it be performed only once in the initialization */ readonly executed: PipelineStepStage /** @@ -58,7 +58,7 @@ export interface IPipelineStepOrder< * * If so, it is ensured that _this_ step is executed _after_ the step it decorates, but before any step that depends on it. */ - readonly decorates?: NameOfStep + readonly decorates?: PipelineStepName } /** @@ -67,7 +67,7 @@ export interface IPipelineStepOrder< * Steps will be executed synchronously, in-sequence, based on their {@link IPipelineStep#dependencies|dependencies}. 
*/ export interface IPipelineStep< - Name extends NameOfStep = NameOfStep, + Name extends PipelineStepName = PipelineStepName, // eslint-disable-next-line -- by default, we assume nothing about the function shape Fn extends StepProcessingFunction = (...args: any[]) => any, > extends MergeableRecord, IPipelineStepOrder { diff --git a/test/functionality/pipelines/create/create-tests.ts b/test/functionality/pipelines/create/create-tests.ts index 2b31c7b234..4a907fd23d 100644 --- a/test/functionality/pipelines/create/create-tests.ts +++ b/test/functionality/pipelines/create/create-tests.ts @@ -1,5 +1,5 @@ import { createPipeline } from '../../../../src/core/steps/pipeline' -import { IPipelineStep, NameOfStep } from '../../../../src/core/steps' +import { IPipelineStep, PipelineStepName } from '../../../../src/core/steps' import { expect } from 'chai' import { PARSE_WITH_R_SHELL_STEP } from '../../../../src/core/steps/all/core/00-parse' import { allPermutations } from '../../../../src/util/arrays' @@ -65,7 +65,7 @@ describe('Create Pipeline (includes dependency checks)', () => { }) }) describe('default behavior', () => { - function positive(name: string, rawSteps: IPipelineStep[], expected: NameOfStep[], indexOfFirstPerFile: number = expected.length) { + function positive(name: string, rawSteps: IPipelineStep[], expected: PipelineStepName[], indexOfFirstPerFile: number = expected.length) { it(`${name} (all permutations)`, () => { for(const steps of allPermutations(rawSteps)) { const pipeline = createPipeline(...steps)