From dee5823cc72a4975cf1c41b688171f1706d625a3 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Tue, 7 May 2024 15:11:41 +0200 Subject: [PATCH 1/8] refactor: removed the stepping slicer --- src/core/stepping-slicer.ts | 182 ------------------ src/core/steps/input.ts | 45 ----- src/core/steps/pipeline/default.ts | 3 + test/functionality/_helper/shell.ts | 34 ++-- .../util/control-flow-graph-tests.ts | 15 +- test/functionality/util/quads-tests.ts | 8 +- 6 files changed, 28 insertions(+), 259 deletions(-) delete mode 100644 src/core/stepping-slicer.ts delete mode 100644 src/core/steps/input.ts diff --git a/src/core/stepping-slicer.ts b/src/core/stepping-slicer.ts deleted file mode 100644 index 1f303d130d..0000000000 --- a/src/core/stepping-slicer.ts +++ /dev/null @@ -1,182 +0,0 @@ -import type { SlicingCriteria } from '../slicing' -import type { Pipeline, PipelineOutput, PipelineStepOutputWithName } from './steps/pipeline' -import { createPipeline } from './steps/pipeline' -import { PARSE_WITH_R_SHELL_STEP } from './steps/all/core/00-parse' -import { NORMALIZE } from './steps/all/core/10-normalize' -import { STATIC_DATAFLOW } from './steps/all/core/20-dataflow' -import { STATIC_SLICE } from './steps/all/static-slicing/00-slice' -import { NAIVE_RECONSTRUCT } from './steps/all/static-slicing/10-reconstruct' -import { PipelineExecutor } from './pipeline-executor' -import type { LAST_PER_FILE_STEP, StepName } from './steps/steps' -import { LAST_STEP } from './steps/steps' -import type { SteppingSlicerInput } from './steps/input' -import type { PipelineStepName, PipelineStepStage } from './steps/step' - -const legacyPipelines = { - // brrh, but who cares, it is legacy! - 'parse': createPipeline(PARSE_WITH_R_SHELL_STEP), - 'normalize': createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE), - 'dataflow': createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW), - 'ai': createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW), - 'slice': createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW, STATIC_SLICE), - 'reconstruct': createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW, STATIC_SLICE, NAIVE_RECONSTRUCT) -} as const - -type LegacyPipelineType = typeof legacyPipelines[InterestedIn] - -function getLegacyPipeline(interestedIn: StepName): Pipeline { - return legacyPipelines[interestedIn] -} - -/** - * This is ultimately the root of flowR's static slicing procedure. - * It clearly defines the steps that are to be executed and splits them into two stages. - * - `once-per-file`: for steps that are executed once per file. These can be performed *without* the knowledge of a slicing criteria, - * and they can be cached and re-used if you want to slice the same file multiple times. - * - `once-per-slice`: for steps that are executed once per slice. These can only be performed *with* a slicing criteria. - * - * Furthermore, this stepper follows an iterable fashion to be *as flexible as possible* (e.g., to be instrumented with measurements). - * So, you can use the stepping slicer like this: - * - * ```ts - * const slicer = new SteppingSlicer({ ... }) - * while(slicer.hasNextStep()) { - * await slicer.nextStep() - * } - * - * slicer.switchToSliceStage() - * - * while(slicer.hasNextStep()) { - * await slicer.nextStep() - * } - * - * const result = slicer.getResults() - * ``` - * - * Of course, you might think, that this is rather overkill if you simply want to receive the slice of a given input source or in general - * the result of any step. And this is true. Therefore, if you do not want to perform some kind of magic in-between steps, you can use the - * **{@link allRemainingSteps}** function like this: - * - * ```ts - * const slicer = new SteppingSlicer({ ... }) - * const result = await slicer.allRemainingSteps() - * ``` - * - * As the name suggest, you can combine this name with previous calls to {@link nextStep} to only execute the remaining steps. - * - * Giving the **step of interest** allows you to declare the maximum step to execute. - * So, if you pass `dataflow` as the step of interest, the stepping slicer will stop after the dataflow step. - * If you do not pass a step, the stepping slicer will execute all steps. - * - * By default, the {@link SteppingSlicer} does not offer an automatic way to repeat the per-slice steps for multiple slices (this is mostly to prevent accidental errors). - * However, you can use the **{@link updateCriterion}** function to reset the per-slice steps and re-execute them for a new slice. This allows something like the following: - * - * ```ts - * const slicer = new SteppingSlicer({ ... }) - * const result = await slicer.allRemainingSteps() - * - * slicer.updateCriterion(...) - * const result2 = await slicer.allRemainingSteps() - * ``` - * - * @note Even though, using the stepping slicer introduces some performance overhead, we consider - * it to be the baseline for performance benchmarking. It may very well be possible to squeeze out some more performance by - * directly constructing the steps in the right order. However, we consider this to be negligible when compared with the time required - * for, for example, the dataflow analysis. - * - * @see retrieveResultOfStep - * @see StepName - */ -export class SteppingSlicer { - private executor: PipelineExecutor> - - /** - * Create a new stepping slicer. For more details on the arguments please see {@link SteppingSlicerInput}. - */ - constructor(input: SteppingSlicerInput) { - this.executor = new PipelineExecutor(getLegacyPipeline(input.stepOfInterest ?? LAST_STEP), input) as PipelineExecutor> - } - - /** - * Retrieve the current stage the stepping slicer is in. - * @see PipelineStepStage - * @see switchToSliceStage - */ - public getCurrentStage(): PipelineStepStage { - return this.executor.getCurrentStage() - } - - /** - * Switch to the next stage of the stepping slicer. - * @see SteppingSlicer - * @see getCurrentStage - */ - public switchToSliceStage(): void { - this.executor.switchToRequestStage() - } - - - public getResults(intermediate?:false): PipelineOutput> - public getResults(intermediate: true): Partial>> - /** - * Returns the result of the step of interest, as well as the results of all steps before it. - * - * @param intermediate - normally you can only receive the results *after* the stepper completed the step of interested. - * However, if you pass `true` to this parameter, you can also receive the results *before* the step of interest, - * although the typing system then can not guarantee which of the steps have already happened. - */ - public getResults(intermediate = false): PipelineOutput> | Partial>> { - return this.executor.getResults(intermediate) - } - - /** - * Returns true only if 1) there are more steps to-do for the current stage and 2) we have not yet reached the step we are interested in - */ - public hasNextStep(): boolean { - return this.executor.hasNextStep() - } - - /** - * Execute the next step (guarded with {@link hasNextStep}) and return the name of the step that was executed, so you can guard if the step differs from what you are interested in. - * Furthermore, it returns the step's result. - * - * The `step` parameter is a safeguard if you want to retrieve the result. - * If given, it causes the execution to fail if the next step is not the one you expect. - * *Without step, please refrain from accessing the result.* - */ - public async nextStep(expectedStepName?: PassedName): Promise<{ - name: typeof expectedStepName extends undefined ? PipelineStepName : PassedName - result: typeof expectedStepName extends undefined ? unknown : PipelineStepOutputWithName, Exclude> - }> { - return this.executor.nextStep(expectedStepName) - } - - /** - * This only makes sense if you have already sliced a file (e.g., by running up to the `slice` step) and want to do so again while caching the results. - * Or if for whatever reason you did not pass a criterion with the constructor. - * - * @param newCriterion - the new slicing criterion to use for the next slice - */ - public updateCriterion(newCriterion: SlicingCriteria): void { - // @ts-expect-error -- it is legacy - this.executor.updateRequest({ criterion: newCriterion }) - } - - public async allRemainingSteps(canSwitchStage: false): Promise>>> - public async allRemainingSteps(canSwitchStage?: true): Promise>> - /** - * Execute all remaining steps and automatically call {@link switchToSliceStage} if necessary. - * @param canSwitchStage - if true, automatically switch to the slice stage if necessary - * (i.e., this is what you want if you have never executed {@link nextStep} and you want to execute *all* steps). - * However, passing false allows you to only execute the steps of the 'once-per-file' stage (i.e., the steps that can be cached). - * - * @note There is a small type difference if you pass 'false' and already have manually switched to the 'once-per-slice' stage. - * Because now, the results of these steps are no longer part of the result type (although they are still included). - * In such a case, you may be better off with simply passing 'true' as the function will detect that the stage is already switched. - * We could solve this type problem by separating the SteppingSlicer class into two for each stage, but this would break the improved readability and unified handling - * of the slicer that I wanted to achieve with this class. - */ - public async allRemainingSteps(canSwitchStage = true): Promise> | Partial>>> { - return this.executor.allRemainingSteps(canSwitchStage) - } -} diff --git a/src/core/steps/input.ts b/src/core/steps/input.ts deleted file mode 100644 index c105cef504..0000000000 --- a/src/core/steps/input.ts +++ /dev/null @@ -1,45 +0,0 @@ -import type { MergeableRecord } from '../../util/objects' -import type { IdGenerator, NoInfo, RParseRequest, RShell } from '../../r-bridge' -import type { AutoSelectPredicate, SlicingCriteria } from '../../slicing' -import type { STEPS_PER_SLICE, StepName, STEPS_PER_FILE } from './steps' - -/** - * We split the types, as if you are only interested in what can be done per-file, you do not need a slicing criterion. - * Furthermore, if you are only interested in the parse result, you do not require the token map and you can not pass hooks - */ -interface BaseSteppingSlicerInput extends MergeableRecord { - /** - * The step you are actually interested in. - * If you pass 'dataflow', the stepper will stop after analyzing the dataflow. - * The step is optional, if you do not pass a step, the stepper will execute all steps. - */ - stepOfInterest?: InterestedIn - /** This is the {@link RShell} connection to be used to obtain the original parses AST of the R code */ - shell: RShell - /** The request which essentially indicates the input to extract the AST from */ - request: RParseRequest - /** This id generator is only necessary if you want to retrieve a dataflow from the parsed R AST, it determines the id generator to use and by default uses the {@link deterministicCountingIdGenerator}*/ - getId?: IdGenerator - /** The slicing criterion is only of interest if you actually want to slice the R code */ - criterion?: SlicingCriteria - /** If you want to auto-select something in the reconstruction add it here, otherwise, it will use the default defined alongside {@link reconstructToCode}*/ - autoSelectIf?: AutoSelectPredicate -} - -interface NormalizeSteppingSlicerInput extends BaseSteppingSlicerInput { - stepOfInterest: InterestedIn -} - -interface SliceSteppingSlicerInput extends BaseSteppingSlicerInput { - stepOfInterest?: InterestedIn - criterion: SlicingCriteria -} - -/** - * For a given set of steps of interest, this essentially (statically) determines the required inputs for the {@link SteppingSlicer}. - * All arguments are documented alongside {@link BaseSteppingSlicerInput}. - */ -export type SteppingSlicerInput = - InterestedIn extends keyof typeof STEPS_PER_SLICE | undefined ? SliceSteppingSlicerInput : - InterestedIn extends Exclude ? NormalizeSteppingSlicerInput : - BaseSteppingSlicerInput diff --git a/src/core/steps/pipeline/default.ts b/src/core/steps/pipeline/default.ts index 640888f3f4..b81f0368a3 100644 --- a/src/core/steps/pipeline/default.ts +++ b/src/core/steps/pipeline/default.ts @@ -9,5 +9,8 @@ import { STATIC_SLICE } from '../all/static-slicing/00-slice' import { NAIVE_RECONSTRUCT } from '../all/static-slicing/10-reconstruct' export const DEFAULT_SLICING_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW, STATIC_SLICE, NAIVE_RECONSTRUCT) +export const DEFAULT_RECONSTRUCT_PIPELINE = DEFAULT_SLICING_PIPELINE export const DEFAULT_DATAFLOW_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW) + +export const DEFAULT_NORMALIZE_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE) diff --git a/test/functionality/_helper/shell.ts b/test/functionality/_helper/shell.ts index 82e5c87fba..a871ae39e4 100644 --- a/test/functionality/_helper/shell.ts +++ b/test/functionality/_helper/shell.ts @@ -19,11 +19,10 @@ import type { MergeableRecord } from '../../../src/util/objects' import { deepMergeObject } from '../../../src/util/objects' import { NAIVE_RECONSTRUCT } from '../../../src/core/steps/all/static-slicing/10-reconstruct' import { guard } from '../../../src/util/assert' -import { createPipeline } from '../../../src/core/steps/pipeline' +import { createPipeline, DEFAULT_NORMALIZE_PIPELINE, DEFAULT_RECONSTRUCT_PIPELINE, DEFAULT_SLICING_PIPELINE } from '../../../src/core/steps/pipeline' import { PipelineExecutor } from '../../../src/core/pipeline-executor' import { PARSE_WITH_R_SHELL_STEP } from '../../../src/core/steps/all/core/00-parse' import { NORMALIZE } from '../../../src/core/steps/all/core/10-normalize' -import { SteppingSlicer } from '../../../src/core/stepping-slicer' import { LAST_STEP } from '../../../src/core/steps/steps' import type { TestLabel } from './label' import { decorateLabelContext } from './label' @@ -86,10 +85,8 @@ function assertAstEqualIgnoreSourceInformation(ast: RNode, expected: export const retrieveNormalizedAst = async(shell: RShell, input: `${typeof fileProtocol}${string}` | string): Promise => { const request = requestFromInput(input) - return (await new SteppingSlicer({ - stepOfInterest: 'normalize', - shell, - request + return (await new PipelineExecutor(DEFAULT_NORMALIZE_PIPELINE, { + shell, request }).allRemainingSteps()).normalize.ast } @@ -136,8 +133,6 @@ export function sameForSteps(steps: S[], wanted: T): { step: S, wanted: T return steps.map(step => ({ step, wanted })) } -const normalizePipeline = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE) - /** * For a given input code this takes multiple ASTs depending on the respective normalizer step to run! * @@ -151,7 +146,7 @@ export function assertAst(name: TestLabel | string, shell: RShell, input: string return it(`${fullname} (input: ${input})`, async function() { await ensureConfig(shell, this, userConfig) - const pipeline = new PipelineExecutor(normalizePipeline, { + const pipeline = new PipelineExecutor(DEFAULT_NORMALIZE_PIPELINE, { shell, request: requestFromInput(input) }) @@ -167,11 +162,10 @@ export function assertAst(name: TestLabel | string, shell: RShell, input: string export function assertDecoratedAst(name: string, shell: RShell, input: string, expected: RNodeWithParent, userConfig?: Partial, startIndexForDeterministicIds = 0): void { it(name, async function() { await ensureConfig(shell, this, userConfig) - const result = await new SteppingSlicer({ - stepOfInterest: 'normalize', - getId: deterministicCountingIdGenerator(startIndexForDeterministicIds), + const result = await new PipelineExecutor(createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE),{ + getId: deterministicCountingIdGenerator(startIndexForDeterministicIds), shell, - request: requestFromInput(input), + request: requestFromInput(input), }).allRemainingSteps() const ast = result.normalize.ast @@ -238,10 +232,9 @@ export function assertReconstructed(name: string | TestLabel, shell: RShell, inp return it(decorateLabelContext(name, ['slice']), async function() { await ensureConfig(shell, this, userConfig) - const result = await new SteppingSlicer({ - stepOfInterest: 'normalize', - getId: getId, - request: requestFromInput(input), + const result = await new PipelineExecutor(createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE),{ + getId: getId, + request: requestFromInput(input), shell }).allRemainingSteps() const reconstructed = NAIVE_RECONSTRUCT.processor({ @@ -262,12 +255,11 @@ export function assertSliced(name: string | TestLabel, shell: RShell, input: str const fullname = decorateLabelContext(name, ['slice']) return it(`${JSON.stringify(criteria)} ${fullname}`, async function() { - const result = await new SteppingSlicer({ - stepOfInterest: LAST_STEP, + const result = await new PipelineExecutor(DEFAULT_RECONSTRUCT_PIPELINE,{ getId, - request: requestFromInput(input), + request: requestFromInput(input), shell, - criterion: criteria, + criterion: criteria, }).allRemainingSteps() try { diff --git a/test/functionality/util/control-flow-graph-tests.ts b/test/functionality/util/control-flow-graph-tests.ts index 3b6a30938f..3a23f565ee 100644 --- a/test/functionality/util/control-flow-graph-tests.ts +++ b/test/functionality/util/control-flow-graph-tests.ts @@ -14,8 +14,11 @@ import type { NodeId } from '../../../src' import { requestFromInput, RFalse, RTrue, RType } from '../../../src/r-bridge' import { defaultQuadIdGenerator } from '../../../src/util/quads' import { cfgToMermaidUrl } from '../../../src/util/mermaid' -import { SteppingSlicer } from '../../../src/core/stepping-slicer' import { normalizeIdToNumberIfPossible } from '../../../src/r-bridge/lang-4.x/ast/model/processing/node-id' +import { PipelineExecutor } from '../../../src/core/pipeline-executor' +import { createPipeline, DEFAULT_NORMALIZE_PIPELINE } from '../../../src/core/steps/pipeline' +import { PARSE_WITH_R_SHELL_STEP } from '../../../src/core/steps/all/core/00-parse' +import { NORMALIZE } from '../../../src/core/steps/all/core/10-normalize' function normAllIds(ids: NodeId[]): NodeId[] { return ids.map(normalizeIdToNumberIfPossible) @@ -26,10 +29,9 @@ describe('Control Flow Graph', withShell(shell => { // shallow copy is important to avoid killing the CFG :c const expected: ControlFlowInformation = { ...emptyControlFlowInformation(), ...partialExpected } return it(code, async()=> { - const result = await new SteppingSlicer({ - stepOfInterest: 'normalize', + const result = await new PipelineExecutor(DEFAULT_NORMALIZE_PIPELINE, { shell, - request: requestFromInput(code) + request: requestFromInput(code) }).allRemainingSteps() const cfg = extractCFG(result.normalize) @@ -120,10 +122,9 @@ describe('Control Flow Graph', withShell(shell => { const domain = 'https://uni-ulm.de/r-ast/' const context = 'test' - const result = await new SteppingSlicer({ - stepOfInterest: 'normalize', + const result = await new PipelineExecutor(DEFAULT_NORMALIZE_PIPELINE, { shell, - request: requestFromInput('if(TRUE) 1') + request: requestFromInput('if(TRUE) 1') }).allRemainingSteps() const cfg = extractCFG(result.normalize) diff --git a/test/functionality/util/quads-tests.ts b/test/functionality/util/quads-tests.ts index e4a8ade5bd..c70b528703 100644 --- a/test/functionality/util/quads-tests.ts +++ b/test/functionality/util/quads-tests.ts @@ -3,7 +3,8 @@ import { decorateAst, requestFromInput } from '../../../src' import { defaultQuadIdGenerator, serialize2quads } from '../../../src/util/quads' import { assert } from 'chai' import { dataflowGraphToQuads } from '../../../src/core/print/dataflow-printer' -import { SteppingSlicer } from '../../../src/core/stepping-slicer' +import { PipelineExecutor } from '../../../src/core/pipeline-executor' +import { DEFAULT_DATAFLOW_PIPELINE } from '../../../src/core/steps/pipeline' describe('Quads', withShell(shell => { const context = 'test' @@ -34,9 +35,8 @@ describe('Quads', withShell(shell => { }) const compareQuadsDfg = async(code: string, expected: string) => { - const info = await new SteppingSlicer({ - stepOfInterest: 'dataflow', - request: requestFromInput(code), + const info = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { + request: requestFromInput(code), shell }).allRemainingSteps() From 4cfdc87c22fa4986294116df92771058952d5ad6 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Tue, 7 May 2024 15:13:17 +0200 Subject: [PATCH 2/8] refactor: fix lint errors --- test/functionality/_helper/shell.ts | 3 +-- test/functionality/util/control-flow-graph-tests.ts | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/test/functionality/_helper/shell.ts b/test/functionality/_helper/shell.ts index a871ae39e4..1788eb7c6f 100644 --- a/test/functionality/_helper/shell.ts +++ b/test/functionality/_helper/shell.ts @@ -19,11 +19,10 @@ import type { MergeableRecord } from '../../../src/util/objects' import { deepMergeObject } from '../../../src/util/objects' import { NAIVE_RECONSTRUCT } from '../../../src/core/steps/all/static-slicing/10-reconstruct' import { guard } from '../../../src/util/assert' -import { createPipeline, DEFAULT_NORMALIZE_PIPELINE, DEFAULT_RECONSTRUCT_PIPELINE, DEFAULT_SLICING_PIPELINE } from '../../../src/core/steps/pipeline' +import { createPipeline, DEFAULT_NORMALIZE_PIPELINE, DEFAULT_RECONSTRUCT_PIPELINE } from '../../../src/core/steps/pipeline' import { PipelineExecutor } from '../../../src/core/pipeline-executor' import { PARSE_WITH_R_SHELL_STEP } from '../../../src/core/steps/all/core/00-parse' import { NORMALIZE } from '../../../src/core/steps/all/core/10-normalize' -import { LAST_STEP } from '../../../src/core/steps/steps' import type { TestLabel } from './label' import { decorateLabelContext } from './label' import { STATIC_DATAFLOW } from '../../../src/core/steps/all/core/20-dataflow' diff --git a/test/functionality/util/control-flow-graph-tests.ts b/test/functionality/util/control-flow-graph-tests.ts index 3a23f565ee..e5a7754a43 100644 --- a/test/functionality/util/control-flow-graph-tests.ts +++ b/test/functionality/util/control-flow-graph-tests.ts @@ -16,9 +16,7 @@ import { defaultQuadIdGenerator } from '../../../src/util/quads' import { cfgToMermaidUrl } from '../../../src/util/mermaid' import { normalizeIdToNumberIfPossible } from '../../../src/r-bridge/lang-4.x/ast/model/processing/node-id' import { PipelineExecutor } from '../../../src/core/pipeline-executor' -import { createPipeline, DEFAULT_NORMALIZE_PIPELINE } from '../../../src/core/steps/pipeline' -import { PARSE_WITH_R_SHELL_STEP } from '../../../src/core/steps/all/core/00-parse' -import { NORMALIZE } from '../../../src/core/steps/all/core/10-normalize' +import { DEFAULT_NORMALIZE_PIPELINE } from '../../../src/core/steps/pipeline' function normAllIds(ids: NodeId[]): NodeId[] { return ids.map(normalizeIdToNumberIfPossible) From 387e5e1e9cb8716b6ce3c24f44a4a72b8f7a1002 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Tue, 7 May 2024 15:16:32 +0200 Subject: [PATCH 3/8] refactor: use default pipelines where applicable --- test/functionality/_helper/shell.ts | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/test/functionality/_helper/shell.ts b/test/functionality/_helper/shell.ts index 1788eb7c6f..0ec707186d 100644 --- a/test/functionality/_helper/shell.ts +++ b/test/functionality/_helper/shell.ts @@ -19,13 +19,10 @@ import type { MergeableRecord } from '../../../src/util/objects' import { deepMergeObject } from '../../../src/util/objects' import { NAIVE_RECONSTRUCT } from '../../../src/core/steps/all/static-slicing/10-reconstruct' import { guard } from '../../../src/util/assert' -import { createPipeline, DEFAULT_NORMALIZE_PIPELINE, DEFAULT_RECONSTRUCT_PIPELINE } from '../../../src/core/steps/pipeline' +import { DEFAULT_DATAFLOW_PIPELINE, DEFAULT_NORMALIZE_PIPELINE, DEFAULT_RECONSTRUCT_PIPELINE } from '../../../src/core/steps/pipeline' import { PipelineExecutor } from '../../../src/core/pipeline-executor' -import { PARSE_WITH_R_SHELL_STEP } from '../../../src/core/steps/all/core/00-parse' -import { NORMALIZE } from '../../../src/core/steps/all/core/10-normalize' import type { TestLabel } from './label' import { decorateLabelContext } from './label' -import { STATIC_DATAFLOW } from '../../../src/core/steps/all/core/20-dataflow' import { graphToMermaidUrl, diffGraphsToMermaidUrl } from '../../../src/dataflow' import type { DataflowDifferenceReport, DataflowGraph , ProblematicDiffInfo } from '../../../src/dataflow' import { printAsBuilder } from './dataflow/dataflow-builder-printer' @@ -161,7 +158,7 @@ export function assertAst(name: TestLabel | string, shell: RShell, input: string export function assertDecoratedAst(name: string, shell: RShell, input: string, expected: RNodeWithParent, userConfig?: Partial, startIndexForDeterministicIds = 0): void { it(name, async function() { await ensureConfig(shell, this, userConfig) - const result = await new PipelineExecutor(createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE),{ + const result = await new PipelineExecutor(DEFAULT_NORMALIZE_PIPELINE, { getId: deterministicCountingIdGenerator(startIndexForDeterministicIds), shell, request: requestFromInput(input), @@ -173,9 +170,6 @@ export function assertDecoratedAst(name: string, shell: RShell, input }) } - -const legacyDataflow = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW) - function mapProblematicNodesToIds(problematic: readonly ProblematicDiffInfo[] | undefined): Set | undefined { return problematic === undefined ? undefined : new Set(problematic.map(p => p.tag === 'vertex' ? p.id : `${p.from}->${p.to}`)) } @@ -192,7 +186,7 @@ export function assertDataflow( it(`${effectiveName} (input: ${JSON.stringify(input)})`, async function() { await ensureConfig(shell, this, userConfig) - const info = await new PipelineExecutor(legacyDataflow, { + const info = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { shell, request: requestFromInput(input), getId: deterministicCountingIdGenerator(startIndexForDeterministicIds) @@ -231,7 +225,7 @@ export function assertReconstructed(name: string | TestLabel, shell: RShell, inp return it(decorateLabelContext(name, ['slice']), async function() { await ensureConfig(shell, this, userConfig) - const result = await new PipelineExecutor(createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE),{ + const result = await new PipelineExecutor(DEFAULT_NORMALIZE_PIPELINE, { getId: getId, request: requestFromInput(input), shell From 7998f016142105aa3bc5d034fcdb4eefb702e7b3 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Tue, 7 May 2024 19:00:16 +0200 Subject: [PATCH 4/8] refactor: further remove old stepping-slicer files --- src/core/steps.ts | 154 --------------------------------------- src/core/steps/output.ts | 19 ----- src/core/steps/step.ts | 4 +- src/core/steps/steps.ts | 41 ----------- 4 files changed, 2 insertions(+), 216 deletions(-) delete mode 100644 src/core/steps.ts delete mode 100644 src/core/steps/output.ts delete mode 100644 src/core/steps/steps.ts diff --git a/src/core/steps.ts b/src/core/steps.ts deleted file mode 100644 index 7c79e5e211..0000000000 --- a/src/core/steps.ts +++ /dev/null @@ -1,154 +0,0 @@ -/** - * This file defines *all* steps of the slicing process and the data they require. - * - * Note, that the order of elements here also describes the *desired* order of their desired execution for readability. - * However, it is the {@link SteppingSlicer} which controls the order of execution and the steps required to achieve a given result. - * - * If you add a new step, you have to (at least) update the {@link SteppingSlicer} as well as the corresponding type predicate {@link SteppingSlicerInput}. - * Furthermore, if your step is the new *last* step, please update {@link LAST_STEP}. - * - * Please note that the combination of `satisfies` and `as` seems to be required. - * With `satisfies` we make sure that the respective element has all the keys it requires, and the `as` force the type to be exactly the given one - * - * @module - */ - -import type { MergeableRecord } from '../util/objects' -import type { IPipelineStepPrinter } from './print/print' -import { internalPrinter, StepOutputFormat } from './print/print' -import { - normalizedAstToJson, - normalizedAstToQuads, - printNormalizedAstToMermaid, - printNormalizedAstToMermaidUrl -} from './print/normalize-printer' -import { guard } from '../util/assert' -import { parseToQuads } from './print/parse-printer' -import { - dataflowGraphToJson, - dataflowGraphToMermaid, - dataflowGraphToMermaidUrl, - dataflowGraphToQuads -} from './print/dataflow-printer' -import type { StepProcessingFunction } from './steps/step' -import { PARSE_WITH_R_SHELL_STEP } from './steps/all/core/00-parse' -import { NORMALIZE } from './steps/all/core/10-normalize' -import { STATIC_DATAFLOW } from './steps/all/core/20-dataflow' -import { STATIC_SLICE } from './steps/all/static-slicing/00-slice' -import { NAIVE_RECONSTRUCT } from './steps/all/static-slicing/10-reconstruct' - -/** - * This represents the required execution frequency of a step. - */ -export type StepRequired = 'once-per-file' | 'once-per-slice' - - -/** - * Defines what is to be known of a single step in the slicing process. - */ -export interface IStep< - Fn extends StepProcessingFunction, -> extends MergeableRecord { - /** Human-readable description of this step */ - description: string - /** The main processor that essentially performs the logic of this step */ - processor: (...input: Parameters) => ReturnType - /* does this step have to be repeated for each new slice, or can it be performed only once in the initialization? */ - required: StepRequired - printer: { - [K in StepOutputFormat]?: IPipelineStepPrinter - } & { - // we always want to have the internal printer - [StepOutputFormat.Internal]: IPipelineStepPrinter - } -} - - -export const STEPS_PER_FILE = { - 'parse': { - description: 'Parse the given R code into an AST', - processor: PARSE_WITH_R_SHELL_STEP.processor, - required: 'once-per-file', - printer: { - [StepOutputFormat.Internal]: internalPrinter, - [StepOutputFormat.Json]: text => text, - [StepOutputFormat.RdfQuads]: parseToQuads - } - } satisfies IStep, - 'normalize': { - description: 'Normalize the AST to flowR\'s AST (first step of the normalization)', - processor: NORMALIZE.processor, - required: 'once-per-file', - printer: { - [StepOutputFormat.Internal]: internalPrinter, - [StepOutputFormat.Json]: normalizedAstToJson, - [StepOutputFormat.RdfQuads]: normalizedAstToQuads, - [StepOutputFormat.Mermaid]: printNormalizedAstToMermaid, - [StepOutputFormat.MermaidUrl]: printNormalizedAstToMermaidUrl - } - } satisfies IStep, - 'dataflow': { - description: 'Construct the dataflow graph', - processor: STATIC_DATAFLOW.processor, - required: 'once-per-file', - printer: { - [StepOutputFormat.Internal]: internalPrinter, - [StepOutputFormat.Json]: dataflowGraphToJson, - [StepOutputFormat.RdfQuads]: dataflowGraphToQuads, - [StepOutputFormat.Mermaid]: dataflowGraphToMermaid, - [StepOutputFormat.MermaidUrl]: dataflowGraphToMermaidUrl - } - } satisfies IStep -} as const - -export const STEPS_PER_SLICE = { - 'slice': { - description: 'Calculate the actual static slice from the dataflow graph and the given slicing criteria', - processor: STATIC_SLICE.processor, - required: 'once-per-slice', - printer: { - [StepOutputFormat.Internal]: internalPrinter - } - } satisfies IStep, - 'reconstruct': { - description: 'Reconstruct R code from the static slice', - processor: NAIVE_RECONSTRUCT.processor, - required: 'once-per-slice', - printer: { - [StepOutputFormat.Internal]: internalPrinter - } - } satisfies IStep -} as const - -export const STEPS = { ...STEPS_PER_FILE, ...STEPS_PER_SLICE } as const -export const LAST_PER_FILE_STEP = 'dataflow' -export const LAST_STEP = 'reconstruct' - -export type StepName = keyof typeof STEPS -export type Step = typeof STEPS[Name] -export type StepProcessor = Step['processor'] -export type StepResult = Awaited>> - -export function executeSingleSubStep>(subStep: Name, ...input: Parameters): ReturnType { - // @ts-expect-error - this is safe, as we know that the function arguments are correct by 'satisfies', this saves an explicit cast with 'as' - return STEPS[subStep].processor(...input as unknown as never[]) as ReturnType -} - -type Tail = T extends [infer _, ...infer Rest] ? Rest : never; - -/** - * For a `step` of the given name, which returned the given `data`. Convert that data into the given `format`. - * Depending on your step and the format this may require `additional` inputs. - */ -export function printStepResult< - Name extends StepName, - Processor extends StepProcessor, - Format extends Exclude & number, - Printer extends (typeof STEPS)[Name]['printer'][Format], - AdditionalInput extends Tail>, ->(step: Name, data: Awaited>, format: Format, ...additional: AdditionalInput): Promise { - const base = STEPS[step].printer - const printer = base[format as keyof typeof base] as IPipelineStepPrinter, Format, AdditionalInput> | undefined - guard(printer !== undefined, `printer for ${step} does not support ${String(format)}`) - return printer(data, ...additional) as Promise -} diff --git a/src/core/steps/output.ts b/src/core/steps/output.ts deleted file mode 100644 index 7de96aa294..0000000000 --- a/src/core/steps/output.ts +++ /dev/null @@ -1,19 +0,0 @@ -import type { LAST_STEP, StepName, StepResult } from './steps' - -/** Represents the return value of the processor linked to the step with the name 'K' */ -type Out = Record>; - -/** - * Essentially expresses an object that, if a step 'x' is of interest, contains the result of step 'x' and all steps before 'x'. - */ -export type StepResults = InterestedIn extends never ? never - : InterestedIn extends undefined ? StepResultsHelper - : StepResultsHelper> - -type StepResultsHelper = { - 'parse': Out<'parse'> - 'normalize': StepResultsHelper<'parse'> & Out<'normalize'> - 'dataflow': StepResultsHelper<'normalize'> & Out<'dataflow'> - 'slice': StepResultsHelper<'dataflow'> & Out<'slice'> - 'reconstruct': StepResultsHelper<'slice'> & Out<'reconstruct'> -}[InterestedIn] diff --git a/src/core/steps/step.ts b/src/core/steps/step.ts index 741bb87a69..eb4960a2c8 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/step.ts @@ -49,7 +49,7 @@ export interface IPipelineStepOrder< * Does not have to be transitive, this will be checked by the scheduler of the pipeline. */ readonly dependencies: readonly PipelineStepName[] - /* does this step has to be repeated for each new request or can it be performed only once in the initialization */ + /* does this step have to be repeated for each new request, or can it be performed only once in the initialization? */ readonly executed: PipelineStepStage /** * This is similar to {@link dependencies}, but is used to say that a given step _decorates_ another one. @@ -83,7 +83,7 @@ export interface IPipelineStep< readonly printer: { [K in StepOutputFormat]?: IPipelineStepPrinter } & { - // we always want to have the internal printer + // we always want to have an internal printer [StepOutputFormat.Internal]: InternalStepPrinter } /** diff --git a/src/core/steps/steps.ts b/src/core/steps/steps.ts deleted file mode 100644 index 051b206188..0000000000 --- a/src/core/steps/steps.ts +++ /dev/null @@ -1,41 +0,0 @@ -/** - * This file defines *all* steps of the slicing process and the data they require. - * - * Note, that the order of elements here also describes the *desired* order of their desired execution for readability. - * However, it is the {@link SteppingSlicer} which controls the order of execution and the steps required to achieve a given result. - * - * If you add a new step, you have to (at least) update the {@link SteppingSlicer} as well as the corresponding type predicate {@link SteppingSlicerInput}. - * Furthermore, if your step is the new *last* step, please update {@link LAST_STEP}. - * - * Please note that the combination of `satisfies` and `as` seems to be required. - * With `satisfies` we make sure that the respective element has all the keys it requires, and the `as` force the type to be exactly the given one - * - * @module - */ - -import { PARSE_WITH_R_SHELL_STEP } from './all/core/00-parse' -import { NORMALIZE } from './all/core/10-normalize' -import { STATIC_DATAFLOW } from './all/core/20-dataflow' -import { STATIC_SLICE } from './all/static-slicing/00-slice' -import { NAIVE_RECONSTRUCT } from './all/static-slicing/10-reconstruct' - - -export const STEPS_PER_FILE = { - 'parse': PARSE_WITH_R_SHELL_STEP, - 'normalize': NORMALIZE, - 'dataflow': STATIC_DATAFLOW -} as const - -export const STEPS_PER_SLICE = { - 'slice': STATIC_SLICE, - 'reconstruct': NAIVE_RECONSTRUCT -} as const - -export const STEPS = { ...STEPS_PER_FILE, ...STEPS_PER_SLICE } as const -export const LAST_PER_FILE_STEP = 'dataflow' -export const LAST_STEP = 'reconstruct' - -export type StepName = keyof typeof STEPS -export type Step = typeof STEPS[Name] -export type StepProcessor = Step['processor'] -export type StepResult = Awaited>> From b9d35b393ad775e8922b80223a6de970c3adf93c Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Tue, 7 May 2024 19:10:04 +0200 Subject: [PATCH 5/8] refactor(pipeline): minor improvements to filenames and structure --- src/core/pipeline-executor.ts | 4 ++-- src/core/print/print.ts | 2 +- src/core/steps/all/core/00-parse.ts | 4 ++-- src/core/steps/all/core/10-normalize.ts | 4 ++-- src/core/steps/all/core/20-dataflow.ts | 4 ++-- src/core/steps/all/core/21-abstract-interpretation.ts | 4 ++-- src/core/steps/all/static-slicing/00-slice.ts | 4 ++-- src/core/steps/all/static-slicing/10-reconstruct.ts | 4 ++-- src/core/steps/{step.ts => pipeline-step.ts} | 1 + src/core/steps/pipeline/{create.ts => create-pipeline.ts} | 6 +++--- .../steps/pipeline/{default.ts => default-pipelines.ts} | 0 src/core/steps/pipeline/index.ts | 2 +- src/core/steps/pipeline/pipeline.ts | 4 ++-- test/functionality/pipelines/create/create-tests.ts | 2 +- 14 files changed, 23 insertions(+), 22 deletions(-) rename src/core/steps/{step.ts => pipeline-step.ts} (97%) rename src/core/steps/pipeline/{create.ts => create-pipeline.ts} (96%) rename src/core/steps/pipeline/{default.ts => default-pipelines.ts} (100%) diff --git a/src/core/pipeline-executor.ts b/src/core/pipeline-executor.ts index 1e92d3dbc9..3d50e76a95 100644 --- a/src/core/pipeline-executor.ts +++ b/src/core/pipeline-executor.ts @@ -7,8 +7,8 @@ import type { PipelineStepNames, PipelineStepOutputWithName } from './steps/pipeline' -import type { PipelineStepName } from './steps/step' -import { PipelineStepStage } from './steps/step' +import type { PipelineStepName } from './steps/pipeline-step' +import { PipelineStepStage } from './steps/pipeline-step' /** * The pipeline executor allows to execute arbitrary {@link Pipeline|pipelines} in a step-by-step fashion. diff --git a/src/core/print/print.ts b/src/core/print/print.ts index 8d9049e858..f307b787b5 100644 --- a/src/core/print/print.ts +++ b/src/core/print/print.ts @@ -1,4 +1,4 @@ -import type { IPipelineStep, StepProcessingFunction } from '../steps/step' +import type { IPipelineStep, StepProcessingFunction } from '../steps/pipeline-step' import type { TailOfArray } from '../../util/arrays' import { guard } from '../../util/assert' diff --git a/src/core/steps/all/core/00-parse.ts b/src/core/steps/all/core/00-parse.ts index 3d953ff2ac..824ede5499 100644 --- a/src/core/steps/all/core/00-parse.ts +++ b/src/core/steps/all/core/00-parse.ts @@ -1,7 +1,7 @@ import { internalPrinter, StepOutputFormat } from '../../../print/print' import { parseToQuads } from '../../../print/parse-printer' -import type { IPipelineStep } from '../../step' -import { PipelineStepStage } from '../../step' +import type { IPipelineStep } from '../../pipeline-step' +import { PipelineStepStage } from '../../pipeline-step' import type { RParseRequest, RShell } from '../../../../r-bridge' import type { DeepReadonly } from 'ts-essentials' import type { RShellExecutor } from '../../../../r-bridge/shell-executor' diff --git a/src/core/steps/all/core/10-normalize.ts b/src/core/steps/all/core/10-normalize.ts index 11d3a0ee01..2be182b390 100644 --- a/src/core/steps/all/core/10-normalize.ts +++ b/src/core/steps/all/core/10-normalize.ts @@ -9,8 +9,8 @@ import { printNormalizedAstToMermaid, printNormalizedAstToMermaidUrl } from '../../../print/normalize-printer' -import type { IPipelineStep } from '../../step' -import { PipelineStepStage } from '../../step' +import type { IPipelineStep } from '../../pipeline-step' +import { PipelineStepStage } from '../../pipeline-step' import type { DeepReadonly } from 'ts-essentials' import type { ParseRequiredInput } from './00-parse' import { normalize } from '../../../../r-bridge/lang-4.x/ast/parser/json/parser' diff --git a/src/core/steps/all/core/20-dataflow.ts b/src/core/steps/all/core/20-dataflow.ts index ca589dab6f..48a5957875 100644 --- a/src/core/steps/all/core/20-dataflow.ts +++ b/src/core/steps/all/core/20-dataflow.ts @@ -1,6 +1,6 @@ import { internalPrinter, StepOutputFormat } from '../../../print/print' -import type { IPipelineStep } from '../../step' -import { PipelineStepStage } from '../../step' +import type { IPipelineStep } from '../../pipeline-step' +import { PipelineStepStage } from '../../pipeline-step' import { dataflowGraphToJson, dataflowGraphToMermaid, diff --git a/src/core/steps/all/core/21-abstract-interpretation.ts b/src/core/steps/all/core/21-abstract-interpretation.ts index fc6d91e0d6..6164d07be7 100644 --- a/src/core/steps/all/core/21-abstract-interpretation.ts +++ b/src/core/steps/all/core/21-abstract-interpretation.ts @@ -1,8 +1,8 @@ import { internalPrinter, StepOutputFormat } from '../../../print/print' import type { DataflowInformation } from '../../../../dataflow/info' -import type { IPipelineStep } from '../../step' +import type { IPipelineStep } from '../../pipeline-step' import type { DeepReadonly } from 'ts-essentials' -import { PipelineStepStage } from '../../step' +import { PipelineStepStage } from '../../pipeline-step' // Use runAbstractInterpretation here when it's ready function processor(results: { dataflow?: DataflowInformation }, _input: unknown): DataflowInformation { diff --git a/src/core/steps/all/static-slicing/00-slice.ts b/src/core/steps/all/static-slicing/00-slice.ts index 8b09c6cfcc..1e14b179f7 100644 --- a/src/core/steps/all/static-slicing/00-slice.ts +++ b/src/core/steps/all/static-slicing/00-slice.ts @@ -1,6 +1,6 @@ import { internalPrinter, StepOutputFormat } from '../../../print/print' -import type { IPipelineStep } from '../../step' -import { PipelineStepStage } from '../../step' +import type { IPipelineStep } from '../../pipeline-step' +import { PipelineStepStage } from '../../pipeline-step' import type { SlicingCriteria } from '../../../../slicing' import { staticSlicing } from '../../../../slicing' import type { DeepReadonly } from 'ts-essentials' diff --git a/src/core/steps/all/static-slicing/10-reconstruct.ts b/src/core/steps/all/static-slicing/10-reconstruct.ts index 56f8cf76c9..c5fa503e61 100644 --- a/src/core/steps/all/static-slicing/10-reconstruct.ts +++ b/src/core/steps/all/static-slicing/10-reconstruct.ts @@ -1,6 +1,6 @@ import { internalPrinter, StepOutputFormat } from '../../../print/print' -import type { IPipelineStep } from '../../step' -import { PipelineStepStage } from '../../step' +import type { IPipelineStep } from '../../pipeline-step' +import { PipelineStepStage } from '../../pipeline-step' import type { AutoSelectPredicate } from '../../../../slicing' import { reconstructToCode } from '../../../../slicing' import type { DeepReadonly } from 'ts-essentials' diff --git a/src/core/steps/step.ts b/src/core/steps/pipeline-step.ts similarity index 97% rename from src/core/steps/step.ts rename to src/core/steps/pipeline-step.ts index eb4960a2c8..2631ee5ced 100644 --- a/src/core/steps/step.ts +++ b/src/core/steps/pipeline-step.ts @@ -30,6 +30,7 @@ export const enum PipelineStepStage { OncePerRequest } +/** Name of a single {@link IPipelineStep|step} in a pipeline (branded to avoid type-mishaps). */ export type PipelineStepName = string & { __brand?: 'StepName' } /** diff --git a/src/core/steps/pipeline/create.ts b/src/core/steps/pipeline/create-pipeline.ts similarity index 96% rename from src/core/steps/pipeline/create.ts rename to src/core/steps/pipeline/create-pipeline.ts index 9df2b42908..153dca6d2b 100644 --- a/src/core/steps/pipeline/create.ts +++ b/src/core/steps/pipeline/create-pipeline.ts @@ -1,5 +1,5 @@ -import type { IPipelineStep, PipelineStepName } from '../step' -import { PipelineStepStage } from '../step' +import type { IPipelineStep, PipelineStepName } from '../pipeline-step' +import { PipelineStepStage } from '../pipeline-step' import { InvalidPipelineError } from './invalid-pipeline-error' import type { Pipeline } from './pipeline' import { jsonReplacer } from '../../../util/json' @@ -132,7 +132,7 @@ function checkForInvalidDependency(steps: readonly IPipelineStep[], stepMap: Map function initializeSteps(steps: readonly IPipelineStep[], stepMap: Map, inits: PipelineStepName[], visited: ReadonlySet) { for(const step of steps) { const name = step.name - // if the name is already in the map we have a duplicate + // if the name is already in the map, we have a duplicate if(stepMap.has(name)) { throw new InvalidPipelineError(`1) Step name "${name}" is not unique in the pipeline`) } diff --git a/src/core/steps/pipeline/default.ts b/src/core/steps/pipeline/default-pipelines.ts similarity index 100% rename from src/core/steps/pipeline/default.ts rename to src/core/steps/pipeline/default-pipelines.ts diff --git a/src/core/steps/pipeline/index.ts b/src/core/steps/pipeline/index.ts index 38bb83622a..21532a8ef2 100644 --- a/src/core/steps/pipeline/index.ts +++ b/src/core/steps/pipeline/index.ts @@ -1,3 +1,3 @@ export * from './pipeline' export * from './invalid-pipeline-error' -export * from './default' +export * from './default-pipelines' diff --git a/src/core/steps/pipeline/pipeline.ts b/src/core/steps/pipeline/pipeline.ts index efedd959bd..63f60a7464 100644 --- a/src/core/steps/pipeline/pipeline.ts +++ b/src/core/steps/pipeline/pipeline.ts @@ -1,5 +1,5 @@ -import type { IPipelineStep, PipelineStepName, PipelineStepStage } from '../step' -import { verifyAndBuildPipeline } from './create' +import type { IPipelineStep, PipelineStepName, PipelineStepStage } from '../pipeline-step' +import { verifyAndBuildPipeline } from './create-pipeline' import type { DeepReadonly, UnionToIntersection } from 'ts-essentials' /** diff --git a/test/functionality/pipelines/create/create-tests.ts b/test/functionality/pipelines/create/create-tests.ts index 2a845e9bcb..ecfff012a6 100644 --- a/test/functionality/pipelines/create/create-tests.ts +++ b/test/functionality/pipelines/create/create-tests.ts @@ -1,5 +1,5 @@ import { createPipeline } from '../../../../src/core/steps/pipeline' -import type { IPipelineStep, PipelineStepName } from '../../../../src/core/steps/step' +import type { IPipelineStep, PipelineStepName } from '../../../../src/core/steps/pipeline-step' import { expect } from 'chai' import { PARSE_WITH_R_SHELL_STEP } from '../../../../src/core/steps/all/core/00-parse' import { allPermutations } from '../../../../src/util/arrays' From f6866fe6e7e1452cb9f6a00ef164b4833561915a Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Tue, 7 May 2024 19:11:25 +0200 Subject: [PATCH 6/8] refactor: remove local variable in det. id generator closure --- src/r-bridge/lang-4.x/ast/model/processing/decorate.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/r-bridge/lang-4.x/ast/model/processing/decorate.ts b/src/r-bridge/lang-4.x/ast/model/processing/decorate.ts index 74cce15d42..f0e290a04e 100644 --- a/src/r-bridge/lang-4.x/ast/model/processing/decorate.ts +++ b/src/r-bridge/lang-4.x/ast/model/processing/decorate.ts @@ -43,8 +43,7 @@ export type IdGenerator = (data: RNode) => NodeId /** * The simplest id generator which just increments a number on each call. */ -export function deterministicCountingIdGenerator(start = 0): () => NodeId { - let id = start +export function deterministicCountingIdGenerator(id = 0): () => NodeId { return () => id++ } From 90334fae1d595985a0a453e4bc2c5963cf6e6257 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Tue, 7 May 2024 19:30:49 +0200 Subject: [PATCH 7/8] doc(interface): clean up the interface wiki page --- wiki/Home.md | 6 +- wiki/Interface.md | 152 ++++++++-------------------------------------- 2 files changed, 27 insertions(+), 131 deletions(-) diff --git a/wiki/Home.md b/wiki/Home.md index 9a4a39b648..8223baa4ad 100644 --- a/wiki/Home.md +++ b/wiki/Home.md @@ -1,12 +1,10 @@ [![flowR Logo](img/flowR.png)](https://github.com/Code-Inspect/flowr) -***This wiki page is currently under construction*** - *flowR* is a static [dataflow analyzer](https://en.wikipedia.org/wiki/Data-flow_analysis) and [program slicer](https://github.com/Code-Inspect/flowr/wiki/Terminology#program-slice) for the [*R*](https://www.r-project.org/) programming language (currently tested for versions `4.x`), and you have stumbled upon its wiki pages! -Overall, the wiki has three goals: +Overall, this wiki has three goals: -1. Illustrate how to [**use**](https://github.com/Code-Inspect/flowr/wiki/Overview) *flowR* in its current form. +1. Illustrate how to [**use**](https://github.com/Code-Inspect/flowr/wiki/Overview) *flowR*. 2. Show how to [**extend**](https://github.com/Code-Inspect/flowr/wiki/Interface) *flowR* with other tools (i.e., how to interface with *flowR*) 3. Explain the inner workings to help to [**develop**](https://github.com/Code-Inspect/flowr/wiki/Core) *flowR*'s core. diff --git a/wiki/Interface.md b/wiki/Interface.md index 3147238296..9832189ccf 100644 --- a/wiki/Interface.md +++ b/wiki/Interface.md @@ -1,37 +1,32 @@ -***This wiki page is currently under construction*** - Although far from being as detailed as the in-depth explanation of [*flowR*](https://github.com/Code-Inspect/flowr/wiki/Core), this wiki page explains how to interface with *flowR* in more detail.<1> -- [💬 Communicating With the Server](#-communicating-with-the-server) +- [💬 Communicating with the Server](#-communicating-with-the-server) - [The Hello Message](#the-hello-message) - [The Analysis Request](#the-analysis-request) - [Including the Control Flow Graph](#including-the-control-flow-graph) - [Retrieve the Output as RDF N-Quads](#retrieve-the-output-as-rdf-n-quads) - - [Complete Example](#complete-example) + - [Complete Example (without WebSocket)](#complete-example-without-websocket) - [Using Netcat](#using-netcat) - [Using Python](#using-python) - [The Slice Request](#the-slice-request) - [The REPL Request](#the-repl-request) - [💻 Using the REPL](#-using-the-repl) - [Example: Retrieving the Dataflow Graph](#example-retrieving-the-dataflow-graph) - - [Interfacing With the File System](#interfacing-with-the-file-system) + - [Interfacing with the File System](#interfacing-with-the-file-system) - [⚒️ Writing Code](#️-writing-code) - - [Interfacing With R by Using The `RShell`](#interfacing-with-r-by-using-the-rshell) + - [Interfacing with R by Using the `RShell`](#interfacing-with-r-by-using-the-rshell) - [The Pipeline Executor](#the-pipeline-executor) - - [(Deprecated) Slicing With The `SteppingSlicer`](#deprecated-slicing-with-the-steppingslicer) - - [Understanding the Steps](#understanding-the-steps) - - [Benchmark the Slicer With The `BenchmarkSlicer`](#benchmark-the-slicer-with-the-benchmarkslicer) - - [Augmenting the Normalization](#augmenting-the-normalization) - [Generate Statistics](#generate-statistics) - [Extract Statistics with `extractUsageStatistics()`](#extract-statistics-with-extractusagestatistics) - [Adding a New Feature to Extract](#adding-a-new-feature-to-extract) -## 💬 Communicating With the Server +## 💬 Communicating with the Server As explained in the [Overview](https://github.com/Code-Inspect/flowr/wiki/Overview), you can simply run the [TCP](https://de.wikipedia.org/wiki/Transmission_Control_Protocol) server by adding the `--server` flag (and, due to the interactive mode, exit with the conventional CTRL+C). -Currently, every connection is handled by the same underlying `RShell` - so the server is not designed to handle many clients at a time. Additionally, the server is not well guarded against attacks (e.g., you can theoretically spawn an arbitrary amount of R shell sessions on the target machine). +Currently, every connection is handled by the same underlying `RShell` - so the server is not designed to handle many clients at a time. +Additionally, the server is not well guarded against attacks (e.g., you can theoretically spawn an arbitrary amount of R shell sessions on the target machine). Every message has to be given in a single line (i.e., without a newline in-between) and end with a newline character. Nevertheless, we will pretty-print example given in the following segments for the ease of reading. @@ -79,8 +74,8 @@ See the implementation of the [hello message](https://github.com/Code-Inspect/fl There are currently a few messages that you can send after the hello message. -If you want to *slice* a piece of R code you first have to send an analysis request, so that you can send one or multiple slice requests afterward. -Requests for the repl are independent of that. +If you want to *slice* a piece of R code you first have to send an [analysis request](#the-analysis-request), so that you can send one or multiple slice requests afterward. +Requests for the [REPL](#the-repl-request) are independent of that. ### The Analysis Request @@ -105,16 +100,15 @@ sequenceDiagram The request allows the server to analyze a file and prepare it for slicing. -The message can contain a `filetoken`, which is used to identify the file in later slice requests (if you do not add one, the request will not be stored and therefore be unavailable for slicing). +The message can contain a `filetoken`, which is used to identify the file in later slice requests (if you do not add one, the request will not be stored and therefore, it is not available for subsequent slicing). > [!IMPORTANT] > If you want to send and process a lot of analysis requests, but do not want to slice them, please do not pass the `filetoken` field. This will save the server a lot of memory allocation. -Furthermore, it must contain either a `content` field to directly pass the file's content or a `filepath` field which contains the path to the file (which must be accessible for the server to be useful). +Furthermore, the request must contain either a `content` field to directly pass the file's content or a `filepath` field which contains the path to the file (this path must be accessible for the server to be useful). If you add the `id` field, the answer will use the same `id` so you can match requests and the corresponding answers. See the implementation of the [request-file-analysis message](https://github.com/Code-Inspect/flowr-cli/blob/main/src/repl/server/messages/analysis.ts) for more information. -
Example Request @@ -466,7 +460,7 @@ It contains a human-readable description *why* the analysis failed (see the [err #### Including the Control Flow Graph -While *flowR* does (for the time being) not use an explicit control flow graph, the respective structure can still be exposed using the server (note that, as this feature is not needed within *flowR*, it is tested significantly less - so please create a [new issue](https://github.com/Code-Inspect/flowr/issues/new/choose) for any bug you may encounter). +While *flowR* does (for the time being) not use an explicit control flow graph but instead relies on control-dependency edges within the dataflow graph, the respective structure can still be exposed using the server (note that, as this feature is not needed within *flowR*, it is tested significantly less - so please create a [new issue](https://github.com/Code-Inspect/flowr/issues/new/choose) for any bug you may encounter). For this, the analysis request may add `cfg: true` to its list of options.
@@ -629,11 +623,9 @@ The response is basically the same as the response sent without the `cfg` flag.
- #### Retrieve the Output as RDF N-Quads -The default response is formatted as JSON. However, by specifying `format: "n-quads"`, you can retrieve the individual results (e.g., the normalized AST), as [RDF N-Quads](https://www.w3.org/TR/n-quads/). This works with, and without `cfg: true`. - +The default response is formatted as JSON. However, by specifying `format: "n-quads"`, you can retrieve the individual results (e.g., the normalized AST), as [RDF N-Quads](https://www.w3.org/TR/n-quads/). This works with, and without [`cfg: true`](#including-the-control-flow-graph).
Example Request @@ -676,7 +668,7 @@ Please note, that the base message format is still JSON. Only the individual res ```
-#### Complete Example +#### Complete Example (without WebSocket) Suppose, you want to launch the server using a docker container. Then, start the server by (forwarding the internal default port): @@ -686,7 +678,7 @@ docker run -p1042:1042 -it --rm eagleoutice/flowr --server ##### Using Netcat -Now, using a tool like _netcat_ to connect: +Now, using a tool like *netcat* to connect: ```shell nc 127.0.0.1 1042 @@ -844,16 +836,16 @@ sequenceDiagram The REPL execution message allows to send a REPL command to receive its output. For more on the REPL, see the [introduction](https://github.com/Code-Inspect/flowr/wiki/Overview#the-read-eval-print-loop-repl), or the [description below](#using-the-repl). You only have to pass the command you want to execute in the `expression` field. Furthermore, you can set the `ansi` field to `true` if you are interested in output formatted using [ANSI escape codes](https://en.wikipedia.org/wiki/ANSI_escape_code). -We strongly recommend you to make use of the `id` field to link answers with requests as you can theoretically request the execution of multiple scripts, which then happens in parallel. +We strongly recommend you to make use of the `id` field to link answers with requests as you can theoretically request the execution of multiple scripts at the same time, which then happens in parallel. > [!WARNING] -> There is currently no automatic sandboxing or safeguarding against such requests. They simply execute the respective R code on your machine. +> There is currently no automatic sandboxing or safeguarding against such requests. They simply execute the respective R code on your machine. Please be very careful. The answer on such a request is different from the other messages as the `request-repl-execution` message may be sent multiple times. This allows to better handle requests that require more time but already output intermediate results. You can detect the end of the execution by receiving the `end-repl-execution` message. See the implementation of the [request-repl-execution message](https://github.com/Code-Inspect/flowr-cli/blob/main/src/repl/server/messages/repl.ts) for more information. -The semantics of the error message are similar to other messages. +The semantics of the error message are similar to that of the other messages.
Example Request @@ -930,7 +922,7 @@ flowchart LR The graph returned for you may differ, depending on the evolution of *flowR*. -### Interfacing With the File System +### Interfacing with the File System Many commands that allow for an R-expression (like `:dataflow*`) allow for a file as well, if the argument starts with `file://`. If you are located in the root directory of the *flowR* repository, the following should give you the parsed AST of the example file: @@ -938,21 +930,20 @@ Many commands that allow for an R-expression (like `:dataflow*`) allow for a fil R> :parse file://test/testfiles/example.R ``` - ## ⚒️ Writing Code *flowR* can be used as module and offers several main classes and interfaces that are interesting for extension (see the [core](https://github.com/Code-Inspect/flowr/wiki/Core) wiki page for more information). -### Interfacing With R by Using The `RShell` +### Interfacing with R by Using the `RShell` The `RShell` class allows to interface with the `R` ecosystem installed on the host system. -For now there are no alternatives (although we plan on providing more flexible drop-in replacements). +For now there are no (real) alternatives, although we plan on providing more flexible drop-in replacements. > [!IMPORTANT] > Each `RShell` controls a new instance of the R interpreter, make sure to call `RShell::close()` when you are done. You can start a new "session" simply by constructing a new object with `new RShell()`. -However, there are several options which may be of interest (e.g., to automatically revive the shell in case of errors or to control the name location of the R process on the system). See the in-code _documentation_ for more information. +However, there are several options which may be of interest (e.g., to automatically revive the shell in case of errors or to control the name location of the R process on the system). See the in-code *documentation* for more information. With a shell object (let's call it `shell`), you can execute R code by using `RShell::sendCommand`, for example `shell.sendCommand("1 + 1")`. However, this does not return anything, so if you want to collect the output of your command, use `RShell::sendCommandWithOutput` instead. @@ -974,110 +965,17 @@ const slice = await slicer.allRemainingSteps() // console.log(slice.reconstruct.code) ``` -If you compare this, with what you would have done with the [old `SteppingSlicer`](#deprecated-slicing-with-the-steppingslicer) this essentially just requires you to replace the `SteppingSlicer` with the `PipelineExecutor` and to pass the `DEFAULT_SLICING_PIPELINE` as the first argument. -Similarly, the new `PipelineExecutor`... +If you compare this, with what you would have done with the old `SteppingSlicer`, this essentially just requires you to replace the `SteppingSlicer` with the `PipelineExecutor` and to pass the `DEFAULT_SLICING_PIPELINE` as the first argument. +The `PipelineExecutor`... 1. allows to investigate the results of all intermediate steps 2. can be executed step-by-step 3. can repeat steps (e.g., to calculate multiple slices on the same input) -See the documentation for more information. - -### (Deprecated) Slicing With The `SteppingSlicer` - -> 💡 Information\ -> Please note, that the `SteppingSlicer` has been deprecated with the *Dataflow v2* update, in favor of a far more general `PipelineExecutor` (which now backs the `SteppingSlicer` using a custom legacy-`Pipeline` to ensure that it behaves similar). - -The main class that represents *flowR*'s slicing is the `SteppingSlicer` class. With *flowR*, this allows you to slice code like this: - -```typescript -const stepper = new SteppingSlicer({ - shell: new RShell(), - request: requestFromInput('x <- 1\nx + 1'), - criterion: ['2@x'] -}) - -const slice = await stepper.allRemainingSteps() -// console.log(slice.reconstruct.code) -``` - -After that, you can request more slices with the help of `SteppingSlicer::updateCriterion`: - -```typescript -stepper.updateCriterion(['1@x']) -const sliceB = await stepper.allRemainingSteps() -// console.log(sliceB.reconstruct.code) -``` - -Besides slicing, the stepping slicer: - -1. allows to investigate the results of all intermediate steps -2. can be executed step-by-step -3. can be told to stop after a given step - -See the _documentation_ for more. - -#### Understanding the Steps - -The definition of all steps happens in [src/core/steps.ts](https://github.com/Code-Inspect/flowr/blob/main/src/core/steps.ts). -Investigating the file provides you an overview of the slicing phases, as well as the functions that are called to perform the respective step. -The [`SteppingSlicer`](https://github.com/Code-Inspect/flowr/blob/main/src/core/stepping-slicer.ts) simply glues them together and passes the results of one step to the next. - -If you add a new step, make sure to modify all of these locations accordingly. - -#### Benchmark the Slicer With The `BenchmarkSlicer` - -Relying on the `SteppingSlicer`, the `BenchmarkSlicer` instruments each step to allow measuring the required time. It is used by the `benchmark` script, explained in the [overview](https://github.com/Code-Inspect/flowr/wiki/Overview) wiki page. -Furthermore, it provides a simple way to slice a file for all possible slicing points: - -```typescript -const slicer = new BenchmarkSlicer() - -await slicer.init({ request: 'text', content: 'y <- 2 + x' }) -await slicer.sliceForAll(DefaultAllVariablesFilter) - -const result = slicer.finish() -``` - -Please create a new `BenchmarkSlicer` object per input file (this will probably change as soon as *flowR* allows for multiple input files). - -> [!TIP] -> Calling `BenchmarkSlicer::finish` will automatically take care of closing the underlying shell session. -> However, if you want to be sure (or need it in case of exceptions), you can use `BenchmarkSlicer::ensureSessionClosed`. - -### Augmenting the Normalization - -The normalization of a given input is essentially handled by the `normalize` function although it is better to use the abstraction of the `SteppingSlicer` and use `executeSingleSubStep('normalize', )` to invoke the respective step. -The call accepts a collection of *hooks* (the configuration of the `SteppingSlicer` allows them as well). - -These hooks allow the modification of the inputs and outputs of the normalization. If you want to count the amount of strings encountered while parsing, you can use something like this: - -```ts -const shell = new RShell() - -let counter = 0 - -await new SteppingSlicer({ - stepOfInterest: 'normalize', shell, - request: requestFromInput('x <- "foo"'), - hooks: { - values: { - onString: { - after: () => { counter++ }, - } - } - } -}).allRemainingSteps() - -// console.log(counter) -``` - -The `after` hook is called after the normalization has created the respective normalized string node, so we can be sure that the node was indeed a string! Besides incrementing the respective counter, we could return a value that the normalization should use instead (but we do not do that in this example). +See the in-code documentation for more information. ### Generate Statistics -**TODO: will probably change as part of the planned paper** - #### Extract Statistics with `extractUsageStatistics()` #### Adding a New Feature to Extract From 89642bb9063ec6d2a9a67cee82fb03fda94fa941 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Tue, 7 May 2024 19:36:02 +0200 Subject: [PATCH 8/8] refactor(interface): clarify that the stepping slicer was removed --- wiki/Interface.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wiki/Interface.md b/wiki/Interface.md index 9832189ccf..a48e1179f2 100644 --- a/wiki/Interface.md +++ b/wiki/Interface.md @@ -965,7 +965,7 @@ const slice = await slicer.allRemainingSteps() // console.log(slice.reconstruct.code) ``` -If you compare this, with what you would have done with the old `SteppingSlicer`, this essentially just requires you to replace the `SteppingSlicer` with the `PipelineExecutor` and to pass the `DEFAULT_SLICING_PIPELINE` as the first argument. +If you compare this, with what you would have done with the old (and removed) `SteppingSlicer`, this essentially just requires you to replace the `SteppingSlicer` with the `PipelineExecutor` and to pass the `DEFAULT_SLICING_PIPELINE` as the first argument. The `PipelineExecutor`... 1. allows to investigate the results of all intermediate steps