From de943ed1da3cc56aff0a57c60123e6bedb9462d4 Mon Sep 17 00:00:00 2001 From: EagleoutIce Date: Tue, 21 May 2024 09:38:23 +0200 Subject: [PATCH 1/6] feat-fix: sync idmap on dataflow comparison --- src/dataflow/graph/graph.ts | 15 +++++++++++++-- test/functionality/_helper/shell.ts | 5 +++++ test/testfiles/example.R | 1 + 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/dataflow/graph/graph.ts b/src/dataflow/graph/graph.ts index 1654dd0b16..629811c13a 100644 --- a/src/dataflow/graph/graph.ts +++ b/src/dataflow/graph/graph.ts @@ -76,11 +76,11 @@ type EdgeData = Omit { private static DEFAULT_ENVIRONMENT: REnvironmentInformation | undefined = undefined - public readonly idMap: AstIdMap | undefined + private _idMap: AstIdMap | undefined constructor(idMap: AstIdMap | undefined) { DataflowGraph.DEFAULT_ENVIRONMENT ??= initializeCleanEnvironments() - this.idMap = idMap + this._idMap = idMap } /** Contains the vertices of the root level graph (i.e., included those vertices from the complete graph, that are nested within function definitions) */ @@ -134,6 +134,17 @@ export class DataflowGraph Date: Tue, 21 May 2024 10:09:20 +0200 Subject: [PATCH 2/6] refactor: provide more information on missing source vertices --- src/dataflow/graph/diff.ts | 8 ++++++++ src/dataflow/graph/graph.ts | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/dataflow/graph/diff.ts b/src/dataflow/graph/diff.ts index efef9b8637..4c260cfd9e 100644 --- a/src/dataflow/graph/diff.ts +++ b/src/dataflow/graph/diff.ts @@ -95,10 +95,18 @@ function diffOutgoingEdges(ctx: DataflowDiffContext): void { } for(const [id, edge] of lEdges) { + if(!ctx.left.hasVertex(id)) { + ctx.report.addComment(`The source ${id} of edges ${JSON.stringify(edge, jsonReplacer)} is not present in ${ctx.leftname}. This means that the graph contains an edge but not the corresponding vertex.`) + continue + } diffEdges(ctx, id, edge, rEdges.get(id)) } // just to make it both ways in case the length differs for(const [id, edge] of rEdges) { + if(!ctx.right.hasVertex(id)) { + ctx.report.addComment(`The source ${id} of edges ${JSON.stringify(edge, jsonReplacer)} is not present in ${ctx.rightname}. This means that the graph contains an edge but not the corresponding vertex.`) + continue + } if(!lEdges.has(id)) { diffEdges(ctx, id, undefined, edge) } diff --git a/src/dataflow/graph/graph.ts b/src/dataflow/graph/graph.ts index 629811c13a..cc756938c2 100644 --- a/src/dataflow/graph/graph.ts +++ b/src/dataflow/graph/graph.ts @@ -176,7 +176,7 @@ export class DataflowGraph Date: Tue, 21 May 2024 12:32:41 +0200 Subject: [PATCH 3/6] feat-fix: `assign` calls expose the rootId as read and not the name id --- src/dataflow/environments/built-in.ts | 44 +++++++++---------- src/dataflow/graph/graph.ts | 2 +- .../call/built-in/built-in-assignment.ts | 16 ++++--- test/functionality/_helper/shell.ts | 5 ++- 4 files changed, 35 insertions(+), 32 deletions(-) diff --git a/src/dataflow/environments/built-in.ts b/src/dataflow/environments/built-in.ts index 41a08ae9f4..bd63ad8012 100644 --- a/src/dataflow/environments/built-in.ts +++ b/src/dataflow/environments/built-in.ts @@ -149,29 +149,29 @@ registerBuiltInConstant('FALSE', false) registerBuiltInConstant('F', false) registerSimpleFunctions('~', '+', '-', '*', '/', '^', '!', '?', '**', '==', '!=', '>', '<', '>=', '<=', '%%', '%/%', '%*%', ':', 'list') registerBuiltInFunctions(['cat', 'switch'], defaultBuiltInProcessor, {}) /* returns null */ -registerBuiltInFunctions(['print', '('], defaultBuiltInProcessor, { returnsNthArgument: 0 }, ) -registerBuiltInFunctions(['return'], defaultBuiltInProcessor, { returnsNthArgument: 0, cfg: ExitPointType.Return }, ) -registerBuiltInFunctions(['break'], defaultBuiltInProcessor, { cfg: ExitPointType.Break }, ) -registerBuiltInFunctions(['next'], defaultBuiltInProcessor, { cfg: ExitPointType.Next }, ) -registerBuiltInFunctions(['{'], processExpressionList, {}, ) -registerBuiltInFunctions(['source'], processSourceCall, {}, ) -registerBuiltInFunctions(['[', '[['], processAccess, { treatIndicesAsString: false }, ) -registerBuiltInFunctions(['$', '@'], processAccess, { treatIndicesAsString: true }, ) -registerBuiltInFunctions(['if'], processIfThenElse, {}, ) -registerBuiltInFunctions(['get'], processGet, {}, ) -registerBuiltInFunctions(['library'], processLibrary, {}, ) -registerBuiltInFunctions(['<-', '='], processAssignment, { canBeReplacement: true }, ) -registerBuiltInFunctions([':=', 'assign'], processAssignment, {}, ) -registerBuiltInFunctions(['delayedAssign'], processAssignment, { quoteSource: true }, ) -registerBuiltInFunctions(['<<-'], processAssignment, { superAssignment: true, canBeReplacement: true }, ) -registerBuiltInFunctions(['->'], processAssignment, { swapSourceAndTarget: true, canBeReplacement: true }, ) +registerBuiltInFunctions(['print', '('], defaultBuiltInProcessor, { returnsNthArgument: 0 } ) +registerBuiltInFunctions(['return'], defaultBuiltInProcessor, { returnsNthArgument: 0, cfg: ExitPointType.Return } ) +registerBuiltInFunctions(['break'], defaultBuiltInProcessor, { cfg: ExitPointType.Break } ) +registerBuiltInFunctions(['next'], defaultBuiltInProcessor, { cfg: ExitPointType.Next } ) +registerBuiltInFunctions(['{'], processExpressionList, {} ) +registerBuiltInFunctions(['source'], processSourceCall, {} ) +registerBuiltInFunctions(['[', '[['], processAccess, { treatIndicesAsString: false } ) +registerBuiltInFunctions(['$', '@'], processAccess, { treatIndicesAsString: true } ) +registerBuiltInFunctions(['if'], processIfThenElse, {} ) +registerBuiltInFunctions(['get'], processGet, {} ) +registerBuiltInFunctions(['library'], processLibrary, {} ) +registerBuiltInFunctions(['<-', '='], processAssignment, { canBeReplacement: true } ) +registerBuiltInFunctions([':=', 'assign'], processAssignment, {} ) +registerBuiltInFunctions(['delayedAssign'], processAssignment, { quoteSource: true } ) +registerBuiltInFunctions(['<<-'], processAssignment, { superAssignment: true, canBeReplacement: true } ) +registerBuiltInFunctions(['->'], processAssignment, { swapSourceAndTarget: true, canBeReplacement: true } ) registerBuiltInFunctions(['->>'], processAssignment, { superAssignment: true, swapSourceAndTarget: true, canBeReplacement: true } ) registerBuiltInFunctions(['&&', '||', '&', '|'], processSpecialBinOp, { lazy: true } ) -registerBuiltInFunctions(['|>', '%>%'], processPipe, {}, ) -registerBuiltInFunctions(['function', '\\'], processFunctionDefinition, {}, ) -registerBuiltInFunctions(['quote', 'substitute', 'bquote'], processQuote, { quoteArgumentsWithIndex: 0 }, ) -registerBuiltInFunctions(['for'], processForLoop, {}, ) -registerBuiltInFunctions(['repeat'], processRepeatLoop, {}, ) -registerBuiltInFunctions(['while'], processWhileLoop, {}, ) +registerBuiltInFunctions(['|>', '%>%'], processPipe, {} ) +registerBuiltInFunctions(['function', '\\'], processFunctionDefinition, {} ) +registerBuiltInFunctions(['quote', 'substitute', 'bquote'], processQuote, { quoteArgumentsWithIndex: 0 } ) +registerBuiltInFunctions(['for'], processForLoop, {} ) +registerBuiltInFunctions(['repeat'], processRepeatLoop, {} ) +registerBuiltInFunctions(['while'], processWhileLoop, {} ) /* they are all mapped to `<-` but we separate super assignments */ registerReplacementFunctions({ makeMaybe: true }, ['<-', '<<-'], '[', '[[', '$', '@', 'names', 'dimnames', 'attributes', 'attr', 'class', 'levels', 'rownames', 'colnames') diff --git a/src/dataflow/graph/graph.ts b/src/dataflow/graph/graph.ts index cc756938c2..55af864360 100644 --- a/src/dataflow/graph/graph.ts +++ b/src/dataflow/graph/graph.ts @@ -235,7 +235,7 @@ export class DataflowGraph): this { const { fromId, toId } = extractEdgeIds(from, to) const { type, ...rest } = edgeInfo - + if(fromId === toId) { return this } diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts index 2606b8ed44..8c8bd79e4d 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts @@ -21,6 +21,7 @@ import type { RUnnamedArgument } from '../../../../../../r-bridge/lang-4.x/ast/m import { VertexType } from '../../../../../graph/vertex' import { define } from '../../../../../environments/define' import { EdgeType } from '../../../../../graph/edge' +import { dataflowGraphToMermaidUrl } from '../../../../../../core/print/dataflow-printer'; function toReplacementSymbol(target: RNodeWithParent & Base & Location, prefix: string, superAssignment: boolean): RSymbol { return { @@ -41,12 +42,12 @@ function getEffectiveOrder(config: { } export interface AssignmentConfiguration { - readonly superAssignment?: boolean - readonly swapSourceAndTarget?: boolean + readonly superAssignment?: boolean + readonly swapSourceAndTarget?: boolean /* Make maybe if assigned to symbol */ - readonly makeMaybe?: boolean - readonly quoteSource?: boolean - readonly canBeReplacement?: boolean + readonly makeMaybe?: boolean + readonly quoteSource?: boolean + readonly canBeReplacement?: boolean } /** @@ -149,6 +150,7 @@ function processAssignmentToString( data, reverseOrder: !config.swapSourceAndTarget }) + return processAssignmentToSymbol({ ...config, name, @@ -200,7 +202,7 @@ function processAssignmentToSymbol({ // we drop the first arg which we use to pass along arguments :D const readFromSourceWritten = sourceArg.out.slice(1) - const readTargets: readonly IdentifierReference[] = [{ nodeId: name.info.id, name: name.content, controlDependencies: data.controlDependencies }, ...sourceArg.unknownReferences, ...sourceArg.in, ...targetArg.in.filter(i => i.nodeId !== target.info.id), ...readFromSourceWritten] + const readTargets: readonly IdentifierReference[] = [{ nodeId: rootId, name: name.content, controlDependencies: data.controlDependencies }, ...sourceArg.unknownReferences, ...sourceArg.in, ...targetArg.in.filter(i => i.nodeId !== target.info.id), ...readFromSourceWritten] const writeTargets = [...writeNodes, ...writeNodes, ...readFromSourceWritten] information.environment = overwriteEnvironment(targetArg.environment, sourceArg.environment) @@ -223,7 +225,7 @@ function processAssignmentToSymbol({ } } - information.graph.addEdge(name.info.id, targetArg.entryPoint, { type: EdgeType.Returns }) + information.graph.addEdge(rootId, targetArg.entryPoint, { type: EdgeType.Returns }) if(quoteSource) { information.graph.addEdge(rootId, source.info.id, { type: EdgeType.NonStandardEvaluation }) diff --git a/test/functionality/_helper/shell.ts b/test/functionality/_helper/shell.ts index 668727c9d9..10fe3281c1 100644 --- a/test/functionality/_helper/shell.ts +++ b/test/functionality/_helper/shell.ts @@ -242,9 +242,10 @@ export function assertDataflow( { label: 'got', graph: info.dataflow.graph, mark: mapProblematicNodesToIds(report.problematic()) }, `%% ${input.replace(/\n/g, '\n%% ')}\n` + report.comments()?.map(n => `%% ${n}\n`).join('') ?? '' + '\n' ) - console.error('best-effort reconstruction:\n', printAsBuilder(info.dataflow.graph)) - console.log(normalizedAstToMermaidUrl(info.normalize.ast)) + console.error('ast', normalizedAstToMermaidUrl(info.normalize.ast)) + + console.error('best-effort reconstruction:\n', printAsBuilder(info.dataflow.graph)) console.error('diff:\n', diff) throw e From 301200973cba651c46a99670002fb3e3e9e8b451 Mon Sep 17 00:00:00 2001 From: EagleoutIce Date: Tue, 21 May 2024 12:44:21 +0200 Subject: [PATCH 4/6] feat-fix: sanity check for environments --- src/dataflow/environments/diff.ts | 4 +++- src/dataflow/graph/graph.ts | 5 ++++- test/functionality/util/quads-tests.ts | 21 +++++++++------------ 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/dataflow/environments/diff.ts b/src/dataflow/environments/diff.ts index 6c1a0999da..eb4d468ae5 100644 --- a/src/dataflow/environments/diff.ts +++ b/src/dataflow/environments/diff.ts @@ -80,7 +80,9 @@ export function diffEnvironment(a: IEn export function diffEnvironmentInformation(a: REnvironmentInformation | undefined, b: REnvironmentInformation | undefined, info: GenericDifferenceInformation): void { if(a === undefined || b === undefined) { - info.report.addComment(`${info.position}Different environments: ${JSON.stringify(a, jsonReplacer)} vs. ${JSON.stringify(b, jsonReplacer)}`) + if(a !== b) { + info.report.addComment(`${info.position}Different environments: ${JSON.stringify(a, jsonReplacer)} vs. ${JSON.stringify(b, jsonReplacer)}`) + } return } diffEnvironment(a.current, b.current, info) diff --git a/src/dataflow/graph/graph.ts b/src/dataflow/graph/graph.ts index 55af864360..fa66f9baf6 100644 --- a/src/dataflow/graph/graph.ts +++ b/src/dataflow/graph/graph.ts @@ -207,9 +207,12 @@ export class DataflowGraph { <${idPrefix}1> <${domain}tag> "use" <${context}> . <${idPrefix}1> <${domain}id> "1"^^ <${context}> . <${idPrefix}1> <${domain}when> "always" <${context}> . -<${idPrefix}1> <${domain}environment> <${idPrefix}3> <${context}> . -<${idPrefix}3> <${domain}current> <${idPrefix}4> <${context}> . -<${idPrefix}3> <${domain}level> "0"^^ <${context}> . <${idPrefix}0> <${domain}vertices> <${idPrefix}2> <${context}> . <${idPrefix}2> <${domain}tag> "function-call" <${context}> . <${idPrefix}2> <${domain}id> "3"^^ <${context}> . -<${idPrefix}2> <${domain}environment> <${idPrefix}5> <${context}> . -<${idPrefix}5> <${domain}current> <${idPrefix}6> <${context}> . -<${idPrefix}5> <${domain}level> "0"^^ <${context}> . +<${idPrefix}2> <${domain}environment> <${idPrefix}3> <${context}> . +<${idPrefix}3> <${domain}current> <${idPrefix}4> <${context}> . +<${idPrefix}3> <${domain}level> "0"^^ <${context}> . <${idPrefix}2> <${domain}name> "foo" <${context}> . <${idPrefix}2> <${domain}onlyBuiltin> "false"^^ <${context}> . -<${idPrefix}2> <${domain}args> <${idPrefix}7> <${context}> . -<${idPrefix}7> <${domain}nodeId> "1"^^ <${context}> . +<${idPrefix}2> <${domain}args> <${idPrefix}5> <${context}> . +<${idPrefix}5> <${domain}nodeId> "1"^^ <${context}> . <${idPrefix}2> <${domain}when> "always" <${context}> . -<${idPrefix}0> <${domain}edges> <${idPrefix}8> <${context}> . -<${idPrefix}8> <${domain}from> "3"^^ <${context}> . -<${idPrefix}8> <${domain}to> "1"^^ <${context}> . -<${idPrefix}8> <${domain}type> "argument" <${context}> . +<${idPrefix}0> <${domain}edges> <${idPrefix}6> <${context}> . +<${idPrefix}6> <${domain}from> "3"^^ <${context}> . +<${idPrefix}6> <${domain}to> "1"^^ <${context}> . +<${idPrefix}6> <${domain}type> "argument" <${context}> . `) }) })) From 5539370c0833f69bf5b406724311a093905565fc Mon Sep 17 00:00:00 2001 From: EagleoutIce Date: Tue, 21 May 2024 12:49:55 +0200 Subject: [PATCH 5/6] lint-fix: handle linter errors --- src/dataflow/graph/graph.ts | 2 +- .../functions/call/built-in/built-in-assignment.ts | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/dataflow/graph/graph.ts b/src/dataflow/graph/graph.ts index fa66f9baf6..57dc3426d0 100644 --- a/src/dataflow/graph/graph.ts +++ b/src/dataflow/graph/graph.ts @@ -76,7 +76,7 @@ type EdgeData = Omit { private static DEFAULT_ENVIRONMENT: REnvironmentInformation | undefined = undefined - private _idMap: AstIdMap | undefined + private _idMap: AstIdMap | undefined constructor(idMap: AstIdMap | undefined) { DataflowGraph.DEFAULT_ENVIRONMENT ??= initializeCleanEnvironments() diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts index 8c8bd79e4d..5ca77a0747 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts @@ -21,7 +21,6 @@ import type { RUnnamedArgument } from '../../../../../../r-bridge/lang-4.x/ast/m import { VertexType } from '../../../../../graph/vertex' import { define } from '../../../../../environments/define' import { EdgeType } from '../../../../../graph/edge' -import { dataflowGraphToMermaidUrl } from '../../../../../../core/print/dataflow-printer'; function toReplacementSymbol(target: RNodeWithParent & Base & Location, prefix: string, superAssignment: boolean): RSymbol { return { @@ -42,12 +41,12 @@ function getEffectiveOrder(config: { } export interface AssignmentConfiguration { - readonly superAssignment?: boolean - readonly swapSourceAndTarget?: boolean + readonly superAssignment?: boolean + readonly swapSourceAndTarget?: boolean /* Make maybe if assigned to symbol */ - readonly makeMaybe?: boolean - readonly quoteSource?: boolean - readonly canBeReplacement?: boolean + readonly makeMaybe?: boolean + readonly quoteSource?: boolean + readonly canBeReplacement?: boolean } /** @@ -150,7 +149,7 @@ function processAssignmentToString( data, reverseOrder: !config.swapSourceAndTarget }) - + return processAssignmentToSymbol({ ...config, name, From bca2dda5f6b2869ccbaa4b92e170bd5ad3a5f2e1 Mon Sep 17 00:00:00 2001 From: EagleoutIce Date: Tue, 21 May 2024 13:05:08 +0200 Subject: [PATCH 6/6] refactor: reset the `example.R` --- test/testfiles/example.R | 1 - 1 file changed, 1 deletion(-) diff --git a/test/testfiles/example.R b/test/testfiles/example.R index 6e537c3362..cd86f3fe93 100644 --- a/test/testfiles/example.R +++ b/test/testfiles/example.R @@ -1,6 +1,5 @@ sum <- 0 product <- 1 -product <- 5 w <- 7 N <- 10