From b11decd1c887eddbc4bb5fde0e692eeed7e7b9e0 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Mon, 22 Jan 2024 11:23:41 +0100 Subject: [PATCH 01/46] wip: added testfile & started on environment (??) --- src/dataflow/environments/environment.ts | 8 ++++++++ test/testfiles/source.R | 4 ++++ 2 files changed, 12 insertions(+) create mode 100644 test/testfiles/source.R diff --git a/src/dataflow/environments/environment.ts b/src/dataflow/environments/environment.ts index c33aeb6863..1141fb754e 100644 --- a/src/dataflow/environments/environment.ts +++ b/src/dataflow/environments/environment.ts @@ -145,6 +145,14 @@ export const DefaultEnvironmentMemory = new Map Date: Mon, 22 Jan 2024 11:39:46 +0100 Subject: [PATCH 02/46] wip: some todos --- src/dataflow/environments/environment.ts | 1 + src/dataflow/internal/process/functions/function-call.ts | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/dataflow/environments/environment.ts b/src/dataflow/environments/environment.ts index 1141fb754e..d0d27c5aa8 100644 --- a/src/dataflow/environments/environment.ts +++ b/src/dataflow/environments/environment.ts @@ -146,6 +146,7 @@ export const DefaultEnvironmentMemory = new Map(functionCall: RFunctionCall(functionCall: RFunctionCall Date: Mon, 22 Jan 2024 14:49:30 +0100 Subject: [PATCH 03/46] wip: start of source argument resolve --- .../process/functions/function-call.ts | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/dataflow/internal/process/functions/function-call.ts b/src/dataflow/internal/process/functions/function-call.ts index 9bed918ab6..6cd5a816c2 100644 --- a/src/dataflow/internal/process/functions/function-call.ts +++ b/src/dataflow/internal/process/functions/function-call.ts @@ -1,11 +1,12 @@ import { DataflowInformation } from '../../info' import { DataflowProcessorInformation, processDataflowFor } from '../../../processor' import { define, overwriteEnvironments, resolveByName } from '../../../environments' -import { ParentInformation, RFunctionCall, RType } from '../../../../r-bridge' +import {ParentInformation, RFunctionCall, RParseRequest, RShell, RType} from '../../../../r-bridge' import { guard } from '../../../../util/assert' import { DataflowGraph, dataflowLogger, EdgeType, FunctionArgument } from '../../../index' import { linkArgumentsOnCall } from '../../linker' import { LocalScope } from '../../../environments/scopes' +import {SteppingSlicer} from '../../../../core' export const UnnamedFunctionCallPrefix = 'unnamed-function-call-' @@ -103,10 +104,26 @@ export function processFunctionCall(functionCall: RFunctionCall Date: Mon, 22 Jan 2024 11:23:41 +0100 Subject: [PATCH 04/46] wip: added testfile & started on environment (??) --- src/dataflow/environments/environment.ts | 8 ++++++++ test/testfiles/source.R | 4 ++++ 2 files changed, 12 insertions(+) create mode 100644 test/testfiles/source.R diff --git a/src/dataflow/environments/environment.ts b/src/dataflow/environments/environment.ts index c33aeb6863..1141fb754e 100644 --- a/src/dataflow/environments/environment.ts +++ b/src/dataflow/environments/environment.ts @@ -145,6 +145,14 @@ export const DefaultEnvironmentMemory = new Map Date: Mon, 22 Jan 2024 11:39:46 +0100 Subject: [PATCH 05/46] wip: some todos --- src/dataflow/environments/environment.ts | 1 + src/dataflow/internal/process/functions/function-call.ts | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/dataflow/environments/environment.ts b/src/dataflow/environments/environment.ts index 1141fb754e..d0d27c5aa8 100644 --- a/src/dataflow/environments/environment.ts +++ b/src/dataflow/environments/environment.ts @@ -146,6 +146,7 @@ export const DefaultEnvironmentMemory = new Map(functionCall: RFunctionCall(functionCall: RFunctionCall Date: Mon, 22 Jan 2024 14:49:30 +0100 Subject: [PATCH 06/46] wip: start of source argument resolve --- .../process/functions/function-call.ts | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/dataflow/internal/process/functions/function-call.ts b/src/dataflow/internal/process/functions/function-call.ts index 9bed918ab6..6cd5a816c2 100644 --- a/src/dataflow/internal/process/functions/function-call.ts +++ b/src/dataflow/internal/process/functions/function-call.ts @@ -1,11 +1,12 @@ import { DataflowInformation } from '../../info' import { DataflowProcessorInformation, processDataflowFor } from '../../../processor' import { define, overwriteEnvironments, resolveByName } from '../../../environments' -import { ParentInformation, RFunctionCall, RType } from '../../../../r-bridge' +import {ParentInformation, RFunctionCall, RParseRequest, RShell, RType} from '../../../../r-bridge' import { guard } from '../../../../util/assert' import { DataflowGraph, dataflowLogger, EdgeType, FunctionArgument } from '../../../index' import { linkArgumentsOnCall } from '../../linker' import { LocalScope } from '../../../environments/scopes' +import {SteppingSlicer} from '../../../../core' export const UnnamedFunctionCallPrefix = 'unnamed-function-call-' @@ -103,10 +104,26 @@ export function processFunctionCall(functionCall: RFunctionCall Date: Thu, 25 Jan 2024 13:38:11 +0100 Subject: [PATCH 07/46] refactor: start using sync r shell implementation --- src/cli/repl/commands/parse.ts | 2 +- src/core/print/parse-printer.ts | 4 +- src/core/slicer.ts | 2 +- .../ast/parser/xml/internal/xml-to-json.ts | 10 +++-- .../lang-4.x/ast/parser/xml/parser.ts | 4 +- src/r-bridge/retriever.ts | 44 +++++++++++++------ .../r-bridge/lang/ast/parse-values.ts | 2 +- 7 files changed, 44 insertions(+), 24 deletions(-) diff --git a/src/cli/repl/commands/parse.ts b/src/cli/repl/commands/parse.ts index 685c6f7ba8..cf8a328bb4 100644 --- a/src/cli/repl/commands/parse.ts +++ b/src/cli/repl/commands/parse.ts @@ -132,7 +132,7 @@ export const parseCommand: ReplCommand = { }).allRemainingSteps() const config = deepMergeObject(DEFAULT_XML_PARSER_CONFIG, { tokenMap: await shell.tokenMap() }) - const object = await xlm2jsonObject(config, result.parse) + const object = xlm2jsonObject(config, result.parse) output.stdout(depthListToTextTree(toDepthMap(object, config), config, output.formatter)) } diff --git a/src/core/print/parse-printer.ts b/src/core/print/parse-printer.ts index 9f7f5fb624..940f7d4d9d 100644 --- a/src/core/print/parse-printer.ts +++ b/src/core/print/parse-printer.ts @@ -23,8 +23,8 @@ function filterObject(obj: XmlBasedJson, keys: Set): XmlBasedJson[] | Xm } -export async function parseToQuads(code: string, config: QuadSerializationConfiguration, parseConfig: XmlParserConfig): Promise { - const obj = await xlm2jsonObject(parseConfig, code) +export function parseToQuads(code: string, config: QuadSerializationConfiguration, parseConfig: XmlParserConfig): string{ + const obj = xlm2jsonObject(parseConfig, code) // recursively filter so that if the object contains one of the keys 'a', 'b' or 'c', all other keys are ignored return serialize2quads( filterObject(obj, new Set([parseConfig.attributeName, parseConfig.childrenName, parseConfig.contentName])) as XmlBasedJson, diff --git a/src/core/slicer.ts b/src/core/slicer.ts index 375a6b5ed6..abfb041e16 100644 --- a/src/core/slicer.ts +++ b/src/core/slicer.ts @@ -202,7 +202,7 @@ export class SteppingSlicer { - return xml2js.parseStringPromise(xmlString, { +export function xlm2jsonObject(config: XmlParserConfig, xmlString: string): XmlBasedJson { + let result: XmlBasedJson = {} + xml2js.parseString(xmlString, { + // we want this to be strictly synchronous! + async: false, attrkey: config.attributeName, charkey: config.contentName, childkey: config.childrenName, @@ -22,5 +25,6 @@ export function xlm2jsonObject(config: XmlParserConfig, xmlString: string): Prom includeWhiteChars: true, normalize: false, strict: true - }) as Promise + }, (_, r)=> result = r as XmlBasedJson) + return result } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/parser.ts b/src/r-bridge/lang-4.x/ast/parser/xml/parser.ts index 8108d85011..44e9168fd8 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/parser.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/parser.ts @@ -27,12 +27,12 @@ export const parseLog = log.getSubLogger({ name: 'ast-parser' }) * * @returns The normalized and decorated AST (i.e., as a doubly linked tree) */ -export async function normalize(xmlString: string, tokenMap: TokenMap, hooks?: DeepPartial, getId: IdGenerator = deterministicCountingIdGenerator(0)): Promise { +export function normalize(xmlString: string, tokenMap: TokenMap, hooks?: DeepPartial, getId: IdGenerator = deterministicCountingIdGenerator(0)): NormalizedAst { const config = { ...DEFAULT_XML_PARSER_CONFIG, tokenMap } const hooksWithDefaults = deepMergeObject(DEFAULT_PARSER_HOOKS, hooks) as XmlParserHooks const data: ParserData = { config, hooks: hooksWithDefaults, currentRange: undefined, currentLexeme: undefined } - const object = await xlm2jsonObject(config, xmlString) + const object = xlm2jsonObject(config, xmlString) return decorateAst(parseRootObjToAst(data, object), getId) } diff --git a/src/r-bridge/retriever.ts b/src/r-bridge/retriever.ts index d6bdd30ab7..346c816d20 100644 --- a/src/r-bridge/retriever.ts +++ b/src/r-bridge/retriever.ts @@ -3,6 +3,7 @@ import { ts2r, XmlParserHooks, normalize, NormalizedAst } from './lang-4.x' import { startAndEndsWith } from '../util/strings' import { DeepPartial, DeepReadonly } from 'ts-essentials' import { guard } from '../util/assert' +import {RShellExecutor} from './shell-executor' export interface RParseRequestFromFile { request: 'file'; @@ -53,22 +54,37 @@ const ErrorMarker = 'err' * Throws if the file could not be parsed. * If successful, allows to further query the last result with {@link retrieveNumberOfRTokensOfLastParse}. */ -export async function retrieveXmlFromRCode(request: RParseRequest, shell: RShell): Promise { - if(request.ensurePackageInstalled) { - await shell.ensurePackageInstalled('xmlparsedata', true) - } - +export function retrieveXmlFromRCode(request: RParseRequest, shell: (RShell | RShellExecutor)): (Promise | string) { const suffix = request.request === 'file' ? ', encoding="utf-8"' : '' - - shell.sendCommands(`flowr_output <- flowr_parsed <- "${ErrorMarker}"`, + const setupCommands = [ + `flowr_output <- flowr_parsed <- "${ErrorMarker}"`, // now, try to retrieve the ast `try(flowr_parsed<-parse(${request.request}=${JSON.stringify(request.content)},keep.source=TRUE${suffix}),silent=FALSE)`, - 'try(flowr_output<-xmlparsedata::xml_parse_data(flowr_parsed,includeText=TRUE,pretty=FALSE),silent=FALSE)' - ) - const xml = await shell.sendCommandWithOutput(`cat(flowr_output,${ts2r(shell.options.eol)})`) - const output = xml.join(shell.options.eol) - guard(output !== ErrorMarker, () => `unable to parse R code (see the log for more information) for request ${JSON.stringify(request)}}`) - return output + 'try(flowr_output<-xmlparsedata::xml_parse_data(flowr_parsed,includeText=TRUE,pretty=FALSE),silent=FALSE)', + ] + const outputCommand = `cat(flowr_output,${ts2r(shell.options.eol)})` + + if(shell instanceof RShellExecutor){ + if(request.ensurePackageInstalled) + shell.ensurePackageInstalled('xmlparsedata',true) + + shell.run(setupCommands) + const output = shell.run(outputCommand) + guard(output !== ErrorMarker, () => `unable to parse R code (see the log for more information) for request ${JSON.stringify(request)}}`) + return output + } else { + const run = async() => { + if(request.ensurePackageInstalled) + await shell.ensurePackageInstalled('xmlparsedata', true) + + shell.sendCommands(...setupCommands) + const output = (await shell.sendCommandWithOutput(outputCommand)).join(shell.options.eol) + guard(output !== ErrorMarker, () => `unable to parse R code (see the log for more information) for request ${JSON.stringify(request)}}`) + return output + } + return run() + } + } /** @@ -77,7 +93,7 @@ export async function retrieveXmlFromRCode(request: RParseRequest, shell: RShell */ export async function retrieveNormalizedAstFromRCode(request: RParseRequest, shell: RShell, hooks?: DeepPartial): Promise { const xml = await retrieveXmlFromRCode(request, shell) - return await normalize(xml, await shell.tokenMap(), hooks) + return normalize(xml, await shell.tokenMap(), hooks) } /** diff --git a/test/functionality/r-bridge/lang/ast/parse-values.ts b/test/functionality/r-bridge/lang/ast/parse-values.ts index 0ad6f4a951..52ec27fc43 100644 --- a/test/functionality/r-bridge/lang/ast/parse-values.ts +++ b/test/functionality/r-bridge/lang/ast/parse-values.ts @@ -29,7 +29,7 @@ describe('Constant Parsing', request: 'text', content: '{', ensurePackageInstalled: true - }, shell)) + }, shell) as Promise) ) describe('numbers', () => { for(const number of RNumberPool) { From 97761a71481b7562d65c578d3e5078e88a128f59 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Mon, 22 Jan 2024 11:39:46 +0100 Subject: [PATCH 08/46] wip: rebase --- src/dataflow/internal/process/functions/function-call.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dataflow/internal/process/functions/function-call.ts b/src/dataflow/internal/process/functions/function-call.ts index 6cd5a816c2..dacb52501e 100644 --- a/src/dataflow/internal/process/functions/function-call.ts +++ b/src/dataflow/internal/process/functions/function-call.ts @@ -38,6 +38,7 @@ export function processFunctionCall(functionCall: RFunctionCall Date: Thu, 25 Jan 2024 13:38:11 +0100 Subject: [PATCH 09/46] refactor: start using sync r shell implementation --- src/cli/repl/commands/parse.ts | 2 +- src/core/print/parse-printer.ts | 4 +- src/core/slicer.ts | 2 +- .../ast/parser/xml/internal/xml-to-json.ts | 10 +++-- .../lang-4.x/ast/parser/xml/parser.ts | 4 +- src/r-bridge/retriever.ts | 44 +++++++++++++------ .../r-bridge/lang/ast/parse-values.ts | 2 +- 7 files changed, 44 insertions(+), 24 deletions(-) diff --git a/src/cli/repl/commands/parse.ts b/src/cli/repl/commands/parse.ts index 685c6f7ba8..cf8a328bb4 100644 --- a/src/cli/repl/commands/parse.ts +++ b/src/cli/repl/commands/parse.ts @@ -132,7 +132,7 @@ export const parseCommand: ReplCommand = { }).allRemainingSteps() const config = deepMergeObject(DEFAULT_XML_PARSER_CONFIG, { tokenMap: await shell.tokenMap() }) - const object = await xlm2jsonObject(config, result.parse) + const object = xlm2jsonObject(config, result.parse) output.stdout(depthListToTextTree(toDepthMap(object, config), config, output.formatter)) } diff --git a/src/core/print/parse-printer.ts b/src/core/print/parse-printer.ts index 9f7f5fb624..940f7d4d9d 100644 --- a/src/core/print/parse-printer.ts +++ b/src/core/print/parse-printer.ts @@ -23,8 +23,8 @@ function filterObject(obj: XmlBasedJson, keys: Set): XmlBasedJson[] | Xm } -export async function parseToQuads(code: string, config: QuadSerializationConfiguration, parseConfig: XmlParserConfig): Promise { - const obj = await xlm2jsonObject(parseConfig, code) +export function parseToQuads(code: string, config: QuadSerializationConfiguration, parseConfig: XmlParserConfig): string{ + const obj = xlm2jsonObject(parseConfig, code) // recursively filter so that if the object contains one of the keys 'a', 'b' or 'c', all other keys are ignored return serialize2quads( filterObject(obj, new Set([parseConfig.attributeName, parseConfig.childrenName, parseConfig.contentName])) as XmlBasedJson, diff --git a/src/core/slicer.ts b/src/core/slicer.ts index 375a6b5ed6..abfb041e16 100644 --- a/src/core/slicer.ts +++ b/src/core/slicer.ts @@ -202,7 +202,7 @@ export class SteppingSlicer { - return xml2js.parseStringPromise(xmlString, { +export function xlm2jsonObject(config: XmlParserConfig, xmlString: string): XmlBasedJson { + let result: XmlBasedJson = {} + xml2js.parseString(xmlString, { + // we want this to be strictly synchronous! + async: false, attrkey: config.attributeName, charkey: config.contentName, childkey: config.childrenName, @@ -22,5 +25,6 @@ export function xlm2jsonObject(config: XmlParserConfig, xmlString: string): Prom includeWhiteChars: true, normalize: false, strict: true - }) as Promise + }, (_, r)=> result = r as XmlBasedJson) + return result } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/parser.ts b/src/r-bridge/lang-4.x/ast/parser/xml/parser.ts index 8108d85011..44e9168fd8 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/parser.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/parser.ts @@ -27,12 +27,12 @@ export const parseLog = log.getSubLogger({ name: 'ast-parser' }) * * @returns The normalized and decorated AST (i.e., as a doubly linked tree) */ -export async function normalize(xmlString: string, tokenMap: TokenMap, hooks?: DeepPartial, getId: IdGenerator = deterministicCountingIdGenerator(0)): Promise { +export function normalize(xmlString: string, tokenMap: TokenMap, hooks?: DeepPartial, getId: IdGenerator = deterministicCountingIdGenerator(0)): NormalizedAst { const config = { ...DEFAULT_XML_PARSER_CONFIG, tokenMap } const hooksWithDefaults = deepMergeObject(DEFAULT_PARSER_HOOKS, hooks) as XmlParserHooks const data: ParserData = { config, hooks: hooksWithDefaults, currentRange: undefined, currentLexeme: undefined } - const object = await xlm2jsonObject(config, xmlString) + const object = xlm2jsonObject(config, xmlString) return decorateAst(parseRootObjToAst(data, object), getId) } diff --git a/src/r-bridge/retriever.ts b/src/r-bridge/retriever.ts index d6bdd30ab7..346c816d20 100644 --- a/src/r-bridge/retriever.ts +++ b/src/r-bridge/retriever.ts @@ -3,6 +3,7 @@ import { ts2r, XmlParserHooks, normalize, NormalizedAst } from './lang-4.x' import { startAndEndsWith } from '../util/strings' import { DeepPartial, DeepReadonly } from 'ts-essentials' import { guard } from '../util/assert' +import {RShellExecutor} from './shell-executor' export interface RParseRequestFromFile { request: 'file'; @@ -53,22 +54,37 @@ const ErrorMarker = 'err' * Throws if the file could not be parsed. * If successful, allows to further query the last result with {@link retrieveNumberOfRTokensOfLastParse}. */ -export async function retrieveXmlFromRCode(request: RParseRequest, shell: RShell): Promise { - if(request.ensurePackageInstalled) { - await shell.ensurePackageInstalled('xmlparsedata', true) - } - +export function retrieveXmlFromRCode(request: RParseRequest, shell: (RShell | RShellExecutor)): (Promise | string) { const suffix = request.request === 'file' ? ', encoding="utf-8"' : '' - - shell.sendCommands(`flowr_output <- flowr_parsed <- "${ErrorMarker}"`, + const setupCommands = [ + `flowr_output <- flowr_parsed <- "${ErrorMarker}"`, // now, try to retrieve the ast `try(flowr_parsed<-parse(${request.request}=${JSON.stringify(request.content)},keep.source=TRUE${suffix}),silent=FALSE)`, - 'try(flowr_output<-xmlparsedata::xml_parse_data(flowr_parsed,includeText=TRUE,pretty=FALSE),silent=FALSE)' - ) - const xml = await shell.sendCommandWithOutput(`cat(flowr_output,${ts2r(shell.options.eol)})`) - const output = xml.join(shell.options.eol) - guard(output !== ErrorMarker, () => `unable to parse R code (see the log for more information) for request ${JSON.stringify(request)}}`) - return output + 'try(flowr_output<-xmlparsedata::xml_parse_data(flowr_parsed,includeText=TRUE,pretty=FALSE),silent=FALSE)', + ] + const outputCommand = `cat(flowr_output,${ts2r(shell.options.eol)})` + + if(shell instanceof RShellExecutor){ + if(request.ensurePackageInstalled) + shell.ensurePackageInstalled('xmlparsedata',true) + + shell.run(setupCommands) + const output = shell.run(outputCommand) + guard(output !== ErrorMarker, () => `unable to parse R code (see the log for more information) for request ${JSON.stringify(request)}}`) + return output + } else { + const run = async() => { + if(request.ensurePackageInstalled) + await shell.ensurePackageInstalled('xmlparsedata', true) + + shell.sendCommands(...setupCommands) + const output = (await shell.sendCommandWithOutput(outputCommand)).join(shell.options.eol) + guard(output !== ErrorMarker, () => `unable to parse R code (see the log for more information) for request ${JSON.stringify(request)}}`) + return output + } + return run() + } + } /** @@ -77,7 +93,7 @@ export async function retrieveXmlFromRCode(request: RParseRequest, shell: RShell */ export async function retrieveNormalizedAstFromRCode(request: RParseRequest, shell: RShell, hooks?: DeepPartial): Promise { const xml = await retrieveXmlFromRCode(request, shell) - return await normalize(xml, await shell.tokenMap(), hooks) + return normalize(xml, await shell.tokenMap(), hooks) } /** diff --git a/test/functionality/r-bridge/lang/ast/parse-values.ts b/test/functionality/r-bridge/lang/ast/parse-values.ts index 0ad6f4a951..52ec27fc43 100644 --- a/test/functionality/r-bridge/lang/ast/parse-values.ts +++ b/test/functionality/r-bridge/lang/ast/parse-values.ts @@ -29,7 +29,7 @@ describe('Constant Parsing', request: 'text', content: '{', ensurePackageInstalled: true - }, shell)) + }, shell) as Promise) ) describe('numbers', () => { for(const number of RNumberPool) { From c1d13460e252b6b65d8c16b7dd2504c6828a8bcb Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Thu, 25 Jan 2024 14:32:09 +0100 Subject: [PATCH 10/46] wip: use executeSingleSubStep for parsing sourced code --- .../process/functions/function-call.ts | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/dataflow/internal/process/functions/function-call.ts b/src/dataflow/internal/process/functions/function-call.ts index dacb52501e..81fdb299f0 100644 --- a/src/dataflow/internal/process/functions/function-call.ts +++ b/src/dataflow/internal/process/functions/function-call.ts @@ -1,12 +1,13 @@ import { DataflowInformation } from '../../info' import { DataflowProcessorInformation, processDataflowFor } from '../../../processor' import { define, overwriteEnvironments, resolveByName } from '../../../environments' -import {ParentInformation, RFunctionCall, RParseRequest, RShell, RType} from '../../../../r-bridge' +import {ParentInformation, RFunctionCall, RType} from '../../../../r-bridge' import { guard } from '../../../../util/assert' import { DataflowGraph, dataflowLogger, EdgeType, FunctionArgument } from '../../../index' import { linkArgumentsOnCall } from '../../linker' import { LocalScope } from '../../../environments/scopes' -import {SteppingSlicer} from '../../../../core' +import {RShellExecutor} from '../../../../r-bridge/shell-executor' +import {executeSingleSubStep} from '../../../../core' export const UnnamedFunctionCallPrefix = 'unnamed-function-call-' @@ -105,24 +106,19 @@ export function processFunctionCall(functionCall: RFunctionCall Date: Mon, 22 Jan 2024 11:23:41 +0100 Subject: [PATCH 11/46] wip: added testfile & started on environment (??) --- src/dataflow/environments/environment.ts | 8 ++++++++ test/testfiles/source.R | 4 ++++ 2 files changed, 12 insertions(+) create mode 100644 test/testfiles/source.R diff --git a/src/dataflow/environments/environment.ts b/src/dataflow/environments/environment.ts index ee37ffb3f2..2d9b644885 100644 --- a/src/dataflow/environments/environment.ts +++ b/src/dataflow/environments/environment.ts @@ -147,6 +147,14 @@ export const DefaultEnvironmentMemory = new Map Date: Mon, 22 Jan 2024 11:39:46 +0100 Subject: [PATCH 12/46] wip: some todos --- src/dataflow/environments/environment.ts | 1 + src/dataflow/internal/process/functions/function-call.ts | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/dataflow/environments/environment.ts b/src/dataflow/environments/environment.ts index 2d9b644885..755ce2f73a 100644 --- a/src/dataflow/environments/environment.ts +++ b/src/dataflow/environments/environment.ts @@ -148,6 +148,7 @@ export const DefaultEnvironmentMemory = new Map(functionCall: RFunctionCall(functionCall: RFunctionCall Date: Mon, 22 Jan 2024 14:49:30 +0100 Subject: [PATCH 13/46] wip: start of source argument resolve --- .../process/functions/function-call.ts | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/dataflow/internal/process/functions/function-call.ts b/src/dataflow/internal/process/functions/function-call.ts index adfe0114c3..88b7f64284 100644 --- a/src/dataflow/internal/process/functions/function-call.ts +++ b/src/dataflow/internal/process/functions/function-call.ts @@ -2,13 +2,13 @@ import type { DataflowInformation } from '../../info' import type { DataflowProcessorInformation} from '../../../processor' import { processDataflowFor } from '../../../processor' import { define, overwriteEnvironments, resolveByName } from '../../../environments' -import type { ParentInformation, RFunctionCall} from '../../../../r-bridge' -import { RType } from '../../../../r-bridge' +import {ParentInformation, RFunctionCall, RParseRequest, RShell, RType} from '../../../../r-bridge' import { guard } from '../../../../util/assert' import type { FunctionArgument } from '../../../index' import { DataflowGraph, dataflowLogger, EdgeType } from '../../../index' import { linkArgumentsOnCall } from '../../linker' import { LocalScope } from '../../../environments/scopes' +import {SteppingSlicer} from '../../../../core' export const UnnamedFunctionCallPrefix = 'unnamed-function-call-' @@ -106,10 +106,26 @@ export function processFunctionCall(functionCall: RFunctionCall Date: Mon, 22 Jan 2024 11:39:46 +0100 Subject: [PATCH 14/46] wip: rebase --- src/dataflow/internal/process/functions/function-call.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dataflow/internal/process/functions/function-call.ts b/src/dataflow/internal/process/functions/function-call.ts index 88b7f64284..1902c20991 100644 --- a/src/dataflow/internal/process/functions/function-call.ts +++ b/src/dataflow/internal/process/functions/function-call.ts @@ -40,6 +40,7 @@ export function processFunctionCall(functionCall: RFunctionCall Date: Thu, 25 Jan 2024 13:38:11 +0100 Subject: [PATCH 15/46] refactor: start using sync r shell implementation --- src/cli/repl/commands/parse.ts | 2 +- src/core/print/parse-printer.ts | 4 +- src/core/slicer.ts | 2 +- .../ast/parser/xml/internal/xml-to-json.ts | 10 +++-- .../lang-4.x/ast/parser/xml/parser.ts | 4 +- src/r-bridge/retriever.ts | 44 +++++++++++++------ .../r-bridge/lang/ast/parse-values.ts | 2 +- 7 files changed, 44 insertions(+), 24 deletions(-) diff --git a/src/cli/repl/commands/parse.ts b/src/cli/repl/commands/parse.ts index a851cac445..ec8e57c2cc 100644 --- a/src/cli/repl/commands/parse.ts +++ b/src/cli/repl/commands/parse.ts @@ -135,7 +135,7 @@ export const parseCommand: ReplCommand = { }).allRemainingSteps() const config = deepMergeObject(DEFAULT_XML_PARSER_CONFIG, { tokenMap: await shell.tokenMap() }) - const object = await xlm2jsonObject(config, result.parse) + const object = xlm2jsonObject(config, result.parse) output.stdout(depthListToTextTree(toDepthMap(object, config), config, output.formatter)) } diff --git a/src/core/print/parse-printer.ts b/src/core/print/parse-printer.ts index fcc1698505..95e341d210 100644 --- a/src/core/print/parse-printer.ts +++ b/src/core/print/parse-printer.ts @@ -24,8 +24,8 @@ function filterObject(obj: XmlBasedJson, keys: Set): XmlBasedJson[] | Xm } -export async function parseToQuads(code: string, config: QuadSerializationConfiguration, parseConfig: XmlParserConfig): Promise { - const obj = await xlm2jsonObject(parseConfig, code) +export function parseToQuads(code: string, config: QuadSerializationConfiguration, parseConfig: XmlParserConfig): string{ + const obj = xlm2jsonObject(parseConfig, code) // recursively filter so that if the object contains one of the keys 'a', 'b' or 'c', all other keys are ignored return serialize2quads( filterObject(obj, new Set([parseConfig.attributeName, parseConfig.childrenName, parseConfig.contentName])) as XmlBasedJson, diff --git a/src/core/slicer.ts b/src/core/slicer.ts index e652d34667..e026533993 100644 --- a/src/core/slicer.ts +++ b/src/core/slicer.ts @@ -204,7 +204,7 @@ export class SteppingSlicer { - return xml2js.parseStringPromise(xmlString, { +export function xlm2jsonObject(config: XmlParserConfig, xmlString: string): XmlBasedJson { + let result: XmlBasedJson = {} + xml2js.parseString(xmlString, { + // we want this to be strictly synchronous! + async: false, attrkey: config.attributeName, charkey: config.contentName, childkey: config.childrenName, @@ -22,5 +25,6 @@ export function xlm2jsonObject(config: XmlParserConfig, xmlString: string): Prom includeWhiteChars: true, normalize: false, strict: true - }) as Promise + }, (_, r)=> result = r as XmlBasedJson) + return result } diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/parser.ts b/src/r-bridge/lang-4.x/ast/parser/xml/parser.ts index 1f068b650a..9ae30443dd 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/parser.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/parser.ts @@ -30,12 +30,12 @@ export const parseLog = log.getSubLogger({ name: 'ast-parser' }) * * @returns The normalized and decorated AST (i.e., as a doubly linked tree) */ -export async function normalize(xmlString: string, tokenMap: TokenMap, hooks?: DeepPartial, getId: IdGenerator = deterministicCountingIdGenerator(0)): Promise { +export function normalize(xmlString: string, tokenMap: TokenMap, hooks?: DeepPartial, getId: IdGenerator = deterministicCountingIdGenerator(0)): NormalizedAst { const config = { ...DEFAULT_XML_PARSER_CONFIG, tokenMap } const hooksWithDefaults = deepMergeObject(DEFAULT_PARSER_HOOKS, hooks) as XmlParserHooks const data: ParserData = { config, hooks: hooksWithDefaults, currentRange: undefined, currentLexeme: undefined } - const object = await xlm2jsonObject(config, xmlString) + const object = xlm2jsonObject(config, xmlString) return decorateAst(parseRootObjToAst(data, object), getId) } diff --git a/src/r-bridge/retriever.ts b/src/r-bridge/retriever.ts index 6ae8a939bf..a4870ae497 100644 --- a/src/r-bridge/retriever.ts +++ b/src/r-bridge/retriever.ts @@ -4,6 +4,7 @@ import { ts2r, normalize } from './lang-4.x' import { startAndEndsWith } from '../util/strings' import type { DeepPartial, DeepReadonly } from 'ts-essentials' import { guard } from '../util/assert' +import {RShellExecutor} from './shell-executor' export interface RParseRequestFromFile { request: 'file'; @@ -54,22 +55,37 @@ const ErrorMarker = 'err' * Throws if the file could not be parsed. * If successful, allows to further query the last result with {@link retrieveNumberOfRTokensOfLastParse}. */ -export async function retrieveXmlFromRCode(request: RParseRequest, shell: RShell): Promise { - if(request.ensurePackageInstalled) { - await shell.ensurePackageInstalled('xmlparsedata', true) - } - +export function retrieveXmlFromRCode(request: RParseRequest, shell: (RShell | RShellExecutor)): (Promise | string) { const suffix = request.request === 'file' ? ', encoding="utf-8"' : '' - - shell.sendCommands(`flowr_output <- flowr_parsed <- "${ErrorMarker}"`, + const setupCommands = [ + `flowr_output <- flowr_parsed <- "${ErrorMarker}"`, // now, try to retrieve the ast `try(flowr_parsed<-parse(${request.request}=${JSON.stringify(request.content)},keep.source=TRUE${suffix}),silent=FALSE)`, - 'try(flowr_output<-xmlparsedata::xml_parse_data(flowr_parsed,includeText=TRUE,pretty=FALSE),silent=FALSE)' - ) - const xml = await shell.sendCommandWithOutput(`cat(flowr_output,${ts2r(shell.options.eol)})`) - const output = xml.join(shell.options.eol) - guard(output !== ErrorMarker, () => `unable to parse R code (see the log for more information) for request ${JSON.stringify(request)}}`) - return output + 'try(flowr_output<-xmlparsedata::xml_parse_data(flowr_parsed,includeText=TRUE,pretty=FALSE),silent=FALSE)', + ] + const outputCommand = `cat(flowr_output,${ts2r(shell.options.eol)})` + + if(shell instanceof RShellExecutor){ + if(request.ensurePackageInstalled) + shell.ensurePackageInstalled('xmlparsedata',true) + + shell.run(setupCommands) + const output = shell.run(outputCommand) + guard(output !== ErrorMarker, () => `unable to parse R code (see the log for more information) for request ${JSON.stringify(request)}}`) + return output + } else { + const run = async() => { + if(request.ensurePackageInstalled) + await shell.ensurePackageInstalled('xmlparsedata', true) + + shell.sendCommands(...setupCommands) + const output = (await shell.sendCommandWithOutput(outputCommand)).join(shell.options.eol) + guard(output !== ErrorMarker, () => `unable to parse R code (see the log for more information) for request ${JSON.stringify(request)}}`) + return output + } + return run() + } + } /** @@ -78,7 +94,7 @@ export async function retrieveXmlFromRCode(request: RParseRequest, shell: RShell */ export async function retrieveNormalizedAstFromRCode(request: RParseRequest, shell: RShell, hooks?: DeepPartial): Promise { const xml = await retrieveXmlFromRCode(request, shell) - return await normalize(xml, await shell.tokenMap(), hooks) + return normalize(xml, await shell.tokenMap(), hooks) } /** diff --git a/test/functionality/r-bridge/lang/ast/parse-values.ts b/test/functionality/r-bridge/lang/ast/parse-values.ts index 0ad6f4a951..52ec27fc43 100644 --- a/test/functionality/r-bridge/lang/ast/parse-values.ts +++ b/test/functionality/r-bridge/lang/ast/parse-values.ts @@ -29,7 +29,7 @@ describe('Constant Parsing', request: 'text', content: '{', ensurePackageInstalled: true - }, shell)) + }, shell) as Promise) ) describe('numbers', () => { for(const number of RNumberPool) { From 559d80c5d6f12eeed1fb5b945f58c676f375313f Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Mon, 22 Jan 2024 11:39:46 +0100 Subject: [PATCH 16/46] wip: some todos --- src/dataflow/internal/process/functions/function-call.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/dataflow/internal/process/functions/function-call.ts b/src/dataflow/internal/process/functions/function-call.ts index 1902c20991..88b7f64284 100644 --- a/src/dataflow/internal/process/functions/function-call.ts +++ b/src/dataflow/internal/process/functions/function-call.ts @@ -40,7 +40,6 @@ export function processFunctionCall(functionCall: RFunctionCall Date: Thu, 25 Jan 2024 14:32:09 +0100 Subject: [PATCH 17/46] wip: use executeSingleSubStep for parsing sourced code --- .../process/functions/function-call.ts | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/dataflow/internal/process/functions/function-call.ts b/src/dataflow/internal/process/functions/function-call.ts index 88b7f64284..9e3cff50dd 100644 --- a/src/dataflow/internal/process/functions/function-call.ts +++ b/src/dataflow/internal/process/functions/function-call.ts @@ -2,13 +2,14 @@ import type { DataflowInformation } from '../../info' import type { DataflowProcessorInformation} from '../../../processor' import { processDataflowFor } from '../../../processor' import { define, overwriteEnvironments, resolveByName } from '../../../environments' -import {ParentInformation, RFunctionCall, RParseRequest, RShell, RType} from '../../../../r-bridge' +import {ParentInformation, RFunctionCall, RType} from '../../../../r-bridge' import { guard } from '../../../../util/assert' import type { FunctionArgument } from '../../../index' import { DataflowGraph, dataflowLogger, EdgeType } from '../../../index' import { linkArgumentsOnCall } from '../../linker' import { LocalScope } from '../../../environments/scopes' -import {SteppingSlicer} from '../../../../core' +import {RShellExecutor} from '../../../../r-bridge/shell-executor' +import {executeSingleSubStep} from '../../../../core' export const UnnamedFunctionCallPrefix = 'unnamed-function-call-' @@ -106,24 +107,19 @@ export function processFunctionCall(functionCall: RFunctionCall Date: Tue, 30 Jan 2024 13:42:13 +0100 Subject: [PATCH 18/46] wip: fix merge issues --- src/dataflow/internal/process/functions/function-call.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dataflow/internal/process/functions/function-call.ts b/src/dataflow/internal/process/functions/function-call.ts index 9e3cff50dd..65973de54c 100644 --- a/src/dataflow/internal/process/functions/function-call.ts +++ b/src/dataflow/internal/process/functions/function-call.ts @@ -2,7 +2,8 @@ import type { DataflowInformation } from '../../info' import type { DataflowProcessorInformation} from '../../../processor' import { processDataflowFor } from '../../../processor' import { define, overwriteEnvironments, resolveByName } from '../../../environments' -import {ParentInformation, RFunctionCall, RType} from '../../../../r-bridge' +import type {ParentInformation, RFunctionCall} from '../../../../r-bridge' +import { RType} from '../../../../r-bridge' import { guard } from '../../../../util/assert' import type { FunctionArgument } from '../../../index' import { DataflowGraph, dataflowLogger, EdgeType } from '../../../index' From a0f8f0000f061f0e4dba865cb480649ec9afd51a Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Tue, 30 Jan 2024 14:11:12 +0100 Subject: [PATCH 19/46] feat-fix: avoid cyclic dependency when using step executor --- src/core/steps.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/steps.ts b/src/core/steps.ts index d0bc4ca9ef..51369ed47d 100644 --- a/src/core/steps.ts +++ b/src/core/steps.ts @@ -96,7 +96,8 @@ export const STEPS_PER_FILE = { } satisfies IStep, 'dataflow': { description: 'Construct the dataflow graph', - processor: produceDataFlowGraph, + // TODO avoid cyclic dependency when using step executor in function-call (feels like a dirty hack) + processor: a => produceDataFlowGraph(a), required: 'once-per-file', printer: { [StepOutputFormat.Internal]: internalPrinter, From 9fc51f0148178d65253dc90c83a3519a4a42532c Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Tue, 30 Jan 2024 14:56:20 +0100 Subject: [PATCH 20/46] wip: run normalize and dataflow on sourced file --- .../internal/process/functions/function-call.ts | 17 ++++++++++------- src/r-bridge/retriever.ts | 2 +- test/testfiles/source.R | 4 +++- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/dataflow/internal/process/functions/function-call.ts b/src/dataflow/internal/process/functions/function-call.ts index 65973de54c..ebf1bb86fb 100644 --- a/src/dataflow/internal/process/functions/function-call.ts +++ b/src/dataflow/internal/process/functions/function-call.ts @@ -1,11 +1,12 @@ import type { DataflowInformation } from '../../info' -import type { DataflowProcessorInformation} from '../../../processor' +import type {DataflowProcessorInformation} from '../../../processor' import { processDataflowFor } from '../../../processor' import { define, overwriteEnvironments, resolveByName } from '../../../environments' -import type {ParentInformation, RFunctionCall} from '../../../../r-bridge' +import type {NormalizedAst, ParentInformation, RFunctionCall} from '../../../../r-bridge' +import { removeTokenMapQuotationMarks} from '../../../../r-bridge' import { RType} from '../../../../r-bridge' import { guard } from '../../../../util/assert' -import type { FunctionArgument } from '../../../index' +import type {FunctionArgument} from '../../../index' import { DataflowGraph, dataflowLogger, EdgeType } from '../../../index' import { linkArgumentsOnCall } from '../../linker' import { LocalScope } from '../../../environments/scopes' @@ -113,14 +114,16 @@ export function processFunctionCall(functionCall: RFunctionCall + const dataflow = processDataflowFor(normalized.ast, { ...data, environments: finalEnv }) + // TODO integrate this into the current graph somehow + console.log(dataflow) } } diff --git a/src/r-bridge/retriever.ts b/src/r-bridge/retriever.ts index a4870ae497..b00417ef2f 100644 --- a/src/r-bridge/retriever.ts +++ b/src/r-bridge/retriever.ts @@ -69,7 +69,7 @@ export function retrieveXmlFromRCode(request: RParseRequest, shell: (RShell | RS if(request.ensurePackageInstalled) shell.ensurePackageInstalled('xmlparsedata',true) - shell.run(setupCommands) + shell.addPrerequisites(setupCommands) const output = shell.run(outputCommand) guard(output !== ErrorMarker, () => `unable to parse R code (see the log for more information) for request ${JSON.stringify(request)}}`) return output diff --git a/test/testfiles/source.R b/test/testfiles/source.R index 4bd47f194f..f14a7fd123 100644 --- a/test/testfiles/source.R +++ b/test/testfiles/source.R @@ -1,4 +1,6 @@ -source("example.R") +# it seems like R wants this path to be relative to the source file, +# but we might want it relative to the cwd of the shell executor? +source("test/testfiles/example.R") cat("-----\n") cat("Sourced N:", N, "\n") From 32b049e4961f66b7bd9488d1f469baec82cb7dba Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Wed, 31 Jan 2024 13:13:51 +0100 Subject: [PATCH 21/46] wip: some work on source dataflowing --- .../process/functions/function-call.ts | 20 +++++++++++++++---- .../lang-4.x/ast/model/processing/decorate.ts | 5 +++++ test/testfiles/source.R | 8 ++------ test/testfiles/sourced.R | 1 + 4 files changed, 24 insertions(+), 10 deletions(-) create mode 100644 test/testfiles/sourced.R diff --git a/src/dataflow/internal/process/functions/function-call.ts b/src/dataflow/internal/process/functions/function-call.ts index ebf1bb86fb..e3c493e605 100644 --- a/src/dataflow/internal/process/functions/function-call.ts +++ b/src/dataflow/internal/process/functions/function-call.ts @@ -1,12 +1,14 @@ import type { DataflowInformation } from '../../info' import type {DataflowProcessorInformation} from '../../../processor' import { processDataflowFor } from '../../../processor' -import { define, overwriteEnvironments, resolveByName } from '../../../environments' +import {define, overwriteEnvironments, resolveByName} from '../../../environments' import type {NormalizedAst, ParentInformation, RFunctionCall} from '../../../../r-bridge' +import {fileNameDeterministicCountingIdGenerator} from '../../../../r-bridge' import { removeTokenMapQuotationMarks} from '../../../../r-bridge' import { RType} from '../../../../r-bridge' import { guard } from '../../../../util/assert' import type {FunctionArgument} from '../../../index' +import { graphToMermaidUrl} from '../../../index' import { DataflowGraph, dataflowLogger, EdgeType } from '../../../index' import { linkArgumentsOnCall } from '../../linker' import { LocalScope } from '../../../environments/scopes' @@ -115,18 +117,28 @@ export function processFunctionCall(functionCall: RFunctionCall + const normalized = executeSingleSubStep('normalize', parsed, executor.getTokenMap(), undefined, fileNameDeterministicCountingIdGenerator(path)) as NormalizedAst const dataflow = processDataflowFor(normalized.ast, { ...data, environments: finalEnv }) - // TODO integrate this into the current graph somehow - console.log(dataflow) + + // update our graph with the sourced file's information + // TODO just set finalEnv, use overwriteEnvironments or appendEnvironments? makes no difference in the current example + finalEnv = overwriteEnvironments(finalEnv, dataflow.environments) + finalGraph.mergeWith(dataflow.graph) + // TODO is this the way it should be?? just changing the data ast seems fishy + for(const [k,v] of normalized.idMap) + data.completeAst.idMap.set(k,v) } } + console.log(graphToMermaidUrl(finalGraph, data.completeAst.idMap)) + return { unknownReferences: [], in: inIds, diff --git a/src/r-bridge/lang-4.x/ast/model/processing/decorate.ts b/src/r-bridge/lang-4.x/ast/model/processing/decorate.ts index 9e0a9cb4c0..f848b6c532 100644 --- a/src/r-bridge/lang-4.x/ast/model/processing/decorate.ts +++ b/src/r-bridge/lang-4.x/ast/model/processing/decorate.ts @@ -46,6 +46,11 @@ export function deterministicCountingIdGenerator(start = 0): () => NodeId { return () => `${id++}` } +export function fileNameDeterministicCountingIdGenerator(filename: string, start = 0): () => NodeId { + let id = start + return () => `${filename}-${id++}` +} + function loc2Id(loc: SourceRange) { return `${loc.start.line}:${loc.start.column}-${loc.end.line}:${loc.end.column}` } diff --git a/test/testfiles/source.R b/test/testfiles/source.R index f14a7fd123..4ea2246a18 100644 --- a/test/testfiles/source.R +++ b/test/testfiles/source.R @@ -1,6 +1,2 @@ -# it seems like R wants this path to be relative to the source file, -# but we might want it relative to the cwd of the shell executor? -source("test/testfiles/example.R") - -cat("-----\n") -cat("Sourced N:", N, "\n") +source("test/testfiles/sourced.R") +cat(N) \ No newline at end of file diff --git a/test/testfiles/sourced.R b/test/testfiles/sourced.R new file mode 100644 index 0000000000..43b4814b5f --- /dev/null +++ b/test/testfiles/sourced.R @@ -0,0 +1 @@ +N <- 9 \ No newline at end of file From 0392ad6e29fa4ef4eedc4bfccb71a9f66db85850 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Wed, 31 Jan 2024 13:36:58 +0100 Subject: [PATCH 22/46] refactor: remove print --- src/dataflow/internal/process/functions/function-call.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/dataflow/internal/process/functions/function-call.ts b/src/dataflow/internal/process/functions/function-call.ts index e3c493e605..aefacbc7c8 100644 --- a/src/dataflow/internal/process/functions/function-call.ts +++ b/src/dataflow/internal/process/functions/function-call.ts @@ -8,7 +8,6 @@ import { removeTokenMapQuotationMarks} from '../../../../r-bridge' import { RType} from '../../../../r-bridge' import { guard } from '../../../../util/assert' import type {FunctionArgument} from '../../../index' -import { graphToMermaidUrl} from '../../../index' import { DataflowGraph, dataflowLogger, EdgeType } from '../../../index' import { linkArgumentsOnCall } from '../../linker' import { LocalScope } from '../../../environments/scopes' @@ -137,8 +136,6 @@ export function processFunctionCall(functionCall: RFunctionCall Date: Thu, 1 Feb 2024 11:28:20 +0100 Subject: [PATCH 23/46] refactor: clean up todos and move source to its own function --- src/core/steps.ts | 1 - src/dataflow/environments/environment.ts | 1 - .../process/functions/function-call.ts | 41 ++++--------------- .../internal/process/functions/source.ts | 33 +++++++++++++++ .../ast/parser/xml/internal/xml-to-json.ts | 2 +- src/r-bridge/retriever.ts | 16 ++++---- 6 files changed, 51 insertions(+), 43 deletions(-) create mode 100644 src/dataflow/internal/process/functions/source.ts diff --git a/src/core/steps.ts b/src/core/steps.ts index 51369ed47d..edafcca114 100644 --- a/src/core/steps.ts +++ b/src/core/steps.ts @@ -96,7 +96,6 @@ export const STEPS_PER_FILE = { } satisfies IStep, 'dataflow': { description: 'Construct the dataflow graph', - // TODO avoid cyclic dependency when using step executor in function-call (feels like a dirty hack) processor: a => produceDataFlowGraph(a), required: 'once-per-file', printer: { diff --git a/src/dataflow/environments/environment.ts b/src/dataflow/environments/environment.ts index 755ce2f73a..2d9b644885 100644 --- a/src/dataflow/environments/environment.ts +++ b/src/dataflow/environments/environment.ts @@ -148,7 +148,6 @@ export const DefaultEnvironmentMemory = new Map(functionCall: RFunctionCall - const dataflow = processDataflowFor(normalized.ast, { ...data, environments: finalEnv }) - - // update our graph with the sourced file's information - // TODO just set finalEnv, use overwriteEnvironments or appendEnvironments? makes no difference in the current example - finalEnv = overwriteEnvironments(finalEnv, dataflow.environments) - finalGraph.mergeWith(dataflow.graph) - // TODO is this the way it should be?? just changing the data ast seems fishy - for(const [k,v] of normalized.idMap) - data.completeAst.idMap.set(k,v) - } - } - - return { + let info: DataflowInformation = { unknownReferences: [], in: inIds, out: functionName.out, // we do not keep argument out as it has been linked by the function @@ -144,5 +115,11 @@ export function processFunctionCall(functionCall: RFunctionCall(functionCall: RFunctionCall, data: DataflowProcessorInformation, information: DataflowInformation): DataflowInformation { + const sourceFile = functionCall.arguments[0] + if(sourceFile?.value?.type == RType.String) { + const executor = new RShellExecutor() + const path = removeTokenMapQuotationMarks(sourceFile.lexeme) + + // parse, normalize and dataflow the sourced file + const parsed = executeSingleSubStep('parse', { + request: 'file', + content: path, + ensurePackageInstalled: true + }, executor) as string + const normalized = executeSingleSubStep('normalize', parsed, executor.getTokenMap(), undefined, fileNameDeterministicCountingIdGenerator(path)) as NormalizedAst + const dataflow = processDataflowFor(normalized.ast, {...data, environments: information.environments}) + + // update our graph with the sourced file's information + const newInformation = {...information} + newInformation.environments = overwriteEnvironments(information.environments, dataflow.environments) + newInformation.graph.mergeWith(dataflow.graph) + // TODO is this the way it should be?? just changing the data ast seems fishy + for(const [k, v] of normalized.idMap) + data.completeAst.idMap.set(k, v) + return newInformation + } + return information +} \ No newline at end of file diff --git a/src/r-bridge/lang-4.x/ast/parser/xml/internal/xml-to-json.ts b/src/r-bridge/lang-4.x/ast/parser/xml/internal/xml-to-json.ts index f48ff71be9..6a0a910770 100644 --- a/src/r-bridge/lang-4.x/ast/parser/xml/internal/xml-to-json.ts +++ b/src/r-bridge/lang-4.x/ast/parser/xml/internal/xml-to-json.ts @@ -11,7 +11,7 @@ import type { XmlBasedJson } from '../input-format' export function xlm2jsonObject(config: XmlParserConfig, xmlString: string): XmlBasedJson { let result: XmlBasedJson = {} xml2js.parseString(xmlString, { - // we want this to be strictly synchronous! + // we want this to be strictly synchronous so that the result can be returned immediately below! async: false, attrkey: config.attributeName, charkey: config.contentName, diff --git a/src/r-bridge/retriever.ts b/src/r-bridge/retriever.ts index b00417ef2f..7d9a45a287 100644 --- a/src/r-bridge/retriever.ts +++ b/src/r-bridge/retriever.ts @@ -2,7 +2,7 @@ import { type RShell } from './shell' import type { XmlParserHooks, NormalizedAst } from './lang-4.x' import { ts2r, normalize } from './lang-4.x' import { startAndEndsWith } from '../util/strings' -import type { DeepPartial, DeepReadonly } from 'ts-essentials' +import type {AsyncOrSync, DeepPartial, DeepReadonly} from 'ts-essentials' import { guard } from '../util/assert' import {RShellExecutor} from './shell-executor' @@ -55,7 +55,7 @@ const ErrorMarker = 'err' * Throws if the file could not be parsed. * If successful, allows to further query the last result with {@link retrieveNumberOfRTokensOfLastParse}. */ -export function retrieveXmlFromRCode(request: RParseRequest, shell: (RShell | RShellExecutor)): (Promise | string) { +export function retrieveXmlFromRCode(request: RParseRequest, shell: (RShell | RShellExecutor)): AsyncOrSync { const suffix = request.request === 'file' ? ', encoding="utf-8"' : '' const setupCommands = [ `flowr_output <- flowr_parsed <- "${ErrorMarker}"`, @@ -70,22 +70,22 @@ export function retrieveXmlFromRCode(request: RParseRequest, shell: (RShell | RS shell.ensurePackageInstalled('xmlparsedata',true) shell.addPrerequisites(setupCommands) - const output = shell.run(outputCommand) - guard(output !== ErrorMarker, () => `unable to parse R code (see the log for more information) for request ${JSON.stringify(request)}}`) - return output + return guardOutput(shell.run(outputCommand)) } else { const run = async() => { if(request.ensurePackageInstalled) await shell.ensurePackageInstalled('xmlparsedata', true) shell.sendCommands(...setupCommands) - const output = (await shell.sendCommandWithOutput(outputCommand)).join(shell.options.eol) - guard(output !== ErrorMarker, () => `unable to parse R code (see the log for more information) for request ${JSON.stringify(request)}}`) - return output + return guardOutput((await shell.sendCommandWithOutput(outputCommand)).join(shell.options.eol)) } return run() } + function guardOutput(output: string): string { + guard(output !== ErrorMarker, () => `unable to parse R code (see the log for more information) for request ${JSON.stringify(request)}}`) + return output + } } /** From 787fe003a268e52337bc9257cf629d943b8e2219 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Thu, 1 Feb 2024 11:35:25 +0100 Subject: [PATCH 24/46] refactor: explicitly as in processSourceCall --- src/dataflow/internal/process/functions/source.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dataflow/internal/process/functions/source.ts b/src/dataflow/internal/process/functions/source.ts index 155fee7d69..789a8df326 100644 --- a/src/dataflow/internal/process/functions/source.ts +++ b/src/dataflow/internal/process/functions/source.ts @@ -1,3 +1,4 @@ +import type { RArgument} from '../../../../r-bridge' import {fileNameDeterministicCountingIdGenerator, type NormalizedAst, type ParentInformation, removeTokenMapQuotationMarks, type RFunctionCall, RType} from '../../../../r-bridge' import {RShellExecutor} from '../../../../r-bridge/shell-executor' import {executeSingleSubStep} from '../../../../core' @@ -6,7 +7,7 @@ import {overwriteEnvironments} from '../../../environments' import type {DataflowInformation} from '../../info' export function processSourceCall(functionCall: RFunctionCall, data: DataflowProcessorInformation, information: DataflowInformation): DataflowInformation { - const sourceFile = functionCall.arguments[0] + const sourceFile = functionCall.arguments[0] as RArgument if(sourceFile?.value?.type == RType.String) { const executor = new RShellExecutor() const path = removeTokenMapQuotationMarks(sourceFile.lexeme) From 197c41807f10e8e031f179852fa83befe0081bbf Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Thu, 1 Feb 2024 11:41:20 +0100 Subject: [PATCH 25/46] refactor: damn u typescript --- src/dataflow/internal/process/functions/source.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dataflow/internal/process/functions/source.ts b/src/dataflow/internal/process/functions/source.ts index 789a8df326..549cd08774 100644 --- a/src/dataflow/internal/process/functions/source.ts +++ b/src/dataflow/internal/process/functions/source.ts @@ -7,7 +7,7 @@ import {overwriteEnvironments} from '../../../environments' import type {DataflowInformation} from '../../info' export function processSourceCall(functionCall: RFunctionCall, data: DataflowProcessorInformation, information: DataflowInformation): DataflowInformation { - const sourceFile = functionCall.arguments[0] as RArgument + const sourceFile = functionCall.arguments[0] as RArgument | undefined if(sourceFile?.value?.type == RType.String) { const executor = new RShellExecutor() const path = removeTokenMapQuotationMarks(sourceFile.lexeme) From 136a8ebd93dbdd03247b046083e918daa1922cd2 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Thu, 1 Feb 2024 13:28:11 +0100 Subject: [PATCH 26/46] feat-fix: ensure we only parse built-in source calls --- src/dataflow/internal/process/functions/function-call.ts | 4 ++-- src/dataflow/internal/process/functions/source.ts | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/dataflow/internal/process/functions/function-call.ts b/src/dataflow/internal/process/functions/function-call.ts index 59fa82c90d..82bff3634c 100644 --- a/src/dataflow/internal/process/functions/function-call.ts +++ b/src/dataflow/internal/process/functions/function-call.ts @@ -9,7 +9,7 @@ import type {FunctionArgument} from '../../../index' import { DataflowGraph, dataflowLogger, EdgeType } from '../../../index' import { linkArgumentsOnCall } from '../../linker' import { LocalScope } from '../../../environments/scopes' -import {processSourceCall} from './source' +import {isSourceCall, processSourceCall} from './source' export const UnnamedFunctionCallPrefix = 'unnamed-function-call-' @@ -117,7 +117,7 @@ export function processFunctionCall(functionCall: RFunctionCall d.kind == 'built-in-function') +} + export function processSourceCall(functionCall: RFunctionCall, data: DataflowProcessorInformation, information: DataflowInformation): DataflowInformation { const sourceFile = functionCall.arguments[0] as RArgument | undefined if(sourceFile?.value?.type == RType.String) { From f335eee72dd948139425f1a561feb79b19183e4b Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Thu, 1 Feb 2024 13:34:41 +0100 Subject: [PATCH 27/46] refactor: remove todo --- src/dataflow/internal/process/functions/source.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dataflow/internal/process/functions/source.ts b/src/dataflow/internal/process/functions/source.ts index 65109f5f61..92ddc645ba 100644 --- a/src/dataflow/internal/process/functions/source.ts +++ b/src/dataflow/internal/process/functions/source.ts @@ -32,7 +32,7 @@ export function processSourceCall(functionCall: RFunctionCall Date: Thu, 1 Feb 2024 13:54:32 +0100 Subject: [PATCH 28/46] feat: allow overriding the source file provider --- .../internal/process/functions/source.ts | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/dataflow/internal/process/functions/source.ts b/src/dataflow/internal/process/functions/source.ts index 92ddc645ba..43811e5c31 100644 --- a/src/dataflow/internal/process/functions/source.ts +++ b/src/dataflow/internal/process/functions/source.ts @@ -1,4 +1,4 @@ -import type { RArgument} from '../../../../r-bridge' +import type {RArgument, RParseRequest} from '../../../../r-bridge' import {fileNameDeterministicCountingIdGenerator, type NormalizedAst, type ParentInformation, removeTokenMapQuotationMarks, type RFunctionCall, RType} from '../../../../r-bridge' import {RShellExecutor} from '../../../../r-bridge/shell-executor' import {executeSingleSubStep} from '../../../../core' @@ -6,6 +6,14 @@ import {type DataflowProcessorInformation, processDataflowFor} from '../../../pr import {type DataflowScopeName, type Identifier, overwriteEnvironments, type REnvironmentInformation, resolveByName} from '../../../environments' import type {DataflowInformation} from '../../info' +export const sourceFileProvider: (path: string) => RParseRequest = path => { + return { + request: 'file', + content: path, + ensurePackageInstalled: true + } +} + export function isSourceCall(name: Identifier, scope: DataflowScopeName, environments: REnvironmentInformation): boolean { if(name != 'source') return false @@ -20,11 +28,7 @@ export function processSourceCall(functionCall: RFunctionCall const dataflow = processDataflowFor(normalized.ast, {...data, environments: information.environments}) From 019d49c5e8eff8d81ff1712d1097b2652912c7af Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Thu, 1 Feb 2024 14:33:52 +0100 Subject: [PATCH 29/46] test: start on source tests --- .../internal/process/functions/source.ts | 6 ++- .../functions/source-tests.ts | 54 +++++++++++++++++++ test/testfiles/source.R | 2 - test/testfiles/sourced.R | 1 - 4 files changed, 59 insertions(+), 4 deletions(-) create mode 100644 test/functionality/dataflow/processing-of-elements/functions/source-tests.ts delete mode 100644 test/testfiles/source.R delete mode 100644 test/testfiles/sourced.R diff --git a/src/dataflow/internal/process/functions/source.ts b/src/dataflow/internal/process/functions/source.ts index 43811e5c31..45d136cd49 100644 --- a/src/dataflow/internal/process/functions/source.ts +++ b/src/dataflow/internal/process/functions/source.ts @@ -6,7 +6,7 @@ import {type DataflowProcessorInformation, processDataflowFor} from '../../../pr import {type DataflowScopeName, type Identifier, overwriteEnvironments, type REnvironmentInformation, resolveByName} from '../../../environments' import type {DataflowInformation} from '../../info' -export const sourceFileProvider: (path: string) => RParseRequest = path => { +let sourceFileProvider: (path: string) => RParseRequest = path => { return { request: 'file', content: path, @@ -14,6 +14,10 @@ export const sourceFileProvider: (path: string) => RParseRequest = path => { } } +export function setSourceFileProvider(provider: (path: string) => RParseRequest): void { + sourceFileProvider = provider +} + export function isSourceCall(name: Identifier, scope: DataflowScopeName, environments: REnvironmentInformation): boolean { if(name != 'source') return false diff --git a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts new file mode 100644 index 0000000000..0fb6af77ea --- /dev/null +++ b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts @@ -0,0 +1,54 @@ +import {assertDataflow, withShell} from '../../../_helper/shell' +import {setSourceFileProvider} from '../../../../../src/dataflow/internal/process/functions/source' +import {BuiltIn, DataflowGraph, EdgeType, initializeCleanEnvironments} from '../../../../../src' +import {LocalScope} from '../../../../../src/dataflow/environments/scopes' +import {UnnamedArgumentPrefix} from '../../../../../src/dataflow/internal/process/functions/argument' +import {define} from '../../../../../src/dataflow/environments' + +describe('source', withShell(shell => { + const sources = new Map([ + ['simple', 'N <- 9'] + ]) + setSourceFileProvider(path => { + return { + request: 'text', + content: sources.get(path) as string, + ensurePackageInstalled: true + } + }) + + const envWithN = define( + {nodeId: 'simple-0', scope: 'local', name: 'N', used: 'always', kind: 'variable', definedAt: 'simple-2' }, + LocalScope, + initializeCleanEnvironments() + ) + assertDataflow('simple source', shell, 'source("simple")\ncat(N)', new DataflowGraph() + .addVertex({ tag: 'variable-definition', id: 'simple-0', name: 'N', scope: LocalScope }) + .addVertex({ + tag: 'function-call', + name: 'source', + id: '3', + environment: initializeCleanEnvironments(), + args: [{ + nodeId: '2', name: `${UnnamedArgumentPrefix}2`, scope: LocalScope, used: 'always' } + ]}) + .addVertex({ + tag: 'function-call', + name: 'cat', + id: '7', + environment: envWithN, + args: [{ + nodeId: '6', name: `${UnnamedArgumentPrefix}6`, scope: LocalScope, used: 'always' + }] + }) + .addVertex({tag: 'use', id: '5', name: 'N', environment: envWithN}) + .addVertex({tag: 'use', id: '2', name: `${UnnamedArgumentPrefix}2`}) + .addVertex({tag: 'use', id: '6', name: `${UnnamedArgumentPrefix}6`, environment: envWithN}) + .addEdge('3', '2', EdgeType.Argument, 'always') + .addEdge('3', BuiltIn, EdgeType.Reads, 'always') + .addEdge('5', 'simple-0', EdgeType.Reads, 'always') + .addEdge('6', '5', EdgeType.Reads, 'always') + .addEdge('7', '6', EdgeType.Argument, 'always') + .addEdge('7', BuiltIn, EdgeType.Reads, 'always') + ) +})) \ No newline at end of file diff --git a/test/testfiles/source.R b/test/testfiles/source.R deleted file mode 100644 index 4ea2246a18..0000000000 --- a/test/testfiles/source.R +++ /dev/null @@ -1,2 +0,0 @@ -source("test/testfiles/sourced.R") -cat(N) \ No newline at end of file diff --git a/test/testfiles/sourced.R b/test/testfiles/sourced.R deleted file mode 100644 index 43b4814b5f..0000000000 --- a/test/testfiles/sourced.R +++ /dev/null @@ -1 +0,0 @@ -N <- 9 \ No newline at end of file From 345bf4c13fb7863afd3cc828b623ca23ad9bf4b4 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Thu, 1 Feb 2024 14:53:25 +0100 Subject: [PATCH 30/46] refactor: overhaul source providers --- .../internal/process/functions/source.ts | 33 ++++++++++++++----- .../functions/source-tests.ts | 13 ++------ 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/dataflow/internal/process/functions/source.ts b/src/dataflow/internal/process/functions/source.ts index 45d136cd49..c91749f3f9 100644 --- a/src/dataflow/internal/process/functions/source.ts +++ b/src/dataflow/internal/process/functions/source.ts @@ -6,16 +6,26 @@ import {type DataflowProcessorInformation, processDataflowFor} from '../../../pr import {type DataflowScopeName, type Identifier, overwriteEnvironments, type REnvironmentInformation, resolveByName} from '../../../environments' import type {DataflowInformation} from '../../info' -let sourceFileProvider: (path: string) => RParseRequest = path => { - return { - request: 'file', - content: path, - ensurePackageInstalled: true +let sourceProvider: SourceProvider = { + createRequest(path: string): RParseRequest { + return { + request: 'file', + content: path, + ensurePackageInstalled: true + } } } -export function setSourceFileProvider(provider: (path: string) => RParseRequest): void { - sourceFileProvider = provider +export function setTextSourceProvider(sources: Map): void { + sourceProvider = { + createRequest(path: string): RParseRequest { + return { + request: 'text', + content: sources.get(path) as string, + ensurePackageInstalled: true + } + } + } } export function isSourceCall(name: Identifier, scope: DataflowScopeName, environments: REnvironmentInformation): boolean { @@ -30,9 +40,10 @@ export function processSourceCall(functionCall: RFunctionCall const dataflow = processDataflowFor(normalized.ast, {...data, environments: information.environments}) @@ -46,4 +57,8 @@ export function processSourceCall(functionCall: RFunctionCall { - const sources = new Map([ + setTextSourceProvider(new Map([ ['simple', 'N <- 9'] - ]) - setSourceFileProvider(path => { - return { - request: 'text', - content: sources.get(path) as string, - ensurePackageInstalled: true - } - }) + ])) const envWithN = define( {nodeId: 'simple-0', scope: 'local', name: 'N', used: 'always', kind: 'variable', definedAt: 'simple-2' }, From 7507a1845488304bacabefaa47df12e46bbf90a5 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Thu, 1 Feb 2024 15:06:38 +0100 Subject: [PATCH 31/46] refactor: generify source providers to RParseRequestProvider --- .../internal/process/functions/source.ts | 29 ++++--------------- src/r-bridge/retriever.ts | 27 +++++++++++++++++ .../functions/source-tests.ts | 10 +++---- 3 files changed, 37 insertions(+), 29 deletions(-) diff --git a/src/dataflow/internal/process/functions/source.ts b/src/dataflow/internal/process/functions/source.ts index c91749f3f9..ca21b2416d 100644 --- a/src/dataflow/internal/process/functions/source.ts +++ b/src/dataflow/internal/process/functions/source.ts @@ -1,4 +1,5 @@ -import type {RArgument, RParseRequest} from '../../../../r-bridge' +import type {RArgument, RParseRequestProvider} from '../../../../r-bridge' +import { requestProviderFromFile} from '../../../../r-bridge' import {fileNameDeterministicCountingIdGenerator, type NormalizedAst, type ParentInformation, removeTokenMapQuotationMarks, type RFunctionCall, RType} from '../../../../r-bridge' import {RShellExecutor} from '../../../../r-bridge/shell-executor' import {executeSingleSubStep} from '../../../../core' @@ -6,26 +7,10 @@ import {type DataflowProcessorInformation, processDataflowFor} from '../../../pr import {type DataflowScopeName, type Identifier, overwriteEnvironments, type REnvironmentInformation, resolveByName} from '../../../environments' import type {DataflowInformation} from '../../info' -let sourceProvider: SourceProvider = { - createRequest(path: string): RParseRequest { - return { - request: 'file', - content: path, - ensurePackageInstalled: true - } - } -} +let sourceProvider = requestProviderFromFile() -export function setTextSourceProvider(sources: Map): void { - sourceProvider = { - createRequest(path: string): RParseRequest { - return { - request: 'text', - content: sources.get(path) as string, - ensurePackageInstalled: true - } - } - } +export function setSourceProvider(provider: RParseRequestProvider): void { + sourceProvider = provider } export function isSourceCall(name: Identifier, scope: DataflowScopeName, environments: REnvironmentInformation): boolean { @@ -58,7 +43,3 @@ export function processSourceCall(functionCall: RFunctionCall { - setTextSourceProvider(new Map([ - ['simple', 'N <- 9'] - ])) + setSourceProvider(requestProviderFromText({ + simple: 'N <- 9' + })) const envWithN = define( {nodeId: 'simple-0', scope: 'local', name: 'N', used: 'always', kind: 'variable', definedAt: 'simple-2' }, From 911d34931fd228c892145dca8cdc423815f6179d Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Mon, 5 Feb 2024 10:45:35 +0100 Subject: [PATCH 32/46] test: added test for conditional source --- .../functions/source-tests.ts | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts index 7b772458c8..7be80c9f0a 100644 --- a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts +++ b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts @@ -15,6 +15,7 @@ describe('source', withShell(shell => { LocalScope, initializeCleanEnvironments() ) + assertDataflow('simple source', shell, 'source("simple")\ncat(N)', new DataflowGraph() .addVertex({ tag: 'variable-definition', id: 'simple-0', name: 'N', scope: LocalScope }) .addVertex({ @@ -44,4 +45,36 @@ describe('source', withShell(shell => { .addEdge('7', '6', EdgeType.Argument, 'always') .addEdge('7', BuiltIn, EdgeType.Reads, 'always') ) -})) \ No newline at end of file + + assertDataflow('conditional', shell, 'if (x) { source("simple") }\ncat(N)', new DataflowGraph() + .addVertex({ tag: 'variable-definition', id: 'simple-0', name: 'N', scope: LocalScope }) + .addVertex({ + tag: 'function-call', + name: 'source', + id: '4', + environment: initializeCleanEnvironments(), + args: [{ + nodeId: '3', name: `${UnnamedArgumentPrefix}3`, scope: LocalScope, used: 'always' } + ], + when: 'maybe' + }) + .addVertex({ + tag: 'function-call', + name: 'cat', + id: '10', + environment: envWithN, + args: [{ + nodeId: '9', name: `${UnnamedArgumentPrefix}9`, scope: LocalScope, used: 'always' + }] + }) + .addVertex({tag: 'use', id: '0', name: 'x', scope: LocalScope}) + .addVertex({tag: 'use', id: '8', name: 'N', environment: envWithN}) + .addVertex({tag: 'use', id: '3', name: `${UnnamedArgumentPrefix}3`}) + .addVertex({tag: 'use', id: '9', name: `${UnnamedArgumentPrefix}9`, environment: envWithN}) + .addEdge('4', '3', EdgeType.Argument, 'always') + .addEdge('4', BuiltIn, EdgeType.Reads, 'maybe') + .addEdge('8', 'simple-0', EdgeType.Reads, 'always') + .addEdge('9', '8', EdgeType.Reads, 'always') + .addEdge('10', '9', EdgeType.Argument, 'always') + .addEdge('10', BuiltIn, EdgeType.Reads, 'always')) +})) From ba6dce2b2879025d10c5d68a440c77f97c3c8b5b Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Mon, 5 Feb 2024 11:00:59 +0100 Subject: [PATCH 33/46] refactor: properly handle missing/invalid sourced files --- .../internal/process/functions/source.ts | 9 ++++++- .../functions/source-tests.ts | 24 ++++++++++++++++--- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/dataflow/internal/process/functions/source.ts b/src/dataflow/internal/process/functions/source.ts index ca21b2416d..b27f1a55a2 100644 --- a/src/dataflow/internal/process/functions/source.ts +++ b/src/dataflow/internal/process/functions/source.ts @@ -6,6 +6,7 @@ import {executeSingleSubStep} from '../../../../core' import {type DataflowProcessorInformation, processDataflowFor} from '../../../processor' import {type DataflowScopeName, type Identifier, overwriteEnvironments, type REnvironmentInformation, resolveByName} from '../../../environments' import type {DataflowInformation} from '../../info' +import {dataflowLogger} from '../../../index' let sourceProvider = requestProviderFromFile() @@ -28,7 +29,13 @@ export function processSourceCall(functionCall: RFunctionCall const dataflow = processDataflowFor(normalized.ast, {...data, environments: information.environments}) diff --git a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts index 7be80c9f0a..c00e04613f 100644 --- a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts +++ b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts @@ -6,9 +6,10 @@ import {UnnamedArgumentPrefix} from '../../../../../src/dataflow/internal/proces import {define} from '../../../../../src/dataflow/environments' describe('source', withShell(shell => { - setSourceProvider(requestProviderFromText({ + const sources = { simple: 'N <- 9' - })) + } + setSourceProvider(requestProviderFromText(sources)) const envWithN = define( {nodeId: 'simple-0', scope: 'local', name: 'N', used: 'always', kind: 'variable', definedAt: 'simple-2' }, @@ -76,5 +77,22 @@ describe('source', withShell(shell => { .addEdge('8', 'simple-0', EdgeType.Reads, 'always') .addEdge('9', '8', EdgeType.Reads, 'always') .addEdge('10', '9', EdgeType.Argument, 'always') - .addEdge('10', BuiltIn, EdgeType.Reads, 'always')) + .addEdge('10', BuiltIn, EdgeType.Reads, 'always') + ) + + // missing sources should just be ignored + assertDataflow('missing source', shell, 'source("missing")', new DataflowGraph() + .addVertex({ + tag: 'function-call', + name: 'source', + id: '3', + environment: initializeCleanEnvironments(), + args: [{ + nodeId: '2', name: `${UnnamedArgumentPrefix}2`, scope: LocalScope, used: 'always' + }] + }) + .addVertex({tag: 'use', id: '2', name: `${UnnamedArgumentPrefix}2`}) + .addEdge('3', '2', EdgeType.Argument, 'always') + .addEdge('3', BuiltIn, EdgeType.Reads, 'always') + ) })) From 48c7928f829704aee0b3d988dae7bdc1e7eeee07 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Mon, 5 Feb 2024 11:24:44 +0100 Subject: [PATCH 34/46] wip: test for recursive sources --- .../processing-of-elements/functions/source-tests.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts index c00e04613f..21af81fa0d 100644 --- a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts +++ b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts @@ -7,7 +7,9 @@ import {define} from '../../../../../src/dataflow/environments' describe('source', withShell(shell => { const sources = { - simple: 'N <- 9' + simple: 'N <- 9', + recursive1: 'x <- 1\nsource("recursive2")', + recursive2: 'cat(x)\nsource("recursive1")' } setSourceProvider(requestProviderFromText(sources)) @@ -95,4 +97,6 @@ describe('source', withShell(shell => { .addEdge('3', '2', EdgeType.Argument, 'always') .addEdge('3', BuiltIn, EdgeType.Reads, 'always') ) + + assertDataflow('recursive source', shell, sources.recursive1, new DataflowGraph()) })) From 3f21bcfc445dc20fc09aa432e677d2d8814a40c7 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Mon, 5 Feb 2024 13:28:13 +0100 Subject: [PATCH 35/46] feat: skip dataflow analysis for re-sourced references --- src/dataflow/extractor.ts | 9 ++++++++- .../internal/process/functions/source.ts | 16 +++++++++++++--- src/dataflow/processor.ts | 14 +++++++------- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/src/dataflow/extractor.ts b/src/dataflow/extractor.ts index ff677fdae3..3a05026fc3 100644 --- a/src/dataflow/extractor.ts +++ b/src/dataflow/extractor.ts @@ -49,7 +49,14 @@ const processors: DataflowProcessors = { } export function produceDataFlowGraph(ast: NormalizedAst, initialScope: DataflowScopeName = LocalScope): DataflowInformation { - return processDataflowFor(ast.ast, { completeAst: ast, activeScope: initialScope, environments: initializeCleanEnvironments(), processors: processors as DataflowProcessors }) + return processDataflowFor(ast.ast, { + completeAst: ast, + activeScope: initialScope, + environments: initializeCleanEnvironments(), + processors: processors as DataflowProcessors, + currentPath: 'initial', + sourceReferences: new Map() + }) } export function processBinaryOp(node: RBinaryOp, data: DataflowProcessorInformation) { diff --git a/src/dataflow/internal/process/functions/source.ts b/src/dataflow/internal/process/functions/source.ts index b27f1a55a2..9a840057c6 100644 --- a/src/dataflow/internal/process/functions/source.ts +++ b/src/dataflow/internal/process/functions/source.ts @@ -1,5 +1,5 @@ import type {RArgument, RParseRequestProvider} from '../../../../r-bridge' -import { requestProviderFromFile} from '../../../../r-bridge' +import {requestProviderFromFile} from '../../../../r-bridge' import {fileNameDeterministicCountingIdGenerator, type NormalizedAst, type ParentInformation, removeTokenMapQuotationMarks, type RFunctionCall, RType} from '../../../../r-bridge' import {RShellExecutor} from '../../../../r-bridge/shell-executor' import {executeSingleSubStep} from '../../../../core' @@ -28,6 +28,12 @@ export function processSourceCall(functionCall: RFunctionCall(functionCall: RFunctionCall - const dataflow = processDataflowFor(normalized.ast, {...data, environments: information.environments}) + const dataflow = processDataflowFor(normalized.ast, {...data, currentPath: path, environments: information.environments}) // update our graph with the sourced file's information - const newInformation = {...information} + const newInformation = {...information} newInformation.environments = overwriteEnvironments(information.environments, dataflow.environments) newInformation.graph.mergeWith(dataflow.graph) // this can be improved, see issue #628 diff --git a/src/dataflow/processor.ts b/src/dataflow/processor.ts index 9e94a06588..8672b746c0 100644 --- a/src/dataflow/processor.ts +++ b/src/dataflow/processor.ts @@ -13,20 +13,23 @@ export interface DataflowProcessorInformation { /** * Initial and frozen ast-information */ - readonly completeAst: NormalizedAst + readonly completeAst: NormalizedAst /** * Correctly contains pushed local scopes introduced by `function` scopes. * Will by default *not* contain any symbol-bindings introduces along the way, they have to be decorated when moving up the tree. */ - readonly environments: REnvironmentInformation + readonly environments: REnvironmentInformation /** * Name of the currently active scope, (hopefully) always {@link LocalScope | Local} */ - readonly activeScope: DataflowScopeName + readonly activeScope: DataflowScopeName /** * Other processors to be called by the given functions */ - readonly processors: DataflowProcessors + readonly processors: DataflowProcessors + // TODO using "initial" as the default path doesn't allow us to skip re-sourcing the initial file - how do we find out the initial file's name/path? + readonly currentPath: string | 'initial' + readonly sourceReferences: Map } export type DataflowProcessor> = (node: NodeType, data: DataflowProcessorInformation) => DataflowInformation @@ -55,6 +58,3 @@ export type DataflowProcessors = { export function processDataflowFor(current: RNodeWithParent, data: DataflowProcessorInformation): DataflowInformation { return data.processors[current.type](current as never, data) } - - - From 53d69de6a71f9935ec6ce713fc31a1fd76409940 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Mon, 5 Feb 2024 13:46:16 +0100 Subject: [PATCH 36/46] wip: add another todo --- src/dataflow/internal/process/functions/source.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dataflow/internal/process/functions/source.ts b/src/dataflow/internal/process/functions/source.ts index 9a840057c6..6766509b3b 100644 --- a/src/dataflow/internal/process/functions/source.ts +++ b/src/dataflow/internal/process/functions/source.ts @@ -28,6 +28,7 @@ export function processSourceCall(functionCall: RFunctionCall Date: Tue, 6 Feb 2024 11:12:21 +0100 Subject: [PATCH 37/46] refactor: use parse requests in dataflow processor info --- src/core/slicer.ts | 2 +- src/core/steps.ts | 2 +- src/dataflow/extractor.ts | 6 +++--- src/dataflow/internal/process/functions/source.ts | 7 ++++--- src/dataflow/processor.ts | 5 ++--- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/core/slicer.ts b/src/core/slicer.ts index e026533993..e132dd357e 100644 --- a/src/core/slicer.ts +++ b/src/core/slicer.ts @@ -208,7 +208,7 @@ export class SteppingSlicer, 'dataflow': { description: 'Construct the dataflow graph', - processor: a => produceDataFlowGraph(a), + processor: (r, a) => produceDataFlowGraph(r, a), required: 'once-per-file', printer: { [StepOutputFormat.Internal]: internalPrinter, diff --git a/src/dataflow/extractor.ts b/src/dataflow/extractor.ts index 3a05026fc3..49c6b5929f 100644 --- a/src/dataflow/extractor.ts +++ b/src/dataflow/extractor.ts @@ -1,4 +1,4 @@ -import type { NormalizedAst, ParentInformation, RAssignmentOp, RBinaryOp} from '../r-bridge' +import type {NormalizedAst, ParentInformation, RAssignmentOp, RBinaryOp, RParseRequest} from '../r-bridge' import { RType } from '../r-bridge' import type { DataflowInformation } from './internal/info' import type { DataflowProcessorInformation, DataflowProcessors} from './processor' @@ -48,13 +48,13 @@ const processors: DataflowProcessors = { [RType.ExpressionList]: processExpressionList, } -export function produceDataFlowGraph(ast: NormalizedAst, initialScope: DataflowScopeName = LocalScope): DataflowInformation { +export function produceDataFlowGraph(request: RParseRequest, ast: NormalizedAst, initialScope: DataflowScopeName = LocalScope): DataflowInformation { return processDataflowFor(ast.ast, { completeAst: ast, activeScope: initialScope, environments: initializeCleanEnvironments(), processors: processors as DataflowProcessors, - currentPath: 'initial', + currentRequest: request, sourceReferences: new Map() }) } diff --git a/src/dataflow/internal/process/functions/source.ts b/src/dataflow/internal/process/functions/source.ts index 6766509b3b..247ce91ed7 100644 --- a/src/dataflow/internal/process/functions/source.ts +++ b/src/dataflow/internal/process/functions/source.ts @@ -30,7 +30,7 @@ export function processSourceCall(functionCall: RFunctionCall(functionCall: RFunctionCall - const dataflow = processDataflowFor(normalized.ast, {...data, currentPath: path, environments: information.environments}) + const dataflow = processDataflowFor(normalized.ast, {...data, currentRequest: request, environments: information.environments}) // update our graph with the sourced file's information const newInformation = {...information} diff --git a/src/dataflow/processor.ts b/src/dataflow/processor.ts index 8672b746c0..b4388a95e0 100644 --- a/src/dataflow/processor.ts +++ b/src/dataflow/processor.ts @@ -4,7 +4,7 @@ import type { NormalizedAst, ParentInformation, RNode, - RNodeWithParent + RNodeWithParent, RParseRequest } from '../r-bridge' import type { DataflowInformation } from './internal/info' import type { DataflowScopeName, REnvironmentInformation } from './environments' @@ -27,8 +27,7 @@ export interface DataflowProcessorInformation { * Other processors to be called by the given functions */ readonly processors: DataflowProcessors - // TODO using "initial" as the default path doesn't allow us to skip re-sourcing the initial file - how do we find out the initial file's name/path? - readonly currentPath: string | 'initial' + readonly currentRequest: RParseRequest readonly sourceReferences: Map } From 5bc6d08692ed7e3c37a7b7c095b3a864389e317b Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Tue, 6 Feb 2024 11:25:32 +0100 Subject: [PATCH 38/46] refactor: first pass of reference chain impl --- src/dataflow/extractor.ts | 12 ++++++------ src/dataflow/internal/process/functions/source.ts | 9 ++++----- src/dataflow/processor.ts | 12 ++++++------ 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/src/dataflow/extractor.ts b/src/dataflow/extractor.ts index 49c6b5929f..03b1bddddc 100644 --- a/src/dataflow/extractor.ts +++ b/src/dataflow/extractor.ts @@ -50,12 +50,12 @@ const processors: DataflowProcessors = { export function produceDataFlowGraph(request: RParseRequest, ast: NormalizedAst, initialScope: DataflowScopeName = LocalScope): DataflowInformation { return processDataflowFor(ast.ast, { - completeAst: ast, - activeScope: initialScope, - environments: initializeCleanEnvironments(), - processors: processors as DataflowProcessors, - currentRequest: request, - sourceReferences: new Map() + completeAst: ast, + activeScope: initialScope, + environments: initializeCleanEnvironments(), + processors: processors as DataflowProcessors, + currentRequest: request, + referenceChain: [request] }) } diff --git a/src/dataflow/internal/process/functions/source.ts b/src/dataflow/internal/process/functions/source.ts index 247ce91ed7..ea4a93da86 100644 --- a/src/dataflow/internal/process/functions/source.ts +++ b/src/dataflow/internal/process/functions/source.ts @@ -28,10 +28,10 @@ export function processSourceCall(functionCall: RFunctionCall JSON.stringify(r) == requestString)) { + dataflowLogger.info(`Found loop in dataflow analysis for ${requestString}: ${JSON.stringify(data.referenceChain)}, skipping further dataflow analysis`) return information } @@ -45,8 +45,7 @@ export function processSourceCall(functionCall: RFunctionCall const dataflow = processDataflowFor(normalized.ast, {...data, currentRequest: request, environments: information.environments}) diff --git a/src/dataflow/processor.ts b/src/dataflow/processor.ts index b4388a95e0..90760e9f7b 100644 --- a/src/dataflow/processor.ts +++ b/src/dataflow/processor.ts @@ -13,22 +13,22 @@ export interface DataflowProcessorInformation { /** * Initial and frozen ast-information */ - readonly completeAst: NormalizedAst + readonly completeAst: NormalizedAst /** * Correctly contains pushed local scopes introduced by `function` scopes. * Will by default *not* contain any symbol-bindings introduces along the way, they have to be decorated when moving up the tree. */ - readonly environments: REnvironmentInformation + readonly environments: REnvironmentInformation /** * Name of the currently active scope, (hopefully) always {@link LocalScope | Local} */ - readonly activeScope: DataflowScopeName + readonly activeScope: DataflowScopeName /** * Other processors to be called by the given functions */ - readonly processors: DataflowProcessors - readonly currentRequest: RParseRequest - readonly sourceReferences: Map + readonly processors: DataflowProcessors + readonly currentRequest: RParseRequest + readonly referenceChain: RParseRequest[] } export type DataflowProcessor> = (node: NodeType, data: DataflowProcessorInformation) => DataflowInformation From 56a40477ddd4487f823650bd72895c34807b85a7 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Tue, 6 Feb 2024 11:39:13 +0100 Subject: [PATCH 39/46] feat-fix: also catch normalize and dataflow errors --- .../internal/process/functions/source.ts | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/dataflow/internal/process/functions/source.ts b/src/dataflow/internal/process/functions/source.ts index ea4a93da86..f05f6a5788 100644 --- a/src/dataflow/internal/process/functions/source.ts +++ b/src/dataflow/internal/process/functions/source.ts @@ -34,22 +34,21 @@ export function processSourceCall(functionCall: RFunctionCall + let dataflow: DataflowInformation try { - parsed = executeSingleSubStep('parse', request, executor) as string + const parsed = executeSingleSubStep('parse', request, executor) as string + normalized = executeSingleSubStep('normalize', parsed, executor.getTokenMap(), undefined, fileNameDeterministicCountingIdGenerator(path)) as NormalizedAst + dataflow = processDataflowFor(normalized.ast, {...data, currentRequest: request, environments: information.environments}) } catch(e) { - dataflowLogger.warn(`Failed to parse sourced file ${path}, ignoring: ${(e as Error).message}`) + dataflowLogger.warn(`Failed to analyze sourced file ${requestString}, skipping: ${(e as Error).message}`) return information } - // make the currently analyzed file remember that it already referenced the path - data.referenceChain.push(request) - - const normalized = executeSingleSubStep('normalize', parsed, executor.getTokenMap(), undefined, fileNameDeterministicCountingIdGenerator(path)) as NormalizedAst - const dataflow = processDataflowFor(normalized.ast, {...data, currentRequest: request, environments: information.environments}) - // update our graph with the sourced file's information const newInformation = {...information} newInformation.environments = overwriteEnvironments(information.environments, dataflow.environments) From 11b625b93b3d07712b07bfa591414acc437961c1 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Tue, 6 Feb 2024 11:51:51 +0100 Subject: [PATCH 40/46] test: finished recursive source test --- .../functions/source-tests.ts | 52 ++++++++++++++++++- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts index 21af81fa0d..a80abcfd55 100644 --- a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts +++ b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts @@ -18,7 +18,6 @@ describe('source', withShell(shell => { LocalScope, initializeCleanEnvironments() ) - assertDataflow('simple source', shell, 'source("simple")\ncat(N)', new DataflowGraph() .addVertex({ tag: 'variable-definition', id: 'simple-0', name: 'N', scope: LocalScope }) .addVertex({ @@ -98,5 +97,54 @@ describe('source', withShell(shell => { .addEdge('3', BuiltIn, EdgeType.Reads, 'always') ) - assertDataflow('recursive source', shell, sources.recursive1, new DataflowGraph()) + const envWithX = define( + {nodeId: '0', scope: 'local', name: 'x', used: 'always', kind: 'variable', definedAt: '2' }, + LocalScope, + initializeCleanEnvironments() + ) + assertDataflow('recursive source', shell, sources.recursive1, new DataflowGraph() + .addVertex({ + tag: 'function-call', + name: 'source', + id: '6', + environment: envWithX, + args: [{ + nodeId: '5', name: `${UnnamedArgumentPrefix}5`, scope: LocalScope, used: 'always' } + ], + when: 'always' + }) + .addVertex({ + tag: 'function-call', + name: 'source', + id: 'recursive2-7', + environment: envWithX, + args: [{ + nodeId: 'recursive2-6', name: `${UnnamedArgumentPrefix}recursive2-6`, scope: LocalScope, used: 'always' } + ], + when: 'always' + }) + .addVertex({ + tag: 'function-call', + name: 'cat', + id: 'recursive2-3', + environment: envWithX, + args: [{ + nodeId: 'recursive2-2', name: `${UnnamedArgumentPrefix}recursive2-2`, scope: LocalScope, used: 'always' } + ], + when: 'always' + }) + .addVertex({ tag: 'variable-definition', id: '0', name: 'x', scope: LocalScope }) + .addVertex({tag: 'use', id: '5', name: `${UnnamedArgumentPrefix}5`, environment: envWithX }) + .addVertex({tag: 'use', id: 'recursive2-6', name: `${UnnamedArgumentPrefix}recursive2-6`, environment: envWithX }) + .addVertex({tag: 'use', id: 'recursive2-2', name: `${UnnamedArgumentPrefix}recursive2-2`, environment: envWithX }) + .addVertex({tag: 'use', id: 'recursive2-1', name: 'x', environment: envWithX }) + .addEdge('6', '5', EdgeType.Argument, 'always') + .addEdge('6', BuiltIn, EdgeType.Reads, 'always') + .addEdge('recursive2-3', BuiltIn, EdgeType.Reads, 'always') + .addEdge('recursive2-3', 'recursive2-2', EdgeType.Argument, 'always') + .addEdge('recursive2-2', 'recursive2-1', EdgeType.Reads, 'always') + .addEdge('recursive2-1', '0', EdgeType.Reads, 'always') + .addEdge('recursive2-7', 'recursive2-6', EdgeType.Argument, 'always') + .addEdge('recursive2-7', BuiltIn, EdgeType.Reads, 'always') + ) })) From 85dd0fd648c152bde0a47e0443034ab140ffa092 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Tue, 6 Feb 2024 12:57:06 +0100 Subject: [PATCH 41/46] test: added test for non-constant source argument --- .../internal/process/functions/source.ts | 4 +++- .../functions/source-tests.ts | 21 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/dataflow/internal/process/functions/source.ts b/src/dataflow/internal/process/functions/source.ts index f05f6a5788..4767f7f94a 100644 --- a/src/dataflow/internal/process/functions/source.ts +++ b/src/dataflow/internal/process/functions/source.ts @@ -57,6 +57,8 @@ export function processSourceCall(functionCall: RFunctionCall { .addEdge('recursive2-7', 'recursive2-6', EdgeType.Argument, 'always') .addEdge('recursive2-7', BuiltIn, EdgeType.Reads, 'always') ) + + // we currently don't support (and ignore) source calls with non-constant arguments! + assertDataflow('non-constant source', shell, 'x <- "recursive1"\nsource(x)', new DataflowGraph() + .addVertex({ + tag: 'function-call', + name: 'source', + id: '6', + environment: envWithX, + args: [{ + nodeId: '5', name: `${UnnamedArgumentPrefix}5`, scope: LocalScope, used: 'always' } + ], + when: 'always' + }) + .addVertex({ tag: 'variable-definition', id: '0', name: 'x', scope: LocalScope }) + .addVertex({tag: 'use', id: '5', name: `${UnnamedArgumentPrefix}5`, environment: envWithX }) + .addVertex({tag: 'use', id: '4', name: 'x', environment: envWithX }) + .addEdge('6', '5', EdgeType.Argument, 'always') + .addEdge('6', BuiltIn, EdgeType.Reads, 'always') + .addEdge('5', '4', EdgeType.Reads, 'always') + .addEdge('4', '0', EdgeType.Reads, 'always') + ) })) From 0c239afee12eb2d896cd8abf1d797521be12281a Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Tue, 6 Feb 2024 13:12:13 +0100 Subject: [PATCH 42/46] test: added multi-source test --- .../functions/source-tests.ts | 71 ++++++++++++++++--- 1 file changed, 63 insertions(+), 8 deletions(-) diff --git a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts index ca3b7dd728..afadd01dd8 100644 --- a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts +++ b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts @@ -13,7 +13,7 @@ describe('source', withShell(shell => { } setSourceProvider(requestProviderFromText(sources)) - const envWithN = define( + const envWithSimpleN = define( {nodeId: 'simple-0', scope: 'local', name: 'N', used: 'always', kind: 'variable', definedAt: 'simple-2' }, LocalScope, initializeCleanEnvironments() @@ -27,19 +27,20 @@ describe('source', withShell(shell => { environment: initializeCleanEnvironments(), args: [{ nodeId: '2', name: `${UnnamedArgumentPrefix}2`, scope: LocalScope, used: 'always' } - ]}) + ] + }) .addVertex({ tag: 'function-call', name: 'cat', id: '7', - environment: envWithN, + environment: envWithSimpleN, args: [{ nodeId: '6', name: `${UnnamedArgumentPrefix}6`, scope: LocalScope, used: 'always' }] }) - .addVertex({tag: 'use', id: '5', name: 'N', environment: envWithN}) + .addVertex({tag: 'use', id: '5', name: 'N', environment: envWithSimpleN}) .addVertex({tag: 'use', id: '2', name: `${UnnamedArgumentPrefix}2`}) - .addVertex({tag: 'use', id: '6', name: `${UnnamedArgumentPrefix}6`, environment: envWithN}) + .addVertex({tag: 'use', id: '6', name: `${UnnamedArgumentPrefix}6`, environment: envWithSimpleN}) .addEdge('3', '2', EdgeType.Argument, 'always') .addEdge('3', BuiltIn, EdgeType.Reads, 'always') .addEdge('5', 'simple-0', EdgeType.Reads, 'always') @@ -48,6 +49,60 @@ describe('source', withShell(shell => { .addEdge('7', BuiltIn, EdgeType.Reads, 'always') ) + const envWithLocalN = define( + {nodeId: '4', scope: 'local', name: 'N', used: 'always', kind: 'variable', definedAt: '6' }, + LocalScope, + initializeCleanEnvironments() + ) + assertDataflow('multiple source', shell, 'source("simple")\nN <- 0\nsource("simple")\ncat(N)', new DataflowGraph() + .addVertex({ + tag: 'function-call', + name: 'source', + id: '3', + environment: initializeCleanEnvironments(), + args: [{ + nodeId: '2', name: `${UnnamedArgumentPrefix}2`, scope: LocalScope, used: 'always' } + ], + when: 'always' + }) + .addVertex({ + tag: 'function-call', + name: 'source', + id: '10', + environment: envWithLocalN, + args: [{ + nodeId: '9', name: `${UnnamedArgumentPrefix}9`, scope: LocalScope, used: 'always' } + ], + when: 'always' + }) + .addVertex({ + tag: 'function-call', + name: 'cat', + id: '14', + environment: envWithLocalN, + args: [{ + nodeId: '13', name: `${UnnamedArgumentPrefix}13`, scope: LocalScope, used: 'always' } + ], + when: 'always' + }) + .addVertex({ tag: 'variable-definition', id: 'simple-0', name: 'N', scope: LocalScope }) + .addVertex({ tag: 'variable-definition', id: '4', name: 'N', scope: LocalScope, environment: envWithSimpleN }) + .addVertex({tag: 'use', id: '2', name: `${UnnamedArgumentPrefix}2` }) + .addVertex({tag: 'use', id: '9', name: `${UnnamedArgumentPrefix}9`, environment: envWithLocalN }) + .addVertex({tag: 'use', id: '13', name: `${UnnamedArgumentPrefix}13`, environment: envWithLocalN }) + .addVertex({tag: 'use', id: '12', name: 'N', environment: envWithLocalN }) + .addEdge('3', '10', EdgeType.SameReadRead, 'always') + .addEdge('3', '2', EdgeType.Argument, 'always') + .addEdge('14', '13', EdgeType.Argument, 'always') + .addEdge('10', '9', EdgeType.Argument, 'always') + .addEdge('3', BuiltIn, EdgeType.Reads, 'always') + .addEdge('10', BuiltIn, EdgeType.Reads, 'always') + .addEdge('14', BuiltIn, EdgeType.Reads, 'always') + .addEdge('13', '12', EdgeType.Reads, 'always') + .addEdge('12', '4', EdgeType.Reads, 'always') + .addEdge('4', 'simple-0', EdgeType.SameDefDef, 'always') + ) + assertDataflow('conditional', shell, 'if (x) { source("simple") }\ncat(N)', new DataflowGraph() .addVertex({ tag: 'variable-definition', id: 'simple-0', name: 'N', scope: LocalScope }) .addVertex({ @@ -64,15 +119,15 @@ describe('source', withShell(shell => { tag: 'function-call', name: 'cat', id: '10', - environment: envWithN, + environment: envWithSimpleN, args: [{ nodeId: '9', name: `${UnnamedArgumentPrefix}9`, scope: LocalScope, used: 'always' }] }) .addVertex({tag: 'use', id: '0', name: 'x', scope: LocalScope}) - .addVertex({tag: 'use', id: '8', name: 'N', environment: envWithN}) + .addVertex({tag: 'use', id: '8', name: 'N', environment: envWithSimpleN}) .addVertex({tag: 'use', id: '3', name: `${UnnamedArgumentPrefix}3`}) - .addVertex({tag: 'use', id: '9', name: `${UnnamedArgumentPrefix}9`, environment: envWithN}) + .addVertex({tag: 'use', id: '9', name: `${UnnamedArgumentPrefix}9`, environment: envWithSimpleN}) .addEdge('4', '3', EdgeType.Argument, 'always') .addEdge('4', BuiltIn, EdgeType.Reads, 'maybe') .addEdge('8', 'simple-0', EdgeType.Reads, 'always') From f6323c6478aff83f0246c9f67454155f64a3f2a1 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Tue, 6 Feb 2024 14:38:41 +0100 Subject: [PATCH 43/46] feat-fix: sourcing multiple files works correctly now --- .../internal/process/functions/source.ts | 14 ++- .../lang-4.x/ast/model/processing/decorate.ts | 4 +- .../functions/source-tests.ts | 92 ++++++++++++------- 3 files changed, 69 insertions(+), 41 deletions(-) diff --git a/src/dataflow/internal/process/functions/source.ts b/src/dataflow/internal/process/functions/source.ts index 4767f7f94a..09aa2ddc57 100644 --- a/src/dataflow/internal/process/functions/source.ts +++ b/src/dataflow/internal/process/functions/source.ts @@ -1,6 +1,7 @@ import type {RArgument, RParseRequestProvider} from '../../../../r-bridge' +import { sourcedDeterministicCountingIdGenerator} from '../../../../r-bridge' import {requestProviderFromFile} from '../../../../r-bridge' -import {fileNameDeterministicCountingIdGenerator, type NormalizedAst, type ParentInformation, removeTokenMapQuotationMarks, type RFunctionCall, RType} from '../../../../r-bridge' +import {type NormalizedAst, type ParentInformation, removeTokenMapQuotationMarks, type RFunctionCall, RType} from '../../../../r-bridge' import {RShellExecutor} from '../../../../r-bridge/shell-executor' import {executeSingleSubStep} from '../../../../core' import {type DataflowProcessorInformation, processDataflowFor} from '../../../processor' @@ -34,16 +35,19 @@ export function processSourceCall(functionCall: RFunctionCall let dataflow: DataflowInformation try { const parsed = executeSingleSubStep('parse', request, executor) as string - normalized = executeSingleSubStep('normalize', parsed, executor.getTokenMap(), undefined, fileNameDeterministicCountingIdGenerator(path)) as NormalizedAst - dataflow = processDataflowFor(normalized.ast, {...data, currentRequest: request, environments: information.environments}) + normalized = executeSingleSubStep('normalize', parsed, executor.getTokenMap(), undefined, sourcedDeterministicCountingIdGenerator(path, functionCall.location)) as NormalizedAst + dataflow = processDataflowFor(normalized.ast, { + ...data, + currentRequest: request, + environments: information.environments, + referenceChain: [...data.referenceChain, request] + }) } catch(e) { dataflowLogger.warn(`Failed to analyze sourced file ${requestString}, skipping: ${(e as Error).message}`) return information diff --git a/src/r-bridge/lang-4.x/ast/model/processing/decorate.ts b/src/r-bridge/lang-4.x/ast/model/processing/decorate.ts index f848b6c532..284ea7ee42 100644 --- a/src/r-bridge/lang-4.x/ast/model/processing/decorate.ts +++ b/src/r-bridge/lang-4.x/ast/model/processing/decorate.ts @@ -46,9 +46,9 @@ export function deterministicCountingIdGenerator(start = 0): () => NodeId { return () => `${id++}` } -export function fileNameDeterministicCountingIdGenerator(filename: string, start = 0): () => NodeId { +export function sourcedDeterministicCountingIdGenerator(path: string, location: SourceRange, start = 0): () => NodeId { let id = start - return () => `${filename}-${id++}` + return () => `${path}-${loc2Id(location)}-${id++}` } function loc2Id(loc: SourceRange) { diff --git a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts index afadd01dd8..be39ac450b 100644 --- a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts +++ b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts @@ -14,12 +14,12 @@ describe('source', withShell(shell => { setSourceProvider(requestProviderFromText(sources)) const envWithSimpleN = define( - {nodeId: 'simple-0', scope: 'local', name: 'N', used: 'always', kind: 'variable', definedAt: 'simple-2' }, + {nodeId: 'simple-1:1-1:6-0', scope: 'local', name: 'N', used: 'always', kind: 'variable', definedAt: 'simple-1:1-1:6-2' }, LocalScope, initializeCleanEnvironments() ) assertDataflow('simple source', shell, 'source("simple")\ncat(N)', new DataflowGraph() - .addVertex({ tag: 'variable-definition', id: 'simple-0', name: 'N', scope: LocalScope }) + .addVertex({ tag: 'variable-definition', id: 'simple-1:1-1:6-0', name: 'N', scope: LocalScope }) .addVertex({ tag: 'function-call', name: 'source', @@ -43,17 +43,12 @@ describe('source', withShell(shell => { .addVertex({tag: 'use', id: '6', name: `${UnnamedArgumentPrefix}6`, environment: envWithSimpleN}) .addEdge('3', '2', EdgeType.Argument, 'always') .addEdge('3', BuiltIn, EdgeType.Reads, 'always') - .addEdge('5', 'simple-0', EdgeType.Reads, 'always') + .addEdge('5', 'simple-1:1-1:6-0', EdgeType.Reads, 'always') .addEdge('6', '5', EdgeType.Reads, 'always') .addEdge('7', '6', EdgeType.Argument, 'always') .addEdge('7', BuiltIn, EdgeType.Reads, 'always') ) - const envWithLocalN = define( - {nodeId: '4', scope: 'local', name: 'N', used: 'always', kind: 'variable', definedAt: '6' }, - LocalScope, - initializeCleanEnvironments() - ) assertDataflow('multiple source', shell, 'source("simple")\nN <- 0\nsource("simple")\ncat(N)', new DataflowGraph() .addVertex({ tag: 'function-call', @@ -69,7 +64,7 @@ describe('source', withShell(shell => { tag: 'function-call', name: 'source', id: '10', - environment: envWithLocalN, + environment: define({nodeId: '4', scope: 'local', name: 'N', used: 'always', kind: 'variable', definedAt: '6' }, LocalScope, initializeCleanEnvironments()), args: [{ nodeId: '9', name: `${UnnamedArgumentPrefix}9`, scope: LocalScope, used: 'always' } ], @@ -79,18 +74,40 @@ describe('source', withShell(shell => { tag: 'function-call', name: 'cat', id: '14', - environment: envWithLocalN, + environment: define({nodeId: 'simple-3:1-3:6-0', scope: 'local', name: 'N', used: 'always', kind: 'variable', definedAt: 'simple-3:1-3:6-2' }, LocalScope, initializeCleanEnvironments()), args: [{ nodeId: '13', name: `${UnnamedArgumentPrefix}13`, scope: LocalScope, used: 'always' } ], when: 'always' }) - .addVertex({ tag: 'variable-definition', id: 'simple-0', name: 'N', scope: LocalScope }) + .addVertex({ + tag: 'variable-definition', + id: 'simple-3:1-3:6-0', + name: 'N', + scope: LocalScope, + environment: define({nodeId: '4', scope: 'local', name: 'N', used: 'always', kind: 'variable', definedAt: '6' }, LocalScope, initializeCleanEnvironments()) + }) + .addVertex({ tag: 'variable-definition', id: 'simple-1:1-1:6-0', name: 'N', scope: LocalScope }) .addVertex({ tag: 'variable-definition', id: '4', name: 'N', scope: LocalScope, environment: envWithSimpleN }) .addVertex({tag: 'use', id: '2', name: `${UnnamedArgumentPrefix}2` }) - .addVertex({tag: 'use', id: '9', name: `${UnnamedArgumentPrefix}9`, environment: envWithLocalN }) - .addVertex({tag: 'use', id: '13', name: `${UnnamedArgumentPrefix}13`, environment: envWithLocalN }) - .addVertex({tag: 'use', id: '12', name: 'N', environment: envWithLocalN }) + .addVertex({ + tag: 'use', + id: '9', + name: `${UnnamedArgumentPrefix}9`, + environment: define({nodeId: '4', scope: 'local', name: 'N', used: 'always', kind: 'variable', definedAt: '6' }, LocalScope, initializeCleanEnvironments()) + }) + .addVertex({ + tag: 'use', + id: '13', + name: `${UnnamedArgumentPrefix}13`, + environment: define({nodeId: 'simple-3:1-3:6-0', scope: 'local', name: 'N', used: 'always', kind: 'variable', definedAt: 'simple-3:1-3:6-2' }, LocalScope, initializeCleanEnvironments()) + }) + .addVertex({ + tag: 'use', + id: '12', + name: 'N', + environment: define({nodeId: 'simple-3:1-3:6-0', scope: 'local', name: 'N', used: 'always', kind: 'variable', definedAt: 'simple-3:1-3:6-2' }, LocalScope, initializeCleanEnvironments()) + }) .addEdge('3', '10', EdgeType.SameReadRead, 'always') .addEdge('3', '2', EdgeType.Argument, 'always') .addEdge('14', '13', EdgeType.Argument, 'always') @@ -99,12 +116,18 @@ describe('source', withShell(shell => { .addEdge('10', BuiltIn, EdgeType.Reads, 'always') .addEdge('14', BuiltIn, EdgeType.Reads, 'always') .addEdge('13', '12', EdgeType.Reads, 'always') - .addEdge('12', '4', EdgeType.Reads, 'always') - .addEdge('4', 'simple-0', EdgeType.SameDefDef, 'always') + .addEdge('12', 'simple-3:1-3:6-0', EdgeType.Reads, 'always') + .addEdge('simple-3:1-3:6-0', '4', EdgeType.SameDefDef, 'always') + .addEdge('4', 'simple-1:1-1:6-0', EdgeType.SameDefDef, 'always') ) + const envWithConditionalN = define( + {nodeId: 'simple-1:10-1:15-0', scope: 'local', name: 'N', used: 'always', kind: 'variable', definedAt: 'simple-1:10-1:15-2' }, + LocalScope, + initializeCleanEnvironments() + ) assertDataflow('conditional', shell, 'if (x) { source("simple") }\ncat(N)', new DataflowGraph() - .addVertex({ tag: 'variable-definition', id: 'simple-0', name: 'N', scope: LocalScope }) + .addVertex({ tag: 'variable-definition', id: 'simple-1:10-1:15-0', name: 'N', scope: LocalScope }) .addVertex({ tag: 'function-call', name: 'source', @@ -119,18 +142,18 @@ describe('source', withShell(shell => { tag: 'function-call', name: 'cat', id: '10', - environment: envWithSimpleN, + environment: envWithConditionalN, args: [{ nodeId: '9', name: `${UnnamedArgumentPrefix}9`, scope: LocalScope, used: 'always' }] }) .addVertex({tag: 'use', id: '0', name: 'x', scope: LocalScope}) - .addVertex({tag: 'use', id: '8', name: 'N', environment: envWithSimpleN}) + .addVertex({tag: 'use', id: '8', name: 'N', environment: envWithConditionalN}) .addVertex({tag: 'use', id: '3', name: `${UnnamedArgumentPrefix}3`}) - .addVertex({tag: 'use', id: '9', name: `${UnnamedArgumentPrefix}9`, environment: envWithSimpleN}) + .addVertex({tag: 'use', id: '9', name: `${UnnamedArgumentPrefix}9`, environment: envWithConditionalN}) .addEdge('4', '3', EdgeType.Argument, 'always') .addEdge('4', BuiltIn, EdgeType.Reads, 'maybe') - .addEdge('8', 'simple-0', EdgeType.Reads, 'always') + .addEdge('8', 'simple-1:10-1:15-0', EdgeType.Reads, 'always') .addEdge('9', '8', EdgeType.Reads, 'always') .addEdge('10', '9', EdgeType.Argument, 'always') .addEdge('10', BuiltIn, EdgeType.Reads, 'always') @@ -157,6 +180,7 @@ describe('source', withShell(shell => { LocalScope, initializeCleanEnvironments() ) + const recursive2Prefix = 'recursive2-2:1-2:6-' assertDataflow('recursive source', shell, sources.recursive1, new DataflowGraph() .addVertex({ tag: 'function-call', @@ -171,36 +195,36 @@ describe('source', withShell(shell => { .addVertex({ tag: 'function-call', name: 'source', - id: 'recursive2-7', + id: `${recursive2Prefix}7`, environment: envWithX, args: [{ - nodeId: 'recursive2-6', name: `${UnnamedArgumentPrefix}recursive2-6`, scope: LocalScope, used: 'always' } + nodeId: `${recursive2Prefix}6`, name: `${UnnamedArgumentPrefix}${recursive2Prefix}6`, scope: LocalScope, used: 'always' } ], when: 'always' }) .addVertex({ tag: 'function-call', name: 'cat', - id: 'recursive2-3', + id: `${recursive2Prefix}3`, environment: envWithX, args: [{ - nodeId: 'recursive2-2', name: `${UnnamedArgumentPrefix}recursive2-2`, scope: LocalScope, used: 'always' } + nodeId: `${recursive2Prefix}2`, name: `${UnnamedArgumentPrefix}${recursive2Prefix}2`, scope: LocalScope, used: 'always' } ], when: 'always' }) .addVertex({ tag: 'variable-definition', id: '0', name: 'x', scope: LocalScope }) .addVertex({tag: 'use', id: '5', name: `${UnnamedArgumentPrefix}5`, environment: envWithX }) - .addVertex({tag: 'use', id: 'recursive2-6', name: `${UnnamedArgumentPrefix}recursive2-6`, environment: envWithX }) - .addVertex({tag: 'use', id: 'recursive2-2', name: `${UnnamedArgumentPrefix}recursive2-2`, environment: envWithX }) - .addVertex({tag: 'use', id: 'recursive2-1', name: 'x', environment: envWithX }) + .addVertex({tag: 'use', id: `${recursive2Prefix}6`, name: `${UnnamedArgumentPrefix}${recursive2Prefix}6`, environment: envWithX }) + .addVertex({tag: 'use', id: `${recursive2Prefix}2`, name: `${UnnamedArgumentPrefix}${recursive2Prefix}2`, environment: envWithX }) + .addVertex({tag: 'use', id: `${recursive2Prefix}1`, name: 'x', environment: envWithX }) .addEdge('6', '5', EdgeType.Argument, 'always') .addEdge('6', BuiltIn, EdgeType.Reads, 'always') - .addEdge('recursive2-3', BuiltIn, EdgeType.Reads, 'always') - .addEdge('recursive2-3', 'recursive2-2', EdgeType.Argument, 'always') - .addEdge('recursive2-2', 'recursive2-1', EdgeType.Reads, 'always') - .addEdge('recursive2-1', '0', EdgeType.Reads, 'always') - .addEdge('recursive2-7', 'recursive2-6', EdgeType.Argument, 'always') - .addEdge('recursive2-7', BuiltIn, EdgeType.Reads, 'always') + .addEdge(`${recursive2Prefix}3`, BuiltIn, EdgeType.Reads, 'always') + .addEdge(`${recursive2Prefix}3`, `${recursive2Prefix}2`, EdgeType.Argument, 'always') + .addEdge(`${recursive2Prefix}2`, `${recursive2Prefix}1`, EdgeType.Reads, 'always') + .addEdge(`${recursive2Prefix}1`, '0', EdgeType.Reads, 'always') + .addEdge(`${recursive2Prefix}7`, `${recursive2Prefix}6`, EdgeType.Argument, 'always') + .addEdge(`${recursive2Prefix}7`, BuiltIn, EdgeType.Reads, 'always') ) // we currently don't support (and ignore) source calls with non-constant arguments! From 03b46183ef1e30d3b69d72027d3789090b2224bd Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Wed, 7 Feb 2024 10:45:37 +0100 Subject: [PATCH 44/46] refactor: resolve review comments --- src/dataflow/extractor.ts | 3 +- .../process/functions/function-call.ts | 4 +- .../internal/process/functions/source.ts | 78 +++++++++++-------- src/dataflow/processor.ts | 9 ++- src/r-bridge/retriever.ts | 8 ++ .../functions/source-tests.ts | 30 +++---- 6 files changed, 80 insertions(+), 52 deletions(-) diff --git a/src/dataflow/extractor.ts b/src/dataflow/extractor.ts index 03b1bddddc..eb82896ef3 100644 --- a/src/dataflow/extractor.ts +++ b/src/dataflow/extractor.ts @@ -1,4 +1,5 @@ import type {NormalizedAst, ParentInformation, RAssignmentOp, RBinaryOp, RParseRequest} from '../r-bridge' +import { requestFingerprint} from '../r-bridge' import { RType } from '../r-bridge' import type { DataflowInformation } from './internal/info' import type { DataflowProcessorInformation, DataflowProcessors} from './processor' @@ -55,7 +56,7 @@ export function produceDataFlowGraph(request: RParseRequest, ast: Nor environments: initializeCleanEnvironments(), processors: processors as DataflowProcessors, currentRequest: request, - referenceChain: [request] + referenceChain: [requestFingerprint(request)] }) } diff --git a/src/dataflow/internal/process/functions/function-call.ts b/src/dataflow/internal/process/functions/function-call.ts index 82bff3634c..e3a9305180 100644 --- a/src/dataflow/internal/process/functions/function-call.ts +++ b/src/dataflow/internal/process/functions/function-call.ts @@ -117,9 +117,9 @@ export function processFunctionCall(functionCall: RFunctionCall d.kind == 'built-in-function') + if(definitions === undefined) { + return false + } + // fail if there are multiple definitions because then we must treat the complete import as a maybe because it might do something different + if(definitions.length !== 1) { + return false + } + const def = definitions[0] + return def.name == 'source' && def.kind == 'built-in-function' } export function processSourceCall(functionCall: RFunctionCall, data: DataflowProcessorInformation, information: DataflowInformation): DataflowInformation { const sourceFile = functionCall.arguments[0] as RArgument | undefined if(sourceFile?.value?.type == RType.String) { - const executor = new RShellExecutor() const path = removeTokenMapQuotationMarks(sourceFile.lexeme) const request = sourceProvider.createRequest(path) // check if the sourced file has already been dataflow analyzed, and if so, skip it - const requestString = JSON.stringify(request) - if(data.referenceChain.some(r => JSON.stringify(r) == requestString)) { - dataflowLogger.info(`Found loop in dataflow analysis for ${requestString}: ${JSON.stringify(data.referenceChain)}, skipping further dataflow analysis`) - return information - } - - // parse, normalize and dataflow the sourced file - let normalized: NormalizedAst - let dataflow: DataflowInformation - try { - const parsed = executeSingleSubStep('parse', request, executor) as string - normalized = executeSingleSubStep('normalize', parsed, executor.getTokenMap(), undefined, sourcedDeterministicCountingIdGenerator(path, functionCall.location)) as NormalizedAst - dataflow = processDataflowFor(normalized.ast, { - ...data, - currentRequest: request, - environments: information.environments, - referenceChain: [...data.referenceChain, request] - }) - } catch(e) { - dataflowLogger.warn(`Failed to analyze sourced file ${requestString}, skipping: ${(e as Error).message}`) + if(data.referenceChain.includes(requestFingerprint(request))) { + dataflowLogger.info(`Found loop in dataflow analysis for ${JSON.stringify(request)}: ${JSON.stringify(data.referenceChain)}, skipping further dataflow analysis`) return information } - // update our graph with the sourced file's information - const newInformation = {...information} - newInformation.environments = overwriteEnvironments(information.environments, dataflow.environments) - newInformation.graph.mergeWith(dataflow.graph) - // this can be improved, see issue #628 - for(const [k, v] of normalized.idMap) - data.completeAst.idMap.set(k, v) - return newInformation + return sourceRequest(request, data, information, sourcedDeterministicCountingIdGenerator(path, functionCall.location)) } else { dataflowLogger.info(`Non-constant argument ${JSON.stringify(sourceFile)} for source is currently not supported, skipping`) return information } } + +export function sourceRequest(request: RParseRequest, data: DataflowProcessorInformation, information: DataflowInformation, getId: IdGenerator): DataflowInformation { + const executor = new RShellExecutor() + + // parse, normalize and dataflow the sourced file + let normalized: NormalizedAst + let dataflow: DataflowInformation + try { + const parsed = executeSingleSubStep('parse', request, executor) as string + normalized = executeSingleSubStep('normalize', parsed, executor.getTokenMap(), undefined, getId) as NormalizedAst + dataflow = processDataflowFor(normalized.ast, { + ...data, + currentRequest: request, + environments: information.environments, + referenceChain: [...data.referenceChain, requestFingerprint(request)] + }) + } catch(e) { + dataflowLogger.warn(`Failed to analyze sourced file ${JSON.stringify(request)}, skipping: ${(e as Error).message}`) + return information + } + + // update our graph with the sourced file's information + const newInformation = {...information} + newInformation.environments = overwriteEnvironments(information.environments, dataflow.environments) + newInformation.graph.mergeWith(dataflow.graph) + // this can be improved, see issue #628 + for(const [k, v] of normalized.idMap) { + data.completeAst.idMap.set(k, v) + } + return newInformation +} diff --git a/src/dataflow/processor.ts b/src/dataflow/processor.ts index 90760e9f7b..8e9497129b 100644 --- a/src/dataflow/processor.ts +++ b/src/dataflow/processor.ts @@ -27,8 +27,15 @@ export interface DataflowProcessorInformation { * Other processors to be called by the given functions */ readonly processors: DataflowProcessors + /** + * The {@link RParseRequest} that is currently being parsed + */ readonly currentRequest: RParseRequest - readonly referenceChain: RParseRequest[] + /** + * The chain of {@link RParseRequest} fingerprints ({@link requestFingerprint}) that lead to the {@link currentRequest}. + * The most recent (last) entry is expected to always be the {@link currentRequest}. + */ + readonly referenceChain: string[] } export type DataflowProcessor> = (node: NodeType, data: DataflowProcessorInformation) => DataflowInformation diff --git a/src/r-bridge/retriever.ts b/src/r-bridge/retriever.ts index 08ee9bdecc..b65b92e7d1 100644 --- a/src/r-bridge/retriever.ts +++ b/src/r-bridge/retriever.ts @@ -26,6 +26,9 @@ interface RParseRequestBase { ensurePackageInstalled: boolean } +/** + * A provider for an {@link RParseRequest} that can be used, for example, to override source file parsing behavior in tests + */ export interface RParseRequestProvider { createRequest(path: string): RParseRequest } @@ -73,6 +76,11 @@ export function requestProviderFromText(text: {[path: string]: string}): RParseR } } +export function requestFingerprint(request: RParseRequest): string { + // eventually we should do this properly, like using a hashing function etc. + return JSON.stringify(request) +} + const ErrorMarker = 'err' /** diff --git a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts index be39ac450b..a4c519b706 100644 --- a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts +++ b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts @@ -1,6 +1,6 @@ import {assertDataflow, withShell} from '../../../_helper/shell' import {setSourceProvider} from '../../../../../src/dataflow/internal/process/functions/source' -import {BuiltIn, DataflowGraph, EdgeType, initializeCleanEnvironments, requestProviderFromText} from '../../../../../src' +import {BuiltIn, DataflowGraph, EdgeType, initializeCleanEnvironments, requestProviderFromText, sourcedDeterministicCountingIdGenerator} from '../../../../../src' import {LocalScope} from '../../../../../src/dataflow/environments/scopes' import {UnnamedArgumentPrefix} from '../../../../../src/dataflow/internal/process/functions/argument' import {define} from '../../../../../src/dataflow/environments' @@ -175,12 +175,12 @@ describe('source', withShell(shell => { .addEdge('3', BuiltIn, EdgeType.Reads, 'always') ) + const recursive2Id = (id: number) => sourcedDeterministicCountingIdGenerator('recursive2', {start: {line: 2, column: 1}, end: {line: 2, column: 6}}, id)() const envWithX = define( {nodeId: '0', scope: 'local', name: 'x', used: 'always', kind: 'variable', definedAt: '2' }, LocalScope, initializeCleanEnvironments() ) - const recursive2Prefix = 'recursive2-2:1-2:6-' assertDataflow('recursive source', shell, sources.recursive1, new DataflowGraph() .addVertex({ tag: 'function-call', @@ -195,36 +195,36 @@ describe('source', withShell(shell => { .addVertex({ tag: 'function-call', name: 'source', - id: `${recursive2Prefix}7`, + id: recursive2Id(7), environment: envWithX, args: [{ - nodeId: `${recursive2Prefix}6`, name: `${UnnamedArgumentPrefix}${recursive2Prefix}6`, scope: LocalScope, used: 'always' } + nodeId: recursive2Id(6), name: `${UnnamedArgumentPrefix}${recursive2Id(6)}`, scope: LocalScope, used: 'always' } ], when: 'always' }) .addVertex({ tag: 'function-call', name: 'cat', - id: `${recursive2Prefix}3`, + id: recursive2Id(3), environment: envWithX, args: [{ - nodeId: `${recursive2Prefix}2`, name: `${UnnamedArgumentPrefix}${recursive2Prefix}2`, scope: LocalScope, used: 'always' } + nodeId: recursive2Id(2), name: `${UnnamedArgumentPrefix}${recursive2Id(2)}`, scope: LocalScope, used: 'always' } ], when: 'always' }) .addVertex({ tag: 'variable-definition', id: '0', name: 'x', scope: LocalScope }) .addVertex({tag: 'use', id: '5', name: `${UnnamedArgumentPrefix}5`, environment: envWithX }) - .addVertex({tag: 'use', id: `${recursive2Prefix}6`, name: `${UnnamedArgumentPrefix}${recursive2Prefix}6`, environment: envWithX }) - .addVertex({tag: 'use', id: `${recursive2Prefix}2`, name: `${UnnamedArgumentPrefix}${recursive2Prefix}2`, environment: envWithX }) - .addVertex({tag: 'use', id: `${recursive2Prefix}1`, name: 'x', environment: envWithX }) + .addVertex({tag: 'use', id: recursive2Id(6), name: `${UnnamedArgumentPrefix}${recursive2Id(6)}`, environment: envWithX }) + .addVertex({tag: 'use', id: recursive2Id(2), name: `${UnnamedArgumentPrefix}${recursive2Id(2)}`, environment: envWithX }) + .addVertex({tag: 'use', id: recursive2Id(1), name: 'x', environment: envWithX }) .addEdge('6', '5', EdgeType.Argument, 'always') .addEdge('6', BuiltIn, EdgeType.Reads, 'always') - .addEdge(`${recursive2Prefix}3`, BuiltIn, EdgeType.Reads, 'always') - .addEdge(`${recursive2Prefix}3`, `${recursive2Prefix}2`, EdgeType.Argument, 'always') - .addEdge(`${recursive2Prefix}2`, `${recursive2Prefix}1`, EdgeType.Reads, 'always') - .addEdge(`${recursive2Prefix}1`, '0', EdgeType.Reads, 'always') - .addEdge(`${recursive2Prefix}7`, `${recursive2Prefix}6`, EdgeType.Argument, 'always') - .addEdge(`${recursive2Prefix}7`, BuiltIn, EdgeType.Reads, 'always') + .addEdge(recursive2Id(3), BuiltIn, EdgeType.Reads, 'always') + .addEdge(recursive2Id(3), recursive2Id(2), EdgeType.Argument, 'always') + .addEdge(recursive2Id(2), recursive2Id(1), EdgeType.Reads, 'always') + .addEdge(recursive2Id(1), '0', EdgeType.Reads, 'always') + .addEdge(recursive2Id(7), recursive2Id(6), EdgeType.Argument, 'always') + .addEdge(recursive2Id(7), BuiltIn, EdgeType.Reads, 'always') ) // we currently don't support (and ignore) source calls with non-constant arguments! From d1ea24ad56362ce1c45fc988243c95a31dad5a9e Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Wed, 7 Feb 2024 13:43:52 +0100 Subject: [PATCH 45/46] test: reset the source provider to the default value after each describe --- test/functionality/dataflow/dataflow.spec.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/functionality/dataflow/dataflow.spec.ts b/test/functionality/dataflow/dataflow.spec.ts index b13e6d891d..c14a5b687a 100644 --- a/test/functionality/dataflow/dataflow.spec.ts +++ b/test/functionality/dataflow/dataflow.spec.ts @@ -1,5 +1,7 @@ import { requireAllTestsInFolder } from '../_helper/collect-tests' import path from 'path' +import {setSourceProvider} from '../../../src/dataflow/internal/process/functions/source' +import {requestProviderFromFile} from '../../../src' describe('Dataflow', () => { describe('Environments', () => @@ -10,5 +12,8 @@ describe('Dataflow', () => { requireAllTestsInFolder(path.join(__dirname, 'graph')) ) + // reset the source provider back to the default value after each "describe" section + after(() => setSourceProvider(requestProviderFromFile())) + require('./processing-of-elements/processing-of-elements') }) From b5ddd9a8a6ce80553f110b022e0f41573e11aea5 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Wed, 7 Feb 2024 13:50:23 +0100 Subject: [PATCH 46/46] test-fix: reset the source provider in the source describe instead --- test/functionality/dataflow/dataflow.spec.ts | 5 ----- .../processing-of-elements/functions/source-tests.ts | 5 ++++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/test/functionality/dataflow/dataflow.spec.ts b/test/functionality/dataflow/dataflow.spec.ts index c14a5b687a..b13e6d891d 100644 --- a/test/functionality/dataflow/dataflow.spec.ts +++ b/test/functionality/dataflow/dataflow.spec.ts @@ -1,7 +1,5 @@ import { requireAllTestsInFolder } from '../_helper/collect-tests' import path from 'path' -import {setSourceProvider} from '../../../src/dataflow/internal/process/functions/source' -import {requestProviderFromFile} from '../../../src' describe('Dataflow', () => { describe('Environments', () => @@ -12,8 +10,5 @@ describe('Dataflow', () => { requireAllTestsInFolder(path.join(__dirname, 'graph')) ) - // reset the source provider back to the default value after each "describe" section - after(() => setSourceProvider(requestProviderFromFile())) - require('./processing-of-elements/processing-of-elements') }) diff --git a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts index a4c519b706..40c8b390f4 100644 --- a/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts +++ b/test/functionality/dataflow/processing-of-elements/functions/source-tests.ts @@ -1,11 +1,14 @@ import {assertDataflow, withShell} from '../../../_helper/shell' import {setSourceProvider} from '../../../../../src/dataflow/internal/process/functions/source' -import {BuiltIn, DataflowGraph, EdgeType, initializeCleanEnvironments, requestProviderFromText, sourcedDeterministicCountingIdGenerator} from '../../../../../src' +import {BuiltIn, DataflowGraph, EdgeType, initializeCleanEnvironments, requestProviderFromFile, requestProviderFromText, sourcedDeterministicCountingIdGenerator} from '../../../../../src' import {LocalScope} from '../../../../../src/dataflow/environments/scopes' import {UnnamedArgumentPrefix} from '../../../../../src/dataflow/internal/process/functions/argument' import {define} from '../../../../../src/dataflow/environments' describe('source', withShell(shell => { + // reset the source provider back to the default value after our tests + after(() => setSourceProvider(requestProviderFromFile())) + const sources = { simple: 'N <- 9', recursive1: 'x <- 1\nsource("recursive2")',