Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Query API] Provide Access to Clustering #1059

Merged
merged 7 commits into from
Oct 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion src/cli/repl/commands/repl-query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import { graphToMermaidUrl } from '../../../util/mermaid/dfg';
import { normalizedAstToMermaidUrl } from '../../../util/mermaid/ast';

import { printAsMs } from '../../../util/time';
import { textWithTooltip } from '../../../documentation/doc-util/doc-hover-over';

async function getDataflow(shell: RShell, remainingLine: string) {
return await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, {
Expand Down Expand Up @@ -131,7 +132,7 @@ function summarizeIdsIfTooLong(ids: readonly NodeId[]) {
if(i < ids.length) {
acc += '... (see JSON below)';
}
return acc;
return textWithTooltip(acc, JSON.stringify(ids));
}

export function asciiSummaryOfQueryResult(formatter: OutputFormatter, totalInMs: number, results: QueryResults<SupportedQueryTypes>, processed: PipelineOutput<typeof DEFAULT_DATAFLOW_PIPELINE>): string {
Expand Down Expand Up @@ -161,6 +162,17 @@ export function asciiSummaryOfQueryResult(formatter: OutputFormatter, totalInMs:
result.push(`Query: ${bold(query, formatter)} (${printAsMs(out['.meta'].timing, 0)})`);
result.push(` ╰ [Normalized AST](${normalizedAstToMermaidUrl(out.normalized.ast)})`);
continue;
} else if(query === 'dataflow-cluster') {
const out = queryResults as QueryResults<'dataflow-cluster'>['dataflow-cluster'];
result.push(`Query: ${bold(query, formatter)} (${out['.meta'].timing.toFixed(0)}ms)`);
result.push(` ╰ Found ${out.clusters.length} cluster${out.clusters.length === 1 ? '': 's'}`);
for(const cluster of out.clusters) {
const unknownSideEffects = cluster.hasUnknownSideEffects ? '(has unknown side effect)' : '';
result.push(` ╰ ${unknownSideEffects} {${summarizeIdsIfTooLong(cluster.members)}} ([marked](${
graphToMermaidUrl(processed.dataflow.graph, false, new Set(cluster.members))
}))`);
}
continue;
}

result.push(`Query: ${bold(query, formatter)}`);
Expand Down
10 changes: 6 additions & 4 deletions src/documentation/doc-util/doc-dfg.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,12 @@ export interface PrintDataflowGraphOptions {
readonly codeOpen?: boolean;
readonly exposeResult?: boolean;
readonly switchCodeAndGraph?: boolean;
readonly hideEnvInMermaid?: boolean;
}

export async function printDfGraphForCode(shell: RShell, code: string, options: PrintDataflowGraphOptions & { exposeResult: true }): Promise<[string, PipelineOutput<typeof DEFAULT_DATAFLOW_PIPELINE>]>;
export async function printDfGraphForCode(shell: RShell, code: string, options?: PrintDataflowGraphOptions & { exposeResult?: false | undefined }): Promise<string>;
export async function printDfGraphForCode(shell: RShell, code: string, { mark, showCode = true, codeOpen = false, exposeResult, switchCodeAndGraph = false }: PrintDataflowGraphOptions = {}): Promise<string | [string, PipelineOutput<typeof DEFAULT_DATAFLOW_PIPELINE>]> {
export async function printDfGraphForCode(shell: RShell, code: string, { mark, showCode = true, codeOpen = false, exposeResult, switchCodeAndGraph = false, hideEnvInMermaid = false }: PrintDataflowGraphOptions = {}): Promise<string | [string, PipelineOutput<typeof DEFAULT_DATAFLOW_PIPELINE>]> {
const now = performance.now();
const result = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, {
shell,
Expand All @@ -48,7 +49,7 @@ export async function printDfGraphForCode(shell: RShell, code: string, { mark, s
guard(showCode, 'can not switch code and graph if code is not shown');
}

const metaInfo = `The analysis required _${printAsMs(duration)}_ (including parsing and normalization) within the generation environment.`;
const metaInfo = `The analysis required _${printAsMs(duration)}_ (incl. parse and normalize) within the generation environment.`;
const dfGraph = printDfGraph(result.dataflow.graph, mark);
let resultText = '\n\n';

Expand All @@ -73,8 +74,9 @@ ${switchCodeAndGraph ? dfGraph : codeText}

\`\`\`
${graphToMermaid({
graph: result.dataflow.graph,
prefix: 'flowchart LR'
graph: result.dataflow.graph,
prefix: 'flowchart LR',
includeEnvironments: !hideEnvInMermaid
}).string}
\`\`\`

Expand Down
2 changes: 1 addition & 1 deletion src/documentation/doc-util/doc-query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ function linkify(name: string) {
}

export function tocForQueryType(type: 'active' | 'virtual') {
const queries = RegisteredQueries[type];
const queries = [...RegisteredQueries[type].entries()].sort(([,{ name: a }], [, { name: b }]) => a.localeCompare(b));
const result: string[] = [];
for(const [id, { name, shortDescription }] of queries) {
result.push(`1. [${name}](#${linkify(name)}) (\`${id}\`):\\\n ${shortDescription}`);
Expand Down
22 changes: 22 additions & 0 deletions src/documentation/print-query-wiki.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import { codeBlock } from './doc-util/doc-code';
import { executeDataflowQuery } from '../queries/catalog/dataflow-query/dataflow-query-executor';
import { executeIdMapQuery } from '../queries/catalog/id-map-query/id-map-query-executor';
import { executeNormalizedAstQuery } from '../queries/catalog/normalized-ast-query/normalized-ast-query-executor';
import { executeDataflowClusterQuery } from '../queries/catalog/cluster-query/cluster-query-executor';


registerQueryDocumentation('call-context', {
Expand Down Expand Up @@ -130,6 +131,27 @@ ${
}
});

/*
 * Registers the documentation entry for the 'dataflow-cluster' query, marked as an 'active' query.
 * `functionName` is taken from `executeDataflowClusterQuery.name`, so it follows the executor's identifier.
 * The explanation is built asynchronously: it embeds the output of `showQuery` for a single
 * `dataflow-cluster` query run on `exampleQueryCode`, with `showCode: false`.
 */
registerQueryDocumentation('dataflow-cluster', {
name: 'Dataflow Cluster Query',
type: 'active',
shortDescription: 'Calculates and returns all the clusters present in the dataflow graph.',
functionName: executeDataflowClusterQuery.name,
functionFile: '../queries/catalog/cluster-query/cluster-query-executor.ts',
buildExplanation: async(shell: RShell) => {
return `
This query automatically calculates clusters in flowR's dataflow graph and returns a list of all clusters
found.

Using the example code from above, the following query returns all clusters:
${
await showQuery(shell, exampleQueryCode, [{
type: 'dataflow-cluster'
}], { showCode: false })
}
`;
}
});

registerQueryDocumentation('id-map', {
name: 'Id-Map Query',
type: 'active',
Expand Down
20 changes: 20 additions & 0 deletions src/queries/catalog/cluster-query/cluster-query-executor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import type { BasicQueryData } from '../../query';
import { log } from '../../../util/log';
import type { DataflowClusterQuery, DataflowClusterQueryResult } from './cluster-query-format';
import { findAllClusters } from '../../../dataflow/cluster';


/**
 * Executes the dataflow cluster query by computing all clusters of the given dataflow graph.
 *
 * The queries themselves are not inspected beyond their count: the clusters are computed once
 * from `graph`, and a warning is logged whenever the number of passed queries is not exactly one.
 *
 * @param data    - Query data; only its `graph` is used here.
 * @param queries - The cluster queries that were requested.
 * @returns The clusters found by `findAllClusters`, together with the elapsed wall-clock time
 *          (difference of two `Date.now()` readings, i.e. milliseconds) under `.meta.timing`.
 */
export function executeDataflowClusterQuery({ graph }: BasicQueryData, queries: readonly DataflowClusterQuery[]): DataflowClusterQueryResult {
	const queryCount = queries.length;
	if(queryCount !== 1) {
		log.warn('The dataflow cluster query expects only up to one query, but got', queryCount);
	}

	/* only the cluster computation itself is timed */
	const startedAt = Date.now();
	const clusters = findAllClusters(graph);
	const timing = Date.now() - startedAt;

	return { '.meta': { timing }, clusters };
}
14 changes: 14 additions & 0 deletions src/queries/catalog/cluster-query/cluster-query-format.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import type { BaseQueryFormat, BaseQueryResult } from '../../base-query-format';
import type { DataflowGraphClusters } from '../../../dataflow/cluster';

/**
 * Calculates and returns all clusters encountered in the dataflow graph.
 *
 * The only member declared here is the `type` discriminator; any further members
 * are inherited from {@link BaseQueryFormat} (declared elsewhere).
 */
export interface DataflowClusterQuery extends BaseQueryFormat {
/** Literal tag identifying this query as a dataflow cluster query. */
readonly type: 'dataflow-cluster';
}

/**
 * Result shape of the dataflow cluster query: extends {@link BaseQueryResult}
 * with the clusters that were computed.
 */
export interface DataflowClusterQueryResult extends BaseQueryResult {
/** All clusters found in the respective dataflow */
readonly clusters: DataflowGraphClusters;
}
8 changes: 7 additions & 1 deletion src/queries/query-schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,17 @@ export const NormalizedAstQuerySchema = Joi.object({
type: Joi.string().valid('normalized-ast').required().description('The type of the query.'),
}).description('The normalized AST query simply returns the normalized AST, there is no need to pass it multiple times!');

/**
 * Joi schema for the dataflow cluster query: an object with a required string field `type`
 * whose only valid value is `'dataflow-cluster'`.
 */
export const DataflowClusterQuerySchema = Joi.object({
type: Joi.string().valid('dataflow-cluster').required().description('The type of the query.'),
}).description('The cluster query calculates and returns all clusters in the dataflow graph.');


export const SupportedQueriesSchema = Joi.alternatives(
CallContextQuerySchema,
DataflowQuerySchema,
IdMapQuerySchema,
NormalizedAstQuerySchema
NormalizedAstQuerySchema,
DataflowClusterQuerySchema
).description('Supported queries');

export const CompoundQuerySchema = Joi.object({
Expand Down
13 changes: 8 additions & 5 deletions src/queries/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ import { executeIdMapQuery } from './catalog/id-map-query/id-map-query-executor'
import type { IdMapQuery } from './catalog/id-map-query/id-map-query-format';
import { executeNormalizedAstQuery } from './catalog/normalized-ast-query/normalized-ast-query-executor';
import type { NormalizedAstQuery } from './catalog/normalized-ast-query/normalized-ast-query-format';
import type { DataflowClusterQuery } from './catalog/cluster-query/cluster-query-format';
import { executeDataflowClusterQuery } from './catalog/cluster-query/cluster-query-executor';

export type Query = CallContextQuery | DataflowQuery | NormalizedAstQuery | IdMapQuery;
export type Query = CallContextQuery | DataflowQuery | NormalizedAstQuery | IdMapQuery | DataflowClusterQuery;

export type QueryArgumentsWithType<QueryType extends BaseQueryFormat['type']> = Query & { type: QueryType };

Expand All @@ -32,10 +34,11 @@ type SupportedQueries = {
}

export const SupportedQueries = {
'call-context': executeCallContextQueries,
'dataflow': executeDataflowQuery,
'id-map': executeIdMapQuery,
'normalized-ast': executeNormalizedAstQuery
'call-context': executeCallContextQueries,
'dataflow': executeDataflowQuery,
'id-map': executeIdMapQuery,
'normalized-ast': executeNormalizedAstQuery,
'dataflow-cluster': executeDataflowClusterQuery,
} as const satisfies SupportedQueries;

export type SupportedQueryTypes = keyof typeof SupportedQueries;
Expand Down
2 changes: 1 addition & 1 deletion src/statistics/summarizer/post-process/clusterer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ export type ClusterValueInfoMap = DefaultMap<string, ContextsWithCount>
export interface ClusterReport extends MergeableRecord {
/** The input file which has been clustered */
readonly filepath: string
/** Maps each context encountered (i.e., every file which contains something associated with the feature) to a unique id, used in the {@link ClusterReport#valueInfoMap | valueInfoMap}. */
/** Maps each context encountered (i.e., every file that contains something associated with the feature) to a unique id, used in the {@link ClusterReport#valueInfoMap|valueInfoMap}. */
contextIdMap: ClusterContextIdMap
/**
* Counts which contexts contained which values of a feature.
Expand Down
10 changes: 9 additions & 1 deletion src/util/ansi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,15 @@ export const markdownFormatter: OutputFormatter = new class implements OutputFor
}

input = input.replaceAll(/\\/g, '\\\\');
return input.replaceAll(/\n/g, '\\\n').replaceAll(/ /g, '&nbsp;');
let source = input.replaceAll(/\n/g, '\\\n');
/* repeatedly replace all spaces but only at the beginning of a line */
let target = source;
do{
source = target;
/* or replace back to front */
target = source.replace(/^(?<leading>(&nbsp;)*) /m, '$<leading>&nbsp;');
} while(target !== source);
return target;
}

public getFormatString(_options?: FormatOptions): string {
Expand Down
14 changes: 14 additions & 0 deletions test/functionality/dataflow/query/dataflow-cluster-query-tests.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import { assertQuery } from '../../_helper/query';
import { label } from '../../_helper/label';
import { withShell } from '../../_helper/shell';
import { findAllClusters } from '../../../../src/dataflow/cluster';
import type { DataflowClusterQuery } from '../../../../src/queries/catalog/cluster-query/cluster-query-format';

describe('Dataflow Cluster Query', withShell(shell => {
	/**
	 * Registers a single query test for `code`.
	 * The callback handed to `assertQuery` builds the `dataflow-cluster` entry from
	 * `findAllClusters` applied to the dataflow graph it receives
	 * (presumably used as the expected result; `assertQuery` is defined elsewhere).
	 */
	function testQuery(name: string, code: string, query: readonly DataflowClusterQuery[]) {
		assertQuery(label(name), shell, code, query, info => {
			const clusters = findAllClusters(info.dataflow.graph);
			return { 'dataflow-cluster': { clusters } };
		});
	}

	const code = 'x + 1';

	testQuery('Single Expression', code, [{ type: 'dataflow-cluster' }]);
	/* passing the same query several times must still work */
	testQuery('Multiple Queries', code, [
		{ type: 'dataflow-cluster' },
		{ type: 'dataflow-cluster' },
		{ type: 'dataflow-cluster' }
	]);
}));
Loading
Loading