Skip to content

Commit

Permalink
[Query API] Provide Access to Clustering (#1059)
Browse files Browse the repository at this point in the history
* feat(cluster-query): basic testless implementation

* doc(cluster-query): basic doc and test setup

* doc(dataflow-query): dataflow-cluster query

* refactor(wiki): only escape leading ansi spaces

* refactor(query-wiki): simplify code print
  • Loading branch information
EagleoutIce authored Oct 12, 2024
1 parent 2fb47ff commit 2a38978
Show file tree
Hide file tree
Showing 12 changed files with 363 additions and 96 deletions.
14 changes: 13 additions & 1 deletion src/cli/repl/commands/repl-query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import { graphToMermaidUrl } from '../../../util/mermaid/dfg';
import { normalizedAstToMermaidUrl } from '../../../util/mermaid/ast';

import { printAsMs } from '../../../util/time';
import { textWithTooltip } from '../../../documentation/doc-util/doc-hover-over';

async function getDataflow(shell: RShell, remainingLine: string) {
return await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, {
Expand Down Expand Up @@ -131,7 +132,7 @@ function summarizeIdsIfTooLong(ids: readonly NodeId[]) {
if(i < ids.length) {
acc += '... (see JSON below)';
}
return acc;
return textWithTooltip(acc, JSON.stringify(ids));
}

export function asciiSummaryOfQueryResult(formatter: OutputFormatter, totalInMs: number, results: QueryResults<SupportedQueryTypes>, processed: PipelineOutput<typeof DEFAULT_DATAFLOW_PIPELINE>): string {
Expand Down Expand Up @@ -161,6 +162,17 @@ export function asciiSummaryOfQueryResult(formatter: OutputFormatter, totalInMs:
result.push(`Query: ${bold(query, formatter)} (${printAsMs(out['.meta'].timing, 0)})`);
result.push(` ╰ [Normalized AST](${normalizedAstToMermaidUrl(out.normalized.ast)})`);
continue;
} else if(query === 'dataflow-cluster') {
const out = queryResults as QueryResults<'dataflow-cluster'>['dataflow-cluster'];
result.push(`Query: ${bold(query, formatter)} (${out['.meta'].timing.toFixed(0)}ms)`);
result.push(` ╰ Found ${out.clusters.length} cluster${out.clusters.length === 1 ? '': 's'}`);
for(const cluster of out.clusters) {
const unknownSideEffects = cluster.hasUnknownSideEffects ? '(has unknown side effect)' : '';
result.push(` ╰ ${unknownSideEffects} {${summarizeIdsIfTooLong(cluster.members)}} ([marked](${
graphToMermaidUrl(processed.dataflow.graph, false, new Set(cluster.members))
}))`);
}
continue;
}

result.push(`Query: ${bold(query, formatter)}`);
Expand Down
10 changes: 6 additions & 4 deletions src/documentation/doc-util/doc-dfg.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,12 @@ export interface PrintDataflowGraphOptions {
readonly codeOpen?: boolean;
readonly exposeResult?: boolean;
readonly switchCodeAndGraph?: boolean;
readonly hideEnvInMermaid?: boolean;
}

export async function printDfGraphForCode(shell: RShell, code: string, options: PrintDataflowGraphOptions & { exposeResult: true }): Promise<[string, PipelineOutput<typeof DEFAULT_DATAFLOW_PIPELINE>]>;
export async function printDfGraphForCode(shell: RShell, code: string, options?: PrintDataflowGraphOptions & { exposeResult?: false | undefined }): Promise<string>;
export async function printDfGraphForCode(shell: RShell, code: string, { mark, showCode = true, codeOpen = false, exposeResult, switchCodeAndGraph = false }: PrintDataflowGraphOptions = {}): Promise<string | [string, PipelineOutput<typeof DEFAULT_DATAFLOW_PIPELINE>]> {
export async function printDfGraphForCode(shell: RShell, code: string, { mark, showCode = true, codeOpen = false, exposeResult, switchCodeAndGraph = false, hideEnvInMermaid = false }: PrintDataflowGraphOptions = {}): Promise<string | [string, PipelineOutput<typeof DEFAULT_DATAFLOW_PIPELINE>]> {
const now = performance.now();
const result = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, {
shell,
Expand All @@ -48,7 +49,7 @@ export async function printDfGraphForCode(shell: RShell, code: string, { mark, s
guard(showCode, 'can not switch code and graph if code is not shown');
}

const metaInfo = `The analysis required _${printAsMs(duration)}_ (including parsing and normalization) within the generation environment.`;
const metaInfo = `The analysis required _${printAsMs(duration)}_ (incl. parse and normalize) within the generation environment.`;
const dfGraph = printDfGraph(result.dataflow.graph, mark);
let resultText = '\n\n';

Expand All @@ -73,8 +74,9 @@ ${switchCodeAndGraph ? dfGraph : codeText}
\`\`\`
${graphToMermaid({
graph: result.dataflow.graph,
prefix: 'flowchart LR'
graph: result.dataflow.graph,
prefix: 'flowchart LR',
includeEnvironments: !hideEnvInMermaid
}).string}
\`\`\`
Expand Down
2 changes: 1 addition & 1 deletion src/documentation/doc-util/doc-query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ function linkify(name: string) {
}

export function tocForQueryType(type: 'active' | 'virtual') {
const queries = RegisteredQueries[type];
const queries = [...RegisteredQueries[type].entries()].sort(([,{ name: a }], [, { name: b }]) => a.localeCompare(b));
const result: string[] = [];
for(const [id, { name, shortDescription }] of queries) {
result.push(`1. [${name}](#${linkify(name)}) (\`${id}\`):\\\n ${shortDescription}`);
Expand Down
22 changes: 22 additions & 0 deletions src/documentation/print-query-wiki.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import { codeBlock } from './doc-util/doc-code';
import { executeDataflowQuery } from '../queries/catalog/dataflow-query/dataflow-query-executor';
import { executeIdMapQuery } from '../queries/catalog/id-map-query/id-map-query-executor';
import { executeNormalizedAstQuery } from '../queries/catalog/normalized-ast-query/normalized-ast-query-executor';
import { executeDataflowClusterQuery } from '../queries/catalog/cluster-query/cluster-query-executor';


registerQueryDocumentation('call-context', {
Expand Down Expand Up @@ -130,6 +131,27 @@ ${
}
});

/*
 * Registers the wiki documentation entry for the `dataflow-cluster` query.
 * The entry is listed as an 'active' query and points to the executor function
 * (by its runtime `name`) and the file that implements it.
 * The explanation text is built lazily: `buildExplanation` runs an example
 * `dataflow-cluster` query on `exampleQueryCode` via `showQuery` (with the code
 * listing suppressed through `showCode: false`) and embeds the rendered result.
 */
registerQueryDocumentation('dataflow-cluster', {
name: 'Dataflow Cluster Query',
type: 'active',
shortDescription: 'Calculates and returns all the clusters present in the dataflow graph.',
functionName: executeDataflowClusterQuery.name,
functionFile: '../queries/catalog/cluster-query/cluster-query-executor.ts',
buildExplanation: async(shell: RShell) => {
return `
This query automatically calculates clusters in flowR's dataflow graph and returns a list of all clusters
found.
Using the example code from above, the following query returns all clusters:
${
await showQuery(shell, exampleQueryCode, [{
type: 'dataflow-cluster'
}], { showCode: false })
}
`;
}
});

registerQueryDocumentation('id-map', {
name: 'Id-Map Query',
type: 'active',
Expand Down
20 changes: 20 additions & 0 deletions src/queries/catalog/cluster-query/cluster-query-executor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import type { BasicQueryData } from '../../query';
import { log } from '../../../util/log';
import type { DataflowClusterQuery, DataflowClusterQueryResult } from './cluster-query-format';
import { findAllClusters } from '../../../dataflow/cluster';


/**
 * Answers the dataflow cluster query by computing every cluster of the dataflow graph
 * found in the given query data.
 *
 * The query has no arguments, so the clusters are computed exactly once regardless of how
 * many queries are handed in; a warning is logged whenever the number of queries is not
 * exactly one.
 *
 * @param queries - The cluster queries to answer; only their count is inspected.
 * @returns The clusters reported by `findAllClusters`, plus the elapsed wall-clock time
 *          (difference of two `Date.now()` readings, i.e. milliseconds) under `.meta.timing`.
 */
export function executeDataflowClusterQuery({ graph }: BasicQueryData, queries: readonly DataflowClusterQuery[]): DataflowClusterQueryResult {
	const queryCount = queries.length;
	if(queryCount !== 1) {
		log.warn('The dataflow cluster query expects only up to one query, but got', queryCount);
	}

	/* only the cluster computation itself is timed */
	const startedAt = Date.now();
	const clusters = findAllClusters(graph);
	const timing = Date.now() - startedAt;

	return { '.meta': { timing }, clusters };
}
14 changes: 14 additions & 0 deletions src/queries/catalog/cluster-query/cluster-query-format.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import type { BaseQueryFormat, BaseQueryResult } from '../../base-query-format';
import type { DataflowGraphClusters } from '../../../dataflow/cluster';

/**
 * Calculates and returns all clusters encountered in the dataflow graph.
 *
 * Besides the `type` discriminator, this query declares no further fields of its own.
 */
export interface DataflowClusterQuery extends BaseQueryFormat {
/** Discriminator identifying this query as a dataflow cluster query. */
readonly type: 'dataflow-cluster';
}

/**
 * The result produced for a {@link DataflowClusterQuery}.
 * Any further fields (e.g., meta information) are presumably inherited from `BaseQueryResult` — confirm there.
 */
export interface DataflowClusterQueryResult extends BaseQueryResult {
/** All clusters found in the respective dataflow */
readonly clusters: DataflowGraphClusters;
}
8 changes: 7 additions & 1 deletion src/queries/query-schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,17 @@ export const NormalizedAstQuerySchema = Joi.object({
type: Joi.string().valid('normalized-ast').required().description('The type of the query.'),
}).description('The normalized AST query simply returns the normalized AST, there is no need to pass it multiple times!');

/**
 * Joi schema for a dataflow cluster query: an object whose required `type`
 * field must be the string `'dataflow-cluster'`. No other keys are declared.
 */
export const DataflowClusterQuerySchema = Joi.object({
type: Joi.string().valid('dataflow-cluster').required().description('The type of the query.'),
}).description('The cluster query calculates and returns all clusters in the dataflow graph.');


export const SupportedQueriesSchema = Joi.alternatives(
CallContextQuerySchema,
DataflowQuerySchema,
IdMapQuerySchema,
NormalizedAstQuerySchema
NormalizedAstQuerySchema,
DataflowClusterQuerySchema
).description('Supported queries');

export const CompoundQuerySchema = Joi.object({
Expand Down
13 changes: 8 additions & 5 deletions src/queries/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ import { executeIdMapQuery } from './catalog/id-map-query/id-map-query-executor'
import type { IdMapQuery } from './catalog/id-map-query/id-map-query-format';
import { executeNormalizedAstQuery } from './catalog/normalized-ast-query/normalized-ast-query-executor';
import type { NormalizedAstQuery } from './catalog/normalized-ast-query/normalized-ast-query-format';
import type { DataflowClusterQuery } from './catalog/cluster-query/cluster-query-format';
import { executeDataflowClusterQuery } from './catalog/cluster-query/cluster-query-executor';

export type Query = CallContextQuery | DataflowQuery | NormalizedAstQuery | IdMapQuery;
export type Query = CallContextQuery | DataflowQuery | NormalizedAstQuery | IdMapQuery | DataflowClusterQuery;

export type QueryArgumentsWithType<QueryType extends BaseQueryFormat['type']> = Query & { type: QueryType };

Expand All @@ -32,10 +34,11 @@ type SupportedQueries = {
}

export const SupportedQueries = {
'call-context': executeCallContextQueries,
'dataflow': executeDataflowQuery,
'id-map': executeIdMapQuery,
'normalized-ast': executeNormalizedAstQuery
'call-context': executeCallContextQueries,
'dataflow': executeDataflowQuery,
'id-map': executeIdMapQuery,
'normalized-ast': executeNormalizedAstQuery,
'dataflow-cluster': executeDataflowClusterQuery,
} as const satisfies SupportedQueries;

export type SupportedQueryTypes = keyof typeof SupportedQueries;
Expand Down
2 changes: 1 addition & 1 deletion src/statistics/summarizer/post-process/clusterer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ export type ClusterValueInfoMap = DefaultMap<string, ContextsWithCount>
export interface ClusterReport extends MergeableRecord {
/** The input file which has been clustered */
readonly filepath: string
/** Maps each context encountered (i.e., every file which contains something associated with the feature) to a unique id, used in the {@link ClusterReport#valueInfoMap | valueInfoMap}. */
/** Maps each context encountered (i.e., every file that contains something associated with the feature) to a unique id, used in the {@link ClusterReport#valueInfoMap|valueInfoMap}. */
contextIdMap: ClusterContextIdMap
/**
* Counts which contexts contained which values of a feature.
Expand Down
10 changes: 9 additions & 1 deletion src/util/ansi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,15 @@ export const markdownFormatter: OutputFormatter = new class implements OutputFor
}

input = input.replaceAll(/\\/g, '\\\\');
return input.replaceAll(/\n/g, '\\\n').replaceAll(/ /g, '&nbsp;');
let source = input.replaceAll(/\n/g, '\\\n');
/* repeatedly replace all spaces but only at the beginning of a line */
let target = source;
do{
source = target;
/* or replace back to front */
target = source.replace(/^(?<leading>(&nbsp;)*) /m, '$<leading>&nbsp;');
} while(target !== source);
return target;
}

public getFormatString(_options?: FormatOptions): string {
Expand Down
14 changes: 14 additions & 0 deletions test/functionality/dataflow/query/dataflow-cluster-query-tests.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import { assertQuery } from '../../_helper/query';
import { label } from '../../_helper/label';
import { withShell } from '../../_helper/shell';
import { findAllClusters } from '../../../../src/dataflow/cluster';
import type { DataflowClusterQuery } from '../../../../src/queries/catalog/cluster-query/cluster-query-format';

describe('Dataflow Cluster Query', withShell(shell => {
	/**
	 * Runs the given cluster queries on `code` and expects the result to contain exactly
	 * the clusters that `findAllClusters` reports for the resulting dataflow graph.
	 */
	const expectAllClusters = (name: string, code: string, query: readonly DataflowClusterQuery[]): void => {
		assertQuery(label(name), shell, code, query, processed => {
			const clusters = findAllClusters(processed.dataflow.graph);
			return { 'dataflow-cluster': { clusters } };
		});
	};

	/* a single query is the expected use */
	expectAllClusters('Single Expression', 'x + 1', [
		{ type: 'dataflow-cluster' }
	]);
	/* repeating the query must not change the reported clusters */
	expectAllClusters('Multiple Queries', 'x + 1', [
		{ type: 'dataflow-cluster' },
		{ type: 'dataflow-cluster' },
		{ type: 'dataflow-cluster' }
	]);
}));
Loading

0 comments on commit 2a38978

Please sign in to comment.