Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve Statistic Generation #981

Merged
merged 4 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/cli/common/scripts-info.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,15 @@ import { asOptionName } from '../repl/commands/commands';


/**
 * Describes a single script: the name shown to the user, the module that is executed,
 * and the help information (description, usage example, and available command line options).
 */
interface BaseScriptInformation extends MergeableRecord {
/** Name of the tool as it is presented to the user. */
toolName: string
/** Internal module name to fork/execute. Make sure to use the correct path to it with the help of `__dirname`. */
target: string
/** Description of the tool, intended for the user. */
description: string
/** Example of how the tool is used. */
usageExample: string
/** Command line options that are available for the tool. */
options: OptionDefinition[]
}

Expand Down
2 changes: 1 addition & 1 deletion src/cli/repl/execute.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ export function stdioCaptureProcessor(stdio: Stdio, onStdOutLine: (msg: string)
* the output of the script, see {@link stdioCaptureProcessor}.
* @param exitOnError - If set to `true`, the process will exit with the exit code of the script.
*/
export async function waitOnScript(module: string, args: string[], io?: StdioProcessor, exitOnError = false): Promise<void> {
export async function waitOnScript(module: string, args: readonly string[], io?: StdioProcessor, exitOnError = false): Promise<void> {
log.info(`starting script ${module} with args ${JSON.stringify(args)}`);
const child = cp.fork(module, args, {
silent: io !== undefined
Expand Down
13 changes: 13 additions & 0 deletions src/cli/run-script.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import type { StdioProcessor } from './repl/execute';
import { waitOnScript } from './repl/execute';
import { scripts } from './common/scripts-info';
import path from 'path';

/**
 * Path-safe helper of {@link waitOnScript} for other flowR scripts: the `target` registered
 * for the script is resolved relative to the directory of this module before it is started.
 *
 * @param name        - Key of the script within `scripts`.
 * @param args        - Arguments that are passed on to the script.
 * @param io          - Optional stdio processor, forwarded unchanged to {@link waitOnScript}.
 * @param exitOnError - Forwarded unchanged to {@link waitOnScript}; defaults to `false`.
 *
 * @see waitOnScript
 */
export async function runScript(name: keyof typeof scripts, args: readonly string[], io?: StdioProcessor, exitOnError = false): Promise<void> {
	const modulePath = path.resolve(__dirname, scripts[name].target);
	await waitOnScript(modulePath, args, io, exitOnError);
}
110 changes: 110 additions & 0 deletions src/cli/script-core/statistics-core.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import path from 'path';
import type { Arguments } from '../../util/parallel';
import { LimitedThreadPool } from '../../util/parallel';
import { allRFilesFrom } from '../../util/files';
import { retrieveArchiveName, validateFeatures } from '../common/features';
import fs from 'fs';
import { initFileProvider } from '../../statistics/output/statistics-file';
import { jsonReplacer } from '../../util/json';
import { log } from '../../util/log';
import type { StatsCliOptions } from '../statistics-app';
import { getStatsForSingleFile } from './statistics-helper-core';
import commandLineArgs from 'command-line-args';
import { scripts } from '../common/scripts-info';
import type { StatsHelperCliOptions } from '../statistics-helper-app';
import { setFormatter, voidFormatter } from '../../util/ansi';

/** Matches (case-insensitively) any path that contains `/test`. */
const testRegex = /[^/]*\/test/i;
/** Matches (case-insensitively) any path that contains `/example`. */
const exampleRegex = /[^/]*\/example/i;

/**
 * Classifies a file by its path and returns the matching output prefix.
 *
 * @param file - Path of the file to classify.
 * @returns `'test-'` if the path matches {@link testRegex}, otherwise `'example-'` if it
 *          matches {@link exampleRegex}, otherwise the empty string.
 */
function getPrefixForFile(file: string) {
	// the test check takes precedence over the example check
	if(testRegex.test(file)) {
		return 'test-';
	}
	return exampleRegex.test(file) ? 'example-' : '';
}

/**
 * Builds the output-name suffix for a file: `--` followed by the path of `file`
 * relative to `base`.
 *
 * @param base - Directory the relative path is computed from.
 * @param file - Path of the file.
 */
function getSuffixForFile(base: string, file: string) {
	const relativeToBase = path.relative(base, file);
	// NOTE(review): this replaces every '/' with '/', i.e., it leaves the path unchanged.
	// Possibly a different replacement character was intended here - confirm before changing,
	// as the result determines the name of the output directory.
	const normalized = relativeToBase.replace(/\//g, '/');
	return `--${normalized}`;
}

/**
 * Collects one argument list per R file found in `options.input`.
 * Files for which the archive (see `retrieveArchiveName`) of their output directory
 * already exists are reported and skipped. Progress is printed while collecting.
 *
 * @param options    - Options of the statistics script (reads `input` and `output-dir`).
 * @param verboseAdd - Arguments appended to every argument list (verbosity).
 * @param dumpJson   - Arguments appended to every argument list (json dump).
 * @param features   - Arguments appended to every argument list (features).
 * @returns The argument lists of all files that were not skipped.
 */
async function collectFileArguments(options: StatsCliOptions, verboseAdd: readonly string[], dumpJson: readonly string[], features: readonly string[]) {
	const collected: Arguments[] = [];
	let added = 0;
	let reportEvery = 5000;
	let alreadyArchived = 0;

	for await (const rFile of allRFilesFrom(options.input)) {
		// with exactly one input, that input serves as the base for relative paths
		const hasSingleInput = options.input.length === 1;
		const dirName = getPrefixForFile(rFile.content) + getSuffixForFile(hasSingleInput ? options.input[0] : '', rFile.content);
		const outputDir = path.join(options['output-dir'], dirName);

		const archive = retrieveArchiveName(outputDir);
		if(fs.existsSync(archive)) {
			console.log(`Archive ${archive} exists. Skip.`);
			alreadyArchived += 1;
			continue;
		}

		collected.push(['--input', rFile.content, '--output-dir', outputDir,'--compress', '--root-dir', hasSingleInput ? options.input[0] : '""', ...verboseAdd, ...features, ...dumpJson]);
		added += 1;

		if(added % reportEvery !== 0) {
			continue;
		}
		console.log(`Collected ${added} files`);
		// report less often once ten reports were issued with the current step size
		if(added >= 10 * reportEvery) {
			reportEvery *= 5;
		}
	}

	console.log(`Total: ${added} files (${alreadyArchived} skipped with archive existing)`);
	return collected;
}

/**
 * Shuffles the given array in place with the Fisher-Yates algorithm.
 * In contrast to sorting with a random comparator, every permutation is equally likely
 * and the result does not depend on the sort implementation of the engine.
 */
function shuffleInPlace<T>(items: T[]): void {
	for(let i = items.length - 1; i > 0; i--) {
		const j = Math.floor(Math.random() * (i + 1));
		const tmp = items[i] as T;
		items[i] = items[j] as T;
		items[j] = tmp;
	}
}

/**
 * Entry point of the statistics script: collects the arguments for all input files
 * and extracts the statistics of each file, either with a pool of forked helper
 * processes (`options.parallel > 0`) or sequentially within this process.
 *
 * If `options.limit` is set, the files are shuffled so that the limit selects a random subset.
 * Exits the process (with code 0) if no input is given.
 *
 * @param options - Options of the statistics script.
 */
export async function flowrScriptGetStats(options: StatsCliOptions): Promise<void> {
	if(options.input.length === 0) {
		console.error('No input files given. Nothing to do. See \'--help\' if this is an error.');
		process.exit(0);
	}

	if(options['no-ansi']) {
		log.info('disabling ansi colors');
		setFormatter(voidFormatter);
	}

	const processedFeatures = validateFeatures(options.features);
	initFileProvider(options['output-dir']);
	console.log(`Processing features: ${JSON.stringify(processedFeatures, jsonReplacer)}`);
	console.log(`Using ${options.parallel} parallel executors`);

	const verboseAdd = options.verbose ? ['--verbose'] : [];
	const features = [...processedFeatures].flatMap(s => ['--features', s]);
	const dumpJson = options['dump-json'] ? ['--dump-json'] : [];

	// we do not use the limit argument to be able to pick the limit randomly
	const args = await collectFileArguments(options, verboseAdd, dumpJson, features);

	if(options.limit) {
		console.log('Shuffle...');
		log.info(`limiting to ${options.limit} files`);
		// shuffle so that the limit below picks a uniformly random subset
		shuffleInPlace(args);
	}
	console.log('Prepare Pool...');

	const limit = options.limit ?? args.length;

	if(options.parallel > 0) {
		const pool = new LimitedThreadPool(
			// the helper app resides one directory above this module (cf. the import of '../statistics-helper-app')
			path.resolve(__dirname, '..', 'statistics-helper-app'),
			args,
			limit,
			options.parallel
		);
		console.log('Run Pool...');
		await pool.run();
		const stats = pool.getStats();
		console.log(`Processed ${stats.counter} files, skipped ${stats.skipped.length} files due to errors`);
	} else {
		console.log('Run Sequentially as parallel <= 0...');
		// respect the limit in sequential mode as well
		for(const arg of args.slice(0, limit)) {
			await getStatsForSingleFile(commandLineArgs(scripts['stats-helper'].options, { argv: arg }) as StatsHelperCliOptions);
		}
	}
}
90 changes: 90 additions & 0 deletions src/cli/script-core/statistics-helper-core.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import { retrieveArchiveName } from '../common/features';
import fs from 'fs';
import type { FeatureKey } from '../../statistics/features/feature';
import { RShell } from '../../r-bridge/shell';
import { initFileProvider, statisticsFileProvider } from '../../statistics/output/statistics-file';
import { extractUsageStatistics, staticRequests } from '../../statistics/statistics';
import { extractCFG } from '../../util/cfg/cfg';
import { printStepResult, StepOutputFormat } from '../../core/print/print';
import { PARSE_WITH_R_SHELL_STEP } from '../../core/steps/all/core/00-parse';
import { NORMALIZE } from '../../core/steps/all/core/10-normalize';
import { STATIC_DATAFLOW } from '../../core/steps/all/core/20-dataflow';
import { jsonReplacer } from '../../util/json';
import { log } from '../../util/log';
import { guard } from '../../util/assert';
import { date2string } from '../../util/time';
import type { StatsHelperCliOptions } from '../statistics-helper-app';
import { create } from 'tar';
import { setFormatter, voidFormatter } from '../../util/ansi';


/**
 * Compresses the given folder into a gzipped tar archive and removes the folder afterwards.
 * This is best-effort: if the compression fails, the failure (including its reason) is only
 * logged, the folder is kept, and the returned promise still resolves.
 *
 * @param folder - Folder to compress; removed recursively once the archive is written.
 * @param target - Path of the archive file to create.
 */
async function compressFolder(folder: string, target: string): Promise<void> {
	try {
		// eslint-disable-next-line @typescript-eslint/no-unsafe-call,@typescript-eslint/no-unsafe-member-access
		await create({
			gzip:          true,
			file:          target,
			portable:      true,
			preservePaths: false
		}, [folder]);
	} catch(e: unknown) {
		// keep the folder so that no data is lost, but report why the compression failed
		const reason = e instanceof Error ? e.message : String(e);
		console.log(`failed to compress ${folder}: ${reason}`);
		return;
	}
	// now, remove the folder
	fs.rmSync(folder, { recursive: true, force: true });
}


/**
 * Extracts the usage statistics of a single file (`options.input`) and appends them
 * (and, with `dump-json`, the json output of the parse, normalize, and dataflow steps
 * together with the control flow graph) to the statistics files in `options['output-dir']`.
 * With `compress`, the output directory is compressed afterwards.
 *
 * If `compress` is set and the target archive already exists, the process exits with code 0.
 * The R shell used for the extraction is closed in any case, even if the extraction fails.
 *
 * @param options - Options of the statistics helper script.
 */
export async function getStatsForSingleFile(options: StatsHelperCliOptions): Promise<void> {
	if(options['no-ansi']) {
		log.info('disabling ansi colors');
		setFormatter(voidFormatter);
	}

	let target: string | undefined = undefined;
	if(options.compress) {
		target = retrieveArchiveName(options['output-dir']);
		if(fs.existsSync(target)) {
			console.log(`Archive ${target} exists. Skip.`);
			process.exit(0);
		}
	}

	// assume correct
	const processedFeatures = new Set<FeatureKey>(options.features as FeatureKey[]);

	const shell = new RShell();
	try {
		initFileProvider(options['output-dir']);

		await shell.obtainTmpDir();
		const stats = await extractUsageStatistics(shell,
			() => { /* do nothing */ },
			processedFeatures,
			staticRequests({ request: 'file', content: options.input }),
			options['root-dir']
		);
		// console.warn(`skipped ${stats.meta.failedRequests.length} requests due to errors (run with logs to get more info)`)

		if(stats.outputs.size === 1) {
			if(options['dump-json']) {
				const [, output] = [...stats.outputs.entries()][0];
				const cfg = extractCFG(output.normalize);
				statisticsFileProvider.append('output-json', 'parse', await printStepResult(PARSE_WITH_R_SHELL_STEP, output.parse, StepOutputFormat.Json));
				statisticsFileProvider.append('output-json', 'normalize', await printStepResult(NORMALIZE, output.normalize, StepOutputFormat.Json));
				statisticsFileProvider.append('output-json', 'dataflow', await printStepResult(STATIC_DATAFLOW, output.dataflow, StepOutputFormat.Json));
				statisticsFileProvider.append('output-json', 'cfg', JSON.stringify(cfg, jsonReplacer));
			}

			statisticsFileProvider.append('meta', 'stats', JSON.stringify({ ...stats.meta, file: options.input }, jsonReplacer));
			statisticsFileProvider.append('meta', 'features', JSON.stringify(stats.features, jsonReplacer));
		} else {
			log.error(`expected exactly one output vs. ${stats.outputs.size}, got: ${JSON.stringify([...stats.outputs.keys()], jsonReplacer, 2)}`);
		}
		if(options.compress) {
			guard(target !== undefined, 'target must be defined given the compress option');
			console.log(`[${date2string(new Date())}] Compressing ${options['output-dir']} to ${target}`);
			await compressFolder(options['output-dir'], target);
		}
	} finally {
		// always release the shell, otherwise it is leaked whenever one of the steps above throws
		shell.close();
	}
}
56 changes: 56 additions & 0 deletions src/cli/script-core/summarizer-core.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import type { SummarizerCliOptions } from '../summarizer-app';
import { StatisticsSummarizer } from '../../statistics/summarizer/summarizer';
import { BenchmarkSummarizer } from '../../benchmark/summarizer/summarizer';
import { detectSummarizationType } from '../../statistics/summarizer/auto-detect';
import { SummarizerType } from '../../util/summarizer';
import { allFeatureNames } from '../../statistics/features/feature';


/**
 * Creates the summarizer for benchmark results.
 * The intermediate output is written to `outputBase`, the final output to
 * `<outputBase>-ultimate.json`, and (only with `options.graph`) the graph output to
 * `<outputBase>-graph.json`. Messages are logged with `console.log`.
 *
 * @param options    - Options of the summarizer script (reads `graph` and `input`).
 * @param outputBase - Common prefix of all output paths.
 */
function getBenchmarkSummarizer(options: SummarizerCliOptions, outputBase: string) {
	let graphOutputPath: string | undefined = undefined;
	if(options.graph) {
		graphOutputPath = `${outputBase}-graph.json`;
	}
	const ultimateOutputPath = `${outputBase}-ultimate.json`;

	return new BenchmarkSummarizer({
		graphOutputPath,
		inputPath:              options.input,
		intermediateOutputPath: outputBase,
		outputPath:             ultimateOutputPath,
		logger:                 console.log
	});
}

/**
 * Creates the summarizer for statistics results, using all feature names.
 * The final output is written to `<outputBase>-final`, the intermediate output to
 * `<outputBase>-intermediate/`. Messages are logged with `console.log`.
 *
 * @param options    - Options of the summarizer script (reads `input` and `project-skip`).
 * @param outputBase - Common prefix of all output paths.
 */
function getStatisticsSummarizer(options: SummarizerCliOptions, outputBase: string) {
	const finalOutputPath = `${outputBase}-final`;
	const intermediateOutputPath = `${outputBase}-intermediate/`;

	return new StatisticsSummarizer({
		inputPath:     options.input,
		outputPath:    finalOutputPath,
		intermediateOutputPath,
		projectSkip:   options['project-skip'],
		featuresToUse: allFeatureNames,
		logger:        console.log
	});
}


/**
 * Selects and creates the summarizer for the requested type.
 * With the type `'auto'`, the type is detected from `options.input`.
 * For any type other than benchmark or statistics, an error is printed and the process
 * exits with code 1.
 *
 * @param options    - Options of the summarizer script.
 * @param outputBase - Common prefix of all output paths, passed on to the summarizer.
 */
async function retrieveSummarizer(options: SummarizerCliOptions, outputBase: string): Promise<StatisticsSummarizer | BenchmarkSummarizer> {
	const type = options.type === 'auto' ? await detectSummarizationType(options.input) : options.type;
	switch(type) {
		case SummarizerType.Benchmark:
			console.log('Summarizing benchmark');
			return getBenchmarkSummarizer(options, outputBase);
		case SummarizerType.Statistics:
			console.log('Summarizing statistics');
			return getStatisticsSummarizer(options, outputBase);
		default:
			console.error('Unknown type', type, 'either give "benchmark" or "statistics"');
			process.exit(1);
	}
}

/**
 * Entry point of the summarizer script.
 * The base of all output paths is `options.output` (or `options.input` if no output is given),
 * with a trailing `.json` or `/` replaced by `-summary`.
 * Runs the preparation phase (unless `ultimate-only` is set) followed by the summarize phase.
 *
 * @param options - Options of the summarizer script.
 */
export async function flowrScriptSummarizer(options: SummarizerCliOptions) {
	const basis = options.output ?? options.input;
	const outputBase = basis.replace(/\.json$|\/$/, '-summary');
	console.log(`Writing outputs to base ${outputBase}`);

	const summarizer = await retrieveSummarizer(options, outputBase);

	const withPreparation = !options['ultimate-only'];
	if(withPreparation) {
		await summarizer.preparationPhase(options.categorize);
	}

	await summarizer.summarizePhase();
}
Loading
Loading