Skip to content

Commit

Permalink
extract table renderer
Browse files Browse the repository at this point in the history
  • Loading branch information
pgayvallet committed Sep 4, 2024
1 parent c886419 commit e71fe3b
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 108 deletions.
128 changes: 20 additions & 108 deletions x-pack/plugins/inference/scripts/evaluation/evaluation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,18 @@ import { Client } from '@elastic/elasticsearch';
import { run } from '@kbn/dev-cli-runner';
import * as fastGlob from 'fast-glob';
import yargs from 'yargs';
import chalk from 'chalk';
import { castArray } from 'lodash';
// @ts-expect-error
import Mocha from 'mocha';
import Path from 'path';
import * as table from 'table';
import { TableUserConfig } from 'table';
import { EvaluateWith, options } from './cli';
import { getServiceUrls } from '../util/get_service_urls';
import { KibanaClient } from '../util/kibana_client';
import { initServices } from './services';
import { EvaluationResult } from './types';
import { selectConnector } from '../util/select_connector';
import { createInferenceEvaluationClient } from './evaluation_client';
import { createResultRenderer, renderFailedScenarios } from './table_renderer';

function runEvaluations() {
yargs(process.argv.slice(2))
Expand All @@ -43,7 +41,6 @@ function runEvaluations() {
await kibanaClient.createSpaceIfNeeded();

const connectors = await kibanaClient.getConnectors();

if (!connectors.length) {
throw new Error('No connectors found');
}
Expand Down Expand Up @@ -100,91 +97,17 @@ function runEvaluations() {
suite: mocha.suite,
});

const header: string[][] = [
[chalk.bold('Criterion'), chalk.bold('Result'), chalk.bold('Reasoning')],
];

const tableConfig: TableUserConfig = {
singleLine: false,
border: {
topBody: `─`,
topJoin: `┬`,
topLeft: `┌`,
topRight: `┐`,

bottomBody: `─`,
bottomJoin: `┴`,
bottomLeft: `└`,
bottomRight: `┘`,

bodyLeft: `│`,
bodyRight: `│`,
bodyJoin: `│`,

joinBody: `─`,
joinLeft: `├`,
joinRight: `┤`,
joinJoin: `┼`,
},
spanningCells: [
{ row: 0, col: 0, colSpan: 3 },
{ row: 1, col: 0, colSpan: 3 },
],
columns: [
{ wrapWord: true, width: 60 },
{ wrapWord: true },
{ wrapWord: true, width: 60 },
],
};

// avoid glitches in the table rendering
const sanitize = (text: string) => {
return text
.replace(/^ +/gm, '')
.replace(/ +$/gm, '')
.replace(/^\n+/g, '')
.replace(/\n+$/g, '');
};

const renderer = createResultRenderer();
const results: EvaluationResult[] = [];
const failedScenarios: string[][] = [
['Failed Tests', '', ''],
['Scenario, Scores, Reasoning', '', ''],
];
const failedResults: EvaluationResult[] = [];

evaluationClient.onResult((result) => {
results.push(result);
log.debug(`Result:`, JSON.stringify(result));
const output: string[][] = [[sanitize(result.input), '', ''], ['', '', ''], ...header];

result.scores.forEach((score) => {
output.push([
sanitize(score.criterion),
score.score < 1
? chalk.redBright(String(score.score))
: chalk.greenBright(String(score.score)),
sanitize(score.reasoning),
]);
});

log.write(table.table(output, tableConfig));

const totalResults = result.scores.length;
const failedResults = result.scores.filter((score) => score.score < 1).length;

if (failedResults / totalResults > 0) {
const reasoningConcat = result.scores
.map((score) => sanitize(score.reasoning))
.join(' ');
failedScenarios.push([
`${result.name}`,
`Average score ${Math.round(
(result.scores.reduce((total, next) => total + next.score, 0) * 100) /
totalResults
)}. Failed ${failedResults} tests out of ${totalResults}`,
`Reasoning: ${reasoningConcat}`,
]);
if (result.scores.filter((score) => score.score < 1).length) {
failedResults.push(result);
}

log.write(renderer.render({ result }));
});

initServices({
Expand All @@ -202,7 +125,7 @@ function runEvaluations() {
return new Promise<void>((resolve, reject) => {
mocha.run((failures: any) => {
if (failures) {
log.write(table.table(failedScenarios, tableConfig));
log.write(renderFailedScenarios(failedResults));
reject(new Error(`Some tests failed`));
return;
}
Expand All @@ -226,33 +149,22 @@ function runEvaluations() {
);
log.write('-------------------------------------------');

const scoresByCategory: {
const scoresByCategory = results.reduce<{
[key: string]: {
score: number;
total: number;
};
} = results.reduce(
(
acc: {
[key: string]: {
score: number;
total: number;
};
},
result
) => {
const category = result.category;
if (!acc[category]) {
acc[category] = { score: 0, total: 0 };
}
result.scores.forEach((score) => {
acc[category].score += score.score;
acc[category].total += 1;
});
return acc;
},
{}
);
}>((acc, result) => {
const category = result.category;
if (!acc[category]) {
acc[category] = { score: 0, total: 0 };
}
result.scores.forEach((score) => {
acc[category].score += score.score;
acc[category].total += 1;
});
return acc;
}, {});

log.write('-------------------------------------------');
log.write(`Model ${connector.connectorId} scores per category`);
Expand Down
100 changes: 100 additions & 0 deletions x-pack/plugins/inference/scripts/evaluation/table_renderer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import * as table from 'table';
import chalk from 'chalk';
import type { TableUserConfig } from 'table';
import type { EvaluationResult } from './types';

/**
 * Shape of the object returned by `createResultRenderer`.
 */
interface ResultRenderer {
/** Turns a single evaluation result into its printable string form. */
render: (params: { result: EvaluationResult }) => string;
}

/**
 * Builds a renderer that prints one evaluation result as a three-column table.
 *
 * Layout of the produced table:
 *  - row 0: the sanitized scenario input, spanning all three columns
 *  - row 1: an empty row, also spanning all three columns
 *  - row 2: the bold `Criterion | Result | Reasoning` header
 *  - then one row per score of the result
 *
 * @returns an object whose `render` method returns the table as a string.
 */
export const createResultRenderer = (): ResultRenderer => {
  const headerRow = [chalk.bold('Criterion'), chalk.bold('Result'), chalk.bold('Reasoning')];

  const tableConfig = {
    ...baseConfig,
    // The input row and the blank row below it each stretch over the full width.
    spanningCells: [
      { row: 0, col: 0, colSpan: 3 },
      { row: 1, col: 0, colSpan: 3 },
    ],
    columns: [{ wrapWord: true, width: 60 }, { wrapWord: true }, { wrapWord: true, width: 60 }],
  };

  // Scores below 1 are shown in red, every other score in green.
  const colorizeScore = (value: number): string => {
    const label = String(value);
    return value < 1 ? chalk.redBright(label) : chalk.greenBright(label);
  };

  const render: ResultRenderer['render'] = ({ result }) => {
    const inputRow = [sanitize(result.input), '', ''];
    const spacerRow = ['', '', ''];
    const scoreRows = result.scores.map((entry) => [
      sanitize(entry.criterion),
      colorizeScore(entry.score),
      sanitize(entry.reasoning),
    ]);

    const rows: string[][] = [inputRow, spacerRow, headerRow, ...scoreRows];
    return table.table(rows, tableConfig);
  };

  return { render };
};

/**
 * Renders a summary table of the given evaluation results, one row per result.
 *
 * Each row holds the result name, a score summary (the rounded value of
 * `sum of scores * 100 / number of scores`, plus how many scores are below 1)
 * and the sanitized reasonings of all its scores joined by a single space.
 *
 * @param failedScenario the evaluation results to list in the table.
 * @returns the table as a string.
 */
export const renderFailedScenarios = (failedScenario: EvaluationResult[]): string => {
  const tableRows: string[][] = [
    ['Failed Tests', '', ''],
    ['Scenario', 'Scores', 'Reasoning'],
  ];

  for (const scenario of failedScenario) {
    const scoreCount = scenario.scores.length;
    let scoreSum = 0;
    let failureCount = 0;
    const reasonings: string[] = [];

    // Single pass over the scores: accumulate the sum, the failures and the reasonings.
    for (const entry of scenario.scores) {
      scoreSum += entry.score;
      if (entry.score < 1) {
        failureCount += 1;
      }
      reasonings.push(sanitize(entry.reasoning));
    }

    // With zero scores this evaluates to NaN (0 / 0).
    const averageScore = Math.round((scoreSum * 100) / scoreCount);

    tableRows.push([
      `${scenario.name}`,
      `Average score ${averageScore}. Failed ${failureCount} tests out of ${scoreCount}`,
      `Reasoning: ${reasonings.join(' ')}`,
    ]);
  }

  return table.table(tableRows, {
    ...baseConfig,
    spanningCells: [],
    columns: [{ wrapWord: true, width: 60 }, { wrapWord: true }, { wrapWord: true, width: 60 }],
  });
};

/**
 * Table settings shared by the renderers of this file: the `singleLine` option
 * is turned off and the borders use light box-drawing characters.
 * Renderers spread this object and add their own `spanningCells` / `columns`.
 */
const baseConfig: TableUserConfig = {
  singleLine: false,
  border: {
    // top edge of the table
    topBody: '─', topJoin: '┬', topLeft: '┌', topRight: '┐',
    // bottom edge of the table
    bottomBody: '─', bottomJoin: '┴', bottomLeft: '└', bottomRight: '┘',
    // vertical lines of a content row
    bodyLeft: '│', bodyRight: '│', bodyJoin: '│',
    // horizontal separator between two rows
    joinBody: '─', joinLeft: '├', joinRight: '┤', joinJoin: '┼',
  },
};

/**
 * Cleans up text before it is placed in a table cell, because the table
 * rendering glitches on leading whitespace and surrounding empty lines.
 *
 * Only space characters and `\n` are stripped; blank lines in the middle of
 * the text are kept.
 *
 * @param text the raw text.
 * @returns the cleaned-up text.
 */
const sanitize = (text: string): string => {
  // Applied strictly in this order.
  const strippedPatterns = [
    /^ +/gm, // spaces at the start of each line
    / +$/gm, // spaces at the end of each line
    /^\n+/g, // newlines at the very start of the text
    /\n+$/g, // newlines at the very end of the text
  ];

  return strippedPatterns.reduce((cleaned, pattern) => cleaned.replace(pattern, ''), text);
};

0 comments on commit e71fe3b

Please sign in to comment.