Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions docs/get-started/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,19 @@ their corresponding top-level category object in your `settings.json` file.
}
}
},
"fast-ack-helper": {
"extends": "base",
"modelConfig": {
"model": "gemini-2.5-flash-lite",
"generateContentConfig": {
"temperature": 0.2,
"maxOutputTokens": 120,
"thinkingConfig": {
"thinkingBudget": 0
}
}
}
},
"edit-corrector": {
"extends": "base",
"modelConfig": {
Expand Down
86 changes: 86 additions & 0 deletions evals/app-test-helper.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/

import { AppRig } from '../packages/cli/src/test-utils/AppRig.js';
import {
type EvalPolicy,
runEval,
prepareLogDir,
symlinkNodeModules,
} from './test-helper.js';
import fs from 'node:fs';
import path from 'node:path';
import { DEFAULT_GEMINI_MODEL } from '@google/gemini-cli-core';

/**
 * Describes a single behavioral evaluation executed by appEvalTest against
 * the in-process AppRig.
 */
export interface AppEvalCase {
/** Test name; also used to derive the log file name for the run. */
name: string;
/**
 * Extra settings spread over the default `{ model: DEFAULT_GEMINI_MODEL }`
 * overrides passed to the AppRig constructor, so keys here win.
 */
configOverrides?: any;
/** Initial message sent to the app once it has reached its idle state. */
prompt: string;
/**
 * Base timeout; appEvalTest adds 10000 to it (60000 is assumed when omitted)
 * before handing it to runEval. Presumably milliseconds — confirm against
 * the Vitest timeout contract.
 */
timeout?: number;
/**
 * Files to create before the app is rendered: each key is a path joined onto
 * the rig's test directory, each value is the file content.
 */
files?: Record<string, string>;
/** Optional hook awaited after the files are written and before render. */
setup?: (rig: AppRig) => Promise<void>;
/**
 * Assertion callback, awaited after the prompt is sent. Receives the rig and
 * the static output captured at that moment; it may do further waiting or
 * interaction through the rig.
 */
assert: (rig: AppRig, output: string) => Promise<void>;
}

/**
 * A helper for running behavioral evaluations using the in-process AppRig.
 * This matches the API of evalTest in test-helper.ts as closely as possible.
 *
 * The registered test creates an AppRig (the default model can be overridden
 * via `evalCase.configOverrides`), writes `evalCase.files` into the rig's test
 * directory, runs the optional `setup` hook, renders the app, sends the prompt
 * and finally awaits `evalCase.assert`.
 *
 * Whatever static output the rig holds at the end is written to
 * `<logDir>/<sanitizedName>.log`, and the rig is always unmounted, even when
 * the test body or the log write fails.
 *
 * @param policy - Forwarded to runEval, which decides how the test is registered.
 * @param evalCase - The evaluation to run; see AppEvalCase.
 */
export function appEvalTest(policy: EvalPolicy, evalCase: AppEvalCase) {
  const fn = async () => {
    const rig = new AppRig({
      configOverrides: {
        model: DEFAULT_GEMINI_MODEL,
        ...evalCase.configOverrides,
      },
    });

    const { logDir, sanitizedName } = await prepareLogDir(evalCase.name);
    const logFile = path.join(logDir, `${sanitizedName}.log`);

    try {
      await rig.initialize();

      const testDir = rig.getTestDir();
      symlinkNodeModules(testDir);

      // Setup initial files
      if (evalCase.files) {
        for (const [filePath, content] of Object.entries(evalCase.files)) {
          const fullPath = path.join(testDir, filePath);
          fs.mkdirSync(path.dirname(fullPath), { recursive: true });
          fs.writeFileSync(fullPath, content);
        }
      }

      // Run custom setup if provided (e.g. for breakpoints)
      if (evalCase.setup) {
        await evalCase.setup(rig);
      }

      // Render the app!
      rig.render();

      // Wait for initial ready state
      await rig.waitForIdle();

      // Send the initial prompt
      await rig.sendMessage(evalCase.prompt);

      // Run assertion. Interaction-heavy tests can do their own waiting/steering here.
      const output = rig.getStaticOutput();
      await evalCase.assert(rig, output);
    } finally {
      // Capturing or writing the log can throw (e.g. the rig failed to
      // initialize, or the disk write fails). The inner finally guarantees the
      // rig is still unmounted in that case instead of being leaked.
      try {
        const output = rig.getStaticOutput();
        if (output) {
          await fs.promises.writeFile(logFile, output);
        }
      } finally {
        await rig.unmount();
      }
    }
  };

  // Allow 10s of slack on top of the case's own timeout (default 60s).
  runEval(policy, evalCase.name, fn, (evalCase.timeout ?? 60000) + 10000);
}
39 changes: 31 additions & 8 deletions evals/test-helper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,7 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {

// Symlink node modules to reduce the amount of time needed to
// bootstrap test projects.
const rootNodeModules = path.join(process.cwd(), 'node_modules');
const testNodeModules = path.join(rig.testDir || '', 'node_modules');
if (fs.existsSync(rootNodeModules) && !fs.existsSync(testNodeModules)) {
fs.symlinkSync(rootNodeModules, testNodeModules, 'dir');
}
symlinkNodeModules(rig.testDir || '');

if (evalCase.files) {
const acknowledgedAgents: Record<string, Record<string, string>> = {};
Expand Down Expand Up @@ -159,20 +155,47 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
}
};

runEval(policy, evalCase.name, fn, evalCase.timeout);
}

/**
 * Wraps a test function with the appropriate Vitest 'it' or 'it.skip' based on policy.
 *
 * A 'USUALLY_PASSES' eval is registered as skipped unless the RUN_EVALS
 * environment variable is set to a non-empty value; every other case is
 * registered as a normal test with the given timeout.
 *
 * @param policy - Eval policy controlling whether the test may be skipped.
 * @param name - Test name passed to Vitest.
 * @param fn - Async test body.
 * @param timeout - Optional timeout forwarded to `it` as its third argument.
 */
export function runEval(
  policy: EvalPolicy,
  name: string,
  fn: () => Promise<void>,
  timeout?: number,
): void {
  // Only the function's own parameters are in scope here; the earlier
  // `evalCase.*` calls belonged to evalTest and must not be repeated.
  if (policy === 'USUALLY_PASSES' && !process.env['RUN_EVALS']) {
    it.skip(name, fn);
  } else {
    it(name, fn, timeout);
  }
}

/**
 * Ensures the eval log directory exists and derives a file-name-safe version
 * of the given eval name.
 *
 * The directory is `evals/logs` resolved against the current working
 * directory and is created recursively if missing. The sanitized name replaces
 * every character outside `[a-z0-9]` (matched case-insensitively) with `_`
 * and is then lower-cased.
 *
 * @param name - Human-readable eval name.
 * @returns The absolute log directory and the sanitized name.
 */
export async function prepareLogDir(
  name: string,
): Promise<{ logDir: string; sanitizedName: string }> {
  const logDir = path.resolve(process.cwd(), 'evals/logs');
  await fs.promises.mkdir(logDir, { recursive: true });
  const sanitizedName = name.replace(/[^a-z0-9]/gi, '_').toLowerCase();
  return { logDir, sanitizedName };
}

/**
 * Links the working directory's `node_modules` into a test directory so that
 * tests which run tools there do not have to bootstrap dependencies.
 *
 * Nothing happens when `testDir` is empty, when the working directory has no
 * `node_modules`, or when the test directory already has a `node_modules`
 * entry.
 *
 * @param testDir - Directory that should receive the `node_modules` symlink.
 */
export function symlinkNodeModules(testDir: string) {
  const source = path.join(process.cwd(), 'node_modules');
  const target = path.join(testDir, 'node_modules');

  if (!testDir) {
    return;
  }
  if (!fs.existsSync(source)) {
    return;
  }
  if (fs.existsSync(target)) {
    return;
  }

  fs.symlinkSync(source, target, 'dir');
}

export interface EvalCase {
name: string;
params?: Record<string, any>;
Expand Down
18 changes: 18 additions & 0 deletions evals/vitest.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,32 @@
*/

import { defineConfig } from 'vitest/config';
import { fileURLToPath } from 'node:url';
import * as path from 'node:path';

// ES modules have no built-in __dirname, so derive it from this file's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Paths below are resolved relative to this config file.
const reactModulePath = path.resolve(__dirname, '../node_modules/react');
const cliTestSetupFile = path.resolve(
  __dirname,
  '../packages/cli/test-setup.ts',
);

// Per-test timeout: 5 minutes.
const evalTestTimeoutMs = 300000;

/**
 * Vitest configuration for the eval suite: picks up every `*.eval.ts` file,
 * runs it in a node environment with globals enabled, and writes a JSON
 * report next to the eval logs.
 */
export default defineConfig({
  resolve: {
    conditions: ['test'],
  },
  test: {
    testTimeout: evalTestTimeoutMs,
    reporters: ['default', 'json'],
    outputFile: {
      json: 'evals/logs/report.json',
    },
    include: ['**/*.eval.ts'],
    environment: 'node',
    globals: true,
    // NOTE(review): presumably pins a single React copy for in-process UI
    // rendering — confirm.
    alias: {
      react: reactModulePath,
    },
    setupFiles: [cliTestSetupFile],
    server: {
      deps: {
        inline: [/@google\/gemini-cli-core/],
      },
    },
  },
});
41 changes: 41 additions & 0 deletions packages/cli/src/test-utils/AppRig.test.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/

import { describe, it, afterEach } from 'vitest';
import { AppRig } from './AppRig.js';
import path from 'node:path';
import { fileURLToPath } from 'node:url';

const __dirname = path.dirname(fileURLToPath(import.meta.url));

describe('AppRig', () => {
  // Shared handle so afterEach can unmount whatever rig the test created.
  let rig: AppRig | undefined;

  afterEach(async () => {
    await rig?.unmount();
  });

  it('should render the app and handle a simple message', async () => {
    // Responses fixture that lives next to this test file.
    const responsesFixture = path.join(
      __dirname,
      'fixtures',
      'simple.responses',
    );

    const appRig = new AppRig({ fakeResponsesPath: responsesFixture });
    rig = appRig;

    await appRig.initialize();
    appRig.render();

    // Let the initial render settle before interacting.
    await appRig.waitForIdle();

    // Enter the message and submit it.
    await appRig.type('Hello');
    await appRig.pressEnter();

    // The expected reply should show up in the output.
    await appRig.waitForOutput('Hello! How can I help you today?');
  });
});
Loading
Loading