google-gemini · joshualitt · Feb 17, 2026 · Feb 13, 2026 · Feb 17, 2026
@@ -490,6 +490,19 @@ their corresponding top-level category object in your `settings.json` file.
           }
         }
       },
+      "fast-ack-helper": {
+        "extends": "base",
+        "modelConfig": {
+          "model": "gemini-2.5-flash-lite",
+          "generateContentConfig": {
+            "temperature": 0.2,
+            "maxOutputTokens": 120,
+            "thinkingConfig": {
+              "thinkingBudget": 0
+            }
+          }
+        }
+      },
       "edit-corrector": {
         "extends": "base",
         "modelConfig": {

@@ -0,0 +1,86 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { AppRig } from '../packages/cli/src/test-utils/AppRig.js';
+import {
+  type EvalPolicy,
+  runEval,
+  prepareLogDir,
+  symlinkNodeModules,
+} from './test-helper.js';
+import fs from 'node:fs';
+import path from 'node:path';
+import { DEFAULT_GEMINI_MODEL } from '@google/gemini-cli-core';
+
+export interface AppEvalCase {
+  name: string; // test name for runEval; sanitized into the log file name
+  configOverrides?: any; // spread after the default model, so it can override it
+  prompt: string; // sent with rig.sendMessage once the app is idle
+  timeout?: number; // defaults to 60000; appEvalTest adds 10000 for runEval
+  files?: Record<string, string>; // path relative to the test dir -> content
+  setup?: (rig: AppRig) => Promise<void>; // runs after files, before render
+  assert: (rig: AppRig, output: string) => Promise<void>; // output: static output
+}
+
+/**
+ * Registers a behavioral eval that drives the in-process AppRig, mirroring the
+ * API of evalTest in test-helper.ts as closely as possible. The rig is always
+ * unmounted, even when capturing or writing the log output throws.
+ */
+export function appEvalTest(policy: EvalPolicy, evalCase: AppEvalCase) {
+  const fn = async () => {
+    const rig = new AppRig({
+      configOverrides: {
+        model: DEFAULT_GEMINI_MODEL,
+        ...evalCase.configOverrides,
+      },
+    });
+    const { logDir, sanitizedName } = await prepareLogDir(evalCase.name);
+    const logFile = path.join(logDir, `${sanitizedName}.log`);
+
+    try {
+      await rig.initialize();
+      const testDir = rig.getTestDir();
+      symlinkNodeModules(testDir);
+
+      // Setup initial files
+      if (evalCase.files) {
+        for (const [filePath, content] of Object.entries(evalCase.files)) {
+          const fullPath = path.join(testDir, filePath);
+          fs.mkdirSync(path.dirname(fullPath), { recursive: true });
+          fs.writeFileSync(fullPath, content);
+        }
+      }
+
+      // Run custom setup if provided (e.g. for breakpoints)
+      if (evalCase.setup) {
+        await evalCase.setup(rig);
+      }
+
+      // Render the app and wait for the initial ready state.
+      rig.render();
+      await rig.waitForIdle();
+
+      // Send the prompt, then run the assertion. Interaction-heavy tests can
+      // do their own waiting/steering inside assert.
+      await rig.sendMessage(evalCase.prompt);
+      await evalCase.assert(rig, rig.getStaticOutput());
+    } finally {
+      try {
+        const output = rig.getStaticOutput();
+        if (output) {
+          await fs.promises.writeFile(logFile, output);
+        }
+      } finally {
+        // Always release the rig, even if log capture above failed.
+        await rig.unmount();
+      }
+    }
+  };
+
+  // Vitest gets 10s of headroom over the case timeout (default 60s).
+  runEval(policy, evalCase.name, fn, (evalCase.timeout ?? 60000) + 10000);
+}
@@ -47,11 +47,7 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
 
       // Symlink node modules to reduce the amount of time needed to
       // bootstrap test projects.
-      const rootNodeModules = path.join(process.cwd(), 'node_modules');
-      const testNodeModules = path.join(rig.testDir || '', 'node_modules');
-      if (fs.existsSync(rootNodeModules) && !fs.existsSync(testNodeModules)) {
-        fs.symlinkSync(rootNodeModules, testNodeModules, 'dir');
-      }
+      symlinkNodeModules(rig.testDir || '');
 
       if (evalCase.files) {
         const acknowledgedAgents: Record<string, Record<string, string>> = {};
@@ -159,20 +155,47 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
     }
   };
 
+  runEval(policy, evalCase.name, fn, evalCase.timeout);
+}
+
+/**
+ * Registers fn: it.skip for USUALLY_PASSES unless RUN_EVALS is set, else it.
+ */
+export function runEval(
+  policy: EvalPolicy,
+  name: string,
+  fn: () => Promise<void>,
+  timeout?: number,
+) {
   if (policy === 'USUALLY_PASSES' && !process.env['RUN_EVALS']) {
-    it.skip(evalCase.name, fn);
+    it.skip(name, fn);
   } else {
-    it(evalCase.name, fn, evalCase.timeout);
+    it(name, fn, timeout);
   }
 }
 
-async function prepareLogDir(name: string) {
+export async function prepareLogDir(name: string) {
   const logDir = path.resolve(process.cwd(), 'evals/logs');
   await fs.promises.mkdir(logDir, { recursive: true });
   const sanitizedName = name.replace(/[^a-z0-9]/gi, '_').toLowerCase();
   return { logDir, sanitizedName };
 }
 
+/**
+ * Symlinks the node_modules under process.cwd() into testDir (to speed up tests
+ * that need to run tools), unless testDir is empty, the source is missing, or
+ * the link target already exists.
+ */
+export function symlinkNodeModules(testDir: string) {
+  const source = path.join(process.cwd(), 'node_modules');
+  const target = path.join(testDir, 'node_modules');
+
+  if (!testDir || !fs.existsSync(source) || fs.existsSync(target)) {
+    return;
+  }
+  fs.symlinkSync(source, target, 'dir');
+}
+
 export interface EvalCase {
   name: string;
   params?: Record<string, any>;

@@ -5,14 +5,32 @@
  */
 
 import { defineConfig } from 'vitest/config';
+import { fileURLToPath } from 'node:url';
+import * as path from 'node:path';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
 
 export default defineConfig({
+  resolve: {
+    conditions: ['test'], // NOTE(review): custom condition — confirm who exports it
+  },
   test: {
     testTimeout: 300000, // 5 minutes
     reporters: ['default', 'json'],
     outputFile: {
       json: 'evals/logs/report.json',
     },
     include: ['**/*.eval.ts'],
+    environment: 'node',
+    globals: true,
+    alias: {
+      react: path.resolve(__dirname, '../node_modules/react'),
+    },
+    setupFiles: [path.resolve(__dirname, '../packages/cli/test-setup.ts')],
+    server: {
+      deps: {
+        inline: [/@google\/gemini-cli-core/], // NOTE(review): confirm why core is inlined
+      },
+    },
   },
 });
@@ -0,0 +1,41 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, afterEach } from 'vitest';
+import { AppRig } from './AppRig.js';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+
+describe('AppRig', () => {
+  let rig: AppRig | undefined;
+
+  afterEach(async () => {
+    // Clear the reference first so a rig is never unmounted twice across tests.
+    const mounted = rig;
+    rig = undefined;
+    await mounted?.unmount();
+  });
+
+  it('should render the app and handle a simple message', async () => {
+    const fakeResponsesPath = path.join(
+      __dirname,
+      'fixtures',
+      'simple.responses',
+    );
+    rig = new AppRig({ fakeResponsesPath });
+    await rig.initialize();
+    rig.render();
+    await rig.waitForIdle(); // wait for the initial render
+
+    // Type a message and submit it.
+    await rig.type('Hello');
+    await rig.pressEnter();
+
+    // Wait for model response
+    await rig.waitForOutput('Hello! How can I help you today?');
+  });
+});