diff --git a/evals/frugalSearch.eval.ts b/evals/frugalSearch.eval.ts index e4f3e85956a..11c51e85298 100644 --- a/evals/frugalSearch.eval.ts +++ b/evals/frugalSearch.eval.ts @@ -7,6 +7,11 @@ import { describe, expect } from 'vitest'; import { evalTest } from './test-helper.js'; +/** + * Evals to verify that the agent uses search tools efficiently (frugally) + * by utilizing limiting parameters like `total_max_matches` and `max_matches_per_file`. + * This ensures the agent doesn't flood the context window with unnecessary search results. + */ describe('Frugal Search', () => { const getGrepParams = (call: any): any => { let args = call.toolRequest.args; @@ -112,21 +117,26 @@ describe('Frugal Search', () => { expect(grepCalls.length).toBeGreaterThan(0); - const hasFrugalLimit = grepCalls.some((call) => { - const params = getGrepParams(call); - // Check for explicitly set small limit for "sample" or "example" requests - return ( - params.total_max_matches !== undefined && - params.total_max_matches <= 100 - ); - }); + const grepParams = grepCalls.map(getGrepParams); + + const hasTotalMaxLimit = grepParams.some( + (p) => p.total_max_matches !== undefined && p.total_max_matches <= 100, + ); + expect( + hasTotalMaxLimit, + `Expected agent to use a small total_max_matches (<= 100) for a sample usage request. Actual values: ${JSON.stringify( + grepParams.map((p) => p.total_max_matches), + )}`, + ).toBe(true); + const hasMaxMatchesPerFileLimit = grepParams.some( + (p) => + p.max_matches_per_file !== undefined && p.max_matches_per_file <= 5, + ); expect( - hasFrugalLimit, - `Expected agent to use a small total_max_matches for a sample usage request. Params used: ${JSON.stringify( - grepCalls.map(getGrepParams), - null, - 2, + hasMaxMatchesPerFileLimit, + `Expected agent to use a small max_matches_per_file (<= 5) for a sample usage request. Actual values: ${JSON.stringify( + grepParams.map((p) => p.max_matches_per_file), )}`, ).toBe(true); }, diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index c827d66ddce..e9445653663 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -520,7 +520,8 @@ exports[`Core System Prompt (prompts.ts) > should append userMemory with separat - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always minimize wasted context window by scoping and limiting all of your grep_search searches. e.g.: pass total_max_matches, include, and max_matches_per_file. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -650,7 +651,8 @@ exports[`Core System Prompt (prompts.ts) > should handle CodebaseInvestigator wi - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always minimize wasted context window by scoping and limiting all of your grep_search searches. e.g.: pass total_max_matches, include, and max_matches_per_file. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -745,7 +747,8 @@ exports[`Core System Prompt (prompts.ts) > should handle CodebaseInvestigator wi - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always minimize wasted context window by scoping and limiting all of your grep_search searches. e.g.: pass total_max_matches, include, and max_matches_per_file. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -1309,7 +1312,8 @@ exports[`Core System Prompt (prompts.ts) > should include available_skills with - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always minimize wasted context window by scoping and limiting all of your grep_search searches. e.g.: pass total_max_matches, include, and max_matches_per_file. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -1435,7 +1439,8 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always minimize wasted context window by scoping and limiting all of your grep_search searches. e.g.: pass total_max_matches, include, and max_matches_per_file. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -1552,7 +1557,8 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always minimize wasted context window by scoping and limiting all of your grep_search searches. e.g.: pass total_max_matches, include, and max_matches_per_file. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -1669,7 +1675,8 @@ exports[`Core System Prompt (prompts.ts) > should include correct sandbox instru - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always minimize wasted context window by scoping and limiting all of your grep_search searches. e.g.: pass total_max_matches, include, and max_matches_per_file. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -1782,7 +1789,8 @@ exports[`Core System Prompt (prompts.ts) > should include planning phase suggest - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always minimize wasted context window by scoping and limiting all of your grep_search searches. e.g.: pass total_max_matches, include, and max_matches_per_file. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -1895,7 +1903,8 @@ exports[`Core System Prompt (prompts.ts) > should include sub-agents in XML for - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always minimize wasted context window by scoping and limiting all of your grep_search searches. e.g.: pass total_max_matches, include, and max_matches_per_file. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -2247,7 +2256,8 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always minimize wasted context window by scoping and limiting all of your grep_search searches. e.g.: pass total_max_matches, include, and max_matches_per_file. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -2360,7 +2370,8 @@ exports[`Core System Prompt (prompts.ts) > should return the base prompt when us - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always minimize wasted context window by scoping and limiting all of your grep_search searches. e.g.: pass total_max_matches, include, and max_matches_per_file. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -2584,7 +2595,8 @@ exports[`Core System Prompt (prompts.ts) > should use chatty system prompt for p - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always minimize wasted context window by scoping and limiting all of your grep_search searches. e.g.: pass total_max_matches, include, and max_matches_per_file. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. @@ -2697,7 +2709,8 @@ exports[`Core System Prompt (prompts.ts) > should use chatty system prompt for p - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always minimize wasted context window by scoping and limiting all of your grep_search searches. e.g.: pass total_max_matches, include, and max_matches_per_file. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. ## Engineering Standards - **Contextual Precedence:** Instructions found in \`GEMINI.md\` files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt. diff --git a/packages/core/src/prompts/snippets.ts b/packages/core/src/prompts/snippets.ts index f1b9f6dc127..3dcf346de6e 100644 --- a/packages/core/src/prompts/snippets.ts +++ b/packages/core/src/prompts/snippets.ts @@ -165,7 +165,8 @@ export function renderCoreMandates(options?: CoreMandatesOptions): string { - **Source Control:** Do not stage or commit changes unless specifically requested by the user. ## Context Efficiency: -- Always minimize wasted context window by scoping and limiting all of your ${GREP_TOOL_NAME} searches. e.g.: pass total_max_matches, include, and max_matches_per_file. +- Always scope and limit your searches to avoid context window exhaustion and ensure high-signal results. Use include to target relevant files and strictly limit results using total_max_matches and max_matches_per_file, especially during the research phase. +- For broad discovery, use names_only=true or max_matches_per_file=1 to identify files without retrieving their context. ## Engineering Standards - **Contextual Precedence:** Instructions found in ${formattedFilenames} files are foundational mandates. They take absolute precedence over the general workflows and tool defaults described in this system prompt.