diff --git a/crates/goose/src/agents/agent.rs b/crates/goose/src/agents/agent.rs index 798f54e25f19..134c5193e6e6 100644 --- a/crates/goose/src/agents/agent.rs +++ b/crates/goose/src/agents/agent.rs @@ -236,7 +236,8 @@ impl Agent { ))); // Add repetition inspector (lower priority - basic repetition checking) - tool_inspection_manager.add_inspector(Box::new(RepetitionInspector::new(None))); + // Limit to 5 repetitions to allow model self-correction while preventing infinite loops + tool_inspection_manager.add_inspector(Box::new(RepetitionInspector::new(Some(5)))); tool_inspection_manager } diff --git a/crates/goose/src/agents/code_execution_extension.rs b/crates/goose/src/agents/code_execution_extension.rs index 812bb5e1484f..fe8eae93ddbf 100644 --- a/crates/goose/src/agents/code_execution_extension.rs +++ b/crates/goose/src/agents/code_execution_extension.rs @@ -440,18 +440,21 @@ impl CodeExecutionClient { website_url: None, }, instructions: Some(indoc! {r#" - BATCH MULTIPLE TOOL CALLS INTO ONE execute_code CALL. + BATCH MULTIPLE TOOL CALLS INTO ONE code_execution__execute_code CALL. This extension exists to reduce round-trips. When a task requires multiple tool calls: - WRONG: Multiple execute_code calls, each with one tool - RIGHT: One execute_code call with a script that calls all needed tools - IMPORTANT: All tool calls are SYNCHRONOUS. Do NOT use async/await. + IMPORTANT: All tool calls are SYNCHRONOUS and return PARSED OBJECTS. + - Do NOT use async/await + - NEVER use JSON.parse() on tool results - they are already JavaScript objects Workflow: - 1. Use the read_module tool to discover tools and signatures + 1. Use code_execution__read_module to discover tools and signatures 2. Write ONE script that imports and calls ALL tools needed for the task 3. Chain results: use output from one tool as input to the next + Example: const user = get_me(); const prs = search({author: user.login}); "#}.to_string()), }; @@ -521,6 +524,8 @@ impl CodeExecutionClient { let tools = self.get_tool_infos(session_id).await; let parts: Vec<&str> = path.trim_start_matches('/').split('/').collect(); + let reminder = "\n\n// IMPORTANT: Use code_execution__execute_code to call these tools. Do NOT call them directly."; + match parts.as_slice() { [server] => { let server_tools: Vec<_> = @@ -530,8 +535,9 @@ impl CodeExecutionClient { } let sigs: Vec<_> = server_tools.iter().map(|t| t.to_signature()).collect(); Ok(vec![Content::text(format!( - "// import * as {server} from \"{server}\";\n\n{}", - sigs.join("\n") + "// import * as {server} from \"{server}\";\n\n{}{}", + sigs.join("\n"), + reminder ))]) } [server, tool] => { @@ -540,9 +546,10 @@ impl CodeExecutionClient { .find(|t| t.server_name == *server && t.tool_name == *tool) .ok_or_else(|| format!("Tool not found: {server}/{tool}"))?; Ok(vec![Content::text(format!( - "// import * as {server} from \"{server}\";\n\n{}\n\n{}", + "// import * as {server} from \"{server}\";\n\n{}\n\n{}{}", t.to_signature(), - t.description + t.description, + reminder ))]) } _ => Err(format!( @@ -661,6 +668,8 @@ impl CodeExecutionClient { } } + output.push_str("\n// IMPORTANT: Use code_execution__execute_code to call these tools. Do NOT call them directly."); + Ok(vec![Content::text(output)]) } @@ -730,9 +739,16 @@ impl McpClientTrait for CodeExecutionClient { indoc! {r#" Batch multiple MCP tool calls into ONE execution. This is the primary purpose of this tool. - CRITICAL: Always combine related operations into a single execute_code call. + CRITICAL RULES: + 1. Combine related operations into a single execute_code call + 2. Tool calls return PARSED OBJECTS - NEVER use JSON.parse() on results + 3. All calls are synchronous - do NOT use async/await + + WRONG vs RIGHT: - WRONG: execute_code to read → execute_code to write (2 calls) - RIGHT: execute_code that reads AND writes in one script (1 call) + - WRONG: JSON.parse(get_me()) - results are already objects! + - RIGHT: const user = get_me(); user.login - access properties directly EXAMPLE - Read file and write to another (ONE call): ```javascript @@ -755,7 +771,7 @@ impl McpClientTrait for CodeExecutionClient { - Import: import { tool1, tool2 } from "serverName"; - Call: toolName({ param1: value, param2: value }) - Result: record_result(value) - call this to return a value from the script - - All calls are synchronous, return strings + - All calls are synchronous and return parsed objects (NEVER use JSON.parse) TOOL_GRAPH: Always provide tool_graph to describe the execution flow for the UI. Each node has: tool (server/name), description (what it does), depends_on (indices of dependencies). @@ -877,13 +893,14 @@ impl McpClientTrait for CodeExecutionClient { Some(format!( indoc::indoc! {r#" - ALWAYS batch multiple tool operations into ONE execute_code call. + ALWAYS batch multiple tool operations into ONE code_execution__execute_code call. - WRONG: Separate execute_code calls for read file, then write file - RIGHT: One execute_code with a script that reads AND writes + Tools: code_execution__execute_code, code_execution__read_module, code_execution__search_modules Modules: {} - Use the read_module tool to see signatures before calling unfamiliar tools. + Use code_execution__read_module to see signatures before calling unfamiliar tools. "#}, server_list.join(", ") )) diff --git a/crates/goose/src/agents/extension_manager.rs b/crates/goose/src/agents/extension_manager.rs index 104cdf2a9bb1..188b74d871cd 100644 --- a/crates/goose/src/agents/extension_manager.rs +++ b/crates/goose/src/agents/extension_manager.rs @@ -1196,24 +1196,11 @@ impl ExtensionManager { tool_call: CallToolRequestParams, cancellation_token: CancellationToken, ) -> Result { - // Some models strip the tool prefix, so auto-add it for known code_execution tools let tool_name_str = tool_call.name.to_string(); - let prefixed_name = if !tool_name_str.contains("__") { - let code_exec_tools = ["execute_code", "read_module", "search_modules"]; - if code_exec_tools.contains(&tool_name_str.as_str()) - && self.extensions.lock().await.contains_key("code_execution") - { - format!("code_execution__{}", tool_name_str) - } else { - tool_name_str - } - } else { - tool_name_str - }; // Dispatch tool call based on the prefix naming convention let (client_name, client) = - self.get_client_for_tool(&prefixed_name) + self.get_client_for_tool(&tool_name_str) .await .ok_or_else(|| { ErrorData::new( @@ -1223,7 +1210,7 @@ impl ExtensionManager { ) })?; - let tool_name = prefixed_name + let tool_name = tool_name_str .strip_prefix(client_name.as_str()) .and_then(|s| s.strip_prefix("__")) .ok_or_else(|| { diff --git a/crates/goose/src/agents/snapshots/goose__agents__prompt_manager__tests__basic.snap b/crates/goose/src/agents/snapshots/goose__agents__prompt_manager__tests__basic.snap index 5afeffcddf08..157d01fc57ee 100644 --- a/crates/goose/src/agents/snapshots/goose__agents__prompt_manager__tests__basic.snap +++ b/crates/goose/src/agents/snapshots/goose__agents__prompt_manager__tests__basic.snap @@ -9,7 +9,6 @@ goose uses LLM providers with tool calling capability. You can be used with diff claude-sonnet-4, o1, llama-3.2, deepseek-r1, etc). These models have varying knowledge cut-off dates depending on when they were trained, but typically it's between 5-10 months prior to the current date. - # Extensions Extensions allow other applications to provide context to goose. Extensions connect goose to different data sources and diff --git a/crates/goose/src/agents/snapshots/goose__agents__prompt_manager__tests__one_extension.snap b/crates/goose/src/agents/snapshots/goose__agents__prompt_manager__tests__one_extension.snap index 8f02d1cf7af1..77190259017b 100644 --- a/crates/goose/src/agents/snapshots/goose__agents__prompt_manager__tests__one_extension.snap +++ b/crates/goose/src/agents/snapshots/goose__agents__prompt_manager__tests__one_extension.snap @@ -9,7 +9,6 @@ goose uses LLM providers with tool calling capability. You can be used with diff claude-sonnet-4, o1, llama-3.2, deepseek-r1, etc). These models have varying knowledge cut-off dates depending on when they were trained, but typically it's between 5-10 months prior to the current date. - # Extensions Extensions allow other applications to provide context to goose. Extensions connect goose to different data sources and diff --git a/crates/goose/src/agents/snapshots/goose__agents__prompt_manager__tests__typical_setup.snap b/crates/goose/src/agents/snapshots/goose__agents__prompt_manager__tests__typical_setup.snap index 719a84871da8..7c4a42cfc7f2 100644 --- a/crates/goose/src/agents/snapshots/goose__agents__prompt_manager__tests__typical_setup.snap +++ b/crates/goose/src/agents/snapshots/goose__agents__prompt_manager__tests__typical_setup.snap @@ -9,7 +9,6 @@ goose uses LLM providers with tool calling capability. You can be used with diff claude-sonnet-4, o1, llama-3.2, deepseek-r1, etc). These models have varying knowledge cut-off dates depending on when they were trained, but typically it's between 5-10 months prior to the current date. - # Extensions Extensions allow other applications to provide context to goose. Extensions connect goose to different data sources and diff --git a/crates/goose/src/prompts/system.md b/crates/goose/src/prompts/system.md index c282d75497ac..40f5f699aa1e 100644 --- a/crates/goose/src/prompts/system.md +++ b/crates/goose/src/prompts/system.md @@ -5,8 +5,6 @@ goose uses LLM providers with tool calling capability. You can be used with diff claude-sonnet-4, o1, llama-3.2, deepseek-r1, etc). These models have varying knowledge cut-off dates depending on when they were trained, but typically it's between 5-10 months prior to the current date. -{% if not code_execution_mode %} - # Extensions Extensions allow other applications to provide context to goose. Extensions connect goose to different data sources and @@ -41,7 +39,6 @@ and extensionmanager__list_resources on this extension. {% else %} No extensions are defined. You should let the user know that they should add extensions. {% endif %} -{% endif %} {% if extension_tool_limits is defined and not code_execution_mode %} {% with (extension_count, tool_count) = extension_tool_limits %}