zed-industries · bennetbo · Oct 22, 2025 · Oct 19, 2025 · Oct 19, 2025 · Oct 19, 2025
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -58,7 +58,7 @@ members = [
     "crates/edit_prediction_context",
     "crates/zeta2_tools",
     "crates/editor",
-    # "crates/eval",
+    "crates/eval",
     "crates/explorer_command_injector",
     "crates/extension",
     "crates/extension_api",

diff --git a/crates/acp_thread/src/terminal.rs b/crates/acp_thread/src/terminal.rs
@@ -1,10 +1,15 @@
 use agent_client_protocol as acp;
-
+use anyhow::Result;
 use futures::{FutureExt as _, future::Shared};
-use gpui::{App, AppContext, Context, Entity, Task};
+use gpui::{App, AppContext, AsyncApp, Context, Entity, Task};
 use language::LanguageRegistry;
 use markdown::Markdown;
+use project::Project;
+use settings::{Settings as _, SettingsLocation};
 use std::{path::PathBuf, process::ExitStatus, sync::Arc, time::Instant};
+use task::Shell;
+use terminal::terminal_settings::TerminalSettings;
+use util::get_default_system_shell_preferring_bash;
 
 pub struct Terminal {
     id: acp::TerminalId,
@@ -170,3 +175,68 @@ impl Terminal {
         )
     }
 }
+
+pub async fn create_terminal_entity(
+    command: String,
+    args: &[String],
+    env_vars: Vec<(String, String)>,
+    cwd: Option<PathBuf>,
+    project: &Entity<Project>,
+    cx: &mut AsyncApp,
+) -> Result<Entity<terminal::Terminal>> {
+    let mut env = if let Some(dir) = &cwd {
+        project
+            .update(cx, |project, cx| {
+                let worktree = project.find_worktree(dir.as_path(), cx);
+                let shell = TerminalSettings::get(
+                    worktree.as_ref().map(|(worktree, path)| SettingsLocation {
+                        worktree_id: worktree.read(cx).id(),
+                        path: &path,
+                    }),
+                    cx,
+                )
+                .shell
+                .clone();
+                project.directory_environment(&shell, dir.clone().into(), cx)
+            })?
+            .await
+            .unwrap_or_default()
+    } else {
+        Default::default()
+    };
+
+    // Disables paging for `git` and hopefully other commands
+    env.insert("PAGER".into(), "".into());
+    env.extend(env_vars);
+
+    // Use remote shell or default system shell, as appropriate
+    let shell = project
+        .update(cx, |project, cx| {
+            project
+                .remote_client()
+                .and_then(|r| r.read(cx).default_system_shell())
+                .map(Shell::Program)
+        })?
+        .unwrap_or_else(|| Shell::Program(get_default_system_shell_preferring_bash()));
+    let is_windows = project
+        .read_with(cx, |project, cx| project.path_style(cx).is_windows())
+        .unwrap_or(cfg!(windows));
+    let (task_command, task_args) = task::ShellBuilder::new(&shell, is_windows)
+        .redirect_stdin_to_dev_null()
+        .build(Some(command.clone()), &args);
+
+    project
+        .update(cx, |project, cx| {
+            project.create_terminal_task(
+                task::SpawnInTerminal {
+                    command: Some(task_command),
+                    args: task_args,
+                    cwd,
+                    env,
+                    ..Default::default()
+                },
+                cx,
+            )
+        })?
+        .await
+}
diff --git a/crates/agent/Cargo.toml b/crates/agent/Cargo.toml
@@ -10,6 +10,8 @@ path = "src/agent.rs"
 
 [features]
 test-support = ["db/test-support"]
+eval = []
+edit-agent-eval = []
 e2e = []
 
 [lints]

diff --git a/crates/agent/src/edit_agent/evals.rs b/crates/agent/src/edit_agent/evals.rs
@@ -31,7 +31,7 @@ use std::{
 use util::path;
 
 #[test]
-#[cfg_attr(not(feature = "eval"), ignore)]
+#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
 fn eval_extract_handle_command_output() {
     // Test how well agent generates multiple edit hunks.
     //
@@ -108,7 +108,7 @@ fn eval_extract_handle_command_output() {
 }
 
 #[test]
-#[cfg_attr(not(feature = "eval"), ignore)]
+#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
 fn eval_delete_run_git_blame() {
     // Model                       | Pass rate
     // ----------------------------|----------
@@ -171,7 +171,7 @@ fn eval_delete_run_git_blame() {
 }
 
 #[test]
-#[cfg_attr(not(feature = "eval"), ignore)]
+#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
 fn eval_translate_doc_comments() {
     //  Model                          | Pass rate
     // ============================================
@@ -234,7 +234,7 @@ fn eval_translate_doc_comments() {
 }
 
 #[test]
-#[cfg_attr(not(feature = "eval"), ignore)]
+#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
 fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
     //  Model                          | Pass rate
     // ============================================
@@ -360,7 +360,7 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
 }
 
 #[test]
-#[cfg_attr(not(feature = "eval"), ignore)]
+#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
 fn eval_disable_cursor_blinking() {
     //  Model                          | Pass rate
     // ============================================
@@ -446,7 +446,7 @@ fn eval_disable_cursor_blinking() {
 }
 
 #[test]
-#[cfg_attr(not(feature = "eval"), ignore)]
+#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
 fn eval_from_pixels_constructor() {
     // Results for 2025-06-13
     //
@@ -656,7 +656,7 @@ fn eval_from_pixels_constructor() {
 }
 
 #[test]
-#[cfg_attr(not(feature = "eval"), ignore)]
+#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
 fn eval_zode() {
     //  Model                          | Pass rate
     // ============================================
@@ -763,7 +763,7 @@ fn eval_zode() {
 }
 
 #[test]
-#[cfg_attr(not(feature = "eval"), ignore)]
+#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
 fn eval_add_overwrite_test() {
     //  Model                          | Pass rate
     // ============================================
@@ -995,7 +995,7 @@ fn eval_add_overwrite_test() {
 }
 
 #[test]
-#[cfg_attr(not(feature = "eval"), ignore)]
+#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
 fn eval_create_empty_file() {
     // Check that Edit Agent can create a file without writing its
     // thoughts into it. This issue is not specific to empty files, but
@@ -1490,9 +1490,20 @@ impl EditAgentTest {
             &std::env::var("ZED_JUDGE_MODEL").unwrap_or("anthropic/claude-4-sonnet-latest".into()),
         )
         .unwrap();
+
+        let authenticate_provider_tasks = cx.update(|cx| {
+            LanguageModelRegistry::global(cx).update(cx, |registry, cx| {
+                registry
+                    .providers()
+                    .iter()
+                    .map(|p| p.authenticate(cx))
+                    .collect::<Vec<_>>()
+            })
+        });
         let (agent_model, judge_model) = cx
             .update(|cx| {
                 cx.spawn(async move |cx| {
+                    futures::future::join_all(authenticate_provider_tasks).await;
                     let agent_model = Self::load_model(&agent_model, cx).await;
                     let judge_model = Self::load_model(&judge_model, cx).await;
                     (agent_model.unwrap(), judge_model.unwrap())

diff --git a/crates/agent/src/tests/mod.rs b/crates/agent/src/tests/mod.rs
@@ -1995,7 +1995,7 @@ async fn test_tool_updates_to_completion(cx: &mut TestAppContext) {
             locations: vec![],
             raw_input: Some(json!({})),
             raw_output: None,
-            meta: None,
+            meta: Some(json!({ "tool_name": "thinking" })),
         }
     );
     let update = expect_tool_call_update_fields(&mut events).await;

diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs
@@ -750,7 +750,13 @@ impl Thread {
 
         let title = tool.initial_title(tool_use.input.clone(), cx);
         let kind = tool.kind();
-        stream.send_tool_call(&tool_use.id, title, kind, tool_use.input.clone());
+        stream.send_tool_call(
+            &tool_use.id,
+            &tool_use.name,
+            title,
+            kind,
+            tool_use.input.clone(),
+        );
 
         let output = tool_result
             .as_ref()
@@ -1133,14 +1139,18 @@ impl Thread {
         Ok(())
     }
 
-    pub fn latest_token_usage(&self) -> Option<acp_thread::TokenUsage> {
+    pub fn latest_request_token_usage(&self) -> Option<language_model::TokenUsage> {
         let last_user_message = self.last_user_message()?;
         let tokens = self.request_token_usage.get(&last_user_message.id)?;
-        let model = self.model.clone()?;
+        Some(*tokens)
+    }
 
+    pub fn latest_token_usage(&self) -> Option<acp_thread::TokenUsage> {
+        let usage = self.latest_request_token_usage()?;
+        let model = self.model.clone()?;
         Some(acp_thread::TokenUsage {
             max_tokens: model.max_token_count_for_mode(self.completion_mode.into()),
-            used_tokens: tokens.total_tokens(),
+            used_tokens: usage.total_tokens(),
         })
     }
 
@@ -1183,6 +1193,14 @@ impl Thread {
         self.run_turn(cx)
     }
 
+    #[cfg(feature = "eval")]
+    pub fn proceed(
+        &mut self,
+        cx: &mut Context<Self>,
+    ) -> Result<mpsc::UnboundedReceiver<Result<ThreadEvent>>> {
+        self.run_turn(cx)
+    }
+
     fn run_turn(
         &mut self,
         cx: &mut Context<Self>,
@@ -1550,7 +1568,13 @@ impl Thread {
         });
 
         if push_new_tool_use {
-            event_stream.send_tool_call(&tool_use.id, title, kind, tool_use.input.clone());
+            event_stream.send_tool_call(
+                &tool_use.id,
+                &tool_use.name,
+                title,
+                kind,
+                tool_use.input.clone(),
+            );
             last_message
                 .content
                 .push(AgentMessageContent::ToolUse(tool_use.clone()));
@@ -2345,13 +2369,15 @@ impl ThreadEventStream {
     fn send_tool_call(
         &self,
         id: &LanguageModelToolUseId,
+        tool_name: &str,
         title: SharedString,
         kind: acp::ToolKind,
         input: serde_json::Value,
     ) {
         self.0
             .unbounded_send(Ok(ThreadEvent::ToolCall(Self::initial_tool_call(
                 id,
+                tool_name,
                 title.to_string(),
                 kind,
                 input,
@@ -2361,12 +2387,15 @@ impl ThreadEventStream {
 
     fn initial_tool_call(
         id: &LanguageModelToolUseId,
+        tool_name: &str,
         title: String,
         kind: acp::ToolKind,
         input: serde_json::Value,
     ) -> acp::ToolCall {
         acp::ToolCall {
-            meta: None,
+            meta: Some(serde_json::json!({
+                "tool_name": tool_name
+            })),
             id: acp::ToolCallId(id.to_string().into()),
             title,
             kind,