Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ members = [
"crates/edit_prediction_context",
"crates/zeta2_tools",
"crates/editor",
# "crates/eval",
"crates/eval",
"crates/explorer_command_injector",
"crates/extension",
"crates/extension_api",
Expand Down
74 changes: 72 additions & 2 deletions crates/acp_thread/src/terminal.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
use agent_client_protocol as acp;

use anyhow::Result;
use futures::{FutureExt as _, future::Shared};
use gpui::{App, AppContext, Context, Entity, Task};
use gpui::{App, AppContext, AsyncApp, Context, Entity, Task};
use language::LanguageRegistry;
use markdown::Markdown;
use project::Project;
use settings::{Settings as _, SettingsLocation};
use std::{path::PathBuf, process::ExitStatus, sync::Arc, time::Instant};
use task::Shell;
use terminal::terminal_settings::TerminalSettings;
use util::get_default_system_shell_preferring_bash;

pub struct Terminal {
id: acp::TerminalId,
Expand Down Expand Up @@ -170,3 +175,68 @@ impl Terminal {
)
}
}

/// Creates a terminal entity in `project` that runs `command` with `args`.
///
/// The environment is assembled in layers, later layers overriding earlier ones:
/// 1. the directory environment of `cwd`, resolved with the shell from the
///    `TerminalSettings` that apply to that location (or the default
///    environment when `cwd` is `None` or the lookup yields nothing),
/// 2. `PAGER=""`,
/// 3. the caller-supplied `env_vars`.
///
/// The command line is wrapped with `task::ShellBuilder`, using the remote
/// client's default system shell when the project has a remote client that
/// reports one, and `get_default_system_shell_preferring_bash()` otherwise.
/// The builder is configured with `redirect_stdin_to_dev_null()`.
///
/// # Errors
///
/// Returns an error if `project` cannot be updated through `cx`, or if
/// `Project::create_terminal_task` fails.
pub async fn create_terminal_entity(
    command: String,
    args: &[String],
    env_vars: Vec<(String, String)>,
    cwd: Option<PathBuf>,
    project: &Entity<Project>,
    cx: &mut AsyncApp,
) -> Result<Entity<terminal::Terminal>> {
    let mut env = if let Some(dir) = &cwd {
        project
            .update(cx, |project, cx| {
                // Terminal settings can differ per worktree, so resolve the shell
                // for the worktree (if any) that contains the working directory.
                let worktree = project.find_worktree(dir.as_path(), cx);
                let shell = TerminalSettings::get(
                    worktree.as_ref().map(|(worktree, path)| SettingsLocation {
                        worktree_id: worktree.read(cx).id(),
                        path: &path,
                    }),
                    cx,
                )
                .shell
                .clone();
                project.directory_environment(&shell, dir.clone().into(), cx)
            })?
            .await
            .unwrap_or_default()
    } else {
        Default::default()
    };

    // Disables paging for `git` and hopefully other commands.
    // NOTE(review): git consults `GIT_PAGER` and `core.pager` before `PAGER`, so a
    // user-configured git pager may still take effect — confirm whether that matters here.
    env.insert("PAGER".into(), "".into());
    // Applied last so caller-supplied variables win over everything above.
    env.extend(env_vars);

    // Use remote shell or default system shell, as appropriate
    let shell = project
        .update(cx, |project, cx| {
            project
                .remote_client()
                .and_then(|r| r.read(cx).default_system_shell())
                .map(Shell::Program)
        })?
        .unwrap_or_else(|| Shell::Program(get_default_system_shell_preferring_bash()));
    // If the project cannot be read, fall back to the compile-time platform.
    let is_windows = project
        .read_with(cx, |project, cx| project.path_style(cx).is_windows())
        .unwrap_or(cfg!(windows));
    // `command` is owned and not used again, so it is moved rather than cloned;
    // `args` is already a slice reference and is passed through as-is.
    let (task_command, task_args) = task::ShellBuilder::new(&shell, is_windows)
        .redirect_stdin_to_dev_null()
        .build(Some(command), args);

    project
        .update(cx, |project, cx| {
            project.create_terminal_task(
                task::SpawnInTerminal {
                    command: Some(task_command),
                    args: task_args,
                    cwd,
                    env,
                    ..Default::default()
                },
                cx,
            )
        })?
        .await
}
2 changes: 2 additions & 0 deletions crates/agent/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ path = "src/agent.rs"

[features]
test-support = ["db/test-support"]
eval = []
edit-agent-eval = []
e2e = []

[lints]
Expand Down
29 changes: 20 additions & 9 deletions crates/agent/src/edit_agent/evals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ use std::{
use util::path;

#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
fn eval_extract_handle_command_output() {
// Test how well agent generates multiple edit hunks.
//
Expand Down Expand Up @@ -108,7 +108,7 @@ fn eval_extract_handle_command_output() {
}

#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
fn eval_delete_run_git_blame() {
// Model | Pass rate
// ----------------------------|----------
Expand Down Expand Up @@ -171,7 +171,7 @@ fn eval_delete_run_git_blame() {
}

#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
fn eval_translate_doc_comments() {
// Model | Pass rate
// ============================================
Expand Down Expand Up @@ -234,7 +234,7 @@ fn eval_translate_doc_comments() {
}

#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
// Model | Pass rate
// ============================================
Expand Down Expand Up @@ -360,7 +360,7 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
}

#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
fn eval_disable_cursor_blinking() {
// Model | Pass rate
// ============================================
Expand Down Expand Up @@ -446,7 +446,7 @@ fn eval_disable_cursor_blinking() {
}

#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
fn eval_from_pixels_constructor() {
// Results for 2025-06-13
//
Expand Down Expand Up @@ -656,7 +656,7 @@ fn eval_from_pixels_constructor() {
}

#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
fn eval_zode() {
// Model | Pass rate
// ============================================
Expand Down Expand Up @@ -763,7 +763,7 @@ fn eval_zode() {
}

#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
fn eval_add_overwrite_test() {
// Model | Pass rate
// ============================================
Expand Down Expand Up @@ -995,7 +995,7 @@ fn eval_add_overwrite_test() {
}

#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
fn eval_create_empty_file() {
// Check that Edit Agent can create a file without writing its
// thoughts into it. This issue is not specific to empty files, but
Expand Down Expand Up @@ -1490,9 +1490,20 @@ impl EditAgentTest {
&std::env::var("ZED_JUDGE_MODEL").unwrap_or("anthropic/claude-4-sonnet-latest".into()),
)
.unwrap();

let authenticate_provider_tasks = cx.update(|cx| {
LanguageModelRegistry::global(cx).update(cx, |registry, cx| {
registry
.providers()
.iter()
.map(|p| p.authenticate(cx))
.collect::<Vec<_>>()
})
});
let (agent_model, judge_model) = cx
.update(|cx| {
cx.spawn(async move |cx| {
futures::future::join_all(authenticate_provider_tasks).await;
let agent_model = Self::load_model(&agent_model, cx).await;
let judge_model = Self::load_model(&judge_model, cx).await;
(agent_model.unwrap(), judge_model.unwrap())
Expand Down
2 changes: 1 addition & 1 deletion crates/agent/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1995,7 +1995,7 @@ async fn test_tool_updates_to_completion(cx: &mut TestAppContext) {
locations: vec![],
raw_input: Some(json!({})),
raw_output: None,
meta: None,
meta: Some(json!({ "tool_name": "thinking" })),
}
);
let update = expect_tool_call_update_fields(&mut events).await;
Expand Down
41 changes: 35 additions & 6 deletions crates/agent/src/thread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,13 @@ impl Thread {

let title = tool.initial_title(tool_use.input.clone(), cx);
let kind = tool.kind();
stream.send_tool_call(&tool_use.id, title, kind, tool_use.input.clone());
stream.send_tool_call(
&tool_use.id,
&tool_use.name,
title,
kind,
tool_use.input.clone(),
);

let output = tool_result
.as_ref()
Expand Down Expand Up @@ -1133,14 +1139,18 @@ impl Thread {
Ok(())
}

pub fn latest_token_usage(&self) -> Option<acp_thread::TokenUsage> {
pub fn latest_request_token_usage(&self) -> Option<language_model::TokenUsage> {
let last_user_message = self.last_user_message()?;
let tokens = self.request_token_usage.get(&last_user_message.id)?;
let model = self.model.clone()?;
Some(*tokens)
}

pub fn latest_token_usage(&self) -> Option<acp_thread::TokenUsage> {
let usage = self.latest_request_token_usage()?;
let model = self.model.clone()?;
Some(acp_thread::TokenUsage {
max_tokens: model.max_token_count_for_mode(self.completion_mode.into()),
used_tokens: tokens.total_tokens(),
used_tokens: usage.total_tokens(),
})
}

Expand Down Expand Up @@ -1183,6 +1193,14 @@ impl Thread {
self.run_turn(cx)
}

/// Runs a turn by delegating directly to `run_turn`, returning the receiver
/// of `ThreadEvent` results that `run_turn` produces.
///
/// Only compiled when the `eval` feature is enabled.
#[cfg(feature = "eval")]
pub fn proceed(
&mut self,
cx: &mut Context<Self>,
) -> Result<mpsc::UnboundedReceiver<Result<ThreadEvent>>> {
self.run_turn(cx)
}

fn run_turn(
&mut self,
cx: &mut Context<Self>,
Expand Down Expand Up @@ -1550,7 +1568,13 @@ impl Thread {
});

if push_new_tool_use {
event_stream.send_tool_call(&tool_use.id, title, kind, tool_use.input.clone());
event_stream.send_tool_call(
&tool_use.id,
&tool_use.name,
title,
kind,
tool_use.input.clone(),
);
last_message
.content
.push(AgentMessageContent::ToolUse(tool_use.clone()));
Expand Down Expand Up @@ -2345,13 +2369,15 @@ impl ThreadEventStream {
fn send_tool_call(
&self,
id: &LanguageModelToolUseId,
tool_name: &str,
title: SharedString,
kind: acp::ToolKind,
input: serde_json::Value,
) {
self.0
.unbounded_send(Ok(ThreadEvent::ToolCall(Self::initial_tool_call(
id,
tool_name,
title.to_string(),
kind,
input,
Expand All @@ -2361,12 +2387,15 @@ impl ThreadEventStream {

fn initial_tool_call(
id: &LanguageModelToolUseId,
tool_name: &str,
title: String,
kind: acp::ToolKind,
input: serde_json::Value,
) -> acp::ToolCall {
acp::ToolCall {
meta: None,
meta: Some(serde_json::json!({
"tool_name": tool_name
})),
id: acp::ToolCallId(id.to_string().into()),
title,
kind,
Expand Down
Loading
Loading