Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 73 additions & 1 deletion codex-rs/core/src/exec_policy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,9 @@ fn default_policy_path(codex_home: &Path) -> PathBuf {
}

fn commands_for_exec_policy(command: &[String]) -> (Vec<Vec<String>>, bool) {
if let Some(commands) = parse_shell_lc_plain_commands(command) {
if let Some(commands) = parse_shell_lc_plain_commands(command)
&& !commands.is_empty()
{
return (commands, false);
}

Expand Down Expand Up @@ -814,6 +816,24 @@ prefix_rule(pattern=["rm"], decision="forbidden")
);
}

#[test]
fn commands_for_exec_policy_falls_back_for_empty_shell_script() {
let command = vec!["bash".to_string(), "-lc".to_string(), "".to_string()];

assert_eq!(commands_for_exec_policy(&command), (vec![command], false));
}

#[test]
fn commands_for_exec_policy_falls_back_for_whitespace_shell_script() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
" \n\t ".to_string(),
];

assert_eq!(commands_for_exec_policy(&command), (vec![command], false));
}

#[tokio::test]
async fn evaluates_heredoc_script_against_prefix_rules() {
let policy_src = r#"prefix_rule(pattern=["python3"], decision="allow")"#;
Expand Down Expand Up @@ -1023,6 +1043,58 @@ prefix_rule(
);
}

#[tokio::test]
async fn empty_bash_lc_script_falls_back_to_original_command() {
let command = vec!["bash".to_string(), "-lc".to_string(), "".to_string()];

let manager = ExecPolicyManager::default();
let requirement = manager
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
command: &command,
approval_policy: AskForApproval::UnlessTrusted,
sandbox_policy: &SandboxPolicy::ReadOnly,
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
})
.await;

assert_eq!(
requirement,
ExecApprovalRequirement::NeedsApproval {
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(command)),
}
);
}

#[tokio::test]
async fn whitespace_bash_lc_script_falls_back_to_original_command() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
" \n\t ".to_string(),
];

let manager = ExecPolicyManager::default();
let requirement = manager
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
command: &command,
approval_policy: AskForApproval::UnlessTrusted,
sandbox_policy: &SandboxPolicy::ReadOnly,
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
})
.await;

assert_eq!(
requirement,
ExecApprovalRequirement::NeedsApproval {
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(command)),
}
);
}

#[tokio::test]
async fn request_rule_uses_prefix_rule() {
let command = vec![
Expand Down
264 changes: 264 additions & 0 deletions codex-rs/core/tests/suite/exec_policy.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
#![allow(clippy::unwrap_used, clippy::expect_used)]

use anyhow::Result;
use codex_core::features::Feature;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::CollaborationMode;
use codex_protocol::config_types::ModeKind;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::config_types::Settings;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
Expand All @@ -16,9 +20,59 @@ use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use serde_json::Value;
use serde_json::json;
use std::fs;

fn collaboration_mode_for_model(model: String) -> CollaborationMode {
CollaborationMode {
mode: ModeKind::Default,
settings: Settings {
model,
reasoning_effort: None,
developer_instructions: Some("exercise approvals in collaboration mode".to_string()),
},
}
}

async fn submit_user_turn(
test: &core_test_support::test_codex::TestCodex,
prompt: &str,
approval_policy: AskForApproval,
sandbox_policy: SandboxPolicy,
collaboration_mode: Option<CollaborationMode>,
) -> Result<()> {
let session_model = test.session_configured.model.clone();
test.codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: prompt.into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: test.cwd_path().to_path_buf(),
approval_policy,
sandbox_policy,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
collaboration_mode,
personality: None,
})
.await?;
Ok(())
}

fn assert_no_matched_rules_invariant(output_item: &Value) {
let Some(output) = output_item.get("output").and_then(Value::as_str) else {
panic!("function_call_output should include string output payload: {output_item:?}");
};
assert!(
!output.contains("invariant failed: matched_rules must be non-empty"),
"unexpected invariant panic surfaced in output: {output}"
);
}

#[tokio::test]
async fn execpolicy_blocks_shell_invocation() -> Result<()> {
// TODO execpolicy doesn't parse powershell commands yet
Expand Down Expand Up @@ -107,3 +161,213 @@ async fn execpolicy_blocks_shell_invocation() -> Result<()> {

Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_command_empty_script_with_collaboration_mode_does_not_panic() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
config.features.enable(Feature::CollaborationModes);
});
let test = builder.build(&server).await?;
let call_id = "shell-empty-script-collab";
let args = json!({
"command": "",
"timeout_ms": 1_000,
});

mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-empty-shell-1"),
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
ev_completed("resp-empty-shell-1"),
]),
)
.await;
let results_mock = mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-empty-shell-1", "done"),
ev_completed("resp-empty-shell-2"),
]),
)
.await;

let collaboration_mode = collaboration_mode_for_model(test.session_configured.model.clone());
submit_user_turn(
&test,
"run an empty shell command",
AskForApproval::OnRequest,
SandboxPolicy::DangerFullAccess,
Some(collaboration_mode),
)
.await?;

wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::TurnComplete(_))
})
.await;

let output_item = results_mock.single_request().function_call_output(call_id);
assert_no_matched_rules_invariant(&output_item);

Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn unified_exec_empty_script_with_collaboration_mode_does_not_panic() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
config.features.enable(Feature::UnifiedExec);
config.features.enable(Feature::CollaborationModes);
});
let test = builder.build(&server).await?;
let call_id = "unified-exec-empty-script-collab";
let args = json!({
"cmd": "",
"yield_time_ms": 1_000,
});

mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-empty-unified-1"),
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
ev_completed("resp-empty-unified-1"),
]),
)
.await;
let results_mock = mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-empty-unified-1", "done"),
ev_completed("resp-empty-unified-2"),
]),
)
.await;

let collaboration_mode = collaboration_mode_for_model(test.session_configured.model.clone());
submit_user_turn(
&test,
"run empty unified exec command",
AskForApproval::OnRequest,
SandboxPolicy::DangerFullAccess,
Some(collaboration_mode),
)
.await?;

wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::TurnComplete(_))
})
.await;

let output_item = results_mock.single_request().function_call_output(call_id);
assert_no_matched_rules_invariant(&output_item);

Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_command_whitespace_script_with_collaboration_mode_does_not_panic() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
config.features.enable(Feature::CollaborationModes);
});
let test = builder.build(&server).await?;
let call_id = "shell-whitespace-script-collab";
let args = json!({
"command": " \n\t ",
"timeout_ms": 1_000,
});

mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-whitespace-shell-1"),
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
ev_completed("resp-whitespace-shell-1"),
]),
)
.await;
let results_mock = mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-whitespace-shell-1", "done"),
ev_completed("resp-whitespace-shell-2"),
]),
)
.await;

let collaboration_mode = collaboration_mode_for_model(test.session_configured.model.clone());
submit_user_turn(
&test,
"run whitespace shell command",
AskForApproval::OnRequest,
SandboxPolicy::DangerFullAccess,
Some(collaboration_mode),
)
.await?;

wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::TurnComplete(_))
})
.await;

let output_item = results_mock.single_request().function_call_output(call_id);
assert_no_matched_rules_invariant(&output_item);

Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn unified_exec_whitespace_script_with_collaboration_mode_does_not_panic() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
config.features.enable(Feature::UnifiedExec);
config.features.enable(Feature::CollaborationModes);
});
let test = builder.build(&server).await?;
let call_id = "unified-exec-whitespace-script-collab";
let args = json!({
"cmd": " \n \t",
"yield_time_ms": 1_000,
});

mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-whitespace-unified-1"),
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
ev_completed("resp-whitespace-unified-1"),
]),
)
.await;
let results_mock = mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-whitespace-unified-1", "done"),
ev_completed("resp-whitespace-unified-2"),
]),
)
.await;

let collaboration_mode = collaboration_mode_for_model(test.session_configured.model.clone());
submit_user_turn(
&test,
"run whitespace unified exec command",
AskForApproval::OnRequest,
SandboxPolicy::DangerFullAccess,
Some(collaboration_mode),
)
.await?;

wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::TurnComplete(_))
})
.await;

let output_item = results_mock.single_request().function_call_output(call_id);
assert_no_matched_rules_invariant(&output_item);

Ok(())
}
Loading
Loading