Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
187 changes: 187 additions & 0 deletions codex-rs/core/src/bash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,23 @@ pub fn parse_shell_lc_plain_commands(command: &[String]) -> Option<Vec<Vec<Strin
try_parse_word_only_commands_sequence(&tree, script)
}

/// Returns the parsed argv for a single shell command in a here-doc style
/// script (`<<`), as long as the script contains exactly one command node.
pub fn parse_shell_lc_single_command_prefix(command: &[String]) -> Option<Vec<String>> {
let (_, script) = extract_bash_command(command)?;
let tree = try_parse_shell(script)?;
let root = tree.root_node();
if root.has_error() {
return None;
}
if !has_named_descendant_kind(root, "heredoc_redirect") {
return None;
}

let command_node = find_single_command_node(root)?;
parse_heredoc_command_words(command_node, script)
}

fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Vec<String>> {
if cmd.kind() != "command" {
return None;
Expand Down Expand Up @@ -177,6 +194,94 @@ fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Ve
Some(words)
}

fn parse_heredoc_command_words(cmd: Node<'_>, src: &str) -> Option<Vec<String>> {
if cmd.kind() != "command" {
return None;
}

let mut words = Vec::new();
let mut cursor = cmd.walk();
for child in cmd.named_children(&mut cursor) {
match child.kind() {
"command_name" => {
let word_node = child.named_child(0)?;
if !matches!(word_node.kind(), "word" | "number")
|| !is_literal_word_or_number(word_node)
{
return None;
}
words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
}
"word" | "number" => {
if !is_literal_word_or_number(child) {
return None;
}
words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
}
// Allow shell constructs that attach IO to a single command without
// changing argv matching semantics for the executable prefix.
Comment on lines 215 to 222
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Ensure non-heredoc << scripts don’t bypass approvals

This parser accepts any word nodes verbatim, and parse_shell_lc_single_command_prefix is gated only by script.contains("<<"). That combination means scripts that merely contain << (e.g., arithmetic shift echo $((1<<2)) or a here‑string with command substitution like python3 <<< "$(rm -rf /)") can be reduced to a simple argv prefix and matched by prefix_rule, skipping approvals that previously triggered when word‑only parsing failed. This is a regression in safety: << in non‑heredoc contexts can now be used to bypass the intended “complex script” fallback. Consider verifying that the AST actually contains a heredoc redirect, and/or rejecting words that include expansions before returning a prefix.

Useful? React with 👍 / 👎.

"variable_assignment" | "comment" => {}
kind if is_allowed_heredoc_attachment_kind(kind) => {}
_ => return None,
}
}

if words.is_empty() { None } else { Some(words) }
}

fn is_literal_word_or_number(node: Node<'_>) -> bool {
if !matches!(node.kind(), "word" | "number") {
return false;
}
let mut cursor = node.walk();
node.named_children(&mut cursor).next().is_none()
}

fn has_named_descendant_kind(node: Node<'_>, kind: &str) -> bool {
let mut stack = vec![node];
while let Some(current) = stack.pop() {
if current.kind() == kind {
return true;
}
let mut cursor = current.walk();
for child in current.named_children(&mut cursor) {
stack.push(child);
}
}
false
}

fn is_allowed_heredoc_attachment_kind(kind: &str) -> bool {
matches!(
kind,
"heredoc_body"
| "simple_heredoc_body"
| "heredoc_redirect"
| "herestring_redirect"
| "file_redirect"
| "redirected_statement"
)
}

fn find_single_command_node(root: Node<'_>) -> Option<Node<'_>> {
let mut stack = vec![root];
let mut single_command = None;
while let Some(node) = stack.pop() {
if node.kind() == "command" {
if single_command.is_some() {
return None;
}
single_command = Some(node);
}

let mut cursor = node.walk();
for child in node.named_children(&mut cursor) {
stack.push(child);
}
}
single_command
}

fn parse_double_quoted_string(node: Node, src: &str) -> Option<String> {
if node.kind() != "string" {
return None;
Expand Down Expand Up @@ -375,4 +480,86 @@ mod tests {
assert!(parse_seq("rg -g\"$(pwd)\" pattern").is_none());
assert!(parse_seq("rg -g\"$(echo '*.py')\" pattern").is_none());
}

#[test]
fn parse_shell_lc_single_command_prefix_supports_heredoc() {
let command = vec![
"zsh".to_string(),
"-lc".to_string(),
"python3 <<'PY'\nprint('hello')\nPY".to_string(),
];
let parsed = parse_shell_lc_single_command_prefix(&command);
assert_eq!(parsed, Some(vec!["python3".to_string()]));

let command_unquoted = vec![
"zsh".to_string(),
"-lc".to_string(),
"python3 << PY\nprint('hello')\nPY".to_string(),
];
let parsed_unquoted = parse_shell_lc_single_command_prefix(&command_unquoted);
assert_eq!(parsed_unquoted, Some(vec!["python3".to_string()]));
}

#[test]
fn parse_shell_lc_single_command_prefix_rejects_multi_command_scripts() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"python3 <<'PY'\nprint('hello')\nPY\necho done".to_string(),
];
assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
}

#[test]
fn parse_shell_lc_single_command_prefix_rejects_non_heredoc_redirects() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"echo hello > /tmp/out.txt".to_string(),
];
assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
}

#[test]
fn parse_shell_lc_single_command_prefix_accepts_heredoc_with_extra_redirect() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"python3 <<'PY' > /tmp/out.txt\nprint('hello')\nPY".to_string(),
];
assert_eq!(
parse_shell_lc_single_command_prefix(&command),
Some(vec!["python3".to_string()])
);
}

#[test]
fn parse_shell_lc_single_command_prefix_rejects_herestring_with_substitution() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
r#"python3 <<< "$(rm -rf /)""#.to_string(),
];
assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
}

#[test]
fn parse_shell_lc_single_command_prefix_rejects_arithmetic_shift_non_heredoc_script() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"echo $((1<<2))".to_string(),
];
assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
}

#[test]
fn parse_shell_lc_single_command_prefix_rejects_heredoc_command_with_word_expansion() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"python3 $((1<<2)) <<'PY'\nprint('hello')\nPY".to_string(),
];
assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
}
}
130 changes: 130 additions & 0 deletions codex-rs/core/src/command_canonicalization.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
use crate::bash::extract_bash_command;
use crate::bash::parse_shell_lc_plain_commands;
use crate::powershell::extract_powershell_command;

const CANONICAL_BASH_SCRIPT_PREFIX: &str = "__codex_shell_script__";
const CANONICAL_POWERSHELL_SCRIPT_PREFIX: &str = "__codex_powershell_script__";

/// Canonicalize command argv for approval-cache matching.
///
/// This keeps approval decisions stable across wrapper-path differences (for
/// example `/bin/bash -lc` vs `bash -lc`) and across shell wrapper tools while
/// preserving exact script text for complex scripts where we cannot safely
/// recover a tokenized command sequence.
pub(crate) fn canonicalize_command_for_approval(command: &[String]) -> Vec<String> {
if let Some(commands) = parse_shell_lc_plain_commands(command)
&& let [single_command] = commands.as_slice()
{
return single_command.clone();
}

if let Some((_shell, script)) = extract_bash_command(command) {
let shell_mode = command.get(1).cloned().unwrap_or_default();
return vec![
CANONICAL_BASH_SCRIPT_PREFIX.to_string(),
shell_mode,
script.to_string(),
];
}

if let Some((_shell, script)) = extract_powershell_command(command) {
return vec![
CANONICAL_POWERSHELL_SCRIPT_PREFIX.to_string(),
script.to_string(),
];
}

command.to_vec()
}

#[cfg(test)]
mod tests {
use super::canonicalize_command_for_approval;
use pretty_assertions::assert_eq;

#[test]
fn canonicalizes_word_only_shell_scripts_to_inner_command() {
let command_a = vec![
"/bin/bash".to_string(),
"-lc".to_string(),
"cargo test -p codex-core".to_string(),
];
let command_b = vec![
"bash".to_string(),
"-lc".to_string(),
"cargo test -p codex-core".to_string(),
];

assert_eq!(
canonicalize_command_for_approval(&command_a),
vec![
"cargo".to_string(),
"test".to_string(),
"-p".to_string(),
"codex-core".to_string(),
]
);
assert_eq!(
canonicalize_command_for_approval(&command_a),
canonicalize_command_for_approval(&command_b)
);
}

#[test]
fn canonicalizes_heredoc_scripts_to_stable_script_key() {
let script = "python3 <<'PY'\nprint('hello')\nPY";
let command_a = vec![
"/bin/zsh".to_string(),
"-lc".to_string(),
script.to_string(),
];
let command_b = vec!["zsh".to_string(), "-lc".to_string(), script.to_string()];

assert_eq!(
canonicalize_command_for_approval(&command_a),
vec![
"__codex_shell_script__".to_string(),
"-lc".to_string(),
script.to_string(),
]
);
assert_eq!(
canonicalize_command_for_approval(&command_a),
canonicalize_command_for_approval(&command_b)
);
}

#[test]
fn canonicalizes_powershell_wrappers_to_stable_script_key() {
let script = "Write-Host hi";
let command_a = vec![
"powershell.exe".to_string(),
"-NoProfile".to_string(),
"-Command".to_string(),
script.to_string(),
];
let command_b = vec![
"powershell".to_string(),
"-Command".to_string(),
script.to_string(),
];

assert_eq!(
canonicalize_command_for_approval(&command_a),
vec![
"__codex_powershell_script__".to_string(),
script.to_string(),
]
);
assert_eq!(
canonicalize_command_for_approval(&command_a),
canonicalize_command_for_approval(&command_b)
);
}

#[test]
fn preserves_non_shell_commands() {
let command = vec!["cargo".to_string(), "fmt".to_string()];
assert_eq!(canonicalize_command_for_approval(&command), command);
}
}
Loading
Loading