openai · dylan-hurd-oai · Feb 10, 2026 · Feb 6, 2026 · Feb 6, 2026 · Feb 6, 2026
diff --git a/codex-rs/core/src/bash.rs b/codex-rs/core/src/bash.rs
@@ -119,6 +119,23 @@ pub fn parse_shell_lc_plain_commands(command: &[String]) -> Option<Vec<Vec<Strin
     try_parse_word_only_commands_sequence(&tree, script)
 }
 
+/// Returns the parsed argv for a single shell command in a here-doc style
+/// script (`<<`), as long as the script contains exactly one command node.
+pub fn parse_shell_lc_single_command_prefix(command: &[String]) -> Option<Vec<String>> {
+    let (_, script) = extract_bash_command(command)?;
+    let tree = try_parse_shell(script)?;
+    let root = tree.root_node();
+    if root.has_error() {
+        return None;
+    }
+    if !has_named_descendant_kind(root, "heredoc_redirect") {
+        return None;
+    }
+
+    let command_node = find_single_command_node(root)?;
+    parse_heredoc_command_words(command_node, script)
+}
+
 fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Vec<String>> {
     if cmd.kind() != "command" {
         return None;
@@ -177,6 +194,94 @@ fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Ve
     Some(words)
 }
 
+fn parse_heredoc_command_words(cmd: Node<'_>, src: &str) -> Option<Vec<String>> {
+    if cmd.kind() != "command" {
+        return None;
+    }
+
+    let mut words = Vec::new();
+    let mut cursor = cmd.walk();
+    for child in cmd.named_children(&mut cursor) {
+        match child.kind() {
+            "command_name" => {
+                let word_node = child.named_child(0)?;
+                if !matches!(word_node.kind(), "word" | "number")
+                    || !is_literal_word_or_number(word_node)
+                {
+                    return None;
+                }
+                words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
+            }
+            "word" | "number" => {
+                if !is_literal_word_or_number(child) {
+                    return None;
+                }
+                words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
+            }
+            // Allow shell constructs that attach IO to a single command without
+            // changing argv matching semantics for the executable prefix.
+            "variable_assignment" | "comment" => {}
+            kind if is_allowed_heredoc_attachment_kind(kind) => {}
+            _ => return None,
+        }
+    }
+
+    if words.is_empty() { None } else { Some(words) }
+}
+
+fn is_literal_word_or_number(node: Node<'_>) -> bool {
+    if !matches!(node.kind(), "word" | "number") {
+        return false;
+    }
+    let mut cursor = node.walk();
+    node.named_children(&mut cursor).next().is_none()
+}
+
+fn has_named_descendant_kind(node: Node<'_>, kind: &str) -> bool {
+    let mut stack = vec![node];
+    while let Some(current) = stack.pop() {
+        if current.kind() == kind {
+            return true;
+        }
+        let mut cursor = current.walk();
+        for child in current.named_children(&mut cursor) {
+            stack.push(child);
+        }
+    }
+    false
+}
+
+fn is_allowed_heredoc_attachment_kind(kind: &str) -> bool {
+    matches!(
+        kind,
+        "heredoc_body"
+            | "simple_heredoc_body"
+            | "heredoc_redirect"
+            | "herestring_redirect"
+            | "file_redirect"
+            | "redirected_statement"
+    )
+}
+
+fn find_single_command_node(root: Node<'_>) -> Option<Node<'_>> {
+    let mut stack = vec![root];
+    let mut single_command = None;
+    while let Some(node) = stack.pop() {
+        if node.kind() == "command" {
+            if single_command.is_some() {
+                return None;
+            }
+            single_command = Some(node);
+        }
+
+        let mut cursor = node.walk();
+        for child in node.named_children(&mut cursor) {
+            stack.push(child);
+        }
+    }
+    single_command
+}
+
 fn parse_double_quoted_string(node: Node, src: &str) -> Option<String> {
     if node.kind() != "string" {
         return None;
@@ -375,4 +480,86 @@ mod tests {
         assert!(parse_seq("rg -g\"$(pwd)\" pattern").is_none());
         assert!(parse_seq("rg -g\"$(echo '*.py')\" pattern").is_none());
     }
+
+    #[test]
+    fn parse_shell_lc_single_command_prefix_supports_heredoc() {
+        let command = vec![
+            "zsh".to_string(),
+            "-lc".to_string(),
+            "python3 <<'PY'\nprint('hello')\nPY".to_string(),
+        ];
+        let parsed = parse_shell_lc_single_command_prefix(&command);
+        assert_eq!(parsed, Some(vec!["python3".to_string()]));
+
+        let command_unquoted = vec![
+            "zsh".to_string(),
+            "-lc".to_string(),
+            "python3 << PY\nprint('hello')\nPY".to_string(),
+        ];
+        let parsed_unquoted = parse_shell_lc_single_command_prefix(&command_unquoted);
+        assert_eq!(parsed_unquoted, Some(vec!["python3".to_string()]));
+    }
+
+    #[test]
+    fn parse_shell_lc_single_command_prefix_rejects_multi_command_scripts() {
+        let command = vec![
+            "bash".to_string(),
+            "-lc".to_string(),
+            "python3 <<'PY'\nprint('hello')\nPY\necho done".to_string(),
+        ];
+        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
+    }
+
+    #[test]
+    fn parse_shell_lc_single_command_prefix_rejects_non_heredoc_redirects() {
+        let command = vec![
+            "bash".to_string(),
+            "-lc".to_string(),
+            "echo hello > /tmp/out.txt".to_string(),
+        ];
+        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
+    }
+
+    #[test]
+    fn parse_shell_lc_single_command_prefix_accepts_heredoc_with_extra_redirect() {
+        let command = vec![
+            "bash".to_string(),
+            "-lc".to_string(),
+            "python3 <<'PY' > /tmp/out.txt\nprint('hello')\nPY".to_string(),
+        ];
+        assert_eq!(
+            parse_shell_lc_single_command_prefix(&command),
+            Some(vec!["python3".to_string()])
+        );
+    }
+
+    #[test]
+    fn parse_shell_lc_single_command_prefix_rejects_herestring_with_substitution() {
+        let command = vec![
+            "bash".to_string(),
+            "-lc".to_string(),
+            r#"python3 <<< "$(rm -rf /)""#.to_string(),
+        ];
+        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
+    }
+
+    #[test]
+    fn parse_shell_lc_single_command_prefix_rejects_arithmetic_shift_non_heredoc_script() {
+        let command = vec![
+            "bash".to_string(),
+            "-lc".to_string(),
+            "echo $((1<<2))".to_string(),
+        ];
+        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
+    }
+
+    #[test]
+    fn parse_shell_lc_single_command_prefix_rejects_heredoc_command_with_word_expansion() {
+        let command = vec![
+            "bash".to_string(),
+            "-lc".to_string(),
+            "python3 $((1<<2)) <<'PY'\nprint('hello')\nPY".to_string(),
+        ];
+        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
+    }
 }
diff --git a/codex-rs/core/src/command_canonicalization.rs b/codex-rs/core/src/command_canonicalization.rs
@@ -0,0 +1,130 @@
+use crate::bash::extract_bash_command;
+use crate::bash::parse_shell_lc_plain_commands;
+use crate::powershell::extract_powershell_command;
+
+const CANONICAL_BASH_SCRIPT_PREFIX: &str = "__codex_shell_script__";
+const CANONICAL_POWERSHELL_SCRIPT_PREFIX: &str = "__codex_powershell_script__";
+
+/// Canonicalize command argv for approval-cache matching.
+///
+/// This keeps approval decisions stable across wrapper-path differences (for
+/// example `/bin/bash -lc` vs `bash -lc`) and across shell wrapper tools while
+/// preserving exact script text for complex scripts where we cannot safely
+/// recover a tokenized command sequence.
+pub(crate) fn canonicalize_command_for_approval(command: &[String]) -> Vec<String> {
+    if let Some(commands) = parse_shell_lc_plain_commands(command)
+        && let [single_command] = commands.as_slice()
+    {
+        return single_command.clone();
+    }
+
+    if let Some((_shell, script)) = extract_bash_command(command) {
+        let shell_mode = command.get(1).cloned().unwrap_or_default();
+        return vec![
+            CANONICAL_BASH_SCRIPT_PREFIX.to_string(),
+            shell_mode,
+            script.to_string(),
+        ];
+    }
+
+    if let Some((_shell, script)) = extract_powershell_command(command) {
+        return vec![
+            CANONICAL_POWERSHELL_SCRIPT_PREFIX.to_string(),
+            script.to_string(),
+        ];
+    }
+
+    command.to_vec()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::canonicalize_command_for_approval;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn canonicalizes_word_only_shell_scripts_to_inner_command() {
+        let command_a = vec![
+            "/bin/bash".to_string(),
+            "-lc".to_string(),
+            "cargo test -p codex-core".to_string(),
+        ];
+        let command_b = vec![
+            "bash".to_string(),
+            "-lc".to_string(),
+            "cargo   test   -p codex-core".to_string(),
+        ];
+
+        assert_eq!(
+            canonicalize_command_for_approval(&command_a),
+            vec![
+                "cargo".to_string(),
+                "test".to_string(),
+                "-p".to_string(),
+                "codex-core".to_string(),
+            ]
+        );
+        assert_eq!(
+            canonicalize_command_for_approval(&command_a),
+            canonicalize_command_for_approval(&command_b)
+        );
+    }
+
+    #[test]
+    fn canonicalizes_heredoc_scripts_to_stable_script_key() {
+        let script = "python3 <<'PY'\nprint('hello')\nPY";
+        let command_a = vec![
+            "/bin/zsh".to_string(),
+            "-lc".to_string(),
+            script.to_string(),
+        ];
+        let command_b = vec!["zsh".to_string(), "-lc".to_string(), script.to_string()];
+
+        assert_eq!(
+            canonicalize_command_for_approval(&command_a),
+            vec![
+                "__codex_shell_script__".to_string(),
+                "-lc".to_string(),
+                script.to_string(),
+            ]
+        );
+        assert_eq!(
+            canonicalize_command_for_approval(&command_a),
+            canonicalize_command_for_approval(&command_b)
+        );
+    }
+
+    #[test]
+    fn canonicalizes_powershell_wrappers_to_stable_script_key() {
+        let script = "Write-Host hi";
+        let command_a = vec![
+            "powershell.exe".to_string(),
+            "-NoProfile".to_string(),
+            "-Command".to_string(),
+            script.to_string(),
+        ];
+        let command_b = vec![
+            "powershell".to_string(),
+            "-Command".to_string(),
+            script.to_string(),
+        ];
+
+        assert_eq!(
+            canonicalize_command_for_approval(&command_a),
+            vec![
+                "__codex_powershell_script__".to_string(),
+                script.to_string(),
+            ]
+        );
+        assert_eq!(
+            canonicalize_command_for_approval(&command_a),
+            canonicalize_command_for_approval(&command_b)
+        );
+    }
+
+    #[test]
+    fn preserves_non_shell_commands() {
+        let command = vec!["cargo".to_string(), "fmt".to_string()];
+        assert_eq!(canonicalize_command_for_approval(&command), command);
+    }
+}