feature: Add "!cmd" user shell execution (#2471)

feature: Add "!cmd" user shell execution

This change lets users run local shell commands directly from the TUI by prefixing their input with `!` (e.g. `!ls`). Output is truncated to keep the exec cell usable, and Ctrl-C cleanly interrupts long-running commands (e.g. `!sleep 10000`).

**Summary of changes**

- Route `Op::RunUserShellCommand` through a dedicated `UserShellCommandTask` (`core/src/tasks/user_shell.rs`), keeping the task logic out of `codex.rs`.
- Reuse the existing tool router: the task constructs a `ToolCall` for the `local_shell` tool and relies on `ShellHandler`, so no manual MCP tool lookup is required.
- Emit exec lifecycle events (`ExecCommandBegin`/`ExecCommandEnd`) so the TUI can show command metadata, live output, and exit status.

**End-to-end flow**

**TUI handling**

1. `ChatWidget::submit_user_message` (TUI) intercepts messages starting with `!`.
2. Non-empty commands dispatch `Op::RunUserShellCommand { command }`; empty commands surface a help hint.
3. No `UserInput` items are created, so nothing is enqueued for the model.

**Core submission loop**

4. The submission loop routes the op to `handlers::run_user_shell_command` (`core/src/codex.rs`).
5. A fresh `TurnContext` is created and `Session::spawn_user_shell_command` enqueues `UserShellCommandTask`.

**Task execution**

6. `UserShellCommandTask::run` emits `TaskStartedEvent`, formats the command, and prepares a `ToolCall` targeting `local_shell`.
7. `ToolCallRuntime::handle_tool_call` dispatches to `ShellHandler`.

**Shell tool runtime**

8. `ShellHandler::run_exec_like` launches the process via the unified exec runtime, honoring sandbox and shell policies, and emits `ExecCommandBegin`/`ExecCommandEnd`.
9. Stdout/stderr are captured for the UI, but the task does not turn the resulting `ToolOutput` into a model response.

**Completion**

10. After `ExecCommandEnd`, the task finishes without an assistant message; the session marks it complete and the exec cell displays the final output.
**Conversation context**

- The command and its output never enter the conversation history or the model prompt; the flow is local-only.
- Only exec/task events are emitted for UI rendering.

**Demo video**

https://github.com/user-attachments/assets/fcd114b0-4304-4448-a367-a04c43e0b996
2025-10-29 00:31:20 -07:00
parent 802d2440b4
commit 89591e4246
16 changed files with 419 additions and 21 deletions
--- a/codex-rs/core/tests/suite/mod.rs
+++ b/codex-rs/core/tests/suite/mod.rs
@@ -36,4 +36,5 @@ mod tools;
 mod truncation;
 mod unified_exec;
 mod user_notification;
+mod user_shell_cmd;
 mod view_image;
--- a/codex-rs/core/tests/suite/user_shell_cmd.rs
+++ b/codex-rs/core/tests/suite/user_shell_cmd.rs
@@ -0,0 +1,140 @@
+use codex_core::ConversationManager;
+use codex_core::NewConversation;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::ExecCommandEndEvent;
+use codex_core::protocol::Op;
+use codex_core::protocol::TurnAbortReason;
+use core_test_support::load_default_config_for_test;
+use core_test_support::wait_for_event;
+use std::path::PathBuf;
+use std::process::Command;
+use std::process::Stdio;
+use tempfile::TempDir;
+
+fn detect_python_executable() -> Option<String> { // first Python launcher that runs successfully, if any
+    let candidates = ["python3", "python"]; // probed in this order; the first success wins
+    candidates.iter().find_map(|candidate| {
+        Command::new(candidate)
+            .arg("--version") // cheap probe; its output is discarded below
+            .stdout(Stdio::null())
+            .stderr(Stdio::null())
+            .status()
+            .ok() // a failure to spawn becomes None, so the next candidate is tried
+            .and_then(|status| status.success().then(|| (*candidate).to_string())) // non-zero exit is also skipped
+    })
+}
+
+#[tokio::test]
+async fn user_shell_cmd_ls_and_cat_in_temp_dir() { // lists a temp dir, then reads a file in it, via Op::RunUserShellCommand
+    let Some(python) = detect_python_executable() else { // skip (do not fail) when no Python launcher is available
+        eprintln!("skipping test: python3 not found in PATH"); // NOTE(review): message names only python3, but `python` is probed too
+        return;
+    };
+
+    // Create a temporary working directory containing one file with known contents.
+    let cwd = TempDir::new().unwrap();
+    let file_name = "hello.txt";
+    let file_path: PathBuf = cwd.path().join(file_name);
+    let contents = "hello from bang test\n";
+    tokio::fs::write(&file_path, contents)
+        .await
+        .expect("write temp file");
+
+    // Load config and pin cwd to the temp dir; the Python commands below use relative paths and are expected to run there.
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.cwd = cwd.path().to_path_buf();
+
+    let conversation_manager =
+        ConversationManager::with_auth(codex_core::CodexAuth::from_api_key("dummy")); // placeholder API key
+    let NewConversation {
+        conversation: codex,
+        ..
+    } = conversation_manager
+        .new_conversation(config)
+        .await
+        .expect("create new conversation");
+
+    // 1) Directory listing (a portable stand-in for `ls`): Python prints the sorted entry names of ".".
+    let list_cmd = format!(
+        "{python} -c \"import pathlib; print('\\n'.join(sorted(p.name for p in pathlib.Path('.').iterdir())))\""
+    );
+    codex
+        .submit(Op::RunUserShellCommand { command: list_cmd })
+        .await
+        .unwrap();
+    let msg = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExecCommandEnd(_))).await; // NOTE(review): presumably yields the first event matching the predicate — helper not shown here
+    let EventMsg::ExecCommandEnd(ExecCommandEndEvent {
+        stdout, exit_code, ..
+    }) = msg
+    else {
+        unreachable!() // the predicate above only accepts ExecCommandEnd
+    };
+    assert_eq!(exit_code, 0);
+    assert!(
+        stdout.contains(file_name),
+        "ls output should include {file_name}, got: {stdout:?}"
+    );
+
+    // 2) File read (a portable stand-in for `cat`): Python prints the file contents verbatim, with no extra newline.
+    let cat_cmd = format!(
+        "{python} -c \"import pathlib; print(pathlib.Path('{file_name}').read_text(), end='')\""
+    );
+    codex
+        .submit(Op::RunUserShellCommand { command: cat_cmd })
+        .await
+        .unwrap();
+    let msg = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExecCommandEnd(_))).await;
+    let EventMsg::ExecCommandEnd(ExecCommandEndEvent {
+        mut stdout, // mutable so line endings can be normalized on Windows below
+        exit_code,
+        ..
+    }) = msg
+    else {
+        unreachable!() // the predicate above only accepts ExecCommandEnd
+    };
+    assert_eq!(exit_code, 0);
+    if cfg!(windows) {
+        // Windows' Python writes CRLF line endings; normalize so the assertion remains portable.
+        stdout = stdout.replace("\r\n", "\n");
+    }
+    assert_eq!(stdout, contents); // exact match, including the trailing newline written to the file
+}
+
+#[tokio::test]
+async fn user_shell_cmd_can_be_interrupted() { // Op::Interrupt during a running user shell command must yield TurnAborted(Interrupted)
+    let Some(python) = detect_python_executable() else { // skip (do not fail) when no Python launcher is available
+        eprintln!("skipping test: python3 not found in PATH"); // NOTE(review): message names only python3, but `python` is probed too
+        return;
+    };
+    // Set up an isolated config (fresh temp codex home) and a new conversation.
+    let codex_home = TempDir::new().unwrap();
+    let config = load_default_config_for_test(&codex_home);
+    let conversation_manager =
+        ConversationManager::with_auth(codex_core::CodexAuth::from_api_key("dummy")); // placeholder API key
+    let NewConversation {
+        conversation: codex,
+        ..
+    } = conversation_manager
+        .new_conversation(config)
+        .await
+        .expect("create new conversation");
+
+    // Start a long-running command (a 5-second Python sleep) and then interrupt it.
+    let sleep_cmd = format!("{python} -c \"import time; time.sleep(5)\""); // intended to still be running when the interrupt is sent
+    codex
+        .submit(Op::RunUserShellCommand { command: sleep_cmd })
+        .await
+        .unwrap();
+
+    // Wait until it has started (ExecCommandBegin), then interrupt.
+    let _ = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExecCommandBegin(_))).await; // payload unused; only the ordering matters
+    codex.submit(Op::Interrupt).await.unwrap();
+
+    // Expect a TurnAborted notification whose reason is Interrupted.
+    let msg = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnAborted(_))).await;
+    let EventMsg::TurnAborted(ev) = msg else {
+        unreachable!() // the predicate above only accepts TurnAborted
+    };
+    assert_eq!(ev.reason, TurnAbortReason::Interrupted);
+}