2025-08-03 13:05:48 -07:00
|
|
|
#![cfg(target_os = "macos")]
|
|
|
|
|
|
|
|
|
|
use std::collections::HashMap;
|
|
|
|
|
|
|
|
|
|
use codex_core::exec::ExecParams;
|
2025-08-11 11:52:05 -07:00
|
|
|
use codex_core::exec::ExecToolCallOutput;
|
2025-08-03 13:05:48 -07:00
|
|
|
use codex_core::exec::SandboxType;
|
|
|
|
|
use codex_core::exec::process_exec_tool_call;
|
|
|
|
|
use codex_core::protocol::SandboxPolicy;
|
|
|
|
|
use codex_core::spawn::CODEX_SANDBOX_ENV_VAR;
|
|
|
|
|
use tempfile::TempDir;
|
|
|
|
|
|
2025-08-11 11:52:05 -07:00
|
|
|
use codex_core::error::Result;
|
|
|
|
|
|
2025-08-03 13:05:48 -07:00
|
|
|
use codex_core::get_platform_sandbox;
|
|
|
|
|
|
2025-08-11 11:52:05 -07:00
|
|
|
fn skip_test() -> bool {
|
2025-08-03 13:05:48 -07:00
|
|
|
if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) {
|
|
|
|
|
eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
|
2025-08-11 11:52:05 -07:00
|
|
|
return true;
|
2025-08-03 13:05:48 -07:00
|
|
|
}
|
|
|
|
|
|
2025-08-11 11:52:05 -07:00
|
|
|
false
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-14 17:59:01 -07:00
|
|
|
#[expect(clippy::expect_used)]
|
2025-08-11 11:52:05 -07:00
|
|
|
async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput> {
|
2025-08-03 13:05:48 -07:00
|
|
|
let sandbox_type = get_platform_sandbox().expect("should be able to get sandbox type");
|
|
|
|
|
assert_eq!(sandbox_type, SandboxType::MacosSeatbelt);
|
|
|
|
|
|
|
|
|
|
let params = ExecParams {
|
|
|
|
|
command: cmd.iter().map(|s| s.to_string()).collect(),
|
|
|
|
|
cwd: tmp.path().to_path_buf(),
|
|
|
|
|
timeout_ms: Some(1000),
|
|
|
|
|
env: HashMap::new(),
|
2025-08-05 20:44:20 -07:00
|
|
|
with_escalated_permissions: None,
|
|
|
|
|
justification: None,
|
2025-08-03 13:05:48 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let policy = SandboxPolicy::new_read_only_policy();
|
|
|
|
|
|
chore: introduce ConversationManager as a clearinghouse for all conversations (#2240)
This PR does two things because after I got deep into the first one I
started pulling on the thread to the second:
- Makes `ConversationManager` the place where all in-memory
conversations are created and stored. Previously, `MessageProcessor` in
the `codex-mcp-server` crate was doing this via its `session_map`, but
this is something that should be done in `codex-core`.
- It unwinds the `ctrl_c: tokio::sync::Notify` that was threaded
throughout our code. I think this made sense at one time, but now that
we handle Ctrl-C within the TUI and have a proper `Op::Interrupt` event,
I don't think this was quite right, so I removed it. For `codex exec`
and `codex proto`, we now use `tokio::signal::ctrl_c()` directly, but we
no longer make `Notify` a field of `Codex` or `CodexConversation`.
Changes of note:
- Adds the files `conversation_manager.rs` and `codex_conversation.rs`
to `codex-core`.
- `Codex` and `CodexSpawnOk` are no longer exported from `codex-core`:
other crates must use `CodexConversation` instead (which is created via
`ConversationManager`).
- `core/src/codex_wrapper.rs` has been deleted in favor of
`ConversationManager`.
- `ConversationManager::new_conversation()` returns `NewConversation`,
which is in line with the `new_conversation` tool we want to add to the
MCP server. Note `NewConversation` includes `SessionConfiguredEvent`, so
we eliminate checks in cases like `codex-rs/core/tests/client.rs` to
verify `SessionConfiguredEvent` is the first event because that is now
internal to `ConversationManager`.
- Quite a bit of code was deleted from
`codex-rs/mcp-server/src/message_processor.rs` since it no longer has to
manage multiple conversations itself: it goes through
`ConversationManager` instead.
- `core/tests/live_agent.rs` has been deleted because I had to update a
bunch of tests and all the tests in here were ignored, and I don't think
anyone ever ran them, so this was just technical debt, at this point.
- Removed `notify_on_sigint()` from `util.rs` (and in a follow-up, I
hope to refactor the blandly-named `util.rs` into more descriptive
files).
- In general, I started replacing local variables named `codex` as
`conversation`, where appropriate, though admittedly I didn't do it
through all the integration tests because that would have added a lot of
noise to this PR.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/2240).
* #2264
* #2263
* __->__ #2240
2025-08-13 13:38:18 -07:00
|
|
|
process_exec_tool_call(params, sandbox_type, &policy, &None, None).await
|
2025-08-03 13:05:48 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Command succeeds with exit code 0 normally
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn exit_code_0_succeeds() {
|
2025-08-11 11:52:05 -07:00
|
|
|
if skip_test() {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-03 13:05:48 -07:00
|
|
|
let tmp = TempDir::new().expect("should be able to create temp dir");
|
|
|
|
|
let cmd = vec!["echo", "hello"];
|
|
|
|
|
|
2025-08-11 11:52:05 -07:00
|
|
|
let output = run_test_cmd(tmp, cmd).await.unwrap();
|
|
|
|
|
assert_eq!(output.stdout.text, "hello\n");
|
|
|
|
|
assert_eq!(output.stderr.text, "");
|
|
|
|
|
assert_eq!(output.stdout.truncated_after_lines, None);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Command succeeds with exit code 0 normally
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn truncates_output_lines() {
|
|
|
|
|
if skip_test() {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let tmp = TempDir::new().expect("should be able to create temp dir");
|
|
|
|
|
let cmd = vec!["seq", "300"];
|
|
|
|
|
|
|
|
|
|
let output = run_test_cmd(tmp, cmd).await.unwrap();
|
|
|
|
|
|
2025-08-23 09:54:31 -07:00
|
|
|
let expected_output = (1..=300)
|
2025-08-11 11:52:05 -07:00
|
|
|
.map(|i| format!("{i}\n"))
|
|
|
|
|
.collect::<Vec<_>>()
|
|
|
|
|
.join("");
|
|
|
|
|
assert_eq!(output.stdout.text, expected_output);
|
2025-08-23 09:54:31 -07:00
|
|
|
assert_eq!(output.stdout.truncated_after_lines, None);
|
2025-08-11 11:52:05 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Command succeeds with exit code 0 normally
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn truncates_output_bytes() {
|
|
|
|
|
if skip_test() {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let tmp = TempDir::new().expect("should be able to create temp dir");
|
|
|
|
|
// each line is 1000 bytes
|
|
|
|
|
let cmd = vec!["bash", "-lc", "seq 15 | awk '{printf \"%-1000s\\n\", $0}'"];
|
|
|
|
|
|
|
|
|
|
let output = run_test_cmd(tmp, cmd).await.unwrap();
|
|
|
|
|
|
2025-08-23 09:54:31 -07:00
|
|
|
assert!(output.stdout.text.len() >= 15000);
|
|
|
|
|
assert_eq!(output.stdout.truncated_after_lines, None);
|
2025-08-03 13:05:48 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Command not found returns exit code 127, this is not considered a sandbox error
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn exit_command_not_found_is_ok() {
|
2025-08-11 11:52:05 -07:00
|
|
|
if skip_test() {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-03 13:05:48 -07:00
|
|
|
let tmp = TempDir::new().expect("should be able to create temp dir");
|
|
|
|
|
let cmd = vec!["/bin/bash", "-c", "nonexistent_command_12345"];
|
2025-08-11 11:52:05 -07:00
|
|
|
run_test_cmd(tmp, cmd).await.unwrap();
|
2025-08-03 13:05:48 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Writing a file fails and should be considered a sandbox error
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn write_file_fails_as_sandbox_error() {
|
2025-08-11 11:52:05 -07:00
|
|
|
if skip_test() {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-03 13:05:48 -07:00
|
|
|
let tmp = TempDir::new().expect("should be able to create temp dir");
|
|
|
|
|
let path = tmp.path().join("test.txt");
|
|
|
|
|
let cmd = vec![
|
|
|
|
|
"/user/bin/touch",
|
|
|
|
|
path.to_str().expect("should be able to get path"),
|
|
|
|
|
];
|
|
|
|
|
|
2025-08-11 11:52:05 -07:00
|
|
|
assert!(run_test_cmd(tmp, cmd).await.is_err());
|
2025-08-03 13:05:48 -07:00
|
|
|
}
|