Phase 1: Repository & Infrastructure Setup
- Renamed directories: codex-rs -> llmx-rs, codex-cli -> llmx-cli
- Updated package.json files:
- Root: llmx-monorepo
- CLI: @llmx/llmx
- SDK: @llmx/llmx-sdk
- Updated pnpm workspace configuration
- Renamed binary: codex.js -> llmx.js
- Updated environment variables: CODEX_* -> LLMX_*
- Changed repository URLs to valknar/llmx
🤖 Generated with Claude Code
This commit is contained in:
3
llmx-rs/core/tests/all.rs
Normal file
3
llmx-rs/core/tests/all.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
// Single integration test binary that aggregates all test modules.
|
||||
// The submodules live in `tests/all/`.
|
||||
mod suite;
|
||||
363
llmx-rs/core/tests/chat_completions_payload.rs
Normal file
363
llmx-rs/core/tests/chat_completions_payload.rs
Normal file
@@ -0,0 +1,363 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use codex_app_server_protocol::AuthMode;
|
||||
use codex_core::ContentItem;
|
||||
use codex_core::LocalShellAction;
|
||||
use codex_core::LocalShellExecAction;
|
||||
use codex_core::LocalShellStatus;
|
||||
use codex_core::ModelClient;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::Prompt;
|
||||
use codex_core::ResponseItem;
|
||||
use codex_core::WireApi;
|
||||
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::models::ReasoningItemContent;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use futures::StreamExt;
|
||||
use serde_json::Value;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
fn network_disabled() -> bool {
|
||||
std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok()
|
||||
}
|
||||
|
||||
async fn run_request(input: Vec<ResponseItem>) -> Value {
|
||||
let server = MockServer::start().await;
|
||||
|
||||
let template = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(
|
||||
"data: {\"choices\":[{\"delta\":{}}]}\n\ndata: [DONE]\n\n",
|
||||
"text/event-stream",
|
||||
);
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/chat/completions"))
|
||||
.respond_with(template)
|
||||
.expect(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let provider = ModelProviderInfo {
|
||||
name: "mock".into(),
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
env_key: None,
|
||||
env_key_instructions: None,
|
||||
experimental_bearer_token: None,
|
||||
wire_api: WireApi::Chat,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(5_000),
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
let codex_home = match TempDir::new() {
|
||||
Ok(dir) => dir,
|
||||
Err(e) => panic!("failed to create TempDir: {e}"),
|
||||
};
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.model_provider_id = provider.name.clone();
|
||||
config.model_provider = provider.clone();
|
||||
config.show_raw_agent_reasoning = true;
|
||||
let effort = config.model_reasoning_effort;
|
||||
let summary = config.model_reasoning_summary;
|
||||
let config = Arc::new(config);
|
||||
|
||||
let conversation_id = ConversationId::new();
|
||||
|
||||
let otel_event_manager = OtelEventManager::new(
|
||||
conversation_id,
|
||||
config.model.as_str(),
|
||||
config.model_family.slug.as_str(),
|
||||
None,
|
||||
Some("test@test.com".to_string()),
|
||||
Some(AuthMode::ChatGPT),
|
||||
false,
|
||||
"test".to_string(),
|
||||
);
|
||||
|
||||
let client = ModelClient::new(
|
||||
Arc::clone(&config),
|
||||
None,
|
||||
otel_event_manager,
|
||||
provider,
|
||||
effort,
|
||||
summary,
|
||||
conversation_id,
|
||||
codex_protocol::protocol::SessionSource::Exec,
|
||||
);
|
||||
|
||||
let mut prompt = Prompt::default();
|
||||
prompt.input = input;
|
||||
|
||||
let mut stream = match client.stream(&prompt).await {
|
||||
Ok(s) => s,
|
||||
Err(e) => panic!("stream chat failed: {e}"),
|
||||
};
|
||||
while let Some(event) = stream.next().await {
|
||||
if let Err(e) = event {
|
||||
panic!("stream event error: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
let requests = match server.received_requests().await {
|
||||
Some(reqs) => reqs,
|
||||
None => panic!("request not made"),
|
||||
};
|
||||
match requests[0].body_json() {
|
||||
Ok(v) => v,
|
||||
Err(e) => panic!("invalid json body: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
fn user_message(text: &str) -> ResponseItem {
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText {
|
||||
text: text.to_string(),
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
||||
fn assistant_message(text: &str) -> ResponseItem {
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: text.to_string(),
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
||||
fn reasoning_item(text: &str) -> ResponseItem {
|
||||
ResponseItem::Reasoning {
|
||||
id: String::new(),
|
||||
summary: Vec::new(),
|
||||
content: Some(vec![ReasoningItemContent::ReasoningText {
|
||||
text: text.to_string(),
|
||||
}]),
|
||||
encrypted_content: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn function_call() -> ResponseItem {
|
||||
ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "f".to_string(),
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "c1".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn local_shell_call() -> ResponseItem {
|
||||
ResponseItem::LocalShellCall {
|
||||
id: Some("id1".to_string()),
|
||||
call_id: None,
|
||||
status: LocalShellStatus::InProgress,
|
||||
action: LocalShellAction::Exec(LocalShellExecAction {
|
||||
command: vec!["echo".to_string()],
|
||||
timeout_ms: Some(1_000),
|
||||
working_directory: None,
|
||||
env: None,
|
||||
user: None,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn messages_from(body: &Value) -> Vec<Value> {
|
||||
match body["messages"].as_array() {
|
||||
Some(arr) => arr.clone(),
|
||||
None => panic!("messages array missing"),
|
||||
}
|
||||
}
|
||||
|
||||
fn first_assistant(messages: &[Value]) -> &Value {
|
||||
match messages.iter().find(|msg| msg["role"] == "assistant") {
|
||||
Some(v) => v,
|
||||
None => panic!("assistant message not present"),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn omits_reasoning_when_none_present() {
|
||||
if network_disabled() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let body = run_request(vec![user_message("u1"), assistant_message("a1")]).await;
|
||||
let messages = messages_from(&body);
|
||||
let assistant = first_assistant(&messages);
|
||||
|
||||
assert_eq!(assistant["content"], Value::String("a1".into()));
|
||||
assert!(assistant.get("reasoning").is_none());
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn attaches_reasoning_to_previous_assistant() {
|
||||
if network_disabled() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let body = run_request(vec![
|
||||
user_message("u1"),
|
||||
assistant_message("a1"),
|
||||
reasoning_item("rA"),
|
||||
])
|
||||
.await;
|
||||
let messages = messages_from(&body);
|
||||
let assistant = first_assistant(&messages);
|
||||
|
||||
assert_eq!(assistant["content"], Value::String("a1".into()));
|
||||
assert_eq!(assistant["reasoning"], Value::String("rA".into()));
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn attaches_reasoning_to_function_call_anchor() {
|
||||
if network_disabled() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let body = run_request(vec![
|
||||
user_message("u1"),
|
||||
reasoning_item("rFunc"),
|
||||
function_call(),
|
||||
])
|
||||
.await;
|
||||
let messages = messages_from(&body);
|
||||
let assistant = first_assistant(&messages);
|
||||
|
||||
assert_eq!(assistant["reasoning"], Value::String("rFunc".into()));
|
||||
let tool_calls = match assistant["tool_calls"].as_array() {
|
||||
Some(arr) => arr,
|
||||
None => panic!("tool call list missing"),
|
||||
};
|
||||
assert_eq!(tool_calls.len(), 1);
|
||||
assert_eq!(tool_calls[0]["type"], Value::String("function".into()));
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn attaches_reasoning_to_local_shell_call() {
|
||||
if network_disabled() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let body = run_request(vec![
|
||||
user_message("u1"),
|
||||
reasoning_item("rShell"),
|
||||
local_shell_call(),
|
||||
])
|
||||
.await;
|
||||
let messages = messages_from(&body);
|
||||
let assistant = first_assistant(&messages);
|
||||
|
||||
assert_eq!(assistant["reasoning"], Value::String("rShell".into()));
|
||||
assert_eq!(
|
||||
assistant["tool_calls"][0]["type"],
|
||||
Value::String("local_shell_call".into())
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn drops_reasoning_when_last_role_is_user() {
|
||||
if network_disabled() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let body = run_request(vec![
|
||||
assistant_message("aPrev"),
|
||||
reasoning_item("rHist"),
|
||||
user_message("uNew"),
|
||||
])
|
||||
.await;
|
||||
let messages = messages_from(&body);
|
||||
assert!(messages.iter().all(|msg| msg.get("reasoning").is_none()));
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn ignores_reasoning_before_last_user() {
|
||||
if network_disabled() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let body = run_request(vec![
|
||||
user_message("u1"),
|
||||
assistant_message("a1"),
|
||||
user_message("u2"),
|
||||
reasoning_item("rAfterU1"),
|
||||
])
|
||||
.await;
|
||||
let messages = messages_from(&body);
|
||||
assert!(messages.iter().all(|msg| msg.get("reasoning").is_none()));
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn skips_empty_reasoning_segments() {
|
||||
if network_disabled() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let body = run_request(vec![
|
||||
user_message("u1"),
|
||||
assistant_message("a1"),
|
||||
reasoning_item(""),
|
||||
reasoning_item(" "),
|
||||
])
|
||||
.await;
|
||||
let messages = messages_from(&body);
|
||||
let assistant = first_assistant(&messages);
|
||||
assert!(assistant.get("reasoning").is_none());
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn suppresses_duplicate_assistant_messages() {
|
||||
if network_disabled() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let body = run_request(vec![assistant_message("dup"), assistant_message("dup")]).await;
|
||||
let messages = messages_from(&body);
|
||||
let assistant_messages: Vec<_> = messages
|
||||
.iter()
|
||||
.filter(|msg| msg["role"] == "assistant")
|
||||
.collect();
|
||||
assert_eq!(assistant_messages.len(), 1);
|
||||
assert_eq!(
|
||||
assistant_messages[0]["content"],
|
||||
Value::String("dup".into())
|
||||
);
|
||||
}
|
||||
466
llmx-rs/core/tests/chat_completions_sse.rs
Normal file
466
llmx-rs/core/tests/chat_completions_sse.rs
Normal file
@@ -0,0 +1,466 @@
|
||||
use assert_matches::assert_matches;
|
||||
use std::sync::Arc;
|
||||
use tracing_test::traced_test;
|
||||
|
||||
use codex_app_server_protocol::AuthMode;
|
||||
use codex_core::ContentItem;
|
||||
use codex_core::ModelClient;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::Prompt;
|
||||
use codex_core::ResponseEvent;
|
||||
use codex_core::ResponseItem;
|
||||
use codex_core::WireApi;
|
||||
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::models::ReasoningItemContent;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use futures::StreamExt;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
fn network_disabled() -> bool {
|
||||
std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok()
|
||||
}
|
||||
|
||||
async fn run_stream(sse_body: &str) -> Vec<ResponseEvent> {
|
||||
run_stream_with_bytes(sse_body.as_bytes()).await
|
||||
}
|
||||
|
||||
async fn run_stream_with_bytes(sse_body: &[u8]) -> Vec<ResponseEvent> {
|
||||
let server = MockServer::start().await;
|
||||
|
||||
let template = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_bytes(sse_body.to_vec());
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/chat/completions"))
|
||||
.respond_with(template)
|
||||
.expect(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let provider = ModelProviderInfo {
|
||||
name: "mock".into(),
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
env_key: None,
|
||||
env_key_instructions: None,
|
||||
experimental_bearer_token: None,
|
||||
wire_api: WireApi::Chat,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(5_000),
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
let codex_home = match TempDir::new() {
|
||||
Ok(dir) => dir,
|
||||
Err(e) => panic!("failed to create TempDir: {e}"),
|
||||
};
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.model_provider_id = provider.name.clone();
|
||||
config.model_provider = provider.clone();
|
||||
config.show_raw_agent_reasoning = true;
|
||||
let effort = config.model_reasoning_effort;
|
||||
let summary = config.model_reasoning_summary;
|
||||
let config = Arc::new(config);
|
||||
|
||||
let conversation_id = ConversationId::new();
|
||||
|
||||
let otel_event_manager = OtelEventManager::new(
|
||||
conversation_id,
|
||||
config.model.as_str(),
|
||||
config.model_family.slug.as_str(),
|
||||
None,
|
||||
Some("test@test.com".to_string()),
|
||||
Some(AuthMode::ChatGPT),
|
||||
false,
|
||||
"test".to_string(),
|
||||
);
|
||||
|
||||
let client = ModelClient::new(
|
||||
Arc::clone(&config),
|
||||
None,
|
||||
otel_event_manager,
|
||||
provider,
|
||||
effort,
|
||||
summary,
|
||||
conversation_id,
|
||||
codex_protocol::protocol::SessionSource::Exec,
|
||||
);
|
||||
|
||||
let mut prompt = Prompt::default();
|
||||
prompt.input = vec![ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText {
|
||||
text: "hello".to_string(),
|
||||
}],
|
||||
}];
|
||||
|
||||
let mut stream = match client.stream(&prompt).await {
|
||||
Ok(s) => s,
|
||||
Err(e) => panic!("stream chat failed: {e}"),
|
||||
};
|
||||
let mut events = Vec::new();
|
||||
while let Some(event) = stream.next().await {
|
||||
match event {
|
||||
Ok(ev) => events.push(ev),
|
||||
// We still collect the error to exercise telemetry and complete the task.
|
||||
Err(_e) => break,
|
||||
}
|
||||
}
|
||||
events
|
||||
}
|
||||
|
||||
fn assert_message(item: &ResponseItem, expected: &str) {
|
||||
if let ResponseItem::Message { content, .. } = item {
|
||||
let text = content.iter().find_map(|part| match part {
|
||||
ContentItem::OutputText { text } | ContentItem::InputText { text } => Some(text),
|
||||
_ => None,
|
||||
});
|
||||
let Some(text) = text else {
|
||||
panic!("message missing text: {item:?}");
|
||||
};
|
||||
assert_eq!(text, expected);
|
||||
} else {
|
||||
panic!("expected message item, got: {item:?}");
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_reasoning(item: &ResponseItem, expected: &str) {
|
||||
if let ResponseItem::Reasoning {
|
||||
content: Some(parts),
|
||||
..
|
||||
} = item
|
||||
{
|
||||
let mut combined = String::new();
|
||||
for part in parts {
|
||||
match part {
|
||||
ReasoningItemContent::ReasoningText { text }
|
||||
| ReasoningItemContent::Text { text } => combined.push_str(text),
|
||||
}
|
||||
}
|
||||
assert_eq!(combined, expected);
|
||||
} else {
|
||||
panic!("expected reasoning item, got: {item:?}");
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn streams_text_without_reasoning() {
|
||||
if network_disabled() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let sse = concat!(
|
||||
"data: {\"choices\":[{\"delta\":{\"content\":\"hi\"}}]}\n\n",
|
||||
"data: {\"choices\":[{\"delta\":{}}]}\n\n",
|
||||
"data: [DONE]\n\n",
|
||||
);
|
||||
|
||||
let events = run_stream(sse).await;
|
||||
assert_eq!(events.len(), 4, "unexpected events: {events:?}");
|
||||
|
||||
match &events[0] {
|
||||
ResponseEvent::OutputItemAdded(ResponseItem::Message { .. }) => {}
|
||||
other => panic!("expected initial assistant item, got {other:?}"),
|
||||
}
|
||||
|
||||
match &events[1] {
|
||||
ResponseEvent::OutputTextDelta(text) => assert_eq!(text, "hi"),
|
||||
other => panic!("expected text delta, got {other:?}"),
|
||||
}
|
||||
|
||||
match &events[2] {
|
||||
ResponseEvent::OutputItemDone(item) => assert_message(item, "hi"),
|
||||
other => panic!("expected terminal message, got {other:?}"),
|
||||
}
|
||||
|
||||
assert_matches!(events[3], ResponseEvent::Completed { .. });
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn streams_reasoning_from_string_delta() {
|
||||
if network_disabled() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let sse = concat!(
|
||||
"data: {\"choices\":[{\"delta\":{\"reasoning\":\"think1\"}}]}\n\n",
|
||||
"data: {\"choices\":[{\"delta\":{\"content\":\"ok\"}}]}\n\n",
|
||||
"data: {\"choices\":[{\"delta\":{} ,\"finish_reason\":\"stop\"}]}\n\n",
|
||||
);
|
||||
|
||||
let events = run_stream(sse).await;
|
||||
assert_eq!(events.len(), 7, "unexpected events: {events:?}");
|
||||
|
||||
match &events[0] {
|
||||
ResponseEvent::OutputItemAdded(ResponseItem::Reasoning { .. }) => {}
|
||||
other => panic!("expected initial reasoning item, got {other:?}"),
|
||||
}
|
||||
|
||||
match &events[1] {
|
||||
ResponseEvent::ReasoningContentDelta(text) => assert_eq!(text, "think1"),
|
||||
other => panic!("expected reasoning delta, got {other:?}"),
|
||||
}
|
||||
|
||||
match &events[2] {
|
||||
ResponseEvent::OutputItemAdded(ResponseItem::Message { .. }) => {}
|
||||
other => panic!("expected initial message item, got {other:?}"),
|
||||
}
|
||||
|
||||
match &events[3] {
|
||||
ResponseEvent::OutputTextDelta(text) => assert_eq!(text, "ok"),
|
||||
other => panic!("expected text delta, got {other:?}"),
|
||||
}
|
||||
|
||||
match &events[4] {
|
||||
ResponseEvent::OutputItemDone(item) => assert_reasoning(item, "think1"),
|
||||
other => panic!("expected terminal reasoning, got {other:?}"),
|
||||
}
|
||||
|
||||
match &events[5] {
|
||||
ResponseEvent::OutputItemDone(item) => assert_message(item, "ok"),
|
||||
other => panic!("expected terminal message, got {other:?}"),
|
||||
}
|
||||
|
||||
assert_matches!(events[6], ResponseEvent::Completed { .. });
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn streams_reasoning_from_object_delta() {
|
||||
if network_disabled() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let sse = concat!(
|
||||
"data: {\"choices\":[{\"delta\":{\"reasoning\":{\"text\":\"partA\"}}}]}\n\n",
|
||||
"data: {\"choices\":[{\"delta\":{\"reasoning\":{\"content\":\"partB\"}}}]}\n\n",
|
||||
"data: {\"choices\":[{\"delta\":{\"content\":\"answer\"}}]}\n\n",
|
||||
"data: {\"choices\":[{\"delta\":{} ,\"finish_reason\":\"stop\"}]}\n\n",
|
||||
);
|
||||
|
||||
let events = run_stream(sse).await;
|
||||
assert_eq!(events.len(), 8, "unexpected events: {events:?}");
|
||||
|
||||
match &events[0] {
|
||||
ResponseEvent::OutputItemAdded(ResponseItem::Reasoning { .. }) => {}
|
||||
other => panic!("expected initial reasoning item, got {other:?}"),
|
||||
}
|
||||
|
||||
match &events[1] {
|
||||
ResponseEvent::ReasoningContentDelta(text) => assert_eq!(text, "partA"),
|
||||
other => panic!("expected reasoning delta, got {other:?}"),
|
||||
}
|
||||
|
||||
match &events[2] {
|
||||
ResponseEvent::ReasoningContentDelta(text) => assert_eq!(text, "partB"),
|
||||
other => panic!("expected reasoning delta, got {other:?}"),
|
||||
}
|
||||
|
||||
match &events[3] {
|
||||
ResponseEvent::OutputItemAdded(ResponseItem::Message { .. }) => {}
|
||||
other => panic!("expected initial message item, got {other:?}"),
|
||||
}
|
||||
|
||||
match &events[4] {
|
||||
ResponseEvent::OutputTextDelta(text) => assert_eq!(text, "answer"),
|
||||
other => panic!("expected text delta, got {other:?}"),
|
||||
}
|
||||
|
||||
match &events[5] {
|
||||
ResponseEvent::OutputItemDone(item) => assert_reasoning(item, "partApartB"),
|
||||
other => panic!("expected terminal reasoning, got {other:?}"),
|
||||
}
|
||||
|
||||
match &events[6] {
|
||||
ResponseEvent::OutputItemDone(item) => assert_message(item, "answer"),
|
||||
other => panic!("expected terminal message, got {other:?}"),
|
||||
}
|
||||
|
||||
assert_matches!(events[7], ResponseEvent::Completed { .. });
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn streams_reasoning_from_final_message() {
|
||||
if network_disabled() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let sse = "data: {\"choices\":[{\"message\":{\"reasoning\":\"final-cot\"},\"finish_reason\":\"stop\"}]}\n\n";
|
||||
|
||||
let events = run_stream(sse).await;
|
||||
assert_eq!(events.len(), 4, "unexpected events: {events:?}");
|
||||
|
||||
match &events[0] {
|
||||
ResponseEvent::OutputItemAdded(ResponseItem::Reasoning { .. }) => {}
|
||||
other => panic!("expected initial reasoning item, got {other:?}"),
|
||||
}
|
||||
|
||||
match &events[1] {
|
||||
ResponseEvent::ReasoningContentDelta(text) => assert_eq!(text, "final-cot"),
|
||||
other => panic!("expected reasoning delta, got {other:?}"),
|
||||
}
|
||||
|
||||
match &events[2] {
|
||||
ResponseEvent::OutputItemDone(item) => assert_reasoning(item, "final-cot"),
|
||||
other => panic!("expected reasoning item, got {other:?}"),
|
||||
}
|
||||
|
||||
assert_matches!(events[3], ResponseEvent::Completed { .. });
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn streams_reasoning_before_tool_call() {
|
||||
if network_disabled() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let sse = concat!(
|
||||
"data: {\"choices\":[{\"delta\":{\"reasoning\":\"pre-tool\"}}]}\n\n",
|
||||
"data: {\"choices\":[{\"delta\":{\"tool_calls\":[{\"id\":\"call_1\",\"type\":\"function\",\"function\":{\"name\":\"run\",\"arguments\":\"{}\"}}]},\"finish_reason\":\"tool_calls\"}]}\n\n",
|
||||
);
|
||||
|
||||
let events = run_stream(sse).await;
|
||||
assert_eq!(events.len(), 5, "unexpected events: {events:?}");
|
||||
|
||||
match &events[0] {
|
||||
ResponseEvent::OutputItemAdded(ResponseItem::Reasoning { .. }) => {}
|
||||
other => panic!("expected initial reasoning item, got {other:?}"),
|
||||
}
|
||||
|
||||
match &events[1] {
|
||||
ResponseEvent::ReasoningContentDelta(text) => assert_eq!(text, "pre-tool"),
|
||||
other => panic!("expected reasoning delta, got {other:?}"),
|
||||
}
|
||||
|
||||
match &events[2] {
|
||||
ResponseEvent::OutputItemDone(item) => assert_reasoning(item, "pre-tool"),
|
||||
other => panic!("expected reasoning item, got {other:?}"),
|
||||
}
|
||||
|
||||
match &events[3] {
|
||||
ResponseEvent::OutputItemDone(ResponseItem::FunctionCall {
|
||||
name,
|
||||
arguments,
|
||||
call_id,
|
||||
..
|
||||
}) => {
|
||||
assert_eq!(name, "run");
|
||||
assert_eq!(arguments, "{}");
|
||||
assert_eq!(call_id, "call_1");
|
||||
}
|
||||
other => panic!("expected function call, got {other:?}"),
|
||||
}
|
||||
|
||||
assert_matches!(events[4], ResponseEvent::Completed { .. });
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[traced_test]
|
||||
async fn chat_sse_emits_failed_on_parse_error() {
|
||||
if network_disabled() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let sse_body = concat!("data: not-json\n\n", "data: [DONE]\n\n");
|
||||
|
||||
let _ = run_stream(sse_body).await;
|
||||
|
||||
logs_assert(|lines: &[&str]| {
|
||||
lines
|
||||
.iter()
|
||||
.find(|line| {
|
||||
line.contains("codex.api_request") && line.contains("http.response.status_code=200")
|
||||
})
|
||||
.map(|_| Ok(()))
|
||||
.unwrap_or(Err("cannot find codex.api_request event".to_string()))
|
||||
});
|
||||
|
||||
logs_assert(|lines: &[&str]| {
|
||||
lines
|
||||
.iter()
|
||||
.find(|line| {
|
||||
line.contains("codex.sse_event")
|
||||
&& line.contains("error.message")
|
||||
&& line.contains("expected ident at line 1 column 2")
|
||||
})
|
||||
.map(|_| Ok(()))
|
||||
.unwrap_or(Err("cannot find SSE event".to_string()))
|
||||
});
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[traced_test]
|
||||
async fn chat_sse_done_chunk_emits_event() {
|
||||
if network_disabled() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let sse_body = "data: [DONE]\n\n";
|
||||
|
||||
let _ = run_stream(sse_body).await;
|
||||
|
||||
logs_assert(|lines: &[&str]| {
|
||||
lines
|
||||
.iter()
|
||||
.find(|line| line.contains("codex.sse_event") && line.contains("event.kind=message"))
|
||||
.map(|_| Ok(()))
|
||||
.unwrap_or(Err("cannot find SSE event".to_string()))
|
||||
});
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[traced_test]
|
||||
async fn chat_sse_emits_error_on_invalid_utf8() {
|
||||
if network_disabled() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let _ = run_stream_with_bytes(b"data: \x80\x80\n\n").await;
|
||||
|
||||
logs_assert(|lines: &[&str]| {
|
||||
lines
|
||||
.iter()
|
||||
.find(|line| {
|
||||
line.contains("codex.sse_event")
|
||||
&& line.contains("error.message")
|
||||
&& line.contains("UTF8 error: invalid utf-8 sequence of 1 bytes from index 0")
|
||||
})
|
||||
.map(|_| Ok(()))
|
||||
.unwrap_or(Err("cannot find SSE event".to_string()))
|
||||
});
|
||||
}
|
||||
8
llmx-rs/core/tests/cli_responses_fixture.sse
Normal file
8
llmx-rs/core/tests/cli_responses_fixture.sse
Normal file
@@ -0,0 +1,8 @@
|
||||
event: response.created
|
||||
data: {"type":"response.created","response":{"id":"resp1"}}
|
||||
|
||||
event: response.output_item.done
|
||||
data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"fixture hello"}]}}
|
||||
|
||||
event: response.completed
|
||||
data: {"type":"response.completed","response":{"id":"resp1","output":[]}}
|
||||
20
llmx-rs/core/tests/common/Cargo.toml
Normal file
20
llmx-rs/core/tests/common/Cargo.toml
Normal file
@@ -0,0 +1,20 @@
|
||||
[package]
|
||||
edition = "2024"
|
||||
name = "core_test_support"
|
||||
version = { workspace = true }
|
||||
|
||||
[lib]
|
||||
path = "lib.rs"
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
assert_cmd = { workspace = true }
|
||||
codex-core = { workspace = true }
|
||||
codex-protocol = { workspace = true }
|
||||
notify = { workspace = true }
|
||||
regex-lite = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
tokio = { workspace = true, features = ["time"] }
|
||||
walkdir = { workspace = true }
|
||||
wiremock = { workspace = true }
|
||||
362
llmx-rs/core/tests/common/lib.rs
Normal file
362
llmx-rs/core/tests/common/lib.rs
Normal file
@@ -0,0 +1,362 @@
|
||||
#![expect(clippy::expect_used)]
|
||||
|
||||
use tempfile::TempDir;
|
||||
|
||||
use codex_core::CodexConversation;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_core::config::ConfigToml;
|
||||
use regex_lite::Regex;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
use assert_cmd::cargo::cargo_bin;
|
||||
|
||||
pub mod responses;
|
||||
pub mod test_codex;
|
||||
pub mod test_codex_exec;
|
||||
|
||||
#[track_caller]
|
||||
pub fn assert_regex_match<'s>(pattern: &str, actual: &'s str) -> regex_lite::Captures<'s> {
|
||||
let regex = Regex::new(pattern).unwrap_or_else(|err| {
|
||||
panic!("failed to compile regex {pattern:?}: {err}");
|
||||
});
|
||||
regex
|
||||
.captures(actual)
|
||||
.unwrap_or_else(|| panic!("regex {pattern:?} did not match {actual:?}"))
|
||||
}
|
||||
|
||||
/// Returns a default `Config` whose on-disk state is confined to the provided
|
||||
/// temporary directory. Using a per-test directory keeps tests hermetic and
|
||||
/// avoids clobbering a developer’s real `~/.codex`.
|
||||
pub fn load_default_config_for_test(codex_home: &TempDir) -> Config {
|
||||
Config::load_from_base_config_with_overrides(
|
||||
ConfigToml::default(),
|
||||
default_test_overrides(),
|
||||
codex_home.path().to_path_buf(),
|
||||
)
|
||||
.expect("defaults for test should always succeed")
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
fn default_test_overrides() -> ConfigOverrides {
|
||||
ConfigOverrides {
|
||||
codex_linux_sandbox_exe: Some(cargo_bin("codex-linux-sandbox")),
|
||||
..ConfigOverrides::default()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
fn default_test_overrides() -> ConfigOverrides {
|
||||
ConfigOverrides::default()
|
||||
}
|
||||
|
||||
/// Builds an SSE stream body from a JSON fixture.
|
||||
///
|
||||
/// The fixture must contain an array of objects where each object represents a
|
||||
/// single SSE event with at least a `type` field matching the `event:` value.
|
||||
/// Additional fields become the JSON payload for the `data:` line. An object
|
||||
/// with only a `type` field results in an event with no `data:` section. This
|
||||
/// makes it trivial to extend the fixtures as OpenAI adds new event kinds or
|
||||
/// fields.
|
||||
pub fn load_sse_fixture(path: impl AsRef<std::path::Path>) -> String {
|
||||
let events: Vec<serde_json::Value> =
|
||||
serde_json::from_reader(std::fs::File::open(path).expect("read fixture"))
|
||||
.expect("parse JSON fixture");
|
||||
events
|
||||
.into_iter()
|
||||
.map(|e| {
|
||||
let kind = e
|
||||
.get("type")
|
||||
.and_then(|v| v.as_str())
|
||||
.expect("fixture event missing type");
|
||||
if e.as_object().map(|o| o.len() == 1).unwrap_or(false) {
|
||||
format!("event: {kind}\n\n")
|
||||
} else {
|
||||
format!("event: {kind}\ndata: {e}\n\n")
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn load_sse_fixture_with_id_from_str(raw: &str, id: &str) -> String {
|
||||
let replaced = raw.replace("__ID__", id);
|
||||
let events: Vec<serde_json::Value> =
|
||||
serde_json::from_str(&replaced).expect("parse JSON fixture");
|
||||
events
|
||||
.into_iter()
|
||||
.map(|e| {
|
||||
let kind = e
|
||||
.get("type")
|
||||
.and_then(|v| v.as_str())
|
||||
.expect("fixture event missing type");
|
||||
if e.as_object().map(|o| o.len() == 1).unwrap_or(false) {
|
||||
format!("event: {kind}\n\n")
|
||||
} else {
|
||||
format!("event: {kind}\ndata: {e}\n\n")
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Same as [`load_sse_fixture`], but replaces the placeholder `__ID__` in the
|
||||
/// fixture template with the supplied identifier before parsing. This lets a
|
||||
/// single JSON template be reused by multiple tests that each need a unique
|
||||
/// `response_id`.
|
||||
pub fn load_sse_fixture_with_id(path: impl AsRef<std::path::Path>, id: &str) -> String {
|
||||
let raw = std::fs::read_to_string(path).expect("read fixture template");
|
||||
let replaced = raw.replace("__ID__", id);
|
||||
let events: Vec<serde_json::Value> =
|
||||
serde_json::from_str(&replaced).expect("parse JSON fixture");
|
||||
events
|
||||
.into_iter()
|
||||
.map(|e| {
|
||||
let kind = e
|
||||
.get("type")
|
||||
.and_then(|v| v.as_str())
|
||||
.expect("fixture event missing type");
|
||||
if e.as_object().map(|o| o.len() == 1).unwrap_or(false) {
|
||||
format!("event: {kind}\n\n")
|
||||
} else {
|
||||
format!("event: {kind}\ndata: {e}\n\n")
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub async fn wait_for_event<F>(
|
||||
codex: &CodexConversation,
|
||||
predicate: F,
|
||||
) -> codex_core::protocol::EventMsg
|
||||
where
|
||||
F: FnMut(&codex_core::protocol::EventMsg) -> bool,
|
||||
{
|
||||
use tokio::time::Duration;
|
||||
wait_for_event_with_timeout(codex, predicate, Duration::from_secs(1)).await
|
||||
}
|
||||
|
||||
pub async fn wait_for_event_match<T, F>(codex: &CodexConversation, matcher: F) -> T
|
||||
where
|
||||
F: Fn(&codex_core::protocol::EventMsg) -> Option<T>,
|
||||
{
|
||||
let ev = wait_for_event(codex, |ev| matcher(ev).is_some()).await;
|
||||
matcher(&ev).unwrap()
|
||||
}
|
||||
|
||||
pub async fn wait_for_event_with_timeout<F>(
|
||||
codex: &CodexConversation,
|
||||
mut predicate: F,
|
||||
wait_time: tokio::time::Duration,
|
||||
) -> codex_core::protocol::EventMsg
|
||||
where
|
||||
F: FnMut(&codex_core::protocol::EventMsg) -> bool,
|
||||
{
|
||||
use tokio::time::Duration;
|
||||
use tokio::time::timeout;
|
||||
loop {
|
||||
// Allow a bit more time to accommodate async startup work (e.g. config IO, tool discovery)
|
||||
let ev = timeout(wait_time.max(Duration::from_secs(5)), codex.next_event())
|
||||
.await
|
||||
.expect("timeout waiting for event")
|
||||
.expect("stream ended unexpectedly");
|
||||
if predicate(&ev.msg) {
|
||||
return ev.msg;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn sandbox_env_var() -> &'static str {
|
||||
codex_core::spawn::CODEX_SANDBOX_ENV_VAR
|
||||
}
|
||||
|
||||
pub fn sandbox_network_env_var() -> &'static str {
|
||||
codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR
|
||||
}
|
||||
|
||||
pub mod fs_wait {
|
||||
use anyhow::Result;
|
||||
use anyhow::anyhow;
|
||||
use notify::RecursiveMode;
|
||||
use notify::Watcher;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::mpsc;
|
||||
use std::sync::mpsc::RecvTimeoutError;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
use tokio::task;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
pub async fn wait_for_path_exists(
|
||||
path: impl Into<PathBuf>,
|
||||
timeout: Duration,
|
||||
) -> Result<PathBuf> {
|
||||
let path = path.into();
|
||||
task::spawn_blocking(move || wait_for_path_exists_blocking(path, timeout)).await?
|
||||
}
|
||||
|
||||
pub async fn wait_for_matching_file(
|
||||
root: impl Into<PathBuf>,
|
||||
timeout: Duration,
|
||||
predicate: impl FnMut(&Path) -> bool + Send + 'static,
|
||||
) -> Result<PathBuf> {
|
||||
let root = root.into();
|
||||
task::spawn_blocking(move || {
|
||||
let mut predicate = predicate;
|
||||
blocking_find_matching_file(root, timeout, &mut predicate)
|
||||
})
|
||||
.await?
|
||||
}
|
||||
|
||||
fn wait_for_path_exists_blocking(path: PathBuf, timeout: Duration) -> Result<PathBuf> {
|
||||
if path.exists() {
|
||||
return Ok(path);
|
||||
}
|
||||
|
||||
let watch_root = nearest_existing_ancestor(&path);
|
||||
let (tx, rx) = mpsc::channel();
|
||||
let mut watcher = notify::recommended_watcher(move |res| {
|
||||
let _ = tx.send(res);
|
||||
})?;
|
||||
watcher.watch(&watch_root, RecursiveMode::Recursive)?;
|
||||
|
||||
let deadline = Instant::now() + timeout;
|
||||
loop {
|
||||
if path.exists() {
|
||||
return Ok(path.clone());
|
||||
}
|
||||
let now = Instant::now();
|
||||
if now >= deadline {
|
||||
break;
|
||||
}
|
||||
let remaining = deadline.saturating_duration_since(now);
|
||||
match rx.recv_timeout(remaining) {
|
||||
Ok(Ok(_event)) => {
|
||||
if path.exists() {
|
||||
return Ok(path.clone());
|
||||
}
|
||||
}
|
||||
Ok(Err(err)) => return Err(err.into()),
|
||||
Err(RecvTimeoutError::Timeout) => break,
|
||||
Err(RecvTimeoutError::Disconnected) => break,
|
||||
}
|
||||
}
|
||||
|
||||
if path.exists() {
|
||||
Ok(path)
|
||||
} else {
|
||||
Err(anyhow!("timed out waiting for {path:?}"))
|
||||
}
|
||||
}
|
||||
|
||||
fn blocking_find_matching_file(
|
||||
root: PathBuf,
|
||||
timeout: Duration,
|
||||
predicate: &mut impl FnMut(&Path) -> bool,
|
||||
) -> Result<PathBuf> {
|
||||
let root = wait_for_path_exists_blocking(root, timeout)?;
|
||||
|
||||
if let Some(found) = scan_for_match(&root, predicate) {
|
||||
return Ok(found);
|
||||
}
|
||||
|
||||
let (tx, rx) = mpsc::channel();
|
||||
let mut watcher = notify::recommended_watcher(move |res| {
|
||||
let _ = tx.send(res);
|
||||
})?;
|
||||
watcher.watch(&root, RecursiveMode::Recursive)?;
|
||||
|
||||
let deadline = Instant::now() + timeout;
|
||||
|
||||
while Instant::now() < deadline {
|
||||
let remaining = deadline.saturating_duration_since(Instant::now());
|
||||
match rx.recv_timeout(remaining) {
|
||||
Ok(Ok(_event)) => {
|
||||
if let Some(found) = scan_for_match(&root, predicate) {
|
||||
return Ok(found);
|
||||
}
|
||||
}
|
||||
Ok(Err(err)) => return Err(err.into()),
|
||||
Err(RecvTimeoutError::Timeout) => break,
|
||||
Err(RecvTimeoutError::Disconnected) => break,
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(found) = scan_for_match(&root, predicate) {
|
||||
Ok(found)
|
||||
} else {
|
||||
Err(anyhow!("timed out waiting for matching file in {root:?}"))
|
||||
}
|
||||
}
|
||||
|
||||
fn scan_for_match(root: &Path, predicate: &mut impl FnMut(&Path) -> bool) -> Option<PathBuf> {
|
||||
for entry in WalkDir::new(root).into_iter().filter_map(Result::ok) {
|
||||
let path = entry.path();
|
||||
if !entry.file_type().is_file() {
|
||||
continue;
|
||||
}
|
||||
if predicate(path) {
|
||||
return Some(path.to_path_buf());
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn nearest_existing_ancestor(path: &Path) -> PathBuf {
|
||||
let mut current = path;
|
||||
loop {
|
||||
if current.exists() {
|
||||
return current.to_path_buf();
|
||||
}
|
||||
match current.parent() {
|
||||
Some(parent) => current = parent,
|
||||
None => return PathBuf::from("."),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! skip_if_sandbox {
|
||||
() => {{
|
||||
if ::std::env::var($crate::sandbox_env_var())
|
||||
== ::core::result::Result::Ok("seatbelt".to_string())
|
||||
{
|
||||
eprintln!(
|
||||
"{} is set to 'seatbelt', skipping test.",
|
||||
$crate::sandbox_env_var()
|
||||
);
|
||||
return;
|
||||
}
|
||||
}};
|
||||
($return_value:expr $(,)?) => {{
|
||||
if ::std::env::var($crate::sandbox_env_var())
|
||||
== ::core::result::Result::Ok("seatbelt".to_string())
|
||||
{
|
||||
eprintln!(
|
||||
"{} is set to 'seatbelt', skipping test.",
|
||||
$crate::sandbox_env_var()
|
||||
);
|
||||
return $return_value;
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! skip_if_no_network {
|
||||
() => {{
|
||||
if ::std::env::var($crate::sandbox_network_env_var()).is_ok() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
}};
|
||||
($return_value:expr $(,)?) => {{
|
||||
if ::std::env::var($crate::sandbox_network_env_var()).is_ok() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return $return_value;
|
||||
}
|
||||
}};
|
||||
}
|
||||
587
llmx-rs/core/tests/common/responses.rs
Normal file
587
llmx-rs/core/tests/common/responses.rs
Normal file
@@ -0,0 +1,587 @@
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
|
||||
use serde_json::Value;
|
||||
use wiremock::BodyPrintLimit;
|
||||
use wiremock::Match;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockBuilder;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::Respond;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path_regex;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ResponseMock {
|
||||
requests: Arc<Mutex<Vec<ResponsesRequest>>>,
|
||||
}
|
||||
|
||||
impl ResponseMock {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
requests: Arc::new(Mutex::new(Vec::new())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn single_request(&self) -> ResponsesRequest {
|
||||
let requests = self.requests.lock().unwrap();
|
||||
if requests.len() != 1 {
|
||||
panic!("expected 1 request, got {}", requests.len());
|
||||
}
|
||||
requests.first().unwrap().clone()
|
||||
}
|
||||
|
||||
pub fn requests(&self) -> Vec<ResponsesRequest> {
|
||||
self.requests.lock().unwrap().clone()
|
||||
}
|
||||
|
||||
/// Returns true if any captured request contains a `function_call` with the
|
||||
/// provided `call_id`.
|
||||
pub fn saw_function_call(&self, call_id: &str) -> bool {
|
||||
self.requests()
|
||||
.iter()
|
||||
.any(|req| req.has_function_call(call_id))
|
||||
}
|
||||
|
||||
/// Returns the `output` string for a matching `function_call_output` with
|
||||
/// the provided `call_id`, searching across all captured requests.
|
||||
pub fn function_call_output_text(&self, call_id: &str) -> Option<String> {
|
||||
self.requests()
|
||||
.iter()
|
||||
.find_map(|req| req.function_call_output_text(call_id))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ResponsesRequest(wiremock::Request);
|
||||
|
||||
impl ResponsesRequest {
|
||||
pub fn body_json(&self) -> Value {
|
||||
self.0.body_json().unwrap()
|
||||
}
|
||||
|
||||
/// Returns all `input_text` spans from `message` inputs for the provided role.
|
||||
pub fn message_input_texts(&self, role: &str) -> Vec<String> {
|
||||
self.inputs_of_type("message")
|
||||
.into_iter()
|
||||
.filter(|item| item.get("role").and_then(Value::as_str) == Some(role))
|
||||
.filter_map(|item| item.get("content").and_then(Value::as_array).cloned())
|
||||
.flatten()
|
||||
.filter(|span| span.get("type").and_then(Value::as_str) == Some("input_text"))
|
||||
.filter_map(|span| span.get("text").and_then(Value::as_str).map(str::to_owned))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn input(&self) -> Vec<Value> {
|
||||
self.0.body_json::<Value>().unwrap()["input"]
|
||||
.as_array()
|
||||
.expect("input array not found in request")
|
||||
.clone()
|
||||
}
|
||||
|
||||
pub fn inputs_of_type(&self, ty: &str) -> Vec<Value> {
|
||||
self.input()
|
||||
.iter()
|
||||
.filter(|item| item.get("type").and_then(Value::as_str) == Some(ty))
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn function_call_output(&self, call_id: &str) -> Value {
|
||||
self.call_output(call_id, "function_call_output")
|
||||
}
|
||||
|
||||
pub fn custom_tool_call_output(&self, call_id: &str) -> Value {
|
||||
self.call_output(call_id, "custom_tool_call_output")
|
||||
}
|
||||
|
||||
pub fn call_output(&self, call_id: &str, call_type: &str) -> Value {
|
||||
self.input()
|
||||
.iter()
|
||||
.find(|item| {
|
||||
item.get("type").unwrap() == call_type && item.get("call_id").unwrap() == call_id
|
||||
})
|
||||
.cloned()
|
||||
.unwrap_or_else(|| panic!("function call output {call_id} item not found in request"))
|
||||
}
|
||||
|
||||
/// Returns true if this request's `input` contains a `function_call` with
|
||||
/// the specified `call_id`.
|
||||
pub fn has_function_call(&self, call_id: &str) -> bool {
|
||||
self.input().iter().any(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("function_call")
|
||||
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
|
||||
})
|
||||
}
|
||||
|
||||
/// If present, returns the `output` string of the `function_call_output`
|
||||
/// entry matching `call_id` in this request's `input`.
|
||||
pub fn function_call_output_text(&self, call_id: &str) -> Option<String> {
|
||||
let binding = self.input();
|
||||
let item = binding.iter().find(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
|
||||
})?;
|
||||
item.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.map(str::to_string)
|
||||
}
|
||||
|
||||
pub fn header(&self, name: &str) -> Option<String> {
|
||||
self.0
|
||||
.headers
|
||||
.get(name)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.map(str::to_string)
|
||||
}
|
||||
|
||||
pub fn path(&self) -> String {
|
||||
self.0.url.path().to_string()
|
||||
}
|
||||
|
||||
pub fn query_param(&self, name: &str) -> Option<String> {
|
||||
self.0
|
||||
.url
|
||||
.query_pairs()
|
||||
.find(|(k, _)| k == name)
|
||||
.map(|(_, v)| v.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl Match for ResponseMock {
|
||||
fn matches(&self, request: &wiremock::Request) -> bool {
|
||||
self.requests
|
||||
.lock()
|
||||
.unwrap()
|
||||
.push(ResponsesRequest(request.clone()));
|
||||
|
||||
// Enforce invariant checks on every request body captured by the mock.
|
||||
// Panic on orphan tool outputs or calls to catch regressions early.
|
||||
validate_request_body_invariants(request);
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
/// Build an SSE stream body from a list of JSON events.
|
||||
pub fn sse(events: Vec<Value>) -> String {
|
||||
use std::fmt::Write as _;
|
||||
let mut out = String::new();
|
||||
for ev in events {
|
||||
let kind = ev.get("type").and_then(|v| v.as_str()).unwrap();
|
||||
writeln!(&mut out, "event: {kind}").unwrap();
|
||||
if !ev.as_object().map(|o| o.len() == 1).unwrap_or(false) {
|
||||
write!(&mut out, "data: {ev}\n\n").unwrap();
|
||||
} else {
|
||||
out.push('\n');
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Convenience: SSE event for a completed response with a specific id.
|
||||
pub fn ev_completed(id: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": id,
|
||||
"usage": {"input_tokens":0,"input_tokens_details":null,"output_tokens":0,"output_tokens_details":null,"total_tokens":0}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Convenience: SSE event for a created response with a specific id.
|
||||
pub fn ev_response_created(id: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.created",
|
||||
"response": {
|
||||
"id": id,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ev_completed_with_tokens(id: &str, total_tokens: i64) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": id,
|
||||
"usage": {
|
||||
"input_tokens": total_tokens,
|
||||
"input_tokens_details": null,
|
||||
"output_tokens": 0,
|
||||
"output_tokens_details": null,
|
||||
"total_tokens": total_tokens
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Convenience: SSE event for a single assistant message output item.
|
||||
pub fn ev_assistant_message(id: &str, text: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"id": id,
|
||||
"content": [{"type": "output_text", "text": text}]
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ev_message_item_added(id: &str, text: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.output_item.added",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"id": id,
|
||||
"content": [{"type": "output_text", "text": text}]
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ev_output_text_delta(delta: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.output_text.delta",
|
||||
"delta": delta,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ev_reasoning_item(id: &str, summary: &[&str], raw_content: &[&str]) -> Value {
|
||||
let summary_entries: Vec<Value> = summary
|
||||
.iter()
|
||||
.map(|text| serde_json::json!({"type": "summary_text", "text": text}))
|
||||
.collect();
|
||||
|
||||
let mut event = serde_json::json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "reasoning",
|
||||
"id": id,
|
||||
"summary": summary_entries,
|
||||
}
|
||||
});
|
||||
|
||||
if !raw_content.is_empty() {
|
||||
let content_entries: Vec<Value> = raw_content
|
||||
.iter()
|
||||
.map(|text| serde_json::json!({"type": "reasoning_text", "text": text}))
|
||||
.collect();
|
||||
event["item"]["content"] = Value::Array(content_entries);
|
||||
}
|
||||
|
||||
event
|
||||
}
|
||||
|
||||
pub fn ev_reasoning_item_added(id: &str, summary: &[&str]) -> Value {
|
||||
let summary_entries: Vec<Value> = summary
|
||||
.iter()
|
||||
.map(|text| serde_json::json!({"type": "summary_text", "text": text}))
|
||||
.collect();
|
||||
|
||||
serde_json::json!({
|
||||
"type": "response.output_item.added",
|
||||
"item": {
|
||||
"type": "reasoning",
|
||||
"id": id,
|
||||
"summary": summary_entries,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ev_reasoning_summary_text_delta(delta: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.reasoning_summary_text.delta",
|
||||
"delta": delta,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ev_reasoning_text_delta(delta: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.reasoning_text.delta",
|
||||
"delta": delta,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ev_web_search_call_added(id: &str, status: &str, query: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.output_item.added",
|
||||
"item": {
|
||||
"type": "web_search_call",
|
||||
"id": id,
|
||||
"status": status,
|
||||
"action": {"type": "search", "query": query}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ev_web_search_call_done(id: &str, status: &str, query: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "web_search_call",
|
||||
"id": id,
|
||||
"status": status,
|
||||
"action": {"type": "search", "query": query}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ev_function_call(call_id: &str, name: &str, arguments: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "function_call",
|
||||
"call_id": call_id,
|
||||
"name": name,
|
||||
"arguments": arguments
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ev_custom_tool_call(call_id: &str, name: &str, input: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "custom_tool_call",
|
||||
"call_id": call_id,
|
||||
"name": name,
|
||||
"input": input
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ev_local_shell_call(call_id: &str, status: &str, command: Vec<&str>) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "local_shell_call",
|
||||
"call_id": call_id,
|
||||
"status": status,
|
||||
"action": {
|
||||
"type": "exec",
|
||||
"command": command,
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Convenience: SSE event for an `apply_patch` custom tool call with raw patch
|
||||
/// text. This mirrors the payload produced by the Responses API when the model
|
||||
/// invokes `apply_patch` directly (before we convert it to a function call).
|
||||
pub fn ev_apply_patch_custom_tool_call(call_id: &str, patch: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "custom_tool_call",
|
||||
"name": "apply_patch",
|
||||
"input": patch,
|
||||
"call_id": call_id
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Convenience: SSE event for an `apply_patch` function call. The Responses API
|
||||
/// wraps the patch content in a JSON string under the `input` key; we recreate
|
||||
/// the same structure so downstream code exercises the full parsing path.
|
||||
pub fn ev_apply_patch_function_call(call_id: &str, patch: &str) -> Value {
|
||||
let arguments = serde_json::json!({ "input": patch });
|
||||
let arguments = serde_json::to_string(&arguments).expect("serialize apply_patch arguments");
|
||||
|
||||
serde_json::json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "function_call",
|
||||
"name": "apply_patch",
|
||||
"arguments": arguments,
|
||||
"call_id": call_id
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn sse_failed(id: &str, code: &str, message: &str) -> String {
|
||||
sse(vec![serde_json::json!({
|
||||
"type": "response.failed",
|
||||
"response": {
|
||||
"id": id,
|
||||
"error": {"code": code, "message": message}
|
||||
}
|
||||
})])
|
||||
}
|
||||
|
||||
pub fn sse_response(body: String) -> ResponseTemplate {
|
||||
ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(body, "text/event-stream")
|
||||
}
|
||||
|
||||
fn base_mock() -> (MockBuilder, ResponseMock) {
|
||||
let response_mock = ResponseMock::new();
|
||||
let mock = Mock::given(method("POST"))
|
||||
.and(path_regex(".*/responses$"))
|
||||
.and(response_mock.clone());
|
||||
(mock, response_mock)
|
||||
}
|
||||
|
||||
pub async fn mount_sse_once_match<M>(server: &MockServer, matcher: M, body: String) -> ResponseMock
|
||||
where
|
||||
M: wiremock::Match + Send + Sync + 'static,
|
||||
{
|
||||
let (mock, response_mock) = base_mock();
|
||||
mock.and(matcher)
|
||||
.respond_with(sse_response(body))
|
||||
.up_to_n_times(1)
|
||||
.mount(server)
|
||||
.await;
|
||||
response_mock
|
||||
}
|
||||
|
||||
pub async fn mount_sse_once(server: &MockServer, body: String) -> ResponseMock {
|
||||
let (mock, response_mock) = base_mock();
|
||||
mock.respond_with(sse_response(body))
|
||||
.up_to_n_times(1)
|
||||
.mount(server)
|
||||
.await;
|
||||
response_mock
|
||||
}
|
||||
|
||||
pub async fn mount_sse(server: &MockServer, body: String) -> ResponseMock {
|
||||
let (mock, response_mock) = base_mock();
|
||||
mock.respond_with(sse_response(body)).mount(server).await;
|
||||
response_mock
|
||||
}
|
||||
|
||||
pub async fn start_mock_server() -> MockServer {
|
||||
MockServer::builder()
|
||||
.body_print_limit(BodyPrintLimit::Limited(80_000))
|
||||
.start()
|
||||
.await
|
||||
}
|
||||
|
||||
/// Mounts a sequence of SSE response bodies and serves them in order for each
|
||||
/// POST to `/v1/responses`. Panics if more requests are received than bodies
|
||||
/// provided. Also asserts the exact number of expected calls.
|
||||
pub async fn mount_sse_sequence(server: &MockServer, bodies: Vec<String>) -> ResponseMock {
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
struct SeqResponder {
|
||||
num_calls: AtomicUsize,
|
||||
responses: Vec<String>,
|
||||
}
|
||||
|
||||
impl Respond for SeqResponder {
|
||||
fn respond(&self, _: &wiremock::Request) -> ResponseTemplate {
|
||||
let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst);
|
||||
match self.responses.get(call_num) {
|
||||
Some(body) => ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_string(body.clone()),
|
||||
None => panic!("no response for {call_num}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let num_calls = bodies.len();
|
||||
let responder = SeqResponder {
|
||||
num_calls: AtomicUsize::new(0),
|
||||
responses: bodies,
|
||||
};
|
||||
|
||||
let (mock, response_mock) = base_mock();
|
||||
mock.respond_with(responder)
|
||||
.up_to_n_times(num_calls as u64)
|
||||
.expect(num_calls as u64)
|
||||
.mount(server)
|
||||
.await;
|
||||
|
||||
response_mock
|
||||
}
|
||||
|
||||
/// Validate invariants on the request body sent to `/v1/responses`.
|
||||
///
|
||||
/// - No `function_call_output`/`custom_tool_call_output` with missing/empty `call_id`.
|
||||
/// - Every `function_call_output` must match a prior `function_call` or
|
||||
/// `local_shell_call` with the same `call_id` in the same `input`.
|
||||
/// - Every `custom_tool_call_output` must match a prior `custom_tool_call`.
|
||||
/// - Additionally, enforce symmetry: every `function_call`/`custom_tool_call`
|
||||
/// in the `input` must have a matching output entry.
|
||||
fn validate_request_body_invariants(request: &wiremock::Request) {
|
||||
let Ok(body): Result<Value, _> = request.body_json() else {
|
||||
return;
|
||||
};
|
||||
let Some(items) = body.get("input").and_then(Value::as_array) else {
|
||||
panic!("input array not found in request");
|
||||
};
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
fn get_call_id(item: &Value) -> Option<&str> {
|
||||
item.get("call_id")
|
||||
.and_then(Value::as_str)
|
||||
.filter(|id| !id.is_empty())
|
||||
}
|
||||
|
||||
fn gather_ids(items: &[Value], kind: &str) -> HashSet<String> {
|
||||
items
|
||||
.iter()
|
||||
.filter(|item| item.get("type").and_then(Value::as_str) == Some(kind))
|
||||
.filter_map(get_call_id)
|
||||
.map(str::to_string)
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn gather_output_ids(items: &[Value], kind: &str, missing_msg: &str) -> HashSet<String> {
|
||||
items
|
||||
.iter()
|
||||
.filter(|item| item.get("type").and_then(Value::as_str) == Some(kind))
|
||||
.map(|item| {
|
||||
let Some(id) = get_call_id(item) else {
|
||||
panic!("{missing_msg}");
|
||||
};
|
||||
id.to_string()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
let function_calls = gather_ids(items, "function_call");
|
||||
let custom_tool_calls = gather_ids(items, "custom_tool_call");
|
||||
let local_shell_calls = gather_ids(items, "local_shell_call");
|
||||
let function_call_outputs = gather_output_ids(
|
||||
items,
|
||||
"function_call_output",
|
||||
"orphan function_call_output with empty call_id should be dropped",
|
||||
);
|
||||
let custom_tool_call_outputs = gather_output_ids(
|
||||
items,
|
||||
"custom_tool_call_output",
|
||||
"orphan custom_tool_call_output with empty call_id should be dropped",
|
||||
);
|
||||
|
||||
for cid in &function_call_outputs {
|
||||
assert!(
|
||||
function_calls.contains(cid) || local_shell_calls.contains(cid),
|
||||
"function_call_output without matching call in input: {cid}",
|
||||
);
|
||||
}
|
||||
for cid in &custom_tool_call_outputs {
|
||||
assert!(
|
||||
custom_tool_calls.contains(cid),
|
||||
"custom_tool_call_output without matching call in input: {cid}",
|
||||
);
|
||||
}
|
||||
|
||||
for cid in &function_calls {
|
||||
assert!(
|
||||
function_call_outputs.contains(cid),
|
||||
"Function call output is missing for call id: {cid}",
|
||||
);
|
||||
}
|
||||
for cid in &custom_tool_calls {
|
||||
assert!(
|
||||
custom_tool_call_outputs.contains(cid),
|
||||
"Custom tool call output is missing for call id: {cid}",
|
||||
);
|
||||
}
|
||||
}
|
||||
288
llmx-rs/core/tests/common/test_codex.rs
Normal file
288
llmx-rs/core/tests/common/test_codex.rs
Normal file
@@ -0,0 +1,288 @@
|
||||
use std::mem::swap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::CodexConversation;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_core::protocol::SessionConfiguredEvent;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use serde_json::Value;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::MockServer;
|
||||
|
||||
use crate::load_default_config_for_test;
|
||||
use crate::responses::start_mock_server;
|
||||
use crate::wait_for_event;
|
||||
|
||||
type ConfigMutator = dyn FnOnce(&mut Config) + Send;
|
||||
|
||||
pub struct TestCodexBuilder {
|
||||
config_mutators: Vec<Box<ConfigMutator>>,
|
||||
}
|
||||
|
||||
impl TestCodexBuilder {
|
||||
pub fn with_config<T>(mut self, mutator: T) -> Self
|
||||
where
|
||||
T: FnOnce(&mut Config) + Send + 'static,
|
||||
{
|
||||
self.config_mutators.push(Box::new(mutator));
|
||||
self
|
||||
}
|
||||
|
||||
pub async fn build(&mut self, server: &wiremock::MockServer) -> anyhow::Result<TestCodex> {
|
||||
let home = Arc::new(TempDir::new()?);
|
||||
self.build_with_home(server, home, None).await
|
||||
}
|
||||
|
||||
pub async fn resume(
|
||||
&mut self,
|
||||
server: &wiremock::MockServer,
|
||||
home: Arc<TempDir>,
|
||||
rollout_path: PathBuf,
|
||||
) -> anyhow::Result<TestCodex> {
|
||||
self.build_with_home(server, home, Some(rollout_path)).await
|
||||
}
|
||||
|
||||
async fn build_with_home(
|
||||
&mut self,
|
||||
server: &wiremock::MockServer,
|
||||
home: Arc<TempDir>,
|
||||
resume_from: Option<PathBuf>,
|
||||
) -> anyhow::Result<TestCodex> {
|
||||
let (config, cwd) = self.prepare_config(server, &home).await?;
|
||||
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
|
||||
|
||||
let new_conversation = match resume_from {
|
||||
Some(path) => {
|
||||
let auth_manager = codex_core::AuthManager::from_auth_for_testing(
|
||||
CodexAuth::from_api_key("dummy"),
|
||||
);
|
||||
conversation_manager
|
||||
.resume_conversation_from_rollout(config, path, auth_manager)
|
||||
.await?
|
||||
}
|
||||
None => conversation_manager.new_conversation(config).await?,
|
||||
};
|
||||
|
||||
Ok(TestCodex {
|
||||
home,
|
||||
cwd,
|
||||
codex: new_conversation.conversation,
|
||||
session_configured: new_conversation.session_configured,
|
||||
})
|
||||
}
|
||||
|
||||
async fn prepare_config(
|
||||
&mut self,
|
||||
server: &wiremock::MockServer,
|
||||
home: &TempDir,
|
||||
) -> anyhow::Result<(Config, Arc<TempDir>)> {
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
let cwd = Arc::new(TempDir::new()?);
|
||||
let mut config = load_default_config_for_test(home);
|
||||
config.cwd = cwd.path().to_path_buf();
|
||||
config.model_provider = model_provider;
|
||||
if let Ok(cmd) = assert_cmd::Command::cargo_bin("codex") {
|
||||
config.codex_linux_sandbox_exe = Some(PathBuf::from(cmd.get_program().to_os_string()));
|
||||
}
|
||||
|
||||
let mut mutators = vec![];
|
||||
swap(&mut self.config_mutators, &mut mutators);
|
||||
for mutator in mutators {
|
||||
mutator(&mut config);
|
||||
}
|
||||
|
||||
if config.include_apply_patch_tool {
|
||||
config.features.enable(Feature::ApplyPatchFreeform);
|
||||
} else {
|
||||
config.features.disable(Feature::ApplyPatchFreeform);
|
||||
}
|
||||
|
||||
Ok((config, cwd))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TestCodex {
|
||||
pub home: Arc<TempDir>,
|
||||
pub cwd: Arc<TempDir>,
|
||||
pub codex: Arc<CodexConversation>,
|
||||
pub session_configured: SessionConfiguredEvent,
|
||||
}
|
||||
|
||||
impl TestCodex {
|
||||
pub fn cwd_path(&self) -> &Path {
|
||||
self.cwd.path()
|
||||
}
|
||||
|
||||
pub fn workspace_path(&self, rel: impl AsRef<Path>) -> PathBuf {
|
||||
self.cwd_path().join(rel)
|
||||
}
|
||||
|
||||
pub async fn submit_turn(&self, prompt: &str) -> Result<()> {
|
||||
self.submit_turn_with_policy(prompt, SandboxPolicy::DangerFullAccess)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn submit_turn_with_policy(
|
||||
&self,
|
||||
prompt: &str,
|
||||
sandbox_policy: SandboxPolicy,
|
||||
) -> Result<()> {
|
||||
let session_model = self.session_configured.model.clone();
|
||||
self.codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: prompt.into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: self.cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&self.codex, |event| {
|
||||
matches!(event, EventMsg::TaskComplete(_))
|
||||
})
|
||||
.await;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TestCodexHarness {
|
||||
server: MockServer,
|
||||
test: TestCodex,
|
||||
}
|
||||
|
||||
impl TestCodexHarness {
|
||||
pub async fn new() -> Result<Self> {
|
||||
Self::with_builder(test_codex()).await
|
||||
}
|
||||
|
||||
pub async fn with_config(mutator: impl FnOnce(&mut Config) + Send + 'static) -> Result<Self> {
|
||||
Self::with_builder(test_codex().with_config(mutator)).await
|
||||
}
|
||||
|
||||
pub async fn with_builder(mut builder: TestCodexBuilder) -> Result<Self> {
|
||||
let server = start_mock_server().await;
|
||||
let test = builder.build(&server).await?;
|
||||
Ok(Self { server, test })
|
||||
}
|
||||
|
||||
pub fn server(&self) -> &MockServer {
|
||||
&self.server
|
||||
}
|
||||
|
||||
pub fn test(&self) -> &TestCodex {
|
||||
&self.test
|
||||
}
|
||||
|
||||
pub fn cwd(&self) -> &Path {
|
||||
self.test.cwd_path()
|
||||
}
|
||||
|
||||
pub fn path(&self, rel: impl AsRef<Path>) -> PathBuf {
|
||||
self.test.workspace_path(rel)
|
||||
}
|
||||
|
||||
pub async fn submit(&self, prompt: &str) -> Result<()> {
|
||||
self.test.submit_turn(prompt).await
|
||||
}
|
||||
|
||||
pub async fn submit_with_policy(
|
||||
&self,
|
||||
prompt: &str,
|
||||
sandbox_policy: SandboxPolicy,
|
||||
) -> Result<()> {
|
||||
self.test
|
||||
.submit_turn_with_policy(prompt, sandbox_policy)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn request_bodies(&self) -> Vec<Value> {
|
||||
self.server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("requests")
|
||||
.into_iter()
|
||||
.map(|req| serde_json::from_slice(&req.body).expect("request body json"))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub async fn function_call_output_value(&self, call_id: &str) -> Value {
|
||||
let bodies = self.request_bodies().await;
|
||||
function_call_output(&bodies, call_id).clone()
|
||||
}
|
||||
|
||||
pub async fn function_call_stdout(&self, call_id: &str) -> String {
|
||||
self.function_call_output_value(call_id)
|
||||
.await
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("output string")
|
||||
.to_string()
|
||||
}
|
||||
|
||||
pub async fn custom_tool_call_output(&self, call_id: &str) -> String {
|
||||
let bodies = self.request_bodies().await;
|
||||
custom_tool_call_output(&bodies, call_id)
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("output string")
|
||||
.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
fn custom_tool_call_output<'a>(bodies: &'a [Value], call_id: &str) -> &'a Value {
|
||||
for body in bodies {
|
||||
if let Some(items) = body.get("input").and_then(Value::as_array) {
|
||||
for item in items {
|
||||
if item.get("type").and_then(Value::as_str) == Some("custom_tool_call_output")
|
||||
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
|
||||
{
|
||||
return item;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
panic!("custom_tool_call_output {call_id} not found");
|
||||
}
|
||||
|
||||
fn function_call_output<'a>(bodies: &'a [Value], call_id: &str) -> &'a Value {
|
||||
for body in bodies {
|
||||
if let Some(items) = body.get("input").and_then(Value::as_array) {
|
||||
for item in items {
|
||||
if item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
|
||||
{
|
||||
return item;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
panic!("function_call_output {call_id} not found");
|
||||
}
|
||||
|
||||
pub fn test_codex() -> TestCodexBuilder {
|
||||
TestCodexBuilder {
|
||||
config_mutators: vec![],
|
||||
}
|
||||
}
|
||||
41
llmx-rs/core/tests/common/test_codex_exec.rs
Normal file
41
llmx-rs/core/tests/common/test_codex_exec.rs
Normal file
@@ -0,0 +1,41 @@
|
||||
#![allow(clippy::expect_used)]
|
||||
use codex_core::auth::CODEX_API_KEY_ENV_VAR;
|
||||
use std::path::Path;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::MockServer;
|
||||
|
||||
pub struct TestCodexExecBuilder {
|
||||
home: TempDir,
|
||||
cwd: TempDir,
|
||||
}
|
||||
|
||||
impl TestCodexExecBuilder {
|
||||
pub fn cmd(&self) -> assert_cmd::Command {
|
||||
let mut cmd = assert_cmd::Command::cargo_bin("codex-exec")
|
||||
.expect("should find binary for codex-exec");
|
||||
cmd.current_dir(self.cwd.path())
|
||||
.env("CODEX_HOME", self.home.path())
|
||||
.env(CODEX_API_KEY_ENV_VAR, "dummy");
|
||||
cmd
|
||||
}
|
||||
pub fn cmd_with_server(&self, server: &MockServer) -> assert_cmd::Command {
|
||||
let mut cmd = self.cmd();
|
||||
let base = format!("{}/v1", server.uri());
|
||||
cmd.env("OPENAI_BASE_URL", base);
|
||||
cmd
|
||||
}
|
||||
|
||||
pub fn cwd_path(&self) -> &Path {
|
||||
self.cwd.path()
|
||||
}
|
||||
pub fn home_path(&self) -> &Path {
|
||||
self.home.path()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn test_codex_exec() -> TestCodexExecBuilder {
|
||||
TestCodexExecBuilder {
|
||||
home: TempDir::new().expect("create temp home"),
|
||||
cwd: TempDir::new().expect("create temp cwd"),
|
||||
}
|
||||
}
|
||||
16
llmx-rs/core/tests/fixtures/completed_template.json
vendored
Normal file
16
llmx-rs/core/tests/fixtures/completed_template.json
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
[
|
||||
{
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": "__ID__",
|
||||
"usage": {
|
||||
"input_tokens": 0,
|
||||
"input_tokens_details": null,
|
||||
"output_tokens": 0,
|
||||
"output_tokens_details": null,
|
||||
"total_tokens": 0
|
||||
},
|
||||
"output": []
|
||||
}
|
||||
}
|
||||
]
|
||||
3
llmx-rs/core/tests/fixtures/incomplete_sse.json
vendored
Normal file
3
llmx-rs/core/tests/fixtures/incomplete_sse.json
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
[
|
||||
{"type": "response.output_item.done"}
|
||||
]
|
||||
196
llmx-rs/core/tests/responses_headers.rs
Normal file
196
llmx-rs/core/tests/responses_headers.rs
Normal file
@@ -0,0 +1,196 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use codex_app_server_protocol::AuthMode;
|
||||
use codex_core::ContentItem;
|
||||
use codex_core::ModelClient;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::Prompt;
|
||||
use codex_core::ResponseEvent;
|
||||
use codex_core::ResponseItem;
|
||||
use codex_core::WireApi;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::responses;
|
||||
use futures::StreamExt;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::matchers::header;
|
||||
|
||||
#[tokio::test]
|
||||
async fn responses_stream_includes_subagent_header_on_review() {
|
||||
core_test_support::skip_if_no_network!();
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let response_body = responses::sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_completed("resp-1"),
|
||||
]);
|
||||
|
||||
let request_recorder = responses::mount_sse_once_match(
|
||||
&server,
|
||||
header("x-openai-subagent", "review"),
|
||||
response_body,
|
||||
)
|
||||
.await;
|
||||
|
||||
let provider = ModelProviderInfo {
|
||||
name: "mock".into(),
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
env_key: None,
|
||||
env_key_instructions: None,
|
||||
experimental_bearer_token: None,
|
||||
wire_api: WireApi::Responses,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(5_000),
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
let codex_home = TempDir::new().expect("failed to create TempDir");
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.model_provider_id = provider.name.clone();
|
||||
config.model_provider = provider.clone();
|
||||
let effort = config.model_reasoning_effort;
|
||||
let summary = config.model_reasoning_summary;
|
||||
let config = Arc::new(config);
|
||||
|
||||
let conversation_id = ConversationId::new();
|
||||
|
||||
let otel_event_manager = OtelEventManager::new(
|
||||
conversation_id,
|
||||
config.model.as_str(),
|
||||
config.model_family.slug.as_str(),
|
||||
None,
|
||||
Some("test@test.com".to_string()),
|
||||
Some(AuthMode::ChatGPT),
|
||||
false,
|
||||
"test".to_string(),
|
||||
);
|
||||
|
||||
let client = ModelClient::new(
|
||||
Arc::clone(&config),
|
||||
None,
|
||||
otel_event_manager,
|
||||
provider,
|
||||
effort,
|
||||
summary,
|
||||
conversation_id,
|
||||
SessionSource::SubAgent(codex_protocol::protocol::SubAgentSource::Review),
|
||||
);
|
||||
|
||||
let mut prompt = Prompt::default();
|
||||
prompt.input = vec![ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".into(),
|
||||
content: vec![ContentItem::InputText {
|
||||
text: "hello".into(),
|
||||
}],
|
||||
}];
|
||||
|
||||
let mut stream = client.stream(&prompt).await.expect("stream failed");
|
||||
while let Some(event) = stream.next().await {
|
||||
if matches!(event, Ok(ResponseEvent::Completed { .. })) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let request = request_recorder.single_request();
|
||||
assert_eq!(
|
||||
request.header("x-openai-subagent").as_deref(),
|
||||
Some("review")
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn responses_stream_includes_subagent_header_on_other() {
|
||||
core_test_support::skip_if_no_network!();
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let response_body = responses::sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_completed("resp-1"),
|
||||
]);
|
||||
|
||||
let request_recorder = responses::mount_sse_once_match(
|
||||
&server,
|
||||
header("x-openai-subagent", "my-task"),
|
||||
response_body,
|
||||
)
|
||||
.await;
|
||||
|
||||
let provider = ModelProviderInfo {
|
||||
name: "mock".into(),
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
env_key: None,
|
||||
env_key_instructions: None,
|
||||
experimental_bearer_token: None,
|
||||
wire_api: WireApi::Responses,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(5_000),
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
let codex_home = TempDir::new().expect("failed to create TempDir");
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.model_provider_id = provider.name.clone();
|
||||
config.model_provider = provider.clone();
|
||||
let effort = config.model_reasoning_effort;
|
||||
let summary = config.model_reasoning_summary;
|
||||
let config = Arc::new(config);
|
||||
|
||||
let conversation_id = ConversationId::new();
|
||||
|
||||
let otel_event_manager = OtelEventManager::new(
|
||||
conversation_id,
|
||||
config.model.as_str(),
|
||||
config.model_family.slug.as_str(),
|
||||
None,
|
||||
Some("test@test.com".to_string()),
|
||||
Some(AuthMode::ChatGPT),
|
||||
false,
|
||||
"test".to_string(),
|
||||
);
|
||||
|
||||
let client = ModelClient::new(
|
||||
Arc::clone(&config),
|
||||
None,
|
||||
otel_event_manager,
|
||||
provider,
|
||||
effort,
|
||||
summary,
|
||||
conversation_id,
|
||||
SessionSource::SubAgent(codex_protocol::protocol::SubAgentSource::Other(
|
||||
"my-task".to_string(),
|
||||
)),
|
||||
);
|
||||
|
||||
let mut prompt = Prompt::default();
|
||||
prompt.input = vec![ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".into(),
|
||||
content: vec![ContentItem::InputText {
|
||||
text: "hello".into(),
|
||||
}],
|
||||
}];
|
||||
|
||||
let mut stream = client.stream(&prompt).await.expect("stream failed");
|
||||
while let Some(event) = stream.next().await {
|
||||
if matches!(event, Ok(ResponseEvent::Completed { .. })) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let request = request_recorder.single_request();
|
||||
assert_eq!(
|
||||
request.header("x-openai-subagent").as_deref(),
|
||||
Some("my-task")
|
||||
);
|
||||
}
|
||||
158
llmx-rs/core/tests/suite/abort_tasks.rs
Normal file
158
llmx-rs/core/tests/suite/abort_tasks.rs
Normal file
@@ -0,0 +1,158 @@
|
||||
use assert_matches::assert_matches;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_once;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use regex_lite::Regex;
|
||||
use serde_json::json;
|
||||
|
||||
/// Integration test: spawn a long‑running shell tool via a mocked Responses SSE
|
||||
/// function call, then interrupt the session and expect TurnAborted.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn interrupt_long_running_tool_emits_turn_aborted() {
|
||||
let command = vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"sleep 60".to_string(),
|
||||
];
|
||||
|
||||
let args = json!({
|
||||
"command": command,
|
||||
"timeout_ms": 60_000
|
||||
})
|
||||
.to_string();
|
||||
let body = sse(vec![
|
||||
ev_function_call("call_sleep", "shell", &args),
|
||||
ev_completed("done"),
|
||||
]);
|
||||
|
||||
let server = start_mock_server().await;
|
||||
mount_sse_once(&server, body).await;
|
||||
|
||||
let codex = test_codex().build(&server).await.unwrap().codex;
|
||||
|
||||
// Kick off a turn that triggers the function call.
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "start sleep".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Wait until the exec begins to avoid a race, then interrupt.
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExecCommandBegin(_))).await;
|
||||
|
||||
codex.submit(Op::Interrupt).await.unwrap();
|
||||
|
||||
// Expect TurnAborted soon after.
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnAborted(_))).await;
|
||||
}
|
||||
|
||||
/// After an interrupt we expect the next request to the model to include both
|
||||
/// the original tool call and an `"aborted"` `function_call_output`. This test
|
||||
/// exercises the follow-up flow: it sends another user turn, inspects the mock
|
||||
/// responses server, and ensures the model receives the synthesized abort.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn interrupt_tool_records_history_entries() {
|
||||
let command = vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"sleep 60".to_string(),
|
||||
];
|
||||
let call_id = "call-history";
|
||||
|
||||
let args = json!({
|
||||
"command": command,
|
||||
"timeout_ms": 60_000
|
||||
})
|
||||
.to_string();
|
||||
let first_body = sse(vec![
|
||||
ev_response_created("resp-history"),
|
||||
ev_function_call(call_id, "shell", &args),
|
||||
ev_completed("resp-history"),
|
||||
]);
|
||||
let follow_up_body = sse(vec![
|
||||
ev_response_created("resp-followup"),
|
||||
ev_completed("resp-followup"),
|
||||
]);
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let response_mock = mount_sse_sequence(&server, vec![first_body, follow_up_body]).await;
|
||||
|
||||
let fixture = test_codex().build(&server).await.unwrap();
|
||||
let codex = Arc::clone(&fixture.codex);
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "start history recording".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExecCommandBegin(_))).await;
|
||||
|
||||
tokio::time::sleep(Duration::from_secs_f32(0.1)).await;
|
||||
codex.submit(Op::Interrupt).await.unwrap();
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnAborted(_))).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "follow up".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = response_mock.requests();
|
||||
assert!(
|
||||
requests.len() == 2,
|
||||
"expected two calls to the responses API, got {}",
|
||||
requests.len()
|
||||
);
|
||||
|
||||
assert!(
|
||||
response_mock.saw_function_call(call_id),
|
||||
"function call not recorded in responses payload"
|
||||
);
|
||||
let output = response_mock
|
||||
.function_call_output_text(call_id)
|
||||
.expect("missing function_call_output text");
|
||||
let re = Regex::new(r"^Wall time: ([0-9]+(?:\.[0-9])?) seconds\naborted by user$")
|
||||
.expect("compile regex");
|
||||
let captures = re.captures(&output);
|
||||
assert_matches!(
|
||||
captures.as_ref(),
|
||||
Some(caps) if caps.get(1).is_some(),
|
||||
"aborted message with elapsed seconds"
|
||||
);
|
||||
let secs: f32 = captures
|
||||
.expect("aborted message with elapsed seconds")
|
||||
.get(1)
|
||||
.unwrap()
|
||||
.as_str()
|
||||
.parse()
|
||||
.unwrap();
|
||||
assert!(
|
||||
secs >= 0.1,
|
||||
"expected at least one tenth of a second of elapsed time, got {secs}"
|
||||
);
|
||||
}
|
||||
1052
llmx-rs/core/tests/suite/apply_patch_cli.rs
Normal file
1052
llmx-rs/core/tests/suite/apply_patch_cli.rs
Normal file
File diff suppressed because it is too large
Load Diff
1001
llmx-rs/core/tests/suite/apply_patch_freeform.rs
Normal file
1001
llmx-rs/core/tests/suite/apply_patch_freeform.rs
Normal file
File diff suppressed because it is too large
Load Diff
1262
llmx-rs/core/tests/suite/approvals.rs
Normal file
1262
llmx-rs/core/tests/suite/approvals.rs
Normal file
File diff suppressed because it is too large
Load Diff
272
llmx-rs/core/tests/suite/auth_refresh.rs
Normal file
272
llmx-rs/core/tests/suite/auth_refresh.rs
Normal file
@@ -0,0 +1,272 @@
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use base64::Engine;
|
||||
use chrono::Duration;
|
||||
use chrono::Utc;
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::auth::AuthCredentialsStoreMode;
|
||||
use codex_core::auth::AuthDotJson;
|
||||
use codex_core::auth::REFRESH_TOKEN_URL_OVERRIDE_ENV_VAR;
|
||||
use codex_core::auth::RefreshTokenError;
|
||||
use codex_core::auth::load_auth_dot_json;
|
||||
use codex_core::auth::save_auth;
|
||||
use codex_core::error::RefreshTokenFailedReason;
|
||||
use codex_core::token_data::IdTokenInfo;
|
||||
use codex_core::token_data::TokenData;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use std::ffi::OsString;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
const INITIAL_ACCESS_TOKEN: &str = "initial-access-token";
|
||||
const INITIAL_REFRESH_TOKEN: &str = "initial-refresh-token";
|
||||
|
||||
#[serial_test::serial(auth_refresh)]
|
||||
#[tokio::test]
|
||||
async fn refresh_token_succeeds_updates_storage() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = MockServer::start().await;
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/oauth/token"))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_json(json!({
|
||||
"access_token": "new-access-token",
|
||||
"refresh_token": "new-refresh-token"
|
||||
})))
|
||||
.expect(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let ctx = RefreshTokenTestContext::new(&server)?;
|
||||
let auth = ctx.auth.clone();
|
||||
|
||||
let access = auth
|
||||
.refresh_token()
|
||||
.await
|
||||
.context("refresh should succeed")?;
|
||||
assert_eq!(access, "new-access-token");
|
||||
|
||||
let stored = ctx.load_auth()?;
|
||||
let tokens = stored.tokens.as_ref().context("tokens should exist")?;
|
||||
assert_eq!(tokens.access_token, "new-access-token");
|
||||
assert_eq!(tokens.refresh_token, "new-refresh-token");
|
||||
let refreshed_at = stored
|
||||
.last_refresh
|
||||
.as_ref()
|
||||
.context("last_refresh should be recorded")?;
|
||||
assert!(
|
||||
*refreshed_at >= ctx.initial_last_refresh,
|
||||
"last_refresh should advance"
|
||||
);
|
||||
|
||||
let cached = auth
|
||||
.get_token_data()
|
||||
.await
|
||||
.context("token data should be cached")?;
|
||||
assert_eq!(cached.access_token, "new-access-token");
|
||||
|
||||
server.verify().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[serial_test::serial(auth_refresh)]
|
||||
#[tokio::test]
|
||||
async fn refresh_token_returns_permanent_error_for_expired_refresh_token() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = MockServer::start().await;
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/oauth/token"))
|
||||
.respond_with(ResponseTemplate::new(401).set_body_json(json!({
|
||||
"error": {
|
||||
"code": "refresh_token_expired"
|
||||
}
|
||||
})))
|
||||
.expect(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let ctx = RefreshTokenTestContext::new(&server)?;
|
||||
let auth = ctx.auth.clone();
|
||||
|
||||
let err = auth
|
||||
.refresh_token()
|
||||
.await
|
||||
.err()
|
||||
.context("refresh should fail")?;
|
||||
assert_eq!(err.failed_reason(), Some(RefreshTokenFailedReason::Expired));
|
||||
|
||||
let stored = ctx.load_auth()?;
|
||||
let tokens = stored.tokens.as_ref().context("tokens should remain")?;
|
||||
assert_eq!(tokens.access_token, INITIAL_ACCESS_TOKEN);
|
||||
assert_eq!(tokens.refresh_token, INITIAL_REFRESH_TOKEN);
|
||||
assert_eq!(
|
||||
*stored
|
||||
.last_refresh
|
||||
.as_ref()
|
||||
.context("last_refresh should remain unchanged")?,
|
||||
ctx.initial_last_refresh,
|
||||
);
|
||||
|
||||
server.verify().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[serial_test::serial(auth_refresh)]
|
||||
#[tokio::test]
|
||||
async fn refresh_token_returns_transient_error_on_server_failure() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = MockServer::start().await;
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/oauth/token"))
|
||||
.respond_with(ResponseTemplate::new(500).set_body_json(json!({
|
||||
"error": "temporary-failure"
|
||||
})))
|
||||
.expect(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let ctx = RefreshTokenTestContext::new(&server)?;
|
||||
let auth = ctx.auth.clone();
|
||||
|
||||
let err = auth
|
||||
.refresh_token()
|
||||
.await
|
||||
.err()
|
||||
.context("refresh should fail")?;
|
||||
assert!(matches!(err, RefreshTokenError::Transient(_)));
|
||||
assert_eq!(err.failed_reason(), None);
|
||||
|
||||
let stored = ctx.load_auth()?;
|
||||
let tokens = stored.tokens.as_ref().context("tokens should remain")?;
|
||||
assert_eq!(tokens.access_token, INITIAL_ACCESS_TOKEN);
|
||||
assert_eq!(tokens.refresh_token, INITIAL_REFRESH_TOKEN);
|
||||
assert_eq!(
|
||||
*stored
|
||||
.last_refresh
|
||||
.as_ref()
|
||||
.context("last_refresh should remain unchanged")?,
|
||||
ctx.initial_last_refresh,
|
||||
);
|
||||
|
||||
server.verify().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
struct RefreshTokenTestContext {
|
||||
codex_home: TempDir,
|
||||
auth: CodexAuth,
|
||||
initial_last_refresh: chrono::DateTime<Utc>,
|
||||
_env_guard: EnvGuard,
|
||||
}
|
||||
|
||||
impl RefreshTokenTestContext {
|
||||
fn new(server: &MockServer) -> Result<Self> {
|
||||
let codex_home = TempDir::new()?;
|
||||
let initial_last_refresh = Utc::now() - Duration::days(1);
|
||||
let mut id_token = IdTokenInfo::default();
|
||||
id_token.raw_jwt = minimal_jwt();
|
||||
let tokens = TokenData {
|
||||
id_token,
|
||||
access_token: INITIAL_ACCESS_TOKEN.to_string(),
|
||||
refresh_token: INITIAL_REFRESH_TOKEN.to_string(),
|
||||
account_id: Some("account-id".to_string()),
|
||||
};
|
||||
let auth_dot_json = AuthDotJson {
|
||||
openai_api_key: None,
|
||||
tokens: Some(tokens),
|
||||
last_refresh: Some(initial_last_refresh),
|
||||
};
|
||||
save_auth(
|
||||
codex_home.path(),
|
||||
&auth_dot_json,
|
||||
AuthCredentialsStoreMode::File,
|
||||
)?;
|
||||
|
||||
let endpoint = format!("{}/oauth/token", server.uri());
|
||||
let env_guard = EnvGuard::set(REFRESH_TOKEN_URL_OVERRIDE_ENV_VAR, endpoint);
|
||||
|
||||
let auth = CodexAuth::from_auth_storage(codex_home.path(), AuthCredentialsStoreMode::File)?
|
||||
.context("auth should load from storage")?;
|
||||
|
||||
Ok(Self {
|
||||
codex_home,
|
||||
auth,
|
||||
initial_last_refresh,
|
||||
_env_guard: env_guard,
|
||||
})
|
||||
}
|
||||
|
||||
fn load_auth(&self) -> Result<AuthDotJson> {
|
||||
load_auth_dot_json(self.codex_home.path(), AuthCredentialsStoreMode::File)
|
||||
.context("load auth.json")?
|
||||
.context("auth.json should exist")
|
||||
}
|
||||
}
|
||||
|
||||
struct EnvGuard {
|
||||
key: &'static str,
|
||||
original: Option<OsString>,
|
||||
}
|
||||
|
||||
impl EnvGuard {
|
||||
fn set(key: &'static str, value: String) -> Self {
|
||||
let original = std::env::var_os(key);
|
||||
// SAFETY: these tests execute serially, so updating the process environment is safe.
|
||||
unsafe {
|
||||
std::env::set_var(key, &value);
|
||||
}
|
||||
Self { key, original }
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for EnvGuard {
|
||||
fn drop(&mut self) {
|
||||
// SAFETY: the guard restores the original environment value before other tests run.
|
||||
unsafe {
|
||||
match &self.original {
|
||||
Some(value) => std::env::set_var(self.key, value),
|
||||
None => std::env::remove_var(self.key),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn minimal_jwt() -> String {
|
||||
#[derive(Serialize)]
|
||||
struct Header {
|
||||
alg: &'static str,
|
||||
typ: &'static str,
|
||||
}
|
||||
|
||||
let header = Header {
|
||||
alg: "none",
|
||||
typ: "JWT",
|
||||
};
|
||||
let payload = json!({ "sub": "user-123" });
|
||||
|
||||
fn b64(data: &[u8]) -> String {
|
||||
base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(data)
|
||||
}
|
||||
|
||||
let header_bytes = match serde_json::to_vec(&header) {
|
||||
Ok(bytes) => bytes,
|
||||
Err(err) => panic!("serialize header: {err}"),
|
||||
};
|
||||
let payload_bytes = match serde_json::to_vec(&payload) {
|
||||
Ok(bytes) => bytes,
|
||||
Err(err) => panic!("serialize payload: {err}"),
|
||||
};
|
||||
let header_b64 = b64(&header_bytes);
|
||||
let payload_b64 = b64(&payload_bytes);
|
||||
let signature_b64 = b64(b"sig");
|
||||
format!("{header_b64}.{payload_b64}.{signature_b64}")
|
||||
}
|
||||
526
llmx-rs/core/tests/suite/cli_stream.rs
Normal file
526
llmx-rs/core/tests/suite/cli_stream.rs
Normal file
@@ -0,0 +1,526 @@
|
||||
use assert_cmd::Command as AssertCommand;
|
||||
use assert_cmd::cargo::cargo_bin;
|
||||
use codex_core::RolloutRecorder;
|
||||
use codex_core::protocol::GitInfo;
|
||||
use core_test_support::fs_wait;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use std::time::Duration;
|
||||
use tempfile::TempDir;
|
||||
use uuid::Uuid;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
/// Tests streaming chat completions through the CLI using a mock server.
|
||||
/// This test:
|
||||
/// 1. Sets up a mock server that simulates OpenAI's chat completions API
|
||||
/// 2. Configures codex to use this mock server via a custom provider
|
||||
/// 3. Sends a simple "hello?" prompt and verifies the streamed response
|
||||
/// 4. Ensures the response is received exactly once and contains "hi"
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn chat_mode_stream_cli() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let server = MockServer::start().await;
|
||||
let sse = concat!(
|
||||
"data: {\"choices\":[{\"delta\":{\"content\":\"hi\"}}]}\n\n",
|
||||
"data: {\"choices\":[{\"delta\":{}}]}\n\n",
|
||||
"data: [DONE]\n\n"
|
||||
);
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/chat/completions"))
|
||||
.respond_with(
|
||||
ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse, "text/event-stream"),
|
||||
)
|
||||
.expect(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let home = TempDir::new().unwrap();
|
||||
let provider_override = format!(
|
||||
"model_providers.mock={{ name = \"mock\", base_url = \"{}/v1\", env_key = \"PATH\", wire_api = \"chat\" }}",
|
||||
server.uri()
|
||||
);
|
||||
let bin = cargo_bin("codex");
|
||||
let mut cmd = AssertCommand::new(bin);
|
||||
cmd.arg("exec")
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-c")
|
||||
.arg(&provider_override)
|
||||
.arg("-c")
|
||||
.arg("model_provider=\"mock\"")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg("hello?");
|
||||
cmd.env("CODEX_HOME", home.path())
|
||||
.env("OPENAI_API_KEY", "dummy")
|
||||
.env("OPENAI_BASE_URL", format!("{}/v1", server.uri()));
|
||||
|
||||
let output = cmd.output().unwrap();
|
||||
println!("Status: {}", output.status);
|
||||
println!("Stdout:\n{}", String::from_utf8_lossy(&output.stdout));
|
||||
println!("Stderr:\n{}", String::from_utf8_lossy(&output.stderr));
|
||||
assert!(output.status.success());
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let hi_lines = stdout.lines().filter(|line| line.trim() == "hi").count();
|
||||
assert_eq!(hi_lines, 1, "Expected exactly one line with 'hi'");
|
||||
|
||||
server.verify().await;
|
||||
|
||||
// Verify a new session rollout was created and is discoverable via list_conversations
|
||||
let provider_filter = vec!["mock".to_string()];
|
||||
let page = RolloutRecorder::list_conversations(
|
||||
home.path(),
|
||||
10,
|
||||
None,
|
||||
&[],
|
||||
Some(provider_filter.as_slice()),
|
||||
"mock",
|
||||
)
|
||||
.await
|
||||
.expect("list conversations");
|
||||
assert!(
|
||||
!page.items.is_empty(),
|
||||
"expected at least one session to be listed"
|
||||
);
|
||||
// First line of head must be the SessionMeta payload (id/timestamp)
|
||||
let head0 = page.items[0].head.first().expect("missing head record");
|
||||
assert!(head0.get("id").is_some(), "head[0] missing id");
|
||||
assert!(
|
||||
head0.get("timestamp").is_some(),
|
||||
"head[0] missing timestamp"
|
||||
);
|
||||
}
|
||||
|
||||
/// Verify that passing `-c experimental_instructions_file=...` to the CLI
|
||||
/// overrides the built-in base instructions by inspecting the request body
|
||||
/// received by a mock OpenAI Responses endpoint.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn exec_cli_applies_experimental_instructions_file() {
|
||||
skip_if_no_network!();
|
||||
|
||||
// Start mock server which will capture the request and return a minimal
|
||||
// SSE stream for a single turn.
|
||||
let server = MockServer::start().await;
|
||||
let sse = concat!(
|
||||
"data: {\"type\":\"response.created\",\"response\":{}}\n\n",
|
||||
"data: {\"type\":\"response.completed\",\"response\":{\"id\":\"r1\"}}\n\n"
|
||||
);
|
||||
let resp_mock = core_test_support::responses::mount_sse_once_match(
|
||||
&server,
|
||||
path("/v1/responses"),
|
||||
sse.to_string(),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Create a temporary instructions file with a unique marker we can assert
|
||||
// appears in the outbound request payload.
|
||||
let custom = TempDir::new().unwrap();
|
||||
let marker = "cli-experimental-instructions-marker";
|
||||
let custom_path = custom.path().join("instr.md");
|
||||
std::fs::write(&custom_path, marker).unwrap();
|
||||
let custom_path_str = custom_path.to_string_lossy().replace('\\', "/");
|
||||
|
||||
// Build a provider override that points at the mock server and instructs
|
||||
// Codex to use the Responses API with the dummy env var.
|
||||
let provider_override = format!(
|
||||
"model_providers.mock={{ name = \"mock\", base_url = \"{}/v1\", env_key = \"PATH\", wire_api = \"responses\" }}",
|
||||
server.uri()
|
||||
);
|
||||
|
||||
let home = TempDir::new().unwrap();
|
||||
let bin = cargo_bin("codex");
|
||||
let mut cmd = AssertCommand::new(bin);
|
||||
cmd.arg("exec")
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-c")
|
||||
.arg(&provider_override)
|
||||
.arg("-c")
|
||||
.arg("model_provider=\"mock\"")
|
||||
.arg("-c")
|
||||
.arg(format!(
|
||||
"experimental_instructions_file=\"{custom_path_str}\""
|
||||
))
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg("hello?\n");
|
||||
cmd.env("CODEX_HOME", home.path())
|
||||
.env("OPENAI_API_KEY", "dummy")
|
||||
.env("OPENAI_BASE_URL", format!("{}/v1", server.uri()));
|
||||
|
||||
let output = cmd.output().unwrap();
|
||||
println!("Status: {}", output.status);
|
||||
println!("Stdout:\n{}", String::from_utf8_lossy(&output.stdout));
|
||||
println!("Stderr:\n{}", String::from_utf8_lossy(&output.stderr));
|
||||
assert!(output.status.success());
|
||||
|
||||
// Inspect the captured request and verify our custom base instructions were
|
||||
// included in the `instructions` field.
|
||||
let request = resp_mock.single_request();
|
||||
let body = request.body_json();
|
||||
let instructions = body
|
||||
.get("instructions")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
assert!(
|
||||
instructions.contains(marker),
|
||||
"instructions did not contain custom marker; got: {instructions}"
|
||||
);
|
||||
}
|
||||
|
||||
/// Tests streaming responses through the CLI using a local SSE fixture file.
|
||||
/// This test:
|
||||
/// 1. Uses a pre-recorded SSE response fixture instead of a live server
|
||||
/// 2. Configures codex to read from this fixture via CODEX_RS_SSE_FIXTURE env var
|
||||
/// 3. Sends a "hello?" prompt and verifies the response
|
||||
/// 4. Ensures the fixture content is correctly streamed through the CLI
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn responses_api_stream_cli() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let fixture =
|
||||
std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/cli_responses_fixture.sse");
|
||||
|
||||
let home = TempDir::new().unwrap();
|
||||
let bin = cargo_bin("codex");
|
||||
let mut cmd = AssertCommand::new(bin);
|
||||
cmd.arg("exec")
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg("hello?");
|
||||
cmd.env("CODEX_HOME", home.path())
|
||||
.env("OPENAI_API_KEY", "dummy")
|
||||
.env("CODEX_RS_SSE_FIXTURE", fixture)
|
||||
.env("OPENAI_BASE_URL", "http://unused.local");
|
||||
|
||||
let output = cmd.output().unwrap();
|
||||
assert!(output.status.success());
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
assert!(stdout.contains("fixture hello"));
|
||||
}
|
||||
|
||||
/// End-to-end: create a session (writes rollout), verify the file, then resume and confirm append.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn integration_creates_and_checks_session_file() -> anyhow::Result<()> {
|
||||
// Honor sandbox network restrictions for CI parity with the other tests.
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
// 1. Temp home so we read/write isolated session files.
|
||||
let home = TempDir::new()?;
|
||||
|
||||
// 2. Unique marker we'll look for in the session log.
|
||||
let marker = format!("integration-test-{}", Uuid::new_v4());
|
||||
let prompt = format!("echo {marker}");
|
||||
|
||||
// 3. Use the same offline SSE fixture as responses_api_stream_cli so the test is hermetic.
|
||||
let fixture =
|
||||
std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/cli_responses_fixture.sse");
|
||||
|
||||
// 4. Run the codex CLI and invoke `exec`, which is what records a session.
|
||||
let bin = cargo_bin("codex");
|
||||
let mut cmd = AssertCommand::new(bin);
|
||||
cmd.arg("exec")
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg(&prompt);
|
||||
cmd.env("CODEX_HOME", home.path())
|
||||
.env("OPENAI_API_KEY", "dummy")
|
||||
.env("CODEX_RS_SSE_FIXTURE", &fixture)
|
||||
// Required for CLI arg parsing even though fixture short-circuits network usage.
|
||||
.env("OPENAI_BASE_URL", "http://unused.local");
|
||||
|
||||
let output = cmd.output().unwrap();
|
||||
assert!(
|
||||
output.status.success(),
|
||||
"codex-cli exec failed: {}",
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
);
|
||||
|
||||
// Wait for sessions dir to appear.
|
||||
let sessions_dir = home.path().join("sessions");
|
||||
fs_wait::wait_for_path_exists(&sessions_dir, Duration::from_secs(5)).await?;
|
||||
|
||||
// Find the session file that contains `marker`.
|
||||
let marker_clone = marker.clone();
|
||||
let path = fs_wait::wait_for_matching_file(&sessions_dir, Duration::from_secs(10), move |p| {
|
||||
if p.extension().and_then(|ext| ext.to_str()) != Some("jsonl") {
|
||||
return false;
|
||||
}
|
||||
let Ok(content) = std::fs::read_to_string(p) else {
|
||||
return false;
|
||||
};
|
||||
content.contains(&marker_clone)
|
||||
})
|
||||
.await?;
|
||||
|
||||
// Basic sanity checks on location and metadata.
|
||||
let rel = match path.strip_prefix(&sessions_dir) {
|
||||
Ok(r) => r,
|
||||
Err(_) => panic!("session file should live under sessions/"),
|
||||
};
|
||||
let comps: Vec<String> = rel
|
||||
.components()
|
||||
.map(|c| c.as_os_str().to_string_lossy().into_owned())
|
||||
.collect();
|
||||
assert_eq!(
|
||||
comps.len(),
|
||||
4,
|
||||
"Expected sessions/YYYY/MM/DD/<file>, got {rel:?}"
|
||||
);
|
||||
let year = &comps[0];
|
||||
let month = &comps[1];
|
||||
let day = &comps[2];
|
||||
assert!(
|
||||
year.len() == 4 && year.chars().all(|c| c.is_ascii_digit()),
|
||||
"Year dir not 4-digit numeric: {year}"
|
||||
);
|
||||
assert!(
|
||||
month.len() == 2 && month.chars().all(|c| c.is_ascii_digit()),
|
||||
"Month dir not zero-padded 2-digit numeric: {month}"
|
||||
);
|
||||
assert!(
|
||||
day.len() == 2 && day.chars().all(|c| c.is_ascii_digit()),
|
||||
"Day dir not zero-padded 2-digit numeric: {day}"
|
||||
);
|
||||
if let Ok(m) = month.parse::<u8>() {
|
||||
assert!((1..=12).contains(&m), "Month out of range: {m}");
|
||||
}
|
||||
if let Ok(d) = day.parse::<u8>() {
|
||||
assert!((1..=31).contains(&d), "Day out of range: {d}");
|
||||
}
|
||||
|
||||
let content =
|
||||
std::fs::read_to_string(&path).unwrap_or_else(|_| panic!("Failed to read session file"));
|
||||
let mut lines = content.lines();
|
||||
let meta_line = lines
|
||||
.next()
|
||||
.ok_or("missing session meta line")
|
||||
.unwrap_or_else(|_| panic!("missing session meta line"));
|
||||
let meta: serde_json::Value = serde_json::from_str(meta_line)
|
||||
.unwrap_or_else(|_| panic!("Failed to parse session meta line as JSON"));
|
||||
assert_eq!(
|
||||
meta.get("type").and_then(|v| v.as_str()),
|
||||
Some("session_meta")
|
||||
);
|
||||
let payload = meta
|
||||
.get("payload")
|
||||
.unwrap_or_else(|| panic!("Missing payload in meta line"));
|
||||
assert!(payload.get("id").is_some(), "SessionMeta missing id");
|
||||
assert!(
|
||||
payload.get("timestamp").is_some(),
|
||||
"SessionMeta missing timestamp"
|
||||
);
|
||||
|
||||
let mut found_message = false;
|
||||
for line in lines {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
let Ok(item) = serde_json::from_str::<serde_json::Value>(line) else {
|
||||
continue;
|
||||
};
|
||||
if item.get("type").and_then(|t| t.as_str()) == Some("response_item")
|
||||
&& let Some(payload) = item.get("payload")
|
||||
&& payload.get("type").and_then(|t| t.as_str()) == Some("message")
|
||||
&& let Some(c) = payload.get("content")
|
||||
&& c.to_string().contains(&marker)
|
||||
{
|
||||
found_message = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert!(
|
||||
found_message,
|
||||
"No message found in session file containing the marker"
|
||||
);
|
||||
|
||||
// Second run: resume should update the existing file.
|
||||
let marker2 = format!("integration-resume-{}", Uuid::new_v4());
|
||||
let prompt2 = format!("echo {marker2}");
|
||||
let bin2 = cargo_bin("codex");
|
||||
let mut cmd2 = AssertCommand::new(bin2);
|
||||
cmd2.arg("exec")
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg(&prompt2)
|
||||
.arg("resume")
|
||||
.arg("--last");
|
||||
cmd2.env("CODEX_HOME", home.path())
|
||||
.env("OPENAI_API_KEY", "dummy")
|
||||
.env("CODEX_RS_SSE_FIXTURE", &fixture)
|
||||
.env("OPENAI_BASE_URL", "http://unused.local");
|
||||
|
||||
let output2 = cmd2.output().unwrap();
|
||||
assert!(output2.status.success(), "resume codex-cli run failed");
|
||||
|
||||
// Find the new session file containing the resumed marker.
|
||||
let marker2_clone = marker2.clone();
|
||||
let resumed_path =
|
||||
fs_wait::wait_for_matching_file(&sessions_dir, Duration::from_secs(10), move |p| {
|
||||
if p.extension().and_then(|ext| ext.to_str()) != Some("jsonl") {
|
||||
return false;
|
||||
}
|
||||
std::fs::read_to_string(p)
|
||||
.map(|content| content.contains(&marker2_clone))
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.await?;
|
||||
|
||||
// Resume should write to the existing log file.
|
||||
assert_eq!(
|
||||
resumed_path, path,
|
||||
"resume should create a new session file"
|
||||
);
|
||||
|
||||
let resumed_content = std::fs::read_to_string(&resumed_path)?;
|
||||
assert!(
|
||||
resumed_content.contains(&marker),
|
||||
"resumed file missing original marker"
|
||||
);
|
||||
assert!(
|
||||
resumed_content.contains(&marker2),
|
||||
"resumed file missing resumed marker"
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Integration test to verify git info is collected and recorded in session files.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn integration_git_info_unit_test() {
|
||||
// This test verifies git info collection works independently
|
||||
// without depending on the full CLI integration
|
||||
|
||||
// 1. Create temp directory for git repo
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let git_repo = temp_dir.path().to_path_buf();
|
||||
let envs = vec![
|
||||
("GIT_CONFIG_GLOBAL", "/dev/null"),
|
||||
("GIT_CONFIG_NOSYSTEM", "1"),
|
||||
];
|
||||
|
||||
// 2. Initialize a git repository with some content
|
||||
let init_output = std::process::Command::new("git")
|
||||
.envs(envs.clone())
|
||||
.args(["init"])
|
||||
.current_dir(&git_repo)
|
||||
.output()
|
||||
.unwrap();
|
||||
assert!(init_output.status.success(), "git init failed");
|
||||
|
||||
// Configure git user (required for commits)
|
||||
std::process::Command::new("git")
|
||||
.envs(envs.clone())
|
||||
.args(["config", "user.name", "Integration Test"])
|
||||
.current_dir(&git_repo)
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
std::process::Command::new("git")
|
||||
.envs(envs.clone())
|
||||
.args(["config", "user.email", "test@example.com"])
|
||||
.current_dir(&git_repo)
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// Create a test file and commit it
|
||||
let test_file = git_repo.join("test.txt");
|
||||
std::fs::write(&test_file, "integration test content").unwrap();
|
||||
|
||||
std::process::Command::new("git")
|
||||
.envs(envs.clone())
|
||||
.args(["add", "."])
|
||||
.current_dir(&git_repo)
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
let commit_output = std::process::Command::new("git")
|
||||
.envs(envs.clone())
|
||||
.args(["commit", "-m", "Integration test commit"])
|
||||
.current_dir(&git_repo)
|
||||
.output()
|
||||
.unwrap();
|
||||
assert!(commit_output.status.success(), "git commit failed");
|
||||
|
||||
// Create a branch to test branch detection
|
||||
std::process::Command::new("git")
|
||||
.envs(envs.clone())
|
||||
.args(["checkout", "-b", "integration-test-branch"])
|
||||
.current_dir(&git_repo)
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// Add a remote to test repository URL detection
|
||||
std::process::Command::new("git")
|
||||
.envs(envs.clone())
|
||||
.args([
|
||||
"remote",
|
||||
"add",
|
||||
"origin",
|
||||
"https://github.com/example/integration-test.git",
|
||||
])
|
||||
.current_dir(&git_repo)
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// 3. Test git info collection directly
|
||||
let git_info = codex_core::git_info::collect_git_info(&git_repo).await;
|
||||
|
||||
// 4. Verify git info is present and contains expected data
|
||||
assert!(git_info.is_some(), "Git info should be collected");
|
||||
|
||||
let git_info = git_info.unwrap();
|
||||
|
||||
// Check that we have a commit hash
|
||||
assert!(
|
||||
git_info.commit_hash.is_some(),
|
||||
"Git info should contain commit_hash"
|
||||
);
|
||||
let commit_hash = git_info.commit_hash.as_ref().unwrap();
|
||||
assert_eq!(commit_hash.len(), 40, "Commit hash should be 40 characters");
|
||||
assert!(
|
||||
commit_hash.chars().all(|c| c.is_ascii_hexdigit()),
|
||||
"Commit hash should be hexadecimal"
|
||||
);
|
||||
|
||||
// Check that we have the correct branch
|
||||
assert!(git_info.branch.is_some(), "Git info should contain branch");
|
||||
let branch = git_info.branch.as_ref().unwrap();
|
||||
assert_eq!(
|
||||
branch, "integration-test-branch",
|
||||
"Branch should match what we created"
|
||||
);
|
||||
|
||||
// Check that we have the repository URL
|
||||
assert!(
|
||||
git_info.repository_url.is_some(),
|
||||
"Git info should contain repository_url"
|
||||
);
|
||||
let repo_url = git_info.repository_url.as_ref().unwrap();
|
||||
assert_eq!(
|
||||
repo_url, "https://github.com/example/integration-test.git",
|
||||
"Repository URL should match what we configured"
|
||||
);
|
||||
|
||||
println!("✅ Git info collection test passed!");
|
||||
println!(" Commit: {commit_hash}");
|
||||
println!(" Branch: {branch}");
|
||||
println!(" Repo: {repo_url}");
|
||||
|
||||
// 5. Test serialization to ensure it works in SessionMeta
|
||||
let serialized = serde_json::to_string(&git_info).unwrap();
|
||||
let deserialized: GitInfo = serde_json::from_str(&serialized).unwrap();
|
||||
|
||||
assert_eq!(git_info.commit_hash, deserialized.commit_hash);
|
||||
assert_eq!(git_info.branch, deserialized.branch);
|
||||
assert_eq!(git_info.repository_url, deserialized.repository_url);
|
||||
|
||||
println!("✅ Git info serialization test passed!");
|
||||
}
|
||||
1460
llmx-rs/core/tests/suite/client.rs
Normal file
1460
llmx-rs/core/tests/suite/client.rs
Normal file
File diff suppressed because it is too large
Load Diff
225
llmx-rs/core/tests/suite/codex_delegate.rs
Normal file
225
llmx-rs/core/tests/suite/codex_delegate.rs
Normal file
@@ -0,0 +1,225 @@
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::ReviewDecision;
|
||||
use codex_core::protocol::ReviewRequest;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use core_test_support::responses::ev_apply_patch_function_call;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_reasoning_item_added;
|
||||
use core_test_support::responses::ev_reasoning_summary_text_delta;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
/// Delegate should surface ExecApprovalRequest from sub-agent and proceed
|
||||
/// after parent submits an approval decision.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn codex_delegate_forwards_exec_approval_and_proceeds_on_approval() {
|
||||
skip_if_no_network!();
|
||||
|
||||
// Sub-agent turn 1: emit a shell function_call requiring approval, then complete.
|
||||
let call_id = "call-exec-1";
|
||||
let args = serde_json::json!({
|
||||
"command": ["bash", "-lc", "rm -rf delegated"],
|
||||
"timeout_ms": 1000,
|
||||
"with_escalated_permissions": true,
|
||||
})
|
||||
.to_string();
|
||||
let sse1 = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &args),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
|
||||
// Sub-agent turn 2: return structured review output and complete.
|
||||
let review_json = serde_json::json!({
|
||||
"findings": [],
|
||||
"overall_correctness": "ok",
|
||||
"overall_explanation": "delegate approved exec",
|
||||
"overall_confidence_score": 0.5
|
||||
})
|
||||
.to_string();
|
||||
let sse2 = sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_assistant_message("msg-1", &review_json),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
|
||||
let server = start_mock_server().await;
|
||||
mount_sse_sequence(&server, vec![sse1, sse2]).await;
|
||||
|
||||
// Build a conversation configured to require approvals so the delegate
|
||||
// routes ExecApprovalRequest via the parent.
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.approval_policy = AskForApproval::OnRequest;
|
||||
config.sandbox_policy = SandboxPolicy::ReadOnly;
|
||||
});
|
||||
let test = builder.build(&server).await.expect("build test codex");
|
||||
|
||||
// Kick off review (sub-agent starts internally).
|
||||
test.codex
|
||||
.submit(Op::Review {
|
||||
review_request: ReviewRequest {
|
||||
prompt: "Please review".to_string(),
|
||||
user_facing_hint: "review".to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.expect("submit review");
|
||||
|
||||
// Lifecycle: Entered -> ExecApprovalRequest -> Exited(Some) -> TaskComplete.
|
||||
wait_for_event(&test.codex, |ev| {
|
||||
matches!(ev, EventMsg::EnteredReviewMode(_))
|
||||
})
|
||||
.await;
|
||||
|
||||
// Expect parent-side approval request (forwarded by delegate).
|
||||
wait_for_event(&test.codex, |ev| {
|
||||
matches!(ev, EventMsg::ExecApprovalRequest(_))
|
||||
})
|
||||
.await;
|
||||
|
||||
// Approve via parent; id "0" is the active sub_id in tests.
|
||||
test.codex
|
||||
.submit(Op::ExecApproval {
|
||||
id: "0".into(),
|
||||
decision: ReviewDecision::Approved,
|
||||
})
|
||||
.await
|
||||
.expect("submit exec approval");
|
||||
|
||||
wait_for_event(&test.codex, |ev| {
|
||||
matches!(ev, EventMsg::ExitedReviewMode(_))
|
||||
})
|
||||
.await;
|
||||
wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
}
|
||||
|
||||
/// Delegate should surface ApplyPatchApprovalRequest and honor parent decision
|
||||
/// so the sub-agent can proceed to completion.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn codex_delegate_forwards_patch_approval_and_proceeds_on_decision() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let call_id = "call-patch-1";
|
||||
let patch = "*** Begin Patch\n*** Add File: delegated.txt\n+hello\n*** End Patch\n";
|
||||
let sse1 = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_apply_patch_function_call(call_id, patch),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
let review_json = serde_json::json!({
|
||||
"findings": [],
|
||||
"overall_correctness": "ok",
|
||||
"overall_explanation": "delegate patch handled",
|
||||
"overall_confidence_score": 0.5
|
||||
})
|
||||
.to_string();
|
||||
let sse2 = sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_assistant_message("msg-1", &review_json),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
|
||||
let server = start_mock_server().await;
|
||||
mount_sse_sequence(&server, vec![sse1, sse2]).await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.approval_policy = AskForApproval::OnRequest;
|
||||
// Use a restricted sandbox so patch approval is required
|
||||
config.sandbox_policy = SandboxPolicy::ReadOnly;
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await.expect("build test codex");
|
||||
|
||||
test.codex
|
||||
.submit(Op::Review {
|
||||
review_request: ReviewRequest {
|
||||
prompt: "Please review".to_string(),
|
||||
user_facing_hint: "review".to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.expect("submit review");
|
||||
|
||||
wait_for_event(&test.codex, |ev| {
|
||||
matches!(ev, EventMsg::EnteredReviewMode(_))
|
||||
})
|
||||
.await;
|
||||
wait_for_event(&test.codex, |ev| {
|
||||
matches!(ev, EventMsg::ApplyPatchApprovalRequest(_))
|
||||
})
|
||||
.await;
|
||||
|
||||
// Deny via parent so delegate can continue; id "0" is the active sub_id in tests.
|
||||
test.codex
|
||||
.submit(Op::PatchApproval {
|
||||
id: "0".into(),
|
||||
decision: ReviewDecision::Denied,
|
||||
})
|
||||
.await
|
||||
.expect("submit patch approval");
|
||||
|
||||
wait_for_event(&test.codex, |ev| {
|
||||
matches!(ev, EventMsg::ExitedReviewMode(_))
|
||||
})
|
||||
.await;
|
||||
wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn codex_delegate_ignores_legacy_deltas() {
|
||||
skip_if_no_network!();
|
||||
|
||||
// Single response with reasoning summary deltas.
|
||||
let sse_stream = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_reasoning_item_added("reason-1", &["initial"]),
|
||||
ev_reasoning_summary_text_delta("think-1"),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
|
||||
let server = start_mock_server().await;
|
||||
mount_sse_sequence(&server, vec![sse_stream]).await;
|
||||
|
||||
let mut builder = test_codex();
|
||||
let test = builder.build(&server).await.expect("build test codex");
|
||||
|
||||
// Kick off review (delegated).
|
||||
test.codex
|
||||
.submit(Op::Review {
|
||||
review_request: ReviewRequest {
|
||||
prompt: "Please review".to_string(),
|
||||
user_facing_hint: "review".to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.expect("submit review");
|
||||
|
||||
let mut reasoning_delta_count = 0;
|
||||
let mut legacy_reasoning_delta_count = 0;
|
||||
|
||||
loop {
|
||||
let ev = wait_for_event(&test.codex, |_| true).await;
|
||||
match ev {
|
||||
EventMsg::ReasoningContentDelta(_) => reasoning_delta_count += 1,
|
||||
EventMsg::AgentReasoningDelta(_) => legacy_reasoning_delta_count += 1,
|
||||
EventMsg::TaskComplete(_) => break,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(reasoning_delta_count, 1, "expected one new reasoning delta");
|
||||
assert_eq!(
|
||||
legacy_reasoning_delta_count, 1,
|
||||
"expected one legacy reasoning delta"
|
||||
);
|
||||
}
|
||||
1411
llmx-rs/core/tests/suite/compact.rs
Normal file
1411
llmx-rs/core/tests/suite/compact.rs
Normal file
File diff suppressed because it is too large
Load Diff
873
llmx-rs/core/tests/suite/compact_resume_fork.rs
Normal file
873
llmx-rs/core/tests/suite/compact_resume_fork.rs
Normal file
@@ -0,0 +1,873 @@
|
||||
#![allow(clippy::expect_used)]
|
||||
|
||||
//! Integration tests that cover compacting, resuming, and forking conversations.
|
||||
//!
|
||||
//! Each test sets up a mocked SSE conversation and drives the conversation through
|
||||
//! a specific sequence of operations. After every operation we capture the
|
||||
//! request payload that Codex would send to the model and assert that the
|
||||
//! model-visible history matches the expected sequence of messages.
|
||||
|
||||
use super::compact::COMPACT_WARNING_MESSAGE;
|
||||
use super::compact::FIRST_REPLY;
|
||||
use super::compact::SUMMARY_TEXT;
|
||||
use super::compact::TEST_COMPACT_PROMPT;
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::CodexConversation;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::NewConversation;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::OPENAI_DEFAULT_MODEL;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::WarningEvent;
|
||||
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::mount_sse_once_match;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::wait_for_event;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
use std::sync::Arc;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::MockServer;
|
||||
|
||||
const AFTER_SECOND_RESUME: &str = "AFTER_SECOND_RESUME";
|
||||
const COMPACT_PROMPT_MARKER: &str =
|
||||
"You are performing a CONTEXT CHECKPOINT COMPACTION for a tool.";
|
||||
|
||||
fn network_disabled() -> bool {
|
||||
std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok()
|
||||
}
|
||||
|
||||
fn filter_out_ghost_snapshot_entries(items: &[Value]) -> Vec<Value> {
|
||||
items
|
||||
.iter()
|
||||
.filter(|item| !is_ghost_snapshot_message(item))
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn is_ghost_snapshot_message(item: &Value) -> bool {
|
||||
if item.get("type").and_then(Value::as_str) != Some("message") {
|
||||
return false;
|
||||
}
|
||||
if item.get("role").and_then(Value::as_str) != Some("user") {
|
||||
return false;
|
||||
}
|
||||
item.get("content")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|content| content.first())
|
||||
.and_then(|entry| entry.get("text"))
|
||||
.and_then(Value::as_str)
|
||||
.is_some_and(|text| text.trim_start().starts_with("<ghost_snapshot>"))
|
||||
}
|
||||
|
||||
fn extract_summary_message(request: &Value, summary_text: &str) -> Value {
|
||||
request
|
||||
.get("input")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|items| {
|
||||
items.iter().find(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("message")
|
||||
&& item.get("role").and_then(Value::as_str) == Some("user")
|
||||
&& item
|
||||
.get("content")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|arr| arr.first())
|
||||
.and_then(|entry| entry.get("text"))
|
||||
.and_then(Value::as_str)
|
||||
== Some(summary_text)
|
||||
})
|
||||
})
|
||||
.cloned()
|
||||
.unwrap_or_else(|| panic!("expected summary message {summary_text}"))
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
/// Scenario: compact an initial conversation, resume it, fork one turn back, and
|
||||
/// ensure the model-visible history matches expectations at each request.
|
||||
async fn compact_resume_and_fork_preserve_model_history_view() {
|
||||
if network_disabled() {
|
||||
println!("Skipping test because network is disabled in this sandbox");
|
||||
return;
|
||||
}
|
||||
|
||||
// 1. Arrange mocked SSE responses for the initial compact/resume/fork flow.
|
||||
let server = MockServer::start().await;
|
||||
mount_initial_flow(&server).await;
|
||||
|
||||
// 2. Start a new conversation and drive it through the compact/resume/fork steps.
|
||||
let (_home, config, manager, base) = start_test_conversation(&server).await;
|
||||
|
||||
user_turn(&base, "hello world").await;
|
||||
compact_conversation(&base).await;
|
||||
user_turn(&base, "AFTER_COMPACT").await;
|
||||
let base_path = fetch_conversation_path(&base).await;
|
||||
assert!(
|
||||
base_path.exists(),
|
||||
"compact+resume test expects base path {base_path:?} to exist",
|
||||
);
|
||||
|
||||
let resumed = resume_conversation(&manager, &config, base_path).await;
|
||||
user_turn(&resumed, "AFTER_RESUME").await;
|
||||
let resumed_path = fetch_conversation_path(&resumed).await;
|
||||
assert!(
|
||||
resumed_path.exists(),
|
||||
"compact+resume test expects resumed path {resumed_path:?} to exist",
|
||||
);
|
||||
|
||||
let forked = fork_conversation(&manager, &config, resumed_path, 2).await;
|
||||
user_turn(&forked, "AFTER_FORK").await;
|
||||
|
||||
// 3. Capture the requests to the model and validate the history slices.
|
||||
let requests = gather_request_bodies(&server).await;
|
||||
|
||||
// input after compact is a prefix of input after resume/fork
|
||||
let input_after_compact = json!(requests[requests.len() - 3]["input"]);
|
||||
let input_after_resume = json!(requests[requests.len() - 2]["input"]);
|
||||
let input_after_fork = json!(requests[requests.len() - 1]["input"]);
|
||||
|
||||
let compact_arr = input_after_compact
|
||||
.as_array()
|
||||
.expect("input after compact should be an array");
|
||||
let resume_arr = input_after_resume
|
||||
.as_array()
|
||||
.expect("input after resume should be an array");
|
||||
let fork_arr = input_after_fork
|
||||
.as_array()
|
||||
.expect("input after fork should be an array");
|
||||
|
||||
assert!(
|
||||
compact_arr.len() <= resume_arr.len(),
|
||||
"after-resume input should have at least as many items as after-compact",
|
||||
);
|
||||
assert_eq!(compact_arr.as_slice(), &resume_arr[..compact_arr.len()]);
|
||||
|
||||
assert!(
|
||||
compact_arr.len() <= fork_arr.len(),
|
||||
"after-fork input should have at least as many items as after-compact",
|
||||
);
|
||||
assert_eq!(
|
||||
&compact_arr.as_slice()[..compact_arr.len()],
|
||||
&fork_arr[..compact_arr.len()]
|
||||
);
|
||||
|
||||
let prompt = requests[0]["instructions"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
let user_instructions = requests[0]["input"][0]["content"][0]["text"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
let environment_context = requests[0]["input"][1]["content"][0]["text"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
let tool_calls = json!(requests[0]["tools"].as_array());
|
||||
let prompt_cache_key = requests[0]["prompt_cache_key"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
let fork_prompt_cache_key = requests[requests.len() - 1]["prompt_cache_key"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
let expected_model = OPENAI_DEFAULT_MODEL;
|
||||
let summary_after_compact = extract_summary_message(&requests[2], SUMMARY_TEXT);
|
||||
let summary_after_resume = extract_summary_message(&requests[3], SUMMARY_TEXT);
|
||||
let summary_after_fork = extract_summary_message(&requests[4], SUMMARY_TEXT);
|
||||
let user_turn_1 = json!(
|
||||
{
|
||||
"model": expected_model,
|
||||
"instructions": prompt,
|
||||
"input": [
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": user_instructions
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": environment_context
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "hello world"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"tools": tool_calls,
|
||||
"tool_choice": "auto",
|
||||
"parallel_tool_calls": false,
|
||||
"reasoning": {
|
||||
"summary": "auto"
|
||||
},
|
||||
"store": false,
|
||||
"stream": true,
|
||||
"include": [
|
||||
"reasoning.encrypted_content"
|
||||
],
|
||||
"prompt_cache_key": prompt_cache_key
|
||||
});
|
||||
let compact_1 = json!(
|
||||
{
|
||||
"model": expected_model,
|
||||
"instructions": prompt,
|
||||
"input": [
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": user_instructions
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": environment_context
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "hello world"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"type": "output_text",
|
||||
"text": "FIRST_REPLY"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": TEST_COMPACT_PROMPT
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"tools": [],
|
||||
"tool_choice": "auto",
|
||||
"parallel_tool_calls": false,
|
||||
"reasoning": {
|
||||
"summary": "auto"
|
||||
},
|
||||
"store": false,
|
||||
"stream": true,
|
||||
"include": [
|
||||
"reasoning.encrypted_content"
|
||||
],
|
||||
"prompt_cache_key": prompt_cache_key
|
||||
});
|
||||
let user_turn_2_after_compact = json!(
|
||||
{
|
||||
"model": expected_model,
|
||||
"instructions": prompt,
|
||||
"input": [
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": user_instructions
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": environment_context
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "hello world"
|
||||
}
|
||||
]
|
||||
},
|
||||
summary_after_compact,
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "AFTER_COMPACT"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"tools": tool_calls,
|
||||
"tool_choice": "auto",
|
||||
"parallel_tool_calls": false,
|
||||
"reasoning": {
|
||||
"summary": "auto"
|
||||
},
|
||||
"store": false,
|
||||
"stream": true,
|
||||
"include": [
|
||||
"reasoning.encrypted_content"
|
||||
],
|
||||
"prompt_cache_key": prompt_cache_key
|
||||
});
|
||||
let usert_turn_3_after_resume = json!(
|
||||
{
|
||||
"model": expected_model,
|
||||
"instructions": prompt,
|
||||
"input": [
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": user_instructions
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": environment_context
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "hello world"
|
||||
}
|
||||
]
|
||||
},
|
||||
summary_after_resume,
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "AFTER_COMPACT"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"type": "output_text",
|
||||
"text": "AFTER_COMPACT_REPLY"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "AFTER_RESUME"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"tools": tool_calls,
|
||||
"tool_choice": "auto",
|
||||
"parallel_tool_calls": false,
|
||||
"reasoning": {
|
||||
"summary": "auto"
|
||||
},
|
||||
"store": false,
|
||||
"stream": true,
|
||||
"include": [
|
||||
"reasoning.encrypted_content"
|
||||
],
|
||||
"prompt_cache_key": prompt_cache_key
|
||||
});
|
||||
let user_turn_3_after_fork = json!(
|
||||
{
|
||||
"model": expected_model,
|
||||
"instructions": prompt,
|
||||
"input": [
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": user_instructions
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": environment_context
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "hello world"
|
||||
}
|
||||
]
|
||||
},
|
||||
summary_after_fork,
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "AFTER_COMPACT"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"type": "output_text",
|
||||
"text": "AFTER_COMPACT_REPLY"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "AFTER_FORK"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"tools": tool_calls,
|
||||
"tool_choice": "auto",
|
||||
"parallel_tool_calls": false,
|
||||
"reasoning": {
|
||||
"summary": "auto"
|
||||
},
|
||||
"store": false,
|
||||
"stream": true,
|
||||
"include": [
|
||||
"reasoning.encrypted_content"
|
||||
],
|
||||
"prompt_cache_key": fork_prompt_cache_key
|
||||
});
|
||||
let mut expected = json!([
|
||||
user_turn_1,
|
||||
compact_1,
|
||||
user_turn_2_after_compact,
|
||||
usert_turn_3_after_resume,
|
||||
user_turn_3_after_fork
|
||||
]);
|
||||
normalize_line_endings(&mut expected);
|
||||
assert_eq!(requests.len(), 5);
|
||||
assert_eq!(json!(requests), expected);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
/// Scenario: after the forked branch is compacted, resuming again should reuse
|
||||
/// the compacted history and only append the new user message.
|
||||
async fn compact_resume_after_second_compaction_preserves_history() {
|
||||
if network_disabled() {
|
||||
println!("Skipping test because network is disabled in this sandbox");
|
||||
return;
|
||||
}
|
||||
|
||||
// 1. Arrange mocked SSE responses for the initial flow plus the second compact.
|
||||
let server = MockServer::start().await;
|
||||
mount_initial_flow(&server).await;
|
||||
mount_second_compact_flow(&server).await;
|
||||
|
||||
// 2. Drive the conversation through compact -> resume -> fork -> compact -> resume.
|
||||
let (_home, config, manager, base) = start_test_conversation(&server).await;
|
||||
|
||||
user_turn(&base, "hello world").await;
|
||||
compact_conversation(&base).await;
|
||||
user_turn(&base, "AFTER_COMPACT").await;
|
||||
let base_path = fetch_conversation_path(&base).await;
|
||||
assert!(
|
||||
base_path.exists(),
|
||||
"second compact test expects base path {base_path:?} to exist",
|
||||
);
|
||||
|
||||
let resumed = resume_conversation(&manager, &config, base_path).await;
|
||||
user_turn(&resumed, "AFTER_RESUME").await;
|
||||
let resumed_path = fetch_conversation_path(&resumed).await;
|
||||
assert!(
|
||||
resumed_path.exists(),
|
||||
"second compact test expects resumed path {resumed_path:?} to exist",
|
||||
);
|
||||
|
||||
let forked = fork_conversation(&manager, &config, resumed_path, 3).await;
|
||||
user_turn(&forked, "AFTER_FORK").await;
|
||||
|
||||
compact_conversation(&forked).await;
|
||||
user_turn(&forked, "AFTER_COMPACT_2").await;
|
||||
let forked_path = fetch_conversation_path(&forked).await;
|
||||
assert!(
|
||||
forked_path.exists(),
|
||||
"second compact test expects forked path {forked_path:?} to exist",
|
||||
);
|
||||
|
||||
let resumed_again = resume_conversation(&manager, &config, forked_path).await;
|
||||
user_turn(&resumed_again, AFTER_SECOND_RESUME).await;
|
||||
|
||||
let requests = gather_request_bodies(&server).await;
|
||||
let input_after_compact = json!(requests[requests.len() - 2]["input"]);
|
||||
let input_after_resume = json!(requests[requests.len() - 1]["input"]);
|
||||
|
||||
// test input after compact before resume is the same as input after resume
|
||||
let compact_input_array = input_after_compact
|
||||
.as_array()
|
||||
.expect("input after compact should be an array");
|
||||
let resume_input_array = input_after_resume
|
||||
.as_array()
|
||||
.expect("input after resume should be an array");
|
||||
let compact_filtered = filter_out_ghost_snapshot_entries(compact_input_array);
|
||||
let resume_filtered = filter_out_ghost_snapshot_entries(resume_input_array);
|
||||
assert!(
|
||||
compact_filtered.len() <= resume_filtered.len(),
|
||||
"after-resume input should have at least as many items as after-compact"
|
||||
);
|
||||
assert_eq!(
|
||||
compact_filtered.as_slice(),
|
||||
&resume_filtered[..compact_filtered.len()]
|
||||
);
|
||||
// hard coded test
|
||||
let prompt = requests[0]["instructions"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
let user_instructions = requests[0]["input"][0]["content"][0]["text"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
let environment_instructions = requests[0]["input"][1]["content"][0]["text"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
|
||||
// Build expected final request input: initial context + forked user message +
|
||||
// compacted summary + post-compact user message + resumed user message.
|
||||
let summary_after_second_compact =
|
||||
extract_summary_message(&requests[requests.len() - 3], SUMMARY_TEXT);
|
||||
|
||||
let mut expected = json!([
|
||||
{
|
||||
"instructions": prompt,
|
||||
"input": [
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": user_instructions
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": environment_instructions
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "AFTER_FORK"
|
||||
}
|
||||
]
|
||||
},
|
||||
summary_after_second_compact,
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "AFTER_COMPACT_2"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "AFTER_SECOND_RESUME"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
}
|
||||
]);
|
||||
normalize_line_endings(&mut expected);
|
||||
let last_request_after_2_compacts = json!([{
|
||||
"instructions": requests[requests.len() -1]["instructions"],
|
||||
"input": requests[requests.len() -1]["input"],
|
||||
}]);
|
||||
assert_eq!(expected, last_request_after_2_compacts);
|
||||
}
|
||||
|
||||
fn normalize_line_endings(value: &mut Value) {
|
||||
match value {
|
||||
Value::String(text) => {
|
||||
if text.contains('\r') {
|
||||
*text = text.replace("\r\n", "\n").replace('\r', "\n");
|
||||
}
|
||||
}
|
||||
Value::Array(items) => {
|
||||
for item in items {
|
||||
normalize_line_endings(item);
|
||||
}
|
||||
}
|
||||
Value::Object(map) => {
|
||||
for item in map.values_mut() {
|
||||
normalize_line_endings(item);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
async fn gather_request_bodies(server: &MockServer) -> Vec<Value> {
|
||||
server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("mock server should not fail")
|
||||
.into_iter()
|
||||
.map(|req| {
|
||||
let mut value = req.body_json::<Value>().expect("valid JSON body");
|
||||
normalize_line_endings(&mut value);
|
||||
value
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
async fn mount_initial_flow(server: &MockServer) {
|
||||
let sse1 = sse(vec![
|
||||
ev_assistant_message("m1", FIRST_REPLY),
|
||||
ev_completed("r1"),
|
||||
]);
|
||||
let sse2 = sse(vec![
|
||||
ev_assistant_message("m2", SUMMARY_TEXT),
|
||||
ev_completed("r2"),
|
||||
]);
|
||||
let sse3 = sse(vec![
|
||||
ev_assistant_message("m3", "AFTER_COMPACT_REPLY"),
|
||||
ev_completed("r3"),
|
||||
]);
|
||||
let sse4 = sse(vec![ev_completed("r4")]);
|
||||
let sse5 = sse(vec![ev_completed("r5")]);
|
||||
|
||||
let match_first = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("\"text\":\"hello world\"")
|
||||
&& !body.contains(COMPACT_PROMPT_MARKER)
|
||||
&& !body.contains(&format!("\"text\":\"{SUMMARY_TEXT}\""))
|
||||
&& !body.contains("\"text\":\"AFTER_COMPACT\"")
|
||||
&& !body.contains("\"text\":\"AFTER_RESUME\"")
|
||||
&& !body.contains("\"text\":\"AFTER_FORK\"")
|
||||
};
|
||||
mount_sse_once_match(server, match_first, sse1).await;
|
||||
|
||||
let match_compact = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(COMPACT_PROMPT_MARKER)
|
||||
};
|
||||
mount_sse_once_match(server, match_compact, sse2).await;
|
||||
|
||||
let match_after_compact = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("\"text\":\"AFTER_COMPACT\"")
|
||||
&& !body.contains("\"text\":\"AFTER_RESUME\"")
|
||||
&& !body.contains("\"text\":\"AFTER_FORK\"")
|
||||
};
|
||||
mount_sse_once_match(server, match_after_compact, sse3).await;
|
||||
|
||||
let match_after_resume = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("\"text\":\"AFTER_RESUME\"")
|
||||
};
|
||||
mount_sse_once_match(server, match_after_resume, sse4).await;
|
||||
|
||||
let match_after_fork = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("\"text\":\"AFTER_FORK\"")
|
||||
};
|
||||
mount_sse_once_match(server, match_after_fork, sse5).await;
|
||||
}
|
||||
|
||||
async fn mount_second_compact_flow(server: &MockServer) {
|
||||
let sse6 = sse(vec![
|
||||
ev_assistant_message("m4", SUMMARY_TEXT),
|
||||
ev_completed("r6"),
|
||||
]);
|
||||
let sse7 = sse(vec![ev_completed("r7")]);
|
||||
|
||||
let match_second_compact = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(COMPACT_PROMPT_MARKER) && body.contains("AFTER_FORK")
|
||||
};
|
||||
mount_sse_once_match(server, match_second_compact, sse6).await;
|
||||
|
||||
let match_after_second_resume = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(&format!("\"text\":\"{AFTER_SECOND_RESUME}\""))
|
||||
};
|
||||
mount_sse_once_match(server, match_after_second_resume, sse7).await;
|
||||
}
|
||||
|
||||
async fn start_test_conversation(
|
||||
server: &MockServer,
|
||||
) -> (TempDir, Config, ConversationManager, Arc<CodexConversation>) {
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
let home = TempDir::new().expect("create temp dir");
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.model_provider = model_provider;
|
||||
config.compact_prompt = Some(TEST_COMPACT_PROMPT.to_string());
|
||||
|
||||
let manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
|
||||
let NewConversation { conversation, .. } = manager
|
||||
.new_conversation(config.clone())
|
||||
.await
|
||||
.expect("create conversation");
|
||||
|
||||
(home, config, manager, conversation)
|
||||
}
|
||||
|
||||
async fn user_turn(conversation: &Arc<CodexConversation>, text: &str) {
|
||||
conversation
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text { text: text.into() }],
|
||||
})
|
||||
.await
|
||||
.expect("submit user turn");
|
||||
wait_for_event(conversation, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
}
|
||||
|
||||
async fn compact_conversation(conversation: &Arc<CodexConversation>) {
|
||||
conversation
|
||||
.submit(Op::Compact)
|
||||
.await
|
||||
.expect("compact conversation");
|
||||
let warning_event = wait_for_event(conversation, |ev| matches!(ev, EventMsg::Warning(_))).await;
|
||||
let EventMsg::Warning(WarningEvent { message }) = warning_event else {
|
||||
panic!("expected warning event after compact");
|
||||
};
|
||||
assert_eq!(message, COMPACT_WARNING_MESSAGE);
|
||||
wait_for_event(conversation, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
}
|
||||
|
||||
async fn fetch_conversation_path(conversation: &Arc<CodexConversation>) -> std::path::PathBuf {
|
||||
conversation.rollout_path()
|
||||
}
|
||||
|
||||
async fn resume_conversation(
|
||||
manager: &ConversationManager,
|
||||
config: &Config,
|
||||
path: std::path::PathBuf,
|
||||
) -> Arc<CodexConversation> {
|
||||
let auth_manager =
|
||||
codex_core::AuthManager::from_auth_for_testing(CodexAuth::from_api_key("dummy"));
|
||||
let NewConversation { conversation, .. } = manager
|
||||
.resume_conversation_from_rollout(config.clone(), path, auth_manager)
|
||||
.await
|
||||
.expect("resume conversation");
|
||||
conversation
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
async fn fork_conversation(
|
||||
manager: &ConversationManager,
|
||||
config: &Config,
|
||||
path: std::path::PathBuf,
|
||||
nth_user_message: usize,
|
||||
) -> Arc<CodexConversation> {
|
||||
let NewConversation { conversation, .. } = manager
|
||||
.fork_conversation(nth_user_message, config.clone(), path)
|
||||
.await
|
||||
.expect("fork conversation");
|
||||
conversation
|
||||
}
|
||||
50
llmx-rs/core/tests/suite/deprecation_notice.rs
Normal file
50
llmx-rs/core/tests/suite/deprecation_notice.rs
Normal file
@@ -0,0 +1,50 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
|
||||
use anyhow::Ok;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::protocol::DeprecationNoticeEvent;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event_match;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn emits_deprecation_notice_for_legacy_feature_flag() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.features.enable(Feature::UnifiedExec);
|
||||
config
|
||||
.features
|
||||
.record_legacy_usage_force("use_experimental_unified_exec_tool", Feature::UnifiedExec);
|
||||
config.use_experimental_unified_exec_tool = true;
|
||||
});
|
||||
|
||||
let TestCodex { codex, .. } = builder.build(&server).await?;
|
||||
|
||||
let notice = wait_for_event_match(&codex, |event| match event {
|
||||
EventMsg::DeprecationNotice(ev) => Some(ev.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let DeprecationNoticeEvent { summary, details } = notice;
|
||||
assert_eq!(
|
||||
summary,
|
||||
"`use_experimental_unified_exec_tool` is deprecated. Use `unified_exec` instead."
|
||||
.to_string(),
|
||||
);
|
||||
assert_eq!(
|
||||
details.as_deref(),
|
||||
Some(
|
||||
"Enable it with `--enable unified_exec` or `[features].unified_exec` in config.toml. See https://github.com/openai/codex/blob/main/docs/config.md#feature-flags for details."
|
||||
),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
127
llmx-rs/core/tests/suite/exec.rs
Normal file
127
llmx-rs/core/tests/suite/exec.rs
Normal file
@@ -0,0 +1,127 @@
|
||||
#![cfg(target_os = "macos")]
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::string::ToString;
|
||||
|
||||
use codex_core::exec::ExecParams;
|
||||
use codex_core::exec::ExecToolCallOutput;
|
||||
use codex_core::exec::SandboxType;
|
||||
use codex_core::exec::process_exec_tool_call;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_core::spawn::CODEX_SANDBOX_ENV_VAR;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use codex_core::error::Result;
|
||||
|
||||
use codex_core::get_platform_sandbox;
|
||||
|
||||
fn skip_test() -> bool {
|
||||
if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) {
|
||||
eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
|
||||
return true;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
#[expect(clippy::expect_used)]
|
||||
async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput> {
|
||||
let sandbox_type = get_platform_sandbox().expect("should be able to get sandbox type");
|
||||
assert_eq!(sandbox_type, SandboxType::MacosSeatbelt);
|
||||
|
||||
let params = ExecParams {
|
||||
command: cmd.iter().map(ToString::to_string).collect(),
|
||||
cwd: tmp.path().to_path_buf(),
|
||||
timeout_ms: Some(1000),
|
||||
env: HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
};
|
||||
|
||||
let policy = SandboxPolicy::new_read_only_policy();
|
||||
|
||||
process_exec_tool_call(params, sandbox_type, &policy, tmp.path(), &None, None).await
|
||||
}
|
||||
|
||||
/// Command succeeds with exit code 0 normally
|
||||
#[tokio::test]
|
||||
async fn exit_code_0_succeeds() {
|
||||
if skip_test() {
|
||||
return;
|
||||
}
|
||||
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let cmd = vec!["echo", "hello"];
|
||||
|
||||
let output = run_test_cmd(tmp, cmd).await.unwrap();
|
||||
assert_eq!(output.stdout.text, "hello\n");
|
||||
assert_eq!(output.stderr.text, "");
|
||||
assert_eq!(output.stdout.truncated_after_lines, None);
|
||||
}
|
||||
|
||||
/// Command succeeds with exit code 0 normally
|
||||
#[tokio::test]
|
||||
async fn truncates_output_lines() {
|
||||
if skip_test() {
|
||||
return;
|
||||
}
|
||||
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let cmd = vec!["seq", "300"];
|
||||
|
||||
let output = run_test_cmd(tmp, cmd).await.unwrap();
|
||||
|
||||
let expected_output = (1..=300)
|
||||
.map(|i| format!("{i}\n"))
|
||||
.collect::<Vec<_>>()
|
||||
.join("");
|
||||
assert_eq!(output.stdout.text, expected_output);
|
||||
assert_eq!(output.stdout.truncated_after_lines, None);
|
||||
}
|
||||
|
||||
/// Command succeeds with exit code 0 normally
|
||||
#[tokio::test]
|
||||
async fn truncates_output_bytes() {
|
||||
if skip_test() {
|
||||
return;
|
||||
}
|
||||
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
// each line is 1000 bytes
|
||||
let cmd = vec!["bash", "-lc", "seq 15 | awk '{printf \"%-1000s\\n\", $0}'"];
|
||||
|
||||
let output = run_test_cmd(tmp, cmd).await.unwrap();
|
||||
|
||||
assert!(output.stdout.text.len() >= 15000);
|
||||
assert_eq!(output.stdout.truncated_after_lines, None);
|
||||
}
|
||||
|
||||
/// Command not found returns exit code 127, this is not considered a sandbox error
|
||||
#[tokio::test]
|
||||
async fn exit_command_not_found_is_ok() {
|
||||
if skip_test() {
|
||||
return;
|
||||
}
|
||||
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let cmd = vec!["/bin/bash", "-c", "nonexistent_command_12345"];
|
||||
run_test_cmd(tmp, cmd).await.unwrap();
|
||||
}
|
||||
|
||||
/// Writing a file fails and should be considered a sandbox error
|
||||
#[tokio::test]
|
||||
async fn write_file_fails_as_sandbox_error() {
|
||||
if skip_test() {
|
||||
return;
|
||||
}
|
||||
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let path = tmp.path().join("test.txt");
|
||||
let cmd = vec![
|
||||
"/user/bin/touch",
|
||||
path.to_str().expect("should be able to get path"),
|
||||
];
|
||||
|
||||
assert!(run_test_cmd(tmp, cmd).await.is_err());
|
||||
}
|
||||
168
llmx-rs/core/tests/suite/fork_conversation.rs
Normal file
168
llmx-rs/core/tests/suite/fork_conversation.rs
Normal file
@@ -0,0 +1,168 @@
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::NewConversation;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::parse_turn_item;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::RolloutItem;
|
||||
use codex_core::protocol::RolloutLine;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::wait_for_event;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
/// Build minimal SSE stream with completed marker using the JSON fixture.
|
||||
fn sse_completed(id: &str) -> String {
|
||||
core_test_support::load_sse_fixture_with_id("tests/fixtures/completed_template.json", id)
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn fork_conversation_twice_drops_to_first_message() {
|
||||
skip_if_no_network!();
|
||||
|
||||
// Start a mock server that completes three turns.
|
||||
let server = MockServer::start().await;
|
||||
let sse = sse_completed("resp");
|
||||
let first = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse.clone(), "text/event-stream");
|
||||
|
||||
// Expect three calls to /v1/responses – one per user input.
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(first)
|
||||
.expect(3)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
// Configure Codex to use the mock server.
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
|
||||
let home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.model_provider = model_provider.clone();
|
||||
let config_for_fork = config.clone();
|
||||
|
||||
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
|
||||
let NewConversation {
|
||||
conversation: codex,
|
||||
..
|
||||
} = conversation_manager
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.expect("create conversation");
|
||||
|
||||
// Send three user messages; wait for three completed turns.
|
||||
for text in ["first", "second", "third"] {
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: text.to_string(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
let _ = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
}
|
||||
|
||||
// Request history from the base conversation to obtain rollout path.
|
||||
let base_path = codex.rollout_path();
|
||||
|
||||
// GetHistory flushes before returning the path; no wait needed.
|
||||
|
||||
// Helper: read rollout items (excluding SessionMeta) from a JSONL path.
|
||||
let read_items = |p: &std::path::Path| -> Vec<RolloutItem> {
|
||||
let text = std::fs::read_to_string(p).expect("read rollout file");
|
||||
let mut items: Vec<RolloutItem> = Vec::new();
|
||||
for line in text.lines() {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
let v: serde_json::Value = serde_json::from_str(line).expect("jsonl line");
|
||||
let rl: RolloutLine = serde_json::from_value(v).expect("rollout line");
|
||||
match rl.item {
|
||||
RolloutItem::SessionMeta(_) => {}
|
||||
other => items.push(other),
|
||||
}
|
||||
}
|
||||
items
|
||||
};
|
||||
|
||||
// Compute expected prefixes after each fork by truncating base rollout
|
||||
// strictly before the nth user input (0-based).
|
||||
let base_items = read_items(&base_path);
|
||||
let find_user_input_positions = |items: &[RolloutItem]| -> Vec<usize> {
|
||||
let mut pos = Vec::new();
|
||||
for (i, it) in items.iter().enumerate() {
|
||||
if let RolloutItem::ResponseItem(response_item) = it
|
||||
&& let Some(TurnItem::UserMessage(_)) = parse_turn_item(response_item)
|
||||
{
|
||||
// Consider any user message as an input boundary; recorder stores both EventMsg and ResponseItem.
|
||||
// We specifically look for input items, which are represented as ContentItem::InputText.
|
||||
pos.push(i);
|
||||
}
|
||||
}
|
||||
pos
|
||||
};
|
||||
let user_inputs = find_user_input_positions(&base_items);
|
||||
|
||||
// After cutting at nth user input (n=1 → second user message), cut strictly before that input.
|
||||
let cut1 = user_inputs.get(1).copied().unwrap_or(0);
|
||||
let expected_after_first: Vec<RolloutItem> = base_items[..cut1].to_vec();
|
||||
|
||||
// After dropping again (n=1 on fork1), compute expected relative to fork1's rollout.
|
||||
|
||||
// Fork once with n=1 → drops the last user input and everything after.
|
||||
let NewConversation {
|
||||
conversation: codex_fork1,
|
||||
..
|
||||
} = conversation_manager
|
||||
.fork_conversation(1, config_for_fork.clone(), base_path.clone())
|
||||
.await
|
||||
.expect("fork 1");
|
||||
|
||||
let fork1_path = codex_fork1.rollout_path();
|
||||
|
||||
// GetHistory on fork1 flushed; the file is ready.
|
||||
let fork1_items = read_items(&fork1_path);
|
||||
pretty_assertions::assert_eq!(
|
||||
serde_json::to_value(&fork1_items).unwrap(),
|
||||
serde_json::to_value(&expected_after_first).unwrap()
|
||||
);
|
||||
|
||||
// Fork again with n=0 → drops the (new) last user message, leaving only the first.
|
||||
let NewConversation {
|
||||
conversation: codex_fork2,
|
||||
..
|
||||
} = conversation_manager
|
||||
.fork_conversation(0, config_for_fork.clone(), fork1_path.clone())
|
||||
.await
|
||||
.expect("fork 2");
|
||||
|
||||
let fork2_path = codex_fork2.rollout_path();
|
||||
// GetHistory on fork2 flushed; the file is ready.
|
||||
let fork1_items = read_items(&fork1_path);
|
||||
let fork1_user_inputs = find_user_input_positions(&fork1_items);
|
||||
let cut_last_on_fork1 = fork1_user_inputs
|
||||
.get(fork1_user_inputs.len().saturating_sub(1))
|
||||
.copied()
|
||||
.unwrap_or(0);
|
||||
let expected_after_second: Vec<RolloutItem> = fork1_items[..cut_last_on_fork1].to_vec();
|
||||
let fork2_items = read_items(&fork2_path);
|
||||
pretty_assertions::assert_eq!(
|
||||
serde_json::to_value(&fork2_items).unwrap(),
|
||||
serde_json::to_value(&expected_after_second).unwrap()
|
||||
);
|
||||
}
|
||||
237
llmx-rs/core/tests/suite/grep_files.rs
Normal file
237
llmx-rs/core/tests/suite/grep_files.rs
Normal file
@@ -0,0 +1,237 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::model_family::find_family_for_model;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use serde_json::Value;
|
||||
use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
use std::process::Command as StdCommand;
|
||||
use wiremock::matchers::any;
|
||||
|
||||
const MODEL_WITH_TOOL: &str = "test-gpt-5-codex";
|
||||
|
||||
fn ripgrep_available() -> bool {
|
||||
StdCommand::new("rg")
|
||||
.arg("--version")
|
||||
.output()
|
||||
.map(|output| output.status.success())
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
macro_rules! skip_if_ripgrep_missing {
|
||||
($ret:expr $(,)?) => {{
|
||||
if !ripgrep_available() {
|
||||
eprintln!("rg not available in PATH; skipping test");
|
||||
return $ret;
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn grep_files_tool_collects_matches() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
skip_if_ripgrep_missing!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let test = build_test_codex(&server).await?;
|
||||
|
||||
let search_dir = test.cwd.path().join("src");
|
||||
std::fs::create_dir_all(&search_dir)?;
|
||||
let alpha = search_dir.join("alpha.rs");
|
||||
let beta = search_dir.join("beta.rs");
|
||||
let gamma = search_dir.join("gamma.txt");
|
||||
std::fs::write(&alpha, "alpha needle\n")?;
|
||||
std::fs::write(&beta, "beta needle\n")?;
|
||||
std::fs::write(&gamma, "needle in text but excluded\n")?;
|
||||
|
||||
let call_id = "grep-files-collect";
|
||||
let arguments = serde_json::json!({
|
||||
"pattern": "needle",
|
||||
"path": search_dir.to_string_lossy(),
|
||||
"include": "*.rs",
|
||||
})
|
||||
.to_string();
|
||||
|
||||
mount_tool_sequence(&server, call_id, &arguments, "grep_files").await;
|
||||
submit_turn(&test, "please find uses of needle").await?;
|
||||
|
||||
let bodies = recorded_bodies(&server).await?;
|
||||
let tool_output = find_tool_output(&bodies, call_id).expect("tool output present");
|
||||
let payload = tool_output.get("output").expect("output field present");
|
||||
let (content_opt, success_opt) = extract_content_and_success(payload);
|
||||
let content = content_opt.expect("content present");
|
||||
let success = success_opt.unwrap_or(true);
|
||||
assert!(success, "expected success for matches, got {payload:?}");
|
||||
|
||||
let entries = collect_file_names(content);
|
||||
assert_eq!(entries.len(), 2, "content: {content}");
|
||||
assert!(
|
||||
entries.contains("alpha.rs"),
|
||||
"missing alpha.rs in {entries:?}"
|
||||
);
|
||||
assert!(
|
||||
entries.contains("beta.rs"),
|
||||
"missing beta.rs in {entries:?}"
|
||||
);
|
||||
assert!(
|
||||
!entries.contains("gamma.txt"),
|
||||
"txt file should be filtered out: {entries:?}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn grep_files_tool_reports_empty_results() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
skip_if_ripgrep_missing!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let test = build_test_codex(&server).await?;
|
||||
|
||||
let search_dir = test.cwd.path().join("logs");
|
||||
std::fs::create_dir_all(&search_dir)?;
|
||||
std::fs::write(search_dir.join("output.txt"), "no hits here")?;
|
||||
|
||||
let call_id = "grep-files-empty";
|
||||
let arguments = serde_json::json!({
|
||||
"pattern": "needle",
|
||||
"path": search_dir.to_string_lossy(),
|
||||
"limit": 5,
|
||||
})
|
||||
.to_string();
|
||||
|
||||
mount_tool_sequence(&server, call_id, &arguments, "grep_files").await;
|
||||
submit_turn(&test, "search again").await?;
|
||||
|
||||
let bodies = recorded_bodies(&server).await?;
|
||||
let tool_output = find_tool_output(&bodies, call_id).expect("tool output present");
|
||||
let payload = tool_output.get("output").expect("output field present");
|
||||
let (content_opt, success_opt) = extract_content_and_success(payload);
|
||||
let content = content_opt.expect("content present");
|
||||
if let Some(success) = success_opt {
|
||||
assert!(!success, "expected success=false payload: {payload:?}");
|
||||
}
|
||||
assert_eq!(content, "No matches found.");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(clippy::expect_used)]
|
||||
async fn build_test_codex(server: &wiremock::MockServer) -> Result<TestCodex> {
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = MODEL_WITH_TOOL.to_string();
|
||||
config.model_family =
|
||||
find_family_for_model(MODEL_WITH_TOOL).expect("model family for test model");
|
||||
});
|
||||
builder.build(server).await
|
||||
}
|
||||
|
||||
async fn submit_turn(test: &TestCodex, prompt: &str) -> Result<()> {
|
||||
let session_model = test.session_configured.model.clone();
|
||||
|
||||
test.codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: prompt.into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: test.cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::TaskComplete(_))
|
||||
})
|
||||
.await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn mount_tool_sequence(
|
||||
server: &wiremock::MockServer,
|
||||
call_id: &str,
|
||||
arguments: &str,
|
||||
tool_name: &str,
|
||||
) {
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, tool_name, arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
responses::mount_sse_once_match(server, any(), second_response).await;
|
||||
}
|
||||
|
||||
#[allow(clippy::expect_used)]
|
||||
async fn recorded_bodies(server: &wiremock::MockServer) -> Result<Vec<Value>> {
|
||||
let requests = server.received_requests().await.expect("requests recorded");
|
||||
Ok(requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().expect("request json"))
|
||||
.collect())
|
||||
}
|
||||
|
||||
fn find_tool_output<'a>(requests: &'a [Value], call_id: &str) -> Option<&'a Value> {
|
||||
requests.iter().find_map(|body| {
|
||||
body.get("input")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|items| {
|
||||
items.iter().find(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
|
||||
})
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
fn collect_file_names(content: &str) -> HashSet<String> {
|
||||
content
|
||||
.lines()
|
||||
.filter_map(|line| {
|
||||
if line.trim().is_empty() {
|
||||
return None;
|
||||
}
|
||||
Path::new(line)
|
||||
.file_name()
|
||||
.map(|name| name.to_string_lossy().into_owned())
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn extract_content_and_success(value: &Value) -> (Option<&str>, Option<bool>) {
|
||||
match value {
|
||||
Value::String(text) => (Some(text.as_str()), None),
|
||||
Value::Object(obj) => (
|
||||
obj.get("content").and_then(Value::as_str),
|
||||
obj.get("success").and_then(Value::as_bool),
|
||||
),
|
||||
_ => (None, None),
|
||||
}
|
||||
}
|
||||
419
llmx-rs/core/tests/suite/items.rs
Normal file
419
llmx-rs/core/tests/suite/items.rs
Normal file
@@ -0,0 +1,419 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
|
||||
use anyhow::Ok;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::ItemCompletedEvent;
|
||||
use codex_core::protocol::ItemStartedEvent;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_message_item_added;
|
||||
use core_test_support::responses::ev_output_text_delta;
|
||||
use core_test_support::responses::ev_reasoning_item;
|
||||
use core_test_support::responses::ev_reasoning_item_added;
|
||||
use core_test_support::responses::ev_reasoning_summary_text_delta;
|
||||
use core_test_support::responses::ev_reasoning_text_delta;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::ev_web_search_call_added;
|
||||
use core_test_support::responses::ev_web_search_call_done;
|
||||
use core_test_support::responses::mount_sse_once_match;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event_match;
|
||||
use pretty_assertions::assert_eq;
|
||||
use wiremock::matchers::any;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn user_message_item_is_emitted() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex().build(&server).await?;
|
||||
|
||||
let first_response = sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]);
|
||||
mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: (vec![UserInput::Text {
|
||||
text: "please inspect sample.txt".into(),
|
||||
}]),
|
||||
})
|
||||
.await?;
|
||||
|
||||
let started_item = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemStarted(ItemStartedEvent {
|
||||
item: TurnItem::UserMessage(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
let completed_item = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemCompleted(ItemCompletedEvent {
|
||||
item: TurnItem::UserMessage(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(started_item.id, completed_item.id);
|
||||
assert_eq!(
|
||||
started_item.content,
|
||||
vec![UserInput::Text {
|
||||
text: "please inspect sample.txt".into(),
|
||||
}]
|
||||
);
|
||||
assert_eq!(
|
||||
completed_item.content,
|
||||
vec![UserInput::Text {
|
||||
text: "please inspect sample.txt".into(),
|
||||
}]
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn assistant_message_item_is_emitted() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex().build(&server).await?;
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_assistant_message("msg-1", "all done"),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please summarize results".into(),
|
||||
}],
|
||||
})
|
||||
.await?;
|
||||
|
||||
let started = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemStarted(ItemStartedEvent {
|
||||
item: TurnItem::AgentMessage(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
let completed = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemCompleted(ItemCompletedEvent {
|
||||
item: TurnItem::AgentMessage(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(started.id, completed.id);
|
||||
let Some(codex_protocol::items::AgentMessageContent::Text { text }) = completed.content.first()
|
||||
else {
|
||||
panic!("expected agent message text content");
|
||||
};
|
||||
assert_eq!(text, "all done");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn reasoning_item_is_emitted() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex().build(&server).await?;
|
||||
|
||||
let reasoning_item = ev_reasoning_item(
|
||||
"reasoning-1",
|
||||
&["Consider inputs", "Compute output"],
|
||||
&["Detailed reasoning trace"],
|
||||
);
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
reasoning_item,
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "explain your reasoning".into(),
|
||||
}],
|
||||
})
|
||||
.await?;
|
||||
|
||||
let started = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemStarted(ItemStartedEvent {
|
||||
item: TurnItem::Reasoning(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
let completed = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemCompleted(ItemCompletedEvent {
|
||||
item: TurnItem::Reasoning(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(started.id, completed.id);
|
||||
assert_eq!(
|
||||
completed.summary_text,
|
||||
vec!["Consider inputs".to_string(), "Compute output".to_string()]
|
||||
);
|
||||
assert_eq!(
|
||||
completed.raw_content,
|
||||
vec!["Detailed reasoning trace".to_string()]
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn web_search_item_is_emitted() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex().build(&server).await?;
|
||||
|
||||
let web_search_added =
|
||||
ev_web_search_call_added("web-search-1", "in_progress", "weather seattle");
|
||||
let web_search_done = ev_web_search_call_done("web-search-1", "completed", "weather seattle");
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
web_search_added,
|
||||
web_search_done,
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "find the weather".into(),
|
||||
}],
|
||||
})
|
||||
.await?;
|
||||
|
||||
let started = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemStarted(ItemStartedEvent {
|
||||
item: TurnItem::WebSearch(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
let completed = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemCompleted(ItemCompletedEvent {
|
||||
item: TurnItem::WebSearch(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(started.id, completed.id);
|
||||
assert_eq!(completed.query, "weather seattle");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn agent_message_content_delta_has_item_metadata() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex {
|
||||
codex,
|
||||
session_configured,
|
||||
..
|
||||
} = test_codex().build(&server).await?;
|
||||
|
||||
let stream = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_message_item_added("msg-1", ""),
|
||||
ev_output_text_delta("streamed response"),
|
||||
ev_assistant_message("msg-1", "streamed response"),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
mount_sse_once_match(&server, any(), stream).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please stream text".into(),
|
||||
}],
|
||||
})
|
||||
.await?;
|
||||
|
||||
let (started_turn_id, started_item) = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemStarted(ItemStartedEvent {
|
||||
turn_id,
|
||||
item: TurnItem::AgentMessage(item),
|
||||
..
|
||||
}) => Some((turn_id.clone(), item.clone())),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let delta_event = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::AgentMessageContentDelta(event) => Some(event.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
let legacy_delta = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::AgentMessageDelta(event) => Some(event.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
let completed_item = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemCompleted(ItemCompletedEvent {
|
||||
item: TurnItem::AgentMessage(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let session_id = session_configured.session_id.to_string();
|
||||
assert_eq!(delta_event.thread_id, session_id);
|
||||
assert_eq!(delta_event.turn_id, started_turn_id);
|
||||
assert_eq!(delta_event.item_id, started_item.id);
|
||||
assert_eq!(delta_event.delta, "streamed response");
|
||||
assert_eq!(legacy_delta.delta, "streamed response");
|
||||
assert_eq!(completed_item.id, started_item.id);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn reasoning_content_delta_has_item_metadata() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex().build(&server).await?;
|
||||
|
||||
let stream = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_reasoning_item_added("reasoning-1", &[""]),
|
||||
ev_reasoning_summary_text_delta("step one"),
|
||||
ev_reasoning_item("reasoning-1", &["step one"], &[]),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
mount_sse_once_match(&server, any(), stream).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "reason through it".into(),
|
||||
}],
|
||||
})
|
||||
.await?;
|
||||
|
||||
let reasoning_item = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemStarted(ItemStartedEvent {
|
||||
item: TurnItem::Reasoning(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let delta_event = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ReasoningContentDelta(event) => Some(event.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
let legacy_delta = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::AgentReasoningDelta(event) => Some(event.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(delta_event.item_id, reasoning_item.id);
|
||||
assert_eq!(delta_event.delta, "step one");
|
||||
assert_eq!(legacy_delta.delta, "step one");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn reasoning_raw_content_delta_respects_flag() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(|config| {
|
||||
config.show_raw_agent_reasoning = true;
|
||||
})
|
||||
.build(&server)
|
||||
.await?;
|
||||
|
||||
let stream = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_reasoning_item_added("reasoning-raw", &[""]),
|
||||
ev_reasoning_text_delta("raw detail"),
|
||||
ev_reasoning_item("reasoning-raw", &["complete"], &["raw detail"]),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
mount_sse_once_match(&server, any(), stream).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "show raw reasoning".into(),
|
||||
}],
|
||||
})
|
||||
.await?;
|
||||
|
||||
let reasoning_item = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemStarted(ItemStartedEvent {
|
||||
item: TurnItem::Reasoning(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let delta_event = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ReasoningRawContentDelta(event) => Some(event.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
let legacy_delta = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::AgentReasoningRawContentDelta(event) => Some(event.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(delta_event.item_id, reasoning_item.id);
|
||||
assert_eq!(delta_event.delta, "raw detail");
|
||||
assert_eq!(legacy_delta.delta, "raw detail");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
106
llmx-rs/core/tests/suite/json_result.rs
Normal file
106
llmx-rs/core/tests/suite/json_result.rs
Normal file
@@ -0,0 +1,106 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use pretty_assertions::assert_eq;
|
||||
use responses::ev_assistant_message;
|
||||
use responses::ev_completed;
|
||||
use responses::sse;
|
||||
use responses::start_mock_server;
|
||||
|
||||
const SCHEMA: &str = r#"
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"explanation": { "type": "string" },
|
||||
"final_answer": { "type": "string" }
|
||||
},
|
||||
"required": ["explanation", "final_answer"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
"#;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn codex_returns_json_result_for_gpt5() -> anyhow::Result<()> {
|
||||
codex_returns_json_result("gpt-5".to_string()).await
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn codex_returns_json_result_for_gpt5_codex() -> anyhow::Result<()> {
|
||||
codex_returns_json_result("gpt-5-codex".to_string()).await
|
||||
}
|
||||
|
||||
async fn codex_returns_json_result(model: String) -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let sse1 = sse(vec![
|
||||
ev_assistant_message(
|
||||
"m2",
|
||||
r#"{"explanation": "explanation", "final_answer": "final_answer"}"#,
|
||||
),
|
||||
ev_completed("r1"),
|
||||
]);
|
||||
|
||||
let expected_schema: serde_json::Value = serde_json::from_str(SCHEMA)?;
|
||||
let match_json_text_param = move |req: &wiremock::Request| {
|
||||
let body: serde_json::Value = serde_json::from_slice(&req.body).unwrap_or_default();
|
||||
let Some(text) = body.get("text") else {
|
||||
return false;
|
||||
};
|
||||
let Some(format) = text.get("format") else {
|
||||
return false;
|
||||
};
|
||||
|
||||
format.get("name") == Some(&serde_json::Value::String("codex_output_schema".into()))
|
||||
&& format.get("type") == Some(&serde_json::Value::String("json_schema".into()))
|
||||
&& format.get("strict") == Some(&serde_json::Value::Bool(true))
|
||||
&& format.get("schema") == Some(&expected_schema)
|
||||
};
|
||||
responses::mount_sse_once_match(&server, match_json_text_param, sse1).await;
|
||||
|
||||
let TestCodex { codex, cwd, .. } = test_codex().build(&server).await?;
|
||||
|
||||
// 1) Normal user input – should hit server once.
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello world".into(),
|
||||
}],
|
||||
final_output_json_schema: Some(serde_json::from_str(SCHEMA)?),
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let message = wait_for_event(&codex, |ev| matches!(ev, EventMsg::AgentMessage(_))).await;
|
||||
if let EventMsg::AgentMessage(message) = message {
|
||||
let json: serde_json::Value = serde_json::from_str(&message.message)?;
|
||||
assert_eq!(
|
||||
json.get("explanation"),
|
||||
Some(&serde_json::Value::String("explanation".into()))
|
||||
);
|
||||
assert_eq!(
|
||||
json.get("final_answer"),
|
||||
Some(&serde_json::Value::String("final_answer".into()))
|
||||
);
|
||||
} else {
|
||||
anyhow::bail!("expected agent message event");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
460
llmx-rs/core/tests/suite/list_dir.rs
Normal file
460
llmx-rs/core/tests/suite/list_dir.rs
Normal file
@@ -0,0 +1,460 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::Value;
|
||||
use wiremock::matchers::any;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
#[ignore = "disabled until we enable list_dir tool"]
|
||||
async fn list_dir_tool_returns_entries() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = test_codex().build(&server).await?;
|
||||
|
||||
let dir_path = cwd.path().join("sample_dir");
|
||||
std::fs::create_dir(&dir_path)?;
|
||||
std::fs::write(dir_path.join("alpha.txt"), "first file")?;
|
||||
std::fs::create_dir(dir_path.join("nested"))?;
|
||||
let dir_path = dir_path.to_string_lossy().to_string();
|
||||
|
||||
let call_id = "list-dir-call";
|
||||
let arguments = serde_json::json!({
|
||||
"dir_path": dir_path,
|
||||
"offset": 1,
|
||||
"limit": 2,
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "list_dir", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "list directory contents".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
let request_bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
assert!(
|
||||
!request_bodies.is_empty(),
|
||||
"expected at least one request body"
|
||||
);
|
||||
|
||||
let tool_output_item = request_bodies
|
||||
.iter()
|
||||
.find_map(|body| {
|
||||
body.get("input")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|items| {
|
||||
items.iter().find(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
})
|
||||
})
|
||||
})
|
||||
.unwrap_or_else(|| {
|
||||
panic!("function_call_output item not found in requests: {request_bodies:#?}")
|
||||
});
|
||||
|
||||
assert_eq!(
|
||||
tool_output_item.get("call_id").and_then(Value::as_str),
|
||||
Some(call_id)
|
||||
);
|
||||
|
||||
let output_text = tool_output_item
|
||||
.get("output")
|
||||
.and_then(|value| match value {
|
||||
Value::String(text) => Some(text.as_str()),
|
||||
Value::Object(obj) => obj.get("content").and_then(Value::as_str),
|
||||
_ => None,
|
||||
})
|
||||
.expect("output text present");
|
||||
assert_eq!(output_text, "E1: [file] alpha.txt\nE2: [dir] nested");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
#[ignore = "disabled until we enable list_dir tool"]
|
||||
async fn list_dir_tool_depth_one_omits_children() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = test_codex().build(&server).await?;
|
||||
|
||||
let dir_path = cwd.path().join("depth_one");
|
||||
std::fs::create_dir(&dir_path)?;
|
||||
std::fs::write(dir_path.join("alpha.txt"), "alpha")?;
|
||||
std::fs::create_dir(dir_path.join("nested"))?;
|
||||
std::fs::write(dir_path.join("nested").join("beta.txt"), "beta")?;
|
||||
let dir_path = dir_path.to_string_lossy().to_string();
|
||||
|
||||
let call_id = "list-dir-depth1";
|
||||
let arguments = serde_json::json!({
|
||||
"dir_path": dir_path,
|
||||
"offset": 1,
|
||||
"limit": 10,
|
||||
"depth": 1,
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "list_dir", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "list directory contents depth one".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
let request_bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
assert!(
|
||||
!request_bodies.is_empty(),
|
||||
"expected at least one request body"
|
||||
);
|
||||
|
||||
let tool_output_item = request_bodies
|
||||
.iter()
|
||||
.find_map(|body| {
|
||||
body.get("input")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|items| {
|
||||
items.iter().find(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
})
|
||||
})
|
||||
})
|
||||
.unwrap_or_else(|| {
|
||||
panic!("function_call_output item not found in requests: {request_bodies:#?}")
|
||||
});
|
||||
|
||||
assert_eq!(
|
||||
tool_output_item.get("call_id").and_then(Value::as_str),
|
||||
Some(call_id)
|
||||
);
|
||||
|
||||
let output_text = tool_output_item
|
||||
.get("output")
|
||||
.and_then(|value| match value {
|
||||
Value::String(text) => Some(text.as_str()),
|
||||
Value::Object(obj) => obj.get("content").and_then(Value::as_str),
|
||||
_ => None,
|
||||
})
|
||||
.expect("output text present");
|
||||
assert_eq!(output_text, "E1: [file] alpha.txt\nE2: [dir] nested");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
#[ignore = "disabled until we enable list_dir tool"]
|
||||
async fn list_dir_tool_depth_two_includes_children_only() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = test_codex().build(&server).await?;
|
||||
|
||||
let dir_path = cwd.path().join("depth_two");
|
||||
std::fs::create_dir(&dir_path)?;
|
||||
std::fs::write(dir_path.join("alpha.txt"), "alpha")?;
|
||||
let nested = dir_path.join("nested");
|
||||
std::fs::create_dir(&nested)?;
|
||||
std::fs::write(nested.join("beta.txt"), "beta")?;
|
||||
let deeper = nested.join("grand");
|
||||
std::fs::create_dir(&deeper)?;
|
||||
std::fs::write(deeper.join("gamma.txt"), "gamma")?;
|
||||
let dir_path_string = dir_path.to_string_lossy().to_string();
|
||||
|
||||
let call_id = "list-dir-depth2";
|
||||
let arguments = serde_json::json!({
|
||||
"dir_path": dir_path_string,
|
||||
"offset": 1,
|
||||
"limit": 10,
|
||||
"depth": 2,
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let first_response = sse(vec![
|
||||
serde_json::json!({
|
||||
"type": "response.created",
|
||||
"response": {"id": "resp-1"}
|
||||
}),
|
||||
ev_function_call(call_id, "list_dir", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "list directory contents depth two".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
let request_bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
assert!(
|
||||
!request_bodies.is_empty(),
|
||||
"expected at least one request body"
|
||||
);
|
||||
|
||||
let tool_output_item = request_bodies
|
||||
.iter()
|
||||
.find_map(|body| {
|
||||
body.get("input")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|items| {
|
||||
items.iter().find(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
})
|
||||
})
|
||||
})
|
||||
.unwrap_or_else(|| {
|
||||
panic!("function_call_output item not found in requests: {request_bodies:#?}")
|
||||
});
|
||||
|
||||
assert_eq!(
|
||||
tool_output_item.get("call_id").and_then(Value::as_str),
|
||||
Some(call_id)
|
||||
);
|
||||
|
||||
let output_text = tool_output_item
|
||||
.get("output")
|
||||
.and_then(|value| match value {
|
||||
Value::String(text) => Some(text.as_str()),
|
||||
Value::Object(obj) => obj.get("content").and_then(Value::as_str),
|
||||
_ => None,
|
||||
})
|
||||
.expect("output text present");
|
||||
assert_eq!(
|
||||
output_text,
|
||||
"E1: [file] alpha.txt\nE2: [dir] nested\nE3: [file] nested/beta.txt\nE4: [dir] nested/grand"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
#[ignore = "disabled until we enable list_dir tool"]
|
||||
async fn list_dir_tool_depth_three_includes_grandchildren() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = test_codex().build(&server).await?;
|
||||
|
||||
let dir_path = cwd.path().join("depth_three");
|
||||
std::fs::create_dir(&dir_path)?;
|
||||
std::fs::write(dir_path.join("alpha.txt"), "alpha")?;
|
||||
let nested = dir_path.join("nested");
|
||||
std::fs::create_dir(&nested)?;
|
||||
std::fs::write(nested.join("beta.txt"), "beta")?;
|
||||
let deeper = nested.join("grand");
|
||||
std::fs::create_dir(&deeper)?;
|
||||
std::fs::write(deeper.join("gamma.txt"), "gamma")?;
|
||||
let dir_path_string = dir_path.to_string_lossy().to_string();
|
||||
|
||||
let call_id = "list-dir-depth3";
|
||||
let arguments = serde_json::json!({
|
||||
"dir_path": dir_path_string,
|
||||
"offset": 1,
|
||||
"limit": 10,
|
||||
"depth": 3,
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let first_response = sse(vec![
|
||||
serde_json::json!({
|
||||
"type": "response.created",
|
||||
"response": {"id": "resp-1"}
|
||||
}),
|
||||
ev_function_call(call_id, "list_dir", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "list directory contents depth three".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
let request_bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
assert!(
|
||||
!request_bodies.is_empty(),
|
||||
"expected at least one request body"
|
||||
);
|
||||
|
||||
let tool_output_item = request_bodies
|
||||
.iter()
|
||||
.find_map(|body| {
|
||||
body.get("input")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|items| {
|
||||
items.iter().find(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
})
|
||||
})
|
||||
})
|
||||
.unwrap_or_else(|| {
|
||||
panic!("function_call_output item not found in requests: {request_bodies:#?}")
|
||||
});
|
||||
|
||||
assert_eq!(
|
||||
tool_output_item.get("call_id").and_then(Value::as_str),
|
||||
Some(call_id)
|
||||
);
|
||||
|
||||
let output_text = tool_output_item
|
||||
.get("output")
|
||||
.and_then(|value| match value {
|
||||
Value::String(text) => Some(text.as_str()),
|
||||
Value::Object(obj) => obj.get("content").and_then(Value::as_str),
|
||||
_ => None,
|
||||
})
|
||||
.expect("output text present");
|
||||
assert_eq!(
|
||||
output_text,
|
||||
"E1: [file] alpha.txt\nE2: [dir] nested\nE3: [file] nested/beta.txt\nE4: [dir] nested/grand\nE5: [file] nested/grand/gamma.txt"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
148
llmx-rs/core/tests/suite/live_cli.rs
Normal file
148
llmx-rs/core/tests/suite/live_cli.rs
Normal file
@@ -0,0 +1,148 @@
|
||||
#![expect(clippy::expect_used)]
|
||||
|
||||
//! Optional smoke tests that hit the real OpenAI /v1/responses endpoint. They are `#[ignore]` by
|
||||
//! default so CI stays deterministic and free. Developers can run them locally with
|
||||
//! `cargo test --test live_cli -- --ignored` provided they set a valid `OPENAI_API_KEY`.
|
||||
|
||||
use assert_cmd::prelude::*;
|
||||
use predicates::prelude::*;
|
||||
use std::process::Command;
|
||||
use std::process::Stdio;
|
||||
use tempfile::TempDir;
|
||||
|
||||
fn require_api_key() -> String {
|
||||
std::env::var("OPENAI_API_KEY")
|
||||
.expect("OPENAI_API_KEY env var not set — skip running live tests")
|
||||
}
|
||||
|
||||
/// Helper that spawns the binary inside a TempDir with minimal flags. Returns (Assert, TempDir).
|
||||
fn run_live(prompt: &str) -> (assert_cmd::assert::Assert, TempDir) {
|
||||
#![expect(clippy::unwrap_used)]
|
||||
use std::io::Read;
|
||||
use std::io::Write;
|
||||
use std::thread;
|
||||
|
||||
let dir = TempDir::new().unwrap();
|
||||
|
||||
// Build a plain `std::process::Command` so we have full control over the underlying stdio
|
||||
// handles. `assert_cmd`’s own `Command` wrapper always forces stdout/stderr to be piped
|
||||
// internally which prevents us from streaming them live to the terminal (see its `spawn`
|
||||
// implementation). Instead we configure the std `Command` ourselves, then later hand the
|
||||
// resulting `Output` to `assert_cmd` for the familiar assertions.
|
||||
|
||||
let mut cmd = Command::cargo_bin("codex-rs").unwrap();
|
||||
cmd.current_dir(dir.path());
|
||||
cmd.env("OPENAI_API_KEY", require_api_key());
|
||||
|
||||
// We want three things at once:
|
||||
// 1. live streaming of the child’s stdout/stderr while the test is running
|
||||
// 2. captured output so we can keep using assert_cmd’s `Assert` helpers
|
||||
// 3. cross‑platform behavior (best effort)
|
||||
//
|
||||
// To get that we:
|
||||
// • set both stdout and stderr to `piped()` so we can read them programmatically
|
||||
// • spawn a thread for each stream that copies bytes into two sinks:
|
||||
// – the parent process’ stdout/stderr for live visibility
|
||||
// – an in‑memory buffer so we can pass it to `assert_cmd` later
|
||||
|
||||
// Pass the prompt through the `--` separator so the CLI knows when user input ends.
|
||||
cmd.arg("--allow-no-git-exec")
|
||||
.arg("-v")
|
||||
.arg("--")
|
||||
.arg(prompt);
|
||||
|
||||
cmd.stdin(Stdio::piped());
|
||||
cmd.stdout(Stdio::piped());
|
||||
cmd.stderr(Stdio::piped());
|
||||
|
||||
let mut child = cmd.spawn().expect("failed to spawn codex-rs");
|
||||
|
||||
// Send the terminating newline so Session::run exits after the first turn.
|
||||
child
|
||||
.stdin
|
||||
.as_mut()
|
||||
.expect("child stdin unavailable")
|
||||
.write_all(b"\n")
|
||||
.expect("failed to write to child stdin");
|
||||
|
||||
// Helper that tees a ChildStdout/ChildStderr into both the parent’s stdio and a Vec<u8>.
|
||||
fn tee<R: Read + Send + 'static>(
|
||||
mut reader: R,
|
||||
mut writer: impl Write + Send + 'static,
|
||||
) -> thread::JoinHandle<Vec<u8>> {
|
||||
thread::spawn(move || {
|
||||
let mut buf = Vec::new();
|
||||
let mut chunk = [0u8; 4096];
|
||||
loop {
|
||||
match reader.read(&mut chunk) {
|
||||
Ok(0) => break,
|
||||
Ok(n) => {
|
||||
writer.write_all(&chunk[..n]).ok();
|
||||
writer.flush().ok();
|
||||
buf.extend_from_slice(&chunk[..n]);
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
buf
|
||||
})
|
||||
}
|
||||
|
||||
let stdout_handle = tee(
|
||||
child.stdout.take().expect("child stdout"),
|
||||
std::io::stdout(),
|
||||
);
|
||||
let stderr_handle = tee(
|
||||
child.stderr.take().expect("child stderr"),
|
||||
std::io::stderr(),
|
||||
);
|
||||
|
||||
let status = child.wait().expect("failed to wait on child");
|
||||
let stdout = stdout_handle.join().expect("stdout thread panicked");
|
||||
let stderr = stderr_handle.join().expect("stderr thread panicked");
|
||||
|
||||
let output = std::process::Output {
|
||||
status,
|
||||
stdout,
|
||||
stderr,
|
||||
};
|
||||
|
||||
(output.assert(), dir)
|
||||
}
|
||||
|
||||
#[ignore]
|
||||
#[test]
|
||||
fn live_create_file_hello_txt() {
|
||||
if std::env::var("OPENAI_API_KEY").is_err() {
|
||||
eprintln!("skipping live_create_file_hello_txt – OPENAI_API_KEY not set");
|
||||
return;
|
||||
}
|
||||
|
||||
let (assert, dir) = run_live(
|
||||
"Use the shell tool with the apply_patch command to create a file named hello.txt containing the text 'hello'.",
|
||||
);
|
||||
|
||||
assert.success();
|
||||
|
||||
let path = dir.path().join("hello.txt");
|
||||
assert!(path.exists(), "hello.txt was not created by the model");
|
||||
|
||||
let contents = std::fs::read_to_string(path).unwrap();
|
||||
|
||||
assert_eq!(contents.trim(), "hello");
|
||||
}
|
||||
|
||||
#[ignore]
|
||||
#[test]
|
||||
fn live_print_working_directory() {
|
||||
if std::env::var("OPENAI_API_KEY").is_err() {
|
||||
eprintln!("skipping live_print_working_directory – OPENAI_API_KEY not set");
|
||||
return;
|
||||
}
|
||||
|
||||
let (assert, dir) = run_live("Print the current working directory using the shell function.");
|
||||
|
||||
assert
|
||||
.success()
|
||||
.stdout(predicate::str::contains(dir.path().to_string_lossy()));
|
||||
}
|
||||
60
llmx-rs/core/tests/suite/mod.rs
Normal file
60
llmx-rs/core/tests/suite/mod.rs
Normal file
@@ -0,0 +1,60 @@
|
||||
// Aggregates all former standalone integration tests as modules.
|
||||
use codex_arg0::arg0_dispatch;
|
||||
use ctor::ctor;
|
||||
use tempfile::TempDir;
|
||||
|
||||
// This code runs before any other tests are run.
|
||||
// It allows the test binary to behave like codex and dispatch to apply_patch and codex-linux-sandbox
|
||||
// based on the arg0.
|
||||
// NOTE: this doesn't work on ARM
|
||||
#[ctor]
|
||||
pub static CODEX_ALIASES_TEMP_DIR: TempDir = unsafe {
|
||||
#[allow(clippy::unwrap_used)]
|
||||
arg0_dispatch().unwrap()
|
||||
};
|
||||
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
mod abort_tasks;
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
mod apply_patch_cli;
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
mod apply_patch_freeform;
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
mod approvals;
|
||||
mod auth_refresh;
|
||||
mod cli_stream;
|
||||
mod client;
|
||||
mod codex_delegate;
|
||||
mod compact;
|
||||
mod compact_resume_fork;
|
||||
mod deprecation_notice;
|
||||
mod exec;
|
||||
mod fork_conversation;
|
||||
mod grep_files;
|
||||
mod items;
|
||||
mod json_result;
|
||||
mod list_dir;
|
||||
mod live_cli;
|
||||
mod model_overrides;
|
||||
mod model_tools;
|
||||
mod otel;
|
||||
mod prompt_caching;
|
||||
mod quota_exceeded;
|
||||
mod read_file;
|
||||
mod resume;
|
||||
mod review;
|
||||
mod rmcp_client;
|
||||
mod rollout_list_find;
|
||||
mod seatbelt;
|
||||
mod shell_serialization;
|
||||
mod stream_error_allows_next_turn;
|
||||
mod stream_no_completed;
|
||||
mod tool_harness;
|
||||
mod tool_parallelism;
|
||||
mod tools;
|
||||
mod truncation;
|
||||
mod undo;
|
||||
mod unified_exec;
|
||||
mod user_notification;
|
||||
mod user_shell_cmd;
|
||||
mod view_image;
|
||||
92
llmx-rs/core/tests/suite/model_overrides.rs
Normal file
92
llmx-rs/core/tests/suite/model_overrides.rs
Normal file
@@ -0,0 +1,92 @@
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol_config_types::ReasoningEffort;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::wait_for_event;
|
||||
use pretty_assertions::assert_eq;
|
||||
use tempfile::TempDir;
|
||||
|
||||
const CONFIG_TOML: &str = "config.toml";
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn override_turn_context_does_not_persist_when_config_exists() {
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let config_path = codex_home.path().join(CONFIG_TOML);
|
||||
let initial_contents = "model = \"gpt-4o\"\n";
|
||||
tokio::fs::write(&config_path, initial_contents)
|
||||
.await
|
||||
.expect("seed config.toml");
|
||||
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.model = "gpt-4o".to_string();
|
||||
|
||||
let conversation_manager =
|
||||
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
|
||||
let codex = conversation_manager
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.expect("create conversation")
|
||||
.conversation;
|
||||
|
||||
codex
|
||||
.submit(Op::OverrideTurnContext {
|
||||
cwd: None,
|
||||
approval_policy: None,
|
||||
sandbox_policy: None,
|
||||
model: Some("o3".to_string()),
|
||||
effort: Some(Some(ReasoningEffort::High)),
|
||||
summary: None,
|
||||
})
|
||||
.await
|
||||
.expect("submit override");
|
||||
|
||||
codex.submit(Op::Shutdown).await.expect("request shutdown");
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::ShutdownComplete)).await;
|
||||
|
||||
let contents = tokio::fs::read_to_string(&config_path)
|
||||
.await
|
||||
.expect("read config.toml after override");
|
||||
assert_eq!(contents, initial_contents);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn override_turn_context_does_not_create_config_file() {
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let config_path = codex_home.path().join(CONFIG_TOML);
|
||||
assert!(
|
||||
!config_path.exists(),
|
||||
"test setup should start without config"
|
||||
);
|
||||
|
||||
let config = load_default_config_for_test(&codex_home);
|
||||
|
||||
let conversation_manager =
|
||||
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
|
||||
let codex = conversation_manager
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.expect("create conversation")
|
||||
.conversation;
|
||||
|
||||
codex
|
||||
.submit(Op::OverrideTurnContext {
|
||||
cwd: None,
|
||||
approval_policy: None,
|
||||
sandbox_policy: None,
|
||||
model: Some("o3".to_string()),
|
||||
effort: Some(Some(ReasoningEffort::Medium)),
|
||||
summary: None,
|
||||
})
|
||||
.await
|
||||
.expect("submit override");
|
||||
|
||||
codex.submit(Op::Shutdown).await.expect("request shutdown");
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::ShutdownComplete)).await;
|
||||
|
||||
assert!(
|
||||
!config_path.exists(),
|
||||
"override should not create config.toml"
|
||||
);
|
||||
}
|
||||
131
llmx-rs/core/tests/suite/model_tools.rs
Normal file
131
llmx-rs/core/tests/suite/model_tools.rs
Normal file
@@ -0,0 +1,131 @@
|
||||
#![allow(clippy::unwrap_used)]
|
||||
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::model_family::find_family_for_model;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::load_sse_fixture_with_id;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::wait_for_event;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::MockServer;
|
||||
|
||||
fn sse_completed(id: &str) -> String {
|
||||
load_sse_fixture_with_id("tests/fixtures/completed_template.json", id)
|
||||
}
|
||||
|
||||
#[allow(clippy::expect_used)]
|
||||
fn tool_identifiers(body: &serde_json::Value) -> Vec<String> {
|
||||
body["tools"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|tool| {
|
||||
tool.get("name")
|
||||
.and_then(|v| v.as_str())
|
||||
.or_else(|| tool.get("type").and_then(|v| v.as_str()))
|
||||
.map(std::string::ToString::to_string)
|
||||
.expect("tool should have either name or type")
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[allow(clippy::expect_used)]
|
||||
async fn collect_tool_identifiers_for_model(model: &str) -> Vec<String> {
|
||||
let server = MockServer::start().await;
|
||||
|
||||
let sse = sse_completed(model);
|
||||
let resp_mock = responses::mount_sse_once_match(&server, wiremock::matchers::any(), sse).await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
|
||||
let cwd = TempDir::new().unwrap();
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.cwd = cwd.path().to_path_buf();
|
||||
config.model_provider = model_provider;
|
||||
config.model = model.to_string();
|
||||
config.model_family =
|
||||
find_family_for_model(model).unwrap_or_else(|| panic!("unknown model family for {model}"));
|
||||
config.features.disable(Feature::ApplyPatchFreeform);
|
||||
config.features.disable(Feature::ViewImageTool);
|
||||
config.features.disable(Feature::WebSearchRequest);
|
||||
config.features.disable(Feature::UnifiedExec);
|
||||
|
||||
let conversation_manager =
|
||||
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
|
||||
let codex = conversation_manager
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.expect("create new conversation")
|
||||
.conversation;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello tools".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let body = resp_mock.single_request().body_json();
|
||||
tool_identifiers(&body)
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn model_selects_expected_tools() {
|
||||
skip_if_no_network!();
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
let codex_tools = collect_tool_identifiers_for_model("codex-mini-latest").await;
|
||||
assert_eq!(
|
||||
codex_tools,
|
||||
vec![
|
||||
"local_shell".to_string(),
|
||||
"list_mcp_resources".to_string(),
|
||||
"list_mcp_resource_templates".to_string(),
|
||||
"read_mcp_resource".to_string(),
|
||||
"update_plan".to_string()
|
||||
],
|
||||
"codex-mini-latest should expose the local shell tool",
|
||||
);
|
||||
|
||||
let o3_tools = collect_tool_identifiers_for_model("o3").await;
|
||||
assert_eq!(
|
||||
o3_tools,
|
||||
vec![
|
||||
"shell".to_string(),
|
||||
"list_mcp_resources".to_string(),
|
||||
"list_mcp_resource_templates".to_string(),
|
||||
"read_mcp_resource".to_string(),
|
||||
"update_plan".to_string()
|
||||
],
|
||||
"o3 should expose the generic shell tool",
|
||||
);
|
||||
|
||||
let gpt5_codex_tools = collect_tool_identifiers_for_model("gpt-5-codex").await;
|
||||
assert_eq!(
|
||||
gpt5_codex_tools,
|
||||
vec![
|
||||
"shell".to_string(),
|
||||
"list_mcp_resources".to_string(),
|
||||
"list_mcp_resource_templates".to_string(),
|
||||
"read_mcp_resource".to_string(),
|
||||
"update_plan".to_string(),
|
||||
"apply_patch".to_string()
|
||||
],
|
||||
"gpt-5-codex should expose the apply_patch tool",
|
||||
);
|
||||
}
|
||||
1060
llmx-rs/core/tests/suite/otel.rs
Normal file
1060
llmx-rs/core/tests/suite/otel.rs
Normal file
File diff suppressed because it is too large
Load Diff
891
llmx-rs/core/tests/suite/prompt_caching.rs
Normal file
891
llmx-rs/core/tests/suite/prompt_caching.rs
Normal file
@@ -0,0 +1,891 @@
|
||||
#![allow(clippy::unwrap_used)]
|
||||
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::config::OPENAI_DEFAULT_MODEL;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::model_family::find_family_for_model;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_core::protocol_config_types::ReasoningEffort;
|
||||
use codex_core::protocol_config_types::ReasoningSummary;
|
||||
use codex_core::shell::Shell;
|
||||
use codex_core::shell::default_user_shell;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::load_sse_fixture_with_id;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::wait_for_event;
|
||||
use std::collections::HashMap;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
fn text_user_input(text: String) -> serde_json::Value {
|
||||
serde_json::json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": text } ]
|
||||
})
|
||||
}
|
||||
|
||||
fn default_env_context_str(cwd: &str, shell: &Shell) -> String {
|
||||
format!(
|
||||
r#"<environment_context>
|
||||
<cwd>{}</cwd>
|
||||
<approval_policy>on-request</approval_policy>
|
||||
<sandbox_mode>read-only</sandbox_mode>
|
||||
<network_access>restricted</network_access>
|
||||
{}</environment_context>"#,
|
||||
cwd,
|
||||
match shell.name() {
|
||||
Some(name) => format!(" <shell>{name}</shell>\n"),
|
||||
None => String::new(),
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/// Build minimal SSE stream with completed marker using the JSON fixture.
|
||||
fn sse_completed(id: &str) -> String {
|
||||
load_sse_fixture_with_id("tests/fixtures/completed_template.json", id)
|
||||
}
|
||||
|
||||
fn assert_tool_names(body: &serde_json::Value, expected_names: &[&str]) {
|
||||
assert_eq!(
|
||||
body["tools"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|t| t["name"].as_str().unwrap().to_string())
|
||||
.collect::<Vec<_>>(),
|
||||
expected_names
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
|
||||
async fn codex_mini_latest_tools() {
|
||||
skip_if_no_network!();
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
let server = MockServer::start().await;
|
||||
|
||||
let sse = sse_completed("resp");
|
||||
let template = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse, "text/event-stream");
|
||||
|
||||
// Expect two POSTs to /v1/responses
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(template)
|
||||
.expect(2)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
|
||||
let cwd = TempDir::new().unwrap();
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.cwd = cwd.path().to_path_buf();
|
||||
config.model_provider = model_provider;
|
||||
config.user_instructions = Some("be consistent and helpful".to_string());
|
||||
config.features.disable(Feature::ApplyPatchFreeform);
|
||||
|
||||
let conversation_manager =
|
||||
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
|
||||
config.model = "codex-mini-latest".to_string();
|
||||
config.model_family = find_family_for_model("codex-mini-latest").unwrap();
|
||||
|
||||
let codex = conversation_manager
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.expect("create new conversation")
|
||||
.conversation;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello 1".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello 2".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.unwrap();
|
||||
assert_eq!(requests.len(), 2, "expected two POST requests");
|
||||
|
||||
let expected_instructions = [
|
||||
include_str!("../../prompt.md"),
|
||||
include_str!("../../../apply-patch/apply_patch_tool_instructions.md"),
|
||||
]
|
||||
.join("\n");
|
||||
|
||||
let body0 = requests[0].body_json::<serde_json::Value>().unwrap();
|
||||
assert_eq!(
|
||||
body0["instructions"],
|
||||
serde_json::json!(expected_instructions),
|
||||
);
|
||||
let body1 = requests[1].body_json::<serde_json::Value>().unwrap();
|
||||
assert_eq!(
|
||||
body1["instructions"],
|
||||
serde_json::json!(expected_instructions),
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
|
||||
async fn prompt_tools_are_consistent_across_requests() {
|
||||
skip_if_no_network!();
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
let server = MockServer::start().await;
|
||||
|
||||
let sse = sse_completed("resp");
|
||||
let template = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse, "text/event-stream");
|
||||
|
||||
// Expect two POSTs to /v1/responses
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(template)
|
||||
.expect(2)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
|
||||
let cwd = TempDir::new().unwrap();
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.cwd = cwd.path().to_path_buf();
|
||||
config.model_provider = model_provider;
|
||||
config.user_instructions = Some("be consistent and helpful".to_string());
|
||||
|
||||
let conversation_manager =
|
||||
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
|
||||
let base_instructions = config.model_family.base_instructions.clone();
|
||||
let codex = conversation_manager
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.expect("create new conversation")
|
||||
.conversation;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello 1".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello 2".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.unwrap();
|
||||
assert_eq!(requests.len(), 2, "expected two POST requests");
|
||||
|
||||
// our internal implementation is responsible for keeping tools in sync
|
||||
// with the OpenAI schema, so we just verify the tool presence here
|
||||
let tools_by_model: HashMap<&'static str, Vec<&'static str>> = HashMap::from([
|
||||
(
|
||||
"gpt-5",
|
||||
vec![
|
||||
"shell",
|
||||
"list_mcp_resources",
|
||||
"list_mcp_resource_templates",
|
||||
"read_mcp_resource",
|
||||
"update_plan",
|
||||
"view_image",
|
||||
],
|
||||
),
|
||||
(
|
||||
"gpt-5-codex",
|
||||
vec![
|
||||
"shell",
|
||||
"list_mcp_resources",
|
||||
"list_mcp_resource_templates",
|
||||
"read_mcp_resource",
|
||||
"update_plan",
|
||||
"apply_patch",
|
||||
"view_image",
|
||||
],
|
||||
),
|
||||
]);
|
||||
let expected_tools_names = tools_by_model
|
||||
.get(OPENAI_DEFAULT_MODEL)
|
||||
.unwrap_or_else(|| panic!("expected tools to be defined for model {OPENAI_DEFAULT_MODEL}"))
|
||||
.as_slice();
|
||||
let body0 = requests[0].body_json::<serde_json::Value>().unwrap();
|
||||
|
||||
let expected_instructions = if expected_tools_names.contains(&"apply_patch") {
|
||||
base_instructions
|
||||
} else {
|
||||
[
|
||||
base_instructions.clone(),
|
||||
include_str!("../../../apply-patch/apply_patch_tool_instructions.md").to_string(),
|
||||
]
|
||||
.join("\n")
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
body0["instructions"],
|
||||
serde_json::json!(expected_instructions),
|
||||
);
|
||||
assert_tool_names(&body0, expected_tools_names);
|
||||
|
||||
let body1 = requests[1].body_json::<serde_json::Value>().unwrap();
|
||||
assert_eq!(
|
||||
body1["instructions"],
|
||||
serde_json::json!(expected_instructions),
|
||||
);
|
||||
assert_tool_names(&body1, expected_tools_names);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn prefixes_context_and_instructions_once_and_consistently_across_requests() {
|
||||
skip_if_no_network!();
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
let server = MockServer::start().await;
|
||||
|
||||
let sse = sse_completed("resp");
|
||||
let template = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse, "text/event-stream");
|
||||
|
||||
// Expect two POSTs to /v1/responses
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(template)
|
||||
.expect(2)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
|
||||
let cwd = TempDir::new().unwrap();
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.cwd = cwd.path().to_path_buf();
|
||||
config.model_provider = model_provider;
|
||||
config.user_instructions = Some("be consistent and helpful".to_string());
|
||||
|
||||
let conversation_manager =
|
||||
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
|
||||
let codex = conversation_manager
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.expect("create new conversation")
|
||||
.conversation;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello 1".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello 2".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.unwrap();
|
||||
assert_eq!(requests.len(), 2, "expected two POST requests");
|
||||
|
||||
let shell = default_user_shell().await;
|
||||
|
||||
let expected_env_text = format!(
|
||||
r#"<environment_context>
|
||||
<cwd>{}</cwd>
|
||||
<approval_policy>on-request</approval_policy>
|
||||
<sandbox_mode>read-only</sandbox_mode>
|
||||
<network_access>restricted</network_access>
|
||||
{}</environment_context>"#,
|
||||
cwd.path().to_string_lossy(),
|
||||
match shell.name() {
|
||||
Some(name) => format!(" <shell>{name}</shell>\n"),
|
||||
None => String::new(),
|
||||
}
|
||||
);
|
||||
let expected_ui_text = format!(
|
||||
"# AGENTS.md instructions for {}\n\n<INSTRUCTIONS>\nbe consistent and helpful\n</INSTRUCTIONS>",
|
||||
cwd.path().to_string_lossy()
|
||||
);
|
||||
|
||||
let expected_env_msg = serde_json::json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": expected_env_text } ]
|
||||
});
|
||||
let expected_ui_msg = serde_json::json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": expected_ui_text } ]
|
||||
});
|
||||
|
||||
let expected_user_message_1 = serde_json::json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": "hello 1" } ]
|
||||
});
|
||||
let body1 = requests[0].body_json::<serde_json::Value>().unwrap();
|
||||
assert_eq!(
|
||||
body1["input"],
|
||||
serde_json::json!([expected_ui_msg, expected_env_msg, expected_user_message_1])
|
||||
);
|
||||
|
||||
let expected_user_message_2 = serde_json::json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": "hello 2" } ]
|
||||
});
|
||||
let body2 = requests[1].body_json::<serde_json::Value>().unwrap();
|
||||
let expected_body2 = serde_json::json!(
|
||||
[
|
||||
body1["input"].as_array().unwrap().as_slice(),
|
||||
[expected_user_message_2].as_slice(),
|
||||
]
|
||||
.concat()
|
||||
);
|
||||
assert_eq!(body2["input"], expected_body2);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() {
|
||||
skip_if_no_network!();
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
let server = MockServer::start().await;
|
||||
|
||||
let sse = sse_completed("resp");
|
||||
let template = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse, "text/event-stream");
|
||||
|
||||
// Expect two POSTs to /v1/responses
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(template)
|
||||
.expect(2)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
|
||||
let cwd = TempDir::new().unwrap();
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.cwd = cwd.path().to_path_buf();
|
||||
config.model_provider = model_provider;
|
||||
config.user_instructions = Some("be consistent and helpful".to_string());
|
||||
|
||||
let conversation_manager =
|
||||
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
|
||||
let codex = conversation_manager
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.expect("create new conversation")
|
||||
.conversation;
|
||||
|
||||
// First turn
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello 1".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let writable = TempDir::new().unwrap();
|
||||
codex
|
||||
.submit(Op::OverrideTurnContext {
|
||||
cwd: None,
|
||||
approval_policy: Some(AskForApproval::Never),
|
||||
sandbox_policy: Some(SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![writable.path().to_path_buf()],
|
||||
network_access: true,
|
||||
exclude_tmpdir_env_var: true,
|
||||
exclude_slash_tmp: true,
|
||||
}),
|
||||
model: Some("o3".to_string()),
|
||||
effort: Some(Some(ReasoningEffort::High)),
|
||||
summary: Some(ReasoningSummary::Detailed),
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Second turn after overrides
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello 2".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// Verify we issued exactly two requests, and the cached prefix stayed identical.
|
||||
let requests = server.received_requests().await.unwrap();
|
||||
assert_eq!(requests.len(), 2, "expected two POST requests");
|
||||
|
||||
let body1 = requests[0].body_json::<serde_json::Value>().unwrap();
|
||||
let body2 = requests[1].body_json::<serde_json::Value>().unwrap();
|
||||
// prompt_cache_key should remain constant across overrides
|
||||
assert_eq!(
|
||||
body1["prompt_cache_key"], body2["prompt_cache_key"],
|
||||
"prompt_cache_key should not change across overrides"
|
||||
);
|
||||
|
||||
// The entire prefix from the first request should be identical and reused
|
||||
// as the prefix of the second request, ensuring cache hit potential.
|
||||
let expected_user_message_2 = serde_json::json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": "hello 2" } ]
|
||||
});
|
||||
// After overriding the turn context, the environment context should be emitted again
|
||||
// reflecting the new approval policy and sandbox settings. Omit cwd because it did
|
||||
// not change.
|
||||
let expected_env_text_2 = format!(
|
||||
r#"<environment_context>
|
||||
<approval_policy>never</approval_policy>
|
||||
<sandbox_mode>workspace-write</sandbox_mode>
|
||||
<network_access>enabled</network_access>
|
||||
<writable_roots>
|
||||
<root>{}</root>
|
||||
</writable_roots>
|
||||
</environment_context>"#,
|
||||
writable.path().to_string_lossy(),
|
||||
);
|
||||
let expected_env_msg_2 = serde_json::json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": expected_env_text_2 } ]
|
||||
});
|
||||
let expected_body2 = serde_json::json!(
|
||||
[
|
||||
body1["input"].as_array().unwrap().as_slice(),
|
||||
[expected_env_msg_2, expected_user_message_2].as_slice(),
|
||||
]
|
||||
.concat()
|
||||
);
|
||||
assert_eq!(body2["input"], expected_body2);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {
|
||||
skip_if_no_network!();
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
let server = MockServer::start().await;
|
||||
|
||||
let sse = sse_completed("resp");
|
||||
let template = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse, "text/event-stream");
|
||||
|
||||
// Expect two POSTs to /v1/responses
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(template)
|
||||
.expect(2)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
|
||||
let cwd = TempDir::new().unwrap();
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.cwd = cwd.path().to_path_buf();
|
||||
config.model_provider = model_provider;
|
||||
config.user_instructions = Some("be consistent and helpful".to_string());
|
||||
|
||||
let conversation_manager =
|
||||
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
|
||||
let codex = conversation_manager
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.expect("create new conversation")
|
||||
.conversation;
|
||||
|
||||
// First turn
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello 1".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// Second turn using per-turn overrides via UserTurn
|
||||
let new_cwd = TempDir::new().unwrap();
|
||||
let writable = TempDir::new().unwrap();
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello 2".into(),
|
||||
}],
|
||||
cwd: new_cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![writable.path().to_path_buf()],
|
||||
network_access: true,
|
||||
exclude_tmpdir_env_var: true,
|
||||
exclude_slash_tmp: true,
|
||||
},
|
||||
model: "o3".to_string(),
|
||||
effort: Some(ReasoningEffort::High),
|
||||
summary: ReasoningSummary::Detailed,
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// Verify we issued exactly two requests, and the cached prefix stayed identical.
|
||||
let requests = server.received_requests().await.unwrap();
|
||||
assert_eq!(requests.len(), 2, "expected two POST requests");
|
||||
|
||||
let body1 = requests[0].body_json::<serde_json::Value>().unwrap();
|
||||
let body2 = requests[1].body_json::<serde_json::Value>().unwrap();
|
||||
|
||||
// prompt_cache_key should remain constant across per-turn overrides
|
||||
assert_eq!(
|
||||
body1["prompt_cache_key"], body2["prompt_cache_key"],
|
||||
"prompt_cache_key should not change across per-turn overrides"
|
||||
);
|
||||
|
||||
// The entire prefix from the first request should be identical and reused
|
||||
// as the prefix of the second request.
|
||||
let expected_user_message_2 = serde_json::json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": "hello 2" } ]
|
||||
});
|
||||
let expected_env_text_2 = format!(
|
||||
r#"<environment_context>
|
||||
<cwd>{}</cwd>
|
||||
<approval_policy>never</approval_policy>
|
||||
<sandbox_mode>workspace-write</sandbox_mode>
|
||||
<network_access>enabled</network_access>
|
||||
<writable_roots>
|
||||
<root>{}</root>
|
||||
</writable_roots>
|
||||
</environment_context>"#,
|
||||
new_cwd.path().to_string_lossy(),
|
||||
writable.path().to_string_lossy(),
|
||||
);
|
||||
let expected_env_msg_2 = serde_json::json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": expected_env_text_2 } ]
|
||||
});
|
||||
let expected_body2 = serde_json::json!(
|
||||
[
|
||||
body1["input"].as_array().unwrap().as_slice(),
|
||||
[expected_env_msg_2, expected_user_message_2].as_slice(),
|
||||
]
|
||||
.concat()
|
||||
);
|
||||
assert_eq!(body2["input"], expected_body2);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn send_user_turn_with_no_changes_does_not_send_environment_context() {
|
||||
skip_if_no_network!();
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
let server = MockServer::start().await;
|
||||
|
||||
let sse = sse_completed("resp");
|
||||
let template = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse, "text/event-stream");
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(template)
|
||||
.expect(2)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
|
||||
let cwd = TempDir::new().unwrap();
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.cwd = cwd.path().to_path_buf();
|
||||
config.model_provider = model_provider;
|
||||
config.user_instructions = Some("be consistent and helpful".to_string());
|
||||
|
||||
let default_cwd = config.cwd.clone();
|
||||
let default_approval_policy = config.approval_policy;
|
||||
let default_sandbox_policy = config.sandbox_policy.clone();
|
||||
let default_model = config.model.clone();
|
||||
let default_effort = config.model_reasoning_effort;
|
||||
let default_summary = config.model_reasoning_summary;
|
||||
|
||||
let conversation_manager =
|
||||
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
|
||||
let codex = conversation_manager
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.expect("create new conversation")
|
||||
.conversation;
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello 1".into(),
|
||||
}],
|
||||
cwd: default_cwd.clone(),
|
||||
approval_policy: default_approval_policy,
|
||||
sandbox_policy: default_sandbox_policy.clone(),
|
||||
model: default_model.clone(),
|
||||
effort: default_effort,
|
||||
summary: default_summary,
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello 2".into(),
|
||||
}],
|
||||
cwd: default_cwd.clone(),
|
||||
approval_policy: default_approval_policy,
|
||||
sandbox_policy: default_sandbox_policy.clone(),
|
||||
model: default_model.clone(),
|
||||
effort: default_effort,
|
||||
summary: default_summary,
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.unwrap();
|
||||
assert_eq!(requests.len(), 2, "expected two POST requests");
|
||||
|
||||
let body1 = requests[0].body_json::<serde_json::Value>().unwrap();
|
||||
let body2 = requests[1].body_json::<serde_json::Value>().unwrap();
|
||||
|
||||
let shell = default_user_shell().await;
|
||||
let expected_ui_text = format!(
|
||||
"# AGENTS.md instructions for {}\n\n<INSTRUCTIONS>\nbe consistent and helpful\n</INSTRUCTIONS>",
|
||||
default_cwd.to_string_lossy()
|
||||
);
|
||||
let expected_ui_msg = text_user_input(expected_ui_text);
|
||||
|
||||
let expected_env_msg_1 = text_user_input(default_env_context_str(
|
||||
&cwd.path().to_string_lossy(),
|
||||
&shell,
|
||||
));
|
||||
let expected_user_message_1 = text_user_input("hello 1".to_string());
|
||||
|
||||
let expected_input_1 = serde_json::Value::Array(vec![
|
||||
expected_ui_msg.clone(),
|
||||
expected_env_msg_1.clone(),
|
||||
expected_user_message_1.clone(),
|
||||
]);
|
||||
assert_eq!(body1["input"], expected_input_1);
|
||||
|
||||
let expected_user_message_2 = text_user_input("hello 2".to_string());
|
||||
let expected_input_2 = serde_json::Value::Array(vec![
|
||||
expected_ui_msg,
|
||||
expected_env_msg_1,
|
||||
expected_user_message_1,
|
||||
expected_user_message_2,
|
||||
]);
|
||||
assert_eq!(body2["input"], expected_input_2);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn send_user_turn_with_changes_sends_environment_context() {
|
||||
skip_if_no_network!();
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
let server = MockServer::start().await;
|
||||
|
||||
let sse = sse_completed("resp");
|
||||
let template = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse, "text/event-stream");
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(template)
|
||||
.expect(2)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
|
||||
let cwd = TempDir::new().unwrap();
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.cwd = cwd.path().to_path_buf();
|
||||
config.model_provider = model_provider;
|
||||
config.user_instructions = Some("be consistent and helpful".to_string());
|
||||
|
||||
let default_cwd = config.cwd.clone();
|
||||
let default_approval_policy = config.approval_policy;
|
||||
let default_sandbox_policy = config.sandbox_policy.clone();
|
||||
let default_model = config.model.clone();
|
||||
let default_effort = config.model_reasoning_effort;
|
||||
let default_summary = config.model_reasoning_summary;
|
||||
|
||||
let conversation_manager =
|
||||
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
|
||||
let codex = conversation_manager
|
||||
.new_conversation(config.clone())
|
||||
.await
|
||||
.expect("create new conversation")
|
||||
.conversation;
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello 1".into(),
|
||||
}],
|
||||
cwd: default_cwd.clone(),
|
||||
approval_policy: default_approval_policy,
|
||||
sandbox_policy: default_sandbox_policy.clone(),
|
||||
model: default_model,
|
||||
effort: default_effort,
|
||||
summary: default_summary,
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello 2".into(),
|
||||
}],
|
||||
cwd: default_cwd.clone(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: "o3".to_string(),
|
||||
effort: Some(ReasoningEffort::High),
|
||||
summary: ReasoningSummary::Detailed,
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.unwrap();
|
||||
assert_eq!(requests.len(), 2, "expected two POST requests");
|
||||
|
||||
let body1 = requests[0].body_json::<serde_json::Value>().unwrap();
|
||||
let body2 = requests[1].body_json::<serde_json::Value>().unwrap();
|
||||
|
||||
let shell = default_user_shell().await;
|
||||
let expected_ui_text = format!(
|
||||
"# AGENTS.md instructions for {}\n\n<INSTRUCTIONS>\nbe consistent and helpful\n</INSTRUCTIONS>",
|
||||
default_cwd.to_string_lossy()
|
||||
);
|
||||
let expected_ui_msg = serde_json::json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": expected_ui_text } ]
|
||||
});
|
||||
let expected_env_text_1 = default_env_context_str(&default_cwd.to_string_lossy(), &shell);
|
||||
let expected_env_msg_1 = text_user_input(expected_env_text_1);
|
||||
let expected_user_message_1 = text_user_input("hello 1".to_string());
|
||||
let expected_input_1 = serde_json::Value::Array(vec![
|
||||
expected_ui_msg.clone(),
|
||||
expected_env_msg_1.clone(),
|
||||
expected_user_message_1.clone(),
|
||||
]);
|
||||
assert_eq!(body1["input"], expected_input_1);
|
||||
|
||||
let expected_env_msg_2 = text_user_input(
|
||||
r#"<environment_context>
|
||||
<approval_policy>never</approval_policy>
|
||||
<sandbox_mode>danger-full-access</sandbox_mode>
|
||||
<network_access>enabled</network_access>
|
||||
</environment_context>"#
|
||||
.to_string(),
|
||||
);
|
||||
let expected_user_message_2 = text_user_input("hello 2".to_string());
|
||||
let expected_input_2 = serde_json::Value::Array(vec![
|
||||
expected_ui_msg,
|
||||
expected_env_msg_1,
|
||||
expected_user_message_1,
|
||||
expected_env_msg_2,
|
||||
expected_user_message_2,
|
||||
]);
|
||||
assert_eq!(body2["input"], expected_input_2);
|
||||
}
|
||||
72
llmx-rs/core/tests/suite/quota_exceeded.rs
Normal file
72
llmx-rs/core/tests/suite/quota_exceeded.rs
Normal file
@@ -0,0 +1,72 @@
|
||||
use anyhow::Result;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_once;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn quota_exceeded_emits_single_error_event() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex();
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
json!({
|
||||
"type": "response.failed",
|
||||
"response": {
|
||||
"id": "resp-1",
|
||||
"error": {
|
||||
"code": "insufficient_quota",
|
||||
"message": "You exceeded your current quota, please check your plan and billing details."
|
||||
}
|
||||
}
|
||||
}),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
test.codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "quota?".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let mut error_events = 0;
|
||||
|
||||
loop {
|
||||
let event = wait_for_event(&test.codex, |_| true).await;
|
||||
|
||||
match event {
|
||||
EventMsg::Error(err) => {
|
||||
error_events += 1;
|
||||
assert_eq!(
|
||||
err.message,
|
||||
"Quota exceeded. Check your plan and billing details."
|
||||
);
|
||||
}
|
||||
EventMsg::TaskComplete(_) => break,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(error_events, 1, "expected exactly one Codex:Error event");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
99
llmx-rs/core/tests/suite/read_file.rs
Normal file
99
llmx-rs/core/tests/suite/read_file.rs
Normal file
@@ -0,0 +1,99 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::Value;
|
||||
use wiremock::matchers::any;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
#[ignore = "disabled until we enable read_file tool"]
|
||||
async fn read_file_tool_returns_requested_lines() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = test_codex().build(&server).await?;
|
||||
|
||||
let file_path = cwd.path().join("sample.txt");
|
||||
std::fs::write(&file_path, "first\nsecond\nthird\nfourth\n")?;
|
||||
let file_path = file_path.to_string_lossy().to_string();
|
||||
|
||||
let call_id = "read-file-call";
|
||||
let arguments = serde_json::json!({
|
||||
"file_path": file_path,
|
||||
"offset": 2,
|
||||
"limit": 2,
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "read_file", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
let second_mock = responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please inspect sample.txt".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let tool_output_item = req.function_call_output(call_id);
|
||||
assert_eq!(
|
||||
tool_output_item.get("call_id").and_then(Value::as_str),
|
||||
Some(call_id)
|
||||
);
|
||||
let output_text = tool_output_item
|
||||
.get("output")
|
||||
.and_then(|value| match value {
|
||||
Value::String(text) => Some(text.as_str()),
|
||||
Value::Object(obj) => obj.get("content").and_then(Value::as_str),
|
||||
_ => None,
|
||||
})
|
||||
.expect("output text present");
|
||||
assert_eq!(output_text, "L2: second\nL3: third");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
121
llmx-rs/core/tests/suite/resume.rs
Normal file
121
llmx-rs/core/tests/suite/resume.rs
Normal file
@@ -0,0 +1,121 @@
|
||||
use anyhow::Result;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_reasoning_item;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_once_match;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use std::sync::Arc;
|
||||
use wiremock::matchers::any;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn resume_includes_initial_messages_from_rollout_events() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex();
|
||||
let initial = builder.build(&server).await?;
|
||||
let codex = Arc::clone(&initial.codex);
|
||||
let home = initial.home.clone();
|
||||
let rollout_path = initial.session_configured.rollout_path.clone();
|
||||
|
||||
let initial_sse = sse(vec![
|
||||
ev_response_created("resp-initial"),
|
||||
ev_assistant_message("msg-1", "Completed first turn"),
|
||||
ev_completed("resp-initial"),
|
||||
]);
|
||||
mount_sse_once_match(&server, any(), initial_sse).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "Record some messages".into(),
|
||||
}],
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let resumed = builder.resume(&server, home, rollout_path).await?;
|
||||
let initial_messages = resumed
|
||||
.session_configured
|
||||
.initial_messages
|
||||
.expect("expected initial messages to be present for resumed session");
|
||||
match initial_messages.as_slice() {
|
||||
[
|
||||
EventMsg::UserMessage(first_user),
|
||||
EventMsg::TokenCount(_),
|
||||
EventMsg::AgentMessage(assistant_message),
|
||||
EventMsg::TokenCount(_),
|
||||
] => {
|
||||
assert_eq!(first_user.message, "Record some messages");
|
||||
assert_eq!(assistant_message.message, "Completed first turn");
|
||||
}
|
||||
other => panic!("unexpected initial messages after resume: {other:#?}"),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn resume_includes_initial_messages_from_reasoning_events() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.show_raw_agent_reasoning = true;
|
||||
});
|
||||
let initial = builder.build(&server).await?;
|
||||
let codex = Arc::clone(&initial.codex);
|
||||
let home = initial.home.clone();
|
||||
let rollout_path = initial.session_configured.rollout_path.clone();
|
||||
|
||||
let initial_sse = sse(vec![
|
||||
ev_response_created("resp-initial"),
|
||||
ev_reasoning_item("reason-1", &["Summarized step"], &["raw detail"]),
|
||||
ev_assistant_message("msg-1", "Completed reasoning turn"),
|
||||
ev_completed("resp-initial"),
|
||||
]);
|
||||
mount_sse_once_match(&server, any(), initial_sse).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "Record reasoning messages".into(),
|
||||
}],
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let resumed = builder.resume(&server, home, rollout_path).await?;
|
||||
let initial_messages = resumed
|
||||
.session_configured
|
||||
.initial_messages
|
||||
.expect("expected initial messages to be present for resumed session");
|
||||
match initial_messages.as_slice() {
|
||||
[
|
||||
EventMsg::UserMessage(first_user),
|
||||
EventMsg::TokenCount(_),
|
||||
EventMsg::AgentReasoning(reasoning),
|
||||
EventMsg::AgentReasoningRawContent(raw),
|
||||
EventMsg::AgentMessage(assistant_message),
|
||||
EventMsg::TokenCount(_),
|
||||
] => {
|
||||
assert_eq!(first_user.message, "Record reasoning messages");
|
||||
assert_eq!(reasoning.text, "Summarized step");
|
||||
assert_eq!(raw.text, "raw detail");
|
||||
assert_eq!(assistant_message.message, "Completed reasoning turn");
|
||||
}
|
||||
other => panic!("unexpected initial messages after resume: {other:#?}"),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
736
llmx-rs/core/tests/suite/review.rs
Normal file
736
llmx-rs/core/tests/suite/review.rs
Normal file
@@ -0,0 +1,736 @@
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::CodexConversation;
|
||||
use codex_core::ContentItem;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::REVIEW_PROMPT;
|
||||
use codex_core::ResponseItem;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::protocol::ENVIRONMENT_CONTEXT_OPEN_TAG;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::ExitedReviewModeEvent;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::ReviewCodeLocation;
|
||||
use codex_core::protocol::ReviewFinding;
|
||||
use codex_core::protocol::ReviewLineRange;
|
||||
use codex_core::protocol::ReviewOutputEvent;
|
||||
use codex_core::protocol::ReviewRequest;
|
||||
use codex_core::protocol::RolloutItem;
|
||||
use codex_core::protocol::RolloutLine;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::load_sse_fixture_with_id_from_str;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::wait_for_event;
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use tempfile::TempDir;
|
||||
use tokio::io::AsyncWriteExt as _;
|
||||
use uuid::Uuid;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
/// Verify that submitting `Op::Review` spawns a child task and emits
|
||||
/// EnteredReviewMode -> ExitedReviewMode(None) -> TaskComplete
|
||||
/// in that order when the model returns a structured review JSON payload.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn review_op_emits_lifecycle_and_review_output() {
|
||||
// Skip under Codex sandbox network restrictions.
|
||||
skip_if_no_network!();
|
||||
|
||||
// Start mock Responses API server. Return a single assistant message whose
|
||||
// text is a JSON-encoded ReviewOutputEvent.
|
||||
let review_json = serde_json::json!({
|
||||
"findings": [
|
||||
{
|
||||
"title": "Prefer Stylize helpers",
|
||||
"body": "Use .dim()/.bold() chaining instead of manual Style where possible.",
|
||||
"confidence_score": 0.9,
|
||||
"priority": 1,
|
||||
"code_location": {
|
||||
"absolute_file_path": "/tmp/file.rs",
|
||||
"line_range": {"start": 10, "end": 20}
|
||||
}
|
||||
}
|
||||
],
|
||||
"overall_correctness": "good",
|
||||
"overall_explanation": "All good with some improvements suggested.",
|
||||
"overall_confidence_score": 0.8
|
||||
})
|
||||
.to_string();
|
||||
let sse_template = r#"[
|
||||
{"type":"response.output_item.done", "item":{
|
||||
"type":"message", "role":"assistant",
|
||||
"content":[{"type":"output_text","text":__REVIEW__}]
|
||||
}},
|
||||
{"type":"response.completed", "response": {"id": "__ID__"}}
|
||||
]"#;
|
||||
let review_json_escaped = serde_json::to_string(&review_json).unwrap();
|
||||
let sse_raw = sse_template.replace("__REVIEW__", &review_json_escaped);
|
||||
let server = start_responses_server_with_sse(&sse_raw, 1).await;
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let codex = new_conversation_for_server(&server, &codex_home, |_| {}).await;
|
||||
|
||||
// Submit review request.
|
||||
codex
|
||||
.submit(Op::Review {
|
||||
review_request: ReviewRequest {
|
||||
prompt: "Please review my changes".to_string(),
|
||||
user_facing_hint: "my changes".to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Verify lifecycle: Entered -> Exited(Some(review)) -> TaskComplete.
|
||||
let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode(_))).await;
|
||||
let closed = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExitedReviewMode(_))).await;
|
||||
let review = match closed {
|
||||
EventMsg::ExitedReviewMode(ev) => ev
|
||||
.review_output
|
||||
.expect("expected ExitedReviewMode with Some(review_output)"),
|
||||
other => panic!("expected ExitedReviewMode(..), got {other:?}"),
|
||||
};
|
||||
|
||||
// Deep compare full structure using PartialEq (floats are f32 on both sides).
|
||||
let expected = ReviewOutputEvent {
|
||||
findings: vec![ReviewFinding {
|
||||
title: "Prefer Stylize helpers".to_string(),
|
||||
body: "Use .dim()/.bold() chaining instead of manual Style where possible.".to_string(),
|
||||
confidence_score: 0.9,
|
||||
priority: 1,
|
||||
code_location: ReviewCodeLocation {
|
||||
absolute_file_path: PathBuf::from("/tmp/file.rs"),
|
||||
line_range: ReviewLineRange { start: 10, end: 20 },
|
||||
},
|
||||
}],
|
||||
overall_correctness: "good".to_string(),
|
||||
overall_explanation: "All good with some improvements suggested.".to_string(),
|
||||
overall_confidence_score: 0.8,
|
||||
};
|
||||
assert_eq!(expected, review);
|
||||
let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// Also verify that a user message with the header and a formatted finding
|
||||
// was recorded back in the parent session's rollout.
|
||||
let path = codex.rollout_path();
|
||||
let text = std::fs::read_to_string(&path).expect("read rollout file");
|
||||
|
||||
let mut saw_header = false;
|
||||
let mut saw_finding_line = false;
|
||||
for line in text.lines() {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
let v: serde_json::Value = serde_json::from_str(line).expect("jsonl line");
|
||||
let rl: RolloutLine = serde_json::from_value(v).expect("rollout line");
|
||||
if let RolloutItem::ResponseItem(ResponseItem::Message { role, content, .. }) = rl.item
|
||||
&& role == "user"
|
||||
{
|
||||
for c in content {
|
||||
if let ContentItem::InputText { text } = c {
|
||||
if text.contains("full review output from reviewer model") {
|
||||
saw_header = true;
|
||||
}
|
||||
if text.contains("- Prefer Stylize helpers — /tmp/file.rs:10-20") {
|
||||
saw_finding_line = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
assert!(saw_header, "user header missing from rollout");
|
||||
assert!(
|
||||
saw_finding_line,
|
||||
"formatted finding line missing from rollout"
|
||||
);
|
||||
|
||||
server.verify().await;
|
||||
}
|
||||
|
||||
/// When the model returns plain text that is not JSON, ensure the child
|
||||
/// lifecycle still occurs and the plain text is surfaced via
|
||||
/// ExitedReviewMode(Some(..)) as the overall_explanation.
|
||||
// Windows CI only: bump to 4 workers to prevent SSE/event starvation and test timeouts.
|
||||
#[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
|
||||
#[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
|
||||
async fn review_op_with_plain_text_emits_review_fallback() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let sse_raw = r#"[
|
||||
{"type":"response.output_item.done", "item":{
|
||||
"type":"message", "role":"assistant",
|
||||
"content":[{"type":"output_text","text":"just plain text"}]
|
||||
}},
|
||||
{"type":"response.completed", "response": {"id": "__ID__"}}
|
||||
]"#;
|
||||
let server = start_responses_server_with_sse(sse_raw, 1).await;
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let codex = new_conversation_for_server(&server, &codex_home, |_| {}).await;
|
||||
|
||||
codex
|
||||
.submit(Op::Review {
|
||||
review_request: ReviewRequest {
|
||||
prompt: "Plain text review".to_string(),
|
||||
user_facing_hint: "plain text review".to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode(_))).await;
|
||||
let closed = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExitedReviewMode(_))).await;
|
||||
let review = match closed {
|
||||
EventMsg::ExitedReviewMode(ev) => ev
|
||||
.review_output
|
||||
.expect("expected ExitedReviewMode with Some(review_output)"),
|
||||
other => panic!("expected ExitedReviewMode(..), got {other:?}"),
|
||||
};
|
||||
|
||||
// Expect a structured fallback carrying the plain text.
|
||||
let expected = ReviewOutputEvent {
|
||||
overall_explanation: "just plain text".to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(expected, review);
|
||||
let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
server.verify().await;
|
||||
}
|
||||
|
||||
/// Ensure review flow suppresses assistant-specific streaming/completion events:
|
||||
/// - AgentMessageContentDelta
|
||||
/// - AgentMessageDelta (legacy)
|
||||
/// - ItemCompleted for TurnItem::AgentMessage
|
||||
// Windows CI only: bump to 4 workers to prevent SSE/event starvation and test timeouts.
|
||||
#[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
|
||||
#[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
|
||||
async fn review_filters_agent_message_related_events() {
|
||||
skip_if_no_network!();
|
||||
|
||||
// Stream simulating a typing assistant message with deltas and finalization.
|
||||
let sse_raw = r#"[
|
||||
{"type":"response.output_item.added", "item":{
|
||||
"type":"message", "role":"assistant", "id":"msg-1",
|
||||
"content":[{"type":"output_text","text":""}]
|
||||
}},
|
||||
{"type":"response.output_text.delta", "delta":"Hi"},
|
||||
{"type":"response.output_text.delta", "delta":" there"},
|
||||
{"type":"response.output_item.done", "item":{
|
||||
"type":"message", "role":"assistant", "id":"msg-1",
|
||||
"content":[{"type":"output_text","text":"Hi there"}]
|
||||
}},
|
||||
{"type":"response.completed", "response": {"id": "__ID__"}}
|
||||
]"#;
|
||||
let server = start_responses_server_with_sse(sse_raw, 1).await;
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let codex = new_conversation_for_server(&server, &codex_home, |_| {}).await;
|
||||
|
||||
codex
|
||||
.submit(Op::Review {
|
||||
review_request: ReviewRequest {
|
||||
prompt: "Filter streaming events".to_string(),
|
||||
user_facing_hint: "Filter streaming events".to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let mut saw_entered = false;
|
||||
let mut saw_exited = false;
|
||||
|
||||
// Drain until TaskComplete; assert filtered events never surface.
|
||||
wait_for_event(&codex, |event| match event {
|
||||
EventMsg::TaskComplete(_) => true,
|
||||
EventMsg::EnteredReviewMode(_) => {
|
||||
saw_entered = true;
|
||||
false
|
||||
}
|
||||
EventMsg::ExitedReviewMode(_) => {
|
||||
saw_exited = true;
|
||||
false
|
||||
}
|
||||
// The following must be filtered by review flow
|
||||
EventMsg::AgentMessageContentDelta(_) => {
|
||||
panic!("unexpected AgentMessageContentDelta surfaced during review")
|
||||
}
|
||||
EventMsg::AgentMessageDelta(_) => {
|
||||
panic!("unexpected AgentMessageDelta surfaced during review")
|
||||
}
|
||||
EventMsg::ItemCompleted(ev) => match &ev.item {
|
||||
codex_protocol::items::TurnItem::AgentMessage(_) => {
|
||||
panic!("unexpected ItemCompleted for TurnItem::AgentMessage surfaced during review")
|
||||
}
|
||||
_ => false,
|
||||
},
|
||||
_ => false,
|
||||
})
|
||||
.await;
|
||||
assert!(saw_entered && saw_exited, "missing review lifecycle events");
|
||||
|
||||
server.verify().await;
|
||||
}
|
||||
|
||||
/// When the model returns structured JSON in a review, ensure no AgentMessage
|
||||
/// is emitted; the UI consumes the structured result via ExitedReviewMode.
|
||||
// Windows CI only: bump to 4 workers to prevent SSE/event starvation and test timeouts.
|
||||
#[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
|
||||
#[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
|
||||
async fn review_does_not_emit_agent_message_on_structured_output() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let review_json = serde_json::json!({
|
||||
"findings": [
|
||||
{
|
||||
"title": "Example",
|
||||
"body": "Structured review output.",
|
||||
"confidence_score": 0.5,
|
||||
"priority": 1,
|
||||
"code_location": {
|
||||
"absolute_file_path": "/tmp/file.rs",
|
||||
"line_range": {"start": 1, "end": 2}
|
||||
}
|
||||
}
|
||||
],
|
||||
"overall_correctness": "ok",
|
||||
"overall_explanation": "ok",
|
||||
"overall_confidence_score": 0.5
|
||||
})
|
||||
.to_string();
|
||||
let sse_template = r#"[
|
||||
{"type":"response.output_item.done", "item":{
|
||||
"type":"message", "role":"assistant",
|
||||
"content":[{"type":"output_text","text":__REVIEW__}]
|
||||
}},
|
||||
{"type":"response.completed", "response": {"id": "__ID__"}}
|
||||
]"#;
|
||||
let review_json_escaped = serde_json::to_string(&review_json).unwrap();
|
||||
let sse_raw = sse_template.replace("__REVIEW__", &review_json_escaped);
|
||||
let server = start_responses_server_with_sse(&sse_raw, 1).await;
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let codex = new_conversation_for_server(&server, &codex_home, |_| {}).await;
|
||||
|
||||
codex
|
||||
.submit(Op::Review {
|
||||
review_request: ReviewRequest {
|
||||
prompt: "check structured".to_string(),
|
||||
user_facing_hint: "check structured".to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Drain events until TaskComplete; ensure none are AgentMessage.
|
||||
let mut saw_entered = false;
|
||||
let mut saw_exited = false;
|
||||
wait_for_event(&codex, |event| match event {
|
||||
EventMsg::TaskComplete(_) => true,
|
||||
EventMsg::AgentMessage(_) => {
|
||||
panic!("unexpected AgentMessage during review with structured output")
|
||||
}
|
||||
EventMsg::EnteredReviewMode(_) => {
|
||||
saw_entered = true;
|
||||
false
|
||||
}
|
||||
EventMsg::ExitedReviewMode(_) => {
|
||||
saw_exited = true;
|
||||
false
|
||||
}
|
||||
_ => false,
|
||||
})
|
||||
.await;
|
||||
assert!(saw_entered && saw_exited, "missing review lifecycle events");
|
||||
|
||||
server.verify().await;
|
||||
}
|
||||
|
||||
/// Ensure that when a custom `review_model` is set in the config, the review
|
||||
/// request uses that model (and not the main chat model).
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn review_uses_custom_review_model_from_config() {
|
||||
skip_if_no_network!();
|
||||
|
||||
// Minimal stream: just a completed event
|
||||
let sse_raw = r#"[
|
||||
{"type":"response.completed", "response": {"id": "__ID__"}}
|
||||
]"#;
|
||||
let server = start_responses_server_with_sse(sse_raw, 1).await;
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
// Choose a review model different from the main model; ensure it is used.
|
||||
let codex = new_conversation_for_server(&server, &codex_home, |cfg| {
|
||||
cfg.model = "gpt-4.1".to_string();
|
||||
cfg.review_model = "gpt-5".to_string();
|
||||
})
|
||||
.await;
|
||||
|
||||
codex
|
||||
.submit(Op::Review {
|
||||
review_request: ReviewRequest {
|
||||
prompt: "use custom model".to_string(),
|
||||
user_facing_hint: "use custom model".to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Wait for completion
|
||||
let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode(_))).await;
|
||||
let _closed = wait_for_event(&codex, |ev| {
|
||||
matches!(
|
||||
ev,
|
||||
EventMsg::ExitedReviewMode(ExitedReviewModeEvent {
|
||||
review_output: None
|
||||
})
|
||||
)
|
||||
})
|
||||
.await;
|
||||
let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// Assert the request body model equals the configured review model
|
||||
let request = &server.received_requests().await.unwrap()[0];
|
||||
let body = request.body_json::<serde_json::Value>().unwrap();
|
||||
assert_eq!(body["model"].as_str().unwrap(), "gpt-5");
|
||||
|
||||
server.verify().await;
|
||||
}
|
||||
|
||||
/// When a review session begins, it must not prepend prior chat history from
|
||||
/// the parent session. The request `input` should contain only the review
|
||||
/// prompt from the user.
|
||||
// Windows CI only: bump to 4 workers to prevent SSE/event starvation and test timeouts.
|
||||
#[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
|
||||
#[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
|
||||
async fn review_input_isolated_from_parent_history() {
|
||||
skip_if_no_network!();
|
||||
|
||||
// Mock server for the single review request
|
||||
let sse_raw = r#"[
|
||||
{"type":"response.completed", "response": {"id": "__ID__"}}
|
||||
]"#;
|
||||
let server = start_responses_server_with_sse(sse_raw, 1).await;
|
||||
|
||||
// Seed a parent session history via resume file with both user + assistant items.
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
|
||||
let session_file = codex_home.path().join("resume.jsonl");
|
||||
{
|
||||
let mut f = tokio::fs::File::create(&session_file).await.unwrap();
|
||||
let convo_id = Uuid::new_v4();
|
||||
// Proper session_meta line (enveloped) with a conversation id
|
||||
let meta_line = serde_json::json!({
|
||||
"timestamp": "2024-01-01T00:00:00.000Z",
|
||||
"type": "session_meta",
|
||||
"payload": {
|
||||
"id": convo_id,
|
||||
"timestamp": "2024-01-01T00:00:00Z",
|
||||
"instructions": null,
|
||||
"cwd": ".",
|
||||
"originator": "test_originator",
|
||||
"cli_version": "test_version",
|
||||
"model_provider": "test-provider"
|
||||
}
|
||||
});
|
||||
f.write_all(format!("{meta_line}\n").as_bytes())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Prior user message (enveloped response_item)
|
||||
let user = codex_protocol::models::ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![codex_protocol::models::ContentItem::InputText {
|
||||
text: "parent: earlier user message".to_string(),
|
||||
}],
|
||||
};
|
||||
let user_json = serde_json::to_value(&user).unwrap();
|
||||
let user_line = serde_json::json!({
|
||||
"timestamp": "2024-01-01T00:00:01.000Z",
|
||||
"type": "response_item",
|
||||
"payload": user_json
|
||||
});
|
||||
f.write_all(format!("{user_line}\n").as_bytes())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Prior assistant message (enveloped response_item)
|
||||
let assistant = codex_protocol::models::ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![codex_protocol::models::ContentItem::OutputText {
|
||||
text: "parent: assistant reply".to_string(),
|
||||
}],
|
||||
};
|
||||
let assistant_json = serde_json::to_value(&assistant).unwrap();
|
||||
let assistant_line = serde_json::json!({
|
||||
"timestamp": "2024-01-01T00:00:02.000Z",
|
||||
"type": "response_item",
|
||||
"payload": assistant_json
|
||||
});
|
||||
f.write_all(format!("{assistant_line}\n").as_bytes())
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
let codex =
|
||||
resume_conversation_for_server(&server, &codex_home, session_file.clone(), |_| {}).await;
|
||||
|
||||
// Submit review request; it must start fresh (no parent history in `input`).
|
||||
let review_prompt = "Please review only this".to_string();
|
||||
codex
|
||||
.submit(Op::Review {
|
||||
review_request: ReviewRequest {
|
||||
prompt: review_prompt.clone(),
|
||||
user_facing_hint: review_prompt.clone(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode(_))).await;
|
||||
let _closed = wait_for_event(&codex, |ev| {
|
||||
matches!(
|
||||
ev,
|
||||
EventMsg::ExitedReviewMode(ExitedReviewModeEvent {
|
||||
review_output: None
|
||||
})
|
||||
)
|
||||
})
|
||||
.await;
|
||||
let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// Assert the request `input` contains the environment context followed by the user review prompt.
|
||||
let request = &server.received_requests().await.unwrap()[0];
|
||||
let body = request.body_json::<serde_json::Value>().unwrap();
|
||||
let input = body["input"].as_array().expect("input array");
|
||||
assert_eq!(
|
||||
input.len(),
|
||||
2,
|
||||
"expected environment context and review prompt"
|
||||
);
|
||||
|
||||
let env_msg = &input[0];
|
||||
assert_eq!(env_msg["type"].as_str().unwrap(), "message");
|
||||
assert_eq!(env_msg["role"].as_str().unwrap(), "user");
|
||||
let env_text = env_msg["content"][0]["text"].as_str().expect("env text");
|
||||
assert!(
|
||||
env_text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG),
|
||||
"environment context must be the first item"
|
||||
);
|
||||
assert!(
|
||||
env_text.contains("<cwd>"),
|
||||
"environment context should include cwd"
|
||||
);
|
||||
|
||||
let review_msg = &input[1];
|
||||
assert_eq!(review_msg["type"].as_str().unwrap(), "message");
|
||||
assert_eq!(review_msg["role"].as_str().unwrap(), "user");
|
||||
assert_eq!(
|
||||
review_msg["content"][0]["text"].as_str().unwrap(),
|
||||
review_prompt,
|
||||
"user message should only contain the raw review prompt"
|
||||
);
|
||||
|
||||
// Ensure the REVIEW_PROMPT rubric is sent via instructions.
|
||||
let instructions = body["instructions"].as_str().expect("instructions string");
|
||||
assert_eq!(instructions, REVIEW_PROMPT);
|
||||
|
||||
// Also verify that a user interruption note was recorded in the rollout.
|
||||
let path = codex.rollout_path();
|
||||
let text = std::fs::read_to_string(&path).expect("read rollout file");
|
||||
let mut saw_interruption_message = false;
|
||||
for line in text.lines() {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
let v: serde_json::Value = serde_json::from_str(line).expect("jsonl line");
|
||||
let rl: RolloutLine = serde_json::from_value(v).expect("rollout line");
|
||||
if let RolloutItem::ResponseItem(ResponseItem::Message { role, content, .. }) = rl.item
|
||||
&& role == "user"
|
||||
{
|
||||
for c in content {
|
||||
if let ContentItem::InputText { text } = c
|
||||
&& text.contains("User initiated a review task, but was interrupted.")
|
||||
{
|
||||
saw_interruption_message = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if saw_interruption_message {
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert!(
|
||||
saw_interruption_message,
|
||||
"expected user interruption message in rollout"
|
||||
);
|
||||
|
||||
server.verify().await;
|
||||
}
|
||||
|
||||
/// After a review thread finishes, its conversation should not leak into the
|
||||
/// parent session. A subsequent parent turn must not include any review
|
||||
/// messages in its request `input`.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn review_history_does_not_leak_into_parent_session() {
|
||||
skip_if_no_network!();
|
||||
|
||||
// Respond to both the review request and the subsequent parent request.
|
||||
let sse_raw = r#"[
|
||||
{"type":"response.output_item.done", "item":{
|
||||
"type":"message", "role":"assistant",
|
||||
"content":[{"type":"output_text","text":"review assistant output"}]
|
||||
}},
|
||||
{"type":"response.completed", "response": {"id": "__ID__"}}
|
||||
]"#;
|
||||
let server = start_responses_server_with_sse(sse_raw, 2).await;
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let codex = new_conversation_for_server(&server, &codex_home, |_| {}).await;
|
||||
|
||||
// 1) Run a review turn that produces an assistant message (isolated in child).
|
||||
codex
|
||||
.submit(Op::Review {
|
||||
review_request: ReviewRequest {
|
||||
prompt: "Start a review".to_string(),
|
||||
user_facing_hint: "Start a review".to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode(_))).await;
|
||||
let _closed = wait_for_event(&codex, |ev| {
|
||||
matches!(
|
||||
ev,
|
||||
EventMsg::ExitedReviewMode(ExitedReviewModeEvent {
|
||||
review_output: Some(_)
|
||||
})
|
||||
)
|
||||
})
|
||||
.await;
|
||||
let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// 2) Continue in the parent session; request input must not include any review items.
|
||||
let followup = "back to parent".to_string();
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: followup.clone(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// Inspect the second request (parent turn) input contents.
|
||||
// Parent turns include session initial messages (user_instructions, environment_context).
|
||||
// Critically, no messages from the review thread should appear.
|
||||
let requests = server.received_requests().await.unwrap();
|
||||
assert_eq!(requests.len(), 2);
|
||||
let body = requests[1].body_json::<serde_json::Value>().unwrap();
|
||||
let input = body["input"].as_array().expect("input array");
|
||||
|
||||
// Must include the followup as the last item for this turn
|
||||
let last = input.last().expect("at least one item in input");
|
||||
assert_eq!(last["role"].as_str().unwrap(), "user");
|
||||
let last_text = last["content"][0]["text"].as_str().unwrap();
|
||||
assert_eq!(last_text, followup);
|
||||
|
||||
// Ensure no review-thread content leaked into the parent request
|
||||
let contains_review_prompt = input
|
||||
.iter()
|
||||
.any(|msg| msg["content"][0]["text"].as_str().unwrap_or_default() == "Start a review");
|
||||
let contains_review_assistant = input.iter().any(|msg| {
|
||||
msg["content"][0]["text"].as_str().unwrap_or_default() == "review assistant output"
|
||||
});
|
||||
assert!(
|
||||
!contains_review_prompt,
|
||||
"review prompt leaked into parent turn input"
|
||||
);
|
||||
assert!(
|
||||
!contains_review_assistant,
|
||||
"review assistant output leaked into parent turn input"
|
||||
);
|
||||
|
||||
server.verify().await;
|
||||
}
|
||||
|
||||
/// Start a mock Responses API server and mount the given SSE stream body.
|
||||
async fn start_responses_server_with_sse(sse_raw: &str, expected_requests: usize) -> MockServer {
|
||||
let server = MockServer::start().await;
|
||||
let sse = load_sse_fixture_with_id_from_str(sse_raw, &Uuid::new_v4().to_string());
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(
|
||||
ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse.clone(), "text/event-stream"),
|
||||
)
|
||||
.expect(expected_requests as u64)
|
||||
.mount(&server)
|
||||
.await;
|
||||
server
|
||||
}
|
||||
|
||||
/// Create a conversation configured to talk to the provided mock server.
|
||||
#[expect(clippy::expect_used)]
|
||||
async fn new_conversation_for_server<F>(
|
||||
server: &MockServer,
|
||||
codex_home: &TempDir,
|
||||
mutator: F,
|
||||
) -> Arc<CodexConversation>
|
||||
where
|
||||
F: FnOnce(&mut Config),
|
||||
{
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
let mut config = load_default_config_for_test(codex_home);
|
||||
config.model_provider = model_provider;
|
||||
mutator(&mut config);
|
||||
let conversation_manager =
|
||||
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
|
||||
conversation_manager
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.expect("create conversation")
|
||||
.conversation
|
||||
}
|
||||
|
||||
/// Create a conversation resuming from a rollout file, configured to talk to the provided mock server.
|
||||
#[expect(clippy::expect_used)]
|
||||
async fn resume_conversation_for_server<F>(
|
||||
server: &MockServer,
|
||||
codex_home: &TempDir,
|
||||
resume_path: std::path::PathBuf,
|
||||
mutator: F,
|
||||
) -> Arc<CodexConversation>
|
||||
where
|
||||
F: FnOnce(&mut Config),
|
||||
{
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
let mut config = load_default_config_for_test(codex_home);
|
||||
config.model_provider = model_provider;
|
||||
mutator(&mut config);
|
||||
let conversation_manager =
|
||||
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
|
||||
let auth_manager =
|
||||
codex_core::AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key"));
|
||||
conversation_manager
|
||||
.resume_conversation_from_rollout(config, resume_path, auth_manager)
|
||||
.await
|
||||
.expect("resume conversation")
|
||||
.conversation
|
||||
}
|
||||
1104
llmx-rs/core/tests/suite/rmcp_client.rs
Normal file
1104
llmx-rs/core/tests/suite/rmcp_client.rs
Normal file
File diff suppressed because it is too large
Load Diff
82
llmx-rs/core/tests/suite/rollout_list_find.rs
Normal file
82
llmx-rs/core/tests/suite/rollout_list_find.rs
Normal file
@@ -0,0 +1,82 @@
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use codex_core::find_conversation_path_by_id_str;
|
||||
use tempfile::TempDir;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Create sessions/YYYY/MM/DD and write a minimal rollout file containing the
|
||||
/// provided conversation id in the SessionMeta line. Returns the absolute path.
|
||||
fn write_minimal_rollout_with_id(codex_home: &Path, id: Uuid) -> PathBuf {
|
||||
let sessions = codex_home.join("sessions/2024/01/01");
|
||||
std::fs::create_dir_all(&sessions).unwrap();
|
||||
|
||||
let file = sessions.join(format!("rollout-2024-01-01T00-00-00-{id}.jsonl"));
|
||||
let mut f = std::fs::File::create(&file).unwrap();
|
||||
// Minimal first line: session_meta with the id so content search can find it
|
||||
writeln!(
|
||||
f,
|
||||
"{}",
|
||||
serde_json::json!({
|
||||
"timestamp": "2024-01-01T00:00:00.000Z",
|
||||
"type": "session_meta",
|
||||
"payload": {
|
||||
"id": id,
|
||||
"timestamp": "2024-01-01T00:00:00Z",
|
||||
"instructions": null,
|
||||
"cwd": ".",
|
||||
"originator": "test",
|
||||
"cli_version": "test",
|
||||
"model_provider": "test-provider"
|
||||
}
|
||||
})
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
file
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn find_locates_rollout_file_by_id() {
|
||||
let home = TempDir::new().unwrap();
|
||||
let id = Uuid::new_v4();
|
||||
let expected = write_minimal_rollout_with_id(home.path(), id);
|
||||
|
||||
let found = find_conversation_path_by_id_str(home.path(), &id.to_string())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(found.unwrap(), expected);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn find_handles_gitignore_covering_codex_home_directory() {
|
||||
let repo = TempDir::new().unwrap();
|
||||
let codex_home = repo.path().join(".codex");
|
||||
std::fs::create_dir_all(&codex_home).unwrap();
|
||||
std::fs::write(repo.path().join(".gitignore"), ".codex/**\n").unwrap();
|
||||
let id = Uuid::new_v4();
|
||||
let expected = write_minimal_rollout_with_id(&codex_home, id);
|
||||
|
||||
let found = find_conversation_path_by_id_str(&codex_home, &id.to_string())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(found, Some(expected));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn find_ignores_granular_gitignore_rules() {
|
||||
let home = TempDir::new().unwrap();
|
||||
let id = Uuid::new_v4();
|
||||
let expected = write_minimal_rollout_with_id(home.path(), id);
|
||||
std::fs::write(home.path().join("sessions/.gitignore"), "*.jsonl\n").unwrap();
|
||||
|
||||
let found = find_conversation_path_by_id_str(home.path(), &id.to_string())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(found, Some(expected));
|
||||
}
|
||||
311
llmx-rs/core/tests/suite/seatbelt.rs
Normal file
311
llmx-rs/core/tests/suite/seatbelt.rs
Normal file
@@ -0,0 +1,311 @@
|
||||
#![cfg(target_os = "macos")]
|
||||
|
||||
//! Tests for the macOS sandboxing that are specific to Seatbelt.
|
||||
//! Tests that apply to both Mac and Linux sandboxing should go in sandbox.rs.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_core::seatbelt::spawn_command_under_seatbelt;
|
||||
use codex_core::spawn::CODEX_SANDBOX_ENV_VAR;
|
||||
use codex_core::spawn::StdioPolicy;
|
||||
use tempfile::TempDir;
|
||||
|
||||
struct TestScenario {
|
||||
repo_parent: PathBuf,
|
||||
file_outside_repo: PathBuf,
|
||||
repo_root: PathBuf,
|
||||
file_in_repo_root: PathBuf,
|
||||
file_in_dot_git_dir: PathBuf,
|
||||
}
|
||||
|
||||
struct TestExpectations {
|
||||
file_outside_repo_is_writable: bool,
|
||||
file_in_repo_root_is_writable: bool,
|
||||
file_in_dot_git_dir_is_writable: bool,
|
||||
}
|
||||
|
||||
impl TestScenario {
|
||||
async fn run_test(&self, policy: &SandboxPolicy, expectations: TestExpectations) {
|
||||
if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) {
|
||||
eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
|
||||
return;
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
touch(&self.file_outside_repo, policy).await,
|
||||
expectations.file_outside_repo_is_writable
|
||||
);
|
||||
assert_eq!(
|
||||
self.file_outside_repo.exists(),
|
||||
expectations.file_outside_repo_is_writable
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
touch(&self.file_in_repo_root, policy).await,
|
||||
expectations.file_in_repo_root_is_writable
|
||||
);
|
||||
assert_eq!(
|
||||
self.file_in_repo_root.exists(),
|
||||
expectations.file_in_repo_root_is_writable
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
touch(&self.file_in_dot_git_dir, policy).await,
|
||||
expectations.file_in_dot_git_dir_is_writable
|
||||
);
|
||||
assert_eq!(
|
||||
self.file_in_dot_git_dir.exists(),
|
||||
expectations.file_in_dot_git_dir_is_writable
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// If the user has added a workspace root that is not a Git repo root, then
|
||||
/// the user has to specify `--skip-git-repo-check` or go through some
|
||||
/// interstitial that indicates they are taking on some risk because Git
|
||||
/// cannot be used to backup their work before the agent begins.
|
||||
///
|
||||
/// Because the user has agreed to this risk, we do not try find all .git
|
||||
/// folders in the workspace and block them (though we could change our
|
||||
/// position on this in the future).
|
||||
#[tokio::test]
|
||||
async fn if_parent_of_repo_is_writable_then_dot_git_folder_is_writable() {
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let test_scenario = create_test_scenario(&tmp);
|
||||
let policy = SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![test_scenario.repo_parent.clone()],
|
||||
network_access: false,
|
||||
exclude_tmpdir_env_var: true,
|
||||
exclude_slash_tmp: true,
|
||||
};
|
||||
|
||||
test_scenario
|
||||
.run_test(
|
||||
&policy,
|
||||
TestExpectations {
|
||||
file_outside_repo_is_writable: true,
|
||||
file_in_repo_root_is_writable: true,
|
||||
file_in_dot_git_dir_is_writable: true,
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
/// When the writable root is the root of a Git repository (as evidenced by the
|
||||
/// presence of a .git folder), then the .git folder should be read-only if
|
||||
/// the policy is `WorkspaceWrite`.
|
||||
#[tokio::test]
|
||||
async fn if_git_repo_is_writable_root_then_dot_git_folder_is_read_only() {
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let test_scenario = create_test_scenario(&tmp);
|
||||
let policy = SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![test_scenario.repo_root.clone()],
|
||||
network_access: false,
|
||||
exclude_tmpdir_env_var: true,
|
||||
exclude_slash_tmp: true,
|
||||
};
|
||||
|
||||
test_scenario
|
||||
.run_test(
|
||||
&policy,
|
||||
TestExpectations {
|
||||
file_outside_repo_is_writable: false,
|
||||
file_in_repo_root_is_writable: true,
|
||||
file_in_dot_git_dir_is_writable: false,
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Under DangerFullAccess, all writes should be permitted anywhere on disk,
|
||||
/// including inside the .git folder.
|
||||
#[tokio::test]
|
||||
async fn danger_full_access_allows_all_writes() {
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let test_scenario = create_test_scenario(&tmp);
|
||||
let policy = SandboxPolicy::DangerFullAccess;
|
||||
|
||||
test_scenario
|
||||
.run_test(
|
||||
&policy,
|
||||
TestExpectations {
|
||||
file_outside_repo_is_writable: true,
|
||||
file_in_repo_root_is_writable: true,
|
||||
file_in_dot_git_dir_is_writable: true,
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Under ReadOnly, writes should not be permitted anywhere on disk.
|
||||
#[tokio::test]
|
||||
async fn read_only_forbids_all_writes() {
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let test_scenario = create_test_scenario(&tmp);
|
||||
let policy = SandboxPolicy::ReadOnly;
|
||||
|
||||
test_scenario
|
||||
.run_test(
|
||||
&policy,
|
||||
TestExpectations {
|
||||
file_outside_repo_is_writable: false,
|
||||
file_in_repo_root_is_writable: false,
|
||||
file_in_dot_git_dir_is_writable: false,
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Verify that user lookups via `pwd.getpwuid(os.getuid())` work under the
|
||||
/// seatbelt sandbox. Prior to allowing the necessary mach‑lookup for
|
||||
/// OpenDirectory libinfo, this would fail with `KeyError: getpwuid(): uid not found`.
|
||||
#[tokio::test]
|
||||
async fn python_getpwuid_works_under_seatbelt() {
|
||||
if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) {
|
||||
eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
|
||||
return;
|
||||
}
|
||||
|
||||
// For local dev.
|
||||
if which::which("python3").is_err() {
|
||||
eprintln!("python3 not found in PATH, skipping test.");
|
||||
return;
|
||||
}
|
||||
|
||||
// ReadOnly is sufficient here since we are only exercising user lookup.
|
||||
let policy = SandboxPolicy::ReadOnly;
|
||||
let command_cwd = std::env::current_dir().expect("getcwd");
|
||||
let sandbox_cwd = command_cwd.clone();
|
||||
|
||||
let mut child = spawn_command_under_seatbelt(
|
||||
vec![
|
||||
"python3".to_string(),
|
||||
"-c".to_string(),
|
||||
// Print the passwd struct; success implies lookup worked.
|
||||
"import pwd, os; print(pwd.getpwuid(os.getuid()))".to_string(),
|
||||
],
|
||||
command_cwd,
|
||||
&policy,
|
||||
sandbox_cwd.as_path(),
|
||||
StdioPolicy::RedirectForShellTool,
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.expect("should be able to spawn python under seatbelt");
|
||||
|
||||
let status = child
|
||||
.wait()
|
||||
.await
|
||||
.expect("should be able to wait for child process");
|
||||
assert!(status.success(), "python exited with {status:?}");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn java_home_finds_runtime_under_seatbelt() {
|
||||
if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) {
|
||||
eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
|
||||
return;
|
||||
}
|
||||
|
||||
let java_home_path = Path::new("/usr/libexec/java_home");
|
||||
if !java_home_path.exists() {
|
||||
eprintln!("/usr/libexec/java_home is not present, skipping test.");
|
||||
return;
|
||||
}
|
||||
|
||||
let baseline_output = tokio::process::Command::new(java_home_path)
|
||||
.env_remove("JAVA_HOME")
|
||||
.output()
|
||||
.await
|
||||
.expect("should be able to invoke java_home outside seatbelt");
|
||||
if !baseline_output.status.success() {
|
||||
eprintln!(
|
||||
"java_home exited with {:?} outside seatbelt, skipping test",
|
||||
baseline_output.status
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let policy = SandboxPolicy::ReadOnly;
|
||||
let command_cwd = std::env::current_dir().expect("getcwd");
|
||||
let sandbox_cwd = command_cwd.clone();
|
||||
|
||||
let mut env: HashMap<String, String> = std::env::vars().collect();
|
||||
env.remove("JAVA_HOME");
|
||||
env.remove(CODEX_SANDBOX_ENV_VAR);
|
||||
|
||||
let child = spawn_command_under_seatbelt(
|
||||
vec![java_home_path.to_string_lossy().to_string()],
|
||||
command_cwd,
|
||||
&policy,
|
||||
sandbox_cwd.as_path(),
|
||||
StdioPolicy::RedirectForShellTool,
|
||||
env,
|
||||
)
|
||||
.await
|
||||
.expect("should be able to spawn java_home under seatbelt");
|
||||
|
||||
let output = child
|
||||
.wait_with_output()
|
||||
.await
|
||||
.expect("should be able to wait for java_home child");
|
||||
assert!(
|
||||
output.status.success(),
|
||||
"java_home under seatbelt exited with {:?}, stderr: {}",
|
||||
output.status,
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
);
|
||||
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
assert!(
|
||||
!stdout.trim().is_empty(),
|
||||
"java_home stdout unexpectedly empty under seatbelt"
|
||||
);
|
||||
}
|
||||
|
||||
#[expect(clippy::expect_used)]
|
||||
fn create_test_scenario(tmp: &TempDir) -> TestScenario {
|
||||
let repo_parent = tmp.path().to_path_buf();
|
||||
let repo_root = repo_parent.join("repo");
|
||||
let dot_git_dir = repo_root.join(".git");
|
||||
|
||||
std::fs::create_dir(&repo_root).expect("should be able to create repo root");
|
||||
std::fs::create_dir(&dot_git_dir).expect("should be able to create .git dir");
|
||||
|
||||
TestScenario {
|
||||
file_outside_repo: repo_parent.join("outside.txt"),
|
||||
repo_parent,
|
||||
file_in_repo_root: repo_root.join("repo_file.txt"),
|
||||
repo_root,
|
||||
file_in_dot_git_dir: dot_git_dir.join("dot_git_file.txt"),
|
||||
}
|
||||
}
|
||||
|
||||
#[expect(clippy::expect_used)]
|
||||
/// Note that `path` must be absolute.
|
||||
async fn touch(path: &Path, policy: &SandboxPolicy) -> bool {
|
||||
assert!(path.is_absolute(), "Path must be absolute: {path:?}");
|
||||
let command_cwd = std::env::current_dir().expect("getcwd");
|
||||
let sandbox_cwd = command_cwd.clone();
|
||||
let mut child = spawn_command_under_seatbelt(
|
||||
vec![
|
||||
"/usr/bin/touch".to_string(),
|
||||
path.to_string_lossy().to_string(),
|
||||
],
|
||||
command_cwd,
|
||||
policy,
|
||||
sandbox_cwd.as_path(),
|
||||
StdioPolicy::RedirectForShellTool,
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.expect("should be able to spawn command under seatbelt");
|
||||
child
|
||||
.wait()
|
||||
.await
|
||||
.expect("should be able to wait for child process")
|
||||
.success()
|
||||
}
|
||||
966
llmx-rs/core/tests/suite/shell_serialization.rs
Normal file
966
llmx-rs/core/tests/suite/shell_serialization.rs
Normal file
@@ -0,0 +1,966 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::model_family::find_family_for_model;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::assert_regex_match;
|
||||
use core_test_support::responses::ev_apply_patch_function_call;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_custom_tool_call;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_local_shell_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use pretty_assertions::assert_eq;
|
||||
use regex_lite::Regex;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
use std::fs;
|
||||
|
||||
const FIXTURE_JSON: &str = r#"{
|
||||
"description": "This is an example JSON file.",
|
||||
"foo": "bar",
|
||||
"isTest": true,
|
||||
"testNumber": 123,
|
||||
"testArray": [1, 2, 3],
|
||||
"testObject": {
|
||||
"foo": "bar"
|
||||
}
|
||||
}
|
||||
"#;
|
||||
|
||||
async fn submit_turn(test: &TestCodex, prompt: &str, sandbox_policy: SandboxPolicy) -> Result<()> {
|
||||
let session_model = test.session_configured.model.clone();
|
||||
|
||||
test.codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: prompt.into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: test.cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::TaskComplete(_))
|
||||
})
|
||||
.await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn request_bodies(requests: &[wiremock::Request]) -> Result<Vec<Value>> {
|
||||
requests
|
||||
.iter()
|
||||
.map(|req| Ok(serde_json::from_slice::<Value>(&req.body)?))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn find_function_call_output<'a>(bodies: &'a [Value], call_id: &str) -> Option<&'a Value> {
|
||||
for body in bodies {
|
||||
if let Some(items) = body.get("input").and_then(Value::as_array) {
|
||||
for item in items {
|
||||
if item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
|
||||
{
|
||||
return Some(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn find_custom_tool_call_output<'a>(bodies: &'a [Value], call_id: &str) -> Option<&'a Value> {
|
||||
for body in bodies {
|
||||
if let Some(items) = body.get("input").and_then(Value::as_array) {
|
||||
for item in items {
|
||||
if item.get("type").and_then(Value::as_str) == Some("custom_tool_call_output")
|
||||
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
|
||||
{
|
||||
return Some(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.features.disable(Feature::ApplyPatchFreeform);
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-json";
|
||||
let args = json!({
|
||||
"command": ["/bin/echo", "shell json"],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"run the json shell command",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let requests = server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("recorded requests present");
|
||||
let bodies = request_bodies(&requests)?;
|
||||
let output_item = find_function_call_output(&bodies, call_id).expect("shell output present");
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("shell output string");
|
||||
|
||||
let mut parsed: Value = serde_json::from_str(output)?;
|
||||
if let Some(metadata) = parsed.get_mut("metadata").and_then(Value::as_object_mut) {
|
||||
// duration_seconds is non-deterministic; remove it for deep equality
|
||||
let _ = metadata.remove("duration_seconds");
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
parsed
|
||||
.get("metadata")
|
||||
.and_then(|metadata| metadata.get("exit_code"))
|
||||
.and_then(Value::as_i64),
|
||||
Some(0),
|
||||
"expected zero exit code in unformatted JSON output",
|
||||
);
|
||||
let stdout = parsed
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or_default();
|
||||
assert_regex_match(r"(?s)^shell json\n?$", stdout);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_is_structured_with_freeform_apply_patch() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.features.enable(Feature::ApplyPatchFreeform);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-structured";
|
||||
let args = json!({
|
||||
"command": ["/bin/echo", "freeform shell"],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"run the structured shell command",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let requests = server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("recorded requests present");
|
||||
let bodies = request_bodies(&requests)?;
|
||||
let output_item =
|
||||
find_function_call_output(&bodies, call_id).expect("structured output present");
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("structured output string");
|
||||
|
||||
assert!(
|
||||
serde_json::from_str::<Value>(output).is_err(),
|
||||
"expected structured shell output to be plain text",
|
||||
);
|
||||
let expected_pattern = r"(?s)^Exit code: 0
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Output:
|
||||
freeform shell
|
||||
?$";
|
||||
assert_regex_match(expected_pattern, output);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_preserves_fixture_json_without_serialization() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.features.disable(Feature::ApplyPatchFreeform);
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let fixture_path = test.cwd.path().join("fixture.json");
|
||||
fs::write(&fixture_path, FIXTURE_JSON)?;
|
||||
let fixture_path_str = fixture_path.to_string_lossy().to_string();
|
||||
|
||||
let call_id = "shell-json-fixture";
|
||||
let args = json!({
|
||||
"command": ["/usr/bin/sed", "-n", "p", fixture_path_str],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"read the fixture JSON with sed",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let requests = server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("recorded requests present");
|
||||
let bodies = request_bodies(&requests)?;
|
||||
let output_item = find_function_call_output(&bodies, call_id).expect("shell output present");
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("shell output string");
|
||||
|
||||
let mut parsed: Value = serde_json::from_str(output)?;
|
||||
if let Some(metadata) = parsed.get_mut("metadata").and_then(Value::as_object_mut) {
|
||||
let _ = metadata.remove("duration_seconds");
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
parsed
|
||||
.get("metadata")
|
||||
.and_then(|metadata| metadata.get("exit_code"))
|
||||
.and_then(Value::as_i64),
|
||||
Some(0),
|
||||
"expected zero exit code when serialization is disabled",
|
||||
);
|
||||
let stdout = parsed
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
assert_eq!(
|
||||
stdout, FIXTURE_JSON,
|
||||
"expected shell output to match the fixture contents"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_structures_fixture_with_serialization() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.features.enable(Feature::ApplyPatchFreeform);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let fixture_path = test.cwd.path().join("fixture.json");
|
||||
fs::write(&fixture_path, FIXTURE_JSON)?;
|
||||
let fixture_path_str = fixture_path.to_string_lossy().to_string();
|
||||
|
||||
let call_id = "shell-structured-fixture";
|
||||
let args = json!({
|
||||
"command": ["/usr/bin/sed", "-n", "p", fixture_path_str],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"read the fixture JSON with structured output",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let requests = server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("recorded requests present");
|
||||
let bodies = request_bodies(&requests)?;
|
||||
let output_item =
|
||||
find_function_call_output(&bodies, call_id).expect("structured output present");
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("structured output string");
|
||||
|
||||
assert!(
|
||||
serde_json::from_str::<Value>(output).is_err(),
|
||||
"expected structured output to be plain text"
|
||||
);
|
||||
let (header, body) = output
|
||||
.split_once("Output:\n")
|
||||
.expect("structured output contains an Output section");
|
||||
assert_regex_match(
|
||||
r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds$",
|
||||
header.trim_end(),
|
||||
);
|
||||
assert_eq!(
|
||||
body, FIXTURE_JSON,
|
||||
"expected Output section to include the fixture contents"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_for_freeform_tool_records_duration() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
let sleep_cmd = vec!["/bin/bash", "-c", "sleep 1"];
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
let sleep_cmd = vec!["/bin/bash", "-c", "sleep 1"];
|
||||
|
||||
#[cfg(windows)]
|
||||
let sleep_cmd = "timeout 1";
|
||||
|
||||
let call_id = "shell-structured";
|
||||
let args = json!({
|
||||
"command": sleep_cmd,
|
||||
"timeout_ms": 2_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"run the structured shell command",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let requests = server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("recorded requests present");
|
||||
let bodies = request_bodies(&requests)?;
|
||||
let output_item =
|
||||
find_function_call_output(&bodies, call_id).expect("structured output present");
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("structured output string");
|
||||
|
||||
let expected_pattern = r#"(?s)^Exit code: 0
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Output:
|
||||
$"#;
|
||||
assert_regex_match(expected_pattern, output);
|
||||
|
||||
let wall_time_regex = Regex::new(r"(?m)^Wall (?:time|Clock): ([0-9]+(?:\.[0-9]+)?) seconds$")
|
||||
.expect("compile wall time regex");
|
||||
let wall_time_seconds = wall_time_regex
|
||||
.captures(output)
|
||||
.and_then(|caps| caps.get(1))
|
||||
.and_then(|value| value.as_str().parse::<f32>().ok())
|
||||
.expect("expected structured shell output to contain wall time seconds");
|
||||
assert!(
|
||||
wall_time_seconds > 0.5,
|
||||
"expected wall time to be greater than zero seconds, got {wall_time_seconds}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_reserializes_truncated_content() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5 is a model family");
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-truncated";
|
||||
let args = json!({
|
||||
"command": ["/bin/sh", "-c", "seq 1 400"],
|
||||
"timeout_ms": 5_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"run the truncation shell command",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let requests = server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("recorded requests present");
|
||||
let bodies = request_bodies(&requests)?;
|
||||
let output_item =
|
||||
find_function_call_output(&bodies, call_id).expect("truncated output present");
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("truncated output string");
|
||||
|
||||
assert!(
|
||||
serde_json::from_str::<Value>(output).is_err(),
|
||||
"expected truncated shell output to be plain text",
|
||||
);
|
||||
let truncated_pattern = r#"(?s)^Exit code: 0
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Total output lines: 400
|
||||
Output:
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
.*
|
||||
\[\.{3} omitted \d+ of 400 lines \.{3}\]
|
||||
|
||||
.*
|
||||
396
|
||||
397
|
||||
398
|
||||
399
|
||||
400
|
||||
$"#;
|
||||
assert_regex_match(truncated_pattern, output);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_custom_tool_output_is_structured() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "apply-patch-structured";
|
||||
let file_name = "structured.txt";
|
||||
let patch = format!(
|
||||
r#"*** Begin Patch
|
||||
*** Add File: {file_name}
|
||||
+from custom tool
|
||||
*** End Patch
|
||||
"#
|
||||
);
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_custom_tool_call(call_id, "apply_patch", &patch),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"apply the patch via custom tool",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let requests = server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("recorded requests present");
|
||||
let bodies = request_bodies(&requests)?;
|
||||
let output_item =
|
||||
find_custom_tool_call_output(&bodies, call_id).expect("apply_patch output present");
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("apply_patch output string");
|
||||
|
||||
let expected_pattern = format!(
|
||||
r"(?s)^Exit code: 0
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Output:
|
||||
Success. Updated the following files:
|
||||
A {file_name}
|
||||
?$"
|
||||
);
|
||||
assert_regex_match(&expected_pattern, output);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_custom_tool_call_creates_file() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "apply-patch-add-file";
|
||||
let file_name = "custom_tool_apply_patch.txt";
|
||||
let patch = format!(
|
||||
"*** Begin Patch\n*** Add File: {file_name}\n+custom tool content\n*** End Patch\n"
|
||||
);
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_custom_tool_call(call_id, "apply_patch", &patch),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "apply_patch done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"apply the patch via custom tool to create a file",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let requests = server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("recorded requests present");
|
||||
let bodies = request_bodies(&requests)?;
|
||||
let output_item =
|
||||
find_custom_tool_call_output(&bodies, call_id).expect("apply_patch output present");
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("apply_patch output string");
|
||||
|
||||
let expected_pattern = format!(
|
||||
r"(?s)^Exit code: 0
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Output:
|
||||
Success. Updated the following files:
|
||||
A {file_name}
|
||||
?$"
|
||||
);
|
||||
assert_regex_match(&expected_pattern, output);
|
||||
|
||||
let new_file_path = test.cwd.path().join(file_name);
|
||||
let created_contents = fs::read_to_string(&new_file_path)?;
|
||||
assert_eq!(
|
||||
created_contents, "custom tool content\n",
|
||||
"expected file contents for {file_name}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_custom_tool_call_updates_existing_file() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "apply-patch-update-file";
|
||||
let file_name = "custom_tool_apply_patch_existing.txt";
|
||||
let file_path = test.cwd.path().join(file_name);
|
||||
fs::write(&file_path, "before\n")?;
|
||||
let patch = format!(
|
||||
"*** Begin Patch\n*** Update File: {file_name}\n@@\n-before\n+after\n*** End Patch\n"
|
||||
);
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_custom_tool_call(call_id, "apply_patch", &patch),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "apply_patch update done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"apply the patch via custom tool to update a file",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let requests = server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("recorded requests present");
|
||||
let bodies = request_bodies(&requests)?;
|
||||
let output_item =
|
||||
find_custom_tool_call_output(&bodies, call_id).expect("apply_patch output present");
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("apply_patch output string");
|
||||
|
||||
let expected_pattern = format!(
|
||||
r"(?s)^Exit code: 0
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Output:
|
||||
Success. Updated the following files:
|
||||
M {file_name}
|
||||
?$"
|
||||
);
|
||||
assert_regex_match(&expected_pattern, output);
|
||||
|
||||
let updated_contents = fs::read_to_string(file_path)?;
|
||||
assert_eq!(updated_contents, "after\n", "expected updated file content");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_custom_tool_call_reports_failure_output() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "apply-patch-failure";
|
||||
let missing_file = "missing_custom_tool_apply_patch.txt";
|
||||
let patch = format!(
|
||||
"*** Begin Patch\n*** Update File: {missing_file}\n@@\n-before\n+after\n*** End Patch\n"
|
||||
);
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_custom_tool_call(call_id, "apply_patch", &patch),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "apply_patch failure done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"attempt a failing apply_patch via custom tool",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let requests = server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("recorded requests present");
|
||||
let bodies = request_bodies(&requests)?;
|
||||
let output_item =
|
||||
find_custom_tool_call_output(&bodies, call_id).expect("apply_patch output present");
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("apply_patch output string");
|
||||
|
||||
let expected_output = format!(
|
||||
"apply_patch verification failed: Failed to read file to update {}/{missing_file}: No such file or directory (os error 2)",
|
||||
test.cwd.path().to_string_lossy()
|
||||
);
|
||||
assert_eq!(output, expected_output);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_function_call_output_is_structured() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "apply-patch-function";
|
||||
let file_name = "function_apply_patch.txt";
|
||||
let patch =
|
||||
format!("*** Begin Patch\n*** Add File: {file_name}\n+via function call\n*** End Patch\n");
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_apply_patch_function_call(call_id, &patch),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "apply_patch function done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"apply the patch via function-call apply_patch",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let requests = server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("recorded requests present");
|
||||
let bodies = request_bodies(&requests)?;
|
||||
let output_item =
|
||||
find_function_call_output(&bodies, call_id).expect("apply_patch function output present");
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("apply_patch output string");
|
||||
|
||||
let expected_pattern = format!(
|
||||
r"(?s)^Exit code: 0
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Output:
|
||||
Success. Updated the following files:
|
||||
A {file_name}
|
||||
?$"
|
||||
);
|
||||
assert_regex_match(&expected_pattern, output);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_is_structured_for_nonzero_exit() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-nonzero-exit";
|
||||
let args = json!({
|
||||
"command": ["/bin/sh", "-c", "exit 42"],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "shell failure handled"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"run the failing shell command",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let requests = server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("recorded requests present");
|
||||
let bodies = request_bodies(&requests)?;
|
||||
let output_item = find_function_call_output(&bodies, call_id).expect("shell output present");
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("shell output string");
|
||||
|
||||
let expected_pattern = r"(?s)^Exit code: 42
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Output:
|
||||
?$";
|
||||
assert_regex_match(expected_pattern, output);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn local_shell_call_output_is_structured() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "local-shell-call";
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_local_shell_call(call_id, "completed", vec!["/bin/echo", "local shell"]),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "local shell done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"run the local shell command",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let requests = server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("recorded requests present");
|
||||
let bodies = request_bodies(&requests)?;
|
||||
let output_item =
|
||||
find_function_call_output(&bodies, call_id).expect("local shell output present");
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("local shell output string");
|
||||
|
||||
let expected_pattern = r"(?s)^Exit code: 0
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Output:
|
||||
local shell
|
||||
?$";
|
||||
assert_regex_match(expected_pattern, output);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
114
llmx-rs/core/tests/suite/stream_error_allows_next_turn.rs
Normal file
114
llmx-rs/core/tests/suite/stream_error_allows_next_turn.rs
Normal file
@@ -0,0 +1,114 @@
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::WireApi;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::load_sse_fixture_with_id;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::body_string_contains;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
fn sse_completed(id: &str) -> String {
|
||||
load_sse_fixture_with_id("tests/fixtures/completed_template.json", id)
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn continue_after_stream_error() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let server = MockServer::start().await;
|
||||
|
||||
let fail = ResponseTemplate::new(500)
|
||||
.insert_header("content-type", "application/json")
|
||||
.set_body_string(
|
||||
serde_json::json!({
|
||||
"error": {"type": "bad_request", "message": "synthetic client error"}
|
||||
})
|
||||
.to_string(),
|
||||
);
|
||||
|
||||
// The provider below disables request retries (request_max_retries = 0),
|
||||
// so the failing request should only occur once.
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.and(body_string_contains("first message"))
|
||||
.respond_with(fail)
|
||||
.up_to_n_times(2)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let ok = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse_completed("resp_ok2"), "text/event-stream");
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.and(body_string_contains("follow up"))
|
||||
.respond_with(ok)
|
||||
.expect(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
// Configure a provider that uses the Responses API and points at our mock
|
||||
// server. Use an existing env var (PATH) to satisfy the auth plumbing
|
||||
// without requiring a real secret.
|
||||
let provider = ModelProviderInfo {
|
||||
name: "mock-openai".into(),
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
env_key: Some("PATH".into()),
|
||||
env_key_instructions: None,
|
||||
experimental_bearer_token: None,
|
||||
wire_api: WireApi::Responses,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: Some(1),
|
||||
stream_max_retries: Some(1),
|
||||
stream_idle_timeout_ms: Some(2_000),
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.base_instructions = Some("You are a helpful assistant".to_string());
|
||||
config.model_provider = provider;
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "first message".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Expect an Error followed by TaskComplete so the session is released.
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::Error(_))).await;
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// 2) Second turn: now send another prompt that should succeed using the
|
||||
// mock server SSE stream. If the agent failed to clear the running task on
|
||||
// error above, this submission would be rejected/queued indefinitely.
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "follow up".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
}
|
||||
105
llmx-rs/core/tests/suite/stream_no_completed.rs
Normal file
105
llmx-rs/core/tests/suite/stream_no_completed.rs
Normal file
@@ -0,0 +1,105 @@
|
||||
//! Verifies that the agent retries when the SSE stream terminates before
|
||||
//! delivering a `response.completed` event.
|
||||
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::WireApi;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::load_sse_fixture;
|
||||
use core_test_support::load_sse_fixture_with_id;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::Request;
|
||||
use wiremock::Respond;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
fn sse_incomplete() -> String {
|
||||
load_sse_fixture("tests/fixtures/incomplete_sse.json")
|
||||
}
|
||||
|
||||
fn sse_completed(id: &str) -> String {
|
||||
load_sse_fixture_with_id("tests/fixtures/completed_template.json", id)
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn retries_on_early_close() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let server = MockServer::start().await;
|
||||
|
||||
struct SeqResponder;
|
||||
impl Respond for SeqResponder {
|
||||
fn respond(&self, _: &Request) -> ResponseTemplate {
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::sync::atomic::Ordering;
|
||||
static CALLS: AtomicUsize = AtomicUsize::new(0);
|
||||
let n = CALLS.fetch_add(1, Ordering::SeqCst);
|
||||
if n == 0 {
|
||||
ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse_incomplete(), "text/event-stream")
|
||||
} else {
|
||||
ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse_completed("resp_ok"), "text/event-stream")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(SeqResponder {})
|
||||
.expect(2)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
// Configure retry behavior explicitly to avoid mutating process-wide
|
||||
// environment variables.
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
name: "openai".into(),
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
// Environment variable that should exist in the test environment.
|
||||
// ModelClient will return an error if the environment variable for the
|
||||
// provider is not set.
|
||||
env_key: Some("PATH".into()),
|
||||
env_key_instructions: None,
|
||||
experimental_bearer_token: None,
|
||||
wire_api: WireApi::Responses,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
// exercise retry path: first attempt yields incomplete stream, so allow 1 retry
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(1),
|
||||
stream_idle_timeout_ms: Some(2000),
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.model_provider = model_provider;
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Wait until TaskComplete (should succeed after retry).
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
}
|
||||
469
llmx-rs/core/tests/suite/tool_harness.rs
Normal file
469
llmx-rs/core/tests/suite/tool_harness.rs
Normal file
@@ -0,0 +1,469 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
|
||||
use std::fs;
|
||||
|
||||
use assert_matches::assert_matches;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::model_family::find_family_for_model;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::plan_tool::StepStatus;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::assert_regex_match;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::ev_apply_patch_function_call;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_local_shell_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
use wiremock::matchers::any;
|
||||
|
||||
fn extract_output_text(item: &Value) -> Option<&str> {
|
||||
item.get("output").and_then(|value| match value {
|
||||
Value::String(text) => Some(text.as_str()),
|
||||
Value::Object(obj) => obj.get("content").and_then(Value::as_str),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_tool_executes_command_and_streams_output() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a valid model");
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-tool-call";
|
||||
let command = vec!["/bin/echo", "tool harness"];
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_local_shell_call(call_id, "completed", command),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "all done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
let second_mock = responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please run the shell command".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let output_item = req.function_call_output(call_id);
|
||||
let output_text = extract_output_text(&output_item).expect("output text present");
|
||||
let exec_output: Value = serde_json::from_str(output_text)?;
|
||||
assert_eq!(exec_output["metadata"]["exit_code"], 0);
|
||||
let stdout = exec_output["output"].as_str().expect("stdout field");
|
||||
assert_regex_match(r"(?s)^tool harness\n?$", stdout);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn update_plan_tool_emits_plan_update_event() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex();
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let call_id = "plan-tool-call";
|
||||
let plan_args = json!({
|
||||
"explanation": "Tool harness check",
|
||||
"plan": [
|
||||
{"step": "Inspect workspace", "status": "in_progress"},
|
||||
{"step": "Report results", "status": "pending"},
|
||||
],
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "update_plan", &plan_args),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "plan acknowledged"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
let second_mock = responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please update the plan".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let mut saw_plan_update = false;
|
||||
wait_for_event(&codex, |event| match event {
|
||||
EventMsg::PlanUpdate(update) => {
|
||||
saw_plan_update = true;
|
||||
assert_eq!(update.explanation.as_deref(), Some("Tool harness check"));
|
||||
assert_eq!(update.plan.len(), 2);
|
||||
assert_eq!(update.plan[0].step, "Inspect workspace");
|
||||
assert_matches!(update.plan[0].status, StepStatus::InProgress);
|
||||
assert_eq!(update.plan[1].step, "Report results");
|
||||
assert_matches!(update.plan[1].status, StepStatus::Pending);
|
||||
false
|
||||
}
|
||||
EventMsg::TaskComplete(_) => true,
|
||||
_ => false,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert!(saw_plan_update, "expected PlanUpdate event");
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let output_item = req.function_call_output(call_id);
|
||||
assert_eq!(
|
||||
output_item.get("call_id").and_then(Value::as_str),
|
||||
Some(call_id)
|
||||
);
|
||||
let output_text = extract_output_text(&output_item).expect("output text present");
|
||||
assert_eq!(output_text, "Plan updated");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn update_plan_tool_rejects_malformed_payload() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex();
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let call_id = "plan-tool-invalid";
|
||||
let invalid_args = json!({
|
||||
"explanation": "Missing plan data"
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "update_plan", &invalid_args),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "malformed plan payload"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
let second_mock = responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please update the plan".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let mut saw_plan_update = false;
|
||||
wait_for_event(&codex, |event| match event {
|
||||
EventMsg::PlanUpdate(_) => {
|
||||
saw_plan_update = true;
|
||||
false
|
||||
}
|
||||
EventMsg::TaskComplete(_) => true,
|
||||
_ => false,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert!(
|
||||
!saw_plan_update,
|
||||
"did not expect PlanUpdate event for malformed payload"
|
||||
);
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let output_item = req.function_call_output(call_id);
|
||||
assert_eq!(
|
||||
output_item.get("call_id").and_then(Value::as_str),
|
||||
Some(call_id)
|
||||
);
|
||||
let output_text = extract_output_text(&output_item).expect("output text present");
|
||||
assert!(
|
||||
output_text.contains("failed to parse function arguments"),
|
||||
"expected parse error message in output text, got {output_text:?}"
|
||||
);
|
||||
if let Some(success_flag) = output_item
|
||||
.get("output")
|
||||
.and_then(|value| value.as_object())
|
||||
.and_then(|obj| obj.get("success"))
|
||||
.and_then(serde_json::Value::as_bool)
|
||||
{
|
||||
assert!(
|
||||
!success_flag,
|
||||
"expected tool output to mark success=false for malformed payload"
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_tool_executes_and_emits_patch_events() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.features.enable(Feature::ApplyPatchFreeform);
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let file_name = "notes.txt";
|
||||
let file_path = cwd.path().join(file_name);
|
||||
let call_id = "apply-patch-call";
|
||||
let patch_content = format!(
|
||||
r#"*** Begin Patch
|
||||
*** Add File: {file_name}
|
||||
+Tool harness apply patch
|
||||
*** End Patch"#
|
||||
);
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_apply_patch_function_call(call_id, &patch_content),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "patch complete"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
let second_mock = responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please apply a patch".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let mut saw_patch_begin = false;
|
||||
let mut patch_end_success = None;
|
||||
wait_for_event(&codex, |event| match event {
|
||||
EventMsg::PatchApplyBegin(begin) => {
|
||||
saw_patch_begin = true;
|
||||
assert_eq!(begin.call_id, call_id);
|
||||
false
|
||||
}
|
||||
EventMsg::PatchApplyEnd(end) => {
|
||||
assert_eq!(end.call_id, call_id);
|
||||
patch_end_success = Some(end.success);
|
||||
false
|
||||
}
|
||||
EventMsg::TaskComplete(_) => true,
|
||||
_ => false,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert!(saw_patch_begin, "expected PatchApplyBegin event");
|
||||
let patch_end_success =
|
||||
patch_end_success.expect("expected PatchApplyEnd event to capture success flag");
|
||||
assert!(patch_end_success);
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let output_item = req.function_call_output(call_id);
|
||||
assert_eq!(
|
||||
output_item.get("call_id").and_then(Value::as_str),
|
||||
Some(call_id)
|
||||
);
|
||||
let output_text = extract_output_text(&output_item).expect("output text present");
|
||||
|
||||
let expected_pattern = format!(
|
||||
r"(?s)^Exit code: 0
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Output:
|
||||
Success. Updated the following files:
|
||||
A {file_name}
|
||||
?$"
|
||||
);
|
||||
assert_regex_match(&expected_pattern, output_text);
|
||||
|
||||
let updated_contents = fs::read_to_string(file_path)?;
|
||||
assert_eq!(
|
||||
updated_contents, "Tool harness apply patch\n",
|
||||
"expected updated file content"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_reports_parse_diagnostics() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.features.enable(Feature::ApplyPatchFreeform);
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let call_id = "apply-patch-parse-error";
|
||||
let patch_content = r"*** Begin Patch
|
||||
*** Update File: broken.txt
|
||||
*** End Patch";
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_apply_patch_function_call(call_id, patch_content),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "failed"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
let second_mock = responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please apply a patch".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let output_item = req.function_call_output(call_id);
|
||||
assert_eq!(
|
||||
output_item.get("call_id").and_then(Value::as_str),
|
||||
Some(call_id)
|
||||
);
|
||||
let output_text = extract_output_text(&output_item).expect("output text present");
|
||||
|
||||
assert!(
|
||||
output_text.contains("apply_patch verification failed"),
|
||||
"expected apply_patch verification failure message, got {output_text:?}"
|
||||
);
|
||||
assert!(
|
||||
output_text.contains("invalid hunk"),
|
||||
"expected parse diagnostics in output text, got {output_text:?}"
|
||||
);
|
||||
|
||||
if let Some(success_flag) = output_item
|
||||
.get("output")
|
||||
.and_then(|value| value.as_object())
|
||||
.and_then(|obj| obj.get("success"))
|
||||
.and_then(serde_json::Value::as_bool)
|
||||
{
|
||||
assert!(
|
||||
!success_flag,
|
||||
"expected tool output to mark success=false for parse failures"
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
206
llmx-rs/core/tests/suite/tool_parallelism.rs
Normal file
206
llmx-rs/core/tests/suite/tool_parallelism.rs
Normal file
@@ -0,0 +1,206 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
#![allow(clippy::unwrap_used)]
|
||||
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
use codex_core::model_family::find_family_for_model;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use serde_json::json;
|
||||
|
||||
async fn run_turn(test: &TestCodex, prompt: &str) -> anyhow::Result<()> {
|
||||
let session_model = test.session_configured.model.clone();
|
||||
|
||||
test.codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: prompt.into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: test.cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn run_turn_and_measure(test: &TestCodex, prompt: &str) -> anyhow::Result<Duration> {
|
||||
let start = Instant::now();
|
||||
run_turn(test, prompt).await?;
|
||||
Ok(start.elapsed())
|
||||
}
|
||||
|
||||
#[allow(clippy::expect_used)]
|
||||
async fn build_codex_with_test_tool(server: &wiremock::MockServer) -> anyhow::Result<TestCodex> {
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "test-gpt-5-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("test-gpt-5-codex").expect("test-gpt-5-codex model family");
|
||||
});
|
||||
builder.build(server).await
|
||||
}
|
||||
|
||||
fn assert_parallel_duration(actual: Duration) {
|
||||
// Allow headroom for runtime overhead while still differentiating from serial execution.
|
||||
assert!(
|
||||
actual < Duration::from_millis(750),
|
||||
"expected parallel execution to finish quickly, got {actual:?}"
|
||||
);
|
||||
}
|
||||
|
||||
fn assert_serial_duration(actual: Duration) {
|
||||
assert!(
|
||||
actual >= Duration::from_millis(500),
|
||||
"expected serial execution to take longer, got {actual:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn read_file_tools_run_in_parallel() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let test = build_codex_with_test_tool(&server).await?;
|
||||
|
||||
let warmup_args = json!({
|
||||
"sleep_after_ms": 10,
|
||||
"barrier": {
|
||||
"id": "parallel-test-sync-warmup",
|
||||
"participants": 2,
|
||||
"timeout_ms": 1_000,
|
||||
}
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let parallel_args = json!({
|
||||
"sleep_after_ms": 300,
|
||||
"barrier": {
|
||||
"id": "parallel-test-sync",
|
||||
"participants": 2,
|
||||
"timeout_ms": 1_000,
|
||||
}
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let warmup_first = sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-warm-1"}}),
|
||||
ev_function_call("warm-call-1", "test_sync_tool", &warmup_args),
|
||||
ev_function_call("warm-call-2", "test_sync_tool", &warmup_args),
|
||||
ev_completed("resp-warm-1"),
|
||||
]);
|
||||
let warmup_second = sse(vec![
|
||||
ev_assistant_message("warm-msg-1", "warmup complete"),
|
||||
ev_completed("resp-warm-2"),
|
||||
]);
|
||||
|
||||
let first_response = sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_function_call("call-1", "test_sync_tool", ¶llel_args),
|
||||
ev_function_call("call-2", "test_sync_tool", ¶llel_args),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
mount_sse_sequence(
|
||||
&server,
|
||||
vec![warmup_first, warmup_second, first_response, second_response],
|
||||
)
|
||||
.await;
|
||||
|
||||
run_turn(&test, "warm up parallel tool").await?;
|
||||
|
||||
let duration = run_turn_and_measure(&test, "exercise sync tool").await?;
|
||||
assert_parallel_duration(duration);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn non_parallel_tools_run_serially() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let test = test_codex().build(&server).await?;
|
||||
|
||||
let shell_args = json!({
|
||||
"command": ["/bin/sh", "-c", "sleep 0.3"],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let args_one = serde_json::to_string(&shell_args)?;
|
||||
let args_two = serde_json::to_string(&shell_args)?;
|
||||
|
||||
let first_response = sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_function_call("call-1", "shell", &args_one),
|
||||
ev_function_call("call-2", "shell", &args_two),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
mount_sse_sequence(&server, vec![first_response, second_response]).await;
|
||||
|
||||
let duration = run_turn_and_measure(&test, "run shell twice").await?;
|
||||
assert_serial_duration(duration);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn mixed_tools_fall_back_to_serial() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let test = build_codex_with_test_tool(&server).await?;
|
||||
|
||||
let sync_args = json!({
|
||||
"sleep_after_ms": 300
|
||||
})
|
||||
.to_string();
|
||||
let shell_args = serde_json::to_string(&json!({
|
||||
"command": ["/bin/sh", "-c", "sleep 0.3"],
|
||||
"timeout_ms": 1_000,
|
||||
}))?;
|
||||
|
||||
let first_response = sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_function_call("call-1", "test_sync_tool", &sync_args),
|
||||
ev_function_call("call-2", "shell", &shell_args),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
mount_sse_sequence(&server, vec![first_response, second_response]).await;
|
||||
|
||||
let duration = run_turn_and_measure(&test, "mix tools").await?;
|
||||
assert_serial_duration(duration);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
537
llmx-rs/core/tests/suite/tools.rs
Normal file
537
llmx-rs/core/tests/suite/tools.rs
Normal file
@@ -0,0 +1,537 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::model_family::find_family_for_model;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::assert_regex_match;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_custom_tool_call;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_once;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use regex_lite::Regex;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
|
||||
async fn submit_turn(
|
||||
test: &TestCodex,
|
||||
prompt: &str,
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: SandboxPolicy,
|
||||
) -> Result<()> {
|
||||
let session_model = test.session_configured.model.clone();
|
||||
|
||||
test.codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: prompt.into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: test.cwd.path().to_path_buf(),
|
||||
approval_policy,
|
||||
sandbox_policy,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::TaskComplete(_))
|
||||
})
|
||||
.await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn tool_names(body: &Value) -> Vec<String> {
|
||||
body.get("tools")
|
||||
.and_then(Value::as_array)
|
||||
.map(|tools| {
|
||||
tools
|
||||
.iter()
|
||||
.filter_map(|tool| {
|
||||
tool.get("name")
|
||||
.or_else(|| tool.get("type"))
|
||||
.and_then(Value::as_str)
|
||||
.map(str::to_string)
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn custom_tool_unknown_returns_custom_output_error() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex();
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "custom-unsupported";
|
||||
let tool_name = "unsupported_tool";
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_custom_tool_call(call_id, tool_name, "\"payload\""),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let mock = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"invoke custom tool",
|
||||
AskForApproval::Never,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let item = mock.single_request().custom_tool_call_output(call_id);
|
||||
let output = item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or_default();
|
||||
let expected = format!("unsupported custom tool call: {tool_name}");
|
||||
assert_eq!(output, expected);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a valid model");
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let command = ["/bin/echo", "shell ok"];
|
||||
let call_id_blocked = "shell-blocked";
|
||||
let call_id_success = "shell-success";
|
||||
|
||||
let first_args = json!({
|
||||
"command": command,
|
||||
"timeout_ms": 1_000,
|
||||
"with_escalated_permissions": true,
|
||||
});
|
||||
let second_args = json!({
|
||||
"command": command,
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(
|
||||
call_id_blocked,
|
||||
"shell",
|
||||
&serde_json::to_string(&first_args)?,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let second_mock = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_function_call(
|
||||
call_id_success,
|
||||
"shell",
|
||||
&serde_json::to_string(&second_args)?,
|
||||
),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let third_mock = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-3"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"run the shell command",
|
||||
AskForApproval::Never,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let policy = AskForApproval::Never;
|
||||
let expected_message = format!(
|
||||
"approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}"
|
||||
);
|
||||
|
||||
let blocked_item = second_mock
|
||||
.single_request()
|
||||
.function_call_output(call_id_blocked);
|
||||
assert_eq!(
|
||||
blocked_item.get("output").and_then(Value::as_str),
|
||||
Some(expected_message.as_str()),
|
||||
"unexpected rejection message"
|
||||
);
|
||||
|
||||
let success_item = third_mock
|
||||
.single_request()
|
||||
.function_call_output(call_id_success);
|
||||
let output_json: Value = serde_json::from_str(
|
||||
success_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("success output string"),
|
||||
)?;
|
||||
assert_eq!(
|
||||
output_json["metadata"]["exit_code"].as_i64(),
|
||||
Some(0),
|
||||
"expected exit code 0 after rerunning without escalation",
|
||||
);
|
||||
let stdout = output_json["output"].as_str().unwrap_or_default();
|
||||
let stdout_pattern = r"(?s)^shell ok\n?$";
|
||||
assert_regex_match(stdout_pattern, stdout);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn sandbox_denied_shell_returns_original_output() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex model family");
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
let call_id = "sandbox-denied-shell";
|
||||
let target_path = fixture.workspace_path("sandbox-denied.txt");
|
||||
let sentinel = "sandbox-denied sentinel output";
|
||||
let command = vec![
|
||||
"/bin/sh".to_string(),
|
||||
"-c".to_string(),
|
||||
format!(
|
||||
"printf {sentinel:?}; printf {content:?} > {path:?}",
|
||||
sentinel = format!("{sentinel}\n"),
|
||||
content = "sandbox denied",
|
||||
path = &target_path
|
||||
),
|
||||
];
|
||||
let args = json!({
|
||||
"command": command,
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
fixture
|
||||
.submit_turn_with_policy(
|
||||
"run a command that should be denied by the read-only sandbox",
|
||||
SandboxPolicy::ReadOnly,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let output_text = mock
|
||||
.function_call_output_text(call_id)
|
||||
.context("shell output present")?;
|
||||
let exit_code_line = output_text
|
||||
.lines()
|
||||
.next()
|
||||
.context("exit code line present")?;
|
||||
let exit_code = exit_code_line
|
||||
.strip_prefix("Exit code: ")
|
||||
.context("exit code prefix present")?
|
||||
.trim()
|
||||
.parse::<i32>()
|
||||
.context("exit code is integer")?;
|
||||
let body = output_text;
|
||||
|
||||
let body_lower = body.to_lowercase();
|
||||
// Required for multi-OS.
|
||||
let has_denial = body_lower.contains("permission denied")
|
||||
|| body_lower.contains("operation not permitted")
|
||||
|| body_lower.contains("read-only file system");
|
||||
assert!(
|
||||
has_denial,
|
||||
"expected sandbox denial details in tool output: {body}"
|
||||
);
|
||||
assert!(
|
||||
body.contains(sentinel),
|
||||
"expected sentinel output from command to reach the model: {body}"
|
||||
);
|
||||
let target_path_str = target_path
|
||||
.to_str()
|
||||
.context("target path string representation")?;
|
||||
assert!(
|
||||
body.contains(target_path_str),
|
||||
"expected sandbox error to mention denied path: {body}"
|
||||
);
|
||||
assert!(
|
||||
!body_lower.contains("failed in sandbox"),
|
||||
"expected original tool output, found fallback message: {body}"
|
||||
);
|
||||
assert_ne!(
|
||||
exit_code, 0,
|
||||
"sandbox denial should surface a non-zero exit code"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn collect_tools(use_unified_exec: bool) -> Result<Vec<String>> {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let responses = vec![sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-1"),
|
||||
])];
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
if use_unified_exec {
|
||||
config.features.enable(Feature::UnifiedExec);
|
||||
} else {
|
||||
config.features.disable(Feature::UnifiedExec);
|
||||
}
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"list tools",
|
||||
AskForApproval::Never,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let first_body = mock.single_request().body_json();
|
||||
Ok(tool_names(&first_body))
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn unified_exec_spec_toggle_end_to_end() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let tools_disabled = collect_tools(false).await?;
|
||||
assert!(
|
||||
!tools_disabled.iter().any(|name| name == "exec_command"),
|
||||
"tools list should not include exec_command when disabled: {tools_disabled:?}"
|
||||
);
|
||||
assert!(
|
||||
!tools_disabled.iter().any(|name| name == "write_stdin"),
|
||||
"tools list should not include write_stdin when disabled: {tools_disabled:?}"
|
||||
);
|
||||
|
||||
let tools_enabled = collect_tools(true).await?;
|
||||
assert!(
|
||||
tools_enabled.iter().any(|name| name == "exec_command"),
|
||||
"tools list should include exec_command when enabled: {tools_enabled:?}"
|
||||
);
|
||||
assert!(
|
||||
tools_enabled.iter().any(|name| name == "write_stdin"),
|
||||
"tools list should include write_stdin when enabled: {tools_enabled:?}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a valid model");
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-timeout";
|
||||
let timeout_ms = 50u64;
|
||||
let args = json!({
|
||||
"command": ["/bin/sh", "-c", "yes line | head -n 400; sleep 1"],
|
||||
"timeout_ms": timeout_ms,
|
||||
});
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let second_mock = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"run a long command",
|
||||
AskForApproval::Never,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let timeout_item = second_mock.single_request().function_call_output(call_id);
|
||||
|
||||
let output_str = timeout_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("timeout output string");
|
||||
|
||||
// The exec path can report a timeout in two ways depending on timing:
|
||||
// 1) Structured JSON with exit_code 124 and a timeout prefix (preferred), or
|
||||
// 2) A plain error string if the child is observed as killed by a signal first.
|
||||
if let Ok(output_json) = serde_json::from_str::<Value>(output_str) {
|
||||
assert_eq!(
|
||||
output_json["metadata"]["exit_code"].as_i64(),
|
||||
Some(124),
|
||||
"expected timeout exit code 124",
|
||||
);
|
||||
|
||||
let stdout = output_json["output"].as_str().unwrap_or_default();
|
||||
assert!(
|
||||
stdout.contains("command timed out"),
|
||||
"timeout output missing `command timed out`: {stdout}"
|
||||
);
|
||||
} else {
|
||||
// Fallback: accept the signal classification path to deflake the test.
|
||||
let signal_pattern = r"(?is)^execution error:.*signal.*$";
|
||||
assert_regex_match(signal_pattern, output_str);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_spawn_failure_truncates_exec_error() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|cfg| {
|
||||
cfg.sandbox_policy = SandboxPolicy::DangerFullAccess;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-spawn-failure";
|
||||
let bogus_component = "missing-bin-".repeat(700);
|
||||
let bogus_exe = test
|
||||
.cwd
|
||||
.path()
|
||||
.join(bogus_component)
|
||||
.to_string_lossy()
|
||||
.into_owned();
|
||||
|
||||
let args = json!({
|
||||
"command": [bogus_exe],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let second_mock = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"spawn a missing binary",
|
||||
AskForApproval::Never,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let failure_item = second_mock.single_request().function_call_output(call_id);
|
||||
|
||||
let output = failure_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("spawn failure output string");
|
||||
|
||||
let spawn_error_pattern = r#"(?s)^Exit code: -?\d+
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Output:
|
||||
execution error: .*$"#;
|
||||
let spawn_truncated_pattern = r#"(?s)^Exit code: -?\d+
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Total output lines: \d+
|
||||
Output:
|
||||
|
||||
execution error: .*$"#;
|
||||
let spawn_error_regex = Regex::new(spawn_error_pattern)?;
|
||||
let spawn_truncated_regex = Regex::new(spawn_truncated_pattern)?;
|
||||
if !spawn_error_regex.is_match(output) && !spawn_truncated_regex.is_match(output) {
|
||||
let fallback_pattern = r"(?s)^execution error: .*$";
|
||||
assert_regex_match(fallback_pattern, output);
|
||||
}
|
||||
assert!(output.len() <= 10 * 1024);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
382
llmx-rs/core/tests/suite/truncation.rs
Normal file
382
llmx-rs/core/tests/suite/truncation.rs
Normal file
@@ -0,0 +1,382 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use codex_core::config::types::McpServerConfig;
|
||||
use codex_core::config::types::McpServerTransportConfig;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::model_family::find_family_for_model;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::assert_regex_match;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_once_match;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use escargot::CargoBuild;
|
||||
use regex_lite::Regex;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
use std::collections::HashMap;
|
||||
use std::time::Duration;
|
||||
use wiremock::matchers::any;
|
||||
|
||||
// Verifies byte-truncation formatting for function error output (RespondToModel errors)
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
// Use the test model that wires function tools like grep_files
|
||||
config.model = "test-gpt-5-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("test-gpt-5-codex").expect("model family for test model");
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
// Construct a very long, non-existent path to force a RespondToModel error with a large message
|
||||
let long_path = "a".repeat(20_000);
|
||||
let call_id = "grep-huge-error";
|
||||
let args = json!({
|
||||
"pattern": "alpha",
|
||||
"path": long_path,
|
||||
"limit": 10
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "grep_files", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
"trigger grep_files with long path to test truncation",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let output = mock
|
||||
.function_call_output_text(call_id)
|
||||
.context("function error output present")?;
|
||||
|
||||
tracing::debug!(output = %output, "truncated function error output");
|
||||
|
||||
// Expect plaintext with byte-truncation marker and no omitted-lines marker
|
||||
assert!(
|
||||
serde_json::from_str::<serde_json::Value>(&output).is_err(),
|
||||
"expected error output to be plain text",
|
||||
);
|
||||
let truncated_pattern = r#"(?s)^Total output lines: 1\s+.*\[\.\.\. output truncated to fit 10240 bytes \.\.\.\]\s*$"#;
|
||||
assert_regex_match(truncated_pattern, &output);
|
||||
assert!(
|
||||
!output.contains("omitted"),
|
||||
"line omission marker should not appear when no lines were dropped: {output}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Verifies that a standard tool call (shell) exceeding the model formatting
|
||||
// limits is truncated before being sent back to the model.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
// Use a model that exposes the generic shell tool.
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-too-large";
|
||||
let args = serde_json::json!({
|
||||
"command": ["/bin/sh", "-c", "seq 1 400"],
|
||||
"timeout_ms": 5_000,
|
||||
});
|
||||
|
||||
// First response: model tells us to run the tool; second: complete the turn.
|
||||
mount_sse_once_match(
|
||||
&server,
|
||||
any(),
|
||||
sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
responses::ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let mock2 = mount_sse_once_match(
|
||||
&server,
|
||||
any(),
|
||||
sse(vec![
|
||||
responses::ev_assistant_message("msg-1", "done"),
|
||||
responses::ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
fixture
|
||||
.submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess)
|
||||
.await?;
|
||||
|
||||
// Inspect what we sent back to the model; it should contain a truncated
|
||||
// function_call_output for the shell call.
|
||||
let output = mock2
|
||||
.single_request()
|
||||
.function_call_output_text(call_id)
|
||||
.context("function_call_output present for shell call")?;
|
||||
|
||||
// Expect plain text (not JSON) with truncation markers and line elision.
|
||||
assert!(
|
||||
serde_json::from_str::<Value>(&output).is_err(),
|
||||
"expected truncated shell output to be plain text"
|
||||
);
|
||||
let truncated_pattern = r#"(?s)^Exit code: 0
|
||||
Wall time: .* seconds
|
||||
Total output lines: 400
|
||||
Output:
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
.*
|
||||
\[\.{3} omitted 144 of 400 lines \.{3}\]
|
||||
|
||||
.*
|
||||
396
|
||||
397
|
||||
398
|
||||
399
|
||||
400
|
||||
$"#;
|
||||
assert_regex_match(truncated_pattern, &output);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Verifies that an MCP tool call result exceeding the model formatting limits
|
||||
// is truncated before being sent back to the model.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
|
||||
async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let call_id = "rmcp-truncated";
|
||||
let server_name = "rmcp";
|
||||
let tool_name = format!("mcp__{server_name}__echo");
|
||||
|
||||
// Build a very large message to exceed 10KiB once serialized.
|
||||
let large_msg = "long-message-with-newlines-".repeat(600);
|
||||
let args_json = serde_json::json!({ "message": large_msg });
|
||||
|
||||
mount_sse_once_match(
|
||||
&server,
|
||||
any(),
|
||||
sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_function_call(call_id, &tool_name, &args_json.to_string()),
|
||||
responses::ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let mock2 = mount_sse_once_match(
|
||||
&server,
|
||||
any(),
|
||||
sse(vec![
|
||||
responses::ev_assistant_message("msg-1", "rmcp echo tool completed."),
|
||||
responses::ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Compile the rmcp stdio test server and configure it.
|
||||
let rmcp_test_server_bin = CargoBuild::new()
|
||||
.package("codex-rmcp-client")
|
||||
.bin("test_stdio_server")
|
||||
.run()?
|
||||
.path()
|
||||
.to_string_lossy()
|
||||
.into_owned();
|
||||
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.features.enable(Feature::RmcpClient);
|
||||
config.mcp_servers.insert(
|
||||
server_name.to_string(),
|
||||
codex_core::config::types::McpServerConfig {
|
||||
transport: codex_core::config::types::McpServerTransportConfig::Stdio {
|
||||
command: rmcp_test_server_bin,
|
||||
args: Vec::new(),
|
||||
env: None,
|
||||
env_vars: Vec::new(),
|
||||
cwd: None,
|
||||
},
|
||||
enabled: true,
|
||||
startup_timeout_sec: Some(std::time::Duration::from_secs(10)),
|
||||
tool_timeout_sec: None,
|
||||
enabled_tools: None,
|
||||
disabled_tools: None,
|
||||
},
|
||||
);
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
fixture
|
||||
.submit_turn_with_policy(
|
||||
"call the rmcp echo tool with a very large message",
|
||||
SandboxPolicy::ReadOnly,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// The MCP tool call output is converted to a function_call_output for the model.
|
||||
let output = mock2
|
||||
.single_request()
|
||||
.function_call_output_text(call_id)
|
||||
.context("function_call_output present for rmcp call")?;
|
||||
|
||||
// Expect plain text with byte-based truncation marker.
|
||||
assert!(
|
||||
serde_json::from_str::<Value>(&output).is_err(),
|
||||
"expected truncated MCP output to be plain text"
|
||||
);
|
||||
assert!(
|
||||
output.starts_with("Total output lines: 1\n\n{"),
|
||||
"expected total line header and JSON head, got: {output}"
|
||||
);
|
||||
let byte_marker = Regex::new(r"\[\.\.\. output truncated to fit 10240 bytes \.\.\.\]")
|
||||
.expect("compile regex");
|
||||
assert!(
|
||||
byte_marker.is_match(&output),
|
||||
"expected byte truncation marker, got: {output}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Verifies that an MCP image tool output is serialized as content_items array with
|
||||
// the image preserved and no truncation summary appended (since there are no text items).
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
|
||||
async fn mcp_image_output_preserves_image_and_no_text_summary() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let call_id = "rmcp-image-no-trunc";
|
||||
let server_name = "rmcp";
|
||||
let tool_name = format!("mcp__{server_name}__image");
|
||||
|
||||
mount_sse_once_match(
|
||||
&server,
|
||||
any(),
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, &tool_name, "{}"),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let final_mock = mount_sse_once_match(
|
||||
&server,
|
||||
any(),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Build the stdio rmcp server and pass a tiny PNG via data URL so it can construct ImageContent.
|
||||
let rmcp_test_server_bin = CargoBuild::new()
|
||||
.package("codex-rmcp-client")
|
||||
.bin("test_stdio_server")
|
||||
.run()?
|
||||
.path()
|
||||
.to_string_lossy()
|
||||
.into_owned();
|
||||
|
||||
// 1x1 PNG data URL
|
||||
let openai_png = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMB/ee9bQAAAABJRU5ErkJggg==";
|
||||
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.features.enable(Feature::RmcpClient);
|
||||
config.mcp_servers.insert(
|
||||
server_name.to_string(),
|
||||
McpServerConfig {
|
||||
transport: McpServerTransportConfig::Stdio {
|
||||
command: rmcp_test_server_bin,
|
||||
args: Vec::new(),
|
||||
env: Some(HashMap::from([(
|
||||
"MCP_TEST_IMAGE_DATA_URL".to_string(),
|
||||
openai_png.to_string(),
|
||||
)])),
|
||||
env_vars: Vec::new(),
|
||||
cwd: None,
|
||||
},
|
||||
enabled: true,
|
||||
startup_timeout_sec: Some(Duration::from_secs(10)),
|
||||
tool_timeout_sec: None,
|
||||
enabled_tools: None,
|
||||
disabled_tools: None,
|
||||
},
|
||||
);
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
let session_model = fixture.session_configured.model.clone();
|
||||
|
||||
fixture
|
||||
.codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "call the rmcp image tool".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: fixture.cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
// Wait for completion to ensure the outbound request is captured.
|
||||
wait_for_event(&fixture.codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
let output_item = final_mock.single_request().function_call_output(call_id);
|
||||
// Expect exactly one array element: the image item; and no trailing summary text.
|
||||
let output = output_item.get("output").expect("output");
|
||||
assert!(output.is_array(), "expected array output");
|
||||
let arr = output.as_array().unwrap();
|
||||
assert_eq!(arr.len(), 1, "no truncation summary should be appended");
|
||||
assert_eq!(
|
||||
arr[0],
|
||||
json!({"type": "input_image", "image_url": openai_png})
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
491
llmx-rs/core/tests/suite/undo.rs
Normal file
491
llmx-rs/core/tests/suite/undo.rs
Normal file
@@ -0,0 +1,491 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::bail;
|
||||
use codex_core::CodexConversation;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::model_family::find_family_for_model;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::UndoCompletedEvent;
|
||||
use core_test_support::responses::ev_apply_patch_function_call;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodexHarness;
|
||||
use core_test_support::wait_for_event_match;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[allow(clippy::expect_used)]
|
||||
async fn undo_harness() -> Result<TestCodexHarness> {
|
||||
TestCodexHarness::with_config(|config: &mut Config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
|
||||
config.features.enable(Feature::GhostCommit);
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
fn git(path: &Path, args: &[&str]) -> Result<()> {
|
||||
let status = Command::new("git")
|
||||
.args(args)
|
||||
.current_dir(path)
|
||||
.status()
|
||||
.with_context(|| format!("failed to run git {args:?}"))?;
|
||||
if status.success() {
|
||||
return Ok(());
|
||||
}
|
||||
let exit_status = status;
|
||||
bail!("git {args:?} exited with {exit_status}");
|
||||
}
|
||||
|
||||
fn git_output(path: &Path, args: &[&str]) -> Result<String> {
|
||||
let output = Command::new("git")
|
||||
.args(args)
|
||||
.current_dir(path)
|
||||
.output()
|
||||
.with_context(|| format!("failed to run git {args:?}"))?;
|
||||
if !output.status.success() {
|
||||
let exit_status = output.status;
|
||||
bail!("git {args:?} exited with {exit_status}");
|
||||
}
|
||||
String::from_utf8(output.stdout).context("stdout was not valid utf8")
|
||||
}
|
||||
|
||||
fn init_git_repo(path: &Path) -> Result<()> {
|
||||
// Use a consistent initial branch and config across environments to avoid
|
||||
// CI variance (default-branch hints, line ending differences, etc.).
|
||||
git(path, &["init", "--initial-branch=main"])?;
|
||||
git(path, &["config", "core.autocrlf", "false"])?;
|
||||
git(path, &["config", "user.name", "Codex Tests"])?;
|
||||
git(path, &["config", "user.email", "codex-tests@example.com"])?;
|
||||
|
||||
// Create README.txt
|
||||
let readme_path = path.join("README.txt");
|
||||
fs::write(&readme_path, "Test repository initialized by Codex.\n")?;
|
||||
|
||||
// Stage and commit
|
||||
git(path, &["add", "README.txt"])?;
|
||||
git(path, &["commit", "-m", "Add README.txt"])?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn apply_patch_responses(call_id: &str, patch: &str, assistant_msg: &str) -> Vec<String> {
|
||||
vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_apply_patch_function_call(call_id, patch),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", assistant_msg),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
]
|
||||
}
|
||||
|
||||
async fn run_apply_patch_turn(
|
||||
harness: &TestCodexHarness,
|
||||
prompt: &str,
|
||||
call_id: &str,
|
||||
patch: &str,
|
||||
assistant_msg: &str,
|
||||
) -> Result<()> {
|
||||
mount_sse_sequence(
|
||||
harness.server(),
|
||||
apply_patch_responses(call_id, patch, assistant_msg),
|
||||
)
|
||||
.await;
|
||||
harness.submit(prompt).await
|
||||
}
|
||||
|
||||
async fn invoke_undo(codex: &Arc<CodexConversation>) -> Result<UndoCompletedEvent> {
|
||||
codex.submit(Op::Undo).await?;
|
||||
let event = wait_for_event_match(codex, |msg| match msg {
|
||||
EventMsg::UndoCompleted(done) => Some(done.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
Ok(event)
|
||||
}
|
||||
|
||||
async fn expect_successful_undo(codex: &Arc<CodexConversation>) -> Result<UndoCompletedEvent> {
|
||||
let event = invoke_undo(codex).await?;
|
||||
assert!(
|
||||
event.success,
|
||||
"expected undo to succeed but failed with message {:?}",
|
||||
event.message
|
||||
);
|
||||
Ok(event)
|
||||
}
|
||||
|
||||
async fn expect_failed_undo(codex: &Arc<CodexConversation>) -> Result<UndoCompletedEvent> {
|
||||
let event = invoke_undo(codex).await?;
|
||||
assert!(
|
||||
!event.success,
|
||||
"expected undo to fail but succeeded with message {:?}",
|
||||
event.message
|
||||
);
|
||||
assert_eq!(
|
||||
event.message.as_deref(),
|
||||
Some("No ghost snapshot available to undo.")
|
||||
);
|
||||
Ok(event)
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn undo_removes_new_file_created_during_turn() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = undo_harness().await?;
|
||||
init_git_repo(harness.cwd())?;
|
||||
|
||||
let call_id = "undo-create-file";
|
||||
let patch = "*** Begin Patch\n*** Add File: new_file.txt\n+from turn\n*** End Patch";
|
||||
run_apply_patch_turn(&harness, "create file", call_id, patch, "ok").await?;
|
||||
|
||||
let new_path = harness.path("new_file.txt");
|
||||
assert_eq!(fs::read_to_string(&new_path)?, "from turn\n");
|
||||
|
||||
let codex = Arc::clone(&harness.test().codex);
|
||||
let completed = expect_successful_undo(&codex).await?;
|
||||
assert!(completed.success, "undo failed: {:?}", completed.message);
|
||||
|
||||
assert!(!new_path.exists());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn undo_restores_tracked_file_edit() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = undo_harness().await?;
|
||||
init_git_repo(harness.cwd())?;
|
||||
|
||||
let tracked = harness.path("tracked.txt");
|
||||
fs::write(&tracked, "before\n")?;
|
||||
git(harness.cwd(), &["add", "tracked.txt"])?;
|
||||
git(harness.cwd(), &["commit", "-m", "track file"])?;
|
||||
|
||||
let patch = "*** Begin Patch\n*** Update File: tracked.txt\n@@\n-before\n+after\n*** End Patch";
|
||||
run_apply_patch_turn(
|
||||
&harness,
|
||||
"update tracked file",
|
||||
"undo-tracked-edit",
|
||||
patch,
|
||||
"done",
|
||||
)
|
||||
.await?;
|
||||
println!(
|
||||
"apply_patch output: {}",
|
||||
harness.function_call_stdout("undo-tracked-edit").await
|
||||
);
|
||||
|
||||
assert_eq!(fs::read_to_string(&tracked)?, "after\n");
|
||||
|
||||
let codex = Arc::clone(&harness.test().codex);
|
||||
let completed = expect_successful_undo(&codex).await?;
|
||||
assert!(completed.success, "undo failed: {:?}", completed.message);
|
||||
|
||||
assert_eq!(fs::read_to_string(&tracked)?, "before\n");
|
||||
let status = git_output(harness.cwd(), &["status", "--short"])?;
|
||||
assert_eq!(status, "");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn undo_restores_untracked_file_edit() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = undo_harness().await?;
|
||||
init_git_repo(harness.cwd())?;
|
||||
git(harness.cwd(), &["commit", "--allow-empty", "-m", "init"])?;
|
||||
|
||||
let notes = harness.path("notes.txt");
|
||||
fs::write(¬es, "original\n")?;
|
||||
let status_before = git_output(harness.cwd(), &["status", "--short", "--ignored"])?;
|
||||
assert!(status_before.contains("?? notes.txt"));
|
||||
|
||||
let patch =
|
||||
"*** Begin Patch\n*** Update File: notes.txt\n@@\n-original\n+modified\n*** End Patch";
|
||||
run_apply_patch_turn(
|
||||
&harness,
|
||||
"edit untracked",
|
||||
"undo-untracked-edit",
|
||||
patch,
|
||||
"done",
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(fs::read_to_string(¬es)?, "modified\n");
|
||||
|
||||
let codex = Arc::clone(&harness.test().codex);
|
||||
let completed = expect_successful_undo(&codex).await?;
|
||||
assert!(completed.success, "undo failed: {:?}", completed.message);
|
||||
|
||||
assert_eq!(fs::read_to_string(¬es)?, "original\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn undo_reverts_only_latest_turn() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = undo_harness().await?;
|
||||
init_git_repo(harness.cwd())?;
|
||||
|
||||
let call_id_one = "undo-turn-one";
|
||||
let add_patch = "*** Begin Patch\n*** Add File: story.txt\n+first version\n*** End Patch";
|
||||
run_apply_patch_turn(&harness, "create story", call_id_one, add_patch, "done").await?;
|
||||
let story = harness.path("story.txt");
|
||||
assert_eq!(fs::read_to_string(&story)?, "first version\n");
|
||||
|
||||
let call_id_two = "undo-turn-two";
|
||||
let update_patch = "*** Begin Patch\n*** Update File: story.txt\n@@\n-first version\n+second version\n*** End Patch";
|
||||
run_apply_patch_turn(&harness, "revise story", call_id_two, update_patch, "done").await?;
|
||||
assert_eq!(fs::read_to_string(&story)?, "second version\n");
|
||||
|
||||
let codex = Arc::clone(&harness.test().codex);
|
||||
let completed = expect_successful_undo(&codex).await?;
|
||||
assert!(completed.success, "undo failed: {:?}", completed.message);
|
||||
|
||||
assert_eq!(fs::read_to_string(&story)?, "first version\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn undo_does_not_touch_unrelated_files() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = undo_harness().await?;
|
||||
init_git_repo(harness.cwd())?;
|
||||
|
||||
let tracked_constant = harness.path("stable.txt");
|
||||
fs::write(&tracked_constant, "stable\n")?;
|
||||
let target = harness.path("target.txt");
|
||||
fs::write(&target, "start\n")?;
|
||||
let gitignore = harness.path(".gitignore");
|
||||
fs::write(&gitignore, "ignored-stable.log\n")?;
|
||||
git(
|
||||
harness.cwd(),
|
||||
&["add", "stable.txt", "target.txt", ".gitignore"],
|
||||
)?;
|
||||
git(harness.cwd(), &["commit", "-m", "seed tracked"])?;
|
||||
|
||||
let preexisting_untracked = harness.path("scratch.txt");
|
||||
fs::write(&preexisting_untracked, "scratch before\n")?;
|
||||
let ignored = harness.path("ignored-stable.log");
|
||||
fs::write(&ignored, "ignored before\n")?;
|
||||
|
||||
let full_patch = "*** Begin Patch\n*** Update File: target.txt\n@@\n-start\n+edited\n*** Add File: temp.txt\n+ephemeral\n*** End Patch";
|
||||
run_apply_patch_turn(
|
||||
&harness,
|
||||
"modify target",
|
||||
"undo-unrelated",
|
||||
full_patch,
|
||||
"done",
|
||||
)
|
||||
.await?;
|
||||
let temp = harness.path("temp.txt");
|
||||
assert_eq!(fs::read_to_string(&target)?, "edited\n");
|
||||
assert_eq!(fs::read_to_string(&temp)?, "ephemeral\n");
|
||||
|
||||
let codex = Arc::clone(&harness.test().codex);
|
||||
let completed = expect_successful_undo(&codex).await?;
|
||||
assert!(completed.success, "undo failed: {:?}", completed.message);
|
||||
|
||||
assert_eq!(fs::read_to_string(&tracked_constant)?, "stable\n");
|
||||
assert_eq!(fs::read_to_string(&target)?, "start\n");
|
||||
assert_eq!(
|
||||
fs::read_to_string(&preexisting_untracked)?,
|
||||
"scratch before\n"
|
||||
);
|
||||
assert_eq!(fs::read_to_string(&ignored)?, "ignored before\n");
|
||||
assert!(!temp.exists());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn undo_sequential_turns_consumes_snapshots() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = undo_harness().await?;
|
||||
init_git_repo(harness.cwd())?;
|
||||
|
||||
let story = harness.path("story.txt");
|
||||
fs::write(&story, "initial\n")?;
|
||||
git(harness.cwd(), &["add", "story.txt"])?;
|
||||
git(harness.cwd(), &["commit", "-m", "seed story"])?;
|
||||
|
||||
run_apply_patch_turn(
|
||||
&harness,
|
||||
"first change",
|
||||
"seq-turn-1",
|
||||
"*** Begin Patch\n*** Update File: story.txt\n@@\n-initial\n+turn one\n*** End Patch",
|
||||
"ok",
|
||||
)
|
||||
.await?;
|
||||
assert_eq!(fs::read_to_string(&story)?, "turn one\n");
|
||||
|
||||
run_apply_patch_turn(
|
||||
&harness,
|
||||
"second change",
|
||||
"seq-turn-2",
|
||||
"*** Begin Patch\n*** Update File: story.txt\n@@\n-turn one\n+turn two\n*** End Patch",
|
||||
"ok",
|
||||
)
|
||||
.await?;
|
||||
assert_eq!(fs::read_to_string(&story)?, "turn two\n");
|
||||
|
||||
run_apply_patch_turn(
|
||||
&harness,
|
||||
"third change",
|
||||
"seq-turn-3",
|
||||
"*** Begin Patch\n*** Update File: story.txt\n@@\n-turn two\n+turn three\n*** End Patch",
|
||||
"ok",
|
||||
)
|
||||
.await?;
|
||||
assert_eq!(fs::read_to_string(&story)?, "turn three\n");
|
||||
|
||||
let codex = Arc::clone(&harness.test().codex);
|
||||
expect_successful_undo(&codex).await?;
|
||||
assert_eq!(fs::read_to_string(&story)?, "turn two\n");
|
||||
|
||||
expect_successful_undo(&codex).await?;
|
||||
assert_eq!(fs::read_to_string(&story)?, "turn one\n");
|
||||
|
||||
expect_successful_undo(&codex).await?;
|
||||
assert_eq!(fs::read_to_string(&story)?, "initial\n");
|
||||
|
||||
expect_failed_undo(&codex).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn undo_without_snapshot_reports_failure() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = undo_harness().await?;
|
||||
let codex = Arc::clone(&harness.test().codex);
|
||||
|
||||
expect_failed_undo(&codex).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn undo_restores_moves_and_renames() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = undo_harness().await?;
|
||||
init_git_repo(harness.cwd())?;
|
||||
|
||||
let source = harness.path("rename_me.txt");
|
||||
fs::write(&source, "original\n")?;
|
||||
git(harness.cwd(), &["add", "rename_me.txt"])?;
|
||||
git(harness.cwd(), &["commit", "-m", "add rename target"])?;
|
||||
|
||||
let patch = "*** Begin Patch\n*** Update File: rename_me.txt\n*** Move to: relocated/renamed.txt\n@@\n-original\n+renamed content\n*** End Patch";
|
||||
run_apply_patch_turn(&harness, "rename file", "undo-rename", patch, "done").await?;
|
||||
|
||||
let destination = harness.path("relocated/renamed.txt");
|
||||
assert!(!source.exists());
|
||||
assert_eq!(fs::read_to_string(&destination)?, "renamed content\n");
|
||||
|
||||
let codex = Arc::clone(&harness.test().codex);
|
||||
expect_successful_undo(&codex).await?;
|
||||
|
||||
assert_eq!(fs::read_to_string(&source)?, "original\n");
|
||||
assert!(!destination.exists());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn undo_does_not_touch_ignored_directory_contents() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = undo_harness().await?;
|
||||
init_git_repo(harness.cwd())?;
|
||||
|
||||
let gitignore = harness.path(".gitignore");
|
||||
fs::write(&gitignore, "logs/\n")?;
|
||||
git(harness.cwd(), &["add", ".gitignore"])?;
|
||||
git(harness.cwd(), &["commit", "-m", "ignore logs directory"])?;
|
||||
|
||||
let logs_dir = harness.path("logs");
|
||||
fs::create_dir_all(&logs_dir)?;
|
||||
let preserved = logs_dir.join("persistent.log");
|
||||
fs::write(&preserved, "keep me\n")?;
|
||||
|
||||
run_apply_patch_turn(
|
||||
&harness,
|
||||
"write log",
|
||||
"undo-log",
|
||||
"*** Begin Patch\n*** Add File: logs/session.log\n+ephemeral log\n*** End Patch",
|
||||
"ok",
|
||||
)
|
||||
.await?;
|
||||
|
||||
let new_log = logs_dir.join("session.log");
|
||||
assert_eq!(fs::read_to_string(&new_log)?, "ephemeral log\n");
|
||||
|
||||
let codex = Arc::clone(&harness.test().codex);
|
||||
expect_successful_undo(&codex).await?;
|
||||
|
||||
assert!(new_log.exists());
|
||||
assert_eq!(fs::read_to_string(&preserved)?, "keep me\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn undo_overwrites_manual_edits_after_turn() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = undo_harness().await?;
|
||||
init_git_repo(harness.cwd())?;
|
||||
|
||||
let tracked = harness.path("tracked.txt");
|
||||
fs::write(&tracked, "baseline\n")?;
|
||||
git(harness.cwd(), &["add", "tracked.txt"])?;
|
||||
git(harness.cwd(), &["commit", "-m", "baseline tracked"])?;
|
||||
|
||||
run_apply_patch_turn(
|
||||
&harness,
|
||||
"modify tracked",
|
||||
"undo-manual-overwrite",
|
||||
"*** Begin Patch\n*** Update File: tracked.txt\n@@\n-baseline\n+turn change\n*** End Patch",
|
||||
"ok",
|
||||
)
|
||||
.await?;
|
||||
assert_eq!(fs::read_to_string(&tracked)?, "turn change\n");
|
||||
|
||||
fs::write(&tracked, "manual edit\n")?;
|
||||
assert_eq!(fs::read_to_string(&tracked)?, "manual edit\n");
|
||||
|
||||
let codex = Arc::clone(&harness.test().codex);
|
||||
expect_successful_undo(&codex).await?;
|
||||
|
||||
assert_eq!(fs::read_to_string(&tracked)?, "baseline\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
1527
llmx-rs/core/tests/suite/unified_exec.rs
Normal file
1527
llmx-rs/core/tests/suite/unified_exec.rs
Normal file
File diff suppressed because it is too large
Load Diff
79
llmx-rs/core/tests/suite/user_notification.rs
Normal file
79
llmx-rs/core/tests/suite/user_notification.rs
Normal file
@@ -0,0 +1,79 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::fs_wait;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::matchers::any;
|
||||
|
||||
use responses::ev_assistant_message;
|
||||
use responses::ev_completed;
|
||||
use responses::sse;
|
||||
use responses::start_mock_server;
|
||||
use std::time::Duration;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
#[ignore = "flaky on ubuntu-24.04-arm - aarch64-unknown-linux-gnu"]
|
||||
// The notify script gets far enough to create (and therefore surface) the file,
|
||||
// but hasn’t flushed the JSON yet. Reading an empty file produces EOF while parsing
|
||||
// a value at line 1 column 0. May be caused by a slow runner.
|
||||
async fn summarize_context_three_requests_and_instructions() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let sse1 = sse(vec![ev_assistant_message("m1", "Done"), ev_completed("r1")]);
|
||||
|
||||
responses::mount_sse_once_match(&server, any(), sse1).await;
|
||||
|
||||
let notify_dir = TempDir::new()?;
|
||||
// write a script to the notify that touches a file next to it
|
||||
let notify_script = notify_dir.path().join("notify.sh");
|
||||
std::fs::write(
|
||||
¬ify_script,
|
||||
r#"#!/bin/bash
|
||||
set -e
|
||||
echo -n "${@: -1}" > $(dirname "${0}")/notify.txt"#,
|
||||
)?;
|
||||
std::fs::set_permissions(¬ify_script, std::fs::Permissions::from_mode(0o755))?;
|
||||
|
||||
let notify_file = notify_dir.path().join("notify.txt");
|
||||
let notify_script_str = notify_script.to_str().unwrap().to_string();
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(move |cfg| cfg.notify = Some(vec![notify_script_str]))
|
||||
.build(&server)
|
||||
.await?;
|
||||
|
||||
// 1) Normal user input – should hit server once.
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello world".into(),
|
||||
}],
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// We fork the notify script, so we need to wait for it to write to the file.
|
||||
fs_wait::wait_for_path_exists(¬ify_file, Duration::from_secs(5)).await?;
|
||||
let notify_payload_raw = tokio::fs::read_to_string(¬ify_file).await?;
|
||||
let payload: Value = serde_json::from_str(¬ify_payload_raw)?;
|
||||
|
||||
assert_eq!(payload["type"], json!("agent-turn-complete"));
|
||||
assert_eq!(payload["input-messages"], json!(["hello world"]));
|
||||
assert_eq!(payload["last-assistant-message"], json!("Done"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
251
llmx-rs/core/tests/suite/user_shell_cmd.rs
Normal file
251
llmx-rs/core/tests/suite/user_shell_cmd.rs
Normal file
@@ -0,0 +1,251 @@
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::NewConversation;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::ExecCommandEndEvent;
|
||||
use codex_core::protocol::ExecOutputStream;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::TurnAbortReason;
|
||||
use core_test_support::assert_regex_match;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::wait_for_event;
|
||||
use core_test_support::wait_for_event_match;
|
||||
use regex_lite::escape;
|
||||
use std::path::PathBuf;
|
||||
use tempfile::TempDir;
|
||||
|
||||
#[tokio::test]
|
||||
async fn user_shell_cmd_ls_and_cat_in_temp_dir() {
|
||||
// Create a temporary working directory with a known file.
|
||||
let cwd = TempDir::new().unwrap();
|
||||
let file_name = "hello.txt";
|
||||
let file_path: PathBuf = cwd.path().join(file_name);
|
||||
let contents = "hello from bang test\n";
|
||||
tokio::fs::write(&file_path, contents)
|
||||
.await
|
||||
.expect("write temp file");
|
||||
|
||||
// Load config and pin cwd to the temp dir so ls/cat operate there.
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.cwd = cwd.path().to_path_buf();
|
||||
|
||||
let conversation_manager =
|
||||
ConversationManager::with_auth(codex_core::CodexAuth::from_api_key("dummy"));
|
||||
let NewConversation {
|
||||
conversation: codex,
|
||||
..
|
||||
} = conversation_manager
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.expect("create new conversation");
|
||||
|
||||
// 1) shell command should list the file
|
||||
let list_cmd = "ls".to_string();
|
||||
codex
|
||||
.submit(Op::RunUserShellCommand { command: list_cmd })
|
||||
.await
|
||||
.unwrap();
|
||||
let msg = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExecCommandEnd(_))).await;
|
||||
let EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
stdout, exit_code, ..
|
||||
}) = msg
|
||||
else {
|
||||
unreachable!()
|
||||
};
|
||||
assert_eq!(exit_code, 0);
|
||||
assert!(
|
||||
stdout.contains(file_name),
|
||||
"ls output should include {file_name}, got: {stdout:?}"
|
||||
);
|
||||
|
||||
// 2) shell command should print the file contents verbatim
|
||||
let cat_cmd = format!("cat {file_name}");
|
||||
codex
|
||||
.submit(Op::RunUserShellCommand { command: cat_cmd })
|
||||
.await
|
||||
.unwrap();
|
||||
let msg = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExecCommandEnd(_))).await;
|
||||
let EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
mut stdout,
|
||||
exit_code,
|
||||
..
|
||||
}) = msg
|
||||
else {
|
||||
unreachable!()
|
||||
};
|
||||
assert_eq!(exit_code, 0);
|
||||
if cfg!(windows) {
|
||||
// Windows shells emit CRLF line endings; normalize so the assertion remains portable.
|
||||
stdout = stdout.replace("\r\n", "\n");
|
||||
}
|
||||
assert_eq!(stdout, contents);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn user_shell_cmd_can_be_interrupted() {
|
||||
// Set up isolated config and conversation.
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let config = load_default_config_for_test(&codex_home);
|
||||
let conversation_manager =
|
||||
ConversationManager::with_auth(codex_core::CodexAuth::from_api_key("dummy"));
|
||||
let NewConversation {
|
||||
conversation: codex,
|
||||
..
|
||||
} = conversation_manager
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.expect("create new conversation");
|
||||
|
||||
// Start a long-running command and then interrupt it.
|
||||
let sleep_cmd = "sleep 5".to_string();
|
||||
codex
|
||||
.submit(Op::RunUserShellCommand { command: sleep_cmd })
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Wait until it has started (ExecCommandBegin), then interrupt.
|
||||
let _ = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExecCommandBegin(_))).await;
|
||||
codex.submit(Op::Interrupt).await.unwrap();
|
||||
|
||||
// Expect a TurnAborted(Interrupted) notification.
|
||||
let msg = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnAborted(_))).await;
|
||||
let EventMsg::TurnAborted(ev) = msg else {
|
||||
unreachable!()
|
||||
};
|
||||
assert_eq!(ev.reason, TurnAbortReason::Interrupted);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn user_shell_command_history_is_persisted_and_shared_with_model() -> anyhow::Result<()> {
|
||||
let server = responses::start_mock_server().await;
|
||||
let mut builder = core_test_support::test_codex::test_codex();
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
#[cfg(windows)]
|
||||
let command = r#"$val = $env:CODEX_SANDBOX; if ([string]::IsNullOrEmpty($val)) { $val = 'not-set' } ; [System.Console]::Write($val)"#.to_string();
|
||||
#[cfg(not(windows))]
|
||||
let command = r#"sh -c "printf '%s' \"${CODEX_SANDBOX:-not-set}\"""#.to_string();
|
||||
|
||||
test.codex
|
||||
.submit(Op::RunUserShellCommand {
|
||||
command: command.clone(),
|
||||
})
|
||||
.await?;
|
||||
|
||||
let begin_event = wait_for_event_match(&test.codex, |ev| match ev {
|
||||
EventMsg::ExecCommandBegin(event) => Some(event.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
assert!(begin_event.is_user_shell_command);
|
||||
let matches_last_arg = begin_event.command.last() == Some(&command);
|
||||
let matches_split = shlex::split(&command).is_some_and(|split| split == begin_event.command);
|
||||
assert!(
|
||||
matches_last_arg || matches_split,
|
||||
"user command begin event should include the original command; got: {:?}",
|
||||
begin_event.command
|
||||
);
|
||||
|
||||
let delta_event = wait_for_event_match(&test.codex, |ev| match ev {
|
||||
EventMsg::ExecCommandOutputDelta(event) => Some(event.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
assert_eq!(delta_event.stream, ExecOutputStream::Stdout);
|
||||
let chunk_text =
|
||||
String::from_utf8(delta_event.chunk.clone()).expect("user command chunk is valid utf-8");
|
||||
assert_eq!(chunk_text.trim(), "not-set");
|
||||
|
||||
let end_event = wait_for_event_match(&test.codex, |ev| match ev {
|
||||
EventMsg::ExecCommandEnd(event) => Some(event.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
assert_eq!(end_event.exit_code, 0);
|
||||
assert_eq!(end_event.stdout.trim(), "not-set");
|
||||
|
||||
let _ = wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let responses = vec![responses::sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_assistant_message("msg-1", "done"),
|
||||
responses::ev_completed("resp-1"),
|
||||
])];
|
||||
let mock = responses::mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn("follow-up after shell command").await?;
|
||||
|
||||
let request = mock.single_request();
|
||||
|
||||
let command_message = request
|
||||
.message_input_texts("user")
|
||||
.into_iter()
|
||||
.find(|text| text.contains("<user_shell_command>"))
|
||||
.expect("command message recorded in request");
|
||||
let command_message = command_message.replace("\r\n", "\n");
|
||||
let escaped_command = escape(&command);
|
||||
let expected_pattern = format!(
|
||||
r"(?m)\A<user_shell_command>\n<command>\n{escaped_command}\n</command>\n<result>\nExit code: 0\nDuration: [0-9]+(?:\.[0-9]+)? seconds\nOutput:\nnot-set\n</result>\n</user_shell_command>\z"
|
||||
);
|
||||
assert_regex_match(&expected_pattern, &command_message);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn user_shell_command_output_is_truncated_in_history() -> anyhow::Result<()> {
|
||||
let server = responses::start_mock_server().await;
|
||||
let mut builder = core_test_support::test_codex::test_codex();
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
#[cfg(windows)]
|
||||
let command = r#"for ($i=1; $i -le 400; $i++) { Write-Output $i }"#.to_string();
|
||||
#[cfg(not(windows))]
|
||||
let command = "seq 1 400".to_string();
|
||||
|
||||
test.codex
|
||||
.submit(Op::RunUserShellCommand {
|
||||
command: command.clone(),
|
||||
})
|
||||
.await?;
|
||||
|
||||
let end_event = wait_for_event_match(&test.codex, |ev| match ev {
|
||||
EventMsg::ExecCommandEnd(event) => Some(event.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
assert_eq!(end_event.exit_code, 0);
|
||||
|
||||
let _ = wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let responses = vec![responses::sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_assistant_message("msg-1", "done"),
|
||||
responses::ev_completed("resp-1"),
|
||||
])];
|
||||
let mock = responses::mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn("follow-up after shell command").await?;
|
||||
|
||||
let request = mock.single_request();
|
||||
let command_message = request
|
||||
.message_input_texts("user")
|
||||
.into_iter()
|
||||
.find(|text| text.contains("<user_shell_command>"))
|
||||
.expect("command message recorded in request");
|
||||
let command_message = command_message.replace("\r\n", "\n");
|
||||
|
||||
let head = (1..=128).map(|i| format!("{i}\n")).collect::<String>();
|
||||
let tail = (273..=400).map(|i| format!("{i}\n")).collect::<String>();
|
||||
let truncated_body =
|
||||
format!("Total output lines: 400\n\n{head}\n[... omitted 144 of 400 lines ...]\n\n{tail}");
|
||||
let escaped_command = escape(&command);
|
||||
let escaped_truncated_body = escape(&truncated_body);
|
||||
let expected_pattern = format!(
|
||||
r"(?m)\A<user_shell_command>\n<command>\n{escaped_command}\n</command>\n<result>\nExit code: 0\nDuration: [0-9]+(?:\.[0-9]+)? seconds\nOutput:\n{escaped_truncated_body}\n</result>\n</user_shell_command>\z"
|
||||
);
|
||||
assert_regex_match(&expected_pattern, &command_message);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
474
llmx-rs/core/tests/suite/view_image.rs
Normal file
474
llmx-rs/core/tests/suite/view_image.rs
Normal file
@@ -0,0 +1,474 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
|
||||
use base64::Engine;
|
||||
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use image::GenericImageView;
|
||||
use image::ImageBuffer;
|
||||
use image::Rgba;
|
||||
use image::load_from_memory;
|
||||
use serde_json::Value;
|
||||
use wiremock::matchers::any;
|
||||
|
||||
fn find_image_message(body: &Value) -> Option<&Value> {
|
||||
body.get("input")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|items| {
|
||||
items.iter().find(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("message")
|
||||
&& item
|
||||
.get("content")
|
||||
.and_then(Value::as_array)
|
||||
.map(|content| {
|
||||
content.iter().any(|span| {
|
||||
span.get("type").and_then(Value::as_str) == Some("input_image")
|
||||
})
|
||||
})
|
||||
.unwrap_or(false)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
fn extract_output_text(item: &Value) -> Option<&str> {
|
||||
item.get("output").and_then(|value| match value {
|
||||
Value::String(text) => Some(text.as_str()),
|
||||
Value::Object(obj) => obj.get("content").and_then(Value::as_str),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn user_turn_with_local_image_attaches_image() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = test_codex().build(&server).await?;
|
||||
|
||||
let rel_path = "user-turn/example.png";
|
||||
let abs_path = cwd.path().join(rel_path);
|
||||
if let Some(parent) = abs_path.parent() {
|
||||
std::fs::create_dir_all(parent)?;
|
||||
}
|
||||
let image = ImageBuffer::from_pixel(4096, 1024, Rgba([20u8, 40, 60, 255]));
|
||||
image.save(&abs_path)?;
|
||||
|
||||
let response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
let mock = responses::mount_sse_once_match(&server, any(), response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::LocalImage {
|
||||
path: abs_path.clone(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let body = mock.single_request().body_json();
|
||||
let image_message =
|
||||
find_image_message(&body).expect("pending input image message not included in request");
|
||||
let image_url = image_message
|
||||
.get("content")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|content| {
|
||||
content.iter().find_map(|span| {
|
||||
if span.get("type").and_then(Value::as_str) == Some("input_image") {
|
||||
span.get("image_url").and_then(Value::as_str)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
})
|
||||
.expect("image_url present");
|
||||
|
||||
let (prefix, encoded) = image_url
|
||||
.split_once(',')
|
||||
.expect("image url contains data prefix");
|
||||
assert_eq!(prefix, "data:image/png;base64");
|
||||
|
||||
let decoded = BASE64_STANDARD
|
||||
.decode(encoded)
|
||||
.expect("image data decodes from base64 for request");
|
||||
let resized = load_from_memory(&decoded).expect("load resized image");
|
||||
let (width, height) = resized.dimensions();
|
||||
assert!(width <= 2048);
|
||||
assert!(height <= 768);
|
||||
assert!(width < 4096);
|
||||
assert!(height < 1024);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = test_codex().build(&server).await?;
|
||||
|
||||
let rel_path = "assets/example.png";
|
||||
let abs_path = cwd.path().join(rel_path);
|
||||
if let Some(parent) = abs_path.parent() {
|
||||
std::fs::create_dir_all(parent)?;
|
||||
}
|
||||
let image = ImageBuffer::from_pixel(4096, 1024, Rgba([255u8, 0, 0, 255]));
|
||||
image.save(&abs_path)?;
|
||||
|
||||
let call_id = "view-image-call";
|
||||
let arguments = serde_json::json!({ "path": rel_path }).to_string();
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "view_image", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
let mock = responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please add the screenshot".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let mut tool_event = None;
|
||||
wait_for_event(&codex, |event| match event {
|
||||
EventMsg::ViewImageToolCall(_) => {
|
||||
tool_event = Some(event.clone());
|
||||
false
|
||||
}
|
||||
EventMsg::TaskComplete(_) => true,
|
||||
_ => false,
|
||||
})
|
||||
.await;
|
||||
|
||||
let tool_event = match tool_event.expect("view image tool event emitted") {
|
||||
EventMsg::ViewImageToolCall(event) => event,
|
||||
_ => unreachable!("stored event must be ViewImageToolCall"),
|
||||
};
|
||||
assert_eq!(tool_event.call_id, call_id);
|
||||
assert_eq!(tool_event.path, abs_path);
|
||||
|
||||
let body = mock.single_request().body_json();
|
||||
let output_item = mock.single_request().function_call_output(call_id);
|
||||
|
||||
let output_text = extract_output_text(&output_item).expect("output text present");
|
||||
assert_eq!(output_text, "attached local image path");
|
||||
|
||||
let image_message =
|
||||
find_image_message(&body).expect("pending input image message not included in request");
|
||||
let image_url = image_message
|
||||
.get("content")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|content| {
|
||||
content.iter().find_map(|span| {
|
||||
if span.get("type").and_then(Value::as_str) == Some("input_image") {
|
||||
span.get("image_url").and_then(Value::as_str)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
})
|
||||
.expect("image_url present");
|
||||
|
||||
let (prefix, encoded) = image_url
|
||||
.split_once(',')
|
||||
.expect("image url contains data prefix");
|
||||
assert_eq!(prefix, "data:image/png;base64");
|
||||
|
||||
let decoded = BASE64_STANDARD
|
||||
.decode(encoded)
|
||||
.expect("image data decodes from base64 for request");
|
||||
let resized = load_from_memory(&decoded).expect("load resized image");
|
||||
let (resized_width, resized_height) = resized.dimensions();
|
||||
assert!(resized_width <= 2048);
|
||||
assert!(resized_height <= 768);
|
||||
assert!(resized_width < 4096);
|
||||
assert!(resized_height < 1024);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn view_image_tool_errors_when_path_is_directory() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = test_codex().build(&server).await?;
|
||||
|
||||
let rel_path = "assets";
|
||||
let abs_path = cwd.path().join(rel_path);
|
||||
std::fs::create_dir_all(&abs_path)?;
|
||||
|
||||
let call_id = "view-image-directory";
|
||||
let arguments = serde_json::json!({ "path": rel_path }).to_string();
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "view_image", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
let mock = responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please attach the folder".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let body_with_tool_output = mock.single_request().body_json();
|
||||
let output_item = mock.single_request().function_call_output(call_id);
|
||||
let output_text = extract_output_text(&output_item).expect("output text present");
|
||||
let expected_message = format!("image path `{}` is not a file", abs_path.display());
|
||||
assert_eq!(output_text, expected_message);
|
||||
|
||||
assert!(
|
||||
find_image_message(&body_with_tool_output).is_none(),
|
||||
"directory path should not produce an input_image message"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn view_image_tool_placeholder_for_non_image_files() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = test_codex().build(&server).await?;
|
||||
|
||||
let rel_path = "assets/example.json";
|
||||
let abs_path = cwd.path().join(rel_path);
|
||||
if let Some(parent) = abs_path.parent() {
|
||||
std::fs::create_dir_all(parent)?;
|
||||
}
|
||||
std::fs::write(&abs_path, br#"{ "message": "hello" }"#)?;
|
||||
|
||||
let call_id = "view-image-non-image";
|
||||
let arguments = serde_json::json!({ "path": rel_path }).to_string();
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "view_image", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
let mock = responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please use the view_image tool to read the json file".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let request = mock.single_request();
|
||||
assert!(
|
||||
request.inputs_of_type("input_image").is_empty(),
|
||||
"non-image file should not produce an input_image message"
|
||||
);
|
||||
|
||||
let placeholder = request
|
||||
.inputs_of_type("message")
|
||||
.iter()
|
||||
.find_map(|item| {
|
||||
let content = item.get("content").and_then(Value::as_array)?;
|
||||
content.iter().find_map(|span| {
|
||||
if span.get("type").and_then(Value::as_str) == Some("input_text") {
|
||||
let text = span.get("text").and_then(Value::as_str)?;
|
||||
if text.contains("Codex could not read the local image at")
|
||||
&& text.contains("unsupported MIME type `application/json`")
|
||||
{
|
||||
return Some(text.to_string());
|
||||
}
|
||||
}
|
||||
None
|
||||
})
|
||||
})
|
||||
.expect("placeholder text found");
|
||||
|
||||
assert!(
|
||||
placeholder.contains(&abs_path.display().to_string()),
|
||||
"placeholder should mention path: {placeholder}"
|
||||
);
|
||||
|
||||
let output_item = mock.single_request().function_call_output(call_id);
|
||||
let output_text = extract_output_text(&output_item).expect("output text present");
|
||||
assert_eq!(output_text, "attached local image path");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn view_image_tool_errors_when_file_missing() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = test_codex().build(&server).await?;
|
||||
|
||||
let rel_path = "missing/example.png";
|
||||
let abs_path = cwd.path().join(rel_path);
|
||||
|
||||
let call_id = "view-image-missing";
|
||||
let arguments = serde_json::json!({ "path": rel_path }).to_string();
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "view_image", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
let mock = responses::mount_sse_once_match(&server, any(), second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please attach the missing image".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let body_with_tool_output = mock.single_request().body_json();
|
||||
let output_item = mock.single_request().function_call_output(call_id);
|
||||
let output_text = extract_output_text(&output_item).expect("output text present");
|
||||
let expected_prefix = format!("unable to locate image at `{}`:", abs_path.display());
|
||||
assert!(
|
||||
output_text.starts_with(&expected_prefix),
|
||||
"expected error to start with `{expected_prefix}` but got `{output_text}`"
|
||||
);
|
||||
|
||||
assert!(
|
||||
find_image_message(&body_with_tool_output).is_none(),
|
||||
"missing file should not produce an input_image message"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Reference in New Issue
Block a user