#![cfg(not(target_os = "windows"))] use std::fs; use assert_matches::assert_matches; use codex_core::features::Feature; use codex_core::model_family::find_family_for_model; use codex_core::protocol::AskForApproval; use codex_core::protocol::EventMsg; use codex_core::protocol::Op; use codex_core::protocol::SandboxPolicy; use codex_protocol::config_types::ReasoningSummary; use codex_protocol::plan_tool::StepStatus; use codex_protocol::user_input::UserInput; use core_test_support::assert_regex_match; use core_test_support::responses; use core_test_support::responses::ev_apply_patch_function_call; use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; use core_test_support::responses::ev_function_call; use core_test_support::responses::ev_local_shell_call; use core_test_support::responses::ev_response_created; use core_test_support::responses::sse; use core_test_support::responses::start_mock_server; use core_test_support::skip_if_no_network; use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; use serde_json::Value; use serde_json::json; use wiremock::matchers::any; fn extract_output_text(item: &Value) -> Option<&str> { item.get("output").and_then(|value| match value { Value::String(text) => Some(text.as_str()), Value::Object(obj) => obj.get("content").and_then(Value::as_str), _ => None, }) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn shell_tool_executes_command_and_streams_output() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let mut builder = test_codex().with_config(|config| { config.model = "gpt-5".to_string(); config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a valid model"); }); let TestCodex { codex, cwd, session_configured, .. } = builder.build(&server).await?; let call_id = "shell-tool-call"; let command = vec!["/bin/echo", "tool harness"]; let first_response = sse(vec![ ev_response_created("resp-1"), ev_local_shell_call(call_id, "completed", command), ev_completed("resp-1"), ]); responses::mount_sse_once_match(&server, any(), first_response).await; let second_response = sse(vec![ ev_assistant_message("msg-1", "all done"), ev_completed("resp-2"), ]); let second_mock = responses::mount_sse_once_match(&server, any(), second_response).await; let session_model = session_configured.model.clone(); codex .submit(Op::UserTurn { items: vec![UserInput::Text { text: "please run the shell command".into(), }], final_output_json_schema: None, cwd: cwd.path().to_path_buf(), approval_policy: AskForApproval::Never, sandbox_policy: SandboxPolicy::DangerFullAccess, model: session_model, effort: None, summary: ReasoningSummary::Auto, }) .await?; wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await; let req = second_mock.single_request(); let output_item = req.function_call_output(call_id); let output_text = extract_output_text(&output_item).expect("output text present"); let exec_output: Value = serde_json::from_str(output_text)?; assert_eq!(exec_output["metadata"]["exit_code"], 0); let stdout = exec_output["output"].as_str().expect("stdout field"); assert_regex_match(r"(?s)^tool harness\n?$", stdout); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn update_plan_tool_emits_plan_update_event() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let mut builder = test_codex(); let TestCodex { codex, cwd, session_configured, .. } = builder.build(&server).await?; let call_id = "plan-tool-call"; let plan_args = json!({ "explanation": "Tool harness check", "plan": [ {"step": "Inspect workspace", "status": "in_progress"}, {"step": "Report results", "status": "pending"}, ], }) .to_string(); let first_response = sse(vec![ ev_response_created("resp-1"), ev_function_call(call_id, "update_plan", &plan_args), ev_completed("resp-1"), ]); responses::mount_sse_once_match(&server, any(), first_response).await; let second_response = sse(vec![ ev_assistant_message("msg-1", "plan acknowledged"), ev_completed("resp-2"), ]); let second_mock = responses::mount_sse_once_match(&server, any(), second_response).await; let session_model = session_configured.model.clone(); codex .submit(Op::UserTurn { items: vec![UserInput::Text { text: "please update the plan".into(), }], final_output_json_schema: None, cwd: cwd.path().to_path_buf(), approval_policy: AskForApproval::Never, sandbox_policy: SandboxPolicy::DangerFullAccess, model: session_model, effort: None, summary: ReasoningSummary::Auto, }) .await?; let mut saw_plan_update = false; wait_for_event(&codex, |event| match event { EventMsg::PlanUpdate(update) => { saw_plan_update = true; assert_eq!(update.explanation.as_deref(), Some("Tool harness check")); assert_eq!(update.plan.len(), 2); assert_eq!(update.plan[0].step, "Inspect workspace"); assert_matches!(update.plan[0].status, StepStatus::InProgress); assert_eq!(update.plan[1].step, "Report results"); assert_matches!(update.plan[1].status, StepStatus::Pending); false } EventMsg::TaskComplete(_) => true, _ => false, }) .await; assert!(saw_plan_update, "expected PlanUpdate event"); let req = second_mock.single_request(); let output_item = req.function_call_output(call_id); assert_eq!( output_item.get("call_id").and_then(Value::as_str), Some(call_id) ); let output_text = extract_output_text(&output_item).expect("output text present"); assert_eq!(output_text, "Plan updated"); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn update_plan_tool_rejects_malformed_payload() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let mut builder = test_codex(); let TestCodex { codex, cwd, session_configured, .. } = builder.build(&server).await?; let call_id = "plan-tool-invalid"; let invalid_args = json!({ "explanation": "Missing plan data" }) .to_string(); let first_response = sse(vec![ ev_response_created("resp-1"), ev_function_call(call_id, "update_plan", &invalid_args), ev_completed("resp-1"), ]); responses::mount_sse_once_match(&server, any(), first_response).await; let second_response = sse(vec![ ev_assistant_message("msg-1", "malformed plan payload"), ev_completed("resp-2"), ]); let second_mock = responses::mount_sse_once_match(&server, any(), second_response).await; let session_model = session_configured.model.clone(); codex .submit(Op::UserTurn { items: vec![UserInput::Text { text: "please update the plan".into(), }], final_output_json_schema: None, cwd: cwd.path().to_path_buf(), approval_policy: AskForApproval::Never, sandbox_policy: SandboxPolicy::DangerFullAccess, model: session_model, effort: None, summary: ReasoningSummary::Auto, }) .await?; let mut saw_plan_update = false; wait_for_event(&codex, |event| match event { EventMsg::PlanUpdate(_) => { saw_plan_update = true; false } EventMsg::TaskComplete(_) => true, _ => false, }) .await; assert!( !saw_plan_update, "did not expect PlanUpdate event for malformed payload" ); let req = second_mock.single_request(); let output_item = req.function_call_output(call_id); assert_eq!( output_item.get("call_id").and_then(Value::as_str), Some(call_id) ); let output_text = extract_output_text(&output_item).expect("output text present"); assert!( output_text.contains("failed to parse function arguments"), "expected parse error message in output text, got {output_text:?}" ); if let Some(success_flag) = output_item .get("output") .and_then(|value| value.as_object()) .and_then(|obj| obj.get("success")) .and_then(serde_json::Value::as_bool) { assert!( !success_flag, "expected tool output to mark success=false for malformed payload" ); } Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn apply_patch_tool_executes_and_emits_patch_events() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let mut builder = test_codex().with_config(|config| { config.features.enable(Feature::ApplyPatchFreeform); }); let TestCodex { codex, cwd, session_configured, .. } = builder.build(&server).await?; let file_name = "notes.txt"; let file_path = cwd.path().join(file_name); let call_id = "apply-patch-call"; let patch_content = format!( r#"*** Begin Patch *** Add File: {file_name} +Tool harness apply patch *** End Patch"# ); let first_response = sse(vec![ ev_response_created("resp-1"), ev_apply_patch_function_call(call_id, &patch_content), ev_completed("resp-1"), ]); responses::mount_sse_once_match(&server, any(), first_response).await; let second_response = sse(vec![ ev_assistant_message("msg-1", "patch complete"), ev_completed("resp-2"), ]); let second_mock = responses::mount_sse_once_match(&server, any(), second_response).await; let session_model = session_configured.model.clone(); codex .submit(Op::UserTurn { items: vec![UserInput::Text { text: "please apply a patch".into(), }], final_output_json_schema: None, cwd: cwd.path().to_path_buf(), approval_policy: AskForApproval::Never, sandbox_policy: SandboxPolicy::DangerFullAccess, model: session_model, effort: None, summary: ReasoningSummary::Auto, }) .await?; let mut saw_patch_begin = false; let mut patch_end_success = None; wait_for_event(&codex, |event| match event { EventMsg::PatchApplyBegin(begin) => { saw_patch_begin = true; assert_eq!(begin.call_id, call_id); false } EventMsg::PatchApplyEnd(end) => { assert_eq!(end.call_id, call_id); patch_end_success = Some(end.success); false } EventMsg::TaskComplete(_) => true, _ => false, }) .await; assert!(saw_patch_begin, "expected PatchApplyBegin event"); let patch_end_success = patch_end_success.expect("expected PatchApplyEnd event to capture success flag"); assert!(patch_end_success); let req = second_mock.single_request(); let output_item = req.function_call_output(call_id); assert_eq!( output_item.get("call_id").and_then(Value::as_str), Some(call_id) ); let output_text = extract_output_text(&output_item).expect("output text present"); let expected_pattern = format!( r"(?s)^Exit code: 0 Wall time: [0-9]+(?:\.[0-9]+)? seconds Output: Success. Updated the following files: A {file_name} ?$" ); assert_regex_match(&expected_pattern, output_text); let updated_contents = fs::read_to_string(file_path)?; assert_eq!( updated_contents, "Tool harness apply patch\n", "expected updated file content" ); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn apply_patch_reports_parse_diagnostics() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let mut builder = test_codex().with_config(|config| { config.features.enable(Feature::ApplyPatchFreeform); }); let TestCodex { codex, cwd, session_configured, .. } = builder.build(&server).await?; let call_id = "apply-patch-parse-error"; let patch_content = r"*** Begin Patch *** Update File: broken.txt *** End Patch"; let first_response = sse(vec![ ev_response_created("resp-1"), ev_apply_patch_function_call(call_id, patch_content), ev_completed("resp-1"), ]); responses::mount_sse_once_match(&server, any(), first_response).await; let second_response = sse(vec![ ev_assistant_message("msg-1", "failed"), ev_completed("resp-2"), ]); let second_mock = responses::mount_sse_once_match(&server, any(), second_response).await; let session_model = session_configured.model.clone(); codex .submit(Op::UserTurn { items: vec![UserInput::Text { text: "please apply a patch".into(), }], final_output_json_schema: None, cwd: cwd.path().to_path_buf(), approval_policy: AskForApproval::Never, sandbox_policy: SandboxPolicy::DangerFullAccess, model: session_model, effort: None, summary: ReasoningSummary::Auto, }) .await?; wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await; let req = second_mock.single_request(); let output_item = req.function_call_output(call_id); assert_eq!( output_item.get("call_id").and_then(Value::as_str), Some(call_id) ); let output_text = extract_output_text(&output_item).expect("output text present"); assert!( output_text.contains("apply_patch verification failed"), "expected apply_patch verification failure message, got {output_text:?}" ); assert!( output_text.contains("invalid hunk"), "expected parse diagnostics in output text, got {output_text:?}" ); if let Some(success_flag) = output_item .get("output") .and_then(|value| value.as_object()) .and_then(|obj| obj.get("success")) .and_then(serde_json::Value::as_bool) { assert!( !success_flag, "expected tool output to mark success=false for parse failures" ); } Ok(()) }