From 36113509f2fa0df910519556d988876a21acfa57 Mon Sep 17 00:00:00 2001 From: zhao-oai Date: Tue, 28 Oct 2025 14:52:51 -0700 Subject: [PATCH] verify mime type of images (#5888) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit solves: https://github.com/openai/codex/issues/5675 Block non-image uploads in the view_image workflow. We now confirm the file’s MIME is image/* before building the data URL; otherwise we emit an “unsupported MIME type” error to the model. This stops the agent from sending application/json blobs that the Responses API rejects with 400s. Screenshot 2025-10-28 at 1 15 10 PM --- codex-rs/core/tests/suite/view_image.rs | 92 +++++++++++++++++++++++++ codex-rs/protocol/src/models.rs | 51 ++++++++++++-- 2 files changed, 139 insertions(+), 4 deletions(-) diff --git a/codex-rs/core/tests/suite/view_image.rs b/codex-rs/core/tests/suite/view_image.rs index f50a5a5c..ae9af966 100644 --- a/codex-rs/core/tests/suite/view_image.rs +++ b/codex-rs/core/tests/suite/view_image.rs @@ -313,6 +313,98 @@ async fn view_image_tool_errors_when_path_is_directory() -> anyhow::Result<()> { Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn view_image_tool_placeholder_for_non_image_files() -> anyhow::Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + + let TestCodex { + codex, + cwd, + session_configured, + ..
+ } = test_codex().build(&server).await?; + + let rel_path = "assets/example.json"; + let abs_path = cwd.path().join(rel_path); + if let Some(parent) = abs_path.parent() { + std::fs::create_dir_all(parent)?; + } + std::fs::write(&abs_path, br#"{ "message": "hello" }"#)?; + + let call_id = "view-image-non-image"; + let arguments = serde_json::json!({ "path": rel_path }).to_string(); + + let first_response = sse(vec![ + ev_response_created("resp-1"), + ev_function_call(call_id, "view_image", &arguments), + ev_completed("resp-1"), + ]); + responses::mount_sse_once_match(&server, any(), first_response).await; + + let second_response = sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]); + let mock = responses::mount_sse_once_match(&server, any(), second_response).await; + + let session_model = session_configured.model.clone(); + + codex + .submit(Op::UserTurn { + items: vec![UserInput::Text { + text: "please use the view_image tool to read the json file".into(), + }], + final_output_json_schema: None, + cwd: cwd.path().to_path_buf(), + approval_policy: AskForApproval::Never, + sandbox_policy: SandboxPolicy::DangerFullAccess, + model: session_model, + effort: None, + summary: ReasoningSummary::Auto, + }) + .await?; + + wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await; + + let request = mock.single_request(); + assert!( + request.inputs_of_type("input_image").is_empty(), + "non-image file should not produce an input_image message" + ); + + let placeholder = request + .inputs_of_type("message") + .iter() + .find_map(|item| { + let content = item.get("content").and_then(Value::as_array)?; + content.iter().find_map(|span| { + if span.get("type").and_then(Value::as_str) == Some("input_text") { + let text = span.get("text").and_then(Value::as_str)?; + if text.contains("Codex could not read the local image at") + && text.contains("unsupported MIME type `application/json`") + { + return Some(text.to_string()); + } + 
+ } + None + }) + }) + .expect("placeholder text found"); + + assert!( + placeholder.contains(&abs_path.display().to_string()), + "placeholder should mention path: {placeholder}" + ); + + let output_item = mock.single_request().function_call_output(call_id); + let output_text = extract_output_text(&output_item).expect("output text present"); + assert_eq!(output_text, "attached local image path"); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn view_image_tool_errors_when_file_missing() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); diff --git a/codex-rs/protocol/src/models.rs b/codex-rs/protocol/src/models.rs index 614a5ff2..a91cef1d 100644 --- a/codex-rs/protocol/src/models.rs +++ b/codex-rs/protocol/src/models.rs @@ -244,10 +244,20 @@ impl From<Vec<UserInput>> for ResponseInputItem { } else { match std::fs::read(&path) { Ok(bytes) => { - let mime = mime_guess::from_path(&path) - .first() - .map(|m| m.essence_str().to_owned()) - .unwrap_or_else(|| "image".to_string()); + let Some(mime_guess) = mime_guess::from_path(&path).first() + else { + return local_image_error_placeholder( + &path, + "unsupported MIME type (unknown)", + ); + }; + let mime = mime_guess.essence_str().to_owned(); + if !mime.starts_with("image/") { + return local_image_error_placeholder( + &path, + format!("unsupported MIME type `{mime}`"), + ); + } let encoded = base64::engine::general_purpose::STANDARD.encode(bytes); ContentItem::InputImage { @@ -635,4 +645,37 @@ mod tests { Ok(()) } + + #[test] + fn local_image_non_image_adds_placeholder() -> Result<()> { + let dir = tempdir()?; + let json_path = dir.path().join("example.json"); + std::fs::write(&json_path, br#"{"hello":"world"}"#)?; + + let item = ResponseInputItem::from(vec![UserInput::LocalImage { + path: json_path.clone(), + }]); + + match item { + ResponseInputItem::Message { content, ..
} => { + assert_eq!(content.len(), 1); + match &content[0] { + ContentItem::InputText { text } => { + assert!( + text.contains("unsupported MIME type `application/json`"), + "placeholder should mention unsupported MIME: {text}" + ); + assert!( + text.contains(&json_path.display().to_string()), + "placeholder should mention path: {text}" + ); + } + other => panic!("expected placeholder text but found {other:?}"), + } + } + other => panic!("expected message response but got {other:?}"), + } + + Ok(()) + } }