verify mime type of images (#5888)

solves: https://github.com/openai/codex/issues/5675

Block non-image uploads in the view_image workflow. We now check that the
file’s MIME type (guessed from its path) is image/* before building the data
URL; if the type is unknown or is not an image, we emit an
“unsupported MIME type” error to the model instead. This stops the agent from
sending application/json blobs that the Responses API rejects with 400s.

<img width="409" height="556" alt="Screenshot 2025-10-28 at 1 15 10 PM"
src="https://github.com/user-attachments/assets/a92199e8-2769-4b1d-8e33-92d9238c90fe"
/>
This commit is contained in:
zhao-oai
2025-10-28 14:52:51 -07:00
committed by GitHub
parent ba95d9862c
commit 36113509f2
2 changed files with 139 additions and 4 deletions

View File

@@ -313,6 +313,98 @@ async fn view_image_tool_errors_when_path_is_directory() -> anyhow::Result<()> {
Ok(())
}
/// Asks the agent to run `view_image` on a JSON file and inspects the
/// follow-up request sent to the mock server.
///
/// The follow-up request must:
/// - carry no `input_image` item,
/// - contain an `input_text` span reporting the unsupported MIME type
///   `application/json` together with the absolute path of the file, and
/// - report `"attached local image path"` as the function-call output.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn view_image_tool_placeholder_for_non_image_files() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
    let TestCodex {
        codex,
        cwd,
        session_configured,
        ..
    } = test_codex().build(&server).await?;

    // Create a JSON fixture inside the session's working directory.
    let rel_path = "assets/example.json";
    let abs_path = cwd.path().join(rel_path);
    if let Some(fixture_dir) = abs_path.parent() {
        std::fs::create_dir_all(fixture_dir)?;
    }
    std::fs::write(&abs_path, br#"{ "message": "hello" }"#)?;

    // First SSE stream: the model requests `view_image` on the JSON file.
    let call_id = "view-image-non-image";
    let arguments = serde_json::json!({ "path": rel_path }).to_string();
    let tool_call_stream = sse(vec![
        ev_response_created("resp-1"),
        ev_function_call(call_id, "view_image", &arguments),
        ev_completed("resp-1"),
    ]);
    responses::mount_sse_once_match(&server, any(), tool_call_stream).await;

    // Second SSE stream: the model finishes the turn; this mock records the
    // request that is inspected below.
    let final_stream = sse(vec![
        ev_assistant_message("msg-1", "done"),
        ev_completed("resp-2"),
    ]);
    let mock = responses::mount_sse_once_match(&server, any(), final_stream).await;

    let user_turn = Op::UserTurn {
        items: vec![UserInput::Text {
            text: "please use the view_image tool to read the json file".into(),
        }],
        final_output_json_schema: None,
        cwd: cwd.path().to_path_buf(),
        approval_policy: AskForApproval::Never,
        sandbox_policy: SandboxPolicy::DangerFullAccess,
        model: session_configured.model.clone(),
        effort: None,
        summary: ReasoningSummary::Auto,
    };
    codex.submit(user_turn).await?;

    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;

    let request = mock.single_request();
    assert!(
        request.inputs_of_type("input_image").is_empty(),
        "non-image file should not produce an input_image message"
    );

    // Walk every `input_text` span of every message and take the first one
    // that looks like the unsupported-MIME placeholder.
    let placeholder = request
        .inputs_of_type("message")
        .iter()
        .filter_map(|item| item.get("content").and_then(Value::as_array))
        .flatten()
        .filter(|span| span.get("type").and_then(Value::as_str) == Some("input_text"))
        .filter_map(|span| span.get("text").and_then(Value::as_str))
        .find(|text| {
            text.contains("Codex could not read the local image at")
                && text.contains("unsupported MIME type `application/json`")
        })
        .map(|text| text.to_string())
        .expect("placeholder text found");

    let expected_path = abs_path.display().to_string();
    assert!(
        placeholder.contains(&expected_path),
        "placeholder should mention path: {placeholder}"
    );

    let output_item = mock.single_request().function_call_output(call_id);
    let output_text = extract_output_text(&output_item).expect("output text present");
    assert_eq!(output_text, "attached local image path");

    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn view_image_tool_errors_when_file_missing() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));

View File

@@ -244,10 +244,20 @@ impl From<Vec<UserInput>> for ResponseInputItem {
} else {
match std::fs::read(&path) {
Ok(bytes) => {
let mime = mime_guess::from_path(&path)
.first()
.map(|m| m.essence_str().to_owned())
.unwrap_or_else(|| "image".to_string());
let Some(mime_guess) = mime_guess::from_path(&path).first()
else {
return local_image_error_placeholder(
&path,
"unsupported MIME type (unknown)",
);
};
let mime = mime_guess.essence_str().to_owned();
if !mime.starts_with("image/") {
return local_image_error_placeholder(
&path,
format!("unsupported MIME type `{mime}`"),
);
}
let encoded =
base64::engine::general_purpose::STANDARD.encode(bytes);
ContentItem::InputImage {
@@ -635,4 +645,37 @@ mod tests {
Ok(())
}
/// Converting a `UserInput::LocalImage` that points at a `.json` file must
/// yield a message with a single text item that names the unsupported MIME
/// type and the file's path.
#[test]
fn local_image_non_image_adds_placeholder() -> Result<()> {
    let dir = tempdir()?;
    let json_path = dir.path().join("example.json");
    std::fs::write(&json_path, br#"{"hello":"world"}"#)?;

    let inputs = vec![UserInput::LocalImage {
        path: json_path.clone(),
    }];
    let item = ResponseInputItem::from(inputs);

    // Only a message item is acceptable here.
    let content = match item {
        ResponseInputItem::Message { content, .. } => content,
        other => panic!("expected message response but got {other:?}"),
    };
    assert_eq!(content.len(), 1);

    // The single content item must be the text placeholder.
    let text = match &content[0] {
        ContentItem::InputText { text } => text,
        other => panic!("expected placeholder text but found {other:?}"),
    };

    assert!(
        text.contains("unsupported MIME type `application/json`"),
        "placeholder should mention unsupported MIME: {text}"
    );
    let expected_path = json_path.display().to_string();
    assert!(
        text.contains(&expected_path),
        "placeholder should mention path: {text}"
    );

    Ok(())
}
}