use std::collections::HashMap; use base64::Engine; use mcp_types::CallToolResult; use serde::Deserialize; use serde::Deserializer; use serde::Serialize; use serde::ser::Serializer; use ts_rs::TS; use crate::protocol::InputItem; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)] #[serde(tag = "type", rename_all = "snake_case")] pub enum ResponseInputItem { Message { role: String, content: Vec, }, FunctionCallOutput { call_id: String, output: FunctionCallOutputPayload, }, McpToolCallOutput { call_id: String, result: Result, }, CustomToolCallOutput { call_id: String, output: String, }, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)] #[serde(tag = "type", rename_all = "snake_case")] pub enum ContentItem { InputText { text: String }, InputImage { image_url: String }, OutputText { text: String }, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)] #[serde(tag = "type", rename_all = "snake_case")] pub enum ResponseItem { Message { #[serde(skip_serializing)] id: Option, role: String, content: Vec, }, Reasoning { #[serde(default, skip_serializing)] id: String, summary: Vec, #[serde(default, skip_serializing_if = "should_serialize_reasoning_content")] content: Option>, encrypted_content: Option, }, LocalShellCall { /// Set when using the chat completions API. #[serde(skip_serializing)] id: Option, /// Set when using the Responses API. call_id: Option, status: LocalShellStatus, action: LocalShellAction, }, FunctionCall { #[serde(skip_serializing)] id: Option, name: String, // The Responses API returns the function call arguments as a *string* that contains // JSON, not as an already‑parsed object. We keep it as a raw string here and let // Session::handle_function_call parse it into a Value. This exactly matches the // Chat Completions + Responses API behavior. 
arguments: String, call_id: String, }, // NOTE: The input schema for `function_call_output` objects that clients send to the // OpenAI /v1/responses endpoint is NOT the same shape as the objects the server returns on the // SSE stream. When *sending* we must wrap the string output inside an object that includes a // required `success` boolean. The upstream TypeScript CLI does this implicitly. To ensure we // serialize exactly the expected shape we introduce a dedicated payload struct and flatten it // here. FunctionCallOutput { call_id: String, output: FunctionCallOutputPayload, }, CustomToolCall { #[serde(skip_serializing)] id: Option, #[serde(default, skip_serializing_if = "Option::is_none")] status: Option, call_id: String, name: String, input: String, }, CustomToolCallOutput { call_id: String, output: String, }, // Emitted by the Responses API when the agent triggers a web search. // Example payload (from SSE `response.output_item.done`): // { // "id":"ws_...", // "type":"web_search_call", // "status":"completed", // "action": {"type":"search","query":"weather: San Francisco, CA"} // } WebSearchCall { #[serde(skip_serializing)] id: Option, #[serde(default, skip_serializing_if = "Option::is_none")] status: Option, action: WebSearchAction, }, #[serde(other)] Other, } fn should_serialize_reasoning_content(content: &Option>) -> bool { match content { Some(content) => !content .iter() .any(|c| matches!(c, ReasoningItemContent::ReasoningText { .. 
})), None => false, } } impl From for ResponseItem { fn from(item: ResponseInputItem) -> Self { match item { ResponseInputItem::Message { role, content } => Self::Message { role, content, id: None, }, ResponseInputItem::FunctionCallOutput { call_id, output } => { Self::FunctionCallOutput { call_id, output } } ResponseInputItem::McpToolCallOutput { call_id, result } => Self::FunctionCallOutput { call_id, output: FunctionCallOutputPayload { success: Some(result.is_ok()), content: result.map_or_else( |tool_call_err| format!("err: {tool_call_err:?}"), |result| { serde_json::to_string(&result) .unwrap_or_else(|e| format!("JSON serialization error: {e}")) }, ), }, }, ResponseInputItem::CustomToolCallOutput { call_id, output } => { Self::CustomToolCallOutput { call_id, output } } } } } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)] #[serde(rename_all = "snake_case")] pub enum LocalShellStatus { Completed, InProgress, Incomplete, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)] #[serde(tag = "type", rename_all = "snake_case")] pub enum LocalShellAction { Exec(LocalShellExecAction), } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)] pub struct LocalShellExecAction { pub command: Vec, pub timeout_ms: Option, pub working_directory: Option, pub env: Option>, pub user: Option, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)] #[serde(tag = "type", rename_all = "snake_case")] pub enum WebSearchAction { Search { query: String, }, #[serde(other)] Other, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)] #[serde(tag = "type", rename_all = "snake_case")] pub enum ReasoningItemReasoningSummary { SummaryText { text: String }, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)] #[serde(tag = "type", rename_all = "snake_case")] pub enum ReasoningItemContent { ReasoningText { text: String }, Text { text: String }, } impl From> for ResponseInputItem { fn from(items: Vec) -> Self { Self::Message { role: 
"user".to_string(), content: items .into_iter() .filter_map(|c| match c { InputItem::Text { text } => Some(ContentItem::InputText { text }), InputItem::Image { image_url } => Some(ContentItem::InputImage { image_url }), InputItem::LocalImage { path } => match std::fs::read(&path) { Ok(bytes) => { let mime = mime_guess::from_path(&path) .first() .map(|m| m.essence_str().to_owned()) .unwrap_or_else(|| "image".to_string()); let encoded = base64::engine::general_purpose::STANDARD.encode(bytes); Some(ContentItem::InputImage { image_url: format!("data:{mime};base64,{encoded}"), }) } Err(err) => { tracing::warn!( "Skipping image {} – could not read file: {}", path.display(), err ); None } }, }) .collect::>(), } } } /// If the `name` of a `ResponseItem::FunctionCall` is either `container.exec` /// or shell`, the `arguments` field should deserialize to this struct. #[derive(Deserialize, Debug, Clone, PartialEq, TS)] pub struct ShellToolCallParams { pub command: Vec, pub workdir: Option, /// This is the maximum time in milliseconds that the command is allowed to run. #[serde(alias = "timeout")] pub timeout_ms: Option, #[serde(skip_serializing_if = "Option::is_none")] pub with_escalated_permissions: Option, #[serde(skip_serializing_if = "Option::is_none")] pub justification: Option, } #[derive(Debug, Clone, PartialEq, TS)] pub struct FunctionCallOutputPayload { pub content: String, pub success: Option, } // The Responses API expects two *different* shapes depending on success vs failure: // • success → output is a plain string (no nested object) // • failure → output is an object { content, success:false } // The upstream TypeScript CLI implements this by special‑casing the serialize path. // We replicate that behavior with a manual Serialize impl. 
impl Serialize for FunctionCallOutputPayload { fn serialize(&self, serializer: S) -> Result where S: Serializer, { // The upstream TypeScript CLI always serializes `output` as a *plain string* regardless // of whether the function call succeeded or failed. The boolean is purely informational // for local bookkeeping and is NOT sent to the OpenAI endpoint. Sending the nested object // form `{ content, success:false }` triggers the 400 we are still seeing. Mirror the JS CLI // exactly: always emit a bare string. serializer.serialize_str(&self.content) } } impl<'de> Deserialize<'de> for FunctionCallOutputPayload { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { let s = String::deserialize(deserializer)?; Ok(FunctionCallOutputPayload { content: s, success: None, }) } } // Implement Display so callers can treat the payload like a plain string when logging or doing // trivial substring checks in tests (existing tests call `.contains()` on the output). Display // returns the raw `content` field. impl std::fmt::Display for FunctionCallOutputPayload { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str(&self.content) } } impl std::ops::Deref for FunctionCallOutputPayload { type Target = str; fn deref(&self) -> &Self::Target { &self.content } } // (Moved event mapping logic into codex-core to avoid coupling protocol to UI-facing events.) 
#[cfg(test)]
mod tests {
    use super::*;
    use anyhow::Result;

    /// A payload with no success flag serializes `output` as a bare JSON string.
    #[test]
    fn serializes_success_as_plain_string() -> Result<()> {
        let item = ResponseInputItem::FunctionCallOutput {
            call_id: "call1".into(),
            output: FunctionCallOutputPayload {
                content: "ok".into(),
                success: None,
            },
        };

        let json = serde_json::to_string(&item)?;
        let v: serde_json::Value = serde_json::from_str(&json)?;

        // Success case -> output should be a plain string
        assert_eq!(v.get("output").unwrap().as_str().unwrap(), "ok");
        Ok(())
    }

    /// A payload flagged as failed still serializes `output` as a bare JSON string.
    #[test]
    fn serializes_failure_as_string() -> Result<()> {
        let item = ResponseInputItem::FunctionCallOutput {
            call_id: "call1".into(),
            output: FunctionCallOutputPayload {
                content: "bad".into(),
                success: Some(false),
            },
        };

        let json = serde_json::to_string(&item)?;
        let v: serde_json::Value = serde_json::from_str(&json)?;

        assert_eq!(v.get("output").unwrap().as_str().unwrap(), "bad");
        Ok(())
    }

    /// The `timeout` key is accepted as an alias for `timeout_ms`, and omitted optional
    /// fields deserialize to `None`.
    #[test]
    fn deserialize_shell_tool_call_params() -> Result<()> {
        let json = r#"{
            "command": ["ls", "-l"],
            "workdir": "/tmp",
            "timeout": 1000
        }"#;

        let params: ShellToolCallParams = serde_json::from_str(json)?;
        assert_eq!(
            ShellToolCallParams {
                command: vec!["ls".to_string(), "-l".to_string()],
                workdir: Some("/tmp".to_string()),
                timeout_ms: Some(1000),
                with_escalated_permissions: None,
                justification: None,
            },
            params
        );
        Ok(())
    }
}