176 lines
6.2 KiB
Rust
176 lines
6.2 KiB
Rust
|
|
use base64::Engine;
|
|||
|
|
use serde::ser::Serializer;
|
|||
|
|
use serde::Deserialize;
|
|||
|
|
use serde::Serialize;
|
|||
|
|
|
|||
|
|
use crate::protocol::InputItem;
|
|||
|
|
|
|||
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|||
|
|
#[serde(tag = "type", rename_all = "snake_case")]
|
|||
|
|
pub enum ResponseInputItem {
|
|||
|
|
Message {
|
|||
|
|
role: String,
|
|||
|
|
content: Vec<ContentItem>,
|
|||
|
|
},
|
|||
|
|
FunctionCallOutput {
|
|||
|
|
call_id: String,
|
|||
|
|
output: FunctionCallOutputPayload,
|
|||
|
|
},
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|||
|
|
#[serde(tag = "type", rename_all = "snake_case")]
|
|||
|
|
pub enum ContentItem {
|
|||
|
|
InputText { text: String },
|
|||
|
|
InputImage { image_url: String },
|
|||
|
|
OutputText { text: String },
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|||
|
|
#[serde(tag = "type", rename_all = "snake_case")]
|
|||
|
|
pub enum ResponseItem {
|
|||
|
|
Message {
|
|||
|
|
role: String,
|
|||
|
|
content: Vec<ContentItem>,
|
|||
|
|
},
|
|||
|
|
FunctionCall {
|
|||
|
|
name: String,
|
|||
|
|
// The Responses API returns the function call arguments as a *string* that contains
|
|||
|
|
// JSON, not as an already‑parsed object. We keep it as a raw string here and let
|
|||
|
|
// Session::handle_function_call parse it into a Value. This exactly matches the
|
|||
|
|
// Chat Completions + Responses API behavior.
|
|||
|
|
arguments: String,
|
|||
|
|
call_id: String,
|
|||
|
|
},
|
|||
|
|
// NOTE: The input schema for `function_call_output` objects that clients send to the
|
|||
|
|
// OpenAI /v1/responses endpoint is NOT the same shape as the objects the server returns on the
|
|||
|
|
// SSE stream. When *sending* we must wrap the string output inside an object that includes a
|
|||
|
|
// required `success` boolean. The upstream TypeScript CLI does this implicitly. To ensure we
|
|||
|
|
// serialize exactly the expected shape we introduce a dedicated payload struct and flatten it
|
|||
|
|
// here.
|
|||
|
|
FunctionCallOutput {
|
|||
|
|
call_id: String,
|
|||
|
|
output: FunctionCallOutputPayload,
|
|||
|
|
},
|
|||
|
|
#[serde(other)]
|
|||
|
|
Other,
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
impl From<Vec<InputItem>> for ResponseInputItem {
|
|||
|
|
fn from(items: Vec<InputItem>) -> Self {
|
|||
|
|
Self::Message {
|
|||
|
|
role: "user".to_string(),
|
|||
|
|
content: items
|
|||
|
|
.into_iter()
|
|||
|
|
.filter_map(|c| match c {
|
|||
|
|
InputItem::Text { text } => Some(ContentItem::InputText { text }),
|
|||
|
|
InputItem::Image { image_url } => Some(ContentItem::InputImage { image_url }),
|
|||
|
|
InputItem::LocalImage { path } => match std::fs::read(&path) {
|
|||
|
|
Ok(bytes) => {
|
|||
|
|
let mime = mime_guess::from_path(&path)
|
|||
|
|
.first()
|
|||
|
|
.map(|m| m.essence_str().to_owned())
|
|||
|
|
.unwrap_or_else(|| "application/octet-stream".to_string());
|
|||
|
|
let encoded = base64::engine::general_purpose::STANDARD.encode(bytes);
|
|||
|
|
Some(ContentItem::InputImage {
|
|||
|
|
image_url: format!("data:{};base64,{}", mime, encoded),
|
|||
|
|
})
|
|||
|
|
}
|
|||
|
|
Err(err) => {
|
|||
|
|
tracing::warn!(
|
|||
|
|
"Skipping image {} – could not read file: {}",
|
|||
|
|
path.display(),
|
|||
|
|
err
|
|||
|
|
);
|
|||
|
|
None
|
|||
|
|
}
|
|||
|
|
},
|
|||
|
|
})
|
|||
|
|
.collect::<Vec<ContentItem>>(),
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
#[expect(dead_code)]
|
|||
|
|
#[derive(Deserialize, Debug, Clone)]
|
|||
|
|
pub struct FunctionCallOutputPayload {
|
|||
|
|
pub content: String,
|
|||
|
|
pub success: Option<bool>,
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// The `output` of a function call is sent to the Responses API as a plain string, whether
// the call succeeded or failed. The nested object form `{ content, success }` is rejected
// with a 400, and the `success` flag is local bookkeeping only.
// The upstream TypeScript CLI behaves the same way.
// We replicate that behavior with a manual Serialize impl.
|
|||
|
|
|
|||
|
|
impl Serialize for FunctionCallOutputPayload {
|
|||
|
|
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
|||
|
|
where
|
|||
|
|
S: Serializer,
|
|||
|
|
{
|
|||
|
|
// The upstream TypeScript CLI always serializes `output` as a *plain string* regardless
|
|||
|
|
// of whether the function call succeeded or failed. The boolean is purely informational
|
|||
|
|
// for local bookkeeping and is NOT sent to the OpenAI endpoint. Sending the nested object
|
|||
|
|
// form `{ content, success:false }` triggers the 400 we are still seeing. Mirror the JS CLI
|
|||
|
|
// exactly: always emit a bare string.
|
|||
|
|
|
|||
|
|
serializer.serialize_str(&self.content)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Implement Display so callers can treat the payload like a plain string when logging or doing
|
|||
|
|
// trivial substring checks in tests (existing tests call `.contains()` on the output). Display
|
|||
|
|
// returns the raw `content` field.
|
|||
|
|
|
|||
|
|
impl std::fmt::Display for FunctionCallOutputPayload {
|
|||
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|||
|
|
f.write_str(&self.content)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
impl std::ops::Deref for FunctionCallOutputPayload {
|
|||
|
|
type Target = str;
|
|||
|
|
fn deref(&self) -> &Self::Target {
|
|||
|
|
&self.content
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Serializes a `function_call_output` input item built from the given payload fields and
    /// returns the result re-parsed as a generic JSON value.
    fn function_call_output_json(content: &str, success: Option<bool>) -> serde_json::Value {
        let item = ResponseInputItem::FunctionCallOutput {
            call_id: "call1".into(),
            output: FunctionCallOutputPayload {
                content: content.into(),
                success,
            },
        };

        let json = serde_json::to_string(&item).unwrap();
        serde_json::from_str(&json).unwrap()
    }

    #[test]
    fn serializes_success_as_plain_string() {
        let v = function_call_output_json("ok", None);

        // Success case -> output should be a plain string
        assert_eq!(v.get("output").unwrap().as_str().unwrap(), "ok");
    }

    #[test]
    fn serializes_failure_as_string() {
        let v = function_call_output_json("bad", Some(false));

        // Failure case -> output is still a plain string, not an object
        assert_eq!(v.get("output").unwrap().as_str().unwrap(), "bad");
    }
}
|