llmx/codex-rs/core/src/models.rs

use std::collections::HashMap;
use base64::Engine;
use serde::Deserialize;
use serde::Serialize;
use serde::ser::Serializer;
use crate::protocol::InputItem;

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ResponseInputItem {
    Message {
        role: String,
        content: Vec<ContentItem>,
    },
    FunctionCallOutput {
        call_id: String,
        output: FunctionCallOutputPayload,
    },
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContentItem {
    InputText { text: String },
    InputImage { image_url: String },
    OutputText { text: String },
}
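
// Illustrative sketch (added for clarity): given the serde attributes above, a user message
// sent as a `ResponseInputItem::Message` serializes along these lines:
//
//   {"type":"message","role":"user","content":[
//       {"type":"input_text","text":"describe this image"},
//       {"type":"input_image","image_url":"data:image/png;base64,..."}]}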

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ResponseItem {
    Message {
        role: String,
        content: Vec<ContentItem>,
    },
    Reasoning {
        id: String,
        summary: Vec<ReasoningItemReasoningSummary>,
    },
    LocalShellCall {
        /// Set when using the chat completions API.
        id: Option<String>,
        /// Set when using the Responses API.
        call_id: Option<String>,
        status: LocalShellStatus,
        action: LocalShellAction,
    },
    FunctionCall {
        name: String,
        // The Responses API returns the function call arguments as a *string* that contains
        // JSON, not as an already-parsed object. We keep it as a raw string here and let
        // Session::handle_function_call parse it into a Value. This exactly matches the
        // Chat Completions + Responses API behavior.
        arguments: String,
        call_id: String,
    },
    // NOTE: The input schema for `function_call_output` objects that clients send to the
    // OpenAI /v1/responses endpoint is NOT the same shape as the objects the server returns on
    // the SSE stream. When *sending* we must wrap the string output inside an object that
    // includes a required `success` boolean. The upstream TypeScript CLI does this implicitly.
    // To ensure we serialize exactly the expected shape we introduce a dedicated payload struct
    // and flatten it here.
    FunctionCallOutput {
        call_id: String,
        output: FunctionCallOutputPayload,
    },
    #[serde(other)]
    Other,
}
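
// Illustrative sketch (added for clarity): a `function_call` item as it appears on the wire,
// with `arguments` carried as a JSON-encoded *string* per the comment above (the `call_id`
// value is made up):
//
//   {"type":"function_call","name":"shell",
//    "arguments":"{\"command\":[\"ls\",\"-l\"],\"workdir\":\"/tmp\",\"timeout\":1000}",
//    "call_id":"call_abc123"}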

impl From<ResponseInputItem> for ResponseItem {
    fn from(item: ResponseInputItem) -> Self {
        match item {
            ResponseInputItem::Message { role, content } => Self::Message { role, content },
            ResponseInputItem::FunctionCallOutput { call_id, output } => {
                Self::FunctionCallOutput { call_id, output }
            }
        }
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum LocalShellStatus {
    Completed,
    InProgress,
    Incomplete,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum LocalShellAction {
    Exec(LocalShellExecAction),
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LocalShellExecAction {
    pub command: Vec<String>,
    pub timeout_ms: Option<u64>,
    pub working_directory: Option<String>,
    pub env: Option<HashMap<String, String>>,
    pub user: Option<String>,
}
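
// Illustrative sketch (added for clarity): a `LocalShellCall` with an `Exec` action serializes
// roughly as below; the optional fields show up as `null` because none of these structs use
// `skip_serializing_if`:
//
//   {"type":"local_shell_call","id":null,"call_id":"call_1","status":"completed",
//    "action":{"type":"exec","command":["ls","-l"],"timeout_ms":null,
//              "working_directory":null,"env":null,"user":null}}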

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ReasoningItemReasoningSummary {
    SummaryText { text: String },
}

impl From<Vec<InputItem>> for ResponseInputItem {
    fn from(items: Vec<InputItem>) -> Self {
        Self::Message {
            role: "user".to_string(),
            content: items
                .into_iter()
                .filter_map(|c| match c {
                    InputItem::Text { text } => Some(ContentItem::InputText { text }),
                    InputItem::Image { image_url } => Some(ContentItem::InputImage { image_url }),
                    InputItem::LocalImage { path } => match std::fs::read(&path) {
                        Ok(bytes) => {
                            let mime = mime_guess::from_path(&path)
                                .first()
                                .map(|m| m.essence_str().to_owned())
                                .unwrap_or_else(|| "application/octet-stream".to_string());
                            let encoded = base64::engine::general_purpose::STANDARD.encode(bytes);
                            Some(ContentItem::InputImage {
                                image_url: format!("data:{};base64,{}", mime, encoded),
                            })
                        }
                        Err(err) => {
                            tracing::warn!(
                                "Skipping image {}: could not read file: {}",
                                path.display(),
                                err
                            );
                            None
                        }
                    },
                })
                .collect::<Vec<ContentItem>>(),
        }
    }
}
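
// Illustrative note (added): a successfully read `LocalImage` ends up as an `InputImage` whose
// `image_url` is a data URL such as `data:image/png;base64,iVBORw0KGgo...`.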

/// If the `name` of a `ResponseItem::FunctionCall` is either `container.exec`
/// or `shell`, the `arguments` field should deserialize to this struct.
#[derive(Deserialize, Debug, Clone, PartialEq)]
pub struct ShellToolCallParams {
    pub command: Vec<String>,
    pub workdir: Option<String>,
    /// This is the maximum time in seconds that the command is allowed to run.
    #[serde(rename = "timeout")]
    // The wire format uses `timeout`, which has ambiguous units, so we use
    // `timeout_ms` as the field name so it is clear in code.
    pub timeout_ms: Option<u64>,
}
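
// Example (added, mirrors the test at the bottom of this file): the raw `arguments` string
//   {"command":["ls","-l"],"workdir":"/tmp","timeout":1000}
// deserializes into `ShellToolCallParams` with `timeout` mapped onto `timeout_ms`.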

#[derive(Deserialize, Debug, Clone)]
pub struct FunctionCallOutputPayload {
    pub content: String,
    #[expect(dead_code)]
    pub success: Option<bool>,
}

// The Responses API nominally expects two *different* shapes depending on success vs failure:
//   • success → output is a plain string (no nested object)
//   • failure → output is an object { content, success: false }
// The upstream TypeScript CLI implements this by special-casing the serialize path, but in
// practice it always emits a plain string (see the note inside the impl below), and the manual
// Serialize impl here mirrors that.
impl Serialize for FunctionCallOutputPayload {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // The upstream TypeScript CLI always serializes `output` as a *plain string* regardless
        // of whether the function call succeeded or failed. The boolean is purely informational
        // for local bookkeeping and is NOT sent to the OpenAI endpoint. Sending the nested
        // object form `{ content, success: false }` triggers the 400 we are still seeing.
        // Mirror the JS CLI exactly: always emit a bare string.
        serializer.serialize_str(&self.content)
    }
}
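
// Illustrative sketch (added for clarity): with the impl above, a `FunctionCallOutput` item
// serializes with `output` as a bare string, never a nested object:
//
//   {"type":"function_call_output","call_id":"call_1","output":"hello from stdout"}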

// Implement Display so callers can treat the payload like a plain string when logging or doing
// trivial substring checks in tests (existing tests call `.contains()` on the output). Display
// returns the raw `content` field.
impl std::fmt::Display for FunctionCallOutputPayload {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.content)
    }
}

impl std::ops::Deref for FunctionCallOutputPayload {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        &self.content
    }
}
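
// Usage note (added): thanks to `Deref<Target = str>`, call sites can treat the payload as a
// string slice directly, e.g. `payload.contains("error")` or `&*payload` where `&str` is needed.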

#[cfg(test)]
mod tests {
    #![allow(clippy::unwrap_used)]
    use super::*;

    #[test]
    fn serializes_success_as_plain_string() {
        let item = ResponseInputItem::FunctionCallOutput {
            call_id: "call1".into(),
            output: FunctionCallOutputPayload {
                content: "ok".into(),
                success: None,
            },
        };
        let json = serde_json::to_string(&item).unwrap();
        let v: serde_json::Value = serde_json::from_str(&json).unwrap();
        // Success case -> output should be a plain string.
        assert_eq!(v.get("output").unwrap().as_str().unwrap(), "ok");
    }

    #[test]
    fn serializes_failure_as_string() {
        let item = ResponseInputItem::FunctionCallOutput {
            call_id: "call1".into(),
            output: FunctionCallOutputPayload {
                content: "bad".into(),
                success: Some(false),
            },
        };
        let json = serde_json::to_string(&item).unwrap();
        let v: serde_json::Value = serde_json::from_str(&json).unwrap();
        assert_eq!(v.get("output").unwrap().as_str().unwrap(), "bad");
    }

    #[test]
    fn deserialize_shell_tool_call_params() {
        let json = r#"{
            "command": ["ls", "-l"],
            "workdir": "/tmp",
            "timeout": 1000
        }"#;
        let params: ShellToolCallParams = serde_json::from_str(json).unwrap();
        assert_eq!(
            ShellToolCallParams {
                command: vec!["ls".to_string(), "-l".to_string()],
                workdir: Some("/tmp".to_string()),
                timeout_ms: Some(1000),
            },
            params
        );
    }
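
    // The two tests below are added sketches (not part of the original suite); they exercise
    // behavior that is implied by the definitions above.

    // Exercises `From<Vec<InputItem>>`: a plain text input item should become a user message
    // containing a single `input_text` content item.
    #[test]
    fn converts_text_input_item_into_user_message() {
        let items = vec![InputItem::Text {
            text: "hello".to_string(),
        }];
        match ResponseInputItem::from(items) {
            ResponseInputItem::Message { role, content } => {
                assert_eq!(role, "user");
                assert!(matches!(
                    content.as_slice(),
                    [ContentItem::InputText { text }] if text == "hello"
                ));
            }
            other => panic!("expected a Message, got {other:?}"),
        }
    }

    // `#[serde(other)]` should map any unknown `type` tag onto `ResponseItem::Other` instead of
    // failing deserialization.
    #[test]
    fn deserializes_unknown_item_type_as_other() {
        let json = r#"{"type":"some_future_item_type"}"#;
        let item: ResponseItem = serde_json::from_str(json).unwrap();
        assert!(matches!(item, ResponseItem::Other));
    }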
}