Phase 1: Repository & Infrastructure Setup

- Renamed directories: codex-rs -> llmx-rs, codex-cli -> llmx-cli
- Updated package.json files:
  - Root: llmx-monorepo
  - CLI: @llmx/llmx
  - SDK: @llmx/llmx-sdk
- Updated pnpm workspace configuration
- Renamed binary: codex.js -> llmx.js
- Updated environment variables: CODEX_* -> LLMX_*
- Changed repository URLs to valknar/llmx

🤖 Generated with Claude Code
This commit is contained in:
Sebastian Krüger
2025-11-11 14:01:52 +01:00
parent 052b052832
commit f237fe560d
1151 changed files with 41 additions and 35 deletions

View File

@@ -0,0 +1,3 @@
// Single integration test binary that aggregates all test modules.
// The submodules live in `tests/suite/`.
mod suite;

View File

@@ -0,0 +1,25 @@
[package]
edition = "2024"
name = "mcp_test_support"
version = { workspace = true }
[lib]
path = "lib.rs"
[dependencies]
anyhow = { workspace = true }
assert_cmd = { workspace = true }
codex-core = { workspace = true }
codex-mcp-server = { workspace = true }
mcp-types = { workspace = true }
os_info = { workspace = true }
pretty_assertions = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tokio = { workspace = true, features = [
"io-std",
"macros",
"process",
"rt-multi-thread",
] }
wiremock = { workspace = true }

View File

@@ -0,0 +1,17 @@
mod mcp_process;
mod mock_model_server;
mod responses;
pub use mcp_process::McpProcess;
use mcp_types::JSONRPCResponse;
pub use mock_model_server::create_mock_chat_completions_server;
pub use responses::create_apply_patch_sse_response;
pub use responses::create_final_assistant_message_sse_response;
pub use responses::create_shell_sse_response;
use serde::de::DeserializeOwned;
pub fn to_response<T: DeserializeOwned>(response: JSONRPCResponse) -> anyhow::Result<T> {
let value = serde_json::to_value(response.result)?;
let codex_response = serde_json::from_value(value)?;
Ok(codex_response)
}

View File

@@ -0,0 +1,343 @@
use std::path::Path;
use std::process::Stdio;
use std::sync::atomic::AtomicI64;
use std::sync::atomic::Ordering;
use tokio::io::AsyncBufReadExt;
use tokio::io::AsyncWriteExt;
use tokio::io::BufReader;
use tokio::process::Child;
use tokio::process::ChildStdin;
use tokio::process::ChildStdout;
use anyhow::Context;
use assert_cmd::prelude::*;
use codex_mcp_server::CodexToolCallParam;
use mcp_types::CallToolRequestParams;
use mcp_types::ClientCapabilities;
use mcp_types::Implementation;
use mcp_types::InitializeRequestParams;
use mcp_types::JSONRPC_VERSION;
use mcp_types::JSONRPCMessage;
use mcp_types::JSONRPCNotification;
use mcp_types::JSONRPCRequest;
use mcp_types::JSONRPCResponse;
use mcp_types::ModelContextProtocolNotification;
use mcp_types::ModelContextProtocolRequest;
use mcp_types::RequestId;
use pretty_assertions::assert_eq;
use serde_json::json;
use std::process::Command as StdCommand;
use tokio::process::Command;
pub struct McpProcess {
next_request_id: AtomicI64,
/// Retain this child process until the client is dropped. The Tokio runtime
/// will make a "best effort" to reap the process after it exits, but it is
/// not a guarantee. See the `kill_on_drop` documentation for details.
#[allow(dead_code)]
process: Child,
stdin: ChildStdin,
stdout: BufReader<ChildStdout>,
}
impl McpProcess {
pub async fn new(codex_home: &Path) -> anyhow::Result<Self> {
Self::new_with_env(codex_home, &[]).await
}
/// Creates a new MCP process, allowing tests to override or remove
/// specific environment variables for the child process only.
///
/// Pass a tuple of (key, Some(value)) to set/override, or (key, None) to
/// remove a variable from the child's environment.
pub async fn new_with_env(
codex_home: &Path,
env_overrides: &[(&str, Option<&str>)],
) -> anyhow::Result<Self> {
// Use assert_cmd to locate the binary path and then switch to tokio::process::Command
let std_cmd = StdCommand::cargo_bin("codex-mcp-server")
.context("should find binary for codex-mcp-server")?;
let program = std_cmd.get_program().to_owned();
let mut cmd = Command::new(program);
cmd.stdin(Stdio::piped());
cmd.stdout(Stdio::piped());
cmd.stderr(Stdio::piped());
cmd.env("CODEX_HOME", codex_home);
cmd.env("RUST_LOG", "debug");
for (k, v) in env_overrides {
match v {
Some(val) => {
cmd.env(k, val);
}
None => {
cmd.env_remove(k);
}
}
}
let mut process = cmd
.kill_on_drop(true)
.spawn()
.context("codex-mcp-server proc should start")?;
let stdin = process
.stdin
.take()
.ok_or_else(|| anyhow::format_err!("mcp should have stdin fd"))?;
let stdout = process
.stdout
.take()
.ok_or_else(|| anyhow::format_err!("mcp should have stdout fd"))?;
let stdout = BufReader::new(stdout);
// Forward child's stderr to our stderr so failures are visible even
// when stdout/stderr are captured by the test harness.
if let Some(stderr) = process.stderr.take() {
let mut stderr_reader = BufReader::new(stderr).lines();
tokio::spawn(async move {
while let Ok(Some(line)) = stderr_reader.next_line().await {
eprintln!("[mcp stderr] {line}");
}
});
}
Ok(Self {
next_request_id: AtomicI64::new(0),
process,
stdin,
stdout,
})
}
/// Performs the initialization handshake with the MCP server.
pub async fn initialize(&mut self) -> anyhow::Result<()> {
let request_id = self.next_request_id.fetch_add(1, Ordering::Relaxed);
let params = InitializeRequestParams {
capabilities: ClientCapabilities {
elicitation: Some(json!({})),
experimental: None,
roots: None,
sampling: None,
},
client_info: Implementation {
name: "elicitation test".into(),
title: Some("Elicitation Test".into()),
version: "0.0.0".into(),
user_agent: None,
},
protocol_version: mcp_types::MCP_SCHEMA_VERSION.into(),
};
let params_value = serde_json::to_value(params)?;
self.send_jsonrpc_message(JSONRPCMessage::Request(JSONRPCRequest {
jsonrpc: JSONRPC_VERSION.into(),
id: RequestId::Integer(request_id),
method: mcp_types::InitializeRequest::METHOD.into(),
params: Some(params_value),
}))
.await?;
let initialized = self.read_jsonrpc_message().await?;
let os_info = os_info::get();
let user_agent = format!(
"codex_cli_rs/0.0.0 ({} {}; {}) {} (elicitation test; 0.0.0)",
os_info.os_type(),
os_info.version(),
os_info.architecture().unwrap_or("unknown"),
codex_core::terminal::user_agent()
);
assert_eq!(
JSONRPCMessage::Response(JSONRPCResponse {
jsonrpc: JSONRPC_VERSION.into(),
id: RequestId::Integer(request_id),
result: json!({
"capabilities": {
"tools": {
"listChanged": true
},
},
"serverInfo": {
"name": "codex-mcp-server",
"title": "Codex",
"version": "0.0.0",
"user_agent": user_agent
},
"protocolVersion": mcp_types::MCP_SCHEMA_VERSION
})
}),
initialized
);
// Send notifications/initialized to ack the response.
self.send_jsonrpc_message(JSONRPCMessage::Notification(JSONRPCNotification {
jsonrpc: JSONRPC_VERSION.into(),
method: mcp_types::InitializedNotification::METHOD.into(),
params: None,
}))
.await?;
Ok(())
}
/// Returns the id used to make the request so it can be used when
/// correlating notifications.
pub async fn send_codex_tool_call(
&mut self,
params: CodexToolCallParam,
) -> anyhow::Result<i64> {
let codex_tool_call_params = CallToolRequestParams {
name: "codex".to_string(),
arguments: Some(serde_json::to_value(params)?),
};
self.send_request(
mcp_types::CallToolRequest::METHOD,
Some(serde_json::to_value(codex_tool_call_params)?),
)
.await
}
async fn send_request(
&mut self,
method: &str,
params: Option<serde_json::Value>,
) -> anyhow::Result<i64> {
let request_id = self.next_request_id.fetch_add(1, Ordering::Relaxed);
let message = JSONRPCMessage::Request(JSONRPCRequest {
jsonrpc: JSONRPC_VERSION.into(),
id: RequestId::Integer(request_id),
method: method.to_string(),
params,
});
self.send_jsonrpc_message(message).await?;
Ok(request_id)
}
pub async fn send_response(
&mut self,
id: RequestId,
result: serde_json::Value,
) -> anyhow::Result<()> {
self.send_jsonrpc_message(JSONRPCMessage::Response(JSONRPCResponse {
jsonrpc: JSONRPC_VERSION.into(),
id,
result,
}))
.await
}
async fn send_jsonrpc_message(&mut self, message: JSONRPCMessage) -> anyhow::Result<()> {
eprintln!("writing message to stdin: {message:?}");
let payload = serde_json::to_string(&message)?;
self.stdin.write_all(payload.as_bytes()).await?;
self.stdin.write_all(b"\n").await?;
self.stdin.flush().await?;
Ok(())
}
async fn read_jsonrpc_message(&mut self) -> anyhow::Result<JSONRPCMessage> {
let mut line = String::new();
self.stdout.read_line(&mut line).await?;
let message = serde_json::from_str::<JSONRPCMessage>(&line)?;
eprintln!("read message from stdout: {message:?}");
Ok(message)
}
pub async fn read_stream_until_request_message(&mut self) -> anyhow::Result<JSONRPCRequest> {
eprintln!("in read_stream_until_request_message()");
loop {
let message = self.read_jsonrpc_message().await?;
match message {
JSONRPCMessage::Notification(_) => {
eprintln!("notification: {message:?}");
}
JSONRPCMessage::Request(jsonrpc_request) => {
return Ok(jsonrpc_request);
}
JSONRPCMessage::Error(_) => {
anyhow::bail!("unexpected JSONRPCMessage::Error: {message:?}");
}
JSONRPCMessage::Response(_) => {
anyhow::bail!("unexpected JSONRPCMessage::Response: {message:?}");
}
}
}
}
pub async fn read_stream_until_response_message(
&mut self,
request_id: RequestId,
) -> anyhow::Result<JSONRPCResponse> {
eprintln!("in read_stream_until_response_message({request_id:?})");
loop {
let message = self.read_jsonrpc_message().await?;
match message {
JSONRPCMessage::Notification(_) => {
eprintln!("notification: {message:?}");
}
JSONRPCMessage::Request(_) => {
anyhow::bail!("unexpected JSONRPCMessage::Request: {message:?}");
}
JSONRPCMessage::Error(_) => {
anyhow::bail!("unexpected JSONRPCMessage::Error: {message:?}");
}
JSONRPCMessage::Response(jsonrpc_response) => {
if jsonrpc_response.id == request_id {
return Ok(jsonrpc_response);
}
}
}
}
}
/// Reads notifications until a legacy TaskComplete event is observed:
/// Method "codex/event" with params.msg.type == "task_complete".
pub async fn read_stream_until_legacy_task_complete_notification(
&mut self,
) -> anyhow::Result<JSONRPCNotification> {
eprintln!("in read_stream_until_legacy_task_complete_notification()");
loop {
let message = self.read_jsonrpc_message().await?;
match message {
JSONRPCMessage::Notification(notification) => {
let is_match = if notification.method == "codex/event" {
if let Some(params) = &notification.params {
params
.get("msg")
.and_then(|m| m.get("type"))
.and_then(|t| t.as_str())
== Some("task_complete")
} else {
false
}
} else {
false
};
if is_match {
return Ok(notification);
} else {
eprintln!("ignoring notification: {notification:?}");
}
}
JSONRPCMessage::Request(_) => {
anyhow::bail!("unexpected JSONRPCMessage::Request: {message:?}");
}
JSONRPCMessage::Error(_) => {
anyhow::bail!("unexpected JSONRPCMessage::Error: {message:?}");
}
JSONRPCMessage::Response(_) => {
anyhow::bail!("unexpected JSONRPCMessage::Response: {message:?}");
}
}
}
}
}

View File

@@ -0,0 +1,47 @@
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
use wiremock::Mock;
use wiremock::MockServer;
use wiremock::Respond;
use wiremock::ResponseTemplate;
use wiremock::matchers::method;
use wiremock::matchers::path;
/// Create a mock server that will provide the responses, in order, for
/// requests to the `/v1/chat/completions` endpoint.
pub async fn create_mock_chat_completions_server(responses: Vec<String>) -> MockServer {
let server = MockServer::start().await;
let num_calls = responses.len();
let seq_responder = SeqResponder {
num_calls: AtomicUsize::new(0),
responses,
};
Mock::given(method("POST"))
.and(path("/v1/chat/completions"))
.respond_with(seq_responder)
.expect(num_calls as u64)
.mount(&server)
.await;
server
}
struct SeqResponder {
num_calls: AtomicUsize,
responses: Vec<String>,
}
impl Respond for SeqResponder {
fn respond(&self, _: &wiremock::Request) -> ResponseTemplate {
let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst);
match self.responses.get(call_num) {
Some(response) => ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(response.clone(), "text/event-stream"),
None => panic!("no response for {call_num}"),
}
}
}

View File

@@ -0,0 +1,95 @@
use serde_json::json;
use std::path::Path;
pub fn create_shell_sse_response(
command: Vec<String>,
workdir: Option<&Path>,
timeout_ms: Option<u64>,
call_id: &str,
) -> anyhow::Result<String> {
// The `arguments`` for the `shell` tool is a serialized JSON object.
let tool_call_arguments = serde_json::to_string(&json!({
"command": command,
"workdir": workdir.map(|w| w.to_string_lossy()),
"timeout": timeout_ms
}))?;
let tool_call = json!({
"choices": [
{
"delta": {
"tool_calls": [
{
"id": call_id,
"function": {
"name": "shell",
"arguments": tool_call_arguments
}
}
]
},
"finish_reason": "tool_calls"
}
]
});
let sse = format!(
"data: {}\n\ndata: DONE\n\n",
serde_json::to_string(&tool_call)?
);
Ok(sse)
}
pub fn create_final_assistant_message_sse_response(message: &str) -> anyhow::Result<String> {
let assistant_message = json!({
"choices": [
{
"delta": {
"content": message
},
"finish_reason": "stop"
}
]
});
let sse = format!(
"data: {}\n\ndata: DONE\n\n",
serde_json::to_string(&assistant_message)?
);
Ok(sse)
}
pub fn create_apply_patch_sse_response(
patch_content: &str,
call_id: &str,
) -> anyhow::Result<String> {
// Use shell command to call apply_patch with heredoc format
let shell_command = format!("apply_patch <<'EOF'\n{patch_content}\nEOF");
let tool_call_arguments = serde_json::to_string(&json!({
"command": ["bash", "-lc", shell_command]
}))?;
let tool_call = json!({
"choices": [
{
"delta": {
"tool_calls": [
{
"id": call_id,
"function": {
"name": "shell",
"arguments": tool_call_arguments
}
}
]
},
"finish_reason": "tool_calls"
}
]
});
let sse = format!(
"data: {}\n\ndata: DONE\n\n",
serde_json::to_string(&tool_call)?
);
Ok(sse)
}

View File

@@ -0,0 +1,481 @@
use std::collections::HashMap;
use std::env;
use std::path::Path;
use std::path::PathBuf;
use codex_core::parse_command;
use codex_core::protocol::FileChange;
use codex_core::protocol::ReviewDecision;
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
use codex_mcp_server::CodexToolCallParam;
use codex_mcp_server::ExecApprovalElicitRequestParams;
use codex_mcp_server::ExecApprovalResponse;
use codex_mcp_server::PatchApprovalElicitRequestParams;
use codex_mcp_server::PatchApprovalResponse;
use mcp_types::ElicitRequest;
use mcp_types::ElicitRequestParamsRequestedSchema;
use mcp_types::JSONRPC_VERSION;
use mcp_types::JSONRPCRequest;
use mcp_types::JSONRPCResponse;
use mcp_types::ModelContextProtocolRequest;
use mcp_types::RequestId;
use pretty_assertions::assert_eq;
use serde_json::json;
use tempfile::TempDir;
use tokio::time::timeout;
use wiremock::MockServer;
use core_test_support::skip_if_no_network;
use mcp_test_support::McpProcess;
use mcp_test_support::create_apply_patch_sse_response;
use mcp_test_support::create_final_assistant_message_sse_response;
use mcp_test_support::create_mock_chat_completions_server;
use mcp_test_support::create_shell_sse_response;
// Allow ample time on slower CI or under load to avoid flakes.
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(20);
/// Test that a shell command that is not on the "trusted" list triggers an
/// elicitation request to the MCP and that sending the approval runs the
/// command, as expected.
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn test_shell_command_approval_triggers_elicitation() {
if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
println!(
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
);
return;
}
// Apparently `#[tokio::test]` must return `()`, so we create a helper
// function that returns `Result` so we can use `?` in favor of `unwrap`.
if let Err(err) = shell_command_approval_triggers_elicitation().await {
panic!("failure: {err}");
}
}
async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
// Use a simple, untrusted command that creates a file so we can
// observe a side-effect.
//
// Crossplatform approach: run a tiny Python snippet to touch the file
// using `python3 -c ...` on all platforms.
let workdir_for_shell_function_call = TempDir::new()?;
let created_filename = "created_by_shell_tool.txt";
let created_file = workdir_for_shell_function_call
.path()
.join(created_filename);
let shell_command = vec![
"python3".to_string(),
"-c".to_string(),
format!("import pathlib; pathlib.Path('{created_filename}').touch()"),
];
let McpHandle {
process: mut mcp_process,
server: _server,
dir: _dir,
} = create_mcp_process(vec![
create_shell_sse_response(
shell_command.clone(),
Some(workdir_for_shell_function_call.path()),
Some(5_000),
"call1234",
)?,
create_final_assistant_message_sse_response("File created!")?,
])
.await?;
// Send a "codex" tool request, which should hit the completions endpoint.
// In turn, it should reply with a tool call, which the MCP should forward
// as an elicitation.
let codex_request_id = mcp_process
.send_codex_tool_call(CodexToolCallParam {
prompt: "run `git init`".to_string(),
..Default::default()
})
.await?;
let elicitation_request = timeout(
DEFAULT_READ_TIMEOUT,
mcp_process.read_stream_until_request_message(),
)
.await??;
let elicitation_request_id = elicitation_request.id.clone();
let params = serde_json::from_value::<ExecApprovalElicitRequestParams>(
elicitation_request
.params
.clone()
.ok_or_else(|| anyhow::anyhow!("elicitation_request.params must be set"))?,
)?;
let expected_elicitation_request = create_expected_elicitation_request(
elicitation_request_id.clone(),
shell_command.clone(),
workdir_for_shell_function_call.path(),
codex_request_id.to_string(),
params.codex_event_id.clone(),
)?;
assert_eq!(expected_elicitation_request, elicitation_request);
// Accept the `git init` request by responding to the elicitation.
mcp_process
.send_response(
elicitation_request_id,
serde_json::to_value(ExecApprovalResponse {
decision: ReviewDecision::Approved,
})?,
)
.await?;
// Verify task_complete notification arrives before the tool call completes.
#[expect(clippy::expect_used)]
let _task_complete = timeout(
DEFAULT_READ_TIMEOUT,
mcp_process.read_stream_until_legacy_task_complete_notification(),
)
.await
.expect("task_complete_notification timeout")
.expect("task_complete_notification resp");
// Verify the original `codex` tool call completes and that the file was created.
let codex_response = timeout(
DEFAULT_READ_TIMEOUT,
mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
)
.await??;
assert_eq!(
JSONRPCResponse {
jsonrpc: JSONRPC_VERSION.into(),
id: RequestId::Integer(codex_request_id),
result: json!({
"content": [
{
"text": "File created!",
"type": "text"
}
]
}),
},
codex_response
);
assert!(created_file.is_file(), "created file should exist");
Ok(())
}
fn create_expected_elicitation_request(
elicitation_request_id: RequestId,
command: Vec<String>,
workdir: &Path,
codex_mcp_tool_call_id: String,
codex_event_id: String,
) -> anyhow::Result<JSONRPCRequest> {
let expected_message = format!(
"Allow Codex to run `{}` in `{}`?",
shlex::try_join(command.iter().map(std::convert::AsRef::as_ref))?,
workdir.to_string_lossy()
);
let codex_parsed_cmd = parse_command::parse_command(&command);
Ok(JSONRPCRequest {
jsonrpc: JSONRPC_VERSION.into(),
id: elicitation_request_id,
method: ElicitRequest::METHOD.to_string(),
params: Some(serde_json::to_value(&ExecApprovalElicitRequestParams {
message: expected_message,
requested_schema: ElicitRequestParamsRequestedSchema {
r#type: "object".to_string(),
properties: json!({}),
required: None,
},
codex_elicitation: "exec-approval".to_string(),
codex_mcp_tool_call_id,
codex_event_id,
codex_command: command,
codex_cwd: workdir.to_path_buf(),
codex_call_id: "call1234".to_string(),
codex_parsed_cmd,
codex_risk: None,
})?),
})
}
/// Test that patch approval triggers an elicitation request to the MCP and that
/// sending the approval applies the patch, as expected.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_patch_approval_triggers_elicitation() {
if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
println!(
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
);
return;
}
if let Err(err) = patch_approval_triggers_elicitation().await {
panic!("failure: {err}");
}
}
async fn patch_approval_triggers_elicitation() -> anyhow::Result<()> {
let cwd = TempDir::new()?;
let test_file = cwd.path().join("destination_file.txt");
std::fs::write(&test_file, "original content\n")?;
let patch_content = format!(
"*** Begin Patch\n*** Update File: {}\n-original content\n+modified content\n*** End Patch",
test_file.as_path().to_string_lossy()
);
let McpHandle {
process: mut mcp_process,
server: _server,
dir: _dir,
} = create_mcp_process(vec![
create_apply_patch_sse_response(&patch_content, "call1234")?,
create_final_assistant_message_sse_response("Patch has been applied successfully!")?,
])
.await?;
// Send a "codex" tool request that will trigger the apply_patch command
let codex_request_id = mcp_process
.send_codex_tool_call(CodexToolCallParam {
cwd: Some(cwd.path().to_string_lossy().to_string()),
prompt: "please modify the test file".to_string(),
..Default::default()
})
.await?;
let elicitation_request = timeout(
DEFAULT_READ_TIMEOUT,
mcp_process.read_stream_until_request_message(),
)
.await??;
let elicitation_request_id = RequestId::Integer(0);
let mut expected_changes = HashMap::new();
expected_changes.insert(
test_file.as_path().to_path_buf(),
FileChange::Update {
unified_diff: "@@ -1 +1 @@\n-original content\n+modified content\n".to_string(),
move_path: None,
},
);
let expected_elicitation_request = create_expected_patch_approval_elicitation_request(
elicitation_request_id.clone(),
expected_changes,
None, // No grant_root expected
None, // No reason expected
codex_request_id.to_string(),
"1".to_string(),
)?;
assert_eq!(expected_elicitation_request, elicitation_request);
// Accept the patch approval request by responding to the elicitation
mcp_process
.send_response(
elicitation_request_id,
serde_json::to_value(PatchApprovalResponse {
decision: ReviewDecision::Approved,
})?,
)
.await?;
// Verify the original `codex` tool call completes
let codex_response = timeout(
DEFAULT_READ_TIMEOUT,
mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
)
.await??;
assert_eq!(
JSONRPCResponse {
jsonrpc: JSONRPC_VERSION.into(),
id: RequestId::Integer(codex_request_id),
result: json!({
"content": [
{
"text": "Patch has been applied successfully!",
"type": "text"
}
]
}),
},
codex_response
);
let file_contents = std::fs::read_to_string(test_file.as_path())?;
assert_eq!(file_contents, "modified content\n");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_codex_tool_passes_base_instructions() {
skip_if_no_network!();
// Apparently `#[tokio::test]` must return `()`, so we create a helper
// function that returns `Result` so we can use `?` in favor of `unwrap`.
if let Err(err) = codex_tool_passes_base_instructions().await {
panic!("failure: {err}");
}
}
async fn codex_tool_passes_base_instructions() -> anyhow::Result<()> {
#![expect(clippy::unwrap_used)]
let server =
create_mock_chat_completions_server(vec![create_final_assistant_message_sse_response(
"Enjoy!",
)?])
.await;
// Run `codex mcp` with a specific config.toml.
let codex_home = TempDir::new()?;
create_config_toml(codex_home.path(), &server.uri())?;
let mut mcp_process = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp_process.initialize()).await??;
// Send a "codex" tool request, which should hit the completions endpoint.
let codex_request_id = mcp_process
.send_codex_tool_call(CodexToolCallParam {
prompt: "How are you?".to_string(),
base_instructions: Some("You are a helpful assistant.".to_string()),
developer_instructions: Some("Foreshadow upcoming tool calls.".to_string()),
..Default::default()
})
.await?;
let codex_response = timeout(
DEFAULT_READ_TIMEOUT,
mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
)
.await??;
assert_eq!(
JSONRPCResponse {
jsonrpc: JSONRPC_VERSION.into(),
id: RequestId::Integer(codex_request_id),
result: json!({
"content": [
{
"text": "Enjoy!",
"type": "text"
}
]
}),
},
codex_response
);
let requests = server.received_requests().await.unwrap();
let request = requests[0].body_json::<serde_json::Value>()?;
let instructions = request["messages"][0]["content"].as_str().unwrap();
assert!(instructions.starts_with("You are a helpful assistant."));
let developer_msg = request["messages"]
.as_array()
.and_then(|messages| {
messages
.iter()
.find(|msg| msg.get("role").and_then(|role| role.as_str()) == Some("developer"))
})
.unwrap();
let developer_content = developer_msg
.get("content")
.and_then(|value| value.as_str())
.unwrap();
assert!(
!developer_content.contains('<'),
"expected developer instructions without XML tags, got `{developer_content}`"
);
assert_eq!(developer_content, "Foreshadow upcoming tool calls.");
Ok(())
}
fn create_expected_patch_approval_elicitation_request(
elicitation_request_id: RequestId,
changes: HashMap<PathBuf, FileChange>,
grant_root: Option<PathBuf>,
reason: Option<String>,
codex_mcp_tool_call_id: String,
codex_event_id: String,
) -> anyhow::Result<JSONRPCRequest> {
let mut message_lines = Vec::new();
if let Some(r) = &reason {
message_lines.push(r.clone());
}
message_lines.push("Allow Codex to apply proposed code changes?".to_string());
Ok(JSONRPCRequest {
jsonrpc: JSONRPC_VERSION.into(),
id: elicitation_request_id,
method: ElicitRequest::METHOD.to_string(),
params: Some(serde_json::to_value(&PatchApprovalElicitRequestParams {
message: message_lines.join("\n"),
requested_schema: ElicitRequestParamsRequestedSchema {
r#type: "object".to_string(),
properties: json!({}),
required: None,
},
codex_elicitation: "patch-approval".to_string(),
codex_mcp_tool_call_id,
codex_event_id,
codex_reason: reason,
codex_grant_root: grant_root,
codex_changes: changes,
codex_call_id: "call1234".to_string(),
})?),
})
}
/// This handle is used to ensure that the MockServer and TempDir are not dropped while
/// the McpProcess is still running.
pub struct McpHandle {
pub process: McpProcess,
/// Retain the server for the lifetime of the McpProcess.
#[allow(dead_code)]
server: MockServer,
/// Retain the temporary directory for the lifetime of the McpProcess.
#[allow(dead_code)]
dir: TempDir,
}
async fn create_mcp_process(responses: Vec<String>) -> anyhow::Result<McpHandle> {
let server = create_mock_chat_completions_server(responses).await;
let codex_home = TempDir::new()?;
create_config_toml(codex_home.path(), &server.uri())?;
let mut mcp_process = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp_process.initialize()).await??;
Ok(McpHandle {
process: mcp_process,
server,
dir: codex_home,
})
}
/// Create a Codex config that uses the mock server as the model provider.
/// It also uses `approval_policy = "untrusted"` so that we exercise the
/// elicitation code path for shell commands.
fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
let config_toml = codex_home.join("config.toml");
std::fs::write(
config_toml,
format!(
r#"
model = "mock-model"
approval_policy = "untrusted"
sandbox_policy = "workspace-write"
model_provider = "mock_provider"
[model_providers.mock_provider]
name = "Mock provider for test"
base_url = "{server_uri}/v1"
wire_api = "chat"
request_max_retries = 0
stream_max_retries = 0
"#
),
)
}

View File

@@ -0,0 +1 @@
mod codex_tool;