Phase 1: Repository & Infrastructure Setup
- Renamed directories: codex-rs -> llmx-rs, codex-cli -> llmx-cli
- Updated package.json files:
- Root: llmx-monorepo
- CLI: @llmx/llmx
- SDK: @llmx/llmx-sdk
- Updated pnpm workspace configuration
- Renamed binary: codex.js -> llmx.js
- Updated environment variables: CODEX_* -> LLMX_*
- Changed repository URLs to valknar/llmx
🤖 Generated with Claude Code
This commit is contained in:
3
llmx-rs/mcp-server/tests/all.rs
Normal file
3
llmx-rs/mcp-server/tests/all.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
// Single integration test binary that aggregates all test modules.
|
||||
// The submodules live in `tests/suite/`.
|
||||
mod suite;
|
||||
25
llmx-rs/mcp-server/tests/common/Cargo.toml
Normal file
25
llmx-rs/mcp-server/tests/common/Cargo.toml
Normal file
@@ -0,0 +1,25 @@
|
||||
[package]
|
||||
edition = "2024"
|
||||
name = "mcp_test_support"
|
||||
version = { workspace = true }
|
||||
|
||||
[lib]
|
||||
path = "lib.rs"
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
assert_cmd = { workspace = true }
|
||||
codex-core = { workspace = true }
|
||||
codex-mcp-server = { workspace = true }
|
||||
mcp-types = { workspace = true }
|
||||
os_info = { workspace = true }
|
||||
pretty_assertions = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
tokio = { workspace = true, features = [
|
||||
"io-std",
|
||||
"macros",
|
||||
"process",
|
||||
"rt-multi-thread",
|
||||
] }
|
||||
wiremock = { workspace = true }
|
||||
17
llmx-rs/mcp-server/tests/common/lib.rs
Normal file
17
llmx-rs/mcp-server/tests/common/lib.rs
Normal file
@@ -0,0 +1,17 @@
|
||||
mod mcp_process;
|
||||
mod mock_model_server;
|
||||
mod responses;
|
||||
|
||||
pub use mcp_process::McpProcess;
|
||||
use mcp_types::JSONRPCResponse;
|
||||
pub use mock_model_server::create_mock_chat_completions_server;
|
||||
pub use responses::create_apply_patch_sse_response;
|
||||
pub use responses::create_final_assistant_message_sse_response;
|
||||
pub use responses::create_shell_sse_response;
|
||||
use serde::de::DeserializeOwned;
|
||||
|
||||
pub fn to_response<T: DeserializeOwned>(response: JSONRPCResponse) -> anyhow::Result<T> {
|
||||
let value = serde_json::to_value(response.result)?;
|
||||
let codex_response = serde_json::from_value(value)?;
|
||||
Ok(codex_response)
|
||||
}
|
||||
343
llmx-rs/mcp-server/tests/common/mcp_process.rs
Normal file
343
llmx-rs/mcp-server/tests/common/mcp_process.rs
Normal file
@@ -0,0 +1,343 @@
|
||||
use std::path::Path;
|
||||
use std::process::Stdio;
|
||||
use std::sync::atomic::AtomicI64;
|
||||
use std::sync::atomic::Ordering;
|
||||
use tokio::io::AsyncBufReadExt;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tokio::io::BufReader;
|
||||
use tokio::process::Child;
|
||||
use tokio::process::ChildStdin;
|
||||
use tokio::process::ChildStdout;
|
||||
|
||||
use anyhow::Context;
|
||||
use assert_cmd::prelude::*;
|
||||
use codex_mcp_server::CodexToolCallParam;
|
||||
|
||||
use mcp_types::CallToolRequestParams;
|
||||
use mcp_types::ClientCapabilities;
|
||||
use mcp_types::Implementation;
|
||||
use mcp_types::InitializeRequestParams;
|
||||
use mcp_types::JSONRPC_VERSION;
|
||||
use mcp_types::JSONRPCMessage;
|
||||
use mcp_types::JSONRPCNotification;
|
||||
use mcp_types::JSONRPCRequest;
|
||||
use mcp_types::JSONRPCResponse;
|
||||
use mcp_types::ModelContextProtocolNotification;
|
||||
use mcp_types::ModelContextProtocolRequest;
|
||||
use mcp_types::RequestId;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use std::process::Command as StdCommand;
|
||||
use tokio::process::Command;
|
||||
|
||||
pub struct McpProcess {
|
||||
next_request_id: AtomicI64,
|
||||
/// Retain this child process until the client is dropped. The Tokio runtime
|
||||
/// will make a "best effort" to reap the process after it exits, but it is
|
||||
/// not a guarantee. See the `kill_on_drop` documentation for details.
|
||||
#[allow(dead_code)]
|
||||
process: Child,
|
||||
stdin: ChildStdin,
|
||||
stdout: BufReader<ChildStdout>,
|
||||
}
|
||||
|
||||
impl McpProcess {
|
||||
pub async fn new(codex_home: &Path) -> anyhow::Result<Self> {
|
||||
Self::new_with_env(codex_home, &[]).await
|
||||
}
|
||||
|
||||
/// Creates a new MCP process, allowing tests to override or remove
|
||||
/// specific environment variables for the child process only.
|
||||
///
|
||||
/// Pass a tuple of (key, Some(value)) to set/override, or (key, None) to
|
||||
/// remove a variable from the child's environment.
|
||||
pub async fn new_with_env(
|
||||
codex_home: &Path,
|
||||
env_overrides: &[(&str, Option<&str>)],
|
||||
) -> anyhow::Result<Self> {
|
||||
// Use assert_cmd to locate the binary path and then switch to tokio::process::Command
|
||||
let std_cmd = StdCommand::cargo_bin("codex-mcp-server")
|
||||
.context("should find binary for codex-mcp-server")?;
|
||||
|
||||
let program = std_cmd.get_program().to_owned();
|
||||
|
||||
let mut cmd = Command::new(program);
|
||||
|
||||
cmd.stdin(Stdio::piped());
|
||||
cmd.stdout(Stdio::piped());
|
||||
cmd.stderr(Stdio::piped());
|
||||
cmd.env("CODEX_HOME", codex_home);
|
||||
cmd.env("RUST_LOG", "debug");
|
||||
|
||||
for (k, v) in env_overrides {
|
||||
match v {
|
||||
Some(val) => {
|
||||
cmd.env(k, val);
|
||||
}
|
||||
None => {
|
||||
cmd.env_remove(k);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut process = cmd
|
||||
.kill_on_drop(true)
|
||||
.spawn()
|
||||
.context("codex-mcp-server proc should start")?;
|
||||
let stdin = process
|
||||
.stdin
|
||||
.take()
|
||||
.ok_or_else(|| anyhow::format_err!("mcp should have stdin fd"))?;
|
||||
let stdout = process
|
||||
.stdout
|
||||
.take()
|
||||
.ok_or_else(|| anyhow::format_err!("mcp should have stdout fd"))?;
|
||||
let stdout = BufReader::new(stdout);
|
||||
|
||||
// Forward child's stderr to our stderr so failures are visible even
|
||||
// when stdout/stderr are captured by the test harness.
|
||||
if let Some(stderr) = process.stderr.take() {
|
||||
let mut stderr_reader = BufReader::new(stderr).lines();
|
||||
tokio::spawn(async move {
|
||||
while let Ok(Some(line)) = stderr_reader.next_line().await {
|
||||
eprintln!("[mcp stderr] {line}");
|
||||
}
|
||||
});
|
||||
}
|
||||
Ok(Self {
|
||||
next_request_id: AtomicI64::new(0),
|
||||
process,
|
||||
stdin,
|
||||
stdout,
|
||||
})
|
||||
}
|
||||
|
||||
/// Performs the initialization handshake with the MCP server.
|
||||
pub async fn initialize(&mut self) -> anyhow::Result<()> {
|
||||
let request_id = self.next_request_id.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
let params = InitializeRequestParams {
|
||||
capabilities: ClientCapabilities {
|
||||
elicitation: Some(json!({})),
|
||||
experimental: None,
|
||||
roots: None,
|
||||
sampling: None,
|
||||
},
|
||||
client_info: Implementation {
|
||||
name: "elicitation test".into(),
|
||||
title: Some("Elicitation Test".into()),
|
||||
version: "0.0.0".into(),
|
||||
user_agent: None,
|
||||
},
|
||||
protocol_version: mcp_types::MCP_SCHEMA_VERSION.into(),
|
||||
};
|
||||
let params_value = serde_json::to_value(params)?;
|
||||
|
||||
self.send_jsonrpc_message(JSONRPCMessage::Request(JSONRPCRequest {
|
||||
jsonrpc: JSONRPC_VERSION.into(),
|
||||
id: RequestId::Integer(request_id),
|
||||
method: mcp_types::InitializeRequest::METHOD.into(),
|
||||
params: Some(params_value),
|
||||
}))
|
||||
.await?;
|
||||
|
||||
let initialized = self.read_jsonrpc_message().await?;
|
||||
let os_info = os_info::get();
|
||||
let user_agent = format!(
|
||||
"codex_cli_rs/0.0.0 ({} {}; {}) {} (elicitation test; 0.0.0)",
|
||||
os_info.os_type(),
|
||||
os_info.version(),
|
||||
os_info.architecture().unwrap_or("unknown"),
|
||||
codex_core::terminal::user_agent()
|
||||
);
|
||||
assert_eq!(
|
||||
JSONRPCMessage::Response(JSONRPCResponse {
|
||||
jsonrpc: JSONRPC_VERSION.into(),
|
||||
id: RequestId::Integer(request_id),
|
||||
result: json!({
|
||||
"capabilities": {
|
||||
"tools": {
|
||||
"listChanged": true
|
||||
},
|
||||
},
|
||||
"serverInfo": {
|
||||
"name": "codex-mcp-server",
|
||||
"title": "Codex",
|
||||
"version": "0.0.0",
|
||||
"user_agent": user_agent
|
||||
},
|
||||
"protocolVersion": mcp_types::MCP_SCHEMA_VERSION
|
||||
})
|
||||
}),
|
||||
initialized
|
||||
);
|
||||
|
||||
// Send notifications/initialized to ack the response.
|
||||
self.send_jsonrpc_message(JSONRPCMessage::Notification(JSONRPCNotification {
|
||||
jsonrpc: JSONRPC_VERSION.into(),
|
||||
method: mcp_types::InitializedNotification::METHOD.into(),
|
||||
params: None,
|
||||
}))
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the id used to make the request so it can be used when
|
||||
/// correlating notifications.
|
||||
pub async fn send_codex_tool_call(
|
||||
&mut self,
|
||||
params: CodexToolCallParam,
|
||||
) -> anyhow::Result<i64> {
|
||||
let codex_tool_call_params = CallToolRequestParams {
|
||||
name: "codex".to_string(),
|
||||
arguments: Some(serde_json::to_value(params)?),
|
||||
};
|
||||
self.send_request(
|
||||
mcp_types::CallToolRequest::METHOD,
|
||||
Some(serde_json::to_value(codex_tool_call_params)?),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn send_request(
|
||||
&mut self,
|
||||
method: &str,
|
||||
params: Option<serde_json::Value>,
|
||||
) -> anyhow::Result<i64> {
|
||||
let request_id = self.next_request_id.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
let message = JSONRPCMessage::Request(JSONRPCRequest {
|
||||
jsonrpc: JSONRPC_VERSION.into(),
|
||||
id: RequestId::Integer(request_id),
|
||||
method: method.to_string(),
|
||||
params,
|
||||
});
|
||||
self.send_jsonrpc_message(message).await?;
|
||||
Ok(request_id)
|
||||
}
|
||||
|
||||
pub async fn send_response(
|
||||
&mut self,
|
||||
id: RequestId,
|
||||
result: serde_json::Value,
|
||||
) -> anyhow::Result<()> {
|
||||
self.send_jsonrpc_message(JSONRPCMessage::Response(JSONRPCResponse {
|
||||
jsonrpc: JSONRPC_VERSION.into(),
|
||||
id,
|
||||
result,
|
||||
}))
|
||||
.await
|
||||
}
|
||||
|
||||
async fn send_jsonrpc_message(&mut self, message: JSONRPCMessage) -> anyhow::Result<()> {
|
||||
eprintln!("writing message to stdin: {message:?}");
|
||||
let payload = serde_json::to_string(&message)?;
|
||||
self.stdin.write_all(payload.as_bytes()).await?;
|
||||
self.stdin.write_all(b"\n").await?;
|
||||
self.stdin.flush().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn read_jsonrpc_message(&mut self) -> anyhow::Result<JSONRPCMessage> {
|
||||
let mut line = String::new();
|
||||
self.stdout.read_line(&mut line).await?;
|
||||
let message = serde_json::from_str::<JSONRPCMessage>(&line)?;
|
||||
eprintln!("read message from stdout: {message:?}");
|
||||
Ok(message)
|
||||
}
|
||||
|
||||
pub async fn read_stream_until_request_message(&mut self) -> anyhow::Result<JSONRPCRequest> {
|
||||
eprintln!("in read_stream_until_request_message()");
|
||||
|
||||
loop {
|
||||
let message = self.read_jsonrpc_message().await?;
|
||||
|
||||
match message {
|
||||
JSONRPCMessage::Notification(_) => {
|
||||
eprintln!("notification: {message:?}");
|
||||
}
|
||||
JSONRPCMessage::Request(jsonrpc_request) => {
|
||||
return Ok(jsonrpc_request);
|
||||
}
|
||||
JSONRPCMessage::Error(_) => {
|
||||
anyhow::bail!("unexpected JSONRPCMessage::Error: {message:?}");
|
||||
}
|
||||
JSONRPCMessage::Response(_) => {
|
||||
anyhow::bail!("unexpected JSONRPCMessage::Response: {message:?}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn read_stream_until_response_message(
|
||||
&mut self,
|
||||
request_id: RequestId,
|
||||
) -> anyhow::Result<JSONRPCResponse> {
|
||||
eprintln!("in read_stream_until_response_message({request_id:?})");
|
||||
|
||||
loop {
|
||||
let message = self.read_jsonrpc_message().await?;
|
||||
match message {
|
||||
JSONRPCMessage::Notification(_) => {
|
||||
eprintln!("notification: {message:?}");
|
||||
}
|
||||
JSONRPCMessage::Request(_) => {
|
||||
anyhow::bail!("unexpected JSONRPCMessage::Request: {message:?}");
|
||||
}
|
||||
JSONRPCMessage::Error(_) => {
|
||||
anyhow::bail!("unexpected JSONRPCMessage::Error: {message:?}");
|
||||
}
|
||||
JSONRPCMessage::Response(jsonrpc_response) => {
|
||||
if jsonrpc_response.id == request_id {
|
||||
return Ok(jsonrpc_response);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads notifications until a legacy TaskComplete event is observed:
|
||||
/// Method "codex/event" with params.msg.type == "task_complete".
|
||||
pub async fn read_stream_until_legacy_task_complete_notification(
|
||||
&mut self,
|
||||
) -> anyhow::Result<JSONRPCNotification> {
|
||||
eprintln!("in read_stream_until_legacy_task_complete_notification()");
|
||||
|
||||
loop {
|
||||
let message = self.read_jsonrpc_message().await?;
|
||||
match message {
|
||||
JSONRPCMessage::Notification(notification) => {
|
||||
let is_match = if notification.method == "codex/event" {
|
||||
if let Some(params) = ¬ification.params {
|
||||
params
|
||||
.get("msg")
|
||||
.and_then(|m| m.get("type"))
|
||||
.and_then(|t| t.as_str())
|
||||
== Some("task_complete")
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
if is_match {
|
||||
return Ok(notification);
|
||||
} else {
|
||||
eprintln!("ignoring notification: {notification:?}");
|
||||
}
|
||||
}
|
||||
JSONRPCMessage::Request(_) => {
|
||||
anyhow::bail!("unexpected JSONRPCMessage::Request: {message:?}");
|
||||
}
|
||||
JSONRPCMessage::Error(_) => {
|
||||
anyhow::bail!("unexpected JSONRPCMessage::Error: {message:?}");
|
||||
}
|
||||
JSONRPCMessage::Response(_) => {
|
||||
anyhow::bail!("unexpected JSONRPCMessage::Response: {message:?}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
47
llmx-rs/mcp-server/tests/common/mock_model_server.rs
Normal file
47
llmx-rs/mcp-server/tests/common/mock_model_server.rs
Normal file
@@ -0,0 +1,47 @@
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::Respond;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
/// Create a mock server that will provide the responses, in order, for
|
||||
/// requests to the `/v1/chat/completions` endpoint.
|
||||
pub async fn create_mock_chat_completions_server(responses: Vec<String>) -> MockServer {
|
||||
let server = MockServer::start().await;
|
||||
|
||||
let num_calls = responses.len();
|
||||
let seq_responder = SeqResponder {
|
||||
num_calls: AtomicUsize::new(0),
|
||||
responses,
|
||||
};
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/chat/completions"))
|
||||
.respond_with(seq_responder)
|
||||
.expect(num_calls as u64)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
server
|
||||
}
|
||||
|
||||
struct SeqResponder {
|
||||
num_calls: AtomicUsize,
|
||||
responses: Vec<String>,
|
||||
}
|
||||
|
||||
impl Respond for SeqResponder {
|
||||
fn respond(&self, _: &wiremock::Request) -> ResponseTemplate {
|
||||
let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst);
|
||||
match self.responses.get(call_num) {
|
||||
Some(response) => ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(response.clone(), "text/event-stream"),
|
||||
None => panic!("no response for {call_num}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
95
llmx-rs/mcp-server/tests/common/responses.rs
Normal file
95
llmx-rs/mcp-server/tests/common/responses.rs
Normal file
@@ -0,0 +1,95 @@
|
||||
use serde_json::json;
|
||||
use std::path::Path;
|
||||
|
||||
pub fn create_shell_sse_response(
|
||||
command: Vec<String>,
|
||||
workdir: Option<&Path>,
|
||||
timeout_ms: Option<u64>,
|
||||
call_id: &str,
|
||||
) -> anyhow::Result<String> {
|
||||
// The `arguments`` for the `shell` tool is a serialized JSON object.
|
||||
let tool_call_arguments = serde_json::to_string(&json!({
|
||||
"command": command,
|
||||
"workdir": workdir.map(|w| w.to_string_lossy()),
|
||||
"timeout": timeout_ms
|
||||
}))?;
|
||||
let tool_call = json!({
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": call_id,
|
||||
"function": {
|
||||
"name": "shell",
|
||||
"arguments": tool_call_arguments
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"finish_reason": "tool_calls"
|
||||
}
|
||||
]
|
||||
});
|
||||
|
||||
let sse = format!(
|
||||
"data: {}\n\ndata: DONE\n\n",
|
||||
serde_json::to_string(&tool_call)?
|
||||
);
|
||||
Ok(sse)
|
||||
}
|
||||
|
||||
pub fn create_final_assistant_message_sse_response(message: &str) -> anyhow::Result<String> {
|
||||
let assistant_message = json!({
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": message
|
||||
},
|
||||
"finish_reason": "stop"
|
||||
}
|
||||
]
|
||||
});
|
||||
|
||||
let sse = format!(
|
||||
"data: {}\n\ndata: DONE\n\n",
|
||||
serde_json::to_string(&assistant_message)?
|
||||
);
|
||||
Ok(sse)
|
||||
}
|
||||
|
||||
pub fn create_apply_patch_sse_response(
|
||||
patch_content: &str,
|
||||
call_id: &str,
|
||||
) -> anyhow::Result<String> {
|
||||
// Use shell command to call apply_patch with heredoc format
|
||||
let shell_command = format!("apply_patch <<'EOF'\n{patch_content}\nEOF");
|
||||
let tool_call_arguments = serde_json::to_string(&json!({
|
||||
"command": ["bash", "-lc", shell_command]
|
||||
}))?;
|
||||
|
||||
let tool_call = json!({
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": call_id,
|
||||
"function": {
|
||||
"name": "shell",
|
||||
"arguments": tool_call_arguments
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"finish_reason": "tool_calls"
|
||||
}
|
||||
]
|
||||
});
|
||||
|
||||
let sse = format!(
|
||||
"data: {}\n\ndata: DONE\n\n",
|
||||
serde_json::to_string(&tool_call)?
|
||||
);
|
||||
Ok(sse)
|
||||
}
|
||||
481
llmx-rs/mcp-server/tests/suite/codex_tool.rs
Normal file
481
llmx-rs/mcp-server/tests/suite/codex_tool.rs
Normal file
@@ -0,0 +1,481 @@
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use codex_core::parse_command;
|
||||
use codex_core::protocol::FileChange;
|
||||
use codex_core::protocol::ReviewDecision;
|
||||
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use codex_mcp_server::CodexToolCallParam;
|
||||
use codex_mcp_server::ExecApprovalElicitRequestParams;
|
||||
use codex_mcp_server::ExecApprovalResponse;
|
||||
use codex_mcp_server::PatchApprovalElicitRequestParams;
|
||||
use codex_mcp_server::PatchApprovalResponse;
|
||||
use mcp_types::ElicitRequest;
|
||||
use mcp_types::ElicitRequestParamsRequestedSchema;
|
||||
use mcp_types::JSONRPC_VERSION;
|
||||
use mcp_types::JSONRPCRequest;
|
||||
use mcp_types::JSONRPCResponse;
|
||||
use mcp_types::ModelContextProtocolRequest;
|
||||
use mcp_types::RequestId;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use tempfile::TempDir;
|
||||
use tokio::time::timeout;
|
||||
use wiremock::MockServer;
|
||||
|
||||
use core_test_support::skip_if_no_network;
|
||||
use mcp_test_support::McpProcess;
|
||||
use mcp_test_support::create_apply_patch_sse_response;
|
||||
use mcp_test_support::create_final_assistant_message_sse_response;
|
||||
use mcp_test_support::create_mock_chat_completions_server;
|
||||
use mcp_test_support::create_shell_sse_response;
|
||||
|
||||
// Allow ample time on slower CI or under load to avoid flakes.
|
||||
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(20);
|
||||
|
||||
/// Test that a shell command that is not on the "trusted" list triggers an
|
||||
/// elicitation request to the MCP and that sending the approval runs the
|
||||
/// command, as expected.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
|
||||
async fn test_shell_command_approval_triggers_elicitation() {
|
||||
if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Apparently `#[tokio::test]` must return `()`, so we create a helper
|
||||
// function that returns `Result` so we can use `?` in favor of `unwrap`.
|
||||
if let Err(err) = shell_command_approval_triggers_elicitation().await {
|
||||
panic!("failure: {err}");
|
||||
}
|
||||
}
|
||||
|
||||
async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
|
||||
// Use a simple, untrusted command that creates a file so we can
|
||||
// observe a side-effect.
|
||||
//
|
||||
// Cross‑platform approach: run a tiny Python snippet to touch the file
|
||||
// using `python3 -c ...` on all platforms.
|
||||
let workdir_for_shell_function_call = TempDir::new()?;
|
||||
let created_filename = "created_by_shell_tool.txt";
|
||||
let created_file = workdir_for_shell_function_call
|
||||
.path()
|
||||
.join(created_filename);
|
||||
|
||||
let shell_command = vec![
|
||||
"python3".to_string(),
|
||||
"-c".to_string(),
|
||||
format!("import pathlib; pathlib.Path('{created_filename}').touch()"),
|
||||
];
|
||||
|
||||
let McpHandle {
|
||||
process: mut mcp_process,
|
||||
server: _server,
|
||||
dir: _dir,
|
||||
} = create_mcp_process(vec![
|
||||
create_shell_sse_response(
|
||||
shell_command.clone(),
|
||||
Some(workdir_for_shell_function_call.path()),
|
||||
Some(5_000),
|
||||
"call1234",
|
||||
)?,
|
||||
create_final_assistant_message_sse_response("File created!")?,
|
||||
])
|
||||
.await?;
|
||||
|
||||
// Send a "codex" tool request, which should hit the completions endpoint.
|
||||
// In turn, it should reply with a tool call, which the MCP should forward
|
||||
// as an elicitation.
|
||||
let codex_request_id = mcp_process
|
||||
.send_codex_tool_call(CodexToolCallParam {
|
||||
prompt: "run `git init`".to_string(),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let elicitation_request = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp_process.read_stream_until_request_message(),
|
||||
)
|
||||
.await??;
|
||||
|
||||
let elicitation_request_id = elicitation_request.id.clone();
|
||||
let params = serde_json::from_value::<ExecApprovalElicitRequestParams>(
|
||||
elicitation_request
|
||||
.params
|
||||
.clone()
|
||||
.ok_or_else(|| anyhow::anyhow!("elicitation_request.params must be set"))?,
|
||||
)?;
|
||||
let expected_elicitation_request = create_expected_elicitation_request(
|
||||
elicitation_request_id.clone(),
|
||||
shell_command.clone(),
|
||||
workdir_for_shell_function_call.path(),
|
||||
codex_request_id.to_string(),
|
||||
params.codex_event_id.clone(),
|
||||
)?;
|
||||
assert_eq!(expected_elicitation_request, elicitation_request);
|
||||
|
||||
// Accept the `git init` request by responding to the elicitation.
|
||||
mcp_process
|
||||
.send_response(
|
||||
elicitation_request_id,
|
||||
serde_json::to_value(ExecApprovalResponse {
|
||||
decision: ReviewDecision::Approved,
|
||||
})?,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Verify task_complete notification arrives before the tool call completes.
|
||||
#[expect(clippy::expect_used)]
|
||||
let _task_complete = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp_process.read_stream_until_legacy_task_complete_notification(),
|
||||
)
|
||||
.await
|
||||
.expect("task_complete_notification timeout")
|
||||
.expect("task_complete_notification resp");
|
||||
|
||||
// Verify the original `codex` tool call completes and that the file was created.
|
||||
let codex_response = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
|
||||
)
|
||||
.await??;
|
||||
assert_eq!(
|
||||
JSONRPCResponse {
|
||||
jsonrpc: JSONRPC_VERSION.into(),
|
||||
id: RequestId::Integer(codex_request_id),
|
||||
result: json!({
|
||||
"content": [
|
||||
{
|
||||
"text": "File created!",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}),
|
||||
},
|
||||
codex_response
|
||||
);
|
||||
|
||||
assert!(created_file.is_file(), "created file should exist");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn create_expected_elicitation_request(
|
||||
elicitation_request_id: RequestId,
|
||||
command: Vec<String>,
|
||||
workdir: &Path,
|
||||
codex_mcp_tool_call_id: String,
|
||||
codex_event_id: String,
|
||||
) -> anyhow::Result<JSONRPCRequest> {
|
||||
let expected_message = format!(
|
||||
"Allow Codex to run `{}` in `{}`?",
|
||||
shlex::try_join(command.iter().map(std::convert::AsRef::as_ref))?,
|
||||
workdir.to_string_lossy()
|
||||
);
|
||||
let codex_parsed_cmd = parse_command::parse_command(&command);
|
||||
Ok(JSONRPCRequest {
|
||||
jsonrpc: JSONRPC_VERSION.into(),
|
||||
id: elicitation_request_id,
|
||||
method: ElicitRequest::METHOD.to_string(),
|
||||
params: Some(serde_json::to_value(&ExecApprovalElicitRequestParams {
|
||||
message: expected_message,
|
||||
requested_schema: ElicitRequestParamsRequestedSchema {
|
||||
r#type: "object".to_string(),
|
||||
properties: json!({}),
|
||||
required: None,
|
||||
},
|
||||
codex_elicitation: "exec-approval".to_string(),
|
||||
codex_mcp_tool_call_id,
|
||||
codex_event_id,
|
||||
codex_command: command,
|
||||
codex_cwd: workdir.to_path_buf(),
|
||||
codex_call_id: "call1234".to_string(),
|
||||
codex_parsed_cmd,
|
||||
codex_risk: None,
|
||||
})?),
|
||||
})
|
||||
}
|
||||
|
||||
/// Test that patch approval triggers an elicitation request to the MCP and that
|
||||
/// sending the approval applies the patch, as expected.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn test_patch_approval_triggers_elicitation() {
|
||||
if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
if let Err(err) = patch_approval_triggers_elicitation().await {
|
||||
panic!("failure: {err}");
|
||||
}
|
||||
}
|
||||
|
||||
async fn patch_approval_triggers_elicitation() -> anyhow::Result<()> {
|
||||
let cwd = TempDir::new()?;
|
||||
let test_file = cwd.path().join("destination_file.txt");
|
||||
std::fs::write(&test_file, "original content\n")?;
|
||||
|
||||
let patch_content = format!(
|
||||
"*** Begin Patch\n*** Update File: {}\n-original content\n+modified content\n*** End Patch",
|
||||
test_file.as_path().to_string_lossy()
|
||||
);
|
||||
|
||||
let McpHandle {
|
||||
process: mut mcp_process,
|
||||
server: _server,
|
||||
dir: _dir,
|
||||
} = create_mcp_process(vec![
|
||||
create_apply_patch_sse_response(&patch_content, "call1234")?,
|
||||
create_final_assistant_message_sse_response("Patch has been applied successfully!")?,
|
||||
])
|
||||
.await?;
|
||||
|
||||
// Send a "codex" tool request that will trigger the apply_patch command
|
||||
let codex_request_id = mcp_process
|
||||
.send_codex_tool_call(CodexToolCallParam {
|
||||
cwd: Some(cwd.path().to_string_lossy().to_string()),
|
||||
prompt: "please modify the test file".to_string(),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let elicitation_request = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp_process.read_stream_until_request_message(),
|
||||
)
|
||||
.await??;
|
||||
|
||||
let elicitation_request_id = RequestId::Integer(0);
|
||||
|
||||
let mut expected_changes = HashMap::new();
|
||||
expected_changes.insert(
|
||||
test_file.as_path().to_path_buf(),
|
||||
FileChange::Update {
|
||||
unified_diff: "@@ -1 +1 @@\n-original content\n+modified content\n".to_string(),
|
||||
move_path: None,
|
||||
},
|
||||
);
|
||||
|
||||
let expected_elicitation_request = create_expected_patch_approval_elicitation_request(
|
||||
elicitation_request_id.clone(),
|
||||
expected_changes,
|
||||
None, // No grant_root expected
|
||||
None, // No reason expected
|
||||
codex_request_id.to_string(),
|
||||
"1".to_string(),
|
||||
)?;
|
||||
assert_eq!(expected_elicitation_request, elicitation_request);
|
||||
|
||||
// Accept the patch approval request by responding to the elicitation
|
||||
mcp_process
|
||||
.send_response(
|
||||
elicitation_request_id,
|
||||
serde_json::to_value(PatchApprovalResponse {
|
||||
decision: ReviewDecision::Approved,
|
||||
})?,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Verify the original `codex` tool call completes
|
||||
let codex_response = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
|
||||
)
|
||||
.await??;
|
||||
assert_eq!(
|
||||
JSONRPCResponse {
|
||||
jsonrpc: JSONRPC_VERSION.into(),
|
||||
id: RequestId::Integer(codex_request_id),
|
||||
result: json!({
|
||||
"content": [
|
||||
{
|
||||
"text": "Patch has been applied successfully!",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}),
|
||||
},
|
||||
codex_response
|
||||
);
|
||||
|
||||
let file_contents = std::fs::read_to_string(test_file.as_path())?;
|
||||
assert_eq!(file_contents, "modified content\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn test_codex_tool_passes_base_instructions() {
|
||||
skip_if_no_network!();
|
||||
|
||||
// Apparently `#[tokio::test]` must return `()`, so we create a helper
|
||||
// function that returns `Result` so we can use `?` in favor of `unwrap`.
|
||||
if let Err(err) = codex_tool_passes_base_instructions().await {
|
||||
panic!("failure: {err}");
|
||||
}
|
||||
}
|
||||
|
||||
async fn codex_tool_passes_base_instructions() -> anyhow::Result<()> {
|
||||
#![expect(clippy::unwrap_used)]
|
||||
|
||||
let server =
|
||||
create_mock_chat_completions_server(vec![create_final_assistant_message_sse_response(
|
||||
"Enjoy!",
|
||||
)?])
|
||||
.await;
|
||||
|
||||
// Run `codex mcp` with a specific config.toml.
|
||||
let codex_home = TempDir::new()?;
|
||||
create_config_toml(codex_home.path(), &server.uri())?;
|
||||
let mut mcp_process = McpProcess::new(codex_home.path()).await?;
|
||||
timeout(DEFAULT_READ_TIMEOUT, mcp_process.initialize()).await??;
|
||||
|
||||
// Send a "codex" tool request, which should hit the completions endpoint.
|
||||
let codex_request_id = mcp_process
|
||||
.send_codex_tool_call(CodexToolCallParam {
|
||||
prompt: "How are you?".to_string(),
|
||||
base_instructions: Some("You are a helpful assistant.".to_string()),
|
||||
developer_instructions: Some("Foreshadow upcoming tool calls.".to_string()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
|
||||
let codex_response = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
|
||||
)
|
||||
.await??;
|
||||
assert_eq!(
|
||||
JSONRPCResponse {
|
||||
jsonrpc: JSONRPC_VERSION.into(),
|
||||
id: RequestId::Integer(codex_request_id),
|
||||
result: json!({
|
||||
"content": [
|
||||
{
|
||||
"text": "Enjoy!",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}),
|
||||
},
|
||||
codex_response
|
||||
);
|
||||
|
||||
let requests = server.received_requests().await.unwrap();
|
||||
let request = requests[0].body_json::<serde_json::Value>()?;
|
||||
let instructions = request["messages"][0]["content"].as_str().unwrap();
|
||||
assert!(instructions.starts_with("You are a helpful assistant."));
|
||||
|
||||
let developer_msg = request["messages"]
|
||||
.as_array()
|
||||
.and_then(|messages| {
|
||||
messages
|
||||
.iter()
|
||||
.find(|msg| msg.get("role").and_then(|role| role.as_str()) == Some("developer"))
|
||||
})
|
||||
.unwrap();
|
||||
let developer_content = developer_msg
|
||||
.get("content")
|
||||
.and_then(|value| value.as_str())
|
||||
.unwrap();
|
||||
assert!(
|
||||
!developer_content.contains('<'),
|
||||
"expected developer instructions without XML tags, got `{developer_content}`"
|
||||
);
|
||||
assert_eq!(developer_content, "Foreshadow upcoming tool calls.");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn create_expected_patch_approval_elicitation_request(
|
||||
elicitation_request_id: RequestId,
|
||||
changes: HashMap<PathBuf, FileChange>,
|
||||
grant_root: Option<PathBuf>,
|
||||
reason: Option<String>,
|
||||
codex_mcp_tool_call_id: String,
|
||||
codex_event_id: String,
|
||||
) -> anyhow::Result<JSONRPCRequest> {
|
||||
let mut message_lines = Vec::new();
|
||||
if let Some(r) = &reason {
|
||||
message_lines.push(r.clone());
|
||||
}
|
||||
message_lines.push("Allow Codex to apply proposed code changes?".to_string());
|
||||
|
||||
Ok(JSONRPCRequest {
|
||||
jsonrpc: JSONRPC_VERSION.into(),
|
||||
id: elicitation_request_id,
|
||||
method: ElicitRequest::METHOD.to_string(),
|
||||
params: Some(serde_json::to_value(&PatchApprovalElicitRequestParams {
|
||||
message: message_lines.join("\n"),
|
||||
requested_schema: ElicitRequestParamsRequestedSchema {
|
||||
r#type: "object".to_string(),
|
||||
properties: json!({}),
|
||||
required: None,
|
||||
},
|
||||
codex_elicitation: "patch-approval".to_string(),
|
||||
codex_mcp_tool_call_id,
|
||||
codex_event_id,
|
||||
codex_reason: reason,
|
||||
codex_grant_root: grant_root,
|
||||
codex_changes: changes,
|
||||
codex_call_id: "call1234".to_string(),
|
||||
})?),
|
||||
})
|
||||
}
|
||||
|
||||
/// This handle is used to ensure that the MockServer and TempDir are not dropped while
|
||||
/// the McpProcess is still running.
|
||||
pub struct McpHandle {
|
||||
pub process: McpProcess,
|
||||
/// Retain the server for the lifetime of the McpProcess.
|
||||
#[allow(dead_code)]
|
||||
server: MockServer,
|
||||
/// Retain the temporary directory for the lifetime of the McpProcess.
|
||||
#[allow(dead_code)]
|
||||
dir: TempDir,
|
||||
}
|
||||
|
||||
async fn create_mcp_process(responses: Vec<String>) -> anyhow::Result<McpHandle> {
|
||||
let server = create_mock_chat_completions_server(responses).await;
|
||||
let codex_home = TempDir::new()?;
|
||||
create_config_toml(codex_home.path(), &server.uri())?;
|
||||
let mut mcp_process = McpProcess::new(codex_home.path()).await?;
|
||||
timeout(DEFAULT_READ_TIMEOUT, mcp_process.initialize()).await??;
|
||||
Ok(McpHandle {
|
||||
process: mcp_process,
|
||||
server,
|
||||
dir: codex_home,
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a Codex config that uses the mock server as the model provider.
|
||||
/// It also uses `approval_policy = "untrusted"` so that we exercise the
|
||||
/// elicitation code path for shell commands.
|
||||
fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
|
||||
let config_toml = codex_home.join("config.toml");
|
||||
std::fs::write(
|
||||
config_toml,
|
||||
format!(
|
||||
r#"
|
||||
model = "mock-model"
|
||||
approval_policy = "untrusted"
|
||||
sandbox_policy = "workspace-write"
|
||||
|
||||
model_provider = "mock_provider"
|
||||
|
||||
[model_providers.mock_provider]
|
||||
name = "Mock provider for test"
|
||||
base_url = "{server_uri}/v1"
|
||||
wire_api = "chat"
|
||||
request_max_retries = 0
|
||||
stream_max_retries = 0
|
||||
"#
|
||||
),
|
||||
)
|
||||
}
|
||||
1
llmx-rs/mcp-server/tests/suite/mod.rs
Normal file
1
llmx-rs/mcp-server/tests/suite/mod.rs
Normal file
@@ -0,0 +1 @@
|
||||
mod codex_tool;
|
||||
Reference in New Issue
Block a user