use std::collections::HashMap;
use std::sync::atomic::AtomicI64;
use std::sync::atomic::Ordering;

use codex_core::protocol::Event;
use codex_protocol::mcp_protocol::ServerNotification;
use mcp_types::JSONRPC_VERSION;
use mcp_types::JSONRPCError;
use mcp_types::JSONRPCErrorError;
use mcp_types::JSONRPCMessage;
use mcp_types::JSONRPCNotification;
use mcp_types::JSONRPCRequest;
use mcp_types::JSONRPCResponse;
use mcp_types::RequestId;
use mcp_types::Result;
use serde::Serialize;
use tokio::sync::Mutex;
use tokio::sync::mpsc;
use tokio::sync::oneshot;
use tracing::warn;

use crate::error_code::INTERNAL_ERROR_CODE;

/// Sends messages to the client and manages request callbacks.
///
/// Deliberately not `Clone`: every user must share the single `AtomicI64`
/// that mints unique request ids, so share the sender across tokio tasks as
/// `Arc<OutgoingMessageSender>` instead.
pub(crate) struct OutgoingMessageSender {
    next_request_id: AtomicI64,
    sender: mpsc::UnboundedSender<OutgoingMessage>,
    request_id_to_callback: Mutex<HashMap<RequestId, oneshot::Sender<Result>>>,
}
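
// A minimal usage sketch (the channel wiring and the spawned task below are
// illustrative, not part of this module): construct one sender, then share it
// with spawned tasks via `Arc` rather than cloning it.
//
//     let (tx, mut rx) = mpsc::unbounded_channel::<OutgoingMessage>();
//     let outgoing = std::sync::Arc::new(OutgoingMessageSender::new(tx));
//     let for_task = outgoing.clone();
//     tokio::task::spawn(async move {
//         let _receiver = for_task.send_request("someMethod", None).await;
//     });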

impl OutgoingMessageSender {
    pub(crate) fn new(sender: mpsc::UnboundedSender<OutgoingMessage>) -> Self {
        Self {
            next_request_id: AtomicI64::new(0),
            sender,
            request_id_to_callback: Mutex::new(HashMap::new()),
        }
    }

    pub(crate) async fn send_request(
        &self,
        method: &str,
        params: Option<serde_json::Value>,
    ) -> oneshot::Receiver<Result> {
        let id = RequestId::Integer(self.next_request_id.fetch_add(1, Ordering::Relaxed));
        let outgoing_message_id = id.clone();
        let (tx_approve, rx_approve) = oneshot::channel();
        {
            let mut request_id_to_callback = self.request_id_to_callback.lock().await;
            request_id_to_callback.insert(id, tx_approve);
        }

        let outgoing_message = OutgoingMessage::Request(OutgoingRequest {
            id: outgoing_message_id,
            method: method.to_string(),
            params,
        });
        let _ = self.sender.send(outgoing_message);
        rx_approve
    }
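
    // Caller-side sketch (the method name and params are illustrative): the
    // receiver returned above resolves once `notify_client_response` is
    // invoked with the id minted for this request.
    //
    //     let receiver = outgoing.send_request("elicitation/create", params).await;
    //     if let Ok(result) = receiver.await {
    //         // `result` is the client's reply to this request
    //     }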

    pub(crate) async fn notify_client_response(&self, id: RequestId, result: Result) {
        let entry = {
            let mut request_id_to_callback = self.request_id_to_callback.lock().await;
            request_id_to_callback.remove_entry(&id)
        };

        match entry {
            Some((id, sender)) => {
                if let Err(err) = sender.send(result) {
                    warn!("could not notify callback for {id:?} due to: {err:?}");
                }
            }
            None => {
                warn!("could not find callback for {id:?}");
            }
        }
    }

    pub(crate) async fn send_response<T: Serialize>(&self, id: RequestId, response: T) {
        match serde_json::to_value(response) {
            Ok(result) => {
                let outgoing_message = OutgoingMessage::Response(OutgoingResponse { id, result });
                let _ = self.sender.send(outgoing_message);
            }
            Err(err) => {
                self.send_error(
                    id,
                    JSONRPCErrorError {
                        code: INTERNAL_ERROR_CODE,
                        message: format!("failed to serialize response: {err}"),
                        data: None,
                    },
                )
                .await;
            }
        }
    }

    /// This is used with the MCP server, but not the more general JSON-RPC app
    /// server. Prefer [`OutgoingMessageSender::send_server_notification`] where
    /// possible.
    pub(crate) async fn send_event_as_notification(
        &self,
        event: &Event,
        meta: Option<OutgoingNotificationMeta>,
    ) {
        #[expect(clippy::expect_used)]
        let event_json = serde_json::to_value(event).expect("Event must serialize");

        let params = if let Ok(params) = serde_json::to_value(OutgoingNotificationParams {
            meta,
            event: event_json.clone(),
        }) {
            params
        } else {
            warn!("Failed to serialize event as OutgoingNotificationParams");
            event_json
        };

        self.send_notification(OutgoingNotification {
            method: "codex/event".to_string(),
            params: Some(params.clone()),
        })
        .await;
    }
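
    // Wire sketch of the resulting notification (this shape is exercised by
    // `test_send_event_as_notification_with_meta` below; `_meta` is present
    // only when `meta` is supplied):
    //
    //     {"jsonrpc":"2.0","method":"codex/event",
    //      "params":{"_meta":{"requestId":"123"},"id":"1","msg":{ ... }}}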

    pub(crate) async fn send_server_notification(&self, notification: ServerNotification) {
        let _ = self
            .sender
            .send(OutgoingMessage::AppServerNotification(notification));
    }

    pub(crate) async fn send_notification(&self, notification: OutgoingNotification) {
        let outgoing_message = OutgoingMessage::Notification(notification);
        let _ = self.sender.send(outgoing_message);
    }

    pub(crate) async fn send_error(&self, id: RequestId, error: JSONRPCErrorError) {
        let outgoing_message = OutgoingMessage::Error(OutgoingError { id, error });
        let _ = self.sender.send(outgoing_message);
    }
}

/// Outgoing message from the server to the client.
pub(crate) enum OutgoingMessage {
    Request(OutgoingRequest),
    Notification(OutgoingNotification),
    /// AppServerNotification is specific to the case where this is run as an
    /// "app server" as opposed to an MCP server.
    AppServerNotification(ServerNotification),
    Response(OutgoingResponse),
    Error(OutgoingError),
}

impl From<OutgoingMessage> for JSONRPCMessage {
    fn from(val: OutgoingMessage) -> Self {
        use OutgoingMessage::*;
        match val {
            Request(OutgoingRequest { id, method, params }) => {
                JSONRPCMessage::Request(JSONRPCRequest {
                    jsonrpc: JSONRPC_VERSION.into(),
                    id,
                    method,
                    params,
                })
            }
            Notification(OutgoingNotification { method, params }) => {
                JSONRPCMessage::Notification(JSONRPCNotification {
                    jsonrpc: JSONRPC_VERSION.into(),
                    method,
                    params,
                })
            }
            AppServerNotification(notification) => {
                let method = notification.to_string();
                let params = match notification.to_params() {
                    Ok(params) => Some(params),
                    Err(err) => {
                        warn!("failed to serialize notification params: {err}");
                        None
                    }
                };
                JSONRPCMessage::Notification(JSONRPCNotification {
                    jsonrpc: JSONRPC_VERSION.into(),
                    method,
                    params,
                })
            }
            Response(OutgoingResponse { id, result }) => {
                JSONRPCMessage::Response(JSONRPCResponse {
                    jsonrpc: JSONRPC_VERSION.into(),
                    id,
                    result,
                })
            }
            Error(OutgoingError { id, error }) => JSONRPCMessage::Error(JSONRPCError {
                jsonrpc: JSONRPC_VERSION.into(),
                id,
                error,
            }),
        }
    }
}
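
// For reference, a sketch of the serialized form of an `OutgoingMessage::Request`
// after this conversion (id 0 is the first value minted by `send_request`; the
// method name is illustrative):
//
//     {"jsonrpc":"2.0","id":0,"method":"elicitation/create","params":{ ... }}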

#[derive(Debug, Clone, PartialEq, Serialize)]
pub(crate) struct OutgoingRequest {
    pub id: RequestId,
    pub method: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub params: Option<serde_json::Value>,
}

#[derive(Debug, Clone, PartialEq, Serialize)]
pub(crate) struct OutgoingNotification {
    pub method: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub params: Option<serde_json::Value>,
}

#[derive(Debug, Clone, PartialEq, Serialize)]
pub(crate) struct OutgoingNotificationParams {
    #[serde(rename = "_meta", default, skip_serializing_if = "Option::is_none")]
    pub meta: Option<OutgoingNotificationMeta>,

    #[serde(flatten)]
    pub event: serde_json::Value,
}

/// Additional MCP-specific data attached to a [`codex_core::protocol::Event`]
/// as `notification.params._meta`.
///
/// MCP spec: https://modelcontextprotocol.io/specification/2025-06-18/basic#meta
/// TypeScript schema: https://github.com/modelcontextprotocol/modelcontextprotocol/blob/0695a497eb50a804fc0e88c18a93a21a675d6b3e/schema/2025-06-18/schema.ts
#[derive(Debug, Clone, PartialEq, Serialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct OutgoingNotificationMeta {
    pub request_id: Option<RequestId>,
}

impl OutgoingNotificationMeta {
    pub(crate) fn new(request_id: Option<RequestId>) -> Self {
        Self { request_id }
    }
}

#[derive(Debug, Clone, PartialEq, Serialize)]
pub(crate) struct OutgoingResponse {
    pub id: RequestId,
    pub result: Result,
}

#[derive(Debug, Clone, PartialEq, Serialize)]
pub(crate) struct OutgoingError {
    pub error: JSONRPCErrorError,
    pub id: RequestId,
}

#[cfg(test)]
mod tests {
    use codex_core::protocol::EventMsg;
    use codex_core::protocol::SessionConfiguredEvent;
    use codex_protocol::mcp_protocol::ConversationId;
    use codex_protocol::mcp_protocol::LoginChatGptCompleteNotification;
    use pretty_assertions::assert_eq;
    use serde_json::json;
    use uuid::Uuid;

    use super::*;

    #[tokio::test]
    async fn test_send_event_as_notification() {
        let (outgoing_tx, mut outgoing_rx) = mpsc::unbounded_channel::<OutgoingMessage>();
        let outgoing_message_sender = OutgoingMessageSender::new(outgoing_tx);

        let conversation_id = ConversationId::new();
        let event = Event {
            id: "1".to_string(),
            msg: EventMsg::SessionConfigured(SessionConfiguredEvent {
                session_id: conversation_id,
                model: "gpt-4o".to_string(),
                history_log_id: 1,
                history_entry_count: 1000,
                initial_messages: None,
            }),
        };

        outgoing_message_sender
            .send_event_as_notification(&event, None)
            .await;

        let result = outgoing_rx.recv().await.unwrap();
        let OutgoingMessage::Notification(OutgoingNotification { method, params }) = result else {
            panic!("expected Notification for first message");
        };
        assert_eq!(method, "codex/event");

        let Ok(expected_params) = serde_json::to_value(&event) else {
            panic!("Event must serialize");
        };
        assert_eq!(params, Some(expected_params.clone()));
    }

    #[tokio::test]
    async fn test_send_event_as_notification_with_meta() {
        let (outgoing_tx, mut outgoing_rx) = mpsc::unbounded_channel::<OutgoingMessage>();
        let outgoing_message_sender = OutgoingMessageSender::new(outgoing_tx);

        let conversation_id = ConversationId::new();
        let session_configured_event = SessionConfiguredEvent {
            session_id: conversation_id,
            model: "gpt-4o".to_string(),
            history_log_id: 1,
            history_entry_count: 1000,
            initial_messages: None,
        };
        let event = Event {
            id: "1".to_string(),
            msg: EventMsg::SessionConfigured(session_configured_event.clone()),
        };
        let meta = OutgoingNotificationMeta {
            request_id: Some(RequestId::String("123".to_string())),
        };

        outgoing_message_sender
            .send_event_as_notification(&event, Some(meta))
            .await;

        let result = outgoing_rx.recv().await.unwrap();
        let OutgoingMessage::Notification(OutgoingNotification { method, params }) = result else {
            panic!("expected Notification for first message");
        };
        assert_eq!(method, "codex/event");
        let expected_params = json!({
            "_meta": {
                "requestId": "123",
            },
            "id": "1",
            "msg": {
                "session_id": session_configured_event.session_id,
                "model": session_configured_event.model,
                "history_log_id": session_configured_event.history_log_id,
                "history_entry_count": session_configured_event.history_entry_count,
                "type": "session_configured",
            }
        });
        assert_eq!(params.unwrap(), expected_params);
    }

    #[test]
    fn verify_server_notification_serialization() {
        let notification =
            ServerNotification::LoginChatGptComplete(LoginChatGptCompleteNotification {
                login_id: Uuid::nil(),
                success: true,
                error: None,
            });

        let jsonrpc_notification: JSONRPCMessage =
            OutgoingMessage::AppServerNotification(notification).into();
        assert_eq!(
            JSONRPCMessage::Notification(JSONRPCNotification {
                jsonrpc: "2.0".into(),
                method: "loginChatGptComplete".into(),
                params: Some(json!({
                    "loginId": Uuid::nil(),
                    "success": true,
                })),
            }),
            jsonrpc_notification,
            "ensure the strum macros serialize the method field correctly"
        );
    }
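
    // A sketch of a round-trip test for `send_request`/`notify_client_response`,
    // assuming only what this module defines: ids are minted sequentially from
    // the shared `AtomicI64`, and the returned oneshot resolves with the result
    // passed to `notify_client_response`.
    #[tokio::test]
    async fn test_send_request_round_trip() {
        let (outgoing_tx, mut outgoing_rx) = mpsc::unbounded_channel::<OutgoingMessage>();
        let outgoing_message_sender = OutgoingMessageSender::new(outgoing_tx);

        // Consecutive requests should be minted ids 0 and 1.
        let rx0 = outgoing_message_sender
            .send_request("test/method", None)
            .await;
        let _rx1 = outgoing_message_sender
            .send_request("test/method", None)
            .await;
        for expected in 0..2i64 {
            let OutgoingMessage::Request(OutgoingRequest { id, .. }) =
                outgoing_rx.recv().await.unwrap()
            else {
                panic!("expected Request");
            };
            assert_eq!(id, RequestId::Integer(expected));
        }

        // Resolving request 0 completes the receiver returned for it.
        let result = json!({ "ok": true });
        outgoing_message_sender
            .notify_client_response(RequestId::Integer(0), result.clone())
            .await;
        assert_eq!(rx0.await.unwrap(), result);
    }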
}