Files
llmx/codex-rs/core/tests/previous_response_id.rs

161 lines
4.7 KiB
Rust
Raw Normal View History

use std::time::Duration;
use codex_core::config::Config;
use codex_core::protocol::InputItem;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_core::protocol::Submission;
use codex_core::Codex;
use serde_json::Value;
use tokio::time::timeout;
use wiremock::matchers::method;
use wiremock::matchers::path;
use wiremock::Match;
use wiremock::Mock;
use wiremock::MockServer;
use wiremock::Request;
use wiremock::ResponseTemplate;
/// Matcher asserting that JSON body has NO `previous_response_id` field.
struct NoPrevId;
impl Match for NoPrevId {
fn matches(&self, req: &Request) -> bool {
serde_json::from_slice::<Value>(&req.body)
.map(|v| v.get("previous_response_id").is_none())
.unwrap_or(false)
}
}
/// Matcher asserting that JSON body HAS a `previous_response_id` field.
struct HasPrevId;
impl Match for HasPrevId {
fn matches(&self, req: &Request) -> bool {
serde_json::from_slice::<Value>(&req.body)
.map(|v| v.get("previous_response_id").is_some())
.unwrap_or(false)
}
}
/// Build minimal SSE stream with completed marker.
fn sse_completed(id: &str) -> String {
format!(
"event: response.completed\n\
data: {{\"type\":\"response.completed\",\"response\":{{\"id\":\"{}\",\"output\":[]}}}}\n\n\n",
id
)
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn keeps_previous_response_id_between_tasks() {
// Mock server
let server = MockServer::start().await;
// First request must NOT include `previous_response_id`.
let first = ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(sse_completed("resp1"), "text/event-stream");
Mock::given(method("POST"))
.and(path("/v1/responses"))
.and(NoPrevId)
.respond_with(first)
.expect(1)
.mount(&server)
.await;
// Second request MUST include `previous_response_id`.
let second = ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(sse_completed("resp2"), "text/event-stream");
Mock::given(method("POST"))
.and(path("/v1/responses"))
.and(HasPrevId)
.respond_with(second)
.expect(1)
.mount(&server)
.await;
// Environment
std::env::set_var("OPENAI_API_KEY", "test-key");
std::env::set_var("OPENAI_API_BASE", server.uri());
std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
let codex = Codex::spawn(std::sync::Arc::new(tokio::sync::Notify::new())).unwrap();
// Init session
let config = Config::load_default_config_for_test();
codex
.submit(Submission {
id: "init".into(),
op: Op::ConfigureSession {
model: config.model,
instructions: None,
approval_policy: config.approval_policy,
fix: overhaul SandboxPolicy and config loading in Rust (#732) Previous to this PR, `SandboxPolicy` was a bit difficult to work with: https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108 Specifically: * It was an `enum` and therefore options were mutually exclusive as opposed to additive. * It defined things in terms of what the agent _could not_ do as opposed to what they _could_ do. This made things hard to support because we would prefer to build up a sandbox config by starting with something extremely restrictive and only granting permissions for things the user as explicitly allowed. This PR changes things substantially by redefining the policy in terms of two concepts: * A `SandboxPermission` enum that defines permissions that can be granted to the agent/sandbox. * A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`, but externally exposes a simpler API that can be used to configure Seatbelt/Landlock. Previous to this PR, we supported a `--sandbox` flag that effectively mapped to an enum value in `SandboxPolicy`. Though now that `SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single `--sandbox` flag no longer makes sense. While I could have turned it into a flag that the user can specify multiple times, I think the current values to use with such a flag are long and potentially messy, so for the moment, I have dropped support for `--sandbox` altogether and we can bring it back once we have figured out the naming thing. Since `--sandbox` is gone, users now have to specify `--full-auto` to get a sandbox that allows writes in `cwd`. Admittedly, there is no clean way to specify the equivalent of `--full-auto` in your `config.toml` right now, so we will have to revisit that, as well. Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy` changed considerably, I had to overhaul how config loading works, as well. There are now two distinct concepts, `ConfigToml` and `Config`: * `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one might expect, every field is `Optional` and it is `#[derive(Deserialize, Default)]`. Consistent use of `Optional` makes it clear what the user has specified explicitly. * `Config` is the "normalized config" and is produced by merging `ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw `Option<Vec<SandboxPermission>>`, `Config` presents only the final `SandboxPolicy`. The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra special attention to ensure we are faithfully mapping the `SandboxPolicy` to the Seatbelt and Landlock configs, respectively. Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that `(allow file-read*)` has been removed from the `.sbpl` file as now this is added to the policy in `core/src/exec.rs` when `sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
sandbox_policy: SandboxPolicy::new_read_only_policy(),
disable_response_storage: false,
feat: configurable notifications in the Rust CLI (#793) With this change, you can specify a program that will be executed to get notified about events generated by Codex. The notification info will be packaged as a JSON object. The supported notification types are defined by the `UserNotification` enum introduced in this PR. Initially, it contains only one variant, `AgentTurnComplete`: ```rust pub(crate) enum UserNotification { #[serde(rename_all = "kebab-case")] AgentTurnComplete { turn_id: String, /// Messages that the user sent to the agent to initiate the turn. input_messages: Vec<String>, /// The last message sent by the assistant in the turn. last_assistant_message: Option<String>, }, } ``` This is intended to support the common case when a "turn" ends, which often means it is now your chance to give Codex further instructions. For example, I have the following in my `~/.codex/config.toml`: ```toml notify = ["python3", "/Users/mbolin/.codex/notify.py"] ``` I created my own custom notifier script that calls out to [terminal-notifier](https://github.com/julienXX/terminal-notifier) to show a desktop push notification on macOS. Contents of `notify.py`: ```python #!/usr/bin/env python3 import json import subprocess import sys def main() -> int: if len(sys.argv) != 2: print("Usage: notify.py <NOTIFICATION_JSON>") return 1 try: notification = json.loads(sys.argv[1]) except json.JSONDecodeError: return 1 match notification_type := notification.get("type"): case "agent-turn-complete": assistant_message = notification.get("last-assistant-message") if assistant_message: title = f"Codex: {assistant_message}" else: title = "Codex: Turn Complete!" input_messages = notification.get("input_messages", []) message = " ".join(input_messages) title += message case _: print(f"not sending a push notification for: {notification_type}") return 0 subprocess.check_output( [ "terminal-notifier", "-title", title, "-message", message, "-group", "codex", "-ignoreDnD", "-activate", "com.googlecode.iterm2", ] ) return 0 if __name__ == "__main__": sys.exit(main()) ``` For reference, here are related PRs that tried to add this functionality to the TypeScript version of the Codex CLI: * https://github.com/openai/codex/pull/160 * https://github.com/openai/codex/pull/498
2025-05-02 19:48:13 -07:00
notify: None,
cwd: std::env::current_dir().unwrap(),
},
})
.await
.unwrap();
// drain init event
let _ = codex.next_event().await.unwrap();
// Task 1 triggers first request (no previous_response_id)
codex
.submit(Submission {
id: "task1".into(),
op: Op::UserInput {
items: vec![InputItem::Text {
text: "hello".into(),
}],
},
})
.await
.unwrap();
// Wait for TaskComplete
loop {
let ev = timeout(Duration::from_secs(1), codex.next_event())
.await
.unwrap()
.unwrap();
if matches!(ev.msg, codex_core::protocol::EventMsg::TaskComplete) {
break;
}
}
// Task 2 should include `previous_response_id` (triggers second request)
codex
.submit(Submission {
id: "task2".into(),
op: Op::UserInput {
items: vec![InputItem::Text {
text: "again".into(),
}],
},
})
.await
.unwrap();
// Wait for TaskComplete or error
loop {
let ev = timeout(Duration::from_secs(1), codex.next_event())
.await
.unwrap()
.unwrap();
match ev.msg {
codex_core::protocol::EventMsg::TaskComplete => break,
codex_core::protocol::EventMsg::Error { message } => {
panic!("unexpected error: {message}")
}
_ => (),
}
}
}