Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
237f8a11e1/codex-rs/core/src/protocol.rs (L98-L108)
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
has explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is an `Option` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Option` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
159 lines
4.6 KiB
Rust
159 lines
4.6 KiB
Rust
use std::time::Duration;
|
||
|
||
use codex_core::config::Config;
|
||
use codex_core::protocol::InputItem;
|
||
use codex_core::protocol::Op;
|
||
use codex_core::protocol::SandboxPolicy;
|
||
use codex_core::protocol::Submission;
|
||
use codex_core::Codex;
|
||
use serde_json::Value;
|
||
use tokio::time::timeout;
|
||
use wiremock::matchers::method;
|
||
use wiremock::matchers::path;
|
||
use wiremock::Match;
|
||
use wiremock::Mock;
|
||
use wiremock::MockServer;
|
||
use wiremock::Request;
|
||
use wiremock::ResponseTemplate;
|
||
|
||
/// Matcher asserting that JSON body has NO `previous_response_id` field.
|
||
struct NoPrevId;
|
||
|
||
impl Match for NoPrevId {
|
||
fn matches(&self, req: &Request) -> bool {
|
||
serde_json::from_slice::<Value>(&req.body)
|
||
.map(|v| v.get("previous_response_id").is_none())
|
||
.unwrap_or(false)
|
||
}
|
||
}
|
||
|
||
/// Matcher asserting that JSON body HAS a `previous_response_id` field.
|
||
struct HasPrevId;
|
||
|
||
impl Match for HasPrevId {
|
||
fn matches(&self, req: &Request) -> bool {
|
||
serde_json::from_slice::<Value>(&req.body)
|
||
.map(|v| v.get("previous_response_id").is_some())
|
||
.unwrap_or(false)
|
||
}
|
||
}
|
||
|
||
/// Build minimal SSE stream with completed marker.
///
/// The returned text is a single `response.completed` event whose JSON
/// payload carries `id` as the response id and an empty `output` array,
/// followed by three newlines.
///
/// `id` is interpolated verbatim (no JSON escaping), so callers should pass
/// plain identifiers.
fn sse_completed(id: &str) -> String {
    // The trailing `\` continues the literal on the next line and skips that
    // line's leading whitespace, so the `data:` field starts at column 0.
    format!(
        "event: response.completed\n\
         data: {{\"type\":\"response.completed\",\"response\":{{\"id\":\"{id}\",\"output\":[]}}}}\n\n\n"
    )
}
|
||
|
||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||
async fn keeps_previous_response_id_between_tasks() {
|
||
// Mock server
|
||
let server = MockServer::start().await;
|
||
|
||
// First request – must NOT include `previous_response_id`.
|
||
let first = ResponseTemplate::new(200)
|
||
.insert_header("content-type", "text/event-stream")
|
||
.set_body_raw(sse_completed("resp1"), "text/event-stream");
|
||
|
||
Mock::given(method("POST"))
|
||
.and(path("/v1/responses"))
|
||
.and(NoPrevId)
|
||
.respond_with(first)
|
||
.expect(1)
|
||
.mount(&server)
|
||
.await;
|
||
|
||
// Second request – MUST include `previous_response_id`.
|
||
let second = ResponseTemplate::new(200)
|
||
.insert_header("content-type", "text/event-stream")
|
||
.set_body_raw(sse_completed("resp2"), "text/event-stream");
|
||
|
||
Mock::given(method("POST"))
|
||
.and(path("/v1/responses"))
|
||
.and(HasPrevId)
|
||
.respond_with(second)
|
||
.expect(1)
|
||
.mount(&server)
|
||
.await;
|
||
|
||
// Environment
|
||
std::env::set_var("OPENAI_API_KEY", "test-key");
|
||
std::env::set_var("OPENAI_API_BASE", server.uri());
|
||
std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
|
||
std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
|
||
|
||
let codex = Codex::spawn(std::sync::Arc::new(tokio::sync::Notify::new())).unwrap();
|
||
|
||
// Init session
|
||
let config = Config::load_default_config_for_test();
|
||
codex
|
||
.submit(Submission {
|
||
id: "init".into(),
|
||
op: Op::ConfigureSession {
|
||
model: config.model,
|
||
instructions: None,
|
||
approval_policy: config.approval_policy,
|
||
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
||
disable_response_storage: false,
|
||
},
|
||
})
|
||
.await
|
||
.unwrap();
|
||
// drain init event
|
||
let _ = codex.next_event().await.unwrap();
|
||
|
||
// Task 1 – triggers first request (no previous_response_id)
|
||
codex
|
||
.submit(Submission {
|
||
id: "task1".into(),
|
||
op: Op::UserInput {
|
||
items: vec![InputItem::Text {
|
||
text: "hello".into(),
|
||
}],
|
||
},
|
||
})
|
||
.await
|
||
.unwrap();
|
||
|
||
// Wait for TaskComplete
|
||
loop {
|
||
let ev = timeout(Duration::from_secs(1), codex.next_event())
|
||
.await
|
||
.unwrap()
|
||
.unwrap();
|
||
if matches!(ev.msg, codex_core::protocol::EventMsg::TaskComplete) {
|
||
break;
|
||
}
|
||
}
|
||
|
||
// Task 2 – should include `previous_response_id` (triggers second request)
|
||
codex
|
||
.submit(Submission {
|
||
id: "task2".into(),
|
||
op: Op::UserInput {
|
||
items: vec![InputItem::Text {
|
||
text: "again".into(),
|
||
}],
|
||
},
|
||
})
|
||
.await
|
||
.unwrap();
|
||
|
||
// Wait for TaskComplete or error
|
||
loop {
|
||
let ev = timeout(Duration::from_secs(1), codex.next_event())
|
||
.await
|
||
.unwrap()
|
||
.unwrap();
|
||
match ev.msg {
|
||
codex_core::protocol::EventMsg::TaskComplete => break,
|
||
codex_core::protocol::EventMsg::Error { message } => {
|
||
panic!("unexpected error: {message}")
|
||
}
|
||
_ => (),
|
||
}
|
||
}
|
||
}
|