Add notifier tests (#4064)
Proposal: (1) use anyhow for tests and avoid unwrap; (2) extract a helper for starting a test instance of Codex.
This commit is contained in:
1
codex-rs/Cargo.lock
generated
1
codex-rs/Cargo.lock
generated
@@ -1076,6 +1076,7 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
|
|||||||
name = "core_test_support"
|
name = "core_test_support"
|
||||||
version = "0.0.0"
|
version = "0.0.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
"codex-core",
|
"codex-core",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ use crate::AuthManager;
|
|||||||
use crate::client_common::REVIEW_PROMPT;
|
use crate::client_common::REVIEW_PROMPT;
|
||||||
use crate::event_mapping::map_response_item_to_event_messages;
|
use crate::event_mapping::map_response_item_to_event_messages;
|
||||||
use crate::review_format::format_review_findings_block;
|
use crate::review_format::format_review_findings_block;
|
||||||
|
use crate::user_notification::UserNotifier;
|
||||||
use async_channel::Receiver;
|
use async_channel::Receiver;
|
||||||
use async_channel::Sender;
|
use async_channel::Sender;
|
||||||
use codex_apply_patch::ApplyPatchAction;
|
use codex_apply_patch::ApplyPatchAction;
|
||||||
@@ -186,7 +187,7 @@ impl Codex {
|
|||||||
base_instructions: config.base_instructions.clone(),
|
base_instructions: config.base_instructions.clone(),
|
||||||
approval_policy: config.approval_policy,
|
approval_policy: config.approval_policy,
|
||||||
sandbox_policy: config.sandbox_policy.clone(),
|
sandbox_policy: config.sandbox_policy.clone(),
|
||||||
notify: config.notify.clone(),
|
notify: UserNotifier::new(config.notify.clone()),
|
||||||
cwd: config.cwd.clone(),
|
cwd: config.cwd.clone(),
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -274,9 +275,7 @@ pub(crate) struct Session {
|
|||||||
session_manager: ExecSessionManager,
|
session_manager: ExecSessionManager,
|
||||||
unified_exec_manager: UnifiedExecSessionManager,
|
unified_exec_manager: UnifiedExecSessionManager,
|
||||||
|
|
||||||
/// External notifier command (will be passed as args to exec()). When
|
notifier: UserNotifier,
|
||||||
/// `None` this feature is disabled.
|
|
||||||
notify: Option<Vec<String>>,
|
|
||||||
|
|
||||||
/// Optional rollout recorder for persisting the conversation transcript so
|
/// Optional rollout recorder for persisting the conversation transcript so
|
||||||
/// sessions can be replayed or inspected later.
|
/// sessions can be replayed or inspected later.
|
||||||
@@ -335,10 +334,7 @@ struct ConfigureSession {
|
|||||||
/// How to sandbox commands executed in the system
|
/// How to sandbox commands executed in the system
|
||||||
sandbox_policy: SandboxPolicy,
|
sandbox_policy: SandboxPolicy,
|
||||||
|
|
||||||
/// Optional external notifier command tokens. Present only when the
|
notify: UserNotifier,
|
||||||
/// client wants the agent to spawn a program after each completed
|
|
||||||
/// turn.
|
|
||||||
notify: Option<Vec<String>>,
|
|
||||||
|
|
||||||
/// Working directory that should be treated as the *root* of the
|
/// Working directory that should be treated as the *root* of the
|
||||||
/// session. All relative paths supplied by the model as well as the
|
/// session. All relative paths supplied by the model as well as the
|
||||||
@@ -480,7 +476,7 @@ impl Session {
|
|||||||
mcp_connection_manager,
|
mcp_connection_manager,
|
||||||
session_manager: ExecSessionManager::default(),
|
session_manager: ExecSessionManager::default(),
|
||||||
unified_exec_manager: UnifiedExecSessionManager::default(),
|
unified_exec_manager: UnifiedExecSessionManager::default(),
|
||||||
notify,
|
notifier: notify,
|
||||||
state: Mutex::new(state),
|
state: Mutex::new(state),
|
||||||
rollout: Mutex::new(Some(rollout_recorder)),
|
rollout: Mutex::new(Some(rollout_recorder)),
|
||||||
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
|
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
|
||||||
@@ -586,7 +582,7 @@ impl Session {
|
|||||||
command: Vec<String>,
|
command: Vec<String>,
|
||||||
cwd: PathBuf,
|
cwd: PathBuf,
|
||||||
reason: Option<String>,
|
reason: Option<String>,
|
||||||
) -> oneshot::Receiver<ReviewDecision> {
|
) -> ReviewDecision {
|
||||||
// Add the tx_approve callback to the map before sending the request.
|
// Add the tx_approve callback to the map before sending the request.
|
||||||
let (tx_approve, rx_approve) = oneshot::channel();
|
let (tx_approve, rx_approve) = oneshot::channel();
|
||||||
let event_id = sub_id.clone();
|
let event_id = sub_id.clone();
|
||||||
@@ -608,7 +604,7 @@ impl Session {
|
|||||||
}),
|
}),
|
||||||
};
|
};
|
||||||
self.send_event(event).await;
|
self.send_event(event).await;
|
||||||
rx_approve
|
rx_approve.await.unwrap_or_default()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn request_patch_approval(
|
pub async fn request_patch_approval(
|
||||||
@@ -1034,33 +1030,8 @@ impl Session {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Spawn the configured notifier (if any) with the given JSON payload as
|
pub(crate) fn notifier(&self) -> &UserNotifier {
|
||||||
/// the last argument. Failures are logged but otherwise ignored so that
|
&self.notifier
|
||||||
/// notification issues do not interfere with the main workflow.
|
|
||||||
fn maybe_notify(&self, notification: UserNotification) {
|
|
||||||
let Some(notify_command) = &self.notify else {
|
|
||||||
return;
|
|
||||||
};
|
|
||||||
|
|
||||||
if notify_command.is_empty() {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let Ok(json) = serde_json::to_string(&notification) else {
|
|
||||||
error!("failed to serialise notification payload");
|
|
||||||
return;
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut command = std::process::Command::new(&notify_command[0]);
|
|
||||||
if notify_command.len() > 1 {
|
|
||||||
command.args(&notify_command[1..]);
|
|
||||||
}
|
|
||||||
command.arg(json);
|
|
||||||
|
|
||||||
// Fire-and-forget – we do not wait for completion.
|
|
||||||
if let Err(e) = command.spawn() {
|
|
||||||
warn!("failed to spawn notifier '{}': {e}", notify_command[0]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1883,11 +1854,12 @@ async fn run_task(
|
|||||||
last_agent_message = get_last_assistant_message_from_turn(
|
last_agent_message = get_last_assistant_message_from_turn(
|
||||||
&items_to_record_in_conversation_history,
|
&items_to_record_in_conversation_history,
|
||||||
);
|
);
|
||||||
sess.maybe_notify(UserNotification::AgentTurnComplete {
|
sess.notifier()
|
||||||
turn_id: sub_id.clone(),
|
.notify(&UserNotification::AgentTurnComplete {
|
||||||
input_messages: turn_input_messages,
|
turn_id: sub_id.clone(),
|
||||||
last_assistant_message: last_agent_message.clone(),
|
input_messages: turn_input_messages,
|
||||||
});
|
last_assistant_message: last_agent_message.clone(),
|
||||||
|
});
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
@@ -2842,7 +2814,7 @@ async fn handle_container_exec_with_params(
|
|||||||
let sandbox_type = match safety {
|
let sandbox_type = match safety {
|
||||||
SafetyCheck::AutoApprove { sandbox_type } => sandbox_type,
|
SafetyCheck::AutoApprove { sandbox_type } => sandbox_type,
|
||||||
SafetyCheck::AskUser => {
|
SafetyCheck::AskUser => {
|
||||||
let rx_approve = sess
|
let decision = sess
|
||||||
.request_command_approval(
|
.request_command_approval(
|
||||||
sub_id.clone(),
|
sub_id.clone(),
|
||||||
call_id.clone(),
|
call_id.clone(),
|
||||||
@@ -2851,7 +2823,7 @@ async fn handle_container_exec_with_params(
|
|||||||
params.justification.clone(),
|
params.justification.clone(),
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
match rx_approve.await.unwrap_or_default() {
|
match decision {
|
||||||
ReviewDecision::Approved => (),
|
ReviewDecision::Approved => (),
|
||||||
ReviewDecision::ApprovedForSession => {
|
ReviewDecision::ApprovedForSession => {
|
||||||
sess.add_approved_command(params.command.clone()).await;
|
sess.add_approved_command(params.command.clone()).await;
|
||||||
@@ -3012,7 +2984,7 @@ async fn handle_sandbox_error(
|
|||||||
sess.notify_background_event(&sub_id, format!("Execution failed: {error}"))
|
sess.notify_background_event(&sub_id, format!("Execution failed: {error}"))
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
let rx_approve = sess
|
let decision = sess
|
||||||
.request_command_approval(
|
.request_command_approval(
|
||||||
sub_id.clone(),
|
sub_id.clone(),
|
||||||
call_id.clone(),
|
call_id.clone(),
|
||||||
@@ -3022,7 +2994,7 @@ async fn handle_sandbox_error(
|
|||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
match rx_approve.await.unwrap_or_default() {
|
match decision {
|
||||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {
|
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {
|
||||||
// Persist this command as pre‑approved for the
|
// Persist this command as pre‑approved for the
|
||||||
// remainder of the session so future
|
// remainder of the session so future
|
||||||
@@ -3642,7 +3614,7 @@ mod tests {
|
|||||||
mcp_connection_manager: McpConnectionManager::default(),
|
mcp_connection_manager: McpConnectionManager::default(),
|
||||||
session_manager: ExecSessionManager::default(),
|
session_manager: ExecSessionManager::default(),
|
||||||
unified_exec_manager: UnifiedExecSessionManager::default(),
|
unified_exec_manager: UnifiedExecSessionManager::default(),
|
||||||
notify: None,
|
notifier: UserNotifier::default(),
|
||||||
rollout: Mutex::new(None),
|
rollout: Mutex::new(None),
|
||||||
state: Mutex::new(State {
|
state: Mutex::new(State {
|
||||||
history: ConversationHistory::new(),
|
history: ConversationHistory::new(),
|
||||||
|
|||||||
@@ -1,4 +1,45 @@
|
|||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
use tracing::error;
|
||||||
|
use tracing::warn;
|
||||||
|
|
||||||
|
#[derive(Debug, Default)]
|
||||||
|
pub(crate) struct UserNotifier {
|
||||||
|
notify_command: Option<Vec<String>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl UserNotifier {
|
||||||
|
pub(crate) fn notify(&self, notification: &UserNotification) {
|
||||||
|
if let Some(notify_command) = &self.notify_command
|
||||||
|
&& !notify_command.is_empty()
|
||||||
|
{
|
||||||
|
self.invoke_notify(notify_command, notification)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn invoke_notify(&self, notify_command: &[String], notification: &UserNotification) {
|
||||||
|
let Ok(json) = serde_json::to_string(¬ification) else {
|
||||||
|
error!("failed to serialise notification payload");
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut command = std::process::Command::new(¬ify_command[0]);
|
||||||
|
if notify_command.len() > 1 {
|
||||||
|
command.args(¬ify_command[1..]);
|
||||||
|
}
|
||||||
|
command.arg(json);
|
||||||
|
|
||||||
|
// Fire-and-forget – we do not wait for completion.
|
||||||
|
if let Err(e) = command.spawn() {
|
||||||
|
warn!("failed to spawn notifier '{}': {e}", notify_command[0]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn new(notify: Option<Vec<String>>) -> Self {
|
||||||
|
Self {
|
||||||
|
notify_command: notify,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// User can configure a program that will receive notifications. Each
|
/// User can configure a program that will receive notifications. Each
|
||||||
/// notification is serialized as JSON and passed as an argument to the
|
/// notification is serialized as JSON and passed as an argument to the
|
||||||
|
|||||||
@@ -1,12 +1,13 @@
|
|||||||
[package]
|
[package]
|
||||||
|
edition = "2024"
|
||||||
name = "core_test_support"
|
name = "core_test_support"
|
||||||
version = { workspace = true }
|
version = { workspace = true }
|
||||||
edition = "2024"
|
|
||||||
|
|
||||||
[lib]
|
[lib]
|
||||||
path = "lib.rs"
|
path = "lib.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
anyhow = { workspace = true }
|
||||||
codex-core = { workspace = true }
|
codex-core = { workspace = true }
|
||||||
serde_json = { workspace = true }
|
serde_json = { workspace = true }
|
||||||
tempfile = { workspace = true }
|
tempfile = { workspace = true }
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ use codex_core::config::ConfigOverrides;
|
|||||||
use codex_core::config::ConfigToml;
|
use codex_core::config::ConfigToml;
|
||||||
|
|
||||||
pub mod responses;
|
pub mod responses;
|
||||||
|
pub mod test_codex;
|
||||||
|
|
||||||
/// Returns a default `Config` whose on-disk state is confined to the provided
|
/// Returns a default `Config` whose on-disk state is confined to the provided
|
||||||
/// temporary directory. Using a per-test directory keeps tests hermetic and
|
/// temporary directory. Using a per-test directory keeps tests hermetic and
|
||||||
|
|||||||
75
codex-rs/core/tests/common/test_codex.rs
Normal file
75
codex-rs/core/tests/common/test_codex.rs
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
use std::mem::swap;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use codex_core::CodexAuth;
|
||||||
|
use codex_core::CodexConversation;
|
||||||
|
use codex_core::ConversationManager;
|
||||||
|
use codex_core::ModelProviderInfo;
|
||||||
|
use codex_core::NewConversation;
|
||||||
|
use codex_core::built_in_model_providers;
|
||||||
|
use codex_core::config::Config;
|
||||||
|
use codex_core::protocol::SessionConfiguredEvent;
|
||||||
|
use tempfile::TempDir;
|
||||||
|
|
||||||
|
use crate::load_default_config_for_test;
|
||||||
|
|
||||||
|
type ConfigMutator = dyn FnOnce(&mut Config);
|
||||||
|
|
||||||
|
pub struct TestCodexBuilder {
|
||||||
|
config_mutators: Vec<Box<ConfigMutator>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TestCodexBuilder {
|
||||||
|
pub fn with_config<T>(mut self, mutator: T) -> Self
|
||||||
|
where
|
||||||
|
T: FnOnce(&mut Config) + 'static,
|
||||||
|
{
|
||||||
|
self.config_mutators.push(Box::new(mutator));
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn build(&mut self, server: &wiremock::MockServer) -> anyhow::Result<TestCodex> {
|
||||||
|
// Build config pointing to the mock server and spawn Codex.
|
||||||
|
let model_provider = ModelProviderInfo {
|
||||||
|
base_url: Some(format!("{}/v1", server.uri())),
|
||||||
|
..built_in_model_providers()["openai"].clone()
|
||||||
|
};
|
||||||
|
let home = TempDir::new()?;
|
||||||
|
let cwd = TempDir::new()?;
|
||||||
|
let mut config = load_default_config_for_test(&home);
|
||||||
|
config.cwd = cwd.path().to_path_buf();
|
||||||
|
config.model_provider = model_provider;
|
||||||
|
let mut mutators = vec![];
|
||||||
|
swap(&mut self.config_mutators, &mut mutators);
|
||||||
|
|
||||||
|
for mutator in mutators {
|
||||||
|
mutator(&mut config)
|
||||||
|
}
|
||||||
|
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
|
||||||
|
let NewConversation {
|
||||||
|
conversation,
|
||||||
|
session_configured,
|
||||||
|
..
|
||||||
|
} = conversation_manager.new_conversation(config).await?;
|
||||||
|
|
||||||
|
Ok(TestCodex {
|
||||||
|
home,
|
||||||
|
cwd,
|
||||||
|
codex: conversation,
|
||||||
|
session_configured,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct TestCodex {
|
||||||
|
pub home: TempDir,
|
||||||
|
pub cwd: TempDir,
|
||||||
|
pub codex: Arc<CodexConversation>,
|
||||||
|
pub session_configured: SessionConfiguredEvent,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn test_codex() -> TestCodexBuilder {
|
||||||
|
TestCodexBuilder {
|
||||||
|
config_mutators: vec![],
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -15,3 +15,4 @@ mod rollout_list_find;
|
|||||||
mod seatbelt;
|
mod seatbelt;
|
||||||
mod stream_error_allows_next_turn;
|
mod stream_error_allows_next_turn;
|
||||||
mod stream_no_completed;
|
mod stream_no_completed;
|
||||||
|
mod user_notification;
|
||||||
|
|||||||
73
codex-rs/core/tests/suite/user_notification.rs
Normal file
73
codex-rs/core/tests/suite/user_notification.rs
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
#![cfg(not(target_os = "windows"))]
|
||||||
|
|
||||||
|
use std::os::unix::fs::PermissionsExt;
|
||||||
|
|
||||||
|
use codex_core::protocol::EventMsg;
|
||||||
|
use codex_core::protocol::InputItem;
|
||||||
|
use codex_core::protocol::Op;
|
||||||
|
use core_test_support::non_sandbox_test;
|
||||||
|
use core_test_support::responses;
|
||||||
|
use core_test_support::test_codex::TestCodex;
|
||||||
|
use core_test_support::test_codex::test_codex;
|
||||||
|
use core_test_support::wait_for_event;
|
||||||
|
use tempfile::TempDir;
|
||||||
|
use wiremock::matchers::any;
|
||||||
|
|
||||||
|
use responses::ev_assistant_message;
|
||||||
|
use responses::ev_completed;
|
||||||
|
use responses::sse;
|
||||||
|
use responses::start_mock_server;
|
||||||
|
use tokio::time::Duration;
|
||||||
|
use tokio::time::sleep;
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn summarize_context_three_requests_and_instructions() -> anyhow::Result<()> {
|
||||||
|
non_sandbox_test!(result);
|
||||||
|
|
||||||
|
let server = start_mock_server().await;
|
||||||
|
|
||||||
|
let sse1 = sse(vec![ev_assistant_message("m1", "Done"), ev_completed("r1")]);
|
||||||
|
|
||||||
|
responses::mount_sse_once(&server, any(), sse1).await;
|
||||||
|
|
||||||
|
let notify_dir = TempDir::new()?;
|
||||||
|
// write a script to the notify that touches a file next to it
|
||||||
|
let notify_script = notify_dir.path().join("notify.sh");
|
||||||
|
std::fs::write(
|
||||||
|
¬ify_script,
|
||||||
|
r#"#!/bin/bash
|
||||||
|
set -e
|
||||||
|
echo -n "${@: -1}" > $(dirname "${0}")/notify.txt"#,
|
||||||
|
)?;
|
||||||
|
std::fs::set_permissions(¬ify_script, std::fs::Permissions::from_mode(0o755))?;
|
||||||
|
|
||||||
|
let notify_file = notify_dir.path().join("notify.txt");
|
||||||
|
let notify_script_str = notify_script.to_str().unwrap().to_string();
|
||||||
|
|
||||||
|
let TestCodex { codex, .. } = test_codex()
|
||||||
|
.with_config(move |cfg| cfg.notify = Some(vec![notify_script_str]))
|
||||||
|
.build(&server)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
// 1) Normal user input – should hit server once.
|
||||||
|
codex
|
||||||
|
.submit(Op::UserInput {
|
||||||
|
items: vec![InputItem::Text {
|
||||||
|
text: "hello world".into(),
|
||||||
|
}],
|
||||||
|
})
|
||||||
|
.await?;
|
||||||
|
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||||
|
|
||||||
|
// We fork the notify script, so we need to wait for it to write to the file.
|
||||||
|
for _ in 0..100u32 {
|
||||||
|
if notify_file.exists() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
sleep(Duration::from_millis(100)).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert!(notify_file.exists());
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user