Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }:` spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
This commit is contained in:
@@ -9,6 +9,7 @@ mod fork_conversation;
|
||||
mod live_cli;
|
||||
mod model_overrides;
|
||||
mod prompt_caching;
|
||||
mod review;
|
||||
mod seatbelt;
|
||||
mod stream_error_allows_next_turn;
|
||||
mod stream_no_completed;
|
||||
|
||||
542
codex-rs/core/tests/suite/review.rs
Normal file
542
codex-rs/core/tests/suite/review.rs
Normal file
@@ -0,0 +1,542 @@
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::CodexConversation;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::InputItem;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::ReviewCodeLocation;
|
||||
use codex_core::protocol::ReviewFinding;
|
||||
use codex_core::protocol::ReviewLineRange;
|
||||
use codex_core::protocol::ReviewOutputEvent;
|
||||
use codex_core::protocol::ReviewRequest;
|
||||
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::load_sse_fixture_with_id_from_str;
|
||||
use core_test_support::wait_for_event;
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use tempfile::TempDir;
|
||||
use tokio::io::AsyncWriteExt as _;
|
||||
use uuid::Uuid;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
/// Verify that submitting `Op::Review` spawns a child task and emits
|
||||
/// EnteredReviewMode -> ExitedReviewMode(None) -> TaskComplete
|
||||
/// in that order when the model returns a structured review JSON payload.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn review_op_emits_lifecycle_and_review_output() {
|
||||
// Skip under Codex sandbox network restrictions.
|
||||
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Start mock Responses API server. Return a single assistant message whose
|
||||
// text is a JSON-encoded ReviewOutputEvent.
|
||||
let review_json = serde_json::json!({
|
||||
"findings": [
|
||||
{
|
||||
"title": "Prefer Stylize helpers",
|
||||
"body": "Use .dim()/.bold() chaining instead of manual Style where possible.",
|
||||
"confidence_score": 0.9,
|
||||
"priority": 1,
|
||||
"code_location": {
|
||||
"absolute_file_path": "/tmp/file.rs",
|
||||
"line_range": {"start": 10, "end": 20}
|
||||
}
|
||||
}
|
||||
],
|
||||
"overall_correctness": "good",
|
||||
"overall_explanation": "All good with some improvements suggested.",
|
||||
"overall_confidence_score": 0.8
|
||||
})
|
||||
.to_string();
|
||||
let sse_template = r#"[
|
||||
{"type":"response.output_item.done", "item":{
|
||||
"type":"message", "role":"assistant",
|
||||
"content":[{"type":"output_text","text":__REVIEW__}]
|
||||
}},
|
||||
{"type":"response.completed", "response": {"id": "__ID__"}}
|
||||
]"#;
|
||||
let review_json_escaped = serde_json::to_string(&review_json).unwrap();
|
||||
let sse_raw = sse_template.replace("__REVIEW__", &review_json_escaped);
|
||||
let server = start_responses_server_with_sse(&sse_raw, 1).await;
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let codex = new_conversation_for_server(&server, &codex_home, |_| {}).await;
|
||||
|
||||
// Submit review request.
|
||||
codex
|
||||
.submit(Op::Review {
|
||||
review_request: ReviewRequest {
|
||||
prompt: "Please review my changes".to_string(),
|
||||
user_facing_hint: "my changes".to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Verify lifecycle: Entered -> Exited(Some(review)) -> TaskComplete.
|
||||
let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode(_))).await;
|
||||
let closed = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExitedReviewMode(_))).await;
|
||||
let review = match closed {
|
||||
EventMsg::ExitedReviewMode(Some(r)) => r,
|
||||
other => panic!("expected ExitedReviewMode(Some(..)), got {other:?}"),
|
||||
};
|
||||
|
||||
// Deep compare full structure using PartialEq (floats are f32 on both sides).
|
||||
let expected = ReviewOutputEvent {
|
||||
findings: vec![ReviewFinding {
|
||||
title: "Prefer Stylize helpers".to_string(),
|
||||
body: "Use .dim()/.bold() chaining instead of manual Style where possible.".to_string(),
|
||||
confidence_score: 0.9,
|
||||
priority: 1,
|
||||
code_location: ReviewCodeLocation {
|
||||
absolute_file_path: PathBuf::from("/tmp/file.rs"),
|
||||
line_range: ReviewLineRange { start: 10, end: 20 },
|
||||
},
|
||||
}],
|
||||
overall_correctness: "good".to_string(),
|
||||
overall_explanation: "All good with some improvements suggested.".to_string(),
|
||||
overall_confidence_score: 0.8,
|
||||
};
|
||||
assert_eq!(expected, review);
|
||||
let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
server.verify().await;
|
||||
}
|
||||
|
||||
/// When the model returns plain text that is not JSON, ensure the child
|
||||
/// lifecycle still occurs and the plain text is surfaced via
|
||||
/// ExitedReviewMode(Some(..)) as the overall_explanation.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn review_op_with_plain_text_emits_review_fallback() {
|
||||
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let sse_raw = r#"[
|
||||
{"type":"response.output_item.done", "item":{
|
||||
"type":"message", "role":"assistant",
|
||||
"content":[{"type":"output_text","text":"just plain text"}]
|
||||
}},
|
||||
{"type":"response.completed", "response": {"id": "__ID__"}}
|
||||
]"#;
|
||||
let server = start_responses_server_with_sse(sse_raw, 1).await;
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let codex = new_conversation_for_server(&server, &codex_home, |_| {}).await;
|
||||
|
||||
codex
|
||||
.submit(Op::Review {
|
||||
review_request: ReviewRequest {
|
||||
prompt: "Plain text review".to_string(),
|
||||
user_facing_hint: "plain text review".to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode(_))).await;
|
||||
let closed = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExitedReviewMode(_))).await;
|
||||
let review = match closed {
|
||||
EventMsg::ExitedReviewMode(Some(r)) => r,
|
||||
other => panic!("expected ExitedReviewMode(Some(..)), got {other:?}"),
|
||||
};
|
||||
|
||||
// Expect a structured fallback carrying the plain text.
|
||||
let expected = ReviewOutputEvent {
|
||||
overall_explanation: "just plain text".to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(expected, review);
|
||||
let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
server.verify().await;
|
||||
}
|
||||
|
||||
/// When the model returns structured JSON in a review, ensure no AgentMessage
|
||||
/// is emitted; the UI consumes the structured result via ExitedReviewMode.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn review_does_not_emit_agent_message_on_structured_output() {
|
||||
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let review_json = serde_json::json!({
|
||||
"findings": [
|
||||
{
|
||||
"title": "Example",
|
||||
"body": "Structured review output.",
|
||||
"confidence_score": 0.5,
|
||||
"priority": 1,
|
||||
"code_location": {
|
||||
"absolute_file_path": "/tmp/file.rs",
|
||||
"line_range": {"start": 1, "end": 2}
|
||||
}
|
||||
}
|
||||
],
|
||||
"overall_correctness": "ok",
|
||||
"overall_explanation": "ok",
|
||||
"overall_confidence_score": 0.5
|
||||
})
|
||||
.to_string();
|
||||
let sse_template = r#"[
|
||||
{"type":"response.output_item.done", "item":{
|
||||
"type":"message", "role":"assistant",
|
||||
"content":[{"type":"output_text","text":__REVIEW__}]
|
||||
}},
|
||||
{"type":"response.completed", "response": {"id": "__ID__"}}
|
||||
]"#;
|
||||
let review_json_escaped = serde_json::to_string(&review_json).unwrap();
|
||||
let sse_raw = sse_template.replace("__REVIEW__", &review_json_escaped);
|
||||
let server = start_responses_server_with_sse(&sse_raw, 1).await;
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let codex = new_conversation_for_server(&server, &codex_home, |_| {}).await;
|
||||
|
||||
codex
|
||||
.submit(Op::Review {
|
||||
review_request: ReviewRequest {
|
||||
prompt: "check structured".to_string(),
|
||||
user_facing_hint: "check structured".to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Drain events until TaskComplete; ensure none are AgentMessage.
|
||||
use tokio::time::Duration;
|
||||
use tokio::time::timeout;
|
||||
let mut saw_entered = false;
|
||||
let mut saw_exited = false;
|
||||
loop {
|
||||
let ev = timeout(Duration::from_secs(5), codex.next_event())
|
||||
.await
|
||||
.expect("timeout waiting for event")
|
||||
.expect("stream ended unexpectedly");
|
||||
match ev.msg {
|
||||
EventMsg::TaskComplete(_) => break,
|
||||
EventMsg::AgentMessage(_) => {
|
||||
panic!("unexpected AgentMessage during review with structured output")
|
||||
}
|
||||
EventMsg::EnteredReviewMode(_) => saw_entered = true,
|
||||
EventMsg::ExitedReviewMode(_) => saw_exited = true,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
assert!(saw_entered && saw_exited, "missing review lifecycle events");
|
||||
|
||||
server.verify().await;
|
||||
}
|
||||
|
||||
/// Ensure that when a custom `review_model` is set in the config, the review
|
||||
/// request uses that model (and not the main chat model).
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn review_uses_custom_review_model_from_config() {
|
||||
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Minimal stream: just a completed event
|
||||
let sse_raw = r#"[
|
||||
{"type":"response.completed", "response": {"id": "__ID__"}}
|
||||
]"#;
|
||||
let server = start_responses_server_with_sse(sse_raw, 1).await;
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
// Choose a review model different from the main model; ensure it is used.
|
||||
let codex = new_conversation_for_server(&server, &codex_home, |cfg| {
|
||||
cfg.model = "gpt-4.1".to_string();
|
||||
cfg.review_model = "gpt-5".to_string();
|
||||
})
|
||||
.await;
|
||||
|
||||
codex
|
||||
.submit(Op::Review {
|
||||
review_request: ReviewRequest {
|
||||
prompt: "use custom model".to_string(),
|
||||
user_facing_hint: "use custom model".to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Wait for completion
|
||||
let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode(_))).await;
|
||||
let _closed = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExitedReviewMode(None))).await;
|
||||
let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// Assert the request body model equals the configured review model
|
||||
let request = &server.received_requests().await.unwrap()[0];
|
||||
let body = request.body_json::<serde_json::Value>().unwrap();
|
||||
assert_eq!(body["model"].as_str().unwrap(), "gpt-5");
|
||||
|
||||
server.verify().await;
|
||||
}
|
||||
|
||||
/// When a review session begins, it must not prepend prior chat history from
|
||||
/// the parent session. The request `input` should contain only the review
|
||||
/// prompt from the user.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn review_input_isolated_from_parent_history() {
|
||||
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Mock server for the single review request
|
||||
let sse_raw = r#"[
|
||||
{"type":"response.completed", "response": {"id": "__ID__"}}
|
||||
]"#;
|
||||
let server = start_responses_server_with_sse(sse_raw, 1).await;
|
||||
|
||||
// Seed a parent session history via resume file with both user + assistant items.
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
|
||||
let session_file = codex_home.path().join("resume.jsonl");
|
||||
{
|
||||
let mut f = tokio::fs::File::create(&session_file).await.unwrap();
|
||||
let convo_id = Uuid::new_v4();
|
||||
// Proper session_meta line (enveloped) with a conversation id
|
||||
let meta_line = serde_json::json!({
|
||||
"timestamp": "2024-01-01T00:00:00.000Z",
|
||||
"type": "session_meta",
|
||||
"payload": {
|
||||
"id": convo_id,
|
||||
"timestamp": "2024-01-01T00:00:00Z",
|
||||
"instructions": null,
|
||||
"cwd": ".",
|
||||
"originator": "test_originator",
|
||||
"cli_version": "test_version"
|
||||
}
|
||||
});
|
||||
f.write_all(format!("{meta_line}\n").as_bytes())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Prior user message (enveloped response_item)
|
||||
let user = codex_protocol::models::ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![codex_protocol::models::ContentItem::InputText {
|
||||
text: "parent: earlier user message".to_string(),
|
||||
}],
|
||||
};
|
||||
let user_json = serde_json::to_value(&user).unwrap();
|
||||
let user_line = serde_json::json!({
|
||||
"timestamp": "2024-01-01T00:00:01.000Z",
|
||||
"type": "response_item",
|
||||
"payload": user_json
|
||||
});
|
||||
f.write_all(format!("{user_line}\n").as_bytes())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Prior assistant message (enveloped response_item)
|
||||
let assistant = codex_protocol::models::ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![codex_protocol::models::ContentItem::OutputText {
|
||||
text: "parent: assistant reply".to_string(),
|
||||
}],
|
||||
};
|
||||
let assistant_json = serde_json::to_value(&assistant).unwrap();
|
||||
let assistant_line = serde_json::json!({
|
||||
"timestamp": "2024-01-01T00:00:02.000Z",
|
||||
"type": "response_item",
|
||||
"payload": assistant_json
|
||||
});
|
||||
f.write_all(format!("{assistant_line}\n").as_bytes())
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
config.experimental_resume = Some(session_file);
|
||||
|
||||
let codex = new_conversation_for_server(&server, &codex_home, |cfg| {
|
||||
// apply resume file
|
||||
cfg.experimental_resume = config.experimental_resume.clone();
|
||||
})
|
||||
.await;
|
||||
|
||||
// Submit review request; it must start fresh (no parent history in `input`).
|
||||
let review_prompt = "Please review only this".to_string();
|
||||
codex
|
||||
.submit(Op::Review {
|
||||
review_request: ReviewRequest {
|
||||
prompt: review_prompt.clone(),
|
||||
user_facing_hint: review_prompt.clone(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode(_))).await;
|
||||
let _closed = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExitedReviewMode(None))).await;
|
||||
let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// Assert the request `input` contains only the single review user message.
|
||||
let request = &server.received_requests().await.unwrap()[0];
|
||||
let body = request.body_json::<serde_json::Value>().unwrap();
|
||||
let expected_input = serde_json::json!([
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [{"type": "input_text", "text": review_prompt}]
|
||||
}
|
||||
]);
|
||||
assert_eq!(body["input"], expected_input);
|
||||
|
||||
server.verify().await;
|
||||
}
|
||||
|
||||
/// After a review thread finishes, its conversation should not leak into the
|
||||
/// parent session. A subsequent parent turn must not include any review
|
||||
/// messages in its request `input`.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn review_history_does_not_leak_into_parent_session() {
|
||||
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Respond to both the review request and the subsequent parent request.
|
||||
let sse_raw = r#"[
|
||||
{"type":"response.output_item.done", "item":{
|
||||
"type":"message", "role":"assistant",
|
||||
"content":[{"type":"output_text","text":"review assistant output"}]
|
||||
}},
|
||||
{"type":"response.completed", "response": {"id": "__ID__"}}
|
||||
]"#;
|
||||
let server = start_responses_server_with_sse(sse_raw, 2).await;
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let codex = new_conversation_for_server(&server, &codex_home, |_| {}).await;
|
||||
|
||||
// 1) Run a review turn that produces an assistant message (isolated in child).
|
||||
codex
|
||||
.submit(Op::Review {
|
||||
review_request: ReviewRequest {
|
||||
prompt: "Start a review".to_string(),
|
||||
user_facing_hint: "Start a review".to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode(_))).await;
|
||||
let _closed = wait_for_event(&codex, |ev| {
|
||||
matches!(ev, EventMsg::ExitedReviewMode(Some(_)))
|
||||
})
|
||||
.await;
|
||||
let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// 2) Continue in the parent session; request input must not include any review items.
|
||||
let followup = "back to parent".to_string();
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![InputItem::Text {
|
||||
text: followup.clone(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// Inspect the second request (parent turn) input contents.
|
||||
// Parent turns include session initial messages (user_instructions, environment_context).
|
||||
// Critically, no messages from the review thread should appear.
|
||||
let requests = server.received_requests().await.unwrap();
|
||||
assert_eq!(requests.len(), 2);
|
||||
let body = requests[1].body_json::<serde_json::Value>().unwrap();
|
||||
let input = body["input"].as_array().expect("input array");
|
||||
|
||||
// Must include the followup as the last item for this turn
|
||||
let last = input.last().expect("at least one item in input");
|
||||
assert_eq!(last["role"].as_str().unwrap(), "user");
|
||||
let last_text = last["content"][0]["text"].as_str().unwrap();
|
||||
assert_eq!(last_text, followup);
|
||||
|
||||
// Ensure no review-thread content leaked into the parent request
|
||||
let contains_review_prompt = input
|
||||
.iter()
|
||||
.any(|msg| msg["content"][0]["text"].as_str().unwrap_or_default() == "Start a review");
|
||||
let contains_review_assistant = input.iter().any(|msg| {
|
||||
msg["content"][0]["text"].as_str().unwrap_or_default() == "review assistant output"
|
||||
});
|
||||
assert!(
|
||||
!contains_review_prompt,
|
||||
"review prompt leaked into parent turn input"
|
||||
);
|
||||
assert!(
|
||||
!contains_review_assistant,
|
||||
"review assistant output leaked into parent turn input"
|
||||
);
|
||||
|
||||
server.verify().await;
|
||||
}
|
||||
|
||||
/// Start a mock Responses API server and mount the given SSE stream body.
|
||||
async fn start_responses_server_with_sse(sse_raw: &str, expected_requests: usize) -> MockServer {
|
||||
let server = MockServer::start().await;
|
||||
let sse = load_sse_fixture_with_id_from_str(sse_raw, &Uuid::new_v4().to_string());
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(
|
||||
ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(sse.clone(), "text/event-stream"),
|
||||
)
|
||||
.expect(expected_requests as u64)
|
||||
.mount(&server)
|
||||
.await;
|
||||
server
|
||||
}
|
||||
|
||||
/// Create a conversation configured to talk to the provided mock server.
|
||||
#[expect(clippy::expect_used)]
|
||||
async fn new_conversation_for_server<F>(
|
||||
server: &MockServer,
|
||||
codex_home: &TempDir,
|
||||
mutator: F,
|
||||
) -> Arc<CodexConversation>
|
||||
where
|
||||
F: FnOnce(&mut Config),
|
||||
{
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
let mut config = load_default_config_for_test(codex_home);
|
||||
config.model_provider = model_provider;
|
||||
mutator(&mut config);
|
||||
let conversation_manager =
|
||||
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
|
||||
conversation_manager
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.expect("create conversation")
|
||||
.conversation
|
||||
}
|
||||
Reference in New Issue
Block a user