fix: agent instructions were not being included when ~/.codex/instructions.md was empty (#908)

I had seen issues where `codex-rs` would not always write files without me pressuring it to do so, and between that and the report in https://github.com/openai/codex/issues/900, I decided to look into this further. I found two serious issues with agent instructions:

1. We were only sending agent instructions on the first turn, but looking at the TypeScript code, we should be sending them on every turn.
2. There was a serious issue where the agent instructions were frequently lost:
   * The TypeScript CLI appears to keep writing `~/.codex/instructions.md`: see `codex-cli/src/utils/config.ts` (L586) at commit 55142e3e6c.
   * If `instructions.md` is present, the Rust CLI uses its contents INSTEAD OF the default prompt, even if `instructions.md` is empty: see `codex-rs/core/src/config.rs` (L202-L203) at commit 55142e3e6c.

The combination of these two things means that I have been using `codex-rs` without these key instructions: https://github.com/openai/codex/blob/main/codex-rs/core/prompt.md

Looking at the TypeScript code, it appears we should be concatenating these three items every time (if they exist):

* `prompt.md`
* `~/.codex/instructions.md`
* nearest `AGENTS.md`

This PR fixes things so that:

* `Config.instructions` is `None` if `instructions.md` is empty.
* `Payload.instructions` is now `&'a str` instead of `Option<&'a String>` because we should always have _something_ to send.
* `Prompt` now has a `get_full_instructions()` helper that returns a `Cow<str>` that will always include the agent instructions first.
2025-05-12 17:24:44 -07:00
parent 55142e3e6c
commit 61b881d4e5
4 changed files with 34 additions and 17 deletions
--- a/codex-rs/core/src/chat_completions.rs
+++ b/codex-rs/core/src/chat_completions.rs
@@ -38,9 +38,8 @@ pub(crate) async fn stream_chat_completions(
    // Build messages array
    let mut messages = Vec::<serde_json::Value>::new();

-    if let Some(instr) = &prompt.instructions {
-        messages.push(json!({"role": "system", "content": instr}));
-    }
+    let full_instructions = prompt.get_full_instructions();
+    messages.push(json!({"role": "system", "content": full_instructions}));

    for item in &prompt.input {
        if let ResponseItem::Message { role, content } = item {
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -166,9 +166,10 @@ impl ModelClient {

        debug!("tools_json: {}", serde_json::to_string_pretty(&tools_json)?);

+        let full_instructions = prompt.get_full_instructions();
        let payload = Payload {
            model: &self.model,
-            instructions: prompt.instructions.as_ref(),
+            instructions: &full_instructions,
            input: &prompt.input,
            tools: &tools_json,
            tool_choice: "auto",
--- a/codex-rs/core/src/client_common.rs
+++ b/codex-rs/core/src/client_common.rs
@@ -2,12 +2,17 @@ use crate::error::Result;
 use crate::models::ResponseItem;
 use futures::Stream;
 use serde::Serialize;
+use std::borrow::Cow;
 use std::collections::HashMap;
 use std::pin::Pin;
 use std::task::Context;
 use std::task::Poll;
 use tokio::sync::mpsc;

+/// The `instructions` field in the payload sent to a model should always start
+/// with this content.
+const BASE_INSTRUCTIONS: &str = include_str!("../prompt.md");
+
 /// API request payload for a single model turn.
 #[derive(Default, Debug, Clone)]
 pub struct Prompt {
@@ -15,7 +20,8 @@ pub struct Prompt {
    pub input: Vec<ResponseItem>,
    /// Optional previous response ID (when storage is enabled).
    pub prev_id: Option<String>,
-    /// Optional initial instructions (only sent on first turn).
+    /// Optional instructions from the user to append to the built-in agent
+    /// instructions.
    pub instructions: Option<String>,
    /// Whether to store response on server side (disable_response_storage = !store).
    pub store: bool,
@@ -26,6 +32,18 @@ pub struct Prompt {
    pub extra_tools: HashMap<String, mcp_types::Tool>,
 }

+impl Prompt {
+    pub(crate) fn get_full_instructions(&self) -> Cow<str> {
+        match &self.instructions {
+            Some(instructions) => {
+                let instructions = format!("{BASE_INSTRUCTIONS}\n{instructions}");
+                Cow::Owned(instructions)
+            }
+            None => Cow::Borrowed(BASE_INSTRUCTIONS),
+        }
+    }
+}
+
 #[derive(Debug)]
 pub enum ResponseEvent {
    OutputItemDone(ResponseItem),
@@ -54,8 +72,7 @@ pub(crate) enum Summary {
 #[derive(Debug, Serialize)]
 pub(crate) struct Payload<'a> {
    pub(crate) model: &'a str,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub(crate) instructions: Option<&'a String>,
+    pub(crate) instructions: &'a str,
    // TODO(mbolin): ResponseItem::Other should not be serialized. Currently,
    // we code defensively to avoid this case, but perhaps we should use a
    // separate enum for serialization.
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -10,11 +10,6 @@ use serde::Deserialize;
 use std::collections::HashMap;
 use std::path::PathBuf;

-/// Embedded fallback instructions that mirror the TypeScript CLI’s default
-/// system prompt. These are compiled into the binary so a clean install behaves
-/// correctly even if the user has not created `~/.codex/instructions.md`.
-const EMBEDDED_INSTRUCTIONS: &str = include_str!("../prompt.md");
-
 /// Maximum number of bytes of the documentation that will be embedded. Larger
 /// files are *silently truncated* to this size so we do not take up too much of
 /// the context window.
@@ -42,7 +37,7 @@ pub struct Config {
    /// who have opted into Zero Data Retention (ZDR).
    pub disable_response_storage: bool,

-    /// System instructions.
+    /// User-provided instructions from instructions.md.
    pub instructions: Option<String>,

    /// Optional external notifier command. When set, Codex will spawn this
@@ -198,9 +193,7 @@ impl Config {
        cfg: ConfigToml,
        overrides: ConfigOverrides,
    ) -> std::io::Result<Self> {
-        // Instructions: user-provided instructions.md > embedded default.
-        let instructions =
-            Self::load_instructions().or_else(|| Some(EMBEDDED_INSTRUCTIONS.to_string()));
+        let instructions = Self::load_instructions();

        // Destructure ConfigOverrides fully to ensure all overrides are applied.
        let ConfigOverrides {
@@ -289,7 +282,14 @@ impl Config {
    fn load_instructions() -> Option<String> {
        let mut p = codex_dir().ok()?;
        p.push("instructions.md");
-        std::fs::read_to_string(&p).ok()
+        std::fs::read_to_string(&p).ok().and_then(|s| {
+            let s = s.trim();
+            if s.is_empty() {
+                None
+            } else {
+                Some(s.to_string())
+            }
+        })
    }

    /// Meant to be used exclusively for tests: `load_with_overrides()` should