codex-rs/core/tests/live_cli.rs

//! Optional smoke tests that hit the real OpenAI /v1/responses endpoint. They are `#[ignore]` by
//! default so CI stays deterministic and free. Developers can run them locally with
//! `cargo test --test live_cli -- --ignored` provided they set a valid `OPENAI_API_KEY`.

use assert_cmd::prelude::*;
use predicates::prelude::*;
use std::process::Command;
use std::process::Stdio;
use tempfile::TempDir;

/// Returns the value of the `OPENAI_API_KEY` environment variable.
///
/// # Panics
///
/// Panics when the variable cannot be read (unset or not valid Unicode), since the live tests
/// cannot talk to the API without it.
fn require_api_key() -> String {
    let lookup = std::env::var("OPENAI_API_KEY");
    lookup.expect("OPENAI_API_KEY env var not set — skip running live tests")
}

/// Helper that spawns the binary inside a TempDir with minimal flags. Returns (Assert, TempDir).
///
/// The child is started with the temp dir as its working directory, `OPENAI_API_KEY` forwarded
/// from this process' environment, and the arguments `--allow-no-git-exec -v -- <prompt>`. Its
/// stdout and stderr are echoed to this process while also being captured, so the returned
/// `Assert` can inspect them. The `TempDir` is handed back so callers can examine the directory
/// the child ran in.
///
/// # Panics
///
/// Panics if `OPENAI_API_KEY` is unset, if creating the temp dir or `Command::cargo_bin` fails,
/// or if spawning, writing to, or waiting on the child fails.
fn run_live(prompt: &str) -> (assert_cmd::assert::Assert, TempDir) {
    #![allow(clippy::unwrap_used)]
    use std::io::ErrorKind;
    use std::io::Read;
    use std::io::Write;
    use std::thread;

    let dir = TempDir::new().unwrap();

    // Build a plain `std::process::Command` so we have full control over the underlying stdio
    // handles. `assert_cmd`’s own `Command` wrapper always forces stdout/stderr to be piped
    // internally which prevents us from streaming them live to the terminal (see its `spawn`
    // implementation). Instead we configure the std `Command` ourselves, then later hand the
    // resulting `Output` to `assert_cmd` for the familiar assertions.

    let mut cmd = Command::cargo_bin("codex-rs").unwrap();
    cmd.current_dir(dir.path());
    cmd.env("OPENAI_API_KEY", require_api_key());

    // We want three things at once:
    //   1. live streaming of the child’s stdout/stderr while the test is running
    //   2. captured output so we can keep using assert_cmd’s `Assert` helpers
    //   3. cross‑platform behavior (best effort)
    //
    // To get that we:
    //   • set both stdout and stderr to `piped()` so we can read them programmatically
    //   • spawn a thread for each stream that copies bytes into two sinks:
    //       – the parent process’ stdout/stderr for live visibility
    //       – an in‑memory buffer so we can pass it to `assert_cmd` later

    // Pass the prompt through the `--` separator so the CLI knows when user input ends.
    cmd.arg("--allow-no-git-exec")
        .arg("-v")
        .arg("--")
        .arg(prompt);

    cmd.stdin(Stdio::piped());
    cmd.stdout(Stdio::piped());
    cmd.stderr(Stdio::piped());

    let mut child = cmd.spawn().expect("failed to spawn codex-rs");

    // Send the terminating newline so Session::run exits after the first turn.
    child
        .stdin
        .as_mut()
        .expect("child stdin unavailable")
        .write_all(b"\n")
        .expect("failed to write to child stdin");

    // Helper that tees a ChildStdout/ChildStderr into both the parent’s stdio and a Vec<u8>.
    // The returned handle yields every byte that was read from `reader`.
    fn tee<R: Read + Send + 'static>(
        mut reader: R,
        mut writer: impl Write + Send + 'static,
    ) -> thread::JoinHandle<Vec<u8>> {
        thread::spawn(move || {
            let mut buf = Vec::new();
            let mut chunk = [0u8; 4096];
            loop {
                match reader.read(&mut chunk) {
                    // A zero-length read means the child closed its end of the pipe.
                    Ok(0) => break,
                    Ok(n) => {
                        // Echoing is best effort: a failure to mirror output to the terminal
                        // must not prevent the bytes from being captured for the assertions.
                        writer.write_all(&chunk[..n]).ok();
                        writer.flush().ok();
                        buf.extend_from_slice(&chunk[..n]);
                    }
                    // An interrupted read is non-fatal; retry instead of truncating the capture.
                    Err(e) if e.kind() == ErrorKind::Interrupted => continue,
                    // Any other read error ends the capture with whatever was collected so far.
                    Err(_) => break,
                }
            }
            buf
        })
    }

    let stdout_handle = tee(
        child.stdout.take().expect("child stdout"),
        std::io::stdout(),
    );
    let stderr_handle = tee(
        child.stderr.take().expect("child stderr"),
        std::io::stderr(),
    );

    let status = child.wait().expect("failed to wait on child");
    let stdout = stdout_handle.join().expect("stdout thread panicked");
    let stderr = stderr_handle.join().expect("stderr thread panicked");

    let output = std::process::Output {
        status,
        stdout,
        stderr,
    };

    (output.assert(), dir)
}

/// Live smoke test: asks the model to create `hello.txt` in the working directory and checks
/// that the file exists with the expected contents. Returns early when `OPENAI_API_KEY` is not
/// set.
#[ignore]
#[test]
fn live_create_file_hello_txt() {
    #![allow(clippy::unwrap_used)]
    let key_present = std::env::var("OPENAI_API_KEY").is_ok();
    if !key_present {
        eprintln!("skipping live_create_file_hello_txt – OPENAI_API_KEY not set");
        return;
    }

    let prompt = "Use the shell tool with the apply_patch command to create a file named hello.txt containing the text 'hello'.";
    let (outcome, workdir) = run_live(prompt);

    // The CLI itself must exit successfully before we look at what it produced.
    outcome.success();

    let hello_path = workdir.path().join("hello.txt");
    assert!(hello_path.exists(), "hello.txt was not created by the model");

    // Compare the trimmed text so surrounding whitespace or a trailing newline is tolerated.
    let text = std::fs::read_to_string(hello_path).unwrap();
    assert_eq!(text.trim(), "hello");
}

/// Live smoke test: asks the model to print the working directory and checks that the child's
/// stdout mentions the temp dir it was started in. Returns early when `OPENAI_API_KEY` is not
/// set.
#[ignore]
#[test]
fn live_print_working_directory() {
    let key_present = std::env::var("OPENAI_API_KEY").is_ok();
    if !key_present {
        eprintln!("skipping live_print_working_directory – OPENAI_API_KEY not set");
        return;
    }

    let (outcome, workdir) =
        run_live("Print the current working directory using the shell function.");

    // The temp dir path is what the child was given as its current directory.
    let expected_cwd = workdir.path().to_string_lossy();
    let mentions_cwd = predicate::str::contains(expected_cwd);

    outcome.success().stdout(mentions_cwd);
}
-												feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)

As stated in `codex-rs/README.md`:

Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.

To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:

- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.

Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
											
										
										
											2025-04-24 13:31:40 -07:00
+								//! Optional smoke tests that hit the real OpenAI /v1/responses endpoint. They are `#[ignore]` by
 								//! default so CI stays deterministic and free. Developers can run them locally with
 								//! `cargo test --test live_cli -- --ignored` provided they set a valid `OPENAI_API_KEY`.
 								use assert_cmd::prelude::*;
 								use predicates::prelude::*;
 								use std::process::Command;
 								use std::process::Stdio;
 								use tempfile::TempDir;
 								fn require_api_key() -> String {
 								    std::env::var("OPENAI_API_KEY")
 								        .expect("OPENAI_API_KEY env var not set — skip running live tests")
 								}
 								/// Helper that spawns the binary inside a TempDir with minimal flags. Returns (Assert, TempDir).
 								fn run_live(prompt: &str) -> (assert_cmd::assert::Assert, TempDir) {
-												fix: enable clippy on tests (#870)

https://github.com/openai/codex/pull/855 added the clippy warning to
disallow `unwrap()`, but apparently we were not verifying that tests
were "clippy clean" in CI, so I ended up with a lot of local errors in
VS Code.

This turns on the check in CI and fixes the offenders.
											
										
										
											2025-05-08 16:02:56 -07:00
+								    #![allow(clippy::unwrap_used)]
-												feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)

As stated in `codex-rs/README.md`:

Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.

To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:

- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.

Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
											
										
										
											2025-04-24 13:31:40 -07:00
+								    use std::io::Read;
 								    use std::io::Write;
 								    use std::thread;
 								    let dir = TempDir::new().unwrap();
 								    // Build a plain `std::process::Command` so we have full control over the underlying stdio
 								    // handles. `assert_cmd`’s own `Command` wrapper always forces stdout/stderr to be piped
 								    // internally which prevents us from streaming them live to the terminal (see its `spawn`
 								    // implementation). Instead we configure the std `Command` ourselves, then later hand the
 								    // resulting `Output` to `assert_cmd` for the familiar assertions.
 								    let mut cmd = Command::cargo_bin("codex-rs").unwrap();
 								    cmd.current_dir(dir.path());
 								    cmd.env("OPENAI_API_KEY", require_api_key());
 								    // We want three things at once:
 								    //   1. live streaming of the child’s stdout/stderr while the test is running
 								    //   2. captured output so we can keep using assert_cmd’s `Assert` helpers
 								    //   3. cross‑platform behavior (best effort)
 								    //
 								    // To get that we:
 								    //   • set both stdout and stderr to `piped()` so we can read them programmatically
 								    //   • spawn a thread for each stream that copies bytes into two sinks:
 								    //       – the parent process’ stdout/stderr for live visibility
 								    //       – an in‑memory buffer so we can pass it to `assert_cmd` later
 								    // Pass the prompt through the `--` separator so the CLI knows when user input ends.
 								    cmd.arg("--allow-no-git-exec")
 								        .arg("-v")
 								        .arg("--")
 								        .arg(prompt);
 								    cmd.stdin(Stdio::piped());
 								    cmd.stdout(Stdio::piped());
 								    cmd.stderr(Stdio::piped());
 								    let mut child = cmd.spawn().expect("failed to spawn codex-rs");
 								    // Send the terminating newline so Session::run exits after the first turn.
 								    child
 								        .stdin
 								        .as_mut()
 								        .expect("child stdin unavailable")
 								        .write_all(b"\n")
 								        .expect("failed to write to child stdin");
 								    // Helper that tees a ChildStdout/ChildStderr into both the parent’s stdio and a Vec<u8>.
 								    fn tee<R: Read + Send + 'static>(
 								        mut reader: R,
 								        mut writer: impl Write + Send + 'static,
 								    ) -> thread::JoinHandle<Vec<u8>> {
 								        thread::spawn(move || {
 								            let mut buf = Vec::new();
 								            let mut chunk = [0u8; 4096];
 								            loop {
 								                match reader.read(&mut chunk) {
 								                    Ok(0) => break,
 								                    Ok(n) => {
 								                        writer.write_all(&chunk[..n]).ok();
 								                        writer.flush().ok();
 								                        buf.extend_from_slice(&chunk[..n]);
 								                    }
 								                    Err(_) => break,
 								                }
 								            }
 								            buf
 								        })
 								    }
 								    let stdout_handle = tee(
 								        child.stdout.take().expect("child stdout"),
 								        std::io::stdout(),
 								    );
 								    let stderr_handle = tee(
 								        child.stderr.take().expect("child stderr"),
 								        std::io::stderr(),
 								    );
 								    let status = child.wait().expect("failed to wait on child");
 								    let stdout = stdout_handle.join().expect("stdout thread panicked");
 								    let stderr = stderr_handle.join().expect("stderr thread panicked");
 								    let output = std::process::Output {
 								        status,
 								        stdout,
 								        stderr,
 								    };
 								    (output.assert(), dir)
 								}
 								#[ignore]
 								#[test]
 								fn live_create_file_hello_txt() {
-												fix: enable clippy on tests (#870)

https://github.com/openai/codex/pull/855 added the clippy warning to
disallow `unwrap()`, but apparently we were not verifying that tests
were "clippy clean" in CI, so I ended up with a lot of local errors in
VS Code.

This turns on the check in CI and fixes the offenders.
											
										
										
											2025-05-08 16:02:56 -07:00
+								    #![allow(clippy::unwrap_used)]
-												feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)

As stated in `codex-rs/README.md`:

Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.

To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:

- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.

Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
											
										
										
											2025-04-24 13:31:40 -07:00
+								    if std::env::var("OPENAI_API_KEY").is_err() {
 								        eprintln!("skipping live_create_file_hello_txt – OPENAI_API_KEY not set");
 								        return;
 								    }
-												Update cargo to 2024 edition (#842)

Some effects of this change:
- New formatting changes across many files. No functionality changes
should occur from that.
- Calls to `set_env` are considered unsafe; since this only happens in
tests, we wrap them in `unsafe` blocks.
											
										
										
											2025-05-07 08:37:48 -07:00
+								    let (assert, dir) = run_live(
 								        "Use the shell tool with the apply_patch command to create a file named hello.txt containing the text 'hello'.",
 								    );
-												feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)

As stated in `codex-rs/README.md`:

Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.

To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:

- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.

Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
											
										
										
											2025-04-24 13:31:40 -07:00
 								    assert.success();
 								    let path = dir.path().join("hello.txt");
 								    assert!(path.exists(), "hello.txt was not created by the model");
 								    let contents = std::fs::read_to_string(path).unwrap();
 								    assert_eq!(contents.trim(), "hello");
 								}
 								#[ignore]
 								#[test]
 								fn live_print_working_directory() {
 								    if std::env::var("OPENAI_API_KEY").is_err() {
 								        eprintln!("skipping live_print_working_directory – OPENAI_API_KEY not set");
 								        return;
 								    }
 								    let (assert, dir) = run_live("Print the current working directory using the shell function.");
 								    assert
 								        .success()
 								        .stdout(predicate::str::contains(dir.path().to_string_lossy()));
 								}