Record Git metadata to rollout (#1598)

# Summary

- Writing effective evals for codex sessions requires context of the
overall repository state at the moment the session began
- This change adds this metadata (git repository, branch, commit hash)
to the top of the rollout of the session (if available - if not it
doesn't add anything)
- Currently, this is only effective on a clean working tree, as we can't
track uncommitted/untracked changes with the current metadata set.
Ideally in the future we may want to track unclean changes somehow, or
perhaps prompt the user to stash or commit them.

# Testing
- Added unit tests
- `cargo test && cargo clippy --tests && cargo fmt -- --config
imports_granularity=Item`

### Resulting Rollout
<img width="1243" height="127" alt="Screenshot 2025-07-17 at 1 50 00 PM"
src="https://github.com/user-attachments/assets/68108941-f015-45b2-985c-ea315ce05415"
/>
This commit is contained in:
vishnu-oai
2025-07-24 19:35:28 +01:00
committed by GitHub
parent d2be0720b5
commit 2437a8d17a
5 changed files with 475 additions and 13 deletions

View File

@@ -594,7 +594,7 @@ async fn submission_loop(
let mut restored_items: Option<Vec<ResponseItem>> = None;
let rollout_recorder: Option<RolloutRecorder> =
if let Some(path) = resume_path.as_ref() {
match RolloutRecorder::resume(path).await {
match RolloutRecorder::resume(path, cwd.clone()).await {
Ok((rec, saved)) => {
session_id = saved.session_id;
if !saved.items.is_empty() {

View File

@@ -0,0 +1,307 @@
use std::path::Path;
use serde::Deserialize;
use serde::Serialize;
use tokio::process::Command;
use tokio::time::Duration as TokioDuration;
use tokio::time::timeout;
/// Timeout for git commands to prevent freezing on large repositories
const GIT_COMMAND_TIMEOUT: TokioDuration = TokioDuration::from_secs(5);
#[derive(Serialize, Deserialize, Clone)]
pub struct GitInfo {
/// Current commit hash (SHA)
#[serde(skip_serializing_if = "Option::is_none")]
pub commit_hash: Option<String>,
/// Current branch name
#[serde(skip_serializing_if = "Option::is_none")]
pub branch: Option<String>,
/// Repository URL (if available from remote)
#[serde(skip_serializing_if = "Option::is_none")]
pub repository_url: Option<String>,
}
/// Collect git repository information from the given working directory using command-line git.
/// Returns None if no git repository is found or if git operations fail.
/// Uses timeouts to prevent freezing on large repositories.
/// All git commands (except the initial repo check) run in parallel for better performance.
pub async fn collect_git_info(cwd: &Path) -> Option<GitInfo> {
// Check if we're in a git repository first
let is_git_repo = run_git_command_with_timeout(&["rev-parse", "--git-dir"], cwd)
.await?
.status
.success();
if !is_git_repo {
return None;
}
// Run all git info collection commands in parallel
let (commit_result, branch_result, url_result) = tokio::join!(
run_git_command_with_timeout(&["rev-parse", "HEAD"], cwd),
run_git_command_with_timeout(&["rev-parse", "--abbrev-ref", "HEAD"], cwd),
run_git_command_with_timeout(&["remote", "get-url", "origin"], cwd)
);
let mut git_info = GitInfo {
commit_hash: None,
branch: None,
repository_url: None,
};
// Process commit hash
if let Some(output) = commit_result {
if output.status.success() {
if let Ok(hash) = String::from_utf8(output.stdout) {
git_info.commit_hash = Some(hash.trim().to_string());
}
}
}
// Process branch name
if let Some(output) = branch_result {
if output.status.success() {
if let Ok(branch) = String::from_utf8(output.stdout) {
let branch = branch.trim();
if branch != "HEAD" {
git_info.branch = Some(branch.to_string());
}
}
}
}
// Process repository URL
if let Some(output) = url_result {
if output.status.success() {
if let Ok(url) = String::from_utf8(output.stdout) {
git_info.repository_url = Some(url.trim().to_string());
}
}
}
Some(git_info)
}
/// Run a git command with a timeout to prevent blocking on large repositories
async fn run_git_command_with_timeout(args: &[&str], cwd: &Path) -> Option<std::process::Output> {
let result = timeout(
GIT_COMMAND_TIMEOUT,
Command::new("git").args(args).current_dir(cwd).output(),
)
.await;
match result {
Ok(Ok(output)) => Some(output),
_ => None, // Timeout or error
}
}
#[cfg(test)]
mod tests {
#![allow(clippy::expect_used)]
#![allow(clippy::unwrap_used)]
use super::*;
use std::fs;
use std::path::PathBuf;
use tempfile::TempDir;
// Helper function to create a test git repository
async fn create_test_git_repo(temp_dir: &TempDir) -> PathBuf {
let repo_path = temp_dir.path().to_path_buf();
// Initialize git repo
Command::new("git")
.args(["init"])
.current_dir(&repo_path)
.output()
.await
.expect("Failed to init git repo");
// Configure git user (required for commits)
Command::new("git")
.args(["config", "user.name", "Test User"])
.current_dir(&repo_path)
.output()
.await
.expect("Failed to set git user name");
Command::new("git")
.args(["config", "user.email", "test@example.com"])
.current_dir(&repo_path)
.output()
.await
.expect("Failed to set git user email");
// Create a test file and commit it
let test_file = repo_path.join("test.txt");
fs::write(&test_file, "test content").expect("Failed to write test file");
Command::new("git")
.args(["add", "."])
.current_dir(&repo_path)
.output()
.await
.expect("Failed to add files");
Command::new("git")
.args(["commit", "-m", "Initial commit"])
.current_dir(&repo_path)
.output()
.await
.expect("Failed to commit");
repo_path
}
#[tokio::test]
async fn test_collect_git_info_non_git_directory() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let result = collect_git_info(temp_dir.path()).await;
assert!(result.is_none());
}
#[tokio::test]
async fn test_collect_git_info_git_repository() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let repo_path = create_test_git_repo(&temp_dir).await;
let git_info = collect_git_info(&repo_path)
.await
.expect("Should collect git info from repo");
// Should have commit hash
assert!(git_info.commit_hash.is_some());
let commit_hash = git_info.commit_hash.unwrap();
assert_eq!(commit_hash.len(), 40); // SHA-1 hash should be 40 characters
assert!(commit_hash.chars().all(|c| c.is_ascii_hexdigit()));
// Should have branch (likely "main" or "master")
assert!(git_info.branch.is_some());
let branch = git_info.branch.unwrap();
assert!(branch == "main" || branch == "master");
// Repository URL might be None for local repos without remote
// This is acceptable behavior
}
#[tokio::test]
async fn test_collect_git_info_with_remote() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let repo_path = create_test_git_repo(&temp_dir).await;
// Add a remote origin
Command::new("git")
.args([
"remote",
"add",
"origin",
"https://github.com/example/repo.git",
])
.current_dir(&repo_path)
.output()
.await
.expect("Failed to add remote");
let git_info = collect_git_info(&repo_path)
.await
.expect("Should collect git info from repo");
// Should have repository URL
assert_eq!(
git_info.repository_url,
Some("https://github.com/example/repo.git".to_string())
);
}
#[tokio::test]
async fn test_collect_git_info_detached_head() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let repo_path = create_test_git_repo(&temp_dir).await;
// Get the current commit hash
let output = Command::new("git")
.args(["rev-parse", "HEAD"])
.current_dir(&repo_path)
.output()
.await
.expect("Failed to get HEAD");
let commit_hash = String::from_utf8(output.stdout).unwrap().trim().to_string();
// Checkout the commit directly (detached HEAD)
Command::new("git")
.args(["checkout", &commit_hash])
.current_dir(&repo_path)
.output()
.await
.expect("Failed to checkout commit");
let git_info = collect_git_info(&repo_path)
.await
.expect("Should collect git info from repo");
// Should have commit hash
assert!(git_info.commit_hash.is_some());
// Branch should be None for detached HEAD (since rev-parse --abbrev-ref HEAD returns "HEAD")
assert!(git_info.branch.is_none());
}
#[tokio::test]
async fn test_collect_git_info_with_branch() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let repo_path = create_test_git_repo(&temp_dir).await;
// Create and checkout a new branch
Command::new("git")
.args(["checkout", "-b", "feature-branch"])
.current_dir(&repo_path)
.output()
.await
.expect("Failed to create branch");
let git_info = collect_git_info(&repo_path)
.await
.expect("Should collect git info from repo");
// Should have the new branch name
assert_eq!(git_info.branch, Some("feature-branch".to_string()));
}
#[test]
fn test_git_info_serialization() {
let git_info = GitInfo {
commit_hash: Some("abc123def456".to_string()),
branch: Some("main".to_string()),
repository_url: Some("https://github.com/example/repo.git".to_string()),
};
let json = serde_json::to_string(&git_info).expect("Should serialize GitInfo");
let parsed: serde_json::Value = serde_json::from_str(&json).expect("Should parse JSON");
assert_eq!(parsed["commit_hash"], "abc123def456");
assert_eq!(parsed["branch"], "main");
assert_eq!(
parsed["repository_url"],
"https://github.com/example/repo.git"
);
}
#[test]
fn test_git_info_serialization_with_nones() {
let git_info = GitInfo {
commit_hash: None,
branch: None,
repository_url: None,
};
let json = serde_json::to_string(&git_info).expect("Should serialize GitInfo");
let parsed: serde_json::Value = serde_json::from_str(&json).expect("Should parse JSON");
// Fields with None values should be omitted due to skip_serializing_if
assert!(!parsed.as_object().unwrap().contains_key("commit_hash"));
assert!(!parsed.as_object().unwrap().contains_key("branch"));
assert!(!parsed.as_object().unwrap().contains_key("repository_url"));
}
}

View File

@@ -19,6 +19,7 @@ pub mod error;
pub mod exec;
pub mod exec_env;
mod flags;
pub mod git_info;
mod is_safe_command;
mod mcp_connection_manager;
mod mcp_tool_call;

View File

@@ -20,6 +20,8 @@ use tracing::warn;
use uuid::Uuid;
use crate::config::Config;
use crate::git_info::GitInfo;
use crate::git_info::collect_git_info;
use crate::models::ResponseItem;
const SESSIONS_SUBDIR: &str = "sessions";
@@ -31,6 +33,14 @@ pub struct SessionMeta {
pub instructions: Option<String>,
}
#[derive(Serialize)]
struct SessionMetaWithGit {
#[serde(flatten)]
meta: SessionMeta,
#[serde(skip_serializing_if = "Option::is_none")]
git: Option<GitInfo>,
}
#[derive(Serialize, Deserialize, Default, Clone)]
pub struct SessionStateSnapshot {}
@@ -86,15 +96,12 @@ impl RolloutRecorder {
.format(timestamp_format)
.map_err(|e| IoError::other(format!("failed to format timestamp: {e}")))?;
let meta = SessionMeta {
timestamp,
id: session_id,
instructions,
};
// Clone the cwd for the spawned task to collect git info asynchronously
let cwd = config.cwd.clone();
// A reasonably-sized bounded channel. If the buffer fills up the send
// future will yield, which is fine we only need to ensure we do not
// perform *blocking* I/O on the callers thread.
// perform *blocking* I/O on the caller's thread.
let (tx, rx) = mpsc::channel::<RolloutCmd>(256);
// Spawn a Tokio task that owns the file handle and performs async
@@ -103,7 +110,12 @@ impl RolloutRecorder {
tokio::task::spawn(rollout_writer(
tokio::fs::File::from_std(file),
rx,
Some(meta),
Some(SessionMeta {
timestamp,
id: session_id,
instructions,
}),
cwd,
));
Ok(Self { tx })
@@ -143,7 +155,10 @@ impl RolloutRecorder {
.map_err(|e| IoError::other(format!("failed to queue rollout state: {e}")))
}
pub async fn resume(path: &Path) -> std::io::Result<(Self, SavedSession)> {
pub async fn resume(
path: &Path,
cwd: std::path::PathBuf,
) -> std::io::Result<(Self, SavedSession)> {
info!("Resuming rollout from {path:?}");
let text = tokio::fs::read_to_string(path).await?;
let mut lines = text.lines();
@@ -201,7 +216,12 @@ impl RolloutRecorder {
.open(path)?;
let (tx, rx) = mpsc::channel::<RolloutCmd>(256);
tokio::task::spawn(rollout_writer(tokio::fs::File::from_std(file), rx, None));
tokio::task::spawn(rollout_writer(
tokio::fs::File::from_std(file),
rx,
None,
cwd,
));
info!("Resumed rollout successfully from {path:?}");
Ok((Self { tx }, saved))
}
@@ -270,15 +290,26 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
async fn rollout_writer(
mut file: tokio::fs::File,
mut rx: mpsc::Receiver<RolloutCmd>,
meta: Option<SessionMeta>,
mut meta: Option<SessionMeta>,
cwd: std::path::PathBuf,
) {
if let Some(meta) = meta {
if let Ok(json) = serde_json::to_string(&meta) {
// If we have a meta, collect git info asynchronously and write meta first
if let Some(session_meta) = meta.take() {
let git_info = collect_git_info(&cwd).await;
let session_meta_with_git = SessionMetaWithGit {
meta: session_meta,
git: git_info,
};
// Write the SessionMeta as the first item in the file
if let Ok(json) = serde_json::to_string(&session_meta_with_git) {
let _ = file.write_all(json.as_bytes()).await;
let _ = file.write_all(b"\n").await;
let _ = file.flush().await;
}
}
// Process rollout commands
while let Some(cmd) = rx.recv().await {
match cmd {
RolloutCmd::AddItems(items) => {

View File

@@ -329,6 +329,7 @@ async fn integration_creates_and_checks_session_file() {
.env("OPENAI_API_KEY", "dummy")
.env("CODEX_RS_SSE_FIXTURE", &fixture)
.env("OPENAI_BASE_URL", "http://unused.local");
let output2 = cmd2.output().unwrap();
assert!(output2.status.success(), "resume codex-cli run failed");
@@ -359,3 +360,125 @@ async fn integration_creates_and_checks_session_file() {
"rollout missing resumed marker"
);
}
/// Integration test to verify git info is collected and recorded in session files.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn integration_git_info_unit_test() {
// This test verifies git info collection works independently
// without depending on the full CLI integration
// 1. Create temp directory for git repo
let temp_dir = TempDir::new().unwrap();
let git_repo = temp_dir.path().to_path_buf();
// 2. Initialize a git repository with some content
let init_output = std::process::Command::new("git")
.args(["init"])
.current_dir(&git_repo)
.output()
.unwrap();
assert!(init_output.status.success(), "git init failed");
// Configure git user (required for commits)
std::process::Command::new("git")
.args(["config", "user.name", "Integration Test"])
.current_dir(&git_repo)
.output()
.unwrap();
std::process::Command::new("git")
.args(["config", "user.email", "test@example.com"])
.current_dir(&git_repo)
.output()
.unwrap();
// Create a test file and commit it
let test_file = git_repo.join("test.txt");
std::fs::write(&test_file, "integration test content").unwrap();
std::process::Command::new("git")
.args(["add", "."])
.current_dir(&git_repo)
.output()
.unwrap();
let commit_output = std::process::Command::new("git")
.args(["commit", "-m", "Integration test commit"])
.current_dir(&git_repo)
.output()
.unwrap();
assert!(commit_output.status.success(), "git commit failed");
// Create a branch to test branch detection
std::process::Command::new("git")
.args(["checkout", "-b", "integration-test-branch"])
.current_dir(&git_repo)
.output()
.unwrap();
// Add a remote to test repository URL detection
std::process::Command::new("git")
.args([
"remote",
"add",
"origin",
"https://github.com/example/integration-test.git",
])
.current_dir(&git_repo)
.output()
.unwrap();
// 3. Test git info collection directly
let git_info = codex_core::git_info::collect_git_info(&git_repo).await;
// 4. Verify git info is present and contains expected data
assert!(git_info.is_some(), "Git info should be collected");
let git_info = git_info.unwrap();
// Check that we have a commit hash
assert!(
git_info.commit_hash.is_some(),
"Git info should contain commit_hash"
);
let commit_hash = git_info.commit_hash.as_ref().unwrap();
assert_eq!(commit_hash.len(), 40, "Commit hash should be 40 characters");
assert!(
commit_hash.chars().all(|c| c.is_ascii_hexdigit()),
"Commit hash should be hexadecimal"
);
// Check that we have the correct branch
assert!(git_info.branch.is_some(), "Git info should contain branch");
let branch = git_info.branch.as_ref().unwrap();
assert_eq!(
branch, "integration-test-branch",
"Branch should match what we created"
);
// Check that we have the repository URL
assert!(
git_info.repository_url.is_some(),
"Git info should contain repository_url"
);
let repo_url = git_info.repository_url.as_ref().unwrap();
assert_eq!(
repo_url, "https://github.com/example/integration-test.git",
"Repository URL should match what we configured"
);
println!("✅ Git info collection test passed!");
println!(" Commit: {commit_hash}");
println!(" Branch: {branch}");
println!(" Repo: {repo_url}");
// 5. Test serialization to ensure it works in SessionMeta
let serialized = serde_json::to_string(&git_info).unwrap();
let deserialized: codex_core::git_info::GitInfo = serde_json::from_str(&serialized).unwrap();
assert_eq!(git_info.commit_hash, deserialized.commit_hash);
assert_eq!(git_info.branch, deserialized.branch);
assert_eq!(git_info.repository_url, deserialized.repository_url);
println!("✅ Git info serialization test passed!");
}