codex-rs/apply-patch/src/parser.rs

//! This module is responsible for parsing & validating a patch into a list of "hunks".
//! (It does not attempt to actually check that the patch can be applied to the filesystem.)
//!
//! The official Lark grammar for the apply-patch format is:
//!
//! start: begin_patch hunk+ end_patch
//! begin_patch: "*** Begin Patch" LF
//! end_patch: "*** End Patch" LF?
//!
//! hunk: add_hunk | delete_hunk | update_hunk
//! add_hunk: "*** Add File: " filename LF add_line+
//! delete_hunk: "*** Delete File: " filename LF
//! update_hunk: "*** Update File: " filename LF change_move? change?
//! filename: /(.+)/
//! add_line: "+" /(.+)/ LF -> line
//!
//! change_move: "*** Move to: " filename LF
//! change: (change_context | change_line)+ eof_line?
//! change_context: ("@@" | "@@ " /(.+)/) LF
//! change_line: ("+" | "-" | " ") /(.+)/ LF
//! eof_line: "*** End of File" LF
//!
//! The parser below is a little more lenient than the explicit spec and allows for
//! leading/trailing whitespace around patch markers.
use std::path::PathBuf;

use thiserror::Error;

/// First line of every patch.
const BEGIN_PATCH_MARKER: &str = "*** Begin Patch";
/// Last line of every patch.
const END_PATCH_MARKER: &str = "*** End Patch";
/// Header prefix of a hunk that creates a file; the rest of the line is the path.
const ADD_FILE_MARKER: &str = "*** Add File: ";
/// Header prefix of a hunk that deletes a file; the rest of the line is the path.
const DELETE_FILE_MARKER: &str = "*** Delete File: ";
/// Header prefix of a hunk that modifies a file; the rest of the line is the path.
const UPDATE_FILE_MARKER: &str = "*** Update File: ";
/// Prefix of the optional line after an update header that gives the file's new path.
const MOVE_TO_MARKER: &str = "*** Move to: ";
/// Line that ends an update chunk and marks it as anchored to the end of the file.
const EOF_MARKER: &str = "*** End of File";
/// Prefix of a chunk header that carries context text (note the trailing space).
const CHANGE_CONTEXT_MARKER: &str = "@@ ";
/// A chunk header with no context text; it must match the whole line.
const EMPTY_CHANGE_CONTEXT_MARKER: &str = "@@";

/// Errors reported while parsing a patch.
#[derive(Debug, PartialEq, Error)]
pub enum ParseError {
    /// The patch as a whole is malformed (for example, a begin/end marker is missing).
    /// The payload is a human-readable explanation.
    #[error("invalid patch: {0}")]
    InvalidPatchError(String),
    /// A hunk inside the patch is malformed. `message` explains the problem and
    /// `line_number` is the patch line the parser attributes it to.
    #[error("invalid hunk at line {line_number}, {message}")]
    InvalidHunkError { message: String, line_number: usize },
}
use ParseError::*;

/// One file-level operation described by a patch.
#[derive(Debug, PartialEq)]
#[allow(clippy::enum_variant_names)]
pub enum Hunk {
    /// Create a file at `path`. `contents` is the text of the hunk's `+` lines,
    /// each followed by a newline.
    AddFile {
        path: PathBuf,
        contents: String,
    },
    /// Delete the file at `path`.
    DeleteFile {
        path: PathBuf,
    },
    /// Modify the file at `path`. `move_path` is set when the hunk carries a
    /// `*** Move to: ` line.
    UpdateFile {
        path: PathBuf,
        move_path: Option<PathBuf>,

        /// Chunks should be in order, i.e. the `change_context` of one chunk
        /// should occur later in the file than the previous chunk.
        chunks: Vec<UpdateFileChunk>,
    },
}
use Hunk::*;

/// One contiguous replacement inside an `UpdateFile` hunk: the lines expected in
/// the existing file (`old_lines`) and the lines that should take their place
/// (`new_lines`). Context lines from the patch appear in both lists.
#[derive(Debug, PartialEq)]
pub struct UpdateFileChunk {
    /// A single line of context used to narrow down the position of the chunk
    /// (this is usually a class, method, or function definition.)
    pub change_context: Option<String>,

    /// A contiguous block of lines that should be replaced with `new_lines`.
    /// `old_lines` must occur strictly after `change_context`.
    pub old_lines: Vec<String>,
    pub new_lines: Vec<String>,

    /// If set to true, `old_lines` must occur at the end of the source file.
    /// (Tolerance around trailing newlines should be encouraged.)
    pub is_end_of_file: bool,
}

/// Parses a complete patch into its list of hunks.
///
/// The patch is trimmed as a whole, after which its first and last lines must be the
/// `*** Begin Patch` and `*** End Patch` markers. Whitespace around either marker on its own
/// line is ignored, in line with the leniency `parse_one_hunk` applies to hunk headers.
/// Everything between the two markers is parsed as a sequence of hunks; a patch without any
/// hunks yields an empty list.
///
/// # Errors
///
/// Returns `InvalidPatchError` when either marker is missing, and propagates the error of the
/// first hunk that fails to parse. Line numbers in hunk errors are 1-based and relative to the
/// trimmed patch.
pub fn parse_patch(patch: &str) -> Result<Vec<Hunk>, ParseError> {
    let lines: Vec<&str> = patch.trim().lines().collect();
    // Compare the trimmed marker lines so that trailing spaces after `*** Begin Patch` or
    // indentation before `*** End Patch` do not reject an otherwise valid patch.
    if lines.first().map(|line| line.trim()) != Some(BEGIN_PATCH_MARKER) {
        return Err(InvalidPatchError(String::from(
            "The first line of the patch must be '*** Begin Patch'",
        )));
    }
    if lines.last().map(|line| line.trim()) != Some(END_PATCH_MARKER) {
        return Err(InvalidPatchError(String::from(
            "The last line of the patch must be '*** End Patch'",
        )));
    }

    // A single line cannot be both markers, so at this point there are at least two lines and
    // the range below is always valid.
    let mut remaining_lines = &lines[1..lines.len() - 1];
    let mut hunks: Vec<Hunk> = Vec::new();
    // The begin marker is line 1, so the first hunk header sits on line 2.
    let mut line_number = 2;
    while !remaining_lines.is_empty() {
        let (hunk, hunk_lines) = parse_one_hunk(remaining_lines, line_number)?;
        hunks.push(hunk);
        line_number += hunk_lines;
        remaining_lines = &remaining_lines[hunk_lines..];
    }
    Ok(hunks)
}

/// Parses the hunk whose header is `lines[0]`.
///
/// On success, yields the hunk together with the number of entries of `lines` it spans (header
/// included) so the caller can advance to the next hunk. `line_number` is the patch line number
/// of the header and is only used when building a `ParseError`.
fn parse_one_hunk(lines: &[&str], line_number: usize) -> Result<(Hunk, usize), ParseError> {
    // Padding around the header line is ignored; the marker text itself must match exactly.
    let first_line = lines[0].trim();
    let body = &lines[1..];

    if let Some(path) = first_line.strip_prefix(ADD_FILE_MARKER) {
        // Add File: the contents are the unbroken run of `+` lines right after the header.
        let added: Vec<&str> = body
            .iter()
            .map_while(|line| line.strip_prefix('+'))
            .collect();
        let mut contents = String::new();
        for text in &added {
            contents.push_str(text);
            contents.push('\n');
        }
        let hunk = AddFile {
            path: PathBuf::from(path),
            contents,
        };
        return Ok((hunk, 1 + added.len()));
    }

    if let Some(path) = first_line.strip_prefix(DELETE_FILE_MARKER) {
        // Delete File: the header is the whole hunk.
        let hunk = DeleteFile {
            path: PathBuf::from(path),
        };
        return Ok((hunk, 1));
    }

    let Some(path) = first_line.strip_prefix(UPDATE_FILE_MARKER) else {
        return Err(InvalidHunkError {
            message: format!(
                "'{first_line}' is not a valid hunk header. Valid hunk headers: '*** Add File: {{path}}', '*** Delete File: {{path}}', '*** Update File: {{path}}'"
            ),
            line_number,
        });
    };

    // Update File: an optional move line may directly follow the header.
    let move_path = body
        .first()
        .and_then(|line| line.strip_prefix(MOVE_TO_MARKER));

    // `cursor` counts the entries of `lines` consumed so far: the header, plus the move line
    // when there is one.
    let mut cursor = if move_path.is_some() { 2 } else { 1 };
    let mut chunks = Vec::new();
    while let Some(next_line) = lines.get(cursor) {
        // Blank lines between chunks are consumed but carry no meaning.
        if next_line.trim().is_empty() {
            cursor += 1;
            continue;
        }
        // Any `***` line belongs to whatever comes after this hunk.
        if next_line.starts_with("***") {
            break;
        }
        // Only the first chunk may omit its `@@` header.
        let (chunk, chunk_lines) =
            parse_update_file_chunk(&lines[cursor..], line_number + cursor, chunks.is_empty())?;
        chunks.push(chunk);
        cursor += chunk_lines;
    }

    if chunks.is_empty() {
        return Err(InvalidHunkError {
            message: format!("Update file hunk for path '{path}' is empty"),
            line_number,
        });
    }

    let hunk = UpdateFile {
        path: PathBuf::from(path),
        move_path: move_path.map(PathBuf::from),
        chunks,
    };
    Ok((hunk, cursor))
}

fn parse_update_file_chunk(
    lines: &[&str],
    line_number: usize,
    allow_missing_context: bool,
) -> Result<(UpdateFileChunk, usize), ParseError> {
    if lines.is_empty() {
        return Err(InvalidHunkError {
            message: "Update hunk does not contain any lines".to_string(),
            line_number,
        });
    }
    // If we see an explicit context marker @@ or @@ <context>, consume it; otherwise, optionally
    // allow treating the chunk as starting directly with diff lines.
    let (change_context, start_index) = if lines[0] == EMPTY_CHANGE_CONTEXT_MARKER {
        (None, 1)
    } else if let Some(context) = lines[0].strip_prefix(CHANGE_CONTEXT_MARKER) {
        (Some(context.to_string()), 1)
    } else {
        if !allow_missing_context {
            return Err(InvalidHunkError {
                message: format!(
                    "Expected update hunk to start with a @@ context marker, got: '{}'",
                    lines[0]
                ),
                line_number,
            });
        }
        (None, 0)
    };
    if start_index >= lines.len() {
        return Err(InvalidHunkError {
            message: "Update hunk does not contain any lines".to_string(),
            line_number: line_number + 1,
        });
    }
    let mut chunk = UpdateFileChunk {
        change_context,
        old_lines: Vec::new(),
        new_lines: Vec::new(),
        is_end_of_file: false,
    };
    let mut parsed_lines = 0;
    for line in &lines[start_index..] {
        match *line {
            EOF_MARKER => {
                if parsed_lines == 0 {
                    return Err(InvalidHunkError {
                        message: "Update hunk does not contain any lines".to_string(),
                        line_number: line_number + 1,
                    });
                }
                chunk.is_end_of_file = true;
                parsed_lines += 1;
                break;
            }
            line_contents => {
                match line_contents.chars().next() {
                    None => {
                        // Interpret this as an empty line.
                        chunk.old_lines.push(String::new());
                        chunk.new_lines.push(String::new());
                    }
                    Some(' ') => {
                        chunk.old_lines.push(line_contents[1..].to_string());
                        chunk.new_lines.push(line_contents[1..].to_string());
                    }
                    Some('+') => {
                        chunk.new_lines.push(line_contents[1..].to_string());
                    }
                    Some('-') => {
                        chunk.old_lines.push(line_contents[1..].to_string());
                    }
                    _ => {
                        if parsed_lines == 0 {
                            return Err(InvalidHunkError {
                                message: format!(
                                    "Unexpected line found in update hunk: '{line_contents}'. Every line should start with ' ' (context line), '+' (added line), or '-' (removed line)"
                                ),
                                line_number: line_number + 1,
                            });
                        }
                        // Assume this is the start of the next hunk.
                        break;
                    }
                }
                parsed_lines += 1;
            }
        }
    }

    Ok((chunk, parsed_lines + start_index))
}

/// End-to-end checks of `parse_patch`: marker validation, an empty update hunk,
/// an empty patch, and patches that combine several hunk kinds.
#[test]
fn test_parse_patch() {
    // Input that does not start with the begin marker is rejected.
    assert_eq!(
        parse_patch("bad"),
        Err(InvalidPatchError(
            "The first line of the patch must be '*** Begin Patch'".to_string()
        ))
    );
    // Input that does not end with the end marker is rejected.
    assert_eq!(
        parse_patch("*** Begin Patch\nbad"),
        Err(InvalidPatchError(
            "The last line of the patch must be '*** End Patch'".to_string()
        ))
    );
    // An update hunk without any chunks is an error reported at the header's line.
    assert_eq!(
        parse_patch(
            "*** Begin Patch\n\
             *** Update File: test.py\n\
             *** End Patch"
        ),
        Err(InvalidHunkError {
            message: "Update file hunk for path 'test.py' is empty".to_string(),
            line_number: 2,
        })
    );
    // A patch with no hunks at all parses to an empty list.
    assert_eq!(
        parse_patch(
            "*** Begin Patch\n\
             *** End Patch"
        ),
        Ok(Vec::new())
    );
    // One hunk of each kind, including an update that also moves the file.
    assert_eq!(
        parse_patch(
            "*** Begin Patch\n\
             *** Add File: path/add.py\n\
             +abc\n\
             +def\n\
             *** Delete File: path/delete.py\n\
             *** Update File: path/update.py\n\
             *** Move to: path/update2.py\n\
             @@ def f():\n\
             -    pass\n\
             +    return 123\n\
             *** End Patch"
        ),
        Ok(vec![
            AddFile {
                path: PathBuf::from("path/add.py"),
                contents: "abc\ndef\n".to_string()
            },
            DeleteFile {
                path: PathBuf::from("path/delete.py")
            },
            UpdateFile {
                path: PathBuf::from("path/update.py"),
                move_path: Some(PathBuf::from("path/update2.py")),
                chunks: vec![UpdateFileChunk {
                    change_context: Some("def f():".to_string()),
                    old_lines: vec!["    pass".to_string()],
                    new_lines: vec!["    return 123".to_string()],
                    is_end_of_file: false
                }]
            }
        ])
    );
    // Update hunk followed by another hunk (Add File).
    assert_eq!(
        parse_patch(
            "*** Begin Patch\n\
             *** Update File: file.py\n\
             @@\n\
             +line\n\
             *** Add File: other.py\n\
             +content\n\
             *** End Patch"
        ),
        Ok(vec![
            UpdateFile {
                path: PathBuf::from("file.py"),
                move_path: None,
                chunks: vec![UpdateFileChunk {
                    change_context: None,
                    old_lines: vec![],
                    new_lines: vec!["line".to_string()],
                    is_end_of_file: false
                }],
            },
            AddFile {
                path: PathBuf::from("other.py"),
                contents: "content\n".to_string()
            }
        ])
    );

    // Update hunk without an explicit @@ header for the first chunk should parse.
    // Use a raw string to preserve the leading space diff marker on the context line.
    assert_eq!(
        parse_patch(
            r#"*** Begin Patch
*** Update File: file2.py
 import foo
+bar
*** End Patch"#,
        ),
        Ok(vec![UpdateFile {
            path: PathBuf::from("file2.py"),
            move_path: None,
            chunks: vec![UpdateFileChunk {
                change_context: None,
                old_lines: vec!["import foo".to_string()],
                new_lines: vec!["import foo".to_string(), "bar".to_string()],
                is_end_of_file: false,
            }],
        }])
    );
}

/// An unrecognized hunk header is rejected, and the error carries the line
/// number the caller supplied.
#[test]
fn test_parse_one_hunk() {
    let expected_error = InvalidHunkError {
        message: "'bad' is not a valid hunk header. \
            Valid hunk headers: '*** Add File: {path}', '*** Delete File: {path}', '*** Update File: {path}'".to_string(),
        line_number: 234,
    };
    let outcome = parse_one_hunk(&["bad"], 234);
    assert_eq!(outcome, Err(expected_error));
    // Other edge cases are already covered by tests above/below.
}

/// Exercises `parse_update_file_chunk` with a mandatory `@@` header: the
/// rejection paths first, then two chunks that parse successfully.
#[test]
fn test_update_file_chunk() {
    /// Builds the `Err` value the parser is expected to return.
    fn hunk_error(message: &str, line_number: usize) -> Result<(UpdateFileChunk, usize), ParseError> {
        Err(InvalidHunkError {
            message: message.to_string(),
            line_number,
        })
    }

    /// Converts string literals into the owned lines stored in a chunk.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    // A first line that is not a `@@` header is reported at the chunk's own line.
    assert_eq!(
        parse_update_file_chunk(&["bad"], 123, false),
        hunk_error(
            "Expected update hunk to start with a @@ context marker, got: 'bad'",
            123
        )
    );

    // A header with nothing after it.
    assert_eq!(
        parse_update_file_chunk(&["@@"], 123, false),
        hunk_error("Update hunk does not contain any lines", 124)
    );

    // A header followed by something that is not a diff line.
    assert_eq!(
        parse_update_file_chunk(&["@@", "bad"], 123, false),
        hunk_error(
            "Unexpected line found in update hunk: 'bad'. \
             Every line should start with ' ' (context line), '+' (added line), or '-' (removed line)",
            124
        )
    );

    // A header followed immediately by the end-of-file marker.
    assert_eq!(
        parse_update_file_chunk(&["@@", "*** End of File"], 123, false),
        hunk_error("Update hunk does not contain any lines", 124)
    );

    // Context, removal and addition lines; parsing stops before the `*** End Patch` line.
    let mixed_input = [
        "@@ change_context",
        "",
        " context",
        "-remove",
        "+add",
        " context2",
        "*** End Patch",
    ];
    let mixed_chunk = UpdateFileChunk {
        change_context: Some("change_context".to_string()),
        old_lines: owned(&["", "context", "remove", "context2"]),
        new_lines: owned(&["", "context", "add", "context2"]),
        is_end_of_file: false,
    };
    assert_eq!(
        parse_update_file_chunk(&mixed_input, 123, false),
        Ok((mixed_chunk, 6))
    );

    // The end-of-file marker is consumed and recorded on the chunk.
    let eof_chunk = UpdateFileChunk {
        change_context: None,
        old_lines: Vec::new(),
        new_lines: owned(&["line"]),
        is_end_of_file: true,
    };
    assert_eq!(
        parse_update_file_chunk(&["@@", "+line", "*** End of File"], 123, false),
        Ok((eof_chunk, 3))
    );
}
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629) As stated in `codex-rs/README.md`: Today, Codex CLI is written in TypeScript and requires Node.js 22+ to run it. For a number of users, this runtime requirement inhibits adoption: they would be better served by a standalone executable. As maintainers, we want Codex to run efficiently in a wide range of environments with minimal overhead. We also want to take advantage of operating system-specific APIs to provide better sandboxing, where possible. To that end, we are moving forward with a Rust implementation of Codex CLI contained in this folder, which has the following benefits: - The CLI compiles to small, standalone, platform-specific binaries. - Can make direct, native calls to [seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and [landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in order to support sandboxing on Linux. - No runtime garbage collection, resulting in lower memory consumption and better, more predictable performance. Currently, the Rust implementation is materially behind the TypeScript implementation in functionality, so continue to use the TypeScript implmentation for the time being. We will publish native executables via GitHub Releases as soon as we feel the Rust version is usable. 2025-04-24 13:31:40 -07:00			`//! This module is responsible for parsing & validating a patch into a list of "hunks".`
			`//! (It does not attempt to actually check that the patch can be applied to the filesystem.)`
			`//!`
			`//! The official Lark grammar for the apply-patch format is:`
			`//!`
			`//! start: begin_patch hunk+ end_patch`
			`//! begin_patch: "*** Begin Patch" LF`
			`//! end_patch: "*** End Patch" LF?`
			`//!`
			`//! hunk: add_hunk \| delete_hunk \| update_hunk`
			`//! add_hunk: "*** Add File: " filename LF add_line+`
			`//! delete_hunk: "*** Delete File: " filename LF`
			`//! update_hunk: "*** Update File: " filename LF change_move? change?`
			`//! filename: /(.+)/`
			`//! add_line: "+" /(.+)/ LF -> line`
			`//!`
			`//! change_move: "*** Move to: " filename LF`
			`//! change: (change_context \| change_line)+ eof_line?`
			`//! change_context: ("@@" \| "@@ " /(.+)/) LF`
			`//! change_line: ("+" \| "-" \| " ") /(.+)/ LF`
			`//! eof_line: "*** End of File" LF`
			`//!`
			`//! The parser below is a little more lenient than the explicit spec and allows for`
			`//! leading/trailing whitespace around patch markers.`
			`use std::path::PathBuf;`

			`use thiserror::Error;`

			`const BEGIN_PATCH_MARKER: &str = "*** Begin Patch";`
			`const END_PATCH_MARKER: &str = "*** End Patch";`
			`const ADD_FILE_MARKER: &str = "*** Add File: ";`
			`const DELETE_FILE_MARKER: &str = "*** Delete File: ";`
			`const UPDATE_FILE_MARKER: &str = "*** Update File: ";`
			`const MOVE_TO_MARKER: &str = "*** Move to: ";`
			`const EOF_MARKER: &str = "*** End of File";`
			`const CHANGE_CONTEXT_MARKER: &str = "@@ ";`
			`const EMPTY_CHANGE_CONTEXT_MARKER: &str = "@@";`

			`#[derive(Debug, PartialEq, Error)]`
			`pub enum ParseError {`
			`#[error("invalid patch: {0}")]`
			`InvalidPatchError(String),`
			`#[error("invalid hunk at line {line_number}, {message}")]`
			`InvalidHunkError { message: String, line_number: usize },`
			`}`
			`use ParseError::*;`

			`#[derive(Debug, PartialEq)]`
			`#[allow(clippy::enum_variant_names)]`
			`pub enum Hunk {`
			`AddFile {`
			`path: PathBuf,`
			`contents: String,`
			`},`
			`DeleteFile {`
			`path: PathBuf,`
			`},`
			`UpdateFile {`
			`path: PathBuf,`
			`move_path: Option<PathBuf>,`

			/// Chunks should be in order, i.e. the `change_context` of one chunk
			`/// should occur later in the file than the previous chunk.`
			`chunks: Vec<UpdateFileChunk>,`
			`},`
			`}`
			`use Hunk::*;`

			`#[derive(Debug, PartialEq)]`
			`pub struct UpdateFileChunk {`
			`/// A single line of context used to narrow down the position of the chunk`
			`/// (this is usually a class, method, or function definition.)`
			`pub change_context: Option<String>,`

			/// A contiguous block of lines that should be replaced with `new_lines`.
			/// `old_lines` must occur strictly after `change_context`.
			`pub old_lines: Vec<String>,`
			`pub new_lines: Vec<String>,`

			/// If set to true, `old_lines` must occur at the end of the source file.
			`/// (Tolerance around trailing newlines should be encouraged.)`
			`pub is_end_of_file: bool,`
			`}`

			`pub fn parse_patch(patch: &str) -> Result<Vec<Hunk>, ParseError> {`
			`let lines: Vec<&str> = patch.trim().lines().collect();`
			`if lines.is_empty() \|\| lines[0] != BEGIN_PATCH_MARKER {`
			`return Err(InvalidPatchError(String::from(`
			`"The first line of the patch must be '*** Begin Patch'",`
			`)));`
			`}`
			`let last_line_index = lines.len() - 1;`
			`if lines[last_line_index] != END_PATCH_MARKER {`
			`return Err(InvalidPatchError(String::from(`
			`"The last line of the patch must be '*** End Patch'",`
			`)));`
			`}`
			`let mut hunks: Vec<Hunk> = Vec::new();`
			`let mut remaining_lines = &lines[1..last_line_index];`
			`let mut line_number = 2;`
			`while !remaining_lines.is_empty() {`
			`let (hunk, hunk_lines) = parse_one_hunk(remaining_lines, line_number)?;`
			`hunks.push(hunk);`
			`line_number += hunk_lines;`
			`remaining_lines = &remaining_lines[hunk_lines..]`
			`}`
			`Ok(hunks)`
			`}`

			`/// Attempts to parse a single hunk from the start of lines.`
			`/// Returns the parsed hunk and the number of lines parsed (or a ParseError).`
			`fn parse_one_hunk(lines: &[&str], line_number: usize) -> Result<(Hunk, usize), ParseError> {`
			`// Be tolerant of case mismatches and extra padding around marker strings.`
			`let first_line = lines[0].trim();`
			`if let Some(path) = first_line.strip_prefix(ADD_FILE_MARKER) {`
			`// Add File`
			`let mut contents = String::new();`
			`let mut parsed_lines = 1;`
			`for add_line in &lines[1..] {`
			`if let Some(line_to_add) = add_line.strip_prefix('+') {`
			`contents.push_str(line_to_add);`
			`contents.push('\n');`
			`parsed_lines += 1;`
			`} else {`
			`break;`
			`}`
			`}`
			`return Ok((`
			`AddFile {`
			`path: PathBuf::from(path),`
			`contents,`
			`},`
			`parsed_lines,`
			`));`
			`} else if let Some(path) = first_line.strip_prefix(DELETE_FILE_MARKER) {`
			`// Delete File`
			`return Ok((`
			`DeleteFile {`
			`path: PathBuf::from(path),`
			`},`
			`1,`
			`));`
			`} else if let Some(path) = first_line.strip_prefix(UPDATE_FILE_MARKER) {`
			`// Update File`
			`let mut remaining_lines = &lines[1..];`
			`let mut parsed_lines = 1;`

			`// Optional: move file line`
			`let move_path = remaining_lines`
			`.first()`
			`.and_then(\|x\| x.strip_prefix(MOVE_TO_MARKER));`

			`if move_path.is_some() {`
			`remaining_lines = &remaining_lines[1..];`
			`parsed_lines += 1;`
			`}`

			`let mut chunks = Vec::new();`
			`// NOTE: we need to know to stop once we reach the next special marker header.`
			`while !remaining_lines.is_empty() {`
			`// Skip over any completely blank lines that may separate chunks.`
			`if remaining_lines[0].trim().is_empty() {`
			`parsed_lines += 1;`
			`remaining_lines = &remaining_lines[1..];`
			`continue;`
			`}`

			`if remaining_lines[0].starts_with("***") {`
			`break;`
			`}`

			`let (chunk, chunk_lines) = parse_update_file_chunk(`
			`remaining_lines,`
			`line_number + parsed_lines,`
			`chunks.is_empty(),`
			`)?;`
			`chunks.push(chunk);`
			`parsed_lines += chunk_lines;`
			`remaining_lines = &remaining_lines[chunk_lines..]`
			`}`

			`if chunks.is_empty() {`
			`return Err(InvalidHunkError {`
			`message: format!("Update file hunk for path '{path}' is empty"),`
			`line_number,`
			`});`
			`}`

			`return Ok((`
			`UpdateFile {`
			`path: PathBuf::from(path),`
			`move_path: move_path.map(PathBuf::from),`
			`chunks,`
			`},`
			`parsed_lines,`
			`));`
			`}`

Update cargo to 2024 edition (#842) Some effects of this change: - New formatting changes across many files. No functionality changes should occur from that. - Calls to `set_env` are considered unsafe, since this only happens in tests we wrap them in `unsafe` blocks 2025-05-07 08:37:48 -07:00			`Err(InvalidHunkError {`
			`message: format!(`
			`"'{first_line}' is not a valid hunk header. Valid hunk headers: '* Add File: {{path}}', '* Delete File: {{path}}', '*** Update File: {{path}}'"`
			`),`
			`line_number,`
			`})`
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629) As stated in `codex-rs/README.md`: Today, Codex CLI is written in TypeScript and requires Node.js 22+ to run it. For a number of users, this runtime requirement inhibits adoption: they would be better served by a standalone executable. As maintainers, we want Codex to run efficiently in a wide range of environments with minimal overhead. We also want to take advantage of operating system-specific APIs to provide better sandboxing, where possible. To that end, we are moving forward with a Rust implementation of Codex CLI contained in this folder, which has the following benefits: - The CLI compiles to small, standalone, platform-specific binaries. - Can make direct, native calls to [seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and [landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in order to support sandboxing on Linux. - No runtime garbage collection, resulting in lower memory consumption and better, more predictable performance. Currently, the Rust implementation is materially behind the TypeScript implementation in functionality, so continue to use the TypeScript implmentation for the time being. We will publish native executables via GitHub Releases as soon as we feel the Rust version is usable. 2025-04-24 13:31:40 -07:00			`}`

			`fn parse_update_file_chunk(`
			`lines: &[&str],`
			`line_number: usize,`
			`allow_missing_context: bool,`
			`) -> Result<(UpdateFileChunk, usize), ParseError> {`
			`if lines.is_empty() {`
			`return Err(InvalidHunkError {`
			`message: "Update hunk does not contain any lines".to_string(),`
			`line_number,`
			`});`
			`}`
			`// If we see an explicit context marker @@ or @@ <context>, consume it; otherwise, optionally`
			`// allow treating the chunk as starting directly with diff lines.`
			`let (change_context, start_index) = if lines[0] == EMPTY_CHANGE_CONTEXT_MARKER {`
			`(None, 1)`
			`} else if let Some(context) = lines[0].strip_prefix(CHANGE_CONTEXT_MARKER) {`
			`(Some(context.to_string()), 1)`
			`} else {`
			`if !allow_missing_context {`
			`return Err(InvalidHunkError {`
			`message: format!(`
			`"Expected update hunk to start with a @@ context marker, got: '{}'",`
			`lines[0]`
			`),`
			`line_number,`
			`});`
			`}`
			`(None, 0)`
			`};`
			`if start_index >= lines.len() {`
			`return Err(InvalidHunkError {`
			`message: "Update hunk does not contain any lines".to_string(),`
			`line_number: line_number + 1,`
			`});`
			`}`
			`let mut chunk = UpdateFileChunk {`
			`change_context,`
			`old_lines: Vec::new(),`
			`new_lines: Vec::new(),`
			`is_end_of_file: false,`
			`};`
			`let mut parsed_lines = 0;`
			`for line in &lines[start_index..] {`
			`match *line {`
			`EOF_MARKER => {`
			`if parsed_lines == 0 {`
			`return Err(InvalidHunkError {`
			`message: "Update hunk does not contain any lines".to_string(),`
			`line_number: line_number + 1,`
			`});`
			`}`
			`chunk.is_end_of_file = true;`
			`parsed_lines += 1;`
			`break;`
			`}`
			`line_contents => {`
			`match line_contents.chars().next() {`
			`None => {`
			`// Interpret this as an empty line.`
			`chunk.old_lines.push(String::new());`
			`chunk.new_lines.push(String::new());`
			`}`
			`Some(' ') => {`
			`chunk.old_lines.push(line_contents[1..].to_string());`
			`chunk.new_lines.push(line_contents[1..].to_string());`
			`}`
			`Some('+') => {`
			`chunk.new_lines.push(line_contents[1..].to_string());`
			`}`
			`Some('-') => {`
			`chunk.old_lines.push(line_contents[1..].to_string());`
			`}`
			`_ => {`
			`if parsed_lines == 0 {`
Update cargo to 2024 edition (#842) Some effects of this change: - New formatting changes across many files. No functionality changes should occur from that. - Calls to `set_env` are considered unsafe, since this only happens in tests we wrap them in `unsafe` blocks 2025-05-07 08:37:48 -07:00			`return Err(InvalidHunkError {`
			`message: format!(`
			`"Unexpected line found in update hunk: '{line_contents}'. Every line should start with ' ' (context line), '+' (added line), or '-' (removed line)"`
			`),`
			`line_number: line_number + 1,`
			`});`
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629) As stated in `codex-rs/README.md`: Today, Codex CLI is written in TypeScript and requires Node.js 22+ to run it. For a number of users, this runtime requirement inhibits adoption: they would be better served by a standalone executable. As maintainers, we want Codex to run efficiently in a wide range of environments with minimal overhead. We also want to take advantage of operating system-specific APIs to provide better sandboxing, where possible. To that end, we are moving forward with a Rust implementation of Codex CLI contained in this folder, which has the following benefits: - The CLI compiles to small, standalone, platform-specific binaries. - Can make direct, native calls to [seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and [landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in order to support sandboxing on Linux. - No runtime garbage collection, resulting in lower memory consumption and better, more predictable performance. Currently, the Rust implementation is materially behind the TypeScript implementation in functionality, so continue to use the TypeScript implmentation for the time being. We will publish native executables via GitHub Releases as soon as we feel the Rust version is usable. 2025-04-24 13:31:40 -07:00			`}`
			`// Assume this is the start of the next hunk.`
			`break;`
			`}`
			`}`
			`parsed_lines += 1;`
			`}`
			`}`
			`}`

			`Ok((chunk, parsed_lines + start_index))`
			`}`

			/// End-to-end checks for `parse_patch`.
			///
			/// Covers, in order: a missing begin marker, a missing end marker, an empty
			/// update hunk, an empty patch, a patch mixing add/delete/update(+move) hunks,
			/// an update hunk followed by another hunk, and an update hunk whose first
			/// chunk has no explicit `@@` header.
			#[test]
			fn test_parse_patch() {
			    // The first line must be the begin marker.
			    assert_eq!(
			        parse_patch("bad"),
			        Err(InvalidPatchError(
			            "The first line of the patch must be '*** Begin Patch'".to_string()
			        ))
			    );
			    // The last line must be the end marker.
			    assert_eq!(
			        parse_patch("*** Begin Patch\nbad"),
			        Err(InvalidPatchError(
			            "The last line of the patch must be '*** End Patch'".to_string()
			        ))
			    );
			    // An update hunk with no chunks is rejected and reported at the hunk header line.
			    assert_eq!(
			        parse_patch(
			            "*** Begin Patch\n\
			             *** Update File: test.py\n\
			             *** End Patch"
			        ),
			        Err(InvalidHunkError {
			            message: "Update file hunk for path 'test.py' is empty".to_string(),
			            line_number: 2,
			        })
			    );
			    // A patch with no hunks at all parses to an empty list.
			    assert_eq!(
			        parse_patch(
			            "*** Begin Patch\n\
			             *** End Patch"
			        ),
			        Ok(Vec::new())
			    );
			    // One hunk of each kind, including an update that also moves the file.
			    assert_eq!(
			        parse_patch(
			            "*** Begin Patch\n\
			             *** Add File: path/add.py\n\
			             +abc\n\
			             +def\n\
			             *** Delete File: path/delete.py\n\
			             *** Update File: path/update.py\n\
			             *** Move to: path/update2.py\n\
			             @@ def f():\n\
			             - pass\n\
			             + return 123\n\
			             *** End Patch"
			        ),
			        Ok(vec![
			            AddFile {
			                path: PathBuf::from("path/add.py"),
			                contents: "abc\ndef\n".to_string()
			            },
			            DeleteFile {
			                path: PathBuf::from("path/delete.py")
			            },
			            UpdateFile {
			                path: PathBuf::from("path/update.py"),
			                move_path: Some(PathBuf::from("path/update2.py")),
			                chunks: vec![UpdateFileChunk {
			                    change_context: Some("def f():".to_string()),
			                    old_lines: vec![" pass".to_string()],
			                    new_lines: vec![" return 123".to_string()],
			                    is_end_of_file: false
			                }]
			            }
			        ])
			    );
			    // Update hunk followed by another hunk (Add File).
			    assert_eq!(
			        parse_patch(
			            "*** Begin Patch\n\
			             *** Update File: file.py\n\
			             @@\n\
			             +line\n\
			             *** Add File: other.py\n\
			             +content\n\
			             *** End Patch"
			        ),
			        Ok(vec![
			            UpdateFile {
			                path: PathBuf::from("file.py"),
			                move_path: None,
			                chunks: vec![UpdateFileChunk {
			                    change_context: None,
			                    old_lines: vec![],
			                    new_lines: vec!["line".to_string()],
			                    is_end_of_file: false
			                }],
			            },
			            AddFile {
			                path: PathBuf::from("other.py"),
			                contents: "content\n".to_string()
			            }
			        ])
			    );

			    // Update hunk without an explicit @@ header for the first chunk should parse.
			    // Use a raw string to preserve the leading space diff marker on the context line:
			    // a `\`-continued string literal would strip the whitespace at the start of the
			    // next source line, and with it the ' ' that marks ` import foo` as context.
			    // The lines inside the raw string are therefore intentionally not indented.
			    assert_eq!(
			        parse_patch(
			            r#"*** Begin Patch
*** Update File: file2.py
 import foo
+bar
*** End Patch"#,
			        ),
			        Ok(vec![UpdateFile {
			            path: PathBuf::from("file2.py"),
			            move_path: None,
			            chunks: vec![UpdateFileChunk {
			                change_context: None,
			                old_lines: vec!["import foo".to_string()],
			                new_lines: vec!["import foo".to_string(), "bar".to_string()],
			                is_end_of_file: false,
			            }],
			        }])
			    );
			}

			/// Checks that `parse_one_hunk` rejects a first line that is not a hunk header
			/// and reports the error at the line number it was given.
			#[test]
			fn test_parse_one_hunk() {
			    // NOTE(review): the first two headers in this message read '* Add File' and
			    // '* Delete File', while the marker constants in this file start with '***'.
			    // Confirm against the message actually produced by `parse_one_hunk`.
			    let expected = Err(InvalidHunkError {
			        message: "'bad' is not a valid hunk header. \
			                  Valid hunk headers: '* Add File: {path}', '* Delete File: {path}', '*** Update File: {path}'".to_string(),
			        line_number: 234,
			    });

			    assert_eq!(parse_one_hunk(&["bad"], 234), expected);
			    // The remaining edge cases are exercised by the neighbouring tests.
			}

			/// Checks `parse_update_file_chunk` on inputs it must reject and on two
			/// well-formed chunks, including the number of lines it reports as consumed.
			#[test]
			fn test_update_file_chunk() {
			    // Every call below uses this starting line number and requires an @@ header.
			    let first_line = 123;

			    // (input lines, expected error message, expected error line number)
			    let rejected: [(&[&str], &str, usize); 4] = [
			        (
			            &["bad"],
			            "Expected update hunk to start with a @@ context marker, got: 'bad'",
			            123,
			        ),
			        (&["@@"], "Update hunk does not contain any lines", 124),
			        (
			            &["@@", "bad"],
			            "Unexpected line found in update hunk: 'bad'. \
			             Every line should start with ' ' (context line), '+' (added line), or '-' (removed line)",
			            124,
			        ),
			        (
			            &["@@", "*** End of File"],
			            "Update hunk does not contain any lines",
			            124,
			        ),
			    ];
			    for &(input, message, line_number) in &rejected {
			        assert_eq!(
			            parse_update_file_chunk(input, first_line, false),
			            Err(InvalidHunkError {
			                message: message.to_string(),
			                line_number,
			            }),
			            "input: {:?}",
			            input
			        );
			    }

			    // A chunk mixing an empty line, context, a removal and an addition. The trailing
			    // "*** End Patch" is not part of the chunk, so 6 of the 7 lines are consumed.
			    let mixed_input = [
			        "@@ change_context",
			        "",
			        " context",
			        "-remove",
			        "+add",
			        " context2",
			        "*** End Patch",
			    ];
			    let mixed_chunk = UpdateFileChunk {
			        change_context: Some(String::from("change_context")),
			        old_lines: ["", "context", "remove", "context2"]
			            .iter()
			            .copied()
			            .map(String::from)
			            .collect(),
			        new_lines: ["", "context", "add", "context2"]
			            .iter()
			            .copied()
			            .map(String::from)
			            .collect(),
			        is_end_of_file: false,
			    };
			    assert_eq!(
			        parse_update_file_chunk(&mixed_input, first_line, false),
			        Ok((mixed_chunk, 6))
			    );

			    // The end-of-file marker is consumed with the chunk and sets `is_end_of_file`.
			    let eof_input = ["@@", "+line", "*** End of File"];
			    let eof_chunk = UpdateFileChunk {
			        change_context: None,
			        old_lines: Vec::new(),
			        new_lines: vec![String::from("line")],
			        is_end_of_file: true,
			    };
			    assert_eq!(
			        parse_update_file_chunk(&eof_input, first_line, false),
			        Ok((eof_chunk, 3))
			    );
			}