codex-cli/src/utils/agent/apply-patch.ts

// Based on reference implementation from
// https://cookbook.openai.com/examples/gpt4-1_prompting_guide#reference-implementation-apply_patchpy

import fs from "fs";
import path from "path";
import {
  ADD_FILE_PREFIX,
  DELETE_FILE_PREFIX,
  END_OF_FILE_PREFIX,
  MOVE_FILE_TO_PREFIX,
  PATCH_SUFFIX,
  UPDATE_FILE_PREFIX,
  HUNK_ADD_LINE_PREFIX,
  PATCH_PREFIX,
} from "src/parse-apply-patch";

// -----------------------------------------------------------------------------
// Types & Models
// -----------------------------------------------------------------------------

/**
 * Kind of operation a patch (or a commit derived from it) performs on one
 * file. The string values are the enum's runtime representation.
 */
export enum ActionType {
  ADD = "add",
  DELETE = "delete",
  UPDATE = "update",
}

/**
 * The concrete change to apply to a single file, as stored in a `Commit`.
 */
export interface FileChange {
  /** Which operation this change represents. */
  type: ActionType;
  /** Content before the change; `patch_to_commit` sets it for DELETE and UPDATE. */
  old_content?: string | null;
  /** Content after the change; written out by `apply_commit` for ADD and UPDATE. */
  new_content?: string | null;
  /**
   * Destination path for an UPDATE that also moves the file. When set,
   * `apply_commit` writes the new content there and removes the source path.
   */
  move_path?: string | null;
}

/**
 * A set of file changes ready to be applied by `apply_commit`.
 */
export interface Commit {
  /** Changes keyed by the file path they apply to. */
  changes: Record<string, FileChange>;
}

/**
 * Builds a `Commit` describing how `updatedFiles` differs from `orig`.
 *
 * Only the paths present as keys of `updatedFiles` are examined. For each
 * path, a content value of `null` or `undefined` means "the file does not
 * exist" on that side:
 *
 * - present in both, different content -> UPDATE
 * - absent in `orig`, present in `updatedFiles` -> ADD
 * - present in `orig`, absent in `updatedFiles` -> DELETE
 * - identical content, or absent on both sides -> no change recorded
 *
 * @param orig Original content keyed by path (`null` = file absent).
 * @param updatedFiles New content keyed by path (`null` = file absent).
 * @returns A commit containing one entry per path that actually changed.
 */
export function assemble_changes(
  orig: Record<string, string | null>,
  updatedFiles: Record<string, string | null>,
): Commit {
  const commit: Commit = { changes: {} };
  for (const [p, newContent] of Object.entries(updatedFiles)) {
    const oldContent = orig[p];
    // `== null` deliberately matches both null and undefined: the declared
    // value type uses null for "no file", while a missing key yields undefined.
    if (oldContent == null && newContent == null) {
      continue;
    }
    if (oldContent === newContent) {
      continue;
    }
    if (oldContent != null && newContent != null) {
      commit.changes[p] = {
        type: ActionType.UPDATE,
        old_content: oldContent,
        new_content: newContent,
      };
    } else if (newContent != null) {
      commit.changes[p] = {
        type: ActionType.ADD,
        new_content: newContent,
      };
    } else {
      // Only the "old present, new absent" combination can reach this point.
      commit.changes[p] = {
        type: ActionType.DELETE,
        old_content: oldContent,
      };
    }
  }
  return commit;
}

// -----------------------------------------------------------------------------
// Patch‑related structures
// -----------------------------------------------------------------------------

/**
 * One contiguous edit inside an UPDATE action: starting at `orig_index`,
 * `del_lines.length` original lines are skipped and `ins_lines` are emitted
 * in their place (see `_get_updated_file`).
 */
export interface Chunk {
  orig_index: number; // line index of the first line in the original file
  del_lines: Array<string>; // lines removed from the original, prefix stripped
  ins_lines: Array<string>; // lines inserted at that position, prefix stripped
}

/**
 * Parsed form of one file section of a patch.
 */
export interface PatchAction {
  /** Operation requested for the file. */
  type: ActionType;
  /** Full content of the new file; the parser sets it for ADD actions. */
  new_file?: string | null;
  /** Edits to apply; the parser leaves this empty for ADD and DELETE. */
  chunks: Array<Chunk>;
  /** Destination path read from the move-to line of an UPDATE section, if any. */
  move_path?: string | null;
}

/**
 * A fully parsed patch.
 */
export interface Patch {
  /** Actions keyed by the file path named in the patch section header. */
  actions: Record<string, PatchAction>;
}

/**
 * Error type thrown throughout this module for patch-related failures
 * (malformed patch text, missing files, context that cannot be located).
 */
export class DiffError extends Error {}

// -----------------------------------------------------------------------------
// Parser (patch text -> Patch)
// -----------------------------------------------------------------------------

/**
 * Line-oriented parser that converts patch text (already split into lines)
 * into a `Patch`, resolving every hunk against the current file contents.
 *
 * Usage, as done by `text_to_patch`: construct, set `index` past the opening
 * marker line, call `parse()`, then read `patch` and `fuzz`.
 */
class Parser {
  /**
   * Look-alike punctuation folded to its ASCII form before `@@` anchor lines
   * are compared against file lines. Mirrors the table used by
   * `find_context_core`.
   */
  private static readonly PUNCT_EQUIV: Record<string, string> = {
    "-": "-",
    "\u2010": "-",
    "\u2011": "-",
    "\u2012": "-",
    "\u2013": "-",
    "\u2014": "-",
    "\u2212": "-",
    "\u0022": '"',
    "\u201C": '"',
    "\u201D": '"',
    "\u201E": '"',
    "\u00AB": '"',
    "\u00BB": '"',
    "\u0027": "'",
    "\u2018": "'",
    "\u2019": "'",
    "\u201B": "'",
    "\u00A0": " ",
    "\u202F": " ",
  };

  /** NFC-normalises `s` and folds punctuation look-alikes to ASCII. */
  private static canon(s: string): string {
    return s
      .normalize("NFC")
      .replace(/./gu, (c) => Parser.PUNCT_EQUIV[c] ?? c);
  }

  /** Current content of every file the patch may update or delete, by path. */
  current_files: Record<string, string>;
  /** The patch text, one entry per line. */
  lines: Array<string>;
  /** Index of the next unread entry of `lines`. */
  index = 0;
  /** Result accumulated by `parse()`. */
  patch: Patch = { actions: {} };
  /** Running total of how loosely anchors/context had to be matched. */
  fuzz = 0;

  constructor(currentFiles: Record<string, string>, lines: Array<string>) {
    this.current_files = currentFiles;
    this.lines = lines;
  }

  /**
   * True when the input is exhausted, or when the current line starts with
   * any of the (whitespace-trimmed) `prefixes`.
   */
  private is_done(prefixes?: Array<string>): boolean {
    if (this.index >= this.lines.length) {
      return true;
    }
    if (
      prefixes &&
      prefixes.some((p) => this.lines[this.index]!.startsWith(p.trim()))
    ) {
      return true;
    }
    return false;
  }

  /**
   * True when the current line starts with `prefix` (or any of them).
   * Returns false once the input is exhausted, so callers report a
   * `DiffError` instead of crashing on an out-of-range read.
   */
  private startswith(prefix: string | Array<string>): boolean {
    const current = this.lines[this.index];
    if (current === undefined) {
      return false;
    }
    const prefixes = Array.isArray(prefix) ? prefix : [prefix];
    return prefixes.some((p) => current.startsWith(p));
  }

  /**
   * If the current line starts with `prefix`, consumes it and returns the
   * remainder (or the whole line when `returnEverything` is set). Otherwise
   * returns "" and consumes nothing.
   *
   * @throws DiffError when called with the input already exhausted.
   */
  private read_str(prefix = "", returnEverything = false): string {
    if (this.index >= this.lines.length) {
      throw new DiffError(`Index: ${this.index} >= ${this.lines.length}`);
    }
    if (this.lines[this.index]!.startsWith(prefix)) {
      const text = returnEverything
        ? this.lines[this.index]
        : this.lines[this.index]!.slice(prefix.length);
      this.index += 1;
      return text ?? "";
    }
    return "";
  }

  /**
   * Parses file sections until the closing patch marker and stores one
   * action per path in `this.patch`.
   *
   * @throws DiffError on duplicate paths, on update/delete of a file missing
   *   from `current_files`, on add of a file that already exists, on an
   *   unrecognised line, or when the closing marker is missing.
   */
  parse(): void {
    while (!this.is_done([PATCH_SUFFIX])) {
      let filePath = this.read_str(UPDATE_FILE_PREFIX);
      if (filePath) {
        if (this.patch.actions[filePath]) {
          throw new DiffError(`Update File Error: Duplicate Path: ${filePath}`);
        }
        // The optional move-to line directly follows the update header.
        const moveTo = this.read_str(MOVE_FILE_TO_PREFIX);
        if (!(filePath in this.current_files)) {
          throw new DiffError(`Update File Error: Missing File: ${filePath}`);
        }
        const text = this.current_files[filePath];
        const action = this.parse_update_file(text ?? "");
        action.move_path = moveTo || undefined;
        this.patch.actions[filePath] = action;
        continue;
      }
      filePath = this.read_str(DELETE_FILE_PREFIX);
      if (filePath) {
        if (this.patch.actions[filePath]) {
          throw new DiffError(`Delete File Error: Duplicate Path: ${filePath}`);
        }
        if (!(filePath in this.current_files)) {
          throw new DiffError(`Delete File Error: Missing File: ${filePath}`);
        }
        this.patch.actions[filePath] = { type: ActionType.DELETE, chunks: [] };
        continue;
      }
      filePath = this.read_str(ADD_FILE_PREFIX);
      if (filePath) {
        if (this.patch.actions[filePath]) {
          throw new DiffError(`Add File Error: Duplicate Path: ${filePath}`);
        }
        if (filePath in this.current_files) {
          throw new DiffError(
            `Add File Error: File already exists: ${filePath}`,
          );
        }
        this.patch.actions[filePath] = this.parse_add_file();
        continue;
      }
      throw new DiffError(`Unknown Line: ${this.lines[this.index]}`);
    }
    if (!this.startswith(PATCH_SUFFIX.trim())) {
      throw new DiffError("Missing End Patch");
    }
    this.index += 1;
  }

  /**
   * Parses the hunks of one update section against `text` (the file's
   * current content) and returns the resulting UPDATE action.
   *
   * Each iteration optionally consumes an `@@` anchor line, moves the file
   * cursor just past the first matching file line, then locates the hunk's
   * context with `find_context` and records its chunks at absolute indices.
   *
   * @throws DiffError when a hunk does not start with an anchor (except the
   *   first one) or when its context cannot be found in the file.
   */
  private parse_update_file(text: string): PatchAction {
    const action: PatchAction = { type: ActionType.UPDATE, chunks: [] };
    const fileLines = text.split("\n");
    let index = 0;

    while (
      !this.is_done([
        PATCH_SUFFIX,
        UPDATE_FILE_PREFIX,
        DELETE_FILE_PREFIX,
        ADD_FILE_PREFIX,
        END_OF_FILE_PREFIX,
      ])
    ) {
      const defStr = this.read_str("@@ ");
      let sectionStr = "";
      // A bare "@@" (no anchor text) is also accepted as a hunk separator.
      if (!defStr && this.lines[this.index] === "@@") {
        sectionStr = this.lines[this.index]!;
        this.index += 1;
      }
      if (!(defStr || sectionStr || index === 0)) {
        throw new DiffError(`Invalid Line:\n${this.lines[this.index]}`);
      }
      if (defStr.trim()) {
        let found = false;
        // Canonical forms of the anchor are loop-invariant; compute them once.
        const wantedExact = Parser.canon(defStr);
        const wantedTrimmed = Parser.canon(defStr.trim());

        // Exact match: only search forward when the anchor does not already
        // occur in the part of the file that has been consumed.
        if (
          !fileLines
            .slice(0, index)
            .some((s) => Parser.canon(s) === wantedExact)
        ) {
          for (let i = index; i < fileLines.length; i++) {
            if (Parser.canon(fileLines[i]!) === wantedExact) {
              index = i + 1;
              found = true;
              break;
            }
          }
        }
        // Fallback: same search ignoring surrounding whitespace (adds fuzz).
        if (
          !found &&
          !fileLines
            .slice(0, index)
            .some((s) => Parser.canon(s.trim()) === wantedTrimmed)
        ) {
          for (let i = index; i < fileLines.length; i++) {
            if (Parser.canon(fileLines[i]!.trim()) === wantedTrimmed) {
              index = i + 1;
              this.fuzz += 1;
              found = true;
              break;
            }
          }
        }
      }

      const [nextChunkContext, chunks, endPatchIndex, eof] = peek_next_section(
        this.lines,
        this.index,
      );
      const [newIndex, fuzz] = find_context(
        fileLines,
        nextChunkContext,
        index,
        eof,
      );
      if (newIndex === -1) {
        const ctxText = nextChunkContext.join("\n");
        if (eof) {
          throw new DiffError(`Invalid EOF Context ${index}:\n${ctxText}`);
        } else {
          throw new DiffError(`Invalid Context ${index}:\n${ctxText}`);
        }
      }
      this.fuzz += fuzz;
      // Chunk indices come back relative to the section; make them absolute.
      for (const ch of chunks) {
        ch.orig_index += newIndex;
        action.chunks.push(ch);
      }
      index = newIndex + nextChunkContext.length;
      this.index = endPatchIndex;
    }
    return action;
  }

  /**
   * Parses the body of an add-file section: every line up to the next
   * section header must start with the add-line prefix, which is stripped.
   *
   * @throws DiffError when a body line lacks the add-line prefix.
   */
  private parse_add_file(): PatchAction {
    const lines: Array<string> = [];
    while (
      !this.is_done([
        PATCH_SUFFIX,
        UPDATE_FILE_PREFIX,
        DELETE_FILE_PREFIX,
        ADD_FILE_PREFIX,
      ])
    ) {
      const s = this.read_str();
      if (!s.startsWith(HUNK_ADD_LINE_PREFIX)) {
        throw new DiffError(`Invalid Add File Line: ${s}`);
      }
      lines.push(s.slice(1));
    }
    return {
      type: ActionType.ADD,
      new_file: lines.join("\n"),
      chunks: [],
    };
  }
}

/**
 * Finds the first position at or after `start` where the `context` lines
 * occur consecutively in `lines`.
 *
 * Matching is attempted in three passes of decreasing strictness; every
 * comparison is made after Unicode canonicalisation (NFC plus folding of
 * common punctuation look-alikes to ASCII):
 *
 * 1. exact equality                       -> fuzz 0
 * 2. ignoring trailing whitespace         -> fuzz 1
 * 3. ignoring leading/trailing whitespace -> fuzz 100
 *
 * @param lines File content, one entry per line.
 * @param context Lines that must appear consecutively.
 * @param start Index to start scanning from. Negative values are treated as
 *   0 for scanning, so a window is never taken from the end of the file.
 * @returns `[index, fuzz]`, or `[-1, 0]` when nothing matches. An empty
 *   `context` matches immediately and returns `[start, 0]`.
 */
function find_context_core(
  lines: Array<string>,
  context: Array<string>,
  start: number,
): [number, number] {
  /*
   * Exact string equality breaks when the file on disk contains visually
   * identical but different Unicode code points (e.g. EN DASH vs ASCII "-"),
   * because models almost always emit the ASCII variant. A small set of
   * common look-alikes is therefore folded before comparing. The table is
   * kept deliberately small and is written with Unicode escapes so that this
   * file stays ASCII-only.
   */
  const PUNCT_EQUIV: Record<string, string> = {
    // Hyphen / dash variants --------------------------------------------------
    /* U+002D HYPHEN-MINUS */ "-": "-",
    /* U+2010 HYPHEN */ "\u2010": "-",
    /* U+2011 NO-BREAK HYPHEN */ "\u2011": "-",
    /* U+2012 FIGURE DASH */ "\u2012": "-",
    /* U+2013 EN DASH */ "\u2013": "-",
    /* U+2014 EM DASH */ "\u2014": "-",
    /* U+2212 MINUS SIGN */ "\u2212": "-",

    // Double quotes -----------------------------------------------------------
    /* U+0022 QUOTATION MARK */ "\u0022": '"',
    /* U+201C LEFT DOUBLE QUOTATION MARK */ "\u201C": '"',
    /* U+201D RIGHT DOUBLE QUOTATION MARK */ "\u201D": '"',
    /* U+201E DOUBLE LOW-9 QUOTATION MARK */ "\u201E": '"',
    /* U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */ "\u00AB": '"',
    /* U+00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */ "\u00BB": '"',

    // Single quotes -----------------------------------------------------------
    /* U+0027 APOSTROPHE */ "\u0027": "'",
    /* U+2018 LEFT SINGLE QUOTATION MARK */ "\u2018": "'",
    /* U+2019 RIGHT SINGLE QUOTATION MARK */ "\u2019": "'",
    /* U+201B SINGLE HIGH-REVERSED-9 QUOTATION MARK */ "\u201B": "'",
    // Spaces ------------------------------------------------------------------
    /* U+00A0 NO-BREAK SPACE */ "\u00A0": " ",
    /* U+202F NARROW NO-BREAK SPACE */ "\u202F": " ",
  };

  const canon = (s: string): string =>
    s
      // Canonical Unicode composition first
      .normalize("NFC")
      // Replace punctuation look-alikes
      .replace(/./gu, (c) => PUNCT_EQUIV[c] ?? c);

  if (context.length === 0) {
    return [start, 0];
  }

  const width = context.length;
  // A negative index would make Array.prototype.slice count from the end of
  // the array and could report a negative match position.
  const first = Math.max(0, start);

  // Each pass pairs a per-line preparation step with the fuzz it costs.
  const passes: Array<[(s: string) => string, number]> = [
    [(s) => s, 0],
    [(s) => s.trimEnd(), 1],
    [(s) => s.trim(), 100],
  ];

  for (const [prepare, fuzz] of passes) {
    // The prepared context does not depend on the scan position.
    const wanted = canon(context.map(prepare).join("\n"));
    for (let i = first; i < lines.length; i++) {
      const segment = canon(
        lines
          .slice(i, i + width)
          .map(prepare)
          .join("\n"),
      );
      if (segment === wanted) {
        return [i, fuzz];
      }
    }
  }

  return [-1, 0];
}

/**
 * Locates `context` inside `lines`, with special handling for hunks that
 * are marked as ending at end-of-file.
 *
 * For an EOF hunk the search first starts at the position where the context
 * would sit if it were the tail of the file. Only if that fails does it fall
 * back to scanning from `start`, and that fallback adds 10000 to the fuzz.
 *
 * @returns `[index, fuzz]` as produced by `find_context_core`; `index` is
 *   -1 when the context was not found.
 */
function find_context(
  lines: Array<string>,
  context: Array<string>,
  start: number,
  eof: boolean,
): [number, number] {
  if (!eof) {
    return find_context_core(lines, context, start);
  }

  const tailStart = lines.length - context.length;
  const [tailIndex, tailFuzz] = find_context_core(lines, context, tailStart);
  if (tailIndex !== -1) {
    return [tailIndex, tailFuzz];
  }

  const [fallbackIndex, fallbackFuzz] = find_context_core(
    lines,
    context,
    start,
  );
  return [fallbackIndex, fallbackFuzz + 10000];
}

/**
 * Reads one hunk of an update section starting at `initialIndex`, without
 * touching any parser state.
 *
 * @returns A tuple of
 *   - the "old" lines of the hunk (context plus deleted lines, prefixes
 *     stripped), i.e. what must be found in the original file;
 *   - the chunks of the hunk, with `orig_index` relative to those old lines;
 *   - the index of the first line after the hunk (past the end-of-file
 *     marker when one terminates the hunk);
 *   - whether the hunk was terminated by the end-of-file marker.
 * @throws DiffError on a line that starts with "***" but is not a known
 *   marker.
 */
function peek_next_section(
  lines: Array<string>,
  initialIndex: number,
): [Array<string>, Array<Chunk>, number, boolean] {
  type Mode = "keep" | "add" | "delete";

  // Any line starting with one of these (trimmed) markers ends the hunk.
  const terminators = [
    "@@",
    PATCH_SUFFIX,
    UPDATE_FILE_PREFIX,
    DELETE_FILE_PREFIX,
    ADD_FILE_PREFIX,
    END_OF_FILE_PREFIX,
  ];

  const old: Array<string> = [];
  const chunks: Array<Chunk> = [];
  let pendingDel: Array<string> = [];
  let pendingIns: Array<string> = [];
  let mode: Mode = "keep";
  let cursor = initialIndex;

  // Emits the pending run of deletions/insertions as a chunk, if non-empty.
  const emitPending = (): void => {
    if (pendingIns.length || pendingDel.length) {
      chunks.push({
        orig_index: old.length - pendingDel.length,
        del_lines: pendingDel,
        ins_lines: pendingIns,
      });
    }
  };

  for (; cursor < lines.length; ) {
    const raw = lines[cursor]!;
    if (terminators.some((p) => raw.startsWith(p.trim()))) {
      break;
    }
    if (raw === "***") {
      break;
    }
    if (raw.startsWith("***")) {
      throw new DiffError(`Invalid Line: ${raw}`);
    }
    cursor += 1;

    const previousMode: Mode = mode;
    const marker = raw[0];
    let content: string;
    if (marker === HUNK_ADD_LINE_PREFIX) {
      mode = "add";
      content = raw.slice(1);
    } else if (marker === "-") {
      mode = "delete";
      content = raw.slice(1);
    } else if (marker === " ") {
      mode = "keep";
      content = raw.slice(1);
    } else {
      // Tolerate context lines whose leading space is missing: the model
      // sometimes does not fully adhere to the spec, so the whole line is
      // taken as context.
      // TODO: Re-enable strict mode.
      // throw new DiffError(`Invalid Line: ${line}`)
      mode = "keep";
      content = raw;
    }

    // Returning to context after a run of edits closes the current chunk.
    if (mode === "keep" && previousMode !== mode) {
      emitPending();
      pendingDel = [];
      pendingIns = [];
    }

    switch (mode) {
      case "delete":
        pendingDel.push(content);
        old.push(content);
        break;
      case "add":
        pendingIns.push(content);
        break;
      default:
        old.push(content);
        break;
    }
  }

  emitPending();

  const endsAtEof =
    cursor < lines.length && lines[cursor] === END_OF_FILE_PREFIX;
  if (endsAtEof) {
    cursor += 1;
  }
  return [old, chunks, cursor, endsAtEof];
}

// -----------------------------------------------------------------------------
// High‑level helpers
// -----------------------------------------------------------------------------

/**
 * Parses complete patch text into a `Patch`.
 *
 * The text is trimmed and split on "\n"; it must have at least two lines,
 * begin with the patch prefix and end with the patch suffix.
 *
 * @param text Raw patch text.
 * @param orig Current content of the files the patch refers to, by path.
 * @returns `[patch, fuzz]`, where `fuzz` is the parser's accumulated
 *   looseness score.
 * @throws DiffError when the envelope is malformed or parsing fails.
 */
export function text_to_patch(
  text: string,
  orig: Record<string, string>,
): [Patch, number] {
  const lines = text.trim().split("\n");
  const expectedPrefix = PATCH_PREFIX.trim();
  const expectedSuffix = PATCH_SUFFIX.trim();

  // Validate the envelope; the first failing rule determines the message.
  let problem: string | null = null;
  if (lines.length < 2) {
    problem = "Patch text must have at least two lines.";
  } else if (!(lines[0] ?? "").startsWith(expectedPrefix)) {
    problem = "Patch text must start with the correct patch prefix.";
  } else if (lines[lines.length - 1] !== expectedSuffix) {
    problem = "Patch text must end with the correct patch suffix.";
  }
  if (problem !== null) {
    throw new DiffError("Invalid patch text: " + problem);
  }

  const parser = new Parser(orig, lines);
  // Line 0 is the opening marker; parsing starts right after it.
  parser.index = 1;
  parser.parse();
  return [parser.patch, parser.fuzz];
}

/**
 * Lists the paths named by update-file and delete-file headers in `text`,
 * i.e. the files whose current content must be loaded before parsing.
 *
 * @returns Unique paths in order of first appearance.
 */
export function identify_files_needed(text: string): Array<string> {
  const headerPrefixes = [UPDATE_FILE_PREFIX, DELETE_FILE_PREFIX];
  const needed = new Set<string>();
  text
    .trim()
    .split("\n")
    .forEach((line) => {
      for (const prefix of headerPrefixes) {
        if (line.startsWith(prefix)) {
          needed.add(line.slice(prefix.length));
        }
      }
    });
  return Array.from(needed);
}

/**
 * Lists the paths named by add-file headers in `text`.
 *
 * @returns Unique paths in order of first appearance.
 */
export function identify_files_added(text: string): Array<string> {
  const addedPaths = text
    .trim()
    .split("\n")
    .filter((line) => line.startsWith(ADD_FILE_PREFIX))
    .map((line) => line.slice(ADD_FILE_PREFIX.length));
  // A Set keeps the first occurrence of each path and preserves order.
  return Array.from(new Set(addedPaths));
}

/**
 * Applies the chunks of an UPDATE action to `text` and returns the new
 * file content.
 *
 * Chunks are processed in order: original lines up to `chunk.orig_index`
 * are copied through, the chunk's `ins_lines` are emitted, and
 * `chunk.del_lines.length` original lines are skipped. Whatever remains
 * after the last chunk is copied through unchanged.
 *
 * Lines are copied with index loops rather than `push(...array)`, because
 * spreading a very large file as call arguments can exceed the engine's
 * argument limit and throw a RangeError.
 *
 * @param text Original file content.
 * @param action Parsed action; must be of type UPDATE.
 * @param path File path, used only in error messages.
 * @throws Error when `action` is not an UPDATE.
 * @throws DiffError when a chunk starts past the end of the file or before
 *   the position already consumed by an earlier chunk.
 */
function _get_updated_file(
  text: string,
  action: PatchAction,
  path: string,
): string {
  if (action.type !== ActionType.UPDATE) {
    throw new Error("Expected UPDATE action");
  }
  const origLines = text.split("\n");
  const destLines: Array<string> = [];
  let origIndex = 0;

  for (const chunk of action.chunks) {
    if (chunk.orig_index > origLines.length) {
      throw new DiffError(
        `${path}: chunk.orig_index ${chunk.orig_index} > len(lines) ${origLines.length}`,
      );
    }
    if (origIndex > chunk.orig_index) {
      throw new DiffError(
        `${path}: orig_index ${origIndex} > chunk.orig_index ${chunk.orig_index}`,
      );
    }

    // Unchanged lines between the previous chunk and this one.
    for (let i = origIndex; i < chunk.orig_index; i++) {
      destLines.push(origLines[i]!);
    }
    origIndex = chunk.orig_index;

    // Inserted lines.
    for (const inserted of chunk.ins_lines) {
      destLines.push(inserted);
    }

    // Deleted lines are skipped in the original.
    origIndex += chunk.del_lines.length;
  }

  // Unchanged tail of the file.
  for (let i = origIndex; i < origLines.length; i++) {
    destLines.push(origLines[i]!);
  }
  return destLines.join("\n");
}

/**
 * Converts a parsed `Patch` into a `Commit` holding concrete file contents.
 *
 * - DELETE records the original content.
 * - ADD records the new file content ("" when the action carries none).
 * - UPDATE records the original content, the content produced by applying
 *   the action's chunks, and the move destination if any.
 *
 * @param patch Parsed patch.
 * @param orig Original content of the referenced files, by path.
 * @throws DiffError (from `_get_updated_file`) when chunks do not fit.
 */
export function patch_to_commit(
  patch: Patch,
  orig: Record<string, string>,
): Commit {
  const changes: Record<string, FileChange> = {};

  for (const [pathKey, action] of Object.entries(patch.actions)) {
    switch (action.type) {
      case ActionType.DELETE: {
        changes[pathKey] = {
          type: ActionType.DELETE,
          old_content: orig[pathKey],
        };
        break;
      }
      case ActionType.ADD: {
        changes[pathKey] = {
          type: ActionType.ADD,
          new_content: action.new_file ?? "",
        };
        break;
      }
      case ActionType.UPDATE: {
        const before = orig[pathKey];
        const after = _get_updated_file(before!, action, pathKey);
        changes[pathKey] = {
          type: ActionType.UPDATE,
          old_content: before,
          new_content: after,
          move_path: action.move_path ?? undefined,
        };
        break;
      }
    }
  }

  return { changes };
}

// -----------------------------------------------------------------------------
// Filesystem helpers for Node environment
// -----------------------------------------------------------------------------

/**
 * Reads every path in `paths` through `openFn` and returns the contents
 * keyed by path.
 *
 * @param paths Paths to load.
 * @param openFn Reader returning the content of a path; may throw.
 * @throws DiffError `File not found: <path>` for any failure of `openFn`,
 *   so that callers consistently receive `DiffError` for patch-related
 *   failures.
 */
export function load_files(
  paths: Array<string>,
  openFn: (p: string) => string,
): Record<string, string> {
  const loaded: Record<string, string> = {};
  paths.forEach((filePath) => {
    let content: string;
    try {
      content = openFn(filePath);
    } catch {
      throw new DiffError(`File not found: ${filePath}`);
    }
    loaded[filePath] = content;
  });
  return loaded;
}

/**
 * Applies every change of `commit` through the supplied callbacks.
 *
 * - DELETE calls `removeFn(path)`.
 * - ADD calls `writeFn(path, new_content)`.
 * - UPDATE writes the new content to `move_path` when one is set, otherwise
 *   to the original path. The original path is removed only when the file
 *   was actually written somewhere else; a move whose destination equals
 *   the source must not delete the file that was just written.
 *
 * Missing `new_content` is written as the empty string.
 *
 * @param commit Changes to apply, keyed by path.
 * @param writeFn Writes `content` to path `p`.
 * @param removeFn Removes path `p`.
 */
export function apply_commit(
  commit: Commit,
  writeFn: (p: string, c: string) => void,
  removeFn: (p: string) => void,
): void {
  for (const [p, change] of Object.entries(commit.changes)) {
    if (change.type === ActionType.DELETE) {
      removeFn(p);
    } else if (change.type === ActionType.ADD) {
      writeFn(p, change.new_content ?? "");
    } else if (change.type === ActionType.UPDATE) {
      // An empty/absent move_path means "update in place".
      const destination = change.move_path || p;
      writeFn(destination, change.new_content ?? "");
      if (destination !== p) {
        removeFn(p);
      }
    }
  }
}

/**
 * Runs the full pipeline for one patch: load the referenced files, parse
 * the patch against them, build the commit and apply it.
 *
 * @param text Patch text; must start with the patch prefix.
 * @param openFn Reads a file's content.
 * @param writeFn Writes a file's content.
 * @param removeFn Removes a file.
 * @returns The literal string "Done!" on success.
 * @throws DiffError when the patch is malformed, refers to files that
 *   cannot be read, or does not match the files' content.
 */
export function process_patch(
  text: string,
  openFn: (p: string) => string,
  writeFn: (p: string, c: string) => void,
  removeFn: (p: string) => void,
): string {
  const hasPrefix = text.startsWith(PATCH_PREFIX);
  if (!hasPrefix) {
    throw new DiffError("Patch must start with *** Begin Patch\\n");
  }

  const orig = load_files(identify_files_needed(text), openFn);
  // The fuzz score returned alongside the patch is not used here.
  const [patch] = text_to_patch(text, orig);
  apply_commit(patch_to_commit(patch, orig), writeFn, removeFn);
  return "Done!";
}

// -----------------------------------------------------------------------------
// Default filesystem implementations
// -----------------------------------------------------------------------------

/**
 * Default reader: returns the content of file `p` decoded as UTF-8.
 * Errors from the underlying synchronous read propagate to the caller.
 */
function open_file(p: string): string {
  const content = fs.readFileSync(p, { encoding: "utf8" });
  return content;
}

/**
 * Default writer: writes `content` to the relative path `p` as UTF-8,
 * creating missing parent directories first.
 *
 * @throws DiffError when `p` is an absolute path.
 */
function write_file(p: string, content: string): void {
  const absolute = path.isAbsolute(p);
  if (absolute) {
    throw new DiffError("We do not support absolute paths.");
  }

  // "." means the file sits directly in the working directory.
  const directory = path.dirname(p);
  const needsDirectory = directory !== ".";
  if (needsDirectory) {
    fs.mkdirSync(directory, { recursive: true });
  }

  fs.writeFileSync(p, content, { encoding: "utf8" });
}

/**
 * Default remover: deletes the file at `p`. Errors from the underlying
 * synchronous unlink propagate to the caller.
 */
function remove_file(p: string): void {
  const { unlinkSync } = fs;
  unlinkSync(p);
}

// -----------------------------------------------------------------------------
// CLI mode. Not exported, executed only if run directly.
// -----------------------------------------------------------------------------

// Runs only when this module's URL equals the file URL built from
// process.argv[1]. The patch is read from stdin in full, applied with the
// default filesystem helpers, and the result (or the error message) is
// printed. The process exits with status 1 on empty input or on failure.
if (import.meta.url === `file://${process.argv[1]}`) {
  const received: Array<string> = [];
  const { stdin } = process;

  stdin.setEncoding("utf8");
  stdin.on("data", (chunk) => {
    received.push(String(chunk));
  });
  stdin.on("end", () => {
    const patchText = received.join("");
    if (patchText === "") {
      // eslint-disable-next-line no-console
      console.error("Please pass patch text through stdin");
      process.exit(1);
    }
    try {
      const outcome = process_patch(
        patchText,
        open_file,
        write_file,
        remove_file,
      );
      // eslint-disable-next-line no-console
      console.log(outcome);
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : String(err);
      // eslint-disable-next-line no-console
      console.error(message);
      process.exit(1);
    }
  });
}

/**
 * Instruction text describing how to invoke the `apply_patch` CLI through
 * the `shell` tool and how to write a patch in the V4A diff format
 * (file action headers, `-`/`+` lines, surrounding context and `@@`
 * anchors).
 *
 * Presumably this text is included in the prompt sent to the model —
 * TODO confirm against the callers of this constant.
 *
 * NOTE(review): the last paragraph says the command always prints "Done!".
 * The CLI entry point in this file prints the error message and exits with
 * status 1 on failure instead — confirm which behavior the surrounding
 * tooling exposes before relying on that sentence.
 */
export const applyPatchToolInstructions = `
To edit files, ALWAYS use the \`shell\` tool with \`apply_patch\` CLI.  \`apply_patch\` effectively allows you to execute a diff/patch against a file, but the format of the diff specification is unique to this task, so pay careful attention to these instructions. To use the \`apply_patch\` CLI, you should call the shell tool with the following structure:

\`\`\`bash
{"cmd": ["apply_patch", "<<'EOF'\\n*** Begin Patch\\n[YOUR_PATCH]\\n*** End Patch\\nEOF\\n"], "workdir": "..."}
\`\`\`

Where [YOUR_PATCH] is the actual content of your patch, specified in the following V4A diff format.

*** [ACTION] File: [path/to/file] -> ACTION can be one of Add, Update, or Delete.
For each snippet of code that needs to be changed, repeat the following:
[context_before] -> See below for further instructions on context.
- [old_code] -> Precede the old code with a minus sign.
+ [new_code] -> Precede the new, replacement code with a plus sign.
[context_after] -> See below for further instructions on context.

For instructions on [context_before] and [context_after]:
- By default, show 3 lines of code immediately above and 3 lines immediately below each change. If a change is within 3 lines of a previous change, do NOT duplicate the first change’s [context_after] lines in the second change’s [context_before] lines.
- If 3 lines of context is insufficient to uniquely identify the snippet of code within the file, use the @@ operator to indicate the class or function to which the snippet belongs. For instance, we might have:
@@ class BaseClass
[3 lines of pre-context]
- [old_code]
+ [new_code]
[3 lines of post-context]

- If a code block is repeated so many times in a class or function such that even a single \`@@\` statement and 3 lines of context cannot uniquely identify the snippet of code, you can use multiple \`@@\` statements to jump to the right context. For instance:

@@ class BaseClass
@@ 	def method():
[3 lines of pre-context]
- [old_code]
+ [new_code]
[3 lines of post-context]

Note, then, that we do not use line numbers in this diff format, as the context is enough to uniquely identify code. An example of a message that you might pass as "input" to this function, in order to apply a patch, is shown below.

\`\`\`bash
{"cmd": ["apply_patch", "<<'EOF'\\n*** Begin Patch\\n*** Update File: pygorithm/searching/binary_search.py\\n@@ class BaseClass\\n@@     def search():\\n-        pass\\n+        raise NotImplementedError()\\n@@ class Subclass\\n@@     def search():\\n-        pass\\n+        raise NotImplementedError()\\n*** End Patch\\nEOF\\n"], "workdir": "..."}
\`\`\`

File references can only be relative, NEVER ABSOLUTE. After the apply_patch command is run, it will always say "Done!", regardless of whether the patch was successfully applied or not. However, you can determine if there are issue and errors by looking at any warnings or logging lines printed BEFORE the "Done!" is output.
`;
-												Add link to cookbook (#2)


											
										
										
											2025-04-16 10:15:46 -07:00
+								// Based on reference implementation from
 								// https://cookbook.openai.com/examples/gpt4-1_prompting_guide#reference-implementation-apply_patchpy
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								import fs from "fs";
 								import path from "path";
-												chore: consolidate patch prefix constants in apply‑patch.ts (#274)

This PR replaces all hard‑coded patch markers in apply‑patch.ts with the
corresponding constants (now) exported from parse‑apply‑patch.ts.

Changes
• Import PATCH_PREFIX, PATCH_SUFFIX, ADD_FILE_PREFIX,
DELETE_FILE_PREFIX, UPDATE_FILE_PREFIX, MOVE_FILE_TO_PREFIX,
END_OF_FILE_PREFIX, and HUNK_ADD_LINE_PREFIX from parse‑apply‑patch.ts.
	•	Remove duplicate string literals for patch markers in apply‑patch.ts.
• Changed is_done() to trim the input to account for the slight
difference between the variables.

Why
• DRY & Consistency: Ensures a single source of truth for patch
prefixes.
• Maintainability: Simplifies future updates to prefix values by
centralizing them.
	•	Readability: Makes the code more declarative and self‑documenting.

All tests are passing, lint and format was ran.
											
										
										
											2025-04-18 02:00:30 +02:00
+								import {
 								  ADD_FILE_PREFIX,
 								  DELETE_FILE_PREFIX,
 								  END_OF_FILE_PREFIX,
 								  MOVE_FILE_TO_PREFIX,
 								  PATCH_SUFFIX,
 								  UPDATE_FILE_PREFIX,
 								  HUNK_ADD_LINE_PREFIX,
 								  PATCH_PREFIX,
 								} from "src/parse-apply-patch";
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
 								// -----------------------------------------------------------------------------
 								// Types & Models
 								// -----------------------------------------------------------------------------
 								/** Kind of change recorded for a single file: add, delete, or update. */
 								export enum ActionType {
 								  ADD = "add",
 								  DELETE = "delete",
 								  UPDATE = "update",
 								}
 								/** One file's entry inside a Commit. */
 								export interface FileChange {
 								  /** Which kind of change this entry describes. */
 								  type: ActionType;
 								  /** Previous content; assemble_changes sets it for UPDATE and DELETE. */
 								  old_content?: string | null;
 								  /** Resulting content; assemble_changes sets it for UPDATE and ADD. */
 								  new_content?: string | null;
 								  /** NOTE(review): presumably the destination path of a move - confirm with the code that applies commits. */
 								  move_path?: string | null;
 								}
 								/** A set of file changes produced by comparing two file maps. */
 								export interface Commit {
 								  /** Changes keyed by the same keys as the file maps given to assemble_changes. */
 								  changes: Record<string, FileChange>;
 								}
 								/**
 								 * Builds a Commit describing how `updatedFiles` differs from `orig`.
 								 *
 								 * Only the keys of `updatedFiles` are examined. For each key the content on
 								 * both sides is classified, where `null` and `undefined` both mean "the file
 								 * does not exist on that side":
 								 *  - present on both sides with different text -> UPDATE
 								 *  - present only in `updatedFiles`            -> ADD
 								 *  - present only in `orig`                    -> DELETE
 								 *  - identical text, or absent on both sides   -> no entry
 								 *
 								 * @param orig content of each file before the change, keyed by path
 								 * @param updatedFiles content of each file after the change, keyed by path
 								 * @returns the commit holding one FileChange per differing path
 								 */
 								export function assemble_changes(
 								  orig: Record<string, string | null>,
 								  updatedFiles: Record<string, string | null>,
 								): Commit {
 								  const commit: Commit = { changes: {} };
 								  for (const [p, newContent] of Object.entries(updatedFiles)) {
 								    const oldContent = orig[p];
 								    // `!= null` matches both null and undefined: the declared value type is
 								    // `string | null`, so null must count as "absent" just like a missing key.
 								    if (oldContent != null && newContent != null) {
 								      if (oldContent === newContent) {
 								        continue;
 								      }
 								      commit.changes[p] = {
 								        type: ActionType.UPDATE,
 								        old_content: oldContent,
 								        new_content: newContent,
 								      };
 								    } else if (newContent != null) {
 								      commit.changes[p] = {
 								        type: ActionType.ADD,
 								        new_content: newContent,
 								      };
 								    } else if (oldContent != null) {
 								      commit.changes[p] = {
 								        type: ActionType.DELETE,
 								        old_content: oldContent,
 								      };
 								    }
 								    // Absent on both sides: nothing changed, so nothing is recorded.
 								  }
 								  return commit;
 								}
 								// -----------------------------------------------------------------------------
 								// Patch‑related structures
 								// -----------------------------------------------------------------------------
 								/** One contiguous edit within a file being updated. */
 								export interface Chunk {
 								  orig_index: number; // line index of the first line in the original file
 								  /** NOTE(review): presumably the original lines removed at orig_index - confirm against the code that applies chunks. */
 								  del_lines: Array<string>;
 								  /** NOTE(review): presumably the lines inserted at orig_index - confirm against the code that applies chunks. */
 								  ins_lines: Array<string>;
 								}
 								/** Parsed form of one file section of a patch. */
 								export interface PatchAction {
 								  /** Whether the section adds, deletes, or updates the file. */
 								  type: ActionType;
 								  /** Full text of the new file; the parser fills it in for ADD actions. */
 								  new_file?: string | null;
 								  /** Edits to apply; the parser leaves this empty for ADD and DELETE actions. */
 								  chunks: Array<Chunk>;
 								  /** Value read from the move-to line that may follow an update header, if any. */
 								  move_path?: string | null;
 								}
 								/** Result of parsing a whole patch. */
 								export interface Patch {
 								  /** Parsed actions keyed by the file path named in each section header. */
 								  actions: Record<string, PatchAction>;
 								}
 								/** Error thrown by the parser when the patch text is malformed or does not match the files. */
 								export class DiffError extends Error {}
 								// -----------------------------------------------------------------------------
 								// Parser (patch text -> Patch)
 								// -----------------------------------------------------------------------------
 								class Parser {
 								  current_files: Record<string, string>;
 								  lines: Array<string>;
 								  index = 0;
 								  patch: Patch = { actions: {} };
 								  fuzz = 0;
 								  constructor(currentFiles: Record<string, string>, lines: Array<string>) {
 								    this.current_files = currentFiles;
 								    this.lines = lines;
 								  }
 								  private is_done(prefixes?: Array<string>): boolean {
 								    if (this.index >= this.lines.length) {
 								      return true;
 								    }
 								    if (
 								      prefixes &&
-												chore: consolidate patch prefix constants in apply‑patch.ts (#274)

This PR replaces all hard‑coded patch markers in apply‑patch.ts with the
corresponding constants (now) exported from parse‑apply‑patch.ts.

Changes
• Import PATCH_PREFIX, PATCH_SUFFIX, ADD_FILE_PREFIX,
DELETE_FILE_PREFIX, UPDATE_FILE_PREFIX, MOVE_FILE_TO_PREFIX,
END_OF_FILE_PREFIX, and HUNK_ADD_LINE_PREFIX from parse‑apply‑patch.ts.
	•	Remove duplicate string literals for patch markers in apply‑patch.ts.
• Changed is_done() to trim the input to account for the slight
difference between the variables.

Why
• DRY & Consistency: Ensures a single source of truth for patch
prefixes.
• Maintainability: Simplifies future updates to prefix values by
centralizing them.
	•	Readability: Makes the code more declarative and self‑documenting.

All tests are passing, lint and format was ran.
											
										
										
											2025-04-18 02:00:30 +02:00
+								      prefixes.some((p) => this.lines[this.index]!.startsWith(p.trim()))
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								    ) {
 								      return true;
 								    }
 								    return false;
 								  }
 								  /**
 								   * Reports whether the current line begins with any of the given prefixes.
 								   *
 								   * Returns false when the cursor is past the last line, so a caller that runs
 								   * out of input can raise its own DiffError instead of hitting a TypeError
 								   * on an undefined line.
 								   *
 								   * @param prefix a single prefix or a list of alternatives
 								   * @returns true when the current line exists and starts with one of them
 								   */
 								  private startswith(prefix: string | Array<string>): boolean {
 								    const line = this.lines[this.index];
 								    if (line === undefined) {
 								      return false;
 								    }
 								    const prefixes = Array.isArray(prefix) ? prefix : [prefix];
 								    return prefixes.some((p) => line.startsWith(p));
 								  }
 								  /**
 								   * Consumes the current line when it starts with `prefix`.
 								   *
 								   * On a match the cursor advances by one and the line is returned, either
 								   * whole (`returnEverything`) or with the prefix stripped. On a mismatch the
 								   * cursor stays put and the empty string is returned.
 								   *
 								   * @param prefix text the current line must start with (default: any line)
 								   * @param returnEverything keep the prefix in the returned text
 								   * @returns the consumed text, or "" when the line does not match
 								   * @throws DiffError when the cursor is already past the last line
 								   */
 								  private read_str(prefix = "", returnEverything = false): string {
 								    const total = this.lines.length;
 								    if (this.index >= total) {
 								      throw new DiffError(`Index: ${this.index} >= ${total}`);
 								    }
 								    const current = this.lines[this.index]!;
 								    if (!current.startsWith(prefix)) {
 								      return "";
 								    }
 								    this.index += 1;
 								    return returnEverything ? current : current.slice(prefix.length);
 								  }
 								  parse(): void {
-												chore: consolidate patch prefix constants in apply‑patch.ts (#274)

This PR replaces all hard‑coded patch markers in apply‑patch.ts with the
corresponding constants (now) exported from parse‑apply‑patch.ts.

Changes
• Import PATCH_PREFIX, PATCH_SUFFIX, ADD_FILE_PREFIX,
DELETE_FILE_PREFIX, UPDATE_FILE_PREFIX, MOVE_FILE_TO_PREFIX,
END_OF_FILE_PREFIX, and HUNK_ADD_LINE_PREFIX from parse‑apply‑patch.ts.
	•	Remove duplicate string literals for patch markers in apply‑patch.ts.
• Changed is_done() to trim the input to account for the slight
difference between the variables.

Why
• DRY & Consistency: Ensures a single source of truth for patch
prefixes.
• Maintainability: Simplifies future updates to prefix values by
centralizing them.
	•	Readability: Makes the code more declarative and self‑documenting.

All tests are passing, lint and format was ran.
											
										
										
											2025-04-18 02:00:30 +02:00
+								    while (!this.is_done([PATCH_SUFFIX])) {
 								      let path = this.read_str(UPDATE_FILE_PREFIX);
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								      if (path) {
 								        if (this.patch.actions[path]) {
 								          throw new DiffError(`Update File Error: Duplicate Path: ${path}`);
 								        }
-												chore: consolidate patch prefix constants in apply‑patch.ts (#274)

This PR replaces all hard‑coded patch markers in apply‑patch.ts with the
corresponding constants (now) exported from parse‑apply‑patch.ts.

Changes
• Import PATCH_PREFIX, PATCH_SUFFIX, ADD_FILE_PREFIX,
DELETE_FILE_PREFIX, UPDATE_FILE_PREFIX, MOVE_FILE_TO_PREFIX,
END_OF_FILE_PREFIX, and HUNK_ADD_LINE_PREFIX from parse‑apply‑patch.ts.
	•	Remove duplicate string literals for patch markers in apply‑patch.ts.
• Changed is_done() to trim the input to account for the slight
difference between the variables.

Why
• DRY & Consistency: Ensures a single source of truth for patch
prefixes.
• Maintainability: Simplifies future updates to prefix values by
centralizing them.
	•	Readability: Makes the code more declarative and self‑documenting.

All tests are passing, lint and format was ran.
											
										
										
											2025-04-18 02:00:30 +02:00
+								        const moveTo = this.read_str(MOVE_FILE_TO_PREFIX);
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								        if (!(path in this.current_files)) {
 								          throw new DiffError(`Update File Error: Missing File: ${path}`);
 								        }
 								        const text = this.current_files[path];
 								        const action = this.parse_update_file(text ?? "");
 								        action.move_path = moveTo || undefined;
 								        this.patch.actions[path] = action;
 								        continue;
 								      }
-												chore: consolidate patch prefix constants in apply‑patch.ts (#274)

This PR replaces all hard‑coded patch markers in apply‑patch.ts with the
corresponding constants (now) exported from parse‑apply‑patch.ts.

Changes
• Import PATCH_PREFIX, PATCH_SUFFIX, ADD_FILE_PREFIX,
DELETE_FILE_PREFIX, UPDATE_FILE_PREFIX, MOVE_FILE_TO_PREFIX,
END_OF_FILE_PREFIX, and HUNK_ADD_LINE_PREFIX from parse‑apply‑patch.ts.
	•	Remove duplicate string literals for patch markers in apply‑patch.ts.
• Changed is_done() to trim the input to account for the slight
difference between the variables.

Why
• DRY & Consistency: Ensures a single source of truth for patch
prefixes.
• Maintainability: Simplifies future updates to prefix values by
centralizing them.
	•	Readability: Makes the code more declarative and self‑documenting.

All tests are passing, lint and format was ran.
											
										
										
											2025-04-18 02:00:30 +02:00
+								      path = this.read_str(DELETE_FILE_PREFIX);
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								      if (path) {
 								        if (this.patch.actions[path]) {
 								          throw new DiffError(`Delete File Error: Duplicate Path: ${path}`);
 								        }
 								        if (!(path in this.current_files)) {
 								          throw new DiffError(`Delete File Error: Missing File: ${path}`);
 								        }
 								        this.patch.actions[path] = { type: ActionType.DELETE, chunks: [] };
 								        continue;
 								      }
-												chore: consolidate patch prefix constants in apply‑patch.ts (#274)

This PR replaces all hard‑coded patch markers in apply‑patch.ts with the
corresponding constants (now) exported from parse‑apply‑patch.ts.

Changes
• Import PATCH_PREFIX, PATCH_SUFFIX, ADD_FILE_PREFIX,
DELETE_FILE_PREFIX, UPDATE_FILE_PREFIX, MOVE_FILE_TO_PREFIX,
END_OF_FILE_PREFIX, and HUNK_ADD_LINE_PREFIX from parse‑apply‑patch.ts.
	•	Remove duplicate string literals for patch markers in apply‑patch.ts.
• Changed is_done() to trim the input to account for the slight
difference between the variables.

Why
• DRY & Consistency: Ensures a single source of truth for patch
prefixes.
• Maintainability: Simplifies future updates to prefix values by
centralizing them.
	•	Readability: Makes the code more declarative and self‑documenting.

All tests are passing, lint and format was ran.
											
										
										
											2025-04-18 02:00:30 +02:00
+								      path = this.read_str(ADD_FILE_PREFIX);
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								      if (path) {
 								        if (this.patch.actions[path]) {
 								          throw new DiffError(`Add File Error: Duplicate Path: ${path}`);
 								        }
 								        if (path in this.current_files) {
 								          throw new DiffError(`Add File Error: File already exists: ${path}`);
 								        }
 								        this.patch.actions[path] = this.parse_add_file();
 								        continue;
 								      }
 								      throw new DiffError(`Unknown Line: ${this.lines[this.index]}`);
 								    }
-												chore: consolidate patch prefix constants in apply‑patch.ts (#274)

This PR replaces all hard‑coded patch markers in apply‑patch.ts with the
corresponding constants (now) exported from parse‑apply‑patch.ts.

Changes
• Import PATCH_PREFIX, PATCH_SUFFIX, ADD_FILE_PREFIX,
DELETE_FILE_PREFIX, UPDATE_FILE_PREFIX, MOVE_FILE_TO_PREFIX,
END_OF_FILE_PREFIX, and HUNK_ADD_LINE_PREFIX from parse‑apply‑patch.ts.
	•	Remove duplicate string literals for patch markers in apply‑patch.ts.
• Changed is_done() to trim the input to account for the slight
difference between the variables.

Why
• DRY & Consistency: Ensures a single source of truth for patch
prefixes.
• Maintainability: Simplifies future updates to prefix values by
centralizing them.
	•	Readability: Makes the code more declarative and self‑documenting.

All tests are passing, lint and format was ran.
											
										
										
											2025-04-18 02:00:30 +02:00
+								    if (!this.startswith(PATCH_SUFFIX.trim())) {
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								      throw new DiffError("Missing End Patch");
 								    }
 								    this.index += 1;
 								  }
 								  private parse_update_file(text: string): PatchAction {
 								    const action: PatchAction = { type: ActionType.UPDATE, chunks: [] };
 								    const fileLines = text.split("\n");
 								    let index = 0;
 								    while (
 								      !this.is_done([
-												chore: consolidate patch prefix constants in apply‑patch.ts (#274)

This PR replaces all hard‑coded patch markers in apply‑patch.ts with the
corresponding constants (now) exported from parse‑apply‑patch.ts.

Changes
• Import PATCH_PREFIX, PATCH_SUFFIX, ADD_FILE_PREFIX,
DELETE_FILE_PREFIX, UPDATE_FILE_PREFIX, MOVE_FILE_TO_PREFIX,
END_OF_FILE_PREFIX, and HUNK_ADD_LINE_PREFIX from parse‑apply‑patch.ts.
	•	Remove duplicate string literals for patch markers in apply‑patch.ts.
• Changed is_done() to trim the input to account for the slight
difference between the variables.

Why
• DRY & Consistency: Ensures a single source of truth for patch
prefixes.
• Maintainability: Simplifies future updates to prefix values by
centralizing them.
	•	Readability: Makes the code more declarative and self‑documenting.

All tests are passing, lint and format was ran.
											
										
										
											2025-04-18 02:00:30 +02:00
+								        PATCH_SUFFIX,
 								        UPDATE_FILE_PREFIX,
 								        DELETE_FILE_PREFIX,
 								        ADD_FILE_PREFIX,
 								        END_OF_FILE_PREFIX,
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								      ])
 								    ) {
 								      const defStr = this.read_str("@@ ");
 								      let sectionStr = "";
 								      if (!defStr && this.lines[this.index] === "@@") {
 								        sectionStr = this.lines[this.index]!;
 								        this.index += 1;
 								      }
 								      if (!(defStr || sectionStr || index === 0)) {
 								        throw new DiffError(`Invalid Line:\n${this.lines[this.index]}`);
 								      }
 								      if (defStr.trim()) {
 								        let found = false;
-												feat: more loosely match context for apply_patch (#610)

More of a proposal than anything but models seem to struggle with
composing valid patches for `apply_patch` for context matching when
there are unicode look-a-likes involved. This would normalize them.

```
top-level          # ASCII
top-level          # U+2011 NON-BREAKING HYPHEN
top–level          # U+2013 EN DASH
top—level          # U+2014 EM DASH
top‒level          # U+2012 FIGURE DASH
```

thanks unicode.
											
										
										
											2025-04-24 09:05:19 -07:00
+								        // ------------------------------------------------------------------
 								        // Equality helpers using the canonicalisation from find_context_core.
 								        // (We duplicate a minimal version here because the scope is local.)
 								        // ------------------------------------------------------------------
 								        const canonLocal = (s: string): string =>
 								          s.normalize("NFC").replace(
 								            /./gu,
 								            (c) =>
-												chore: upgrade prettier to v3 (#644)

## Description

This PR addresses the following improvements:

**Unify Prettier Version**: Currently, the Prettier version used in
`/package.json` and `/codex-cli/package.json` are different. In this PR,
we're updating both to use Prettier v3.

- Prettier v3 introduces improved support for JavaScript and TypeScript.
(e.g. the formatting scenario shown in the image below. This is more
aligned with the TypeScript indentation standard).

<img width="1126" alt="image"
src="https://github.com/user-attachments/assets/6e237eb8-4553-4574-b336-ed9561c55370"
/>

**Add Prettier Auto-Formatting in lint-staged**: We've added a step to
automatically run prettier --write on JavaScript and TypeScript files as
part of the lint-staged process, before the ESLint checks.

- This will help ensure that all committed code is properly formatted
according to the project's Prettier configuration.
											
										
										
											2025-04-25 22:21:50 +08:00
+								              (
 								                ({
-												feat: more loosely match context for apply_patch (#610)

More of a proposal than anything but models seem to struggle with
composing valid patches for `apply_patch` for context matching when
there are unicode look-a-likes involved. This would normalize them.

```
top-level          # ASCII
top-level          # U+2011 NON-BREAKING HYPHEN
top–level          # U+2013 EN DASH
top—level          # U+2014 EM DASH
top‒level          # U+2012 FIGURE DASH
```

thanks unicode.
											
										
										
											2025-04-24 09:05:19 -07:00
+								                  "-": "-",
 								                  "\u2010": "-",
 								                  "\u2011": "-",
 								                  "\u2012": "-",
 								                  "\u2013": "-",
 								                  "\u2014": "-",
 								                  "\u2212": "-",
 								                  "\u0022": '"',
 								                  "\u201C": '"',
 								                  "\u201D": '"',
 								                  "\u201E": '"',
 								                  "\u00AB": '"',
 								                  "\u00BB": '"',
 								                  "\u0027": "'",
 								                  "\u2018": "'",
 								                  "\u2019": "'",
 								                  "\u201B": "'",
-												fix: `apply_patch` unicode characters (#625)

fuzzy-er matching for apply_patch to handle u00A0 and u202F spaces.
											
										
										
											2025-04-24 13:04:37 -07:00
+								                  "\u00A0": " ",
 								                  "\u202F": " ",
-												chore: upgrade prettier to v3 (#644)

## Description

This PR addresses the following improvements:

**Unify Prettier Version**: Currently, the Prettier version used in
`/package.json` and `/codex-cli/package.json` are different. In this PR,
we're updating both to use Prettier v3.

- Prettier v3 introduces improved support for JavaScript and TypeScript.
(e.g. the formatting scenario shown in the image below. This is more
aligned with the TypeScript indentation standard).

<img width="1126" alt="image"
src="https://github.com/user-attachments/assets/6e237eb8-4553-4574-b336-ed9561c55370"
/>

**Add Prettier Auto-Formatting in lint-staged**: We've added a step to
automatically run prettier --write on JavaScript and TypeScript files as
part of the lint-staged process, before the ESLint checks.

- This will help ensure that all committed code is properly formatted
according to the project's Prettier configuration.
											
										
										
											2025-04-25 22:21:50 +08:00
+								                }) as Record<string, string>
 								              )[c] ?? c,
-												feat: more loosely match context for apply_patch (#610)

More of a proposal than anything but models seem to struggle with
composing valid patches for `apply_patch` for context matching when
there are unicode look-a-likes involved. This would normalize them.

```
top-level          # ASCII
top-level          # U+2011 NON-BREAKING HYPHEN
top–level          # U+2013 EN DASH
top—level          # U+2014 EM DASH
top‒level          # U+2012 FIGURE DASH
```

thanks unicode.
											
										
										
											2025-04-24 09:05:19 -07:00
+								          );
 								        if (
 								          !fileLines
 								            .slice(0, index)
 								            .some((s) => canonLocal(s) === canonLocal(defStr))
 								        ) {
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								          for (let i = index; i < fileLines.length; i++) {
-												feat: more loosely match context for apply_patch (#610)

More of a proposal than anything but models seem to struggle with
composing valid patches for `apply_patch` for context matching when
there are unicode look-a-likes involved. This would normalize them.

```
top-level          # ASCII
top-level          # U+2011 NON-BREAKING HYPHEN
top–level          # U+2013 EN DASH
top—level          # U+2014 EM DASH
top‒level          # U+2012 FIGURE DASH
```

thanks unicode.
											
										
										
											2025-04-24 09:05:19 -07:00
+								            if (canonLocal(fileLines[i]!) === canonLocal(defStr)) {
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								              index = i + 1;
 								              found = true;
 								              break;
 								            }
 								          }
 								        }
 								        if (
 								          !found &&
-												feat: more loosely match context for apply_patch (#610)

More of a proposal than anything but models seem to struggle with
composing valid patches for `apply_patch` for context matching when
there are unicode look-a-likes involved. This would normalize them.

```
top-level          # ASCII
top-level          # U+2011 NON-BREAKING HYPHEN
top–level          # U+2013 EN DASH
top—level          # U+2014 EM DASH
top‒level          # U+2012 FIGURE DASH
```

thanks unicode.
											
										
										
											2025-04-24 09:05:19 -07:00
+								          !fileLines
 								            .slice(0, index)
 								            .some((s) => canonLocal(s.trim()) === canonLocal(defStr.trim()))
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								        ) {
 								          for (let i = index; i < fileLines.length; i++) {
-												feat: more loosely match context for apply_patch (#610)

More of a proposal than anything but models seem to struggle with
composing valid patches for `apply_patch` for context matching when
there are unicode look-a-likes involved. This would normalize them.

```
top-level          # ASCII
top-level          # U+2011 NON-BREAKING HYPHEN
top–level          # U+2013 EN DASH
top—level          # U+2014 EM DASH
top‒level          # U+2012 FIGURE DASH
```

thanks unicode.
											
										
										
											2025-04-24 09:05:19 -07:00
+								            if (
 								              canonLocal(fileLines[i]!.trim()) === canonLocal(defStr.trim())
 								            ) {
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								              index = i + 1;
 								              this.fuzz += 1;
 								              found = true;
 								              break;
 								            }
 								          }
 								        }
 								      }
 								      const [nextChunkContext, chunks, endPatchIndex, eof] = peek_next_section(
 								        this.lines,
 								        this.index,
 								      );
 								      const [newIndex, fuzz] = find_context(
 								        fileLines,
 								        nextChunkContext,
 								        index,
 								        eof,
 								      );
 								      if (newIndex === -1) {
 								        const ctxText = nextChunkContext.join("\n");
 								        if (eof) {
 								          throw new DiffError(`Invalid EOF Context ${index}:\n${ctxText}`);
 								        } else {
 								          throw new DiffError(`Invalid Context ${index}:\n${ctxText}`);
 								        }
 								      }
 								      this.fuzz += fuzz;
 								      for (const ch of chunks) {
 								        ch.orig_index += newIndex;
 								        action.chunks.push(ch);
 								      }
 								      index = newIndex + nextChunkContext.length;
 								      this.index = endPatchIndex;
 								    }
 								    return action;
 								  }
 								  private parse_add_file(): PatchAction {
 								    const lines: Array<string> = [];
 								    while (
 								      !this.is_done([
-												chore: consolidate patch prefix constants in apply‑patch.ts (#274)

This PR replaces all hard‑coded patch markers in apply‑patch.ts with the
corresponding constants (now) exported from parse‑apply‑patch.ts.

Changes
• Import PATCH_PREFIX, PATCH_SUFFIX, ADD_FILE_PREFIX,
DELETE_FILE_PREFIX, UPDATE_FILE_PREFIX, MOVE_FILE_TO_PREFIX,
END_OF_FILE_PREFIX, and HUNK_ADD_LINE_PREFIX from parse‑apply‑patch.ts.
	•	Remove duplicate string literals for patch markers in apply‑patch.ts.
• Changed is_done() to trim the input to account for the slight
difference between the variables.

Why
• DRY & Consistency: Ensures a single source of truth for patch
prefixes.
• Maintainability: Simplifies future updates to prefix values by
centralizing them.
	•	Readability: Makes the code more declarative and self‑documenting.

All tests are passing, lint and format was ran.
											
										
										
											2025-04-18 02:00:30 +02:00
+								        PATCH_SUFFIX,
 								        UPDATE_FILE_PREFIX,
 								        DELETE_FILE_PREFIX,
 								        ADD_FILE_PREFIX,
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								      ])
 								    ) {
 								      const s = this.read_str();
-												chore: consolidate patch prefix constants in apply‑patch.ts (#274)

This PR replaces all hard‑coded patch markers in apply‑patch.ts with the
corresponding constants (now) exported from parse‑apply‑patch.ts.

Changes
• Import PATCH_PREFIX, PATCH_SUFFIX, ADD_FILE_PREFIX,
DELETE_FILE_PREFIX, UPDATE_FILE_PREFIX, MOVE_FILE_TO_PREFIX,
END_OF_FILE_PREFIX, and HUNK_ADD_LINE_PREFIX from parse‑apply‑patch.ts.
	•	Remove duplicate string literals for patch markers in apply‑patch.ts.
• Changed is_done() to trim the input to account for the slight
difference between the variables.

Why
• DRY & Consistency: Ensures a single source of truth for patch
prefixes.
• Maintainability: Simplifies future updates to prefix values by
centralizing them.
	•	Readability: Makes the code more declarative and self‑documenting.

All tests are passing, lint and format was ran.
											
										
										
											2025-04-18 02:00:30 +02:00
+								      if (!s.startsWith(HUNK_ADD_LINE_PREFIX)) {
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								        throw new DiffError(`Invalid Add File Line: ${s}`);
 								      }
 								      lines.push(s.slice(1));
 								    }
 								    return {
 								      type: ActionType.ADD,
 								      new_file: lines.join("\n"),
 								      chunks: [],
 								    };
 								  }
 								}
 								function find_context_core(
 								  lines: Array<string>,
 								  context: Array<string>,
 								  start: number,
 								): [number, number] {
-												feat: more loosely match context for apply_patch (#610)

More of a proposal than anything but models seem to struggle with
composing valid patches for `apply_patch` for context matching when
there are unicode look-a-likes involved. This would normalize them.

```
top-level          # ASCII
top-level          # U+2011 NON-BREAKING HYPHEN
top–level          # U+2013 EN DASH
top—level          # U+2014 EM DASH
top‒level          # U+2012 FIGURE DASH
```

thanks unicode.
											
										
										
											2025-04-24 09:05:19 -07:00
+								  // ---------------------------------------------------------------------------
 								  // Helpers – Unicode punctuation normalisation
 								  // ---------------------------------------------------------------------------
 								  /*
 								   * The patch-matching algorithm originally required **exact** string equality
 								   * for non-whitespace characters.  That breaks when the file on disk contains
 								   * visually identical but different Unicode code-points (e.g. “EN DASH” vs
 								   * ASCII "-"), because models almost always emit the ASCII variant.  To make
 								   * apply_patch resilient we canonicalise a handful of common punctuation
 								   * look-alikes before doing comparisons.
 								   *
 								   * We purposefully keep the mapping *small* – only characters that routinely
 								   * appear in source files and are highly unlikely to introduce ambiguity are
 								   * included.  Each entry is written using the corresponding Unicode escape so
 								   * that the file remains ASCII-only even after transpilation.
 								   */
 								  const PUNCT_EQUIV: Record<string, string> = {
 								    // Hyphen / dash variants --------------------------------------------------
 								    /* U+002D HYPHEN-MINUS */ "-": "-",
 								    /* U+2010 HYPHEN */ "\u2010": "-",
 								    /* U+2011 NO-BREAK HYPHEN */ "\u2011": "-",
 								    /* U+2012 FIGURE DASH */ "\u2012": "-",
 								    /* U+2013 EN DASH */ "\u2013": "-",
 								    /* U+2014 EM DASH */ "\u2014": "-",
 								    /* U+2212 MINUS SIGN */ "\u2212": "-",
 								    // Double quotes -----------------------------------------------------------
 								    /* U+0022 QUOTATION MARK */ "\u0022": '"',
 								    /* U+201C LEFT DOUBLE QUOTATION MARK */ "\u201C": '"',
 								    /* U+201D RIGHT DOUBLE QUOTATION MARK */ "\u201D": '"',
 								    /* U+201E DOUBLE LOW-9 QUOTATION MARK */ "\u201E": '"',
 								    /* U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */ "\u00AB": '"',
 								    /* U+00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */ "\u00BB": '"',
 								    // Single quotes -----------------------------------------------------------
 								    /* U+0027 APOSTROPHE */ "\u0027": "'",
 								    /* U+2018 LEFT SINGLE QUOTATION MARK */ "\u2018": "'",
 								    /* U+2019 RIGHT SINGLE QUOTATION MARK */ "\u2019": "'",
 								    /* U+201B SINGLE HIGH-REVERSED-9 QUOTATION MARK */ "\u201B": "'",
-												fix: `apply_patch` unicode characters (#625)

fuzzy-er matching for apply_patch to handle u00A0 and u202F spaces.
											
										
										
											2025-04-24 13:04:37 -07:00
+								    // Spaces ------------------------------------------------------------------
 								    /* U+00A0 NO-BREAK SPACE */ "\u00A0": " ",
 								    /* U+202F NARROW NO-BREAK SPACE */ "\u202F": " ",
-												feat: more loosely match context for apply_patch (#610)

More of a proposal than anything but models seem to struggle with
composing valid patches for `apply_patch` for context matching when
there are unicode look-a-likes involved. This would normalize them.

```
top-level          # ASCII
top-level          # U+2011 NON-BREAKING HYPHEN
top–level          # U+2013 EN DASH
top—level          # U+2014 EM DASH
top‒level          # U+2012 FIGURE DASH
```

thanks unicode.
											
										
										
											2025-04-24 09:05:19 -07:00
+								  };
 								  const canon = (s: string): string =>
 								    s
 								      // Canonical Unicode composition first
 								      .normalize("NFC")
 								      // Replace punctuation look-alikes
 								      .replace(/./gu, (c) => PUNCT_EQUIV[c] ?? c);
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								  if (context.length === 0) {
 								    return [start, 0];
 								  }
-												feat: more loosely match context for apply_patch (#610)

More of a proposal than anything but models seem to struggle with
composing valid patches for `apply_patch` for context matching when
there are unicode look-a-likes involved. This would normalize them.

```
top-level          # ASCII
top-level          # U+2011 NON-BREAKING HYPHEN
top–level          # U+2013 EN DASH
top—level          # U+2014 EM DASH
top‒level          # U+2012 FIGURE DASH
```

thanks unicode.
											
										
										
											2025-04-24 09:05:19 -07:00
+								  // Pass 1 – exact equality after canonicalisation ---------------------------
-												fix: nits in apply patch (#640)

## Description

Fix a nit in `apply patch`, potentially improving performance slightly.
											
										
										
											2025-04-25 22:27:48 +08:00
+								  const canonicalContext = canon(context.join("\n"));
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								  for (let i = start; i < lines.length; i++) {
-												feat: more loosely match context for apply_patch (#610)

More of a proposal than anything but models seem to struggle with
composing valid patches for `apply_patch` for context matching when
there are unicode look-a-likes involved. This would normalize them.

```
top-level          # ASCII
top-level          # U+2011 NON-BREAKING HYPHEN
top–level          # U+2013 EN DASH
top—level          # U+2014 EM DASH
top‒level          # U+2012 FIGURE DASH
```

thanks unicode.
											
										
										
											2025-04-24 09:05:19 -07:00
+								    const segment = canon(lines.slice(i, i + context.length).join("\n"));
-												fix: nits in apply patch (#640)

## Description

Fix a nit in `apply patch`, potentially improving performance slightly.
											
										
										
											2025-04-25 22:27:48 +08:00
+								    if (segment === canonicalContext) {
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								      return [i, 0];
 								    }
 								  }
-												feat: more loosely match context for apply_patch (#610)

More of a proposal than anything but models seem to struggle with
composing valid patches for `apply_patch` for context matching when
there are unicode look-a-likes involved. This would normalize them.

```
top-level          # ASCII
top-level          # U+2011 NON-BREAKING HYPHEN
top–level          # U+2013 EN DASH
top—level          # U+2014 EM DASH
top‒level          # U+2012 FIGURE DASH
```

thanks unicode.
											
										
										
											2025-04-24 09:05:19 -07:00
 								  // Pass 2 – ignore trailing whitespace -------------------------------------
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								  for (let i = start; i < lines.length; i++) {
-												feat: more loosely match context for apply_patch (#610)

More of a proposal than anything but models seem to struggle with
composing valid patches for `apply_patch` for context matching when
there are unicode look-a-likes involved. This would normalize them.

```
top-level          # ASCII
top-level          # U+2011 NON-BREAKING HYPHEN
top–level          # U+2013 EN DASH
top—level          # U+2014 EM DASH
top‒level          # U+2012 FIGURE DASH
```

thanks unicode.
											
										
										
											2025-04-24 09:05:19 -07:00
+								    const segment = canon(
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								      lines
 								        .slice(i, i + context.length)
 								        .map((s) => s.trimEnd())
-												feat: more loosely match context for apply_patch (#610)

More of a proposal than anything but models seem to struggle with
composing valid patches for `apply_patch` for context matching when
there are unicode look-a-likes involved. This would normalize them.

```
top-level          # ASCII
top-level          # U+2011 NON-BREAKING HYPHEN
top–level          # U+2013 EN DASH
top—level          # U+2014 EM DASH
top‒level          # U+2012 FIGURE DASH
```

thanks unicode.
											
										
										
											2025-04-24 09:05:19 -07:00
+								        .join("\n"),
 								    );
 								    const ctx = canon(context.map((s) => s.trimEnd()).join("\n"));
 								    if (segment === ctx) {
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								      return [i, 1];
 								    }
 								  }
-												feat: more loosely match context for apply_patch (#610)

More of a proposal than anything but models seem to struggle with
composing valid patches for `apply_patch` for context matching when
there are unicode look-a-likes involved. This would normalize them.

```
top-level          # ASCII
top-level          # U+2011 NON-BREAKING HYPHEN
top–level          # U+2013 EN DASH
top—level          # U+2014 EM DASH
top‒level          # U+2012 FIGURE DASH
```

thanks unicode.
											
										
										
											2025-04-24 09:05:19 -07:00
 								  // Pass 3 – ignore all surrounding whitespace ------------------------------
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								  for (let i = start; i < lines.length; i++) {
-												feat: more loosely match context for apply_patch (#610)

More of a proposal than anything but models seem to struggle with
composing valid patches for `apply_patch` for context matching when
there are unicode look-a-likes involved. This would normalize them.

```
top-level          # ASCII
top-level          # U+2011 NON-BREAKING HYPHEN
top–level          # U+2013 EN DASH
top—level          # U+2014 EM DASH
top‒level          # U+2012 FIGURE DASH
```

thanks unicode.
											
										
										
											2025-04-24 09:05:19 -07:00
+								    const segment = canon(
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								      lines
 								        .slice(i, i + context.length)
 								        .map((s) => s.trim())
-												feat: more loosely match context for apply_patch (#610)

More of a proposal than anything but models seem to struggle with
composing valid patches for `apply_patch` for context matching when
there are unicode look-a-likes involved. This would normalize them.

```
top-level          # ASCII
top-level          # U+2011 NON-BREAKING HYPHEN
top–level          # U+2013 EN DASH
top—level          # U+2014 EM DASH
top‒level          # U+2012 FIGURE DASH
```

thanks unicode.
											
										
										
											2025-04-24 09:05:19 -07:00
+								        .join("\n"),
 								    );
 								    const ctx = canon(context.map((s) => s.trim()).join("\n"));
 								    if (segment === ctx) {
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								      return [i, 100];
 								    }
 								  }
-												feat: more loosely match context for apply_patch (#610)

More of a proposal than anything but models seem to struggle with
composing valid patches for `apply_patch` for context matching when
there are unicode look-a-likes involved. This would normalize them.

```
top-level          # ASCII
top-level          # U+2011 NON-BREAKING HYPHEN
top–level          # U+2013 EN DASH
top—level          # U+2014 EM DASH
top‒level          # U+2012 FIGURE DASH
```

thanks unicode.
											
										
										
											2025-04-24 09:05:19 -07:00
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								  return [-1, 0];
 								}
 								/**
 								 * Find `context` in `lines`, giving preference to a match at the end of the
 								 * file when `eof` is true.
 								 *
 								 * @param lines   Lines of the file being patched.
 								 * @param context Consecutive lines that must be located.
 								 * @param start   Index from which the ordinary search begins.
 								 * @param eof     Whether the context is expected to sit at the end of the file.
 								 * @returns The `[index, fuzz]` pair produced by `find_context_core`. With
 								 *   `eof` set and no match at the end of the file, the result of the ordinary
 								 *   search is returned with 10000 added to its fuzz (also when nothing is found).
 								 */
 								function find_context(
 								  lines: Array<string>,
 								  context: Array<string>,
 								  start: number,
 								  eof: boolean,
 								): [number, number] {
 								  if (!eof) {
 								    return find_context_core(lines, context, start);
 								  }
 								  // Search from the position at which the context would end the file.
 								  const tailStart = lines.length - context.length;
 								  const [tailIndex, tailFuzz] = find_context_core(lines, context, tailStart);
 								  if (tailIndex !== -1) {
 								    return [tailIndex, tailFuzz];
 								  }
 								  // Not at the end: search from `start` and penalise the result heavily.
 								  const [fallbackIndex, fallbackFuzz] = find_context_core(
 								    lines,
 								    context,
 								    start,
 								  );
 								  return [fallbackIndex, fallbackFuzz + 10000];
 								}
 								function peek_next_section(
 								  lines: Array<string>,
 								  initialIndex: number,
 								): [Array<string>, Array<Chunk>, number, boolean] {
 								  let index = initialIndex;
 								  const old: Array<string> = [];
 								  let delLines: Array<string> = [];
 								  let insLines: Array<string> = [];
 								  const chunks: Array<Chunk> = [];
 								  let mode: "keep" | "add" | "delete" = "keep";
 								  while (index < lines.length) {
 								    const s = lines[index]!;
 								    if (
-												chore: consolidate patch prefix constants in apply‑patch.ts (#274)

This PR replaces all hard‑coded patch markers in apply‑patch.ts with the
corresponding constants (now) exported from parse‑apply‑patch.ts.

Changes
• Import PATCH_PREFIX, PATCH_SUFFIX, ADD_FILE_PREFIX,
DELETE_FILE_PREFIX, UPDATE_FILE_PREFIX, MOVE_FILE_TO_PREFIX,
END_OF_FILE_PREFIX, and HUNK_ADD_LINE_PREFIX from parse‑apply‑patch.ts.
	•	Remove duplicate string literals for patch markers in apply‑patch.ts.
• Changed is_done() to trim the input to account for the slight
difference between the variables.

Why
• DRY & Consistency: Ensures a single source of truth for patch
prefixes.
• Maintainability: Simplifies future updates to prefix values by
centralizing them.
	•	Readability: Makes the code more declarative and self‑documenting.

All tests are passing, lint and format was ran.
											
										
										
											2025-04-18 02:00:30 +02:00
+								      [
 								        "@@",
 								        PATCH_SUFFIX,
 								        UPDATE_FILE_PREFIX,
 								        DELETE_FILE_PREFIX,
 								        ADD_FILE_PREFIX,
 								        END_OF_FILE_PREFIX,
 								      ].some((p) => s.startsWith(p.trim()))
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								    ) {
 								      break;
 								    }
 								    if (s === "***") {
 								      break;
 								    }
 								    if (s.startsWith("***")) {
 								      throw new DiffError(`Invalid Line: ${s}`);
 								    }
 								    index += 1;
 								    const lastMode: "keep" | "add" | "delete" = mode;
 								    let line = s;
-												chore: consolidate patch prefix constants in apply‑patch.ts (#274)

This PR replaces all hard‑coded patch markers in apply‑patch.ts with the
corresponding constants (now) exported from parse‑apply‑patch.ts.

Changes
• Import PATCH_PREFIX, PATCH_SUFFIX, ADD_FILE_PREFIX,
DELETE_FILE_PREFIX, UPDATE_FILE_PREFIX, MOVE_FILE_TO_PREFIX,
END_OF_FILE_PREFIX, and HUNK_ADD_LINE_PREFIX from parse‑apply‑patch.ts.
	•	Remove duplicate string literals for patch markers in apply‑patch.ts.
• Changed is_done() to trim the input to account for the slight
difference between the variables.

Why
• DRY & Consistency: Ensures a single source of truth for patch
prefixes.
• Maintainability: Simplifies future updates to prefix values by
centralizing them.
	•	Readability: Makes the code more declarative and self‑documenting.

All tests are passing, lint and format was ran.
											
										
										
											2025-04-18 02:00:30 +02:00
+								    if (line[0] === HUNK_ADD_LINE_PREFIX) {
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								      mode = "add";
 								    } else if (line[0] === "-") {
 								      mode = "delete";
 								    } else if (line[0] === " ") {
 								      mode = "keep";
 								    } else {
 								      // Tolerate invalid lines where the leading whitespace is missing. This is necessary as
 								      // the model sometimes doesn't fully adhere to the spec and returns lines without leading
 								      // whitespace for context lines.
 								      mode = "keep";
 								      line = " " + line;
 								      // TODO: Re-enable strict mode.
 								      // throw new DiffError(`Invalid Line: ${line}`)
 								    }
 								    line = line.slice(1);
 								    if (mode === "keep" && lastMode !== mode) {
 								      if (insLines.length || delLines.length) {
 								        chunks.push({
 								          orig_index: old.length - delLines.length,
 								          del_lines: delLines,
 								          ins_lines: insLines,
 								        });
 								      }
 								      delLines = [];
 								      insLines = [];
 								    }
 								    if (mode === "delete") {
 								      delLines.push(line);
 								      old.push(line);
 								    } else if (mode === "add") {
 								      insLines.push(line);
 								    } else {
 								      old.push(line);
 								    }
 								  }
 								  if (insLines.length || delLines.length) {
 								    chunks.push({
 								      orig_index: old.length - delLines.length,
 								      del_lines: delLines,
 								      ins_lines: insLines,
 								    });
 								  }
-												chore: consolidate patch prefix constants in apply‑patch.ts (#274)

This PR replaces all hard‑coded patch markers in apply‑patch.ts with the
corresponding constants (now) exported from parse‑apply‑patch.ts.

Changes
• Import PATCH_PREFIX, PATCH_SUFFIX, ADD_FILE_PREFIX,
DELETE_FILE_PREFIX, UPDATE_FILE_PREFIX, MOVE_FILE_TO_PREFIX,
END_OF_FILE_PREFIX, and HUNK_ADD_LINE_PREFIX from parse‑apply‑patch.ts.
	•	Remove duplicate string literals for patch markers in apply‑patch.ts.
• Changed is_done() to trim the input to account for the slight
difference between the variables.

Why
• DRY & Consistency: Ensures a single source of truth for patch
prefixes.
• Maintainability: Simplifies future updates to prefix values by
centralizing them.
	•	Readability: Makes the code more declarative and self‑documenting.

All tests are passing, lint and format was ran.
											
										
										
											2025-04-18 02:00:30 +02:00
+								  if (index < lines.length && lines[index] === END_OF_FILE_PREFIX) {
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								    index += 1;
 								    return [old, chunks, index, true];
 								  }
 								  return [old, chunks, index, false];
 								}
 								// -----------------------------------------------------------------------------
 								// High‑level helpers
 								// -----------------------------------------------------------------------------
 								export function text_to_patch(
 								  text: string,
 								  orig: Record<string, string>,
 								): [Patch, number] {
 								  const lines = text.trim().split("\n");
 								  if (
 								    lines.length < 2 ||
-												chore: consolidate patch prefix constants in apply‑patch.ts (#274)

This PR replaces all hard‑coded patch markers in apply‑patch.ts with the
corresponding constants (now) exported from parse‑apply‑patch.ts.

Changes
• Import PATCH_PREFIX, PATCH_SUFFIX, ADD_FILE_PREFIX,
DELETE_FILE_PREFIX, UPDATE_FILE_PREFIX, MOVE_FILE_TO_PREFIX,
END_OF_FILE_PREFIX, and HUNK_ADD_LINE_PREFIX from parse‑apply‑patch.ts.
	•	Remove duplicate string literals for patch markers in apply‑patch.ts.
• Changed is_done() to trim the input to account for the slight
difference between the variables.

Why
• DRY & Consistency: Ensures a single source of truth for patch
prefixes.
• Maintainability: Simplifies future updates to prefix values by
centralizing them.
	•	Readability: Makes the code more declarative and self‑documenting.

All tests are passing, lint and format was ran.
											
										
										
											2025-04-18 02:00:30 +02:00
+								    !(lines[0] ?? "").startsWith(PATCH_PREFIX.trim()) ||
 								    lines[lines.length - 1] !== PATCH_SUFFIX.trim()
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								  ) {
-												fix: gpt-4.1 apply_patch handling (#930)


											
										
										
											2025-05-14 08:34:09 -07:00
+								    let reason = "Invalid patch text: ";
 								    if (lines.length < 2) {
 								      reason += "Patch text must have at least two lines.";
 								    } else if (!(lines[0] ?? "").startsWith(PATCH_PREFIX.trim())) {
 								      reason += "Patch text must start with the correct patch prefix.";
 								    } else if (lines[lines.length - 1] !== PATCH_SUFFIX.trim()) {
 								      reason += "Patch text must end with the correct patch suffix.";
 								    }
 								    throw new DiffError(reason);
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								  }
 								  const parser = new Parser(orig, lines);
 								  parser.index = 1;
 								  parser.parse();
 								  return [parser.patch, parser.fuzz];
 								}
 								export function identify_files_needed(text: string): Array<string> {
 								  const lines = text.trim().split("\n");
 								  const result = new Set<string>();
 								  for (const line of lines) {
-												chore: consolidate patch prefix constants in apply‑patch.ts (#274)

This PR replaces all hard‑coded patch markers in apply‑patch.ts with the
corresponding constants (now) exported from parse‑apply‑patch.ts.

Changes
• Import PATCH_PREFIX, PATCH_SUFFIX, ADD_FILE_PREFIX,
DELETE_FILE_PREFIX, UPDATE_FILE_PREFIX, MOVE_FILE_TO_PREFIX,
END_OF_FILE_PREFIX, and HUNK_ADD_LINE_PREFIX from parse‑apply‑patch.ts.
	•	Remove duplicate string literals for patch markers in apply‑patch.ts.
• Changed is_done() to trim the input to account for the slight
difference between the variables.

Why
• DRY & Consistency: Ensures a single source of truth for patch
prefixes.
• Maintainability: Simplifies future updates to prefix values by
centralizing them.
	•	Readability: Makes the code more declarative and self‑documenting.

All tests are passing, lint and format was ran.
											
										
										
											2025-04-18 02:00:30 +02:00
+								    if (line.startsWith(UPDATE_FILE_PREFIX)) {
 								      result.add(line.slice(UPDATE_FILE_PREFIX.length));
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								    }
-												chore: consolidate patch prefix constants in apply‑patch.ts (#274)

This PR replaces all hard‑coded patch markers in apply‑patch.ts with the
corresponding constants (now) exported from parse‑apply‑patch.ts.

Changes
• Import PATCH_PREFIX, PATCH_SUFFIX, ADD_FILE_PREFIX,
DELETE_FILE_PREFIX, UPDATE_FILE_PREFIX, MOVE_FILE_TO_PREFIX,
END_OF_FILE_PREFIX, and HUNK_ADD_LINE_PREFIX from parse‑apply‑patch.ts.
	•	Remove duplicate string literals for patch markers in apply‑patch.ts.
• Changed is_done() to trim the input to account for the slight
difference between the variables.

Why
• DRY & Consistency: Ensures a single source of truth for patch
prefixes.
• Maintainability: Simplifies future updates to prefix values by
centralizing them.
	•	Readability: Makes the code more declarative and self‑documenting.

All tests are passing, lint and format was ran.
											
										
										
											2025-04-18 02:00:30 +02:00
+								    if (line.startsWith(DELETE_FILE_PREFIX)) {
 								      result.add(line.slice(DELETE_FILE_PREFIX.length));
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								    }
 								  }
 								  return [...result];
 								}
 								export function identify_files_added(text: string): Array<string> {
 								  const lines = text.trim().split("\n");
 								  const result = new Set<string>();
 								  for (const line of lines) {
-												chore: consolidate patch prefix constants in apply‑patch.ts (#274)

This PR replaces all hard‑coded patch markers in apply‑patch.ts with the
corresponding constants (now) exported from parse‑apply‑patch.ts.

Changes
• Import PATCH_PREFIX, PATCH_SUFFIX, ADD_FILE_PREFIX,
DELETE_FILE_PREFIX, UPDATE_FILE_PREFIX, MOVE_FILE_TO_PREFIX,
END_OF_FILE_PREFIX, and HUNK_ADD_LINE_PREFIX from parse‑apply‑patch.ts.
	•	Remove duplicate string literals for patch markers in apply‑patch.ts.
• Changed is_done() to trim the input to account for the slight
difference between the variables.

Why
• DRY & Consistency: Ensures a single source of truth for patch
prefixes.
• Maintainability: Simplifies future updates to prefix values by
centralizing them.
	•	Readability: Makes the code more declarative and self‑documenting.

All tests are passing, lint and format was ran.
											
										
										
											2025-04-18 02:00:30 +02:00
+								    if (line.startsWith(ADD_FILE_PREFIX)) {
 								      result.add(line.slice(ADD_FILE_PREFIX.length));
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								    }
 								  }
 								  return [...result];
 								}
 								/**
 								 * Build the new text of a file by applying the chunks of an UPDATE action.
 								 *
 								 * Chunks are consumed in order. For each one, the untouched lines before
 								 * `orig_index` are copied, the chunk's `ins_lines` are emitted, and
 								 * `del_lines.length` original lines are skipped. The content of `del_lines`
 								 * is not compared with the file here; only its length is used.
 								 *
 								 * @param text   Current file content; split on "\n".
 								 * @param action Patch action, which must be of type UPDATE.
 								 * @param path   File path, used only in error messages.
 								 * @returns The patched content joined with "\n".
 								 * @throws Error when `action` is not an UPDATE action.
 								 * @throws DiffError when a chunk starts past the end of the file or before
 								 *   the position reached by the previous chunk.
 								 */
 								function _get_updated_file(
 								  text: string,
 								  action: PatchAction,
 								  path: string,
 								): string {
 								  if (action.type !== ActionType.UPDATE) {
 								    throw new Error("Expected UPDATE action");
 								  }
 								  const sourceLines = text.split("\n");
 								  const result: Array<string> = [];
 								  // Index of the next original line that has not been copied or skipped yet.
 								  let cursor = 0;
 								  for (const {
 								    orig_index: chunkStart,
 								    del_lines: removed,
 								    ins_lines: inserted,
 								  } of action.chunks) {
 								    if (chunkStart > sourceLines.length) {
 								      throw new DiffError(
 								        `${path}: chunk.orig_index ${chunkStart} > len(lines) ${sourceLines.length}`,
 								      );
 								    }
 								    if (cursor > chunkStart) {
 								      throw new DiffError(
 								        `${path}: orig_index ${cursor} > chunk.orig_index ${chunkStart}`,
 								      );
 								    }
 								    // Unchanged lines between the previous chunk and this one.
 								    result.push(...sourceLines.slice(cursor, chunkStart));
 								    // Replacement lines for this chunk.
 								    result.push(...inserted);
 								    // Step over the original lines this chunk deletes.
 								    cursor = chunkStart + removed.length;
 								  }
 								  // Whatever follows the last chunk is kept as is.
 								  return result.concat(sourceLines.slice(cursor)).join("\n");
 								}
 								/**
 								 * Convert a parsed patch into a commit describing the resulting file changes.
 								 *
 								 * @param patch Parsed patch whose `actions` map file paths to actions.
 								 * @param orig  Original file contents keyed by path.
 								 * @returns A commit with one change per DELETE, ADD or UPDATE action; the
 								 *   new content of an UPDATE is computed by `_get_updated_file`. Actions of
 								 *   any other type produce no entry.
 								 */
 								export function patch_to_commit(
 								  patch: Patch,
 								  orig: Record<string, string>,
 								): Commit {
 								  const changes: Commit["changes"] = {};
 								  Object.entries(patch.actions).forEach(([filePath, action]) => {
 								    switch (action.type) {
 								      case ActionType.DELETE:
 								        changes[filePath] = {
 								          type: ActionType.DELETE,
 								          old_content: orig[filePath],
 								        };
 								        break;
 								      case ActionType.ADD:
 								        changes[filePath] = {
 								          type: ActionType.ADD,
 								          new_content: action.new_file ?? "",
 								        };
 								        break;
 								      case ActionType.UPDATE: {
 								        const previous = orig[filePath];
 								        changes[filePath] = {
 								          type: ActionType.UPDATE,
 								          old_content: previous,
 								          new_content: _get_updated_file(previous!, action, filePath),
 								          // A null move_path is stored as undefined.
 								          move_path: action.move_path ?? undefined,
 								        };
 								        break;
 								      }
 								      default:
 								        break;
 								    }
 								  });
 								  return { changes };
 								}
 								// -----------------------------------------------------------------------------
 								// Filesystem helpers for Node environment
 								// -----------------------------------------------------------------------------
 								/**
 								 * Read every path in `paths` through `openFn` and collect the contents.
 								 *
 								 * @param paths  Paths to read.
 								 * @param openFn Reader returning the content of a path; may throw.
 								 * @returns A record mapping each path to the content returned by `openFn`.
 								 * @throws DiffError with message `File not found: <path>` when `openFn`
 								 *   throws for a path, whatever the underlying error was.
 								 */
 								export function load_files(
 								  paths: Array<string>,
 								  openFn: (p: string) => string,
 								): Record<string, string> {
 								  const contents: Record<string, string> = {};
 								  paths.forEach((filePath) => {
 								    let text: string;
 								    try {
 								      text = openFn(filePath);
 								    } catch {
 								      // Every read failure is reported as a DiffError so that callers
 								      // only have to handle one error type for patch-related problems.
 								      throw new DiffError(`File not found: ${filePath}`);
 								    }
 								    contents[filePath] = text;
 								  });
 								  return contents;
 								}
 								/**
 								 * Apply every change of a commit through the supplied write/remove callbacks.
 								 *
 								 * - DELETE: `removeFn(path)`.
 								 * - ADD: `writeFn(path, new_content)`.
 								 * - UPDATE: `writeFn` to `move_path` when one is set, otherwise to the
 								 *   original path; after a move the original path is removed.
 								 *
 								 * Missing `new_content` is written as an empty string.
 								 *
 								 * @param commit   Commit whose `changes` are applied in entry order.
 								 * @param writeFn  Callback that writes content to a path.
 								 * @param removeFn Callback that removes a path.
 								 */
 								export function apply_commit(
 								  commit: Commit,
 								  writeFn: (p: string, c: string) => void,
 								  removeFn: (p: string) => void,
 								): void {
 								  for (const [p, change] of Object.entries(commit.changes)) {
 								    if (change.type === ActionType.DELETE) {
 								      removeFn(p);
 								    } else if (change.type === ActionType.ADD) {
 								      writeFn(p, change.new_content ?? "");
 								    } else if (change.type === ActionType.UPDATE) {
 								      // An empty or missing move_path means "update in place".
 								      const destination = change.move_path || p;
 								      writeFn(destination, change.new_content ?? "");
 								      // Only remove the source when the file really moved. A move onto
 								      // its own path would otherwise delete the content just written.
 								      if (destination !== p) {
 								        removeFn(p);
 								      }
 								    }
 								  }
 								}
 								export function process_patch(
 								  text: string,
 								  openFn: (p: string) => string,
 								  writeFn: (p: string, c: string) => void,
 								  removeFn: (p: string) => void,
 								): string {
-												chore: consolidate patch prefix constants in apply‑patch.ts (#274)

This PR replaces all hard‑coded patch markers in apply‑patch.ts with the
corresponding constants (now) exported from parse‑apply‑patch.ts.

Changes
• Import PATCH_PREFIX, PATCH_SUFFIX, ADD_FILE_PREFIX,
DELETE_FILE_PREFIX, UPDATE_FILE_PREFIX, MOVE_FILE_TO_PREFIX,
END_OF_FILE_PREFIX, and HUNK_ADD_LINE_PREFIX from parse‑apply‑patch.ts.
	•	Remove duplicate string literals for patch markers in apply‑patch.ts.
• Changed is_done() to trim the input to account for the slight
difference between the variables.

Why
• DRY & Consistency: Ensures a single source of truth for patch
prefixes.
• Maintainability: Simplifies future updates to prefix values by
centralizing them.
	•	Readability: Makes the code more declarative and self‑documenting.

All tests are passing, lint and format was ran.
											
										
										
											2025-04-18 02:00:30 +02:00
+								  if (!text.startsWith(PATCH_PREFIX)) {
 								    throw new DiffError("Patch must start with *** Begin Patch\\n");
-												Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>

											
										
										
											2025-04-16 12:56:08 -04:00
+								  }
 								  const paths = identify_files_needed(text);
 								  const orig = load_files(paths, openFn);
 								  const [patch, _fuzz] = text_to_patch(text, orig);
 								  const commit = patch_to_commit(patch, orig);
 								  apply_commit(commit, writeFn, removeFn);
 								  return "Done!";
 								}
 								// -----------------------------------------------------------------------------
 								// Default filesystem implementations
 								// -----------------------------------------------------------------------------
 								/**
 								 * Default reader: synchronously load a file as UTF-8 text.
 								 *
 								 * @param p Path of the file to read.
 								 * @returns The file content decoded as UTF-8.
 								 */
 								function open_file(p: string): string {
 								  const contents = fs.readFileSync(p, { encoding: "utf8" });
 								  return contents;
 								}
 								/**
 								 * Default writer: store `content` at the relative path `p` as UTF-8,
 								 * creating missing parent directories first.
 								 *
 								 * @param p       Relative path of the file to write.
 								 * @param content Text to write.
 								 * @throws DiffError when `p` is an absolute path.
 								 */
 								function write_file(p: string, content: string): void {
 								  if (path.isAbsolute(p)) {
 								    throw new DiffError("We do not support absolute paths.");
 								  }
 								  const directory = path.dirname(p);
 								  // "." means the file sits directly in the working directory.
 								  const hasParentDirectory = directory !== ".";
 								  if (hasParentDirectory) {
 								    fs.mkdirSync(directory, { recursive: true });
 								  }
 								  fs.writeFileSync(p, content, { encoding: "utf8" });
 								}
 								/**
 								 * Default remover: synchronously unlink the file at `p`.
 								 *
 								 * @param p Path of the file to delete.
 								 */
 								function remove_file(p: string): void {
 								  const { unlinkSync } = fs;
 								  unlinkSync(p);
 								}
 								// -----------------------------------------------------------------------------
 								// CLI mode. Not exported, executed only if run directly.
 								// -----------------------------------------------------------------------------
 								// Run only when this module's URL equals `file://` + the script path in
 								// process.argv[1]; otherwise the block is skipped.
 								if (import.meta.url === `file://${process.argv[1]}`) {
 								  // Accumulates everything received on stdin.
 								  let patchText = "";
 								  process.stdin.setEncoding("utf8");
 								  process.stdin.on("data", (chunk) => (patchText += chunk));
 								  process.stdin.on("end", () => {
 								    // Nothing was piped in: report it on stderr and exit with status 1.
 								    if (!patchText) {
 								      // eslint-disable-next-line no-console
 								      console.error("Please pass patch text through stdin");
 								      process.exit(1);
 								    }
 								    try {
 								      // Apply the patch using the default filesystem implementations.
 								      const result = process_patch(
 								        patchText,
 								        open_file,
 								        write_file,
 								        remove_file,
 								      );
 								      // eslint-disable-next-line no-console
 								      console.log(result);
 								    } catch (err: unknown) {
 								      // Print the error message (or the stringified value for non-Error
 								      // throwables) on stderr and exit with status 1.
 								      // eslint-disable-next-line no-console
 								      console.error(err instanceof Error ? err.message : String(err));
 								      process.exit(1);
 								    }
 								  });
 								}
-												fix: gpt-4.1 apply_patch handling (#930)


											
										
										
											2025-05-14 08:34:09 -07:00
 								/**
 								 * Prompt text that explains how to call the `apply_patch` CLI through the
 								 * `shell` tool and describes the V4A diff format it expects (file action
 								 * headers, `-`/`+` lines, context lines and `@@` markers).
 								 *
 								 * NOTE(review): the closing paragraph states that the command always prints
 								 * "Done!" whether or not the patch applied. In this file `process_patch`
 								 * returns "Done!" only after `apply_commit` has run, and the CLI entry point
 								 * prints the error message and exits with status 1 on failure. Confirm that
 								 * the wording is intended.
 								 */
 								export const applyPatchToolInstructions = `
 								To edit files, ALWAYS use the \`shell\` tool with \`apply_patch\` CLI.  \`apply_patch\` effectively allows you to execute a diff/patch against a file, but the format of the diff specification is unique to this task, so pay careful attention to these instructions. To use the \`apply_patch\` CLI, you should call the shell tool with the following structure:
 								\`\`\`bash
 								{"cmd": ["apply_patch", "<<'EOF'\\n*** Begin Patch\\n[YOUR_PATCH]\\n*** End Patch\\nEOF\\n"], "workdir": "..."}
 								\`\`\`
 								Where [YOUR_PATCH] is the actual content of your patch, specified in the following V4A diff format.
 								*** [ACTION] File: [path/to/file] -> ACTION can be one of Add, Update, or Delete.
 								For each snippet of code that needs to be changed, repeat the following:
 								[context_before] -> See below for further instructions on context.
 								- [old_code] -> Precede the old code with a minus sign.
 								+ [new_code] -> Precede the new, replacement code with a plus sign.
 								[context_after] -> See below for further instructions on context.
 								For instructions on [context_before] and [context_after]:
 								- By default, show 3 lines of code immediately above and 3 lines immediately below each change. If a change is within 3 lines of a previous change, do NOT duplicate the first change’s [context_after] lines in the second change’s [context_before] lines.
 								- If 3 lines of context is insufficient to uniquely identify the snippet of code within the file, use the @@ operator to indicate the class or function to which the snippet belongs. For instance, we might have:
@@ class BaseClass
 								[3 lines of pre-context]
 								- [old_code]
 								+ [new_code]
 								[3 lines of post-context]
 								- If a code block is repeated so many times in a class or function such that even a single \`@@\` statement and 3 lines of context cannot uniquely identify the snippet of code, you can use multiple \`@@\` statements to jump to the right context. For instance:
@@ class BaseClass
@@ 	def method():
 								[3 lines of pre-context]
 								- [old_code]
 								+ [new_code]
 								[3 lines of post-context]
 								Note, then, that we do not use line numbers in this diff format, as the context is enough to uniquely identify code. An example of a message that you might pass as "input" to this function, in order to apply a patch, is shown below.
 								\`\`\`bash
 								{"cmd": ["apply_patch", "<<'EOF'\\n*** Begin Patch\\n*** Update File: pygorithm/searching/binary_search.py\\n@@ class BaseClass\\n@@     def search():\\n-        pass\\n+        raise NotImplementedError()\\n@@ class Subclass\\n@@     def search():\\n-        pass\\n+        raise NotImplementedError()\\n*** End Patch\\nEOF\\n"], "workdir": "..."}
 								\`\`\`
 								File references can only be relative, NEVER ABSOLUTE. After the apply_patch command is run, it will always say "Done!", regardless of whether the patch was successfully applied or not. However, you can determine if there are issue and errors by looking at any warnings or logging lines printed BEFORE the "Done!" is output.
 								`;