// Based on reference implementation from
// https://cookbook.openai.com/examples/gpt4-1_prompting_guide#reference-implementation-apply_patchpy

import fs from "fs";
import path from "path";
import {
  ADD_FILE_PREFIX,
  DELETE_FILE_PREFIX,
  END_OF_FILE_PREFIX,
  MOVE_FILE_TO_PREFIX,
  PATCH_SUFFIX,
  UPDATE_FILE_PREFIX,
  HUNK_ADD_LINE_PREFIX,
  PATCH_PREFIX,
} from "src/parse-apply-patch";

// -----------------------------------------------------------------------------
// Types & Models
// -----------------------------------------------------------------------------

/** Kind of change a patch action / commit entry performs on a file. */
export enum ActionType {
  ADD = "add",
  DELETE = "delete",
  UPDATE = "update",
}

/** A single file-level change inside a {@link Commit}. */
export interface FileChange {
  type: ActionType;
  old_content?: string | null;
  new_content?: string | null;
  /** Destination path when an UPDATE also renames the file. */
  move_path?: string | null;
}

/** A set of file changes keyed by the (original) file path. */
export interface Commit {
  changes: Record<string, FileChange>;
}

/**
 * Builds a {@link Commit} by comparing `orig` with `updatedFiles`.
 *
 * Only the keys of `updatedFiles` are visited. Entries whose old and new
 * content are strictly equal are skipped.
 *
 * @param orig - Original contents keyed by path.
 * @param updatedFiles - New contents keyed by path.
 * @returns The commit describing every differing path.
 * @throws Error when both the old and the new content are `undefined`
 *   yet compare as different (should be unreachable).
 */
export function assemble_changes(
  orig: Record<string, string | null>,
  updatedFiles: Record<string, string | null>,
): Commit {
  const commit: Commit = { changes: {} };
  for (const [p, newContent] of Object.entries(updatedFiles)) {
    const oldContent = orig[p];
    if (oldContent === newContent) {
      continue;
    }

    if (oldContent !== undefined && newContent !== undefined) {
      commit.changes[p] = {
        type: ActionType.UPDATE,
        old_content: oldContent,
        new_content: newContent,
      };
    } else if (newContent !== undefined) {
      commit.changes[p] = {
        type: ActionType.ADD,
        new_content: newContent,
      };
    } else if (oldContent !== undefined) {
      commit.changes[p] = {
        type: ActionType.DELETE,
        old_content: oldContent,
      };
    } else {
      throw new Error("Unexpected state in assemble_changes");
    }
  }
  return commit;
}

// -----------------------------------------------------------------------------
// Patch-related structures
// -----------------------------------------------------------------------------

/** One contiguous replacement inside an updated file. */
export interface Chunk {
  orig_index: number; // line index of the first line in the original file
  del_lines: Array<string>;
  ins_lines: Array<string>;
}

/** Parsed instruction for a single file of the patch. */
export interface PatchAction {
  type: ActionType;
  /** Full content of the file for ADD actions. */
  new_file?: string | null;
  chunks: Array<Chunk>;
  move_path?: string | null;
}

/** Parsed patch: one {@link PatchAction} per file path. */
export interface Patch {
  actions: Record<string, PatchAction>;
}

/** Error type used for every patch-format or patch-application failure. */
export class DiffError extends Error {}

// -----------------------------------------------------------------------------
// Helpers - own-property lookup and Unicode punctuation normalisation
// -----------------------------------------------------------------------------

/**
 * Own-property test. Used instead of `in` / truthiness so that file names
 * which collide with `Object.prototype` members (e.g. "constructor",
 * "toString") are not mistaken for existing entries.
 */
function hasOwn(obj: object, key: string): boolean {
  return Object.prototype.hasOwnProperty.call(obj, key);
}

/*
 * The patch-matching algorithm originally required **exact** string equality
 * for non-whitespace characters. That breaks when the file on disk contains
 * visually identical but different Unicode code-points (e.g. "EN DASH" vs
 * ASCII "-"), because models almost always emit the ASCII variant. To make
 * apply_patch resilient we canonicalise a handful of common punctuation
 * look-alikes before doing comparisons.
 *
 * We purposefully keep the mapping *small* - only characters that routinely
 * appear in source files and are highly unlikely to introduce ambiguity are
 * included. Each entry is written using the corresponding Unicode escape so
 * that the file remains ASCII-only even after transpilation.
 */
const PUNCT_EQUIV: Record<string, string> = {
  // Hyphen / dash variants --------------------------------------------------
  /* U+002D HYPHEN-MINUS */ "-": "-",
  /* U+2010 HYPHEN */ "\u2010": "-",
  /* U+2011 NO-BREAK HYPHEN */ "\u2011": "-",
  /* U+2012 FIGURE DASH */ "\u2012": "-",
  /* U+2013 EN DASH */ "\u2013": "-",
  /* U+2014 EM DASH */ "\u2014": "-",
  /* U+2212 MINUS SIGN */ "\u2212": "-",

  // Double quotes -----------------------------------------------------------
  /* U+0022 QUOTATION MARK */ "\u0022": '"',
  /* U+201C LEFT DOUBLE QUOTATION MARK */ "\u201C": '"',
  /* U+201D RIGHT DOUBLE QUOTATION MARK */ "\u201D": '"',
  /* U+201E DOUBLE LOW-9 QUOTATION MARK */ "\u201E": '"',
  /* U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */ "\u00AB": '"',
  /* U+00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */ "\u00BB": '"',

  // Single quotes -----------------------------------------------------------
  /* U+0027 APOSTROPHE */ "\u0027": "'",
  /* U+2018 LEFT SINGLE QUOTATION MARK */ "\u2018": "'",
  /* U+2019 RIGHT SINGLE QUOTATION MARK */ "\u2019": "'",
  /* U+201B SINGLE HIGH-REVERSED-9 QUOTATION MARK */ "\u201B": "'",
};

/**
 * Canonicalises a string for comparison: NFC composition first, then every
 * code point listed in {@link PUNCT_EQUIV} is replaced by its ASCII form.
 * Shared by the `@@` anchor search and the context search so both use the
 * same table (and the table is built only once).
 */
function canon(s: string): string {
  return s.normalize("NFC").replace(/./gu, (c) => PUNCT_EQUIV[c] ?? c);
}

// -----------------------------------------------------------------------------
// Parser (patch text -> Patch)
// -----------------------------------------------------------------------------

/**
 * Line-oriented parser that turns patch text into a {@link Patch}.
 *
 * The caller sets `index` to the first line to parse, calls `parse()`, and
 * then reads `patch` and `fuzz`.
 */
class Parser {
  /** Current contents of the files referenced by the patch, keyed by path. */
  current_files: Record<string, string>;
  /** Patch text split into lines. */
  lines: Array<string>;
  /** Index of the next line of `lines` to consume. */
  index = 0;
  /** Result accumulated by `parse()`. */
  patch: Patch = { actions: {} };
  /** Sum of the fuzz scores of every inexact match made while parsing. */
  fuzz = 0;

  constructor(currentFiles: Record<string, string>, lines: Array<string>) {
    this.current_files = currentFiles;
    this.lines = lines;
  }

  /**
   * True when all lines are consumed, or when the current line starts with
   * one of `prefixes` (each prefix is trimmed before comparing).
   */
  private is_done(prefixes?: Array<string>): boolean {
    if (this.index >= this.lines.length) {
      return true;
    }
    const current = this.lines[this.index]!;
    return prefixes?.some((p) => current.startsWith(p.trim())) ?? false;
  }

  /**
   * True when the current line starts with (one of) `prefix`. Returns false
   * past the end of input instead of dereferencing a missing line.
   */
  private startswith(prefix: string | Array<string>): boolean {
    const current = this.lines[this.index];
    if (current === undefined) {
      return false;
    }
    const prefixes = Array.isArray(prefix) ? prefix : [prefix];
    return prefixes.some((p) => current.startsWith(p));
  }

  /**
   * If the current line starts with `prefix`, consumes it and returns the
   * remainder (or the whole line when `returnEverything` is set). Otherwise
   * returns "" without consuming anything.
   *
   * @throws DiffError when there is no current line.
   */
  private read_str(prefix = "", returnEverything = false): string {
    if (this.index >= this.lines.length) {
      throw new DiffError(`Index: ${this.index} >= ${this.lines.length}`);
    }
    const current = this.lines[this.index]!;
    if (!current.startsWith(prefix)) {
      return "";
    }
    this.index += 1;
    return returnEverything ? current : current.slice(prefix.length);
  }

  /**
   * Parses file sections until the end-of-patch marker and consumes it.
   *
   * @throws DiffError on duplicate paths, missing/already-existing files,
   *   unknown lines, or a missing end-of-patch marker.
   */
  parse(): void {
    while (!this.is_done([PATCH_SUFFIX])) {
      let filePath = this.read_str(UPDATE_FILE_PREFIX);
      if (filePath) {
        if (hasOwn(this.patch.actions, filePath)) {
          throw new DiffError(
            `Update File Error: Duplicate Path: ${filePath}`,
          );
        }
        const moveTo = this.read_str(MOVE_FILE_TO_PREFIX);
        if (!hasOwn(this.current_files, filePath)) {
          throw new DiffError(`Update File Error: Missing File: ${filePath}`);
        }
        const text = this.current_files[filePath];
        const action = this.parse_update_file(text ?? "");
        action.move_path = moveTo || undefined;
        this.patch.actions[filePath] = action;
        continue;
      }

      filePath = this.read_str(DELETE_FILE_PREFIX);
      if (filePath) {
        if (hasOwn(this.patch.actions, filePath)) {
          throw new DiffError(
            `Delete File Error: Duplicate Path: ${filePath}`,
          );
        }
        if (!hasOwn(this.current_files, filePath)) {
          throw new DiffError(`Delete File Error: Missing File: ${filePath}`);
        }
        this.patch.actions[filePath] = { type: ActionType.DELETE, chunks: [] };
        continue;
      }

      filePath = this.read_str(ADD_FILE_PREFIX);
      if (filePath) {
        if (hasOwn(this.patch.actions, filePath)) {
          throw new DiffError(`Add File Error: Duplicate Path: ${filePath}`);
        }
        if (hasOwn(this.current_files, filePath)) {
          throw new DiffError(
            `Add File Error: File already exists: ${filePath}`,
          );
        }
        this.patch.actions[filePath] = this.parse_add_file();
        continue;
      }

      throw new DiffError(`Unknown Line: ${this.lines[this.index]}`);
    }

    if (!this.startswith(PATCH_SUFFIX.trim())) {
      throw new DiffError("Missing End Patch");
    }
    this.index += 1;
  }

  /**
   * Parses the sections of one "update file" entry against the current
   * file content `text` and returns the resulting UPDATE action.
   *
   * @throws DiffError when a section header is invalid or a section's
   *   context cannot be located in the file.
   */
  private parse_update_file(text: string): PatchAction {
    const action: PatchAction = { type: ActionType.UPDATE, chunks: [] };
    const fileLines = text.split("\n");
    // Position in fileLines from which the next context search starts.
    let index = 0;

    while (
      !this.is_done([
        PATCH_SUFFIX,
        UPDATE_FILE_PREFIX,
        DELETE_FILE_PREFIX,
        ADD_FILE_PREFIX,
        END_OF_FILE_PREFIX,
      ])
    ) {
      const defStr = this.read_str("@@ ");
      let sectionStr = "";
      if (!defStr && this.lines[this.index] === "@@") {
        sectionStr = this.lines[this.index]!;
        this.index += 1;
      }
      // Only the very first section may omit its "@@" header.
      if (!(defStr || sectionStr || index === 0)) {
        throw new DiffError(`Invalid Line:\n${this.lines[this.index]}`);
      }

      if (defStr.trim()) {
        let found = false;
        const seen = fileLines.slice(0, index);

        // Exact (canonicalised) match of the anchor line, unless the anchor
        // already occurs in the part of the file we have moved past.
        const canonDef = canon(defStr);
        if (!seen.some((s) => canon(s) === canonDef)) {
          for (let i = index; i < fileLines.length; i++) {
            if (canon(fileLines[i]!) === canonDef) {
              index = i + 1;
              found = true;
              break;
            }
          }
        }

        // Fallback: same search ignoring surrounding whitespace (fuzz +1).
        const canonDefTrimmed = canon(defStr.trim());
        if (!found && !seen.some((s) => canon(s.trim()) === canonDefTrimmed)) {
          for (let i = index; i < fileLines.length; i++) {
            if (canon(fileLines[i]!.trim()) === canonDefTrimmed) {
              index = i + 1;
              this.fuzz += 1;
              found = true;
              break;
            }
          }
        }
      }

      const [nextChunkContext, chunks, endPatchIndex, eof] = peek_next_section(
        this.lines,
        this.index,
      );
      const [newIndex, fuzz] = find_context(
        fileLines,
        nextChunkContext,
        index,
        eof,
      );
      if (newIndex === -1) {
        const ctxText = nextChunkContext.join("\n");
        if (eof) {
          throw new DiffError(`Invalid EOF Context ${index}:\n${ctxText}`);
        } else {
          throw new DiffError(`Invalid Context ${index}:\n${ctxText}`);
        }
      }
      this.fuzz += fuzz;

      // Chunk indices are relative to the section; rebase them on the file.
      for (const ch of chunks) {
        ch.orig_index += newIndex;
        action.chunks.push(ch);
      }
      index = newIndex + nextChunkContext.length;
      this.index = endPatchIndex;
    }
    return action;
  }

  /**
   * Parses the body of an "add file" entry: every line up to the next file
   * header / end marker must start with the add-line prefix.
   *
   * @throws DiffError when a body line lacks the add-line prefix.
   */
  private parse_add_file(): PatchAction {
    const lines: Array<string> = [];
    while (
      !this.is_done([
        PATCH_SUFFIX,
        UPDATE_FILE_PREFIX,
        DELETE_FILE_PREFIX,
        ADD_FILE_PREFIX,
      ])
    ) {
      const s = this.read_str();
      if (!s.startsWith(HUNK_ADD_LINE_PREFIX)) {
        throw new DiffError(`Invalid Add File Line: ${s}`);
      }
      lines.push(s.slice(1));
    }
    return {
      type: ActionType.ADD,
      new_file: lines.join("\n"),
      chunks: [],
    };
  }
}

/**
 * Finds the first position >= `start` at which `context` occurs in `lines`.
 *
 * Three passes of decreasing strictness are tried, all after
 * canonicalisation with {@link canon}:
 *   1. exact equality                       -> fuzz 0
 *   2. ignoring trailing whitespace         -> fuzz 1
 *   3. ignoring all surrounding whitespace  -> fuzz 100
 *
 * @returns `[index, fuzz]`, or `[-1, 0]` when nothing matches. An empty
 *   context matches immediately at `start`.
 */
function find_context_core(
  lines: Array<string>,
  context: Array<string>,
  start: number,
): [number, number] {
  if (context.length === 0) {
    return [start, 0];
  }

  // A window that runs past the end has fewer lines than the context and can
  // never match, so only windows that fit are examined.
  const lastStart = lines.length - context.length;
  const trimEnd = (s: string): string => s.trimEnd();
  const trimBoth = (s: string): string => s.trim();

  // Pass 1 - exact equality after canonicalisation ---------------------------
  const exactCtx = canon(context.join("\n"));
  for (let i = start; i <= lastStart; i++) {
    const segment = canon(lines.slice(i, i + context.length).join("\n"));
    if (segment === exactCtx) {
      return [i, 0];
    }
  }

  // Pass 2 - ignore trailing whitespace --------------------------------------
  const rtrimCtx = canon(context.map(trimEnd).join("\n"));
  for (let i = start; i <= lastStart; i++) {
    const segment = canon(
      lines
        .slice(i, i + context.length)
        .map(trimEnd)
        .join("\n"),
    );
    if (segment === rtrimCtx) {
      return [i, 1];
    }
  }

  // Pass 3 - ignore all surrounding whitespace -------------------------------
  const trimCtx = canon(context.map(trimBoth).join("\n"));
  for (let i = start; i <= lastStart; i++) {
    const segment = canon(
      lines
        .slice(i, i + context.length)
        .map(trimBoth)
        .join("\n"),
    );
    if (segment === trimCtx) {
      return [i, 100];
    }
  }

  return [-1, 0];
}

/**
 * Locates `context` in `lines`. For end-of-file sections the tail of the
 * file is tried first; if that fails a normal search from `start` is made
 * and its fuzz is increased by 10000.
 *
 * @returns `[index, fuzz]`; index is -1 when the context is not found.
 */
function find_context(
  lines: Array<string>,
  context: Array<string>,
  start: number,
  eof: boolean,
): [number, number] {
  if (!eof) {
    return find_context_core(lines, context, start);
  }

  // Clamp so a context longer than the file does not yield a negative index.
  const tailStart = Math.max(0, lines.length - context.length);
  const atTail = find_context_core(lines, context, tailStart);
  if (atTail[0] !== -1) {
    return atTail;
  }
  const [newIndex, fuzz] = find_context_core(lines, context, start);
  return [newIndex, fuzz + 10000];
}

/**
 * Reads one section of an update starting at `initialIndex` without needing
 * the target file.
 *
 * @returns a tuple of
 *   - the "old" lines of the section (kept and deleted lines, in order),
 *   - the chunks, with `orig_index` relative to the start of the section,
 *   - the index of the first line not consumed,
 *   - whether the section was terminated by the end-of-file marker.
 * @throws DiffError on a line that starts with "***" but is not a known
 *   marker.
 */
function peek_next_section(
  lines: Array<string>,
  initialIndex: number,
): [Array<string>, Array<Chunk>, number, boolean] {
  let index = initialIndex;
  const old: Array<string> = [];
  let delLines: Array<string> = [];
  let insLines: Array<string> = [];
  const chunks: Array<Chunk> = [];
  let mode: "keep" | "add" | "delete" = "keep";

  const terminators = [
    "@@",
    PATCH_SUFFIX,
    UPDATE_FILE_PREFIX,
    DELETE_FILE_PREFIX,
    ADD_FILE_PREFIX,
    END_OF_FILE_PREFIX,
  ].map((p) => p.trim());

  // Emits the pending deletions/insertions (if any) as one chunk.
  const flushChunk = (): void => {
    if (insLines.length || delLines.length) {
      chunks.push({
        orig_index: old.length - delLines.length,
        del_lines: delLines,
        ins_lines: insLines,
      });
    }
  };

  while (index < lines.length) {
    const s = lines[index]!;
    if (terminators.some((p) => s.startsWith(p))) {
      break;
    }
    if (s === "***") {
      break;
    }
    if (s.startsWith("***")) {
      throw new DiffError(`Invalid Line: ${s}`);
    }

    index += 1;
    const lastMode: "keep" | "add" | "delete" = mode;
    let line = s;
    if (line[0] === HUNK_ADD_LINE_PREFIX) {
      mode = "add";
    } else if (line[0] === "-") {
      mode = "delete";
    } else if (line[0] === " ") {
      mode = "keep";
    } else {
      // Tolerate invalid lines where the leading whitespace is missing. This is necessary as
      // the model sometimes doesn't fully adhere to the spec and returns lines without leading
      // whitespace for context lines.
      mode = "keep";
      line = " " + line;

      // TODO: Re-enable strict mode.
      // throw new DiffError(`Invalid Line: ${line}`)
    }

    line = line.slice(1);
    // Returning to context lines closes the chunk that was being built.
    if (mode === "keep" && lastMode !== mode) {
      flushChunk();
      delLines = [];
      insLines = [];
    }

    if (mode === "delete") {
      delLines.push(line);
      old.push(line);
    } else if (mode === "add") {
      insLines.push(line);
    } else {
      old.push(line);
    }
  }

  flushChunk();

  if (index < lines.length && lines[index] === END_OF_FILE_PREFIX) {
    index += 1;
    return [old, chunks, index, true];
  }
  return [old, chunks, index, false];
}

// -----------------------------------------------------------------------------
// High-level helpers
// -----------------------------------------------------------------------------

/**
 * Parses patch `text` against the current file contents `orig`.
 *
 * @returns `[patch, fuzz]` where `fuzz` is the accumulated inexactness of
 *   the matches (0 means every match was exact).
 * @throws DiffError when the text is not framed by the begin/end markers or
 *   any section fails to parse.
 */
export function text_to_patch(
  text: string,
  orig: Record<string, string>,
): [Patch, number] {
  const lines = text.trim().split("\n");
  if (
    lines.length < 2 ||
    !(lines[0] ?? "").startsWith(PATCH_PREFIX.trim()) ||
    lines[lines.length - 1] !== PATCH_SUFFIX.trim()
  ) {
    throw new DiffError("Invalid patch text");
  }
  const parser = new Parser(orig, lines);
  parser.index = 1; // skip the begin-patch line
  parser.parse();
  return [parser.patch, parser.fuzz];
}

/**
 * Lists (without duplicates) the paths named by update-file and delete-file
 * headers in `text`, i.e. the files whose current content must be loaded.
 */
export function identify_files_needed(text: string): Array<string> {
  const lines = text.trim().split("\n");
  const result = new Set<string>();
  for (const line of lines) {
    if (line.startsWith(UPDATE_FILE_PREFIX)) {
      result.add(line.slice(UPDATE_FILE_PREFIX.length));
    }
    if (line.startsWith(DELETE_FILE_PREFIX)) {
      result.add(line.slice(DELETE_FILE_PREFIX.length));
    }
  }
  return [...result];
}

/** Lists (without duplicates) the paths named by add-file headers in `text`. */
export function identify_files_added(text: string): Array<string> {
  const lines = text.trim().split("\n");
  const result = new Set<string>();
  for (const line of lines) {
    if (line.startsWith(ADD_FILE_PREFIX)) {
      result.add(line.slice(ADD_FILE_PREFIX.length));
    }
  }
  return [...result];
}

/**
 * Applies the chunks of an UPDATE `action` to `text` and returns the new
 * content. Chunks must be ordered by `orig_index` and must not overlap.
 *
 * @param path - Only used to label error messages.
 * @throws Error when `action` is not an UPDATE.
 * @throws DiffError when a chunk lies outside the file or overlaps the
 *   previous one.
 */
function _get_updated_file(
  text: string,
  action: PatchAction,
  path: string,
): string {
  if (action.type !== ActionType.UPDATE) {
    throw new Error("Expected UPDATE action");
  }
  const origLines = text.split("\n");
  const destLines: Array<string> = [];
  let origIndex = 0;

  for (const chunk of action.chunks) {
    if (chunk.orig_index > origLines.length) {
      throw new DiffError(
        `${path}: chunk.orig_index ${chunk.orig_index} > len(lines) ${origLines.length}`,
      );
    }
    if (origIndex > chunk.orig_index) {
      throw new DiffError(
        `${path}: orig_index ${origIndex} > chunk.orig_index ${chunk.orig_index}`,
      );
    }

    // Copy the untouched lines before the chunk. Plain loops are used
    // instead of push(...array) so very large files cannot exceed the
    // engine's argument-count limit.
    for (let i = origIndex; i < chunk.orig_index; i++) {
      destLines.push(origLines[i]!);
    }
    for (const inserted of chunk.ins_lines) {
      destLines.push(inserted);
    }
    // Skip the deleted lines in the original.
    origIndex = chunk.orig_index + chunk.del_lines.length;
  }

  for (let i = origIndex; i < origLines.length; i++) {
    destLines.push(origLines[i]!);
  }
  return destLines.join("\n");
}

/**
 * Converts a parsed {@link Patch} into a {@link Commit} holding the old and
 * new content of every affected file.
 *
 * @throws DiffError when an UPDATE refers to a path that has no entry in
 *   `orig`, or when its chunks cannot be applied.
 */
export function patch_to_commit(
  patch: Patch,
  orig: Record<string, string>,
): Commit {
  const commit: Commit = { changes: {} };
  for (const [pathKey, action] of Object.entries(patch.actions)) {
    if (action.type === ActionType.DELETE) {
      commit.changes[pathKey] = {
        type: ActionType.DELETE,
        old_content: orig[pathKey],
      };
    } else if (action.type === ActionType.ADD) {
      commit.changes[pathKey] = {
        type: ActionType.ADD,
        new_content: action.new_file ?? "",
      };
    } else if (action.type === ActionType.UPDATE) {
      const oldContent = orig[pathKey];
      if (oldContent == null) {
        // Fail with the module's own error type rather than a TypeError.
        throw new DiffError(`Update File Error: Missing File: ${pathKey}`);
      }
      const newContent = _get_updated_file(oldContent, action, pathKey);
      commit.changes[pathKey] = {
        type: ActionType.UPDATE,
        old_content: oldContent,
        new_content: newContent,
        move_path: action.move_path ?? undefined,
      };
    }
  }
  return commit;
}

// -----------------------------------------------------------------------------
// Filesystem helpers for Node environment
// -----------------------------------------------------------------------------

/**
 * Reads every path in `paths` with `openFn`.
 *
 * @returns The contents keyed by path.
 * @throws DiffError (`File not found: <path>`) when `openFn` throws.
 */
export function load_files(
  paths: Array<string>,
  openFn: (p: string) => string,
): Record<string, string> {
  const orig: Record<string, string> = {};
  for (const p of paths) {
    try {
      orig[p] = openFn(p);
    } catch {
      // Convert any file read error into a DiffError so that callers
      // consistently receive DiffError for patch-related failures.
      throw new DiffError(`File not found: ${p}`);
    }
  }
  return orig;
}

/**
 * Applies a {@link Commit} through the supplied write/remove callbacks.
 *
 * An UPDATE with a `move_path` writes the new content to the destination
 * and removes the source. The removal is skipped when both paths normalise
 * to the same string, so a no-op "move" cannot delete the file just written.
 */
export function apply_commit(
  commit: Commit,
  writeFn: (p: string, c: string) => void,
  removeFn: (p: string) => void,
): void {
  for (const [p, change] of Object.entries(commit.changes)) {
    if (change.type === ActionType.DELETE) {
      removeFn(p);
    } else if (change.type === ActionType.ADD) {
      writeFn(p, change.new_content ?? "");
    } else if (change.type === ActionType.UPDATE) {
      if (change.move_path) {
        writeFn(change.move_path, change.new_content ?? "");
        if (path.normalize(change.move_path) !== path.normalize(p)) {
          removeFn(p);
        }
      } else {
        writeFn(p, change.new_content ?? "");
      }
    }
  }
}

/**
 * Full pipeline: load the files the patch needs, parse it, compute the
 * commit and apply it through the given callbacks.
 *
 * @returns The string "Done!" on success.
 * @throws DiffError when the text does not start with the patch prefix, a
 *   needed file cannot be read, or the patch cannot be parsed/applied.
 */
export function process_patch(
  text: string,
  openFn: (p: string) => string,
  writeFn: (p: string, c: string) => void,
  removeFn: (p: string) => void,
): string {
  if (!text.startsWith(PATCH_PREFIX)) {
    throw new DiffError("Patch must start with *** Begin Patch\\n");
  }
  const paths = identify_files_needed(text);
  const orig = load_files(paths, openFn);
  const [patch] = text_to_patch(text, orig);
  const commit = patch_to_commit(patch, orig);
  apply_commit(commit, writeFn, removeFn);
  return "Done!";
}

// -----------------------------------------------------------------------------
// Default filesystem implementations
// -----------------------------------------------------------------------------

/** Reads a file as UTF-8 text. */
function open_file(p: string): string {
  return fs.readFileSync(p, "utf8");
}

/**
 * Writes UTF-8 text to a relative path, creating parent directories.
 *
 * @throws DiffError when `p` is absolute.
 */
function write_file(p: string, content: string): void {
  if (path.isAbsolute(p)) {
    throw new DiffError("We do not support absolute paths.");
  }
  const parent = path.dirname(p);
  if (parent !== ".") {
    fs.mkdirSync(parent, { recursive: true });
  }
  fs.writeFileSync(p, content, "utf8");
}

/** Deletes a file. */
function remove_file(p: string): void {
  fs.unlinkSync(p);
}

// -----------------------------------------------------------------------------
// CLI mode. Not exported, executed only if run directly.
// -----------------------------------------------------------------------------

if (import.meta.url === `file://${process.argv[1]}`) {
  let patchText = "";
  process.stdin.setEncoding("utf8");
  process.stdin.on("data", (chunk) => (patchText += chunk));
  process.stdin.on("end", () => {
    if (!patchText) {
      // eslint-disable-next-line no-console
      console.error("Please pass patch text through stdin");
      process.exit(1);
    }
    try {
      const result = process_patch(
        patchText,
        open_file,
        write_file,
        remove_file,
      );
      // eslint-disable-next-line no-console
      console.log(result);
    } catch (err: unknown) {
      // eslint-disable-next-line no-console
      console.error(err instanceof Error ? err.message : String(err));
      process.exit(1);
    }
  });
}