// Based on reference implementation from
// https://cookbook.openai.com/examples/gpt4-1_prompting_guide#reference-implementation-apply_patchpy
import fs from "fs" ;
import path from "path" ;
// Patch-format marker strings (defined in the parse-apply-patch module).
import {
ADD_FILE_PREFIX ,
DELETE_FILE_PREFIX ,
END_OF_FILE_PREFIX ,
MOVE_FILE_TO_PREFIX ,
PATCH_SUFFIX ,
UPDATE_FILE_PREFIX ,
HUNK_ADD_LINE_PREFIX ,
PATCH_PREFIX ,
} from "src/parse-apply-patch" ;
// -----------------------------------------------------------------------------
// Types & Models
// -----------------------------------------------------------------------------
/**
 * Kind of change recorded for a single file.
 *
 * The string values are what ends up in the `type` field of file changes and
 * patch actions.
 */
export enum ActionType {
  /** A file that is being created. */
  ADD = "add",
  /** A file that is being removed. */
  DELETE = "delete",
  /** An existing file whose content is being changed. */
  UPDATE = "update",
}
/** The change applied to one file as part of a commit. */
export interface FileChange {
  /** Whether the file is added, deleted or updated. */
  type: ActionType;
  /** File content before the change (recorded for deletes and updates). */
  old_content?: string | null;
  /** File content after the change (recorded for adds and updates). */
  new_content?: string | null;
  /** Destination path when an update also relocates the file. */
  move_path?: string | null;
}
/** A set of file changes, keyed by the path of the file each one affects. */
export interface Commit {
  changes: Record<string, FileChange>;
}
/**
 * Builds a {@link Commit} describing how `updatedFiles` differs from `orig`.
 *
 * Only paths present in `updatedFiles` are examined. For both records a value
 * of `null` (or a missing key) means "the file does not exist", so:
 *
 * - content on both sides that differs -> `UPDATE`
 * - content only in `updatedFiles`      -> `ADD`
 * - content only in `orig`              -> `DELETE`
 * - identical content, or absent on both sides -> no entry
 *
 * @param orig File contents before the change, keyed by path.
 * @param updatedFiles File contents after the change, keyed by path.
 * @returns The commit holding one entry per changed path.
 */
export function assemble_changes(
  orig: Record<string, string | null>,
  updatedFiles: Record<string, string | null>,
): Commit {
  const commit: Commit = { changes: {} };

  for (const [p, updated] of Object.entries(updatedFiles)) {
    // Fold `undefined` (missing key) and `null` (explicitly absent) together so
    // that a `null` is never mistaken for real file content.
    const oldContent = orig[p] ?? null;
    const newContent = updated ?? null;

    if (oldContent === newContent) {
      continue;
    }

    if (oldContent !== null && newContent !== null) {
      commit.changes[p] = {
        type: ActionType.UPDATE,
        old_content: oldContent,
        new_content: newContent,
      };
    } else if (newContent !== null) {
      commit.changes[p] = {
        type: ActionType.ADD,
        new_content: newContent,
      };
    } else {
      // The two values differ and the new one is absent, so the old one must
      // hold content: the file was removed.
      commit.changes[p] = {
        type: ActionType.DELETE,
        old_content: oldContent,
      };
    }
  }

  return commit;
}
// -----------------------------------------------------------------------------
// Patch-related structures
// -----------------------------------------------------------------------------
/** One contiguous replacement inside a file: lines removed and lines inserted. */
export interface Chunk {
  /** Line index of the first line in the original file. */
  orig_index: number;
  /** Lines taken out of the original file at `orig_index`. */
  del_lines: string[];
  /** Lines put in their place. */
  ins_lines: string[];
}
/** What a patch does to a single file. */
export interface PatchAction {
  /** Whether the file is added, deleted or updated. */
  type: ActionType;
  /** Full content of the new file (filled in for add actions). */
  new_file?: string | null;
  /** Replacements to perform (filled in for update actions). */
  chunks: Chunk[];
  /** Destination path when an update also relocates the file. */
  move_path?: string | null;
}
/** A parsed patch: one action per affected file path. */
export interface Patch {
  actions: Record<string, PatchAction>;
}
/**
 * Error raised for any patch-related failure (malformed patch text, missing
 * files, context that cannot be located, ...).
 *
 * `name` is set explicitly so that stack traces and `String(err)` read
 * `DiffError: ...` instead of the generic `Error: ...`.
 */
export class DiffError extends Error {
  constructor(...args: ConstructorParameters<typeof Error>) {
    super(...args);
    this.name = "DiffError";
  }
}
// -----------------------------------------------------------------------------
// Parser (patch text -> Patch)
// -----------------------------------------------------------------------------
/**
 * Line-oriented parser that turns patch text into a {@link Patch}.
 *
 * The caller supplies the patch already split into lines and positions
 * `index` on the first line to parse. `parse()` then consumes file sections
 * (update / delete / add) until it reaches the end-of-patch marker, filling
 * `patch` and accumulating a `fuzz` score that grows whenever context had to be
 * matched loosely.
 */
class Parser {
  /**
   * Punctuation look-alikes folded to their ASCII form before `@@` anchor
   * lines are compared with file lines. Built once instead of per character.
   */
  private static readonly PUNCT_EQUIV: Readonly<Record<string, string>> = {
    // Hyphen / dash variants
    "-": "-",
    "\u2010": "-",
    "\u2011": "-",
    "\u2012": "-",
    "\u2013": "-",
    "\u2014": "-",
    "\u2212": "-",
    // Double quotes
    "\u0022": '"',
    "\u201C": '"',
    "\u201D": '"',
    "\u201E": '"',
    "\u00AB": '"',
    "\u00BB": '"',
    // Single quotes
    "\u0027": "'",
    "\u2018": "'",
    "\u2019": "'",
    "\u201B": "'",
    // Spaces
    "\u00A0": " ",
    "\u202F": " ",
  };

  /** Original file contents, keyed by path. */
  current_files: Record<string, string>;
  /** The patch text, one entry per line. */
  lines: Array<string>;
  /** Index into `lines` of the next line to consume. */
  index = 0;
  /** Result being assembled. */
  patch: Patch = { actions: {} };
  /** Accumulated looseness of all context matches (0 means all exact). */
  fuzz = 0;

  constructor(currentFiles: Record<string, string>, lines: Array<string>) {
    this.current_files = currentFiles;
    this.lines = lines;
  }

  /** NFC-normalises `s` and folds the look-alikes in `PUNCT_EQUIV` to ASCII. */
  private static canon(s: string): string {
    return s
      .normalize("NFC")
      .replace(/./gu, (c) => Parser.PUNCT_EQUIV[c] ?? c);
  }

  /**
   * True when the input is exhausted or, if `prefixes` is given, when the
   * current line starts with any of them (compared after trimming the prefix).
   */
  private is_done(prefixes?: Array<string>): boolean {
    const line = this.lines[this.index];
    if (this.index >= this.lines.length || line === undefined) {
      return true;
    }
    return prefixes?.some((p) => line.startsWith(p.trim())) ?? false;
  }

  /**
   * True when the current line starts with any of the given prefixes.
   * Returns false, rather than crashing, when there is no current line.
   */
  private startswith(prefix: string | Array<string>): boolean {
    const line = this.lines[this.index];
    if (line === undefined) {
      return false;
    }
    const prefixes = Array.isArray(prefix) ? prefix : [prefix];
    return prefixes.some((p) => line.startsWith(p));
  }

  /**
   * Consumes the current line if it starts with `prefix`.
   *
   * @param prefix Required start of the line (the empty default matches any line).
   * @param returnEverything Return the whole line instead of the part after `prefix`.
   * @returns The consumed text, or `""` (without advancing) when the line does
   *   not start with `prefix`.
   * @throws DiffError when there is no line left to read.
   */
  private read_str(prefix = "", returnEverything = false): string {
    if (this.index >= this.lines.length) {
      throw new DiffError(`Index: ${this.index} >= ${this.lines.length}`);
    }
    const line = this.lines[this.index] ?? "";
    if (!line.startsWith(prefix)) {
      return "";
    }
    this.index += 1;
    return returnEverything ? line : line.slice(prefix.length);
  }

  /**
   * Parses file sections until the end-of-patch marker and stores the
   * resulting actions in `this.patch`.
   *
   * @throws DiffError on duplicate paths, on update/delete of a file that is
   *   not in `current_files`, on add of a file that already is, on an
   *   unrecognised line, or when the end-of-patch marker is missing.
   */
  parse(): void {
    while (!this.is_done([PATCH_SUFFIX])) {
      // --- Update ---------------------------------------------------------
      let filePath = this.read_str(UPDATE_FILE_PREFIX);
      if (filePath) {
        if (this.patch.actions[filePath]) {
          throw new DiffError(
            `Update File Error: Duplicate Path: ${filePath}`,
          );
        }
        // An optional "move to" line may directly follow the update header.
        const moveTo = this.read_str(MOVE_FILE_TO_PREFIX);
        if (!(filePath in this.current_files)) {
          throw new DiffError(`Update File Error: Missing File: ${filePath}`);
        }
        const text = this.current_files[filePath];
        const action = this.parse_update_file(text ?? "");
        action.move_path = moveTo || undefined;
        this.patch.actions[filePath] = action;
        continue;
      }

      // --- Delete ---------------------------------------------------------
      filePath = this.read_str(DELETE_FILE_PREFIX);
      if (filePath) {
        if (this.patch.actions[filePath]) {
          throw new DiffError(
            `Delete File Error: Duplicate Path: ${filePath}`,
          );
        }
        if (!(filePath in this.current_files)) {
          throw new DiffError(`Delete File Error: Missing File: ${filePath}`);
        }
        this.patch.actions[filePath] = { type: ActionType.DELETE, chunks: [] };
        continue;
      }

      // --- Add ------------------------------------------------------------
      filePath = this.read_str(ADD_FILE_PREFIX);
      if (filePath) {
        if (this.patch.actions[filePath]) {
          throw new DiffError(`Add File Error: Duplicate Path: ${filePath}`);
        }
        if (filePath in this.current_files) {
          throw new DiffError(
            `Add File Error: File already exists: ${filePath}`,
          );
        }
        this.patch.actions[filePath] = this.parse_add_file();
        continue;
      }

      throw new DiffError(`Unknown Line: ${this.lines[this.index]}`);
    }

    if (!this.startswith(PATCH_SUFFIX.trim())) {
      throw new DiffError("Missing End Patch");
    }
    this.index += 1;
  }

  /**
   * Moves past the file line named by an `@@ <anchor>` marker.
   *
   * The anchor is first looked up verbatim (after canonicalisation), then with
   * surrounding whitespace ignored, which costs one point of fuzz. A search is
   * skipped when a matching line already lies before `from`. When nothing
   * matches, the position is left unchanged.
   *
   * @returns The index just after the matching line, or `from`.
   */
  private seek_anchor(
    fileLines: Array<string>,
    anchor: string,
    from: number,
  ): number {
    const alreadyPassed = fileLines.slice(0, from);

    const exact = Parser.canon(anchor);
    if (!alreadyPassed.some((s) => Parser.canon(s) === exact)) {
      for (let i = from; i < fileLines.length; i++) {
        const candidate = fileLines[i];
        if (candidate !== undefined && Parser.canon(candidate) === exact) {
          return i + 1;
        }
      }
    }

    const loose = Parser.canon(anchor.trim());
    if (!alreadyPassed.some((s) => Parser.canon(s.trim()) === loose)) {
      for (let i = from; i < fileLines.length; i++) {
        const candidate = fileLines[i];
        if (
          candidate !== undefined &&
          Parser.canon(candidate.trim()) === loose
        ) {
          this.fuzz += 1;
          return i + 1;
        }
      }
    }

    return from;
  }

  /**
   * Parses the hunks of one "update file" section against the file's current
   * content and returns the resulting action.
   *
   * @param text Current content of the file being updated.
   * @throws DiffError when a hunk is malformed or its context cannot be found
   *   in the file.
   */
  private parse_update_file(text: string): PatchAction {
    const action: PatchAction = { type: ActionType.UPDATE, chunks: [] };
    const fileLines = text.split("\n");
    // Position in `fileLines` from which the next hunk's context is searched.
    let index = 0;

    while (
      !this.is_done([
        PATCH_SUFFIX,
        UPDATE_FILE_PREFIX,
        DELETE_FILE_PREFIX,
        ADD_FILE_PREFIX,
        END_OF_FILE_PREFIX,
      ])
    ) {
      const sectionStart = this.index;

      const defStr = this.read_str("@@ ");
      let sawBareMarker = false;
      if (!defStr && this.lines[this.index] === "@@") {
        sawBareMarker = true;
        this.index += 1;
      }
      // Every hunk except the first must be introduced by an "@@" line.
      if (!(defStr || sawBareMarker || index === 0)) {
        throw new DiffError(`Invalid Line:\n${this.lines[this.index]}`);
      }

      if (defStr.trim()) {
        index = this.seek_anchor(fileLines, defStr, index);
      }

      const [nextChunkContext, chunks, endPatchIndex, eof] = peek_next_section(
        this.lines,
        this.index,
      );

      // Nothing was consumed in this iteration (for example a first-hunk line
      // such as "@@foo" or a bare "***"). Without this guard the loop would
      // spin forever on the same line.
      if (endPatchIndex === sectionStart) {
        throw new DiffError(`Invalid Line:\n${this.lines[sectionStart]}`);
      }

      const [newIndex, fuzz] = find_context(
        fileLines,
        nextChunkContext,
        index,
        eof,
      );
      if (newIndex === -1) {
        const ctxText = nextChunkContext.join("\n");
        if (eof) {
          throw new DiffError(`Invalid EOF Context ${index}:\n${ctxText}`);
        }
        throw new DiffError(`Invalid Context ${index}:\n${ctxText}`);
      }

      this.fuzz += fuzz;
      for (const ch of chunks) {
        // Chunk offsets are relative to the hunk; make them file-absolute.
        ch.orig_index += newIndex;
        action.chunks.push(ch);
      }
      index = newIndex + nextChunkContext.length;
      this.index = endPatchIndex;
    }

    return action;
  }

  /**
   * Parses the body of an "add file" section: every line up to the next
   * section header must carry the add-line prefix, which is stripped.
   *
   * @throws DiffError when a body line lacks the add-line prefix.
   */
  private parse_add_file(): PatchAction {
    const lines: Array<string> = [];
    while (
      !this.is_done([
        PATCH_SUFFIX,
        UPDATE_FILE_PREFIX,
        DELETE_FILE_PREFIX,
        ADD_FILE_PREFIX,
      ])
    ) {
      const s = this.read_str();
      if (!s.startsWith(HUNK_ADD_LINE_PREFIX)) {
        throw new DiffError(`Invalid Add File Line: ${s}`);
      }
      lines.push(s.slice(HUNK_ADD_LINE_PREFIX.length));
    }
    return {
      type: ActionType.ADD,
      new_file: lines.join("\n"),
      chunks: [],
    };
  }
}
/**
 * Finds the first position at or after `start` where the `context` lines occur
 * consecutively in `lines`.
 *
 * Three progressively looser comparisons are tried, each over the whole
 * search range before the next one starts:
 *
 * 1. equality after Unicode canonicalisation            -> fuzz 0
 * 2. additionally ignoring trailing whitespace          -> fuzz 1
 * 3. additionally ignoring leading whitespace as well   -> fuzz 100
 *
 * @param lines Lines of the file being patched.
 * @param context Lines that must be matched.
 * @param start Index in `lines` at which the search begins.
 * @returns `[index, fuzz]` for the first match, `[start, 0]` when `context` is
 *   empty, or `[-1, 0]` when nothing matches.
 */
function find_context_core(
  lines: Array<string>,
  context: Array<string>,
  start: number,
): [number, number] {
  /*
   * Exact string equality breaks when the file on disk contains visually
   * identical but different Unicode code points (e.g. EN DASH vs ASCII "-"),
   * because models almost always emit the ASCII variant. A small set of
   * common punctuation look-alikes is therefore folded to ASCII before
   * comparing. The mapping is kept deliberately small, and every entry is
   * written as a Unicode escape so this file stays ASCII-only.
   */
  const PUNCT_EQUIV: Record<string, string> = {
    // Hyphen / dash variants --------------------------------------------------
    /* U+002D HYPHEN-MINUS */ "-": "-",
    /* U+2010 HYPHEN */ "\u2010": "-",
    /* U+2011 NO-BREAK HYPHEN */ "\u2011": "-",
    /* U+2012 FIGURE DASH */ "\u2012": "-",
    /* U+2013 EN DASH */ "\u2013": "-",
    /* U+2014 EM DASH */ "\u2014": "-",
    /* U+2212 MINUS SIGN */ "\u2212": "-",
    // Double quotes -----------------------------------------------------------
    /* U+0022 QUOTATION MARK */ "\u0022": '"',
    /* U+201C LEFT DOUBLE QUOTATION MARK */ "\u201C": '"',
    /* U+201D RIGHT DOUBLE QUOTATION MARK */ "\u201D": '"',
    /* U+201E DOUBLE LOW-9 QUOTATION MARK */ "\u201E": '"',
    /* U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */ "\u00AB": '"',
    /* U+00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */ "\u00BB": '"',
    // Single quotes -----------------------------------------------------------
    /* U+0027 APOSTROPHE */ "\u0027": "'",
    /* U+2018 LEFT SINGLE QUOTATION MARK */ "\u2018": "'",
    /* U+2019 RIGHT SINGLE QUOTATION MARK */ "\u2019": "'",
    /* U+201B SINGLE HIGH-REVERSED-9 QUOTATION MARK */ "\u201B": "'",
    // Spaces ------------------------------------------------------------------
    /* U+00A0 NO-BREAK SPACE */ "\u00A0": " ",
    /* U+202F NARROW NO-BREAK SPACE */ "\u202F": " ",
  };

  const canon = (s: string): string =>
    s
      // Canonical Unicode composition first
      .normalize("NFC")
      // Replace punctuation look-alikes
      .replace(/./gu, (c) => PUNCT_EQUIV[c] ?? c);

  if (context.length === 0) {
    return [start, 0];
  }

  // Each pass pairs a per-line preparation step with the fuzz it reports.
  const passes: Array<[(s: string) => string, number]> = [
    [(s) => s, 0],
    [(s) => s.trimEnd(), 1],
    [(s) => s.trim(), 100],
  ];

  for (const [prepare, fuzz] of passes) {
    // The context does not change while scanning, so canonicalise it once.
    const wanted = canon(context.map(prepare).join("\n"));
    for (let i = start; i < lines.length; i++) {
      const candidate = canon(
        lines
          .slice(i, i + context.length)
          .map(prepare)
          .join("\n"),
      );
      if (candidate === wanted) {
        return [i, fuzz];
      }
    }
  }

  return [-1, 0];
}
/**
 * Locates `context` inside `lines`, giving preference to the very end of the
 * file when the hunk is flagged as an end-of-file hunk.
 *
 * @param lines Lines of the file being patched.
 * @param context Lines that must be matched.
 * @param start Index in `lines` at which the regular search begins.
 * @param eof Whether the hunk was terminated by the end-of-file marker.
 * @returns `[index, fuzz]` as reported by `find_context_core`. For an
 *   end-of-file hunk that could not be matched at the tail, the result of the
 *   regular search is returned with 10000 added to its fuzz.
 */
function find_context(
  lines: Array<string>,
  context: Array<string>,
  start: number,
  eof: boolean,
): [number, number] {
  if (!eof) {
    return find_context_core(lines, context, start);
  }

  // First try the position where the context would sit flush with the end.
  const tailStart = lines.length - context.length;
  const [tailIndex, tailFuzz] = find_context_core(lines, context, tailStart);
  if (tailIndex !== -1) {
    return [tailIndex, tailFuzz];
  }

  // Otherwise accept a match elsewhere, heavily penalised.
  const [anyIndex, anyFuzz] = find_context_core(lines, context, start);
  return [anyIndex, anyFuzz + 10000];
}
/**
 * Reads one hunk of an update section, starting at `initialIndex`, without
 * touching any parser state.
 *
 * Lines are classified by their first character: the add-line prefix marks an
 * inserted line, "-" a deleted line and " " a context line. Lines with any
 * other first character are tolerated and treated as context lines whose
 * leading space is missing.
 *
 * @param lines All lines of the patch.
 * @param initialIndex Index of the first line of the hunk.
 * @returns A tuple of
 *   - the hunk's original-side lines (context and deleted lines, in order),
 *   - the chunks found, with `orig_index` relative to the start of the hunk,
 *   - the index of the first line after the hunk (past the end-of-file marker
 *     if one follows),
 *   - whether the hunk was terminated by the end-of-file marker.
 * @throws DiffError for a line starting with "***" that is not a known marker.
 */
function peek_next_section(
  lines: Array<string>,
  initialIndex: number,
): [Array<string>, Array<Chunk>, number, boolean] {
  type Mode = "keep" | "add" | "delete";

  // Markers that end the hunk; computed once rather than for every line.
  const terminators = [
    "@@",
    PATCH_SUFFIX,
    UPDATE_FILE_PREFIX,
    DELETE_FILE_PREFIX,
    ADD_FILE_PREFIX,
    END_OF_FILE_PREFIX,
  ].map((p) => p.trim());

  const old: Array<string> = [];
  const chunks: Array<Chunk> = [];
  let delLines: Array<string> = [];
  let insLines: Array<string> = [];

  // Emits the pending insertions/deletions (if any) as a chunk and resets them.
  const flushChunk = (): void => {
    if (insLines.length || delLines.length) {
      chunks.push({
        orig_index: old.length - delLines.length,
        del_lines: delLines,
        ins_lines: insLines,
      });
    }
    delLines = [];
    insLines = [];
  };

  let index = initialIndex;
  let mode: Mode = "keep";

  while (index < lines.length) {
    const s = lines[index] ?? "";
    if (terminators.some((p) => s.startsWith(p))) {
      break;
    }
    if (s === "***") {
      break;
    }
    if (s.startsWith("***")) {
      throw new DiffError(`Invalid Line: ${s}`);
    }
    index += 1;

    const lastMode: Mode = mode;
    let line: string;
    const marker = s[0];
    if (marker === HUNK_ADD_LINE_PREFIX) {
      mode = "add";
      line = s.slice(1);
    } else if (marker === "-") {
      mode = "delete";
      line = s.slice(1);
    } else if (marker === " ") {
      mode = "keep";
      line = s.slice(1);
    } else {
      // Tolerate invalid lines where the leading whitespace is missing. This is
      // necessary as the model sometimes doesn't fully adhere to the spec and
      // returns context lines without the leading space.
      // TODO: Re-enable strict mode.
      // throw new DiffError(`Invalid Line: ${line}`)
      mode = "keep";
      line = s;
    }

    // A context line following additions/deletions closes the current chunk.
    if (mode === "keep" && lastMode !== mode) {
      flushChunk();
    }

    if (mode === "delete") {
      delLines.push(line);
      old.push(line);
    } else if (mode === "add") {
      insLines.push(line);
    } else {
      old.push(line);
    }
  }

  // A hunk may end while a chunk is still open.
  flushChunk();

  if (index < lines.length && lines[index] === END_OF_FILE_PREFIX) {
    index += 1;
    return [old, chunks, index, true];
  }
  return [old, chunks, index, false];
}
// -----------------------------------------------------------------------------
// High-level helpers
// -----------------------------------------------------------------------------
/**
 * Parses complete patch text into a {@link Patch}.
 *
 * The text is trimmed and split on "\n". It must contain at least two lines,
 * its first line must start with the (trimmed) patch prefix and its last line
 * must equal the (trimmed) patch suffix.
 *
 * @param text The full patch text.
 * @param orig Current contents of the files the patch refers to, keyed by path.
 * @returns The parsed patch together with the accumulated fuzz score.
 * @throws DiffError when the envelope is invalid or the body fails to parse.
 */
export function text_to_patch(
  text: string,
  orig: Record<string, string>,
): [Patch, number] {
  const lines = text.trim().split("\n");

  // Determine the first violated rule, if any; each rule is checked only once.
  let problem: string | undefined;
  if (lines.length < 2) {
    problem = "Patch text must have at least two lines.";
  } else if (!(lines[0] ?? "").startsWith(PATCH_PREFIX.trim())) {
    problem = "Patch text must start with the correct patch prefix.";
  } else if (lines[lines.length - 1] !== PATCH_SUFFIX.trim()) {
    problem = "Patch text must end with the correct patch suffix.";
  }
  if (problem !== undefined) {
    throw new DiffError("Invalid patch text: " + problem);
  }

  const parser = new Parser(orig, lines);
  // Line 0 is the patch prefix; parsing starts right after it.
  parser.index = 1;
  parser.parse();
  return [parser.patch, parser.fuzz];
}
/**
 * Lists the paths a patch needs to read: every file named on an "update file"
 * or "delete file" header line.
 *
 * @param text The full patch text.
 * @returns The distinct paths, in order of first appearance.
 */
export function identify_files_needed(text: string): Array<string> {
  const headerPrefixes = [UPDATE_FILE_PREFIX, DELETE_FILE_PREFIX];
  const result = new Set<string>();
  for (const line of text.trim().split("\n")) {
    for (const prefix of headerPrefixes) {
      if (line.startsWith(prefix)) {
        result.add(line.slice(prefix.length));
      }
    }
  }
  return [...result];
}
/**
 * Lists the paths a patch creates: every file named on an "add file" header
 * line.
 *
 * @param text The full patch text.
 * @returns The distinct paths, in order of first appearance.
 */
export function identify_files_added(text: string): Array<string> {
  const result = new Set<string>();
  for (const line of text.trim().split("\n")) {
    if (line.startsWith(ADD_FILE_PREFIX)) {
      result.add(line.slice(ADD_FILE_PREFIX.length));
    }
  }
  return [...result];
}
/**
 * Applies the chunks of an update action to a file's content.
 *
 * Chunks must be ordered by `orig_index` and must not overlap. The lines
 * between chunks are copied through unchanged; for each chunk its `ins_lines`
 * are emitted and `del_lines.length` original lines are skipped.
 *
 * @param text Current content of the file.
 * @param action The update action to apply.
 * @param path Path of the file, used only in error messages.
 * @returns The new file content.
 * @throws Error when `action` is not an update action.
 * @throws DiffError when a chunk starts past the end of the file or before the
 *   end of the previous chunk.
 */
function _get_updated_file(
  text: string,
  action: PatchAction,
  path: string,
): string {
  if (action.type !== ActionType.UPDATE) {
    throw new Error("Expected UPDATE action");
  }

  const origLines = text.split("\n");
  const destLines: Array<string> = [];
  let origIndex = 0;

  for (const chunk of action.chunks) {
    if (chunk.orig_index > origLines.length) {
      throw new DiffError(
        `${path}: chunk.orig_index ${chunk.orig_index} > len(lines) ${origLines.length}`,
      );
    }
    if (origIndex > chunk.orig_index) {
      throw new DiffError(
        `${path}: orig_index ${origIndex} > chunk.orig_index ${chunk.orig_index}`,
      );
    }

    // Copy the untouched lines in front of the chunk. Lines are pushed one by
    // one: spreading a very large array into push() can exceed the engine's
    // argument limit.
    for (const kept of origLines.slice(origIndex, chunk.orig_index)) {
      destLines.push(kept);
    }
    for (const inserted of chunk.ins_lines) {
      destLines.push(inserted);
    }
    // Skip the lines this chunk deletes.
    origIndex = chunk.orig_index + chunk.del_lines.length;
  }

  for (const kept of origLines.slice(origIndex)) {
    destLines.push(kept);
  }
  return destLines.join("\n");
}
/**
 * Converts a parsed patch into the concrete file changes it produces.
 *
 * @param patch The parsed patch.
 * @param orig Current contents of the files the patch refers to, keyed by path.
 * @returns A commit with one change per action in the patch.
 * @throws DiffError when an update refers to a file missing from `orig`, or
 *   when its chunks cannot be applied.
 */
export function patch_to_commit(
  patch: Patch,
  orig: Record<string, string>,
): Commit {
  const commit: Commit = { changes: {} };

  for (const [pathKey, action] of Object.entries(patch.actions)) {
    switch (action.type) {
      case ActionType.DELETE:
        commit.changes[pathKey] = {
          type: ActionType.DELETE,
          old_content: orig[pathKey],
        };
        break;

      case ActionType.ADD:
        commit.changes[pathKey] = {
          type: ActionType.ADD,
          new_content: action.new_file ?? "",
        };
        break;

      case ActionType.UPDATE: {
        const oldContent = orig[pathKey];
        // Report a missing original as a patch error instead of crashing
        // while trying to split `undefined`.
        if (oldContent === undefined) {
          throw new DiffError(`Update File Error: Missing File: ${pathKey}`);
        }
        commit.changes[pathKey] = {
          type: ActionType.UPDATE,
          old_content: oldContent,
          new_content: _get_updated_file(oldContent, action, pathKey),
          move_path: action.move_path ?? undefined,
        };
        break;
      }
    }
  }

  return commit;
}
// -----------------------------------------------------------------------------
// Filesystem helpers for Node environment
// -----------------------------------------------------------------------------
/**
 * Reads the given files through `openFn`.
 *
 * @param paths Paths of the files to read.
 * @param openFn Returns the content of the file at the given path; may throw.
 * @returns The file contents, keyed by path.
 * @throws DiffError when `openFn` throws for any path.
 */
export function load_files(
  paths: Array<string>,
  openFn: (p: string) => string,
): Record<string, string> {
  const orig: Record<string, string> = {};
  for (const p of paths) {
    try {
      orig[p] = openFn(p);
    } catch {
      // Convert any file read error into a DiffError so that callers
      // consistently receive DiffError for patch-related failures.
      throw new DiffError(`File not found: ${p}`);
    }
  }
  return orig;
}
/**
 * Carries out the changes of a commit through the supplied callbacks.
 *
 * - delete: `removeFn(path)`
 * - add:    `writeFn(path, new_content)`
 * - update: `writeFn` to `move_path` if set (then `removeFn` on the old path),
 *           otherwise `writeFn` to the original path
 *
 * Missing `new_content` is written as the empty string.
 *
 * @param commit The changes to apply.
 * @param writeFn Writes `c` to the file at `p`.
 * @param removeFn Removes the file at `p`.
 */
export function apply_commit(
  commit: Commit,
  writeFn: (p: string, c: string) => void,
  removeFn: (p: string) => void,
): void {
  for (const [p, change] of Object.entries(commit.changes)) {
    if (change.type === ActionType.DELETE) {
      removeFn(p);
    } else if (change.type === ActionType.ADD) {
      writeFn(p, change.new_content ?? "");
    } else if (change.type === ActionType.UPDATE) {
      const target = change.move_path ? change.move_path : p;
      writeFn(target, change.new_content ?? "");
      // Remove the old path only for a real move. A move onto the same path
      // would otherwise delete the file that has just been written.
      if (target !== p) {
        removeFn(p);
      }
    }
  }
}
/**
 * Applies patch text end to end: loads the files it needs, parses it, computes
 * the resulting changes and carries them out.
 *
 * @param text The full patch text; must start with the patch prefix.
 * @param openFn Returns the content of the file at the given path.
 * @param writeFn Writes content to the file at the given path.
 * @param removeFn Removes the file at the given path.
 * @returns The string "Done!" once all changes have been applied.
 * @throws DiffError when the text does not start with the patch prefix, a
 *   needed file cannot be read, or the patch cannot be parsed or applied.
 */
export function process_patch(
  text: string,
  openFn: (p: string) => string,
  writeFn: (p: string, c: string) => void,
  removeFn: (p: string) => void,
): string {
  if (!text.startsWith(PATCH_PREFIX)) {
    throw new DiffError("Patch must start with *** Begin Patch\\n");
  }
  const paths = identify_files_needed(text);
  const orig = load_files(paths, openFn);
  // The fuzz score is not used here; only the parsed patch is needed.
  const [patch] = text_to_patch(text, orig);
  const commit = patch_to_commit(patch, orig);
  apply_commit(commit, writeFn, removeFn);
  return "Done!";
}
// -----------------------------------------------------------------------------
// Default filesystem implementations
// -----------------------------------------------------------------------------
/** Default reader: returns the UTF-8 text of the file at `p`. */
function open_file(p: string): string {
  const content = fs.readFileSync(p, { encoding: "utf8" });
  return content;
}
/**
 * Default writer: stores `content` as UTF-8 at the relative path `p`, creating
 * missing parent directories first.
 *
 * @throws DiffError when `p` is an absolute path.
 */
function write_file(p: string, content: string): void {
  if (path.isAbsolute(p)) {
    throw new DiffError("We do not support absolute paths.");
  }

  // "." means the file sits directly in the working directory; nothing to create.
  const directory = path.dirname(p);
  const needsDirectory = directory !== ".";
  if (needsDirectory) {
    fs.mkdirSync(directory, { recursive: true });
  }

  fs.writeFileSync(p, content, "utf8");
}
/**
 * Default remover: deletes the file at the relative path `p`.
 *
 * Absolute paths are rejected, matching the rule `write_file` enforces.
 *
 * @throws DiffError when `p` is an absolute path.
 */
function remove_file(p: string): void {
  if (path.isAbsolute(p)) {
    throw new DiffError("We do not support absolute paths.");
  }
  fs.unlinkSync(p);
}
// -----------------------------------------------------------------------------
// CLI mode. Not exported, executed only if run directly.
// -----------------------------------------------------------------------------
// Run only when this module is the script Node was started with: read the
// whole patch from stdin, apply it with the default filesystem helpers, and
// exit with status 1 on missing input or on any failure.
if (import.meta.url === `file://${process.argv[1]}`) {
  let patchText = "";
  process.stdin.setEncoding("utf8");
  process.stdin.on("data", (chunk) => (patchText += chunk));
  process.stdin.on("end", () => {
    if (!patchText) {
      // eslint-disable-next-line no-console
      console.error("Please pass patch text through stdin");
      process.exit(1);
    }
    try {
      const result = process_patch(
        patchText,
        open_file,
        write_file,
        remove_file,
      );
      // eslint-disable-next-line no-console
      console.log(result);
    } catch (err: unknown) {
      // eslint-disable-next-line no-console
      console.error(err instanceof Error ? err.message : String(err));
      process.exit(1);
    }
  });
}
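// -----------------------------------------------------------------------------
// Tool instructions
// -----------------------------------------------------------------------------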
/**
 * Instruction text describing how to invoke the `apply_patch` CLI and how to
 * write a patch in the V4A diff format.
 *
 * Backticks are escaped so they do not terminate the template literal, and
 * `\\n` yields a literal backslash-n inside the JSON examples.
 */
export const applyPatchToolInstructions = `
To edit files, ALWAYS use the \`shell\` tool with \`apply_patch\` CLI. \`apply_patch\` effectively allows you to execute a diff/patch against a file, but the format of the diff specification is unique to this task, so pay careful attention to these instructions. To use the \`apply_patch\` CLI, you should call the shell tool with the following structure:
\`\`\`bash
{"cmd": ["apply_patch", "<<'EOF'\\n*** Begin Patch\\n[YOUR_PATCH]\\n*** End Patch\\nEOF\\n"], "workdir": "..."}
\`\`\`
Where [YOUR_PATCH] is the actual content of your patch, specified in the following V4A diff format.
*** [ACTION] File: [path/to/file] -> ACTION can be one of Add, Update, or Delete.
For each snippet of code that needs to be changed, repeat the following:
[context_before] -> See below for further instructions on context.
- [old_code] -> Precede the old code with a minus sign.
+ [new_code] -> Precede the new, replacement code with a plus sign.
[context_after] -> See below for further instructions on context.
For instructions on [context_before] and [context_after]:
- By default, show 3 lines of code immediately above and 3 lines immediately below each change. If a change is within 3 lines of a previous change, do NOT duplicate the first change’s [context_after] lines in the second change’s [context_before] lines.
- If 3 lines of context is insufficient to uniquely identify the snippet of code within the file, use the @@ operator to indicate the class or function to which the snippet belongs. For instance, we might have:
@@ class BaseClass
[3 lines of pre-context]
- [old_code]
+ [new_code]
[3 lines of post-context]
- If a code block is repeated so many times in a class or function such that even a single \`@@\` statement and 3 lines of context cannot uniquely identify the snippet of code, you can use multiple \`@@\` statements to jump to the right context. For instance:
@@ class BaseClass
@@ def method():
[3 lines of pre-context]
- [old_code]
+ [new_code]
[3 lines of post-context]
Note, then, that we do not use line numbers in this diff format, as the context is enough to uniquely identify code. An example of a message that you might pass as "input" to this function, in order to apply a patch, is shown below.
\`\`\`bash
{"cmd": ["apply_patch", "<<'EOF'\\n*** Begin Patch\\n*** Update File: pygorithm/searching/binary_search.py\\n@@ class BaseClass\\n@@ def search():\\n- pass\\n+ raise NotImplementedError()\\n@@ class Subclass\\n@@ def search():\\n- pass\\n+ raise NotImplementedError()\\n*** End Patch\\nEOF\\n"], "workdir": "..."}
\`\`\`
File references can only be relative, NEVER ABSOLUTE. After the apply_patch command is run, it will always say "Done!", regardless of whether the patch was successfully applied or not. However, you can determine if there are issue and errors by looking at any warnings or logging lines printed BEFORE the "Done!" is output.
`;