Initial commit

Signed-off-by: Ilan Bigio <ilan@openai.com>
Ilan Bigio
2025-04-16 12:56:08 -04:00
commit 59a180ddec
163 changed files with 30587 additions and 0 deletions


@@ -0,0 +1,190 @@
import type { EditedFiles, FileOperation } from "./file_ops";
import { createTwoFilesPatch } from "diff";
/**************************************
* ANSI color codes for output styling
**************************************/
const RED = "\u001b[31m";
const GREEN = "\u001b[32m";
const CYAN = "\u001b[36m";
const YELLOW = "\u001b[33m";
const RESET = "\u001b[0m";
/******************************************************
* Generate a unified diff of two file contents
* akin to generate_file_diff(original, updated)
******************************************************/
export function generateFileDiff(
originalContent: string,
updatedContent: string,
filePath: string,
): string {
return createTwoFilesPatch(
`${filePath} (original)`,
`${filePath} (modified)`,
originalContent,
updatedContent,
undefined,
undefined,
{ context: 5 },
);
}
/******************************************************
* Apply colorization to a unified diff
* akin to generate_colored_diff(diff_content)
******************************************************/
export function generateColoredDiff(diffContent: string): string {
const lines = diffContent.split(/\r?\n/);
const coloredLines: Array<string> = [];
for (const line of lines) {
if (line.startsWith("+++") || line.startsWith("---")) {
// keep these lines uncolored, preserving the original style
coloredLines.push(line);
} else if (line.startsWith("+")) {
// color lines that begin with + but not +++
coloredLines.push(`${GREEN}${line}${RESET}`);
} else if (line.startsWith("-")) {
// color lines that begin with - but not ---
coloredLines.push(`${RED}${line}${RESET}`);
} else if (line.startsWith("@@")) {
// hunk header
coloredLines.push(`${CYAN}${line}${RESET}`);
} else {
coloredLines.push(line);
}
}
return coloredLines.join("\n");
}
/******************************************************
* Count lines added and removed in a unified diff.
* akin to generate_diff_stats(diff_content).
******************************************************/
export function generateDiffStats(diffContent: string): [number, number] {
let linesAdded = 0;
let linesRemoved = 0;
const lines = diffContent.split(/\r?\n/);
for (const line of lines) {
if (line.startsWith("+") && !line.startsWith("+++")) {
linesAdded += 1;
} else if (line.startsWith("-") && !line.startsWith("---")) {
linesRemoved += 1;
}
}
return [linesAdded, linesRemoved];
}
/************************************************
* Helper for generating a short header block
************************************************/
function generateDiffHeader(fileOp: FileOperation): string {
const TTY_WIDTH = 80;
const separatorLine = "=".repeat(TTY_WIDTH) + "\n";
const subSeparatorLine = "-".repeat(TTY_WIDTH) + "\n";
const headerLine = `Changes for: ${fileOp.path}`;
return separatorLine + headerLine + "\n" + subSeparatorLine;
}
/****************************************************************
* Summarize diffs for each file operation that has differences.
* akin to generate_diff_summary(edited_files, original_files)
****************************************************************/
export function generateDiffSummary(
editedFiles: EditedFiles,
originalFileContents: Record<string, string>,
): [string, Array<FileOperation>] {
let combinedDiffs = "";
const opsToApply: Array<FileOperation> = [];
for (const fileOp of editedFiles.ops) {
const diffHeader = generateDiffHeader(fileOp);
if (fileOp.delete) {
// file will be deleted
combinedDiffs += diffHeader + "File will be deleted.\n\n";
opsToApply.push(fileOp);
continue;
} else if (fileOp.move_to) {
combinedDiffs +=
diffHeader + `File will be moved to: ${fileOp.move_to}\n\n`;
opsToApply.push(fileOp);
continue;
}
// otherwise it's an update
const originalContent = originalFileContents[fileOp.path] ?? "";
const updatedContent = fileOp.updated_full_content ?? "";
if (originalContent === updatedContent) {
// no changes => skip
continue;
}
const diffOutput = generateFileDiff(
originalContent,
updatedContent,
fileOp.path,
);
if (diffOutput.trim()) {
const coloredDiff = generateColoredDiff(diffOutput);
combinedDiffs += diffHeader + coloredDiff + "\n";
opsToApply.push(fileOp);
}
}
return [combinedDiffs, opsToApply];
}
/****************************************************************
* Generate a user-friendly summary of the pending file ops.
* akin to generate_edit_summary(ops_to_apply, original_files)
****************************************************************/
export function generateEditSummary(
opsToApply: Array<FileOperation>,
originalFileContents: Record<string, string>,
): string {
if (!opsToApply || opsToApply.length === 0) {
return "No changes detected.";
}
const summaryLines: Array<string> = [];
for (const fileOp of opsToApply) {
if (fileOp.delete) {
// red for deleted
summaryLines.push(`${RED} Deleted: ${fileOp.path}${RESET}`);
} else if (fileOp.move_to) {
// yellow for moved
summaryLines.push(
`${YELLOW} Moved: ${fileOp.path} -> ${fileOp.move_to}${RESET}`,
);
} else {
const originalContent = originalFileContents[fileOp.path];
const updatedContent = fileOp.updated_full_content ?? "";
if (originalContent === undefined) {
// newly created file
const linesAdded = updatedContent.split(/\r?\n/).length;
summaryLines.push(
`${GREEN} Created: ${fileOp.path} (+${linesAdded} lines)${RESET}`,
);
} else {
const diffOutput = generateFileDiff(
originalContent,
updatedContent,
fileOp.path,
);
const [added, removed] = generateDiffStats(diffOutput);
summaryLines.push(
` Modified: ${fileOp.path} (${GREEN}+${added}${RESET}/${RED}-${removed}${RESET})`,
);
}
}
}
return summaryLines.join("\n");
}
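/****************************************************************
 * Illustrative usage sketch (an addition for this write-up, not
 * part of the original module): wiring the summary helpers
 * together. The inputs are assumed to come from the model's
 * EditedFiles output and a snapshot of original file contents.
 ****************************************************************/
export function previewChanges(
  editedFiles: EditedFiles,
  originalFileContents: Record<string, string>,
): void {
  const [combinedDiffs, opsToApply] = generateDiffSummary(
    editedFiles,
    originalFileContents,
  );
  // Full colored per-file diffs first, then the compact summary, e.g.
  //   Modified: /abs/path/app.ts (+12/-3)
  console.log(combinedDiffs);
  console.log(generateEditSummary(opsToApply, originalFileContents));
}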


@@ -0,0 +1,64 @@
/** Represents file contents with a path and its full text. */
export interface FileContent {
path: string;
content: string;
}
/**
* Represents the context for a task, including:
* - A prompt (the user's request)
* - A list of input paths being considered editable
* - A directory structure overview
* - A collection of file contents
*/
export interface TaskContext {
prompt: string;
input_paths: Array<string>;
input_paths_structure: string;
files: Array<FileContent>;
}
/**
 * Renders a string version of the TaskContext: a note about important output
 * requirements, a summary of the directory structure, and an XML-like listing
 * of the file contents.
 *
 * The model is instructed to produce changes only for files strictly under the
 * specified input paths and to provide full file contents for any modification.
 */
export function renderTaskContext(taskContext: TaskContext): string {
const inputPathsJoined = taskContext.input_paths.join(", ");
return `
Complete the following task: ${taskContext.prompt}
# IMPORTANT OUTPUT REQUIREMENTS
- UNDER NO CIRCUMSTANCES PRODUCE PARTIAL OR TRUNCATED FILE CONTENT. You MUST provide the FULL AND FINAL content for every file modified.
- ALWAYS INCLUDE THE COMPLETE UPDATED VERSION OF THE FILE, do not omit or only partially include lines.
- ONLY produce changes for files located strictly under ${inputPathsJoined}.
- ALWAYS produce absolute paths in the output.
- Do not delete or change code UNRELATED to the task.
# **Directory structure**
${taskContext.input_paths_structure}
# Files
${renderFilesToXml(taskContext.files)}
`;
}
/**
 * Converts the provided list of FileContent objects into a custom XML-like format.
 *
 * For each file, the content is embedded in a CDATA section. Note that file
 * content containing the sequence "]]>" would terminate the CDATA block early.
 */
function renderFilesToXml(files: Array<FileContent>): string {
let xmlContent = "<files>";
for (const fc of files) {
xmlContent += `
<file>
<path>${fc.path}</path>
<content><![CDATA[${fc.content}]]></content>
</file>`;
}
xmlContent += "\n</files>";
return xmlContent;
}
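/**
 * Illustrative sketch (an addition for this write-up, not part of the original
 * file): building and rendering a minimal TaskContext. The prompt and paths
 * below are invented.
 */
export function exampleRenderTaskContext(): string {
  const ctx: TaskContext = {
    prompt: "Rename the helper function",
    input_paths: ["/abs/project/src"],
    input_paths_structure: "/abs/project/src\n└── helper.ts",
    files: [{ path: "/abs/project/src/helper.ts", content: "export {};" }],
  };
  // Yields the output-requirements preamble, the directory tree, and a
  // <files> block with each file's content wrapped in CDATA.
  return renderTaskContext(ctx);
}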


@@ -0,0 +1,409 @@
/* eslint-disable no-await-in-loop */
import * as fsSync from "fs";
import fs from "fs/promises";
import path from "path";
/** Represents file contents with absolute path. */
export interface FileContent {
path: string;
content: string;
}
/** A simple LRU cache entry structure. */
interface CacheEntry {
/** Last modification time of the file (epoch ms). */
mtime: number;
/** Size of the file in bytes. */
size: number;
/** Entire file content. */
content: string;
}
/**
* A minimal LRU-based file cache to store file contents keyed by absolute path.
* We store (mtime, size, content). If a file's mtime or size changes, we consider
* the cache invalid and re-read.
*/
class LRUFileCache {
private maxSize: number;
private cache: Map<string, CacheEntry>;
constructor(maxSize: number) {
this.maxSize = maxSize;
this.cache = new Map();
}
/**
* Retrieves the cached entry for the given path, if it exists.
* If found, we re-insert it in the map to mark it as recently used.
*/
get(key: string): CacheEntry | undefined {
const entry = this.cache.get(key);
if (entry) {
// Re-insert to maintain recency
this.cache.delete(key);
this.cache.set(key, entry);
}
return entry;
}
/**
* Insert or update an entry in the cache.
*/
set(key: string, entry: CacheEntry): void {
// if key already in map, delete it so that insertion below sets recency.
if (this.cache.has(key)) {
this.cache.delete(key);
}
this.cache.set(key, entry);
// If over capacity, evict the least recently used entry.
if (this.cache.size > this.maxSize) {
const firstKey = this.cache.keys().next();
if (!firstKey.done) {
this.cache.delete(firstKey.value);
}
}
}
/**
* Remove an entry from the cache.
*/
delete(key: string): void {
this.cache.delete(key);
}
/**
* Returns all keys in the cache (for pruning old files, etc.).
*/
keys(): IterableIterator<string> {
return this.cache.keys();
}
}
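// Illustrative sketch of the eviction and recency rules (an addition for this
// write-up, not part of the original file): with maxSize = 2, a get() refresh
// decides which key the next insert evicts.
function demoLruEviction(): void {
  const cache = new LRUFileCache(2);
  cache.set("/a", { mtime: 0, size: 1, content: "a" });
  cache.set("/b", { mtime: 0, size: 1, content: "b" });
  cache.get("/a"); // "/a" becomes most recently used
  cache.set("/c", { mtime: 0, size: 1, content: "c" }); // evicts "/b"
  // The cache now holds "/a" and "/c".
}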
// Environment-based defaults
const MAX_CACHE_ENTRIES = parseInt(
process.env["TENX_FILE_CACHE_MAX_ENTRIES"] || "1000",
10,
);
// Global LRU file cache instance.
const FILE_CONTENTS_CACHE = new LRUFileCache(MAX_CACHE_ENTRIES);
// Default list of glob patterns to ignore if the user doesn't provide a custom ignore file.
const DEFAULT_IGNORE_PATTERNS = `
# Binaries and large media
*.woff
*.exe
*.dll
*.bin
*.dat
*.pdf
*.png
*.jpg
*.jpeg
*.gif
*.bmp
*.tiff
*.ico
*.zip
*.tar
*.gz
*.rar
*.7z
*.mp3
*.mp4
*.avi
*.mov
*.wmv
# Build and distribution
build/*
dist/*
# Logs and temporary files
*.log
*.tmp
*.swp
*.swo
*.bak
*.old
# Python artifacts
*.egg-info/*
__pycache__/*
*.pyc
*.pyo
*.pyd
.pytest_cache/*
.ruff_cache/*
venv/*
.venv/*
env/*
# Rust artifacts
target/*
Cargo.lock
# Node.js artifacts
*.tsbuildinfo
node_modules/*
package-lock.json
# Environment files
.env/*
# Git
.git/*
# OS specific files
.DS_Store
Thumbs.db
# Hidden files
.*/*
.*
`;
function _read_default_patterns_file(filePath?: string): string {
if (!filePath) {
return DEFAULT_IGNORE_PATTERNS;
}
return fsSync.readFileSync(filePath, "utf-8");
}
/** Loads ignore patterns from a file (or a default list) and returns a list of RegExp patterns. */
export function loadIgnorePatterns(filePath?: string): Array<RegExp> {
try {
const raw = _read_default_patterns_file(filePath);
const lines = raw.split(/\r?\n/);
const cleaned = lines
.map((l: string) => l.trim())
.filter((l: string) => l && !l.startsWith("#"));
// Convert each pattern to a RegExp with an optional leading '.*/' so it matches at any directory depth.
const regs = cleaned.map((pattern: string) => {
const escaped = pattern
.replace(/[.+^${}()|[\]\\]/g, "\\$&")
.replace(/\*/g, ".*")
.replace(/\?/g, ".");
const finalRe = `^(?:(?:(?:.*/)?)(?:${escaped}))$`;
return new RegExp(finalRe, "i");
});
return regs;
} catch {
return [];
}
}
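// Illustrative sketch (an addition for this write-up, not part of the original
// file), assuming POSIX-style absolute paths. A glob such as "node_modules/*"
// compiles to ^(?:(?:(?:.*/)?)(?:node_modules/.*))$ and therefore matches at
// any directory depth.
function demoIgnoreMatch(): boolean {
  const patterns = loadIgnorePatterns(); // no file given => built-in defaults
  return shouldIgnorePath("/repo/node_modules/left-pad/index.js", patterns);
  // => true
}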
/** Checks if a given path is ignored by any of the compiled patterns. */
export function shouldIgnorePath(
p: string,
compiledPatterns: Array<RegExp>,
): boolean {
const normalized = path.resolve(p);
for (const regex of compiledPatterns) {
if (regex.test(normalized)) {
return true;
}
}
return false;
}
/**
* Recursively builds an ASCII representation of a directory structure, given a list
* of file paths.
*/
export function makeAsciiDirectoryStructure(
rootPath: string,
filePaths: Array<string>,
): string {
const root = path.resolve(rootPath);
// We'll store a nested object. Directories => sub-tree or null if it's a file.
interface DirTree {
[key: string]: DirTree | null;
}
const tree: DirTree = {};
for (const file of filePaths) {
const resolved = path.resolve(file);
let relPath: string;
try {
const rp = path.relative(root, resolved);
// If it's outside of root, skip.
if (rp.startsWith("..")) {
continue;
}
relPath = rp;
} catch {
continue;
}
const parts = relPath.split(path.sep);
let current: DirTree = tree;
for (let i = 0; i < parts.length; i++) {
const part = parts[i];
if (!part) {
continue;
}
if (i === parts.length - 1) {
// file
current[part] = null;
} else {
if (!current[part]) {
current[part] = {};
}
current = current[part] as DirTree;
}
}
}
const lines: Array<string> = [root];
function recurse(node: DirTree, prefix: string): void {
const entries = Object.keys(node).sort((a, b) => {
// Directories first, then files
const aIsDir = node[a] != null;
const bIsDir = node[b] != null;
if (aIsDir && !bIsDir) {
return -1;
}
if (!aIsDir && bIsDir) {
return 1;
}
return a.localeCompare(b);
});
for (let i = 0; i < entries.length; i++) {
const entry = entries[i];
if (!entry) {
continue;
}
const isLast = i === entries.length - 1;
const connector = isLast ? "└──" : "├──";
const isDir = node[entry] != null;
lines.push(`${prefix}${connector} ${entry}`);
if (isDir) {
const newPrefix = prefix + (isLast ? " " : "│ ");
recurse(node[entry] as DirTree, newPrefix);
}
}
}
recurse(tree, "");
return lines.join("\n");
}
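// Illustrative sketch (an addition for this write-up, not part of the original
// file), assuming POSIX paths. Directories sort before files at each level:
function demoAsciiTree(): string {
  return makeAsciiDirectoryStructure("/app", [
    "/app/src/main.ts",
    "/app/README.md",
  ]);
  // /app
  // ├── src
  // │   └── main.ts
  // └── README.md
}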
/**
 * Iteratively collects all files under rootPath that are not ignored, skipping symlinks.
 * For each file, we check the LRU cache; if the entry is missing or stale, we re-read it.
 * Returns an array of FileContent.
 *
 * After collecting, we remove from the cache any file that was not seen during the traversal.
 */
export async function getFileContents(
rootPath: string,
compiledPatterns: Array<RegExp>,
): Promise<Array<FileContent>> {
const root = path.resolve(rootPath);
const candidateFiles: Array<string> = [];
// Stack of directories for an iterative depth-first traversal.
const stack: Array<string> = [root];
while (stack.length > 0) {
const currentDir = stack.pop()!;
let dirents: Array<fsSync.Dirent> = [];
try {
dirents = await fs.readdir(currentDir, { withFileTypes: true });
} catch {
continue;
}
for (const dirent of dirents) {
try {
const resolved = path.resolve(currentDir, dirent.name);
// skip symlinks
const lstat = await fs.lstat(resolved);
if (lstat.isSymbolicLink()) {
continue;
}
if (dirent.isDirectory()) {
// check if ignored
if (!shouldIgnorePath(resolved, compiledPatterns)) {
stack.push(resolved);
}
} else if (dirent.isFile()) {
// check if ignored
if (!shouldIgnorePath(resolved, compiledPatterns)) {
candidateFiles.push(resolved);
}
}
} catch {
// skip
}
}
}
// Stat each candidate file to decide whether its cached content can be reused.
const results: Array<FileContent> = [];
// We'll keep track of which files we actually see.
const seenPaths = new Set<string>();
await Promise.all(
candidateFiles.map(async (filePath) => {
seenPaths.add(filePath);
let st: fsSync.Stats | null = null;
try {
st = await fs.stat(filePath);
} catch {
return;
}
if (!st) {
return;
}
const cEntry = FILE_CONTENTS_CACHE.get(filePath);
if (
cEntry &&
Math.abs(cEntry.mtime - st.mtime.getTime()) < 1 &&
cEntry.size === st.size
) {
// same mtime, same size => use cache
results.push({ path: filePath, content: cEntry.content });
} else {
// read file
try {
const buf = await fs.readFile(filePath);
const content = buf.toString("utf-8");
// store in cache
FILE_CONTENTS_CACHE.set(filePath, {
mtime: st.mtime.getTime(),
size: st.size,
content,
});
results.push({ path: filePath, content });
} catch {
// skip
}
}
}),
);
// Now remove from cache any file that wasn't encountered.
const currentKeys = [...FILE_CONTENTS_CACHE.keys()];
for (const key of currentKeys) {
if (!seenPaths.has(key)) {
FILE_CONTENTS_CACHE.delete(key);
}
}
// sort results by path
results.sort((a, b) => a.path.localeCompare(b.path));
return results;
}
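// Illustrative usage sketch (an addition for this write-up, not part of the
// original file): snapshot a directory with the default ignore list. Repeat
// calls reuse cached contents for files whose mtime and size are unchanged.
export async function snapshotDirectory(
  rootPath: string,
): Promise<Array<FileContent>> {
  return getFileContents(rootPath, loadIgnorePatterns());
}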


@@ -0,0 +1,208 @@
/* eslint-disable no-console */
import type { FileContent } from "./context_files.js";
import path from "path";
/**
* Builds file-size and total-size maps for the provided files, keyed by absolute path.
*
* @param root - The root directory (absolute path) to treat as the top level; the upward walk stops here.
* @param files - An array of FileContent objects, each with a path and content.
* @returns A tuple [fileSizeMap, totalSizeMap] where:
* - fileSizeMap[path] = size (in characters) of the file
* - totalSizeMap[path] = cumulative size (in characters) for path (file or directory)
*/
export function computeSizeMap(
root: string,
files: Array<FileContent>,
): [Record<string, number>, Record<string, number>] {
const rootAbs = path.resolve(root);
const fileSizeMap: Record<string, number> = {};
const totalSizeMap: Record<string, number> = {};
for (const fc of files) {
const pAbs = path.resolve(fc.path);
const length = fc.content.length;
// Record size in fileSizeMap
fileSizeMap[pAbs] = length;
// Ascend from pAbs up to root, adding size along the way.
let current = pAbs;
// eslint-disable-next-line no-constant-condition
while (true) {
totalSizeMap[current] = (totalSizeMap[current] ?? 0) + length;
if (current === rootAbs) {
break;
}
const parent = path.dirname(current);
// If we've reached the top or gone outside root, break.
if (parent === current) {
// e.g. we're at "/" in a *nix system or some root in Windows.
break;
}
// If we have gone above the root (meaning the parent no longer starts with rootAbs), break.
if (!parent.startsWith(rootAbs) && parent !== rootAbs) {
break;
}
current = parent;
}
}
return [fileSizeMap, totalSizeMap];
}
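// Illustrative sketch (an addition for this write-up, not part of the original
// file), assuming POSIX paths: sizes propagate to every ancestor up to root.
function demoSizeMaps(): void {
  const [fileSizes, totalSizes] = computeSizeMap("/app", [
    { path: "/app/src/a.ts", content: "x".repeat(10) },
    { path: "/app/src/b.ts", content: "y".repeat(20) },
  ]);
  // fileSizes  => { "/app/src/a.ts": 10, "/app/src/b.ts": 20 }
  // totalSizes => the two files above, plus "/app/src": 30 and "/app": 30
  void fileSizes;
  void totalSizes;
}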
/**
* Builds a mapping of directories to their immediate children. The keys and values
* are absolute paths. For each path in totalSizeMap (except the root itself), we find
* its parent (if also in totalSizeMap) and add the path to the children of that parent.
*
* @param root - The root directory (absolute path).
* @param totalSizeMap - A map from path -> cumulative size.
* @returns A record that maps directory paths to arrays of child paths.
*/
export function buildChildrenMap(
root: string,
totalSizeMap: Record<string, number>,
): Record<string, Array<string>> {
const rootAbs = path.resolve(root);
const childrenMap: Record<string, Array<string>> = {};
// Initialize all potential keys so that each path has an entry.
for (const p of Object.keys(totalSizeMap)) {
if (!childrenMap[p]) {
childrenMap[p] = [];
}
}
for (const p of Object.keys(totalSizeMap)) {
if (p === rootAbs) {
continue;
}
const parent = path.dirname(p);
// If the parent is also tracked in totalSizeMap, we record p as a child.
if (totalSizeMap[parent] !== undefined && parent !== p) {
if (!childrenMap[parent]) {
childrenMap[parent] = [];
}
childrenMap[parent].push(p);
}
}
// Sort the children.
for (const val of Object.values(childrenMap)) {
val.sort((a, b) => {
return a.localeCompare(b);
});
}
return childrenMap;
}
/**
* Recursively prints a directory/file tree, showing size usage.
*
* @param current - The current absolute path (directory or file) to print.
* @param childrenMap - A mapping from directory paths to an array of their child paths.
* @param fileSizeMap - A map from file path to file size (characters).
* @param totalSizeMap - A map from path to total cumulative size.
* @param prefix - The current prefix used for ASCII indentation.
* @param isLast - Whether the current path is the last child in its parent.
* @param contextLimit - The maximum context size in characters, used to compute percentages.
*/
export function printSizeTree(
current: string,
childrenMap: Record<string, Array<string>>,
fileSizeMap: Record<string, number>,
totalSizeMap: Record<string, number>,
prefix: string,
isLast: boolean,
contextLimit: number,
): void {
const connector = isLast ? "└──" : "├──";
const label = path.basename(current) || current;
const totalSz = totalSizeMap[current] ?? 0;
const percentageOfLimit =
contextLimit > 0 ? (totalSz / contextLimit) * 100 : 0;
if (fileSizeMap[current] !== undefined) {
// It's a file
const fileSz = fileSizeMap[current];
console.log(
`${prefix}${connector} ${label} [file: ${fileSz} chars, cumulative: ${totalSz} chars, ${percentageOfLimit.toFixed(
2,
)}% of limit]`,
);
} else {
// It's a directory
console.log(
`${prefix}${connector} ${label} [dir: ${totalSz} chars, ${percentageOfLimit.toFixed(
2,
)}% of limit]`,
);
}
const newPrefix = prefix + (isLast ? " " : "│ ");
const children = childrenMap[current] || [];
for (let i = 0; i < children.length; i++) {
const child = children[i];
const childIsLast = i === children.length - 1;
printSizeTree(
child!,
childrenMap,
fileSizeMap,
totalSizeMap,
newPrefix,
childIsLast,
contextLimit,
);
}
}
/**
* Prints a size breakdown for the entire directory (and subpaths), listing cumulative percentages.
*
* @param directory - The directory path (absolute or relative) for which to print the breakdown.
* @param files - The array of FileContent representing the files under that directory.
* @param contextLimit - The maximum context character limit.
*/
export function printDirectorySizeBreakdown(
directory: string,
files: Array<FileContent>,
contextLimit = 300_000,
): void {
const rootAbs = path.resolve(directory);
const [fileSizeMap, totalSizeMap] = computeSizeMap(rootAbs, files);
const childrenMap = buildChildrenMap(rootAbs, totalSizeMap);
console.log("\nContext size breakdown by directory and file:");
const rootTotal = totalSizeMap[rootAbs] ?? 0;
const rootPct =
contextLimit > 0 ? ((rootTotal / contextLimit) * 100).toFixed(2) : "0";
const rootLabel = path.basename(rootAbs) || rootAbs;
console.log(`${rootLabel} [dir: ${rootTotal} chars, ${rootPct}% of limit]`);
const rootChildren = childrenMap[rootAbs] || [];
rootChildren.sort((a, b) => a.localeCompare(b));
for (let i = 0; i < rootChildren.length; i++) {
const child = rootChildren[i];
const childIsLast = i === rootChildren.length - 1;
printSizeTree(
child!,
childrenMap,
fileSizeMap,
totalSizeMap,
"",
childIsLast,
contextLimit,
);
}
}
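// Illustrative usage sketch (an addition for this write-up, not part of the
// original file), with invented sizes and POSIX paths:
function demoBreakdown(): void {
  printDirectorySizeBreakdown("/app", [
    { path: "/app/src/a.ts", content: "x".repeat(10) },
    { path: "/app/src/b.ts", content: "y".repeat(20) },
  ]);
  // Context size breakdown by directory and file:
  // app [dir: 30 chars, 0.01% of limit]
  // └── src [dir: 30 chars, 0.01% of limit]
  //     ├── a.ts [file: 10 chars, cumulative: 10 chars, 0.00% of limit]
  //     └── b.ts [file: 20 chars, cumulative: 20 chars, 0.01% of limit]
}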


@@ -0,0 +1,47 @@
import { z } from "zod";
/**
* Represents a file operation, including modifications, deletes, and moves.
*/
export const FileOperationSchema = z.object({
/**
* Absolute path to the file.
*/
path: z.string(),
/**
* The FULL AND FINAL content of the file after modification, WITHOUT OMITTING
* OR TRUNCATING ANY PART OF THE FILE.
*
* Mutually exclusive with 'delete' and 'move_to'.
*/
updated_full_content: z.string().nullable().optional(),
/**
* Set to true if the file is to be deleted.
*
* Mutually exclusive with 'updated_full_content' and 'move_to'.
*/
delete: z.boolean().nullable().optional(),
/**
* New path of the file if it is to be moved.
*
* Mutually exclusive with 'updated_full_content' and 'delete'.
*/
move_to: z.string().nullable().optional(),
});
export type FileOperation = z.infer<typeof FileOperationSchema>;
/**
* Container for one or more FileOperation objects.
*/
export const EditedFilesSchema = z.object({
/**
* A list of file operations that are applied in order.
*/
ops: z.array(FileOperationSchema),
});
export type EditedFiles = z.infer<typeof EditedFilesSchema>;
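/**
 * Illustrative sketch (an addition for this write-up, not part of the original
 * file): validating a model response. safeParse reports failure instead of
 * throwing when the payload drifts from the expected shape.
 */
export function demoParse(): EditedFiles | null {
  const result = EditedFilesSchema.safeParse({
    ops: [
      { path: "/abs/src/helper.ts", updated_full_content: "export {};" },
      { path: "/abs/src/old.ts", delete: true },
    ],
  });
  return result.success ? result.data : null;
}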