Files
llmx/codex-cli/scripts/build_npm_package.py

295 lines
9.2 KiB
Python
Raw Normal View History

fix: vendor ripgrep in the npm module (#3660) We try to ensure ripgrep (`rg`) is provided with Codex. - For `brew`, we declare it as a dependency of our formula: https://github.com/Homebrew/homebrew-core/blob/08d82d8b006a19efbe234477bc8b18d35b5fef50/Formula/c/codex.rb#L24 - For `npm`, we declare `@vscode/ripgrep` as a dependency, which installs the platform-specific binary as part of a `postinstall` script: https://github.com/openai/codex/blob/fdb8dadcae9f8eec91bc3eb5a17b3f9b19e28505/codex-cli/package.json#L22 - Users who download the CLI directly from GitHub Releases are on their own. In practice, I have seen `@vscode/ripgrep` fail on occasion. Here is a trace from a GitHub workflow: ``` npm error code 1 npm error path /Users/runner/hostedtoolcache/node/20.19.5/arm64/lib/node_modules/@openai/codex/node_modules/@vscode/ripgrep npm error command failed npm error command sh -c node ./lib/postinstall.js npm error Finding release for v13.0.0-13 npm error GET https://api.github.com/repos/microsoft/ripgrep-prebuilt/releases/tags/v13.0.0-13 npm error Deleting invalid download cache npm error Download attempt 1 failed, retrying in 2 seconds... npm error Finding release for v13.0.0-13 npm error GET https://api.github.com/repos/microsoft/ripgrep-prebuilt/releases/tags/v13.0.0-13 npm error Deleting invalid download cache npm error Download attempt 2 failed, retrying in 4 seconds... npm error Finding release for v13.0.0-13 npm error GET https://api.github.com/repos/microsoft/ripgrep-prebuilt/releases/tags/v13.0.0-13 npm error Deleting invalid download cache npm error Download attempt 3 failed, retrying in 8 seconds... npm error Finding release for v13.0.0-13 npm error GET https://api.github.com/repos/microsoft/ripgrep-prebuilt/releases/tags/v13.0.0-13 npm error Deleting invalid download cache npm error Download attempt 4 failed, retrying in 16 seconds... npm error Finding release for v13.0.0-13 npm error GET https://api.github.com/repos/microsoft/ripgrep-prebuilt/releases/tags/v13.0.0-13 npm error Deleting invalid download cache npm error Error: Request failed: 403 ``` To eliminate this error, this PR changes things so that we vendor the `rg` binary into https://www.npmjs.com/package/@openai/codex so it is guaranteed to be included when a user runs `npm i -g @openai/codex`. The downside of this approach is the increase in package size: we include the `rg` executable for six architectures (in addition to the six copies of `codex` we already include). In a follow-up, I plan to add support for "slices" of our npm module, so that soon users will be able to do: ``` npm install -g @openai/codex@aarch64-apple-darwin ``` Admittedly, this is a sizable change and I tried to clean some things up in the process: - `install_native_deps.sh` has been replaced by `install_native_deps.py` - `stage_release.sh` and `stage_rust_release.py` has been replaced by `build_npm_package.py` We now vendor in a DotSlash file for ripgrep (as a modest attempt to facilitate local testing) and then build up the extension by: - creating a temp directory and copying `package.json` over to it with the target value for `"version"` - finding the GitHub workflow that corresponds to the `--release-version` and copying the various `codex` artifacts to respective `vendor/TARGET_TRIPLE/codex` folder - downloading the `rg` artifacts specified in the DotSlash file and copying them over to the respective `vendor/TARGET_TRIPLE/path` folder - if `--pack-output` is specified, runs `npm pack` on the temp directory To test, I downloaded the artifact produced by this CI job: https://github.com/openai/codex/actions/runs/17961595388/job/51085840022?pr=3660 and verified that `node ./bin/codex.js 'which -a rg'` worked as intended.
2025-09-23 23:00:33 -07:00
#!/usr/bin/env python3
"""Stage and optionally package the @openai/codex npm module."""
import argparse
import json
import re
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
SCRIPT_DIR = Path(__file__).resolve().parent
CODEX_CLI_ROOT = SCRIPT_DIR.parent
REPO_ROOT = CODEX_CLI_ROOT.parent
GITHUB_REPO = "openai/codex"
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Build or stage the Codex CLI npm package.")
parser.add_argument(
"--version",
help="Version number to write to package.json inside the staged package.",
)
parser.add_argument(
"--release-version",
help=(
"Version to stage for npm release. When provided, the script also resolves the "
"matching rust-release workflow unless --workflow-url is supplied."
),
)
parser.add_argument(
"--workflow-url",
help="Optional GitHub Actions workflow run URL used to download native binaries.",
)
parser.add_argument(
"--staging-dir",
type=Path,
help=(
"Directory to stage the package contents. Defaults to a new temporary directory "
"if omitted. The directory must be empty when provided."
),
)
parser.add_argument(
"--tmp",
dest="staging_dir",
type=Path,
help=argparse.SUPPRESS,
)
parser.add_argument(
"--pack-output",
type=Path,
help="Path where the generated npm tarball should be written.",
)
return parser.parse_args()
def main() -> int:
args = parse_args()
version = args.version
release_version = args.release_version
if release_version:
if version and version != release_version:
raise RuntimeError("--version and --release-version must match when both are provided.")
version = release_version
if not version:
raise RuntimeError("Must specify --version or --release-version.")
staging_dir, created_temp = prepare_staging_dir(args.staging_dir)
try:
stage_sources(staging_dir, version)
workflow_url = args.workflow_url
resolved_head_sha: str | None = None
if not workflow_url:
if release_version:
workflow = resolve_release_workflow(version)
workflow_url = workflow["url"]
resolved_head_sha = workflow.get("headSha")
else:
workflow_url = resolve_latest_alpha_workflow_url()
elif release_version:
try:
workflow = resolve_release_workflow(version)
resolved_head_sha = workflow.get("headSha")
except Exception:
resolved_head_sha = None
if release_version and resolved_head_sha:
print(f"should `git checkout {resolved_head_sha}`")
if not workflow_url:
raise RuntimeError("Unable to determine workflow URL for native binaries.")
install_native_binaries(staging_dir, workflow_url)
if release_version:
staging_dir_str = str(staging_dir)
print(
f"Staged version {version} for release in {staging_dir_str}\n\n"
"Verify the CLI:\n"
f" node {staging_dir_str}/bin/codex.js --version\n"
f" node {staging_dir_str}/bin/codex.js --help\n\n"
)
else:
print(f"Staged package in {staging_dir}")
if args.pack_output is not None:
output_path = run_npm_pack(staging_dir, args.pack_output)
print(f"npm pack output written to {output_path}")
finally:
if created_temp:
# Preserve the staging directory for further inspection.
pass
return 0
def prepare_staging_dir(staging_dir: Path | None) -> tuple[Path, bool]:
if staging_dir is not None:
staging_dir = staging_dir.resolve()
staging_dir.mkdir(parents=True, exist_ok=True)
if any(staging_dir.iterdir()):
raise RuntimeError(f"Staging directory {staging_dir} is not empty.")
return staging_dir, False
temp_dir = Path(tempfile.mkdtemp(prefix="codex-npm-stage-"))
return temp_dir, True
def stage_sources(staging_dir: Path, version: str) -> None:
bin_dir = staging_dir / "bin"
bin_dir.mkdir(parents=True, exist_ok=True)
shutil.copy2(CODEX_CLI_ROOT / "bin" / "codex.js", bin_dir / "codex.js")
rg_manifest = CODEX_CLI_ROOT / "bin" / "rg"
if rg_manifest.exists():
shutil.copy2(rg_manifest, bin_dir / "rg")
readme_src = REPO_ROOT / "README.md"
if readme_src.exists():
shutil.copy2(readme_src, staging_dir / "README.md")
with open(CODEX_CLI_ROOT / "package.json", "r", encoding="utf-8") as fh:
package_json = json.load(fh)
package_json["version"] = version
with open(staging_dir / "package.json", "w", encoding="utf-8") as out:
json.dump(package_json, out, indent=2)
out.write("\n")
def install_native_binaries(staging_dir: Path, workflow_url: str | None) -> None:
cmd = ["./scripts/install_native_deps.py"]
if workflow_url:
cmd.extend(["--workflow-url", workflow_url])
cmd.append(str(staging_dir))
subprocess.check_call(cmd, cwd=CODEX_CLI_ROOT)
def resolve_latest_alpha_workflow_url() -> str:
version = determine_latest_alpha_version()
workflow_url = fetch_workflow_url_for_version(version)
if not workflow_url:
raise RuntimeError(f"Unable to locate workflow for version {version}.")
return workflow_url
def determine_latest_alpha_version() -> str:
releases = list_releases()
best_key: tuple[int, int, int, int] | None = None
best_version: str | None = None
pattern = re.compile(r"^rust-v(\d+)\.(\d+)\.(\d+)-alpha\.(\d+)$")
for release in releases:
tag = release.get("tag_name", "")
match = pattern.match(tag)
if not match:
continue
key = tuple(int(match.group(i)) for i in range(1, 5))
if best_key is None or key > best_key:
best_key = key
best_version = (
f"{match.group(1)}.{match.group(2)}.{match.group(3)}-alpha.{match.group(4)}"
)
if best_version is None:
raise RuntimeError("No alpha releases found when resolving workflow URL.")
return best_version
def list_releases() -> list[dict]:
stdout = subprocess.check_output(
["gh", "api", f"/repos/{GITHUB_REPO}/releases?per_page=100"],
text=True,
)
try:
releases = json.loads(stdout or "[]")
except json.JSONDecodeError as exc:
raise RuntimeError("Unable to parse releases JSON.") from exc
if not isinstance(releases, list):
raise RuntimeError("Unexpected response when listing releases.")
return releases
def fetch_workflow_url_for_version(version: str) -> str | None:
ref = f"rust-v{version}"
stdout = subprocess.check_output(
[
"gh",
"run",
"list",
"--branch",
ref,
"--limit",
"20",
"--json",
"workflowName,url",
],
text=True,
)
try:
runs = json.loads(stdout or "[]")
except json.JSONDecodeError as exc:
raise RuntimeError("Unable to parse workflow run listing.") from exc
for run in runs:
if run.get("workflowName") == "rust-release":
url = run.get("url")
if url:
return url
return None
def resolve_release_workflow(version: str) -> dict:
stdout = subprocess.check_output(
[
"gh",
"run",
"list",
"--branch",
f"rust-v{version}",
"--json",
"workflowName,url,headSha",
"--jq",
'first(.[] | select(.workflowName == "rust-release"))',
],
text=True,
)
workflow = json.loads(stdout)
if not workflow:
raise RuntimeError(f"Unable to find rust-release workflow for version {version}.")
return workflow
def run_npm_pack(staging_dir: Path, output_path: Path) -> Path:
output_path = output_path.resolve()
output_path.parent.mkdir(parents=True, exist_ok=True)
with tempfile.TemporaryDirectory(prefix="codex-npm-pack-") as pack_dir_str:
pack_dir = Path(pack_dir_str)
stdout = subprocess.check_output(
["npm", "pack", "--json", "--pack-destination", str(pack_dir)],
cwd=staging_dir,
text=True,
)
try:
pack_output = json.loads(stdout)
except json.JSONDecodeError as exc:
raise RuntimeError("Failed to parse npm pack output.") from exc
if not pack_output:
raise RuntimeError("npm pack did not produce an output tarball.")
tarball_name = pack_output[0].get("filename") or pack_output[0].get("name")
if not tarball_name:
raise RuntimeError("Unable to determine npm pack output filename.")
tarball_path = pack_dir / tarball_name
if not tarball_path.exists():
raise RuntimeError(f"Expected npm pack output not found: {tarball_path}")
shutil.move(str(tarball_path), output_path)
return output_path
if __name__ == "__main__":
import sys
sys.exit(main())