diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1f5dd0d3..508b5b9b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -70,3 +70,5 @@ jobs: - name: Ensure README.md contains only ASCII and certain Unicode code points run: ./scripts/asciicheck.py README.md + - name: Check README ToC + run: python3 scripts/readme_toc.py README.md diff --git a/README.md b/README.md index 9ac89499..ed8d3ff8 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,8 @@
Table of Contents + + - [Experimental Technology Disclaimer](#experimental-technology-disclaimer) - [Quickstart](#quickstart) - [Why Codex?](#why-codex) @@ -19,13 +21,16 @@ - [CLI Reference](#cli-reference) - [Memory & Project Docs](#memory--project-docs) - [Non-interactive / CI mode](#non-interactive--ci-mode) +- [Tracing / Verbose Logging](#tracing--verbose-logging) - [Recipes](#recipes) - [Installation](#installation) - [Configuration](#configuration) - [FAQ](#faq) +- [Zero Data Retention (ZDR) Organization Limitation](#zero-data-retention-zdr-organization-limitation) - [Funding Opportunity](#funding-opportunity) - [Contributing](#contributing) - [Development workflow](#development-workflow) + - [Git Hooks with Husky](#git-hooks-with-husky) - [Nix Flake Development](#nix-flake-development) - [Writing high-impact code changes](#writing-high-impact-code-changes) - [Opening a pull request](#opening-a-pull-request) @@ -37,7 +42,8 @@ - [Releasing `codex`](#releasing-codex) - [Security & Responsible AI](#security--responsible-ai) - [License](#license) -- [Zero Data Retention (ZDR) Organization Limitation](#zero-data-retention-zdr-organization-limitation) + +
#!/usr/bin/env python3
# Provenance: scripts/readme_toc.py (added as a new file, mode 100755).

"""
Utility script to verify (and optionally fix) the Table of Contents in a
Markdown file. By default, it checks that the ToC between `<!-- Begin ToC -->`
and `<!-- End ToC -->` matches the headings in the file. With --fix, it
rewrites the file to update the ToC.
"""

import argparse
import sys
import re
import difflib
from pathlib import Path
from typing import Dict, List, Optional, Tuple

# Markers for the Table of Contents section.
# NOTE(review): the marker text was missing (empty strings) in the flattened
# source this file was recovered from; empty markers make both lookups match
# the first blank line, so the check could never pass. Confirm these HTML
# comments match the ones actually present in README.md.
BEGIN_TOC: str = "<!-- Begin ToC -->"
END_TOC: str = "<!-- End ToC -->"

# Strings that open/close a fenced code block in Markdown.
_FENCE_MARKERS: Tuple[str, ...] = ("```", "~~~")

# ATX headings of level 2..6; level-1 headings (the title) are not listed.
_HEADING_RE = re.compile(r"^(#{2,6})\s+(.*)$")

# One nesting step in the generated list. Two spaces is the minimum that
# makes a "- " item a child of the previous "- " item in Markdown.
_TOC_INDENT: str = "  "


def main() -> int:
    """Parse command-line arguments and run the ToC check (or fix).

    Returns:
        Process exit code: 0 when the ToC is up to date (or was rewritten),
        1 on any error or when the ToC is stale and --fix was not given.
    """
    parser = argparse.ArgumentParser(
        description="Check and optionally fix the README.md Table of Contents."
    )
    parser.add_argument(
        "file", nargs="?", default="README.md", help="Markdown file to process"
    )
    parser.add_argument(
        "--fix", action="store_true", help="Rewrite file with updated ToC"
    )
    args = parser.parse_args()
    path = Path(args.file)
    return check_or_fix(path, args.fix)


def _extract_headings(content: str) -> List[Tuple[int, str]]:
    """Return (level, text) for every ## to ###### heading outside code fences."""
    headings: List[Tuple[int, str]] = []
    open_fence: Optional[str] = None
    for line in content.splitlines():
        stripped = line.strip()
        if open_fence is not None:
            # Only the marker that opened the block can close it, so a "~~~"
            # line inside a ``` block (or vice versa) is treated as content.
            if stripped.startswith(open_fence):
                open_fence = None
            continue
        if stripped.startswith(_FENCE_MARKERS):
            open_fence = stripped[:3]
            continue
        match = _HEADING_RE.match(line)
        if match:
            headings.append((len(match.group(1)), match.group(2).strip()))
    return headings


def _slugify(text: str) -> str:
    """Turn heading text into a GitHub-style anchor slug (without the '#')."""
    slug = text.lower()
    # normalize spaces and dashes
    slug = slug.replace("\u00a0", " ")
    slug = slug.replace("\u2011", "-").replace("\u2013", "-").replace("\u2014", "-")
    # drop other punctuation
    slug = re.sub(r"[^0-9a-z\s-]", "", slug)
    # Each space becomes its own dash, so "a & b" yields "a--b".
    return slug.strip().replace(" ", "-")


def generate_toc_lines(content: str) -> List[str]:
    """
    Generate markdown list lines for headings (## to ######) in content.

    Headings inside fenced code blocks are ignored. Level-2 headings are
    emitted at column 0 and each deeper level is indented by two more spaces.
    When the same slug occurs more than once, later occurrences get a
    ``-1``, ``-2``, ... suffix so every link targets a distinct anchor.

    Args:
        content: Full text of the Markdown document.

    Returns:
        One ``- [text](#slug)`` line per heading, in document order.
    """
    toc: List[str] = []
    seen: Dict[str, int] = {}
    for level, text in _extract_headings(content):
        slug = _slugify(text)
        repeats = seen.get(slug, 0)
        seen[slug] = repeats + 1
        anchor = f"{slug}-{repeats}" if repeats else slug
        toc.append(f"{_TOC_INDENT * (level - 2)}- [{text}](#{anchor})")
    return toc


def _find_toc_span(lines: List[str]) -> Optional[Tuple[int, int]]:
    """Return (begin_idx, end_idx) of the ToC marker lines, or None if absent."""
    try:
        begin_idx = next(
            i for i, line in enumerate(lines) if line.strip() == BEGIN_TOC
        )
        # Search strictly after the begin marker so the span is never
        # empty-by-construction or reversed.
        end_idx = next(
            i
            for i in range(begin_idx + 1, len(lines))
            if lines[i].strip() == END_TOC
        )
    except StopIteration:
        return None
    return begin_idx, end_idx


def check_or_fix(readme_path: Path, fix: bool) -> int:
    """Compare the ToC stored in *readme_path* with one generated from its headings.

    Args:
        readme_path: Markdown file containing the BEGIN_TOC / END_TOC markers.
        fix: When True, rewrite the file with the generated ToC instead of
            only reporting the difference.

    Returns:
        0 if the ToC already matches or was rewritten; 1 if the file is
        missing, the markers cannot be found, or the ToC is stale and *fix*
        is False (a unified diff is printed in that case).
    """
    if not readme_path.is_file():
        print(f"Error: file not found: {readme_path}", file=sys.stderr)
        return 1
    content = readme_path.read_text(encoding="utf-8")
    lines = content.splitlines()

    # locate ToC markers
    span = _find_toc_span(lines)
    if span is None:
        print(
            f"Error: Could not locate '{BEGIN_TOC}' or '{END_TOC}' in {readme_path}.",
            file=sys.stderr,
        )
        return 1
    begin_idx, end_idx = span

    # extract current ToC list items (blank lines and other text are ignored)
    current = [
        line
        for line in lines[begin_idx + 1 : end_idx]
        if line.lstrip().startswith("- [")
    ]
    # generate expected ToC
    expected = generate_toc_lines(content)
    if current == expected:
        return 0

    if not fix:
        print(
            "ERROR: README ToC is out of date. Diff between existing and generated ToC:"
        )
        # Show full unified diff of current vs expected
        diff = difflib.unified_diff(
            current,
            expected,
            fromfile="existing ToC",
            tofile="generated ToC",
            lineterm="",
        )
        for diff_line in diff:
            print(diff_line)
        return 1

    # rebuild file with updated ToC, keeping both marker lines in place and
    # padding the list with one blank line on each side
    prefix = lines[: begin_idx + 1]
    suffix = lines[end_idx:]
    new_lines = prefix + [""] + expected + [""] + suffix
    readme_path.write_text("\n".join(new_lines) + "\n", encoding="utf-8")
    print(f"Updated ToC in {readme_path}.")
    return 0


if __name__ == "__main__":
    sys.exit(main())