add check to ensure ToC in README.md matches headings in the file (#541)
This introduces a Python script (written by Codex!) to verify that the table of contents in the root `README.md` matches the headings. Like `scripts/asciicheck.py` in https://github.com/openai/codex/pull/513, it reports differences by default (and exits non-zero if there are any) and also has a `--fix` option to synchronize the ToC with the headings. This will be enforced by CI and the changes to `README.md` in this PR were generated by the script, so you can see that our ToC was missing some entries prior to this PR.
This commit is contained in:
2
.github/workflows/ci.yml
vendored
2
.github/workflows/ci.yml
vendored
@@ -70,3 +70,5 @@ jobs:
|
||||
|
||||
- name: Ensure README.md contains only ASCII and certain Unicode code points
|
||||
run: ./scripts/asciicheck.py README.md
|
||||
- name: Check README ToC
|
||||
run: python3 scripts/readme_toc.py README.md
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
<details>
|
||||
<summary><strong>Table of Contents</strong></summary>
|
||||
|
||||
<!-- Begin ToC -->
|
||||
|
||||
- [Experimental Technology Disclaimer](#experimental-technology-disclaimer)
|
||||
- [Quickstart](#quickstart)
|
||||
- [Why Codex?](#why-codex)
|
||||
@@ -19,13 +21,16 @@
|
||||
- [CLI Reference](#cli-reference)
|
||||
- [Memory & Project Docs](#memory--project-docs)
|
||||
- [Non-interactive / CI mode](#non-interactive--ci-mode)
|
||||
- [Tracing / Verbose Logging](#tracing--verbose-logging)
|
||||
- [Recipes](#recipes)
|
||||
- [Installation](#installation)
|
||||
- [Configuration](#configuration)
|
||||
- [FAQ](#faq)
|
||||
- [Zero Data Retention (ZDR) Organization Limitation](#zero-data-retention-zdr-organization-limitation)
|
||||
- [Funding Opportunity](#funding-opportunity)
|
||||
- [Contributing](#contributing)
|
||||
- [Development workflow](#development-workflow)
|
||||
- [Git Hooks with Husky](#git-hooks-with-husky)
|
||||
- [Nix Flake Development](#nix-flake-development)
|
||||
- [Writing high-impact code changes](#writing-high-impact-code-changes)
|
||||
- [Opening a pull request](#opening-a-pull-request)
|
||||
@@ -37,7 +42,8 @@
|
||||
- [Releasing `codex`](#releasing-codex)
|
||||
- [Security & Responsible AI](#security--responsible-ai)
|
||||
- [License](#license)
|
||||
- [Zero Data Retention (ZDR) Organization Limitation](#zero-data-retention-zdr-organization-limitation)
|
||||
|
||||
<!-- End ToC -->
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
119
scripts/readme_toc.py
Executable file
119
scripts/readme_toc.py
Executable file
@@ -0,0 +1,119 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Utility script to verify (and optionally fix) the Table of Contents in a
|
||||
Markdown file. By default, it checks that the ToC between `<!-- Begin ToC -->`
|
||||
and `<!-- End ToC -->` matches the headings in the file. With --fix, it
|
||||
rewrites the file to update the ToC.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import re
|
||||
import difflib
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
# Markers for the Table of Contents section. A line counts as a marker when,
# after stripping surrounding whitespace, it equals one of these strings.
BEGIN_TOC: str = "<!-- Begin ToC -->"
END_TOC: str = "<!-- End ToC -->"
|
||||
|
||||
|
||||
def main() -> int:
    """
    Command-line entry point.

    Parses an optional positional Markdown path (default ``README.md``) and
    the ``--fix`` flag, then delegates to ``check_or_fix`` and returns its
    exit status.
    """
    cli = argparse.ArgumentParser(
        description="Check and optionally fix the README.md Table of Contents."
    )
    cli.add_argument(
        "file", nargs="?", default="README.md", help="Markdown file to process"
    )
    cli.add_argument(
        "--fix", action="store_true", help="Rewrite file with updated ToC"
    )
    options = cli.parse_args()
    return check_or_fix(Path(options.file), options.fix)
|
||||
|
||||
|
||||
def generate_toc_lines(content: str) -> List[str]:
    """
    Generate markdown list lines for headings (## to ######) in content.

    Each heading becomes ``<indent>- [text](#slug)``, where the indent grows
    with the heading level and the slug is the lower-cased heading text with
    punctuation removed and spaces turned into hyphens.

    Lines inside fenced code blocks (opened by ``` or ~~~) are ignored.
    Repeated headings receive ``-1``, ``-2``, ... suffixes, which is how
    GitHub disambiguates duplicate anchors. Level-1 headings are never
    listed, but they do take part in that numbering because they claim an
    anchor as well.

    Args:
        content: Full text of the Markdown document.

    Returns:
        The ToC list lines in document order, without trailing newlines.
    """
    heading_re = re.compile(r"^(#{1,6})\s+(.*)$")

    headings = []
    open_fence = ""  # marker of the fence we are currently inside, "" if none
    for line in content.splitlines():
        stripped = line.strip()
        if open_fence:
            # Only the same marker that opened the block can close it.
            if stripped.startswith(open_fence):
                open_fence = ""
            continue
        if stripped.startswith(("```", "~~~")):
            open_fence = stripped[:3]
            continue
        m = heading_re.match(line)
        if m:
            headings.append((len(m.group(1)), m.group(2).strip()))

    toc = []
    occurrences = {}  # anchor -> number of suffixed variants handed out so far
    for level, text in headings:
        slug = text.lower()
        # normalize spaces and dashes
        slug = slug.replace("\u00a0", " ")
        slug = slug.replace("\u2011", "-").replace("\u2013", "-").replace("\u2014", "-")
        # drop other punctuation
        slug = re.sub(r"[^0-9a-z\s-]", "", slug)
        slug = slug.strip().replace(" ", "-")

        # Make the anchor unique; loop in case "<slug>-N" is itself taken.
        anchor = slug
        while anchor in occurrences:
            occurrences[slug] += 1
            anchor = f"{slug}-{occurrences[slug]}"
        occurrences[anchor] = 0

        if level < 2:
            # The document title is not part of the ToC.
            continue
        # NOTE(review): one space per level is what the reviewed source shows;
        # Markdown list nesting normally needs at least two, so the literal
        # may have been whitespace-collapsed upstream -- confirm before merging.
        indent = " " * (level - 2)
        toc.append(f"{indent}- [{text}](#{anchor})")
    return toc
|
||||
|
||||
|
||||
def check_or_fix(readme_path: Path, fix: bool) -> int:
    """
    Compare the ToC stored in a Markdown file with the one derived from its
    headings, and optionally rewrite the file so they agree.

    The stored ToC is the set of list-item lines (those starting with
    ``- [`` after leading whitespace) between the ``BEGIN_TOC`` and
    ``END_TOC`` marker lines. The end marker must appear after the begin
    marker.

    Args:
        readme_path: Markdown file to inspect.
        fix: When True and the ToC is stale, rewrite the file in place.
            Everything between the markers is replaced by a blank line, the
            generated entries and another blank line.

    Returns:
        0 if the ToC is already up to date or was rewritten; 1 if the file
        is missing, the markers cannot be located, or the ToC is stale and
        ``fix`` is False (a unified diff is printed in that case).
    """
    if not readme_path.is_file():
        print(f"Error: file not found: {readme_path}", file=sys.stderr)
        return 1
    content = readme_path.read_text(encoding="utf-8")
    lines = content.splitlines()

    # locate ToC markers
    try:
        begin_idx = next(
            i for i, line in enumerate(lines) if line.strip() == BEGIN_TOC
        )
        # Search only after the begin marker: accepting an earlier end marker
        # would yield an empty ToC slice and make --fix duplicate the lines
        # lying between the two markers.
        end_idx = next(
            i
            for i in range(begin_idx + 1, len(lines))
            if lines[i].strip() == END_TOC
        )
    except StopIteration:
        print(
            f"Error: Could not locate '{BEGIN_TOC}' or '{END_TOC}' in {readme_path}.",
            file=sys.stderr,
        )
        return 1

    # extract current ToC list items
    current_block = lines[begin_idx + 1 : end_idx]
    current = [line for line in current_block if line.lstrip().startswith("- [")]
    # generate expected ToC
    expected = generate_toc_lines(content)
    if current == expected:
        return 0

    if not fix:
        print(
            "ERROR: README ToC is out of date. Diff between existing and generated ToC:"
        )
        # Show full unified diff of current vs expected
        diff = difflib.unified_diff(
            current,
            expected,
            fromfile="existing ToC",
            tofile="generated ToC",
            lineterm="",
        )
        for line in diff:
            print(line)
        return 1

    # rebuild file with updated ToC
    prefix = lines[: begin_idx + 1]
    suffix = lines[end_idx:]
    new_lines = prefix + [""] + expected + [""] + suffix
    readme_path.write_text("\n".join(new_lines) + "\n", encoding="utf-8")
    print(f"Updated ToC in {readme_path}.")
    return 0
|
||||
|
||||
|
||||
# Run the checker when executed as a script and hand its status to the shell.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user