From d51654822fa0ad4327c7465fe03ea9427be9e150 Mon Sep 17 00:00:00 2001 From: Michael Bolin Date: Tue, 22 Jul 2025 00:41:27 -0700 Subject: [PATCH] fix: use PR_SET_PDEATHSIG to ensure child processes are killed in a timely manner (#1626) Some users have reported issues where child processes are not cleaned up after Codex exits (e.g., https://github.com/openai/codex/issues/1570). This is generally a tricky issue on operating systems: if a parent process receives `SIGKILL`, then it terminates immediately and cannot communicate with the child. **It only helps on Linux**, but this PR introduces the use of `prctl(2)` so that if the parent process dies, `SIGTERM` will be delivered to the child process. Previously, I believe that if Codex spawned a long-running process (like `tsc --watch`) and the Codex process received `SIGKILL`, the `tsc --watch` process would be reparented to the init process and would never be killed. Now with the use of `prctl(2)`, the `tsc --watch` process should receive `SIGTERM` in that scenario. We still need to come up with a solution for macOS. I've started to look at `launchd`, but I'm researching a number of options. 
--- codex-rs/Cargo.lock | 1 + codex-rs/core/Cargo.toml | 1 + codex-rs/core/src/exec.rs | 25 +++++++++++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 9c604e79..9b4a4e32 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -669,6 +669,7 @@ dependencies = [ "fs2", "futures", "landlock", + "libc", "maplit", "mcp-types", "mime_guess", diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml index e192a71f..a87894bc 100644 --- a/codex-rs/core/Cargo.toml +++ b/codex-rs/core/Cargo.toml @@ -22,6 +22,7 @@ env-flags = "0.1.1" eventsource-stream = "0.2.3" fs2 = "0.4.3" futures = "0.3" +libc = "0.2.174" mcp-types = { path = "../mcp-types" } mime_guess = "2.0" rand = "0.9" diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs index 3b37cb53..4b33b0b3 100644 --- a/codex-rs/core/src/exec.rs +++ b/codex-rs/core/src/exec.rs @@ -384,6 +384,31 @@ async fn spawn_child_async( cmd.env(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR, "1"); } + // If this Codex process dies (including being killed via SIGKILL), we want + // any child processes that were spawned as part of a `"shell"` tool call + // to also be terminated. + + // This relies on prctl(2), so it only works on Linux. + #[cfg(target_os = "linux")] + unsafe { + cmd.pre_exec(|| { + // This prctl call effectively requests, "deliver SIGTERM when my + // current parent dies." + if libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM) == -1 { + return Err(io::Error::last_os_error()); + } + + // Though if there was a race condition and this pre_exec() block is + // run _after_ the parent (i.e., the Codex process) has already + // exited, then the parent is the _init_ process (which will never + // die), so we should just terminate the child process now. 
+ if libc::getppid() == 1 { + libc::raise(libc::SIGTERM); + } + Ok(()) + }); + } + match stdio_policy { StdioPolicy::RedirectForShellTool => { // Do not create a file descriptor for stdin because otherwise some