feat: introduce responses-api-proxy (#4246)
Details are in `responses-api-proxy/README.md`, but the key contribution
of this PR is a new subcommand, `codex responses-api-proxy`, which reads
the auth token for use with the OpenAI Responses API from `stdin` at
startup and then proxies `POST` requests to `/v1/responses` over to
`https://api.openai.com/v1/responses`, injecting the auth token as part
of the `Authorization` header.
The expectation is that `codex responses-api-proxy` is launched by a
privileged user who has access to the auth token so that it can be used
by unprivileged users of the Codex CLI on the same host.
If the client only has one user account with `sudo`, one option is to:
- run `sudo codex responses-api-proxy --http-shutdown --server-info
/tmp/server-info.json` to start the server
- record the port written to `/tmp/server-info.json`
- relinquish their `sudo` privileges (which is irreversible!) like so:
```
sudo deluser $USER sudo || sudo gpasswd -d $USER sudo || true
```
- use `codex` with the proxy (see `README.md`)
- when done, make a `GET` request to the server using the `PORT` from
`server-info.json` to shut it down:
```shell
curl --fail --silent --show-error "http://127.0.0.1:$PORT/shutdown"
```
To protect the auth token, we:
- allocate a 1024 byte buffer on the stack and write `"Bearer "` into it
to start
- we then read from `stdin`, copying the contents into the buffer
after the prefix
- after verifying the input looks good, we create a `String` from that
buffer (so the data is now on the heap)
- we zero out the stack-allocated buffer using
https://crates.io/crates/zeroize so it is not optimized away by the
compiler
- we invoke `.leak()` on the `String` so we can treat its contents as a
`&'static str`, as it will live for the rest of the process
- on UNIX, we `mlock(2)` the memory backing the `&'static str`
- when using the `&'static str` when building an HTTP request, we use
`HeaderValue::from_static()` to avoid copying the `&str`
- we also invoke `.set_sensitive(true)` on the `HeaderValue`, which in
theory indicates to other parts of the HTTP stack that the header should
be treated with "special care" to avoid leakage:
439d1c50d7/src/header/value.rs (L346-L376)
This commit is contained in:
202
codex-rs/responses-api-proxy/src/lib.rs
Normal file
202
codex-rs/responses-api-proxy/src/lib.rs
Normal file
@@ -0,0 +1,202 @@
|
||||
use std::fs::File;
|
||||
use std::fs::{self};
|
||||
use std::io::Write;
|
||||
use std::net::SocketAddr;
|
||||
use std::net::TcpListener;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::anyhow;
|
||||
use clap::Parser;
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::AUTHORIZATION;
|
||||
use reqwest::header::HOST;
|
||||
use reqwest::header::HeaderMap;
|
||||
use reqwest::header::HeaderName;
|
||||
use reqwest::header::HeaderValue;
|
||||
use serde::Serialize;
|
||||
use tiny_http::Header;
|
||||
use tiny_http::Method;
|
||||
use tiny_http::Request;
|
||||
use tiny_http::Response;
|
||||
use tiny_http::Server;
|
||||
use tiny_http::StatusCode;
|
||||
|
||||
mod read_api_key;
|
||||
use read_api_key::read_auth_header_from_stdin;
|
||||
|
||||
/// CLI arguments for the proxy.
|
||||
#[derive(Debug, Clone, Parser)]
|
||||
#[command(name = "responses-api-proxy", about = "Minimal OpenAI responses proxy")]
|
||||
pub struct Args {
|
||||
/// Port to listen on. If not set, an ephemeral port is used.
|
||||
#[arg(long)]
|
||||
pub port: Option<u16>,
|
||||
|
||||
/// Path to a JSON file to write startup info (single line). Includes {"port": <u16>}.
|
||||
#[arg(long, value_name = "FILE")]
|
||||
pub server_info: Option<PathBuf>,
|
||||
|
||||
/// Enable HTTP shutdown endpoint at GET /shutdown
|
||||
#[arg(long)]
|
||||
pub http_shutdown: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct ServerInfo {
|
||||
port: u16,
|
||||
}
|
||||
|
||||
/// Entry point for the library main, for parity with other crates.
|
||||
pub fn run_main(args: Args) -> Result<()> {
|
||||
let auth_header = read_auth_header_from_stdin()?;
|
||||
|
||||
let (listener, bound_addr) = bind_listener(args.port)?;
|
||||
if let Some(path) = args.server_info.as_ref() {
|
||||
write_server_info(path, bound_addr.port())?;
|
||||
}
|
||||
let server = Server::from_listener(listener, None)
|
||||
.map_err(|err| anyhow!("creating HTTP server: {err}"))?;
|
||||
let client = Arc::new(
|
||||
Client::builder()
|
||||
.build()
|
||||
.context("building reqwest client")?,
|
||||
);
|
||||
|
||||
eprintln!("responses-api-proxy listening on {bound_addr}");
|
||||
|
||||
let http_shutdown = args.http_shutdown;
|
||||
for request in server.incoming_requests() {
|
||||
let client = client.clone();
|
||||
std::thread::spawn(move || {
|
||||
if http_shutdown && request.method() == &Method::Get && request.url() == "/shutdown" {
|
||||
let _ = request.respond(Response::new_empty(StatusCode(200)));
|
||||
std::process::exit(0);
|
||||
}
|
||||
|
||||
if let Err(e) = forward_request(&client, auth_header, request) {
|
||||
eprintln!("forwarding error: {e}");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
Err(anyhow!("server stopped unexpectedly"))
|
||||
}
|
||||
|
||||
fn bind_listener(port: Option<u16>) -> Result<(TcpListener, SocketAddr)> {
|
||||
let addr = SocketAddr::from(([127, 0, 0, 1], port.unwrap_or(0)));
|
||||
let listener = TcpListener::bind(addr).with_context(|| format!("failed to bind {addr}"))?;
|
||||
let bound = listener.local_addr().context("failed to read local_addr")?;
|
||||
Ok((listener, bound))
|
||||
}
|
||||
|
||||
fn write_server_info(path: &Path, port: u16) -> Result<()> {
|
||||
if let Some(parent) = path.parent()
|
||||
&& !parent.as_os_str().is_empty()
|
||||
{
|
||||
let parent_display = parent.display();
|
||||
fs::create_dir_all(parent).with_context(|| format!("create_dir_all {parent_display}"))?;
|
||||
}
|
||||
let info = ServerInfo { port };
|
||||
let data = serde_json::to_vec(&info).context("serialize startup info")?;
|
||||
let p = path.display();
|
||||
let mut f = File::create(path).with_context(|| format!("create {p}"))?;
|
||||
f.write_all(&data).with_context(|| format!("write {p}"))?;
|
||||
f.write_all(b"\n").with_context(|| format!("newline {p}"))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn forward_request(client: &Client, auth_header: &'static str, mut req: Request) -> Result<()> {
|
||||
// Only allow POST /v1/responses exactly, no query string.
|
||||
let method = req.method().clone();
|
||||
let url_path = req.url().to_string();
|
||||
let allow = method == Method::Post && url_path == "/v1/responses";
|
||||
|
||||
if !allow {
|
||||
let resp = Response::new_empty(StatusCode(403));
|
||||
let _ = req.respond(resp);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Read request body
|
||||
let mut body = Vec::new();
|
||||
let mut reader = req.as_reader();
|
||||
std::io::Read::read_to_end(&mut reader, &mut body)?;
|
||||
|
||||
// Build headers for upstream, forwarding everything from the incoming
|
||||
// request except Authorization (we replace it below).
|
||||
let mut headers = HeaderMap::new();
|
||||
for header in req.headers() {
|
||||
let name_ascii = header.field.as_str();
|
||||
let lower = name_ascii.to_ascii_lowercase();
|
||||
if lower.as_str() == "authorization" || lower.as_str() == "host" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let header_name = match HeaderName::from_bytes(lower.as_bytes()) {
|
||||
Ok(name) => name,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if let Ok(value) = HeaderValue::from_bytes(header.value.as_bytes()) {
|
||||
headers.append(header_name, value);
|
||||
}
|
||||
}
|
||||
|
||||
// As part of our effort to to keep `auth_header` secret, we use a
|
||||
// combination of `from_static()` and `set_sensitive(true)`.
|
||||
let mut auth_header_value = HeaderValue::from_static(auth_header);
|
||||
auth_header_value.set_sensitive(true);
|
||||
headers.insert(AUTHORIZATION, auth_header_value);
|
||||
|
||||
headers.insert(HOST, HeaderValue::from_static("api.openai.com"));
|
||||
|
||||
let upstream = "https://api.openai.com/v1/responses";
|
||||
let upstream_resp = client
|
||||
.post(upstream)
|
||||
.headers(headers)
|
||||
.body(body)
|
||||
.send()
|
||||
.context("forwarding request to upstream")?;
|
||||
|
||||
// We have to create an adapter between a `reqwest::blocking::Response`
|
||||
// and a `tiny_http::Response`. Fortunately, `reqwest::blocking::Response`
|
||||
// implements `Read`, so we can use it directly as the body of the
|
||||
// `tiny_http::Response`.
|
||||
let status = upstream_resp.status();
|
||||
let mut response_headers = Vec::new();
|
||||
for (name, value) in upstream_resp.headers().iter() {
|
||||
// Skip headers that tiny_http manages itself.
|
||||
if matches!(
|
||||
name.as_str(),
|
||||
"content-length" | "transfer-encoding" | "connection" | "trailer" | "upgrade"
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Ok(header) = Header::from_bytes(name.as_str().as_bytes(), value.as_bytes()) {
|
||||
response_headers.push(header);
|
||||
}
|
||||
}
|
||||
|
||||
let content_length = upstream_resp.content_length().and_then(|len| {
|
||||
if len <= usize::MAX as u64 {
|
||||
Some(len as usize)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
let response = Response::new(
|
||||
StatusCode(status.as_u16()),
|
||||
response_headers,
|
||||
upstream_resp,
|
||||
content_length,
|
||||
None,
|
||||
);
|
||||
|
||||
let _ = req.respond(response);
|
||||
Ok(())
|
||||
}
|
||||
14
codex-rs/responses-api-proxy/src/main.rs
Normal file
14
codex-rs/responses-api-proxy/src/main.rs
Normal file
@@ -0,0 +1,14 @@
|
||||
use anyhow::Context;
|
||||
use clap::Parser;
|
||||
use codex_arg0::arg0_dispatch_or_else;
|
||||
use codex_responses_api_proxy::Args as ResponsesApiProxyArgs;
|
||||
|
||||
pub fn main() -> anyhow::Result<()> {
|
||||
arg0_dispatch_or_else(|_codex_linux_sandbox_exe| async move {
|
||||
let args = ResponsesApiProxyArgs::parse();
|
||||
tokio::task::spawn_blocking(move || codex_responses_api_proxy::run_main(args))
|
||||
.await
|
||||
.context("responses-api-proxy blocking task panicked")??;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
185
codex-rs/responses-api-proxy/src/read_api_key.rs
Normal file
185
codex-rs/responses-api-proxy/src/read_api_key.rs
Normal file
@@ -0,0 +1,185 @@
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::anyhow;
|
||||
use std::io::Read;
|
||||
use zeroize::Zeroize;
|
||||
|
||||
/// Size in bytes of the stack buffer that holds the `Bearer ` prefix followed
/// by the token read from stdin. Deliberately generous to avoid truncation and
/// to allow for longer API keys in the future.
const BUFFER_SIZE: usize = 1024;

/// Bytes written at the start of the buffer so that its contents form a
/// complete `Authorization` header value once the token is appended.
const AUTH_HEADER_PREFIX: &[u8] = b"Bearer ";
|
||||
|
||||
/// Reads the auth token from stdin and returns a static `Authorization` header
|
||||
/// value with the auth token used with `Bearer`. The header value is returned
|
||||
/// as a `&'static str` whose bytes are locked in memory to avoid accidental
|
||||
/// exposure.
|
||||
pub(crate) fn read_auth_header_from_stdin() -> Result<&'static str> {
|
||||
read_auth_header_with(|buffer| std::io::stdin().read(buffer))
|
||||
}
|
||||
|
||||
fn read_auth_header_with<F>(read_fn: F) -> Result<&'static str>
|
||||
where
|
||||
F: FnOnce(&mut [u8]) -> std::io::Result<usize>,
|
||||
{
|
||||
// TAKE CARE WHEN MODIFYING THIS CODE!!!
|
||||
//
|
||||
// This function goes to great lengths to avoid leaving the API key in
|
||||
// memory longer than necessary and to avoid copying it around. We read
|
||||
// directly into a stack buffer so the only heap allocation should be the
|
||||
// one to create the String (with the exact size) for the header value,
|
||||
// which we then immediately protect with mlock(2).
|
||||
let mut buf = [0u8; BUFFER_SIZE];
|
||||
buf[..AUTH_HEADER_PREFIX.len()].copy_from_slice(AUTH_HEADER_PREFIX);
|
||||
|
||||
let read = read_fn(&mut buf[AUTH_HEADER_PREFIX.len()..]).inspect_err(|_err| {
|
||||
buf.zeroize();
|
||||
})?;
|
||||
|
||||
if read == buf.len() - AUTH_HEADER_PREFIX.len() {
|
||||
buf.zeroize();
|
||||
return Err(anyhow!(
|
||||
"OPENAI_API_KEY is too large to fit in the 512-byte buffer"
|
||||
));
|
||||
}
|
||||
|
||||
let mut total = AUTH_HEADER_PREFIX.len() + read;
|
||||
while total > AUTH_HEADER_PREFIX.len() && (buf[total - 1] == b'\n' || buf[total - 1] == b'\r') {
|
||||
total -= 1;
|
||||
}
|
||||
|
||||
if total == AUTH_HEADER_PREFIX.len() {
|
||||
buf.zeroize();
|
||||
return Err(anyhow!(
|
||||
"OPENAI_API_KEY must be provided via stdin (e.g. printenv OPENAI_API_KEY | codex responses-api-proxy)"
|
||||
));
|
||||
}
|
||||
|
||||
let header_str = match std::str::from_utf8(&buf[..total]) {
|
||||
Ok(value) => value,
|
||||
Err(err) => {
|
||||
buf.zeroize();
|
||||
return Err(err).context("reading Authorization header from stdin as UTF-8");
|
||||
}
|
||||
};
|
||||
|
||||
let header_value = String::from(header_str);
|
||||
buf.zeroize();
|
||||
|
||||
let leaked: &'static mut str = header_value.leak();
|
||||
mlock_str(leaked);
|
||||
|
||||
Ok(leaked)
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn mlock_str(value: &str) {
|
||||
use libc::_SC_PAGESIZE;
|
||||
use libc::c_void;
|
||||
use libc::mlock;
|
||||
use libc::sysconf;
|
||||
|
||||
if value.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let page_size = unsafe { sysconf(_SC_PAGESIZE) };
|
||||
if page_size <= 0 {
|
||||
return;
|
||||
}
|
||||
let page_size = page_size as usize;
|
||||
if page_size == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
let addr = value.as_ptr() as usize;
|
||||
let len = value.len();
|
||||
let start = addr & !(page_size - 1);
|
||||
let addr_end = match addr.checked_add(len) {
|
||||
Some(v) => match v.checked_add(page_size - 1) {
|
||||
Some(total) => total,
|
||||
None => return,
|
||||
},
|
||||
None => return,
|
||||
};
|
||||
let end = addr_end & !(page_size - 1);
|
||||
let size = end.saturating_sub(start);
|
||||
if size == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
let _ = unsafe { mlock(start as *const c_void, size) };
|
||||
}
|
||||
|
||||
#[cfg(not(unix))]
|
||||
fn mlock_str(_value: &str) {}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::io;

    /// Returns a reader that copies `data` to the front of the destination and
    /// reports its length, standing in for one successful `read` call.
    fn feed(data: &[u8]) -> impl FnOnce(&mut [u8]) -> io::Result<usize> + '_ {
        move |dest: &mut [u8]| {
            dest[..data.len()].copy_from_slice(data);
            Ok(data.len())
        }
    }

    #[test]
    fn reads_key_with_no_newlines() {
        let header = read_auth_header_with(feed(b"sk-abc123")).unwrap();

        assert_eq!(header, "Bearer sk-abc123");
    }

    #[test]
    fn reads_key_and_trims_newlines() {
        let header = read_auth_header_with(feed(b"sk-abc123\r\n")).unwrap();

        assert_eq!(header, "Bearer sk-abc123");
    }

    #[test]
    fn errors_when_no_input_provided() {
        let failure = read_auth_header_with(|_| Ok(0)).unwrap_err();

        assert!(format!("{failure:#}").contains("must be provided"));
    }

    #[test]
    fn errors_when_buffer_filled() {
        // Exactly as many bytes as there is room for after the prefix.
        let filler = vec![b'a'; BUFFER_SIZE - AUTH_HEADER_PREFIX.len()];

        let failure = read_auth_header_with(feed(&filler)).unwrap_err();

        assert!(format!("{failure:#}").contains("too large"));
    }

    #[test]
    fn propagates_io_error() {
        let failure = read_auth_header_with(|_| Err(io::Error::other("boom"))).unwrap_err();

        let inner = failure.downcast_ref::<io::Error>().unwrap();
        assert_eq!(inner.kind(), io::ErrorKind::Other);
        assert_eq!(inner.to_string(), "boom");
    }

    #[test]
    fn errors_on_invalid_utf8() {
        let failure = read_auth_header_with(feed(b"sk-abc\xff")).unwrap_err();

        assert!(format!("{failure:#}").contains("UTF-8"));
    }
}
|
||||
Reference in New Issue
Block a user