Phase 1: Repository & Infrastructure Setup
- Renamed directories: codex-rs -> llmx-rs, codex-cli -> llmx-cli
- Updated package.json files:
  - Root: llmx-monorepo
  - CLI: @llmx/llmx
  - SDK: @llmx/llmx-sdk
- Updated pnpm workspace configuration
- Renamed binary: codex.js -> llmx.js
- Updated environment variables: CODEX_* -> LLMX_*
- Changed repository URLs to valknar/llmx
🤖 Generated with Claude Code
llmx-rs/ollama/Cargo.toml (new file, 31 lines)
@@ -0,0 +1,31 @@
[package]
edition = "2024"
name = "codex-ollama"
version = { workspace = true }

[lib]
name = "codex_ollama"
path = "src/lib.rs"

[lints]
workspace = true

[dependencies]
async-stream = { workspace = true }
bytes = { workspace = true }
codex-core = { workspace = true }
futures = { workspace = true }
reqwest = { workspace = true, features = ["json", "stream"] }
serde_json = { workspace = true }
tokio = { workspace = true, features = [
    "io-std",
    "macros",
    "process",
    "rt-multi-thread",
    "signal",
] }
tracing = { workspace = true, features = ["log"] }
wiremock = { workspace = true }

[dev-dependencies]
assert_matches = { workspace = true }
llmx-rs/ollama/src/client.rs (new file, 349 lines)
@@ -0,0 +1,349 @@
use bytes::BytesMut;
use futures::StreamExt;
use futures::stream::BoxStream;
use serde_json::Value as JsonValue;
use std::collections::VecDeque;
use std::io;

use crate::parser::pull_events_from_value;
use crate::pull::PullEvent;
use crate::pull::PullProgressReporter;
use crate::url::base_url_to_host_root;
use crate::url::is_openai_compatible_base_url;
use codex_core::BUILT_IN_OSS_MODEL_PROVIDER_ID;
use codex_core::ModelProviderInfo;
use codex_core::WireApi;
use codex_core::config::Config;

const OLLAMA_CONNECTION_ERROR: &str = "No running Ollama server detected. Start it with: `ollama serve` (after installing). Install instructions: https://github.com/ollama/ollama?tab=readme-ov-file#ollama";

/// Client for interacting with a local Ollama instance.
pub struct OllamaClient {
    client: reqwest::Client,
    host_root: String,
    uses_openai_compat: bool,
}

impl OllamaClient {
    /// Construct a client for the built‑in open‑source ("oss") model provider
    /// and verify that a local Ollama server is reachable. If no server is
    /// detected, returns an error with helpful installation/run instructions.
    pub async fn try_from_oss_provider(config: &Config) -> io::Result<Self> {
        // Note that we must look up the provider from the Config to ensure that
        // any overrides the user has in their config.toml are taken into
        // account.
        let provider = config
            .model_providers
            .get(BUILT_IN_OSS_MODEL_PROVIDER_ID)
            .ok_or_else(|| {
                io::Error::new(
                    io::ErrorKind::NotFound,
                    format!("Built-in provider {BUILT_IN_OSS_MODEL_PROVIDER_ID} not found",),
                )
            })?;

        Self::try_from_provider(provider).await
    }

    #[cfg(test)]
    async fn try_from_provider_with_base_url(base_url: &str) -> io::Result<Self> {
        let provider = codex_core::create_oss_provider_with_base_url(base_url);
        Self::try_from_provider(&provider).await
    }

    /// Build a client from a provider definition and verify the server is reachable.
    async fn try_from_provider(provider: &ModelProviderInfo) -> io::Result<Self> {
        #![expect(clippy::expect_used)]
        let base_url = provider
            .base_url
            .as_ref()
            .expect("oss provider must have a base_url");
        let uses_openai_compat = is_openai_compatible_base_url(base_url)
            || matches!(provider.wire_api, WireApi::Chat)
                && is_openai_compatible_base_url(base_url);
        let host_root = base_url_to_host_root(base_url);
        let client = reqwest::Client::builder()
            .connect_timeout(std::time::Duration::from_secs(5))
            .build()
            .unwrap_or_else(|_| reqwest::Client::new());
        let client = Self {
            client,
            host_root,
            uses_openai_compat,
        };
        client.probe_server().await?;
        Ok(client)
    }

    /// Probe whether the server is reachable by hitting the appropriate health endpoint.
    async fn probe_server(&self) -> io::Result<()> {
        let url = if self.uses_openai_compat {
            format!("{}/v1/models", self.host_root.trim_end_matches('/'))
        } else {
            format!("{}/api/tags", self.host_root.trim_end_matches('/'))
        };
        let resp = self.client.get(url).send().await.map_err(|err| {
            tracing::warn!("Failed to connect to Ollama server: {err:?}");
            io::Error::other(OLLAMA_CONNECTION_ERROR)
        })?;
        if resp.status().is_success() {
            Ok(())
        } else {
            tracing::warn!(
                "Failed to probe server at {}: HTTP {}",
                self.host_root,
                resp.status()
            );
            Err(io::Error::other(OLLAMA_CONNECTION_ERROR))
        }
    }

    /// Return the list of model names known to the local Ollama instance.
    pub async fn fetch_models(&self) -> io::Result<Vec<String>> {
        let tags_url = format!("{}/api/tags", self.host_root.trim_end_matches('/'));
        let resp = self
            .client
            .get(tags_url)
            .send()
            .await
            .map_err(io::Error::other)?;
        if !resp.status().is_success() {
            return Ok(Vec::new());
        }
        let val = resp.json::<JsonValue>().await.map_err(io::Error::other)?;
        let names = val
            .get("models")
            .and_then(|m| m.as_array())
            .map(|arr| {
                arr.iter()
                    .filter_map(|v| v.get("name").and_then(|n| n.as_str()))
                    .map(str::to_string)
                    .collect::<Vec<_>>()
            })
            .unwrap_or_default();
        Ok(names)
    }

    /// Start a model pull and emit streaming events. The returned stream ends when
    /// a Success event is observed or the server closes the connection.
    pub async fn pull_model_stream(
        &self,
        model: &str,
    ) -> io::Result<BoxStream<'static, PullEvent>> {
        let url = format!("{}/api/pull", self.host_root.trim_end_matches('/'));
        let resp = self
            .client
            .post(url)
            .json(&serde_json::json!({"model": model, "stream": true}))
            .send()
            .await
            .map_err(io::Error::other)?;
        if !resp.status().is_success() {
            return Err(io::Error::other(format!(
                "failed to start pull: HTTP {}",
                resp.status()
            )));
        }

        let mut stream = resp.bytes_stream();
        let mut buf = BytesMut::new();
        let _pending: VecDeque<PullEvent> = VecDeque::new();

        // Using an async stream adaptor backed by unfold-like manual loop.
        let s = async_stream::stream! {
            while let Some(chunk) = stream.next().await {
                match chunk {
                    Ok(bytes) => {
                        buf.extend_from_slice(&bytes);
                        while let Some(pos) = buf.iter().position(|b| *b == b'\n') {
                            let line = buf.split_to(pos + 1);
                            if let Ok(text) = std::str::from_utf8(&line) {
                                let text = text.trim();
                                if text.is_empty() { continue; }
                                if let Ok(value) = serde_json::from_str::<JsonValue>(text) {
                                    for ev in pull_events_from_value(&value) { yield ev; }
                                    if let Some(err_msg) = value.get("error").and_then(|e| e.as_str()) {
                                        yield PullEvent::Error(err_msg.to_string());
                                        return;
                                    }
                                    if let Some(status) = value.get("status").and_then(|s| s.as_str())
                                        && status == "success" { yield PullEvent::Success; return; }
                                }
                            }
                        }
                    }
                    Err(_) => {
                        // Connection error: end the stream.
                        return;
                    }
                }
            }
        };

        Ok(Box::pin(s))
    }

    /// High-level helper to pull a model and drive a progress reporter.
    pub async fn pull_with_reporter(
        &self,
        model: &str,
        reporter: &mut dyn PullProgressReporter,
    ) -> io::Result<()> {
        reporter.on_event(&PullEvent::Status(format!("Pulling model {model}...")))?;
        let mut stream = self.pull_model_stream(model).await?;
        while let Some(event) = stream.next().await {
            reporter.on_event(&event)?;
            match event {
                PullEvent::Success => {
                    return Ok(());
                }
                PullEvent::Error(err) => {
                    // Empirically, ollama returns a 200 OK response even when
                    // the output stream includes an error message. Verify with:
                    //
                    // `curl -i http://localhost:11434/api/pull -d '{ "model": "foobarbaz" }'`
                    //
                    // As such, we have to check the event stream, not the
                    // HTTP response status, to determine whether to return Err.
                    return Err(io::Error::other(format!("Pull failed: {err}")));
                }
                PullEvent::ChunkProgress { .. } | PullEvent::Status(_) => {
                    continue;
                }
            }
        }
        Err(io::Error::other(
            "Pull stream ended unexpectedly without success.",
        ))
    }

    /// Low-level constructor given a raw host root, e.g. "http://localhost:11434".
    #[cfg(test)]
    fn from_host_root(host_root: impl Into<String>) -> Self {
        let client = reqwest::Client::builder()
            .connect_timeout(std::time::Duration::from_secs(5))
            .build()
            .unwrap_or_else(|_| reqwest::Client::new());
        Self {
            client,
            host_root: host_root.into(),
            uses_openai_compat: false,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // Happy-path tests using a mock HTTP server; skip if sandbox network is disabled.
    #[tokio::test]
    async fn test_fetch_models_happy_path() {
        if std::env::var(codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
            tracing::info!(
                "{} is set; skipping test_fetch_models_happy_path",
                codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR
            );
            return;
        }

        let server = wiremock::MockServer::start().await;
        wiremock::Mock::given(wiremock::matchers::method("GET"))
            .and(wiremock::matchers::path("/api/tags"))
            .respond_with(
                wiremock::ResponseTemplate::new(200).set_body_raw(
                    serde_json::json!({
                        "models": [ {"name": "llama3.2:3b"}, {"name":"mistral"} ]
                    })
                    .to_string(),
                    "application/json",
                ),
            )
            .mount(&server)
            .await;

        let client = OllamaClient::from_host_root(server.uri());
        let models = client.fetch_models().await.expect("fetch models");
        assert!(models.contains(&"llama3.2:3b".to_string()));
        assert!(models.contains(&"mistral".to_string()));
    }

    #[tokio::test]
    async fn test_probe_server_happy_path_openai_compat_and_native() {
        if std::env::var(codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
            tracing::info!(
                "{} set; skipping test_probe_server_happy_path_openai_compat_and_native",
                codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR
            );
            return;
        }

        let server = wiremock::MockServer::start().await;

        // Native endpoint
        wiremock::Mock::given(wiremock::matchers::method("GET"))
            .and(wiremock::matchers::path("/api/tags"))
            .respond_with(wiremock::ResponseTemplate::new(200))
            .mount(&server)
            .await;
        let native = OllamaClient::from_host_root(server.uri());
        native.probe_server().await.expect("probe native");

        // OpenAI compatibility endpoint
        wiremock::Mock::given(wiremock::matchers::method("GET"))
            .and(wiremock::matchers::path("/v1/models"))
            .respond_with(wiremock::ResponseTemplate::new(200))
            .mount(&server)
            .await;
        let ollama_client =
            OllamaClient::try_from_provider_with_base_url(&format!("{}/v1", server.uri()))
                .await
                .expect("probe OpenAI compat");
        ollama_client
            .probe_server()
            .await
            .expect("probe OpenAI compat");
    }

    #[tokio::test]
    async fn test_try_from_oss_provider_ok_when_server_running() {
        if std::env::var(codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
            tracing::info!(
                "{} set; skipping test_try_from_oss_provider_ok_when_server_running",
                codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR
            );
            return;
        }

        let server = wiremock::MockServer::start().await;

        // OpenAI‑compat models endpoint responds OK.
        wiremock::Mock::given(wiremock::matchers::method("GET"))
            .and(wiremock::matchers::path("/v1/models"))
            .respond_with(wiremock::ResponseTemplate::new(200))
            .mount(&server)
            .await;

        OllamaClient::try_from_provider_with_base_url(&format!("{}/v1", server.uri()))
            .await
            .expect("client should be created when probe succeeds");
    }

    #[tokio::test]
    async fn test_try_from_oss_provider_err_when_server_missing() {
        if std::env::var(codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
            tracing::info!(
                "{} set; skipping test_try_from_oss_provider_err_when_server_missing",
                codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR
            );
            return;
        }

        let server = wiremock::MockServer::start().await;
        let err = OllamaClient::try_from_provider_with_base_url(&format!("{}/v1", server.uri()))
            .await
            .err()
            .expect("expected error");
        assert_eq!(OLLAMA_CONNECTION_ERROR, err.to_string());
    }
}
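The doc comments in client.rs describe the intended call pattern for the pull API. Below is a minimal consumer sketch, not part of this commit, that drives the lower-level `pull_model_stream` stream directly instead of going through a `PullProgressReporter`; the `pull_and_log` helper and the assumption that an `OllamaClient` has already been constructed are illustrative.

// Hypothetical helper (not part of this commit): consume the NDJSON-backed
// event stream from `pull_model_stream` and log each event as it arrives.
use codex_ollama::{OllamaClient, PullEvent};
use futures::StreamExt;
use std::io;

async fn pull_and_log(client: &OllamaClient, model: &str) -> io::Result<()> {
    let mut events = client.pull_model_stream(model).await?;
    while let Some(event) = events.next().await {
        match event {
            PullEvent::Status(status) => tracing::info!("pull status: {status}"),
            PullEvent::ChunkProgress { digest, total, completed } => {
                tracing::debug!("layer {digest}: {completed:?}/{total:?} bytes");
            }
            PullEvent::Success => return Ok(()),
            PullEvent::Error(err) => return Err(io::Error::other(err)),
        }
    }
    Err(io::Error::other("pull stream ended without success"))
}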
llmx-rs/ollama/src/lib.rs (new file, 44 lines)
@@ -0,0 +1,44 @@
mod client;
mod parser;
mod pull;
mod url;

pub use client::OllamaClient;
use codex_core::config::Config;
pub use pull::CliProgressReporter;
pub use pull::PullEvent;
pub use pull::PullProgressReporter;
pub use pull::TuiProgressReporter;

/// Default OSS model to use when `--oss` is passed without an explicit `-m`.
pub const DEFAULT_OSS_MODEL: &str = "gpt-oss:20b";

/// Prepare the local OSS environment when `--oss` is selected.
///
/// - Ensures a local Ollama server is reachable.
/// - Checks if the model exists locally and pulls it if missing.
pub async fn ensure_oss_ready(config: &Config) -> std::io::Result<()> {
    // Only download when the requested model is the default OSS model (or when -m is not provided).
    let model = config.model.as_ref();

    // Verify local Ollama is reachable.
    let ollama_client = crate::OllamaClient::try_from_oss_provider(config).await?;

    // If the model is not present locally, pull it.
    match ollama_client.fetch_models().await {
        Ok(models) => {
            if !models.iter().any(|m| m == model) {
                let mut reporter = crate::CliProgressReporter::new();
                ollama_client
                    .pull_with_reporter(model, &mut reporter)
                    .await?;
            }
        }
        Err(err) => {
            // Not fatal; higher layers may still proceed and surface errors later.
            tracing::warn!("Failed to query local models from Ollama: {}.", err);
        }
    }

    Ok(())
}
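A hypothetical call site for `ensure_oss_ready`, not part of this commit: a CLI entrypoint might gate it behind its `--oss` flag before starting a session. The `prepare_session` helper, the `oss` parameter, and the assumption that `config.model` already holds the requested model (falling back to `DEFAULT_OSS_MODEL` when no `-m` was given) are illustrative.

// Hypothetical call site (not part of this commit).
use codex_core::config::Config;
use codex_ollama::ensure_oss_ready;

async fn prepare_session(config: &Config, oss: bool) -> std::io::Result<()> {
    if oss {
        // Probes the local Ollama server and pulls the configured model if it is missing.
        ensure_oss_ready(config).await?;
    }
    Ok(())
}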
llmx-rs/ollama/src/parser.rs (new file, 75 lines)
@@ -0,0 +1,75 @@
use serde_json::Value as JsonValue;

use crate::pull::PullEvent;

// Convert a single JSON object representing a pull update into one or more events.
pub(crate) fn pull_events_from_value(value: &JsonValue) -> Vec<PullEvent> {
    let mut events = Vec::new();
    if let Some(status) = value.get("status").and_then(|s| s.as_str()) {
        events.push(PullEvent::Status(status.to_string()));
        if status == "success" {
            events.push(PullEvent::Success);
        }
    }
    let digest = value
        .get("digest")
        .and_then(|d| d.as_str())
        .unwrap_or("")
        .to_string();
    let total = value.get("total").and_then(JsonValue::as_u64);
    let completed = value.get("completed").and_then(JsonValue::as_u64);
    if total.is_some() || completed.is_some() {
        events.push(PullEvent::ChunkProgress {
            digest,
            total,
            completed,
        });
    }
    events
}

#[cfg(test)]
mod tests {
    use assert_matches::assert_matches;

    use super::*;

    #[test]
    fn test_pull_events_decoder_status_and_success() {
        let v: JsonValue = serde_json::json!({"status":"verifying"});
        let events = pull_events_from_value(&v);
        assert_matches!(events.as_slice(), [PullEvent::Status(s)] if s == "verifying");

        let v2: JsonValue = serde_json::json!({"status":"success"});
        let events2 = pull_events_from_value(&v2);
        assert_eq!(events2.len(), 2);
        assert_matches!(events2[0], PullEvent::Status(ref s) if s == "success");
        assert_matches!(events2[1], PullEvent::Success);
    }

    #[test]
    fn test_pull_events_decoder_progress() {
        let v: JsonValue = serde_json::json!({"digest":"sha256:abc","total":100});
        let events = pull_events_from_value(&v);
        assert_eq!(events.len(), 1);
        assert_matches!(
            &events[0],
            PullEvent::ChunkProgress {
                digest,
                total,
                completed,
            } if digest == "sha256:abc" && total == &Some(100) && completed.is_none()
        );
        let v2: JsonValue = serde_json::json!({"digest":"sha256:def","completed":42});
        let events2 = pull_events_from_value(&v2);
        assert_eq!(events2.len(), 1);
        assert_matches!(
            &events2[0],
            PullEvent::ChunkProgress {
                digest,
                total,
                completed,
            } if digest == "sha256:def" && total.is_none() && completed == &Some(42)
        );
    }
}
llmx-rs/ollama/src/pull.rs (new file, 147 lines)
@@ -0,0 +1,147 @@
use std::collections::HashMap;
use std::io;
use std::io::Write;

/// Events emitted while pulling a model from Ollama.
#[derive(Debug, Clone)]
pub enum PullEvent {
    /// A human-readable status message (e.g., "verifying", "writing").
    Status(String),
    /// Byte-level progress update for a specific layer digest.
    ChunkProgress {
        digest: String,
        total: Option<u64>,
        completed: Option<u64>,
    },
    /// The pull finished successfully.
    Success,

    /// Error event with a message.
    Error(String),
}

/// A simple observer for pull progress events. Implementations decide how to
/// render progress (CLI, TUI, logs, ...).
pub trait PullProgressReporter {
    fn on_event(&mut self, event: &PullEvent) -> io::Result<()>;
}

/// A minimal CLI reporter that writes inline progress to stderr.
pub struct CliProgressReporter {
    printed_header: bool,
    last_line_len: usize,
    last_completed_sum: u64,
    last_instant: std::time::Instant,
    totals_by_digest: HashMap<String, (u64, u64)>,
}

impl Default for CliProgressReporter {
    fn default() -> Self {
        Self::new()
    }
}

impl CliProgressReporter {
    pub fn new() -> Self {
        Self {
            printed_header: false,
            last_line_len: 0,
            last_completed_sum: 0,
            last_instant: std::time::Instant::now(),
            totals_by_digest: HashMap::new(),
        }
    }
}

impl PullProgressReporter for CliProgressReporter {
    fn on_event(&mut self, event: &PullEvent) -> io::Result<()> {
        let mut out = std::io::stderr();
        match event {
            PullEvent::Status(status) => {
                // Avoid noisy manifest messages; otherwise show status inline.
                if status.eq_ignore_ascii_case("pulling manifest") {
                    return Ok(());
                }
                let pad = self.last_line_len.saturating_sub(status.len());
                let line = format!("\r{status}{}", " ".repeat(pad));
                self.last_line_len = status.len();
                out.write_all(line.as_bytes())?;
                out.flush()
            }
            PullEvent::ChunkProgress {
                digest,
                total,
                completed,
            } => {
                if let Some(t) = *total {
                    self.totals_by_digest
                        .entry(digest.clone())
                        .or_insert((0, 0))
                        .0 = t;
                }
                if let Some(c) = *completed {
                    self.totals_by_digest
                        .entry(digest.clone())
                        .or_insert((0, 0))
                        .1 = c;
                }

                let (sum_total, sum_completed) = self
                    .totals_by_digest
                    .values()
                    .fold((0u64, 0u64), |acc, (t, c)| (acc.0 + *t, acc.1 + *c));
                if sum_total > 0 {
                    if !self.printed_header {
                        let gb = (sum_total as f64) / (1024.0 * 1024.0 * 1024.0);
                        let header = format!("Downloading model: total {gb:.2} GB\n");
                        out.write_all(b"\r\x1b[2K")?;
                        out.write_all(header.as_bytes())?;
                        self.printed_header = true;
                    }
                    let now = std::time::Instant::now();
                    let dt = now
                        .duration_since(self.last_instant)
                        .as_secs_f64()
                        .max(0.001);
                    let dbytes = sum_completed.saturating_sub(self.last_completed_sum) as f64;
                    let speed_mb_s = dbytes / (1024.0 * 1024.0) / dt;
                    self.last_completed_sum = sum_completed;
                    self.last_instant = now;

                    let done_gb = (sum_completed as f64) / (1024.0 * 1024.0 * 1024.0);
                    let total_gb = (sum_total as f64) / (1024.0 * 1024.0 * 1024.0);
                    let pct = (sum_completed as f64) * 100.0 / (sum_total as f64);
                    let text =
                        format!("{done_gb:.2}/{total_gb:.2} GB ({pct:.1}%) {speed_mb_s:.1} MB/s");
                    let pad = self.last_line_len.saturating_sub(text.len());
                    let line = format!("\r{text}{}", " ".repeat(pad));
                    self.last_line_len = text.len();
                    out.write_all(line.as_bytes())?;
                    out.flush()
                } else {
                    Ok(())
                }
            }
            PullEvent::Error(_) => {
                // This will be handled by the caller, so we don't do anything
                // here or the error will be printed twice.
                Ok(())
            }
            PullEvent::Success => {
                out.write_all(b"\n")?;
                out.flush()
            }
        }
    }
}

/// For now the TUI reporter delegates to the CLI reporter. This keeps UI and
/// CLI behavior aligned until a dedicated TUI integration is implemented.
#[derive(Default)]
pub struct TuiProgressReporter(CliProgressReporter);

impl PullProgressReporter for TuiProgressReporter {
    fn on_event(&mut self, event: &PullEvent) -> io::Result<()> {
        self.0.on_event(event)
    }
}
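Because `PullProgressReporter` is a public trait, downstream code can plug in its own renderer instead of the stderr-based `CliProgressReporter`. A minimal sketch, not part of this commit, that records events in memory (the `RecordingReporter` type is illustrative, e.g. for asserting on the event sequence in tests):

// Hypothetical reporter (not part of this commit): collect pull events in
// memory rather than rendering inline progress to stderr.
use std::io;

use codex_ollama::{PullEvent, PullProgressReporter};

#[derive(Default)]
struct RecordingReporter {
    events: Vec<PullEvent>,
}

impl PullProgressReporter for RecordingReporter {
    fn on_event(&mut self, event: &PullEvent) -> io::Result<()> {
        // PullEvent derives Clone, so keeping a copy of every event is cheap enough here.
        self.events.push(event.clone());
        Ok(())
    }
}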
llmx-rs/ollama/src/url.rs (new file, 39 lines)
@@ -0,0 +1,39 @@
/// Identify whether a base_url points at an OpenAI-compatible root (".../v1").
pub(crate) fn is_openai_compatible_base_url(base_url: &str) -> bool {
    base_url.trim_end_matches('/').ends_with("/v1")
}

/// Convert a provider base_url into the native Ollama host root.
/// For example, "http://localhost:11434/v1" -> "http://localhost:11434".
pub fn base_url_to_host_root(base_url: &str) -> String {
    let trimmed = base_url.trim_end_matches('/');
    if trimmed.ends_with("/v1") {
        trimmed
            .trim_end_matches("/v1")
            .trim_end_matches('/')
            .to_string()
    } else {
        trimmed.to_string()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_base_url_to_host_root() {
        assert_eq!(
            base_url_to_host_root("http://localhost:11434/v1"),
            "http://localhost:11434"
        );
        assert_eq!(
            base_url_to_host_root("http://localhost:11434"),
            "http://localhost:11434"
        );
        assert_eq!(
            base_url_to_host_root("http://localhost:11434/"),
            "http://localhost:11434"
        );
    }
}