From 462b219d3fba4b340de448a21052e6dfb47ffdf5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?=
Date: Sun, 16 Nov 2025 21:43:01 +0100
Subject: [PATCH] feat: Add comprehensive Anthropic prompt caching
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add cache_control to system instructions (stable, high value)
- Add cache_control to conversation history (4 messages before end)
- Implements full Anthropic caching hierarchy: tools → system → history
- Significant cost savings for repeated similar requests

Cache hierarchy:
1. Tools (last tool) - already implemented
2. System instructions - new
3. Conversation history - new (stable portion)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 llmx-rs/core/src/chat_completions.rs | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/llmx-rs/core/src/chat_completions.rs b/llmx-rs/core/src/chat_completions.rs
index 139b0d24..2ded19a0 100644
--- a/llmx-rs/core/src/chat_completions.rs
+++ b/llmx-rs/core/src/chat_completions.rs
@@ -56,7 +56,12 @@ pub(crate) async fn stream_chat_completions(
     let mut messages = Vec::<serde_json::Value>::new();
 
     let full_instructions = prompt.get_full_instructions(model_family);
-    messages.push(json!({"role": "system", "content": full_instructions}));
+    // Add cache_control to system instructions for Anthropic prompt caching
+    messages.push(json!({
+        "role": "system",
+        "content": full_instructions,
+        "cache_control": {"type": "ephemeral"}
+    }));
 
     let input = prompt.get_formatted_input();
 
@@ -413,6 +418,20 @@ pub(crate) async fn stream_chat_completions(
     }
 
     debug!("Built {} messages for API request", messages.len());
+
+    // Add cache_control to conversation history for Anthropic prompt caching
+    // Add it to a message that's at least 3 messages before the end (stable history)
+    // This caches the earlier conversation while keeping recent turns uncached
+    if messages.len() > 4 {
+        let cache_idx = messages.len().saturating_sub(4);
+        if let Some(msg) = messages.get_mut(cache_idx) {
+            if let Some(obj) = msg.as_object_mut() {
+                obj.insert("cache_control".to_string(), json!({"type": "ephemeral"}));
+                debug!("Added cache_control to message at index {} (conversation history)", cache_idx);
+            }
+        }
+    }
+
     debug!("=== End Chat Completions Request Debug ===");
 
     let tools_json = create_tools_json_for_chat_completions_api(&prompt.tools)?;