perf: optimize Docker build time from >10min to ~2min

- Remove sentence-transformers dependency (saves ~3GB PyTorch/CUDA)
- Make CrossEncoder import optional with graceful fallback
- Optimize Dockerfile for layer caching (incremental builds ~3s)
- Change PostgREST port from 3000 to 3001 (avoid Next.js conflict)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-30 21:36:29 +01:00
parent 10bcbb2120
commit 6724fea494
5 changed files with 27 additions and 14 deletions
--- a/config.toml
+++ b/config.toml
@@ -1,6 +1,6 @@
 model_provider = "litellm"
 # model = "anthropic/claude-sonnet-4-5-20250929"
-model = "hosted_vllm/openai/qwen-2.5-7b"
+model = "hosted_vllm/meta-llama/Llama-3.1-8B-Instruct"

 [projects."/home/valknar"]
 trust_level = "trusted"