feat: add LLMX configuration with Crawl4AI RAG MCP server

- Add config.toml with MCP servers configuration
- Add compose.yaml for PostgreSQL+pgvector, PostgREST, and Crawl4AI RAG
- Include forked mcp-crawl4ai-rag with BGE 1024-dim embedding support
- Add custom schema (crawled_pages_1024.sql) for BGE embeddings

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-25 08:29:43 +01:00
commit 10bcbb2120
23 changed files with 10224 additions and 0 deletions
--- a/compose.yaml
+++ b/compose.yaml
@@ -0,0 +1,71 @@
+services:
+  # PostgreSQL with pgvector for vector storage; schema file is mounted into docker-entrypoint-initdb.d
+  crawl4ai-db:
+    image: pgvector/pgvector:pg16
+    container_name: llmx_crawl4ai_db
+    restart: unless-stopped
+    ports:
+      - "5433:5432"  # host 5433 -> container 5432
+    volumes:
+      - crawl4ai_data:/var/lib/postgresql/data  # named volume, declared under top-level volumes
+      - ./servers/mcp-crawl4ai-rag/crawled_pages_1024.sql:/docker-entrypoint-initdb.d/01_schema.sql:ro
+    environment:
+      POSTGRES_USER: crawl4ai
+      POSTGRES_PASSWORD: "${CRAWL4AI_DB_PASSWORD:?CRAWL4AI_DB_PASSWORD must be set}"  # abort if unset or empty
+      POSTGRES_DB: crawl4ai
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U crawl4ai"]  # same user as POSTGRES_USER
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+  # PostgREST - Supabase-compatible REST API (DB password is embedded in PGRST_DB_URI, so keep it URI-safe)
+  crawl4ai-rest:
+    image: postgrest/postgrest:v12.2.0
+    container_name: llmx_crawl4ai_rest
+    restart: unless-stopped
+    ports:
+      - "3001:3000"  # host 3001 -> container 3000
+    environment:
+      PGRST_DB_URI: "postgresql://crawl4ai:${CRAWL4AI_DB_PASSWORD:?CRAWL4AI_DB_PASSWORD must be set}@crawl4ai-db:5432/crawl4ai"
+      PGRST_DB_SCHEMAS: public
+      PGRST_DB_ANON_ROLE: anon  # NOTE(review): role must exist in the database - confirm the init schema creates it
+      PGRST_JWT_SECRET: "${JWT_SECRET:?JWT_SECRET must be set}"  # NOTE(review): PostgREST expects >= 32 chars - confirm
+      PGRST_DB_EXTRA_SEARCH_PATH: public
+    depends_on:
+      crawl4ai-db:
+        condition: service_healthy  # wait until the crawl4ai-db healthcheck passes
+
+  # Crawl4AI RAG MCP Server, built from the forked sources under ./servers/mcp-crawl4ai-rag
+  crawl4ai-rag:
+    build:
+      context: ./servers/mcp-crawl4ai-rag
+    container_name: llmx_crawl4ai_rag
+    restart: unless-stopped
+    ports:
+      - "8051:8051"
+    environment:
+      # MCP Transport
+      TRANSPORT: sse
+      HOST: "0.0.0.0"
+      PORT: "8051"  # presumably the listen port; keep in sync with the published port above
+      # Embedding config - point to remote LiteLLM
+      EMBEDDING_API_BASE: https://llm.ai.pivoine.art/v1
+      EMBEDDING_MODEL: bge-large-en-v1.5
+      EMBEDDING_DIMENSION: "1024"  # NOTE(review): should match the vector size in crawled_pages_1024.sql - confirm
+      OPENAI_API_KEY: "${LITELLM_API_KEY:?LITELLM_API_KEY must be set}"  # abort if unset or empty
+      # Supabase-compatible config (pointing to PostgREST)
+      SUPABASE_URL: http://crawl4ai-rest:3000
+      SUPABASE_SERVICE_KEY: "${SUPABASE_SERVICE_KEY:?SUPABASE_SERVICE_KEY must be set}"  # abort if unset or empty
+      # Feature flags (quoted so they stay strings)
+      USE_CONTEXTUAL_EMBEDDINGS: "false"
+      USE_HYBRID_SEARCH: "true"
+      USE_AGENTIC_RAG: "false"
+      USE_RERANKING: "false"
+      USE_KNOWLEDGE_GRAPH: "false"
+    depends_on:
+      - crawl4ai-rest  # short form: start order only, no readiness wait
+
+volumes:
+  crawl4ai_data:  # mounted at /var/lib/postgresql/data by crawl4ai-db
+    name: llmx_crawl4ai_data  # explicit volume name rather than the Compose-generated one