feat: add LLMX configuration with Crawl4AI RAG MCP server

- Add config.toml with MCP servers configuration
- Add compose.yaml for PostgreSQL+pgvector, PostgREST, and Crawl4AI RAG
- Include forked mcp-crawl4ai-rag with BGE 1024-dim embedding support
- Add custom schema (crawled_pages_1024.sql) for BGE embeddings

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-25 08:29:43 +01:00
commit 10bcbb2120
23 changed files with 10224 additions and 0 deletions

71
compose.yaml Normal file
View File

@@ -0,0 +1,71 @@
services:
  # PostgreSQL with pgvector for vector storage.
  crawl4ai-db:
    image: pgvector/pgvector:pg16
    container_name: llmx_crawl4ai_db
    restart: unless-stopped
    ports:
      # Host port 5433 -> container port 5432.
      - "5433:5432"
    volumes:
      # Named volume that holds the database files.
      - crawl4ai_data:/var/lib/postgresql/data
      # Schema file mounted read-only into the image's init-script directory.
      # NOTE(review): the official postgres image documents that init scripts
      # run only when the data directory is empty - confirm this also holds
      # for the pgvector image before relying on schema changes being picked up.
      - ./servers/mcp-crawl4ai-rag/crawled_pages_1024.sql:/docker-entrypoint-initdb.d/01_schema.sql:ro
    environment:
      POSTGRES_USER: crawl4ai
      # Required secret: `:?` makes Compose stop with this message when the
      # variable is unset or empty, instead of substituting an empty password.
      POSTGRES_PASSWORD: "${CRAWL4AI_DB_PASSWORD:?CRAWL4AI_DB_PASSWORD must be set}"
      POSTGRES_DB: crawl4ai
    healthcheck:
      # Probe with pg_isready as the crawl4ai user; other services in this
      # file gate their start on this check via `condition: service_healthy`.
      test: ["CMD-SHELL", "pg_isready -U crawl4ai"]
      interval: 10s
      timeout: 5s
      retries: 5
# PostgREST - Supabase-compatible REST API
  crawl4ai-rest:
    image: postgrest/postgrest:v12.2.0
    container_name: llmx_crawl4ai_rest
    restart: unless-stopped
    ports:
      # Host port 3001 -> container port 3000.
      - "3001:3000"
    environment:
      # Connects to the crawl4ai database on the crawl4ai-db service as the
      # crawl4ai user. `:?` makes Compose stop with this message when the
      # password variable is unset or empty.
      # NOTE(review): the password is substituted verbatim into the URI, so
      # characters such as `@`, `/` or `:` presumably need percent-encoding -
      # confirm against the secret actually in use.
      PGRST_DB_URI: "postgresql://crawl4ai:${CRAWL4AI_DB_PASSWORD:?CRAWL4AI_DB_PASSWORD must be set}@crawl4ai-db:5432/crawl4ai"
      PGRST_DB_SCHEMAS: public
      # NOTE(review): assumes a database role named `anon` exists (presumably
      # created by the mounted schema file) - TODO confirm.
      PGRST_DB_ANON_ROLE: anon
      # Required secret: fail fast instead of starting with an empty value.
      PGRST_JWT_SECRET: "${JWT_SECRET:?JWT_SECRET must be set}"
      PGRST_DB_EXTRA_SEARCH_PATH: public
    depends_on:
      # Start only after the database healthcheck passes.
      crawl4ai-db:
        condition: service_healthy
# Crawl4AI RAG MCP Server
  crawl4ai-rag:
    # Built locally from the mcp-crawl4ai-rag sources under ./servers.
    build:
      context: ./servers/mcp-crawl4ai-rag
    container_name: llmx_crawl4ai_rag
    restart: unless-stopped
    ports:
      # Same port on host and container; matches PORT below.
      - "8051:8051"
    environment:
      # MCP Transport
      TRANSPORT: sse
      # Quoted so every environment value in this service is an explicit
      # string, consistent with EMBEDDING_DIMENSION and the feature flags.
      HOST: "0.0.0.0"
      PORT: "8051"
      # Embedding config - point to remote LiteLLM
      EMBEDDING_API_BASE: https://llm.ai.pivoine.art/v1
      EMBEDDING_MODEL: bge-large-en-v1.5
      EMBEDDING_DIMENSION: "1024"
      # Required secret: `:?` makes Compose stop with this message when the
      # variable is unset or empty, instead of passing an empty key.
      OPENAI_API_KEY: "${LITELLM_API_KEY:?LITELLM_API_KEY must be set}"
      # Supabase-compatible config (pointing to PostgREST)
      SUPABASE_URL: http://crawl4ai-rest:3000
      SUPABASE_SERVICE_KEY: "${SUPABASE_SERVICE_KEY:?SUPABASE_SERVICE_KEY must be set}"
      # Feature flags
      USE_CONTEXTUAL_EMBEDDINGS: "false"
      USE_HYBRID_SEARCH: "true"
      USE_AGENTIC_RAG: "false"
      USE_RERANKING: "false"
      USE_KNOWLEDGE_GRAPH: "false"
    depends_on:
      # Long-form equivalent of the short `- crawl4ai-rest` syntax: this only
      # orders container start-up and does not wait for readiness.
      crawl4ai-rest:
        condition: service_started
volumes:
  # Named volume mounted by crawl4ai-db at /var/lib/postgresql/data;
  # `name` sets the volume's actual name explicitly.
  crawl4ai_data:
    name: llmx_crawl4ai_data