feat: add LLMX configuration with Crawl4AI RAG MCP server

- Add config.toml with MCP servers configuration
- Add compose.yaml for PostgreSQL+pgvector, PostgREST, and Crawl4AI RAG
- Include forked mcp-crawl4ai-rag with BGE 1024-dim embedding support
- Custom schema (crawled_pages_1024.sql) for BGE embeddings

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-25 08:29:43 +01:00
commit 10bcbb2120
23 changed files with 10224 additions and 0 deletions

144
config.toml Normal file
View File

@@ -0,0 +1,144 @@
# Provider identifier and the model requested from it.
model_provider = "litellm"

# Active model. Alternative kept for quick switching:
#   model = "anthropic/claude-sonnet-4-5-20250929"
model = "hosted_vllm/openai/qwen-2.5-7b"
# Per-directory trust settings. Every path listed here is marked "trusted".
[projects."/home/valknar"]
trust_level = "trusted"

[projects."/home/valknar/Projects/llmx"]
trust_level = "trusted"

[projects."/home/valknar/Projects/docker-compose"]
trust_level = "trusted"

[projects."/home/valknar/Projects/kit-ui"]
trust_level = "trusted"

[projects."/home/valknar/Projects/image-ui"]
trust_level = "trusted"

[projects."/home/valknar/bin"]
trust_level = "trusted"
# ==============================================================================
# MCP SERVERS CONFIGURATION (13 servers)
# Last updated: 2025-11-24
# Removed: sqlite, brave_search, filescope, in_memoria, rust_filesystem (broken/incompatible)
# ==============================================================================
# ==============================================================================
# ESSENTIAL CORE SERVERS (3)
# ==============================================================================
# Filesystem server: the TypeScript implementation, noted by the author as
# stable and battle-tested. Launched through npx; "/home/valknar" is passed as
# the trailing argument (presumably the directory it may access - confirm).
[mcp_servers.filesystem]
enabled = true
command = "npx"
args = [
    "-y",
    "@modelcontextprotocol/server-filesystem",
    "/home/valknar",
]
startup_timeout_sec = 10
# Git operations server. Run through uvx (Python) because no NPM package is
# available for it.
[mcp_servers.git]
enabled = true
command = "uvx"
args = [
    "mcp-server-git",
]
startup_timeout_sec = 10
# Browser automation through the Playwright MCP package. Noted by the author
# as the most popular MCP server (826k weekly downloads when this was written).
[mcp_servers.playwright]
enabled = true
command = "npx"
args = [
    "-y",
    "@playwright/mcp",
]
startup_timeout_sec = 20
# ==============================================================================
# HIGHLY RECOMMENDED SERVERS (3)
# ==============================================================================
# Web search via DuckDuckGo: privacy-focused and needs no API key.
[mcp_servers.duckduckgo]
enabled = true
command = "npx"
args = [
    "-y",
    "duckduckgo-mcp-server",
]
startup_timeout_sec = 10
# Docker container management; needs a running Portainer instance.
[mcp_servers.portainer]
# Kept off until Portainer has been configured.
enabled = false
command = "npx"
args = [
    "-y",
    "@portainer/portainer-mcp",
]
startup_timeout_sec = 15
# Sandboxed Python execution. Runs via uvx and requires Deno to be installed.
[mcp_servers.python_runner]
enabled = true
command = "uvx"
args = [
    "mcp-run-python",
    "stdio",
]
startup_timeout_sec = 15
# ==============================================================================
# SPECIALIZED SERVERS (4)
# ==============================================================================
# Executes shell commands.
[mcp_servers.commands]
enabled = true
command = "npx"
args = [
    "-y",
    "mcp-server-commands",
]
startup_timeout_sec = 10
# Combined read / write / command-execution server. uvx installs it straight
# from the `prod` ref of the GitHub repository.
[mcp_servers.codemcp]
enabled = true
command = "uvx"
args = [
    "--from",
    "git+https://github.com/ezyang/codemcp@prod",
    "codemcp",
]
# Largest startup allowance in this file; presumably covers the fetch from
# git - confirm.
startup_timeout_sec = 60
# SQL interface to 40+ services. Expects the `anyquery` binary, which needs a
# Go installation.
[mcp_servers.anyquery]
# Kept off until installed with:
#   go install github.com/julien040/anyquery@latest
enabled = false
command = "anyquery"
args = [
    "mcp",
]
startup_timeout_sec = 20
# Generic OpenAPI integration, run as a Docker container.
[mcp_servers.openapi]
# Kept off: configure --openapi-url for the specific API before enabling.
enabled = false
command = "docker"
args = [
    "run",
    "-i",
    "--rm",
    "snaggle/openapi-mcp",
]
startup_timeout_sec = 20
# ==============================================================================
# AGGREGATORS (1)
# ==============================================================================
# Pipedream aggregator: 2,500+ APIs with 8,000+ prebuilt tools. Requires an
# account; the bearer token is taken from the environment variable named in
# bearer_token_env_var.
[mcp_servers.pipedream]
# Kept off until a Pipedream account has been created.
enabled = false
url = "https://api.pipedream.com/mcp"
bearer_token_env_var = "PIPEDREAM_API_KEY"
startup_timeout_sec = 20
# ==============================================================================
# ADDITIONAL CUSTOM SERVERS (1)
# ==============================================================================
# Official GitHub MCP server, run as a throwaway Docker container (-i --rm).
# The image reads its credential from GITHUB_PERSONAL_ACCESS_TOKEN, so that is
# the variable forwarded with `-e`; it must be set in the environment that
# launches this server. Forwarding GITHUB_TOKEN would start the container
# without a credential the server recognises.
[mcp_servers.github_official]
command = "docker"
args = [
    "run",
    "-i",
    "--rm",
    "-e",
    "GITHUB_PERSONAL_ACCESS_TOKEN",
    "ghcr.io/github/github-mcp-server",
]
# Kept off: enable if you prefer native MCP over the gh CLI.
enabled = false
startup_timeout_sec = 20
# ==============================================================================
# RAG SERVERS (1)
# ==============================================================================
# Web crawling plus RAG with vector search, served by the local Docker stack.
# Embeddings: BGE via a remote LiteLLM. Storage: local PostgreSQL + pgvector.
# Bring the stack up first:
#   cd ~/.llmx && docker compose up -d
[mcp_servers.crawl4ai_rag]
enabled = true
url = "http://localhost:8051/sse"
startup_timeout_sec = 30
tool_timeout_sec = 120