# .llmx/compose.yaml

services:
  # PostgreSQL 16 with the pgvector extension: vector storage for the
  # crawl4ai stack.
  crawl4ai-db:
    image: pgvector/pgvector:pg16
    container_name: llmx_crawl4ai_db
    restart: unless-stopped
    ports:
      # Host port 5433 -> container port 5432.
      # NOTE(review): with no host IP given, Docker publishes this on all host
      # interfaces; prefix with "127.0.0.1:" if only local access is intended.
      - "5433:5432"
    volumes:
      # Named volume holding the database cluster (declared under `volumes:`).
      - crawl4ai_data:/var/lib/postgresql/data
      # Schema file mounted read-only into the image's init-script directory.
      - ./servers/mcp-crawl4ai-rag/crawled_pages_1024.sql:/docker-entrypoint-initdb.d/01_schema.sql:ro
    environment:
      POSTGRES_USER: crawl4ai
      # Required: Compose aborts with this message when the variable is unset
      # or empty, instead of silently passing an empty password through.
      POSTGRES_PASSWORD: "${CRAWL4AI_DB_PASSWORD:?CRAWL4AI_DB_PASSWORD must be set}"
      POSTGRES_DB: crawl4ai
    healthcheck:
      # Name the database explicitly rather than relying on the client default
      # (database name = user name).
      test: ["CMD-SHELL", "pg_isready -U crawl4ai -d crawl4ai"]
      interval: 10s
      timeout: 5s
      retries: 5

  # PostgREST: exposes the crawl4ai database as a Supabase-compatible REST API.
  crawl4ai-rest:
    image: postgrest/postgrest:v12.2.0
    container_name: llmx_crawl4ai_rest
    restart: unless-stopped
    ports:
      # Host port 3001 -> container port 3000.
      - "3001:3000"
    environment:
      # Required: Compose aborts with this message when the password variable
      # is unset or empty, instead of producing a URI with an empty password.
      # NOTE(review): the password is inserted verbatim; characters that are
      # special in a URI would need percent-encoding - confirm the value used.
      PGRST_DB_URI: "postgresql://crawl4ai:${CRAWL4AI_DB_PASSWORD:?CRAWL4AI_DB_PASSWORD must be set}@crawl4ai-db:5432/crawl4ai"
      PGRST_DB_SCHEMAS: public
      # NOTE(review): the `anon` role must exist in the database; presumably
      # it is created by the init schema mounted into crawl4ai-db - verify.
      PGRST_DB_ANON_ROLE: anon
      # NOTE(review): not enforced as required here; presumably this must be
      # the secret that SUPABASE_SERVICE_KEY was signed with - confirm.
      PGRST_JWT_SECRET: ${JWT_SECRET}
      PGRST_DB_EXTRA_SEARCH_PATH: public
    depends_on:
      # Start only after the database healthcheck reports healthy.
      crawl4ai-db:
        condition: service_healthy

  # MCP server for Crawl4AI RAG, built from the local
  # ./servers/mcp-crawl4ai-rag directory.
  crawl4ai-rag:
    build:
      context: ./servers/mcp-crawl4ai-rag
    container_name: llmx_crawl4ai_rag
    restart: unless-stopped
    ports:
      # Host port 8051 -> container port 8051 (same value as PORT below).
      - "8051:8051"
    environment:
      # --- MCP transport ---
      TRANSPORT: "sse"
      HOST: "0.0.0.0"
      PORT: "8051"
      # --- Embeddings: served by a remote LiteLLM endpoint ---
      EMBEDDING_API_BASE: "https://llm.ai.pivoine.art/v1"
      EMBEDDING_MODEL: "hosted_vllm/BAAI/bge-large-en-v1.5"
      EMBEDDING_DIMENSION: "1024"
      # The LiteLLM key is handed over under the OpenAI variable name.
      OPENAI_API_KEY: ${LITELLM_API_KEY}
      # --- Storage: Supabase-style settings aimed at the PostgREST service ---
      SUPABASE_URL: "http://crawl4ai-rest:3000"
      SUPABASE_SERVICE_KEY: ${SUPABASE_SERVICE_KEY}
      # --- Feature flags (string values) ---
      USE_CONTEXTUAL_EMBEDDINGS: "false"
      USE_HYBRID_SEARCH: "true"
      USE_AGENTIC_RAG: "false"
      USE_RERANKING: "false"
      USE_KNOWLEDGE_GRAPH: "false"
    depends_on:
      # Long form of the plain list entry: wait only for the container to start.
      crawl4ai-rest:
        condition: service_started

# Named volumes used by the services in this file.
volumes:
  # Mounted by crawl4ai-db at /var/lib/postgresql/data.
  crawl4ai_data:
    # Explicit volume name, used as-is instead of a generated one.
    name: llmx_crawl4ai_data