diff --git a/compose.yaml b/compose.yaml
index 6036d4e..4854f12 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -51,7 +51,7 @@ services:
       PORT: 8051
       # Embedding config - point to remote LiteLLM
       EMBEDDING_API_BASE: https://llm.ai.pivoine.art/v1
-      EMBEDDING_MODEL: bge-large-en-v1.5
+      EMBEDDING_MODEL: hosted_vllm/BAAI/bge-large-en-v1.5
       EMBEDDING_DIMENSION: "1024"
       OPENAI_API_KEY: ${LITELLM_API_KEY}
       # Supabase-compatible config (pointing to PostgREST)
diff --git a/config.toml b/config.toml
index 3fbd5a9..778df68 100644
--- a/config.toml
+++ b/config.toml
@@ -1,6 +1,6 @@
 model_provider = "litellm"
 # model = "anthropic/claude-sonnet-4-5-20250929"
-model = "hosted_vllm/openai/qwen-2.5-7b"
+model = "hosted_vllm/meta-llama/Llama-3.1-8B-Instruct"

 [projects."/home/valknar"]
 trust_level = "trusted"
diff --git a/servers/mcp-crawl4ai-rag/Dockerfile b/servers/mcp-crawl4ai-rag/Dockerfile
index 0fa9532..23c200d 100644
--- a/servers/mcp-crawl4ai-rag/Dockerfile
+++ b/servers/mcp-crawl4ai-rag/Dockerfile
@@ -1,21 +1,26 @@
 FROM python:3.12-slim

 ARG PORT=8051
-
 WORKDIR /app

-# Install uv
+# Install uv first (cached layer)
 RUN pip install uv

-# Copy the MCP server files
+# Copy only dependency files first (cached layer)
+COPY pyproject.toml uv.lock* ./
+
+# Install dependencies (cached unless pyproject.toml changes)
+RUN uv pip install --system .
+
+# Run crawl4ai-setup for Playwright (cached layer)
+RUN crawl4ai-setup
+
+# Copy source code last (only this invalidates on code changes)
 COPY . .

-# Install packages directly to the system (no virtual environment)
-# Combining commands to reduce Docker layers
-RUN uv pip install --system -e . && \
-    crawl4ai-setup
+# Re-install in editable mode (fast, deps already installed)
+RUN uv pip install --system -e . --no-deps

 EXPOSE ${PORT}

-# Command to run the MCP server
 CMD ["python", "src/crawl4ai_mcp.py"]
diff --git a/servers/mcp-crawl4ai-rag/pyproject.toml b/servers/mcp-crawl4ai-rag/pyproject.toml
index 6961309..e813006 100644
--- a/servers/mcp-crawl4ai-rag/pyproject.toml
+++ b/servers/mcp-crawl4ai-rag/pyproject.toml
@@ -10,6 +10,5 @@ dependencies = [
     "supabase==2.15.1",
     "openai==1.71.0",
     "dotenv==0.9.9",
-    "sentence-transformers>=4.1.0",
     "neo4j>=5.28.1",
 ]
diff --git a/servers/mcp-crawl4ai-rag/src/crawl4ai_mcp.py b/servers/mcp-crawl4ai-rag/src/crawl4ai_mcp.py
index 990811d..6b7d79f 100644
--- a/servers/mcp-crawl4ai-rag/src/crawl4ai_mcp.py
+++ b/servers/mcp-crawl4ai-rag/src/crawl4ai_mcp.py
@@ -6,8 +6,15 @@ the appropriate crawl method based on URL type (sitemap, txt file, or regular webpage).
 Also includes AI hallucination detection and repository parsing tools using Neo4j knowledge graphs.
 """
 from mcp.server.fastmcp import FastMCP, Context
-from sentence_transformers import CrossEncoder
 from contextlib import asynccontextmanager
+
+# Optional import for reranking functionality
+try:
+    from sentence_transformers import CrossEncoder
+    RERANKING_AVAILABLE = True
+except ImportError:
+    CrossEncoder = None
+    RERANKING_AVAILABLE = False
 from collections.abc import AsyncIterator
 from dataclasses import dataclass
 from typing import List, Dict, Any, Optional
@@ -146,14 +153,16 @@ async def crawl4ai_lifespan(server: FastMCP) -> AsyncIterator[Crawl4AIContext]:
     # Initialize Supabase client
     supabase_client = get_supabase_client()

-    # Initialize cross-encoder model for reranking if enabled
+    # Initialize cross-encoder model for reranking if enabled and available
     reranking_model = None
-    if os.getenv("USE_RERANKING", "false") == "true":
+    if os.getenv("USE_RERANKING", "false") == "true" and RERANKING_AVAILABLE:
         try:
             reranking_model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
         except Exception as e:
             print(f"Failed to load reranking model: {e}")
             reranking_model = None
+    elif os.getenv("USE_RERANKING", "false") == "true" and not RERANKING_AVAILABLE:
+        print("Reranking requested but sentence-transformers not installed - skipping")

     # Initialize Neo4j components if configured and enabled
     knowledge_validator = None