From b1817230c6387a58392f7510be7748683e7b59c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?=
Date: Sun, 30 Nov 2025 21:58:15 +0100
Subject: [PATCH] fix: redirect print() to stderr for stdio MCP transport
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

print() statements to stdout were corrupting the JSON-RPC protocol when
using stdio transport, causing a "connection closed: initialize response"
error. All diagnostic messages now go to stderr.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 config.toml                                  |  4 ++
 servers/mcp-crawl4ai-rag/src/crawl4ai_mcp.py | 52 ++++++++++----------
 2 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/config.toml b/config.toml
index 0d59e2e..942ab0c 100644
--- a/config.toml
+++ b/config.toml
@@ -2,6 +2,10 @@ model_provider = "litellm"
 # model = "anthropic/claude-sonnet-4-5-20250929"
 model = "hosted_vllm/meta-llama/Llama-3.1-8B-Instruct"
 
+# Override litellm provider to set max_tokens
+[model_providers.litellm]
+max_tokens = 8192
+
 [projects."/home/valknar"]
 trust_level = "trusted"
 
diff --git a/servers/mcp-crawl4ai-rag/src/crawl4ai_mcp.py b/servers/mcp-crawl4ai-rag/src/crawl4ai_mcp.py
index 6b7d79f..d0647b1 100644
--- a/servers/mcp-crawl4ai-rag/src/crawl4ai_mcp.py
+++ b/servers/mcp-crawl4ai-rag/src/crawl4ai_mcp.py
@@ -159,10 +159,10 @@ async def crawl4ai_lifespan(server: FastMCP) -> AsyncIterator[Crawl4AIContext]:
         try:
             reranking_model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
         except Exception as e:
-            print(f"Failed to load reranking model: {e}")
+            print(f"Failed to load reranking model: {e}", file=sys.stderr)
             reranking_model = None
     elif os.getenv("USE_RERANKING", "false") == "true" and not RERANKING_AVAILABLE:
-        print("Reranking requested but sentence-transformers not installed - skipping")
+        print("Reranking requested but sentence-transformers not installed - skipping", file=sys.stderr)
 
     # Initialize Neo4j components if configured and enabled
     knowledge_validator = None
@@ -178,26 +178,26 @@ async def crawl4ai_lifespan(server: FastMCP) -> AsyncIterator[Crawl4AIContext]:
 
         if neo4j_uri and neo4j_user and neo4j_password:
             try:
-                print("Initializing knowledge graph components...")
-
+                print("Initializing knowledge graph components...", file=sys.stderr)
+
                 # Initialize knowledge graph validator
                 knowledge_validator = KnowledgeGraphValidator(neo4j_uri, neo4j_user, neo4j_password)
                 await knowledge_validator.initialize()
-                print("✓ Knowledge graph validator initialized")
-
+                print("✓ Knowledge graph validator initialized", file=sys.stderr)
+
                 # Initialize repository extractor
                 repo_extractor = DirectNeo4jExtractor(neo4j_uri, neo4j_user, neo4j_password)
                 await repo_extractor.initialize()
-                print("✓ Repository extractor initialized")
-
+                print("✓ Repository extractor initialized", file=sys.stderr)
+
             except Exception as e:
-                print(f"Failed to initialize Neo4j components: {format_neo4j_error(e)}")
+                print(f"Failed to initialize Neo4j components: {format_neo4j_error(e)}", file=sys.stderr)
                 knowledge_validator = None
                 repo_extractor = None
         else:
-            print("Neo4j credentials not configured - knowledge graph tools will be unavailable")
+            print("Neo4j credentials not configured - knowledge graph tools will be unavailable", file=sys.stderr)
     else:
-        print("Knowledge graph functionality disabled - set USE_KNOWLEDGE_GRAPH=true to enable")
+        print("Knowledge graph functionality disabled - set USE_KNOWLEDGE_GRAPH=true to enable", file=sys.stderr)
 
     try:
         yield Crawl4AIContext(
@@ -213,15 +213,15 @@ async def crawl4ai_lifespan(server: FastMCP) -> AsyncIterator[Crawl4AIContext]:
         if knowledge_validator:
             try:
                 await knowledge_validator.close()
-                print("✓ Knowledge graph validator closed")
+                print("✓ Knowledge graph validator closed", file=sys.stderr)
             except Exception as e:
-                print(f"Error closing knowledge validator: {e}")
+                print(f"Error closing knowledge validator: {e}", file=sys.stderr)
         if repo_extractor:
             try:
                 await repo_extractor.close()
-                print("✓ Repository extractor closed")
+                print("✓ Repository extractor closed", file=sys.stderr)
             except Exception as e:
-                print(f"Error closing repository extractor: {e}")
+                print(f"Error closing repository extractor: {e}", file=sys.stderr)
 
 # Initialize FastMCP server
 mcp = FastMCP(
@@ -251,23 +251,23 @@ def rerank_results(model: CrossEncoder, query: str, results: List[Dict[str, Any]
     try:
         # Extract content from results
        texts = [result.get(content_key, "") for result in results]
-
+
         # Create pairs of [query, document] for the cross-encoder
         pairs = [[query, text] for text in texts]
-
+
         # Get relevance scores from the cross-encoder
         scores = model.predict(pairs)
-
+
         # Add scores to results and sort by score (descending)
         for i, result in enumerate(results):
             result["rerank_score"] = float(scores[i])
-
+
         # Sort by rerank score
         reranked = sorted(results, key=lambda x: x.get("rerank_score", 0), reverse=True)
-
+
         return reranked
     except Exception as e:
-        print(f"Error during reranking: {e}")
+        print(f"Error during reranking: {e}", file=sys.stderr)
         return results
 
 def is_sitemap(url: str) -> bool:
@@ -312,7 +312,7 @@ def parse_sitemap(sitemap_url: str) -> List[str]:
             tree = ElementTree.fromstring(resp.content)
             urls = [loc.text for loc in tree.findall('.//{*}loc')]
         except Exception as e:
-            print(f"Error parsing sitemap XML: {e}")
+            print(f"Error parsing sitemap XML: {e}", file=sys.stderr)
 
     return urls
 
@@ -1132,7 +1132,7 @@ async def check_ai_script_hallucinations(ctx: Context, script_path: str) -> str:
         analysis_result = analyzer.analyze_script(script_path)
 
         if analysis_result.errors:
-            print(f"Analysis warnings for {script_path}: {analysis_result.errors}")
+            print(f"Analysis warnings for {script_path}: {analysis_result.errors}", file=sys.stderr)
 
         # Step 2: Validate against knowledge graph
         validation_result = await knowledge_validator.validate_script(analysis_result)
@@ -1680,9 +1680,9 @@ async def parse_github_repository(ctx: Context, repo_url: str) -> str:
         repo_name = validation["repo_name"]
 
         # Parse the repository (this includes cloning, analysis, and Neo4j storage)
-        print(f"Starting repository analysis for: {repo_name}")
+        print(f"Starting repository analysis for: {repo_name}", file=sys.stderr)
         await repo_extractor.analyze_repository(repo_url)
-        print(f"Repository analysis completed for: {repo_name}")
+        print(f"Repository analysis completed for: {repo_name}", file=sys.stderr)
 
         # Query Neo4j for statistics about the parsed repository
         async with repo_extractor.driver.session() as session:
@@ -1774,7 +1774,7 @@ async def crawl_markdown_file(crawler: AsyncWebCrawler, url: str) -> List[Dict[s
     if result.success and result.markdown:
         return [{'url': url, 'markdown': result.markdown}]
     else:
-        print(f"Failed to crawl {url}: {result.error_message}")
+        print(f"Failed to crawl {url}: {result.error_message}", file=sys.stderr)
         return []
 
 async def crawl_batch(crawler: AsyncWebCrawler, urls: List[str], max_concurrent: int = 10) -> List[Dict[str, Any]]:
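
For context, a minimal sketch of the constraint this patch enforces: on a stdio transport, stdout is reserved for JSON-RPC frames, so any stray print() output gets interleaved with protocol messages and breaks the client's parser, while stderr is free for diagnostics. The log() and send_response() helpers below are illustrative names only (not FastMCP or crawl4ai-mcp APIs), and the newline-delimited framing is an assumption made for the example.

import json
import sys

def log(message: str) -> None:
    # Diagnostics go to stderr; the MCP client never parses this stream.
    print(message, file=sys.stderr)

def send_response(payload: dict) -> None:
    # Protocol frames go to stdout only, one JSON message per line (assumed framing).
    sys.stdout.write(json.dumps(payload) + "\n")
    sys.stdout.flush()

log("server starting")  # safe: goes to stderr
# A bare print() here would inject plain text into the JSON-RPC stream, and the
# client would give up while waiting for a well-formed initialize response.
send_response({"jsonrpc": "2.0", "id": 1, "result": {"capabilities": {}}})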