From 94080da3417fd929816f20432f6335e02b66f2eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?=
Date: Fri, 21 Nov 2025 18:10:53 +0100
Subject: [PATCH] fix: remove incorrect start-vllm.sh that would break orchestrator architecture

---
 start-vllm.sh | 12 ------------
 1 file changed, 12 deletions(-)
 delete mode 100755 start-vllm.sh

diff --git a/start-vllm.sh b/start-vllm.sh
deleted file mode 100755
index ee613a9..0000000
--- a/start-vllm.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-# Start vLLM server with OpenAI-compatible API
-# This uses vLLM's built-in server which properly handles streaming
-
-vllm serve Qwen/Qwen2.5-7B-Instruct \
-    --host 0.0.0.0 \
-    --port 9000 \
-    --tensor-parallel-size 1 \
-    --gpu-memory-utilization 0.85 \
-    --max-model-len 4096 \
-    --download-dir /workspace/huggingface_cache \
-    --trust-remote-code
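
For reference, the deleted script launched vLLM's built-in OpenAI-compatible server. A minimal sketch of a streaming chat request against that endpoint, assuming the server were running on port 9000 with the model named in the script, would look like this (illustrative only, not part of the patch):

# Illustrative sketch: streaming request to the OpenAI-compatible endpoint
# the deleted script would have exposed (host/port/model taken from the script).
curl http://localhost:9000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
          "model": "Qwen/Qwen2.5-7B-Instruct",
          "messages": [{"role": "user", "content": "Hello"}],
          "stream": true
        }'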