fix: use EMPTY api_key for vLLM servers

This commit is contained in:
2025-11-23 16:17:27 +01:00
parent 55d9bef18a
commit 64c02228d8

View File

@@ -34,6 +34,7 @@ model_list:
litellm_params:
model: hosted_vllm/openai/qwen-2.5-7b # hosted_vllm/openai/ prefix for proper streaming
api_base: os.environ/GPU_VLLM_QWEN_URL # Direct to vLLM Qwen server
api_key: "EMPTY" # vLLM doesn't validate API keys
rpm: 1000
tpm: 100000
timeout: 600 # 10 minutes for generation
@@ -46,6 +47,7 @@ model_list:
litellm_params:
model: hosted_vllm/openai/llama-3.1-8b # hosted_vllm/openai/ prefix for proper streaming
api_base: os.environ/GPU_VLLM_LLAMA_URL # Direct to vLLM Llama server
api_key: "EMPTY" # vLLM doesn't validate API keys
rpm: 1000
tpm: 100000
timeout: 600 # 10 minutes for generation