diff --git a/ai/litellm-config.yaml b/ai/litellm-config.yaml
index b43f433..b73c189 100644
--- a/ai/litellm-config.yaml
+++ b/ai/litellm-config.yaml
@@ -34,7 +34,6 @@ model_list:
     litellm_params:
       model: hosted_vllm/openai/qwen-2.5-7b # hosted_vllm/openai/ prefix for proper streaming
       api_base: os.environ/GPU_VLLM_QWEN_URL # Direct to vLLM Qwen server
-      api_key: dummy
       rpm: 1000
       tpm: 100000
       timeout: 600 # 10 minutes for generation
@@ -47,7 +46,6 @@ model_list:
     litellm_params:
       model: hosted_vllm/openai/llama-3.1-8b # hosted_vllm/openai/ prefix for proper streaming
       api_base: os.environ/GPU_VLLM_LLAMA_URL # Direct to vLLM Llama server
-      api_key: dummy
       rpm: 1000
       tpm: 100000
       timeout: 600 # 10 minutes for generation