chore: remove qwen

This commit is contained in:
2025-11-26 21:03:43 +01:00
parent b63ddbffbd
commit cef233b678

View File

@@ -29,65 +29,52 @@ model_list:
# ===========================================================================
# Direct connections to dedicated vLLM servers (no orchestrator)
# NOTE(review): reconstructed from a diff rendering whose +/- markers and
# indentation were stripped. The Qwen entry was the deleted side of the diff
# (commit: "chore: remove qwen") and is omitted; duplicate old/new line pairs
# are collapsed to a single copy. Nesting of stream/supports_system_messages
# under litellm_params follows LiteLLM convention — TODO confirm against repo.
# Text Generation - Llama 3.1 8B (Port 8001)
- model_name: llama-3.1-8b
  litellm_params:
    model: hosted_vllm/openai/llama-3.1-8b  # hosted_vllm/openai/ prefix for proper streaming
    api_base: os.environ/GPU_VLLM_LLAMA_URL  # Direct to vLLM Llama server
    api_key: "EMPTY"  # vLLM doesn't validate API keys
    rpm: 1000
    tpm: 100000
    timeout: 600  # 10 minutes for generation
    stream_timeout: 600
    supports_system_messages: true  # Llama supports system messages
    stream: true  # Enable streaming by default
# Embeddings - BGE Large (Port 8002)
- model_name: bge-large-en-v1.5
  litellm_params:
    model: hosted_vllm/openai/bge-large-en-v1.5
    api_base: os.environ/GPU_VLLM_EMBED_URL  # Direct to vLLM embedding server
    api_key: "EMPTY"
    rpm: 1000
    tpm: 500000

litellm_settings:
  drop_params: false  # DISABLED: Was breaking streaming
  set_verbose: true  # Enable verbose logging for debugging streaming issues
  # Enable caching now that streaming is fixed
  cache: true
  cache_params:
    type: redis
    host: core_redis
    port: 6379
    ttl: 3600  # Cache for 1 hour
  # Force strip specific parameters globally
  allowed_fails: 0
  # Modify params before sending to provider
  modify_params: false  # DISABLED: Was breaking streaming
  # Enable success and failure logging but minimize overhead
  success_callback: []  # Disable all success callbacks to reduce DB writes
  failure_callback: []  # Disable all failure callbacks

router_settings:
  allowed_fails: 0
  # Drop unsupported parameters
  default_litellm_params:
    drop_params: false  # DISABLED: Was breaking streaming

general_settings:
  disable_responses_id_security: true