diff --git a/services/vllm/config_llama.yaml b/services/vllm/config_llama.yaml
index f1d4422..280343b 100644
--- a/services/vllm/config_llama.yaml
+++ b/services/vllm/config_llama.yaml
@@ -3,6 +3,6 @@ host: "0.0.0.0"
 port: 8001
 uvicorn-log-level: "info"
 gpu-memory-utilization: 0.95
-max-model-len: 32768
+max-model-len: 20480
 dtype: auto
 enforce-eager: false