diff --git a/vllm/server.py b/vllm/server.py index 823b852..d51da13 100644 --- a/vllm/server.py +++ b/vllm/server.py @@ -29,7 +29,7 @@ app = FastAPI(title="Simple vLLM Server", version="1.0.0") # Global engine instance engine: Optional[AsyncLLMEngine] = None -model_name: str = "Qwen/Qwen2.5-7B-Instruct" +model_name: str = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-7B-Instruct") # Request/Response models class CompletionRequest(BaseModel): @@ -294,7 +294,7 @@ if __name__ == "__main__": # Get configuration from environment host = os.getenv("VLLM_HOST", "0.0.0.0") - port = int(os.getenv("VLLM_PORT", "8000")) + port = int(os.getenv("PORT", os.getenv("VLLM_PORT", "8000"))) logger.info(f"Starting vLLM server on {host}:{port}")