fix: make model name and port configurable via environment variables (MODEL_NAME; PORT, falling back to VLLM_PORT)

This commit is contained in:
2025-11-23 13:45:01 +01:00
parent 16112e50f6
commit a8c2ee1b90

View File

@@ -29,7 +29,7 @@ app = FastAPI(title="Simple vLLM Server", version="1.0.0")
# Global engine instance # Global engine instance
engine: Optional[AsyncLLMEngine] = None engine: Optional[AsyncLLMEngine] = None
model_name: str = "Qwen/Qwen2.5-7B-Instruct" model_name: str = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-7B-Instruct")
# Request/Response models # Request/Response models
class CompletionRequest(BaseModel): class CompletionRequest(BaseModel):
@@ -294,7 +294,7 @@ if __name__ == "__main__":
# Get configuration from environment # Get configuration from environment
host = os.getenv("VLLM_HOST", "0.0.0.0") host = os.getenv("VLLM_HOST", "0.0.0.0")
port = int(os.getenv("VLLM_PORT", "8000")) port = int(os.getenv("PORT", os.getenv("VLLM_PORT", "8000")))
logger.info(f"Starting vLLM server on {host}:{port}") logger.info(f"Starting vLLM server on {host}:{port}")