fix: make model name and port configurable via environment variables
@@ -29,7 +29,7 @@ app = FastAPI(title="Simple vLLM Server", version="1.0.0")
 # Global engine instance
 engine: Optional[AsyncLLMEngine] = None
-model_name: str = "Qwen/Qwen2.5-7B-Instruct"
+model_name: str = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-7B-Instruct")
 
 # Request/Response models
 class CompletionRequest(BaseModel):
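The new default is evaluated at module level, so MODEL_NAME must be set in the environment before the module is loaded; the hunk also assumes os is imported at the top of the file. A minimal sketch (not part of the patch) of exercising the override, using a hypothetical module name server for the patched file:

import os

# MODEL_NAME must be exported before the server module is imported,
# because the default is read once at module load time.
os.environ["MODEL_NAME"] = "Qwen/Qwen2.5-1.5B-Instruct"

import server  # hypothetical module name for the file being patched

print(server.model_name)  # -> "Qwen/Qwen2.5-1.5B-Instruct"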
@@ -294,7 +294,7 @@ if __name__ == "__main__":
 # Get configuration from environment
 host = os.getenv("VLLM_HOST", "0.0.0.0")
-port = int(os.getenv("VLLM_PORT", "8000"))
+port = int(os.getenv("PORT", os.getenv("VLLM_PORT", "8000")))
 
 logger.info(f"Starting vLLM server on {host}:{port}")
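The second hunk makes PORT take precedence over the existing VLLM_PORT variable, with 8000 as the final fallback. A minimal sketch (not part of the patch) that mirrors the resolution order:

import os

def resolve_port() -> int:
    # Same expression as the patched line: PORT wins over VLLM_PORT, default 8000.
    return int(os.getenv("PORT", os.getenv("VLLM_PORT", "8000")))

os.environ.pop("PORT", None)
os.environ["VLLM_PORT"] = "9000"
assert resolve_port() == 9000   # VLLM_PORT is used when PORT is unset

os.environ["PORT"] = "8080"
assert resolve_port() == 8080   # PORT overrides VLLM_PORT when both are set

In practice the operator would export MODEL_NAME, VLLM_HOST, and PORT (or VLLM_PORT) before launching the server.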