diff --git a/vllm/server.py b/vllm/server.py
index d51da13..36dbd03 100644
--- a/vllm/server.py
+++ b/vllm/server.py
@@ -36,7 +36,7 @@ class CompletionRequest(BaseModel):
     """OpenAI-compatible completion request"""
     model: str = Field(default="qwen-2.5-7b")
     prompt: str | List[str] = Field(..., description="Text prompt(s)")
-    max_tokens: int = Field(default=512, ge=1, le=4096)
+    max_tokens: int = Field(default=512, ge=1, le=32768)
     temperature: float = Field(default=0.7, ge=0.0, le=2.0)
     top_p: float = Field(default=1.0, ge=0.0, le=1.0)
     n: int = Field(default=1, ge=1, le=10)
@@ -54,7 +54,7 @@ class ChatCompletionRequest(BaseModel):
     """OpenAI-compatible chat completion request"""
     model: str = Field(default="qwen-2.5-7b")
     messages: List[ChatMessage] = Field(..., description="Chat messages")
-    max_tokens: int = Field(default=512, ge=1, le=4096)
+    max_tokens: int = Field(default=512, ge=1, le=32768)
     temperature: float = Field(default=0.7, ge=0.0, le=2.0)
     top_p: float = Field(default=1.0, ge=0.0, le=1.0)
     n: int = Field(default=1, ge=1, le=10)