fix: increase max_tokens limit from 4096 to 32768 for LLMX CLI support

commit fdd724298a
parent a8c2ee1b90
Date:   2025-11-23 15:10:06 +01:00

@@ -36,7 +36,7 @@ class CompletionRequest(BaseModel):
"""OpenAI-compatible completion request"""
model: str = Field(default="qwen-2.5-7b")
prompt: str | List[str] = Field(..., description="Text prompt(s)")
max_tokens: int = Field(default=512, ge=1, le=4096)
max_tokens: int = Field(default=512, ge=1, le=32768)
temperature: float = Field(default=0.7, ge=0.0, le=2.0)
top_p: float = Field(default=1.0, ge=0.0, le=1.0)
n: int = Field(default=1, ge=1, le=10)
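
For context, a minimal sketch of how the updated bound behaves (assuming pydantic v2 and Python 3.10+; this is a trimmed-down stand-in for CompletionRequest, not the full model from the repository):

from typing import List

from pydantic import BaseModel, Field, ValidationError


# Trimmed-down stand-in for CompletionRequest, kept only to illustrate
# the new ge/le bounds on max_tokens.
class CompletionRequestSketch(BaseModel):
    model: str = Field(default="qwen-2.5-7b")
    prompt: str | List[str] = Field(..., description="Text prompt(s)")
    max_tokens: int = Field(default=512, ge=1, le=32768)


# 32768 is now accepted; values above the new ceiling are still rejected.
CompletionRequestSketch(prompt="hello", max_tokens=32768)

try:
    CompletionRequestSketch(prompt="hello", max_tokens=32769)
except ValidationError as exc:
    print(exc)  # e.g. "Input should be less than or equal to 32768" under pydantic v2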
@@ -54,7 +54,7 @@ class ChatCompletionRequest(BaseModel):
"""OpenAI-compatible chat completion request"""
model: str = Field(default="qwen-2.5-7b")
messages: List[ChatMessage] = Field(..., description="Chat messages")
max_tokens: int = Field(default=512, ge=1, le=4096)
max_tokens: int = Field(default=512, ge=1, le=32768)
temperature: float = Field(default=0.7, ge=0.0, le=2.0)
top_p: float = Field(default=1.0, ge=0.0, le=1.0)
n: int = Field(default=1, ge=1, le=10)
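
The chat endpoint gets the same ceiling. A hypothetical OpenAI-style request body (illustrative only, not taken from the repository) that the old le=4096 bound would have rejected and the new bound accepts:

chat_request_body = {
    "model": "qwen-2.5-7b",
    "messages": [{"role": "user", "content": "Summarize this repository."}],
    "max_tokens": 8192,  # rejected before this change (limit was 4096), accepted now
    "temperature": 0.7,
}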