feat: add /v1/models endpoint and systemd service for orchestrator
- Add OpenAI-compatible /v1/models endpoint to list available models
- Create systemd service file for proper service management
- Service runs as root with automatic restart on failure
- Logs to systemd journal for easy debugging
This commit is contained in:
@@ -282,6 +282,27 @@ async def list_models():
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/v1/models")
async def list_models_openai():
    """OpenAI-compatible models listing endpoint.

    Returns:
        A dict of the form ``{"object": "list", "data": [...]}`` with one
        entry per key in ``model_registry``. Each entry uses the registry
        key as both its ``id`` and its ``root``.
    """
    # Sample the clock once so every entry in a single response carries the
    # same ``created`` value. NOTE: this is the time of the request, not a
    # real model creation time.
    created_at = int(time.time())
    return {
        "object": "list",
        "data": [
            {
                "id": model_name,
                "object": "model",
                "created": created_at,
                "owned_by": "pivoine-gpu",
                "permission": [],
                "root": model_name,
                "parent": None,
            }
            # Only the registry keys are needed; the per-model info is unused.
            for model_name in model_registry
        ],
    }
|
||||||
|
|
||||||
|
|
||||||
@app.post("/v1/chat/completions")
|
@app.post("/v1/chat/completions")
|
||||||
async def chat_completions(request: Request):
|
async def chat_completions(request: Request):
|
||||||
"""OpenAI-compatible chat completions endpoint (text models)"""
|
"""OpenAI-compatible chat completions endpoint (text models)"""
|
||||||
|
|||||||
24
systemd/ai-orchestrator.service
Normal file
24
systemd/ai-orchestrator.service
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
# systemd unit for the AI model orchestrator.
# Runs the orchestrator script as root, restarts it whenever it exits, and
# sends its output to the journal under the "ai-orchestrator" identifier.

[Unit]
Description=AI Model Orchestrator for RunPod
After=network.target
# 0 disables start rate limiting, so Restart=always never gives up.
StartLimitIntervalSec=0

[Service]
Type=simple
Restart=always
# Wait 10 seconds between an exit and the next start attempt.
RestartSec=10
User=root
WorkingDirectory=/workspace/ai
# Environment variables are loaded from this file before ExecStart runs.
EnvironmentFile=/workspace/ai/.env
ExecStart=/usr/bin/python3 /workspace/ai/model-orchestrator/orchestrator_subprocess.py
StandardOutput=journal
StandardError=journal
SyslogIdentifier=ai-orchestrator

# Process management
# "mixed" sends SIGTERM to the main process only, giving it up to
# TimeoutStopSec to shut down, then SIGKILLs whatever is left in the unit's
# control group. With "process", child processes started by the orchestrator
# would be left running after a stop or restart.
KillMode=mixed
KillSignal=SIGTERM
TimeoutStopSec=30

[Install]
WantedBy=multi-user.target
|
||||||
Reference in New Issue
Block a user