From 91089d3edcbe1037bc98fceb0534a815bfbb0ccd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?=
Date: Fri, 21 Nov 2025 19:28:16 +0100
Subject: [PATCH] feat: add /v1/models endpoint and systemd service for orchestrator

- Add OpenAI-compatible /v1/models endpoint to list available models
- Create systemd service file for proper service management
- Service runs as root with automatic restart on failure
- Logs to systemd journal for easy debugging
---
Review note: both hunks were edited by hand after export (endpoint rewritten
as a comprehension, KillMode=process replaced by KillMode=mixed). The
abbreviated blob ids on the "index" lines still refer to the original export.

 model-orchestrator/orchestrator.py | 29 +++++++++++++++++++++++++++++
 systemd/ai-orchestrator.service    | 27 +++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 systemd/ai-orchestrator.service

diff --git a/model-orchestrator/orchestrator.py b/model-orchestrator/orchestrator.py
index 822d61b..4f1225b 100644
--- a/model-orchestrator/orchestrator.py
+++ b/model-orchestrator/orchestrator.py
@@ -282,6 +282,35 @@ async def list_models():
     }
 
 
+@app.get("/v1/models")
+async def list_models_openai():
+    """List every registered model in the OpenAI ``/v1/models`` format.
+
+    Returns:
+        A dict ``{"object": "list", "data": [...]}`` holding one entry per
+        key of ``model_registry``.  ``created`` is the time of the request
+        rather than a stored creation date.
+    """
+    # Take one timestamp so every entry in a response reports the same value.
+    created_at = int(time.time())
+    return {
+        "object": "list",
+        "data": [
+            {
+                "id": model_name,
+                "object": "model",
+                "created": created_at,
+                "owned_by": "pivoine-gpu",
+                "permission": [],
+                "root": model_name,
+                "parent": None,
+            }
+            # Only the registry keys are needed; the per-model info is unused.
+            for model_name in model_registry
+        ],
+    }
+
+
 @app.post("/v1/chat/completions")
 async def chat_completions(request: Request):
     """OpenAI-compatible chat completions endpoint (text models)"""
diff --git a/systemd/ai-orchestrator.service b/systemd/ai-orchestrator.service
new file mode 100644
index 0000000..8610ce9
--- /dev/null
+++ b/systemd/ai-orchestrator.service
@@ -0,0 +1,27 @@
+[Unit]
+Description=AI Model Orchestrator for RunPod
+After=network.target
+StartLimitIntervalSec=0
+
+[Service]
+Type=simple
+Restart=always
+RestartSec=10
+User=root
+WorkingDirectory=/workspace/ai
+EnvironmentFile=/workspace/ai/.env
+ExecStart=/usr/bin/python3 /workspace/ai/model-orchestrator/orchestrator_subprocess.py
+StandardOutput=journal
+StandardError=journal
+SyslogIdentifier=ai-orchestrator
+
+# Process management
+# "mixed" sends SIGTERM to the main process only and afterwards SIGKILLs any
+# processes still left in the unit's control group, so child processes
+# cannot outlive a stop or restart of this service.
+KillMode=mixed
+KillSignal=SIGTERM
+TimeoutStopSec=30
+
+[Install]
+WantedBy=multi-user.target