Replace the nginx-based `supervisor` proxy service in ai/compose.yaml with the
`supervisor-ui` image, add a container healthcheck, point the Traefik service
at port 3000 instead of 80, and delete the now-unused ai/supervisor-nginx.conf.

NOTE(review): leading whitespace inside the hunks was reconstructed from a
whitespace-collapsed copy of this patch; verify it against the target files
before applying.

diff --git a/ai/compose.yaml b/ai/compose.yaml
index cf47842..4b997d0 100644
--- a/ai/compose.yaml
+++ b/ai/compose.yaml
@@ -235,20 +235,26 @@ services:
       # Watchtower
       - 'com.centurylinklabs.watchtower.enable=${WATCHTOWER_LABEL_ENABLE}'
 
-  # Supervisor - Process manager web UI (proxies to RunPod GPU)
-  supervisor:
-    image: nginx:alpine
-    container_name: ${AI_COMPOSE_PROJECT_NAME}_supervisor
+  # Supervisor UI - Modern web interface for RunPod process management
+  supervisor-ui:
+    image: dev.pivoine.art/valknar/supervisor-ui:latest
+    container_name: ${AI_COMPOSE_PROJECT_NAME}_supervisor_ui
     restart: unless-stopped
     environment:
       TZ: ${TIMEZONE:-Europe/Berlin}
-      SUPERVISOR_BACKEND_HOST: ${GPU_TAILSCALE_IP}
-      SUPERVISOR_BACKEND_PORT: ${SUPERVISOR_BACKEND_PORT:-9001}
-    volumes:
-      - ./supervisor-nginx.conf:/etc/nginx/nginx.conf.template:ro
-    command: /bin/sh -c "envsubst '$${SUPERVISOR_BACKEND_HOST},$${SUPERVISOR_BACKEND_PORT}' < /etc/nginx/nginx.conf.template > /etc/nginx/nginx.conf && exec nginx -g 'daemon off;'"
+      NODE_ENV: production
+      # Connect to RunPod Supervisor via Tailscale
+      SUPERVISOR_HOST: ${GPU_TAILSCALE_IP}
+      SUPERVISOR_PORT: ${SUPERVISOR_BACKEND_PORT:-9001}
+      # No auth needed - Supervisor has auth disabled (protected by Authelia)
     networks:
       - compose_network
+    healthcheck:
+      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3000/api/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 10s
     labels:
       - 'traefik.enable=${AI_SUPERVISOR_TRAEFIK_ENABLED:-true}'
       # HTTP to HTTPS redirect
@@ -262,8 +268,8 @@ services:
       - 'traefik.http.routers.${AI_COMPOSE_PROJECT_NAME}-supervisor-web-secure.entrypoints=web-secure'
       - 'traefik.http.middlewares.${AI_COMPOSE_PROJECT_NAME}-supervisor-web-secure-compress.compress=true'
       - 'traefik.http.routers.${AI_COMPOSE_PROJECT_NAME}-supervisor-web-secure.middlewares=${AI_COMPOSE_PROJECT_NAME}-supervisor-web-secure-compress,net-authelia,security-headers@file'
-      # Service
-      - 'traefik.http.services.${AI_COMPOSE_PROJECT_NAME}-supervisor-web-secure.loadbalancer.server.port=80'
+      # Service (port 3000 for Next.js app)
+      - 'traefik.http.services.${AI_COMPOSE_PROJECT_NAME}-supervisor-web-secure.loadbalancer.server.port=3000'
       - 'traefik.docker.network=${NETWORK_NAME}'
       # Watchtower
       - 'com.centurylinklabs.watchtower.enable=${WATCHTOWER_LABEL_ENABLE}'
diff --git a/ai/supervisor-nginx.conf b/ai/supervisor-nginx.conf
deleted file mode 100644
index 4e00b6e..0000000
--- a/ai/supervisor-nginx.conf
+++ /dev/null
@@ -1,38 +0,0 @@
-events {
-    worker_connections 1024;
-}
-
-http {
-    # Proxy settings
-    proxy_http_version 1.1;
-    proxy_buffering off;
-    proxy_set_header Host $host;
-    proxy_set_header X-Real-IP $remote_addr;
-    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-    proxy_set_header X-Forwarded-Proto $scheme;
-
-    # Timeouts for Supervisor web UI (quick responses)
-    proxy_connect_timeout 60;
-    proxy_send_timeout 60;
-    proxy_read_timeout 60;
-    send_timeout 60;
-
-    server {
-        listen 80;
-        server_name _;
-
-        location / {
-            # Proxy to Supervisor on RunPod via Tailscale
-            proxy_pass http://${SUPERVISOR_BACKEND_HOST}:${SUPERVISOR_BACKEND_PORT};
-
-            # Proxy headers
-            proxy_set_header Host $host;
-            proxy_set_header X-Real-IP $remote_addr;
-            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-            proxy_set_header X-Forwarded-Proto $scheme;
-
-            # Disable buffering for real-time updates
-            proxy_buffering off;
-        }
-    }
-}