Files
runpod/playbook.yml
Sebastian Krüger c9b01eef68 refactor: consolidate model management into Ansible playbook
Remove flux/musicgen standalone implementations in favor of ComfyUI:
- Delete models/flux/ and models/musicgen/ directories
- Remove redundant scripts (install.sh, download-models.sh, prepare-template.sh)
- Update README.md to reference Ansible playbook commands
- Update playbook.yml to remove flux/musicgen service definitions
- Add COMFYUI_MODELS.md with comprehensive model installation guide
- Update stop-all.sh to only manage orchestrator and vLLM services

All model downloads and dependency management now handled via
Ansible playbook tags (base, python, vllm, comfyui, comfyui-essential).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-22 00:31:26 +01:00

902 lines
30 KiB
YAML

---
#
# RunPod AI Infrastructure Ansible Playbook
#
# This playbook provisions a RunPod GPU instance with multi-modal AI services.
# It replaces all bash scripts with reproducible Ansible tasks.
#
# Usage:
# ansible-playbook playbook.yml # Full deployment
# ansible-playbook playbook.yml --tags base # Install system packages
# ansible-playbook playbook.yml --tags python # Setup Python environment
# ansible-playbook playbook.yml --tags models # Download models only
# ansible-playbook playbook.yml --tags validate # Validate installation
#
# Tags:
# base - System packages and dependencies
# python - Python environment setup
# dependencies - Install Python packages
# models - Download AI models (vLLM, Flux, MusicGen)
# comfyui - Install and configure ComfyUI base
# comfyui-models-image - Download ComfyUI image generation models
# comfyui-models-video - Download ComfyUI video generation models
# comfyui-models-audio - Download ComfyUI audio generation models
# comfyui-models-support - Download CLIP, IP-Adapter, ControlNet models
# comfyui-models-all - Download all ComfyUI models
# comfyui-nodes - Install essential custom nodes
# comfyui-essential - Quick setup (ComfyUI + essential models only)
# tailscale - Install and configure Tailscale
# systemd - Configure systemd services
# validate - Health checks and validation
#
- name: Provision RunPod GPU Instance for AI Services
hosts: localhost
connection: local
become: false
vars:
# Paths
workspace_dir: /workspace
ai_dir: "{{ workspace_dir }}/ai"
cache_dir: "{{ workspace_dir }}/huggingface_cache"
models_dir: "{{ workspace_dir }}/models"
# Python configuration
python_version: "3.10"
pip_version: "23.3.1"
# Model configuration
models:
vllm:
name: "Qwen/Qwen2.5-7B-Instruct"
size_gb: 14
flux:
name: "black-forest-labs/FLUX.1-schnell"
size_gb: 12
musicgen:
name: "facebook/musicgen-medium"
size_gb: 11
# ========================================================================
# ComfyUI Models - Comprehensive List for 24GB GPU
# ========================================================================
# ComfyUI Image Generation Models
comfyui_image_models:
# FLUX Models (Black Forest Labs) - State of the art 2025
- name: "black-forest-labs/FLUX.1-schnell"
type: "checkpoint"
category: "image"
size_gb: 23
vram_gb: 23
format: "fp16"
description: "FLUX.1 Schnell - Fast 4-step inference"
essential: true
- name: "black-forest-labs/FLUX.1-dev"
type: "checkpoint"
category: "image"
size_gb: 23
vram_gb: 23
format: "fp16"
description: "FLUX.1 Dev - Balanced quality/speed"
essential: false
# SDXL Models - Industry standard
- name: "stabilityai/stable-diffusion-xl-base-1.0"
type: "checkpoint"
category: "image"
size_gb: 7
vram_gb: 12
format: "fp16"
description: "SDXL 1.0 Base - 1024x1024 native resolution"
essential: true
- name: "stabilityai/stable-diffusion-xl-refiner-1.0"
type: "checkpoint"
category: "image"
size_gb: 6
vram_gb: 12
format: "fp16"
description: "SDXL Refiner - Enhances base output"
essential: false
# SD 3.5 Models - Latest Stability AI
- name: "stabilityai/stable-diffusion-3.5-large"
type: "checkpoint"
category: "image"
size_gb: 18
vram_gb: 20
format: "fp16"
description: "SD 3.5 Large - MMDiT architecture"
essential: false
# ComfyUI Video Generation Models
comfyui_video_models:
# CogVideoX - Text-to-video
- name: "THUDM/CogVideoX-5b"
type: "video"
category: "video"
size_gb: 20
vram_gb: 12 # with optimizations
description: "CogVideoX 5B - Professional text-to-video"
essential: true
# Stable Video Diffusion
- name: "stabilityai/stable-video-diffusion-img2vid"
type: "video"
category: "video"
size_gb: 8
vram_gb: 16
description: "SVD - 14 frame image-to-video"
essential: true
- name: "stabilityai/stable-video-diffusion-img2vid-xt"
type: "video"
category: "video"
size_gb: 8
vram_gb: 20
description: "SVD-XT - 25 frame image-to-video"
essential: false
# ComfyUI Audio Generation Models
comfyui_audio_models:
- name: "facebook/musicgen-small"
type: "audio"
category: "audio"
size_gb: 3
vram_gb: 4
description: "MusicGen Small - Fast music generation"
essential: false
- name: "facebook/musicgen-medium"
type: "audio"
category: "audio"
size_gb: 11
vram_gb: 8
description: "MusicGen Medium - Balanced quality"
essential: true
- name: "facebook/musicgen-large"
type: "audio"
category: "audio"
size_gb: 22
vram_gb: 16
description: "MusicGen Large - Highest quality"
essential: false
# ComfyUI Supporting Models (CLIP, IP-Adapter, ControlNet)
comfyui_support_models:
# CLIP Vision Models
- name: "openai/clip-vit-large-patch14"
type: "clip_vision"
category: "support"
size_gb: 2
description: "CLIP H - For SD 1.5 IP-Adapter"
essential: true
target_dir: "clip_vision"
- name: "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
type: "clip_vision"
category: "support"
size_gb: 7
description: "CLIP G - For SDXL IP-Adapter"
essential: true
target_dir: "clip_vision"
- name: "google/siglip-so400m-patch14-384"
type: "clip_vision"
category: "support"
size_gb: 2
description: "SigLIP - For FLUX models"
essential: true
target_dir: "clip_vision"
# ComfyUI Custom Nodes - Essential Extensions
comfyui_custom_nodes:
# ComfyUI Manager - Must have
- name: "ComfyUI-Manager"
repo: "https://github.com/ltdrdata/ComfyUI-Manager.git"
category: "manager"
description: "Install/manage custom nodes and models"
essential: true
# Video Generation Nodes
- name: "ComfyUI-VideoHelperSuite"
repo: "https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git"
category: "video"
description: "Video operations and processing"
essential: true
- name: "ComfyUI-AnimateDiff-Evolved"
repo: "https://github.com/Kosinkadink/ComfyUI-AnimateDiff-Evolved.git"
category: "video"
description: "AnimateDiff for video generation"
essential: true
- name: "ComfyUI-CogVideoXWrapper"
repo: "https://github.com/kijai/ComfyUI-CogVideoXWrapper.git"
category: "video"
description: "CogVideoX integration"
essential: false
# Image Enhancement Nodes
- name: "ComfyUI_IPAdapter_plus"
repo: "https://github.com/cubiq/ComfyUI_IPAdapter_plus.git"
category: "image"
description: "IP-Adapter for style transfer"
essential: true
- name: "ComfyUI-Impact-Pack"
repo: "https://github.com/ltdrdata/ComfyUI-Impact-Pack.git"
category: "image"
description: "Auto face enhancement, detailer"
essential: true
- name: "Comfyui-Inspire-Pack"
repo: "https://github.com/ltdrdata/ComfyUI-Inspire-Pack.git"
category: "image"
description: "Additional inspiration tools"
essential: false
# Audio Generation Nodes
- name: "comfyui-sound-lab"
repo: "https://github.com/eigenpunk/comfyui-sound-lab.git"
category: "audio"
description: "MusicGen and Stable Audio integration"
essential: true
# Utility Nodes
- name: "ComfyUI-Advanced-ControlNet"
repo: "https://github.com/Kosinkadink/ComfyUI-Advanced-ControlNet.git"
category: "control"
description: "Advanced ControlNet features"
essential: false
- name: "ComfyUI-3D-Pack"
repo: "https://github.com/MrForExample/ComfyUI-3D-Pack.git"
category: "3d"
description: "3D asset generation"
essential: false
# Service configuration
services:
- name: orchestrator
port: 9000
script: model-orchestrator/orchestrator_subprocess.py
- name: vllm
port: 8001
script: models/vllm/server.py
- name: comfyui
port: 8188
script: models/comfyui/start.sh
tasks:
#
# Base System Setup
#
- name: Base system packages
tags: [base, always]
block:
- name: Check GPU availability
shell: nvidia-smi
register: nvidia_check
changed_when: false
failed_when: nvidia_check.rc != 0
- name: Display GPU information
debug:
msg: "{{ nvidia_check.stdout_lines }}"
- name: Ensure workspace directory exists
file:
path: "{{ workspace_dir }}"
state: directory
mode: '0755'
- name: Update apt cache
apt:
update_cache: yes
cache_valid_time: 3600
become: true
- name: Install base system packages
apt:
name:
- build-essential
- python3-dev
- python3-pip
- python3-venv
- git
- curl
- wget
- vim
- htop
- tmux
- net-tools
state: present
become: true
#
# Python Environment Setup
#
- name: Python environment setup
tags: [python]
block:
- name: Upgrade pip
pip:
name: pip
version: "{{ pip_version }}"
executable: pip3
extra_args: --upgrade
become: true
- name: Install core Python packages
pip:
requirements: "{{ ai_dir }}/core/requirements.txt"
executable: pip3
become: true
#
# Install Model Dependencies
#
- name: Install model dependencies
tags: [dependencies]
block:
- name: Install vLLM dependencies
pip:
requirements: "{{ ai_dir }}/models/vllm/requirements.txt"
executable: pip3
become: true
#
# ComfyUI Installation
#
- name: Install and configure ComfyUI
tags: [comfyui]
block:
- name: Check if ComfyUI is already installed
stat:
path: "{{ workspace_dir }}/ComfyUI"
register: comfyui_check
- name: Clone ComfyUI repository
git:
repo: https://github.com/comfyanonymous/ComfyUI.git
dest: "{{ workspace_dir }}/ComfyUI"
version: master
update: yes
when: not comfyui_check.stat.exists
- name: Install ComfyUI dependencies
pip:
requirements: "{{ workspace_dir }}/ComfyUI/requirements.txt"
executable: pip3
become: true
- name: Install additional ComfyUI dependencies
pip:
requirements: "{{ ai_dir }}/models/comfyui/requirements.txt"
executable: pip3
become: true
- name: Create ComfyUI models directory structure
file:
path: "{{ workspace_dir }}/ComfyUI/models/{{ item }}"
state: directory
mode: '0755'
loop:
# Image Model Directories
- checkpoints
- unet
- vae
- loras
- clip
- clip_vision
- controlnet
- ipadapter
- embeddings
- upscale_models
# Video Model Directories
- video_models
- animatediff_models
- animatediff_motion_lora
# Audio Model Directories
- audio_models
# Utility Directories
- configs
- custom_nodes
- name: Create symlink for Flux model in ComfyUI
file:
src: "{{ cache_dir }}"
dest: "{{ workspace_dir }}/ComfyUI/models/huggingface_cache"
state: link
ignore_errors: yes
- name: Make ComfyUI start script executable
file:
path: "{{ ai_dir }}/models/comfyui/start.sh"
mode: '0755'
- name: Display ComfyUI setup summary
debug:
msg: |
✓ ComfyUI installed successfully!
Directory: {{ workspace_dir }}/ComfyUI
Port: 8188
HuggingFace Cache: {{ cache_dir }}
To start ComfyUI:
bash {{ ai_dir }}/models/comfyui/start.sh
Or manually:
cd {{ workspace_dir }}/ComfyUI && python3 main.py --listen 0.0.0.0 --port 8188
Access: http://localhost:8188
#
# ComfyUI Custom Nodes Installation
#
- name: Install ComfyUI Custom Nodes
tags: [comfyui-nodes, comfyui-essential]
block:
- name: Install essential ComfyUI custom nodes
git:
repo: "{{ item.repo }}"
dest: "{{ workspace_dir }}/ComfyUI/custom_nodes/{{ item.name }}"
version: main
update: yes
loop: "{{ comfyui_custom_nodes | selectattr('essential', 'equalto', true) | list }}"
loop_control:
label: "{{ item.name }}"
ignore_errors: yes
- name: Install custom node dependencies
shell: |
if [ -f "{{ workspace_dir }}/ComfyUI/custom_nodes/{{ item.name }}/requirements.txt" ]; then
pip3 install -r "{{ workspace_dir }}/ComfyUI/custom_nodes/{{ item.name }}/requirements.txt"
fi
loop: "{{ comfyui_custom_nodes | selectattr('essential', 'equalto', true) | list }}"
loop_control:
label: "{{ item.name }}"
become: true
ignore_errors: yes
- name: Display custom nodes installation summary
debug:
msg: |
✓ Custom nodes installed successfully!
Essential nodes:
{% for node in comfyui_custom_nodes | selectattr('essential', 'equalto', true) | list %}
- {{ node.name }}: {{ node.description }}
{% endfor %}
To install ALL nodes (including optional):
ansible-playbook playbook.yml --tags comfyui-nodes-all
#
# ComfyUI Image Models Download
#
- name: Download ComfyUI Image Generation Models
tags: [comfyui-models-image, comfyui-models-all, comfyui-essential]
block:
- name: Download essential image generation models
shell: |
python3 -c "
from huggingface_hub import snapshot_download
import os
os.environ['HF_HOME'] = '{{ cache_dir }}'
print('Downloading {{ item.name }}...')
snapshot_download(
repo_id='{{ item.name }}',
cache_dir='{{ cache_dir }}',
token=os.environ.get('HF_TOKEN')
)
print('Completed {{ item.name }}')
"
environment:
HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
HF_HOME: "{{ cache_dir }}"
loop: "{{ comfyui_image_models | selectattr('essential', 'equalto', true) | list }}"
loop_control:
label: "{{ item.name }} ({{ item.size_gb }}GB)"
async: 3600
poll: 30
ignore_errors: yes
- name: Display image models summary
debug:
msg: |
Image generation models downloaded:
{% for model in comfyui_image_models | selectattr('essential', 'equalto', true) | list %}
- {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB, {{ model.vram_gb }}GB VRAM)
{% endfor %}
Total size: ~{{ (comfyui_image_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) }}GB
#
# ComfyUI Video Models Download
#
- name: Download ComfyUI Video Generation Models
tags: [comfyui-models-video, comfyui-models-all]
block:
- name: Download essential video generation models
shell: |
python3 -c "
from huggingface_hub import snapshot_download
import os
os.environ['HF_HOME'] = '{{ cache_dir }}'
print('Downloading {{ item.name }}...')
snapshot_download(
repo_id='{{ item.name }}',
cache_dir='{{ cache_dir }}',
token=os.environ.get('HF_TOKEN')
)
print('Completed {{ item.name }}')
"
environment:
HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
HF_HOME: "{{ cache_dir }}"
loop: "{{ comfyui_video_models | selectattr('essential', 'equalto', true) | list }}"
loop_control:
label: "{{ item.name }} ({{ item.size_gb }}GB)"
async: 3600
poll: 30
ignore_errors: yes
- name: Display video models summary
debug:
msg: |
Video generation models downloaded:
{% for model in comfyui_video_models | selectattr('essential', 'equalto', true) | list %}
- {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB, {{ model.vram_gb }}GB VRAM)
{% endfor %}
#
# ComfyUI Audio Models Download
#
- name: Download ComfyUI Audio Generation Models
tags: [comfyui-models-audio, comfyui-models-all]
block:
- name: Download essential audio generation models
shell: |
python3 -c "
from huggingface_hub import snapshot_download
import os
os.environ['HF_HOME'] = '{{ cache_dir }}'
print('Downloading {{ item.name }}...')
snapshot_download(
repo_id='{{ item.name }}',
cache_dir='{{ cache_dir }}',
token=os.environ.get('HF_TOKEN')
)
print('Completed {{ item.name }}')
"
environment:
HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
HF_HOME: "{{ cache_dir }}"
loop: "{{ comfyui_audio_models | selectattr('essential', 'equalto', true) | list }}"
loop_control:
label: "{{ item.name }} ({{ item.size_gb }}GB)"
async: 3600
poll: 30
ignore_errors: yes
- name: Display audio models summary
debug:
msg: |
Audio generation models downloaded:
{% for model in comfyui_audio_models | selectattr('essential', 'equalto', true) | list %}
- {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB)
{% endfor %}
#
# ComfyUI Support Models Download (CLIP, IP-Adapter, ControlNet)
#
- name: Download ComfyUI Support Models
tags: [comfyui-models-support, comfyui-models-all, comfyui-essential]
block:
- name: Download essential support models (CLIP, IP-Adapter)
shell: |
python3 -c "
from huggingface_hub import snapshot_download
import os
os.environ['HF_HOME'] = '{{ cache_dir }}'
print('Downloading {{ item.name }}...')
snapshot_download(
repo_id='{{ item.name }}',
cache_dir='{{ cache_dir }}',
token=os.environ.get('HF_TOKEN')
)
print('Completed {{ item.name }}')
"
environment:
HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
HF_HOME: "{{ cache_dir }}"
loop: "{{ comfyui_support_models | selectattr('essential', 'equalto', true) | list }}"
loop_control:
label: "{{ item.name }} ({{ item.size_gb }}GB)"
async: 1800
poll: 30
ignore_errors: yes
- name: Display support models summary
debug:
msg: |
Support models downloaded:
{% for model in comfyui_support_models | selectattr('essential', 'equalto', true) | list %}
- {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB)
{% endfor %}
Total ComfyUI models cache: ~{{
(comfyui_image_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) +
(comfyui_video_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) +
(comfyui_audio_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) +
(comfyui_support_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb'))
}}GB
#
# Download AI Models
#
- name: Download AI models
tags: [models]
block:
- name: Create model cache directories
file:
path: "{{ item }}"
state: directory
mode: '0755'
loop:
- "{{ cache_dir }}"
- "{{ models_dir }}/flux"
- "{{ models_dir }}/musicgen"
- name: Check if models are already cached
stat:
path: "{{ cache_dir }}/models--{{ item.value.name | regex_replace('/', '--') }}"
register: model_cache_check
loop: "{{ models | dict2items }}"
loop_control:
label: "{{ item.key }}"
- name: Download Qwen 2.5 7B model (14GB, ~15 minutes)
shell: |
python3 -c "
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
os.environ['HF_HOME'] = '{{ cache_dir }}'
print('Downloading Qwen 2.5 7B Instruct...')
AutoTokenizer.from_pretrained('{{ models.vllm.name }}')
print('Tokenizer downloaded successfully')
"
environment:
HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
HF_HOME: "{{ cache_dir }}"
when: not (model_cache_check.results[0].stat.exists | default(false))
register: vllm_download
async: 1800 # 30 minutes timeout
poll: 30
- name: Download Flux.1 Schnell model (12GB, ~12 minutes)
shell: |
python3 -c "
from diffusers import FluxPipeline
import os
os.environ['HF_HOME'] = '{{ cache_dir }}'
print('Downloading Flux.1 Schnell...')
FluxPipeline.from_pretrained(
'{{ models.flux.name }}',
cache_dir='{{ cache_dir }}'
)
print('Flux.1 downloaded successfully')
"
environment:
HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
HF_HOME: "{{ cache_dir }}"
when: not (model_cache_check.results[1].stat.exists | default(false))
register: flux_download
async: 1200 # 20 minutes timeout
poll: 30
- name: Download MusicGen Medium model (11GB, ~10 minutes)
shell: |
python3 -c "
from audiocraft.models import MusicGen
import os
os.environ['HF_HOME'] = '{{ cache_dir }}'
print('Downloading MusicGen Medium...')
MusicGen.get_pretrained('{{ models.musicgen.name }}')
print('MusicGen downloaded successfully')
"
environment:
HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
HF_HOME: "{{ cache_dir }}"
when: not (model_cache_check.results[2].stat.exists | default(false))
register: musicgen_download
async: 900 # 15 minutes timeout
poll: 30
- name: Display model download summary
debug:
msg: |
Model downloads completed:
- Qwen 2.5 7B: {{ 'Downloaded' if vllm_download.changed | default(false) else 'Already cached' }}
- Flux.1 Schnell: {{ 'Downloaded' if flux_download.changed | default(false) else 'Already cached' }}
- MusicGen Medium: {{ 'Downloaded' if musicgen_download.changed | default(false) else 'Already cached' }}
Total cache size: ~37GB
#
# Tailscale VPN
#
- name: Install and configure Tailscale
tags: [tailscale]
block:
- name: Check if Tailscale is installed
command: which tailscale
register: tailscale_check
changed_when: false
failed_when: false
- name: Install Tailscale
shell: curl -fsSL https://tailscale.com/install.sh | sh
become: true
when: tailscale_check.rc != 0
- name: Display Tailscale setup instructions
debug:
msg: |
Tailscale installed. To connect:
1. Start tailscaled: tailscaled --tun=userspace-networking --socks5-server=localhost:1055 &
2. Authenticate: tailscale up --advertise-tags=tag:gpu
3. Get IP: tailscale ip -4
Note: Authentication requires manual intervention via provided URL
#
# Systemd Services (Optional)
#
- name: Configure systemd services
tags: [systemd, never] # never = skip by default
block:
- name: Create systemd service for orchestrator
template:
src: "{{ ai_dir }}/systemd/ai-orchestrator.service.j2"
dest: /etc/systemd/system/ai-orchestrator.service
mode: '0644'
become: true
- name: Reload systemd daemon
systemd:
daemon_reload: yes
become: true
- name: Enable orchestrator service
systemd:
name: ai-orchestrator
enabled: yes
become: true
- name: Display systemd instructions
debug:
msg: |
Systemd service configured. To manage:
- Start: sudo systemctl start ai-orchestrator
- Stop: sudo systemctl stop ai-orchestrator
- Status: sudo systemctl status ai-orchestrator
- Logs: sudo journalctl -u ai-orchestrator -f
#
# Validation
#
- name: Validate installation
tags: [validate, never] # never = skip by default, run explicitly
block:
- name: Check Python packages
shell: pip3 list | grep -E "(fastapi|uvicorn|torch|vllm|diffusers|audiocraft)"
register: pip_check
changed_when: false
- name: Display installed packages
debug:
msg: "{{ pip_check.stdout_lines }}"
- name: Check GPU memory
shell: nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits
register: gpu_memory
changed_when: false
- name: Display GPU memory
debug:
msg: "Free GPU memory: {{ gpu_memory.stdout }} MB"
- name: Check cached models
shell: du -sh {{ cache_dir }}
register: cache_size
changed_when: false
- name: Display cache information
debug:
msg: "Model cache size: {{ cache_size.stdout }}"
- name: Verify service scripts are executable
file:
path: "{{ ai_dir }}/{{ item.script }}"
mode: '0755'
loop: "{{ services }}"
- name: Display validation summary
debug:
msg: |
✓ Installation validated successfully!
Next steps:
1. Start orchestrator: python3 {{ ai_dir }}/model-orchestrator/orchestrator_subprocess.py
2. Test endpoint: curl http://localhost:9000/health
3. Configure LiteLLM on VPS to connect via Tailscale
Services:
{% for service in services %}
- {{ service.name }}: http://localhost:{{ service.port }}
{% endfor %}
#
# Cleanup for Template Creation
#
- name: Cleanup for template creation
tags: [cleanup, never] # never = skip by default, run explicitly
block:
- name: Remove sensitive files
file:
path: "{{ item }}"
state: absent
loop:
- "{{ ai_dir }}/.env"
- /root/.ssh/known_hosts
- /root/.bash_history
- /root/.python_history
- name: Clear system logs
shell: find /var/log -type f -name "*.log" -delete
become: true
ignore_errors: yes
- name: Create template version marker
copy:
dest: "{{ workspace_dir }}/TEMPLATE_VERSION"
content: |
RunPod Multi-Modal AI Template (Process-Based Architecture)
Version: 2.0
Created: {{ ansible_date_time.iso8601 }}
Components:
- Python {{ python_version }}
- Orchestrator (process-based)
- Text Generation (vLLM + Qwen 2.5 7B)
- Image Generation (Flux.1 Schnell)
- Music Generation (MusicGen Medium)
Models Cached: ~37GB
Architecture: No Docker, direct Python execution
Deployment:
1. Create .env file with HF_TOKEN
2. Run: python3 {{ ai_dir }}/model-orchestrator/orchestrator_subprocess.py
3. Access: http://localhost:9000/health
- name: Display template creation instructions
debug:
msg: |
Template prepared successfully!
Next steps in RunPod dashboard:
1. Stop all running services
2. Go to My Pods → Select this pod → ⋮ → Save as Template
3. Name: multi-modal-ai-process-v2.0
4. Description: Process-based multi-modal AI (text/image/music)
5. Save and test deployment from template
Template enables 2-3 minute deployments instead of 60+ minutes!