- Add ComfyUI installation to Ansible playbook with 'comfyui' tag
- Create ComfyUI requirements.txt and start.sh script
- Clone ComfyUI from official GitHub repository
- Symlink HuggingFace cache for Flux model access
- ComfyUI runs on port 8188 with CORS enabled
- Add ComfyUI to services list in playbook

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
495 lines · 15 KiB · YAML
---
#
# RunPod AI Infrastructure Ansible Playbook
#
# This playbook provisions a RunPod GPU instance with multi-modal AI services.
# It replaces all bash scripts with reproducible Ansible tasks.
#
# Usage:
#   ansible-playbook playbook.yml                  # Full deployment
#   ansible-playbook playbook.yml --tags base      # Install system packages
#   ansible-playbook playbook.yml --tags python    # Setup Python environment
#   ansible-playbook playbook.yml --tags models    # Download models only
#   ansible-playbook playbook.yml --tags validate  # Validate installation
#
# Tags:
#   base         - System packages and dependencies
#   python       - Python environment setup
#   dependencies - Install Python packages
#   models       - Download AI models
#   comfyui      - Install and configure ComfyUI
#   tailscale    - Install and configure Tailscale
#   systemd      - Configure systemd services
#   validate     - Health checks and validation
#

- name: Provision RunPod GPU Instance for AI Services
  hosts: localhost
  connection: local
  become: false

  vars:
    # Paths
    workspace_dir: /workspace
    ai_dir: "{{ workspace_dir }}/ai"
    cache_dir: "{{ workspace_dir }}/huggingface_cache"
    models_dir: "{{ workspace_dir }}/models"

    # Python configuration
    python_version: "3.10"
    pip_version: "23.3.1"

    # Model configuration
    models:
      vllm:
        name: "Qwen/Qwen2.5-7B-Instruct"
        size_gb: 14
      flux:
        name: "black-forest-labs/FLUX.1-schnell"
        size_gb: 12
      musicgen:
        name: "facebook/musicgen-medium"
        size_gb: 11

    # Service configuration
    services:
      - name: orchestrator
        port: 9000
        script: model-orchestrator/orchestrator_subprocess.py
      - name: vllm
        port: 8001
        script: models/vllm/server.py
      - name: flux
        port: 8002
        script: models/flux/server.py
      - name: musicgen
        port: 8003
        script: models/musicgen/server.py
      - name: comfyui
        port: 8188
        script: models/comfyui/start.sh
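
  # The script and requirements paths used throughout this playbook assume a
  # checkout under {{ ai_dir }} laid out roughly as follows (a sketch inferred
  # only from the paths referenced in this file, not an authoritative listing):
  #
  #   /workspace/ai/
  #     core/requirements.txt
  #     model-orchestrator/orchestrator_subprocess.py
  #     models/vllm/{server.py, requirements.txt}
  #     models/flux/{server.py, requirements.txt}
  #     models/musicgen/{server.py, requirements.txt}
  #     models/comfyui/{start.sh, requirements.txt}
  #     systemd/ai-orchestrator.service.j2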

  tasks:
    #
    # Base System Setup
    #
    - name: Base system packages
      tags: [base, always]
      block:
        - name: Check GPU availability
          shell: nvidia-smi
          register: nvidia_check
          changed_when: false
          failed_when: nvidia_check.rc != 0

        - name: Display GPU information
          debug:
            msg: "{{ nvidia_check.stdout_lines }}"

        - name: Ensure workspace directory exists
          file:
            path: "{{ workspace_dir }}"
            state: directory
            mode: '0755'

        - name: Update apt cache
          apt:
            update_cache: yes
            cache_valid_time: 3600
          become: true

        - name: Install base system packages
          apt:
            name:
              - build-essential
              - python3-dev
              - python3-pip
              - python3-venv
              - git
              - curl
              - wget
              - vim
              - htop
              - tmux
              - net-tools
            state: present
          become: true

    #
    # Python Environment Setup
    #
    - name: Python environment setup
      tags: [python]
      block:
        - name: Upgrade pip
          pip:
            name: pip
            version: "{{ pip_version }}"
            executable: pip3
            extra_args: --upgrade
          become: true

        - name: Install core Python packages
          pip:
            requirements: "{{ ai_dir }}/core/requirements.txt"
            executable: pip3
          become: true

    #
    # Install Model Dependencies
    #
    - name: Install model dependencies
      tags: [dependencies]
      block:
        - name: Install vLLM dependencies
          pip:
            requirements: "{{ ai_dir }}/models/vllm/requirements.txt"
            executable: pip3
          become: true

        - name: Install Flux dependencies
          pip:
            requirements: "{{ ai_dir }}/models/flux/requirements.txt"
            executable: pip3
          become: true

        - name: Install MusicGen dependencies
          pip:
            requirements: "{{ ai_dir }}/models/musicgen/requirements.txt"
            executable: pip3
          become: true

    #
    # ComfyUI Installation
    #
    - name: Install and configure ComfyUI
      tags: [comfyui]
      block:
        - name: Check if ComfyUI is already installed
          stat:
            path: "{{ workspace_dir }}/ComfyUI"
          register: comfyui_check

        - name: Clone ComfyUI repository
          git:
            repo: https://github.com/comfyanonymous/ComfyUI.git
            dest: "{{ workspace_dir }}/ComfyUI"
            version: master
            update: yes
          when: not comfyui_check.stat.exists

        - name: Install ComfyUI dependencies
          pip:
            requirements: "{{ workspace_dir }}/ComfyUI/requirements.txt"
            executable: pip3
          become: true

        - name: Install additional ComfyUI dependencies
          pip:
            requirements: "{{ ai_dir }}/models/comfyui/requirements.txt"
            executable: pip3
          become: true

        - name: Create ComfyUI models directory structure
          file:
            path: "{{ workspace_dir }}/ComfyUI/models/{{ item }}"
            state: directory
            mode: '0755'
          loop:
            - checkpoints
            - unet
            - vae
            - loras
            - clip
            - controlnet

        - name: Create symlink for Flux model in ComfyUI
          file:
            src: "{{ cache_dir }}"
            dest: "{{ workspace_dir }}/ComfyUI/models/huggingface_cache"
            state: link
          ignore_errors: yes

        - name: Make ComfyUI start script executable
          file:
            path: "{{ ai_dir }}/models/comfyui/start.sh"
            mode: '0755'
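
        # The commit that introduced this section describes start.sh as
        # launching ComfyUI on port 8188 with CORS enabled. A minimal sketch of
        # what {{ ai_dir }}/models/comfyui/start.sh could contain (the
        # --enable-cors-header flag is an assumption about how CORS is enabled):
        #
        #   #!/usr/bin/env bash
        #   set -euo pipefail
        #   cd /workspace/ComfyUI
        #   exec python3 main.py --listen 0.0.0.0 --port 8188 --enable-cors-header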

        - name: Display ComfyUI setup summary
          debug:
            msg: |
              ✓ ComfyUI installed successfully!

              Directory: {{ workspace_dir }}/ComfyUI
              Port: 8188
              HuggingFace Cache: {{ cache_dir }}

              To start ComfyUI:
                bash {{ ai_dir }}/models/comfyui/start.sh

              Or manually:
                cd {{ workspace_dir }}/ComfyUI && python3 main.py --listen 0.0.0.0 --port 8188

              Access: http://localhost:8188

    #
    # Download AI Models
    #
    - name: Download AI models
      tags: [models]
      block:
        - name: Create model cache directories
          file:
            path: "{{ item }}"
            state: directory
            mode: '0755'
          loop:
            - "{{ cache_dir }}"
            - "{{ models_dir }}/flux"
            - "{{ models_dir }}/musicgen"

        - name: Check if models are already cached
          stat:
            path: "{{ cache_dir }}/models--{{ item.value.name | regex_replace('/', '--') }}"
          register: model_cache_check
          loop: "{{ models | dict2items }}"
          loop_control:
            label: "{{ item.key }}"
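
        # The download tasks below index model_cache_check.results positionally:
        # with `models | dict2items`, results[0] is vllm, results[1] is flux and
        # results[2] is musicgen. A sketch of an order-independent condition
        # (illustrative only, not part of the original playbook):
        #
        #   when: not (model_cache_check.results
        #              | selectattr('item.key', 'equalto', 'vllm')
        #              | map(attribute='stat.exists')
        #              | first | default(false))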

        - name: Download Qwen 2.5 7B model (14GB, ~15 minutes)
          shell: |
            python3 -c "
            from transformers import AutoTokenizer, AutoModelForCausalLM
            import os
            os.environ['HF_HOME'] = '{{ cache_dir }}'
            print('Downloading Qwen 2.5 7B Instruct...')
            AutoTokenizer.from_pretrained('{{ models.vllm.name }}')
            print('Tokenizer downloaded successfully')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          when: not (model_cache_check.results[0].stat.exists | default(false))
          register: vllm_download
          async: 1800  # 30 minutes timeout
          poll: 30
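
        # The task above only pre-fetches the Qwen tokenizer, presumably leaving
        # vLLM to pull the full weights on first start. If the full ~14GB
        # snapshot should be cached up front instead, a hedged sketch using the
        # huggingface_hub API would be:
        #
        #   - name: Pre-download full Qwen snapshot
        #     shell: |
        #       python3 -c "
        #       from huggingface_hub import snapshot_download
        #       snapshot_download(repo_id='{{ models.vllm.name }}',
        #                         cache_dir='{{ cache_dir }}')
        #       "
        #     environment:
        #       HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"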

        - name: Download Flux.1 Schnell model (12GB, ~12 minutes)
          shell: |
            python3 -c "
            from diffusers import FluxPipeline
            import os
            os.environ['HF_HOME'] = '{{ cache_dir }}'
            print('Downloading Flux.1 Schnell...')
            FluxPipeline.from_pretrained(
                '{{ models.flux.name }}',
                cache_dir='{{ cache_dir }}'
            )
            print('Flux.1 downloaded successfully')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          when: not (model_cache_check.results[1].stat.exists | default(false))
          register: flux_download
          async: 1200  # 20 minutes timeout
          poll: 30

        - name: Download MusicGen Medium model (11GB, ~10 minutes)
          shell: |
            python3 -c "
            from audiocraft.models import MusicGen
            import os
            os.environ['HF_HOME'] = '{{ cache_dir }}'
            print('Downloading MusicGen Medium...')
            MusicGen.get_pretrained('{{ models.musicgen.name }}')
            print('MusicGen downloaded successfully')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          when: not (model_cache_check.results[2].stat.exists | default(false))
          register: musicgen_download
          async: 900  # 15 minutes timeout
          poll: 30
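
        # The three downloads above run one after another because each polls its
        # async job (poll: 30). A sketch of how they could run in parallel
        # instead: fire each with poll: 0, then wait with async_status
        # (illustrative only, not enabled here):
        #
        #   - name: Start Flux download in the background
        #     shell: ...same command as above...
        #     async: 1200
        #     poll: 0
        #     register: flux_job
        #
        #   - name: Wait for Flux download to finish
        #     async_status:
        #       jid: "{{ flux_job.ansible_job_id }}"
        #     register: flux_result
        #     until: flux_result.finished
        #     retries: 120
        #     delay: 10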

        - name: Display model download summary
          debug:
            msg: |
              Model downloads completed:
              - Qwen 2.5 7B: {{ 'Downloaded' if vllm_download.changed | default(false) else 'Already cached' }}
              - Flux.1 Schnell: {{ 'Downloaded' if flux_download.changed | default(false) else 'Already cached' }}
              - MusicGen Medium: {{ 'Downloaded' if musicgen_download.changed | default(false) else 'Already cached' }}
              Total cache size: ~37GB

    #
    # Tailscale VPN
    #
    - name: Install and configure Tailscale
      tags: [tailscale]
      block:
        - name: Check if Tailscale is installed
          command: which tailscale
          register: tailscale_check
          changed_when: false
          failed_when: false

        - name: Install Tailscale
          shell: curl -fsSL https://tailscale.com/install.sh | sh
          become: true
          when: tailscale_check.rc != 0

        - name: Display Tailscale setup instructions
          debug:
            msg: |
              Tailscale installed. To connect:
              1. Start tailscaled: tailscaled --tun=userspace-networking --socks5-server=localhost:1055 &
              2. Authenticate: tailscale up --advertise-tags=tag:gpu
              3. Get IP: tailscale ip -4

              Note: Authentication requires manual intervention via provided URL
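
        # For fully unattended provisioning, Tailscale also accepts pre-generated
        # auth keys, which avoids the interactive URL step. A sketch (assumes a
        # TS_AUTHKEY environment variable; not part of the original flow):
        #
        #   - name: Authenticate Tailscale with an auth key
        #     shell: tailscale up --authkey={{ lookup('env', 'TS_AUTHKEY') }} --advertise-tags=tag:gpu
        #     become: true
        #     when: lookup('env', 'TS_AUTHKEY') | length > 0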

    #
    # Systemd Services (Optional)
    #
    - name: Configure systemd services
      tags: [systemd, never]  # never = skip by default
      block:
        - name: Create systemd service for orchestrator
          template:
            src: "{{ ai_dir }}/systemd/ai-orchestrator.service.j2"
            dest: /etc/systemd/system/ai-orchestrator.service
            mode: '0644'
          become: true

        - name: Reload systemd daemon
          systemd:
            daemon_reload: yes
          become: true

        - name: Enable orchestrator service
          systemd:
            name: ai-orchestrator
            enabled: yes
          become: true

        - name: Display systemd instructions
          debug:
            msg: |
              Systemd service configured. To manage:
              - Start: sudo systemctl start ai-orchestrator
              - Stop: sudo systemctl stop ai-orchestrator
              - Status: sudo systemctl status ai-orchestrator
              - Logs: sudo journalctl -u ai-orchestrator -f
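
        # The ai-orchestrator.service.j2 template itself is not shown in this
        # playbook. A minimal sketch of what such a unit could contain (assumed
        # content, based on the orchestrator command used elsewhere in this file):
        #
        #   [Unit]
        #   Description=AI model orchestrator
        #   After=network.target
        #
        #   [Service]
        #   WorkingDirectory={{ ai_dir }}
        #   ExecStart=/usr/bin/python3 {{ ai_dir }}/model-orchestrator/orchestrator_subprocess.py
        #   Restart=always
        #
        #   [Install]
        #   WantedBy=multi-user.target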

    #
    # Validation
    #
    - name: Validate installation
      tags: [validate, never]  # never = skip by default, run explicitly
      block:
        - name: Check Python packages
          shell: pip3 list | grep -E "(fastapi|uvicorn|torch|vllm|diffusers|audiocraft)"
          register: pip_check
          changed_when: false

        - name: Display installed packages
          debug:
            msg: "{{ pip_check.stdout_lines }}"

        - name: Check GPU memory
          shell: nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits
          register: gpu_memory
          changed_when: false

        - name: Display GPU memory
          debug:
            msg: "Free GPU memory: {{ gpu_memory.stdout }} MB"

        - name: Check cached models
          shell: du -sh {{ cache_dir }}
          register: cache_size
          changed_when: false

        - name: Display cache information
          debug:
            msg: "Model cache size: {{ cache_size.stdout }}"

        - name: Verify service scripts are executable
          file:
            path: "{{ ai_dir }}/{{ item.script }}"
            mode: '0755'
          loop: "{{ services }}"

        - name: Display validation summary
          debug:
            msg: |
              ✓ Installation validated successfully!

              Next steps:
              1. Start orchestrator: python3 {{ ai_dir }}/model-orchestrator/orchestrator_subprocess.py
              2. Test endpoint: curl http://localhost:9000/health
              3. Configure LiteLLM on VPS to connect via Tailscale

              Services:
              {% for service in services %}
              - {{ service.name }}: http://localhost:{{ service.port }}
              {% endfor %}
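
        # The health check suggested above could also be automated here with the
        # uri module instead of curl (a sketch; it assumes the orchestrator is
        # already running when the validate tag is executed):
        #
        #   - name: Check orchestrator health endpoint
        #     uri:
        #       url: http://localhost:9000/health
        #       status_code: 200
        #     register: orchestrator_health
        #     until: orchestrator_health.status == 200
        #     retries: 5
        #     delay: 10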

    #
    # Cleanup for Template Creation
    #
    - name: Cleanup for template creation
      tags: [cleanup, never]  # never = skip by default, run explicitly
      block:
        - name: Remove sensitive files
          file:
            path: "{{ item }}"
            state: absent
          loop:
            - "{{ ai_dir }}/.env"
            - /root/.ssh/known_hosts
            - /root/.bash_history
            - /root/.python_history

        - name: Clear system logs
          shell: find /var/log -type f -name "*.log" -delete
          become: true
          ignore_errors: yes
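
        # Deleting log files outright can confuse daemons that keep the file
        # handles open. A gentler sketch truncates them in place instead
        # (an alternative, not what the original task does):
        #
        #   - name: Truncate system logs
        #     shell: find /var/log -type f -name "*.log" -exec truncate -s 0 {} +
        #     become: true
        #     ignore_errors: yes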

        - name: Create template version marker
          copy:
            dest: "{{ workspace_dir }}/TEMPLATE_VERSION"
            content: |
              RunPod Multi-Modal AI Template (Process-Based Architecture)
              Version: 2.0
              Created: {{ ansible_date_time.iso8601 }}

              Components:
              - Python {{ python_version }}
              - Orchestrator (process-based)
              - Text Generation (vLLM + Qwen 2.5 7B)
              - Image Generation (Flux.1 Schnell)
              - Music Generation (MusicGen Medium)

              Models Cached: ~37GB
              Architecture: No Docker, direct Python execution

              Deployment:
              1. Create .env file with HF_TOKEN
              2. Run: python3 {{ ai_dir }}/model-orchestrator/orchestrator_subprocess.py
              3. Access: http://localhost:9000/health

        - name: Display template creation instructions
          debug:
            msg: |
              Template prepared successfully!

              Next steps in RunPod dashboard:
              1. Stop all running services
              2. Go to My Pods → Select this pod → ⋮ → Save as Template
              3. Name: multi-modal-ai-process-v2.0
              4. Description: Process-based multi-modal AI (text/image/music)
              5. Save and test deployment from template

              Template enables 2-3 minute deployments instead of 60+ minutes!