runpod/playbook.yml

---
#
# RunPod AI Infrastructure Ansible Playbook
#
# This playbook provisions a RunPod GPU instance with multi-modal AI services.
# It replaces all bash scripts with reproducible Ansible tasks.
#
# Usage:
#   ansible-playbook playbook.yml                    # Full deployment
#   ansible-playbook playbook.yml --tags base        # Install system packages
#   ansible-playbook playbook.yml --tags python      # Setup Python environment
#   ansible-playbook playbook.yml --tags models      # Download models only
#   ansible-playbook playbook.yml --tags validate    # Validate installation
#
# Tags:
#   base                   - System packages and dependencies
#   python                 - Python environment setup
#   dependencies           - Install Python packages
#   models                 - Download AI models (vLLM, Flux, MusicGen)
#   comfyui                - Install and configure ComfyUI base
#   comfyui-models-image   - Download ComfyUI image generation models
#   comfyui-models-video   - Download ComfyUI video generation models
#   comfyui-models-audio   - Download ComfyUI audio generation models
#   comfyui-models-support - Download CLIP, IP-Adapter, ControlNet models
#   comfyui-models-all     - Download all ComfyUI models
#   comfyui-nodes          - Install essential custom nodes
#   comfyui-essential      - Quick setup (ComfyUI + essential models only)
#   tailscale              - Install and configure Tailscale
#   systemd                - Configure systemd services
#   validate               - Health checks and validation
#

- name: Provision RunPod GPU Instance for AI Services
  hosts: localhost
  connection: local
  become: false
  vars:
    # Paths
    workspace_dir: /workspace
    ai_dir: "{{ workspace_dir }}/ai"
    cache_dir: "{{ workspace_dir }}/huggingface_cache"
    models_dir: "{{ workspace_dir }}/models"

    # Python configuration
    python_version: "3.10"
    pip_version: "23.3.1"

    # Model configuration
    models:
      vllm:
        name: "Qwen/Qwen2.5-7B-Instruct"
        size_gb: 14
      flux:
        name: "black-forest-labs/FLUX.1-schnell"
        size_gb: 12
      musicgen:
        name: "facebook/musicgen-medium"
        size_gb: 11

    # ========================================================================
    # ComfyUI Models - Comprehensive List for 24GB GPU
    # ========================================================================

    # ComfyUI Image Generation Models
    comfyui_image_models:
      # FLUX Models (Black Forest Labs) - State of the art 2025
      - name: "black-forest-labs/FLUX.1-schnell"
        type: "checkpoint"
        category: "image"
        size_gb: 23
        vram_gb: 23
        format: "fp16"
        description: "FLUX.1 Schnell - Fast 4-step inference"
        essential: true

      - name: "black-forest-labs/FLUX.1-dev"
        type: "checkpoint"
        category: "image"
        size_gb: 23
        vram_gb: 23
        format: "fp16"
        description: "FLUX.1 Dev - Balanced quality/speed"
        essential: false

      # SDXL Models - Industry standard
      - name: "stabilityai/stable-diffusion-xl-base-1.0"
        type: "checkpoint"
        category: "image"
        size_gb: 7
        vram_gb: 12
        format: "fp16"
        description: "SDXL 1.0 Base - 1024x1024 native resolution"
        essential: true

      - name: "stabilityai/stable-diffusion-xl-refiner-1.0"
        type: "checkpoint"
        category: "image"
        size_gb: 6
        vram_gb: 12
        format: "fp16"
        description: "SDXL Refiner - Enhances base output"
        essential: false

      # SD 3.5 Models - Latest Stability AI
      - name: "stabilityai/stable-diffusion-3.5-large"
        type: "checkpoint"
        category: "image"
        size_gb: 18
        vram_gb: 20
        format: "fp16"
        description: "SD 3.5 Large - MMDiT architecture"
        essential: false

    # ComfyUI Video Generation Models
    comfyui_video_models:
      # CogVideoX - Text-to-video
      - name: "THUDM/CogVideoX-5b"
        type: "video"
        category: "video"
        size_gb: 20
        vram_gb: 12  # with optimizations
        description: "CogVideoX 5B - Professional text-to-video"
        essential: true

      # Stable Video Diffusion
      - name: "stabilityai/stable-video-diffusion-img2vid"
        type: "video"
        category: "video"
        size_gb: 8
        vram_gb: 16
        description: "SVD - 14 frame image-to-video"
        essential: true

      - name: "stabilityai/stable-video-diffusion-img2vid-xt"
        type: "video"
        category: "video"
        size_gb: 8
        vram_gb: 20
        description: "SVD-XT - 25 frame image-to-video"
        essential: false

    # ComfyUI Audio Generation Models
    comfyui_audio_models:
      - name: "facebook/musicgen-small"
        type: "audio"
        category: "audio"
        size_gb: 3
        vram_gb: 4
        description: "MusicGen Small - Fast music generation"
        essential: false

      - name: "facebook/musicgen-medium"
        type: "audio"
        category: "audio"
        size_gb: 11
        vram_gb: 8
        description: "MusicGen Medium - Balanced quality"
        essential: true

      - name: "facebook/musicgen-large"
        type: "audio"
        category: "audio"
        size_gb: 22
        vram_gb: 16
        description: "MusicGen Large - Highest quality"
        essential: false

    # ComfyUI Supporting Models (CLIP, IP-Adapter, ControlNet)
    comfyui_support_models:
      # CLIP Vision Models
      - name: "openai/clip-vit-large-patch14"
        type: "clip_vision"
        category: "support"
        size_gb: 2
        description: "CLIP H - For SD 1.5 IP-Adapter"
        essential: true
        target_dir: "clip_vision"

      - name: "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
        type: "clip_vision"
        category: "support"
        size_gb: 7
        description: "CLIP G - For SDXL IP-Adapter"
        essential: true
        target_dir: "clip_vision"

      - name: "google/siglip-so400m-patch14-384"
        type: "clip_vision"
        category: "support"
        size_gb: 2
        description: "SigLIP - For FLUX models"
        essential: true
        target_dir: "clip_vision"

    # ComfyUI Custom Nodes - Essential Extensions
    comfyui_custom_nodes:
      # ComfyUI Manager - Must have
      - name: "ComfyUI-Manager"
        repo: "https://github.com/ltdrdata/ComfyUI-Manager.git"
        category: "manager"
        description: "Install/manage custom nodes and models"
        essential: true

      # Video Generation Nodes
      - name: "ComfyUI-VideoHelperSuite"
        repo: "https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git"
        category: "video"
        description: "Video operations and processing"
        essential: true

      - name: "ComfyUI-AnimateDiff-Evolved"
        repo: "https://github.com/Kosinkadink/ComfyUI-AnimateDiff-Evolved.git"
        category: "video"
        description: "AnimateDiff for video generation"
        essential: true

      - name: "ComfyUI-CogVideoXWrapper"
        repo: "https://github.com/kijai/ComfyUI-CogVideoXWrapper.git"
        category: "video"
        description: "CogVideoX integration"
        essential: false

      # Image Enhancement Nodes
      - name: "ComfyUI_IPAdapter_plus"
        repo: "https://github.com/cubiq/ComfyUI_IPAdapter_plus.git"
        category: "image"
        description: "IP-Adapter for style transfer"
        essential: true

      - name: "ComfyUI-Impact-Pack"
        repo: "https://github.com/ltdrdata/ComfyUI-Impact-Pack.git"
        category: "image"
        description: "Auto face enhancement, detailer"
        essential: true

      - name: "Comfyui-Inspire-Pack"
        repo: "https://github.com/ltdrdata/ComfyUI-Inspire-Pack.git"
        category: "image"
        description: "Additional inspiration tools"
        essential: false

      # Audio Generation Nodes
      - name: "comfyui-sound-lab"
        repo: "https://github.com/eigenpunk/comfyui-sound-lab.git"
        category: "audio"
        description: "MusicGen and Stable Audio integration"
        essential: true

      # Utility Nodes
      - name: "ComfyUI-Advanced-ControlNet"
        repo: "https://github.com/Kosinkadink/ComfyUI-Advanced-ControlNet.git"
        category: "control"
        description: "Advanced ControlNet features"
        essential: false

      - name: "ComfyUI-3D-Pack"
        repo: "https://github.com/MrForExample/ComfyUI-3D-Pack.git"
        category: "3d"
        description: "3D asset generation"
        essential: false

    # Service configuration
    services:
      - name: orchestrator
        port: 9000
        script: model-orchestrator/orchestrator_subprocess.py
      - name: vllm
        port: 8001
        script: models/vllm/server.py
      - name: comfyui
        port: 8188
        script: models/comfyui/start.sh

  tasks:
    #
    # Base System Setup
    #
    - name: Base system packages
      tags: [base, always]
      block:
        - name: Check GPU availability
          shell: nvidia-smi
          register: nvidia_check
          changed_when: false
          failed_when: nvidia_check.rc != 0

        - name: Display GPU information
          debug:
            msg: "{{ nvidia_check.stdout_lines }}"

        - name: Ensure workspace directory exists
          file:
            path: "{{ workspace_dir }}"
            state: directory
            mode: '0755'

        - name: Update apt cache
          apt:
            update_cache: yes
            cache_valid_time: 3600
          become: true

        - name: Install base system packages
          apt:
            name:
              - build-essential
              - python3-dev
              - python3-pip
              - python3-venv
              - git
              - curl
              - wget
              - vim
              - htop
              - tmux
              - net-tools
            state: present
          become: true

    #
    # Python Environment Setup
    #
    - name: Python environment setup
      tags: [python]
      block:
        - name: Upgrade pip
          pip:
            name: pip
            version: "{{ pip_version }}"
            executable: pip3
            extra_args: --upgrade
          become: true

        - name: Install core Python packages
          pip:
            requirements: "{{ ai_dir }}/core/requirements.txt"
            executable: pip3
          become: true

    #
    # Install Model Dependencies
    #
    - name: Install model dependencies
      tags: [dependencies]
      block:
        - name: Install vLLM dependencies
          pip:
            requirements: "{{ ai_dir }}/models/vllm/requirements.txt"
            executable: pip3
          become: true

    #
    # ComfyUI Installation
    #
    - name: Install and configure ComfyUI
      tags: [comfyui]
      block:
        - name: Check if ComfyUI is already installed
          stat:
            path: "{{ workspace_dir }}/ComfyUI"
          register: comfyui_check

        - name: Clone ComfyUI repository
          git:
            repo: https://github.com/comfyanonymous/ComfyUI.git
            dest: "{{ workspace_dir }}/ComfyUI"
            version: master
            update: yes
          when: not comfyui_check.stat.exists

        - name: Install ComfyUI dependencies
          pip:
            requirements: "{{ workspace_dir }}/ComfyUI/requirements.txt"
            executable: pip3
          become: true

        - name: Install additional ComfyUI dependencies
          pip:
            requirements: "{{ ai_dir }}/models/comfyui/requirements.txt"
            executable: pip3
          become: true

        - name: Create ComfyUI models directory structure
          file:
            path: "{{ workspace_dir }}/ComfyUI/models/{{ item }}"
            state: directory
            mode: '0755'
          loop:
            # Image Model Directories
            - checkpoints
            - unet
            - vae
            - loras
            - clip
            - clip_vision
            - controlnet
            - ipadapter
            - embeddings
            - upscale_models
            # Video Model Directories
            - video_models
            - animatediff_models
            - animatediff_motion_lora
            # Audio Model Directories
            - audio_models
            # Utility Directories
            - configs
            - custom_nodes

        - name: Create symlink for Flux model in ComfyUI
          file:
            src: "{{ cache_dir }}"
            dest: "{{ workspace_dir }}/ComfyUI/models/huggingface_cache"
            state: link
          ignore_errors: yes

        - name: Make ComfyUI start script executable
          file:
            path: "{{ ai_dir }}/models/comfyui/start.sh"
            mode: '0755'

        - name: Display ComfyUI setup summary
          debug:
            msg: |
              ✓ ComfyUI installed successfully!

              Directory: {{ workspace_dir }}/ComfyUI
              Port: 8188
              HuggingFace Cache: {{ cache_dir }}

              To start ComfyUI:
              bash {{ ai_dir }}/models/comfyui/start.sh

              Or manually:
              cd {{ workspace_dir }}/ComfyUI && python3 main.py --listen 0.0.0.0 --port 8188

              Access: http://localhost:8188

    #
    # ComfyUI Custom Nodes Installation
    #
    - name: Install ComfyUI Custom Nodes
      tags: [comfyui-nodes, comfyui-essential]
      block:
        - name: Install essential ComfyUI custom nodes
          git:
            repo: "{{ item.repo }}"
            dest: "{{ workspace_dir }}/ComfyUI/custom_nodes/{{ item.name }}"
            version: main
            update: yes
          loop: "{{ comfyui_custom_nodes | selectattr('essential', 'equalto', true) | list }}"
          loop_control:
            label: "{{ item.name }}"
          ignore_errors: yes

        - name: Install custom node dependencies
          shell: |
            if [ -f "{{ workspace_dir }}/ComfyUI/custom_nodes/{{ item.name }}/requirements.txt" ]; then
              pip3 install -r "{{ workspace_dir }}/ComfyUI/custom_nodes/{{ item.name }}/requirements.txt"
            fi
          loop: "{{ comfyui_custom_nodes | selectattr('essential', 'equalto', true) | list }}"
          loop_control:
            label: "{{ item.name }}"
          become: true
          ignore_errors: yes

        - name: Display custom nodes installation summary
          debug:
            msg: |
              ✓ Custom nodes installed successfully!

              Essential nodes:
              {% for node in comfyui_custom_nodes | selectattr('essential', 'equalto', true) | list %}
              - {{ node.name }}: {{ node.description }}
              {% endfor %}

              To install ALL nodes (including optional):
              ansible-playbook playbook.yml --tags comfyui-nodes-all

    #
    # ComfyUI Image Models Download
    #
    - name: Download ComfyUI Image Generation Models
      tags: [comfyui-models-image, comfyui-models-all, comfyui-essential]
      block:
        - name: Download essential image generation models
          shell: |
            python3 -c "
            from huggingface_hub import snapshot_download
            import os
            os.environ['HF_HOME'] = '{{ cache_dir }}'
            print('Downloading {{ item.name }}...')
            snapshot_download(
                repo_id='{{ item.name }}',
                cache_dir='{{ cache_dir }}',
                token=os.environ.get('HF_TOKEN')
            )
            print('Completed {{ item.name }}')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          loop: "{{ comfyui_image_models | selectattr('essential', 'equalto', true) | list }}"
          loop_control:
            label: "{{ item.name }} ({{ item.size_gb }}GB)"
          async: 3600
          poll: 30
          ignore_errors: yes

        - name: Display image models summary
          debug:
            msg: |
              Image generation models downloaded:
              {% for model in comfyui_image_models | selectattr('essential', 'equalto', true) | list %}
              - {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB, {{ model.vram_gb }}GB VRAM)
              {% endfor %}

              Total size: ~{{ (comfyui_image_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) }}GB

    #
    # ComfyUI Video Models Download
    #
    - name: Download ComfyUI Video Generation Models
      tags: [comfyui-models-video, comfyui-models-all]
      block:
        - name: Download essential video generation models
          shell: |
            python3 -c "
            from huggingface_hub import snapshot_download
            import os
            os.environ['HF_HOME'] = '{{ cache_dir }}'
            print('Downloading {{ item.name }}...')
            snapshot_download(
                repo_id='{{ item.name }}',
                cache_dir='{{ cache_dir }}',
                token=os.environ.get('HF_TOKEN')
            )
            print('Completed {{ item.name }}')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          loop: "{{ comfyui_video_models | selectattr('essential', 'equalto', true) | list }}"
          loop_control:
            label: "{{ item.name }} ({{ item.size_gb }}GB)"
          async: 3600
          poll: 30
          ignore_errors: yes

        - name: Display video models summary
          debug:
            msg: |
              Video generation models downloaded:
              {% for model in comfyui_video_models | selectattr('essential', 'equalto', true) | list %}
              - {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB, {{ model.vram_gb }}GB VRAM)
              {% endfor %}

    #
    # ComfyUI Audio Models Download
    #
    - name: Download ComfyUI Audio Generation Models
      tags: [comfyui-models-audio, comfyui-models-all]
      block:
        - name: Download essential audio generation models
          shell: |
            python3 -c "
            from huggingface_hub import snapshot_download
            import os
            os.environ['HF_HOME'] = '{{ cache_dir }}'
            print('Downloading {{ item.name }}...')
            snapshot_download(
                repo_id='{{ item.name }}',
                cache_dir='{{ cache_dir }}',
                token=os.environ.get('HF_TOKEN')
            )
            print('Completed {{ item.name }}')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          loop: "{{ comfyui_audio_models | selectattr('essential', 'equalto', true) | list }}"
          loop_control:
            label: "{{ item.name }} ({{ item.size_gb }}GB)"
          async: 3600
          poll: 30
          ignore_errors: yes

        - name: Display audio models summary
          debug:
            msg: |
              Audio generation models downloaded:
              {% for model in comfyui_audio_models | selectattr('essential', 'equalto', true) | list %}
              - {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB)
              {% endfor %}

    #
    # ComfyUI Support Models Download (CLIP, IP-Adapter, ControlNet)
    #
    - name: Download ComfyUI Support Models
      tags: [comfyui-models-support, comfyui-models-all, comfyui-essential]
      block:
        - name: Download essential support models (CLIP, IP-Adapter)
          shell: |
            python3 -c "
            from huggingface_hub import snapshot_download
            import os
            os.environ['HF_HOME'] = '{{ cache_dir }}'
            print('Downloading {{ item.name }}...')
            snapshot_download(
                repo_id='{{ item.name }}',
                cache_dir='{{ cache_dir }}',
                token=os.environ.get('HF_TOKEN')
            )
            print('Completed {{ item.name }}')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          loop: "{{ comfyui_support_models | selectattr('essential', 'equalto', true) | list }}"
          loop_control:
            label: "{{ item.name }} ({{ item.size_gb }}GB)"
          async: 1800
          poll: 30
          ignore_errors: yes

        - name: Display support models summary
          debug:
            msg: |
              Support models downloaded:
              {% for model in comfyui_support_models | selectattr('essential', 'equalto', true) | list %}
              - {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB)
              {% endfor %}

              Total ComfyUI models cache: ~{{
                (comfyui_image_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) +
                (comfyui_video_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) +
                (comfyui_audio_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) +
                (comfyui_support_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb'))
              }}GB

    #
    # Download AI Models
    #
    - name: Download AI models
      tags: [models]
      block:
        - name: Create model cache directories
          file:
            path: "{{ item }}"
            state: directory
            mode: '0755'
          loop:
            - "{{ cache_dir }}"
            - "{{ models_dir }}/flux"
            - "{{ models_dir }}/musicgen"

        - name: Check if models are already cached
          stat:
            path: "{{ cache_dir }}/models--{{ item.value.name | regex_replace('/', '--') }}"
          register: model_cache_check
          loop: "{{ models | dict2items }}"
          loop_control:
            label: "{{ item.key }}"

        - name: Download Qwen 2.5 7B model (14GB, ~15 minutes)
          shell: |
            python3 -c "
            from transformers import AutoTokenizer, AutoModelForCausalLM
            import os
            os.environ['HF_HOME'] = '{{ cache_dir }}'
            print('Downloading Qwen 2.5 7B Instruct...')
            AutoTokenizer.from_pretrained('{{ models.vllm.name }}')
            print('Tokenizer downloaded successfully')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          when: not (model_cache_check.results[0].stat.exists | default(false))
          register: vllm_download
          async: 1800  # 30 minutes timeout
          poll: 30

        - name: Download Flux.1 Schnell model (12GB, ~12 minutes)
          shell: |
            python3 -c "
            from diffusers import FluxPipeline
            import os
            os.environ['HF_HOME'] = '{{ cache_dir }}'
            print('Downloading Flux.1 Schnell...')
            FluxPipeline.from_pretrained(
                '{{ models.flux.name }}',
                cache_dir='{{ cache_dir }}'
            )
            print('Flux.1 downloaded successfully')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          when: not (model_cache_check.results[1].stat.exists | default(false))
          register: flux_download
          async: 1200  # 20 minutes timeout
          poll: 30

        - name: Download MusicGen Medium model (11GB, ~10 minutes)
          shell: |
            python3 -c "
            from audiocraft.models import MusicGen
            import os
            os.environ['HF_HOME'] = '{{ cache_dir }}'
            print('Downloading MusicGen Medium...')
            MusicGen.get_pretrained('{{ models.musicgen.name }}')
            print('MusicGen downloaded successfully')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          when: not (model_cache_check.results[2].stat.exists | default(false))
          register: musicgen_download
          async: 900  # 15 minutes timeout
          poll: 30

        - name: Display model download summary
          debug:
            msg: |
              Model downloads completed:
              - Qwen 2.5 7B: {{ 'Downloaded' if vllm_download.changed | default(false) else 'Already cached' }}
              - Flux.1 Schnell: {{ 'Downloaded' if flux_download.changed | default(false) else 'Already cached' }}
              - MusicGen Medium: {{ 'Downloaded' if musicgen_download.changed | default(false) else 'Already cached' }}
              Total cache size: ~37GB

    #
    # Tailscale VPN
    #
    - name: Install and configure Tailscale
      tags: [tailscale]
      block:
        - name: Check if Tailscale is installed
          command: which tailscale
          register: tailscale_check
          changed_when: false
          failed_when: false

        - name: Install Tailscale
          shell: curl -fsSL https://tailscale.com/install.sh | sh
          become: true
          when: tailscale_check.rc != 0

        - name: Display Tailscale setup instructions
          debug:
            msg: |
              Tailscale installed. To connect:
              1. Start tailscaled: tailscaled --tun=userspace-networking --socks5-server=localhost:1055 &
              2. Authenticate: tailscale up --advertise-tags=tag:gpu
              3. Get IP: tailscale ip -4

              Note: Authentication requires manual intervention via provided URL

    #
    # Systemd Services (Optional)
    #
    - name: Configure systemd services
      tags: [systemd, never]  # never = skip by default
      block:
        - name: Create systemd service for orchestrator
          template:
            src: "{{ ai_dir }}/systemd/ai-orchestrator.service.j2"
            dest: /etc/systemd/system/ai-orchestrator.service
            mode: '0644'
          become: true

        - name: Reload systemd daemon
          systemd:
            daemon_reload: yes
          become: true

        - name: Enable orchestrator service
          systemd:
            name: ai-orchestrator
            enabled: yes
          become: true

        - name: Display systemd instructions
          debug:
            msg: |
              Systemd service configured. To manage:
              - Start: sudo systemctl start ai-orchestrator
              - Stop: sudo systemctl stop ai-orchestrator
              - Status: sudo systemctl status ai-orchestrator
              - Logs: sudo journalctl -u ai-orchestrator -f

    #
    # Validation
    #
    - name: Validate installation
      tags: [validate, never]  # never = skip by default, run explicitly
      block:
        - name: Check Python packages
          shell: pip3 list | grep -E "(fastapi|uvicorn|torch|vllm|diffusers|audiocraft)"
          register: pip_check
          changed_when: false

        - name: Display installed packages
          debug:
            msg: "{{ pip_check.stdout_lines }}"

        - name: Check GPU memory
          shell: nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits
          register: gpu_memory
          changed_when: false

        - name: Display GPU memory
          debug:
            msg: "Free GPU memory: {{ gpu_memory.stdout }} MB"

        - name: Check cached models
          shell: du -sh {{ cache_dir }}
          register: cache_size
          changed_when: false

        - name: Display cache information
          debug:
            msg: "Model cache size: {{ cache_size.stdout }}"

        - name: Verify service scripts are executable
          file:
            path: "{{ ai_dir }}/{{ item.script }}"
            mode: '0755'
          loop: "{{ services }}"

        - name: Display validation summary
          debug:
            msg: |
              ✓ Installation validated successfully!

              Next steps:
              1. Start orchestrator: python3 {{ ai_dir }}/model-orchestrator/orchestrator_subprocess.py
              2. Test endpoint: curl http://localhost:9000/health
              3. Configure LiteLLM on VPS to connect via Tailscale

              Services:
              {% for service in services %}
              - {{ service.name }}: http://localhost:{{ service.port }}
              {% endfor %}

    #
    # Cleanup for Template Creation
    #
    - name: Cleanup for template creation
      tags: [cleanup, never]  # never = skip by default, run explicitly
      block:
        - name: Remove sensitive files
          file:
            path: "{{ item }}"
            state: absent
          loop:
            - "{{ ai_dir }}/.env"
            - /root/.ssh/known_hosts
            - /root/.bash_history
            - /root/.python_history

        - name: Clear system logs
          shell: find /var/log -type f -name "*.log" -delete
          become: true
          ignore_errors: yes

        - name: Create template version marker
          copy:
            dest: "{{ workspace_dir }}/TEMPLATE_VERSION"
            content: |
              RunPod Multi-Modal AI Template (Process-Based Architecture)
              Version: 2.0
              Created: {{ ansible_date_time.iso8601 }}

              Components:
              - Python {{ python_version }}
              - Orchestrator (process-based)
              - Text Generation (vLLM + Qwen 2.5 7B)
              - Image Generation (Flux.1 Schnell)
              - Music Generation (MusicGen Medium)

              Models Cached: ~37GB
              Architecture: No Docker, direct Python execution

              Deployment:
              1. Create .env file with HF_TOKEN
              2. Run: python3 {{ ai_dir }}/model-orchestrator/orchestrator_subprocess.py
              3. Access: http://localhost:9000/health

        - name: Display template creation instructions
          debug:
            msg: |
              Template prepared successfully!

              Next steps in RunPod dashboard:
              1. Stop all running services
              2. Go to My Pods → Select this pod → ⋮ → Save as Template
              3. Name: multi-modal-ai-process-v2.0
              4. Description: Process-based multi-modal AI (text/image/music)
              5. Save and test deployment from template

              Template enables 2-3 minute deployments instead of 60+ minutes!