chore: remove Ansible infrastructure

Remove the Ansible playbook and configuration files now that infrastructure setup has been migrated to arty bash scripts.

Deleted:
- playbook.yml (~950 lines)
- inventory.yml
- ansible.cfg

All functionality except model downloads is now available via arty scripts:
- arty run install/essential
- arty run setup/system-packages
- arty run setup/comfyui-base
- arty run setup/comfyui-nodes
- arty run setup/supervisor
- arty run setup/tailscale

Model downloads will be handled separately (TBD).

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-22 12:23:11 +01:00
parent d5965ce0c4
commit 16226c7b39
3 changed files with 0 additions and 1010 deletions
--- a/ansible.cfg
+++ b/ansible.cfg
@@ -1,33 +0,0 @@
 [defaults]
 # Ansible configuration for RunPod deployment
 # Inventory
 inventory = inventory.yml
 # Disable host key checking (RunPod instances may change)
 host_key_checking = False
 # Display settings
 stdout_callback = yaml
 bin_ansible_callbacks = True
 # Performance: gathered facts are cached as JSON files for 86400 s (24 h)
 forks = 5
 gathering = smart
 fact_caching = jsonfile
 fact_caching_connection = /tmp/ansible_facts
 fact_caching_timeout = 86400
 # Logging
 log_path = /tmp/ansible-runpod.log
 # Connection settings (the default transport is local, so no SSH is used)
 timeout = 30
 transport = local
 # Retry files
 retry_files_enabled = False
 [privilege_escalation]
 # Privilege escalation: Ansible reads the become_* keys from this section
 # only; placed under [defaults] they are silently ignored.
 become_method = sudo
 become_ask_pass = False
--- a/inventory.yml
+++ b/inventory.yml
@@ -1,26 +0,0 @@
 ---
 # Inventory for RunPod deployments.
 #
 # localhost is the only host: every task is executed on the RunPod GPU
 # server itself through the local connection plugin.
 all:
  vars:
    # Filesystem layout
    workspace_dir: /workspace
    ai_dir: /workspace/ai
    # HuggingFace model cache location
    huggingface_cache: /workspace/huggingface_cache
    # GPU memory utilization setting
    gpu_memory_utilization: 0.85
    # Secrets are taken from the environment of the ansible process
    hf_token: "{{ lookup('env', 'HF_TOKEN') }}"
    tailscale_key: "{{ lookup('env', 'TAILSCALE_AUTH_KEY') | default('') }}"
  hosts:
    localhost:
      ansible_python_interpreter: /usr/bin/python3
      ansible_connection: local
--- a/playbook.yml
+++ b/playbook.yml
@@ -1,951 +0,0 @@
 ---
 #
 # RunPod AI Infrastructure Ansible Playbook
 #
 # This playbook provisions a RunPod GPU instance with multi-modal AI services.
 # It replaces all bash scripts with reproducible Ansible tasks.
 #
 # Usage:
 #   ansible-playbook playbook.yml                    # Full deployment
 #   ansible-playbook playbook.yml --tags base        # Install system packages
 #   ansible-playbook playbook.yml --tags python      # Setup Python environment
 #   ansible-playbook playbook.yml --tags models      # Download models only
 #   ansible-playbook playbook.yml --tags validate    # Validate installation
 #
 # Tags:
 #   base                   - System packages and dependencies
 #   python                 - Python environment setup
 #   dependencies           - Install Python packages
 #   models                 - Download AI models (vLLM, Flux, MusicGen)
 #   comfyui                - Install and configure ComfyUI base
 #   comfyui-models-image   - Download ComfyUI image generation models
 #   comfyui-models-video   - Download ComfyUI video generation models
 #   comfyui-models-audio   - Download ComfyUI audio generation models
 #   comfyui-models-support - Download CLIP, IP-Adapter, ControlNet models
 #   comfyui-models-all     - Download all ComfyUI models
 #   comfyui-nodes          - Install essential custom nodes
 #   comfyui-essential      - Quick setup (ComfyUI + essential models only)
 #   tailscale              - Install and configure Tailscale
 #   supervisor             - Install and configure Supervisor process manager
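 #   systemd                - Configure systemd services (opt-in: tagged `never`)
 #   validate               - Health checks and validation (opt-in: tagged `never`)
 #   cleanup                - Remove sensitive files and logs before saving a template (opt-in: tagged `never`)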
 #
 - name: Provision RunPod GPU Instance for AI Services
  connection: local
  hosts: localhost
  become: false
  vars:
    # Directory layout; every path is derived from workspace_dir
    workspace_dir: /workspace
    models_dir: '{{ workspace_dir }}/models'
    cache_dir: '{{ workspace_dir }}/huggingface_cache'
    ai_dir: '{{ workspace_dir }}/ai'
    # Python tooling versions
    pip_version: '23.3.1'
    python_version: '3.10'
    # Models handled by the "models" tag.
    # Key order matters: the download tasks index the cache-check results
    # positionally (vllm, flux, musicgen).
    models:
      vllm:
        size_gb: 14
        name: 'Qwen/Qwen2.5-7B-Instruct'
      flux:
        size_gb: 12
        name: 'black-forest-labs/FLUX.1-schnell'
      musicgen:
        size_gb: 11
        name: 'facebook/musicgen-medium'
    # ========================================================================
    # ComfyUI Models - Comprehensive List for 24GB GPU
    # ========================================================================
    # ComfyUI Image Generation Models
    # Image checkpoints offered to ComfyUI. Only entries with
    # `essential: true` are selected by the image download task.
    comfyui_image_models:
      # --- FLUX (Black Forest Labs) ---
      - name: 'black-forest-labs/FLUX.1-schnell'
        description: 'FLUX.1 Schnell - Fast 4-step inference'
        type: 'checkpoint'
        category: 'image'
        format: 'fp16'
        size_gb: 23
        vram_gb: 23
        essential: true
      - name: 'black-forest-labs/FLUX.1-dev'
        description: 'FLUX.1 Dev - Balanced quality/speed'
        type: 'checkpoint'
        category: 'image'
        format: 'fp16'
        size_gb: 23
        vram_gb: 23
        essential: false
      # --- Stable Diffusion XL ---
      - name: 'stabilityai/stable-diffusion-xl-base-1.0'
        description: 'SDXL 1.0 Base - 1024x1024 native resolution'
        type: 'checkpoint'
        category: 'image'
        format: 'fp16'
        size_gb: 7
        vram_gb: 12
        essential: true
      - name: 'stabilityai/stable-diffusion-xl-refiner-1.0'
        description: 'SDXL Refiner - Enhances base output'
        type: 'checkpoint'
        category: 'image'
        format: 'fp16'
        size_gb: 6
        vram_gb: 12
        essential: false
      # --- Stable Diffusion 3.5 ---
      - name: 'stabilityai/stable-diffusion-3.5-large'
        description: 'SD 3.5 Large - MMDiT architecture'
        type: 'checkpoint'
        category: 'image'
        format: 'fp16'
        size_gb: 18
        vram_gb: 20
        essential: false
    # ComfyUI Video Generation Models
    # Video models offered to ComfyUI. Only entries with `essential: true`
    # are selected by the video download task.
    comfyui_video_models:
      # --- CogVideoX (text-to-video) ---
      - name: 'THUDM/CogVideoX-5b'
        description: 'CogVideoX 5B - Professional text-to-video'
        type: 'video'
        category: 'video'
        size_gb: 20
        vram_gb: 12  # with optimizations
        essential: true
      # --- Stable Video Diffusion (image-to-video) ---
      - name: 'stabilityai/stable-video-diffusion-img2vid'
        description: 'SVD - 14 frame image-to-video'
        type: 'video'
        category: 'video'
        size_gb: 8
        vram_gb: 16
        essential: true
      - name: 'stabilityai/stable-video-diffusion-img2vid-xt'
        description: 'SVD-XT - 25 frame image-to-video'
        type: 'video'
        category: 'video'
        size_gb: 8
        vram_gb: 20
        essential: false
    # ComfyUI Audio Generation Models
    # MusicGen variants; only the entry with `essential: true` is selected
    # by the audio download task.
    comfyui_audio_models:
      - name: 'facebook/musicgen-small'
        description: 'MusicGen Small - Fast music generation'
        type: 'audio'
        category: 'audio'
        size_gb: 3
        vram_gb: 4
        essential: false
      - name: 'facebook/musicgen-medium'
        description: 'MusicGen Medium - Balanced quality'
        type: 'audio'
        category: 'audio'
        size_gb: 11
        vram_gb: 8
        essential: true
      - name: 'facebook/musicgen-large'
        description: 'MusicGen Large - Highest quality'
        type: 'audio'
        category: 'audio'
        size_gb: 22
        vram_gb: 16
        essential: false
    # ComfyUI Supporting Models (CLIP, IP-Adapter, ControlNet)
    # Vision-encoder models used alongside the image checkpoints.
    # `target_dir` names a folder under ComfyUI/models; no task visible in
    # this playbook reads it yet (downloads go to the HuggingFace cache).
    comfyui_support_models:
      # CLIP Vision Models
      - name: "openai/clip-vit-large-patch14"
        type: "clip_vision"
        category: "support"
        size_gb: 2
        # The repo id is a ViT-L/14 model, so the former "CLIP H" label was wrong.
        # NOTE(review): IP-Adapter for SD 1.5 normally expects a ViT-H
        # encoder - confirm this is the model that was intended.
        description: "CLIP ViT-L/14 - OpenAI CLIP vision encoder"
        essential: true
        target_dir: "clip_vision"
      - name: "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
        type: "clip_vision"
        category: "support"
        size_gb: 7
        description: "CLIP G - For SDXL IP-Adapter"
        essential: true
        target_dir: "clip_vision"
      - name: "google/siglip-so400m-patch14-384"
        type: "clip_vision"
        category: "support"
        size_gb: 2
        description: "SigLIP - For FLUX models"
        essential: true
        target_dir: "clip_vision"
    # ComfyUI Custom Nodes - Essential Extensions
    # Custom node repositories. `name` becomes the checkout directory under
    # ComfyUI/custom_nodes; only `essential: true` entries are installed.
    comfyui_custom_nodes:
      # ComfyUI Manager - Must have
      - name: "ComfyUI-Manager"
        repo: "https://github.com/ltdrdata/ComfyUI-Manager.git"
        category: "manager"
        description: "Install/manage custom nodes and models"
        essential: true
      # Video Generation Nodes
      - name: "ComfyUI-VideoHelperSuite"
        repo: "https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git"
        category: "video"
        description: "Video operations and processing"
        essential: true
      - name: "ComfyUI-AnimateDiff-Evolved"
        repo: "https://github.com/Kosinkadink/ComfyUI-AnimateDiff-Evolved.git"
        category: "video"
        description: "AnimateDiff for video generation"
        essential: true
      - name: "ComfyUI-CogVideoXWrapper"
        repo: "https://github.com/kijai/ComfyUI-CogVideoXWrapper.git"
        category: "video"
        description: "CogVideoX integration"
        essential: false
      # Image Enhancement Nodes
      - name: "ComfyUI_IPAdapter_plus"
        repo: "https://github.com/cubiq/ComfyUI_IPAdapter_plus.git"
        category: "image"
        description: "IP-Adapter for style transfer"
        essential: true
      - name: "ComfyUI-Impact-Pack"
        repo: "https://github.com/ltdrdata/ComfyUI-Impact-Pack.git"
        category: "image"
        description: "Auto face enhancement, detailer"
        essential: true
      # Directory name now matches the repository name's casing.
      - name: "ComfyUI-Inspire-Pack"
        repo: "https://github.com/ltdrdata/ComfyUI-Inspire-Pack.git"
        category: "image"
        description: "Additional inspiration tools"
        essential: false
      # Audio Generation Nodes
      # NOTE(review): verify this repository URL/owner resolves; the install
      # task ignores clone errors, so a wrong URL would go unnoticed.
      - name: "comfyui-sound-lab"
        repo: "https://github.com/eigenpunk/comfyui-sound-lab.git"
        category: "audio"
        description: "MusicGen and Stable Audio integration"
        essential: true
      # Utility Nodes
      - name: "ComfyUI-Advanced-ControlNet"
        repo: "https://github.com/Kosinkadink/ComfyUI-Advanced-ControlNet.git"
        category: "control"
        description: "Advanced ControlNet features"
        essential: false
      - name: "ComfyUI-3D-Pack"
        repo: "https://github.com/MrForExample/ComfyUI-3D-Pack.git"
        category: "3d"
        description: "3D asset generation"
        essential: false
    # Service configuration
    # Long-running services: `script` is relative to ai_dir, `port` is the
    # local port shown in the validation summary.
    services:
      - name: orchestrator
        script: model-orchestrator/orchestrator_subprocess.py
        port: 9000
      - name: vllm
        script: models/vllm/server.py
        port: 8001
      - name: comfyui
        script: models/comfyui/start.sh
        port: 8188
  tasks:
    #
    # Base System Setup
    #
    # Verifies a GPU is visible, then installs the OS-level build and
    # utility packages. Tagged `always`, so it runs for every tag selection.
    - name: Base system packages
      tags: [base, always]
      block:
        # No shell features are needed, so use `command`; a non-zero exit
        # status from nvidia-smi already fails the task by default.
        - name: Check GPU availability
          ansible.builtin.command: nvidia-smi
          register: nvidia_check
          changed_when: false
        - name: Display GPU information
          ansible.builtin.debug:
            msg: "{{ nvidia_check.stdout_lines }}"
        - name: Ensure workspace directory exists
          ansible.builtin.file:
            path: "{{ workspace_dir }}"
            state: directory
            mode: '0755'
        # cache_valid_time skips the refresh when the cache is under an hour old.
        - name: Update apt cache
          ansible.builtin.apt:
            update_cache: true
            cache_valid_time: 3600
          become: true
        - name: Install base system packages
          ansible.builtin.apt:
            name:
              - build-essential
              - python3-dev
              - python3-pip
              - python3-venv
              - git
              - curl
              - wget
              - vim
              - htop
              - tmux
              - net-tools
            state: present
          become: true
    #
    # Python Environment Setup
    #
    # Pins pip to pip_version, then installs the core requirements file
    # from the ai_dir checkout using the system pip3.
    - name: Python environment setup
      tags: [python]
      block:
        - name: Upgrade pip
          become: true
          ansible.builtin.pip:
            executable: pip3
            name: pip
            version: "{{ pip_version }}"
            extra_args: --upgrade
        - name: Install core Python packages
          become: true
          ansible.builtin.pip:
            executable: pip3
            requirements: "{{ ai_dir }}/core/requirements.txt"
    #
    # Install Model Dependencies
    #
    # Installs the Python requirements listed for the vLLM service.
    - name: Install model dependencies
      tags: [dependencies]
      block:
        - name: Install vLLM dependencies
          become: true
          ansible.builtin.pip:
            executable: pip3
            requirements: "{{ ai_dir }}/models/vllm/requirements.txt"
    #
    # ComfyUI Installation
    #
    # Clones ComfyUI into the workspace, installs its Python requirements,
    # prepares the models/ directory tree and links the HuggingFace cache.
    - name: Install and configure ComfyUI
      # The file header documents comfyui-essential as "ComfyUI + essential
      # models", so the base install must be selected by that tag as well.
      tags: [comfyui, comfyui-essential]
      block:
        - name: Check if ComfyUI is already installed
          ansible.builtin.stat:
            path: "{{ workspace_dir }}/ComfyUI"
          register: comfyui_check
        # Cloned only when the directory is absent; an existing checkout is
        # left untouched.
        - name: Clone ComfyUI repository
          ansible.builtin.git:
            repo: https://github.com/comfyanonymous/ComfyUI.git
            dest: "{{ workspace_dir }}/ComfyUI"
            version: master
            update: true
          when: not comfyui_check.stat.exists
        - name: Install ComfyUI dependencies
          ansible.builtin.pip:
            requirements: "{{ workspace_dir }}/ComfyUI/requirements.txt"
            executable: pip3
          become: true
        - name: Install additional ComfyUI dependencies
          ansible.builtin.pip:
            requirements: "{{ ai_dir }}/models/comfyui/requirements.txt"
            executable: pip3
          become: true
        - name: Create ComfyUI models directory structure
          ansible.builtin.file:
            path: "{{ workspace_dir }}/ComfyUI/models/{{ item }}"
            state: directory
            mode: '0755'
          loop:
            # Image Model Directories
            - checkpoints
            - unet
            - vae
            - loras
            - clip
            - clip_vision
            - controlnet
            - ipadapter
            - embeddings
            - upscale_models
            # Video Model Directories
            - video_models
            - animatediff_models
            - animatediff_motion_lora
            # Audio Model Directories
            - audio_models
            # Utility Directories
            # (custom_nodes is intentionally absent: the node install task
            # clones into ComfyUI/custom_nodes, not ComfyUI/models/custom_nodes)
            - configs
        # Links the whole HuggingFace cache directory, not a single model.
        # Best effort: errors (for example a missing cache_dir) are ignored.
        - name: Link HuggingFace cache into ComfyUI models directory
          ansible.builtin.file:
            src: "{{ cache_dir }}"
            dest: "{{ workspace_dir }}/ComfyUI/models/huggingface_cache"
            state: link
          ignore_errors: true
        - name: Make ComfyUI start script executable
          ansible.builtin.file:
            path: "{{ ai_dir }}/models/comfyui/start.sh"
            mode: '0755'
        - name: Display ComfyUI setup summary
          ansible.builtin.debug:
            msg: |
              ✓ ComfyUI installed successfully!
              Directory: {{ workspace_dir }}/ComfyUI
              Port: 8188
              HuggingFace Cache: {{ cache_dir }}
              To start ComfyUI:
              bash {{ ai_dir }}/models/comfyui/start.sh
              Or manually:
              cd {{ workspace_dir }}/ComfyUI && python3 main.py --listen 0.0.0.0 --port 8188
              Access: http://localhost:8188
    #
    # ComfyUI Custom Nodes Installation
    #
    # Clones every custom node marked `essential: true` and installs its
    # requirements.txt when the repository ships one.
    - name: Install ComfyUI Custom Nodes
      tags: [comfyui-nodes, comfyui-essential]
      block:
        - name: Install essential ComfyUI custom nodes
          ansible.builtin.git:
            repo: "{{ item.repo }}"
            dest: "{{ workspace_dir }}/ComfyUI/custom_nodes/{{ item.name }}"
            # HEAD follows each repository's default branch; a hard-coded
            # "main" fails for repositories whose default branch is named
            # differently, and that failure was hidden by ignore_errors.
            version: HEAD
            update: true
          loop: "{{ comfyui_custom_nodes | selectattr('essential', 'equalto', true) | list }}"
          loop_control:
            label: "{{ item.name }}"
          # Best effort: one unreachable repository must not abort the rest.
          ignore_errors: true
        - name: Install custom node dependencies
          ansible.builtin.shell: |
            if [ -f "{{ workspace_dir }}/ComfyUI/custom_nodes/{{ item.name }}/requirements.txt" ]; then
              pip3 install -r "{{ workspace_dir }}/ComfyUI/custom_nodes/{{ item.name }}/requirements.txt"
            fi
          loop: "{{ comfyui_custom_nodes | selectattr('essential', 'equalto', true) | list }}"
          loop_control:
            label: "{{ item.name }}"
          become: true
          ignore_errors: true
        # The previous hint pointed at a `comfyui-nodes-all` tag that no task
        # in this playbook defines.
        - name: Display custom nodes installation summary
          ansible.builtin.debug:
            msg: |
              ✓ Custom nodes installed successfully!
              Essential nodes:
              {% for node in comfyui_custom_nodes | selectattr('essential', 'equalto', true) | list %}
              - {{ node.name }}: {{ node.description }}
              {% endfor %}
              Optional nodes (essential: false) are not installed by this playbook.
              Install them with ComfyUI-Manager, or set essential: true in comfyui_custom_nodes.
    #
    # ComfyUI Image Models Download
    #
    # Downloads every image model marked `essential: true` into cache_dir
    # and reports the per-model outcome.
    - name: Download ComfyUI Image Generation Models
      tags: [comfyui-models-image, comfyui-models-all, comfyui-essential]
      block:
        # HF_HOME and HF_TOKEN reach the script through `environment`, so the
        # script does not set them again. An unset HF_TOKEN arrives as an empty
        # string; `or None` keeps an empty token from being sent.
        - name: Download essential image generation models
          ansible.builtin.shell: |
            python3 -c "
            from huggingface_hub import snapshot_download
            import os
            print('Downloading {{ item.name }}...')
            snapshot_download(
                repo_id='{{ item.name }}',
                cache_dir='{{ cache_dir }}',
                token=os.environ.get('HF_TOKEN') or None
            )
            print('Completed {{ item.name }}')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          loop: "{{ comfyui_image_models | selectattr('essential', 'equalto', true) | list }}"
          loop_control:
            label: "{{ item.name }} ({{ item.size_gb }}GB)"
          # Up to one hour per model, polled every 30 seconds.
          async: 3600
          poll: 30
          register: image_model_downloads
          # Best effort: a failed model must not abort the remaining ones;
          # the summary below reports which ones failed.
          ignore_errors: true
        - name: Display image models summary
          ansible.builtin.debug:
            msg: |
              Image generation model download results:
              {% for result in image_model_downloads.results %}
              - {{ result.item.name }}: {{ 'FAILED' if result.failed | default(false) else 'OK' }} - {{ result.item.description }} ({{ result.item.size_gb }}GB, {{ result.item.vram_gb }}GB VRAM)
              {% endfor %}
              Total size: ~{{ (comfyui_image_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) }}GB
    #
    # ComfyUI Video Models Download
    #
    # Downloads every video model marked `essential: true` into cache_dir
    # and reports the per-model outcome.
    - name: Download ComfyUI Video Generation Models
      tags: [comfyui-models-video, comfyui-models-all]
      block:
        # HF_HOME and HF_TOKEN reach the script through `environment`, so the
        # script does not set them again. An unset HF_TOKEN arrives as an empty
        # string; `or None` keeps an empty token from being sent.
        - name: Download essential video generation models
          ansible.builtin.shell: |
            python3 -c "
            from huggingface_hub import snapshot_download
            import os
            print('Downloading {{ item.name }}...')
            snapshot_download(
                repo_id='{{ item.name }}',
                cache_dir='{{ cache_dir }}',
                token=os.environ.get('HF_TOKEN') or None
            )
            print('Completed {{ item.name }}')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          loop: "{{ comfyui_video_models | selectattr('essential', 'equalto', true) | list }}"
          loop_control:
            label: "{{ item.name }} ({{ item.size_gb }}GB)"
          # Up to one hour per model, polled every 30 seconds.
          async: 3600
          poll: 30
          register: video_model_downloads
          # Best effort: a failed model must not abort the remaining ones;
          # the summary below reports which ones failed.
          ignore_errors: true
        - name: Display video models summary
          ansible.builtin.debug:
            msg: |
              Video generation model download results:
              {% for result in video_model_downloads.results %}
              - {{ result.item.name }}: {{ 'FAILED' if result.failed | default(false) else 'OK' }} - {{ result.item.description }} ({{ result.item.size_gb }}GB, {{ result.item.vram_gb }}GB VRAM)
              {% endfor %}
    #
    # ComfyUI Audio Models Download
    #
    # Downloads every audio model marked `essential: true` into cache_dir
    # and reports the per-model outcome.
    - name: Download ComfyUI Audio Generation Models
      tags: [comfyui-models-audio, comfyui-models-all]
      block:
        # HF_HOME and HF_TOKEN reach the script through `environment`, so the
        # script does not set them again. An unset HF_TOKEN arrives as an empty
        # string; `or None` keeps an empty token from being sent.
        - name: Download essential audio generation models
          ansible.builtin.shell: |
            python3 -c "
            from huggingface_hub import snapshot_download
            import os
            print('Downloading {{ item.name }}...')
            snapshot_download(
                repo_id='{{ item.name }}',
                cache_dir='{{ cache_dir }}',
                token=os.environ.get('HF_TOKEN') or None
            )
            print('Completed {{ item.name }}')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          loop: "{{ comfyui_audio_models | selectattr('essential', 'equalto', true) | list }}"
          loop_control:
            label: "{{ item.name }} ({{ item.size_gb }}GB)"
          # Up to one hour per model, polled every 30 seconds.
          async: 3600
          poll: 30
          register: audio_model_downloads
          # Best effort: a failed model must not abort the remaining ones;
          # the summary below reports which ones failed.
          ignore_errors: true
        - name: Display audio models summary
          ansible.builtin.debug:
            msg: |
              Audio generation model download results:
              {% for result in audio_model_downloads.results %}
              - {{ result.item.name }}: {{ 'FAILED' if result.failed | default(false) else 'OK' }} - {{ result.item.description }} ({{ result.item.size_gb }}GB)
              {% endfor %}
    #
    # ComfyUI Support Models Download (CLIP, IP-Adapter, ControlNet)
    #
    # Downloads every support model marked `essential: true` into cache_dir
    # and reports the per-model outcome plus the combined essential size.
    - name: Download ComfyUI Support Models
      tags: [comfyui-models-support, comfyui-models-all, comfyui-essential]
      block:
        # HF_HOME and HF_TOKEN reach the script through `environment`, so the
        # script does not set them again. An unset HF_TOKEN arrives as an empty
        # string; `or None` keeps an empty token from being sent.
        - name: Download essential support models (CLIP, IP-Adapter)
          ansible.builtin.shell: |
            python3 -c "
            from huggingface_hub import snapshot_download
            import os
            print('Downloading {{ item.name }}...')
            snapshot_download(
                repo_id='{{ item.name }}',
                cache_dir='{{ cache_dir }}',
                token=os.environ.get('HF_TOKEN') or None
            )
            print('Completed {{ item.name }}')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          loop: "{{ comfyui_support_models | selectattr('essential', 'equalto', true) | list }}"
          loop_control:
            label: "{{ item.name }} ({{ item.size_gb }}GB)"
          # Up to 30 minutes per model, polled every 30 seconds.
          async: 1800
          poll: 30
          register: support_model_downloads
          # Best effort: a failed model must not abort the remaining ones;
          # the summary below reports which ones failed.
          ignore_errors: true
        - name: Display support models summary
          ansible.builtin.debug:
            msg: |
              Support model download results:
              {% for result in support_model_downloads.results %}
              - {{ result.item.name }}: {{ 'FAILED' if result.failed | default(false) else 'OK' }} - {{ result.item.description }} ({{ result.item.size_gb }}GB)
              {% endfor %}
              Total ComfyUI models cache: ~{{
                (comfyui_image_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) +
                (comfyui_video_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) +
                (comfyui_audio_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) +
                (comfyui_support_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb'))
              }}GB
    #
    # Download AI Models
    #
    # Pre-downloads the three service models (Qwen, Flux, MusicGen) unless a
    # cache directory for the model already exists.
    - name: Download AI models
      tags: [models]
      block:
        - name: Create model cache directories
          ansible.builtin.file:
            path: "{{ item }}"
            state: directory
            mode: '0755'
          loop:
            - "{{ cache_dir }}"
            - "{{ models_dir }}/flux"
            - "{{ models_dir }}/musicgen"
        # NOTE(review): this checks cache_dir/models--<org>--<name>. Downloads
        # that rely only on HF_HOME (no explicit cache_dir argument) may land
        # in a different subdirectory - confirm the path matches.
        - name: Check if models are already cached
          ansible.builtin.stat:
            path: "{{ cache_dir }}/models--{{ item.value.name | regex_replace('/', '--') }}"
          register: model_cache_check
          loop: "{{ models | dict2items }}"
          loop_control:
            label: "{{ item.key }}"
        # The cache results are looked up by model key rather than by list
        # position, so reordering `models` cannot pair a task with the wrong
        # model. The task previously fetched only the tokenizer; it now
        # downloads the full repository snapshot the task name promises.
        - name: Download Qwen 2.5 7B model (14GB, ~15 minutes)
          ansible.builtin.shell: |
            python3 -c "
            from huggingface_hub import snapshot_download
            import os
            print('Downloading Qwen 2.5 7B Instruct...')
            snapshot_download(
                repo_id='{{ models.vllm.name }}',
                token=os.environ.get('HF_TOKEN') or None
            )
            print('Qwen 2.5 7B downloaded successfully')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          when: >-
            not (model_cache_check.results
                 | selectattr('item.key', 'equalto', 'vllm')
                 | map(attribute='stat.exists') | first | default(false))
          register: vllm_download
          async: 1800  # 30 minutes timeout
          poll: 30
        - name: Download Flux.1 Schnell model (12GB, ~12 minutes)
          ansible.builtin.shell: |
            python3 -c "
            from diffusers import FluxPipeline
            import os
            os.environ['HF_HOME'] = '{{ cache_dir }}'
            print('Downloading Flux.1 Schnell...')
            FluxPipeline.from_pretrained(
                '{{ models.flux.name }}',
                cache_dir='{{ cache_dir }}'
            )
            print('Flux.1 downloaded successfully')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          when: >-
            not (model_cache_check.results
                 | selectattr('item.key', 'equalto', 'flux')
                 | map(attribute='stat.exists') | first | default(false))
          register: flux_download
          async: 1200  # 20 minutes timeout
          poll: 30
        - name: Download MusicGen Medium model (11GB, ~10 minutes)
          ansible.builtin.shell: |
            python3 -c "
            from audiocraft.models import MusicGen
            import os
            os.environ['HF_HOME'] = '{{ cache_dir }}'
            print('Downloading MusicGen Medium...')
            MusicGen.get_pretrained('{{ models.musicgen.name }}')
            print('MusicGen downloaded successfully')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          when: >-
            not (model_cache_check.results
                 | selectattr('item.key', 'equalto', 'musicgen')
                 | map(attribute='stat.exists') | first | default(false))
          register: musicgen_download
          async: 900  # 15 minutes timeout
          poll: 30
        # A skipped download registers changed=false and is reported as cached.
        - name: Display model download summary
          ansible.builtin.debug:
            msg: |
              Model downloads completed:
              - Qwen 2.5 7B: {{ 'Downloaded' if vllm_download.changed | default(false) else 'Already cached' }}
              - Flux.1 Schnell: {{ 'Downloaded' if flux_download.changed | default(false) else 'Already cached' }}
              - MusicGen Medium: {{ 'Downloaded' if musicgen_download.changed | default(false) else 'Already cached' }}
              Total cache size: ~37GB
    #
    # Tailscale VPN
    #
    # Installs Tailscale through the upstream install script when the
    # binary is not yet on PATH, then prints the manual connection steps.
    - name: Install and configure Tailscale
      tags: [tailscale]
      block:
        # `which` exits non-zero when the binary is missing; that exit status
        # is the signal used below, so it must never fail or mark a change.
        - name: Check if Tailscale is installed
          ansible.builtin.command: which tailscale
          failed_when: false
          changed_when: false
          register: tailscale_check
        - name: Install Tailscale
          when: tailscale_check.rc != 0
          become: true
          ansible.builtin.shell: curl -fsSL https://tailscale.com/install.sh | sh
        - name: Display Tailscale setup instructions
          ansible.builtin.debug:
            msg: |
              Tailscale installed. To connect:
              1. Start tailscaled: tailscaled --tun=userspace-networking --socks5-server=localhost:1055 &
              2. Authenticate: tailscale up --advertise-tags=tag:gpu
              3. Get IP: tailscale ip -4
              Note: Authentication requires manual intervention via provided URL
    #
    # Supervisor Process Manager
    #
    # Installs Supervisor with pip, creates the log directory and copies the
    # supervisord.conf from the ai_dir checkout into the workspace.
    - name: Install and configure Supervisor
      tags: [supervisor]
      block:
        - name: Install Supervisor
          ansible.builtin.pip:
            name: supervisor
            executable: pip3
          become: true
        - name: Create logs directory
          ansible.builtin.file:
            path: "{{ workspace_dir }}/logs"
            state: directory
            mode: '0755'
        - name: Deploy supervisord configuration
          ansible.builtin.copy:
            src: "{{ ai_dir }}/supervisord.conf"
            dest: "{{ workspace_dir }}/supervisord.conf"
            mode: '0644'
        # The web-interface credentials are no longer printed: debug output
        # also lands in the Ansible log file, so the message points at the
        # configuration file instead.
        - name: Display Supervisor setup instructions
          ansible.builtin.debug:
            msg: |
              ✓ Supervisor installed successfully!
              Configuration: {{ workspace_dir }}/supervisord.conf
              Logs: {{ workspace_dir }}/logs/
              Services configured:
              - comfyui: ComfyUI server (port 8188) - autostart enabled
              - orchestrator: Model orchestrator (port 9000) - autostart disabled
              To start Supervisor:
              supervisord -c {{ workspace_dir }}/supervisord.conf
              To manage services:
              supervisorctl status                    # Check service status
              supervisorctl start orchestrator        # Start orchestrator
              supervisorctl restart comfyui           # Restart ComfyUI
              supervisorctl stop all                  # Stop all services
              supervisorctl tail -f comfyui           # Follow ComfyUI logs
              Web interface:
              http://localhost:9001 (credentials: see [inet_http_server] in {{ workspace_dir }}/supervisord.conf)
    #
    # Systemd Services (Optional)
    #
    # Renders the orchestrator unit file, reloads systemd and enables the
    # unit. Opt-in only: the `never` tag keeps it out of default runs.
    - name: Configure systemd services
      tags: [systemd, never]  # never = skip by default
      block:
        - name: Create systemd service for orchestrator
          become: true
          ansible.builtin.template:
            src: "{{ ai_dir }}/systemd/ai-orchestrator.service.j2"
            dest: /etc/systemd/system/ai-orchestrator.service
            mode: '0644'
        - name: Reload systemd daemon
          become: true
          ansible.builtin.systemd:
            daemon_reload: true
        - name: Enable orchestrator service
          become: true
          ansible.builtin.systemd:
            name: ai-orchestrator
            enabled: true
        - name: Display systemd instructions
          ansible.builtin.debug:
            msg: |
              Systemd service configured. To manage:
              - Start: sudo systemctl start ai-orchestrator
              - Stop: sudo systemctl stop ai-orchestrator
              - Status: sudo systemctl status ai-orchestrator
              - Logs: sudo journalctl -u ai-orchestrator -f
    #
    # Validation
    #
    # Read-only health checks (installed packages, free GPU memory, cache
    # size) followed by a next-steps summary. Opt-in via the `never` tag.
    - name: Validate installation
      tags: [validate, never]  # never = skip by default, run explicitly
      block:
        # Needs a shell for the pipe; grep exits non-zero (failing the task)
        # when none of the listed packages is installed.
        - name: Check Python packages
          ansible.builtin.shell: pip3 list | grep -E "(fastapi|uvicorn|torch|vllm|diffusers|audiocraft)"
          register: pip_check
          changed_when: false
        - name: Display installed packages
          ansible.builtin.debug:
            msg: "{{ pip_check.stdout_lines }}"
        # argv form: no shell is involved, so the arguments are passed verbatim.
        - name: Check GPU memory
          ansible.builtin.command:
            argv:
              - nvidia-smi
              - --query-gpu=memory.free
              - --format=csv,noheader,nounits
          register: gpu_memory
          changed_when: false
        - name: Display GPU memory
          ansible.builtin.debug:
            msg: "Free GPU memory: {{ gpu_memory.stdout }} MB"
        # argv form keeps a cache_dir containing spaces as a single argument.
        - name: Check cached models
          ansible.builtin.command:
            argv:
              - du
              - -sh
              - "{{ cache_dir }}"
          register: cache_size
          changed_when: false
        - name: Display cache information
          ansible.builtin.debug:
            msg: "Model cache size: {{ cache_size.stdout }}"
        # Sets mode 0755 on each service script (this enforces the mode
        # rather than only checking it).
        - name: Verify service scripts are executable
          ansible.builtin.file:
            path: "{{ ai_dir }}/{{ item.script }}"
            mode: '0755'
          loop: "{{ services }}"
        - name: Display validation summary
          ansible.builtin.debug:
            msg: |
              ✓ Installation validated successfully!
              Next steps:
              1. Start orchestrator: python3 {{ ai_dir }}/model-orchestrator/orchestrator_subprocess.py
              2. Test endpoint: curl http://localhost:9000/health
              3. Configure LiteLLM on VPS to connect via Tailscale
              Services:
              {% for service in services %}
              - {{ service.name }}: http://localhost:{{ service.port }}
              {% endfor %}
    #
    # Cleanup for Template Creation
    #
    # Strips secrets, shell history and logs, then writes a version marker
    # so the pod can be saved as a template. Opt-in via the `never` tag.
    - name: Cleanup for template creation
      tags: [cleanup, never]  # never = skip by default, run explicitly
      block:
        # The /root paths need root privileges and the play defaults to
        # become: false, so escalate here like the log cleanup below does.
        - name: Remove sensitive files
          ansible.builtin.file:
            path: "{{ item }}"
            state: absent
          loop:
            - "{{ ai_dir }}/.env"
            - /root/.ssh/known_hosts
            - /root/.bash_history
            - /root/.python_history
          become: true
        # argv form: the glob is handed to find itself, no shell is needed.
        # Best effort: files that cannot be deleted must not fail the run.
        - name: Clear system logs
          ansible.builtin.command:
            argv:
              - find
              - /var/log
              - -type
              - f
              - -name
              - "*.log"
              - -delete
          become: true
          ignore_errors: true
        # ansible_date_time comes from gathered facts.
        - name: Create template version marker
          ansible.builtin.copy:
            dest: "{{ workspace_dir }}/TEMPLATE_VERSION"
            content: |
              RunPod Multi-Modal AI Template (Process-Based Architecture)
              Version: 2.0
              Created: {{ ansible_date_time.iso8601 }}
              Components:
              - Python {{ python_version }}
              - Orchestrator (process-based)
              - Text Generation (vLLM + Qwen 2.5 7B)
              - Image Generation (Flux.1 Schnell)
              - Music Generation (MusicGen Medium)
              Models Cached: ~37GB
              Architecture: No Docker, direct Python execution
              Deployment:
              1. Create .env file with HF_TOKEN
              2. Run: python3 {{ ai_dir }}/model-orchestrator/orchestrator_subprocess.py
              3. Access: http://localhost:9000/health
        - name: Display template creation instructions
          ansible.builtin.debug:
            msg: |
              Template prepared successfully!
              Next steps in RunPod dashboard:
              1. Stop all running services
              2. Go to My Pods → Select this pod → ⋮ → Save as Template
              3. Name: multi-modal-ai-process-v2.0
              4. Description: Process-based multi-modal AI (text/image/music)
              5. Save and test deployment from template
              Template enables 2-3 minute deployments instead of 60+ minutes!