# runpod/playbook.yml

---
#
# RunPod AI Infrastructure Ansible Playbook
#
# This playbook provisions a RunPod GPU instance with multi-modal AI services.
# It replaces all bash scripts with reproducible Ansible tasks.
#
# Usage:
#   ansible-playbook playbook.yml                    # Full deployment
#   ansible-playbook playbook.yml --tags base        # Install system packages
#   ansible-playbook playbook.yml --tags python      # Setup Python environment
#   ansible-playbook playbook.yml --tags models      # Download models only
#   ansible-playbook playbook.yml --tags validate    # Validate installation
#
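# Tags:
#   base         - System packages and dependencies (also tagged `always`)
#   python       - Python environment setup
#   dependencies - Install Python packages
#   models       - Download AI models
#   tailscale    - Install and configure Tailscale
#   systemd      - Configure systemd services (tagged `never`; run explicitly)
#   validate     - Health checks and validation (tagged `never`; run explicitly)
#   cleanup      - Prepare the pod for template creation (tagged `never`; run explicitly)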
#

- name: Provision RunPod GPU Instance for AI Services
  # The play targets the machine it is launched on. Privilege escalation is
  # off at play level and switched on per task where needed.
  hosts: localhost
  connection: local
  become: false

  vars:
    # Filesystem layout; every path below hangs off workspace_dir.
    workspace_dir: '/workspace'
    ai_dir: '{{ workspace_dir }}/ai'
    cache_dir: '{{ workspace_dir }}/huggingface_cache'
    models_dir: '{{ workspace_dir }}/models'

    # Python tooling. Versions stay quoted so "3.10" is not read as 3.1.
    python_version: '3.10'
    pip_version: '23.3.1'

    # Models fetched by the download tasks, keyed by service.
    # Keep the entry order: the download tasks address the cache-check
    # results by position. size_gb is not read by any task visible in
    # this file.
    models:
      vllm:
        size_gb: 14
        name: 'Qwen/Qwen2.5-7B-Instruct'
      flux:
        size_gb: 12
        name: 'black-forest-labs/FLUX.1-schnell'
      musicgen:
        size_gb: 11
        name: 'facebook/musicgen-medium'

    # Service catalogue: script is relative to ai_dir, port is the local
    # HTTP port printed in the validation summary.
    services:
      - name: orchestrator
        script: model-orchestrator/orchestrator_subprocess.py
        port: 9000
      - name: vllm
        script: models/vllm/server.py
        port: 8001
      - name: flux
        script: models/flux/server.py
        port: 8002
      - name: musicgen
        script: models/musicgen/server.py
        port: 8003
  tasks:
    #
    # Base System Setup
    #
    - name: Base system packages
      # NOTE: the `always` tag makes this block run for every tag selection
      # (including e.g. `--tags models`) unless `--skip-tags always` is given.
      tags: [base, always]
      block:
        # No shell features are needed, so use `command` rather than `shell`.
        # A non-zero exit code (or a missing binary) fails the task by default,
        # which stops the play on hosts without a working NVIDIA driver.
        - name: Check GPU availability
          command: nvidia-smi
          register: nvidia_check
          changed_when: false

        - name: Display GPU information
          debug:
            msg: "{{ nvidia_check.stdout_lines }}"

        - name: Ensure workspace directory exists
          file:
            path: "{{ workspace_dir }}"
            state: directory
            mode: '0755'

        # cache_valid_time skips the refresh when the apt cache is younger
        # than one hour.
        - name: Update apt cache
          apt:
            update_cache: true
            cache_valid_time: 3600
          become: true

        - name: Install base system packages
          apt:
            name:
              - build-essential
              - python3-dev
              - python3-pip
              - python3-venv
              - git
              - curl
              - wget
              - vim
              - htop
              - tmux
              - net-tools
            state: present
          become: true

    #
    # Python Environment Setup
    #
    - name: Python environment setup
      tags:
        - python
      block:
        # Bring the system pip3 to the exact version pinned in the play vars.
        - name: Upgrade pip
          become: true
          pip:
            executable: pip3
            name: pip
            version: '{{ pip_version }}'
            extra_args: '--upgrade'

        # Install everything listed in core/requirements.txt under ai_dir
        # with the system pip3.
        - name: Install core Python packages
          become: true
          pip:
            executable: pip3
            requirements: '{{ ai_dir }}/core/requirements.txt'

    #
    # Install Model Dependencies
    #
    - name: Install model dependencies
      tags:
        - dependencies
      block:
        # One requirements file per model service, each installed with the
        # system pip3 as root. The tasks run in the order listed.
        - name: Install vLLM dependencies
          become: true
          pip:
            executable: pip3
            requirements: '{{ ai_dir }}/models/vllm/requirements.txt'

        - name: Install Flux dependencies
          become: true
          pip:
            executable: pip3
            requirements: '{{ ai_dir }}/models/flux/requirements.txt'

        - name: Install MusicGen dependencies
          become: true
          pip:
            executable: pip3
            requirements: '{{ ai_dir }}/models/musicgen/requirements.txt'

    #
    # Download AI Models
    #
    - name: Download AI models
      tags: [models]
      block:
        - name: Create model cache directories
          file:
            path: "{{ item }}"
            state: directory
            mode: '0755'
          loop:
            - "{{ cache_dir }}"
            - "{{ models_dir }}/flux"
            - "{{ models_dir }}/musicgen"

        # Two on-disk layouts are possible for the same repository:
        #   <cache_dir>/hub/models--org--name  (HF_HOME points at cache_dir)
        #   <cache_dir>/models--org--name      (explicit cache_dir= argument,
        #                                       as the Flux download passes)
        # Probe both locations for every model so a cached model is detected
        # regardless of which code path wrote it.
        - name: Check if models are already cached
          stat:
            path: "{{ item.1 }}/models--{{ item.0.value.name | regex_replace('/', '--') }}"
          register: model_cache_check
          loop: "{{ models | dict2items | product([cache_dir, cache_dir ~ '/hub']) | list }}"
          loop_control:
            label: "{{ item.0.key }} in {{ item.1 }}"

        # Collapse the probe results into a list of model keys (vllm, flux,
        # musicgen) so the download tasks look models up by name instead of
        # relying on the position of an entry in the results list.
        # NOTE: only directory existence is checked; an interrupted download
        # that left the directory behind is still treated as cached.
        - name: Record which models are already cached
          set_fact:
            cached_models: >-
              {{ model_cache_check.results
                 | selectattr('stat.exists')
                 | map(attribute='item.0.key')
                 | unique
                 | list }}

        # snapshot_download fetches the whole repository (weights and
        # tokenizer) into the hub cache without loading the model into memory.
        # HF_HOME is supplied through `environment`, so the script does not
        # need to set it itself.
        - name: Download Qwen 2.5 7B model (14GB, ~15 minutes)
          shell: |
            python3 -c "
            from huggingface_hub import snapshot_download
            print('Downloading Qwen 2.5 7B Instruct...')
            snapshot_download('{{ models.vllm.name }}')
            print('Qwen 2.5 7B Instruct downloaded successfully')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          when: "'vllm' not in cached_models"
          register: vllm_download
          async: 1800  # 30 minutes timeout
          poll: 30

        - name: Download Flux.1 Schnell model (12GB, ~12 minutes)
          shell: |
            python3 -c "
            from diffusers import FluxPipeline
            print('Downloading Flux.1 Schnell...')
            FluxPipeline.from_pretrained(
                '{{ models.flux.name }}',
                cache_dir='{{ cache_dir }}'
            )
            print('Flux.1 downloaded successfully')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          when: "'flux' not in cached_models"
          register: flux_download
          async: 1200  # 20 minutes timeout
          poll: 30

        - name: Download MusicGen Medium model (11GB, ~10 minutes)
          shell: |
            python3 -c "
            from audiocraft.models import MusicGen
            print('Downloading MusicGen Medium...')
            MusicGen.get_pretrained('{{ models.musicgen.name }}')
            print('MusicGen downloaded successfully')
            "
          environment:
            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
            HF_HOME: "{{ cache_dir }}"
          when: "'musicgen' not in cached_models"
          register: musicgen_download
          async: 900  # 15 minutes timeout
          poll: 30

        # A skipped download registers changed=false, which is reported as
        # "Already cached".
        - name: Display model download summary
          debug:
            msg: |
              Model downloads completed:
              - Qwen 2.5 7B: {{ 'Downloaded' if vllm_download.changed | default(false) else 'Already cached' }}
              - Flux.1 Schnell: {{ 'Downloaded' if flux_download.changed | default(false) else 'Already cached' }}
              - MusicGen Medium: {{ 'Downloaded' if musicgen_download.changed | default(false) else 'Already cached' }}
              Total cache size: ~37GB

    #
    # Tailscale VPN
    #
    - name: Install and configure Tailscale
      tags: [tailscale]
      block:
        # `which` exits non-zero when the binary is absent; failed_when: false
        # keeps that from failing the play so the result can gate the install.
        - name: Check if Tailscale is installed
          command: which tailscale
          register: tailscale_check
          changed_when: false
          failed_when: false

        # Without pipefail the pipeline's status is that of `sh` alone, so a
        # failed download (curl -f exits non-zero) would feed an empty script
        # to sh and be reported as success. pipefail is a bash option, hence
        # the explicit executable.
        - name: Install Tailscale
          shell: |
            set -o pipefail
            curl -fsSL https://tailscale.com/install.sh | sh
          args:
            executable: /bin/bash
          become: true
          when: tailscale_check.rc != 0

        # Printed on every run, whether or not the install task executed.
        - name: Display Tailscale setup instructions
          debug:
            msg: |
              Tailscale installed. To connect:
              1. Start tailscaled: tailscaled --tun=userspace-networking --socks5-server=localhost:1055 &
              2. Authenticate: tailscale up --advertise-tags=tag:gpu
              3. Get IP: tailscale ip -4

              Note: Authentication requires manual intervention via provided URL

    #
    # Systemd Services (Optional)
    #
    - name: Configure systemd services
      # The `never` tag keeps this block out of default runs; it executes only
      # when selected explicitly, e.g. with --tags systemd.
      tags:
        - systemd
        - never
      block:
        # Render the unit file from the template kept under ai_dir.
        - name: Create systemd service for orchestrator
          become: true
          template:
            dest: '/etc/systemd/system/ai-orchestrator.service'
            src: '{{ ai_dir }}/systemd/ai-orchestrator.service.j2'
            mode: '0644'

        # Make systemd re-read unit files so the new unit is known.
        - name: Reload systemd daemon
          become: true
          systemd:
            daemon_reload: true

        # Enable the unit; this task does not start it.
        - name: Enable orchestrator service
          become: true
          systemd:
            enabled: true
            name: ai-orchestrator

        - name: Display systemd instructions
          debug:
            msg: |
              Systemd service configured. To manage:
              - Start: sudo systemctl start ai-orchestrator
              - Stop: sudo systemctl stop ai-orchestrator
              - Status: sudo systemctl status ai-orchestrator
              - Logs: sudo journalctl -u ai-orchestrator -f

    #
    # Validation
    #
    - name: Validate installation
      tags: [validate, never]  # never = skip by default, run explicitly
      block:
        # Needs a shell for the pipe. grep exits non-zero when none of the
        # packages is listed, which fails the task.
        - name: Check Python packages
          shell: pip3 list | grep -E "(fastapi|uvicorn|torch|vllm|diffusers|audiocraft)"
          register: pip_check
          changed_when: false

        - name: Display installed packages
          debug:
            msg: "{{ pip_check.stdout_lines }}"

        # Plain command, no shell features required.
        - name: Check GPU memory
          command: nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits
          register: gpu_memory
          changed_when: false

        - name: Display GPU memory
          debug:
            msg: "Free GPU memory: {{ gpu_memory.stdout }} MB"

        # argv form passes the path as a single argument, so it is never
        # word-split or glob-expanded by a shell.
        - name: Check cached models
          command:
            argv:
              - du
              - "-sh"
              - "{{ cache_dir }}"
          register: cache_size
          changed_when: false

        - name: Display cache information
          debug:
            msg: "Model cache size: {{ cache_size.stdout }}"

        # NOTE(review): despite the name, this task sets mode 0755 on each
        # script rather than only checking it.
        - name: Verify service scripts are executable
          file:
            path: "{{ ai_dir }}/{{ item.script }}"
            mode: '0755'
          loop: "{{ services }}"

        - name: Display validation summary
          debug:
            msg: |
              ✓ Installation validated successfully!

              Next steps:
              1. Start orchestrator: python3 {{ ai_dir }}/model-orchestrator/orchestrator_subprocess.py
              2. Test endpoint: curl http://localhost:9000/health
              3. Configure LiteLLM on VPS to connect via Tailscale

              Services:
              {% for service in services %}
              - {{ service.name }}: http://localhost:{{ service.port }}
              {% endfor %}

    #
    # Cleanup for Template Creation
    #
    - name: Cleanup for template creation
      tags: [cleanup, never]  # never = skip by default, run explicitly
      block:
        # The /root paths are only reachable with root privileges. The play
        # runs with become: false, so escalate here as the other privileged
        # tasks do.
        - name: Remove sensitive files
          file:
            path: "{{ item }}"
            state: absent
          loop:
            - "{{ ai_dir }}/.env"
            - /root/.ssh/known_hosts
            - /root/.bash_history
            - /root/.python_history
          become: true

        # Best-effort: errors from find are deliberately ignored so they do
        # not abort template preparation. argv form hands "*.log" to find
        # literally, with no shell involved.
        - name: Clear system logs
          command:
            argv:
              - find
              - /var/log
              - "-type"
              - f
              - "-name"
              - "*.log"
              - "-delete"
          become: true
          ignore_errors: true

        # ansible_date_time comes from gathered facts.
        - name: Create template version marker
          copy:
            dest: "{{ workspace_dir }}/TEMPLATE_VERSION"
            content: |
              RunPod Multi-Modal AI Template (Process-Based Architecture)
              Version: 2.0
              Created: {{ ansible_date_time.iso8601 }}

              Components:
              - Python {{ python_version }}
              - Orchestrator (process-based)
              - Text Generation (vLLM + Qwen 2.5 7B)
              - Image Generation (Flux.1 Schnell)
              - Music Generation (MusicGen Medium)

              Models Cached: ~37GB
              Architecture: No Docker, direct Python execution

              Deployment:
              1. Create .env file with HF_TOKEN
              2. Run: python3 {{ ai_dir }}/model-orchestrator/orchestrator_subprocess.py
              3. Access: http://localhost:9000/health

        - name: Display template creation instructions
          debug:
            msg: |
              Template prepared successfully!

              Next steps in RunPod dashboard:
              1. Stop all running services
              2. Go to My Pods → Select this pod → ⋮ → Save as Template
              3. Name: multi-modal-ai-process-v2.0
              4. Description: Process-based multi-modal AI (text/image/music)
              5. Save and test deployment from template

              Template enables 2-3 minute deployments instead of 60+ minutes!