From 16226c7b39976828eae2e56d55491ed7d476c85e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?=
Date: Sat, 22 Nov 2025 12:23:11 +0100
Subject: [PATCH] chore: remove Ansible infrastructure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove Ansible playbook and configuration files as all infrastructure
setup has been migrated to arty bash scripts.

Deleted:
- playbook.yml (~950 lines)
- inventory.yml
- ansible.cfg

All functionality now available via arty scripts:
- arty run install/essential
- arty run setup/system-packages
- arty run setup/comfyui-base
- arty run setup/comfyui-nodes
- arty run setup/supervisor
- arty run setup/tailscale

Model downloads will be handled separately (TBD).

🤖 Generated with Claude Code

Co-Authored-By: Claude
---
 ansible.cfg   |  33 --
 inventory.yml |  26 --
 playbook.yml  | 951 --------------------------------------------------
 3 files changed, 1010 deletions(-)
 delete mode 100644 ansible.cfg
 delete mode 100644 inventory.yml
 delete mode 100644 playbook.yml

diff --git a/ansible.cfg b/ansible.cfg
deleted file mode 100644
index 3feb761..0000000
--- a/ansible.cfg
+++ /dev/null
@@ -1,33 +0,0 @@
-[defaults]
-# Ansible configuration for RunPod deployment
-
-# Inventory
-inventory = inventory.yml
-
-# Disable host key checking (RunPod instances may change)
-host_key_checking = False
-
-# Display settings
-stdout_callback = yaml
-bin_ansible_callbacks = True
-
-# Performance
-forks = 5
-gathering = smart
-fact_caching = jsonfile
-fact_caching_connection = /tmp/ansible_facts
-fact_caching_timeout = 86400
-
-# Logging
-log_path = /tmp/ansible-runpod.log
-
-# Privilege escalation
-become_method = sudo
-become_ask_pass = False
-
-# SSH settings
-timeout = 30
-transport = local
-
-# Retry files
-retry_files_enabled = False
diff --git a/inventory.yml b/inventory.yml
deleted file mode 100644
index e3f2cfb..0000000
--- a/inventory.yml
+++ /dev/null
@@ -1,26 +0,0 @@
----
-# Ansible inventory for RunPod deployment
-#
-# This inventory defines localhost as the target for RunPod instances.
-# All tasks run locally on the RunPod GPU server.
-
-all:
-  hosts:
-    localhost:
-      ansible_connection: local
-      ansible_python_interpreter: /usr/bin/python3
-
-  vars:
-    # Workspace configuration
-    workspace_dir: /workspace
-    ai_dir: /workspace/ai
-
-    # Environment variables (loaded from .env if present)
-    hf_token: "{{ lookup('env', 'HF_TOKEN') }}"
-    tailscale_key: "{{ lookup('env', 'TAILSCALE_AUTH_KEY') | default('') }}"
-
-    # GPU configuration
-    gpu_memory_utilization: 0.85
-
-    # Model cache
-    huggingface_cache: /workspace/huggingface_cache
diff --git a/playbook.yml b/playbook.yml
deleted file mode 100644
index f71ecc1..0000000
--- a/playbook.yml
+++ /dev/null
@@ -1,951 +0,0 @@
----
-#
-# RunPod AI Infrastructure Ansible Playbook
-#
-# This playbook provisions a RunPod GPU instance with multi-modal AI services.
-# It replaces all bash scripts with reproducible Ansible tasks.
-#
-# Usage:
-#   ansible-playbook playbook.yml                    # Full deployment
-#   ansible-playbook playbook.yml --tags base        # Install system packages
-#   ansible-playbook playbook.yml --tags python      # Setup Python environment
-#   ansible-playbook playbook.yml --tags models      # Download models only
-#   ansible-playbook playbook.yml --tags validate    # Validate installation
-#
-# Tags:
-#   base                   - System packages and dependencies
-#   python                 - Python environment setup
-#   dependencies           - Install Python packages
-#   models                 - Download AI models (vLLM, Flux, MusicGen)
-#   comfyui                - Install and configure ComfyUI base
-#   comfyui-models-image   - Download ComfyUI image generation models
-#   comfyui-models-video   - Download ComfyUI video generation models
-#   comfyui-models-audio   - Download ComfyUI audio generation models
-#   comfyui-models-support - Download CLIP, IP-Adapter, ControlNet models
-#   comfyui-models-all     - Download all ComfyUI models
-#   comfyui-nodes          - Install essential custom nodes
-#   comfyui-essential      - Quick setup (ComfyUI + essential models only)
-#   tailscale              - Install and configure Tailscale
-#   supervisor             - Install and configure Supervisor process manager
-#   systemd                - Configure systemd services
-#   validate               - Health checks and validation
-#
-
-- name: Provision RunPod GPU Instance for AI Services
-  hosts: localhost
-  connection: local
-  become: false
-  vars:
-    # Paths
-    workspace_dir: /workspace
-    ai_dir: "{{ workspace_dir }}/ai"
-    cache_dir: "{{ workspace_dir }}/huggingface_cache"
-    models_dir: "{{ workspace_dir }}/models"
-
-    # Python configuration
-    python_version: "3.10"
-    pip_version: "23.3.1"
-
-    # Model configuration
-    models:
-      vllm:
-        name: "Qwen/Qwen2.5-7B-Instruct"
-        size_gb: 14
-      flux:
-        name: "black-forest-labs/FLUX.1-schnell"
-        size_gb: 12
-      musicgen:
-        name: "facebook/musicgen-medium"
-        size_gb: 11
-
-    # ========================================================================
-    # ComfyUI Models - Comprehensive List for 24GB GPU
-    # ========================================================================
-
-    # ComfyUI Image Generation Models
-    comfyui_image_models:
-      # FLUX Models (Black Forest Labs) - State of the art 2025
-      - name: "black-forest-labs/FLUX.1-schnell"
-        type: "checkpoint"
-        category: "image"
-        size_gb: 23
-        vram_gb: 23
-        format: "fp16"
-        description: "FLUX.1 Schnell - Fast 4-step inference"
-        essential: true
-
-      - name: "black-forest-labs/FLUX.1-dev"
-        type: "checkpoint"
-        category: "image"
-        size_gb: 23
-        vram_gb: 23
-        format: "fp16"
-        description: "FLUX.1 Dev - Balanced quality/speed"
-        essential: false
-
-      # SDXL Models - Industry standard
-      - name: "stabilityai/stable-diffusion-xl-base-1.0"
-        type: "checkpoint"
-        category: "image"
-        size_gb: 7
-        vram_gb: 12
-        format: "fp16"
-        description: "SDXL 1.0 Base - 1024x1024 native resolution"
-        essential: true
-
-      - name: "stabilityai/stable-diffusion-xl-refiner-1.0"
-        type: "checkpoint"
-        category: "image"
-        size_gb: 6
-        vram_gb: 12
-        format: "fp16"
-        description: "SDXL Refiner - Enhances base output"
-        essential: false
-
-      # SD 3.5 Models - Latest Stability AI
-      - name: "stabilityai/stable-diffusion-3.5-large"
-        type: "checkpoint"
-        category: "image"
-        size_gb: 18
-        vram_gb: 20
-        format: "fp16"
-        description: "SD 3.5 Large - MMDiT architecture"
-        essential: false
-
-    # ComfyUI Video Generation Models
-    comfyui_video_models:
-      # CogVideoX - Text-to-video
-      - name: "THUDM/CogVideoX-5b"
-        type: "video"
-        category: "video"
-        size_gb: 20
-        vram_gb: 12  # with optimizations
-        description: "CogVideoX 5B - Professional text-to-video"
-        essential: true
-
-      # Stable Video Diffusion
-      - name: "stabilityai/stable-video-diffusion-img2vid"
-        type: "video"
-        category: "video"
-        size_gb: 8
-        vram_gb: 16
-        description: "SVD - 14 frame image-to-video"
-        essential: true
-
-      - name: "stabilityai/stable-video-diffusion-img2vid-xt"
-        type: "video"
-        category: "video"
-        size_gb: 8
-        vram_gb: 20
-        description: "SVD-XT - 25 frame image-to-video"
-        essential: false
-
-    # ComfyUI Audio Generation Models
-    comfyui_audio_models:
-      - name: "facebook/musicgen-small"
-        type: "audio"
-        category: "audio"
-        size_gb: 3
-        vram_gb: 4
-        description: "MusicGen Small - Fast music generation"
-        essential: false
-
-      - name: "facebook/musicgen-medium"
-        type: "audio"
-        category: "audio"
-        size_gb: 11
-        vram_gb: 8
-        description: "MusicGen Medium - Balanced quality"
-        essential: true
-
-      - name: "facebook/musicgen-large"
-        type: "audio"
-        category: "audio"
-        size_gb: 22
-        vram_gb: 16
-        description: "MusicGen Large - Highest quality"
-        essential: false
-
-    # ComfyUI Supporting Models (CLIP, IP-Adapter, ControlNet)
-    comfyui_support_models:
-      # CLIP Vision Models
-      - name: "openai/clip-vit-large-patch14"
-        type: "clip_vision"
-        category: "support"
-        size_gb: 2
-        description: "CLIP H - For SD 1.5 IP-Adapter"
-        essential: true
-        target_dir: "clip_vision"
-
-      - name: "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
-        type: "clip_vision"
-        category: "support"
-        size_gb: 7
-        description: "CLIP G - For SDXL IP-Adapter"
-        essential: true
-        target_dir: "clip_vision"
-
-      - name: "google/siglip-so400m-patch14-384"
-        type: "clip_vision"
-        category: "support"
-        size_gb: 2
-        description: "SigLIP - For FLUX models"
-        essential: true
-        target_dir: "clip_vision"
-
-    # ComfyUI Custom Nodes - Essential Extensions
-    comfyui_custom_nodes:
-      # ComfyUI Manager - Must have
-      - name: "ComfyUI-Manager"
-        repo: "https://github.com/ltdrdata/ComfyUI-Manager.git"
-        category: "manager"
-        description: "Install/manage custom nodes and models"
-        essential: true
-
-      # Video Generation Nodes
-      - name: "ComfyUI-VideoHelperSuite"
-        repo: "https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git"
-        category: "video"
-        description: "Video operations and processing"
-        essential: true
-
-      - name: "ComfyUI-AnimateDiff-Evolved"
-        repo: "https://github.com/Kosinkadink/ComfyUI-AnimateDiff-Evolved.git"
-        category: "video"
-        description: "AnimateDiff for video generation"
-        essential: true
-
-      - name: "ComfyUI-CogVideoXWrapper"
-        repo: "https://github.com/kijai/ComfyUI-CogVideoXWrapper.git"
-        category: "video"
-        description: "CogVideoX integration"
-        essential: false
-
-      # Image Enhancement Nodes
-      - name: "ComfyUI_IPAdapter_plus"
-        repo: "https://github.com/cubiq/ComfyUI_IPAdapter_plus.git"
-        category: "image"
-        description: "IP-Adapter for style transfer"
-        essential: true
-
-      - name: "ComfyUI-Impact-Pack"
-        repo: "https://github.com/ltdrdata/ComfyUI-Impact-Pack.git"
-        category: "image"
-        description: "Auto face enhancement, detailer"
-        essential: true
-
-      - name: "Comfyui-Inspire-Pack"
-        repo: "https://github.com/ltdrdata/ComfyUI-Inspire-Pack.git"
-        category: "image"
-        description: "Additional inspiration tools"
-        essential: false
-
-      # Audio Generation Nodes
-      - name: "comfyui-sound-lab"
-        repo: "https://github.com/eigenpunk/comfyui-sound-lab.git"
-        category: "audio"
-        description: "MusicGen and Stable Audio integration"
-        essential: true
-
-      # Utility Nodes
-      - name: "ComfyUI-Advanced-ControlNet"
-        repo: "https://github.com/Kosinkadink/ComfyUI-Advanced-ControlNet.git"
-        category: "control"
-        description: "Advanced ControlNet features"
-        essential: false
-
-      - name: "ComfyUI-3D-Pack"
-        repo: "https://github.com/MrForExample/ComfyUI-3D-Pack.git"
-        category: "3d"
-        description: "3D asset generation"
-        essential: false
-
-    # Service configuration
-    services:
-      - name: orchestrator
-        port: 9000
-        script: model-orchestrator/orchestrator_subprocess.py
-      - name: vllm
-        port: 8001
-        script: models/vllm/server.py
-      - name: comfyui
-        port: 8188
-        script: models/comfyui/start.sh
-
-  tasks:
-    #
-    # Base System Setup
-    #
-    - name: Base system packages
-      tags: [base, always]
-      block:
-        - name: Check GPU availability
-          shell: nvidia-smi
-          register: nvidia_check
-          changed_when: false
-          failed_when: nvidia_check.rc != 0
-
-        - name: Display GPU information
-          debug:
-            msg: "{{ nvidia_check.stdout_lines }}"
-
-        - name: Ensure workspace directory exists
-          file:
-            path: "{{ workspace_dir }}"
-            state: directory
-            mode: '0755'
-
-        - name: Update apt cache
-          apt:
-            update_cache: yes
-            cache_valid_time: 3600
-          become: true
-
-        - name: Install base system packages
-          apt:
-            name:
-              - build-essential
-              - python3-dev
-              - python3-pip
-              - python3-venv
-              - git
-              - curl
-              - wget
-              - vim
-              - htop
-              - tmux
-              - net-tools
-            state: present
-          become: true
-
-    #
-    # Python Environment Setup
-    #
-    - name: Python environment setup
-      tags: [python]
-      block:
-        - name: Upgrade pip
-          pip:
-            name: pip
-            version: "{{ pip_version }}"
-            executable: pip3
-            extra_args: --upgrade
-          become: true
-
-        - name: Install core Python packages
-          pip:
-            requirements: "{{ ai_dir }}/core/requirements.txt"
-            executable: pip3
-          become: true
-
-    #
-    # Install Model Dependencies
-    #
-    - name: Install model dependencies
-      tags: [dependencies]
-      block:
-        - name: Install vLLM dependencies
-          pip:
-            requirements: "{{ ai_dir }}/models/vllm/requirements.txt"
-            executable: pip3
-          become: true
-
-    #
-    # ComfyUI Installation
-    #
-    - name: Install and configure ComfyUI
-      tags: [comfyui]
-      block:
-        - name: Check if ComfyUI is already installed
-          stat:
-            path: "{{ workspace_dir }}/ComfyUI"
-          register: comfyui_check
-
-        - name: Clone ComfyUI repository
-          git:
-            repo: https://github.com/comfyanonymous/ComfyUI.git
-            dest: "{{ workspace_dir }}/ComfyUI"
-            version: master
-            update: yes
-          when: not comfyui_check.stat.exists
-
-        - name: Install ComfyUI dependencies
-          pip:
-            requirements: "{{ workspace_dir }}/ComfyUI/requirements.txt"
-            executable: pip3
-          become: true
-
-        - name: Install additional ComfyUI dependencies
-          pip:
-            requirements: "{{ ai_dir }}/models/comfyui/requirements.txt"
-            executable: pip3
-          become: true
-
-        - name: Create ComfyUI models directory structure
-          file:
-            path: "{{ workspace_dir }}/ComfyUI/models/{{ item }}"
-            state: directory
-            mode: '0755'
-          loop:
-            # Image Model Directories
-            - checkpoints
-            - unet
-            - vae
-            - loras
-            - clip
-            - clip_vision
-            - controlnet
-            - ipadapter
-            - embeddings
-            - upscale_models
-            # Video Model Directories
-            - video_models
-            - animatediff_models
-            - animatediff_motion_lora
-            # Audio Model Directories
-            - audio_models
-            # Utility Directories
-            - configs
-            - custom_nodes
-
-        - name: Create symlink for Flux model in ComfyUI
-          file:
-            src: "{{ cache_dir }}"
-            dest: "{{ workspace_dir }}/ComfyUI/models/huggingface_cache"
-            state: link
-          ignore_errors: yes
-
-        - name: Make ComfyUI start script executable
-          file:
-            path: "{{ ai_dir }}/models/comfyui/start.sh"
-            mode: '0755'
-
-        - name: Display ComfyUI setup summary
-          debug:
-            msg: |
-              ✓ ComfyUI installed successfully!
-
-              Directory: {{ workspace_dir }}/ComfyUI
-              Port: 8188
-              HuggingFace Cache: {{ cache_dir }}
-
-              To start ComfyUI:
-                bash {{ ai_dir }}/models/comfyui/start.sh
-
-              Or manually:
-                cd {{ workspace_dir }}/ComfyUI && python3 main.py --listen 0.0.0.0 --port 8188
-
-              Access: http://localhost:8188
-
-    #
-    # ComfyUI Custom Nodes Installation
-    #
-    - name: Install ComfyUI Custom Nodes
-      tags: [comfyui-nodes, comfyui-essential]
-      block:
-        - name: Install essential ComfyUI custom nodes
-          git:
-            repo: "{{ item.repo }}"
-            dest: "{{ workspace_dir }}/ComfyUI/custom_nodes/{{ item.name }}"
-            version: main
-            update: yes
-          loop: "{{ comfyui_custom_nodes | selectattr('essential', 'equalto', true) | list }}"
-          loop_control:
-            label: "{{ item.name }}"
-          ignore_errors: yes
-
-        - name: Install custom node dependencies
-          shell: |
-            if [ -f "{{ workspace_dir }}/ComfyUI/custom_nodes/{{ item.name }}/requirements.txt" ]; then
-              pip3 install -r "{{ workspace_dir }}/ComfyUI/custom_nodes/{{ item.name }}/requirements.txt"
-            fi
-          loop: "{{ comfyui_custom_nodes | selectattr('essential', 'equalto', true) | list }}"
-          loop_control:
-            label: "{{ item.name }}"
-          become: true
-          ignore_errors: yes
-
-        - name: Display custom nodes installation summary
-          debug:
-            msg: |
-              ✓ Custom nodes installed successfully!
-
-              Essential nodes:
-              {% for node in comfyui_custom_nodes | selectattr('essential', 'equalto', true) | list %}
-              - {{ node.name }}: {{ node.description }}
-              {% endfor %}
-
-              To install ALL nodes (including optional):
-                ansible-playbook playbook.yml --tags comfyui-nodes-all
-
-    #
-    # ComfyUI Image Models Download
-    #
-    - name: Download ComfyUI Image Generation Models
-      tags: [comfyui-models-image, comfyui-models-all, comfyui-essential]
-      block:
-        - name: Download essential image generation models
-          shell: |
-            python3 -c "
-            from huggingface_hub import snapshot_download
-            import os
-            os.environ['HF_HOME'] = '{{ cache_dir }}'
-            print('Downloading {{ item.name }}...')
-            snapshot_download(
-                repo_id='{{ item.name }}',
-                cache_dir='{{ cache_dir }}',
-                token=os.environ.get('HF_TOKEN')
-            )
-            print('Completed {{ item.name }}')
-            "
-          environment:
-            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
-            HF_HOME: "{{ cache_dir }}"
-          loop: "{{ comfyui_image_models | selectattr('essential', 'equalto', true) | list }}"
-          loop_control:
-            label: "{{ item.name }} ({{ item.size_gb }}GB)"
-          async: 3600
-          poll: 30
-          ignore_errors: yes
-
-        - name: Display image models summary
-          debug:
-            msg: |
-              Image generation models downloaded:
-              {% for model in comfyui_image_models | selectattr('essential', 'equalto', true) | list %}
-              - {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB, {{ model.vram_gb }}GB VRAM)
-              {% endfor %}
-
-              Total size: ~{{ (comfyui_image_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) }}GB
-
-    #
-    # ComfyUI Video Models Download
-    #
-    - name: Download ComfyUI Video Generation Models
-      tags: [comfyui-models-video, comfyui-models-all]
-      block:
-        - name: Download essential video generation models
-          shell: |
-            python3 -c "
-            from huggingface_hub import snapshot_download
-            import os
-            os.environ['HF_HOME'] = '{{ cache_dir }}'
-            print('Downloading {{ item.name }}...')
-            snapshot_download(
-                repo_id='{{ item.name }}',
-                cache_dir='{{ cache_dir }}',
-                token=os.environ.get('HF_TOKEN')
-            )
-            print('Completed {{ item.name }}')
-            "
-          environment:
-            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
-            HF_HOME: "{{ cache_dir }}"
-          loop: "{{ comfyui_video_models | selectattr('essential', 'equalto', true) | list }}"
-          loop_control:
-            label: "{{ item.name }} ({{ item.size_gb }}GB)"
-          async: 3600
-          poll: 30
-          ignore_errors: yes
-
-        - name: Display video models summary
-          debug:
-            msg: |
-              Video generation models downloaded:
-              {% for model in comfyui_video_models | selectattr('essential', 'equalto', true) | list %}
-              - {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB, {{ model.vram_gb }}GB VRAM)
-              {% endfor %}
-
-    #
-    # ComfyUI Audio Models Download
-    #
-    - name: Download ComfyUI Audio Generation Models
-      tags: [comfyui-models-audio, comfyui-models-all]
-      block:
-        - name: Download essential audio generation models
-          shell: |
-            python3 -c "
-            from huggingface_hub import snapshot_download
-            import os
-            os.environ['HF_HOME'] = '{{ cache_dir }}'
-            print('Downloading {{ item.name }}...')
-            snapshot_download(
-                repo_id='{{ item.name }}',
-                cache_dir='{{ cache_dir }}',
-                token=os.environ.get('HF_TOKEN')
-            )
-            print('Completed {{ item.name }}')
-            "
-          environment:
-            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
-            HF_HOME: "{{ cache_dir }}"
-          loop: "{{ comfyui_audio_models | selectattr('essential', 'equalto', true) | list }}"
-          loop_control:
-            label: "{{ item.name }} ({{ item.size_gb }}GB)"
-          async: 3600
-          poll: 30
-          ignore_errors: yes
-
-        - name: Display audio models summary
-          debug:
-            msg: |
-              Audio generation models downloaded:
-              {% for model in comfyui_audio_models | selectattr('essential', 'equalto', true) | list %}
-              - {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB)
-              {% endfor %}
-
-    #
-    # ComfyUI Support Models Download (CLIP, IP-Adapter, ControlNet)
-    #
-    - name: Download ComfyUI Support Models
-      tags: [comfyui-models-support, comfyui-models-all, comfyui-essential]
-      block:
-        - name: Download essential support models (CLIP, IP-Adapter)
-          shell: |
-            python3 -c "
-            from huggingface_hub import snapshot_download
-            import os
-            os.environ['HF_HOME'] = '{{ cache_dir }}'
-            print('Downloading {{ item.name }}...')
-            snapshot_download(
-                repo_id='{{ item.name }}',
-                cache_dir='{{ cache_dir }}',
-                token=os.environ.get('HF_TOKEN')
-            )
-            print('Completed {{ item.name }}')
-            "
-          environment:
-            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
-            HF_HOME: "{{ cache_dir }}"
-          loop: "{{ comfyui_support_models | selectattr('essential', 'equalto', true) | list }}"
-          loop_control:
-            label: "{{ item.name }} ({{ item.size_gb }}GB)"
-          async: 1800
-          poll: 30
-          ignore_errors: yes
-
-        - name: Display support models summary
-          debug:
-            msg: |
-              Support models downloaded:
-              {% for model in comfyui_support_models | selectattr('essential', 'equalto', true) | list %}
-              - {{ model.name }}: {{ model.description }} ({{ model.size_gb }}GB)
-              {% endfor %}
-
-              Total ComfyUI models cache: ~{{
-                (comfyui_image_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) +
-                (comfyui_video_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) +
-                (comfyui_audio_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb')) +
-                (comfyui_support_models | selectattr('essential', 'equalto', true) | list | sum(attribute='size_gb'))
-              }}GB
-
-    #
-    # Download AI Models
-    #
-    - name: Download AI models
-      tags: [models]
-      block:
-        - name: Create model cache directories
-          file:
-            path: "{{ item }}"
-            state: directory
-            mode: '0755'
-          loop:
-            - "{{ cache_dir }}"
-            - "{{ models_dir }}/flux"
-            - "{{ models_dir }}/musicgen"
-
-        - name: Check if models are already cached
-          stat:
-            path: "{{ cache_dir }}/models--{{ item.value.name | regex_replace('/', '--') }}"
-          register: model_cache_check
-          loop: "{{ models | dict2items }}"
-          loop_control:
-            label: "{{ item.key }}"
-
-        - name: Download Qwen 2.5 7B model (14GB, ~15 minutes)
-          shell: |
-            python3 -c "
-            from transformers import AutoTokenizer, AutoModelForCausalLM
-            import os
-            os.environ['HF_HOME'] = '{{ cache_dir }}'
-            print('Downloading Qwen 2.5 7B Instruct...')
-            AutoTokenizer.from_pretrained('{{ models.vllm.name }}')
-            print('Tokenizer downloaded successfully')
-            "
-          environment:
-            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
-            HF_HOME: "{{ cache_dir }}"
-          when: not (model_cache_check.results[0].stat.exists | default(false))
-          register: vllm_download
-          async: 1800  # 30 minutes timeout
-          poll: 30
-
-        - name: Download Flux.1 Schnell model (12GB, ~12 minutes)
-          shell: |
-            python3 -c "
-            from diffusers import FluxPipeline
-            import os
-            os.environ['HF_HOME'] = '{{ cache_dir }}'
-            print('Downloading Flux.1 Schnell...')
-            FluxPipeline.from_pretrained(
-                '{{ models.flux.name }}',
-                cache_dir='{{ cache_dir }}'
-            )
-            print('Flux.1 downloaded successfully')
-            "
-          environment:
-            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
-            HF_HOME: "{{ cache_dir }}"
-          when: not (model_cache_check.results[1].stat.exists | default(false))
-          register: flux_download
-          async: 1200  # 20 minutes timeout
-          poll: 30
-
-        - name: Download MusicGen Medium model (11GB, ~10 minutes)
-          shell: |
-            python3 -c "
-            from audiocraft.models import MusicGen
-            import os
-            os.environ['HF_HOME'] = '{{ cache_dir }}'
-            print('Downloading MusicGen Medium...')
-            MusicGen.get_pretrained('{{ models.musicgen.name }}')
-            print('MusicGen downloaded successfully')
-            "
-          environment:
-            HF_TOKEN: "{{ lookup('env', 'HF_TOKEN') }}"
-            HF_HOME: "{{ cache_dir }}"
-          when: not (model_cache_check.results[2].stat.exists | default(false))
-          register: musicgen_download
-          async: 900  # 15 minutes timeout
-          poll: 30
-
-        - name: Display model download summary
-          debug:
-            msg: |
-              Model downloads completed:
-              - Qwen 2.5 7B: {{ 'Downloaded' if vllm_download.changed | default(false) else 'Already cached' }}
-              - Flux.1 Schnell: {{ 'Downloaded' if flux_download.changed | default(false) else 'Already cached' }}
-              - MusicGen Medium: {{ 'Downloaded' if musicgen_download.changed | default(false) else 'Already cached' }}
-              Total cache size: ~37GB
-
-    #
-    # Tailscale VPN
-    #
-    - name: Install and configure Tailscale
-      tags: [tailscale]
-      block:
-        - name: Check if Tailscale is installed
-          command: which tailscale
-          register: tailscale_check
-          changed_when: false
-          failed_when: false
-
-        - name: Install Tailscale
-          shell: curl -fsSL https://tailscale.com/install.sh | sh
-          become: true
-          when: tailscale_check.rc != 0
-
-        - name: Display Tailscale setup instructions
-          debug:
-            msg: |
-              Tailscale installed. To connect:
-              1. Start tailscaled: tailscaled --tun=userspace-networking --socks5-server=localhost:1055 &
-              2. Authenticate: tailscale up --advertise-tags=tag:gpu
-              3. Get IP: tailscale ip -4
-
-              Note: Authentication requires manual intervention via provided URL
-
-    #
-    # Supervisor Process Manager
-    #
-    - name: Install and configure Supervisor
-      tags: [supervisor]
-      block:
-        - name: Install Supervisor
-          pip:
-            name: supervisor
-            executable: pip3
-          become: true
-
-        - name: Create logs directory
-          file:
-            path: "{{ workspace_dir }}/logs"
-            state: directory
-            mode: '0755'
-
-        - name: Deploy supervisord configuration
-          copy:
-            src: "{{ ai_dir }}/supervisord.conf"
-            dest: "{{ workspace_dir }}/supervisord.conf"
-            mode: '0644'
-
-        - name: Display Supervisor setup instructions
-          debug:
-            msg: |
-              ✓ Supervisor installed successfully!
-
-              Configuration: {{ workspace_dir }}/supervisord.conf
-              Logs: {{ workspace_dir }}/logs/
-
-              Services configured:
-              - comfyui: ComfyUI server (port 8188) - autostart enabled
-              - orchestrator: Model orchestrator (port 9000) - autostart disabled
-
-              To start Supervisor:
-                supervisord -c {{ workspace_dir }}/supervisord.conf
-
-              To manage services:
-                supervisorctl status                # Check service status
-                supervisorctl start orchestrator    # Start orchestrator
-                supervisorctl restart comfyui       # Restart ComfyUI
-                supervisorctl stop all              # Stop all services
-                supervisorctl tail -f comfyui       # Follow ComfyUI logs
-
-              Web interface:
-                http://localhost:9001 (username: admin, password: runpod2024)
-
-    #
-    # Systemd Services (Optional)
-    #
-    - name: Configure systemd services
-      tags: [systemd, never]  # never = skip by default
-      block:
-        - name: Create systemd service for orchestrator
-          template:
-            src: "{{ ai_dir }}/systemd/ai-orchestrator.service.j2"
-            dest: /etc/systemd/system/ai-orchestrator.service
-            mode: '0644'
-          become: true
-
-        - name: Reload systemd daemon
-          systemd:
-            daemon_reload: yes
-          become: true
-
-        - name: Enable orchestrator service
-          systemd:
-            name: ai-orchestrator
-            enabled: yes
-          become: true
-
-        - name: Display systemd instructions
-          debug:
-            msg: |
-              Systemd service configured. To manage:
-              - Start: sudo systemctl start ai-orchestrator
-              - Stop: sudo systemctl stop ai-orchestrator
-              - Status: sudo systemctl status ai-orchestrator
-              - Logs: sudo journalctl -u ai-orchestrator -f
-
-    #
-    # Validation
-    #
-    - name: Validate installation
-      tags: [validate, never]  # never = skip by default, run explicitly
-      block:
-        - name: Check Python packages
-          shell: pip3 list | grep -E "(fastapi|uvicorn|torch|vllm|diffusers|audiocraft)"
-          register: pip_check
-          changed_when: false
-
-        - name: Display installed packages
-          debug:
-            msg: "{{ pip_check.stdout_lines }}"
-
-        - name: Check GPU memory
-          shell: nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits
-          register: gpu_memory
-          changed_when: false
-
-        - name: Display GPU memory
-          debug:
-            msg: "Free GPU memory: {{ gpu_memory.stdout }} MB"
-
-        - name: Check cached models
-          shell: du -sh {{ cache_dir }}
-          register: cache_size
-          changed_when: false
-
-        - name: Display cache information
-          debug:
-            msg: "Model cache size: {{ cache_size.stdout }}"
-
-        - name: Verify service scripts are executable
-          file:
-            path: "{{ ai_dir }}/{{ item.script }}"
-            mode: '0755'
-          loop: "{{ services }}"
-
-        - name: Display validation summary
-          debug:
-            msg: |
-              ✓ Installation validated successfully!
-
-              Next steps:
-              1. Start orchestrator: python3 {{ ai_dir }}/model-orchestrator/orchestrator_subprocess.py
-              2. Test endpoint: curl http://localhost:9000/health
-              3. Configure LiteLLM on VPS to connect via Tailscale
-
-              Services:
-              {% for service in services %}
-              - {{ service.name }}: http://localhost:{{ service.port }}
-              {% endfor %}
-
-    #
-    # Cleanup for Template Creation
-    #
-    - name: Cleanup for template creation
-      tags: [cleanup, never]  # never = skip by default, run explicitly
-      block:
-        - name: Remove sensitive files
-          file:
-            path: "{{ item }}"
-            state: absent
-          loop:
-            - "{{ ai_dir }}/.env"
-            - /root/.ssh/known_hosts
-            - /root/.bash_history
-            - /root/.python_history
-
-        - name: Clear system logs
-          shell: find /var/log -type f -name "*.log" -delete
-          become: true
-          ignore_errors: yes
-
-        - name: Create template version marker
-          copy:
-            dest: "{{ workspace_dir }}/TEMPLATE_VERSION"
-            content: |
-              RunPod Multi-Modal AI Template (Process-Based Architecture)
-              Version: 2.0
-              Created: {{ ansible_date_time.iso8601 }}
-
-              Components:
-              - Python {{ python_version }}
-              - Orchestrator (process-based)
-              - Text Generation (vLLM + Qwen 2.5 7B)
-              - Image Generation (Flux.1 Schnell)
-              - Music Generation (MusicGen Medium)
-
-              Models Cached: ~37GB
-              Architecture: No Docker, direct Python execution
-
-              Deployment:
-              1. Create .env file with HF_TOKEN
-              2. Run: python3 {{ ai_dir }}/model-orchestrator/orchestrator_subprocess.py
-              3. Access: http://localhost:9000/health
-
-        - name: Display template creation instructions
-          debug:
-            msg: |
-              Template prepared successfully!
-
-              Next steps in RunPod dashboard:
-              1. Stop all running services
-              2. Go to My Pods → Select this pod → ⋮ → Save as Template
-              3. Name: multi-modal-ai-process-v2.0
-              4. Description: Process-based multi-modal AI (text/image/music)
-              5. Save and test deployment from template
-
-              Template enables 2-3 minute deployments instead of 60+ minutes!