Patch for arty.yml: splits the monolithic setup into setup/* steps, adds
install/* orchestration scripts, renames the old setup entries to *-legacy,
and rewrites the deployment notes.

Review notes (this preamble precedes the first diff header):
  * Line layout was reconstructed from a whitespace-collapsed copy; hunk line
    counts match the @@ headers, but indentation is inferred. TODO confirm
    against the real arty.yml before applying.
  * Four added lines were corrected in place (each carries a trailing comment
    or a quoting fix), so the post-image blob id on the index line predates
    those corrections.

diff --git a/arty.yml b/arty.yml
index f6f1d72..6ed6fd1 100644
--- a/arty.yml
+++ b/arty.yml
@@ -98,15 +98,406 @@ envs:
 
 # Deployment scripts for RunPod instances
 scripts:
-  # Initial setup
-  setup/full: |
+  #
+  # System Setup Scripts
+  #
+  setup/system-packages: |
+    echo "========================================="
+    echo " Installing System Packages"
+    echo "========================================="
+    echo ""
+
+    # Check GPU availability
+    if ! nvidia-smi > /dev/null 2>&1; then
+      echo "❌ ERROR: nvidia-smi not found. GPU not available!"
+      exit 1
+    fi
+    echo "✓ GPU detected:"
+    nvidia-smi --query-gpu=name,memory.total --format=csv,noheader
+    echo ""
+
+    # Update apt cache
+    echo "Updating apt cache..."
+    sudo apt update
+
+    # Install base system packages
+    echo "Installing system packages..."
+    sudo apt install -y \
+      build-essential \
+      python3-dev \
+      python3-pip \
+      python3-venv \
+      git \
+      curl \
+      wget \
+      vim \
+      htop \
+      tmux \
+      net-tools
+
+    echo ""
+    echo "✓ System packages installed successfully"
+
+  setup/python-env: |
+    echo "========================================="
+    echo " Setting Up Python Environment"
+    echo "========================================="
+    echo ""
+
+    # Upgrade pip
+    echo "Upgrading pip to 23.3.1..."
+    sudo pip3 install --upgrade pip==23.3.1
+
+    # Install core Python packages
+    echo "Installing core Python packages..."
+    if [ -f "/workspace/ai/core/requirements.txt" ]; then
+      sudo pip3 install -r /workspace/ai/core/requirements.txt
+    else
+      echo "⚠ Warning: /workspace/ai/core/requirements.txt not found"
+    fi
+
+    # Install vLLM dependencies
+    echo "Installing vLLM dependencies..."
+    if [ -f "/workspace/ai/models/vllm/requirements.txt" ]; then
+      sudo pip3 install -r /workspace/ai/models/vllm/requirements.txt
+    else
+      echo "⚠ Warning: /workspace/ai/models/vllm/requirements.txt not found"
+    fi
+
+    echo ""
+    echo "✓ Python environment configured successfully"
+
+  setup/comfyui-base: |
+    echo "========================================="
+    echo " Installing ComfyUI Base"
+    echo "========================================="
+    echo ""
+
+    # Clone ComfyUI if not exists
+    if [ ! -d "/workspace/ComfyUI" ]; then
+      echo "Cloning ComfyUI repository..."
+      git clone https://github.com/comfyanonymous/ComfyUI.git /workspace/ComfyUI
+    else
+      echo "ComfyUI already exists, pulling latest changes..."
+      cd /workspace/ComfyUI && git pull
+    fi
+
+    # Install ComfyUI dependencies
+    echo "Installing ComfyUI dependencies..."
+    sudo pip3 install -r /workspace/ComfyUI/requirements.txt
+
+    # Install additional ComfyUI dependencies
+    if [ -f "/workspace/ai/models/comfyui/requirements.txt" ]; then
+      echo "Installing additional ComfyUI dependencies..."
+      sudo pip3 install -r /workspace/ai/models/comfyui/requirements.txt
+    fi
+
+    # Create model directory structure
+    echo "Creating ComfyUI model directories..."
+    mkdir -p /workspace/ComfyUI/models/{checkpoints,unet,vae,loras,clip,clip_vision,controlnet,ipadapter,embeddings,upscale_models,video_models,animatediff_models,animatediff_motion_lora,audio_models,configs,diffusers,diffusion_models}
+
+    # Create symlink to huggingface cache
+    echo "Creating symlink to HuggingFace cache..."
+    ln -sfn /workspace/huggingface_cache /workspace/ComfyUI/models/huggingface_cache  # -n: on re-run replace the existing link instead of creating a nested link inside the cache dir
+
+    # Make start script executable
+    if [ -f "/workspace/ai/models/comfyui/start.sh" ]; then
+      chmod +x /workspace/ai/models/comfyui/start.sh
+    fi
+
+    echo ""
+    echo "✓ ComfyUI base installed successfully"
+    echo " Directory: /workspace/ComfyUI"
+    echo " Port: 8188"
+    echo " Start: bash /workspace/ai/models/comfyui/start.sh"
+
+  setup/comfyui-nodes: |
+    echo "========================================="
+    echo " Installing ComfyUI Custom Nodes"
+    echo "========================================="
+    echo ""
+
+    cd /workspace/ComfyUI/custom_nodes || exit 1  # abort instead of cloning the nodes into whatever directory we happen to be in
+
+    # ComfyUI Manager
+    echo "[1/5] Installing ComfyUI-Manager..."
+    if [ ! -d "ComfyUI-Manager" ]; then
+      git clone https://github.com/ltdrdata/ComfyUI-Manager.git
+    fi
+    [ -f "ComfyUI-Manager/requirements.txt" ] && sudo pip3 install -r ComfyUI-Manager/requirements.txt
+
+    # VideoHelperSuite
+    echo "[2/5] Installing ComfyUI-VideoHelperSuite..."
+    if [ ! -d "ComfyUI-VideoHelperSuite" ]; then
+      git clone https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git
+    fi
+    [ -f "ComfyUI-VideoHelperSuite/requirements.txt" ] && sudo pip3 install -r ComfyUI-VideoHelperSuite/requirements.txt
+
+    # AnimateDiff-Evolved
+    echo "[3/5] Installing ComfyUI-AnimateDiff-Evolved..."
+    if [ ! -d "ComfyUI-AnimateDiff-Evolved" ]; then
+      git clone https://github.com/Kosinkadink/ComfyUI-AnimateDiff-Evolved.git
+    fi
+    [ -f "ComfyUI-AnimateDiff-Evolved/requirements.txt" ] && sudo pip3 install -r ComfyUI-AnimateDiff-Evolved/requirements.txt
+
+    # IPAdapter Plus
+    echo "[4/5] Installing ComfyUI_IPAdapter_plus..."
+    if [ ! -d "ComfyUI_IPAdapter_plus" ]; then
+      git clone https://github.com/cubiq/ComfyUI_IPAdapter_plus.git
+    fi
+    [ -f "ComfyUI_IPAdapter_plus/requirements.txt" ] && sudo pip3 install -r ComfyUI_IPAdapter_plus/requirements.txt
+
+    # Impact-Pack
+    echo "[5/5] Installing ComfyUI-Impact-Pack..."
+    if [ ! -d "ComfyUI-Impact-Pack" ]; then
+      git clone https://github.com/ltdrdata/ComfyUI-Impact-Pack.git
+    fi
+    [ -f "ComfyUI-Impact-Pack/requirements.txt" ] && sudo pip3 install -r ComfyUI-Impact-Pack/requirements.txt
+
+    # Fix numpy version for vLLM compatibility
+    echo "Fixing numpy version..."
+    sudo pip3 install 'numpy<2.0.0' --force-reinstall
+
+    echo ""
+    echo "✓ Essential custom nodes installed successfully"
+    echo " - ComfyUI-Manager: Install/manage custom nodes"
+    echo " - VideoHelperSuite: Video operations"
+    echo " - AnimateDiff-Evolved: Video generation"
+    echo " - IPAdapter_plus: Style transfer"
+    echo " - Impact-Pack: Face enhancement"
+
+  setup/tailscale: |
+    echo "========================================="
+    echo " Installing Tailscale VPN"
+    echo "========================================="
+    echo ""
+
+    # Check if already installed
+    if command -v tailscale > /dev/null 2>&1; then
+      echo "✓ Tailscale already installed"
+      tailscale version
+    else
+      echo "Installing Tailscale..."
+      curl -fsSL https://tailscale.com/install.sh | sh
+      echo "✓ Tailscale installed successfully"
+    fi
+
+    echo ""
+    echo "To connect Tailscale:"
+    echo " 1. Start daemon: tailscaled --tun=userspace-networking --socks5-server=localhost:1055 &"
+    echo " 2. Authenticate: tailscale up --advertise-tags=tag:gpu"
+    echo " 3. Get IP: tailscale ip -4"
+    echo ""
+    echo "Note: Authentication requires manual URL visit"
+
+  setup/supervisor: |
+    echo "========================================="
+    echo " Installing Supervisor"
+    echo "========================================="
+    echo ""
+
+    # Install Supervisor
+    echo "Installing Supervisor via pip..."
+    sudo pip3 install supervisor
+
+    # Create logs directory
+    mkdir -p /workspace/logs
+
+    # Copy supervisor configuration
+    if [ -f "/workspace/ai/supervisord.conf" ]; then
+      echo "Deploying supervisord configuration..."
+      cp /workspace/ai/supervisord.conf /workspace/supervisord.conf
+      chmod 644 /workspace/supervisord.conf
+    else
+      echo "⚠ Warning: supervisord.conf not found at /workspace/ai/supervisord.conf"
+    fi
+
+    echo ""
+    echo "✓ Supervisor installed successfully"
+    echo ""
+    echo "Configuration: /workspace/supervisord.conf"
+    echo "Logs: /workspace/logs/"
+    echo ""
+    echo "Services configured:"
+    echo " - comfyui: ComfyUI server (port 8188) - autostart enabled"
+    echo " - orchestrator: Model orchestrator (port 9000) - autostart disabled"
+    echo ""
+    echo "To start: supervisord -c /workspace/supervisord.conf"
+    echo "To manage: supervisorctl status"
+    echo "Web UI: http://localhost:9001 (admin/runpod2024)"
+
+  #
+  # Utility Scripts
+  #
+  setup/validate: |
+    echo "========================================="
+    echo " Validating Installation"
+    echo "========================================="
+    echo ""
+
+    # Check Python packages
+    echo "Installed Python packages:"
+    pip3 list | grep -E "(fastapi|uvicorn|torch|transformers|diffusers)" || echo " (some packages not found)"
+    echo ""
+
+    # Check GPU memory
+    echo "GPU Memory:"
+    nvidia-smi --query-gpu=name,memory.free,memory.total --format=csv,noheader
+    echo ""
+
+    # Check cache size
+    if [ -d "/workspace/huggingface_cache" ]; then
+      echo "Model cache size:"
+      du -sh /workspace/huggingface_cache
+    else
+      echo "⚠ Warning: /workspace/huggingface_cache not found"
+    fi
+    echo ""
+
+    # Verify service scripts
+    echo "Service scripts:"
+    for script in /workspace/ai/scripts/*.sh; do
+      if [ -f "$script" ]; then
+        echo " ✓ $(basename "$script")"
+        chmod +x "$script"
+      fi
+    done
+    echo ""
+
+    echo "✓ Validation complete"
+
+  setup/cleanup: |
+    echo "========================================="
+    echo " Cleaning Up for Template Creation"
+    echo "========================================="
+    echo ""
+
+    # Remove sensitive files
+    echo "Removing sensitive files..."
+    rm -f /workspace/ai/.env
+    rm -f /root/.ssh/known_hosts
+    rm -f /root/.bash_history
+    rm -f /root/.python_history
+
+    # Clear system logs
+    echo "Clearing system logs..."
+    sudo find /var/log -type f -name "*.log" -delete 2>/dev/null || true
+
+    # Create template version marker
+    cat > /workspace/TEMPLATE_VERSION << 'EOF'
+    RunPod Multi-Modal AI Template (Process-Based Architecture)
+    Version: 2.0
+
+    Components:
+    - Python 3.10
+    - Orchestrator (process-based)
+    - ComfyUI with 20 production workflows
+    - Supervisor process manager
+    - Tailscale VPN
+
+    Architecture: No Docker, direct Python execution
+
+    Deployment:
+    1. Create .env file with HF_TOKEN
+    2. Run model downloads (Ansible)
+    3. Start services: arty run services/start
+
+    EOF
+
+    echo ""
+    echo "✓ Cleanup complete"
+    echo ""
+    echo "Next steps in RunPod dashboard:"
+    echo " 1. Stop all services"
+    echo " 2. My Pods → Select pod → ⋮ → Save as Template"
+    echo " 3. Name: multi-modal-ai-v2.0"
+    echo " 4. Save and test deployment"
+
+  #
+  # Orchestration Scripts
+  #
+  install/minimal: |
+    echo "========================================="
+    echo " Minimal Installation"
+    echo "========================================="
+    echo ""
+    echo "Installing: System + Python + ComfyUI + Supervisor"
+    echo ""
+
+    arty run setup/system-packages && \
+    arty run setup/python-env && \
+    arty run setup/comfyui-base && \
+    arty run setup/supervisor || exit 1  # a failed step must not fall through to the success banner
+
+    echo ""
+    echo "✓ Minimal installation complete"
+    echo ""
+    echo "Next steps:"
+    echo " 1. Download models: Use Ansible playbook"
+    echo " 2. Link models: arty run models/link-comfyui"
+    echo " 3. Start services: arty run services/start"
+
+  install/essential: |
+    echo "========================================="
+    echo " Essential Installation"
+    echo "========================================="
+    echo ""
+    echo "Installing: System + Python + ComfyUI + Nodes + Supervisor"
+    echo ""
+
+    arty run setup/system-packages && \
+    arty run setup/python-env && \
+    arty run setup/comfyui-base && \
+    arty run setup/comfyui-nodes && \
+    arty run setup/supervisor || exit 1  # a failed step must not fall through to the success banner
+
+    echo ""
+    echo "✓ Essential installation complete"
+    echo ""
+    echo "Next steps:"
+    echo " 1. Download models: ansible-playbook playbook.yml --tags comfyui-essential"
+    echo " 2. Link models: arty run models/link-comfyui"
+    echo " 3. Link workflows: arty run workflows/link-comfyui"
+    echo " 4. Start services: arty run services/start"
+
+  install/full: |
+    echo "========================================="
+    echo " Full Installation"
+    echo "========================================="
+    echo ""
+    echo "Installing: All components + Tailscale"
+    echo ""
+
+    arty run setup/system-packages && \
+    arty run setup/python-env && \
+    arty run setup/comfyui-base && \
+    arty run setup/comfyui-nodes && \
+    arty run setup/tailscale && \
+    arty run setup/supervisor || exit 1  # a failed step must not fall through to the success banner
+
+    echo ""
+    echo "✓ Full installation complete"
+    echo ""
+    echo "Next steps:"
+    echo " 1. Download models: ansible-playbook playbook.yml --tags comfyui-models-all"
+    echo " 2. Link models: arty run models/link-comfyui"
+    echo " 3. Link workflows: arty run workflows/link-comfyui"
+    echo " 4. Configure Tailscale (see instructions above)"
+    echo " 5. Start services: arty run services/start"
+
+  #
+  # Legacy Setup (deprecated - use install/* instead)
+  #
+  setup/full-legacy: |
     cd /workspace/ai
     cp .env.example .env
+    echo "⚠ DEPRECATED: Use 'arty run install/full' instead"
     echo "Edit .env and set HF_TOKEN, then run: ansible-playbook playbook.yml"
 
-  setup/essential: |
+  setup/essential-legacy: |
     cd /workspace/ai
     cp .env.example .env
+    echo "⚠ DEPRECATED: Use 'arty run install/essential' instead"
     echo "Edit .env and set HF_TOKEN, then run: ansible-playbook playbook.yml --tags comfyui-essential"
 
   # Model linking (run after models are downloaded)
@@ -150,7 +541,9 @@ scripts:
     echo ""
     echo "Access workflows at: /workspace/ComfyUI/workflows/"
 
-  # Service management (Supervisor-based)
+  #
+  # Service Management (Supervisor-based)
+  #
   services/start: bash /workspace/ai/scripts/start-all.sh
   services/stop: bash /workspace/ai/scripts/stop-all.sh
   services/restart: bash /workspace/ai/scripts/stop-all.sh && bash /workspace/ai/scripts/start-all.sh
@@ -162,27 +555,16 @@ scripts:
   services/start-orchestrator: supervisorctl -c /workspace/supervisord.conf start orchestrator
   services/stop-orchestrator: supervisorctl -c /workspace/supervisord.conf stop orchestrator
 
-  # Dependency installation
-  deps/comfyui-nodes: |
-    pip3 install -r /workspace/ComfyUI/custom_nodes/ComfyUI-Manager/requirements.txt
-    pip3 install -r /workspace/ComfyUI/custom_nodes/ComfyUI-VideoHelperSuite/requirements.txt
-    pip3 install 'numpy<2.0.0' --force-reinstall
-    echo "Custom node dependencies installed"
-
-  # Ansible provisioning shortcuts
-  ansible/base: cd /workspace/ai && ansible-playbook playbook.yml --tags base,python,dependencies
-  ansible/supervisor: cd /workspace/ai && ansible-playbook playbook.yml --tags supervisor
-  ansible/vllm: cd /workspace/ai && ansible-playbook playbook.yml --tags models
-  ansible/comfyui: cd /workspace/ai && ansible-playbook playbook.yml --tags comfyui,comfyui-essential
-  ansible/comfyui-all: cd /workspace/ai && ansible-playbook playbook.yml --tags comfyui,comfyui-models-all,comfyui-nodes
-  ansible/full: cd /workspace/ai && ansible-playbook playbook.yml
-
-  # Health checks
+  #
+  # Health Checks
+  #
   health/orchestrator: curl http://localhost:9000/health
   health/comfyui: curl http://localhost:8188
   health/vllm: curl http://localhost:8000/health
 
-  # System checks
+  #
+  # System Checks
+  #
   check/gpu: nvidia-smi
   check/disk: df -h /workspace
   check/models: du -sh /workspace/huggingface_cache
@@ -190,67 +572,163 @@ scripts:
 
 # Deployment notes
 notes: |
-  RunPod AI Model Orchestrator - Quick Start
+  RunPod AI Model Orchestrator - Quick Start Guide
 
-  1. Fresh Deployment:
-     - Clone repositories: arty sync --env prod
-     - Configure environment: cd /workspace/ai && cp .env.example .env
-     - Set HF_TOKEN in .env file
-     - Run Ansible: ansible-playbook playbook.yml --tags comfyui-essential
-     - Link models: arty run models/link-comfyui
-     - Link workflows: arty run workflows/link-comfyui
-     - Install node deps: arty run deps/comfyui-nodes
-     - Start services: arty run services/start
+  ========================================
+  FRESH DEPLOYMENT
+  ========================================
 
-  2. Model Downloads:
-     - Essential (~80GB): ansible-playbook playbook.yml --tags comfyui-essential
-     - All models (~137GB): ansible-playbook playbook.yml --tags comfyui-models-all
+  1. Clone Git Repositories:
+     arty sync --env prod
 
-  3. Service Management:
-     - Start: arty run services/start
-     - Stop: arty run services/stop
-     - Restart: arty run services/restart
+  2. Run Installation:
+     # Minimal (System + Python + ComfyUI base + Supervisor)
+     arty run install/minimal
 
-  4. Health Checks:
-     - Orchestrator: arty run health/orchestrator
-     - ComfyUI: arty run health/comfyui
-     - vLLM: arty run health/vllm
+     # Essential (+ Custom nodes)
+     arty run install/essential
 
-  5. Environment Profiles:
-     - Production (essential only): arty sync --env prod
-     - Development (all nodes): arty sync --env dev
-     - Minimal (orchestrator + ComfyUI only): arty sync --env minimal
+     # Full (+ Tailscale VPN)
+     arty run install/full
 
-  6. ComfyUI Workflows:
-     - Link workflows: arty run workflows/link-comfyui
-     - Location: /workspace/ComfyUI/workflows/
-     - 20 production-ready workflows across 6 categories
-     - Text-to-Image: FLUX Schnell, FLUX Dev, SDXL+Refiner, SD3.5
-     - Image-to-Image: IP-Adapter (style, face, composition)
-     - Image-to-Video: CogVideoX, SVD, SVD-XT
-     - Text-to-Music: MusicGen (small/medium/large/melody)
-     - Upscaling: Ultimate SD, Simple, Face-focused
-     - Advanced: ControlNet, AnimateDiff, Batch processing
-     - Documentation: README.md, WORKFLOW_STANDARDS.md
+  3. Configure Environment:
+     cd /workspace/ai
+     cp .env.example .env
+     # Edit .env and set HF_TOKEN
 
-  7. Important Files:
-     - Configuration: /workspace/ai/playbook.yml
-     - Model registry: /workspace/ai/model-orchestrator/models.yaml
-     - Environment: /workspace/ai/.env
-     - Services: /workspace/ai/scripts/*.sh
+  4. Download Models (using Ansible for now):
+     # Essential models (~80GB)
+     ansible-playbook playbook.yml --tags comfyui-essential
 
-  8. Ports:
-     - Orchestrator: 9000
-     - ComfyUI: 8188
-     - vLLM: 8000+
+     # All models (~137GB)
+     ansible-playbook playbook.yml --tags comfyui-models-all
 
-  9. Storage:
-     - Models cache: /workspace/huggingface_cache (~401GB)
-     - ComfyUI models: /workspace/ComfyUI/models (symlinks to cache)
-     - ComfyUI workflows: /workspace/ComfyUI/workflows (symlinks to git)
-     - Project: /workspace/ai
+  5. Link Models and Workflows:
+     arty run models/link-comfyui
+     arty run workflows/link-comfyui
+
+  6. Start Services:
+     arty run services/start
+
+  ========================================
+  INSTALLATION SCRIPTS
+  ========================================
+
+  System Setup:
+    - arty run setup/system-packages # Install apt packages, verify GPU
+    - arty run setup/python-env # Setup Python, pip, core packages
+    - arty run setup/comfyui-base # Install ComfyUI, create directories
+    - arty run setup/comfyui-nodes # Install 5 essential custom nodes
+    - arty run setup/supervisor # Install Supervisor process manager
+    - arty run setup/tailscale # Install Tailscale VPN (optional)
+
+  Utility Scripts:
+    - arty run setup/validate # Validate installation
+    - arty run setup/cleanup # Cleanup for template creation
+
+  Orchestration (run multiple steps):
+    - arty run install/minimal # Fast minimal setup
+    - arty run install/essential # Recommended for most users
+    - arty run install/full # Everything including Tailscale
+
+  ========================================
+  SERVICE MANAGEMENT
+  ========================================
+
+  Start/Stop:
+    - arty run services/start # Start all services via Supervisor
+    - arty run services/stop # Stop all services
+    - arty run services/restart # Restart all services
+    - arty run services/status # Check service status
+
+  Individual Services:
+    - arty run services/restart-comfyui
+    - arty run services/start-orchestrator
+    - arty run services/stop-orchestrator
+
+  Logs:
+    - arty run services/logs # Follow ComfyUI logs
+    - arty run services/logs-orchestrator
+
+  ========================================
+  HEALTH & MONITORING
+  ========================================
+
+  Health Checks:
+    - arty run health/orchestrator # curl http://localhost:9000/health
+    - arty run health/comfyui # curl http://localhost:8188
+    - arty run health/vllm # curl http://localhost:8000/health
+
+  System Checks:
+    - arty run check/gpu # nvidia-smi
+    - arty run check/disk # Disk usage
+    - arty run check/models # Model cache size
+    - arty run check/cache # List cached models
+
+  ========================================
+  COMFYUI WORKFLOWS
+  ========================================
+
+  Location: /workspace/ComfyUI/workflows/
+
+  Link workflows: arty run workflows/link-comfyui
+
+  20 Production Workflows:
+    - Text-to-Image: FLUX Schnell, FLUX Dev, SDXL+Refiner, SD3.5
+    - Image-to-Image: IP-Adapter (style, face, composition)
+    - Image-to-Video: CogVideoX, SVD, SVD-XT
+    - Text-to-Music: MusicGen (small/medium/large/melody)
+    - Upscaling: Ultimate SD, Simple, Face-focused
+    - Advanced: ControlNet, AnimateDiff, Batch processing
+
+  Documentation: README.md, WORKFLOW_STANDARDS.md
+
+  ========================================
+  ENVIRONMENT PROFILES
+  ========================================
+
+  - arty sync --env prod # Production (essential repos only)
+  - arty sync --env dev # Development (all repos)
+  - arty sync --env minimal # Minimal (orchestrator + ComfyUI)
+
+  ========================================
+  IMPORTANT PATHS
+  ========================================
+
+  Configuration:
+    - /workspace/ai/arty.yml # This file
+    - /workspace/ai/.env # Environment variables
+    - /workspace/supervisord.conf # Supervisor config
+
+  Code:
+    - /workspace/ai/ # Project directory
+    - /workspace/ComfyUI/ # ComfyUI installation
+    - /workspace/ai/scripts/*.sh # Service scripts
+
+  Models:
+    - /workspace/huggingface_cache/ # Model cache (~401GB)
+    - /workspace/ComfyUI/models/ # Symlinks to cache
+
+  Workflows:
+    - /workspace/ai/models/comfyui/workflows/ # Source (git)
+    - /workspace/ComfyUI/workflows/ # Linked workflows
+
+  Logs:
+    - /workspace/logs/ # Supervisor logs
+
+  ========================================
+  PORTS
+  ========================================
+
+  - 9000: Model Orchestrator
+  - 8188: ComfyUI
+  - 8000+: vLLM servers
+  - 9001: Supervisor web UI (admin/runpod2024)
+
+  ========================================
+  DOCUMENTATION
+  ========================================
 
-  For detailed documentation, see:
   - /workspace/ai/README.md
   - /workspace/ai/CLAUDE.md
   - /workspace/ai/COMFYUI_MODELS.md