Compare commits
216 Commits
0fa69cae28...main
.env.example (23 changes)
@@ -15,10 +15,33 @@ HF_TOKEN=hf_your_token_here
```diff
 # Get it with: tailscale ip -4
 # GPU_TAILSCALE_IP=100.100.108.13
 
+# ============================================================================
+# CivitAI API Key (Optional, for NSFW models)
+# ============================================================================
+# Get your API key from: https://civitai.com/user/account
+# Required for downloading NSFW models with artifact_civitai_download.sh
+# CIVITAI_API_KEY=your_civitai_api_key_here
+
+# ============================================================================
+# Tailscale VPN (Optional, for remote access)
+# ============================================================================
+# Get your authkey from: https://login.tailscale.com/admin/settings/keys
+# TAILSCALE_AUTHKEY=tskey-auth-your_tailscale_authkey_here
+
+# ============================================================================
+# WebDAV Sync (Optional, for output synchronization)
+# ============================================================================
+# WebDAV credentials for syncing ComfyUI outputs to cloud storage
+# WEBDAV_URL=https://webdav.example.com/
+# WEBDAV_USERNAME=your_username
+# WEBDAV_PASSWORD=your_password
+# WEBDAV_REMOTE_PATH=/path/to/comfyui/outputs
+
 # ============================================================================
 # Notes
 # ============================================================================
 # - HF_TOKEN is the only required variable for basic operation
+# - CIVITAI_API_KEY is required for downloading NSFW models from CivitAI
 # - Models will be cached in /workspace/ directories on RunPod
 # - Orchestrator automatically manages model switching
 # - No database credentials needed (stateless architecture)
```
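Scripts and services read these values from `.env` at startup. A minimal sketch of loading them in a shell session (assuming the plain `KEY=value` format above, with no quoting edge cases):

```bash
set -a          # auto-export every variable defined from here on
source .env     # define the variables from the file
set +a          # stop auto-exporting
```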
.gitea/workflows/build-docker-image.yml (new file, 114 lines)
@@ -0,0 +1,114 @@
```yaml
name: Build and Push RunPod Docker Image

on:
  push:
    branches:
      - main
    tags:
      - 'v*.*.*'
  pull_request:
    branches:
      - main
  workflow_dispatch:
    inputs:
      tag:
        description: 'Custom tag for the image'
        required: false
        default: 'manual'

env:
  REGISTRY: dev.pivoine.art
  IMAGE_NAME: valknar/runpod-ai-orchestrator

jobs:
  build-and-push:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          platforms: linux/amd64

      - name: Log in to Gitea Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ gitea.actor }}
          password: ${{ secrets.REGISTRY_TOKEN }}

      - name: Extract metadata (tags, labels)
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            # Tag as 'latest' for main branch
            type=raw,value=latest,enable={{is_default_branch}}
            # Tag with branch name
            type=ref,event=branch
            # Tag with PR number
            type=ref,event=pr
            # Tag with git tag (semver)
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
            # Tag with commit SHA
            type=sha,prefix={{branch}}-
            # Custom tag from workflow_dispatch
            type=raw,value=${{ gitea.event.inputs.tag }},enable=${{ gitea.event_name == 'workflow_dispatch' }}
          labels: |
            org.opencontainers.image.title=RunPod AI Orchestrator
            org.opencontainers.image.description=Minimal Docker template for RunPod deployment with ComfyUI + vLLM orchestration, Supervisor process management, and Tailscale VPN integration
            org.opencontainers.image.vendor=valknar
            org.opencontainers.image.source=https://dev.pivoine.art/${{ gitea.repository }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile
          platforms: linux/amd64
          push: ${{ gitea.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache
          cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache,mode=max

      - name: Generate image digest
        if: gitea.event_name != 'pull_request'
        run: |
          echo "### Docker Image Published :rocket:" >> $GITEA_STEP_SUMMARY
          echo "" >> $GITEA_STEP_SUMMARY
          echo "**Registry:** \`${{ env.REGISTRY }}\`" >> $GITEA_STEP_SUMMARY
          echo "**Image:** \`${{ env.IMAGE_NAME }}\`" >> $GITEA_STEP_SUMMARY
          echo "" >> $GITEA_STEP_SUMMARY
          echo "**Tags:**" >> $GITEA_STEP_SUMMARY
          echo "\`\`\`" >> $GITEA_STEP_SUMMARY
          echo "${{ steps.meta.outputs.tags }}" >> $GITEA_STEP_SUMMARY
          echo "\`\`\`" >> $GITEA_STEP_SUMMARY
          echo "" >> $GITEA_STEP_SUMMARY
          echo "**Pull command:**" >> $GITEA_STEP_SUMMARY
          echo "\`\`\`bash" >> $GITEA_STEP_SUMMARY
          echo "docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest" >> $GITEA_STEP_SUMMARY
          echo "\`\`\`" >> $GITEA_STEP_SUMMARY
          echo "" >> $GITEA_STEP_SUMMARY
          echo "**Use in RunPod template:**" >> $GITEA_STEP_SUMMARY
          echo "\`\`\`" >> $GITEA_STEP_SUMMARY
          echo "Container Image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest" >> $GITEA_STEP_SUMMARY
          echo "\`\`\`" >> $GITEA_STEP_SUMMARY

      - name: PR Comment - Image built but not pushed
        if: gitea.event_name == 'pull_request'
        run: |
          echo "### Docker Image Built Successfully :white_check_mark:" >> $GITEA_STEP_SUMMARY
          echo "" >> $GITEA_STEP_SUMMARY
          echo "Image was built successfully but **not pushed** (PR builds are not published)." >> $GITEA_STEP_SUMMARY
          echo "" >> $GITEA_STEP_SUMMARY
          echo "**Would be tagged as:**" >> $GITEA_STEP_SUMMARY
          echo "\`\`\`" >> $GITEA_STEP_SUMMARY
          echo "${{ steps.meta.outputs.tags }}" >> $GITEA_STEP_SUMMARY
          echo "\`\`\`" >> $GITEA_STEP_SUMMARY
```
.gitignore (vendored, 7 changes)
@@ -42,9 +42,14 @@ env/
```diff
 # Logs
 *.log
-logs/
+logs/*
+!logs/.gitkeep
 *.out
 
+# Supervisord runtime files
+supervisord.pid
+supervisor.sock
+
 # OS files
 .DS_Store
 Thumbs.db
```
570
CLAUDE.md
Normal file
@@ -0,0 +1,570 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Overview

This is a lightweight AI model serving infrastructure for RunPod GPU instances (specifically RTX 4090 with 24GB VRAM). It provides OpenAI-compatible API endpoints for text, image, and audio generation via dedicated vLLM servers and ComfyUI.

**Key Design Philosophy:**

- **Direct vLLM access** - Dedicated FastAPI servers for each text model (no proxy layer)
- **Process-based architecture** - Managed by Supervisor for reliability
- **Simple service management** - Start/stop models independently as needed
- **OpenAI-compatible APIs** - Works seamlessly with LiteLLM proxy and other AI tools

## Architecture

### Core Components

1. **vLLM Servers** (`vllm/server_qwen.py`, `vllm/server_llama.py`)
   - Dedicated FastAPI servers for each text model
   - Qwen 2.5 7B on port 8000, Llama 3.1 8B on port 8001
   - Direct AsyncLLMEngine access for optimal streaming performance
   - max_model_len=20000 optimized for 24GB VRAM

2. **ComfyUI** (`comfyui/`)
   - Image/video/audio generation server on port 8188
   - FLUX, SDXL, CogVideoX, MusicGen models
   - Advanced workflow system with custom nodes

3. **Supervisor** (`supervisord.conf`)
   - Process manager for all AI services
   - Auto-restart, centralized logging
   - Web interface on port 9001

4. **Ansible Provisioning** (`playbook.yml`)
   - Complete infrastructure-as-code setup
   - Installs dependencies, downloads models, configures services
   - Supports selective installation via tags

## Common Commands

### Repository Management with Arty

This project uses Arty for repository and deployment management. See `arty.yml` for the full configuration.

```bash
# Clone all repositories (fresh deployment)
arty sync --env prod      # Production: Essential nodes only
arty sync --env dev       # Development: All nodes including optional
arty sync --env minimal   # Minimal: Just vLLM + ComfyUI base

# Run deployment scripts
arty run setup/full            # Show setup instructions
arty run models/link-comfyui   # Link downloaded models to ComfyUI
arty run deps/comfyui-nodes    # Install custom node dependencies
arty run services/start        # Start supervisor
arty run services/stop         # Stop all services

# Health checks
arty run health/comfyui   # Check ComfyUI
arty run check/gpu        # nvidia-smi
arty run check/models     # Show cache size
```

### Initial Setup

```bash
# 1. Clone repositories with Arty (fresh RunPod instance)
arty sync --env prod

# 2. Configure environment
cd /workspace/ai
cp .env.example .env
# Edit .env and set HF_TOKEN=your_huggingface_token

# 3. Full deployment with Ansible
ansible-playbook playbook.yml

# 4. Essential ComfyUI setup (faster, ~80GB instead of ~137GB)
ansible-playbook playbook.yml --tags comfyui-essential

# 5. Link models to ComfyUI
arty run models/link-comfyui

# 6. Install custom node dependencies
arty run deps/comfyui-nodes

# 7. Selective installation (base system + Python + vLLM models only)
ansible-playbook playbook.yml --tags base,python,dependencies
```

### Service Management

This project uses **Supervisor** for process management, providing auto-restart, centralized logging, and easy service control.

```bash
# Start all services (with Supervisor)
bash scripts/start-all.sh          # Starts supervisor daemon + services
arty run services/start            # Same via arty

# Stop all services
bash scripts/stop-all.sh           # Stops all services + supervisor
arty run services/stop             # Same via arty

# Check service status
bash scripts/status.sh             # Show all service status
arty run services/status           # Same via arty
supervisorctl status               # Direct supervisor command

# Individual service control
supervisorctl start vllm-qwen      # Start Qwen vLLM server
supervisorctl start vllm-llama     # Start Llama vLLM server
supervisorctl restart comfyui      # Restart ComfyUI
supervisorctl stop vllm-qwen       # Stop Qwen vLLM server
arty run services/restart-comfyui  # Restart ComfyUI via arty

# View logs
supervisorctl tail -f comfyui      # Follow ComfyUI logs
supervisorctl tail -f vllm-qwen    # Follow Qwen vLLM logs
supervisorctl tail -f vllm-llama   # Follow Llama vLLM logs
arty run services/logs             # Follow ComfyUI logs via arty

# Web interface
# Access at http://localhost:9001 (username: admin, password: runpod2024)
```

**Supervisor Configuration:**

- Config file: `/workspace/supervisord.conf`
- Log directory: `/workspace/logs/`
- PID file: `/workspace/supervisord.pid`
- Socket: `/workspace/supervisor.sock`

**Services managed:**

- `comfyui` - ComfyUI server (port 8188, autostart enabled)
- `vllm-qwen` - Qwen 2.5 7B vLLM server (port 8000, autostart disabled)
- `vllm-llama` - Llama 3.1 8B vLLM server (port 8001, autostart disabled)
- `webdav-sync` - WebDAV sync service for ComfyUI outputs

### GPU Memory Management and Mode Switching

**VRAM Constraints (RTX 4090 - 24GB total):**

The GPU has limited memory, which requires manual service switching:

| Service | Model | VRAM Usage | Compatible With |
|---------|-------|------------|-----------------|
| ComfyUI | FLUX Schnell FP16 | ~23GB | None (uses all VRAM) |
| ComfyUI | SDXL Base | ~12GB | Small vLLM models |
| vLLM | Qwen 2.5 7B | ~14GB | None (conflicts with ComfyUI) |
| vLLM | Llama 3.1 8B | ~17GB | None (conflicts with ComfyUI) |

**Mode Switching Workflow:**

Since ComfyUI and vLLM models cannot run simultaneously (they exceed 24GB combined), you must manually switch modes. A small helper script wrapping these steps is sketched after the two workflows below.

**Switch to Text Generation Mode (vLLM):**
```bash
# 1. Stop ComfyUI
supervisorctl stop comfyui

# 2. Start desired vLLM server
supervisorctl start vllm-qwen   # or vllm-llama

# 3. Verify
supervisorctl status
nvidia-smi   # Check VRAM usage
```

**Switch to Image/Video/Audio Generation Mode (ComfyUI):**
```bash
# 1. Stop vLLM servers
supervisorctl stop vllm-qwen vllm-llama

# 2. Start ComfyUI
supervisorctl start comfyui

# 3. Verify
supervisorctl status
nvidia-smi   # Check VRAM usage
```
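The two workflows are symmetric, so they wrap naturally in a helper. A minimal sketch (a hypothetical `scripts/switch-mode.sh`, not part of this diff) assuming the service names from `supervisord.conf` above:

```bash
#!/usr/bin/env bash
# Usage: switch-mode.sh text|image
# Stops the conflicting services, then starts the requested mode.
set -euo pipefail

MODE="${1:?usage: switch-mode.sh text|image}"

case "$MODE" in
  text)
    supervisorctl stop comfyui
    supervisorctl start vllm-qwen              # swap for vllm-llama if preferred
    ;;
  image)
    supervisorctl stop vllm-qwen vllm-llama
    supervisorctl start comfyui
    ;;
  *)
    echo "unknown mode: $MODE" >&2; exit 1 ;;
esac

supervisorctl status
nvidia-smi --query-gpu=memory.used,memory.total --format=csv   # confirm VRAM freed
```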
**Access via Supervisor Web UI:**

You can also switch modes using the Supervisor web interface:
- URL: `https://supervisor.ai.pivoine.art` (via VPS proxy) or `http://100.114.60.40:9001` (direct Tailscale)
- Username: `admin`
- Password: `runpod2024`
- Click "Start" or "Stop" buttons for each service

**Integration with LiteLLM:**

The vLLM servers integrate with LiteLLM on the VPS for unified API access:
- vLLM models (qwen-2.5-7b, llama-3.1-8b) are available via direct connections
- Requests route directly to the vLLM servers (ports 8000, 8001)
- Environment variables `GPU_VLLM_QWEN_URL` and `GPU_VLLM_LLAMA_URL` configure the connections
- The LiteLLM config uses `os.environ/GPU_VLLM_*_URL` syntax for dynamic URLs

### Testing

```bash
# Health check (Qwen vLLM server)
curl http://localhost:8000/health

# List available models
curl http://localhost:8000/v1/models

# Test text generation (streaming)
curl -s -N -X POST http://localhost:8000/v1/chat/completions \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "qwen-2.5-7b",
    "messages": [{"role": "user", "content": "Count to 5"}],
    "max_tokens": 50,
    "stream": true
  }'

# Check ComfyUI is reachable (POST /prompt expects a full workflow graph
# JSON, so use the stats endpoint for a quick health test)
curl http://localhost:8188/system_stats
```
### Ansible Tags Reference

**System Setup:**
- `base` - Base system packages
- `python` - Python environment setup
- `dependencies` - Install Python packages

**Model Installation:**
- `models` - Download vLLM/Flux/MusicGen models (legacy)
- `comfyui` - Install ComfyUI base
- `comfyui-essential` - Quick setup (ComfyUI + essential models only, ~80GB)
- `comfyui-models-image` - Image generation models (FLUX, SDXL, SD3.5)
- `comfyui-models-video` - Video generation models (CogVideoX, SVD)
- `comfyui-models-audio` - Audio generation models (MusicGen variants)
- `comfyui-models-support` - CLIP, IP-Adapter, ControlNet models
- `comfyui-models-all` - All ComfyUI models (~137GB)
- `comfyui-nodes` - Install essential custom nodes

**Infrastructure:**
- `tailscale` - Install Tailscale VPN client
- `supervisor` - Install and configure Supervisor process manager
- `systemd` - Configure systemd services (tagged `never`; not for RunPod)
- `validate` - Health checks (tagged `never`; run explicitly)
### Adding New vLLM Models

1. **Create dedicated server script** (e.g., `vllm/server_mistral.py`):

```python
#!/usr/bin/env python3
from vllm import AsyncLLMEngine, AsyncEngineArgs
# ... (copy structure from server_qwen.py)

model_name: str = "mistralai/Mistral-7B-Instruct-v0.3"
port = 8002  # Choose unique port
```

2. **Add supervisor configuration** (`supervisord.conf`):

```ini
[program:vllm-mistral]
command=vllm/venv/bin/python vllm/server_mistral.py
directory=.
autostart=false
autorestart=true
environment=HF_HOME="../huggingface_cache",HF_TOKEN="%(ENV_HF_TOKEN)s"
```

3. **Download model** (handled by the Ansible playbook, or manually via the HuggingFace CLI)

4. **Add to LiteLLM config** (on VPS):

```yaml
- model_name: mistral-7b
  litellm_params:
    model: hosted_vllm/openai/mistral-7b
    api_base: os.environ/GPU_VLLM_MISTRAL_URL
    api_key: "EMPTY"
```

5. **Start the service:**

```bash
supervisorctl start vllm-mistral
```
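Once the process is up, a quick smoke test against the new port (8002 here, matching step 1) confirms the server answers before wiring it into LiteLLM:

```bash
# Should respond once startup completes (~2 min for a 7B model)
curl http://localhost:8002/health
curl http://localhost:8002/v1/models
```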
### Downloading NSFW Models from CivitAI

The project includes dedicated tooling for downloading NSFW models from CivitAI, separate from the HuggingFace downloader.

**Files:**
- Configuration: `models_civitai.yaml` - Defines available CivitAI models
- Downloader: `artifact_civitai_download.sh` - Beautiful CLI tool for downloading models

**Requirements:**
- CivitAI API key (set in `.env` file as `CIVITAI_API_KEY`)
- Get your key from: https://civitai.com/user/account

**Available Models (Essential):**
1. **LUSTIFY v7.0 GGWP** (6.31GB) - Photoreal NSFW checkpoint for women
2. **Pony Diffusion V6 XL** (6.5GB) - Anime/furry NSFW with balanced content
3. **RealVisXL V5.0** (6.8GB) - Photorealistic NSFW model

**Usage:**

```bash
# Download all essential NSFW models (19.61GB)
artifact_civitai_download.sh

# Download only (no symlinks)
artifact_civitai_download.sh download

# Create symlinks only (models already downloaded)
artifact_civitai_download.sh link

# Custom configuration file
artifact_civitai_download.sh -c /path/to/models_civitai.yaml

# Help
artifact_civitai_download.sh --help
```

**Directory Structure:**
- Models downloaded to: `/workspace/models/civitai/` (cache)
- Symlinks created in: `/workspace/ComfyUI/models/checkpoints/`

**Features:**
- Beautiful purple/magenta themed CLI (vs cyan for HuggingFace)
- Automatic retry with exponential backoff for rate limiting
- Resume interrupted downloads
- Progress bars and status indicators
- Parallel download support (respects rate limits)

**Configuration Example (`models_civitai.yaml`):**

```yaml
model_categories:
  nsfw_checkpoints:
    - name: lustify-v7-ggwp
      version_id: "1094291"
      model_id: "573152"
      description: "LUSTIFY v7.0 GGWP - Photoreal NSFW checkpoint"
      size_gb: 6.31
      essential: true
      type: checkpoints
```
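Under the hood, each entry resolves to CivitAI's public download endpoint, keyed by `version_id`. A sketch of the equivalent raw request (the exact flags the script uses may differ):

```bash
# Download a model version by its version_id, authenticated with the API key
# from .env; -L follows redirects, -C - resumes a partial download.
curl -L -C - \
  -H "Authorization: Bearer $CIVITAI_API_KEY" \
  -o /workspace/models/civitai/lustify-v7-ggwp.safetensors \
  "https://civitai.com/api/download/models/1094291"
```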
**Installation Profiles:**
- `essential`: 3 models (LUSTIFY, Pony, RealVisXL) - 19.61GB, 30-60 min
- `complete`: All 6 models - 39.21GB, 1-2 hours
- `realistic_only`: LUSTIFY + RealVisXL - 13.11GB, 20-40 min
- `anime_only`: Pony + WAI-NSFW - 13.0GB, 20-40 min

**Important Notes:**
- All models are SDXL-based (require ~12GB VRAM)
- Cannot run simultaneously with FLUX models (combined they exceed the 24GB card)
- Can run alongside smaller models such as standard SDXL (~12GB total)
- Rate limiting: 5 second delay between downloads (configurable)
## Key Implementation Details

### Direct vLLM Server Architecture

Each vLLM model runs as a dedicated FastAPI server:
- Independent process management via Supervisor
- Direct AsyncLLMEngine access for optimal performance
- OpenAI-compatible `/v1/chat/completions` endpoint
- Streaming support with proper delta formatting

See `vllm/server_qwen.py` for the reference implementation; a condensed sketch of the pattern follows below.
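The actual `vllm/server_qwen.py` is not reproduced in this diff. The following is a minimal sketch of the pattern the bullets describe (FastAPI + `AsyncLLMEngine`, OpenAI-style streaming endpoint), with the model name and port taken from the component list above; chat templating and error handling are simplified:

```python
#!/usr/bin/env python3
"""Minimal sketch of a dedicated vLLM server (pattern of server_qwen.py)."""
import json
import uuid

import uvicorn
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from vllm import AsyncEngineArgs, AsyncLLMEngine, SamplingParams

MODEL = "Qwen/Qwen2.5-7B-Instruct"
PORT = 8000

app = FastAPI()
engine = AsyncLLMEngine.from_engine_args(
    AsyncEngineArgs(model=MODEL, max_model_len=20000)  # fits 24GB VRAM
)

@app.get("/health")
async def health():
    return {"status": "ok", "model": "qwen-2.5-7b"}

@app.post("/v1/chat/completions")
async def chat(request: dict):
    # Naive prompt construction; the real server applies the chat template.
    prompt = "\n".join(m["content"] for m in request["messages"])
    params = SamplingParams(max_tokens=request.get("max_tokens", 256))
    request_id = str(uuid.uuid4())

    async def stream():
        sent = 0
        async for output in engine.generate(prompt, params, request_id):
            text = output.outputs[0].text          # cumulative generated text
            delta, sent = text[sent:], len(text)   # emit only the new part
            chunk = {"choices": [{"delta": {"content": delta}, "index": 0}]}
            yield f"data: {json.dumps(chunk)}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(stream(), media_type="text/event-stream")

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=PORT)
```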
### Environment Variables and Model Storage

Set in `.env` file:
- `HF_TOKEN` - **Required** - HuggingFace API token for model downloads
- `GPU_TAILSCALE_IP` - Optional - Tailscale IP for VPN access

Models are cached in:
- `/workspace/huggingface_cache` - HuggingFace models
- `/workspace/models` - Other model files
- `/workspace/ComfyUI/models` - ComfyUI model directory structure
### Integration with LiteLLM

For unified API management through the LiteLLM proxy:

**LiteLLM configuration (`litellm-config.yaml` on VPS):**
```yaml
model_list:
  - model_name: qwen-2.5-7b
    litellm_params:
      model: hosted_vllm/openai/qwen-2.5-7b    # Use hosted_vllm prefix!
      api_base: http://100.121.199.88:9000/v1  # Tailscale VPN IP
      api_key: dummy
      stream: true
      timeout: 600
```

**Critical:** Use the `hosted_vllm/openai/` prefix for vLLM models to enable proper streaming support. The wrong prefix causes empty delta chunks.
### ComfyUI Installation

ComfyUI provides advanced image/video/audio generation capabilities:

**Directory structure created:**
```
/workspace/ComfyUI/
├── models/
│   ├── checkpoints/    # FLUX, SDXL, SD3 models
│   ├── clip_vision/    # CLIP vision models
│   ├── video_models/   # CogVideoX, SVD
│   ├── audio_models/   # MusicGen
│   └── custom_nodes/   # Extension nodes
```

**Essential custom nodes installed:**
- ComfyUI-Manager - Model/node management GUI
- ComfyUI-VideoHelperSuite - Video operations
- ComfyUI-AnimateDiff-Evolved - Video generation
- ComfyUI_IPAdapter_plus - Style transfer
- ComfyUI-Impact-Pack - Auto face enhancement
- comfyui-sound-lab - Audio generation

**VRAM requirements for 24GB GPU:**
- FLUX Schnell FP16: 23GB (leaves 1GB)
- SDXL Base: 12GB
- CogVideoX-5B: 12GB (with optimizations)
- MusicGen Medium: 8GB

See `COMFYUI_MODELS.md` for the detailed model catalog and usage examples.
## Deployment Workflow

### RunPod Deployment (Current Setup)

1. **Clone repository:**
```bash
cd /workspace
git clone <repo-url> ai
cd ai
```

2. **Configure environment:**
```bash
cp .env.example .env
# Edit .env, set HF_TOKEN
```

3. **Run Ansible provisioning:**
```bash
ansible-playbook playbook.yml
# Or selective: --tags base,python,comfyui-essential
```

4. **Start services:**
```bash
bash scripts/start-all.sh
```

5. **Verify:**
```bash
curl http://localhost:9000/health
```

### Tailscale VPN Integration

To connect the RunPod GPU to the VPS infrastructure:

```bash
# On RunPod instance
curl -fsSL https://tailscale.com/install.sh | sh
tailscaled --tun=userspace-networking --socks5-server=localhost:1055 &
tailscale up --advertise-tags=tag:gpu
tailscale ip -4   # Get IP for LiteLLM config
```

Benefits: secure tunnel, no public exposure, low latency.
## Project Structure

```
runpod/
├── model-orchestrator/
│   ├── orchestrator_subprocess.py   # Main orchestrator (USE THIS)
│   ├── orchestrator.py              # Docker-based version (legacy)
│   ├── models.yaml                  # Model registry
│   └── requirements.txt
├── models/
│   ├── vllm/
│   │   ├── server.py                # vLLM text generation service
│   │   └── requirements.txt
│   └── comfyui/
│       ├── start.sh                 # ComfyUI startup script
│       └── requirements.txt
├── scripts/
│   ├── start-all.sh                 # Start all services with Supervisor
│   ├── stop-all.sh                  # Stop all services
│   └── status.sh                    # Check service status
├── supervisord.conf                 # Supervisor process manager config
├── arty.yml                         # Arty repository manager config
├── playbook.yml                     # Ansible provisioning playbook
├── inventory.yml                    # Ansible inventory (localhost)
├── ansible.cfg                      # Ansible configuration
├── .env.example                     # Environment variables template
├── CLAUDE.md                        # This file
├── COMFYUI_MODELS.md                # ComfyUI models catalog
├── MODELS_LINKED.md                 # Model linkage documentation
├── comfyui_models.yaml              # ComfyUI model configuration
└── README.md                        # User documentation
```
## Troubleshooting

### Model fails to start
- Check VRAM: `nvidia-smi`
- Verify model weights downloaded: `ls -lh /workspace/huggingface_cache`
- Check port conflicts: `lsof -i :9000`
- Test model directly: `python3 models/vllm/server.py`

### Streaming returns empty deltas
- Use the correct LiteLLM model prefix: `hosted_vllm/openai/model-name`
- Set `stream: true` in the LiteLLM config
- Verify the orchestrator proxies streaming correctly

### HuggingFace download errors
- Check token: `echo $HF_TOKEN`
- Set in .env: `HF_TOKEN=your_token_here`
- Re-run Ansible: `ansible-playbook playbook.yml --tags dependencies`

### Out of storage space
- Check disk usage: `df -h /workspace`
- Use essential tags: `--tags comfyui-essential` (~80GB vs ~137GB)
- Clear cache: `rm -rf /workspace/huggingface_cache`

### Supervisor not running
- Check status: `bash scripts/status.sh`
- View logs: `cat /workspace/logs/supervisord.log`
- Start supervisor: `bash scripts/start-all.sh`
- Check for a stale PID: `rm -f /workspace/supervisord.pid`, then restart

### Service won't start
- Check supervisor status: `supervisorctl status`
- View service logs: `supervisorctl tail -f comfyui` or `supervisorctl tail -f orchestrator`
- Check error logs: `cat /workspace/logs/comfyui.err.log`
- Restart service: `supervisorctl restart comfyui`
- Check if the port is in use: `ss -tulpn | grep :8188`

### Orchestrator not responding
- Check supervisor status: `supervisorctl status orchestrator`
- View logs: `supervisorctl tail -f orchestrator` or `cat /workspace/logs/orchestrator.err.log`
- Restart: `supervisorctl restart orchestrator`
- Manual start for debugging: `cd /workspace/ai && python3 model-orchestrator/orchestrator_subprocess.py`

## Performance Notes

- **Model switching time:** 30-120 seconds (depends on model size)
- **Text generation:** ~20-40 tokens/second (Qwen 2.5 7B on RTX 4090)
- **Image generation:** 4-5 seconds per image (FLUX Schnell)
- **Music generation:** 60-90 seconds for 30s of audio (MusicGen Medium)

## Important Conventions

- **Always use `orchestrator_subprocess.py`** - Not the Docker version
- **Sequential loading only** - One model active at a time on 24GB VRAM
- **Models downloaded by Ansible** - Use playbook tags, not manual downloads
- **Services run as processes** - Not systemd (RunPod containers don't support it)
- **Environment managed via .env** - Required: HF_TOKEN
- **Port 9000 for orchestrator** - Model services use 8000+
Dockerfile (new file, 26 lines)
@@ -0,0 +1,26 @@
```dockerfile
# RunPod AI Orchestrator Template
# Minimal Docker image for ComfyUI + vLLM orchestration
# Models and application code live on network volume at /workspace

FROM runpod/pytorch:2.4.0-py3.11-cuda12.4.1-devel-ubuntu22.04

# Install Supervisor for process management
RUN pip install --no-cache-dir supervisor

# Install Tailscale for VPN connectivity
RUN curl -fsSL https://tailscale.com/install.sh | sh

# Install additional system utilities
RUN apt-get update && apt-get install -y \
    wget \
    && rm -rf /var/lib/apt/lists/*

# Copy the startup script
COPY start.sh /start.sh
RUN chmod +x /start.sh

# Set working directory to /workspace (network volume mount point)
WORKDIR /workspace

# RunPod calls /start.sh by default
CMD ["/start.sh"]
```
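`start.sh` itself is not shown in this diff. Based on what the image installs (Supervisor, Tailscale) and the conventions in CLAUDE.md, a plausible minimal sketch might look like the following; the real script may differ:

```bash
#!/usr/bin/env bash
# Hypothetical /start.sh: bring up VPN (if configured) and Supervisor,
# then keep the container alive. Assumes the repo lives at /workspace.
set -euo pipefail

# Userspace networking: RunPod containers have no /dev/net/tun
tailscaled --tun=userspace-networking --socks5-server=localhost:1055 &
if [ -n "${TAILSCALE_AUTHKEY:-}" ]; then
  tailscale up --authkey="$TAILSCALE_AUTHKEY" --advertise-tags=tag:gpu
fi

# Launch all services defined in supervisord.conf (-n keeps PID 1 in foreground)
exec supervisord -n -c /workspace/supervisord.conf
```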
README.md (549 changes)
@@ -1,173 +1,468 @@
# AI Model Orchestrator for RunPod

A lightweight, process-based AI model orchestrator designed for single-GPU environments. It automatically switches between text, image, and audio generation models based on API requests, maximizing GPU utilization on resource-constrained hardware.

## Overview

This orchestrator solves a common problem: **running multiple large AI models on a single GPU** by:

- Loading models **sequentially** (one at a time) to fit within GPU memory constraints
- **Automatically switching** models based on incoming API requests
- Providing **OpenAI-compatible endpoints** for seamless integration
- Supporting streaming responses for real-time text generation

**Perfect for:**

- RunPod RTX 4090 instances (24GB VRAM)
- Cost-effective GPU servers
- Self-hosted AI infrastructure
- Integration with LiteLLM, Open WebUI, and other AI proxies

## Architecture

```
Client Request → LiteLLM Proxy → Orchestrator (port 9000) → Model Service (port 8000+)
                  (Optional)            ↓
                                  Model Registry
                                  (models.yaml)
                                        ↓
                                Sequential Loading
                           (stops current, starts new)
```

### Key Components

- **Orchestrator** (`orchestrator_subprocess.py`): FastAPI proxy managing model lifecycle via Python subprocesses
- **Model Registry** (`models.yaml`): YAML configuration defining available models and their properties
- **Model Services**: Individual Python servers (vLLM, OpenedAI-Images, AudioCraft) running specific models
- **Management Scripts**: Bash utilities for starting, stopping, and downloading models

### Why Process-Based?

Unlike the Docker-based version (`orchestrator.py`), the subprocess implementation:

- Works seamlessly in RunPod's containerized environment (no Docker-in-Docker complexity)
- Starts models faster (direct Python process spawning)
- Simplifies debugging (single process tree)
- Reduces overhead (no container management layer)

A condensed sketch of this switching loop appears below.
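This is not the repository's `orchestrator_subprocess.py`, just a minimal illustration of the sequential-switching pattern described above (registry lookup, stop the current process, spawn the new one, wait for health); the `/health` endpoint and `PORT` environment variable are assumptions consistent with the rest of this README:

```python
#!/usr/bin/env python3
"""Illustrative sketch of sequential model switching via subprocesses."""
import os
import subprocess
import time

import requests
import yaml

with open("model-orchestrator/models.yaml") as f:
    REGISTRY = yaml.safe_load(f)["models"]

current = {"name": None, "proc": None}

def switch_to(name: str) -> None:
    """Stop the active model service, start the requested one, wait for health."""
    if current["name"] == name:
        return  # already loaded
    if current["proc"] is not None:
        current["proc"].terminate()  # free the GPU before loading the next model
        current["proc"].wait()
    spec = REGISTRY[name]
    proc = subprocess.Popen(
        ["python3", spec["service_script"]],
        env={**os.environ, "PORT": str(spec["port"])},
    )
    deadline = time.time() + spec["startup_time_seconds"]
    while time.time() < deadline:  # poll the service's health endpoint
        try:
            requests.get(f"http://localhost:{spec['port']}/health", timeout=2)
            break
        except requests.exceptions.RequestException:
            time.sleep(2)
    current.update(name=name, proc=proc)
```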
## Available Models

### Text Generation (vLLM)
- **qwen-2.5-7b**: Qwen 2.5 7B Instruct (14GB VRAM, ~2min startup)
  - Fast text generation with streaming support
  - Endpoint: `/v1/chat/completions`

### Image Generation (OpenedAI-Images)
- **flux-schnell**: Flux.1 Schnell (14GB VRAM, ~1min startup)
  - High-quality images in 4-5 seconds
  - Endpoint: `/v1/images/generations`

### Music Generation (AudioCraft)
- **musicgen-medium**: MusicGen Medium (11GB VRAM, ~45s startup)
  - Text-to-music generation (60-90s for 30s of audio)
  - Endpoint: `/v1/audio/generations`

## Installation

### Prerequisites
- Python 3.10+
- CUDA-capable GPU (tested on RTX 4090)
- Ubuntu/Debian-based system
- HuggingFace account with API token

### Quick Start (RunPod)

1. **Clone the repository:**
```bash
cd /workspace
git clone https://github.com/yourusername/runpod.git ai
cd ai
```

2. **Configure environment:**
```bash
cp .env.example .env
# Edit .env and set:
# - HF_TOKEN=your_huggingface_token
# - PORT=9000 (orchestrator port)
```

3. **Run setup (installs dependencies via Ansible):**
```bash
ansible-playbook playbook.yml
```

4. **Start all services:**
```bash
bash scripts/start-all.sh
```

5. **Verify orchestrator is running:**
```bash
curl http://localhost:9000/health
```

### Manual Installation

If you prefer step-by-step setup:

```bash
# Install system dependencies and download models via Ansible
# Use specific tags for selective installation:
# - base: Base system packages
# - python: Python runtime via pyenv
# - vllm: vLLM dependencies and Qwen model
# - comfyui: ComfyUI installation
# - comfyui-essential: Essential ComfyUI models only
ansible-playbook playbook.yml --tags base,python,vllm

# Start orchestrator
python3 model-orchestrator/orchestrator_subprocess.py
```

## Usage

### Starting Services

```bash
# Start orchestrator + all models
bash scripts/start-all.sh

# Start orchestrator only (models start on-demand)
python3 model-orchestrator/orchestrator_subprocess.py
```

Services start in the background and automatically load models when requested.

### Stopping Services

```bash
# Stop all services
bash scripts/stop-all.sh

# Stop orchestrator only (leaves models running)
pkill -f orchestrator_subprocess.py
```

### Testing the API

**Text generation (streaming):**
```bash
curl -s -N -X POST http://localhost:9000/v1/chat/completions \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "qwen-2.5-7b",
    "messages": [{"role": "user", "content": "Count to 5"}],
    "max_tokens": 50,
    "stream": true
  }'
```

**Image generation:**
```bash
curl -X POST http://localhost:9000/v1/images/generations \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "flux-schnell",
    "prompt": "A serene mountain landscape at sunset",
    "size": "1024x1024"
  }'
```

**Music generation:**
```bash
curl -X POST http://localhost:9000/v1/audio/generations \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "musicgen-medium",
    "prompt": "Upbeat electronic dance music",
    "duration": 30
  }'
```

**List available models:**
```bash
curl http://localhost:9000/v1/models
```

**Health check:**
```bash
curl http://localhost:9000/health
```

## Integration with LiteLLM

The orchestrator is designed to work behind LiteLLM for unified API management:

**LiteLLM config (`litellm-config.yaml`):**
```yaml
model_list:
  - model_name: qwen-2.5-7b
    litellm_params:
      model: hosted_vllm/openai/qwen-2.5-7b
      api_base: http://100.121.199.88:9000/v1  # Tailscale VPN
      api_key: dummy
      stream: true
      timeout: 600
```

**Key configuration notes:**
- Use the `hosted_vllm/openai/` prefix for vLLM models via the orchestrator
- Set `stream: true` to enable token-by-token streaming
- Use the Tailscale IP for secure VPN access from remote services

## Configuration

### Model Registry (`model-orchestrator/models.yaml`)

Models are defined in YAML with these properties:

```yaml
models:
  qwen-2.5-7b:
    type: text                      # text, image, or audio
    framework: vllm                 # vllm, openedai-images, audiocraft
    service_script: models/vllm/server.py
    port: 8000
    vram_gb: 14                     # GPU memory requirement
    startup_time_seconds: 120       # Max time to wait for health check
    endpoint: /v1/chat/completions  # API endpoint path
    description: "Human-readable description"
```

### Adding New Models

1. **Add model definition to `models.yaml`:**
```yaml
llama-3.1-8b:
  type: text
  framework: vllm
  service_script: models/vllm/server_llama.py
  port: 8004
  vram_gb: 17
  startup_time_seconds: 120
  endpoint: /v1/chat/completions
  description: "Llama 3.1 8B Instruct"
```

2. **Create model service script** (`models/vllm/server_llama.py`):
```python
#!/usr/bin/env python3
import os
import runpy
import sys

# Launch vLLM's OpenAI-compatible API server for the new model
# (vllm.entrypoints.openai.api_server is vLLM's runnable server module).
model = "meta-llama/Llama-3.1-8B-Instruct"
port = os.getenv("PORT", "8004")

sys.argv = ["api_server", "--model", model, "--port", port]
runpy.run_module("vllm.entrypoints.openai.api_server", run_name="__main__")
```

3. **Models are downloaded automatically by the playbook** when using the appropriate tags (vllm, comfyui, etc.)

4. **Restart orchestrator:**
```bash
bash scripts/stop-all.sh && bash scripts/start-all.sh
```

## Deployment Options

### Option 1: RunPod (Current Setup)

Designed for RunPod's containerized environment:
- Services run as background processes in `/workspace/ai/`
- Managed by `start-all.sh` and `stop-all.sh`
- No systemd (RunPod containers don't support it)

### Option 2: Dedicated VPS with systemd

For production VPS deployment:

1. **Install systemd service:**
```bash
sudo cp systemd/ai-orchestrator.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl enable ai-orchestrator
sudo systemctl start ai-orchestrator
```

2. **Check logs:**
```bash
sudo journalctl -u ai-orchestrator -f
```

### Option 3: Tailscale VPN Integration

Connect the RunPod GPU to your VPS infrastructure:

1. **On RunPod instance:**
```bash
# Install Tailscale
curl -fsSL https://tailscale.com/install.sh | sh

# Connect to VPN
tailscale up --advertise-tags=tag:gpu
```

2. **On VPS (configure LiteLLM):**
```yaml
# Use Tailscale IP in litellm-config.yaml
api_base: http://100.121.199.88:9000/v1
```

3. **Benefits:**
- Secure encrypted tunnel (no public exposure)
- Low latency direct connection
- Access the GPU from any device on the Tailscale network

## API Endpoints

### Orchestrator Management

- `GET /health` - Health check with current model status
- `GET /v1/models` - OpenAI-compatible models list
- `POST /switch` - Manually switch to a specific model (example below)

### Model Endpoints (Proxied)

The orchestrator automatically routes requests based on endpoint and model name:

- `POST /v1/chat/completions` - Text generation (text models)
- `POST /v1/images/generations` - Image generation (image models)
- `POST /v1/audio/generations` - Music generation (audio models)

**Streaming Support:**
- Set `"stream": true` in the request body
- Returns a Server-Sent Events (SSE) stream
- Works through the LiteLLM proxy (with correct configuration)
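A manual switch might look like the following; the exact request body of `/switch` is not documented in this diff, so the payload shape is an assumption:

```bash
# Hypothetical payload: ask the orchestrator to load qwen-2.5-7b ahead of time
curl -X POST http://localhost:9000/switch \
  -H 'Content-Type: application/json' \
  -d '{"model": "qwen-2.5-7b"}'
```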
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Streaming Returns Empty Deltas
|
||||||
|
|
||||||
|
**Symptom:** LiteLLM shows streaming chunks but content is empty:
|
||||||
|
```json
|
||||||
|
{"choices":[{"delta":{"content":""},"index":0}]}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Solution:** Use correct model path format in LiteLLM:
|
||||||
|
```yaml
|
||||||
|
# ✅ Correct
|
||||||
|
model: hosted_vllm/openai/qwen-2.5-7b
|
||||||
|
|
||||||
|
# ❌ Wrong
|
||||||
|
model: openai/qwen-2.5-7b
|
||||||
|
```
|
||||||
|
|
||||||
|
### Model Fails to Start
|
||||||
|
|
||||||
|
**Check logs:**
|
||||||
```bash
|
```bash
|
||||||
curl http://100.100.108.13:9000/v1/chat/completions \
|
# RunPod
|
||||||
-H "Content-Type: application/json" \
|
ps aux | grep orchestrator
|
||||||
-d '{"model": "qwen-2.5-7b", "messages": [{"role": "user", "content": "Hello!"}]}'
|
ps aux | grep vllm
|
||||||
|
|
||||||
|
# VPS with systemd
|
||||||
|
sudo journalctl -u ai-orchestrator -n 50
|
||||||
```
|
```
|
||||||
|
|
||||||
### Image Generation
|
**Common issues:**
|
||||||
|
- Out of GPU memory: Check VRAM usage with `nvidia-smi`
|
||||||
|
- Missing model weights: Run `ansible-playbook playbook.yml --tags vllm` or `--tags comfyui-essential`
|
||||||
|
- Port conflicts: Check if port is already in use with `lsof -i :9000`
|
||||||
|
|
||||||
|
### Orchestrator Not Responding
|
||||||
|
|
||||||
|
**Test model directly (bypass orchestrator):**
|
||||||
```bash
|
```bash
|
||||||
curl http://100.100.108.13:9000/v1/images/generations \
|
# Start vLLM service directly
|
||||||
-H "Content-Type: application/json" \
|
cd /workspace/ai
|
||||||
-d '{"model": "flux-schnell", "prompt": "a cute cat", "size": "1024x1024"}'
|
python3 models/vllm/server.py
|
||||||
|
|
||||||
|
# Test on port 8000
|
||||||
|
curl -X POST http://localhost:8000/v1/chat/completions \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"model": "qwen-2.5-7b", "messages": [{"role": "user", "content": "Hi"}]}'
|
||||||
```
|
```
|
||||||
|
|
||||||
### Music Generation
|
### HuggingFace Download Errors
|
||||||
|
|
||||||
|
**Symptom:** "Repository not found" or "Authentication required"
|
||||||
|
|
||||||
|
**Solution:**
|
||||||
```bash
|
```bash
|
||||||
curl http://100.100.108.13:9000/v1/audio/generations \
|
# Check HF_TOKEN is set
|
||||||
-H "Content-Type: application/json" \
|
echo $HF_TOKEN
|
||||||
-d '{"model": "musicgen-medium", "prompt": "upbeat electronic", "duration": 30}'
|
|
||||||
|
# Set token in .env file
|
||||||
|
echo "HF_TOKEN=your_token_here" >> .env
|
||||||
|
|
||||||
|
# Re-run Ansible playbook to download models
|
||||||
|
ansible-playbook playbook.yml --tags vllm
|
||||||
```
|
```
|
||||||
|
|
||||||
## Project Structure

```
runpod/
├── model-orchestrator/
│   ├── orchestrator_subprocess.py   # Main orchestrator (process-based)
│   ├── orchestrator.py              # Alternative Docker-based version
│   └── models.yaml                  # Model registry configuration
├── models/
│   ├── vllm/
│   │   ├── server.py                # vLLM text generation service
│   │   └── requirements.txt
│   └── comfyui/                     # ComfyUI for image/video/audio generation
│       ├── start.sh                 # ComfyUI startup script
│       └── models/                  # ComfyUI models directory
├── scripts/
│   ├── start-all.sh                 # Start orchestrator + models
│   └── stop-all.sh                  # Stop all services
├── systemd/
│   └── ai-orchestrator.service      # systemd service file (for VPS)
├── playbook.yml                     # Ansible playbook for system setup
├── inventory.yml                    # Ansible inventory
├── .env.example                     # Environment variables template
├── COMFYUI_MODELS.md                # ComfyUI models usage guide
└── README.md                        # This file
```
## Performance Notes

- **Model switching time:** 30-120 seconds (depends on model size)
- **Text generation:** ~20-40 tokens/second (qwen-2.5-7b on an RTX 4090)
- **Image generation:** 4-5 seconds per image (FLUX Schnell)
- **Music generation:** 60-90 seconds for 30s of audio (MusicGen Medium)
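A rough way to reproduce the text-generation figure on a running pod (a sketch; the first call may also include model-switch time):

```bash
time curl -s -X POST http://localhost:9000/v1/chat/completions \
  -H 'Content-Type: application/json' \
  -d '{"model": "qwen-2.5-7b", "messages": [{"role": "user", "content": "Write one sentence."}]}' \
  > /dev/null
```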
## Future Enhancements

- [ ] Parallel model loading (for multi-GPU setups)
- [ ] Model preloading based on usage patterns
- [ ] Prometheus metrics exporter
- [ ] Web dashboard for model management
- [ ] Support for quantized models (lower VRAM requirements)
- [ ] Add Whisper (speech-to-text)
- [ ] Add XTTS-v2 (text-to-speech with voice cloning)
## License

MIT License - See the LICENSE file for details.

**Note**: MusicGen pre-trained weights are non-commercial (CC-BY-NC).
## Contributing

Contributions welcome! Please:

1. Fork the repository
2. Create a feature branch
3. Test your changes on RunPod
4. Submit a pull request
## Support

- **Issues:** https://github.com/yourusername/runpod/issues
- **Discussions:** https://github.com/yourusername/runpod/discussions
- **Documentation:** This README + inline code comments
## Acknowledgments

Built with:

- [vLLM](https://github.com/vllm-project/vllm) - Fast LLM inference
- [OpenedAI-Images](https://github.com/matatonic/openedai-images) - OpenAI-compatible image generation
- [AudioCraft](https://github.com/facebookresearch/audiocraft) - Music generation
- [FastAPI](https://fastapi.tiangolo.com/) - Modern Python web framework
- [LiteLLM](https://github.com/BerriAI/litellm) - Universal LLM proxy

RUNPOD_TEMPLATE.md (new file, +503 lines)
@@ -0,0 +1,503 @@
# RunPod Template Setup Guide

This guide explains how to deploy the AI Orchestrator (ComfyUI + vLLM) on RunPod using a custom Docker template and a network volume.

## Architecture Overview

The deployment uses a **two-tier strategy**:

1. **Docker image** (software layer) - contains system packages, Supervisor, and Tailscale
2. **Network volume** (data layer) - contains models, the ComfyUI installation, venvs, and configuration

This approach allows fast pod deployment (~2-3 minutes) while keeping all large files (models, ~80-200GB) on a persistent network volume; a concrete illustration follows below.
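Once a pod is up, you can see the split directly (a sketch; the paths match the rest of this guide):

```bash
# Software layer: baked into the Docker image, identical on every pod
which supervisord tailscale

# Data layer: the mounted network volume, persists across pods
df -h /workspace
ls /workspace/ComfyUI /workspace/huggingface_cache
```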
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
- RunPod account with credits
|
||||||
|
- Docker Hub account (for hosting the template image)
|
||||||
|
- HuggingFace account with API token (for model downloads)
|
||||||
|
- Tailscale account with auth key (optional, for VPN access)
|
||||||
|
|
||||||
|
## Step 1: Build and Push the Docker Image

### Option A: Automated Build (Recommended)

The repository includes a Gitea workflow that automatically builds the Docker image and pushes it to your Gitea container registry when you push to the `main` branch or create a version tag.

1. **Configure the Gitea secret:**
   - Go to your Gitea repository → Settings → Secrets
   - Add `REGISTRY_TOKEN` = your Gitea access token with registry permissions
   - (The workflow automatically uses your Gitea username via `gitea.actor`)

2. **Trigger a build:**
   ```bash
   # Push to the main branch
   git push origin main

   # Or create a version tag
   git tag v1.0.0
   git push origin v1.0.0
   ```

3. **Monitor the build:**
   - Go to the Actions tab in Gitea
   - Wait for the build to complete (~5-10 minutes)
   - Note the Docker image name: `dev.pivoine.art/valknar/runpod-ai-orchestrator:latest`

### Option B: Manual Build

If you prefer to build manually:

```bash
# From the repository root
cd /path/to/runpod

# Build the image
docker build -t dev.pivoine.art/valknar/runpod-ai-orchestrator:latest .

# Log in to your Gitea registry
docker login dev.pivoine.art

# Push to the Gitea registry
docker push dev.pivoine.art/valknar/runpod-ai-orchestrator:latest
```
## Step 2: Create a Network Volume

Network volumes persist your models and data across pod restarts and rebuilds.

1. **Go to RunPod Dashboard → Storage → Network Volumes**

2. **Click "New Network Volume"**

3. **Configure:**
   - **Name**: `ai-orchestrator-models`
   - **Size**: `200GB` (adjust based on your needs)
     - Essential models only: ~80GB
     - All models: ~137-200GB
   - **Datacenter**: choose the one closest to you (the volume is tied to its datacenter)

4. **Click "Create Volume"**

5. **Note the volume ID** (e.g., `vol-abc123def456`) for pod deployment

### Storage Requirements

| Configuration | Size   | Models Included                                     |
|---------------|--------|-----------------------------------------------------|
| Essential     | ~80GB  | FLUX Schnell, 1-2 SDXL checkpoints, MusicGen Medium |
| Complete      | ~137GB | All image/video/audio models from the playbook      |
| Full + vLLM   | ~200GB | Complete + Qwen 2.5 7B + Llama 3.1 8B               |
## Step 3: Create a RunPod Template

1. **Go to RunPod Dashboard → Templates**

2. **Click "New Template"**

3. **Configure the template settings:**

   **Container Configuration:**
   - **Template Name**: `AI Orchestrator (ComfyUI + vLLM)`
   - **Template Type**: Docker
   - **Container Image**: `dev.pivoine.art/valknar/runpod-ai-orchestrator:latest`
   - **Container Disk**: `50GB` (for the system and temp files)
   - **Docker Command**: leave empty (uses the default `/start.sh`)

   **Volume Configuration:**
   - **Volume Mount Path**: `/workspace`
   - **Attach to Network Volume**: select your volume ID from Step 2

   **Port Configuration:**
   - **Expose HTTP Ports**: `8188, 9000, 9001`
     - `8188` - ComfyUI web interface
     - `9000` - Model orchestrator API
     - `9001` - Supervisor web UI
   - **Expose TCP Ports**: `22` (SSH access)

   **Environment Variables:**
   ```
   HF_TOKEN=your_huggingface_token_here
   TAILSCALE_AUTHKEY=tskey-auth-your_tailscale_authkey_here
   SUPERVISOR_BACKEND_HOST=localhost
   SUPERVISOR_BACKEND_PORT=9001
   ```

   **Advanced Settings:**
   - **Start Jupyter**: No
   - **Start SSH**: Yes (handled by the base image)

4. **Click "Save Template"**
## Step 4: First Deployment (Initial Setup)

The first time you deploy, you need to set up the network volume with models and configuration.

### 4.1 Deploy a Pod

1. **Go to RunPod Dashboard → Pods**
2. **Click "Deploy"** or "GPU Pods"
3. **Select your custom template**: `AI Orchestrator (ComfyUI + vLLM)`
4. **Configure the GPU:**
   - **GPU Type**: RTX 4090 (24GB VRAM) or higher
   - **Network Volume**: select your volume from Step 2
   - **On-Demand vs. Spot**: choose based on budget
5. **Click "Deploy"**

### 4.2 SSH into the Pod

```bash
# Get the pod's SSH command from the RunPod dashboard
ssh root@<pod-ip> -p <port> -i ~/.ssh/id_ed25519

# Or use the RunPod web terminal
```
### 4.3 Initial Setup on the Network Volume

```bash
# 1. Clone the repository to /workspace/ai
cd /workspace
git clone https://github.com/your-username/runpod.git ai
cd ai

# 2. Create a .env file with your credentials
cp .env.example .env
nano .env
# Edit and add:
# HF_TOKEN=your_huggingface_token
# TAILSCALE_AUTHKEY=tskey-auth-your_key
# GPU_TAILSCALE_IP=<will be set automatically>

# 3. Download the essential models (this takes 30-60 minutes)
ansible-playbook playbook.yml --tags comfyui-essential

# OR download all models (1-2 hours)
ansible-playbook playbook.yml --tags comfyui-models-all

# 4. Link models to ComfyUI
bash scripts/link-comfyui-models.sh

# OR, if arty is available
arty run models/link-comfyui

# 5. Install ComfyUI custom node dependencies
cd /workspace/ComfyUI/custom_nodes/ComfyUI-Manager
pip install -r requirements.txt
cd /workspace/ai

# 6. Restart the container to apply all changes
exit
# Go to the RunPod dashboard → Stop pod → Start pod
```
### 4.4 Verify Services

After the restart, SSH back in and check:

```bash
# Check supervisor status
supervisorctl -c /workspace/supervisord.conf status

# Expected output:
# comfyui    RUNNING   pid 123, uptime 0:01:00
# (the orchestrator is disabled by default - enable it for vLLM)

# Test ComfyUI
curl -I http://localhost:8188

# Test the Supervisor web UI
curl -I http://localhost:9001
```
## Step 5: Subsequent Deployments

After the initial setup, deploying new pods is quick (2-3 minutes):

1. **Deploy a pod** with the same template + network volume
2. **Wait for startup** (~1-2 minutes for services to start)
3. **Access the services:**
   - ComfyUI: `http://<pod-ip>:8188`
   - Supervisor: `http://<pod-ip>:9001`

**All models, configuration, and data persist on the network volume!**
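A minimal post-deploy smoke test (substitute the pod IP and ports from the dashboard):

```bash
curl -I http://<pod-ip>:8188   # ComfyUI should answer with HTTP headers
curl -I http://<pod-ip>:9001   # Supervisor web UI likewise
```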
## Step 6: Access Services

### Via Direct IP (HTTP)

Get the pod IP and ports from the RunPod dashboard:

```
ComfyUI:          http://<pod-ip>:8188
Supervisor UI:    http://<pod-ip>:9001
Orchestrator API: http://<pod-ip>:9000
SSH:              ssh root@<pod-ip> -p <port>
```
### Via Tailscale VPN (Recommended)

If you configured `TAILSCALE_AUTHKEY`, the pod automatically joins your Tailscale network:

1. **Get the Tailscale IP:**
   ```bash
   ssh root@<pod-ip> -p <port>
   tailscale ip -4
   # Example output: 100.114.60.40
   ```

2. **Access via Tailscale:**
   ```
   ComfyUI:      http://<tailscale-ip>:8188
   Supervisor:   http://<tailscale-ip>:9001
   Orchestrator: http://<tailscale-ip>:9000
   SSH:          ssh root@<tailscale-ip>
   ```

3. **Update the LiteLLM config** on your VPS with the Tailscale IP (a sketch follows below)
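For example, appended to the LiteLLM model list on the VPS (the config path here is an assumption - adjust to your setup; the model path format matches the troubleshooting note in the README):

```bash
# Hypothetical config location; the Tailscale IP is the example from step 1
cat >> /etc/litellm/config.yaml <<'EOF'
  - model_name: qwen-2.5-7b
    litellm_params:
      model: hosted_vllm/openai/qwen-2.5-7b
      api_base: http://100.114.60.40:9000/v1
EOF
```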
## Service Management

### Start/Stop Services

```bash
# Start all services
supervisorctl -c /workspace/supervisord.conf start all

# Stop all services
supervisorctl -c /workspace/supervisord.conf stop all

# Restart a specific service
supervisorctl -c /workspace/supervisord.conf restart comfyui

# View status
supervisorctl -c /workspace/supervisord.conf status
```
### Enable vLLM Models (Text Generation)

By default, only ComfyUI runs (to save VRAM). To enable vLLM:

1. **Stop ComfyUI** (frees up VRAM):
   ```bash
   supervisorctl -c /workspace/supervisord.conf stop comfyui
   ```

2. **Start the orchestrator** (manages the vLLM models):
   ```bash
   supervisorctl -c /workspace/supervisord.conf start orchestrator
   ```

3. **Test text generation:**
   ```bash
   curl -X POST http://localhost:9000/v1/chat/completions \
     -H 'Content-Type: application/json' \
     -d '{"model":"qwen-2.5-7b","messages":[{"role":"user","content":"Hello"}]}'
   ```

### Switch Back to ComfyUI

```bash
# Stop the orchestrator (stops all vLLM models)
supervisorctl -c /workspace/supervisord.conf stop orchestrator

# Start ComfyUI
supervisorctl -c /workspace/supervisord.conf start comfyui
```
## Updating the Template

When you make changes to code or configuration:

### Update the Docker Image

```bash
# 1. Make changes to the Dockerfile or start.sh
# 2. Push to the repository
git add .
git commit -m "Update template configuration"
git push origin main

# 3. The Gitea workflow auto-builds the new image

# 4. Terminate the old pod and deploy a new one with the updated image
```

### Update Network Volume Data

```bash
# SSH into the running pod
ssh root@<pod-ip> -p <port>

# Update the repository
cd /workspace/ai
git pull

# Re-run Ansible if needed
ansible-playbook playbook.yml --tags <specific-tag>

# Restart services
supervisorctl -c /workspace/supervisord.conf restart all
```
## Troubleshooting

### Pod fails to start

**Check logs:**

```bash
# Via SSH
cat /workspace/logs/supervisord.log
cat /workspace/logs/comfyui.err.log

# Via the RunPod web terminal
tail -f /workspace/logs/*.log
```

**Common issues:**

- Missing `.env` file → create `/workspace/ai/.env` with the required vars
- Supervisor config not found → ensure `/workspace/ai/supervisord.conf` exists
- Port conflicts → check whether the services are already running
### Tailscale not connecting

**Check Tailscale status:**

```bash
tailscale status
tailscale ip -4
```

**Common issues:**

- Missing or invalid `TAILSCALE_AUTHKEY` in `.env`
- Auth key expired → generate a new key in the Tailscale admin console
- Firewall blocking → RunPod should allow Tailscale by default
### Services not starting

**Check Supervisor:**

```bash
supervisorctl -c /workspace/supervisord.conf status
supervisorctl -c /workspace/supervisord.conf tail -f comfyui
```

**Common issues:**

- Broken venv → re-run `scripts/bootstrap-venvs.sh`
- Models not downloaded → run the Ansible playbook again
- Python version mismatch → rebuild the venvs
### Out of VRAM

**Check GPU memory:**

```bash
nvidia-smi
```

**RTX 4090 (24GB) capacity:**

- ComfyUI (FLUX Schnell): ~23GB (can't run alongside vLLM)
- vLLM (Qwen 2.5 7B): ~14GB
- vLLM (Llama 3.1 8B): ~17GB

**Solution:** Only run one service at a time (see the Service Management section).
### Network volume full

**Check disk usage:**

```bash
df -h /workspace
du -sh /workspace/*
```

**Clean up:**

```bash
# Remove the old HuggingFace cache
rm -rf /workspace/huggingface_cache

# Re-download the essential models only
cd /workspace/ai
ansible-playbook playbook.yml --tags comfyui-essential
```
## Cost Optimization

### Spot vs. On-Demand

- **Spot instances**: ~70% cheaper, but can be interrupted
- **On-Demand**: more expensive, guaranteed availability

**Recommendation:** use spot for development, on-demand for production.

### Network Volume Pricing

- First 1TB: $0.07/GB/month
- Beyond 1TB: $0.05/GB/month

**200GB volume cost:** ~$14/month (200 GB × $0.07/GB/month)

### Pod Auto-Stop

Configure auto-stop in the RunPod pod settings to save costs when idle:

- Stop after 15 minutes idle
- Stop after 1 hour idle
- Manual stop only
## Advanced Configuration

### Custom Environment Variables

Add these to the template or pod environment variables:

```bash
# Model cache locations
HF_HOME=/workspace/huggingface_cache
TRANSFORMERS_CACHE=/workspace/huggingface_cache

# ComfyUI settings
COMFYUI_PORT=8188
COMFYUI_LISTEN=0.0.0.0

# Orchestrator settings
ORCHESTRATOR_PORT=9000

# GPU settings
CUDA_VISIBLE_DEVICES=0
```
### Multiple Network Volumes

You can attach multiple network volumes for organization:

1. **Models volume** - `/workspace/models` (read-only, shared)
2. **Data volume** - `/workspace/data` (read-write, per-project)

### Custom Startup Script

Override the `/start.sh` behavior by creating `/workspace/custom-start.sh`:

```bash
#!/bin/bash
# Custom startup commands

# Source the default startup
source /start.sh

# Add your custom commands here
echo "Running custom initialization..."
```
## References

- [RunPod Documentation](https://docs.runpod.io/)
- [RunPod Templates Overview](https://docs.runpod.io/pods/templates/overview)
- [Network Volumes Guide](https://docs.runpod.io/storage/network-volumes)
- [ComfyUI Documentation](https://github.com/comfyanonymous/ComfyUI)
- [Supervisor Documentation](http://supervisord.org/)
- [Tailscale Documentation](https://tailscale.com/kb/)

## Support

For issues or questions:

- Check the troubleshooting section above
- Review the files in `/workspace/logs/`
- Check the RunPod community forums
- Open an issue in the project repository

comfyui/patches/diffrhythm-llamaconfig-fix.patch (new file, +56 lines)
@@ -0,0 +1,56 @@
diff --git a/__init__.py b/__init__.py
index 1234567..abcdefg 100644
--- a/__init__.py
+++ b/__init__.py
@@ -1,3 +1,51 @@
+"""
+DiffRhythm ComfyUI Node with LlamaConfig Patch
+
+PATCH: Fixes "The size of tensor a (32) must match the size of tensor b (64)" error
+in DiffRhythm's rotary position embeddings by patching LlamaConfig initialization.
+
+Issue: DiffRhythm's DIT model doesn't specify num_attention_heads and
+num_key_value_heads when creating LlamaConfig, causing transformers 4.49.0+
+to incorrectly infer head_dim = 32 instead of 64.
+
+Solution: Patch LlamaConfig globally before importing DiffRhythmNode.
+
+Reference: https://github.com/billwuhao/ComfyUI_DiffRhythm/issues/44
+Reference: https://github.com/billwuhao/ComfyUI_DiffRhythm/issues/48
+
+Patch author: valknar@pivoine.art
+"""
+
+# CRITICAL: Patch LlamaConfig BEFORE importing DiffRhythmNode
+from transformers.models.llama import LlamaConfig as _OriginalLlamaConfig
+
+class PatchedLlamaConfig(_OriginalLlamaConfig):
+    """
+    Patched LlamaConfig that automatically adds missing attention head parameters.
+
+    Standard Llama architecture assumptions:
+    - head_dim = 64 (fixed)
+    - num_attention_heads = hidden_size // head_dim
+    - num_key_value_heads = num_attention_heads // 4 (for GQA)
+    """
+    def __init__(self, *args, **kwargs):
+        # If hidden_size is provided but num_attention_heads is not, calculate it
+        if 'hidden_size' in kwargs and 'num_attention_heads' not in kwargs:
+            hidden_size = kwargs['hidden_size']
+            kwargs['num_attention_heads'] = hidden_size // 64
+
+        # If num_key_value_heads is not provided, use GQA configuration
+        if 'num_attention_heads' in kwargs and 'num_key_value_heads' not in kwargs:
+            kwargs['num_key_value_heads'] = max(1, kwargs['num_attention_heads'] // 4)
+
+        super().__init__(*args, **kwargs)
+
+# Replace LlamaConfig in transformers module BEFORE DiffRhythm imports it
+import transformers.models.llama
+transformers.models.llama.LlamaConfig = PatchedLlamaConfig
+import transformers.models.llama.modeling_llama
+transformers.models.llama.modeling_llama.LlamaConfig = PatchedLlamaConfig
+
 from .DiffRhythmNode import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS
 
 __all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
comfyui/requirements.txt (new file, +22 lines)
@@ -0,0 +1,22 @@
torch
torchvision
torchaudio
transformers==4.49.0
diffusers>=0.31.0
accelerate
safetensors
omegaconf
einops
kornia
spandrel
soundfile
scikit-image
piexif
segment-anything
GitPython
opencv-python-headless
insightface
onnxruntime
pyyaml
imageio-ffmpeg
torchcodec
comfyui/start.sh (new file, +40 lines)
@@ -0,0 +1,40 @@
#!/bin/bash
#
# ComfyUI Startup Script
# Starts the ComfyUI server on port 8188
#

WORKSPACE_DIR="${WORKSPACE_DIR:-/workspace}"
COMFYUI_DIR="${WORKSPACE_DIR}/ComfyUI"
HF_CACHE="${WORKSPACE_DIR}/huggingface_cache"

# Set environment variables
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export HF_HOME="${HF_CACHE}"
export TQDM_DISABLE=1

# Navigate to the ComfyUI directory
cd "${COMFYUI_DIR}" || exit 1

# Determine which Python to use
if [ -f "venv/bin/python" ]; then
    PYTHON_BIN="venv/bin/python"
    echo "Using ComfyUI virtual environment Python..."
else
    PYTHON_BIN="python3"
    echo "WARNING: venv not found, using system Python"
fi

echo "Starting ComfyUI on port 8188..."
echo "Access at: http://localhost:8188"
echo "Using HuggingFace cache: ${HF_CACHE}"
echo "Python: ${PYTHON_BIN}"

# Start ComfyUI with GPU support
exec "${PYTHON_BIN}" main.py \
    --listen 0.0.0.0 \
    --port 8188 \
    --enable-cors-header \
    --preview-method auto

# Not reached: exec above replaces this shell with ComfyUI
echo "ComfyUI stopped"
comfyui/workflows/README.md (new file, +287 lines)
@@ -0,0 +1,287 @@
# ComfyUI Production Workflows

A comprehensive collection of production-ready ComfyUI workflows for the RunPod AI Model Orchestrator.

## Overview

This directory contains 20 sophisticated, battle-tested workflows designed for production use with the RunPod orchestrator. Each workflow is optimized for 24GB VRAM and includes API compatibility, error handling, and quality gates.

## Directory Structure

```
workflows/
├── text-to-image/           # Text-to-image generation workflows
├── image-to-image/          # Image-to-image transformation workflows
├── image-to-video/          # Image-to-video animation workflows
├── text-to-music/           # Text-to-music generation workflows
├── upscaling/               # Image upscaling and enhancement workflows
├── advanced/                # Advanced multi-model workflows
├── templates/               # Reusable workflow templates
├── README.md                # This file
└── WORKFLOW_STANDARDS.md    # Workflow development standards
```
## Workflows by Category

### Text-to-Image (4 workflows)

| Workflow | Model | Speed | Quality | Use Case |
|----------|-------|-------|---------|----------|
| `flux-schnell-t2i-production-v1.json` | FLUX.1-schnell | Fast (4 steps) | Good | Rapid prototyping, iteration |
| `flux-dev-t2i-production-v1.json` | FLUX.1-dev | Medium (20-50 steps) | Excellent | High-quality final images |
| `sdxl-refiner-t2i-production-v1.json` | SDXL + Refiner | Medium (30+20 steps) | Excellent | Detailed, refined outputs |
| `sd35-large-t2i-production-v1.json` | SD3.5-large | Medium (28 steps) | Excellent | Latest Stable Diffusion |

### Image-to-Image (3 workflows)

| Workflow | Technique | Use Case |
|----------|-----------|----------|
| `ipadapter-style-i2i-production-v1.json` | IP-Adapter | Style transfer, composition |
| `ipadapter-face-i2i-production-v1.json` | IP-Adapter + Face | Portrait generation, face swap |
| `ipadapter-composition-i2i-production-v1.json` | IP-Adapter Multi | Complex scene composition |

### Image-to-Video (3 workflows)

| Workflow | Model | Length | Use Case |
|----------|-------|--------|----------|
| `cogvideox-i2v-production-v1.json` | CogVideoX-5b | 6s @ 8fps | AI-driven video generation |
| `svd-i2v-production-v1.json` | SVD | 14 frames | Quick animations |
| `svd-xt-i2v-production-v1.json` | SVD-XT | 25 frames | Extended animations |

### Text-to-Music (4 workflows)

| Workflow | Model | Duration | Use Case |
|----------|-------|----------|----------|
| `musicgen-small-t2m-production-v1.json` | MusicGen-small | 30s | Fast generation, low VRAM |
| `musicgen-medium-t2m-production-v1.json` | MusicGen-medium | 30s | Balanced quality/speed |
| `musicgen-large-t2m-production-v1.json` | MusicGen-large | 30s | Highest quality |
| `musicgen-melody-t2m-production-v1.json` | MusicGen-melody | 30s | Melody conditioning |

### Upscaling (3 workflows)

| Workflow | Technique | Scale | Use Case |
|----------|-----------|-------|----------|
| `ultimate-sd-upscale-production-v1.json` | Ultimate SD | 2x-4x | Professional upscaling with detailing |
| `simple-upscale-production-v1.json` | Model-based | 2x-4x | Fast, straightforward upscaling |
| `face-upscale-production-v1.json` | Face-focused | 2x | Portrait enhancement |

### Advanced (3 workflows)

| Workflow | Technique | Use Case |
|----------|-----------|----------|
| `controlnet-fusion-production-v1.json` | Multi-ControlNet | Precise composition control |
| `animatediff-video-production-v1.json` | AnimateDiff | Text-to-video animation |
| `batch-pipeline-production-v1.json` | Batch processing | Multiple variations |
## Quick Start

### Using the ComfyUI Web Interface

1. Open ComfyUI at `http://localhost:8188`
2. Click the "Load" button
3. Navigate to `/workspace/ai/models/comfyui/workflows/`
4. Select the desired workflow category and file
5. Adjust parameters as needed
6. Click "Queue Prompt"

### Using the RunPod Orchestrator API

```bash
# Example: FLUX Schnell text-to-image
curl -X POST http://localhost:9000/api/comfyui/generate \
  -H "Content-Type: application/json" \
  -d '{
    "workflow": "text-to-image/flux-schnell-t2i-production-v1.json",
    "inputs": {
      "prompt": "A serene mountain landscape at sunset",
      "seed": 42,
      "steps": 4
    }
  }'

# Example: Image upscaling
curl -X POST http://localhost:9000/api/comfyui/generate \
  -H "Content-Type: application/json" \
  -d '{
    "workflow": "upscaling/ultimate-sd-upscale-production-v1.json",
    "inputs": {
      "image": "path/to/image.png",
      "scale": 2
    }
  }'
```
## Workflow Features

All production workflows include:

- **API Compatibility**: input/output nodes for orchestrator integration
- **Error Handling**: validation, fallback nodes, graceful degradation
- **Quality Gates**: preview nodes, checkpoints, validation steps
- **VRAM Optimization**: model unloading, efficient memory management
- **Documentation**: embedded descriptions, parameter guides
- **Versioning**: version numbers in filenames

## Model Requirements

### Required Models (Essential)

These models are required by most workflows and are auto-downloaded by Ansible:

- **FLUX.1-schnell**: Fast text-to-image (17GB)
- **FLUX.1-dev**: High-quality text-to-image (23GB)
- **SDXL Base + Refiner**: Stable Diffusion XL (13GB)
- **SD3.5-large**: Latest Stable Diffusion (16GB)
- **CLIP ViT-L/14**: Image-text understanding (1.7GB)

### Optional Models

- **CogVideoX-5b**: Text-to-video, image-to-video (9.7GB)
- **SVD/SVD-XT**: Image-to-video (10GB)
- **MusicGen variants**: Text-to-music (1.5-3.4GB)
- **IP-Adapter**: Image conditioning (varies)
- **ControlNet models**: Precise control (varies)

See `/workspace/ai/COMFYUI_MODELS.md` for the complete model list.
## VRAM Considerations

All workflows are designed for **24GB VRAM** with these optimizations:

- **Sequential Loading**: only one heavy model loaded at a time
- **Model Unloading**: explicit cleanup between stages
- **Attention Slicing**: enabled for large models
- **VAE Tiling**: for high-resolution processing
- **Batch Size Limits**: capped at VRAM-safe values
## Performance Tips

### For Speed

- Use FLUX Schnell (4 steps) or SDXL base (20 steps)
- Lower the resolution: 512x512 or 768x768
- Disable refiners and upscalers
- Use the `--lowvram` flag if needed

### For Quality

- Use FLUX Dev (50 steps) or SDXL + Refiner
- Higher resolution: 1024x1024 or above
- Enable face enhancement (Impact-Pack)
- Use Ultimate SD Upscale for the final output

### For VRAM Efficiency

- Enable model unloading between stages
- Use VAE tiling for >1024px images
- Process batches sequentially, not in parallel
- Monitor with `nvidia-smi` during generation (example below)
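For example, to watch memory and utilization once per second while a workflow runs (the same query flags are used in the testing guide):

```bash
nvidia-smi --query-gpu=memory.used,memory.total,utilization.gpu \
  --format=csv,noheader -l 1
```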
## Troubleshooting

### Out of Memory (OOM) Errors

```bash
# Check VRAM usage
nvidia-smi

# Solutions:
# 1. Lower the resolution
# 2. Reduce the batch size
# 3. Enable model unloading
# 4. Use tiled VAE
# 5. Restart ComfyUI to clear VRAM
supervisorctl restart comfyui
```

### Missing Models

```bash
# Check which models are linked
ls -lah /workspace/ComfyUI/models/diffusers/
ls -lah /workspace/ComfyUI/models/clip_vision/

# Re-run Ansible to download the missing models
cd /workspace/ai
ansible-playbook playbook.yml --tags comfyui-models-all

# Re-link models
arty run models/link-comfyui
```

### Workflow Load Errors

```bash
# Check the ComfyUI logs
supervisorctl tail -f comfyui

# Common issues:
# - Missing custom nodes: check the custom_nodes/ directory
# - Node version mismatch: update ComfyUI and the custom nodes
# - Corrupted workflow: validate the JSON syntax
```
## Development

### Creating New Workflows

See `WORKFLOW_STANDARDS.md` for detailed guidelines on creating production-ready workflows.

Quick checklist:

- [ ] Use a version number in the filename
- [ ] Add API input/output nodes
- [ ] Include preview and save nodes
- [ ] Add error handling and validation
- [ ] Optimize for 24GB VRAM
- [ ] Document all parameters
- [ ] Test with the orchestrator API

### Testing Workflows

```bash
# Manual test via the ComfyUI UI
# 1. Load the workflow in ComfyUI
# 2. Set test parameters
# 3. Queue the prompt
# 4. Verify output quality

# API test via the orchestrator
curl -X POST http://localhost:9000/api/comfyui/generate \
  -H "Content-Type: application/json" \
  -d @test-payload.json

# Batch test multiple workflows
cd /workspace/ai/models/comfyui/workflows
for workflow in text-to-image/*.json; do
  echo "Testing $workflow..."
  # Add test logic here
done
```
## Contributing

When adding new workflows:

1. Follow the naming convention: `{category}-{model}-{type}-production-v{version}.json`
2. Place the file in the appropriate category directory
3. Update this README with the workflow details
4. Add to `comfyui_models.yaml` if new models are required
5. Test with both the UI and the API
6. Document any special requirements or setup

## Resources

- **ComfyUI Documentation**: https://github.com/comfyanonymous/ComfyUI
- **Custom Nodes Manager**: install via ComfyUI-Manager in the UI
- **Model Registry**: `/workspace/ai/model-orchestrator/models.yaml`
- **Ansible Playbook**: `/workspace/ai/playbook.yml`
- **Orchestrator API**: http://localhost:9000/docs

## License

MIT License - Part of the RunPod AI Model Orchestrator

## Support

For issues or questions:

1. Check the ComfyUI logs: `supervisorctl tail -f comfyui`
2. Check the orchestrator logs: `supervisorctl tail -f orchestrator`
3. Review `/workspace/ai/CLAUDE.md` for troubleshooting
4. Check GPU status: `nvidia-smi`

comfyui/workflows/WORKFLOW_STANDARDS.md (new file, +657 lines)
@@ -0,0 +1,657 @@
# ComfyUI Workflow Development Standards

Production standards and best practices for creating ComfyUI workflows in the RunPod AI Model Orchestrator.

## Table of Contents

- [Naming Conventions](#naming-conventions)
- [Workflow Structure](#workflow-structure)
- [API Integration](#api-integration)
- [Error Handling](#error-handling)
- [VRAM Optimization](#vram-optimization)
- [Quality Assurance](#quality-assurance)
- [Documentation Requirements](#documentation-requirements)
- [Testing Guidelines](#testing-guidelines)
## Naming Conventions

### Workflow Files

Format: `{category}-{model}-{type}-{environment}-v{version}.json`

**Components:**

- `category`: descriptive category (flux, sdxl, cogvideox, musicgen, etc.)
- `model`: specific model variant (schnell, dev, small, medium, large)
- `type`: operation type (t2i, i2i, i2v, t2m, upscale)
- `environment`: `production` (stable) or `experimental` (testing)
- `version`: simple incrementing version number (1, 2, 3, etc.)

**Examples:**

- `flux-schnell-t2i-production-v1.json` - FLUX Schnell text-to-image, production version 1
- `sdxl-refiner-t2i-production-v2.json` - SDXL with refiner, production version 2
- `musicgen-large-t2m-experimental-v1.json` - MusicGen large, experimental version 1
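A quick way to spot non-conforming names (a sketch; the regex only approximates the convention and may need tuning for multi-word categories):

```bash
cd /workspace/ai/models/comfyui/workflows
for f in */*.json; do
  name=$(basename "$f")
  [[ "$name" =~ ^[a-z0-9.]+-[a-z0-9.]+-(t2i|i2i|i2v|t2m|upscale)-(production|experimental)-v[0-9]+\.json$ ]] \
    || echo "Non-conforming name: $f"
done
```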
### Node Naming

**Use descriptive names for all nodes:**

```json
{
  "title": "FLUX Schnell Checkpoint Loader",
  "type": "CheckpointLoaderSimple",
  "properties": {
    "Node name for S&R": "CheckpointLoaderSimple"
  }
}
```

**Naming patterns:**

- Loaders: `{Model} Checkpoint Loader`, `{Model} VAE Loader`
- Samplers: `{Model} KSampler`, `{Model} Advanced Sampler`
- Inputs: `API Text Input`, `API Image Input`, `API Seed Input`
- Outputs: `API Image Output`, `Preview Output`, `Save Output`
- Processing: `VAE Encode`, `VAE Decode`, `CLIP Text Encode`
## Workflow Structure

### Required Node Groups

Every production workflow MUST include these node groups:

#### 1. Input Group

```
Purpose: Receive parameters from the API or UI
Nodes:
- Text input nodes (prompts, negative prompts)
- Numeric input nodes (seed, steps, CFG scale)
- Image input nodes (for i2i, i2v workflows)
- Model selection nodes (if multiple models are supported)
```

#### 2. Model Loading Group

```
Purpose: Load required models and components
Nodes:
- Checkpoint/Diffuser loaders
- VAE loaders
- CLIP text encoders
- ControlNet loaders (if applicable)
- IP-Adapter loaders (if applicable)
```

#### 3. Processing Group

```
Purpose: Main generation/transformation logic
Nodes:
- Samplers (KSampler, Advanced KSampler)
- Encoders (CLIP, VAE)
- Conditioning nodes
- ControlNet application (if applicable)
```

#### 4. Post-Processing Group

```
Purpose: Refinement and enhancement
Nodes:
- VAE decoding
- Upscaling (if applicable)
- Face enhancement (Impact-Pack)
- Image adjustments
```

#### 5. Output Group

```
Purpose: Save and return results
Nodes:
- SaveImage nodes (for file output)
- Preview nodes (for UI feedback)
- API output nodes (for the orchestrator)
```

#### 6. Error Handling Group (Optional but Recommended)

```
Purpose: Validation and fallback
Nodes:
- Validation nodes
- Fallback nodes
- Error logging nodes
```

### Node Organization

**Logical flow (left to right, top to bottom):**

```
[Inputs] → [Model Loading] → [Processing] → [Post-Processing] → [Outputs]
                                  ↓
                          [Error Handling]
```

**Visual grouping:**

- Use node positions to create visual separation
- Group related nodes together
- Align nodes for readability
- Use consistent spacing
## API Integration

### Input Nodes

**Required for API compatibility:**

1. **Text inputs** (prompts, negative prompts)
   ```json
   {
     "inputs": {
       "text": "A beautiful sunset over mountains",
       "default": ""
     },
     "class_type": "CLIPTextEncode",
     "title": "API Prompt Input"
   }
   ```

2. **Numeric inputs** (seed, steps, CFG, etc.)
   ```json
   {
     "inputs": {
       "seed": 42,
       "steps": 20,
       "cfg": 7.5,
       "sampler_name": "euler_ancestral",
       "scheduler": "normal"
     },
     "class_type": "KSampler",
     "title": "API Sampler Config"
   }
   ```

3. **Image inputs** (for i2i workflows)
   ```json
   {
     "inputs": {
       "image": "",
       "upload": "image"
     },
     "class_type": "LoadImage",
     "title": "API Image Input"
   }
   ```

### Output Nodes

**Required for the orchestrator to return results:**

```json
{
  "inputs": {
    "images": ["node_id", 0],
    "filename_prefix": "ComfyUI"
  },
  "class_type": "SaveImage",
  "title": "API Image Output"
}
```

### Parameter Validation

**Include validation for critical parameters:**

```json
{
  "inputs": {
    "value": "seed",
    "min": 0,
    "max": 4294967295,
    "default": 42
  },
  "class_type": "IntegerInput",
  "title": "Seed Validator"
}
```
## Error Handling

### Required Validations

1. **Model availability**
   - Check that checkpoint files exist
   - Validate model paths
   - Provide a fallback to default models

2. **Parameter bounds**
   - Validate numeric ranges (seed, steps, CFG)
   - Check dimension constraints (width, height)
   - Validate string inputs (sampler names, scheduler types)

3. **VRAM limits**
   - Check the batch size against VRAM
   - Validate the resolution against VRAM
   - Enable tiling for large images

4. **Input validation**
   - Verify that required inputs are provided
   - Check image formats and dimensions
   - Validate prompt lengths

### Fallback Strategies

**Default values for missing inputs:**

```json
{
  "inputs": {
    "text": "{{prompt | default('A beautiful landscape')}}",
    "seed": "{{seed | default(42)}}",
    "steps": "{{steps | default(20)}}"
  }
}
```

**Graceful degradation:**

- If the refiner is unavailable, skip the refinement step
- If the upscaler fails, return the base resolution
- If face enhancement errors, return the unenhanced image
## VRAM Optimization

### Model Unloading

**Explicit model cleanup between stages:**

```json
{
  "inputs": {
    "model": ["checkpoint_loader", 0]
  },
  "class_type": "FreeModel",
  "title": "Unload Base Model"
}
```

**When to unload:**

- After base generation, before refinement
- After refinement, before upscaling
- Between different model types (diffusion → CLIP → VAE)

### VAE Tiling

**Enable for high-resolution processing:**

```json
{
  "inputs": {
    "samples": ["sampler", 0],
    "vae": ["vae_loader", 0],
    "tile_size": 512,
    "overlap": 64
  },
  "class_type": "VAEDecodeTiled",
  "title": "Tiled VAE Decode"
}
```

**Tiling thresholds:**

- Use tiled VAE for images >1024x1024
- Tile size: 512 for 24GB VRAM, 256 for less
- Overlap: 64px minimum for seamless tiles

### Attention Slicing

**Reduce memory for large models:**

```json
{
  "inputs": {
    "model": ["checkpoint_loader", 0],
    "attention_mode": "sliced"
  },
  "class_type": "ModelOptimization",
  "title": "Enable Attention Slicing"
}
```

### Batch Processing

**VRAM-safe batch sizes:**

- FLUX models: batch_size=1
- SDXL: batch_size=1-2
- SD3.5: batch_size=1
- Upscaling: batch_size=1

**Sequential batching:**

```json
{
  "inputs": {
    "mode": "sequential",
    "batch_size": 1
  },
  "class_type": "BatchProcessor"
}
```
## Quality Assurance

### Preview Nodes

**Include previews at key stages:**

```json
{
  "inputs": {
    "images": ["vae_decode", 0]
  },
  "class_type": "PreviewImage",
  "title": "Preview Base Generation"
}
```

**Preview locations:**

- After base generation (before refinement)
- After refinement (before upscaling)
- After upscaling (final check)
- After face enhancement

### Quality Gates

**Checkpoints for validation:**

1. **Resolution check**
   ```json
   {
     "inputs": {
       "image": ["input", 0],
       "min_width": 512,
       "min_height": 512,
       "max_width": 2048,
       "max_height": 2048
     },
     "class_type": "ImageSizeValidator"
   }
   ```

2. **Quality metrics**
   ```json
   {
     "inputs": {
       "image": ["vae_decode", 0],
       "min_quality_score": 0.7
     },
     "class_type": "QualityChecker"
   }
   ```

### Save Points

**Save intermediate results:**

```json
{
  "inputs": {
    "images": ["base_generation", 0],
    "filename_prefix": "intermediate/base_"
  },
  "class_type": "SaveImage",
  "title": "Save Base Generation"
}
```

**When to save:**

- Base generation (before refinement)
- After each major processing stage
- Before potentially destructive operations
## Documentation Requirements

### Workflow Metadata

**Include in the workflow JSON:**

```json
{
  "workflow_info": {
    "name": "FLUX Schnell Text-to-Image Production",
    "version": "1.0.0",
    "author": "RunPod AI Model Orchestrator",
    "description": "Fast text-to-image generation using FLUX.1-schnell (4 steps)",
    "category": "text-to-image",
    "tags": ["flux", "fast", "production"],
    "requirements": {
      "models": ["FLUX.1-schnell"],
      "custom_nodes": [],
      "vram_min": "16GB",
      "vram_recommended": "24GB"
    },
    "parameters": {
      "prompt": {
        "type": "string",
        "required": true,
        "description": "Text description of the desired image"
      },
      "seed": {
        "type": "integer",
        "required": false,
        "default": 42,
        "min": 0,
        "max": 4294967295
      },
      "steps": {
        "type": "integer",
        "required": false,
        "default": 4,
        "min": 1,
        "max": 20
      }
    },
    "outputs": {
      "image": {
        "type": "image",
        "format": "PNG",
        "resolution": "1024x1024"
      }
    }
  }
}
```

### Node Comments

**Document complex nodes:**

```json
{
  "title": "FLUX KSampler - Main Generation",
  "notes": "Using the euler_ancestral sampler with 4 steps for FLUX Schnell. CFG=1.0 is optimal for this model. The seed controls reproducibility.",
  "inputs": {
    "seed": 42,
    "steps": 4,
    "cfg": 1.0
  }
}
```

### Usage Examples

**Include in the workflow or README:**

````markdown
## Example Usage

### ComfyUI Web Interface

1. Load the workflow: `text-to-image/flux-schnell-t2i-production-v1.json`
2. Set the prompt: "A serene mountain landscape at sunset"
3. Adjust the seed: 42 (optional)
4. Click "Queue Prompt"

### Orchestrator API

```bash
curl -X POST http://localhost:9000/api/comfyui/generate \
  -d '{"workflow": "flux-schnell-t2i-production-v1.json", "inputs": {"prompt": "A serene mountain landscape"}}'
```
````
## Testing Guidelines

### Manual Testing

**Required tests before production:**

1. **UI test**
   - Load in the ComfyUI web interface
   - Execute with default parameters
   - Verify output quality
   - Check the preview nodes
   - Confirm the save locations

2. **API test**
   - Call via the orchestrator API
   - Test with various parameter combinations
   - Verify the JSON response format
   - Check error handling

3. **Edge cases**
   - Missing optional parameters
   - Invalid parameter values
   - Out-of-range inputs
   - Missing models (graceful failure)

### Automated Testing

**Test script template:**

```bash
#!/bin/bash
# Test workflow: flux-schnell-t2i-production-v1.json

WORKFLOW="text-to-image/flux-schnell-t2i-production-v1.json"

# Test 1: Default parameters
curl -X POST http://localhost:9000/api/comfyui/generate \
  -d "{\"workflow\": \"$WORKFLOW\", \"inputs\": {\"prompt\": \"test image\"}}" \
  | jq '.status'  # Should return "success"

# Test 2: Custom parameters
curl -X POST http://localhost:9000/api/comfyui/generate \
  -d "{\"workflow\": \"$WORKFLOW\", \"inputs\": {\"prompt\": \"test\", \"seed\": 123, \"steps\": 8}}" \
  | jq '.status'

# Test 3: Missing prompt (should use the default)
curl -X POST http://localhost:9000/api/comfyui/generate \
  -d "{\"workflow\": \"$WORKFLOW\", \"inputs\": {}}" \
  | jq '.status'
```
### Performance Testing
|
||||||
|
|
||||||
|
**Measure key metrics:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Generation time
|
||||||
|
time curl -X POST http://localhost:9000/api/comfyui/generate \
|
||||||
|
-d '{"workflow": "flux-schnell-t2i-production-v1.json", "inputs": {"prompt": "benchmark"}}'
|
||||||
|
|
||||||
|
# VRAM usage
|
||||||
|
nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -l 1
|
||||||
|
|
||||||
|
# GPU utilization
|
||||||
|
nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -l 1
|
||||||
|
```
|
||||||
|
|
||||||
|
**Performance baselines (24GB VRAM):**
|
||||||
|
- FLUX Schnell (1024x1024, 4 steps): ~5-8 seconds
|
||||||
|
- FLUX Dev (1024x1024, 20 steps): ~25-35 seconds
|
||||||
|
- SDXL + Refiner (1024x1024): ~40-60 seconds
|
||||||
|
- CogVideoX (6s video): ~120-180 seconds
|
||||||
|
|
||||||
|
### Load Testing

**Concurrent request handling:**

```bash
# Test 5 concurrent generations
for i in {1..5}; do
  curl -X POST http://localhost:9000/api/comfyui/generate \
    -d "{\"workflow\": \"flux-schnell-t2i-production-v1.json\", \"inputs\": {\"prompt\": \"test $i\", \"seed\": $i}}" &
done
wait
```

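Timing the whole batch gives a rough throughput figure to compare against the single-request baselines above; a minimal sketch:

```bash
# Wall-clock time for 5 concurrent requests; divide by 5 for a rough
# per-image throughput under load.
time (
  for i in {1..5}; do
    curl -s -X POST http://localhost:9000/api/comfyui/generate \
      -d "{\"workflow\": \"flux-schnell-t2i-production-v1.json\", \"inputs\": {\"prompt\": \"test $i\", \"seed\": $i}}" > /dev/null &
  done
  wait
)
```
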
## Version Control

### Semantic Versioning

**Version increments:**

- `v1` → `v2`: major changes only (different models, restructured workflow, breaking API changes)
- Internal iterations: keep the same version and document the changes in git commits

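In practice a major bump is a copy plus a commit; a minimal sketch of the convention, using the file-naming pattern from the examples above:

```bash
# Start v2 as a copy of v1, then record it in git. v1 stays in place until it
# goes through the deprecation process described below.
cp text-to-image/flux-schnell-t2i-production-v1.json \
   text-to-image/flux-schnell-t2i-production-v2.json
git add text-to-image/flux-schnell-t2i-production-v2.json
git commit -m "feat(workflows): add flux-schnell-t2i-production-v2"
```
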
### Change Documentation

**Changelog format:**

```markdown
## flux-schnell-t2i-production-v2.json

### Changes from v1
- Added API input validation
- Optimized VRAM usage with model unloading
- Added preview node after generation
- Updated default steps from 4 to 6

### Breaking Changes
- Changed output node structure (requires orchestrator update)

### Migration Guide
- Update API calls to use new parameter names
- Clear ComfyUI cache before loading v2
```

### Deprecation Process

**Sunsetting old versions:**

1. Mark the old version as deprecated in the README
2. Keep the deprecated version for 2 releases
3. Add a deprecation warning in the workflow metadata (a `jq` sketch follows this list)
4. Document the migration path to the new version
5. Archive deprecated workflows in the `archive/` directory

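Step 3 can be automated with `jq`. The `extra.workflow_info` block is the metadata object used by the workflow files in this repository, but the `deprecated` and `replaced_by` key names are illustrative assumptions, since this document does not fix a deprecation schema:

```bash
# Stamp a deprecation warning into the workflow metadata (step 3 above).
# "deprecated" / "replaced_by" are hypothetical keys -- use whatever schema
# the orchestrator actually reads.
jq '.extra.workflow_info += {"deprecated": true, "replaced_by": "flux-schnell-t2i-production-v2.json"}' \
  text-to-image/flux-schnell-t2i-production-v1.json > tmp.json \
  && mv tmp.json text-to-image/flux-schnell-t2i-production-v1.json
```
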
## Best Practices

### DO

- Use descriptive node names
- Include preview nodes at key stages
- Validate all inputs
- Optimize for VRAM efficiency
- Document all parameters
- Test with both UI and API
- Version your workflows
- Include error handling
- Save intermediate results
- Use semantic naming

### DON'T

- Hardcode file paths
- Assume unlimited VRAM
- Skip input validation
- Omit documentation
- Create overly complex workflows
- Use experimental nodes in production
- Ignore VRAM optimization
- Skip testing edge cases
- Use unclear node names
- Forget to version

## Resources

- **ComfyUI Wiki**: https://github.com/comfyanonymous/ComfyUI/wiki
- **Custom Nodes List**: https://github.com/ltdrdata/ComfyUI-Manager
- **VRAM Optimization Guide**: `/workspace/ai/CLAUDE.md`
- **Model Documentation**: `/workspace/ai/COMFYUI_MODELS.md`

## Support

For questions or issues:

1. Review this standards document
2. Check ComfyUI logs: `supervisorctl tail -f comfyui`
3. Test the workflow in the UI before the API
4. Validate JSON syntax (see the `jq` one-liner below)
5. Check model availability
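For step 4, any JSON tool works; a minimal check with `jq`, which prints nothing and exits 0 when the file parses, non-zero on a syntax error:

```bash
# Validate workflow JSON syntax before loading it anywhere.
jq empty text-to-image/flux-schnell-t2i-production-v1.json
```
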
comfyui/workflows/advanced/animatediff-video-production-v1.json (new file, 248 lines)
@@ -0,0 +1,248 @@
{
  "last_node_id": 10,
  "last_link_id": 12,
  "nodes": [
    {"id": 1, "type": "CheckpointLoaderSimple", "pos": [50, 100], "size": {"0": 350, "1": 100}, "flags": {}, "order": 0, "mode": 0, "properties": {"Node name for S&R": "CheckpointLoaderSimple"}, "widgets_values": ["v1-5-pruned-emaonly.safetensors"], "title": "SD 1.5 Checkpoint Loader", "outputs": [{"name": "MODEL", "type": "MODEL", "links": [1], "slot_index": 0}, {"name": "CLIP", "type": "CLIP", "links": [2, 3], "slot_index": 1}, {"name": "VAE", "type": "VAE", "links": [4], "slot_index": 2}]},
    {"id": 2, "type": "ADE_LoadAnimateDiffModel", "pos": [50, 300], "size": {"0": 350, "1": 100}, "flags": {}, "order": 1, "mode": 0, "properties": {"Node name for S&R": "ADE_LoadAnimateDiffModel"}, "widgets_values": ["mm_sd_v15_v2.ckpt"], "title": "Load AnimateDiff Motion Module", "outputs": [{"name": "MOTION_MODEL", "type": "MOTION_MODEL_ADE", "links": [5], "slot_index": 0}]},
    {"id": 3, "type": "ADE_ApplyAnimateDiffModelSimple", "pos": [450, 300], "size": {"0": 315, "1": 100}, "flags": {}, "order": 2, "mode": 0, "properties": {"Node name for S&R": "ADE_ApplyAnimateDiffModelSimple"}, "inputs": [{"name": "motion_model", "type": "MOTION_MODEL_ADE", "link": 5}], "outputs": [{"name": "M_MODELS", "type": "M_MODELS", "links": [6], "slot_index": 0}]},
    {"id": 4, "type": "ADE_UseEvolvedSampling", "pos": [800, 100], "size": {"0": 315, "1": 100}, "flags": {}, "order": 3, "mode": 0, "properties": {"Node name for S&R": "ADE_UseEvolvedSampling"}, "widgets_values": ["sqrt_linear (AnimateDiff)"], "inputs": [{"name": "model", "type": "MODEL", "link": 1}, {"name": "m_models", "type": "M_MODELS", "link": 6}], "outputs": [{"name": "MODEL", "type": "MODEL", "links": [7], "slot_index": 0}]},
    {"id": 5, "type": "CLIPTextEncode", "pos": [450, 500], "size": {"0": 400, "1": 200}, "flags": {}, "order": 4, "mode": 0, "properties": {"Node name for S&R": "CLIPTextEncode"}, "widgets_values": ["A person walking through a forest, cinematic movement"], "title": "API Video Prompt", "inputs": [{"name": "clip", "type": "CLIP", "link": 2}], "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [8], "slot_index": 0}]},
    {"id": 6, "type": "CLIPTextEncode", "pos": [450, 750], "size": {"0": 400, "1": 200}, "flags": {}, "order": 5, "mode": 0, "properties": {"Node name for S&R": "CLIPTextEncode"}, "widgets_values": ["static, blurry, low quality"], "title": "API Negative Prompt", "inputs": [{"name": "clip", "type": "CLIP", "link": 3}], "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [9], "slot_index": 0}]},
    {"id": 7, "type": "EmptyLatentImage", "pos": [800, 300], "size": {"0": 315, "1": 100}, "flags": {}, "order": 6, "mode": 0, "properties": {"Node name for S&R": "EmptyLatentImage"}, "widgets_values": [512, 512, 16], "title": "API Latent Config (16 frames)", "outputs": [{"name": "LATENT", "type": "LATENT", "links": [10], "slot_index": 0}]},
    {"id": 8, "type": "KSamplerAdvanced", "pos": [1150, 100], "size": {"0": 315, "1": 474}, "flags": {}, "order": 7, "mode": 0, "properties": {"Node name for S&R": "KSamplerAdvanced"}, "widgets_values": ["enable", 42, "fixed", 20, 8.0, "euler", "normal", 0, 10000, "disable"], "title": "AnimateDiff Sampler", "inputs": [{"name": "model", "type": "MODEL", "link": 7}, {"name": "positive", "type": "CONDITIONING", "link": 8}, {"name": "negative", "type": "CONDITIONING", "link": 9}, {"name": "latent_image", "type": "LATENT", "link": 10}], "outputs": [{"name": "LATENT", "type": "LATENT", "links": [11], "slot_index": 0}]},
    {"id": 9, "type": "VAEDecode", "pos": [1500, 100], "size": {"0": 315, "1": 100}, "flags": {}, "order": 8, "mode": 0, "properties": {"Node name for S&R": "VAEDecode"}, "title": "VAE Decode Video", "inputs": [{"name": "samples", "type": "LATENT", "link": 11}, {"name": "vae", "type": "VAE", "link": 4}], "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [12], "slot_index": 0}]},
    {"id": 10, "type": "VHS_VideoCombine", "pos": [1800, 100], "size": {"0": 315, "1": 100}, "flags": {}, "order": 9, "mode": 0, "properties": {"Node name for S&R": "VHS_VideoCombine"}, "widgets_values": [8, 0, "animatediff_output", "video/h264-mp4", false, true, "yuv420p", 19, true, false], "title": "Combine Frames", "inputs": [{"name": "images", "type": "IMAGE", "link": 12}]}
  ],
  "links": [
    [1, 1, 0, 4, 0, "MODEL"], [2, 1, 1, 5, 0, "CLIP"], [3, 1, 1, 6, 0, "CLIP"], [4, 1, 2, 9, 1, "VAE"],
    [5, 2, 0, 3, 0, "MOTION_MODEL_ADE"], [6, 3, 0, 4, 1, "M_MODELS"], [7, 4, 0, 8, 0, "MODEL"],
    [8, 5, 0, 8, 1, "CONDITIONING"], [9, 6, 0, 8, 2, "CONDITIONING"], [10, 7, 0, 8, 3, "LATENT"],
    [11, 8, 0, 9, 0, "LATENT"], [12, 9, 0, 10, 0, "IMAGE"]
  ],
  "extra": {
    "workflow_info": {
      "name": "AnimateDiff Video Production",
      "version": "1.0.0",
      "description": "Text-to-video generation using AnimateDiff. Create animated sequences from text prompts with natural motion.",
      "category": "advanced",
      "tags": ["animatediff", "text-to-video", "animation", "advanced", "production"],
      "requirements": {
        "models": ["stable-diffusion-v1-5", "animatediff-motion-module-v15"],
        "custom_nodes": ["ComfyUI-AnimateDiff-Evolved", "ComfyUI-VideoHelperSuite"],
        "vram_min": "12GB"
      },
      "parameters": {
        "prompt": {"node_id": 5, "type": "string", "required": true, "description": "Describe action and movement"},
        "frames": {"node_id": 7, "type": "integer", "default": 16, "description": "Number of frames (8-32)"},
        "fps": {"node_id": 10, "type": "integer", "default": 8}
      },
      "performance": {"avg_generation_time": "60-90 seconds", "vram_usage": "~16-20GB", "output": "16 frames (~2s @ 8fps)"},
      "use_cases": ["Text-to-video animation", "Character animations", "Motion graphics", "Animated storyboards"]
    }
  },
  "version": 0.4
}
comfyui/workflows/advanced/batch-pipeline-production-v1.json (new file, 247 lines)
@@ -0,0 +1,247 @@
{
  "last_node_id": 10,
  "last_link_id": 10,
  "nodes": [
    {"id": 1, "type": "CheckpointLoaderSimple", "pos": [50, 100], "size": {"0": 350, "1": 100}, "flags": {}, "order": 0, "mode": 0, "properties": {"Node name for S&R": "CheckpointLoaderSimple"}, "widgets_values": ["sd_xl_base_1.0.safetensors"], "title": "SDXL Base Loader", "outputs": [{"name": "MODEL", "type": "MODEL", "links": [1], "slot_index": 0}, {"name": "CLIP", "type": "CLIP", "links": [2, 3], "slot_index": 1}, {"name": "VAE", "type": "VAE", "links": [4], "slot_index": 2}]},
    {"id": 2, "type": "CLIPTextEncode", "pos": [450, 100], "size": {"0": 400, "1": 200}, "flags": {}, "order": 1, "mode": 0, "properties": {"Node name for S&R": "CLIPTextEncode"}, "widgets_values": ["A beautiful landscape"], "title": "API Base Prompt", "inputs": [{"name": "clip", "type": "CLIP", "link": 2}], "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [5], "slot_index": 0}]},
    {"id": 3, "type": "CLIPTextEncode", "pos": [450, 350], "size": {"0": 400, "1": 200}, "flags": {}, "order": 2, "mode": 0, "properties": {"Node name for S&R": "CLIPTextEncode"}, "widgets_values": ["blurry, low quality"], "title": "API Negative Prompt", "inputs": [{"name": "clip", "type": "CLIP", "link": 3}], "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [6], "slot_index": 0}]},
    {"id": 4, "type": "EmptyLatentImage", "pos": [450, 600], "size": {"0": 315, "1": 106}, "flags": {}, "order": 3, "mode": 0, "properties": {"Node name for S&R": "EmptyLatentImage"}, "widgets_values": [1024, 1024, 4], "title": "API Latent Config (Batch=4)", "outputs": [{"name": "LATENT", "type": "LATENT", "links": [7], "slot_index": 0}]},
    {"id": 5, "type": "KSampler", "pos": [900, 100], "size": {"0": 315, "1": 474}, "flags": {}, "order": 4, "mode": 0, "properties": {"Node name for S&R": "KSampler"}, "widgets_values": [42, "fixed", 20, 7.0, "euler", "normal", 1], "title": "Batch Sampler (4 variations)", "inputs": [{"name": "model", "type": "MODEL", "link": 1}, {"name": "positive", "type": "CONDITIONING", "link": 5}, {"name": "negative", "type": "CONDITIONING", "link": 6}, {"name": "latent_image", "type": "LATENT", "link": 7}], "outputs": [{"name": "LATENT", "type": "LATENT", "links": [8], "slot_index": 0}]},
    {"id": 6, "type": "VAEDecode", "pos": [1270, 100], "size": {"0": 210, "1": 46}, "flags": {}, "order": 5, "mode": 0, "properties": {"Node name for S&R": "VAEDecode"}, "title": "VAE Decode Batch", "inputs": [{"name": "samples", "type": "LATENT", "link": 8}, {"name": "vae", "type": "VAE", "link": 4}], "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [9, 10], "slot_index": 0}]},
    {"id": 7, "type": "PreviewImage", "pos": [1530, 100], "size": {"0": 400, "1": 400}, "flags": {}, "order": 6, "mode": 0, "properties": {"Node name for S&R": "PreviewImage"}, "title": "Preview All Variations", "inputs": [{"name": "images", "type": "IMAGE", "link": 9}]},
    {"id": 8, "type": "SaveImage", "pos": [1530, 550], "size": {"0": 400, "1": 100}, "flags": {}, "order": 7, "mode": 0, "properties": {"Node name for S&R": "SaveImage"}, "widgets_values": ["batch_output"], "title": "API Save All", "inputs": [{"name": "images", "type": "IMAGE", "link": 10}]}
  ],
  "links": [
    [1, 1, 0, 5, 0, "MODEL"], [2, 1, 1, 2, 0, "CLIP"], [3, 1, 1, 3, 0, "CLIP"], [4, 1, 2, 6, 1, "VAE"],
    [5, 2, 0, 5, 1, "CONDITIONING"], [6, 3, 0, 5, 2, "CONDITIONING"], [7, 4, 0, 5, 3, "LATENT"],
    [8, 5, 0, 6, 0, "LATENT"], [9, 6, 0, 7, 0, "IMAGE"], [10, 6, 0, 8, 0, "IMAGE"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "Batch Pipeline Production",
      "version": "1.0.0",
      "description": "Batch generation pipeline for multiple variations. Generate 4 images simultaneously with different seeds for rapid iteration using Stable Diffusion XL.",
      "category": "advanced",
      "tags": ["batch", "multi-generation", "variations", "advanced", "production", "sdxl"],
      "requirements": {"models": ["stable-diffusion-xl-base-1.0"], "custom_nodes": [], "vram_min": "20GB", "vram_recommended": "24GB"},
      "parameters": {
        "prompt": {"node_id": 2, "widget_index": 0, "type": "string", "required": true, "default": "A beautiful landscape", "description": "Text description of desired image"},
        "negative_prompt": {"node_id": 3, "widget_index": 0, "type": "string", "required": false, "default": "blurry, low quality", "description": "Undesired elements to avoid"},
        "batch_count": {"node_id": 4, "widget_index": 2, "type": "integer", "required": false, "default": 4, "min": 1, "max": 8, "description": "Number of variations to generate (batch size)"},
        "seed": {"node_id": 5, "widget_index": 0, "type": "integer", "required": false, "default": 42, "min": 0, "max": 4294967295, "description": "Random seed for reproducibility"},
        "steps": {"node_id": 5, "widget_index": 2, "type": "integer", "required": false, "default": 20, "min": 15, "max": 50, "description": "Number of sampling steps (20-30 recommended for SDXL)"}
      },
      "outputs": {"images": {"node_id": 8, "type": "image", "format": "PNG", "count": 4, "resolution": "1024x1024 (configurable)"}},
      "performance": {"avg_generation_time": "45-60 seconds for 4 images (20 steps)", "vram_usage": "~20-24GB (depends on batch size)", "gpu_utilization": "95-100%"},
      "use_cases": ["Rapid prototyping with multiple variations", "Concept exploration and A/B testing", "Client presentations with options", "Quick iteration workflows"]
    }
  },
  "version": 0.4
}
comfyui/workflows/advanced/control_canny.png (new binary file, 4.8 KiB)
comfyui/workflows/advanced/control_depth.png (new binary file, 20 KiB)
comfyui/workflows/advanced/controlnet-fusion-production-v1.json (new file, 399 lines)
@@ -0,0 +1,399 @@
{
  "last_node_id": 14,
  "last_link_id": 18,
  "nodes": [
    {"id": 1, "type": "CheckpointLoaderSimple", "pos": [50, 100], "size": {"0": 350, "1": 100}, "flags": {}, "order": 0, "mode": 0, "properties": {"Node name for S&R": "CheckpointLoaderSimple"}, "widgets_values": ["sd_xl_base_1.0.safetensors"], "title": "SDXL Base Loader", "outputs": [{"name": "MODEL", "type": "MODEL", "links": [1], "slot_index": 0}, {"name": "CLIP", "type": "CLIP", "links": [2, 3], "slot_index": 1}, {"name": "VAE", "type": "VAE", "links": [4], "slot_index": 2}]},
    {"id": 2, "type": "CLIPTextEncode", "pos": [450, 100], "size": {"0": 400, "1": 200}, "flags": {}, "order": 1, "mode": 0, "properties": {"Node name for S&R": "CLIPTextEncode"}, "widgets_values": ["A futuristic city with precise architecture"], "title": "API Positive Prompt", "inputs": [{"name": "clip", "type": "CLIP", "link": 2}], "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [9], "slot_index": 0}]},
    {"id": 3, "type": "CLIPTextEncode", "pos": [450, 350], "size": {"0": 400, "1": 200}, "flags": {}, "order": 2, "mode": 0, "properties": {"Node name for S&R": "CLIPTextEncode"}, "widgets_values": ["blurry, low quality, distorted"], "title": "API Negative Prompt", "inputs": [{"name": "clip", "type": "CLIP", "link": 3}], "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [10], "slot_index": 0}]},
    {"id": 4, "type": "ControlNetLoader", "pos": [50, 300], "size": {"0": 350, "1": 100}, "flags": {}, "order": 3, "mode": 0, "properties": {"Node name for S&R": "ControlNetLoader"}, "widgets_values": ["controlnet-depth-sdxl-1.0.safetensors"], "title": "Load Depth ControlNet", "outputs": [{"name": "CONTROL_NET", "type": "CONTROL_NET", "links": [7], "slot_index": 0}]},
    {"id": 5, "type": "LoadImage", "pos": [50, 450], "size": {"0": 350, "1": 100}, "flags": {}, "order": 4, "mode": 0, "properties": {"Node name for S&R": "LoadImage"}, "widgets_values": ["examples/control_depth.png"], "title": "API Depth Image", "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [5], "slot_index": 0}, {"name": "MASK", "type": "MASK", "links": null, "slot_index": 1}]},
    {"id": 6, "type": "ControlNetApplyAdvanced", "pos": [900, 100], "size": {"0": 315, "1": 186}, "flags": {}, "order": 5, "mode": 0, "properties": {"Node name for S&R": "ControlNetApplyAdvanced"}, "widgets_values": [0.8, 0.0, 1.0], "title": "Apply Depth Control", "inputs": [{"name": "positive", "type": "CONDITIONING", "link": 9}, {"name": "negative", "type": "CONDITIONING", "link": 10}, {"name": "control_net", "type": "CONTROL_NET", "link": 7}, {"name": "image", "type": "IMAGE", "link": 5}], "outputs": [{"name": "positive", "type": "CONDITIONING", "links": [11], "slot_index": 0}, {"name": "negative", "type": "CONDITIONING", "links": [12], "slot_index": 1}]},
    {"id": 7, "type": "ControlNetLoader", "pos": [50, 600], "size": {"0": 350, "1": 100}, "flags": {}, "order": 6, "mode": 0, "properties": {"Node name for S&R": "ControlNetLoader"}, "widgets_values": ["controlnet-canny-sdxl-1.0.safetensors"], "title": "Load Canny ControlNet", "outputs": [{"name": "CONTROL_NET", "type": "CONTROL_NET", "links": [8], "slot_index": 0}]},
    {"id": 8, "type": "LoadImage", "pos": [50, 750], "size": {"0": 350, "1": 100}, "flags": {}, "order": 7, "mode": 0, "properties": {"Node name for S&R": "LoadImage"}, "widgets_values": ["examples/control_canny.png"], "title": "API Canny Image", "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [6], "slot_index": 0}, {"name": "MASK", "type": "MASK", "links": null, "slot_index": 1}]},
    {"id": 9, "type": "ControlNetApplyAdvanced", "pos": [900, 400], "size": {"0": 315, "1": 186}, "flags": {}, "order": 8, "mode": 0, "properties": {"Node name for S&R": "ControlNetApplyAdvanced"}, "widgets_values": [0.6, 0.0, 1.0], "title": "Apply Canny Control", "inputs": [{"name": "positive", "type": "CONDITIONING", "link": 11}, {"name": "negative", "type": "CONDITIONING", "link": 12}, {"name": "control_net", "type": "CONTROL_NET", "link": 8}, {"name": "image", "type": "IMAGE", "link": 6}], "outputs": [{"name": "positive", "type": "CONDITIONING", "links": [13], "slot_index": 0}, {"name": "negative", "type": "CONDITIONING", "links": [14], "slot_index": 1}]},
    {"id": 10, "type": "EmptyLatentImage", "pos": [450, 600], "size": {"0": 315, "1": 106}, "flags": {}, "order": 9, "mode": 0, "properties": {"Node name for S&R": "EmptyLatentImage"}, "widgets_values": [1024, 1024, 1], "title": "API Latent Config", "outputs": [{"name": "LATENT", "type": "LATENT", "links": [15], "slot_index": 0}]},
    {"id": 11, "type": "KSampler", "pos": [1270, 100], "size": {"0": 315, "1": 474}, "flags": {}, "order": 10, "mode": 0, "properties": {"Node name for S&R": "KSampler"}, "widgets_values": [42, "fixed", 30, 7.0, "euler", "normal", 1], "title": "ControlNet Sampler", "inputs": [{"name": "model", "type": "MODEL", "link": 1}, {"name": "positive", "type": "CONDITIONING", "link": 13}, {"name": "negative", "type": "CONDITIONING", "link": 14}, {"name": "latent_image", "type": "LATENT", "link": 15}], "outputs": [{"name": "LATENT", "type": "LATENT", "links": [16], "slot_index": 0}]},
    {"id": 12, "type": "VAEDecode", "pos": [1640, 100], "size": {"0": 210, "1": 46}, "flags": {}, "order": 11, "mode": 0, "properties": {"Node name for S&R": "VAEDecode"}, "title": "VAE Decode", "inputs": [{"name": "samples", "type": "LATENT", "link": 16}, {"name": "vae", "type": "VAE", "link": 4}], "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [17, 18], "slot_index": 0}]},
    {"id": 13, "type": "PreviewImage", "pos": [1900, 100], "size": {"0": 400, "1": 400}, "flags": {}, "order": 12, "mode": 0, "properties": {"Node name for S&R": "PreviewImage"}, "title": "Preview Result", "inputs": [{"name": "images", "type": "IMAGE", "link": 17}]},
    {"id": 14, "type": "SaveImage", "pos": [1900, 550], "size": {"0": 400, "1": 100}, "flags": {}, "order": 13, "mode": 0, "properties": {"Node name for S&R": "SaveImage"}, "widgets_values": ["controlnet_output"], "title": "API Save Image", "inputs": [{"name": "images", "type": "IMAGE", "link": 18}]}
  ],
  "links": [
    [1, 1, 0, 11, 0, "MODEL"], [2, 1, 1, 2, 0, "CLIP"], [3, 1, 1, 3, 0, "CLIP"], [4, 1, 2, 12, 1, "VAE"],
    [5, 5, 0, 6, 3, "IMAGE"], [6, 8, 0, 9, 3, "IMAGE"], [7, 4, 0, 6, 2, "CONTROL_NET"], [8, 7, 0, 9, 2, "CONTROL_NET"],
    [9, 2, 0, 6, 0, "CONDITIONING"], [10, 3, 0, 6, 1, "CONDITIONING"], [11, 6, 0, 9, 0, "CONDITIONING"], [12, 6, 1, 9, 1, "CONDITIONING"],
    [13, 9, 0, 11, 1, "CONDITIONING"], [14, 9, 1, 11, 2, "CONDITIONING"], [15, 10, 0, 11, 3, "LATENT"],
    [16, 11, 0, 12, 0, "LATENT"], [17, 12, 0, 13, 0, "IMAGE"], [18, 12, 0, 14, 0, "IMAGE"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "ControlNet Fusion Production",
      "version": "1.0.0",
      "description": "Multi-ControlNet workflow combining depth and canny edge control for precise composition. Chain multiple control methods for maximum control over generation.",
      "category": "advanced",
      "tags": ["controlnet", "multi-control", "depth", "canny", "advanced", "production", "sdxl"],
      "requirements": {
        "models": ["stable-diffusion-xl-base-1.0", "controlnet-depth-sdxl-1.0", "controlnet-canny-sdxl-1.0"],
        "custom_nodes": [],
        "vram_min": "16GB",
        "vram_recommended": "24GB"
      },
      "parameters": {
        "prompt": {"node_id": 2, "widget_index": 0, "type": "string", "required": true, "default": "A futuristic city with precise architecture", "description": "Text description of desired image"},
        "negative_prompt": {"node_id": 3, "widget_index": 0, "type": "string", "required": false, "default": "blurry, low quality, distorted", "description": "Undesired elements to avoid"},
        "depth_image": {"node_id": 5, "widget_index": 0, "type": "string", "required": true, "default": "examples/control_depth.png", "description": "Depth map image for spatial control"},
        "canny_image": {"node_id": 8, "widget_index": 0, "type": "string", "required": true, "default": "examples/control_canny.png", "description": "Canny edge image for structure control"},
        "depth_strength": {"node_id": 6, "widget_index": 0, "type": "float", "required": false, "default": 0.8, "min": 0.0, "max": 2.0, "description": "Strength of depth control (0.0-2.0)"},
        "canny_strength": {"node_id": 9, "widget_index": 0, "type": "float", "required": false, "default": 0.6, "min": 0.0, "max": 2.0, "description": "Strength of canny edge control (0.0-2.0)"},
        "steps": {"node_id": 11, "widget_index": 2, "type": "integer", "required": false, "default": 30, "min": 20, "max": 50, "description": "Number of sampling steps (30-40 recommended for ControlNet)"},
        "seed": {"node_id": 11, "widget_index": 0, "type": "integer", "required": false, "default": 42, "min": 0, "max": 4294967295, "description": "Random seed for reproducibility"}
      },
      "outputs": {"images": {"node_id": 14, "type": "image", "format": "PNG", "count": 1, "resolution": "1024x1024 (configurable)"}},
      "performance": {"avg_generation_time": "50-70 seconds (30 steps)", "vram_usage": "~18-22GB (dual ControlNet + SDXL)", "gpu_utilization": "95-100%"},
      "use_cases": ["Architectural visualization with precise control", "Product renders with exact composition", "Character poses with depth and edge guidance", "Complex scene generation with multiple constraints"],
      "notes": ["Example control images provided in examples/ directory", "Depth map: Grayscale image where brightness = distance from camera", "Canny edges: White edges on black background", "Adjust control strengths to balance control vs. creativity", "Higher steps (30-40) recommended for best quality with ControlNet"]
    }
  },
  "version": 0.4
}
comfyui/workflows/image-to-image/composition_ref1.png (new binary file, 31 KiB)
comfyui/workflows/image-to-image/composition_ref2.png (new binary file, 33 KiB)
comfyui/workflows/image-to-image/face_reference.png (new binary file, 28 KiB)
@@ -0,0 +1,866 @@
{
  "last_node_id": 19,
  "last_link_id": 27,
  "nodes": [
    {"id": 1, "type": "CheckpointLoaderSimple", "pos": [50, 100], "size": {"0": 350, "1": 100}, "flags": {}, "order": 0, "mode": 0, "properties": {"Node name for S&R": "CheckpointLoaderSimple"}, "widgets_values": ["sd_xl_base_1.0.safetensors"], "title": "SDXL Checkpoint Loader", "outputs": [{"name": "MODEL", "type": "MODEL", "links": [1, 3], "slot_index": 0}, {"name": "CLIP", "type": "CLIP", "links": [12, 13], "slot_index": 1}, {"name": "VAE", "type": "VAE", "links": [19], "slot_index": 2}]},
    {"id": 2, "type": "LoadImage", "pos": [50, 300], "size": {"0": 350, "1": 100}, "flags": {}, "order": 1, "mode": 0, "properties": {"Node name for S&R": "LoadImage"}, "widgets_values": ["composition_ref1.png", "image"], "title": "API Composition Ref 1", "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [5], "slot_index": 0}, {"name": "MASK", "type": "MASK", "links": [], "slot_index": 1}]},
    {"id": 3, "type": "LoadImage", "pos": [50, 650], "size": {"0": 350, "1": 100}, "flags": {}, "order": 2, "mode": 0, "properties": {"Node name for S&R": "LoadImage"}, "widgets_values": ["composition_ref2.png", "image"], "title": "API Composition Ref 2", "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [8], "slot_index": 0}, {"name": "MASK", "type": "MASK", "links": [], "slot_index": 1}]},
    {"id": 4, "type": "IPAdapterUnifiedLoader", "pos": [450, 100], "size": {"0": 350, "1": 100}, "flags": {}, "order": 3, "mode": 0, "properties": {"Node name for S&R": "IPAdapterUnifiedLoader"}, "widgets_values": ["VIT-G (medium strength)"], "title": "IP-Adapter Loader 1", "inputs": [{"name": "model", "type": "MODEL", "link": 1}], "outputs": [{"name": "MODEL", "type": "MODEL", "links": [6], "slot_index": 0}, {"name": "IPADAPTER", "type": "IPADAPTER", "links": [7], "slot_index": 1}]},
    {"id": 5, "type": "IPAdapterUnifiedLoader", "pos": [450, 250], "size": {"0": 350, "1": 100}, "flags": {}, "order": 4, "mode": 0, "properties": {"Node name for S&R": "IPAdapterUnifiedLoader"}, "widgets_values": ["VIT-G (medium strength)"], "title": "IP-Adapter Loader 2", "inputs": [{"name": "model", "type": "MODEL", "link": 3}], "outputs": [{"name": "MODEL", "type": "MODEL", "links": [], "slot_index": 0}, {"name": "IPADAPTER", "type": "IPADAPTER", "links": [10], "slot_index": 1}]},
    {"id": 6, "type": "IPAdapterAdvanced", "pos": [800, 100], "size": {"0": 315, "1": 258}, "flags": {}, "order": 5, "mode": 0, "properties": {"Node name for S&R": "IPAdapterAdvanced"}, "widgets_values": [0.6, "ease in-out", "average", 0.0, 1.0, "V only"], "title": "Apply IP-Adapter 1", "inputs": [{"name": "model", "type": "MODEL", "link": 6}, {"name": "ipadapter", "type": "IPADAPTER", "link": 7}, {"name": "image", "type": "IMAGE", "link": 5}, {"name": "clip_vision", "type": "CLIP_VISION", "link": 25}], "outputs": [{"name": "MODEL", "type": "MODEL", "links": [11], "slot_index": 0}]},
    {"id": 7, "type": "IPAdapterAdvanced", "pos": [1100, 100], "size": {"0": 315, "1": 258}, "flags": {}, "order": 6, "mode": 0, "properties": {"Node name for S&R": "IPAdapterAdvanced"}, "widgets_values": [0.5, "ease in-out", "average", 0.0, 1.0, "V only"], "title": "Apply IP-Adapter 2", "inputs": [{"name": "model", "type": "MODEL", "link": 11}, {"name": "ipadapter", "type": "IPADAPTER", "link": 10}, {"name": "image", "type": "IMAGE", "link": 8}, {"name": "clip_vision", "type": "CLIP_VISION", "link": 26}], "outputs": [{"name": "MODEL", "type": "MODEL", "links": [14], "slot_index": 0}]},
    {"id": 8, "type": "CLIPTextEncode", "pos": [450, 500], "size": {"0": 400, "1": 200}, "flags": {}, "order": 7, "mode": 0, "properties": {"Node name for S&R": "CLIPTextEncode"}, "widgets_values": ["Complex scene composition, detailed, professional"], "title": "API Positive Prompt", "inputs": [{"name": "clip", "type": "CLIP", "link": 12}], "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [15], "slot_index": 0}]},
    {"id": 9, "type": "CLIPTextEncode", "pos": [450, 750], "size": {"0": 400, "1": 200}, "flags": {}, "order": 8, "mode": 0, "properties": {"Node name for S&R": "CLIPTextEncode"}, "widgets_values": ["blurry, low quality"], "title": "API Negative Prompt", "inputs": [{"name": "clip", "type": "CLIP", "link": 13}], "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [16], "slot_index": 0}]},
    {"id": 10, "type": "EmptyLatentImage", "pos": [800, 500], "size": {"0": 315, "1": 100}, "flags": {}, "order": 9, "mode": 0, "properties": {"Node name for S&R": "EmptyLatentImage"}, "widgets_values": [1024, 1024, 1], "title": "API Latent Config", "outputs": [{"name": "LATENT", "type": "LATENT", "links": [17], "slot_index": 0}]},
    {"id": 11, "type": "KSampler", "pos": [1400, 100], "size": {"0": 315, "1": 474}, "flags": {}, "order": 10, "mode": 0, "properties": {"Node name for S&R": "KSampler"}, "widgets_values": [42, "fixed", 35, 7.0, "dpmpp_2m", "karras", 1], "title": "Multi-Composition Sampler", "inputs": [{"name": "model", "type": "MODEL", "link": 14}, {"name": "positive", "type": "CONDITIONING", "link": 15}, {"name": "negative", "type": "CONDITIONING", "link": 16}, {"name": "latent_image", "type": "LATENT", "link": 17}], "outputs": [{"name": "LATENT", "type": "LATENT", "links": [18], "slot_index": 0}]},
    {"id": 12, "type": "VAEDecode", "pos": [1750, 100], "size": {"0": 315, "1": 100}, "flags": {}, "order": 11, "mode": 0, "properties": {"Node name for S&R": "VAEDecode"}, "title": "VAE Decode", "inputs": [{"name": "samples", "type": "LATENT", "link": 18}, {"name": "vae", "type": "VAE", "link": 19}], "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [20, 21], "slot_index": 0}]},
    {"id": 13, "type": "PreviewImage", "pos": [2000, 100], "size": {"0": 315, "1": 100}, "flags": {}, "order": 12, "mode": 0, "properties": {"Node name for S&R": "PreviewImage"}, "title": "Preview Output", "inputs": [{"name": "images", "type": "IMAGE", "link": 20}]},
    {"id": 14, "type": "SaveImage", "pos": [2000, 550], "size": {"0": 315, "1": 100}, "flags": {}, "order": 13, "mode": 0, "properties": {"Node name for S&R": "SaveImage"}, "widgets_values": ["ipadapter_composition_output"], "title": "API Image Output", "inputs": [{"name": "images", "type": "IMAGE", "link": 21}]},
    {"id": 15, "type": "CLIPVisionLoader", "pos": [450, 250], "size": {"0": 315, "1": 58}, "flags": {}, "order": 0, "mode": 0, "properties": {"Node name for S&R": "CLIPVisionLoader"}, "widgets_values": ["CLIP-ViT-bigG-14-laion2B-39B-b160k.safetensors"], "title": "CLIP Vision Loader", "outputs": [{"name": "CLIP_VISION", "type": "CLIP_VISION", "links": [25, 26], "slot_index": 0}]}
  ],
  "links": [
    [1, 1, 0, 4, 0, "MODEL"], [3, 1, 0, 5, 0, "MODEL"], [5, 2, 0, 6, 2, "IMAGE"], [6, 4, 0, 6, 0, "MODEL"],
    [7, 4, 1, 6, 1, "IPADAPTER"], [8, 3, 0, 7, 2, "IMAGE"], [10, 5, 1, 7, 1, "IPADAPTER"], [11, 6, 0, 7, 0, "MODEL"],
    [12, 1, 1, 8, 0, "CLIP"], [13, 1, 1, 9, 0, "CLIP"], [14, 7, 0, 11, 0, "MODEL"], [15, 8, 0, 11, 1, "CONDITIONING"],
    [16, 9, 0, 11, 2, "CONDITIONING"], [17, 10, 0, 11, 3, "LATENT"], [18, 11, 0, 12, 0, "LATENT"], [19, 1, 2, 12, 1, "VAE"],
    [20, 12, 0, 13, 0, "IMAGE"], [21, 12, 0, 14, 0, "IMAGE"], [25, 15, 0, 6, 3, "CLIP_VISION"], [26, 15, 0, 7, 3, "CLIP_VISION"]
  ],
  "extra": {
    "workflow_info": {
      "name": "IP-Adapter Multi-Composition Image-to-Image Production",
      "version": "1.0.0",
      "description": "Complex scene composition using multiple IP-Adapter references. Combine visual elements from multiple source images.",
      "category": "image-to-image",
      "tags": ["ipadapter", "composition", "multi-reference", "i2i", "production"],
      "requirements": {
        "models": ["stable-diffusion-xl-base-1.0", "ip-adapter-plus"],
        "custom_nodes": ["ComfyUI_IPAdapter_plus"],
        "vram_min": "18GB"
      },
      "parameters": {
        "ref_image_1": {"node_id": 2, "type": "image", "required": true, "description": "First composition reference"},
        "ref_image_2": {"node_id": 3, "type": "image", "required": true, "description": "Second composition reference"},
        "weight_1": {"node_id": 6, "type": "float", "default": 0.6, "description": "Weight for first reference"},
        "weight_2": {"node_id": 7, "type": "float", "default": 0.5, "description": "Weight for second reference"}
      },
      "use_cases": ["Multi-source scene composition", "Blend multiple visual concepts", "Complex artistic compositions", "Style mixing"]
    }
  },
  "version": 0.4
}
@@ -0,0 +1,675 @@
{
  "last_node_id": 16,
  "last_link_id": 21,
  "nodes": [
    {"id": 1, "type": "CheckpointLoaderSimple", "pos": [50, 100], "size": {"0": 350, "1": 100}, "flags": {}, "order": 0, "mode": 0, "properties": {"Node name for S&R": "CheckpointLoaderSimple"}, "widgets_values": ["sd_xl_base_1.0.safetensors"], "title": "SDXL Base Checkpoint Loader", "outputs": [{"name": "MODEL", "type": "MODEL", "links": [1], "slot_index": 0}, {"name": "CLIP", "type": "CLIP", "links": [6, 7], "slot_index": 1}, {"name": "VAE", "type": "VAE", "links": [13], "slot_index": 2}]},
    {"id": 2, "type": "LoadImage", "pos": [50, 300], "size": [315, 314], "flags": {}, "order": 1, "mode": 0, "properties": {"Node name for S&R": "LoadImage"}, "widgets_values": ["face_reference.png", "image"], "title": "API Face Reference Input", "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [3], "slot_index": 0}, {"name": "MASK", "type": "MASK", "links": [], "slot_index": 1}]},
    {"id": 3, "type": "IPAdapterUnifiedLoader", "pos": [450, 100], "size": {"0": 315, "1": 78}, "flags": {}, "order": 2, "mode": 0, "properties": {"Node name for S&R": "IPAdapterUnifiedLoader"}, "widgets_values": ["VIT-G (medium strength)"], "title": "IP-Adapter Face Loader", "inputs": [{"name": "model", "type": "MODEL", "link": 1}], "outputs": [{"name": "MODEL", "type": "MODEL", "links": [4], "slot_index": 0}, {"name": "IPADAPTER", "type": "IPADAPTER", "links": [5], "slot_index": 1}]},
    {"id": 4, "type": "IPAdapterAdvanced", "pos": [800, 100], "size": {"0": 315, "1": 258}, "flags": {}, "order": 3, "mode": 0, "properties": {"Node name for S&R": "IPAdapterAdvanced"}, "widgets_values": [0.85, "ease in-out", "average", 0.0, 1.0, "V only"], "title": "Apply IP-Adapter Face", "inputs": [{"name": "model", "type": "MODEL", "link": 4}, {"name": "ipadapter", "type": "IPADAPTER", "link": 5}, {"name": "image", "type": "IMAGE", "link": 3}, {"name": "clip_vision", "type": "CLIP_VISION", "link": 20}], "outputs": [{"name": "MODEL", "type": "MODEL", "links": [8], "slot_index": 0}]},
    {"id": 5, "type": "CLIPTextEncode", "pos": [450, 400], "size": {"0": 400, "1": 200}, "flags": {}, "order": 4, "mode": 0, "properties": {"Node name for S&R": "CLIPTextEncode"}, "widgets_values": ["A professional portrait, studio lighting, detailed face"], "title": "API Positive Prompt", "inputs": [{"name": "clip", "type": "CLIP", "link": 6}], "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [9], "slot_index": 0}]},
    {"id": 6, "type": "CLIPTextEncode", "pos": [450, 650], "size": {"0": 400, "1": 200}, "flags": {}, "order": 5, "mode": 0, "properties": {"Node name for S&R": "CLIPTextEncode"}, "widgets_values": ["blurry, distorted face, low quality"], "title": "API Negative Prompt", "inputs": [{"name": "clip", "type": "CLIP", "link": 7}], "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [10], "slot_index": 0}]},
    {"id": 7, "type": "EmptyLatentImage", "pos": [800, 450], "size": {"0": 315, "1": 106}, "flags": {}, "order": 6, "mode": 0, "properties": {"Node name for S&R": "EmptyLatentImage"}, "widgets_values": [1024, 1024, 1], "title": "API Latent Image Config", "outputs": [{"name": "LATENT", "type": "LATENT", "links": [11], "slot_index": 0}]},
    {"id": 8, "type": "KSampler", "pos": [1170, 100], "size": {"0": 315, "1": 474}, "widgets_values": [42, "fixed", 30, 6.5, "dpmpp_2m", "karras", 1], "title": "Sampler with Face", "flags": {}, "order": 7, "mode": 0, "properties": {"Node name for S&R": "KSampler"
|
},
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "model",
|
||||||
|
"type": "MODEL",
|
||||||
|
"link": 8
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "positive",
|
||||||
|
"type": "CONDITIONING",
|
||||||
|
"link": 9
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "negative",
|
||||||
|
"type": "CONDITIONING",
|
||||||
|
"link": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "latent_image",
|
||||||
|
"type": "LATENT",
|
||||||
|
"link": 11
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "LATENT",
|
||||||
|
"type": "LATENT",
|
||||||
|
"links": [
|
||||||
|
12
|
||||||
|
],
|
||||||
|
"slot_index": 0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 9,
|
||||||
|
"type": "VAEDecode",
|
||||||
|
"pos": [
|
||||||
|
1540,
|
||||||
|
100
|
||||||
|
],
|
||||||
|
"size": {
|
||||||
|
"0": 210,
|
||||||
|
"1": 46
|
||||||
|
},
|
||||||
|
"title": "VAE Decode",
|
||||||
|
"flags": {},
|
||||||
|
"order": 8,
|
||||||
|
"mode": 0,
|
||||||
|
"properties": {
|
||||||
|
"Node name for S&R": "VAEDecode"
|
||||||
|
},
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "samples",
|
||||||
|
"type": "LATENT",
|
||||||
|
"link": 12
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "vae",
|
||||||
|
"type": "VAE",
|
||||||
|
"link": 13
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "IMAGE",
|
||||||
|
"type": "IMAGE",
|
||||||
|
"links": [
|
||||||
|
14,
|
||||||
|
15
|
||||||
|
],
|
||||||
|
"slot_index": 0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 10,
|
||||||
|
"type": "PreviewImage",
|
||||||
|
"pos": [
|
||||||
|
1800,
|
||||||
|
100
|
||||||
|
],
|
||||||
|
"size": {
|
||||||
|
"0": 400,
|
||||||
|
"1": 400
|
||||||
|
},
|
||||||
|
"title": "Preview Output",
|
||||||
|
"flags": {},
|
||||||
|
"order": 9,
|
||||||
|
"mode": 0,
|
||||||
|
"properties": {
|
||||||
|
"Node name for S&R": "PreviewImage"
|
||||||
|
},
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "images",
|
||||||
|
"type": "IMAGE",
|
||||||
|
"link": 14
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 11,
|
||||||
|
"type": "SaveImage",
|
||||||
|
"pos": [
|
||||||
|
1800,
|
||||||
|
550
|
||||||
|
],
|
||||||
|
"size": {
|
||||||
|
"0": 400,
|
||||||
|
"1": 100
|
||||||
|
},
|
||||||
|
"widgets_values": [
|
||||||
|
"ipadapter_face_output"
|
||||||
|
],
|
||||||
|
"title": "API Image Output",
|
||||||
|
"flags": {},
|
||||||
|
"order": 10,
|
||||||
|
"mode": 0,
|
||||||
|
"properties": {
|
||||||
|
"Node name for S&R": "SaveImage"
|
||||||
|
},
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "images",
|
||||||
|
"type": "IMAGE",
|
||||||
|
"link": 15
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 12,
|
||||||
|
"type": "CLIPVisionLoader",
|
||||||
|
"pos": [
|
||||||
|
450,
|
||||||
|
250
|
||||||
|
],
|
||||||
|
"size": {
|
||||||
|
"0": 315,
|
||||||
|
"1": 58
|
||||||
|
},
|
||||||
|
"widgets_values": [
|
||||||
|
"CLIP-ViT-bigG-14-laion2B-39B-b160k.safetensors"
|
||||||
|
],
|
||||||
|
"title": "CLIP Vision Loader",
|
||||||
|
"flags": {},
|
||||||
|
"order": 0,
|
||||||
|
"mode": 0,
|
||||||
|
"properties": {
|
||||||
|
"Node name for S&R": "CLIPVisionLoader"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "CLIP_VISION",
|
||||||
|
"type": "CLIP_VISION",
|
||||||
|
"links": [
|
||||||
|
20
|
||||||
|
],
|
||||||
|
"slot_index": 0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"links": [
|
||||||
|
[
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
3,
|
||||||
|
0,
|
||||||
|
"MODEL"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
3,
|
||||||
|
2,
|
||||||
|
0,
|
||||||
|
4,
|
||||||
|
2,
|
||||||
|
"IMAGE"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
4,
|
||||||
|
3,
|
||||||
|
0,
|
||||||
|
4,
|
||||||
|
0,
|
||||||
|
"MODEL"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
5,
|
||||||
|
3,
|
||||||
|
1,
|
||||||
|
4,
|
||||||
|
1,
|
||||||
|
"IPADAPTER"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
6,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
5,
|
||||||
|
0,
|
||||||
|
"CLIP"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
7,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
6,
|
||||||
|
0,
|
||||||
|
"CLIP"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
8,
|
||||||
|
4,
|
||||||
|
0,
|
||||||
|
8,
|
||||||
|
0,
|
||||||
|
"MODEL"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
9,
|
||||||
|
5,
|
||||||
|
0,
|
||||||
|
8,
|
||||||
|
1,
|
||||||
|
"CONDITIONING"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
10,
|
||||||
|
6,
|
||||||
|
0,
|
||||||
|
8,
|
||||||
|
2,
|
||||||
|
"CONDITIONING"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
11,
|
||||||
|
7,
|
||||||
|
0,
|
||||||
|
8,
|
||||||
|
3,
|
||||||
|
"LATENT"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
12,
|
||||||
|
8,
|
||||||
|
0,
|
||||||
|
9,
|
||||||
|
0,
|
||||||
|
"LATENT"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
13,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
9,
|
||||||
|
1,
|
||||||
|
"VAE"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
14,
|
||||||
|
9,
|
||||||
|
0,
|
||||||
|
10,
|
||||||
|
0,
|
||||||
|
"IMAGE"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
15,
|
||||||
|
9,
|
||||||
|
0,
|
||||||
|
11,
|
||||||
|
0,
|
||||||
|
"IMAGE"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
20,
|
||||||
|
12,
|
||||||
|
0,
|
||||||
|
4,
|
||||||
|
3,
|
||||||
|
"CLIP_VISION"
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"extra": {
|
||||||
|
"workflow_info": {
|
||||||
|
"name": "IP-Adapter Face Portrait Image-to-Image Production",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"description": "Face-focused generation using IP-Adapter Face model. Transfer facial features from reference to generate new portraits or perform face swaps.",
|
||||||
|
"category": "image-to-image",
|
||||||
|
"tags": [
|
||||||
|
"ipadapter",
|
||||||
|
"face",
|
||||||
|
"portrait",
|
||||||
|
"i2i",
|
||||||
|
"production"
|
||||||
|
],
|
||||||
|
"requirements": {
|
||||||
|
"models": [
|
||||||
|
"stable-diffusion-xl-base-1.0",
|
||||||
|
"ip-adapter-face"
|
||||||
|
],
|
||||||
|
"custom_nodes": [
|
||||||
|
"ComfyUI_IPAdapter_plus"
|
||||||
|
],
|
||||||
|
"vram_min": "16GB"
|
||||||
|
},
|
||||||
|
"parameters": {
|
||||||
|
"face_image": {
|
||||||
|
"node_id": 2,
|
||||||
|
"type": "image",
|
||||||
|
"required": true,
|
||||||
|
"description": "Reference face image"
|
||||||
|
},
|
||||||
|
"prompt": {
|
||||||
|
"node_id": 5,
|
||||||
|
"type": "string",
|
||||||
|
"default": "A professional portrait",
|
||||||
|
"description": "Portrait description"
|
||||||
|
},
|
||||||
|
"face_weight": {
|
||||||
|
"node_id": 4,
|
||||||
|
"type": "float",
|
||||||
|
"default": 0.85,
|
||||||
|
"description": "Face similarity strength (0.85 recommended)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"use_cases": [
|
||||||
|
"Portrait generation with specific face",
|
||||||
|
"Face swap in different contexts",
|
||||||
|
"Consistent character portraits",
|
||||||
|
"Professional headshots"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"version": 0.4
|
||||||
|
}
|
||||||
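The `extra.workflow_info.parameters` block above maps each API-facing parameter to a graph node via `node_id`; in this first file the widget position is left implicit, so the sketch below assumes index 0, matching the later workflows in this set that spell it out with `widget_index`. A minimal, hypothetical Python helper for patching those values into the UI-format graph before submitting it (the filename is an assumption):

```python
import json

# Hypothetical helper: patch a UI-format ComfyUI graph in place by node id.
# Assumes each parameter maps to widgets_values[widget_index] of its node,
# with widget_index defaulting to 0 (the later files make this explicit).
def set_param(graph: dict, node_id: int, value, widget_index: int = 0) -> None:
    for node in graph["nodes"]:
        if node["id"] == node_id:
            wv = node.setdefault("widgets_values", [])
            while len(wv) <= widget_index:   # pad short widget lists
                wv.append(None)
            wv[widget_index] = value
            return
    raise KeyError(f"node {node_id} not found")

with open("i2i_ipadapter-face-production.json") as f:  # hypothetical filename
    wf = json.load(f)

# Values follow the extra.workflow_info.parameters table above.
set_param(wf, 2, "customer_face.png")        # face_image  (node 2)
set_param(wf, 5, "A professional portrait")  # prompt      (node 5)
set_param(wf, 4, 0.85)                       # face_weight (node 4, widget 0)
```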
@@ -0,0 +1,723 @@
{
  "last_node_id": 15,
  "last_link_id": 20,
  "nodes": [
    {
      "id": 1,
      "type": "CheckpointLoaderSimple",
      "pos": [50, 100],
      "size": { "0": 350, "1": 100 },
      "flags": {},
      "order": 0,
      "mode": 0,
      "outputs": [
        { "name": "MODEL", "type": "MODEL", "links": [1], "slot_index": 0 },
        { "name": "CLIP", "type": "CLIP", "links": [2, 3], "slot_index": 1 },
        { "name": "VAE", "type": "VAE", "links": [4], "slot_index": 2 }
      ],
      "properties": { "Node name for S&R": "CheckpointLoaderSimple" },
      "widgets_values": ["sd_xl_base_1.0.safetensors"],
      "title": "SDXL Base Checkpoint Loader"
    },
    {
      "id": 2,
      "type": "LoadImage",
      "pos": [50, 300],
      "size": [315, 314],
      "flags": {},
      "order": 1,
      "mode": 0,
      "outputs": [
        { "name": "IMAGE", "type": "IMAGE", "links": [11], "shape": 3 },
        { "name": "MASK", "type": "MASK", "links": null, "shape": 3 }
      ],
      "properties": { "Node name for S&R": "LoadImage" },
      "widgets_values": ["style_reference.png", "image"],
      "title": "API Style Reference Input"
    },
    {
      "id": 3,
      "type": "IPAdapterUnifiedLoader",
      "pos": [450, 100],
      "size": { "0": 315, "1": 78 },
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [
        { "name": "model", "type": "MODEL", "link": 1 },
        { "name": "ipadapter", "type": "IPADAPTER", "link": null }
      ],
      "outputs": [
        { "name": "model", "type": "MODEL", "links": [12], "shape": 3, "slot_index": 0 },
        { "name": "ipadapter", "type": "IPADAPTER", "links": [13], "shape": 3, "slot_index": 1 }
      ],
      "properties": { "Node name for S&R": "IPAdapterUnifiedLoader" },
      "widgets_values": ["VIT-G (medium strength)"],
      "title": "IP-Adapter Loader"
    },
    {
      "id": 4,
      "type": "IPAdapter",
      "pos": [800, 100],
      "size": { "0": 315, "1": 258 },
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [
        { "name": "ipadapter", "type": "IPADAPTER", "link": 13 },
        { "name": "clip_vision", "type": "CLIP_VISION", "link": null },
        { "name": "image", "type": "IMAGE", "link": 11 },
        { "name": "model", "type": "MODEL", "link": 12 }
      ],
      "outputs": [
        { "name": "MODEL", "type": "MODEL", "links": [14], "shape": 3, "slot_index": 0 }
      ],
      "properties": { "Node name for S&R": "IPAdapterApply" },
      "widgets_values": [0.75, 0.0, 1.0, "style transfer"],
      "title": "Apply IP-Adapter Style"
    },
    {
      "id": 5,
      "type": "CLIPTextEncode",
      "pos": [450, 400],
      "size": { "0": 400, "1": 200 },
      "flags": {},
      "order": 4,
      "mode": 0,
      "inputs": [
        { "name": "clip", "type": "CLIP", "link": 2 }
      ],
      "outputs": [
        { "name": "CONDITIONING", "type": "CONDITIONING", "links": [5], "slot_index": 0 }
      ],
      "properties": { "Node name for S&R": "CLIPTextEncode" },
      "widgets_values": ["A portrait of a person, highly detailed, professional photography"],
      "title": "API Positive Prompt"
    },
    {
      "id": 6,
      "type": "CLIPTextEncode",
      "pos": [450, 650],
      "size": { "0": 400, "1": 200 },
      "flags": {},
      "order": 5,
      "mode": 0,
      "inputs": [
        { "name": "clip", "type": "CLIP", "link": 3 }
      ],
      "outputs": [
        { "name": "CONDITIONING", "type": "CONDITIONING", "links": [6], "slot_index": 0 }
      ],
      "properties": { "Node name for S&R": "CLIPTextEncode" },
      "widgets_values": ["blurry, low quality, distorted, deformed"],
      "title": "API Negative Prompt"
    },
    {
      "id": 7,
      "type": "EmptyLatentImage",
      "pos": [800, 450],
      "size": { "0": 315, "1": 106 },
      "flags": {},
      "order": 6,
      "mode": 0,
      "outputs": [
        { "name": "LATENT", "type": "LATENT", "links": [7], "slot_index": 0 }
      ],
      "properties": { "Node name for S&R": "EmptyLatentImage" },
      "widgets_values": [1024, 1024, 1],
      "title": "API Latent Image Config"
    },
    {
      "id": 8,
      "type": "KSampler",
      "pos": [1170, 100],
      "size": { "0": 315, "1": 474 },
      "flags": {},
      "order": 7,
      "mode": 0,
      "inputs": [
        { "name": "model", "type": "MODEL", "link": 14 },
        { "name": "positive", "type": "CONDITIONING", "link": 5 },
        { "name": "negative", "type": "CONDITIONING", "link": 6 },
        { "name": "latent_image", "type": "LATENT", "link": 7 }
      ],
      "outputs": [
        { "name": "LATENT", "type": "LATENT", "links": [8], "slot_index": 0 }
      ],
      "properties": { "Node name for S&R": "KSampler" },
      "widgets_values": [42, "fixed", 30, 6.5, "dpmpp_2m", "karras", 1],
      "title": "Sampler with Style"
    },
    {
      "id": 9,
      "type": "VAEDecode",
      "pos": [1540, 100],
      "size": { "0": 210, "1": 46 },
      "flags": {},
      "order": 8,
      "mode": 0,
      "inputs": [
        { "name": "samples", "type": "LATENT", "link": 8 },
        { "name": "vae", "type": "VAE", "link": 4 }
      ],
      "outputs": [
        { "name": "IMAGE", "type": "IMAGE", "links": [9, 10], "slot_index": 0 }
      ],
      "properties": { "Node name for S&R": "VAEDecode" },
      "title": "VAE Decode"
    },
    {
      "id": 10,
      "type": "PreviewImage",
      "pos": [1800, 100],
      "size": { "0": 400, "1": 400 },
      "flags": {},
      "order": 9,
      "mode": 0,
      "inputs": [
        { "name": "images", "type": "IMAGE", "link": 9 }
      ],
      "properties": { "Node name for S&R": "PreviewImage" },
      "title": "Preview Output"
    },
    {
      "id": 11,
      "type": "SaveImage",
      "pos": [1800, 550],
      "size": { "0": 400, "1": 100 },
      "flags": {},
      "order": 10,
      "mode": 0,
      "inputs": [
        { "name": "images", "type": "IMAGE", "link": 10 }
      ],
      "properties": { "Node name for S&R": "SaveImage" },
      "widgets_values": ["ipadapter_style_output"],
      "title": "API Image Output"
    }
  ],
  "links": [
    [1, 1, 0, 3, 0, "MODEL"],
    [2, 1, 1, 5, 0, "CLIP"],
    [3, 1, 1, 6, 0, "CLIP"],
    [4, 1, 2, 9, 1, "VAE"],
    [5, 5, 0, 8, 1, "CONDITIONING"],
    [6, 6, 0, 8, 2, "CONDITIONING"],
    [7, 7, 0, 8, 3, "LATENT"],
    [8, 8, 0, 9, 0, "LATENT"],
    [9, 9, 0, 10, 0, "IMAGE"],
    [10, 9, 0, 11, 0, "IMAGE"],
    [11, 2, 0, 4, 2, "IMAGE"],
    [12, 3, 0, 4, 3, "MODEL"],
    [13, 3, 1, 4, 0, "IPADAPTER"],
    [14, 4, 0, 8, 0, "MODEL"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "IP-Adapter Style Transfer Image-to-Image Production",
      "version": "1.0.0",
      "author": "RunPod AI Model Orchestrator",
      "description": "Style transfer using IP-Adapter. Apply the visual style from a reference image to generate new images matching that aesthetic.",
      "category": "image-to-image",
      "tags": ["ipadapter", "style-transfer", "i2i", "production", "sdxl"],
      "requirements": {
        "models": ["stable-diffusion-xl-base-1.0", "ip-adapter-plus"],
        "custom_nodes": ["ComfyUI_IPAdapter_plus"],
        "vram_min": "16GB",
        "vram_recommended": "24GB"
      },
      "parameters": {
        "style_image": {
          "node_id": 2, "widget_index": 0, "type": "image", "required": true,
          "description": "Reference image for style extraction"
        },
        "prompt": {
          "node_id": 5, "widget_index": 0, "type": "string", "required": true,
          "default": "A portrait of a person",
          "description": "Text description of desired content"
        },
        "negative_prompt": {
          "node_id": 6, "widget_index": 0, "type": "string", "required": false,
          "default": "blurry, low quality",
          "description": "Undesired elements to avoid"
        },
        "style_weight": {
          "node_id": 4, "widget_index": 0, "type": "float", "required": false,
          "default": 0.75, "min": 0.0, "max": 1.0,
          "description": "Strength of style application (0.75 recommended)"
        },
        "width": {
          "node_id": 7, "widget_index": 0, "type": "integer", "required": false,
          "default": 1024, "min": 512, "max": 2048,
          "description": "Output image width"
        },
        "height": {
          "node_id": 7, "widget_index": 1, "type": "integer", "required": false,
          "default": 1024, "min": 512, "max": 2048,
          "description": "Output image height"
        },
        "seed": {
          "node_id": 8, "widget_index": 0, "type": "integer", "required": false,
          "default": 42, "min": 0, "max": 4294967295,
          "description": "Random seed for reproducibility"
        },
        "steps": {
          "node_id": 8, "widget_index": 2, "type": "integer", "required": false,
          "default": 30, "min": 20, "max": 50,
          "description": "Number of sampling steps"
        },
        "cfg": {
          "node_id": 8, "widget_index": 3, "type": "float", "required": false,
          "default": 6.5, "min": 1.0, "max": 15.0,
          "description": "Classifier-free guidance scale"
        }
      },
      "outputs": {
        "image": {
          "node_id": 11, "type": "image", "format": "PNG",
          "resolution": "1024x1024 (configurable)"
        }
      },
      "performance": {
        "avg_generation_time": "30-40 seconds",
        "vram_usage": "~16-18GB",
        "gpu_utilization": "95-100%"
      },
      "use_cases": [
        "Apply artistic styles to new subjects",
        "Match aesthetic of reference images",
        "Consistent style across generated images",
        "Photography style transfer"
      ]
    }
  },
  "version": 0.4
}
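All of these files share the LiteGraph serialization: each `links` entry is `[link_id, source_node, source_slot, target_node, target_slot, type]`, and the `type` should agree with the source output and target input it connects (for example, `[14, 4, 0, 8, 0, "MODEL"]` above wires the IP-Adapter's patched MODEL into the KSampler). A small sketch, assuming a local copy of the file (name hypothetical), that cross-checks the table against the node declarations:

```python
import json

# Consistency check for the LiteGraph-style "links" table used by these
# workflow files. Assumes well-formed entries: every destination node has
# an "inputs" list and every source node an "outputs" list.
def check_links(graph: dict) -> list[str]:
    nodes = {n["id"]: n for n in graph["nodes"]}
    problems = []
    for link_id, src, src_slot, dst, dst_slot, ltype in graph["links"]:
        out_type = nodes[src]["outputs"][src_slot]["type"]
        in_type = nodes[dst]["inputs"][dst_slot]["type"]
        if not (out_type == in_type == ltype):
            problems.append(
                f"link {link_id}: {src}.{src_slot} ({out_type}) -> "
                f"{dst}.{dst_slot} ({in_type}), declared {ltype}"
            )
    return problems

with open("i2i_ipadapter-style-production.json") as f:  # hypothetical name
    print(check_links(json.load(f)) or "all links consistent")
```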
@@ -0,0 +1,420 @@
{
  "last_node_id": 8,
  "last_link_id": 10,
  "nodes": [
    {
      "id": 1,
      "type": "LoadImage",
      "pos": [50, 100],
      "size": [315, 314],
      "widgets_values": ["input_frame.png", "image"],
      "title": "API Input Image",
      "flags": {},
      "order": 0,
      "mode": 0,
      "properties": { "Node name for S&R": "LoadImage" },
      "outputs": [
        { "name": "IMAGE", "type": "IMAGE", "links": [1], "slot_index": 0 },
        { "name": "MASK", "type": "MASK", "links": null }
      ]
    },
    {
      "id": 2,
      "type": "DownloadAndLoadCogVideoModel",
      "pos": [50, 500],
      "size": [350, 100],
      "widgets_values": ["THUDM/CogVideoX-5b-I2V", "bf16", "disabled", true],
      "title": "CogVideoX-5b-I2V Loader",
      "flags": {},
      "order": 1,
      "mode": 0,
      "properties": { "Node name for S&R": "DownloadAndLoadCogVideoModel" },
      "outputs": [
        { "name": "model", "type": "COGVIDEOMODEL", "links": [2], "slot_index": 0 },
        { "name": "vae", "type": "VAE", "links": [3, 10], "slot_index": 1 }
      ]
    },
    {
      "id": 7,
      "type": "CLIPLoader",
      "pos": [50, 650],
      "size": [350, 100],
      "widgets_values": ["t5xxl_fp16.safetensors", "sd3"],
      "title": "T5 CLIP Loader",
      "flags": {},
      "order": 2,
      "mode": 0,
      "properties": { "Node name for S&R": "CLIPLoader" },
      "outputs": [
        { "name": "CLIP", "type": "CLIP", "links": [4, 5], "slot_index": 0 }
      ]
    },
    {
      "id": 3,
      "type": "CogVideoTextEncode",
      "pos": [450, 100],
      "size": [400, 200],
      "widgets_values": ["Camera movement description, action, scene details", 1, false],
      "title": "API Video Prompt (Positive)",
      "flags": {},
      "order": 3,
      "mode": 0,
      "properties": { "Node name for S&R": "CogVideoTextEncode" },
      "inputs": [
        { "name": "clip", "type": "CLIP", "link": 4 }
      ],
      "outputs": [
        { "name": "conditioning", "type": "CONDITIONING", "links": [6], "slot_index": 0 },
        { "name": "clip", "type": "CLIP", "links": null }
      ]
    },
    {
      "id": 8,
      "type": "CogVideoTextEncode",
      "pos": [450, 350],
      "size": [400, 200],
      "widgets_values": ["low quality, blurry, distorted, watermark", 1, true],
      "title": "API Video Prompt (Negative)",
      "flags": {},
      "order": 4,
      "mode": 0,
      "properties": { "Node name for S&R": "CogVideoTextEncode" },
      "inputs": [
        { "name": "clip", "type": "CLIP", "link": 5 }
      ],
      "outputs": [
        { "name": "conditioning", "type": "CONDITIONING", "links": [7], "slot_index": 0 },
        { "name": "clip", "type": "CLIP", "links": null }
      ]
    },
    {
      "id": 9,
      "type": "CogVideoImageEncode",
      "pos": [450, 600],
      "size": [315, 100],
      "widgets_values": [],
      "title": "Encode Input Image",
      "flags": {},
      "order": 5,
      "mode": 0,
      "properties": { "Node name for S&R": "CogVideoImageEncode" },
      "inputs": [
        { "name": "vae", "type": "VAE", "link": 3 },
        { "name": "start_image", "type": "IMAGE", "link": 1 }
      ],
      "outputs": [
        { "name": "samples", "type": "LATENT", "links": [8], "slot_index": 0 }
      ]
    },
    {
      "id": 4,
      "type": "CogVideoSampler",
      "pos": [900, 100],
      "size": [315, 474],
      "widgets_values": [49, 50, 6.0, 42, "fixed", "CogVideoXDDIM", 1.0],
      "title": "CogVideoX Sampler (6s @ 8fps)",
      "flags": {},
      "order": 6,
      "mode": 0,
      "properties": { "Node name for S&R": "CogVideoSampler" },
      "inputs": [
        { "name": "model", "type": "COGVIDEOMODEL", "link": 2 },
        { "name": "positive", "type": "CONDITIONING", "link": 6 },
        { "name": "negative", "type": "CONDITIONING", "link": 7 },
        { "name": "image_cond_latents", "type": "LATENT", "link": 8 }
      ],
      "outputs": [
        { "name": "samples", "type": "LATENT", "links": [9], "slot_index": 0 }
      ]
    },
    {
      "id": 5,
      "type": "CogVideoDecode",
      "pos": [1250, 100],
      "size": [315, 200],
      "widgets_values": [true, 240, 360, 0.25, 0.25],
      "title": "VAE Decode Video",
      "flags": {},
      "order": 7,
      "mode": 0,
      "properties": { "Node name for S&R": "CogVideoDecode" },
      "inputs": [
        { "name": "vae", "type": "VAE", "link": 10 },
        { "name": "samples", "type": "LATENT", "link": 9 }
      ],
      "outputs": [
        { "name": "images", "type": "IMAGE", "links": [11], "slot_index": 0 }
      ]
    },
    {
      "id": 6,
      "type": "VHS_VideoCombine",
      "pos": [1600, 100],
      "size": [315, 200],
      "widgets_values": [8, 0, "cogvideox_output", "video/h264-mp4", "yuv420p", 19, true, false],
      "title": "Combine Video Frames",
      "flags": {},
      "order": 8,
      "mode": 0,
      "properties": { "Node name for S&R": "VHS_VideoCombine" },
      "inputs": [
        { "name": "images", "type": "IMAGE", "link": 11 }
      ],
      "outputs": [
        { "name": "Filenames", "type": "VHS_FILENAMES", "links": null }
      ]
    }
  ],
  "links": [
    [1, 1, 0, 9, 1, "IMAGE"],
    [2, 2, 0, 4, 0, "COGVIDEOMODEL"],
    [3, 2, 1, 9, 0, "VAE"],
    [4, 7, 0, 3, 0, "CLIP"],
    [5, 7, 0, 8, 0, "CLIP"],
    [6, 3, 0, 4, 1, "CONDITIONING"],
    [7, 8, 0, 4, 2, "CONDITIONING"],
    [8, 9, 0, 4, 3, "LATENT"],
    [9, 4, 0, 5, 1, "LATENT"],
    [10, 2, 1, 5, 0, "VAE"],
    [11, 5, 0, 6, 0, "IMAGE"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "CogVideoX Image-to-Video Production",
      "version": "1.2.0",
      "description": "AI-driven image-to-video using CogVideoX-5b-I2V. Generate 6-second videos (49 frames @ 8fps) from input images with camera movement and action.",
      "category": "image-to-video",
      "tags": ["cogvideox", "i2v", "video-generation", "production"],
      "requirements": {
        "models": ["CogVideoX-5b-I2V", "T5-XXL FP16"],
        "custom_nodes": ["ComfyUI-VideoHelperSuite", "ComfyUI-CogVideoXWrapper"],
        "vram_min": "20GB",
        "vram_recommended": "24GB"
      },
      "parameters": {
        "input_image": {
          "node_id": 1, "widget_index": 0, "type": "image", "required": true,
          "description": "Starting frame for video generation"
        },
        "positive_prompt": {
          "node_id": 3, "widget_index": 0, "type": "string", "required": true,
          "default": "Camera movement description, action, scene details",
          "description": "Describe desired camera movement, actions, and scene"
        },
        "negative_prompt": {
          "node_id": 8, "widget_index": 0, "type": "string", "required": false,
          "default": "low quality, blurry, distorted, watermark",
          "description": "Undesired elements to avoid"
        },
        "num_frames": {
          "node_id": 4, "widget_index": 0, "type": "integer", "required": false,
          "default": 49, "min": 1, "max": 1024,
          "description": "Number of frames to generate (49 = ~6s @ 8fps)"
        },
        "steps": {
          "node_id": 4, "widget_index": 1, "type": "integer", "required": false,
          "default": 50, "min": 20, "max": 100,
          "description": "Sampling steps (50 recommended for quality)"
        },
        "cfg": {
          "node_id": 4, "widget_index": 2, "type": "float", "required": false,
          "default": 6.0, "min": 1.0, "max": 30.0,
          "description": "Classifier-free guidance scale"
        },
        "seed": {
          "node_id": 4, "widget_index": 3, "type": "integer", "required": false,
          "default": 42,
          "description": "Random seed for reproducibility"
        },
        "fps": {
          "node_id": 6, "widget_index": 0, "type": "integer", "required": false,
          "default": 8,
          "description": "Output video framerate"
        }
      },
      "outputs": {
        "video": {
          "node_id": 6, "type": "video", "format": "MP4 (H.264)",
          "resolution": "Based on input image",
          "duration": "~6 seconds @ 8fps (49 frames)"
        }
      },
      "performance": {
        "avg_generation_time": "120-180 seconds",
        "vram_usage": "~20-22GB",
        "gpu_utilization": "95-100%"
      },
      "use_cases": [
        "Animate static images with camera motion",
        "Create video loops from single frames",
        "Add dynamic movement to product shots",
        "Generate cinematic camera movements"
      ],
      "notes": [
        "CogVideoX-5b-I2V is specifically trained for image-to-video generation",
        "Model will download automatically on first use (~10GB)",
        "Enable VAE tiling to reduce VRAM usage",
        "Higher steps (50-100) improve quality but increase generation time",
        "T5-XXL text encoder required - automatically linked from SD3.5"
      ]
    }
  },
  "version": 0.4
}
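The sampler's `num_frames` (node 4, widget 0) and the combiner's `fps` (node 6, widget 0) jointly determine the advertised duration: 49 frames at 8 fps is 49 / 8 ≈ 6.1 seconds, hence the "~6 seconds" in the output metadata. A quick sketch (filename hypothetical) that derives this from the graph rather than trusting the label:

```python
import json

with open("i2v_cogvideox-production.json") as f:  # hypothetical filename
    wf = json.load(f)

# Index widget values by node id, then read the two widgets the
# parameters table above points at.
widgets = {n["id"]: n.get("widgets_values", []) for n in wf["nodes"]}
num_frames = widgets[4][0]   # 49, CogVideoSampler
fps = widgets[6][0]          # 8, VHS_VideoCombine
print(f"{num_frames} frames @ {fps} fps = {num_frames / fps:.1f}s")  # 6.1s
```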
BIN   comfyui/workflows/image-to-video/i2v_hunyuan-i2v-v1-robot.webp  (new file, 1.0 MiB)
BIN   comfyui/workflows/image-to-video/i2v_hunyuan-i2v-v2-fennec.webp  (new file, 2.8 MiB)
BIN   comfyui/workflows/image-to-video/i2v_hunyuan-t2v-kitchen.webp  (new file, 1.4 MiB)
2528  comfyui/workflows/image-to-video/i2v_hunyuan15-i2v-720p.json  (new file)
2269  comfyui/workflows/image-to-video/i2v_hunyuan15-t2v-720p.json  (new file)
6182  comfyui/workflows/image-to-video/i2v_wan22-14b-animate.json  (new file)
2739  comfyui/workflows/image-to-video/i2v_wan22-14b-flf2v.json  (new file)
2735  comfyui/workflows/image-to-video/i2v_wan22-14b-fun-camera.json  (new file)
2908  comfyui/workflows/image-to-video/i2v_wan22-14b-fun-control.json  (new file)
2327  comfyui/workflows/image-to-video/i2v_wan22-14b-i2v.json  (new file)
7988  comfyui/workflows/image-to-video/i2v_wan22-14b-s2v.json  (new file)
1876  comfyui/workflows/image-to-video/i2v_wan22-14b-t2v.json  (new file)
733   comfyui/workflows/image-to-video/i2v_wan22-5b-ti2v.json  (new file)
@@ -0,0 +1,733 @@
{
  "id": "91f6bbe2-ed41-4fd6-bac7-71d5b5864ecb",
  "revision": 0,
  "last_node_id": 59,
  "last_link_id": 108,
  "nodes": [
    {
      "id": 37,
      "type": "UNETLoader",
      "pos": [-30, 50],
      "size": [346.7470703125, 82],
      "flags": {},
      "order": 0,
      "mode": 0,
      "inputs": [],
      "outputs": [
        { "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [94] }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.45",
        "Node name for S&R": "UNETLoader",
        "models": [
          {
            "name": "wan2.2_ti2v_5B_fp16.safetensors",
            "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_ti2v_5B_fp16.safetensors",
            "directory": "diffusion_models"
          }
        ]
      },
      "widgets_values": ["wan2.2_ti2v_5B_fp16.safetensors", "default"]
    },
    {
      "id": 38,
      "type": "CLIPLoader",
      "pos": [-30, 190],
      "size": [350, 110],
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [],
      "outputs": [
        { "name": "CLIP", "type": "CLIP", "slot_index": 0, "links": [74, 75] }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.45",
        "Node name for S&R": "CLIPLoader",
        "models": [
          {
            "name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors",
            "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors",
            "directory": "text_encoders"
          }
        ]
      },
      "widgets_values": ["umt5_xxl_fp8_e4m3fn_scaled.safetensors", "wan", "default"]
    },
    {
      "id": 39,
      "type": "VAELoader",
      "pos": [-30, 350],
      "size": [350, 60],
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [],
      "outputs": [
        { "name": "VAE", "type": "VAE", "slot_index": 0, "links": [76, 105] }
      ],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.45",
        "Node name for S&R": "VAELoader",
        "models": [
          {
            "name": "wan2.2_vae.safetensors",
            "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan2.2_vae.safetensors",
            "directory": "vae"
          }
        ]
      },
      "widgets_values": ["wan2.2_vae.safetensors"]
    },
    {
      "id": 8,
      "type": "VAEDecode",
      "pos": [1190, 150],
      "size": [210, 46],
      "flags": {},
      "order": 10,
      "mode": 0,
      "inputs": [
        { "name": "samples", "type": "LATENT", "link": 35 },
        { "name": "vae", "type": "VAE", "link": 76 }
      ],
      "outputs": [
        { "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [107] }
      ],
      "properties": { "cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "VAEDecode" },
      "widgets_values": []
    },
    {
      "id": 57,
      "type": "CreateVideo",
      "pos": [1200, 240],
      "size": [270, 78],
      "flags": {},
      "order": 11,
      "mode": 0,
      "inputs": [
        { "name": "images", "type": "IMAGE", "link": 107 },
        { "name": "audio", "shape": 7, "type": "AUDIO", "link": null }
      ],
      "outputs": [
        { "name": "VIDEO", "type": "VIDEO", "links": [108] }
      ],
      "properties": { "cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "CreateVideo" },
      "widgets_values": [24]
    },
    {
      "id": 58,
      "type": "SaveVideo",
      "pos": [1200, 370],
      "size": [660, 450],
      "flags": {},
      "order": 12,
      "mode": 0,
      "inputs": [
        { "name": "video", "type": "VIDEO", "link": 108 }
      ],
      "outputs": [],
      "properties": { "cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "SaveVideo" },
      "widgets_values": ["video/ComfyUI", "auto", "auto"]
    },
    {
      "id": 55,
      "type": "Wan22ImageToVideoLatent",
      "pos": [380, 540],
      "size": [271.9126892089844, 150],
      "flags": {},
      "order": 8,
      "mode": 0,
      "inputs": [
        { "name": "vae", "type": "VAE", "link": 105 },
        { "name": "start_image", "shape": 7, "type": "IMAGE", "link": 106 }
      ],
      "outputs": [
        { "name": "LATENT", "type": "LATENT", "links": [104] }
      ],
      "properties": { "cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "Wan22ImageToVideoLatent" },
      "widgets_values": [1280, 704, 121, 1]
    },
    {
      "id": 56,
      "type": "LoadImage",
      "pos": [0, 540],
      "size": [274.080078125, 314],
      "flags": {},
      "order": 3,
      "mode": 4,
      "inputs": [],
      "outputs": [
        { "name": "IMAGE", "type": "IMAGE", "links": [106] },
        { "name": "MASK", "type": "MASK", "links": null }
      ],
      "properties": { "cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "LoadImage" },
      "widgets_values": ["example.png", "image"]
    },
    {
      "id": 7,
      "type": "CLIPTextEncode",
      "pos": [380, 260],
      "size": [425.27801513671875, 180.6060791015625],
      "flags": {},
      "order": 7,
      "mode": 0,
      "inputs": [
        { "name": "clip", "type": "CLIP", "link": 75 }
      ],
      "outputs": [
        { "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [52] }
      ],
      "title": "CLIP Text Encode (Negative Prompt)",
      "properties": { "cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "CLIPTextEncode" },
      "widgets_values": [
        "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
      ],
      "color": "#322",
      "bgcolor": "#533"
    },
    {
      "id": 6,
      "type": "CLIPTextEncode",
      "pos": [380, 50],
      "size": [422.84503173828125, 164.31304931640625],
      "flags": {},
      "order": 6,
      "mode": 0,
      "inputs": [
        { "name": "clip", "type": "CLIP", "link": 74 }
      ],
      "outputs": [
        { "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [46] }
      ],
      "title": "CLIP Text Encode (Positive Prompt)",
      "properties": { "cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "CLIPTextEncode" },
      "widgets_values": [
        "Low contrast. In a retro 1970s-style subway station, a street musician plays in dim colors and rough textures. He wears an old jacket, playing guitar with focus. Commuters hurry by, and a small crowd gathers to listen. The camera slowly moves right, capturing the blend of music and city noise, with old subway signs and mottled walls in the background."
      ],
      "color": "#232",
      "bgcolor": "#353"
    },
    {
      "id": 3,
      "type": "KSampler",
      "pos": [850, 130],
      "size": [315, 262],
      "flags": {},
      "order": 9,
      "mode": 0,
      "inputs": [
        { "name": "model", "type": "MODEL", "link": 95 },
        { "name": "positive", "type": "CONDITIONING", "link": 46 },
        { "name": "negative", "type": "CONDITIONING", "link": 52 },
        { "name": "latent_image", "type": "LATENT", "link": 104 }
      ],
      "outputs": [
        { "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [35] }
      ],
      "properties": { "cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "KSampler" },
      "widgets_values": [898471028164125, "randomize", 20, 5, "uni_pc", "simple", 1]
    },
    {
      "id": 48,
      "type": "ModelSamplingSD3",
      "pos": [850, 20],
      "size": [210, 58],
      "flags": { "collapsed": false },
      "order": 5,
      "mode": 0,
      "inputs": [
        { "name": "model", "type": "MODEL", "link": 94 }
      ],
      "outputs": [
        { "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [95] }
      ],
      "properties": { "cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "ModelSamplingSD3" },
      "widgets_values": [8]
    },
    {
      "id": 59,
      "type": "MarkdownNote",
      "pos": [-550, 10],
      "size": [480, 340],
      "flags": {},
      "order": 4,
      "mode": 0,
      "inputs": [],
      "outputs": [],
      "title": "Model Links",
      "properties": {},
      "widgets_values": [
        "[Tutorial](https://docs.comfy.org/tutorials/video/wan/wan2_2\n) \n\n**Diffusion Model**\n- [wan2.2_ti2v_5B_fp16.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_ti2v_5B_fp16.safetensors)\n\n**VAE**\n- [wan2.2_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan2.2_vae.safetensors)\n\n**Text Encoder** \n- [umt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors)\n\n\nFile save location\n\n```\nComfyUI/\n├───📂 models/\n│   ├───📂 diffusion_models/\n│   │   └───wan2.2_ti2v_5B_fp16.safetensors\n│   ├───📂 text_encoders/\n│   │   └─── umt5_xxl_fp8_e4m3fn_scaled.safetensors  \n│   └───📂 vae/\n│       └── wan2.2_vae.safetensors\n```\n"
      ],
      "color": "#432",
      "bgcolor": "#653"
    }
  ],
  "links": [
    [35, 3, 0, 8, 0, "LATENT"],
    [46, 6, 0, 3, 1, "CONDITIONING"],
    [52, 7, 0, 3, 2, "CONDITIONING"],
    [74, 38, 0, 6, 0, "CLIP"],
    [75, 38, 0, 7, 0, "CLIP"],
    [76, 39, 0, 8, 1, "VAE"],
    [94, 37, 0, 48, 0, "MODEL"],
    [95, 48, 0, 3, 0, "MODEL"],
    [104, 55, 0, 3, 3, "LATENT"],
    [105, 39, 0, 55, 0, "VAE"],
    [106, 56, 0, 55, 1, "IMAGE"],
    [107, 8, 0, 57, 0, "IMAGE"],
    [108, 57, 0, 58, 0, "VIDEO"]
  ],
  "groups": [
    { "id": 1, "title": "Step1 - Load models", "bounding": [-50, -20, 400, 453.6000061035156], "color": "#3f789e", "font_size": 24, "flags": {} },
    { "id": 2, "title": "Step3 - Prompt", "bounding": [370, -20, 448.27801513671875, 473.2060852050781], "color": "#3f789e", "font_size": 24, "flags": {} },
    { "id": 3, "title": "For i2v, use Ctrl + B to enable", "bounding": [-50, 450, 400, 420], "color": "#3f789e", "font_size": 24, "flags": {} },
    { "id": 4, "title": "Video Size & length", "bounding": [370, 470, 291.9127197265625, 233.60000610351562], "color": "#3f789e", "font_size": 24, "flags": {} }
  ],
  "config": {},
  "extra": {
    "ds": {
      "scale": 0.46462425349300085,
      "offset": [847.5372059811432, 288.7938392118285]
    },
    "frontendVersion": "1.27.10",
    "VHS_latentpreview": false,
    "VHS_latentpreviewrate": 0,
    "VHS_MetadataImage": true,
    "VHS_KeepIntermediate": true
  },
  "version": 0.4
}
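Unlike the CogVideoX file, this workflow does not download its own weights at run time; the embedded `models` metadata and the "Model Links" note give the URLs and the expected directory layout. A hedged sketch that fetches all three into that layout — `COMFYUI_ROOT` is an assumption, and for multi-gigabyte files a resumable downloader would be preferable:

```python
import urllib.request
from pathlib import Path

# Fetch the files the "models" metadata and MarkdownNote point at, into the
# directory layout the note describes. COMFYUI_ROOT is an assumption.
COMFYUI_ROOT = Path("ComfyUI")

MODELS = [
    ("diffusion_models/wan2.2_ti2v_5B_fp16.safetensors",
     "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_ti2v_5B_fp16.safetensors"),
    ("text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors",
     "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors"),
    ("vae/wan2.2_vae.safetensors",
     "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan2.2_vae.safetensors"),
]

for rel_path, url in MODELS:
    dest = COMFYUI_ROOT / "models" / rel_path
    dest.parent.mkdir(parents=True, exist_ok=True)
    if not dest.exists():                      # skip files already in place
        print(f"downloading {dest.name} ...")
        urllib.request.urlretrieve(url, dest)
```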
BIN  comfyui/workflows/image-to-video/input_frame.png  (new file, 5.8 KiB)
416  comfyui/workflows/image-to-video/svd-i2v-production-v1.json  (new file)
@@ -0,0 +1,416 @@
{
  "last_node_id": 7,
  "last_link_id": 10,
  "nodes": [
    {
      "id": 1,
      "type": "LoadImage",
      "pos": [50, 100],
      "size": [315, 314],
      "widgets_values": ["input_frame.png", "image"],
      "title": "API Input Image",
      "flags": {},
      "order": 0,
      "mode": 0,
      "properties": {"Node name for S&R": "LoadImage"},
      "outputs": [
        {"name": "IMAGE", "type": "IMAGE", "links": [1], "slot_index": 0},
        {"name": "MASK", "type": "MASK", "links": null}
      ]
    },
    {
      "id": 2,
      "type": "ImageOnlyCheckpointLoader",
      "pos": [50, 500],
      "size": [350, 100],
      "widgets_values": ["svd_xt.safetensors"],
      "title": "SVD-XT Model Loader",
      "flags": {},
      "order": 1,
      "mode": 0,
      "properties": {"Node name for S&R": "ImageOnlyCheckpointLoader"},
      "outputs": [
        {"name": "MODEL", "type": "MODEL", "links": [2], "slot_index": 0},
        {"name": "CLIP_VISION", "type": "CLIP_VISION", "links": [3], "slot_index": 1},
        {"name": "VAE", "type": "VAE", "links": [4, 5], "slot_index": 2}
      ]
    },
    {
      "id": 3,
      "type": "VideoLinearCFGGuidance",
      "pos": [450, 500],
      "size": [315, 100],
      "widgets_values": [1.0],
      "title": "Linear CFG Guidance",
      "flags": {},
      "order": 2,
      "mode": 0,
      "properties": {"Node name for S&R": "VideoLinearCFGGuidance"},
      "inputs": [{"name": "model", "type": "MODEL", "link": 2}],
      "outputs": [{"name": "MODEL", "type": "MODEL", "links": [6], "slot_index": 0}]
    },
    {
      "id": 4,
      "type": "SVD_img2vid_Conditioning",
      "pos": [450, 100],
      "size": [315, 350],
      "widgets_values": [1024, 576, 14, 127, 6, 0.0],
      "title": "SVD Image-to-Video Conditioning",
      "flags": {},
      "order": 3,
      "mode": 0,
      "properties": {"Node name for S&R": "SVD_img2vid_Conditioning"},
      "inputs": [
        {"name": "clip_vision", "type": "CLIP_VISION", "link": 3},
        {"name": "init_image", "type": "IMAGE", "link": 1},
        {"name": "vae", "type": "VAE", "link": 4}
      ],
      "outputs": [
        {"name": "positive", "type": "CONDITIONING", "links": [7], "slot_index": 0},
        {"name": "negative", "type": "CONDITIONING", "links": [8], "slot_index": 1},
        {"name": "latent", "type": "LATENT", "links": [9], "slot_index": 2}
      ]
    },
    {
      "id": 5,
      "type": "KSampler",
      "pos": [800, 100],
      "size": [315, 474],
      "widgets_values": [42, "fixed", 25, 6.0, "euler", "karras", 1.0],
      "title": "KSampler (25 steps)",
      "flags": {},
      "order": 4,
      "mode": 0,
      "properties": {"Node name for S&R": "KSampler"},
      "inputs": [
        {"name": "model", "type": "MODEL", "link": 6},
        {"name": "positive", "type": "CONDITIONING", "link": 7},
        {"name": "negative", "type": "CONDITIONING", "link": 8},
        {"name": "latent_image", "type": "LATENT", "link": 9}
      ],
      "outputs": [{"name": "LATENT", "type": "LATENT", "links": [10], "slot_index": 0}]
    },
    {
      "id": 6,
      "type": "VAEDecode",
      "pos": [1150, 100],
      "size": [210, 46],
      "widgets_values": [],
      "title": "VAE Decode Video Frames",
      "flags": {},
      "order": 5,
      "mode": 0,
      "properties": {"Node name for S&R": "VAEDecode"},
      "inputs": [
        {"name": "samples", "type": "LATENT", "link": 10},
        {"name": "vae", "type": "VAE", "link": 5}
      ],
      "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [11], "slot_index": 0}]
    },
    {
      "id": 7,
      "type": "VHS_VideoCombine",
      "pos": [1400, 100],
      "size": [315, 200],
      "widgets_values": [6, 0, "svd_output", "video/h264-mp4", false, true],
      "title": "Combine Video Frames",
      "flags": {},
      "order": 6,
      "mode": 0,
      "properties": {"Node name for S&R": "VHS_VideoCombine"},
      "inputs": [{"name": "images", "type": "IMAGE", "link": 11}],
      "outputs": [{"name": "Filenames", "type": "VHS_FILENAMES", "links": null}]
    }
  ],
  "links": [
    [1, 1, 0, 4, 1, "IMAGE"],
    [2, 2, 0, 3, 0, "MODEL"],
    [3, 2, 1, 4, 0, "CLIP_VISION"],
    [4, 2, 2, 4, 2, "VAE"],
    [5, 2, 2, 6, 1, "VAE"],
    [6, 3, 0, 5, 0, "MODEL"],
    [7, 4, 0, 5, 1, "CONDITIONING"],
    [8, 4, 1, 5, 2, "CONDITIONING"],
    [9, 4, 2, 5, 3, "LATENT"],
    [10, 5, 0, 6, 0, "LATENT"],
    [11, 6, 0, 7, 0, "IMAGE"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "Stable Video Diffusion Image-to-Video Production",
      "version": "1.2.0",
      "description": "Quick animation using SVD-XT. Generate 14-frame video from single image with motion and camera movement.",
      "category": "image-to-video",
      "tags": ["svd", "svd-xt", "stable-video-diffusion", "i2v", "animation", "production"],
      "requirements": {
        "models": ["SVD-XT"],
        "custom_nodes": ["ComfyUI-VideoHelperSuite"],
        "vram_min": "16GB",
        "vram_recommended": "20GB"
      },
      "parameters": {
        "input_image": {"node_id": 1, "widget_index": 0, "type": "image", "required": true, "description": "Starting frame for video generation (1024x576 recommended)"},
        "width": {"node_id": 4, "widget_index": 0, "type": "integer", "required": false, "default": 1024, "min": 16, "max": 16384, "description": "Output video width"},
        "height": {"node_id": 4, "widget_index": 1, "type": "integer", "required": false, "default": 576, "min": 16, "max": 16384, "description": "Output video height"},
        "video_frames": {"node_id": 4, "widget_index": 2, "type": "integer", "required": false, "default": 14, "min": 1, "max": 4096, "description": "Number of frames to generate (14 or 25 for SVD/SVD-XT)"},
        "motion_bucket_id": {"node_id": 4, "widget_index": 3, "type": "integer", "required": false, "default": 127, "min": 1, "max": 1023, "description": "Motion amount (higher = more motion)"},
        "fps": {"node_id": 4, "widget_index": 4, "type": "integer", "required": false, "default": 6, "min": 1, "max": 1024, "description": "Frames per second for conditioning"},
        "augmentation_level": {"node_id": 4, "widget_index": 5, "type": "float", "required": false, "default": 0.0, "min": 0.0, "max": 10.0, "description": "Noise augmentation level"},
        "steps": {"node_id": 5, "widget_index": 2, "type": "integer", "required": false, "default": 25, "min": 1, "max": 150, "description": "Sampling steps (25 recommended)"},
        "cfg": {"node_id": 5, "widget_index": 3, "type": "float", "required": false, "default": 6.0, "min": 0.0, "max": 30.0, "description": "Classifier-free guidance scale"},
        "seed": {"node_id": 5, "widget_index": 0, "type": "integer", "required": false, "default": 42, "description": "Random seed for reproducibility"},
        "output_fps": {"node_id": 7, "widget_index": 0, "type": "integer", "required": false, "default": 6, "description": "Output video framerate"}
      },
      "outputs": {
        "video": {"node_id": 7, "type": "video", "format": "MP4 (H.264)", "resolution": "1024x576 (configurable)", "duration": "~2.3 seconds @ 6fps (14 frames)"}
      },
      "performance": {
        "avg_generation_time": "25-35 seconds",
        "vram_usage": "~16-18GB",
        "gpu_utilization": "95-100%"
      },
      "use_cases": [
        "Animate static images with natural motion",
        "Create short video loops from single frames",
        "Add subtle camera movements to still images",
        "Generate product animation previews"
      ],
      "notes": [
        "SVD-XT extends frame count from 14 to 25 frames",
        "Model auto-downloads on first use (~9GB)",
        "Recommended resolution: 1024x576 (16:9)",
        "Higher motion_bucket_id = more movement",
        "Linear CFG guidance improves temporal consistency"
      ]
    }
  },
  "version": 0.4
}
417
comfyui/workflows/image-to-video/svd-xt-i2v-production-v1.json
Normal file
@@ -0,0 +1,417 @@
{
  "last_node_id": 7,
  "last_link_id": 10,
  "nodes": [
    {
      "id": 1,
      "type": "LoadImage",
      "pos": [50, 100],
      "size": [315, 314],
      "widgets_values": ["input_frame.png", "image"],
      "title": "API Input Image",
      "flags": {},
      "order": 0,
      "mode": 0,
      "properties": {"Node name for S&R": "LoadImage"},
      "outputs": [
        {"name": "IMAGE", "type": "IMAGE", "links": [1], "slot_index": 0},
        {"name": "MASK", "type": "MASK", "links": null}
      ]
    },
    {
      "id": 2,
      "type": "ImageOnlyCheckpointLoader",
      "pos": [50, 500],
      "size": [350, 100],
      "widgets_values": ["svd_xt.safetensors"],
      "title": "SVD-XT Model Loader",
      "flags": {},
      "order": 1,
      "mode": 0,
      "properties": {"Node name for S&R": "ImageOnlyCheckpointLoader"},
      "outputs": [
        {"name": "MODEL", "type": "MODEL", "links": [2], "slot_index": 0},
        {"name": "CLIP_VISION", "type": "CLIP_VISION", "links": [3], "slot_index": 1},
        {"name": "VAE", "type": "VAE", "links": [4, 5], "slot_index": 2}
      ]
    },
    {
      "id": 3,
      "type": "VideoLinearCFGGuidance",
      "pos": [450, 500],
      "size": [315, 100],
      "widgets_values": [1.0],
      "title": "Linear CFG Guidance",
      "flags": {},
      "order": 2,
      "mode": 0,
      "properties": {"Node name for S&R": "VideoLinearCFGGuidance"},
      "inputs": [{"name": "model", "type": "MODEL", "link": 2}],
      "outputs": [{"name": "MODEL", "type": "MODEL", "links": [6], "slot_index": 0}]
    },
    {
      "id": 4,
      "type": "SVD_img2vid_Conditioning",
      "pos": [450, 100],
      "size": [315, 350],
      "widgets_values": [1024, 576, 25, 127, 6, 0.0],
      "title": "SVD-XT Image-to-Video Conditioning (25 frames)",
      "flags": {},
      "order": 3,
      "mode": 0,
      "properties": {"Node name for S&R": "SVD_img2vid_Conditioning"},
      "inputs": [
        {"name": "clip_vision", "type": "CLIP_VISION", "link": 3},
        {"name": "init_image", "type": "IMAGE", "link": 1},
        {"name": "vae", "type": "VAE", "link": 4}
      ],
      "outputs": [
        {"name": "positive", "type": "CONDITIONING", "links": [7], "slot_index": 0},
        {"name": "negative", "type": "CONDITIONING", "links": [8], "slot_index": 1},
        {"name": "latent", "type": "LATENT", "links": [9], "slot_index": 2}
      ]
    },
    {
      "id": 5,
      "type": "KSampler",
      "pos": [800, 100],
      "size": [315, 474],
      "widgets_values": [42, "fixed", 30, 6.0, "euler", "karras", 1.0],
      "title": "KSampler (30 steps)",
      "flags": {},
      "order": 4,
      "mode": 0,
      "properties": {"Node name for S&R": "KSampler"},
      "inputs": [
        {"name": "model", "type": "MODEL", "link": 6},
        {"name": "positive", "type": "CONDITIONING", "link": 7},
        {"name": "negative", "type": "CONDITIONING", "link": 8},
        {"name": "latent_image", "type": "LATENT", "link": 9}
      ],
      "outputs": [{"name": "LATENT", "type": "LATENT", "links": [10], "slot_index": 0}]
    },
    {
      "id": 6,
      "type": "VAEDecode",
      "pos": [1150, 100],
      "size": [210, 46],
      "widgets_values": [],
      "title": "VAE Decode Video Frames",
      "flags": {},
      "order": 5,
      "mode": 0,
      "properties": {"Node name for S&R": "VAEDecode"},
      "inputs": [
        {"name": "samples", "type": "LATENT", "link": 10},
        {"name": "vae", "type": "VAE", "link": 5}
      ],
      "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [11], "slot_index": 0}]
    },
    {
      "id": 7,
      "type": "VHS_VideoCombine",
      "pos": [1400, 100],
      "size": [315, 200],
      "widgets_values": [6, 0, "svd_xt_output", "video/h264-mp4", false, true],
      "title": "Combine Video Frames",
      "flags": {},
      "order": 6,
      "mode": 0,
      "properties": {"Node name for S&R": "VHS_VideoCombine"},
      "inputs": [{"name": "images", "type": "IMAGE", "link": 11}],
      "outputs": [{"name": "Filenames", "type": "VHS_FILENAMES", "links": null}]
    }
  ],
  "links": [
    [1, 1, 0, 4, 1, "IMAGE"],
    [2, 2, 0, 3, 0, "MODEL"],
    [3, 2, 1, 4, 0, "CLIP_VISION"],
    [4, 2, 2, 4, 2, "VAE"],
    [5, 2, 2, 6, 1, "VAE"],
    [6, 3, 0, 5, 0, "MODEL"],
    [7, 4, 0, 5, 1, "CONDITIONING"],
    [8, 4, 1, 5, 2, "CONDITIONING"],
    [9, 4, 2, 5, 3, "LATENT"],
    [10, 5, 0, 6, 0, "LATENT"],
    [11, 6, 0, 7, 0, "IMAGE"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "Stable Video Diffusion XT Image-to-Video Production",
      "version": "1.2.0",
      "description": "Extended animation using SVD-XT. Generate 25-frame video for longer animations with smooth motion.",
      "category": "image-to-video",
      "tags": ["svd-xt", "stable-video-diffusion", "i2v", "extended", "production"],
      "requirements": {
        "models": ["SVD-XT"],
        "custom_nodes": ["ComfyUI-VideoHelperSuite"],
        "vram_min": "18GB",
        "vram_recommended": "20GB"
      },
      "parameters": {
        "input_image": {"node_id": 1, "widget_index": 0, "type": "image", "required": true, "description": "Starting frame for video generation (1024x576 recommended)"},
        "width": {"node_id": 4, "widget_index": 0, "type": "integer", "required": false, "default": 1024, "min": 16, "max": 16384, "description": "Output video width"},
        "height": {"node_id": 4, "widget_index": 1, "type": "integer", "required": false, "default": 576, "min": 16, "max": 16384, "description": "Output video height"},
        "video_frames": {"node_id": 4, "widget_index": 2, "type": "integer", "required": false, "default": 25, "min": 1, "max": 4096, "description": "Number of frames to generate (25 for SVD-XT)"},
        "motion_bucket_id": {"node_id": 4, "widget_index": 3, "type": "integer", "required": false, "default": 127, "min": 1, "max": 1023, "description": "Motion amount (higher = more motion)"},
        "fps": {"node_id": 4, "widget_index": 4, "type": "integer", "required": false, "default": 6, "min": 1, "max": 1024, "description": "Frames per second for conditioning"},
        "augmentation_level": {"node_id": 4, "widget_index": 5, "type": "float", "required": false, "default": 0.0, "min": 0.0, "max": 10.0, "description": "Noise augmentation level"},
        "steps": {"node_id": 5, "widget_index": 2, "type": "integer", "required": false, "default": 30, "min": 1, "max": 150, "description": "Sampling steps (30 recommended)"},
        "cfg": {"node_id": 5, "widget_index": 3, "type": "float", "required": false, "default": 6.0, "min": 0.0, "max": 30.0, "description": "Classifier-free guidance scale"},
        "seed": {"node_id": 5, "widget_index": 0, "type": "integer", "required": false, "default": 42, "description": "Random seed for reproducibility"},
        "output_fps": {"node_id": 7, "widget_index": 0, "type": "integer", "required": false, "default": 6, "description": "Output video framerate"}
      },
      "outputs": {
        "video": {"node_id": 7, "type": "video", "format": "MP4 (H.264)", "resolution": "1024x576 (configurable)", "duration": "~4.2 seconds @ 6fps (25 frames)"}
      },
      "performance": {
        "avg_generation_time": "40-55 seconds",
        "vram_usage": "~18-20GB",
        "gpu_utilization": "95-100%"
      },
      "use_cases": [
        "Extended animations with smooth motion",
        "Longer video loops from single frames",
        "Cinematic camera movements",
        "Product animation showcases"
      ],
      "notes": [
        "SVD-XT generates 25 frames vs 14 frames in base SVD",
        "Requires more VRAM (~18GB vs ~16GB)",
        "Model auto-downloads on first use (~9GB)",
        "Recommended resolution: 1024x576 (16:9)",
        "Higher motion_bucket_id = more movement",
        "Linear CFG guidance improves temporal consistency"
      ]
    }
  },
  "version": 0.4
}
BIN
comfyui/workflows/image-to-video/wan22-animate-ref-image.png
Normal file
After Width: | Height: | Size: 906 KiB
BIN
comfyui/workflows/image-to-video/wan22-flf2v-end.png
Normal file
After Width: | Height: | Size: 1.7 MiB
BIN
comfyui/workflows/image-to-video/wan22-flf2v-start.png
Normal file
After Width: | Height: | Size: 2.0 MiB
BIN
comfyui/workflows/image-to-video/wan22-fun-camera-input.jpg
Normal file
After Width: | Height: | Size: 925 KiB
BIN
comfyui/workflows/image-to-video/wan22-i2v-input.jpg
Normal file
After Width: | Height: | Size: 712 KiB
373
comfyui/workflows/nsfw/README.md
Normal file
@@ -0,0 +1,373 @@
# NSFW ComfyUI Workflows

Production-ready workflows for NSFW content generation using CivitAI models.

## Available Workflows

### 1. LUSTIFY Realistic Text-to-Image
**File:** `lustify-realistic-t2i-production-v1.json`

**Model:** LUSTIFY v7.0 GGWP (`lustifySDXLNSFW_endgame.safetensors`)

**Optimized Settings:**
- Sampler: DPM++ 2M SDE
- Scheduler: Exponential
- Steps: 30
- CFG Scale: 6.0
- Resolution: 1024x1024 (SDXL)

**Features:**
- Photorealistic NSFW generation
- Optimized negative prompts for LUSTIFY
- Professional photography quality
- Detailed skin texture and natural lighting
- Supports both danbooru tags and natural language

**Use Case:** High-quality photorealistic NSFW images focusing on women in various scenarios

**Example Prompt:**
```
A beautiful woman in elegant dress, photorealistic, professional photography, high quality, detailed skin texture, natural lighting, 8k, masterpiece, best quality
```

**Negative Prompt:**
```
(worst quality, low quality:1.4), illustration, 3d, 2d, painting, cartoons, sketch, open mouth, bad anatomy, deformed, blurry, watermark, text
```

---

### 2. Pony Diffusion Anime/Furry Text-to-Image
**File:** `pony-anime-t2i-production-v1.json`

**Model:** Pony Diffusion V6 XL (`ponyDiffusionV6XL_v6StartWithThisOne.safetensors`)

**Optimized Settings:**
- Sampler: Euler Ancestral
- Scheduler: Normal
- Steps: 35
- CFG Scale: 7.5
- Resolution: 1024x1024 (SDXL)

**Features:**
- Anime, cartoon, and furry NSFW generation
- Danbooru tag support (optimized for tags)
- Balanced content training (1:1:1 safe/questionable/explicit)
- 2.6M aesthetically ranked training images
- Versatile style support

**Use Case:** High-quality anime/cartoon/furry NSFW content with tag-based prompting

**Example Prompt (Danbooru Tags):**
```
1girl, solo, long_hair, breasts, smile, looking_at_viewer, anime style, high quality, detailed, masterpiece, best quality
```

**Example Prompt (Natural Language):**
```
Anime girl with long flowing hair, smiling and looking at viewer, detailed anime art style, high quality, masterpiece
```

**Negative Prompt:**
```
(worst quality, low quality:1.4), bad anatomy, deformed, blurry, watermark, text, signature, artist name
```

---

### 3. RealVisXL Lightning Fast Text-to-Image
**File:** `realvisxl-lightning-t2i-production-v1.json`

**Model:** RealVisXL V5.0 Lightning (`realvisxlV50_v50LightningBakedvae.safetensors`)

**Optimized Settings:**
- Sampler: DPM++ SDE
- Scheduler: Karras
- Steps: 6 (Lightning fast!)
- CFG Scale: 2.0
- Resolution: 1024x1024 (SDXL)

**Features:**
- Ultra-fast 4-6 step generation (vs 30+ for standard models)
- High-quality photorealistic output
- Baked VAE for optimal quality
- Professional portraits and scenes
- Both SFW and NSFW capable

**Use Case:** Rapid photorealistic NSFW generation when speed is the priority

**Example Prompt:**
```
Beautiful woman portrait, professional photography, natural lighting, high quality, detailed, 8k
```

**Negative Prompt:**
```
(worst quality, low quality:1.4), bad anatomy, deformed, blurry, watermark, text, ugly, cartoon, anime
```

---

## Advanced Workflows

### 4. NSFW Ultimate SD Upscale
**File:** `nsfw-ultimate-upscale-production-v1.json`

**Model:** LUSTIFY v7.0 GGWP (`lustifySDXLNSFW_endgame.safetensors`)

**Upscale Settings:**
- Upscale Factor: 2x
- Upscale Model: RealESRGAN_x2.pth
- Denoise: 0.25 (preserves original composition)
- Sampler: DPM++ 2M SDE
- Scheduler: Exponential
- Steps: 20
- CFG Scale: 7.0
- Tile Size: 512x512

**Features:**
- Professional 2x upscaling for NSFW images
- Combines RealESRGAN with diffusion refinement
- Tiled processing for handling large images
- Optimized for detailed skin texture
- Low denoise preserves original composition
- Can swap checkpoint for different styles

**Use Case:** Upscale NSFW images from 1024x1024 to 2048x2048 with enhanced detail

**Enhancement Prompt:**
```
photorealistic, professional photography, high quality, detailed skin texture, natural lighting, 8k, masterpiece, best quality, sharp details
```

**Refinement Negative:**
```
(worst quality, low quality:1.4), blurry, pixelated, jpeg artifacts, bad anatomy, deformed, watermark, text
```

**Tips:**
- Use denoise 0.2-0.3 for subtle enhancement
- Match the enhancement prompt to the original generation prompt
- Swap the checkpoint node for different model styles (Pony, RealVisXL)
- Processing time: ~90-150 seconds for a 2x upscale

---

## Installation

These workflows use models downloaded via `artifact_civitai_download.sh`:

```bash
# Models are already downloaded to /workspace/models/civitai/
# and symlinked to /workspace/ComfyUI/models/checkpoints/

# Verify models are available:
ls -lh /workspace/ComfyUI/models/checkpoints/lustify*
ls -lh /workspace/ComfyUI/models/checkpoints/*pony*
ls -lh /workspace/ComfyUI/models/checkpoints/realvisx*
```

## Usage

### Via ComfyUI API:

```bash
# Load workflow
curl -X POST http://localhost:8188/api/prompt \
  -H "Content-Type: application/json" \
  -d @lustify-realistic-t2i-production-v1.json

# Modify prompt and generate
curl -X POST http://localhost:8188/api/prompt \
  -H "Content-Type: application/json" \
  -d '{
    "prompt": {
      "2": {
        "inputs": {
          "text": "Your custom NSFW prompt here"
        }
      }
    }
  }'
```
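
For scripted runs it can help to patch widget values before queueing. The sketch below is not part of this repo: it assumes the workflow has been re-exported in ComfyUI's API format (the node-keyed layout the `/api/prompt` endpoint expects, as in the second `curl` call above), that `jq` is installed, and that `lustify-realistic-t2i-api.json` is a hypothetical name for that export. Node `"2"` (positive prompt) and node `"5"` (KSampler) match the node ids used in the LUSTIFY workflow.

```bash
# Sketch: queue the LUSTIFY workflow with a custom prompt and seed.
# Assumes an API-format export (hypothetical filename) where node "2"
# is the positive CLIPTextEncode and node "5" is the KSampler.
PROMPT="${1:-A beautiful woman in elegant dress, photorealistic}"
SEED="${2:-42}"

jq --arg p "$PROMPT" --argjson s "$SEED" \
   '.["2"].inputs.text = $p | .["5"].inputs.seed = $s | {prompt: .}' \
   lustify-realistic-t2i-api.json \
| curl -s -X POST http://localhost:8188/api/prompt \
       -H "Content-Type: application/json" -d @-
```

The server replies with a `prompt_id`; once execution finishes, the result can be looked up under `/history/<prompt_id>`.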

### Via ComfyUI UI:

1. Open ComfyUI at `http://localhost:8188`
2. Click "Load" and select a workflow file
3. Modify the prompt in the "API Positive Prompt" node
4. Click "Queue Prompt" to generate

## Model Comparison

| Model | Type | Speed | Quality | Best For | Prompt Style |
|-------|------|-------|---------|----------|--------------|
| LUSTIFY v7 | Photoreal | Medium (30 steps) | Excellent | Women, realistic scenes | Natural language |
| Pony Diffusion v6 | Anime/Furry | Medium (35 steps) | Excellent | Anime, cartoon, furry | Danbooru tags |
| RealVisXL Lightning | Photoreal | Very Fast (4-6 steps) | Excellent | Quick realistic generations | Natural language |

## Tips

### For LUSTIFY:
- Use detailed prompts describing scene, lighting, and quality
- Avoid abstract/artistic terms
- Include photography keywords: "professional photography", "8k", "detailed"
- CFG 4-7 range works best

### For Pony Diffusion:
- Danbooru tags work very well (recommended over natural language)
- Use tag format: `1girl, solo, breasts, smile, looking_at_viewer`
- Mix safe/questionable/explicit content naturally
- Can do anime, cartoon, and furry styles
- Supports a wide range of artistic styles
- CFG 7-8.5 range works best

### For RealVisXL Lightning:
- Keep prompts concise for best results
- Use lower CFG (1-2) for Lightning models
- 4-6 steps is the optimal balance
- Great for batch generation

## Advanced Usage

### Adding LoRAs:
Use the `../text-to-image/lora-fusion-t2i-production-v1.json` workflow for multi-LoRA stacking support.

**How to use with NSFW models:**
1. Load the LoRA fusion workflow
2. Change the checkpoint (node 1) to your NSFW model (LUSTIFY, Pony, RealVisXL)
3. Stack up to 3 LoRAs with adjustable weights:
   - LoRA 1 (Primary): 0.7-1.0 strength
   - LoRA 2 (Secondary): 0.4-0.7 strength
   - LoRA 3 (Accent): 0.2-0.5 strength
4. Common use cases:
   - Detail enhancer + skin texture + lighting style
   - Character LoRA + pose enhancer + background style
   - Primary art style + secondary blend + color grading

### Upscaling:
Use `nsfw-ultimate-upscale-production-v1.json` for professional 2x upscaling with the LUSTIFY checkpoint.

**Workflow** (see the API sketch after this list):
1. Generate an image with any NSFW T2I workflow (LUSTIFY, Pony, RealVisXL)
2. Load the generated image into the Ultimate SD Upscale workflow
3. Use denoise 0.2-0.3 for subtle enhancement
4. Match the enhancement prompt to the original generation prompt
5. Output: 2048x2048 high-quality upscaled image
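
The same API pattern works for step 2 above; a minimal sketch (hypothetical filenames, stock ComfyUI HTTP endpoints): upload the source image, point the LoadImage node at it, and queue the upscale graph. `nsfw-ultimate-upscale-api.json` stands in for an API-format export of this workflow, in which node `"1"` is the LoadImage node.

```bash
# Sketch: upload a generated image, then queue the 2x upscale workflow.
curl -s -X POST http://localhost:8188/upload/image \
     -F "image=@lustify_realistic_00001_.png"

jq '.["1"].inputs.image = "lustify_realistic_00001_.png" | {prompt: .}' \
   nsfw-ultimate-upscale-api.json \
| curl -s -X POST http://localhost:8188/api/prompt \
       -H "Content-Type: application/json" -d @-
```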

### Style Transfer:
Use the IP-Adapter workflows in the `../image-to-image/` directory to combine with style references.

## Using Negative Embeddings

Negative embeddings (textual inversions) are small trained models that significantly improve image quality when used in negative prompts. They help avoid common artifacts like bad anatomy, unrealistic features, and low-quality outputs.

### Available Embeddings

**For SDXL Models (LUSTIFY, RealVisXL):**
- `BadX` - Fixes facial/hand artifacts, improves overall realism

**For Pony/WAI-Illustrious Models:**
- `zPDXL3` - Quality enhancement + removes censoring
- `zPDXLxxx` - Enables explicit NSFW content

**For SD 1.5 Models (future workflows):**
- `BadDream` + `UnrealisticDream` - Dreamshaper artifacts + realism
- `badhandv4` - Improves hand details
- `FastNegativeV2` - General quality enhancement
- `BadNegAnatomyV1-neg` - Better anatomy and body joints
- `easynegative` - General-purpose quality

### Installation

Embeddings are automatically downloaded and linked to `/workspace/ComfyUI/models/embeddings/` when you run:

```bash
cd /workspace/ai && bash /workspace/bin/artifact_civitai_download.sh both -c models_civitai.yaml
```

### Usage Syntax

**Basic usage in negative prompt:**
```
embedding:BadX
embedding:zPDXL3
```

**Multiple embeddings:**
```
embedding:zPDXL3, embedding:zPDXLxxx, score_4, score_5, low quality
```

**With weight adjustment:**
```
(embedding:zPDXL3:1.5), (embedding:BadX.pt:1.2), ugly, deformed
```

**Note:** When using weights, include the full filename with extension (.pt or .safetensors).

### Recommended Usage by Workflow

**LUSTIFY Realistic T2I:**
```
Negative: embedding:BadX, (worst quality, low quality:1.4), bad anatomy, deformed, blurry, watermark, text
```

**RealVisXL Lightning T2I:**
```
Negative: embedding:BadX, (worst quality, low quality:1.4), ugly, cartoon, anime
```

**Pony Anime / WAI-Illustrious T2I:**
```
Negative: embedding:zPDXL3, embedding:zPDXLxxx, score_4, score_5, score_6, bad anatomy, deformed
```

Or with custom strength:
```
Negative: (embedding:zPDXL3:1.5), (embedding:zPDXLxxx:1.2), score_4, score_5, low quality
```

**NSFW Ultimate SD Upscale:**
```
Negative: embedding:BadX, (worst quality, low quality:1.4), blurry, pixelated, jpeg artifacts
```

### Important Compatibility Notes

**CRITICAL:** SD 1.5 embeddings (BadDream, badhandv4, etc.) DO NOT work with SDXL models due to different CLIP architectures. Always use:
- **SDXL models** → `BadX` embedding
- **Pony models** → `zPDXL3` and `zPDXLxxx` embeddings
- **SD 1.5 models** → SD 1.5 embeddings only

Using the wrong embedding type will cause errors or unexpected results.

### Tips

- Start with the recommended strength of 1.0-2.0 for Pony embeddings
- `zPDXL3` removes censoring, ideal for NSFW Pony workflows
- Combine multiple embeddings for best results
- `BadX` works with all general SDXL checkpoints (LUSTIFY, RealVisXL, etc.)
- Higher CFG scales (≥11) work better with `badhandv4` (SD 1.5)

## Notes

- All workflows follow production naming standards
- VRAM requirement: ~12GB per SDXL model
- Can run one NSFW model at a time on a 24GB GPU
- Models automatically use the baked VAE when available
- Face enhancement can be added via Impact-Pack nodes

## Version History

- v1.0 (2025-11-23): Initial release with complete NSFW workflow suite
  - **Text-to-Image Workflows:**
    - LUSTIFY realistic T2I (DPM++ 2M SDE, 30 steps, CFG 6.0)
    - Pony Diffusion anime/furry T2I (Euler A, 35 steps, CFG 7.5)
    - RealVisXL Lightning fast T2I (DPM++ SDE Karras, 6 steps, CFG 2.0)
  - **Enhancement Workflows:**
    - LoRA Fusion multi-stack (in ../text-to-image/, compatible with all NSFW checkpoints)
    - NSFW Ultimate SD Upscale (2x upscaling with LUSTIFY + RealESRGAN)
  - **Total:** 3 NSFW-specific workflows + 2 enhancement workflows = 5 production workflows
256
comfyui/workflows/nsfw/lustify-realistic-t2i-production-v1.json
Normal file
@@ -0,0 +1,256 @@
{
  "last_node_id": 11,
  "last_link_id": 16,
  "nodes": [
    {
      "id": 1,
      "type": "CheckpointLoaderSimple",
      "pos": [50, 100],
      "size": {"0": 380, "1": 100},
      "flags": {},
      "order": 0,
      "mode": 0,
      "outputs": [
        {"name": "MODEL", "type": "MODEL", "links": [1], "slot_index": 0},
        {"name": "CLIP", "type": "CLIP", "links": [2, 3], "slot_index": 1},
        {"name": "VAE", "type": "VAE", "links": [4], "slot_index": 2}
      ],
      "properties": {"Node name for S&R": "CheckpointLoaderSimple"},
      "widgets_values": ["lustifySDXLNSFW_endgame.safetensors"],
      "title": "LUSTIFY v7.0 GGWP Checkpoint"
    },
    {
      "id": 2,
      "type": "CLIPTextEncode",
      "pos": [480, 100],
      "size": {"0": 450, "1": 250},
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [{"name": "clip", "type": "CLIP", "link": 2}],
      "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [5], "slot_index": 0}],
      "properties": {"Node name for S&R": "CLIPTextEncode"},
      "widgets_values": ["A beautiful woman in elegant dress, photorealistic, professional photography, high quality, detailed skin texture, natural lighting, 8k, masterpiece, best quality"],
      "title": "API Positive Prompt"
    },
    {
      "id": 3,
      "type": "CLIPTextEncode",
      "pos": [480, 400],
      "size": {"0": 450, "1": 150},
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [{"name": "clip", "type": "CLIP", "link": 3}],
      "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [6], "slot_index": 0}],
      "properties": {"Node name for S&R": "CLIPTextEncode"},
      "widgets_values": ["(worst quality, low quality:1.4), illustration, 3d, 2d, painting, cartoons, sketch, open mouth, bad anatomy, deformed, blurry, watermark, text"],
      "title": "Negative Prompt (LUSTIFY Optimized)"
    },
    {
      "id": 4,
      "type": "EmptyLatentImage",
      "pos": [480, 600],
      "size": {"0": 315, "1": 110},
      "flags": {},
      "order": 3,
      "mode": 0,
      "outputs": [{"name": "LATENT", "type": "LATENT", "links": [7], "slot_index": 0}],
      "properties": {"Node name for S&R": "EmptyLatentImage"},
      "widgets_values": [1024, 1024, 1],
      "title": "Latent Image (SDXL Resolution)"
    },
    {
      "id": 5,
      "type": "KSampler",
      "pos": [980, 100],
      "size": {"0": 350, "1": 290},
      "flags": {},
      "order": 4,
      "mode": 0,
      "inputs": [
        {"name": "model", "type": "MODEL", "link": 1},
        {"name": "positive", "type": "CONDITIONING", "link": 5},
        {"name": "negative", "type": "CONDITIONING", "link": 6},
        {"name": "latent_image", "type": "LATENT", "link": 7}
      ],
      "outputs": [{"name": "LATENT", "type": "LATENT", "links": [8], "slot_index": 0}],
      "properties": {"Node name for S&R": "KSampler"},
      "widgets_values": [0, "randomize", 30, 6.0, "dpmpp_2m_sde", "exponential", 1.0],
      "title": "LUSTIFY Sampler (DPM++ 2M SDE / Exponential)"
    },
    {
      "id": 6,
      "type": "VAEDecode",
      "pos": [1380, 100],
      "size": {"0": 210, "1": 50},
      "flags": {},
      "order": 5,
      "mode": 0,
      "inputs": [
        {"name": "samples", "type": "LATENT", "link": 8},
        {"name": "vae", "type": "VAE", "link": 4}
      ],
      "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [9], "slot_index": 0}],
      "properties": {"Node name for S&R": "VAEDecode"},
      "title": "VAE Decode"
    },
    {
      "id": 7,
      "type": "SaveImage",
      "pos": [1640, 100],
      "size": {"0": 320, "1": 290},
      "flags": {},
      "order": 6,
      "mode": 0,
      "inputs": [{"name": "images", "type": "IMAGE", "link": 9}],
      "properties": {"Node name for S&R": "SaveImage"},
      "widgets_values": ["lustify_realistic"],
      "title": "API Output Image"
    }
  ],
  "links": [
    [1, 1, 0, 5, 0, "MODEL"],
    [2, 1, 1, 2, 0, "CLIP"],
    [3, 1, 1, 3, 0, "CLIP"],
    [4, 1, 2, 6, 1, "VAE"],
    [5, 2, 0, 5, 1, "CONDITIONING"],
    [6, 3, 0, 5, 2, "CONDITIONING"],
    [7, 4, 0, 5, 3, "LATENT"],
    [8, 5, 0, 6, 0, "LATENT"],
    [9, 6, 0, 7, 0, "IMAGE"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "LUSTIFY Realistic NSFW Text-to-Image",
      "version": "1.0",
      "description": "Production workflow for LUSTIFY v7.0 GGWP checkpoint optimized for photorealistic NSFW generation with recommended settings (DPM++ 2M SDE, Exponential scheduler, 30 steps, CFG 6.0)",
      "category": "nsfw",
      "model": "lustifySDXLNSFW_endgame.safetensors",
      "recommended_settings": {
        "sampler": "dpmpp_2m_sde or dpmpp_3m_sde",
        "scheduler": "exponential or karras",
        "steps": 30,
        "cfg_scale": "4.0-7.0"
      },
      "tags": ["nsfw", "realistic", "photography", "women", "sdxl"]
    }
  },
  "version": 0.4
}
300
comfyui/workflows/nsfw/nsfw-ultimate-upscale-production-v1.json
Normal file
@@ -0,0 +1,300 @@
{
  "last_node_id": 8,
  "last_link_id": 10,
  "nodes": [
    {
      "id": 1,
      "type": "LoadImage",
      "pos": [50, 100],
      "size": [350, 100],
      "flags": {},
      "order": 0,
      "mode": 0,
      "outputs": [
        {"name": "IMAGE", "type": "IMAGE", "links": [1], "slot_index": 0},
        {"name": "MASK", "type": "MASK", "links": null, "slot_index": 1}
      ],
      "properties": {"Node name for S&R": "LoadImage"},
      "widgets_values": ["input_image.png", "image"],
      "title": "API Input Image"
    },
    {
      "id": 2,
      "type": "CheckpointLoaderSimple",
      "pos": [50, 250],
      "size": [350, 100],
      "flags": {},
      "order": 1,
      "mode": 0,
      "outputs": [
        {"name": "MODEL", "type": "MODEL", "links": [2], "slot_index": 0},
        {"name": "CLIP", "type": "CLIP", "links": [3, 4], "slot_index": 1},
        {"name": "VAE", "type": "VAE", "links": [5], "slot_index": 2}
      ],
      "properties": {"Node name for S&R": "CheckpointLoaderSimple"},
      "widgets_values": ["lustifySDXLNSFW_endgame.safetensors"],
      "title": "LUSTIFY v7.0 GGWP Checkpoint"
    },
    {
      "id": 3,
      "type": "CLIPTextEncode",
      "pos": [450, 100],
      "size": [400, 200],
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [{"name": "clip", "type": "CLIP", "link": 3}],
      "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [6], "slot_index": 0}],
      "properties": {"Node name for S&R": "CLIPTextEncode"},
      "widgets_values": ["photorealistic, professional photography, high quality, detailed skin texture, natural lighting, 8k, masterpiece, best quality, sharp details"],
      "title": "API Positive Prompt (Enhancement)"
    },
    {
      "id": 4,
      "type": "CLIPTextEncode",
      "pos": [450, 350],
      "size": [400, 200],
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [{"name": "clip", "type": "CLIP", "link": 4}],
      "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [7], "slot_index": 0}],
      "properties": {"Node name for S&R": "CLIPTextEncode"},
      "widgets_values": ["(worst quality, low quality:1.4), blurry, pixelated, jpeg artifacts, bad anatomy, deformed, watermark, text"],
      "title": "API Negative Prompt (Refinement)"
    },
    {
      "id": 8,
      "type": "UpscaleModelLoader",
      "pos": [50, 400],
      "size": [350, 100],
      "flags": {},
      "order": 4,
      "mode": 0,
      "outputs": [{"name": "UPSCALE_MODEL", "type": "UPSCALE_MODEL", "links": [10], "slot_index": 0}],
      "properties": {"Node name for S&R": "UpscaleModelLoader"},
      "widgets_values": ["RealESRGAN_x2.pth"],
      "title": "Upscale Model Loader (2x)"
    },
    {
      "id": 5,
      "type": "UltimateSDUpscale",
      "pos": [900, 100],
      "size": [315, 100],
      "flags": {},
      "order": 5,
      "mode": 0,
      "inputs": [
        {"name": "image", "type": "IMAGE", "link": 1},
        {"name": "model", "type": "MODEL", "link": 2},
        {"name": "positive", "type": "CONDITIONING", "link": 6},
        {"name": "negative", "type": "CONDITIONING", "link": 7},
        {"name": "vae", "type": "VAE", "link": 5},
        {"name": "upscale_model", "type": "UPSCALE_MODEL", "link": 10}
      ],
      "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [8, 9], "slot_index": 0}],
      "properties": {"Node name for S&R": "UltimateSDUpscale"},
      "widgets_values": [2, 42, "randomize", 20, 7.0, "dpmpp_2m_sde", "exponential", 0.25, "Linear", 512, 512, 8, 32, "None", 1.0, 64, 8, 16, true, false],
      "title": "Ultimate SD Upscale NSFW (2x)"
    },
    {
      "id": 6,
      "type": "PreviewImage",
      "pos": [1270, 100],
      "size": [315, 100],
      "flags": {},
      "order": 6,
      "mode": 0,
      "inputs": [{"name": "images", "type": "IMAGE", "link": 8}],
      "properties": {"Node name for S&R": "PreviewImage"},
      "title": "Preview Output"
    },
    {
      "id": 7,
      "type": "SaveImage",
      "pos": [1270, 400],
      "size": [315, 100],
      "flags": {},
      "order": 7,
      "mode": 0,
      "inputs": [{"name": "images", "type": "IMAGE", "link": 9}],
      "properties": {"Node name for S&R": "SaveImage"},
      "widgets_values": ["nsfw_upscale_output"],
      "title": "API Image Output"
    }
  ],
  "links": [
    [1, 1, 0, 5, 0, "IMAGE"],
    [2, 2, 0, 5, 1, "MODEL"],
    [3, 2, 1, 3, 0, "CLIP"],
    [4, 2, 1, 4, 0, "CLIP"],
    [5, 2, 2, 5, 4, "VAE"],
    [6, 3, 0, 5, 2, "CONDITIONING"],
    [7, 4, 0, 5, 3, "CONDITIONING"],
    [8, 5, 0, 6, 0, "IMAGE"],
    [9, 5, 0, 7, 0, "IMAGE"],
    [10, 8, 0, 5, 5, "UPSCALE_MODEL"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "NSFW Ultimate SD Upscale Production",
      "version": "1.0",
      "description": "Professional 2x upscaling for NSFW images using Ultimate SD Upscale with LUSTIFY checkpoint. Combines RealESRGAN upscaling with diffusion refinement for superior detail, skin texture, and photorealistic quality in adult content.",
      "category": "nsfw",
      "model": "lustifySDXLNSFW_endgame.safetensors",
      "recommended_settings": {
        "upscale_factor": "2x (use RealESRGAN_x2.pth)",
        "denoise": "0.2-0.3 (lower preserves original, higher adds detail)",
        "sampler": "dpmpp_2m_sde",
        "scheduler": "exponential",
        "steps": 20,
        "cfg_scale": "6.0-8.0",
        "tile_size": "512x512 (optimal for SDXL)"
      },
      "features": [
        "2x upscaling with RealESRGAN + diffusion refinement",
        "LUSTIFY checkpoint for photorealistic NSFW enhancement",
        "Tiled processing for handling large images",
        "Optimized for detailed skin texture and natural lighting",
        "Low denoise (0.25) preserves original composition",
        "Can swap checkpoint for different styles (Pony, RealVisXL)"
      ],
      "tags": ["nsfw", "upscaling", "ultimate-sd-upscale", "lustify", "enhancement", "production"],
      "requirements": {
        "models": ["lustifySDXLNSFW_endgame.safetensors", "RealESRGAN_x2.pth"],
        "custom_nodes": ["ComfyUI_UltimateSDUpscale"],
        "vram_min": "18GB"
      },
      "usage_notes": {
        "checkpoint_swap": "Change node 2 to use different NSFW models (pony, realvisxl, etc.)",
        "denoise_tuning": "0.2 = subtle enhancement, 0.3 = moderate refinement, 0.4+ = creative upscaling",
        "prompt_matching": "Match enhancement prompt to original generation prompt for best results",
        "tile_overlap": "8px overlap prevents visible seams in large upscales"
      },
      "performance": {
        "avg_generation_time": "90-150 seconds (1024x1024 -> 2048x2048)",
        "vram_usage": "~20-22GB (LUSTIFY + tiled processing)"
      }
    }
  },
  "version": 0.4
}
260
comfyui/workflows/nsfw/pony-anime-t2i-production-v1.json
Normal file
@@ -0,0 +1,260 @@
{
  "last_node_id": 11,
  "last_link_id": 16,
  "nodes": [
    {
      "id": 1, "type": "CheckpointLoaderSimple", "pos": [50, 100], "size": {"0": 380, "1": 100},
      "flags": {}, "order": 0, "mode": 0,
      "outputs": [
        {"name": "MODEL", "type": "MODEL", "links": [1], "slot_index": 0},
        {"name": "CLIP", "type": "CLIP", "links": [2, 3], "slot_index": 1},
        {"name": "VAE", "type": "VAE", "links": [4], "slot_index": 2}
      ],
      "properties": {"Node name for S&R": "CheckpointLoaderSimple"},
      "widgets_values": ["ponyDiffusionV6XL_v6StartWithThisOne.safetensors"],
      "title": "Pony Diffusion V6 XL Checkpoint (Anime/Furry)"
    },
    {
      "id": 2, "type": "CLIPTextEncode", "pos": [480, 100], "size": {"0": 450, "1": 250},
      "flags": {}, "order": 1, "mode": 0,
      "inputs": [{"name": "clip", "type": "CLIP", "link": 2}],
      "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [5], "slot_index": 0}],
      "properties": {"Node name for S&R": "CLIPTextEncode"},
      "widgets_values": ["1girl, solo, long_hair, breasts, smile, looking_at_viewer, anime style, high quality, detailed, masterpiece, best quality"],
      "title": "API Positive Prompt (Danbooru Tags)"
    },
    {
      "id": 3, "type": "CLIPTextEncode", "pos": [480, 400], "size": {"0": 450, "1": 150},
      "flags": {}, "order": 2, "mode": 0,
      "inputs": [{"name": "clip", "type": "CLIP", "link": 3}],
      "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [6], "slot_index": 0}],
      "properties": {"Node name for S&R": "CLIPTextEncode"},
      "widgets_values": ["(worst quality, low quality:1.4), bad anatomy, deformed, blurry, watermark, text, signature, artist name"],
      "title": "Negative Prompt (Pony Optimized)"
    },
    {
      "id": 4, "type": "EmptyLatentImage", "pos": [480, 600], "size": {"0": 315, "1": 110},
      "flags": {}, "order": 3, "mode": 0,
      "outputs": [{"name": "LATENT", "type": "LATENT", "links": [7], "slot_index": 0}],
      "properties": {"Node name for S&R": "EmptyLatentImage"},
      "widgets_values": [1024, 1024, 1],
      "title": "Latent Image (SDXL Resolution)"
    },
    {
      "id": 5, "type": "KSampler", "pos": [980, 100], "size": {"0": 350, "1": 290},
      "flags": {}, "order": 4, "mode": 0,
      "inputs": [
        {"name": "model", "type": "MODEL", "link": 1},
        {"name": "positive", "type": "CONDITIONING", "link": 5},
        {"name": "negative", "type": "CONDITIONING", "link": 6},
        {"name": "latent_image", "type": "LATENT", "link": 7}
      ],
      "outputs": [{"name": "LATENT", "type": "LATENT", "links": [8], "slot_index": 0}],
      "properties": {"Node name for S&R": "KSampler"},
      "widgets_values": [0, "randomize", 35, 7.5, "euler_ancestral", "normal", 1.0],
      "title": "Pony Sampler (Euler A / Normal)"
    },
    {
      "id": 6, "type": "VAEDecode", "pos": [1380, 100], "size": {"0": 210, "1": 50},
      "flags": {}, "order": 5, "mode": 0,
      "inputs": [
        {"name": "samples", "type": "LATENT", "link": 8},
        {"name": "vae", "type": "VAE", "link": 4}
      ],
      "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [9], "slot_index": 0}],
      "properties": {"Node name for S&R": "VAEDecode"},
      "title": "VAE Decode"
    },
    {
      "id": 7, "type": "SaveImage", "pos": [1640, 100], "size": {"0": 320, "1": 290},
      "flags": {}, "order": 6, "mode": 0,
      "inputs": [{"name": "images", "type": "IMAGE", "link": 9}],
      "properties": {"Node name for S&R": "SaveImage"},
      "widgets_values": ["pony_anime"],
      "title": "API Output Image"
    }
  ],
  "links": [
    [1, 1, 0, 5, 0, "MODEL"],
    [2, 1, 1, 2, 0, "CLIP"],
    [3, 1, 1, 3, 0, "CLIP"],
    [4, 1, 2, 6, 1, "VAE"],
    [5, 2, 0, 5, 1, "CONDITIONING"],
    [6, 3, 0, 5, 2, "CONDITIONING"],
    [7, 4, 0, 5, 3, "LATENT"],
    [8, 5, 0, 6, 0, "LATENT"],
    [9, 6, 0, 7, 0, "IMAGE"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "Pony Diffusion V6 XL Anime/Furry NSFW Text-to-Image",
      "version": "1.0",
      "description": "Production workflow for Pony Diffusion V6 XL optimized for anime, cartoon, and furry NSFW generation with danbooru tag support and balanced content (safe/questionable/explicit)",
      "category": "nsfw",
      "model": "ponyDiffusionV6XL_v6StartWithThisOne.safetensors",
      "recommended_settings": {
        "sampler": "euler_ancestral or dpmpp_2m",
        "scheduler": "normal or karras",
        "steps": "30-40",
        "cfg_scale": "7.0-8.5"
      },
      "training_info": {
        "images": "2.6M aesthetically ranked",
        "ratio": "1:1:1 safe/questionable/explicit"
      },
      "tags": ["nsfw", "anime", "furry", "cartoon", "versatile", "danbooru", "sdxl"]
    }
  },
  "version": 0.4
}
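The Pony workflow above exposes its danbooru-tag prompt on node 2 ("API Positive Prompt (Danbooru Tags)") and its seed on node 5 (KSampler, widgets_values[0]). A minimal sketch of patching both before queueing; the tag string is only an example, and the path comes from the diff header above.

import json

with open("comfyui/workflows/nsfw/pony-anime-t2i-production-v1.json") as f:
    wf = json.load(f)

nodes = {n["id"]: n for n in wf["nodes"]}
nodes[2]["widgets_values"][0] = "1girl, solo, outdoors, scenery, smile, masterpiece, best quality"
nodes[5]["widgets_values"][0] = 123456789
nodes[5]["widgets_values"][1] = "fixed"  # keep the seed instead of re-randomizing per run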
@@ -0,0 +1,263 @@
{
  "last_node_id": 11,
  "last_link_id": 16,
  "nodes": [
    {
      "id": 1, "type": "CheckpointLoaderSimple", "pos": [50, 100], "size": {"0": 380, "1": 100},
      "flags": {}, "order": 0, "mode": 0,
      "outputs": [
        {"name": "MODEL", "type": "MODEL", "links": [1], "slot_index": 0},
        {"name": "CLIP", "type": "CLIP", "links": [2, 3], "slot_index": 1},
        {"name": "VAE", "type": "VAE", "links": [4], "slot_index": 2}
      ],
      "properties": {"Node name for S&R": "CheckpointLoaderSimple"},
      "widgets_values": ["realvisxlV50_v50LightningBakedvae.safetensors"],
      "title": "RealVisXL V5.0 Lightning Checkpoint"
    },
    {
      "id": 2, "type": "CLIPTextEncode", "pos": [480, 100], "size": {"0": 450, "1": 250},
      "flags": {}, "order": 1, "mode": 0,
      "inputs": [{"name": "clip", "type": "CLIP", "link": 2}],
      "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [5], "slot_index": 0}],
      "properties": {"Node name for S&R": "CLIPTextEncode"},
      "widgets_values": ["Beautiful woman portrait, professional photography, natural lighting, high quality, detailed, 8k"],
      "title": "API Positive Prompt"
    },
    {
      "id": 3, "type": "CLIPTextEncode", "pos": [480, 400], "size": {"0": 450, "1": 150},
      "flags": {}, "order": 2, "mode": 0,
      "inputs": [{"name": "clip", "type": "CLIP", "link": 3}],
      "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [6], "slot_index": 0}],
      "properties": {"Node name for S&R": "CLIPTextEncode"},
      "widgets_values": ["(worst quality, low quality:1.4), bad anatomy, deformed, blurry, watermark, text, ugly, cartoon, anime"],
      "title": "Negative Prompt (RealVisXL Optimized)"
    },
    {
      "id": 4, "type": "EmptyLatentImage", "pos": [480, 600], "size": {"0": 315, "1": 110},
      "flags": {}, "order": 3, "mode": 0,
      "outputs": [{"name": "LATENT", "type": "LATENT", "links": [7], "slot_index": 0}],
      "properties": {"Node name for S&R": "EmptyLatentImage"},
      "widgets_values": [1024, 1024, 1],
      "title": "Latent Image (SDXL Resolution)"
    },
    {
      "id": 5, "type": "KSampler", "pos": [980, 100], "size": {"0": 350, "1": 290},
      "flags": {}, "order": 4, "mode": 0,
      "inputs": [
        {"name": "model", "type": "MODEL", "link": 1},
        {"name": "positive", "type": "CONDITIONING", "link": 5},
        {"name": "negative", "type": "CONDITIONING", "link": 6},
        {"name": "latent_image", "type": "LATENT", "link": 7}
      ],
      "outputs": [{"name": "LATENT", "type": "LATENT", "links": [8], "slot_index": 0}],
      "properties": {"Node name for S&R": "KSampler"},
      "widgets_values": [0, "randomize", 6, 2.0, "dpmpp_sde", "karras", 1.0],
      "title": "Lightning Sampler (DPM++ SDE Karras / 6 Steps)"
    },
    {
      "id": 6, "type": "VAEDecode", "pos": [1380, 100], "size": {"0": 210, "1": 50},
      "flags": {}, "order": 5, "mode": 0,
      "inputs": [
        {"name": "samples", "type": "LATENT", "link": 8},
        {"name": "vae", "type": "VAE", "link": 4}
      ],
      "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [9], "slot_index": 0}],
      "properties": {"Node name for S&R": "VAEDecode"},
      "title": "VAE Decode"
    },
    {
      "id": 7, "type": "SaveImage", "pos": [1640, 100], "size": {"0": 320, "1": 290},
      "flags": {}, "order": 6, "mode": 0,
      "inputs": [{"name": "images", "type": "IMAGE", "link": 9}],
      "properties": {"Node name for S&R": "SaveImage"},
      "widgets_values": ["realvisxl_lightning"],
      "title": "API Output Image"
    }
  ],
  "links": [
    [1, 1, 0, 5, 0, "MODEL"],
    [2, 1, 1, 2, 0, "CLIP"],
    [3, 1, 1, 3, 0, "CLIP"],
    [4, 1, 2, 6, 1, "VAE"],
    [5, 2, 0, 5, 1, "CONDITIONING"],
    [6, 3, 0, 5, 2, "CONDITIONING"],
    [7, 4, 0, 5, 3, "LATENT"],
    [8, 5, 0, 6, 0, "LATENT"],
    [9, 6, 0, 7, 0, "IMAGE"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "RealVisXL V5.0 Lightning Fast NSFW Text-to-Image",
      "version": "1.0",
      "description": "Production workflow for RealVisXL V5.0 Lightning checkpoint optimized for ultra-fast photorealistic NSFW generation with 4-6 step inference (vs 30+ for standard models)",
      "category": "nsfw",
      "model": "realvisxlV50_v50LightningBakedvae.safetensors",
      "recommended_settings": {
        "sampler": "dpmpp_sde with karras scheduler",
        "scheduler": "karras",
        "steps": "4-6 (Lightning optimized)",
        "cfg_scale": "1.0-2.0 (lower for Lightning)"
      },
      "features": [
        "Ultra-fast 4-6 step generation",
        "High-quality photorealistic output",
        "Baked VAE for optimal quality",
        "Professional portraits and scenes",
        "Both SFW and NSFW capable"
      ],
      "tags": ["nsfw", "realistic", "photorealistic", "lightning", "fast", "sdxl"]
    }
  },
  "version": 0.4
}
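Per recommended_settings above, Lightning checkpoints want 4-6 steps and CFG 1.0-2.0 rather than standard SDXL values. A minimal sketch of retuning node 5 (the KSampler) accordingly; the widgets_values layout [seed, control_after_generate, steps, cfg, sampler_name, scheduler, denoise] matches the array in the file above, and the file path is hypothetical because this file's name was lost from the diff header.

import json

with open("realvisxl-lightning-t2i-production-v1.json") as f:  # hypothetical filename
    wf = json.load(f)

sampler = next(n for n in wf["nodes"] if n["id"] == 5)
sampler["widgets_values"][2] = 4     # steps: 4-6 per recommended_settings
sampler["widgets_values"][3] = 1.5   # cfg: 1.0-2.0 for Lightning checkpoints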
comfyui/workflows/text-to-image/flux-dev-t2i-production-v1.json (new file, 362 lines)
@@ -0,0 +1,362 @@
{
  "last_node_id": 9,
  "last_link_id": 13,
  "nodes": [
    {
      "id": 1, "type": "UNETLoader", "pos": [50, 100], "size": [350, 100],
      "flags": {}, "order": 0, "mode": 0,
      "outputs": [{"name": "MODEL", "type": "MODEL", "links": [1], "slot_index": 0}],
      "properties": {"Node name for S&R": "UNETLoader"},
      "widgets_values": ["flux1-dev.safetensors", "default"],
      "title": "FLUX Dev UNET Loader"
    },
    {
      "id": 2, "type": "DualCLIPLoader", "pos": [50, 250], "size": [350, 100],
      "flags": {}, "order": 1, "mode": 0,
      "outputs": [{"name": "CLIP", "type": "CLIP", "links": [2], "slot_index": 0}],
      "properties": {"Node name for S&R": "DualCLIPLoader"},
      "widgets_values": ["t5xxl_fp16.safetensors", "clip_l.safetensors", "flux"],
      "title": "FLUX CLIP Loader"
    },
    {
      "id": 3, "type": "CLIPTextEncodeFlux", "pos": [450, 100], "size": [400, 250],
      "flags": {}, "order": 2, "mode": 0,
      "inputs": [{"name": "clip", "type": "CLIP", "link": 2}],
      "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [3, 8], "slot_index": 0}],
      "properties": {"Node name for S&R": "CLIPTextEncodeFlux"},
      "widgets_values": [
        "A beautiful mountain landscape at sunset, highly detailed, professional photography, cinematic lighting, 8k ultra HD",
        "A beautiful mountain landscape at sunset, highly detailed, professional photography, cinematic lighting, 8k ultra HD",
        3.5
      ],
      "title": "API Positive Prompt"
    },
    {
      "id": 9, "type": "ConditioningZeroOut", "pos": [450, 400], "size": [315, 60],
      "flags": {}, "order": 3, "mode": 0,
      "inputs": [{"name": "conditioning", "type": "CONDITIONING", "link": 8}],
      "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [9], "slot_index": 0}],
      "properties": {"Node name for S&R": "ConditioningZeroOut"},
      "title": "Empty Negative Conditioning"
    },
    {
      "id": 4, "type": "EmptyLatentImage", "pos": [450, 500], "size": [315, 106],
      "flags": {}, "order": 4, "mode": 0,
      "outputs": [{"name": "LATENT", "type": "LATENT", "links": [4], "slot_index": 0}],
      "properties": {"Node name for S&R": "EmptyLatentImage"},
      "widgets_values": [1024, 1024, 1],
      "title": "API Latent Image Config"
    },
    {
      "id": 5, "type": "KSampler", "pos": [900, 100], "size": [315, 474],
      "flags": {}, "order": 5, "mode": 0,
      "inputs": [
        {"name": "model", "type": "MODEL", "link": 1},
        {"name": "positive", "type": "CONDITIONING", "link": 3},
        {"name": "negative", "type": "CONDITIONING", "link": 9},
        {"name": "latent_image", "type": "LATENT", "link": 4}
      ],
      "outputs": [{"name": "LATENT", "type": "LATENT", "links": [5], "slot_index": 0}],
      "properties": {"Node name for S&R": "KSampler"},
      "widgets_values": [42, "fixed", 20, 1.0, "euler", "normal", 1.0],
      "title": "FLUX Dev Sampler (20 steps)"
    },
    {
      "id": 6, "type": "VAELoader", "pos": [900, 600], "size": [315, 60],
      "flags": {}, "order": 6, "mode": 0,
      "outputs": [{"name": "VAE", "type": "VAE", "links": [6], "slot_index": 0}],
      "properties": {"Node name for S&R": "VAELoader"},
      "widgets_values": ["ae.safetensors"],
      "title": "FLUX VAE Loader"
    },
    {
      "id": 7, "type": "VAEDecode", "pos": [1270, 100], "size": [210, 46],
      "flags": {}, "order": 7, "mode": 0,
      "inputs": [
        {"name": "samples", "type": "LATENT", "link": 5},
        {"name": "vae", "type": "VAE", "link": 6}
      ],
      "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [7], "slot_index": 0}],
      "properties": {"Node name for S&R": "VAEDecode"},
      "title": "VAE Decode"
    },
    {
      "id": 8, "type": "SaveImage", "pos": [1530, 100], "size": [400, 100],
      "flags": {}, "order": 8, "mode": 0,
      "inputs": [{"name": "images", "type": "IMAGE", "link": 7}],
      "properties": {"Node name for S&R": "SaveImage"},
      "widgets_values": ["flux_dev_output"],
      "title": "API Image Output"
    }
  ],
  "links": [
    [1, 1, 0, 5, 0, "MODEL"],
    [2, 2, 0, 3, 0, "CLIP"],
    [3, 3, 0, 5, 1, "CONDITIONING"],
    [4, 4, 0, 5, 3, "LATENT"],
    [5, 5, 0, 7, 0, "LATENT"],
    [6, 6, 0, 7, 1, "VAE"],
    [7, 7, 0, 8, 0, "IMAGE"],
    [8, 3, 0, 9, 0, "CONDITIONING"],
    [9, 9, 0, 5, 2, "CONDITIONING"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "FLUX Dev Text-to-Image Production",
      "version": "1.2.0",
      "description": "High-quality text-to-image generation using FLUX.1-dev (20-50 steps). Optimized for final production outputs with excellent detail and coherence.",
      "category": "text-to-image",
      "tags": ["flux", "dev", "high-quality", "production", "t2i"],
      "requirements": {
        "models": ["FLUX.1-dev", "CLIP-L", "T5-XXL FP16"],
        "custom_nodes": [],
        "vram_min": "20GB",
        "vram_recommended": "24GB"
      },
      "parameters": {
        "prompt": {"node_id": 3, "widget_index": 0, "type": "string", "required": true, "default": "A beautiful mountain landscape at sunset", "description": "Text description of desired image"},
        "width": {"node_id": 4, "widget_index": 0, "type": "integer", "required": false, "default": 1024, "min": 512, "max": 2048, "description": "Image width in pixels"},
        "height": {"node_id": 4, "widget_index": 1, "type": "integer", "required": false, "default": 1024, "min": 512, "max": 2048, "description": "Image height in pixels"},
        "seed": {"node_id": 5, "widget_index": 0, "type": "integer", "required": false, "default": 42, "min": 0, "max": 4294967295, "description": "Random seed for reproducibility"},
        "steps": {"node_id": 5, "widget_index": 2, "type": "integer", "required": false, "default": 20, "min": 10, "max": 50, "description": "Number of sampling steps (20-50 recommended for FLUX Dev)"},
        "guidance": {"node_id": 3, "widget_index": 2, "type": "float", "required": false, "default": 3.5, "min": 1.0, "max": 10.0, "description": "Guidance scale (3.5 recommended for FLUX)"}
      },
      "outputs": {
        "image": {"node_id": 8, "type": "image", "format": "PNG", "resolution": "1024x1024 (configurable)"}
      },
      "performance": {
        "avg_generation_time": "25-35 seconds (20 steps), 60-75 seconds (50 steps)",
        "vram_usage": "~20-22GB",
        "gpu_utilization": "95-100%"
      },
      "notes": [
        "FLUX uses UNETLoader instead of CheckpointLoader",
        "DualCLIPLoader loads both T5-XXL and CLIP-L text encoders",
        "CLIPTextEncodeFlux is FLUX-specific text encoder",
        "No negative prompt for FLUX - guidance is handled differently",
        "CFG scale of 1.0 recommended, guidance in text encoder"
      ]
    }
  },
  "version": 0.4
}
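The workflow_info.parameters block above maps each API-facing parameter to a node_id and a widget_index inside that node's widgets_values array. A minimal client-side patcher driven by that map might look like the following sketch (pure JSON manipulation, no ComfyUI calls assumed). Note that CLIPTextEncodeFlux stores the prompt string twice, at indices 0 and 1, so a client may want to write both.

import json

def apply_params(workflow: dict, values: dict) -> dict:
    """Patch widgets_values using the workflow_info.parameters map embedded in the file."""
    params = workflow["extra"]["workflow_info"]["parameters"]
    nodes = {n["id"]: n for n in workflow["nodes"]}
    for name, value in values.items():
        spec = params[name]
        nodes[spec["node_id"]]["widgets_values"][spec["widget_index"]] = value
    return workflow

with open("comfyui/workflows/text-to-image/flux-dev-t2i-production-v1.json") as f:
    wf = json.load(f)

wf = apply_params(wf, {"prompt": "A foggy pine forest at dawn", "seed": 7, "steps": 28})
# The map only targets widget_index 0 of node 3; mirror the prompt into index 1 as well:
next(n for n in wf["nodes"] if n["id"] == 3)["widgets_values"][1] = "A foggy pine forest at dawn"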
@@ -0,0 +1,363 @@
{
  "last_node_id": 9,
  "last_link_id": 9,
  "nodes": [
    {
      "id": 1, "type": "UNETLoader", "pos": [50, 100], "size": [350, 100],
      "flags": {}, "order": 0, "mode": 0,
      "outputs": [{"name": "MODEL", "type": "MODEL", "links": [1], "slot_index": 0}],
      "properties": {"Node name for S&R": "UNETLoader"},
      "widgets_values": ["flux1-schnell.safetensors", "default"],
      "title": "FLUX Schnell UNET Loader"
    },
    {
      "id": 2, "type": "DualCLIPLoader", "pos": [50, 250], "size": [350, 100],
      "flags": {}, "order": 1, "mode": 0,
      "outputs": [{"name": "CLIP", "type": "CLIP", "links": [2], "slot_index": 0}],
      "properties": {"Node name for S&R": "DualCLIPLoader"},
      "widgets_values": ["t5xxl_fp16.safetensors", "clip_l.safetensors", "flux"],
      "title": "FLUX CLIP Loader"
    },
    {
      "id": 3, "type": "CLIPTextEncodeFlux", "pos": [450, 100], "size": [400, 250],
      "flags": {}, "order": 2, "mode": 0,
      "inputs": [{"name": "clip", "type": "CLIP", "link": 2}],
      "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [3, 8], "slot_index": 0}],
      "properties": {"Node name for S&R": "CLIPTextEncodeFlux"},
      "widgets_values": [
        "A beautiful mountain landscape at sunset, highly detailed, cinematic lighting, 8k",
        "A beautiful mountain landscape at sunset, highly detailed, cinematic lighting, 8k",
        3.5
      ],
      "title": "API Positive Prompt"
    },
    {
      "id": 9, "type": "ConditioningZeroOut", "pos": [450, 400], "size": [315, 60],
      "flags": {}, "order": 3, "mode": 0,
      "inputs": [{"name": "conditioning", "type": "CONDITIONING", "link": 8}],
      "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [9], "slot_index": 0}],
      "properties": {"Node name for S&R": "ConditioningZeroOut"},
      "title": "Empty Negative Conditioning"
    },
    {
      "id": 4, "type": "EmptyLatentImage", "pos": [450, 500], "size": [315, 106],
      "flags": {}, "order": 4, "mode": 0,
      "outputs": [{"name": "LATENT", "type": "LATENT", "links": [4], "slot_index": 0}],
      "properties": {"Node name for S&R": "EmptyLatentImage"},
      "widgets_values": [1024, 1024, 1],
      "title": "API Latent Image Config"
    },
    {
      "id": 5, "type": "KSampler", "pos": [900, 100], "size": [315, 474],
      "flags": {}, "order": 5, "mode": 0,
      "inputs": [
        {"name": "model", "type": "MODEL", "link": 1},
        {"name": "positive", "type": "CONDITIONING", "link": 3},
        {"name": "negative", "type": "CONDITIONING", "link": 9},
        {"name": "latent_image", "type": "LATENT", "link": 4}
      ],
      "outputs": [{"name": "LATENT", "type": "LATENT", "links": [5], "slot_index": 0}],
      "properties": {"Node name for S&R": "KSampler"},
      "widgets_values": [42, "fixed", 4, 1.0, "euler", "normal", 1.0],
      "title": "FLUX Schnell Sampler (4 steps)"
    },
    {
      "id": 6, "type": "VAELoader", "pos": [900, 600], "size": [315, 60],
      "flags": {}, "order": 6, "mode": 0,
      "outputs": [{"name": "VAE", "type": "VAE", "links": [6], "slot_index": 0}],
      "properties": {"Node name for S&R": "VAELoader"},
      "widgets_values": ["ae.safetensors"],
      "title": "FLUX VAE Loader"
    },
    {
      "id": 7, "type": "VAEDecode", "pos": [1270, 100], "size": [210, 46],
      "flags": {}, "order": 7, "mode": 0,
      "inputs": [
        {"name": "samples", "type": "LATENT", "link": 5},
        {"name": "vae", "type": "VAE", "link": 6}
      ],
      "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [7], "slot_index": 0}],
      "properties": {"Node name for S&R": "VAEDecode"},
      "title": "VAE Decode"
    },
    {
      "id": 8, "type": "SaveImage", "pos": [1530, 100], "size": [400, 100],
      "flags": {}, "order": 8, "mode": 0,
      "inputs": [{"name": "images", "type": "IMAGE", "link": 7}],
      "properties": {"Node name for S&R": "SaveImage"},
      "widgets_values": ["flux_schnell_output"],
      "title": "API Image Output"
    }
  ],
  "links": [
    [1, 1, 0, 5, 0, "MODEL"],
    [2, 2, 0, 3, 0, "CLIP"],
    [3, 3, 0, 5, 1, "CONDITIONING"],
    [4, 4, 0, 5, 3, "LATENT"],
    [5, 5, 0, 7, 0, "LATENT"],
    [6, 6, 0, 7, 1, "VAE"],
    [7, 7, 0, 8, 0, "IMAGE"],
    [8, 3, 0, 9, 0, "CONDITIONING"],
    [9, 9, 0, 5, 2, "CONDITIONING"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "FLUX Schnell Text-to-Image Production",
      "version": "1.2.0",
      "description": "Fast text-to-image generation using FLUX.1-schnell (4 steps). Optimized for rapid prototyping and iteration.",
      "category": "text-to-image",
      "tags": ["flux", "schnell", "fast", "production", "t2i"],
      "requirements": {
        "models": ["FLUX.1-schnell", "CLIP-L", "T5-XXL FP16"],
        "custom_nodes": [],
        "vram_min": "16GB",
        "vram_recommended": "24GB"
      },
      "parameters": {
        "prompt": {"node_id": 3, "widget_index": 0, "type": "string", "required": true, "default": "A beautiful mountain landscape at sunset", "description": "Text description of desired image"},
        "width": {"node_id": 4, "widget_index": 0, "type": "integer", "required": false, "default": 1024, "min": 512, "max": 2048, "description": "Image width in pixels"},
        "height": {"node_id": 4, "widget_index": 1, "type": "integer", "required": false, "default": 1024, "min": 512, "max": 2048, "description": "Image height in pixels"},
        "seed": {"node_id": 5, "widget_index": 0, "type": "integer", "required": false, "default": 42, "min": 0, "max": 4294967295, "description": "Random seed for reproducibility"},
        "steps": {"node_id": 5, "widget_index": 2, "type": "integer", "required": false, "default": 4, "min": 1, "max": 10, "description": "Number of sampling steps (4 recommended for FLUX Schnell)"},
        "guidance": {"node_id": 3, "widget_index": 2, "type": "float", "required": false, "default": 3.5, "min": 1.0, "max": 10.0, "description": "Guidance scale (3.5 recommended for FLUX)"}
      },
      "outputs": {
        "image": {"node_id": 8, "type": "image", "format": "PNG", "resolution": "1024x1024 (configurable)"}
      },
      "performance": {
        "avg_generation_time": "5-8 seconds (4 steps)",
        "vram_usage": "~16-18GB",
        "gpu_utilization": "95-100%"
      },
      "notes": [
        "FLUX uses UNETLoader instead of CheckpointLoader",
        "DualCLIPLoader loads both T5-XXL and CLIP-L text encoders",
        "CLIPTextEncodeFlux is FLUX-specific text encoder",
        "No negative prompt for FLUX - guidance is handled differently",
        "CFG scale of 1.0 recommended, guidance in text encoder",
        "Schnell is optimized for 4 steps - fastest FLUX variant"
      ]
    }
  },
  "version": 0.4
}
@@ -0,0 +1,408 @@
{
  "last_node_id": 15,
  "last_link_id": 24,
  "nodes": [
    {
      "id": 1, "type": "CheckpointLoaderSimple", "pos": [50, 100], "size": {"0": 380, "1": 100},
      "flags": {}, "order": 0, "mode": 0,
      "outputs": [
        {"name": "MODEL", "type": "MODEL", "links": [1], "slot_index": 0},
        {"name": "CLIP", "type": "CLIP", "links": [2], "slot_index": 1},
        {"name": "VAE", "type": "VAE", "links": [3], "slot_index": 2}
      ],
      "properties": {"Node name for S&R": "CheckpointLoaderSimple"},
      "widgets_values": ["sd_xl_base_1.0.safetensors"],
      "title": "Base Checkpoint Loader"
    },
    {
      "id": 2, "type": "LoraLoader", "pos": [480, 100], "size": {"0": 380, "1": 130},
      "flags": {}, "order": 1, "mode": 0,
      "inputs": [
        {"name": "model", "type": "MODEL", "link": 1},
        {"name": "clip", "type": "CLIP", "link": 2}
      ],
      "outputs": [
        {"name": "MODEL", "type": "MODEL", "links": [4], "slot_index": 0},
        {"name": "CLIP", "type": "CLIP", "links": [5], "slot_index": 1}
      ],
      "properties": {"Node name for S&R": "LoraLoader"},
      "widgets_values": ["add-detail-xl.safetensors", 0.8, 0.8],
      "title": "LoRA 1 (Primary Style)"
    },
    {
      "id": 3, "type": "LoraLoader", "pos": [920, 100], "size": {"0": 380, "1": 130},
      "flags": {}, "order": 2, "mode": 0,
      "inputs": [
        {"name": "model", "type": "MODEL", "link": 4},
        {"name": "clip", "type": "CLIP", "link": 5}
      ],
      "outputs": [
        {"name": "MODEL", "type": "MODEL", "links": [6], "slot_index": 0},
        {"name": "CLIP", "type": "CLIP", "links": [7], "slot_index": 1}
      ],
      "properties": {"Node name for S&R": "LoraLoader"},
      "widgets_values": ["add-detail-xl.safetensors", 0.5, 0.5],
      "title": "LoRA 2 (Secondary Style)"
    },
    {
      "id": 4, "type": "LoraLoader", "pos": [1360, 100], "size": {"0": 380, "1": 130},
      "flags": {}, "order": 3, "mode": 0,
      "inputs": [
        {"name": "model", "type": "MODEL", "link": 6},
        {"name": "clip", "type": "CLIP", "link": 7}
      ],
      "outputs": [
        {"name": "MODEL", "type": "MODEL", "links": [8], "slot_index": 0},
        {"name": "CLIP", "type": "CLIP", "links": [9, 10], "slot_index": 1}
      ],
      "properties": {"Node name for S&R": "LoraLoader"},
      "widgets_values": ["add-detail-xl.safetensors", 0.3, 0.3],
      "title": "LoRA 3 (Accent/Detail)"
    },
    {
      "id": 5, "type": "CLIPTextEncode", "pos": [50, 280], "size": {"0": 450, "1": 250},
      "flags": {}, "order": 4, "mode": 0,
      "inputs": [{"name": "clip", "type": "CLIP", "link": 9}],
      "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [11], "slot_index": 0}],
      "properties": {"Node name for S&R": "CLIPTextEncode"},
      "widgets_values": ["A beautiful scenic landscape with mountains, vibrant colors, detailed, high quality, masterpiece, best quality, 8k"],
      "title": "API Positive Prompt"
    },
    {
      "id": 6, "type": "CLIPTextEncode", "pos": [50, 580], "size": {"0": 450, "1": 150},
      "flags": {}, "order": 5, "mode": 0,
      "inputs": [{"name": "clip", "type": "CLIP", "link": 10}],
      "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [12], "slot_index": 0}],
      "properties": {"Node name for S&R": "CLIPTextEncode"},
      "widgets_values": ["(worst quality, low quality:1.4), bad anatomy, deformed, blurry, watermark, text"],
      "title": "Negative Prompt"
    },
    {
      "id": 7, "type": "EmptyLatentImage", "pos": [560, 580], "size": {"0": 315, "1": 110},
      "flags": {}, "order": 6, "mode": 0,
      "outputs": [{"name": "LATENT", "type": "LATENT", "links": [13], "slot_index": 0}],
      "properties": {"Node name for S&R": "EmptyLatentImage"},
      "widgets_values": [1024, 1024, 1],
      "title": "Latent Image (SDXL Resolution)"
    },
    {
      "id": 8, "type": "KSampler", "pos": [920, 280], "size": {"0": 350, "1": 290},
      "flags": {}, "order": 7, "mode": 0,
      "inputs": [
        {"name": "model", "type": "MODEL", "link": 8},
        {"name": "positive", "type": "CONDITIONING", "link": 11},
        {"name": "negative", "type": "CONDITIONING", "link": 12},
        {"name": "latent_image", "type": "LATENT", "link": 13}
      ],
      "outputs": [{"name": "LATENT", "type": "LATENT", "links": [14], "slot_index": 0}],
      "properties": {"Node name for S&R": "KSampler"},
      "widgets_values": [0, "randomize", 30, 7.0, "dpmpp_2m_sde", "karras", 1.0],
      "title": "KSampler (Standard Settings)"
    },
    {
      "id": 9, "type": "VAEDecode", "pos": [1320, 280], "size": {"0": 210, "1": 50},
      "flags": {}, "order": 8, "mode": 0,
      "inputs": [
        {"name": "samples", "type": "LATENT", "link": 14},
        {"name": "vae", "type": "VAE", "link": 3}
      ],
      "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [15], "slot_index": 0}],
      "properties": {"Node name for S&R": "VAEDecode"},
      "title": "VAE Decode"
    },
    {
      "id": 10, "type": "SaveImage", "pos": [1580, 280], "size": {"0": 320, "1": 290},
      "flags": {}, "order": 9, "mode": 0,
      "inputs": [{"name": "images", "type": "IMAGE", "link": 15}],
      "properties": {"Node name for S&R": "SaveImage"},
      "widgets_values": ["lora_fusion"],
      "title": "API Output Image"
    }
  ],
  "links": [
    [1, 1, 0, 2, 0, "MODEL"],
    [2, 1, 1, 2, 1, "CLIP"],
    [3, 1, 2, 9, 1, "VAE"],
    [4, 2, 0, 3, 0, "MODEL"],
    [5, 2, 1, 3, 1, "CLIP"],
    [6, 3, 0, 4, 0, "MODEL"],
    [7, 3, 1, 4, 1, "CLIP"],
    [8, 4, 0, 8, 0, "MODEL"],
    [9, 4, 1, 5, 0, "CLIP"],
    [10, 4, 1, 6, 0, "CLIP"],
    [11, 5, 0, 8, 1, "CONDITIONING"],
    [12, 6, 0, 8, 2, "CONDITIONING"],
    [13, 7, 0, 8, 3, "LATENT"],
    [14, 8, 0, 9, 0, "LATENT"],
    [15, 9, 0, 10, 0, "IMAGE"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "LoRA Fusion Multi-Stack Text-to-Image",
      "version": "1.0",
      "description": "Production workflow for combining multiple LoRA models with adjustable weights. Stack up to 3 LoRAs with fine-tuned strength control for style mixing and enhancement. Works with any SDXL checkpoint including NSFW models.",
      "category": "text-to-image",
      "features": [
        "Triple LoRA stacking with independent weight control",
        "Compatible with any SDXL checkpoint (SFW/NSFW)",
        "Hierarchical strength: Primary (0.7-1.0), Secondary (0.4-0.7), Accent (0.2-0.5)",
        "Supports style LoRAs, detail enhancers, and character LoRAs",
        "Easy to disable individual LoRAs by setting weights to 0.0"
      ],
      "recommended_usage": {
        "lora_1_strength": "0.7-1.0 (primary style influence)",
        "lora_2_strength": "0.4-0.7 (secondary style blend)",
        "lora_3_strength": "0.2-0.5 (subtle accent/details)",
        "disable_lora": "Set both model and clip weights to 0.0"
      },
      "examples": [
        {
          "use_case": "Portrait Enhancement",
          "lora_1": "Detail enhancer (0.8)",
          "lora_2": "Skin texture LoRA (0.5)",
          "lora_3": "Lighting style (0.3)"
        },
        {
          "use_case": "Artistic Style Fusion",
          "lora_1": "Primary art style (1.0)",
          "lora_2": "Secondary style blend (0.6)",
          "lora_3": "Color grading (0.4)"
        },
        {
          "use_case": "Character + Style",
          "lora_1": "Character LoRA (0.9)",
          "lora_2": "Pose/anatomy enhancer (0.5)",
          "lora_3": "Background style (0.3)"
        }
      ],
      "tags": ["lora", "multi-stack", "fusion", "style-mixing", "enhancement", "sdxl"]
    }
  },
  "version": 0.4
}
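The three LoraLoader stages above (nodes 2, 3, 4) each carry widgets_values = [lora_name, model_strength, clip_strength], and recommended_usage says a stage is disabled by zeroing both strengths. A minimal sketch of retuning the stack before use; the file path is hypothetical, since this file's name was lost from the diff header.

import json

with open("lora-fusion-t2i-production-v1.json") as f:  # hypothetical filename
    wf = json.load(f)

# node_id -> (lora_name or None to keep current, strength for both model and clip)
weights = {
    2: ("add-detail-xl.safetensors", 0.9),  # primary: 0.7-1.0
    3: ("add-detail-xl.safetensors", 0.5),  # secondary: 0.4-0.7
    4: (None, 0.0),                         # disabled per recommended_usage.disable_lora
}
for node in wf["nodes"]:
    if node["id"] in weights:
        name, strength = weights[node["id"]]
        if name is not None:
            node["widgets_values"][0] = name
        node["widgets_values"][1] = strength  # model strength
        node["widgets_values"][2] = strength  # clip strength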
@@ -0,0 +1,376 @@
{
  "last_node_id": 9,
  "last_link_id": 10,
  "nodes": [
    {
      "id": 1, "type": "CheckpointLoaderSimple", "pos": [50, 100], "size": [350, 100],
      "flags": {}, "order": 0, "mode": 0,
      "outputs": [
        {"name": "MODEL", "type": "MODEL", "links": [1], "slot_index": 0},
        {"name": "CLIP", "type": "CLIP", "links": null},
        {"name": "VAE", "type": "VAE", "links": [4], "slot_index": 2}
      ],
      "properties": {"Node name for S&R": "CheckpointLoaderSimple"},
      "widgets_values": ["sd3.5_large.safetensors"],
      "title": "SD3.5 Large Checkpoint Loader"
    },
    {
      "id": 9, "type": "TripleCLIPLoader", "pos": [50, 250], "size": [350, 150],
      "flags": {}, "order": 1, "mode": 0,
      "outputs": [{"name": "CLIP", "type": "CLIP", "links": [9, 10], "slot_index": 0}],
      "properties": {"Node name for S&R": "TripleCLIPLoader"},
      "widgets_values": ["clip_l.safetensors", "clip_g.safetensors", "t5xxl_fp16.safetensors"],
      "title": "SD3.5 CLIP Loaders (L+G+T5)"
    },
    {
      "id": 2, "type": "CLIPTextEncode", "pos": [450, 100], "size": [400, 200],
      "flags": {}, "order": 2, "mode": 0,
      "inputs": [{"name": "clip", "type": "CLIP", "link": 9}],
      "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [5], "slot_index": 0}],
      "properties": {"Node name for S&R": "CLIPTextEncode"},
      "widgets_values": ["A beautiful mountain landscape at sunset, highly detailed, professional photography, cinematic lighting, 8k ultra HD, photorealistic"],
      "title": "API Positive Prompt"
    },
    {
      "id": 3, "type": "CLIPTextEncode", "pos": [450, 350], "size": [400, 200],
      "flags": {}, "order": 3, "mode": 0,
      "inputs": [{"name": "clip", "type": "CLIP", "link": 10}],
      "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [6], "slot_index": 0}],
      "properties": {"Node name for S&R": "CLIPTextEncode"},
      "widgets_values": ["blurry, low quality, distorted, watermark, text, signature, ugly, cartoon, painting"],
      "title": "API Negative Prompt"
    },
    {
      "id": 4, "type": "EmptyLatentImage", "pos": [450, 600], "size": [315, 106],
      "flags": {}, "order": 4, "mode": 0,
      "outputs": [{"name": "LATENT", "type": "LATENT", "links": [7], "slot_index": 0}],
      "properties": {"Node name for S&R": "EmptyLatentImage"},
      "widgets_values": [1024, 1024, 1],
      "title": "API Latent Image Config"
    },
    {
      "id": 5, "type": "KSampler", "pos": [900, 100], "size": [315, 474],
      "flags": {}, "order": 5, "mode": 0,
      "inputs": [
        {"name": "model", "type": "MODEL", "link": 1},
        {"name": "positive", "type": "CONDITIONING", "link": 5},
        {"name": "negative", "type": "CONDITIONING", "link": 6},
        {"name": "latent_image", "type": "LATENT", "link": 7}
      ],
      "outputs": [{"name": "LATENT", "type": "LATENT", "links": [8], "slot_index": 0}],
      "properties": {"Node name for S&R": "KSampler"},
      "widgets_values": [42, "fixed", 28, 4.5, "dpmpp_2m", "karras", 1],
      "title": "SD3.5 Sampler (28 steps)"
    },
    {
      "id": 6, "type": "VAEDecode", "pos": [1270, 100], "size": [210, 46],
      "flags": {}, "order": 6, "mode": 0,
      "inputs": [
        {"name": "samples", "type": "LATENT", "link": 8},
        {"name": "vae", "type": "VAE", "link": 4}
      ],
      "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [2, 3], "slot_index": 0}],
      "properties": {"Node name for S&R": "VAEDecode"},
      "title": "VAE Decode"
    },
    {
      "id": 7, "type": "PreviewImage", "pos": [1530, 100], "size": [400, 400],
      "flags": {}, "order": 7, "mode": 0,
      "inputs": [{"name": "images", "type": "IMAGE", "link": 2}],
      "properties": {"Node name for S&R": "PreviewImage"},
      "title": "Preview Output"
    },
    {
      "id": 8, "type": "SaveImage", "pos": [1530, 550], "size": [400, 100],
      "flags": {}, "order": 8, "mode": 0,
      "inputs": [{"name": "images", "type": "IMAGE", "link": 3}],
      "properties": {"Node name for S&R": "SaveImage"},
      "widgets_values": ["sd35_large_output"],
      "title": "API Image Output"
    }
  ],
  "links": [
    [1, 1, 0, 5, 0, "MODEL"],
    [2, 6, 0, 7, 0, "IMAGE"],
    [3, 6, 0, 8, 0, "IMAGE"],
    [4, 1, 2, 6, 1, "VAE"],
    [5, 2, 0, 5, 1, "CONDITIONING"],
    [6, 3, 0, 5, 2, "CONDITIONING"],
    [7, 4, 0, 5, 3, "LATENT"],
    [8, 5, 0, 6, 0, "LATENT"],
    [9, 9, 0, 2, 0, "CLIP"],
    [10, 9, 0, 3, 0, "CLIP"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "Stable Diffusion 3.5 Large Text-to-Image Production",
      "version": "1.2.0",
      "description": "Latest generation text-to-image using Stable Diffusion 3.5 Large (28 steps). Provides excellent photorealism and prompt adherence.",
      "category": "text-to-image",
      "tags": ["sd3.5", "stable-diffusion", "large", "production", "t2i", "photorealistic"],
      "requirements": {
        "models": ["SD3.5-large", "CLIP-L", "CLIP-G", "T5-XXL FP16"],
        "custom_nodes": [],
        "vram_min": "18GB",
        "vram_recommended": "24GB"
      },
      "parameters": {
        "prompt": {"node_id": 2, "widget_index": 0, "type": "string", "required": true, "default": "A beautiful mountain landscape at sunset", "description": "Text description of desired image"},
        "negative_prompt": {"node_id": 3, "widget_index": 0, "type": "string", "required": false, "default": "blurry, low quality", "description": "Undesired elements to avoid"},
        "width": {"node_id": 4, "widget_index": 0, "type": "integer", "required": false, "default": 1024, "min": 512, "max": 2048, "description": "Image width in pixels"},
        "height": {"node_id": 4, "widget_index": 1, "type": "integer", "required": false, "default": 1024, "min": 512, "max": 2048, "description": "Image height in pixels"},
        "seed": {"node_id": 5, "widget_index": 0, "type": "integer", "required": false, "default": 42, "min": 0, "max": 4294967295, "description": "Random seed for reproducibility"},
        "steps": {"node_id": 5, "widget_index": 2, "type": "integer", "required": false, "default": 28, "min": 20, "max": 50, "description": "Number of sampling steps (28 recommended for SD3.5)"},
        "cfg": {"node_id": 5, "widget_index": 3, "type": "float", "required": false, "default": 4.5, "min": 1.0, "max": 15.0, "description": "Classifier-free guidance scale (4.5 recommended)"}
      },
      "outputs": {
        "image": {"node_id": 8, "type": "image", "format": "PNG", "resolution": "1024x1024 (configurable)"}
      },
      "performance": {
        "avg_generation_time": "35-45 seconds",
        "vram_usage": "~18-20GB",
        "gpu_utilization": "95-100%"
      },
      "notes": [
        "SD3.5 uses CheckpointLoaderSimple for MODEL+VAE",
        "TripleCLIPLoader loads CLIP-L, CLIP-G, and T5-XXL separately",
        "Checkpoint file doesn't include CLIP encoders",
        "All three CLIP encoders required for best quality"
      ]
    }
  },
  "version": 0.4
}
@@ -0,0 +1,581 @@
{
  "last_node_id": 15,
  "last_link_id": 22,
  "nodes": [
    {"id": 1, "type": "CheckpointLoaderSimple", "pos": [50, 100], "size": {"0": 350, "1": 100}, "flags": {}, "order": 0, "mode": 0, "outputs": [{"name": "MODEL", "type": "MODEL", "links": [1], "slot_index": 0}, {"name": "CLIP", "type": "CLIP", "links": [2, 3], "slot_index": 1}, {"name": "VAE", "type": "VAE", "links": [4], "slot_index": 2}], "properties": {"Node name for S&R": "CheckpointLoaderSimple"}, "widgets_values": ["sd_xl_base_1.0.safetensors"], "title": "SDXL Base Checkpoint Loader"},
    {"id": 2, "type": "CheckpointLoaderSimple", "pos": [50, 300], "size": {"0": 350, "1": 100}, "flags": {}, "order": 1, "mode": 0, "outputs": [{"name": "MODEL", "type": "MODEL", "links": [11], "slot_index": 0}, {"name": "CLIP", "type": "CLIP", "links": [12, 13], "slot_index": 1}, {"name": "VAE", "type": "VAE", "links": [14], "slot_index": 2}], "properties": {"Node name for S&R": "CheckpointLoaderSimple"}, "widgets_values": ["sd_xl_refiner_1.0.safetensors"], "title": "SDXL Refiner Checkpoint Loader"},
    {"id": 3, "type": "CLIPTextEncode", "pos": [450, 100], "size": {"0": 400, "1": 200}, "flags": {}, "order": 2, "mode": 0, "inputs": [{"name": "clip", "type": "CLIP", "link": 2}], "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [5], "slot_index": 0}], "properties": {"Node name for S&R": "CLIPTextEncode"}, "widgets_values": ["A beautiful mountain landscape at sunset, highly detailed, professional photography, cinematic lighting, 8k ultra HD, masterpiece"], "title": "API Positive Prompt (Base)"},
    {"id": 4, "type": "CLIPTextEncode", "pos": [450, 350], "size": {"0": 400, "1": 200}, "flags": {}, "order": 3, "mode": 0, "inputs": [{"name": "clip", "type": "CLIP", "link": 3}], "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [6], "slot_index": 0}], "properties": {"Node name for S&R": "CLIPTextEncode"}, "widgets_values": ["blurry, low quality, distorted, watermark, text, signature, bad anatomy, deformed"], "title": "API Negative Prompt (Base)"},
    {"id": 5, "type": "CLIPTextEncode", "pos": [450, 600], "size": {"0": 400, "1": 200}, "flags": {}, "order": 4, "mode": 0, "inputs": [{"name": "clip", "type": "CLIP", "link": 12}], "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [15], "slot_index": 0}], "properties": {"Node name for S&R": "CLIPTextEncode"}, "widgets_values": ["A beautiful mountain landscape at sunset, highly detailed, professional photography, cinematic lighting, 8k ultra HD, masterpiece"], "title": "API Positive Prompt (Refiner)"},
    {"id": 6, "type": "CLIPTextEncode", "pos": [450, 850], "size": {"0": 400, "1": 200}, "flags": {}, "order": 5, "mode": 0, "inputs": [{"name": "clip", "type": "CLIP", "link": 13}], "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [16], "slot_index": 0}], "properties": {"Node name for S&R": "CLIPTextEncode"}, "widgets_values": ["blurry, low quality, distorted, watermark, text, signature, bad anatomy, deformed"], "title": "API Negative Prompt (Refiner)"},
    {"id": 7, "type": "EmptyLatentImage", "pos": [900, 600], "size": {"0": 315, "1": 106}, "flags": {}, "order": 6, "mode": 0, "outputs": [{"name": "LATENT", "type": "LATENT", "links": [7], "slot_index": 0}], "properties": {"Node name for S&R": "EmptyLatentImage"}, "widgets_values": [1024, 1024, 1], "title": "API Latent Image Config"},
    {"id": 8, "type": "KSampler", "pos": [900, 100], "size": {"0": 315, "1": 474}, "flags": {}, "order": 7, "mode": 0, "inputs": [{"name": "model", "type": "MODEL", "link": 1}, {"name": "positive", "type": "CONDITIONING", "link": 5}, {"name": "negative", "type": "CONDITIONING", "link": 6}, {"name": "latent_image", "type": "LATENT", "link": 7}], "outputs": [{"name": "LATENT", "type": "LATENT", "links": [8, 17], "slot_index": 0}], "properties": {"Node name for S&R": "KSampler"}, "widgets_values": [42, "fixed", 30, 7.5, "dpmpp_2m", "karras", 1], "title": "SDXL Base Sampler (30 steps)"},
    {"id": 9, "type": "VAEDecode", "pos": [1270, 100], "size": {"0": 210, "1": 46}, "flags": {}, "order": 8, "mode": 0, "inputs": [{"name": "samples", "type": "LATENT", "link": 8}, {"name": "vae", "type": "VAE", "link": 4}], "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [9], "slot_index": 0}], "properties": {"Node name for S&R": "VAEDecode"}, "title": "VAE Decode (Base)"},
    {"id": 10, "type": "PreviewImage", "pos": [1530, 100], "size": {"0": 400, "1": 400}, "flags": {}, "order": 9, "mode": 0, "inputs": [{"name": "images", "type": "IMAGE", "link": 9}], "properties": {"Node name for S&R": "PreviewImage"}, "title": "Preview Base Output"},
    {"id": 11, "type": "KSampler", "pos": [1270, 600], "size": {"0": 315, "1": 474}, "flags": {}, "order": 10, "mode": 0, "inputs": [{"name": "model", "type": "MODEL", "link": 11}, {"name": "positive", "type": "CONDITIONING", "link": 15}, {"name": "negative", "type": "CONDITIONING", "link": 16}, {"name": "latent_image", "type": "LATENT", "link": 17}], "outputs": [{"name": "LATENT", "type": "LATENT", "links": [18], "slot_index": 0}], "properties": {"Node name for S&R": "KSampler"}, "widgets_values": [42, "fixed", 20, 7.5, "dpmpp_2m", "karras", 1], "title": "SDXL Refiner Sampler (20 steps)"},
    {"id": 12, "type": "VAEDecode", "pos": [1640, 600], "size": {"0": 210, "1": 46}, "flags": {}, "order": 11, "mode": 0, "inputs": [{"name": "samples", "type": "LATENT", "link": 18}, {"name": "vae", "type": "VAE", "link": 14}], "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [19, 20], "slot_index": 0}], "properties": {"Node name for S&R": "VAEDecode"}, "title": "VAE Decode (Refiner)"},
    {"id": 13, "type": "PreviewImage", "pos": [1900, 600], "size": {"0": 400, "1": 400}, "flags": {}, "order": 12, "mode": 0, "inputs": [{"name": "images", "type": "IMAGE", "link": 19}], "properties": {"Node name for S&R": "PreviewImage"}, "title": "Preview Refined Output"},
    {"id": 14, "type": "SaveImage", "pos": [1900, 1050], "size": {"0": 400, "1": 100}, "flags": {}, "order": 13, "mode": 0, "inputs": [{"name": "images", "type": "IMAGE", "link": 20}], "properties": {"Node name for S&R": "SaveImage"}, "widgets_values": ["sdxl_refined_output"], "title": "API Image Output"}
  ],
  "links": [
    [1, 1, 0, 8, 0, "MODEL"],
    [2, 1, 1, 3, 0, "CLIP"],
    [3, 1, 1, 4, 0, "CLIP"],
    [4, 1, 2, 9, 1, "VAE"],
    [5, 3, 0, 8, 1, "CONDITIONING"],
    [6, 4, 0, 8, 2, "CONDITIONING"],
    [7, 7, 0, 8, 3, "LATENT"],
    [8, 8, 0, 9, 0, "LATENT"],
    [9, 9, 0, 10, 0, "IMAGE"],
    [11, 2, 0, 11, 0, "MODEL"],
    [12, 2, 1, 5, 0, "CLIP"],
    [13, 2, 1, 6, 0, "CLIP"],
    [14, 2, 2, 12, 1, "VAE"],
    [15, 5, 0, 11, 1, "CONDITIONING"],
    [16, 6, 0, 11, 2, "CONDITIONING"],
    [17, 8, 0, 11, 3, "LATENT"],
    [18, 11, 0, 12, 0, "LATENT"],
    [19, 12, 0, 13, 0, "IMAGE"],
    [20, 12, 0, 14, 0, "IMAGE"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "SDXL with Refiner Text-to-Image Production",
      "version": "1.0.0",
      "author": "RunPod AI Model Orchestrator",
      "description": "Two-stage text-to-image generation using SDXL Base (30 steps) + Refiner (20 steps). Produces highly detailed, refined outputs with excellent coherence.",
      "category": "text-to-image",
      "tags": ["sdxl", "refiner", "two-stage", "high-quality", "production", "t2i"],
      "requirements": {"models": ["stable-diffusion-xl-base-1.0", "stable-diffusion-xl-refiner-1.0"], "custom_nodes": [], "vram_min": "20GB", "vram_recommended": "24GB"},
      "parameters": {
        "prompt": {"node_id": [3, 5], "widget_index": 0, "type": "string", "required": true, "default": "A beautiful mountain landscape at sunset", "description": "Text description of desired image (used for both base and refiner)"},
        "negative_prompt": {"node_id": [4, 6], "widget_index": 0, "type": "string", "required": false, "default": "blurry, low quality", "description": "Undesired elements to avoid (used for both base and refiner)"},
        "width": {"node_id": 7, "widget_index": 0, "type": "integer", "required": false, "default": 1024, "min": 512, "max": 2048, "description": "Image width in pixels"},
        "height": {"node_id": 7, "widget_index": 1, "type": "integer", "required": false, "default": 1024, "min": 512, "max": 2048, "description": "Image height in pixels"},
        "seed": {"node_id": [8, 11], "widget_index": 0, "type": "integer", "required": false, "default": 42, "min": 0, "max": 4294967295, "description": "Random seed for reproducibility (same for base and refiner)"},
        "base_steps": {"node_id": 8, "widget_index": 2, "type": "integer", "required": false, "default": 30, "min": 20, "max": 50, "description": "Number of sampling steps for base model"},
        "refiner_steps": {"node_id": 11, "widget_index": 2, "type": "integer", "required": false, "default": 20, "min": 10, "max": 30, "description": "Number of sampling steps for refiner model"},
        "cfg": {"node_id": [8, 11], "widget_index": 3, "type": "float", "required": false, "default": 7.5, "min": 1.0, "max": 15.0, "description": "Classifier-free guidance scale (7.5 recommended for SDXL)"}
      },
      "outputs": {
        "base_image": {"node_id": 10, "type": "preview", "description": "Base model output (before refinement)"},
        "refined_image": {"node_id": 14, "type": "image", "format": "PNG", "resolution": "1024x1024 (configurable)", "description": "Final refined output"}
      },
      "performance": {"avg_generation_time": "40-60 seconds (30+20 steps)", "vram_usage": "~18-20GB", "gpu_utilization": "95-100%", "notes": "Base and refiner run sequentially to manage VRAM"}
    }
  },
  "version": 0.4
}
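In this graph format, each entry in `links` reads `[link_id, source_node, source_slot, destination_node, destination_slot, type]`; for example, `[1, 1, 0, 8, 0, "MODEL"]` above wires node 1's MODEL output into the base KSampler's `model` input. A quick consistency check one could run over any of these files (an illustrative sketch, not part of the repo):

```python
def check_links(workflow: dict) -> list[str]:
    """Flag links whose endpoints don't match the node definitions."""
    nodes = {n["id"]: n for n in workflow["nodes"]}
    problems = []
    for link_id, src, src_slot, dst, dst_slot, ltype in workflow["links"]:
        if src not in nodes or dst not in nodes:
            problems.append(f"link {link_id} ({ltype}) references a missing node")
            continue
        # the source node's output slot should list this link id
        out = nodes[src]["outputs"][src_slot]
        if link_id not in (out.get("links") or []):
            problems.append(f"link {link_id} not listed on node {src} output {src_slot}")
    return problems
```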
865
comfyui/workflows/text-to-music/acestep-m2m-editing-v1.json
Normal file
@@ -0,0 +1,865 @@
{
  "id": "88ac5dad-efd7-40bb-84fe-fbaefdee1fa9",
  "revision": 0,
  "last_node_id": 75,
  "last_link_id": 138,
  "nodes": [
    {"id": 49, "type": "LatentApplyOperationCFG", "pos": [940, -160], "size": [290, 50], "flags": {"collapsed": false}, "order": 10, "mode": 0, "inputs": [{"name": "model", "type": "MODEL", "link": 113}, {"name": "operation", "type": "LATENT_OPERATION", "link": 114}], "outputs": [{"name": "MODEL", "type": "MODEL", "links": [121]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "LatentApplyOperationCFG"}, "widgets_values": []},
    {"id": 40, "type": "CheckpointLoaderSimple", "pos": [180, -160], "size": [370, 98], "flags": {}, "order": 0, "mode": 0, "inputs": [], "outputs": [{"name": "MODEL", "type": "MODEL", "links": [115]}, {"name": "CLIP", "type": "CLIP", "links": [80]}, {"name": "VAE", "type": "VAE", "links": [83, 137]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.32", "Node name for S&R": "CheckpointLoaderSimple", "models": [{"name": "ace_step_v1_3.5b.safetensors", "url": "https://huggingface.co/Comfy-Org/ACE-Step_ComfyUI_repackaged/resolve/main/all_in_one/ace_step_v1_3.5b.safetensors?download=true", "directory": "checkpoints"}]}, "widgets_values": ["ace_step_v1_3.5b.safetensors"], "color": "#322", "bgcolor": "#533"},
    {"id": 48, "type": "MarkdownNote", "pos": [-460, -200], "size": [610, 820], "flags": {}, "order": 1, "mode": 0, "inputs": [], "outputs": [], "title": "About ACE Step and Multi-language Input", "properties": {}, "widgets_values": ["[Tutorial](http://docs.comfy.org/tutorials/audio/ace-step/ace-step-v1) | [教程](http://docs.comfy.org/zh-CN/tutorials/audio/ace-step/ace-step-v1)\n\n\n### Model Download\n\nDownload the following model and save it to the **ComfyUI/models/checkpoints** folder.\n[ace_step_v1_3.5b.safetensors](https://huggingface.co/Comfy-Org/ACE-Step_ComfyUI_repackaged/blob/main/all_in_one/ace_step_v1_3.5b.safetensors)\n\n\n### Multilingual Support\n\nCurrently, the implementation of multi-language support for ACE-Step V1 is achieved by uniformly converting different languages into English characters. At present, in ComfyUI, we haven't implemented the step of converting multi-languages into English. This is because if we need to implement the corresponding conversion, we have to add additional core dependencies of ComfyUI, which may lead to uncertain dependency conflicts.\n\nSo, currently, if you need to input multi-language text, you have to manually convert it into English characters to complete this process. Then, at the beginning of the corresponding `lyrics`, input the abbreviation of the corresponding language code.\n\nFor example, for Chinese, use `[zh]`, for Japanese use `[ja]`, for Korean use `[ko]`, and so on. For specific language input, please check the examples in the instructions. \n\nFor example, Chinese `[zh]`, Japanese `[ja]`, Korean `[ko]`, etc.\n\nExample:\n\n```\n[verse]\n\n[zh]wo3zou3guo4shen1ye4de5jie1dao4\n[zh]leng3feng1chui1luan4si1nian4de5piao4liang4wai4tao4\n[zh]ni3de5wei1xiao4xiang4xing1guang1hen3xuan4yao4\n[zh]zhao4liang4le5wo3gu1du2de5mei3fen1mei3miao3\n\n[chorus]\n\n[verse]\n[ko]hamkke si-kkeuleo-un sesang-ui sodong-eul pihae\n[ko]honja ogsang-eseo dalbich-ui eolyeompus-ileul balaboda\n[ko]niga salang-eun lideum-i ganghan eum-ag gatdago malhaess-eo\n[ko]han ta han tamada ma-eum-ui ondoga eolmana heojeonhanji ijge hae\n\n[bridge]\n[es]cantar mi anhelo por ti sin ocultar\n[es]como poesía y pintura, lleno de anhelo indescifrable\n[es]tu sombra es tan terca como el viento, inborrable\n[es]persiguiéndote en vuelo, brilla como cruzar una mar de nubes\n\n[chorus]\n[fr]que tu sois le vent qui souffle sur ma main\n[fr]un contact chaud comme la douce pluie printanière\n[fr]que tu sois le vent qui s'entoure de mon corps\n[fr]un amour profond qui ne s'éloignera jamais\n\n```\n\n---\n\n### 模型下载\n\n下载下面的模型并保存到 **ComfyUI/models/checkpoints** 文件夹下\n[ace_step_v1_3.5b.safetensors](https://huggingface.co/Comfy-Org/ACE-Step_ComfyUI_repackaged/blob/main/all_in_one/ace_step_v1_3.5b.safetensors)\n\n\n### 多语言支持\n\n目前 ACE-Step V1 多语言的实现是通过将不同语言统一转换为英文字符来实现的,目前在 ComfyUI 中我们并没有实现多语言转换为英文的这一步骤。因为如果需要实现对应转换,则需要增加额外的 ComfyUI 核心依赖,这将可能带来不确定的依赖冲突。\n\n所以目前如果你需要输入多语言,则需要手动转换为英文字符来实现这一过程,然后在对应 `lyrics` 开头输入对应语言代码的缩写。\n\n比如中文`[zh]` 日语 `[ja]` 韩语 `[ko]` 等,具体语言输入请查看说明中的示例\n\n"], "color": "#432", "bgcolor": "#653"},
    {"id": 18, "type": "VAEDecodeAudio", "pos": [1080, 270], "size": [150.93612670898438, 46], "flags": {"collapsed": false}, "order": 13, "mode": 0, "inputs": [{"name": "samples", "type": "LATENT", "link": 122}, {"name": "vae", "type": "VAE", "link": 83}], "outputs": [{"name": "AUDIO", "type": "AUDIO", "links": [126, 127, 128]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.32", "Node name for S&R": "VAEDecodeAudio"}, "widgets_values": []},
    {"id": 60, "type": "SaveAudio", "pos": [1260, 40], "size": [610, 112], "flags": {}, "order": 15, "mode": 4, "inputs": [{"name": "audio", "type": "AUDIO", "link": 127}], "outputs": [], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "SaveAudio"}, "widgets_values": ["audio/ComfyUI"]},
    {"id": 61, "type": "SaveAudioOpus", "pos": [1260, 220], "size": [610, 136], "flags": {}, "order": 16, "mode": 4, "inputs": [{"name": "audio", "type": "AUDIO", "link": 128}], "outputs": [], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "SaveAudioOpus"}, "widgets_values": ["audio/ComfyUI", "128k"]},
    {"id": 44, "type": "ConditioningZeroOut", "pos": [600, 70], "size": [197.712890625, 26], "flags": {"collapsed": true}, "order": 11, "mode": 0, "inputs": [{"name": "conditioning", "type": "CONDITIONING", "link": 108}], "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [120]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.32", "Node name for S&R": "ConditioningZeroOut"}, "widgets_values": []},
    {"id": 51, "type": "ModelSamplingSD3", "pos": [590, -40], "size": [330, 60], "flags": {"collapsed": false}, "order": 7, "mode": 0, "inputs": [{"name": "model", "type": "MODEL", "link": 115}], "outputs": [{"name": "MODEL", "type": "MODEL", "links": [113]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "ModelSamplingSD3"}, "widgets_values": [5.000000000000001]},
    {"id": 50, "type": "LatentOperationTonemapReinhard", "pos": [590, -160], "size": [330, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [], "outputs": [{"name": "LATENT_OPERATION", "type": "LATENT_OPERATION", "links": [114]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "LatentOperationTonemapReinhard"}, "widgets_values": [1.0000000000000002]},
    {"id": 17, "type": "EmptyAceStepLatentAudio", "pos": [180, 50], "size": [370, 82], "flags": {}, "order": 3, "mode": 4, "inputs": [], "outputs": [{"name": "LATENT", "type": "LATENT", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.32", "Node name for S&R": "EmptyAceStepLatentAudio"}, "widgets_values": [120, 1]},
    {"id": 68, "type": "VAEEncodeAudio", "pos": [180, 180], "size": [370, 46], "flags": {}, "order": 9, "mode": 0, "inputs": [{"name": "audio", "type": "AUDIO", "link": 136}, {"name": "vae", "type": "VAE", "link": 137}], "outputs": [{"name": "LATENT", "type": "LATENT", "links": [138]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAEEncodeAudio"}, "widgets_values": []},
    {"id": 64, "type": "LoadAudio", "pos": [180, 340], "size": [370, 140], "flags": {}, "order": 4, "mode": 0, "inputs": [], "outputs": [{"name": "AUDIO", "type": "AUDIO", "links": [136]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "LoadAudio"}, "widgets_values": ["audio_ace_step_1_t2a_song-1.mp3", null, null], "color": "#322", "bgcolor": "#533"},
    {"id": 52, "type": "KSampler", "pos": [940, -40], "size": [290, 262], "flags": {}, "order": 12, "mode": 0, "inputs": [{"name": "model", "type": "MODEL", "link": 121}, {"name": "positive", "type": "CONDITIONING", "link": 117}, {"name": "negative", "type": "CONDITIONING", "link": 120}, {"name": "latent_image", "type": "LATENT", "link": 138}], "outputs": [{"name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [122]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "KSampler"}, "widgets_values": [938549746349002, "randomize", 50, 5, "euler", "simple", 0.30000000000000004]},
    {"id": 59, "type": "SaveAudioMP3", "pos": [1260, -160], "size": [610, 136], "flags": {}, "order": 14, "mode": 0, "inputs": [{"name": "audio", "type": "AUDIO", "link": 126}], "outputs": [], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "SaveAudioMP3"}, "widgets_values": ["audio/ComfyUI", "V0"]},
    {"id": 73, "type": "Note", "pos": [1260, 410], "size": [610, 90], "flags": {}, "order": 5, "mode": 0, "inputs": [], "outputs": [], "properties": {}, "widgets_values": ["These nodes can save audio in different formats. Currently, all the modes are Bypass. You can enable them as per your needs.\n\n这些节点可以将 audio 保存成不同格式,目前的模式都是 Bypass ,你可以按你的需要来启用"], "color": "#432", "bgcolor": "#653"},
    {"id": 14, "type": "TextEncodeAceStepAudio", "pos": [590, 120], "size": [340, 500], "flags": {}, "order": 8, "mode": 0, "inputs": [{"name": "clip", "type": "CLIP", "link": 80}], "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [108, 117]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.32", "Node name for S&R": "TextEncodeAceStepAudio"}, "widgets_values": ["anime, cute female vocals, kawaii pop, j-pop, childish, piano, guitar, synthesizer, fast, happy, cheerful, lighthearted", "[verse]\nフワフワ オミミガ\nユレルヨ カゼノナカ\nキラキラ アオイメ\nミツメル セカイヲ\n\n[verse]\nフワフワ シッポハ\nオオキク ユレルヨ\nキンイロ カミノケ\nナビクヨ カゼノナカ\n\n[verse]\nコンフィーユーアイノ\nマモリビト\nピンクノ セーターデ\nエガオヲ クレルヨ\n\nアオイロ スカートト\nクロイコート キンノモヨウ\nヤサシイ ヒカリガ\nツツムヨ フェネックガール\n\n[verse]\nフワフワ オミミデ\nキコエル ココロノ コエ\nダイスキ フェネックガール\nイツデモ ソバニイルヨ", 0.9900000000000002]},
    {"id": 75, "type": "MarkdownNote", "pos": [950, 410], "size": [280, 210], "flags": {}, "order": 6, "mode": 0, "inputs": [], "outputs": [], "title": "About Repainting", "properties": {}, "widgets_values": ["Providing the lyrics of the original song or the modified lyrics is very important for the output of repainting or editing. \n\nAdjust the value of the **denoise** parameter in KSampler. The larger the value, the lower the similarity between the output audio and the original audio.\n\n提供原始歌曲的歌词或者修改后的歌词对于音频编辑的输出是非常重要的,调整 KSampler 中的 denoise 参数的数值,数值越大输出的音频与原始音频相似度越低"], "color": "#432", "bgcolor": "#653"}
  ],
  "links": [
    [80, 40, 1, 14, 0, "CLIP"],
    [83, 40, 2, 18, 1, "VAE"],
    [108, 14, 0, 44, 0, "CONDITIONING"],
    [113, 51, 0, 49, 0, "MODEL"],
    [114, 50, 0, 49, 1, "LATENT_OPERATION"],
    [115, 40, 0, 51, 0, "MODEL"],
    [117, 14, 0, 52, 1, "CONDITIONING"],
    [120, 44, 0, 52, 2, "CONDITIONING"],
    [121, 49, 0, 52, 0, "MODEL"],
    [122, 52, 0, 18, 0, "LATENT"],
    [126, 18, 0, 59, 0, "AUDIO"],
    [127, 18, 0, 60, 0, "AUDIO"],
    [128, 18, 0, 61, 0, "AUDIO"],
    [136, 64, 0, 68, 0, "AUDIO"],
    [137, 40, 2, 68, 1, "VAE"],
    [138, 68, 0, 52, 3, "LATENT"]
  ],
  "groups": [
    {"id": 1, "title": "Load model here", "bounding": [170, -230, 390, 180], "color": "#3f789e", "font_size": 24, "flags": {}},
    {"id": 4, "title": "Latent", "bounding": [170, -30, 390, 280], "color": "#3f789e", "font_size": 24, "flags": {}},
    {"id": 5, "title": "Adjust the vocal volume", "bounding": [580, -230, 350, 140], "color": "#3f789e", "font_size": 24, "flags": {}},
    {"id": 6, "title": "For repainting", "bounding": [170, 270, 390, 223.60000610351562], "color": "#3f789e", "font_size": 24, "flags": {}},
    {"id": 7, "title": "Output", "bounding": [1250, -230, 630, 760], "color": "#3f789e", "font_size": 24, "flags": {}}
  ],
  "config": {},
  "extra": {
    "ds": {"scale": 0.6830134553650705, "offset": [785.724285521853, 434.02395631202546]},
    "frontendVersion": "1.19.9",
    "node_versions": {"comfy-core": "0.3.34", "ace-step": "06f751d65491c9077fa2bc9b06d2c6f2a90e4c56"},
    "VHS_latentpreview": false,
    "VHS_latentpreviewrate": 0,
    "VHS_MetadataImage": true,
    "VHS_KeepIntermediate": true
  },
  "version": 0.4
}
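The "About Repainting" note in this file says the KSampler `denoise` value controls how far the edited audio departs from the source. A small helper sketch for tuning that from code, under one stated assumption: that UI-format KSampler `widgets_values` are ordered `[seed, control_after_generate, steps, cfg, sampler_name, scheduler, denoise]`, which matches the values visible in this file (denoise 0.3 for repainting):

```python
# Assumed widget layout for KSampler nodes in this file format; index 6 = denoise.
KSAMPLER_DENOISE_INDEX = 6

def set_repaint_strength(workflow: dict, denoise: float) -> None:
    """Lower denoise keeps output closer to the source audio; higher departs more."""
    for node in workflow["nodes"]:
        if node["type"] == "KSampler":
            node["widgets_values"][KSAMPLER_DENOISE_INDEX] = denoise
```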
BIN
comfyui/workflows/text-to-music/acestep-m2m-input.mp3
Normal file
BIN
comfyui/workflows/text-to-music/acestep-m2m-output.mp3
Normal file
841
comfyui/workflows/text-to-music/acestep-official-t2m-v1.json
Normal file
@@ -0,0 +1,841 @@
|
|||||||
|
{
|
||||||
|
"id": "88ac5dad-efd7-40bb-84fe-fbaefdee1fa9",
|
||||||
|
"revision": 0,
|
||||||
|
"last_node_id": 73,
|
||||||
|
"last_link_id": 137,
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"id": 49,
|
||||||
|
"type": "LatentApplyOperationCFG",
|
||||||
|
"pos": [
|
||||||
|
940,
|
||||||
|
-160
|
||||||
|
],
|
||||||
|
"size": [
|
||||||
|
290,
|
||||||
|
50
|
||||||
|
],
|
||||||
|
"flags": {
|
||||||
|
"collapsed": false
|
||||||
|
},
|
||||||
|
"order": 9,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "model",
|
||||||
|
"type": "MODEL",
|
||||||
|
"link": 113
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "operation",
|
||||||
|
"type": "LATENT_OPERATION",
|
||||||
|
"link": 114
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "MODEL",
|
||||||
|
"type": "MODEL",
|
||||||
|
"links": [
|
||||||
|
121
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"cnr_id": "comfy-core",
|
||||||
|
"ver": "0.3.34",
|
||||||
|
"Node name for S&R": "LatentApplyOperationCFG"
|
||||||
|
},
|
||||||
|
"widgets_values": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 64,
|
||||||
|
"type": "LoadAudio",
|
||||||
|
"pos": [
|
||||||
|
180,
|
||||||
|
340
|
||||||
|
],
|
||||||
|
"size": [
|
||||||
|
370,
|
||||||
|
140
|
||||||
|
],
|
||||||
|
"flags": {},
|
||||||
|
"order": 0,
|
||||||
|
"mode": 4,
|
||||||
|
"inputs": [],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "AUDIO",
|
||||||
|
"type": "AUDIO",
|
||||||
|
"links": [
|
||||||
|
136
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"cnr_id": "comfy-core",
|
||||||
|
"ver": "0.3.34",
|
||||||
|
"Node name for S&R": "LoadAudio"
|
||||||
|
},
|
||||||
|
"widgets_values": [
|
||||||
|
"ace_step_example.flac",
|
||||||
|
null,
|
||||||
|
null
|
||||||
|
],
|
||||||
|
"color": "#322",
|
||||||
|
"bgcolor": "#533"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 68,
|
||||||
|
"type": "VAEEncodeAudio",
|
||||||
|
"pos": [
|
||||||
|
180,
|
||||||
|
180
|
||||||
|
],
|
||||||
|
"size": [
|
||||||
|
370,
|
||||||
|
46
|
||||||
|
],
|
||||||
|
"flags": {},
|
||||||
|
"order": 8,
|
||||||
|
"mode": 4,
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "audio",
|
||||||
|
"type": "AUDIO",
|
||||||
|
"link": 136
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "vae",
|
||||||
|
"type": "VAE",
|
||||||
|
"link": 137
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "LATENT",
|
||||||
|
"type": "LATENT",
|
||||||
|
"links": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"cnr_id": "comfy-core",
|
||||||
|
"ver": "0.3.34",
|
||||||
|
"Node name for S&R": "VAEEncodeAudio"
|
||||||
|
},
|
||||||
|
"widgets_values": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 40,
|
||||||
|
"type": "CheckpointLoaderSimple",
|
||||||
|
"pos": [
|
||||||
|
180,
|
||||||
|
-160
|
||||||
|
],
|
||||||
|
"size": [
|
||||||
|
370,
|
||||||
|
98
|
||||||
|
],
|
||||||
|
"flags": {},
|
||||||
|
"order": 1,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "MODEL",
|
||||||
|
"type": "MODEL",
|
||||||
|
"links": [
|
||||||
|
115
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "CLIP",
|
||||||
|
"type": "CLIP",
|
||||||
|
"links": [
|
||||||
|
80
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "VAE",
|
||||||
|
"type": "VAE",
|
||||||
|
"links": [
|
||||||
|
83,
|
||||||
|
137
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"cnr_id": "comfy-core",
|
||||||
|
"ver": "0.3.32",
|
||||||
|
"Node name for S&R": "CheckpointLoaderSimple",
|
||||||
|
"models": [
|
||||||
|
{
|
||||||
|
"name": "ace_step_v1_3.5b.safetensors",
|
||||||
|
"url": "https://huggingface.co/Comfy-Org/ACE-Step_ComfyUI_repackaged/resolve/main/all_in_one/ace_step_v1_3.5b.safetensors?download=true",
|
||||||
|
"directory": "checkpoints"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"widgets_values": [
|
||||||
|
"ace_step_v1_3.5b.safetensors"
|
||||||
|
],
|
||||||
|
"color": "#322",
|
||||||
|
"bgcolor": "#533"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 48,
|
||||||
|
"type": "MarkdownNote",
|
||||||
|
"pos": [
|
||||||
|
-460,
|
||||||
|
-200
|
||||||
|
],
|
||||||
|
"size": [
|
||||||
|
610,
|
||||||
|
820
|
||||||
|
],
|
||||||
|
"flags": {},
|
||||||
|
"order": 2,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [],
|
||||||
|
"outputs": [],
|
||||||
|
"title": "About ACE Step and Multi-language Input",
|
||||||
|
"properties": {},
|
||||||
|
"widgets_values": [
|
||||||
|
"[Tutorial](http://docs.comfy.org/tutorials/audio/ace-step/ace-step-v1) | [教程](http://docs.comfy.org/zh-CN/tutorials/audio/ace-step/ace-step-v1)\n\n\n### Model Download\n\nDownload the following model and save it to the **ComfyUI/models/checkpoints** folder.\n[ace_step_v1_3.5b.safetensors](https://huggingface.co/Comfy-Org/ACE-Step_ComfyUI_repackaged/blob/main/all_in_one/ace_step_v1_3.5b.safetensors)\n\n\n### Multilingual Support\n\nCurrently, the implementation of multi-language support for ACE-Step V1 is achieved by uniformly converting different languages into English characters. At present, in ComfyUI, we haven't implemented the step of converting multi-languages into English. This is because if we need to implement the corresponding conversion, we have to add additional core dependencies of ComfyUI, which may lead to uncertain dependency conflicts.\n\nSo, currently, if you need to input multi-language text, you have to manually convert it into English characters to complete this process. Then, at the beginning of the corresponding `lyrics`, input the abbreviation of the corresponding language code.\n\nFor example, for Chinese, use `[zh]`, for Japanese use `[ja]`, for Korean use `[ko]`, and so on. For specific language input, please check the examples in the instructions. \n\nFor example, Chinese `[zh]`, Japanese `[ja]`, Korean `[ko]`, etc.\n\nExample:\n\n```\n[verse]\n\n[zh]wo3zou3guo4shen1ye4de5jie1dao4\n[zh]leng3feng1chui1luan4si1nian4de5piao4liang4wai4tao4\n[zh]ni3de5wei1xiao4xiang4xing1guang1hen3xuan4yao4\n[zh]zhao4liang4le5wo3gu1du2de5mei3fen1mei3miao3\n\n[chorus]\n\n[verse]\n[ko]hamkke si-kkeuleo-un sesang-ui sodong-eul pihae\n[ko]honja ogsang-eseo dalbich-ui eolyeompus-ileul balaboda\n[ko]niga salang-eun lideum-i ganghan eum-ag gatdago malhaess-eo\n[ko]han ta han tamada ma-eum-ui ondoga eolmana heojeonhanji ijge hae\n\n[bridge]\n[es]cantar mi anhelo por ti sin ocultar\n[es]como poesía y pintura, lleno de anhelo indescifrable\n[es]tu sombra es tan terca como el viento, inborrable\n[es]persiguiéndote en vuelo, brilla como cruzar una mar de nubes\n\n[chorus]\n[fr]que tu sois le vent qui souffle sur ma main\n[fr]un contact chaud comme la douce pluie printanière\n[fr]que tu sois le vent qui s'entoure de mon corps\n[fr]un amour profond qui ne s'éloignera jamais\n\n```\n\n---\n\n### 模型下载\n\n下载下面的模型并保存到 **ComfyUI/models/checkpoints** 文件夹下\n[ace_step_v1_3.5b.safetensors](https://huggingface.co/Comfy-Org/ACE-Step_ComfyUI_repackaged/blob/main/all_in_one/ace_step_v1_3.5b.safetensors)\n\n\n### 多语言支持\n\n目前 ACE-Step V1 多语言的实现是通过将不同语言统一转换为英文字符来实现的,目前在 ComfyUI 中我们并没有实现多语言转换为英文的这一步骤。因为如果需要实现对应转换,则需要增加额外的 ComfyUI 核心依赖,这将可能带来不确定的依赖冲突。\n\n所以目前如果你需要输入多语言,则需要手动转换为英文字符来实现这一过程,然后在对应 `lyrics` 开头输入对应语言代码的缩写。\n\n比如中文`[zh]` 日语 `[ja]` 韩语 `[ko]` 等,具体语言输入请查看说明中的示例\n\n"
|
||||||
|
],
|
||||||
|
"color": "#432",
|
||||||
|
"bgcolor": "#653"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 18,
|
||||||
|
"type": "VAEDecodeAudio",
|
||||||
|
"pos": [
|
||||||
|
1080,
|
||||||
|
270
|
||||||
|
],
|
||||||
|
"size": [
|
||||||
|
150.93612670898438,
|
||||||
|
46
|
||||||
|
],
|
||||||
|
"flags": {
|
||||||
|
"collapsed": false
|
||||||
|
},
|
||||||
|
"order": 12,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "samples",
|
||||||
|
"type": "LATENT",
|
||||||
|
"link": 122
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "vae",
|
||||||
|
"type": "VAE",
|
||||||
|
"link": 83
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "AUDIO",
|
||||||
|
"type": "AUDIO",
|
||||||
|
"links": [
|
||||||
|
126,
|
||||||
|
127,
|
||||||
|
128
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"cnr_id": "comfy-core",
|
||||||
|
"ver": "0.3.32",
|
||||||
|
"Node name for S&R": "VAEDecodeAudio"
|
||||||
|
},
|
||||||
|
"widgets_values": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 60,
|
||||||
|
"type": "SaveAudio",
|
||||||
|
"pos": [
|
||||||
|
1260,
|
||||||
|
40
|
||||||
|
],
|
||||||
|
"size": [
|
||||||
|
610,
|
||||||
|
112
|
||||||
|
],
|
||||||
|
"flags": {},
|
||||||
|
"order": 14,
|
||||||
|
"mode": 4,
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "audio",
|
||||||
|
"type": "AUDIO",
|
||||||
|
"link": 127
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"outputs": [],
|
||||||
|
"properties": {
|
||||||
|
"cnr_id": "comfy-core",
|
||||||
|
"ver": "0.3.34",
|
||||||
|
"Node name for S&R": "SaveAudio"
|
||||||
|
},
|
||||||
|
"widgets_values": [
|
||||||
|
"audio/ComfyUI"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 61,
|
||||||
|
"type": "SaveAudioOpus",
|
||||||
|
"pos": [
|
||||||
|
1260,
|
||||||
|
220
|
||||||
|
],
|
||||||
|
"size": [
|
||||||
|
610,
|
||||||
|
136
|
||||||
|
],
|
||||||
|
"flags": {},
|
||||||
|
"order": 15,
|
||||||
|
"mode": 4,
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "audio",
|
||||||
|
"type": "AUDIO",
|
||||||
|
"link": 128
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"outputs": [],
|
||||||
|
"properties": {
|
||||||
|
"cnr_id": "comfy-core",
|
||||||
|
"ver": "0.3.34",
|
||||||
|
"Node name for S&R": "SaveAudioOpus"
|
||||||
|
},
|
||||||
|
"widgets_values": [
|
||||||
|
"audio/ComfyUI",
|
||||||
|
"128k"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 73,
|
||||||
|
"type": "Note",
|
||||||
|
"pos": [
|
||||||
|
1260,
|
||||||
|
410
|
||||||
|
],
|
||||||
|
"size": [
|
||||||
|
610,
|
||||||
|
90
|
||||||
|
],
|
||||||
|
"flags": {},
|
||||||
|
"order": 3,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [],
|
||||||
|
"outputs": [],
|
||||||
|
"properties": {},
|
||||||
|
"widgets_values": [
|
||||||
|
"These nodes can save audio in different formats. Currently, all the modes are Bypass. You can enable them as per your needs.\n\n这些节点可以将 audio 保存成不同格式,目前的模式都是 Bypass ,你可以按你的需要来启用"
|
||||||
|
],
|
||||||
|
"color": "#432",
|
||||||
|
"bgcolor": "#653"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 44,
|
||||||
|
"type": "ConditioningZeroOut",
|
||||||
|
"pos": [
|
||||||
|
600,
|
||||||
|
70
|
||||||
|
],
|
||||||
|
"size": [
|
||||||
|
197.712890625,
|
||||||
|
26
|
||||||
|
],
|
||||||
|
"flags": {
|
||||||
|
"collapsed": true
|
||||||
|
},
|
||||||
|
"order": 10,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "conditioning",
|
||||||
|
"type": "CONDITIONING",
|
||||||
|
"link": 108
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "CONDITIONING",
|
||||||
|
"type": "CONDITIONING",
|
||||||
|
"links": [
|
||||||
|
120
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"cnr_id": "comfy-core",
|
||||||
|
"ver": "0.3.32",
|
||||||
|
"Node name for S&R": "ConditioningZeroOut"
|
||||||
|
},
|
||||||
|
"widgets_values": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 51,
|
||||||
|
"type": "ModelSamplingSD3",
|
||||||
|
"pos": [
|
||||||
|
590,
|
||||||
|
-40
|
||||||
|
],
|
||||||
|
"size": [
|
||||||
|
330,
|
||||||
|
60
|
||||||
|
],
|
||||||
|
"flags": {
|
||||||
|
"collapsed": false
|
||||||
|
},
|
||||||
|
"order": 6,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "model",
|
||||||
|
"type": "MODEL",
|
||||||
|
"link": 115
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "MODEL",
|
||||||
|
"type": "MODEL",
|
||||||
|
"links": [
|
||||||
|
113
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"cnr_id": "comfy-core",
|
||||||
|
"ver": "0.3.34",
|
||||||
|
"Node name for S&R": "ModelSamplingSD3"
|
||||||
|
},
|
||||||
|
"widgets_values": [
|
||||||
|
5.000000000000001
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 50,
|
||||||
|
"type": "LatentOperationTonemapReinhard",
|
||||||
|
"pos": [
|
||||||
|
590,
|
||||||
|
-160
|
||||||
|
],
|
||||||
|
"size": [
|
||||||
|
330,
|
||||||
|
58
|
||||||
|
],
|
||||||
|
"flags": {},
|
||||||
|
"order": 4,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "LATENT_OPERATION",
|
||||||
|
"type": "LATENT_OPERATION",
|
||||||
|
"links": [
|
||||||
|
114
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"cnr_id": "comfy-core",
|
||||||
|
"ver": "0.3.34",
|
||||||
|
"Node name for S&R": "LatentOperationTonemapReinhard"
|
||||||
|
},
|
||||||
|
"widgets_values": [
|
||||||
|
1.0000000000000002
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 52,
|
||||||
|
"type": "KSampler",
|
||||||
|
"pos": [
|
||||||
|
940,
|
||||||
|
-40
|
||||||
|
],
|
||||||
|
"size": [
|
||||||
|
290,
|
||||||
|
262
|
||||||
|
],
|
||||||
|
"flags": {},
|
||||||
|
"order": 11,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "model",
|
||||||
|
"type": "MODEL",
|
||||||
|
"link": 121
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "positive",
|
||||||
|
"type": "CONDITIONING",
|
||||||
|
"link": 117
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "negative",
|
||||||
|
"type": "CONDITIONING",
|
||||||
|
"link": 120
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "latent_image",
|
||||||
|
"type": "LATENT",
|
||||||
|
"link": 119
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "LATENT",
|
||||||
|
"type": "LATENT",
|
||||||
|
"slot_index": 0,
|
||||||
|
"links": [
|
||||||
|
122
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"cnr_id": "comfy-core",
|
||||||
|
"ver": "0.3.34",
|
||||||
|
"Node name for S&R": "KSampler"
|
||||||
|
},
|
||||||
|
"widgets_values": [
|
||||||
|
468254064217846,
|
||||||
|
"randomize",
|
||||||
|
50,
|
||||||
|
5,
|
||||||
|
"euler",
|
||||||
|
"simple",
|
||||||
|
1
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 14,
|
||||||
|
"type": "TextEncodeAceStepAudio",
|
||||||
|
"pos": [
|
||||||
|
590,
|
||||||
|
120
|
||||||
|
],
|
||||||
|
"size": [
|
||||||
|
340,
|
||||||
|
500
|
||||||
|
],
|
||||||
|
"flags": {},
|
||||||
|
"order": 7,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "clip",
|
||||||
|
"type": "CLIP",
|
||||||
|
"link": 80
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "CONDITIONING",
|
||||||
|
"type": "CONDITIONING",
|
||||||
|
"links": [
|
||||||
|
108,
|
||||||
|
117
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"cnr_id": "comfy-core",
|
||||||
|
"ver": "0.3.32",
|
||||||
|
"Node name for S&R": "TextEncodeAceStepAudio"
|
||||||
|
},
|
||||||
|
"widgets_values": [
|
||||||
|
"anime, soft female vocals, kawaii pop, j-pop, childish, piano, guitar, synthesizer, fast, happy, cheerful, lighthearted\t\n",
|
||||||
|
"[inst]\n\n[verse]\nふわふわ おみみが\nゆれるよ かぜのなか\nきらきら あおいめ\nみつめる せかいを\n\n[verse]\nふわふわ しっぽは\nおおきく ゆれるよ\nきんいろ かみのけ\nなびくよ かぜのなか\n\n[verse]\nコンフィーユーアイの\nまもりびと\nピンクの セーターで\nえがおを くれるよ\n\nあおいろ スカートと\nくろいコート きんのもよう\nやさしい ひかりが\nつつむよ フェネックガール\n\n[verse]\nふわふわ おみみで\nきこえる こころの こえ\nだいすき フェネックガール\nいつでも そばにいるよ\n\n\n",
|
||||||
|
0.9900000000000002
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 17,
|
||||||
|
"type": "EmptyAceStepLatentAudio",
|
||||||
|
"pos": [
|
||||||
|
180,
|
||||||
|
50
|
||||||
|
],
|
||||||
|
"size": [
|
||||||
|
370,
|
||||||
|
82
|
||||||
|
],
|
||||||
|
"flags": {},
|
||||||
|
"order": 5,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "LATENT",
|
||||||
|
"type": "LATENT",
|
||||||
|
"links": [
|
||||||
|
119
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"cnr_id": "comfy-core",
|
||||||
|
"ver": "0.3.32",
|
||||||
|
"Node name for S&R": "EmptyAceStepLatentAudio"
|
||||||
|
},
|
||||||
|
"widgets_values": [
|
||||||
|
120,
|
||||||
|
1
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 59,
|
||||||
|
"type": "SaveAudioMP3",
|
||||||
|
"pos": [
|
||||||
|
1260,
        -160
      ],
      "size": [610, 136],
      "flags": {},
      "order": 13,
      "mode": 0,
      "inputs": [
        {"name": "audio", "type": "AUDIO", "link": 126}
      ],
      "outputs": [],
      "properties": {
        "cnr_id": "comfy-core",
        "ver": "0.3.34",
        "Node name for S&R": "SaveAudioMP3"
      },
      "widgets_values": ["audio/ComfyUI", "V0"]
    }
  ],
  "links": [
    [80, 40, 1, 14, 0, "CLIP"],
    [83, 40, 2, 18, 1, "VAE"],
    [108, 14, 0, 44, 0, "CONDITIONING"],
    [113, 51, 0, 49, 0, "MODEL"],
    [114, 50, 0, 49, 1, "LATENT_OPERATION"],
    [115, 40, 0, 51, 0, "MODEL"],
    [117, 14, 0, 52, 1, "CONDITIONING"],
    [119, 17, 0, 52, 3, "LATENT"],
    [120, 44, 0, 52, 2, "CONDITIONING"],
    [121, 49, 0, 52, 0, "MODEL"],
    [122, 52, 0, 18, 0, "LATENT"],
    [126, 18, 0, 59, 0, "AUDIO"],
    [127, 18, 0, 60, 0, "AUDIO"],
    [128, 18, 0, 61, 0, "AUDIO"],
    [136, 64, 0, 68, 0, "AUDIO"],
    [137, 40, 2, 68, 1, "VAE"]
  ],
  "groups": [
    {"id": 1, "title": "Load model here", "bounding": [170, -230, 390, 180], "color": "#3f789e", "font_size": 24, "flags": {}},
    {"id": 4, "title": "Latent", "bounding": [170, -30, 390, 280], "color": "#3f789e", "font_size": 24, "flags": {}},
    {"id": 5, "title": "Adjust the vocal volume", "bounding": [580, -230, 350, 140], "color": "#3f789e", "font_size": 24, "flags": {}},
    {"id": 6, "title": "For repainting", "bounding": [170, 270, 390, 223.60000610351562], "color": "#3f789e", "font_size": 24, "flags": {}},
    {"id": 7, "title": "Output", "bounding": [1250, -230, 630, 760], "color": "#3f789e", "font_size": 24, "flags": {}}
  ],
  "config": {},
  "extra": {
    "ds": {
      "scale": 1,
      "offset": [-147.02717343600432, 384.62272311479]
    },
    "frontendVersion": "1.19.9",
    "node_versions": {
      "comfy-core": "0.3.34",
      "ace-step": "06f751d65491c9077fa2bc9b06d2c6f2a90e4c56"
    },
    "VHS_latentpreview": false,
    "VHS_latentpreviewrate": 0,
    "VHS_MetadataImage": true,
    "VHS_KeepIntermediate": true
  },
  "version": 0.4
}
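Each six-element entry in `links` above appears to follow LiteGraph's serialized edge format: `[link_id, source_node, source_slot, target_node, target_slot, type]`. A minimal sketch for inspecting a graph-format workflow this way (the filename is hypothetical):

```python
import json

# Print every edge of a ComfyUI graph-format workflow as
# "source[slot] --TYPE--> target[slot]".
with open("acestep-t2m-workflow.json") as f:  # hypothetical filename
    wf = json.load(f)

names = {n["id"]: n.get("title", n.get("type", str(n["id"]))) for n in wf["nodes"]}
for link_id, src, src_slot, dst, dst_slot, ltype in wf["links"]:
    print(f"link {link_id}: {names[src]}[{src_slot}] --{ltype}--> {names[dst]}[{dst_slot}]")
```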
BIN  comfyui/workflows/text-to-music/acestep-t2m-output.flac (new binary file)
@@ -0,0 +1,130 @@
{
  "last_node_id": 3,
  "last_link_id": 2,
  "nodes": [
    {
      "id": 1,
      "type": "DiffRhythmRun",
      "pos": [100, 100],
      "size": [400, 400],
      "flags": {},
      "order": 0,
      "mode": 0,
      "outputs": [
        {"name": "AUDIO", "type": "AUDIO", "links": [1, 2]}
      ],
      "properties": {"Node name for S&R": "DiffRhythmRun"},
      "widgets_values": [
        "cfm_full_model.pt",
        "Cinematic orchestral piece with soaring strings, powerful brass, and emotional piano melodies building to an epic crescendo",
        true,
        "euler",
        30,
        4,
        "quality",
        123,
        "randomize",
        false,
        "[-1, 20], [60, -1]"
      ],
      "title": "DiffRhythm Full-Length Text-to-Music (4m45s)"
    },
    {
      "id": 2,
      "type": "PreviewAudio",
      "pos": [600, 100],
      "size": [300, 100],
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [
        {"name": "audio", "type": "AUDIO", "link": 1}
      ],
      "properties": {"Node name for S&R": "PreviewAudio"},
      "title": "Preview Audio"
    },
    {
      "id": 3,
      "type": "SaveAudio",
      "pos": [600, 250],
      "size": [300, 100],
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [
        {"name": "audio", "type": "AUDIO", "link": 2}
      ],
      "properties": {"Node name for S&R": "SaveAudio"},
      "widgets_values": ["diffrhythm_full_output"],
      "title": "Save Audio"
    }
  ],
  "links": [
    [1, 1, 0, 2, 0, "AUDIO"],
    [2, 1, 0, 3, 0, "AUDIO"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "DiffRhythm Full-Length Text-to-Music v1",
      "description": "Full-length music generation using DiffRhythm Full (4 minutes 45 seconds)",
      "version": "1.0.0",
      "author": "valknar@pivoine.art",
      "category": "text-to-music",
      "tags": ["diffrhythm", "music-generation", "text-to-music", "full-length", "4m45s"],
      "requirements": {
        "custom_nodes": ["ComfyUI_DiffRhythm"],
        "models": ["ASLP-lab/DiffRhythm-full", "ASLP-lab/DiffRhythm-vae", "OpenMuQ/MuQ-MuLan-large", "OpenMuQ/MuQ-large-msd-iter", "FacebookAI/xlm-roberta-base"],
        "vram_min": "16GB",
        "vram_recommended": "20GB",
        "system_deps": ["espeak-ng"]
      },
      "usage": {
        "model": "cfm_full_model.pt (DiffRhythm Full - 4m45s/285s generation)",
        "style_prompt": "Detailed text description of the desired full-length music composition",
        "unload_model": "Boolean to unload model after generation (default: true)",
        "odeint_method": "ODE solver: euler, midpoint, rk4, implicit_adams (default: euler)",
        "steps": "Number of diffusion steps: 1-100 (default: 30)",
        "cfg": "Classifier-free guidance scale: 1-10 (default: 4)",
        "quality_or_speed": "Generation mode: quality or speed (default: quality for full-length)",
        "seed": "Random seed for reproducibility (default: 123)",
        "edit": "Enable segment editing mode (default: false)",
        "edit_segments": "Segments to edit when edit=true"
      },
      "performance": {
        "generation_time": "~60-90 seconds on RTX 4090",
        "vram_usage": "~16GB during generation",
        "note": "Significantly faster than real-time music generation"
      },
      "notes": [
        "This workflow uses DiffRhythm Full for 4 minute 45 second music generation",
        "Best for complete song compositions with intro, development, and outro",
        "All parameters except model and style_prompt are optional",
        "Supports complex, multi-part compositions",
        "Can optionally connect MultiLineLyricsDR node for lyrics input"
      ]
    }
  },
  "version": 0.4
}
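These files are in ComfyUI's graph (editor) format. To run one headlessly you would normally re-export it in API ("prompt") format and POST it to a running ComfyUI instance; a minimal sketch, assuming a default local server on port 8188 and a hypothetical API-format export of this workflow:

```python
import json
import urllib.request

COMFYUI_URL = "http://127.0.0.1:8188"  # assumption: default ComfyUI port

# Load an API-format export (File > Export (API) in the ComfyUI editor);
# the graph-format JSON shown in this diff is not accepted by /prompt as-is.
with open("diffrhythm-full-t2m-api.json") as f:  # hypothetical filename
    prompt = json.load(f)

req = urllib.request.Request(
    f"{COMFYUI_URL}/prompt",
    data=json.dumps({"prompt": prompt}).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp))  # contains a prompt_id you can poll via /history
```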
@@ -0,0 +1,164 @@
{
  "last_node_id": 4,
  "last_link_id": 3,
  "nodes": [
    {
      "id": 1,
      "type": "LoadAudio",
      "pos": [100, 100],
      "size": [300, 100],
      "flags": {},
      "order": 0,
      "mode": 0,
      "outputs": [
        {"name": "AUDIO", "type": "AUDIO", "links": [1]}
      ],
      "properties": {"Node name for S&R": "LoadAudio"},
      "widgets_values": ["reference_audio.wav"],
      "title": "Load Reference Audio"
    },
    {
      "id": 2,
      "type": "DiffRhythmRun",
      "pos": [500, 100],
      "size": [400, 450],
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [
        {"name": "style_audio_or_edit_song", "type": "AUDIO", "link": 1}
      ],
      "outputs": [
        {"name": "AUDIO", "type": "AUDIO", "links": [2, 3]}
      ],
      "properties": {"Node name for S&R": "DiffRhythmRun"},
      "widgets_values": [
        "cfm_model_v1_2.pt",
        "Energetic rock music with driving guitar riffs and powerful drums",
        true,
        "euler",
        30,
        5,
        "speed",
        456,
        "randomize",
        false,
        "[-1, 20], [60, -1]"
      ],
      "title": "DiffRhythm Reference-Based Generation"
    },
    {
      "id": 3,
      "type": "PreviewAudio",
      "pos": [1000, 100],
      "size": [300, 100],
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [
        {"name": "audio", "type": "AUDIO", "link": 2}
      ],
      "properties": {"Node name for S&R": "PreviewAudio"},
      "title": "Preview Generated Audio"
    },
    {
      "id": 4,
      "type": "SaveAudio",
      "pos": [1000, 250],
      "size": [300, 100],
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [
        {"name": "audio", "type": "AUDIO", "link": 3}
      ],
      "properties": {"Node name for S&R": "SaveAudio"},
      "widgets_values": ["diffrhythm_reference_output"],
      "title": "Save Audio"
    }
  ],
  "links": [
    [1, 1, 0, 2, 0, "AUDIO"],
    [2, 2, 0, 3, 0, "AUDIO"],
    [3, 2, 0, 4, 0, "AUDIO"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "DiffRhythm Reference-Based Generation v1",
      "description": "Generate new music based on a reference audio file while following text prompt guidance",
      "version": "1.0.0",
      "author": "valknar@pivoine.art",
      "category": "text-to-music",
      "tags": ["diffrhythm", "music-generation", "reference-based", "style-transfer"],
      "requirements": {
        "custom_nodes": ["ComfyUI_DiffRhythm"],
        "models": ["ASLP-lab/DiffRhythm-1_2", "ASLP-lab/DiffRhythm-vae", "OpenMuQ/MuQ-MuLan-large", "OpenMuQ/MuQ-large-msd-iter", "FacebookAI/xlm-roberta-base"],
        "vram_min": "14GB",
        "vram_recommended": "18GB",
        "system_deps": ["espeak-ng"]
      },
      "usage": {
        "reference_audio": "Path to reference audio file (WAV, MP3, or other supported formats)",
        "model": "cfm_model_v1_2.pt (DiffRhythm 1.2)",
        "style_prompt": "Text description guiding the style and characteristics of generated music",
        "unload_model": "Boolean to unload model after generation (default: true)",
        "odeint_method": "ODE solver: euler, midpoint, rk4, implicit_adams (default: euler)",
        "steps": "Number of diffusion steps: 1-100 (default: 30)",
        "cfg": "Classifier-free guidance scale: 1-10 (default: 5 for reference-based)",
        "quality_or_speed": "Generation mode: quality or speed (default: speed)",
        "seed": "Random seed for reproducibility (default: 456)",
        "edit": "Enable segment editing mode (default: false)",
        "edit_segments": "Segments to edit when edit=true"
      },
      "use_cases": [
        "Style transfer: Apply the style of reference music to a new prompt",
        "Variations: Create variations of existing compositions",
        "Genre transformation: Transform music to a different genre while keeping structure",
        "Mood adaptation: Change the mood/emotion while maintaining musical elements"
      ],
      "notes": [
        "This workflow combines reference audio with text prompt guidance",
        "The reference audio is connected to the style_audio_or_edit_song input",
        "Higher cfg values (7-10) = closer adherence to both prompt and reference",
        "Lower cfg values (2-4) = more creative interpretation",
        "Reference audio should ideally be similar duration to target (95s for cfm_model_v1_2.pt)",
        "Can use any format supported by ComfyUI's LoadAudio node"
      ]
    }
  },
  "version": 0.4
}
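A minimal sketch of the cfg trade-off described in the notes above: sweep the DiffRhythmRun cfg widget and write one variant per value. The widget positions are inferred from the values in this file (index 5 = cfg, index 7 = seed) and should be verified against the installed node version; the filenames are hypothetical.

```python
import copy
import json

with open("diffrhythm-reference-t2m-v1.json") as f:  # hypothetical filename
    wf = json.load(f)

for cfg in (3, 5, 7, 9):
    variant = copy.deepcopy(wf)
    node = next(n for n in variant["nodes"] if n["type"] == "DiffRhythmRun")
    node["widgets_values"][5] = cfg  # classifier-free guidance scale
    with open(f"diffrhythm-ref-cfg{cfg}.json", "w") as out:
        json.dump(variant, out, indent=2)
```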
comfyui/workflows/text-to-music/diffrhythm-simple-t2m-v1.json (new file, 125 lines)
@@ -0,0 +1,125 @@
{
  "last_node_id": 3,
  "last_link_id": 2,
  "nodes": [
    {
      "id": 1,
      "type": "DiffRhythmRun",
      "pos": [100, 100],
      "size": [400, 400],
      "flags": {},
      "order": 0,
      "mode": 0,
      "outputs": [
        {"name": "AUDIO", "type": "AUDIO", "links": [1, 2]}
      ],
      "properties": {"Node name for S&R": "DiffRhythmRun"},
      "widgets_values": [
        "cfm_model_v1_2.pt",
        "Upbeat electronic dance music with energetic beats and synthesizer melodies",
        true,
        "euler",
        30,
        4,
        "speed",
        42,
        "randomize",
        false,
        "[-1, 20], [60, -1]"
      ],
      "title": "DiffRhythm Text-to-Music (95s)"
    },
    {
      "id": 2,
      "type": "PreviewAudio",
      "pos": [600, 100],
      "size": [300, 100],
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [
        {"name": "audio", "type": "AUDIO", "link": 1}
      ],
      "properties": {"Node name for S&R": "PreviewAudio"},
      "title": "Preview Audio"
    },
    {
      "id": 3,
      "type": "SaveAudio",
      "pos": [600, 250],
      "size": [300, 100],
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [
        {"name": "audio", "type": "AUDIO", "link": 2}
      ],
      "properties": {"Node name for S&R": "SaveAudio"},
      "widgets_values": ["diffrhythm_output"],
      "title": "Save Audio"
    }
  ],
  "links": [
    [1, 1, 0, 2, 0, "AUDIO"],
    [2, 1, 0, 3, 0, "AUDIO"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "DiffRhythm Simple Text-to-Music v1",
      "description": "Basic text-to-music generation using DiffRhythm 1.2 (95 seconds)",
      "version": "1.0.0",
      "author": "valknar@pivoine.art",
      "category": "text-to-music",
      "tags": ["diffrhythm", "music-generation", "text-to-music", "95s"],
      "requirements": {
        "custom_nodes": ["ComfyUI_DiffRhythm"],
        "models": ["ASLP-lab/DiffRhythm-1_2", "ASLP-lab/DiffRhythm-vae", "OpenMuQ/MuQ-MuLan-large", "OpenMuQ/MuQ-large-msd-iter", "FacebookAI/xlm-roberta-base"],
        "vram_min": "12GB",
        "vram_recommended": "16GB",
        "system_deps": ["espeak-ng"]
      },
      "usage": {
        "model": "cfm_model_v1_2.pt (DiffRhythm 1.2 - 95s generation)",
        "style_prompt": "Text description of the desired music style, mood, and instruments",
        "unload_model": "Boolean to unload model after generation (default: true)",
        "odeint_method": "ODE solver: euler, midpoint, rk4, implicit_adams (default: euler)",
        "steps": "Number of diffusion steps: 1-100 (default: 30)",
        "cfg": "Classifier-free guidance scale: 1-10 (default: 4)",
        "quality_or_speed": "Generation mode: quality or speed (default: speed)",
        "seed": "Random seed for reproducibility (default: 42)",
        "edit": "Enable segment editing mode (default: false)",
        "edit_segments": "Segments to edit when edit=true (default: [-1, 20], [60, -1])"
      },
      "notes": [
        "This workflow uses DiffRhythm 1.2 for 95-second music generation",
        "All parameters except model and style_prompt are optional",
        "Supports English and Chinese text prompts",
        "Generation time: ~30-60 seconds on RTX 4090",
        "Can optionally connect MultiLineLyricsDR node for lyrics input"
      ]
    }
  },
  "version": 0.4
}
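The `edit_segments` widget is a plain string of bracketed pairs. A small parsing sketch; the semantics (seconds, with -1 presumably standing for the start/end of the track) are an assumption to verify against the DiffRhythm node's documentation:

```python
import ast

def parse_segments(spec: str) -> list[tuple[int, int]]:
    # "[-1, 20], [60, -1]" -> [(-1, 20), (60, -1)]
    # Assumption: each pair is (start, end) in seconds; -1 = track boundary.
    return [tuple(seg) for seg in ast.literal_eval(f"[{spec}]")]

print(parse_segments("[-1, 20], [60, -1]"))  # [(-1, 20), (60, -1)]
```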
@@ -0,0 +1,151 @@
{
  "last_node_id": 2,
  "last_link_id": 1,
  "nodes": [
    {
      "id": 1,
      "type": "Musicgen_",
      "pos": [50, 100],
      "size": [400, 300],
      "flags": {},
      "order": 0,
      "mode": 0,
      "outputs": [
        {"name": "audio", "type": "AUDIO", "links": [1], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "Musicgen_"},
      "widgets_values": [
        "Upbeat electronic dance music with energetic beats and synthesizer melodies",
        30.0,
        4.0,
        42,
        "auto"
      ],
      "title": "MusicGen Large Generator"
    },
    {
      "id": 2,
      "type": "AudioPlay",
      "pos": [500, 100],
      "size": [315, 100],
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [
        {"name": "audio", "type": "AUDIO", "link": 1}
      ],
      "properties": {"Node name for S&R": "AudioPlay"},
      "title": "API Audio Output"
    }
  ],
  "links": [
    [1, 1, 0, 2, 0, "AUDIO"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "MusicGen Large Text-to-Music Production",
      "version": "1.2.0",
      "description": "High-quality music generation using MusicGen Large. Generates up to 30 seconds of music from text prompts.",
      "category": "text-to-music",
      "tags": ["musicgen", "large", "t2m", "audio", "production"],
      "requirements": {
        "models": ["facebook/musicgen-large"],
        "custom_nodes": ["comfyui-sound-lab"],
        "vram_min": "12GB",
        "vram_recommended": "16GB"
      },
      "parameters": {
        "prompt": {
          "node_id": 1,
          "widget_index": 0,
          "type": "string",
          "required": true,
          "default": "Upbeat electronic dance music",
          "description": "Text description of desired music style, genre, mood, instruments"
        },
        "duration": {
          "node_id": 1,
          "widget_index": 1,
          "type": "float",
          "required": false,
          "default": 30.0,
          "min": 1.0,
          "max": 60.0,
          "description": "Duration in seconds (up to 30s recommended for best quality)"
        },
        "guidance_scale": {
          "node_id": 1,
          "widget_index": 2,
          "type": "float",
          "required": false,
          "default": 4.0,
          "min": 0.0,
          "max": 20.0,
          "description": "How closely to follow the prompt (4.0 recommended)"
        },
        "seed": {
          "node_id": 1,
          "widget_index": 3,
          "type": "integer",
          "required": false,
          "default": 42,
          "min": 0,
          "max": 2147483647,
          "description": "Random seed for reproducibility"
        },
        "device": {
          "node_id": 1,
          "widget_index": 4,
          "type": "string",
          "required": false,
          "default": "auto",
          "options": ["auto", "cpu"],
          "description": "Computation device (auto recommended)"
        }
      },
      "outputs": {
        "audio": {
          "node_id": 2,
          "type": "audio",
          "format": "FLAC",
          "sample_rate": "32000 Hz",
          "channels": "stereo"
        }
      },
      "performance": {
        "avg_generation_time": "60-90 seconds for 30s audio",
        "vram_usage": "~14-16GB",
        "gpu_utilization": "90-100%"
      },
      "use_cases": [
        "Background music generation",
        "Game soundtrack creation",
        "Video background music",
        "Music prototyping and ideation"
      ],
      "notes": [
        "MusicGen Large produces the highest-quality output",
        "Best results with clear, specific prompts",
        "Can specify genre, mood, instruments, tempo",
        "Output is 32kHz stereo FLAC format",
        "Longer durations (>30s) may reduce coherence"
      ]
    }
  },
  "version": 0.4
}
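The `workflow_info.parameters` map pairs each tunable with a `node_id` and `widget_index`, so callers can patch widgets by name instead of hard-coding positions. A minimal sketch built on that metadata (the filename is hypothetical):

```python
import json

def set_param(wf: dict, name: str, value) -> None:
    # Resolve the widget slot from the metadata shipped inside the workflow.
    spec = wf["extra"]["workflow_info"]["parameters"][name]
    node = next(n for n in wf["nodes"] if n["id"] == spec["node_id"])
    node["widgets_values"][spec["widget_index"]] = value

with open("musicgen-large-t2m.json") as f:  # hypothetical filename
    wf = json.load(f)

set_param(wf, "prompt", "Lo-fi hip hop with warm vinyl crackle")
set_param(wf, "duration", 20.0)
set_param(wf, "seed", 7)
```

The same pattern applies unchanged to the Medium, Melody, and Small variants below, which ship the same metadata shape.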
@@ -0,0 +1,151 @@
{
  "last_node_id": 2,
  "last_link_id": 1,
  "nodes": [
    {
      "id": 1,
      "type": "Musicgen_",
      "pos": [50, 100],
      "size": [400, 300],
      "flags": {},
      "order": 0,
      "mode": 0,
      "outputs": [
        {"name": "audio", "type": "AUDIO", "links": [1], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "Musicgen_"},
      "widgets_values": [
        "Upbeat electronic dance music with energetic beats and synthesizer melodies",
        30.0,
        3.0,
        42,
        "auto"
      ],
      "title": "MusicGen Medium Generator"
    },
    {
      "id": 2,
      "type": "AudioPlay",
      "pos": [500, 100],
      "size": [315, 100],
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [
        {"name": "audio", "type": "AUDIO", "link": 1}
      ],
      "properties": {"Node name for S&R": "AudioPlay"},
      "title": "API Audio Output"
    }
  ],
  "links": [
    [1, 1, 0, 2, 0, "AUDIO"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "MusicGen Medium Text-to-Music Production",
      "version": "1.2.0",
      "description": "Balanced music generation using MusicGen Medium. Good quality with moderate VRAM usage.",
      "category": "text-to-music",
      "tags": ["musicgen", "medium", "t2m", "audio", "production"],
      "requirements": {
        "models": ["facebook/musicgen-medium"],
        "custom_nodes": ["comfyui-sound-lab"],
        "vram_min": "8GB",
        "vram_recommended": "10GB"
      },
      "parameters": {
        "prompt": {
          "node_id": 1,
          "widget_index": 0,
          "type": "string",
          "required": true,
          "default": "Upbeat electronic dance music",
          "description": "Text description of desired music style, genre, mood, instruments"
        },
        "duration": {
          "node_id": 1,
          "widget_index": 1,
          "type": "float",
          "required": false,
          "default": 30.0,
          "min": 1.0,
          "max": 60.0,
          "description": "Duration in seconds (up to 30s recommended for best quality)"
        },
        "guidance_scale": {
          "node_id": 1,
          "widget_index": 2,
          "type": "float",
          "required": false,
          "default": 3.0,
          "min": 0.0,
          "max": 20.0,
          "description": "How closely to follow the prompt (3.0 recommended)"
        },
        "seed": {
          "node_id": 1,
          "widget_index": 3,
          "type": "integer",
          "required": false,
          "default": 42,
          "min": 0,
          "max": 2147483647,
          "description": "Random seed for reproducibility"
        },
        "device": {
          "node_id": 1,
          "widget_index": 4,
          "type": "string",
          "required": false,
          "default": "auto",
          "options": ["auto", "cpu"],
          "description": "Computation device (auto recommended)"
        }
      },
      "outputs": {
        "audio": {
          "node_id": 2,
          "type": "audio",
          "format": "FLAC",
          "sample_rate": "32000 Hz",
          "channels": "stereo"
        }
      },
      "performance": {
        "avg_generation_time": "30-50 seconds for 30s audio",
        "vram_usage": "~8-10GB",
        "gpu_utilization": "90-100%"
      },
      "use_cases": [
        "Background music generation",
        "Game soundtrack creation",
        "Video background music",
        "Music prototyping with moderate quality"
      ],
      "notes": [
        "MusicGen Medium balances quality and resource usage",
        "Faster than Large, better quality than Small",
        "Best results with clear, specific prompts",
        "Can specify genre, mood, instruments, tempo",
        "Output is 32kHz stereo FLAC format"
      ]
    }
  },
  "version": 0.4
}
@@ -0,0 +1,150 @@
{
  "last_node_id": 2,
  "last_link_id": 1,
  "nodes": [
    {
      "id": 1,
      "type": "Musicgen_",
      "pos": [50, 100],
      "size": [400, 300],
      "flags": {},
      "order": 0,
      "mode": 0,
      "outputs": [
        {"name": "audio", "type": "AUDIO", "links": [1], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "Musicgen_"},
      "widgets_values": [
        "Electronic music with melodic elements and catchy hooks",
        30.0,
        3.5,
        42,
        "auto"
      ],
      "title": "MusicGen Melody Generator"
    },
    {
      "id": 2,
      "type": "AudioPlay",
      "pos": [500, 100],
      "size": [315, 100],
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [
        {"name": "audio", "type": "AUDIO", "link": 1}
      ],
      "properties": {"Node name for S&R": "AudioPlay"},
      "title": "API Audio Output"
    }
  ],
  "links": [
    [1, 1, 0, 2, 0, "AUDIO"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "MusicGen Melody Text-to-Music Production",
      "version": "1.2.0",
      "description": "Music generation using MusicGen with melodic focus. Note: Melody conditioning via audio input is not yet supported.",
      "category": "text-to-music",
      "tags": ["musicgen", "melody", "t2m", "audio", "production"],
      "requirements": {
        "models": ["facebook/musicgen-melody"],
        "custom_nodes": ["comfyui-sound-lab"],
        "vram_min": "10GB",
        "vram_recommended": "12GB"
      },
      "parameters": {
        "prompt": {
          "node_id": 1,
          "widget_index": 0,
          "type": "string",
          "required": true,
          "default": "Electronic music with melodic elements",
          "description": "Text description of desired music style, genre, mood, instruments"
        },
        "duration": {
          "node_id": 1,
          "widget_index": 1,
          "type": "float",
          "required": false,
          "default": 30.0,
          "min": 1.0,
          "max": 60.0,
          "description": "Duration in seconds (up to 30s recommended for best quality)"
        },
        "guidance_scale": {
          "node_id": 1,
          "widget_index": 2,
          "type": "float",
          "required": false,
          "default": 3.5,
          "min": 0.0,
          "max": 20.0,
          "description": "How closely to follow the prompt (3.5 recommended)"
        },
        "seed": {
          "node_id": 1,
          "widget_index": 3,
          "type": "integer",
          "required": false,
          "default": 42,
          "min": 0,
          "max": 2147483647,
          "description": "Random seed for reproducibility"
        },
        "device": {
          "node_id": 1,
          "widget_index": 4,
          "type": "string",
          "required": false,
          "default": "auto",
          "options": ["auto", "cpu"],
          "description": "Computation device (auto recommended)"
        }
      },
      "outputs": {
        "audio": {
          "node_id": 2,
          "type": "audio",
          "format": "FLAC",
          "sample_rate": "32000 Hz",
          "channels": "stereo"
        }
      },
      "performance": {
        "avg_generation_time": "40-60 seconds for 30s audio",
        "vram_usage": "~10-12GB",
        "gpu_utilization": "90-100%"
      },
      "use_cases": [
        "Melodic music generation",
        "Creating catchy hooks and melodies",
        "Music with strong melodic structure"
      ],
      "notes": [
        "IMPORTANT: Melody conditioning via audio input is not yet supported in the current Sound Lab implementation",
        "This workflow uses text prompts only - describe melodic characteristics in the prompt",
        "For best melodic results, use descriptive terms like 'catchy melody', 'melodic hooks', 'harmonic progression'",
        "The MusicGen Melody model is trained for melodic content",
        "Output is 32kHz stereo FLAC format"
      ]
    }
  },
  "version": 0.4
}
@@ -0,0 +1,151 @@
{
  "last_node_id": 2,
  "last_link_id": 1,
  "nodes": [
    {
      "id": 1,
      "type": "Musicgen_",
      "pos": [50, 100],
      "size": [400, 300],
      "flags": {},
      "order": 0,
      "mode": 0,
      "outputs": [
        {"name": "audio", "type": "AUDIO", "links": [1], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "Musicgen_"},
      "widgets_values": [
        "Upbeat electronic dance music with energetic beats and synthesizer melodies",
        30.0,
        3.0,
        42,
        "auto"
      ],
      "title": "MusicGen Small Generator"
    },
    {
      "id": 2,
      "type": "AudioPlay",
      "pos": [500, 100],
      "size": [315, 100],
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [
        {"name": "audio", "type": "AUDIO", "link": 1}
      ],
      "properties": {"Node name for S&R": "AudioPlay"},
      "title": "API Audio Output"
    }
  ],
  "links": [
    [1, 1, 0, 2, 0, "AUDIO"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "MusicGen Small Text-to-Music Production",
      "version": "1.2.0",
      "description": "Fast music generation using MusicGen Small. Lower quality but fastest generation and minimal VRAM.",
      "category": "text-to-music",
      "tags": ["musicgen", "small", "t2m", "audio", "production", "fast"],
      "requirements": {
        "models": ["facebook/musicgen-small"],
        "custom_nodes": ["comfyui-sound-lab"],
        "vram_min": "4GB",
        "vram_recommended": "6GB"
      },
      "parameters": {
        "prompt": {
          "node_id": 1,
          "widget_index": 0,
          "type": "string",
          "required": true,
          "default": "Upbeat electronic dance music",
          "description": "Text description of desired music style, genre, mood, instruments"
        },
        "duration": {
          "node_id": 1,
          "widget_index": 1,
          "type": "float",
          "required": false,
          "default": 30.0,
          "min": 1.0,
          "max": 60.0,
          "description": "Duration in seconds (up to 30s recommended for best quality)"
        },
        "guidance_scale": {
          "node_id": 1,
          "widget_index": 2,
          "type": "float",
          "required": false,
          "default": 3.0,
          "min": 0.0,
          "max": 20.0,
          "description": "How closely to follow the prompt (3.0 recommended)"
        },
        "seed": {
          "node_id": 1,
          "widget_index": 3,
          "type": "integer",
          "required": false,
          "default": 42,
          "min": 0,
          "max": 2147483647,
          "description": "Random seed for reproducibility"
        },
        "device": {
          "node_id": 1,
          "widget_index": 4,
          "type": "string",
          "required": false,
          "default": "auto",
          "options": ["auto", "cpu"],
          "description": "Computation device (auto recommended)"
        }
      },
      "outputs": {
        "audio": {
          "node_id": 2,
          "type": "audio",
          "format": "FLAC",
          "sample_rate": "32000 Hz",
          "channels": "stereo"
        }
      },
      "performance": {
        "avg_generation_time": "20-35 seconds for 30s audio",
        "vram_usage": "~4-6GB",
        "gpu_utilization": "85-95%"
      },
      "use_cases": [
        "Rapid prototyping",
        "Low-resource environments",
        "Quick background music drafts",
        "Testing different prompts quickly"
      ],
      "notes": [
        "MusicGen Small is the fastest but lowest quality",
        "Good for rapid iteration and testing",
        "Best results with clear, specific prompts",
        "Can specify genre, mood, instruments, tempo",
        "Output is 32kHz stereo FLAC format"
      ]
    }
  },
  "version": 0.4
}
BIN  comfyui/workflows/text-to-music/reference_audio.wav (new binary file)
comfyui/workflows/upscaling/face-upscale-production-v1.json (new file, 185 lines)
@@ -0,0 +1,185 @@
{
  "last_node_id": 5,
  "last_link_id": 4,
  "nodes": [
    {
      "id": 1,
      "type": "LoadImage",
      "pos": [50, 100],
      "size": [350, 100],
      "flags": {},
      "order": 0,
      "mode": 0,
      "outputs": [
        {"name": "IMAGE", "type": "IMAGE", "links": [1], "slot_index": 0},
        {"name": "MASK", "type": "MASK", "links": null, "slot_index": 1}
      ],
      "properties": {"Node name for S&R": "LoadImage"},
      "widgets_values": ["input_portrait.png", "image"],
      "title": "API Input Portrait"
    },
    {
      "id": 2,
      "type": "UpscaleModelLoader",
      "pos": [50, 400],
      "size": [350, 100],
      "flags": {},
      "order": 1,
      "mode": 0,
      "outputs": [
        {"name": "UPSCALE_MODEL", "type": "UPSCALE_MODEL", "links": [2], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "UpscaleModelLoader"},
      "widgets_values": ["RealESRGAN_x2.pth"],
      "title": "Face Upscale Model (2x)"
    },
    {
      "id": 3,
      "type": "ImageUpscaleWithModel",
      "pos": [450, 100],
      "size": [315, 100],
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [
        {"name": "upscale_model", "type": "UPSCALE_MODEL", "link": 2},
        {"name": "image", "type": "IMAGE", "link": 1}
      ],
      "outputs": [
        {"name": "IMAGE", "type": "IMAGE", "links": [3, 4], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "ImageUpscaleWithModel"},
      "title": "Upscale Portrait (2x)"
    },
    {
      "id": 4,
      "type": "PreviewImage",
      "pos": [800, 100],
      "size": [315, 100],
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [
        {"name": "images", "type": "IMAGE", "link": 3}
      ],
      "properties": {"Node name for S&R": "PreviewImage"},
      "title": "Preview Enhanced"
    },
    {
      "id": 5,
      "type": "SaveImage",
      "pos": [800, 400],
      "size": [315, 100],
      "flags": {},
      "order": 4,
      "mode": 0,
      "inputs": [
        {"name": "images", "type": "IMAGE", "link": 4}
      ],
      "properties": {"Node name for S&R": "SaveImage"},
      "widgets_values": ["face_upscale_output"],
      "title": "API Image Output"
    }
  ],
  "links": [
    [1, 1, 0, 3, 1, "IMAGE"],
    [2, 2, 0, 3, 0, "UPSCALE_MODEL"],
    [3, 3, 0, 4, 0, "IMAGE"],
    [4, 3, 0, 5, 0, "IMAGE"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "Face Upscale Production",
      "version": "1.1.0",
      "description": "Portrait-focused upscaling using RealESRGAN x2. Simplified workflow optimized for face enhancement. For advanced face detection with FaceDetailer, additional nodes are required.",
      "category": "upscaling",
      "tags": ["face-upscale", "portrait", "realesrgan", "production"],
      "requirements": {
        "models": ["RealESRGAN"],
        "custom_nodes": [],
        "vram_min": "8GB"
      },
      "parameters": {
        "input_image": {
          "node_id": 1,
          "type": "image",
          "required": true,
          "description": "Portrait image to upscale"
        },
        "model": {
          "node_id": 2,
          "widget_index": 0,
          "type": "string",
          "default": "RealESRGAN_x2.pth",
          "options": ["RealESRGAN_x2.pth", "RealESRGAN_x4.pth"],
          "description": "Upscale model (x2 recommended for portraits)"
        }
      },
      "performance": {
        "avg_generation_time": "5-15 seconds",
        "vram_usage": "~6-8GB"
      },
      "use_cases": [
        "Portrait enhancement",
        "Professional headshots",
        "Face restoration",
        "ID photo upscaling"
      ],
      "notes": [
        "This is a simplified version using basic upscaling",
        "For advanced face detection, use FaceDetailer from Impact-Pack",
        "FaceDetailer requires: BBOX detector, SDXL checkpoint, CLIP conditioning",
        "The current version is optimized for speed and simplicity"
      ]
    }
  },
  "version": 0.4
}
BIN  comfyui/workflows/upscaling/input_image.png (new binary file, 5.6 KiB)
BIN  comfyui/workflows/upscaling/input_portrait.png (new binary file, 6.4 KiB)
comfyui/workflows/upscaling/simple-upscale-production-v1.json (new file, 207 lines)
@@ -0,0 +1,207 @@
{
  "last_node_id": 6,
  "last_link_id": 5,
  "nodes": [
    {
      "id": 1,
      "type": "LoadImage",
      "pos": [50, 100],
      "size": [350, 100],
      "flags": {},
      "order": 0,
      "mode": 0,
      "outputs": [
        {"name": "IMAGE", "type": "IMAGE", "links": [1], "slot_index": 0},
        {"name": "MASK", "type": "MASK", "links": null, "slot_index": 1}
      ],
      "properties": {"Node name for S&R": "LoadImage"},
      "widgets_values": ["input_image.png", "image"],
      "title": "API Input Image"
    },
    {
      "id": 2,
      "type": "UpscaleModelLoader",
      "pos": [50, 400],
      "size": [350, 100],
      "flags": {},
      "order": 1,
      "mode": 0,
      "outputs": [
        {"name": "UPSCALE_MODEL", "type": "UPSCALE_MODEL", "links": [2], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "UpscaleModelLoader"},
      "widgets_values": ["RealESRGAN_x4.pth"],
      "title": "Upscale Model Loader"
    },
    {
      "id": 3,
      "type": "ImageUpscaleWithModel",
      "pos": [450, 100],
      "size": [315, 100],
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [
        {"name": "upscale_model", "type": "UPSCALE_MODEL", "link": 2},
        {"name": "image", "type": "IMAGE", "link": 1}
      ],
      "outputs": [
        {"name": "IMAGE", "type": "IMAGE", "links": [3], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "ImageUpscaleWithModel"},
      "title": "Upscale with Model (4x)"
    },
    {
      "id": 4,
      "type": "ImageScaleBy",
      "pos": [800, 100],
      "size": [315, 100],
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [
        {"name": "image", "type": "IMAGE", "link": 3}
      ],
      "outputs": [
        {"name": "IMAGE", "type": "IMAGE", "links": [4, 5], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "ImageScaleBy"},
      "widgets_values": ["lanczos", 0.5],
      "title": "Optional Downscale to 2x"
    },
    {
      "id": 5,
      "type": "PreviewImage",
      "pos": [1150, 100],
      "size": [315, 100],
      "flags": {},
      "order": 4,
      "mode": 0,
      "inputs": [
        {"name": "images", "type": "IMAGE", "link": 4}
      ],
      "properties": {"Node name for S&R": "PreviewImage"},
      "title": "Preview Output"
    },
    {
      "id": 6,
      "type": "SaveImage",
      "pos": [1150, 400],
      "size": [315, 100],
      "flags": {},
      "order": 5,
      "mode": 0,
      "inputs": [
        {"name": "images", "type": "IMAGE", "link": 5}
      ],
      "properties": {"Node name for S&R": "SaveImage"},
      "widgets_values": ["simple_upscale_output"],
      "title": "API Image Output"
    }
  ],
  "links": [
    [1, 1, 0, 3, 1, "IMAGE"],
    [2, 2, 0, 3, 0, "UPSCALE_MODEL"],
    [3, 3, 0, 4, 0, "IMAGE"],
    [4, 4, 0, 5, 0, "IMAGE"],
    [5, 4, 0, 6, 0, "IMAGE"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "Simple Upscale Production",
      "version": "1.0.0",
      "description": "Fast, straightforward upscaling using RealESRGAN. No diffusion refinement, optimized for speed.",
      "category": "upscaling",
      "tags": ["simple-upscale", "fast", "realesrgan", "production"],
      "requirements": {
        "models": ["RealESRGAN"],
        "custom_nodes": [],
        "vram_min": "8GB"
      },
      "parameters": {
        "input_image": {
          "node_id": 1,
          "type": "image",
          "required": true
        },
        "model": {
          "node_id": 2,
          "type": "string",
          "default": "RealESRGAN_x4.pth",
          "options": ["RealESRGAN_x2.pth", "RealESRGAN_x4.pth"]
        },
        "final_scale": {
          "node_id": 4,
          "widget_index": 1,
          "type": "float",
          "default": 0.5,
          "description": "Scale factor after 4x upscale (0.5 = final 2x, 1.0 = keep 4x)"
        }
      },
      "performance": {
        "avg_generation_time": "5-15 seconds",
        "vram_usage": "~6-8GB"
      }
    }
  },
  "version": 0.4
}
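The net resolution in this workflow is the upscale model's factor multiplied by the ImageScaleBy factor, so RealESRGAN_x4 followed by the 0.5 downscale yields a 2x result that keeps the detail of a 4x pass. A small sketch of that arithmetic:

```python
# Net output size = model_factor * scale_by, applied to each dimension.
def final_size(w: int, h: int, model_factor: int = 4, scale_by: float = 0.5) -> tuple[int, int]:
    return int(w * model_factor * scale_by), int(h * model_factor * scale_by)

print(final_size(1024, 768))          # (2048, 1536): net 2x
print(final_size(1024, 768, 4, 1.0))  # (4096, 3072): keep the full 4x
```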
@@ -0,0 +1,335 @@
{
  "last_node_id": 8,
  "last_link_id": 10,
  "nodes": [
    {
      "id": 1,
      "type": "LoadImage",
      "pos": [50, 100],
      "size": [350, 100],
      "flags": {},
      "order": 0,
      "mode": 0,
      "outputs": [
        {"name": "IMAGE", "type": "IMAGE", "links": [1], "slot_index": 0},
        {"name": "MASK", "type": "MASK", "links": null, "slot_index": 1}
      ],
      "properties": {"Node name for S&R": "LoadImage"},
      "widgets_values": ["input_image.png", "image"],
      "title": "API Input Image"
    },
    {
      "id": 2,
      "type": "CheckpointLoaderSimple",
      "pos": [50, 250],
      "size": [350, 100],
      "flags": {},
      "order": 1,
      "mode": 0,
      "outputs": [
        {"name": "MODEL", "type": "MODEL", "links": [2], "slot_index": 0},
        {"name": "CLIP", "type": "CLIP", "links": [3, 4], "slot_index": 1},
        {"name": "VAE", "type": "VAE", "links": [5], "slot_index": 2}
      ],
      "properties": {"Node name for S&R": "CheckpointLoaderSimple"},
      "widgets_values": ["sd_xl_base_1.0.safetensors"],
      "title": "SDXL Checkpoint Loader"
    },
    {
      "id": 3,
      "type": "CLIPTextEncode",
      "pos": [450, 100],
      "size": [400, 200],
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [
        {"name": "clip", "type": "CLIP", "link": 3}
      ],
      "outputs": [
        {"name": "CONDITIONING", "type": "CONDITIONING", "links": [6], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "CLIPTextEncode"},
      "widgets_values": ["high quality, detailed, sharp"],
      "title": "API Positive Prompt"
    },
    {
      "id": 4,
      "type": "CLIPTextEncode",
      "pos": [450, 350],
      "size": [400, 200],
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [
        {"name": "clip", "type": "CLIP", "link": 4}
      ],
      "outputs": [
        {"name": "CONDITIONING", "type": "CONDITIONING", "links": [7], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "CLIPTextEncode"},
      "widgets_values": ["blurry, low quality"],
      "title": "API Negative Prompt"
    },
    {
      "id": 8,
      "type": "UpscaleModelLoader",
      "pos": [50, 400],
      "size": [350, 100],
      "flags": {},
      "order": 4,
      "mode": 0,
      "outputs": [
        {"name": "UPSCALE_MODEL", "type": "UPSCALE_MODEL", "links": [10], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "UpscaleModelLoader"},
      "widgets_values": ["RealESRGAN_x2.pth"],
      "title": "Upscale Model Loader"
    },
    {
      "id": 5,
      "type": "UltimateSDUpscale",
      "pos": [900, 100],
      "size": [315, 100],
      "flags": {},
      "order": 5,
      "mode": 0,
      "inputs": [
        {"name": "image", "type": "IMAGE", "link": 1},
        {"name": "model", "type": "MODEL", "link": 2},
        {"name": "positive", "type": "CONDITIONING", "link": 6},
        {"name": "negative", "type": "CONDITIONING", "link": 7},
        {"name": "vae", "type": "VAE", "link": 5},
        {"name": "upscale_model", "type": "UPSCALE_MODEL", "link": 10}
      ],
      "outputs": [
        {"name": "IMAGE", "type": "IMAGE", "links": [8, 9], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "UltimateSDUpscale"},
      "widgets_values": [2, 42, "randomize", 20, 8.0, "dpmpp_2m", "karras", 0.3, "Linear", 512, 512, 8, 32, "None", 1.0, 64, 8, 16, true, false],
      "title": "Ultimate SD Upscale (2x)"
    },
    {
      "id": 6,
      "type": "PreviewImage",
      "pos": [1270, 100],
      "size": [315, 100],
      "flags": {},
      "order": 6,
      "mode": 0,
      "inputs": [
        {"name": "images", "type": "IMAGE", "link": 8}
      ],
      "properties": {"Node name for S&R": "PreviewImage"},
      "title": "Preview Output"
    },
    {
      "id": 7,
      "type": "SaveImage",
      "pos": [1270, 400],
      "size": [315, 100],
      "flags": {},
      "order": 7,
      "mode": 0,
      "inputs": [
        {"name": "images", "type": "IMAGE", "link": 9}
      ],
      "properties": {"Node name for S&R": "SaveImage"},
      "widgets_values": ["ultimate_upscale_output"],
      "title": "API Image Output"
    }
  ],
  "links": [
    [1, 1, 0, 5, 0, "IMAGE"],
    [2, 2, 0, 5, 1, "MODEL"],
    [3, 2, 1, 3, 0, "CLIP"],
    [4, 2, 1, 4, 0, "CLIP"],
    [5, 2, 2, 5, 4, "VAE"],
    [6, 3, 0, 5, 2, "CONDITIONING"],
    [7, 4, 0, 5, 3, "CONDITIONING"],
    [8, 5, 0, 6, 0, "IMAGE"],
    [9, 5, 0, 7, 0, "IMAGE"],
    [10, 8, 0, 5, 5, "UPSCALE_MODEL"]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "workflow_info": {
      "name": "Ultimate SD Upscale Production",
      "version": "1.1.0",
      "description": "Professional upscaling with Ultimate SD Upscale. Combines AI upscaling with diffusion refinement for superior detail and quality.",
      "category": "upscaling",
      "tags": ["ultimate-sd-upscale", "upscaling", "enhancement", "production"],
      "requirements": {
        "models": ["stable-diffusion-xl-base-1.0", "RealESRGAN"],
        "custom_nodes": ["ComfyUI_UltimateSDUpscale"],
        "vram_min": "18GB"
      },
      "parameters": {
        "input_image": {
          "node_id": 1,
          "type": "image",
          "required": true
        },
        "positive_prompt": {
          "node_id": 3,
          "widget_index": 0,
          "type": "string",
          "default": "high quality, detailed, sharp",
          "description": "Enhancement prompt"
        },
        "negative_prompt": {
          "node_id": 4,
          "widget_index": 0,
          "type": "string",
          "default": "blurry, low quality",
          "description": "Qualities to avoid"
        },
        "upscale_model": {
          "node_id": 8,
          "widget_index": 0,
          "type": "string",
          "default": "RealESRGAN_x2.pth",
          "options": ["RealESRGAN_x2.pth", "RealESRGAN_x4.pth"],
          "description": "Upscale model to use"
        },
        "upscale_by": {
          "node_id": 5,
          "widget_index": 0,
          "type": "float",
          "default": 2,
          "min": 0.05,
          "max": 4,
          "description": "Upscale factor"
        },
        "denoise": {
          "node_id": 5,
          "widget_index": 6,
          "type": "float",
          "default": 0.3,
          "description": "Refinement strength"
        },
        "tile_width": {
          "node_id": 5,
          "widget_index": 8,
          "type": "integer",
          "default": 512,
          "description": "Tile width for processing"
        },
        "tile_height": {
          "node_id": 5,
          "widget_index": 9,
          "type": "integer",
          "default": 512,
          "description": "Tile height for processing"
        }
      },
      "performance": {
        "avg_generation_time": "60-120 seconds (depending on input size)",
        "vram_usage": "~16-20GB"
      }
    }
  },
  "version": 0.4
}
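Per the `parameters` block above, the UltimateSDUpscale node's `widgets_values` positions are: 0 = upscale_by, 6 = denoise, 8 = tile_width, 9 = tile_height. A minimal sketch that patches them in place; the file path is assumed and the replacement values are illustrative, not recommendations from this repo:

```python
import json

with open("ultimate-sd-upscale-production-v1.json") as f:  # path assumed
    wf = json.load(f)

node = next(n for n in wf["nodes"] if n["type"] == "UltimateSDUpscale")
node["widgets_values"][0] = 2     # upscale factor (0.05-4 per the metadata)
node["widgets_values"][6] = 0.25  # gentler refinement than the 0.3 default
node["widgets_values"][8] = 768   # bigger tiles: fewer seams, more VRAM
node["widgets_values"][9] = 768

with open("ultimate-sd-upscale-custom.json", "w") as out:
    json.dump(wf, out, indent=2)
```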
compose.yaml (deleted, 104 lines)
@@ -1,104 +0,0 @@
version: '3.8'

# Multi-Modal AI Orchestration for RunPod RTX 4090
# Manages text, image, and music generation with sequential model loading

services:
  # ============================================================================
  # ORCHESTRATOR (Always Running)
  # ============================================================================
  orchestrator:
    build: ./model-orchestrator
    container_name: ai_orchestrator
    ports:
      - "9000:9000"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - ./model-orchestrator/models.yaml:/app/models.yaml:ro
    environment:
      - MODELS_CONFIG=/app/models.yaml
      - COMPOSE_PROJECT_NAME=ai
      - GPU_MEMORY_GB=24
    restart: unless-stopped
    network_mode: host

  # ============================================================================
  # TEXT GENERATION (vLLM + Qwen 2.5 7B)
  # ============================================================================
  vllm-qwen:
    build: ./vllm
    container_name: ai_vllm-qwen_1
    ports:
      - "8001:8000"
    volumes:
      - /workspace/huggingface_cache:/workspace/huggingface_cache
    environment:
      - HF_TOKEN=${HF_TOKEN}
      - VLLM_HOST=0.0.0.0
      - VLLM_PORT=8000
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    profiles: ["text"]  # Only start when requested by orchestrator
    restart: "no"       # Orchestrator manages lifecycle

  # ============================================================================
  # IMAGE GENERATION (Flux.1 Schnell)
  # ============================================================================
  flux:
    image: ghcr.io/matatonic/openedai-images-flux:latest
    container_name: ai_flux_1
    ports:
      - "8002:5005"
    volumes:
      - /workspace/flux/models:/app/models
      - ./flux/config:/app/config:ro
    environment:
      - HF_TOKEN=${HF_TOKEN}
      - CONFIG_PATH=/app/config/config.json
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    profiles: ["image"]  # Only start when requested by orchestrator
    restart: "no"        # Orchestrator manages lifecycle

  # ============================================================================
  # MUSIC GENERATION (MusicGen Medium)
  # ============================================================================
  musicgen:
    build: ./musicgen
    container_name: ai_musicgen_1
    ports:
      - "8003:8000"
    volumes:
      - /workspace/musicgen/models:/app/models
    environment:
      - HF_TOKEN=${HF_TOKEN}
      - MODEL_NAME=facebook/musicgen-medium
      - HOST=0.0.0.0
      - PORT=8000
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    profiles: ["audio"]  # Only start when requested by orchestrator
    restart: "no"        # Orchestrator manages lifecycle

# ============================================================================
# VOLUMES
# ============================================================================
# Model caches are stored on RunPod's /workspace directory (922TB network volume)
# This persists across pod restarts and reduces model download times

# No named volumes - using host paths on RunPod /workspace
@@ -1,467 +0,0 @@
# Multi-Modal AI Orchestration System

**Cost-optimized AI infrastructure running text, image, and music generation on a single RunPod RTX 4090 GPU.**

## Architecture Overview

This system provides a unified API for multiple AI model types with automatic model switching on a single GPU (24GB VRAM). All requests route through an intelligent orchestrator that manages the model lifecycle.

### Components

```
┌─────────────────────────────────────────────────────────────────┐
│                 VPS (Tailscale: 100.102.217.79)                 │
│  ┌───────────────────────────────────────────────────────────┐  │
│  │                 LiteLLM Proxy (Port 4000)                 │  │
│  │         Routes to: Claude API + GPU Orchestrator          │  │
│  └────────────────────┬──────────────────────────────────────┘  │
└───────────────────────┼─────────────────────────────────────────┘
                        │ Tailscale VPN
┌───────────────────────┼─────────────────────────────────────────┐
│          RunPod GPU Server (Tailscale: 100.100.108.13)          │
│  ┌────────────────────▼──────────────────────────────────────┐  │
│  │                 Orchestrator (Port 9000)                  │  │
│  │  Manages sequential model loading based on request type   │  │
│  └─────┬──────────────┬──────────────────┬───────────────────┘  │
│        │              │                  │                      │
│  ┌─────▼──────┐  ┌────▼────────┐  ┌──────▼───────┐              │
│  │vLLM        │  │Flux.1       │  │MusicGen      │              │
│  │Qwen 2.5 7B │  │Schnell      │  │Medium        │              │
│  │Port: 8001  │  │Port: 8002   │  │Port: 8003    │              │
│  │VRAM: 14GB  │  │VRAM: 14GB   │  │VRAM: 11GB    │              │
│  └────────────┘  └─────────────┘  └──────────────┘              │
│                                                                 │
│      Only ONE model active at a time (sequential loading)       │
└─────────────────────────────────────────────────────────────────┘
```

### Features

✅ **Automatic Model Switching** - Orchestrator detects request type and loads the appropriate model
✅ **OpenAI-Compatible APIs** - Works with existing OpenAI clients and tools
✅ **Cost-Optimized** - Sequential loading on a single GPU (~$0.50/hr vs ~$0.75/hr for multi-GPU)
✅ **Easy Model Addition** - Add new models by editing a YAML config
✅ **Centralized Routing** - LiteLLM proxy provides a unified API for all models
✅ **GPU Memory Safe** - Orchestrator ensures only one model is loaded at a time

## Supported Model Types

### Text Generation
- **Qwen 2.5 7B Instruct** (Qwen/Qwen2.5-7B-Instruct)
  - VRAM: 14GB | Speed: Fast | OpenAI-compatible chat API

### Image Generation
- **Flux.1 Schnell** (black-forest-labs/FLUX.1-schnell)
  - VRAM: 14GB | Speed: 4-5 sec/image | OpenAI DALL-E compatible API

### Music Generation
- **MusicGen Medium** (facebook/musicgen-medium)
  - VRAM: 11GB | Speed: 60-90 sec for 30s audio | Custom audio API

## Quick Start

### 1. Prerequisites

On the RunPod GPU server you need:

- A RunPod RTX 4090 instance (24GB VRAM)
- Docker & Docker Compose installed
- Tailscale VPN configured
- A HuggingFace token (for model downloads)

### 2. Clone & Configure

```bash
# On local machine
cd ai/

# Create environment file
cp .env.example .env
# Edit .env and add your HF_TOKEN
```

### 3. Deploy to RunPod

```bash
# Copy all files to RunPod GPU server
scp -r ai/* gpu-pivoine:/workspace/ai/

# SSH to GPU server
ssh gpu-pivoine

# Navigate to project
cd /workspace/ai/

# Start orchestrator (always running)
docker compose -f compose.yaml up -d orchestrator

# Orchestrator will automatically manage model services as needed
```

### 4. Test Deployment

```bash
# Check orchestrator health
curl http://100.100.108.13:9000/health

# Test text generation (auto-loads vLLM)
curl http://100.100.108.13:9000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "qwen-2.5-7b",
    "messages": [{"role": "user", "content": "Hello!"}]
  }'

# Test image generation (auto-switches to Flux)
curl http://100.100.108.13:9000/v1/images/generations \
  -H "Content-Type: application/json" \
  -d '{
    "model": "flux-schnell",
    "prompt": "a cute cat",
    "size": "1024x1024"
  }'

# Test music generation (auto-switches to MusicGen)
curl http://100.100.108.13:9000/v1/audio/generations \
  -H "Content-Type: application/json" \
  -d '{
    "model": "musicgen-medium",
    "prompt": "upbeat electronic dance music",
    "duration": 30
  }'
```

### 5. Update VPS LiteLLM

```bash
# On VPS, restart LiteLLM to pick up the new config
ssh vps
cd ~/Projects/docker-compose
arty restart litellm
```

## Usage Examples

### Via Open WebUI (https://ai.pivoine.art)

**Text Generation:**
1. Select model: `qwen-2.5-7b`
2. Type a message and send
3. Orchestrator loads vLLM automatically

**Image Generation:**
1. Select model: `flux-schnell`
2. Enter an image prompt
3. Orchestrator switches to Flux.1

**Music Generation:**
1. Select model: `musicgen-medium`
2. Describe the music you want
3. Orchestrator switches to MusicGen

### Via API (Direct)

```python
import openai

# Configure client to use orchestrator
client = openai.OpenAI(
    base_url="http://100.100.108.13:9000/v1",
    api_key="dummy"  # Not used but required
)

# Text generation
response = client.chat.completions.create(
    model="qwen-2.5-7b",
    messages=[{"role": "user", "content": "Write a haiku"}]
)

# Image generation
image = client.images.generate(
    model="flux-schnell",
    prompt="a sunset over mountains",
    size="1024x1024"
)

# Music generation (custom endpoint)
import requests
music = requests.post(
    "http://100.100.108.13:9000/v1/audio/generations",
    json={
        "model": "musicgen-medium",
        "prompt": "calm piano music",
        "duration": 30
    }
)
```

## Adding New Models

### Step 1: Update `models.yaml`

```yaml
# Add to ai/model-orchestrator/models.yaml
models:
  llama-3.1-8b:  # New model
    type: text
    framework: vllm
    docker_service: vllm-llama
    port: 8004
    vram_gb: 17
    startup_time_seconds: 120
    endpoint: /v1/chat/completions
    description: "Llama 3.1 8B Instruct - Meta's latest model"
```

### Step 2: Add Docker Service

```yaml
# Add to ai/compose.yaml
services:
  vllm-llama:
    build: ./vllm
    container_name: ai_vllm-llama_1
    command: >
      vllm serve meta-llama/Llama-3.1-8B-Instruct
      --port 8000 --dtype bfloat16
    ports:
      - "8004:8000"
    environment:
      - HF_TOKEN=${HF_TOKEN}
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    profiles: ["text"]
    restart: "no"
```

### Step 3: Restart Orchestrator

```bash
ssh gpu-pivoine
cd /workspace/ai/
docker compose -f compose.yaml restart orchestrator
```

**That's it!** The orchestrator automatically detects the new model.

## Management Commands

### Orchestrator

```bash
# Start orchestrator
docker compose -f compose.yaml up -d orchestrator

# View orchestrator logs
docker logs -f ai_orchestrator

# Restart orchestrator
docker compose -f compose.yaml restart orchestrator

# Check active model
curl http://100.100.108.13:9000/health

# List all models
curl http://100.100.108.13:9000/models
```

### Manual Model Control

```bash
# Manually switch to a specific model
curl -X POST http://100.100.108.13:9000/switch \
  -H "Content-Type: application/json" \
  -d '{"model": "flux-schnell"}'

# Check which model is running
curl http://100.100.108.13:9000/health | jq '.current_model'
```

### Model Services

```bash
# Manually start a specific model (bypassing orchestrator)
docker compose -f compose.yaml --profile text up -d vllm-qwen

# Stop a model
docker compose -f compose.yaml stop vllm-qwen

# View model logs
docker logs -f ai_vllm-qwen_1
docker logs -f ai_flux_1
docker logs -f ai_musicgen_1
```

## Monitoring

### GPU Usage

```bash
ssh gpu-pivoine "nvidia-smi"
```

### Model Status

```bash
# Which model is active?
curl http://100.100.108.13:9000/health

# Model memory usage
curl http://100.100.108.13:9000/health | jq '{current: .current_model, vram: .model_info.vram_gb}'
```

### Performance

```bash
# Orchestrator logs (model switching)
docker logs -f ai_orchestrator

# Model-specific logs
docker logs -f ai_vllm-qwen_1
docker logs -f ai_flux_1
docker logs -f ai_musicgen_1
```

## Troubleshooting

### Model Won't Load

```bash
# Check orchestrator logs
docker logs ai_orchestrator

# Check if the model service exists
docker compose -f compose.yaml config | grep -A 10 "vllm-qwen"

# Manually test the model service
docker compose -f compose.yaml --profile text up -d vllm-qwen
curl http://localhost:8001/health
```

### Orchestrator Can't Connect

```bash
# Check Docker socket permissions
ls -l /var/run/docker.sock

# Restart Docker daemon
sudo systemctl restart docker

# Rebuild orchestrator
docker compose -f compose.yaml build orchestrator
docker compose -f compose.yaml up -d orchestrator
```

### Model Switching Too Slow

```bash
# Check model startup times in models.yaml
# Adjust startup_time_seconds if needed

# Pre-download models to /workspace cache
docker run --rm -it --gpus all \
  -v /workspace/huggingface_cache:/cache \
  -e HF_HOME=/cache \
  nvidia/cuda:12.4.0-runtime-ubuntu22.04 \
  huggingface-cli download facebook/musicgen-medium
```

## File Structure

```
ai/
├── compose.yaml              # Main orchestration file
├── .env.example              # Environment template
├── README.md                 # This file
│
├── model-orchestrator/       # Central orchestrator service
│   ├── orchestrator.py       # FastAPI app managing models
│   ├── models.yaml           # Model registry (EDIT TO ADD MODELS)
│   ├── Dockerfile
│   └── requirements.txt
│
├── vllm/                     # Text generation (vLLM)
│   ├── server.py             # Qwen 2.5 7B server
│   ├── Dockerfile
│   └── requirements.txt
│
├── flux/                     # Image generation (Flux.1 Schnell)
│   └── config/
│       └── config.json       # Flux configuration
│
├── musicgen/                 # Music generation (MusicGen)
│   ├── server.py             # MusicGen API server
│   ├── Dockerfile
│   └── requirements.txt
│
├── litellm-config.yaml       # LiteLLM proxy configuration
└── GPU_DEPLOYMENT_LOG.md     # Deployment history and notes
```

## Cost Analysis

### Current Setup (Single GPU)
- **Provider**: RunPod Spot Instance
- **GPU**: RTX 4090 24GB
- **Cost**: ~$0.50/hour
- **Monthly**: ~$360 (if running 24/7)
- **Optimized**: ~$120 (8 hours/day during business hours)

### Alternative: Multi-GPU (All Models Always On)
- **GPUs**: 2× RTX 4090
- **Cost**: ~$0.75/hour
- **Monthly**: ~$540 (if running 24/7)
- **Trade-off**: No switching latency, +$180/month

### Recommendation
Stick with single-GPU sequential loading for cost optimization. Model switching (30-120 seconds) is acceptable for most use cases.

## Performance Expectations

| Model | VRAM | Startup Time | Generation Speed |
|-------|------|--------------|------------------|
| Qwen 2.5 7B | 14GB | 120s | ~50 tokens/sec |
| Flux.1 Schnell | 14GB | 60s | ~4-5 sec/image |
| MusicGen Medium | 11GB | 45s | ~60-90 sec for 30s audio |

**Model Switching**: 30-120 seconds (unload current + load new)

## Security Notes

- Orchestrator requires Docker socket access (`/var/run/docker.sock`)
- All services run on the private Tailscale network
- No public exposure (only via the VPS LiteLLM proxy)
- HuggingFace token stored in `.env` (not committed to git)

## Future Enhancements

1. ⏹️ Add Llama 3.1 8B for alternative text generation
2. ⏹️ Add Whisper Large v3 for speech-to-text
3. ⏹️ Add XTTS v2 for text-to-speech
4. ⏹️ Implement model preloading/caching for faster switching
5. ⏹️ Add usage metrics and cost tracking
6. ⏹️ Auto-stop GPU pod during idle periods

## Support

For issues or questions:
- Check orchestrator logs: `docker logs ai_orchestrator`
- View model-specific logs: `docker logs ai_<service>_1`
- Test direct model access: `curl http://localhost:<port>/health`
- Review the GPU deployment log: `GPU_DEPLOYMENT_LOG.md`

## License

Built with:
- [vLLM](https://github.com/vllm-project/vllm) - Apache 2.0
- [AudioCraft](https://github.com/facebookresearch/audiocraft) - MIT (code), CC-BY-NC (weights)
- [Flux.1](https://github.com/black-forest-labs/flux) - Apache 2.0
- [LiteLLM](https://github.com/BerriAI/litellm) - MIT

**Note**: MusicGen pre-trained weights are non-commercial (CC-BY-NC). Train your own models for commercial use with the MIT-licensed code.
@@ -1,421 +0,0 @@
# GPU Server Deployment Log

## Current Deployment (2025-11-21)

### Infrastructure
- **Provider**: RunPod (Spot Instance)
- **GPU**: NVIDIA RTX 4090 24GB
- **Disk**: 50GB local SSD (expanded from 20GB)
- **Network Volume**: 922TB at `/workspace`
- **Region**: Europe
- **Cost**: ~$0.50/hour (~$360/month if running 24/7)

### Network Configuration
- **VPN**: Tailscale (replaces WireGuard due to RunPod UDP restrictions)
- **GPU Server Tailscale IP**: 100.100.108.13
- **VPS Tailscale IP**: (get with `tailscale ip -4` on the VPS)

### SSH Access
```
Host gpu-pivoine
    HostName 213.173.102.232
    Port 29695
    User root
    IdentityFile ~/.ssh/id_ed25519
```

**Note**: RunPod Spot instances can be terminated and restarted with new ports/IPs. Update the SSH config accordingly.

### Software Stack
- **Python**: 3.11.10
- **vLLM**: 0.6.4.post1 (installed with pip)
- **PyTorch**: 2.5.1 with CUDA 12.4
- **Tailscale**: Installed via official script

### vLLM Deployment

**Custom Server**: `ai/simple_vllm_server.py`
- Uses `AsyncLLMEngine` directly to bypass multiprocessing issues
- OpenAI-compatible API endpoints:
  - `GET /v1/models` - List available models
  - `POST /v1/completions` - Text completion
  - `POST /v1/chat/completions` - Chat completion
- Default model: Qwen/Qwen2.5-7B-Instruct
- Cache directory: `/workspace/huggingface_cache`

**Deployment Command**:
```bash
# Copy server script to GPU server
scp ai/simple_vllm_server.py gpu-pivoine:/workspace/

# Start server
ssh gpu-pivoine "cd /workspace && nohup python3 simple_vllm_server.py > vllm.log 2>&1 &"

# Check status
ssh gpu-pivoine "curl http://localhost:8000/v1/models"
```

**Server Configuration** (environment variables):
- `VLLM_HOST`: 0.0.0.0 (default)
- `VLLM_PORT`: 8000 (default)

### Model Configuration
- **Model**: Qwen/Qwen2.5-7B-Instruct (no auth required)
- **Context Length**: 4096 tokens
- **GPU Memory**: 85% utilization
- **Tensor Parallel**: 1 (single GPU)
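The file `simple_vllm_server.py` itself is not included in this diff. As an illustration only, a minimal server with the configuration above might look like the sketch below (the actual file may differ in its details):

```python
#!/usr/bin/env python3
"""Hypothetical sketch of simple_vllm_server.py; illustrative, not the real file."""
import os
import uuid

from fastapi import FastAPI
from pydantic import BaseModel
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.sampling_params import SamplingParams

MODEL = "Qwen/Qwen2.5-7B-Instruct"

# Build the engine directly, bypassing vLLM's multiprocess front-end
# (the workaround described under "Known Issues" below).
engine = AsyncLLMEngine.from_engine_args(AsyncEngineArgs(
    model=MODEL,
    max_model_len=4096,            # context length from "Model Configuration"
    gpu_memory_utilization=0.85,   # 85% of the RTX 4090's 24GB
    download_dir="/workspace/huggingface_cache",
))

app = FastAPI()


class CompletionRequest(BaseModel):
    prompt: str
    max_tokens: int = 256


@app.get("/v1/models")
async def models():
    return {"object": "list", "data": [{"id": MODEL, "object": "model"}]}


@app.post("/v1/completions")
async def completions(req: CompletionRequest):
    params = SamplingParams(max_tokens=req.max_tokens)
    final = None
    # engine.generate yields a stream of partial outputs; keep the last one
    async for output in engine.generate(req.prompt, params, str(uuid.uuid4())):
        final = output
    return {"choices": [{"text": final.outputs[0].text}]}


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host=os.getenv("VLLM_HOST", "0.0.0.0"),
                port=int(os.getenv("VLLM_PORT", "8000")))
```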
### Known Issues & Solutions

#### Issue 1: vLLM Multiprocessing Errors
**Problem**: The default vLLM v1 engine fails with ZMQ/CUDA multiprocessing errors on RunPod.
**Solution**: A custom `AsyncLLMEngine` FastAPI server bypasses the multiprocessing layer entirely.

#### Issue 2: Disk Space (Solved)
**Problem**: The original 20GB disk filled up with the Hugging Face cache.
**Solution**: Expanded to 50GB and use `/workspace` for the model cache.

#### Issue 3: Gated Models
**Problem**: Llama models require Hugging Face authentication.
**Solution**: Use Qwen 2.5 7B Instruct (no auth required) or set the `HF_TOKEN` environment variable.

#### Issue 4: Spot Instance Volatility
**Problem**: RunPod Spot instances can be terminated at any time.
**Solution**: Accept this as a trade-off for cost savings. Document SSH details for quick reconnection.

### Monitoring

**Check vLLM logs**:
```bash
ssh gpu-pivoine "tail -f /workspace/vllm.log"
```

**Check GPU usage**:
```bash
ssh gpu-pivoine "nvidia-smi"
```

**Check Tailscale status**:
```bash
ssh gpu-pivoine "tailscale status"
```

**Test API locally (on GPU server)**:
```bash
ssh gpu-pivoine "curl http://localhost:8000/v1/models"
```

**Test API via Tailscale (from VPS)**:
```bash
curl http://100.100.108.13:8000/v1/models
```

### LiteLLM Integration

Update the VPS LiteLLM config at `ai/litellm-config-gpu.yaml`:

```yaml
# Replace old WireGuard IP (10.8.0.2) with Tailscale IP
- model_name: qwen-2.5-7b
  litellm_params:
    model: openai/qwen-2.5-7b
    api_base: http://100.100.108.13:8000/v1  # Tailscale IP
    api_key: dummy
    rpm: 1000
    tpm: 100000
```

Restart LiteLLM:
```bash
arty restart litellm
```

### Troubleshooting

**Server not responding**:
1. Check if the process is running: `pgrep -f simple_vllm_server`
2. Check logs: `tail -100 /workspace/vllm.log`
3. Check GPU availability: `nvidia-smi`
4. Restart the server: `pkill -f simple_vllm_server && cd /workspace && nohup python3 simple_vllm_server.py > vllm.log 2>&1 &`

**Tailscale not connected**:
1. Check status: `tailscale status`
2. Check daemon: `ps aux | grep tailscaled`
3. Restart: `tailscale down && tailscale up`

**Model download failing**:
1. Check disk space: `df -h`
2. Check cache directory: `ls -lah /workspace/huggingface_cache`
3. Clear the cache if needed: `rm -rf /workspace/huggingface_cache/*`

### Deployment Status ✅ COMPLETE

**Deployment Date**: 2025-11-21

1. ✅ Deploy vLLM with Qwen 2.5 7B - COMPLETE
2. ✅ Test API endpoints locally and via Tailscale - COMPLETE
3. ✅ Update VPS LiteLLM configuration - COMPLETE
4. ✅ Test end-to-end: Open WebUI → LiteLLM → vLLM - COMPLETE
5. ⏳ Monitor performance and costs - ONGOING

**Model Available**: `qwen-2.5-7b` visible in Open WebUI at https://ai.pivoine.art

### Next Steps (2025-11-21 Original)
6. ✅ Consider adding more models → COMPLETE (added Flux.1 Schnell + MusicGen Medium)
7. ⏹️ Set up auto-stop for idle periods to save costs

---
## Multi-Modal Architecture (2025-11-21 Update)

### Overview

Expanded the GPU deployment to support **text, image, and music generation** with intelligent model orchestration. All models run sequentially on a single RTX 4090 GPU with automatic switching based on request type.

### Architecture Components

#### 1. **Orchestrator Service** (Port 9000 - Always Running)
- **Location**: `ai/model-orchestrator/`
- **Purpose**: Central service managing the model lifecycle
- **Features**:
  - Detects request type (text/image/audio)
  - Automatically unloads the current model
  - Loads the requested model
  - Proxies requests to the active model
  - Tracks GPU memory usage
- **Technology**: FastAPI + Docker SDK for Python
- **Endpoints**:
  - `POST /v1/chat/completions` → Routes to text models
  - `POST /v1/images/generations` → Routes to image models
  - `POST /v1/audio/generations` → Routes to music models
  - `GET /health` → Shows active model and status
  - `GET /models` → Lists all available models
  - `POST /switch` → Manually switch models

#### 2. **Text Generation** (vLLM + Qwen 2.5 7B)
- **Service**: `vllm-qwen` (Port 8001)
- **Location**: `ai/vllm/`
- **Model**: Qwen/Qwen2.5-7B-Instruct
- **VRAM**: 14GB (85% GPU utilization)
- **Speed**: ~50 tokens/second
- **Startup**: 120 seconds
- **Status**: ✅ Working (same as original deployment)

#### 3. **Image Generation** (Flux.1 Schnell)
- **Service**: `flux` (Port 8002)
- **Location**: `ai/flux/`
- **Model**: black-forest-labs/FLUX.1-schnell
- **VRAM**: 14GB with CPU offloading
- **Speed**: 4-5 seconds per image
- **Startup**: 60 seconds
- **Features**: OpenAI DALL-E compatible API
- **Image**: `ghcr.io/matatonic/openedai-images-flux:latest`

#### 4. **Music Generation** (MusicGen Medium)
- **Service**: `musicgen` (Port 8003)
- **Location**: `ai/musicgen/`
- **Model**: facebook/musicgen-medium
- **VRAM**: 11GB
- **Speed**: 60-90 seconds for 30 seconds of audio
- **Startup**: 45 seconds
- **Features**: Text-to-music generation with sampling controls
- **Technology**: Meta's AudioCraft + custom FastAPI wrapper

### Model Registry (`models.yaml`)

Simple configuration file for managing all models:

```yaml
models:
  qwen-2.5-7b:
    type: text
    framework: vllm
    docker_service: vllm-qwen
    port: 8001
    vram_gb: 14
    startup_time_seconds: 120
    endpoint: /v1/chat/completions

  flux-schnell:
    type: image
    framework: openedai-images
    docker_service: flux
    port: 8002
    vram_gb: 14
    startup_time_seconds: 60
    endpoint: /v1/images/generations

  musicgen-medium:
    type: audio
    framework: audiocraft
    docker_service: musicgen
    port: 8003
    vram_gb: 11
    startup_time_seconds: 45
    endpoint: /v1/audio/generations
```

**Adding new models**: Just add a new entry to this file and define the Docker service; a quick sanity check for new entries is sketched below.
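A lightweight way to catch registry mistakes before restarting the orchestrator is to validate the YAML against the GPU budget. A minimal sketch (the file layout matches `models.yaml` above; the script itself is illustrative and not part of the repo):

```python
#!/usr/bin/env python3
"""Sanity-check models.yaml before restarting the orchestrator (illustrative)."""
import sys
import yaml

with open("model-orchestrator/models.yaml") as f:
    data = yaml.safe_load(f)

models = data.get("models", {})
gpu_total = data.get("config", {}).get("gpu_memory_total_gb", 24)

errors = []
seen_ports = {}
for name, info in models.items():
    # Every model must fit on the single GPU by itself (sequential loading).
    if info["vram_gb"] > gpu_total:
        errors.append(f"{name}: vram_gb {info['vram_gb']} exceeds GPU total {gpu_total}")
    # Ports must be unique, since each service binds its own host port.
    port = info["port"]
    if port in seen_ports:
        errors.append(f"{name}: port {port} already used by {seen_ports[port]}")
    seen_ports[port] = name
    # Keys the orchestrator reads for every model.
    for key in ("type", "docker_service", "startup_time_seconds", "endpoint"):
        if key not in info:
            errors.append(f"{name}: missing key {key!r}")

if errors:
    print("\n".join(errors))
    sys.exit(1)
print(f"OK: {len(models)} models validated")
```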
### Deployment Changes

#### Docker Compose Structure
- **File**: `compose.yaml`
- **Services**: 4 total (1 orchestrator + 3 models)
- **Profiles**: `text`, `image`, `audio` (orchestrator manages activation)
- **Restart Policy**: `no` for models (orchestrator controls lifecycle)
- **Volumes**: All model caches on `/workspace` (922TB network volume)

#### LiteLLM Integration
Updated `litellm-config.yaml` to route all self-hosted models through the orchestrator:

```yaml
# Text
- model_name: qwen-2.5-7b
  litellm_params:
    api_base: http://100.100.108.13:9000/v1  # Orchestrator

# Image
- model_name: flux-schnell
  litellm_params:
    api_base: http://100.100.108.13:9000/v1  # Orchestrator

# Music
- model_name: musicgen-medium
  litellm_params:
    api_base: http://100.100.108.13:9000/v1  # Orchestrator
```

All models are now available via Open WebUI at https://ai.pivoine.art

### Usage Examples

**Text Generation**:
```bash
curl http://100.100.108.13:9000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "qwen-2.5-7b", "messages": [{"role": "user", "content": "Hello"}]}'
```

**Image Generation**:
```bash
curl http://100.100.108.13:9000/v1/images/generations \
  -H "Content-Type: application/json" \
  -d '{"model": "flux-schnell", "prompt": "a cute cat", "size": "1024x1024"}'
```

**Music Generation**:
```bash
curl http://100.100.108.13:9000/v1/audio/generations \
  -H "Content-Type: application/json" \
  -d '{"model": "musicgen-medium", "prompt": "upbeat electronic", "duration": 30}'
```

### Deployment Commands

```bash
# Copy all files to RunPod
scp -r ai/* gpu-pivoine:/workspace/ai/

# SSH to GPU server
ssh gpu-pivoine
cd /workspace/ai/

# Start orchestrator (manages everything)
docker compose -f compose.yaml up -d orchestrator

# Check status
curl http://100.100.108.13:9000/health

# View logs
docker logs -f ai_orchestrator

# Manually switch models (optional)
curl -X POST http://100.100.108.13:9000/switch \
  -H "Content-Type: application/json" \
  -d '{"model": "flux-schnell"}'
```

### Performance Characteristics

| Model | VRAM | Startup Time | Generation Time | Notes |
|-------|------|--------------|-----------------|-------|
| Qwen 2.5 7B | 14GB | 120s | ~50 tok/sec | Fast text generation |
| Flux.1 Schnell | 14GB | 60s | 4-5s/image | High-quality images |
| MusicGen Medium | 11GB | 45s | 60-90s for 30s audio | Text-to-music |

**Model Switching Overhead**: 30-120 seconds (unload + load)

### Cost Analysis

**Current (Single GPU Sequential)**:
- Cost: ~$0.50/hour
- Monthly: ~$360 (24/7) or ~$120 (8hr/day)
- Trade-off: 30-120s switching time

**Alternative (Multi-GPU Concurrent)**:
- Cost: ~$0.75/hour (+50%)
- Monthly: ~$540 (24/7) or ~$180 (8hr/day)
- Benefit: No switching time, all models always available

**Decision**: Stick with the single GPU for cost optimization. The switching time is acceptable for most use cases.

### Known Limitations

1. **Sequential Only**: Only one model active at a time
2. **Switching Latency**: 30-120 seconds to change models
3. **MusicGen License**: Pre-trained weights are CC-BY-NC (non-commercial)
4. **Spot Instance Volatility**: Pod can be terminated at any time

### Monitoring

**Check active model**:
```bash
curl http://100.100.108.13:9000/health | jq '{model: .current_model, vram: .model_info.vram_gb}'
```

**View orchestrator logs**:
```bash
docker logs -f ai_orchestrator
```

**GPU usage**:
```bash
ssh gpu-pivoine "nvidia-smi"
```

### Deployment Status ✅ COMPLETE (Multi-Modal)

**Deployment Date**: 2025-11-21

1. ✅ Create model orchestrator service - COMPLETE
2. ✅ Deploy vLLM text generation (Qwen 2.5 7B) - COMPLETE
3. ✅ Deploy Flux.1 Schnell image generation - COMPLETE
4. ✅ Deploy MusicGen Medium music generation - COMPLETE
5. ✅ Update LiteLLM configuration - COMPLETE
6. ⏳ Test all three model types via orchestrator - READY FOR TESTING
7. ⏳ Monitor performance and costs - ONGOING

**Models Available**: `qwen-2.5-7b`, `flux-schnell`, `musicgen-medium` via Open WebUI

### Future Model Additions

**Easy to add** (just edit `models.yaml`):
- Llama 3.1 8B Instruct (text, gated model)
- Whisper Large v3 (speech-to-text)
- XTTS v2 (text-to-speech)
- Stable Diffusion XL (alternative image generation)

See `README.md` for detailed instructions on adding new models.

### Cost Optimization Ideas
1. **Auto-stop**: Configure RunPod to auto-stop after 30 minutes idle (see the watchdog sketch below)
2. **Spot Instances**: Already using Spot for a 50% cost reduction
3. **Scheduled Operation**: Run only during business hours (8 hours/day = $120/month)
4. **Smaller Models**: Use Mistral 7B or quantized models for lighter workloads
5. **Pay-as-you-go**: Manually start/stop the pod as needed
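Idea 1 needs something on the pod that can tell "idle" from "busy". A minimal watchdog sketch (assumptions: `runpodctl` is available on the pod, RunPod's standard `RUNPOD_POD_ID` environment variable is set, and `/health` exposes the `current_model` field that the `jq` examples above read; the script is illustrative, not part of the repo):

```python
#!/usr/bin/env python3
"""Illustrative idle watchdog: stop the pod after 30 minutes with no active model."""
import os
import subprocess
import time

import requests

HEALTH_URL = "http://localhost:9000/health"
IDLE_LIMIT_S = 30 * 60   # 30 minutes
POLL_S = 60

last_active = time.time()
while True:
    try:
        health = requests.get(HEALTH_URL, timeout=5).json()
        # A non-null current_model means a request loaded a model recently.
        if health.get("current_model"):
            last_active = time.time()
    except requests.RequestException:
        pass  # orchestrator unreachable; treat as idle

    if time.time() - last_active > IDLE_LIMIT_S:
        pod_id = os.environ["RUNPOD_POD_ID"]  # set by RunPod inside the pod
        subprocess.run(["runpodctl", "stop", "pod", pod_id], check=False)
        break

    time.sleep(POLL_S)
```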
### Performance Benchmarks
*To be measured after deployment*

Expected (based on RTX 4090):
- Qwen 2.5 7B: 50-80 tokens/second
- Context processing: ~2-3 seconds for 1000 tokens
- First token latency: ~200-300ms
@@ -1,416 +0,0 @@
# RunPod Template Creation Guide

This guide shows you how to create a reusable RunPod template so you never have to reinstall everything from scratch when Spot instances restart.

## Why Create a Template?

**Without Template** (Manual Setup Every Time):
- ❌ Install Docker & Docker Compose (10-15 min)
- ❌ Install Tailscale (5 min)
- ❌ Pull Docker images (10-20 min)
- ❌ Download models: Qwen (~14GB), Flux (~12GB), MusicGen (~11GB) = 30-45 min
- ❌ Configure everything (5-10 min)
- **Total: 60-90 minutes per Spot instance restart**

**With Template** (Ready to Go):
- ✅ Everything pre-installed
- ✅ Models cached in `/workspace`
- ✅ Just start the orchestrator
- **Total: 2-3 minutes**

## Template Contents

### System Software
- ✅ Docker 24.x + Docker Compose v2
- ✅ Tailscale latest
- ✅ NVIDIA Docker runtime
- ✅ Python 3.11
- ✅ Git, curl, wget, htop, nvtop

### Docker Images (Pre-built)
- ✅ `ai_orchestrator` - Model orchestration service
- ✅ `ai_vllm-qwen_1` - Text generation (vLLM + Qwen 2.5 7B)
- ✅ `ai_musicgen_1` - Music generation (AudioCraft)
- ✅ `ghcr.io/matatonic/openedai-images-flux:latest` - Image generation

### Model Cache (/workspace - Persistent)
- ✅ Qwen 2.5 7B Instruct (~14GB)
- ✅ Flux.1 Schnell (~12GB)
- ✅ MusicGen Medium (~11GB)
- **Total: ~37GB cached**

### Project Files (/workspace/ai)
- ✅ All orchestrator code
- ✅ Docker Compose configurations
- ✅ Model service configurations
- ✅ Documentation

---

## Step-by-Step Template Creation

### Prerequisites
1. RunPod account
2. Active RTX 4090 pod (or similar GPU)
3. SSH access to the pod
4. This repository cloned locally

### Step 1: Deploy Fresh Pod

```bash
# Create new RunPod instance:
# - GPU: RTX 4090 (24GB VRAM)
# - Disk: 50GB container disk
# - Network Volume: Attach or create 100GB+ volume
# - Template: Start with official PyTorch or CUDA template

# Note the SSH connection details (host, port, password)
```

### Step 2: Prepare the Instance

Run the automated preparation script:

```bash
# On your local machine, copy everything to RunPod
scp -P <PORT> -r /home/valknar/Projects/runpod/* root@<HOST>:/workspace/ai/

# SSH to the pod
ssh -p <PORT> root@<HOST>

# Run the preparation script
cd /workspace/ai
chmod +x scripts/prepare-template.sh
./scripts/prepare-template.sh
```

**What the script does:**
1. Installs Docker & Docker Compose
2. Installs Tailscale
3. Builds all Docker images
4. Pre-downloads all models
5. Validates everything works
6. Cleans up temporary files

**Estimated time: 45-60 minutes**

### Step 3: Manual Verification

After the script completes, verify everything:

```bash
# Check Docker is installed
docker --version
docker compose version

# Check Tailscale
tailscale version

# Check all images are built
docker images | grep ai_

# Check models are cached
ls -lh /workspace/huggingface_cache/
ls -lh /workspace/flux/models/
ls -lh /workspace/musicgen/models/

# Test the orchestrator starts
cd /workspace/ai
docker compose -f compose.yaml up -d orchestrator
docker logs ai_orchestrator

# Test model loading (should be fast since models are cached)
curl http://localhost:9000/health

# Stop orchestrator
docker compose -f compose.yaml down
```

### Step 4: Clean Up Before Saving

**IMPORTANT**: Remove secrets and temporary data before creating the template!

```bash
# Remove sensitive data
rm -f /workspace/ai/.env
rm -f /root/.ssh/known_hosts
rm -f /root/.bash_history

# Clear logs
rm -f /var/log/*.log
docker system prune -f --volumes  # clean dangling data but keep tagged images
# (note: adding -a here would also delete the pre-built images you want to keep)

# Clear Tailscale state (will re-authenticate on first use)
tailscale logout

# Create template-ready marker
echo "RunPod Multi-Modal AI Template v1.0" > /workspace/TEMPLATE_VERSION
echo "Created: $(date)" >> /workspace/TEMPLATE_VERSION
```

### Step 5: Save Template in RunPod Dashboard

1. **Go to RunPod Dashboard** → "My Pods"
2. **Select your prepared pod**
3. **Click "⋮" menu** → "Save as Template"
4. **Template Configuration**:
   - **Name**: `multi-modal-ai-v1.0`
   - **Description**:
     ```
     Multi-Modal AI Stack with Orchestrator
     - Text: vLLM + Qwen 2.5 7B
     - Image: Flux.1 Schnell
     - Music: MusicGen Medium
     - Models pre-cached (~37GB)
     - Ready to deploy in 2-3 minutes
     ```
   - **Category**: `AI/ML`
   - **Docker Image**: (auto-detected)
   - **Container Disk**: 50GB
   - **Expose Ports**: 9000, 8001, 8002, 8003
   - **Environment Variables** (optional):
     ```
     HF_TOKEN=<leave empty, user will add>
     TAILSCALE_AUTHKEY=<leave empty, user will add>
     ```

5. **Click "Save Template"**
6. **Wait for template creation** (5-10 minutes)
7. **Test the template** by deploying a new pod with it

---

## Using Your Template

### Deploy New Pod from Template

1. **RunPod Dashboard** → "➕ Deploy"
2. **Select "Community Templates"** or "My Templates"
3. **Choose**: `multi-modal-ai-v1.0`
4. **Configure**:
   - GPU: RTX 4090 (or compatible)
   - Network Volume: Attach your existing volume with the `/workspace` mount
   - Environment:
     - `HF_TOKEN`: Your Hugging Face token
     - (Tailscale will be configured via SSH)

5. **Deploy Pod**

### First-Time Setup (On New Pod)

```bash
# SSH to the new pod
ssh -p <PORT> root@<HOST>

# Navigate to project
cd /workspace/ai

# Create .env file
cat > .env <<EOF
HF_TOKEN=hf_your_token_here
GPU_TAILSCALE_IP=100.100.108.13
EOF

# Configure Tailscale (one-time)
tailscale up --authkey=<YOUR_TAILSCALE_KEY>

# Start orchestrator (models already cached, starts in seconds!)
docker compose -f compose.yaml up -d orchestrator

# Verify
curl http://localhost:9000/health

# Check logs
docker logs -f ai_orchestrator
```

**Total setup time: 2-3 minutes!** 🎉

### Updating SSH Config (If Spot Instance Restarts)

Since Spot instances can restart with new IPs/ports:

```bash
# On your local machine
# Update ~/.ssh/config with the new connection details

Host gpu-pivoine
    HostName <NEW_IP>
    Port <NEW_PORT>
    User root
    IdentityFile ~/.ssh/id_ed25519
```

---

## Template Maintenance

### Updating the Template

When you add new models or make improvements:

1. Deploy a pod from your existing template
2. Make your changes
3. Test everything
4. Clean up (remove secrets)
5. Save as a new template version: `multi-modal-ai-v1.1`
6. Update your documentation

### Version History

Keep track of template versions:

```
v1.0 (2025-11-21) - Initial release
- Text: Qwen 2.5 7B
- Image: Flux.1 Schnell
- Music: MusicGen Medium
- Docker orchestrator

v1.1 (future) - Planned
- Add Llama 3.1 8B
- Add Whisper Large v3
- Optimize model loading
```

---

## Troubleshooting Template Creation

### Models Not Downloading

```bash
# Manually trigger model downloads
docker compose --profile text up -d vllm-qwen
docker logs -f ai_vllm-qwen_1
# Wait for "Model loaded successfully"
docker compose stop vllm-qwen

# Repeat for other models
docker compose --profile image up -d flux
docker compose --profile audio up -d musicgen
```
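If a service keeps failing before its download completes, the weights can also be pre-fetched outside Docker with `huggingface_hub`. A minimal sketch (the cache path matches the one the compose file and the README's pre-download example use; run with `HF_TOKEN` exported if any model is gated):

```python
#!/usr/bin/env python3
"""Pre-fetch model weights into the /workspace Hugging Face cache (illustrative)."""
from huggingface_hub import snapshot_download

# Same cache directory that compose.yaml mounts into the containers.
CACHE_DIR = "/workspace/huggingface_cache"

for repo_id in (
    "Qwen/Qwen2.5-7B-Instruct",
    "black-forest-labs/FLUX.1-schnell",
    "facebook/musicgen-medium",
):
    print(f"Fetching {repo_id}...")
    snapshot_download(repo_id=repo_id, cache_dir=CACHE_DIR)
```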
### Docker Images Not Building

```bash
# Build images one at a time
docker compose -f compose.yaml build orchestrator
docker compose -f compose.yaml build vllm-qwen
docker compose -f compose.yaml build musicgen

# Check build logs for errors
docker compose -f compose.yaml build --no-cache --progress=plain orchestrator
```

### Tailscale Won't Install

```bash
# Manual Tailscale installation
curl -fsSL https://tailscale.com/install.sh | sh

# Start daemon
tailscaled --tun=userspace-networking --socks5-server=localhost:1055 &

# Test
tailscale version
```

### Template Too Large

RunPod templates have size limits. If your template is too large:

**Option 1**: Use a network volume for models
- Move models to the network volume: `/workspace/models/`
- Mount the volume when deploying from the template
- Models persist across pod restarts

**Option 2**: Reduce cached models
- Only cache the most-used model (Qwen 2.5 7B)
- Download others on first use
- Accept a slightly longer first-time startup

**Option 3**: Use Docker layer optimization
```dockerfile
# In the Dockerfile, order commands by change frequency:
# less frequently changed layers first
```

---

## Cost Analysis

### Template Storage Cost
- RunPod charges for template storage: ~$0.10/GB/month
- This template: ~50GB = **~$5/month**
- **Worth it!** Saves 60-90 minutes per Spot restart

### Time Savings
- Spot instance restarts: 2-5 times per week (highly variable)
- Time saved per restart: 60-90 minutes
- **Total saved per month: 8-20 hours**
- **Value: Priceless for rapid deployment**

---

## Advanced: Automated Template Updates

Create a CI/CD pipeline to automatically update templates:

```bash
# GitHub Actions workflow (future enhancement)
# 1. Deploy pod from template
# 2. Pull latest code
# 3. Rebuild images
# 4. Test
# 5. Save new template version
# 6. Notify team
```

---

## Template Checklist

Before saving your template, verify:

- [ ] All Docker images built and working
- [ ] All models downloaded and cached
- [ ] Tailscale installed (but logged out)
- [ ] Docker Compose files present
- [ ] `.env` file removed (secrets cleared)
- [ ] Logs cleared
- [ ] SSH keys removed
- [ ] Bash history cleared
- [ ] Template version documented
- [ ] Test deployment successful

---

## Support

If you have issues creating the template:

1. Check `/workspace/ai/scripts/prepare-template.sh` logs
2. Review Docker build logs: `docker compose build --progress=plain`
3. Check model download logs: `docker logs <container>`
4. Verify disk space: `df -h`
5. Check the network volume is mounted: `mount | grep workspace`

For RunPod-specific issues:
- RunPod Docs: https://docs.runpod.io/
- RunPod Discord: https://discord.gg/runpod

---

## Next Steps

After creating your template:

1. ✅ Test deployment from the template
2. ✅ Document it in `GPU_DEPLOYMENT_LOG.md`
3. ✅ Share the template ID with your team (if applicable)
4. ✅ Set up monitoring (Netdata, etc.)
5. ✅ Configure auto-stop for cost optimization
6. ✅ Add more models as needed

**Your multi-modal AI infrastructure is now portable and reproducible!** 🚀
@@ -1,13 +0,0 @@
{
  "model": "flux-schnell",
  "offload": true,
  "sequential_cpu_offload": false,
  "vae_tiling": true,
  "enable_model_cpu_offload": true,
  "low_vram_mode": false,
  "torch_compile": false,
  "safety_checker": false,
  "watermark": false,
  "flux_device": "cuda",
  "compile": false
}
logs/.gitkeep
Normal file
@@ -1,22 +0,0 @@
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY orchestrator.py .
COPY models.yaml .

# Expose port
EXPOSE 9000

# Run the orchestrator
CMD ["python", "orchestrator.py"]
@@ -1,89 +0,0 @@
# Model Registry for AI Orchestrator
# Add new models by appending to this file

models:
  # Text Generation Models
  qwen-2.5-7b:
    type: text
    framework: vllm
    docker_service: vllm-qwen
    port: 8001
    vram_gb: 14
    startup_time_seconds: 120
    endpoint: /v1/chat/completions
    description: "Qwen 2.5 7B Instruct - Fast text generation, no authentication required"

  # Image Generation Models
  flux-schnell:
    type: image
    framework: openedai-images
    docker_service: flux
    port: 8002
    vram_gb: 14
    startup_time_seconds: 60
    endpoint: /v1/images/generations
    description: "Flux.1 Schnell - Fast high-quality image generation (4-5 sec/image)"

  # Music Generation Models
  musicgen-medium:
    type: audio
    framework: audiocraft
    docker_service: musicgen
    port: 8003
    vram_gb: 11
    startup_time_seconds: 45
    endpoint: /v1/audio/generations
    description: "MusicGen Medium - Text-to-music generation (60-90 sec for 30s audio)"

# Example: Add more models easily by uncommenting and customizing below

# Future Text Models:
# llama-3.1-8b:
#   type: text
#   framework: vllm
#   docker_service: vllm-llama
#   port: 8004
#   vram_gb: 17
#   startup_time_seconds: 120
#   endpoint: /v1/chat/completions
#   description: "Llama 3.1 8B Instruct - Meta's latest model"

# Future Image Models:
# sdxl:
#   type: image
#   framework: openedai-images
#   docker_service: sdxl
#   port: 8005
#   vram_gb: 10
#   startup_time_seconds: 45
#   endpoint: /v1/images/generations
#   description: "Stable Diffusion XL - High quality image generation"

# Future Audio Models:
# whisper-large:
#   type: audio
#   framework: faster-whisper
#   docker_service: whisper
#   port: 8006
#   vram_gb: 3
#   startup_time_seconds: 30
#   endpoint: /v1/audio/transcriptions
#   description: "Whisper Large v3 - Speech-to-text transcription"
#
# xtts-v2:
#   type: audio
#   framework: openedai-speech
#   docker_service: tts
#   port: 8007
#   vram_gb: 3
#   startup_time_seconds: 30
#   endpoint: /v1/audio/speech
#   description: "XTTS v2 - High-quality text-to-speech with voice cloning"

# Configuration
config:
  gpu_memory_total_gb: 24
  allow_concurrent_loading: false  # Sequential loading only
  model_switch_timeout_seconds: 300  # 5 minutes max for model switching
  health_check_interval_seconds: 10
  default_model: qwen-2.5-7b
@@ -1,359 +0,0 @@
#!/usr/bin/env python3
"""
AI Model Orchestrator for RunPod RTX 4090
Manages sequential loading of text, image, and music models on a single GPU

Features:
- Automatic model switching based on request type
- OpenAI-compatible API endpoints
- Docker Compose service management
- GPU memory monitoring
- Simple YAML configuration for adding new models
"""

import asyncio
import logging
import os
import time
from typing import Dict, Optional, Any

import docker
import httpx
import yaml
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import JSONResponse

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# FastAPI app
app = FastAPI(title="AI Model Orchestrator", version="1.0.0")

# Docker client
docker_client = docker.from_env()

# Global state
current_model: Optional[str] = None
model_registry: Dict[str, Dict[str, Any]] = {}
config: Dict[str, Any] = {}


def load_model_registry():
    """Load model registry from models.yaml"""
    global model_registry, config

    config_path = os.getenv("MODELS_CONFIG", "/app/models.yaml")
    logger.info(f"Loading model registry from {config_path}")

    with open(config_path, 'r') as f:
        data = yaml.safe_load(f)

    model_registry = data.get('models', {})
    config = data.get('config', {})

    logger.info(f"Loaded {len(model_registry)} models from registry")
    for model_name, model_info in model_registry.items():
        logger.info(f"  - {model_name}: {model_info['description']}")


def get_docker_service_name(service_name: str) -> str:
    """Get full Docker service name with project prefix"""
    project_name = os.getenv("COMPOSE_PROJECT_NAME", "ai")
    return f"{project_name}_{service_name}_1"


async def stop_current_model():
    """Stop the currently running model service"""
    global current_model

    if not current_model:
        logger.info("No model currently running")
        return

    model_info = model_registry.get(current_model)
    if not model_info:
        logger.warning(f"Model {current_model} not found in registry")
        current_model = None
        return

    service_name = get_docker_service_name(model_info['docker_service'])
    logger.info(f"Stopping model: {current_model} (service: {service_name})")

    try:
        container = docker_client.containers.get(service_name)
        container.stop(timeout=30)
        logger.info(f"Stopped {current_model}")
        current_model = None
    except docker.errors.NotFound:
        logger.warning(f"Container {service_name} not found (already stopped?)")
        current_model = None
    except Exception as e:
        logger.error(f"Error stopping {service_name}: {e}")
        raise


async def start_model(model_name: str):
    """Start a model service"""
    global current_model

    if model_name not in model_registry:
        raise HTTPException(status_code=404, detail=f"Model {model_name} not found in registry")

    model_info = model_registry[model_name]
    service_name = get_docker_service_name(model_info['docker_service'])

    logger.info(f"Starting model: {model_name} (service: {service_name})")
    logger.info(f"  VRAM requirement: {model_info['vram_gb']} GB")
    logger.info(f"  Estimated startup time: {model_info['startup_time_seconds']}s")

    try:
        # Start the container
        container = docker_client.containers.get(service_name)
        container.start()

        # Wait for service to be healthy
        port = model_info['port']
        base_url = f"http://localhost:{port}"

        logger.info(f"Waiting for {model_name} to be ready at {base_url}...")

        max_wait = model_info['startup_time_seconds'] + 60  # Add buffer
        start_time = time.time()

        async with httpx.AsyncClient() as client:
            while time.time() - start_time < max_wait:
                try:
                    # Try health check or root endpoint
                    health_url = f"{base_url}/health"
                    try:
                        response = await client.get(health_url, timeout=5.0)
                        if response.status_code == 200:
                            logger.info(f"{model_name} is ready!")
                            current_model = model_name
                            return
                    except httpx.HTTPError:
                        # Try root endpoint if /health doesn't exist
                        response = await client.get(base_url, timeout=5.0)
                        if response.status_code == 200:
                            logger.info(f"{model_name} is ready!")
                            current_model = model_name
                            return
                except Exception as e:
                    logger.debug(f"Waiting for {model_name}... ({e})")

                await asyncio.sleep(5)

        raise HTTPException(
            status_code=503,
            detail=f"Model {model_name} failed to start within {max_wait}s"
        )

    except docker.errors.NotFound:
        raise HTTPException(
            status_code=500,
            detail=f"Docker service {service_name} not found. Is it defined in docker-compose?"
        )
    except HTTPException:
        # Let the 503 timeout above pass through instead of being re-wrapped as a 500
        raise
    except Exception as e:
        logger.error(f"Error starting {model_name}: {e}")
        raise HTTPException(status_code=500, detail=str(e))


async def ensure_model_running(model_name: str):
    """Ensure the specified model is running, switching if necessary"""
    if current_model == model_name:
        logger.info(f"Model {model_name} already running")
        return

    logger.info(f"Switching model: {current_model} -> {model_name}")

    # Stop current model
    await stop_current_model()

    # Start requested model
    await start_model(model_name)

    logger.info(f"Model switch complete: {model_name} is now active")


async def proxy_request(model_name: str, request: Request):
    """Proxy request to the active model service"""
    model_info = model_registry[model_name]
    port = model_info['port']

    # Get request details
    path = request.url.path
    method = request.method
    headers = dict(request.headers)
    headers.pop('host', None)  # Remove host header

    # Build target URL
    target_url = f"http://localhost:{port}{path}"

    logger.info(f"Proxying {method} request to {target_url}")

    async with httpx.AsyncClient(timeout=300.0) as client:
        # Handle different request types
        if method == "GET":
            response = await client.get(target_url, headers=headers)
        elif method == "POST":
            body = await request.body()
            response = await client.post(target_url, content=body, headers=headers)
        else:
            raise HTTPException(status_code=405, detail=f"Method {method} not supported")

    # Strip headers that no longer apply once the body is re-encoded below
    response_headers = {
        k: v for k, v in response.headers.items()
        if k.lower() not in ('content-length', 'transfer-encoding', 'content-encoding')
    }

    # Return response
    return JSONResponse(
        content=response.json() if response.headers.get('content-type', '').startswith('application/json') else response.text,
        status_code=response.status_code,
        headers=response_headers
    )


@app.on_event("startup")
async def startup_event():
    """Load model registry on startup"""
    load_model_registry()
    logger.info("AI Model Orchestrator started successfully")
    logger.info(f"GPU Memory: {config.get('gpu_memory_total_gb', 24)} GB")
    logger.info(f"Default model: {config.get('default_model', 'qwen-2.5-7b')}")


@app.get("/")
async def root():
    """Root endpoint"""
    return {
        "service": "AI Model Orchestrator",
        "version": "1.0.0",
        "current_model": current_model,
        "available_models": list(model_registry.keys())
    }


@app.get("/health")
async def health():
    """Health check endpoint"""
    return {
        "status": "healthy",
        "current_model": current_model,
        "model_info": model_registry.get(current_model) if current_model else None,
        "gpu_memory_total_gb": config.get('gpu_memory_total_gb', 24),
        "models_available": len(model_registry)
    }


@app.get("/models")
async def list_models():
    """List all available models"""
    return {
        "models": model_registry,
        "current_model": current_model
    }


@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """OpenAI-compatible chat completions endpoint (text models)"""
    # Parse request to get model name
    body = await request.json()
    model_name = body.get('model', config.get('default_model', 'qwen-2.5-7b'))

    # Validate model type
    if model_name not in model_registry:
        raise HTTPException(status_code=404, detail=f"Model {model_name} not found")

    if model_registry[model_name]['type'] != 'text':
        raise HTTPException(status_code=400, detail=f"Model {model_name} is not a text model")

    # Ensure model is running
    await ensure_model_running(model_name)

    # Proxy request to model
    return await proxy_request(model_name, request)


@app.post("/v1/images/generations")
async def image_generations(request: Request):
    """OpenAI-compatible image generation endpoint"""
    # Parse request to get model name
    body = await request.json()
    model_name = body.get('model', 'flux-schnell')

    # Validate model type
    if model_name not in model_registry:
        raise HTTPException(status_code=404, detail=f"Model {model_name} not found")

    if model_registry[model_name]['type'] != 'image':
        raise HTTPException(status_code=400, detail=f"Model {model_name} is not an image model")

    # Ensure model is running
    await ensure_model_running(model_name)

    # Proxy request to model
    return await proxy_request(model_name, request)


@app.post("/v1/audio/generations")
async def audio_generations(request: Request):
    """Custom audio generation endpoint (music/sound effects)"""
    # Parse request to get model name
    body = await request.json()
    model_name = body.get('model', 'musicgen-medium')

    # Validate model type
    if model_name not in model_registry:
        raise HTTPException(status_code=404, detail=f"Model {model_name} not found")

    if model_registry[model_name]['type'] != 'audio':
        raise HTTPException(status_code=400, detail=f"Model {model_name} is not an audio model")

    # Ensure model is running
    await ensure_model_running(model_name)

    # Proxy request to model
    return await proxy_request(model_name, request)


@app.post("/switch")
async def switch_model(request: Request):
    """Manually switch to a specific model"""
    body = await request.json()
    model_name = body.get('model')

    if not model_name:
        raise HTTPException(status_code=400, detail="Model name required")

    if model_name not in model_registry:
        raise HTTPException(status_code=404, detail=f"Model {model_name} not found")

    await ensure_model_running(model_name)

    return {
        "status": "success",
        "model": model_name,
        "message": f"Switched to {model_name}"
    }


if __name__ == "__main__":
    import uvicorn

    host = os.getenv("HOST", "0.0.0.0")
    port = int(os.getenv("PORT", "9000"))

    logger.info(f"Starting AI Model Orchestrator on {host}:{port}")

    uvicorn.run(
        app,
        host=host,
        port=port,
        log_level="info",
        access_log=True,
    )
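For reference, exercising this orchestrator (before its removal in this commit) only takes a plain HTTP client. A minimal sketch, assuming the service listens on localhost:9000 as in the `__main__` block above and that a text model named qwen-2.5-7b exists in the registry:

import httpx

BASE = "http://localhost:9000"

with httpx.Client(timeout=600.0) as client:
    # Manually point the GPU at a text model (stops whatever is running first)
    r = client.post(f"{BASE}/switch", json={"model": "qwen-2.5-7b"})
    r.raise_for_status()

    # OpenAI-compatible chat request, proxied to the active model container
    r = client.post(
        f"{BASE}/v1/chat/completions",
        json={
            "model": "qwen-2.5-7b",
            "messages": [{"role": "user", "content": "Hello!"}],
        },
    )
    print(r.json())

The generous timeout matters: a cold request can spend minutes inside `ensure_model_running` while containers are stopped and started.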
@@ -1,6 +0,0 @@
fastapi==0.104.1
uvicorn[standard]==0.24.0
httpx==0.25.1
docker==6.1.3
pyyaml==6.0.1
pydantic==2.5.0
models_civitai.yaml (new file, 393 lines)
@@ -0,0 +1,393 @@
# ============================================================================
# CivitAI Model Configuration
# ============================================================================
#
# This configuration file defines all available CivitAI models for download.
# Models are organized by category: NSFW checkpoints, LoRAs, embeddings, etc.
#
# Each model entry contains:
#   - name: Filesystem-friendly model name
#   - version_id: CivitAI version ID (used for downloads)
#   - model_id: CivitAI model ID (used for metadata queries)
#   - description: Human-readable description
#   - size_gb: Approximate size in gigabytes
#   - essential: Whether this is an essential model (true/false)
#   - type: Model type (checkpoints, loras, embeddings, etc.)
#   - tags: Array of descriptive tags
#
# ============================================================================

# Global settings
settings:
  cache_dir: /workspace/models/civitai
  output_dir: /workspace/ComfyUI/models
  parallel_downloads: 1
  retry_attempts: 3
  timeout_seconds: 3600
  rate_limit_delay: 5  # seconds between downloads

# Model categories
model_categories:
  # ==========================================================================
  # NSFW IMAGE GENERATION MODELS
  # ==========================================================================
  nsfw_checkpoints:
    - name: lustify-v7-ggwp
      version_id: "1094291"
      model_id: "573152"
      description: "LUSTIFY v7.0 GGWP - Photoreal NSFW checkpoint for women in sexual scenarios"
      size_gb: 6.31
      essential: true
      type: checkpoints
      format: safetensors
      base_model: SDXL 1.0
      vram_gb: 12
      tags: [nsfw, realistic, photography, women]
      recommended_settings:
        sampler: "DPM++ 2M SDE / DPM++ 3M SDE"
        scheduler: "Exponential / Karras"
        steps: 30
        cfg_scale: "4-7"
      notes: "Understands both danbooru tags and natural language prompting"

    - name: pony-diffusion-v6-xl
      version_id: "135867"
      model_id: "257749"
      description: "Pony Diffusion V6 XL - Versatile anime/cartoon/furry model with balanced content"
      size_gb: 6.5
      essential: true
      type: checkpoints
      format: safetensors
      base_model: SDXL 1.0
      vram_gb: 12
      tags: [nsfw, anime, furry, cartoon, versatile]
      files:
        - source: "ponyDiffusionV6XL_v6StartWithThisOne.safetensors"
          dest: "ponyDiffusionV6XL_v6StartWithThisOne.safetensors"
      training_info:
        images: "2.6M aesthetically ranked"
        ratio: "1:1:1 safe/questionable/explicit"
      notes: "Most popular anime/furry model on CivitAI, supports a wide range of styles"

    - name: realvisxl-v5
      version_id: "798204"
      model_id: "139562"
      description: "RealVisXL V5.0 Lightning - Photorealistic model for high-quality realistic images"
      size_gb: 6.8
      essential: true
      type: checkpoints
      format: safetensors
      base_model: SDXL 1.0
      vram_gb: 12
      tags: [nsfw, realistic, photorealistic, professional]
      recommended_settings:
        sampler: "DPM++ SDE Karras (4-6 steps for Lightning)"
      notes: "Excellent for photorealistic portraits and scenes, both SFW and NSFW. Lightning version for faster generation."

    - name: wai-nsfw-illustrious-sdxl
      version_id: "2167369"
      model_id: "827184"
      description: "WAI-NSFW-illustrious-SDXL v15.0 - Actively updated NSFW Illustrious variant"
      size_gb: 6.5
      essential: false
      type: checkpoints
      format: safetensors
      base_model: SDXL 1.0
      vram_gb: 12
      tags: [nsfw, anime, illustrious, artistic]
      notes: "Latest version from November 2025, frequently updated"

    - name: talmendoxl
      version_id: "131869"
      model_id: "119202"
      description: "TalmendoXL - Uncensored SDXL model biased towards photorealism"
      size_gb: 6.6
      essential: false
      type: checkpoints
      format: safetensors
      base_model: SDXL 1.0
      vram_gb: 12
      tags: [nsfw, realistic, uncensored, photorealistic]
      notes: "Known for high-quality photorealistic NSFW content"

    - name: big-lust-v1-6
      version_id: "1081768"
      model_id: "575395"
      description: "Big Lust v1.6 - SDXL NSFW checkpoint"
      size_gb: 6.5
      essential: false
      type: checkpoints
      format: safetensors
      base_model: SDXL 1.0
      vram_gb: 12
      tags: [nsfw, versatile]
      notes: "General-purpose NSFW model with good results across styles"

  # ==========================================================================
  # NEGATIVE EMBEDDINGS - SD 1.5 (Textual Inversions)
  # ==========================================================================
  embeddings_sd15:
    - name: baddream
      version_id: "77169"
      model_id: "72437"
      description: "BadDream v1.0 - Negative embedding for DreamShaper-style artifacts"
      size_gb: 0.0002
      essential: true
      type: embeddings
      format: pt
      base_model: SD 1.5
      tags: [negative, quality, sd15]
      trigger_word: "BadDream"
      usage: "embedding:BadDream"
      notes: "Use in negative prompt. Best combined with UnrealisticDream for optimal results."

    - name: unrealisticdream
      version_id: "77173"
      model_id: "72437"
      description: "UnrealisticDream v1.0 - Improves realistic images, pairs with BadDream"
      size_gb: 0.0001
      essential: true
      type: embeddings
      format: pt
      base_model: SD 1.5
      tags: [negative, realistic, sd15]
      trigger_word: "UnrealisticDream"
      usage: "embedding:UnrealisticDream"
      notes: "Use together with BadDream for best results on realistic images."

    - name: badhandv4
      version_id: "20068"
      model_id: "16993"
      description: "badhandv4 - Improves hand details with minimal style impact"
      size_gb: 0.00002
      essential: true
      type: embeddings
      format: pt
      base_model: SD 1.5
      tags: [negative, hands, anatomy, sd15]
      trigger_word: "badhandv4"
      usage: "embedding:badhandv4"
      recommended_settings:
        cfg_scale: "≥11"
      notes: "Designed for AnimeIllustDiffusion but works with other SD 1.5 models. Better results at higher CFG."

    - name: easynegative
      version_id: "9208"
      model_id: "7808"
      description: "EasyNegative - General-purpose negative embedding"
      size_gb: 0.00002
      essential: false
      type: embeddings
      format: safetensors
      base_model: SD 1.5
      tags: [negative, quality, sd15]
      trigger_word: "easynegative"
      usage: "embedding:easynegative"
      notes: "Optimized for the Counterfeit model. Effectiveness varies by model."

    - name: fastnegativev2
      version_id: "94057"
      model_id: "71961"
      description: "FastNegativeV2 - Token mix of common negative prompts"
      size_gb: 0.0002
      essential: true
      type: embeddings
      format: pt
      base_model: SD 1.5
      tags: [negative, quality, sd15]
      trigger_word: "FastNegativeV2"
      usage: "embedding:FastNegativeV2"
      notes: "V2 is balanced and doesn't require weight reduction. Use at strength 1.0."

    - name: badneganatomy
      version_id: "64063"
      model_id: "59614"
      description: "BadNegAnatomyV1-neg - Improves aesthetics and character anatomy"
      size_gb: 0.0002
      essential: true
      type: embeddings
      format: pt
      base_model: SD 1.5
      tags: [negative, anatomy, quality, sd15]
      trigger_word: "BadNegAnatomyV1-neg"
      usage: "embedding:BadNegAnatomyV1-neg"
      notes: "Particularly improves body joints and overall anatomy."

  # ==========================================================================
  # NEGATIVE EMBEDDINGS - SDXL (Textual Inversions)
  # ==========================================================================
  embeddings_sdxl:
    - name: badx-sdxl
      version_id: "981304"
      model_id: "122403"
      description: "Bad X v1.1 - SDXL negative embedding for anatomy and realism"
      size_gb: 0.000004
      essential: true
      type: embeddings
      format: pt
      base_model: SDXL 1.0
      tags: [negative, quality, anatomy, sdxl]
      trigger_word: "BadX"
      usage: "embedding:BadX"
      files:
        - source: "BadX-neg.pt"
          dest: "BadX-neg.pt"
      notes: "Use in negative prompt with LUSTIFY, RealVisXL, or other SDXL checkpoints. Fixes facial/hand artifacts."

  # ==========================================================================
  # NEGATIVE EMBEDDINGS - Pony SDXL (Textual Inversions)
  # ==========================================================================
  embeddings_pony:
    - name: pony-pdxl-hq-v3
      version_id: "720175"
      model_id: "332646"
      description: "Pony PDXL High Quality V3 (zPDXL3) - Quality enhancer for Pony models"
      size_gb: 0.0003
      essential: true
      type: embeddings
      format: safetensors
      base_model: Pony SDXL
      tags: [negative, quality, pony, nsfw]
      trigger_word: "zPDXL3"
      usage: "embedding:zPDXL3"
      files:
        - source: "zPDXL3.safetensors"
          dest: "zPDXL3.safetensors"
      recommended_settings:
        strength: "1.0-2.0"
      notes: "ONLY works with Pony Diffusion models. Removes censoring and improves quality."

    - name: pony-pdxl-xxx
      version_id: "380277"
      model_id: "332646"
      description: "Pony PDXL XXX Rating (zPDXLxxx) - Enables NSFW content"
      size_gb: 0.00003
      essential: true
      type: embeddings
      format: pt
      base_model: Pony SDXL
      tags: [negative, nsfw, pony]
      trigger_word: "zPDXLxxx"
      usage: "embedding:zPDXLxxx"
      files:
        - source: "zPDXLxxx.pt"
          dest: "zPDXLxxx.pt"
      recommended_settings:
        strength: "1.0-2.0"
      notes: "ONLY for Pony Diffusion models. Enables explicit NSFW content generation."

  # ==========================================================================
  # LORA MODELS (Style Enhancement & Detail)
  # ==========================================================================
  loras:
    - name: add-detail-xl
      version_id: "135867"
      model_id: "122359"
      description: "Add Detail XL - Detail enhancement LoRA for SDXL"
      size_gb: 0.21
      essential: true
      type: loras
      format: safetensors
      base_model: SDXL 1.0
      tags: [enhancement, detail, quality]
      recommended_settings:
        strength_model: "0.5-1.0"
        strength_clip: "0.5-1.0"
      notes: "Popular detail-enhancer LoRA, works with all SDXL checkpoints"

    - name: siesta-v1-1
      version_id: "unknown"
      model_id: "unknown"
      description: "Siesta v1.1 - Style LoRA"
      size_gb: 0.04
      essential: false
      type: loras
      format: safetensors
      base_model: SDXL 1.0
      tags: [style, artistic]
      notes: "Manually downloaded LoRA; version_id needs to be updated"

# ============================================================================
# STORAGE & VRAM SUMMARIES
# ============================================================================

storage_requirements:
  essential_only:
    nsfw_checkpoints: 19.61  # LUSTIFY + Pony + RealVisXL
    total: 19.61

  all_models:
    nsfw_checkpoints: 39.21  # All 6 checkpoints
    total: 39.21

vram_requirements:
  # For 24GB GPU (RTX 4090)
  notes: |
    All SDXL checkpoints require approximately 12GB VRAM.
    Can run one checkpoint at a time on a 24GB GPU.
    For simultaneous loading with other models (ComfyUI, vLLM),
    ensure combined VRAM usage stays under 24GB.

# ============================================================================
# INSTALLATION PROFILES
# ============================================================================

installation_profiles:
  essential:
    description: Essential NSFW models only (3 checkpoints)
    categories: [nsfw_checkpoints]
    essential_only: true
    storage_gb: 19.61
    estimated_time: "30-60 minutes"
    models:
      - lustify-v7-ggwp
      - pony-diffusion-v6-xl
      - realvisxl-v5

  complete:
    description: All NSFW models (6 checkpoints)
    categories: [nsfw_checkpoints]
    storage_gb: 39.21
    estimated_time: "1-2 hours"
    models:
      - lustify-v7-ggwp
      - pony-diffusion-v6-xl
      - realvisxl-v5
      - wai-nsfw-illustrious-sdxl
      - talmendoxl
      - big-lust-v1-6

  realistic_only:
    description: Photorealistic NSFW models only
    categories: [nsfw_checkpoints]
    storage_gb: 13.11
    estimated_time: "20-40 minutes"
    models:
      - lustify-v7-ggwp
      - realvisxl-v5

  anime_only:
    description: Anime/cartoon NSFW models only
    categories: [nsfw_checkpoints]
    storage_gb: 13.0
    estimated_time: "20-40 minutes"
    models:
      - pony-diffusion-v6-xl
      - wai-nsfw-illustrious-sdxl

# ============================================================================
# METADATA
# ============================================================================

metadata:
  version: 1.0.0
  last_updated: 2025-11-23
  compatible_with:
    - ComfyUI >= 0.1.0
    - SDXL base models
  maintainer: Valknar
  repository: https://github.com/valknar/runpod
  notes: |
    All models listed are NSFW (adult content) and hosted on CivitAI.
    Requires CIVITAI_API_KEY for downloads.
    Models will be downloaded to cache_dir and symlinked to ComfyUI.
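A minimal download sketch for one of these entries. It assumes CivitAI's public download endpoint of the form https://civitai.com/api/download/models/{version_id} with bearer-token auth via the CIVITAI_API_KEY mentioned in the metadata; the endpoint shape, redirect handling, and the helper itself are assumptions for illustration, not code from this repo:

import os
from pathlib import Path

import httpx

def download_civitai_model(version_id: str, dest: Path) -> None:
    """Stream one CivitAI model file to dest, authenticating with the API key."""
    url = f"https://civitai.com/api/download/models/{version_id}"
    headers = {"Authorization": f"Bearer {os.environ['CIVITAI_API_KEY']}"}

    # follow_redirects=True: downloads are typically redirected to a CDN URL
    with httpx.stream("GET", url, headers=headers, follow_redirects=True,
                      timeout=3600) as response:
        response.raise_for_status()
        dest.parent.mkdir(parents=True, exist_ok=True)
        with open(dest, "wb") as f:
            for chunk in response.iter_bytes():
                f.write(chunk)

# e.g. the LUSTIFY checkpoint from the registry above
download_civitai_model(
    "1094291",
    Path("/workspace/models/civitai/checkpoints/lustify-v7-ggwp.safetensors"),
)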
models_huggingface.yaml (new file, 715 lines)
@@ -0,0 +1,715 @@
|
|||||||
|
settings:
|
||||||
|
cache_dir: /workspace/huggingface_cache
|
||||||
|
parallel_downloads: 1
|
||||||
|
retry_attempts: 3
|
||||||
|
timeout_seconds: 3600
|
||||||
|
model_categories:
|
||||||
|
image_models:
|
||||||
|
- repo_id: black-forest-labs/FLUX.1-schnell
|
||||||
|
description: FLUX.1 Schnell - Fast 4-step inference
|
||||||
|
size_gb: 23
|
||||||
|
essential: true
|
||||||
|
category: image
|
||||||
|
format: fp16
|
||||||
|
vram_gb: 23
|
||||||
|
notes: Industry-leading image generation quality
|
||||||
|
files:
|
||||||
|
- source: flux1-schnell.safetensors
|
||||||
|
dest: unet/flux1-schnell.safetensors
|
||||||
|
- repo_id: black-forest-labs/FLUX.1-dev
|
||||||
|
description: FLUX.1 Dev - Balanced quality/speed
|
||||||
|
size_gb: 23
|
||||||
|
essential: false
|
||||||
|
category: image
|
||||||
|
format: fp16
|
||||||
|
vram_gb: 23
|
||||||
|
notes: Development version with enhanced features
|
||||||
|
files:
|
||||||
|
- source: flux1-dev.safetensors
|
||||||
|
dest: unet/flux1-dev.safetensors
|
||||||
|
- repo_id: runwayml/stable-diffusion-v1-5
|
||||||
|
description: SD 1.5 - For AnimateDiff
|
||||||
|
size_gb: 4
|
||||||
|
essential: true
|
||||||
|
category: image
|
||||||
|
format: fp16
|
||||||
|
vram_gb: 8
|
||||||
|
notes: Stable Diffusion 1.5 required for AnimateDiff motion modules
|
||||||
|
files:
|
||||||
|
- source: v1-5-pruned-emaonly.safetensors
|
||||||
|
dest: checkpoints/v1-5-pruned-emaonly.safetensors
|
||||||
|
- repo_id: stabilityai/stable-diffusion-xl-base-1.0
|
||||||
|
description: SDXL Base 1.0 - Industry standard
|
||||||
|
size_gb: 7
|
||||||
|
essential: true
|
||||||
|
category: image
|
||||||
|
format: fp16
|
||||||
|
vram_gb: 12
|
||||||
|
notes: Most widely used Stable Diffusion model
|
||||||
|
files:
|
||||||
|
- source: sd_xl_base_1.0.safetensors
|
||||||
|
dest: checkpoints/sd_xl_base_1.0.safetensors
|
||||||
|
- repo_id: stabilityai/stable-diffusion-xl-refiner-1.0
|
||||||
|
description: SDXL Refiner 1.0 - Enhances base output
|
||||||
|
size_gb: 6
|
||||||
|
essential: false
|
||||||
|
category: image
|
||||||
|
format: fp16
|
||||||
|
vram_gb: 12
|
||||||
|
notes: Use after SDXL base for improved details
|
||||||
|
files:
|
||||||
|
- source: sd_xl_refiner_1.0.safetensors
|
||||||
|
dest: checkpoints/sd_xl_refiner_1.0.safetensors
|
||||||
|
- repo_id: stabilityai/stable-diffusion-3.5-large
|
||||||
|
description: SD 3.5 Large Complete - Checkpoint and text encoders
|
||||||
|
size_gb: 31
|
||||||
|
essential: false
|
||||||
|
category: image
|
||||||
|
format: mixed
|
||||||
|
vram_gb: 20
|
||||||
|
notes: Complete SD3.5 Large model with checkpoint and all text encoders (CLIP-L,
|
||||||
|
CLIP-G, T5-XXL)
|
||||||
|
files:
|
||||||
|
- source: sd3.5_large.safetensors
|
||||||
|
dest: checkpoints/sd3.5_large.safetensors
|
||||||
|
- source: text_encoders/clip_l.safetensors
|
||||||
|
dest: checkpoints/clip_l.safetensors
|
||||||
|
- source: text_encoders/clip_g.safetensors
|
||||||
|
dest: checkpoints/clip_g.safetensors
|
||||||
|
- source: text_encoders/t5xxl_fp16.safetensors
|
||||||
|
dest: checkpoints/t5xxl_fp16.safetensors
|
||||||
|
- repo_id: John6666/diving-illustrious-real-asian-v50-sdxl
|
||||||
|
description: Diving Illustrious Real Asian v5.0 - Photorealistic Asian subjects
|
||||||
|
size_gb: 7
|
||||||
|
essential: false
|
||||||
|
category: image
|
||||||
|
format: fp16
|
||||||
|
vram_gb: 12
|
||||||
|
notes: SDXL fine-tune specializing in photorealistic Asian subjects with illustrious
|
||||||
|
quality
|
||||||
|
files:
|
||||||
|
- source: unet/diffusion_pytorch_model.safetensors
|
||||||
|
dest: checkpoints/diving-illustrious-real-asian-v50-sdxl.safetensors
|
||||||
|
- repo_id: playgroundai/playground-v2.5-1024px-aesthetic
|
||||||
|
description: Playground v2.5 - 1024px aesthetic images
|
||||||
|
size_gb: 7
|
||||||
|
essential: false
|
||||||
|
category: image
|
||||||
|
format: fp16
|
||||||
|
vram_gb: 12
|
||||||
|
notes: Highly aesthetic 1024x1024 outputs, outperforms SDXL and DALL-E 3 in user
|
||||||
|
studies
|
||||||
|
files:
|
||||||
|
- source: playground-v2.5-1024px-aesthetic.fp16.safetensors
|
||||||
|
dest: checkpoints/playground-v2.5-1024px-aesthetic.safetensors
|
||||||
|
- repo_id: Lykon/dreamshaper-8
|
||||||
|
description: DreamShaper 8 - Multi-style versatile model
|
||||||
|
size_gb: 4
|
||||||
|
essential: false
|
||||||
|
category: image
|
||||||
|
format: fp16
|
||||||
|
vram_gb: 8
|
||||||
|
notes: Versatile SD1.5 fine-tune balancing photorealistic and anime styles with
|
||||||
|
strong LoRA support
|
||||||
|
files:
|
||||||
|
- source: unet/diffusion_pytorch_model.fp16.safetensors
|
||||||
|
dest: checkpoints/dreamshaper-8.safetensors
|
||||||
|
video_models:
|
||||||
|
- repo_id: THUDM/CogVideoX-5b
|
||||||
|
description: CogVideoX-5B - Professional text-to-video
|
||||||
|
size_gb: 20
|
||||||
|
essential: true
|
||||||
|
category: video
|
||||||
|
format: fp16
|
||||||
|
vram_gb: 20
|
||||||
|
frames: 49
|
||||||
|
resolution: 720p
|
||||||
|
notes: State-of-the-art text-to-video generation, auto-downloaded by DownloadAndLoadCogVideoModel
|
||||||
|
node
|
||||||
|
files:
|
||||||
|
- source: transformer/diffusion_pytorch_model-00001-of-00002.safetensors
|
||||||
|
dest: diffusion_models/cogvideox-5b-transformer-00001-of-00002.safetensors
|
||||||
|
- source: transformer/diffusion_pytorch_model-00002-of-00002.safetensors
|
||||||
|
dest: diffusion_models/cogvideox-5b-transformer-00002-of-00002.safetensors
|
||||||
|
- source: transformer/diffusion_pytorch_model.safetensors.index.json
|
||||||
|
dest: diffusion_models/cogvideox-5b-transformer.safetensors.index.json
|
||||||
|
- repo_id: THUDM/CogVideoX-5b-I2V
|
||||||
|
description: CogVideoX-5B-I2V - Image-to-video generation
|
||||||
|
size_gb: 20
|
||||||
|
essential: true
|
||||||
|
category: video
|
||||||
|
format: fp16
|
||||||
|
vram_gb: 20
|
||||||
|
frames: 49
|
||||||
|
resolution: 720p
|
||||||
|
notes: Image-to-video model, auto-downloaded by DownloadAndLoadCogVideoModel node
|
||||||
|
files:
|
||||||
|
- source: transformer/diffusion_pytorch_model-00001-of-00003.safetensors
|
||||||
|
dest: diffusion_models/cogvideox-5b-i2v-transformer-00001-of-00003.safetensors
|
||||||
|
- source: transformer/diffusion_pytorch_model-00002-of-00003.safetensors
|
||||||
|
dest: diffusion_models/cogvideox-5b-i2v-transformer-00002-of-00003.safetensors
|
||||||
|
- source: transformer/diffusion_pytorch_model-00003-of-00003.safetensors
|
||||||
|
dest: diffusion_models/cogvideox-5b-i2v-transformer-00003-of-00003.safetensors
|
||||||
|
- source: transformer/diffusion_pytorch_model.safetensors.index.json
|
||||||
|
dest: diffusion_models/cogvideox-5b-i2v-transformer.safetensors.index.json
|
||||||
|
- repo_id: stabilityai/stable-video-diffusion-img2vid
|
||||||
|
description: SVD - 14 frame image-to-video
|
||||||
|
size_gb: 8
|
||||||
|
essential: true
|
||||||
|
category: video
|
||||||
|
format: fp16
|
||||||
|
vram_gb: 20
|
||||||
|
frames: 14
|
||||||
|
resolution: 576x1024
|
||||||
|
notes: Convert images to short video clips
|
||||||
|
files:
|
||||||
|
- source: svd.safetensors
|
||||||
|
dest: checkpoints/svd.safetensors
|
||||||
|
- repo_id: stabilityai/stable-video-diffusion-img2vid-xt
|
||||||
|
description: SVD-XT - 25 frame image-to-video
|
||||||
|
size_gb: 8
|
||||||
|
essential: false
|
||||||
|
category: video
|
||||||
|
format: fp16
|
||||||
|
vram_gb: 20
|
||||||
|
frames: 25
|
||||||
|
resolution: 576x1024
|
||||||
|
notes: Extended frame count version
|
||||||
|
files:
|
||||||
|
- source: svd_xt.safetensors
|
||||||
|
dest: checkpoints/svd_xt.safetensors
|
||||||
|
- repo_id: Comfy-Org/HunyuanVideo_repackaged
|
||||||
|
description: HunyuanVideo Complete - 720p T2V/I2V models with VAE and encoders
|
||||||
|
size_gb: 51
|
||||||
|
essential: true
|
||||||
|
category: video
|
||||||
|
format: bf16
|
||||||
|
vram_gb: 24
|
||||||
|
frames: 129
|
||||||
|
resolution: 720p
|
||||||
|
notes: Complete HunyuanVideo family - T2V, I2V v1/v2, 3D VAE, LLaVA LLaMA3 text/vision
|
||||||
|
encoders
|
||||||
|
files:
|
||||||
|
- source: split_files/diffusion_models/hunyuan_video_t2v_720p_bf16.safetensors
|
||||||
|
dest: diffusion_models/hunyuan_video_t2v_720p_bf16.safetensors
|
||||||
|
- source: split_files/diffusion_models/hunyuan_video_image_to_video_720p_bf16.safetensors
|
||||||
|
dest: diffusion_models/hunyuan_video_image_to_video_720p_bf16.safetensors
|
||||||
|
- source: split_files/diffusion_models/hunyuan_video_v2_replace_image_to_video_720p_bf16.safetensors
|
||||||
|
dest: diffusion_models/hunyuan_video_v2_replace_image_to_video_720p_bf16.safetensors
|
||||||
|
- source: split_files/vae/hunyuan_video_vae_bf16.safetensors
|
||||||
|
dest: diffusion_models/hunyuan_video_vae_bf16.safetensors
|
||||||
|
- source: split_files/text_encoders/llava_llama3_fp8_scaled.safetensors
|
||||||
|
dest: diffusion_models/llava_llama3_fp8_scaled.safetensors
|
||||||
|
- source: split_files/clip_vision/llava_llama3_vision.safetensors
|
||||||
|
dest: diffusion_models/llava_llama3_vision.safetensors
|
||||||
|
- repo_id: Comfy-Org/HunyuanVideo_1.5_repackaged
|
||||||
|
description: HunyuanVideo 1.5 Complete - 720p/1080p T2V/SR with encoders
|
||||||
|
size_gb: 51.5
|
||||||
|
essential: true
|
||||||
|
category: video
|
||||||
|
format: fp16
|
||||||
|
vram_gb: 24
|
||||||
|
frames: 129-257
|
||||||
|
resolution: 720p-1080p
|
||||||
|
notes: Complete HunyuanVideo 1.5 - T2V 720p, SR 1080p, VAE, Qwen 2.5 VL, ByT5
|
||||||
|
GlyphXL encoders
|
||||||
|
files:
|
||||||
|
- source: hunyuanvideo1.5_720p_t2v_fp16.safetensors
|
||||||
|
dest: diffusion_models/hunyuanvideo1.5_720p_t2v_fp16.safetensors
|
||||||
|
- source: hunyuanvideo1.5_1080p_sr_distilled_fp16.safetensors
|
||||||
|
dest: diffusion_models/hunyuanvideo1.5_1080p_sr_distilled_fp16.safetensors
|
||||||
|
- source: hunyuanvideo15_vae_fp16.safetensors
|
||||||
|
dest: diffusion_models/hunyuanvideo15_vae_fp16.safetensors
|
||||||
|
- source: qwen_2.5_vl_7b_fp8_scaled.safetensors
|
||||||
|
dest: diffusion_models/qwen_2.5_vl_7b_fp8_scaled.safetensors
|
||||||
|
- source: byt5_small_glyphxl_fp16.safetensors
|
||||||
|
dest: diffusion_models/byt5_small_glyphxl_fp16.safetensors
|
||||||
|
- repo_id: Comfy-Org/Wan_2.2_ComfyUI_Repackaged
|
||||||
|
description: Wan2.2 Complete - All video models, VAEs, and LoRAs
|
||||||
|
size_gb: 220
|
||||||
|
essential: true
|
||||||
|
category: video
|
||||||
|
format: mixed
|
||||||
|
vram_gb: 24
|
||||||
|
frames: 81
|
||||||
|
resolution: 640x640
|
||||||
|
notes: Complete Wan2.2 model family - TI2V 5B, T2V 14B, I2V 14B, Animate, S2V,
|
||||||
|
Fun Inpaint/Control/Camera, VAEs, CLIP Vision H, Wav2Vec2, and LoRA accelerators
|
||||||
|
files:
|
||||||
|
- source: wan2.2_ti2v_5B_fp16.safetensors
|
||||||
|
dest: diffusion_models/wan2.2_ti2v_5B_fp16.safetensors
|
||||||
|
- source: wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors
|
||||||
|
dest: diffusion_models/wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors
|
||||||
|
- source: wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors
|
||||||
|
dest: diffusion_models/wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors
|
||||||
|
- source: wan2.2_i2v_high_noise_14B_fp16.safetensors
|
||||||
|
dest: diffusion_models/wan2.2_i2v_high_noise_14B_fp16.safetensors
|
||||||
|
- source: wan2.2_i2v_low_noise_14B_fp16.safetensors
|
||||||
|
dest: diffusion_models/wan2.2_i2v_low_noise_14B_fp16.safetensors
|
||||||
|
- source: wan2.2_animate_14B_bf16.safetensors
|
||||||
|
dest: diffusion_models/wan2.2_animate_14B_bf16.safetensors
|
||||||
|
- source: wan2.2_s2v_14B_fp8_scaled.safetensors
|
||||||
|
dest: diffusion_models/wan2.2_s2v_14B_fp8_scaled.safetensors
|
||||||
|
- source: wan2.2_s2v_14B_bf16.safetensors
|
||||||
|
dest: diffusion_models/wan2.2_s2v_14B_bf16.safetensors
|
||||||
|
- source: wan2.2_fun_inpaint_high_noise_14B_fp8_scaled.safetensors
|
||||||
|
dest: diffusion_models/wan2.2_fun_inpaint_high_noise_14B_fp8_scaled.safetensors
|
||||||
|
- source: wan2.2_fun_inpaint_low_noise_14B_fp8_scaled.safetensors
|
||||||
|
dest: diffusion_models/wan2.2_fun_inpaint_low_noise_14B_fp8_scaled.safetensors
|
||||||
|
- source: wan2.2_fun_control_high_noise_14B_fp8_scaled.safetensors
|
||||||
|
dest: diffusion_models/wan2.2_fun_control_high_noise_14B_fp8_scaled.safetensors
|
||||||
|
- source: wan2.2_fun_control_low_noise_14B_fp8_scaled.safetensors
|
||||||
|
dest: diffusion_models/wan2.2_fun_control_low_noise_14B_fp8_scaled.safetensors
|
||||||
|
- source: wan2.2_fun_camera_high_noise_14B_fp8_scaled.safetensors
|
||||||
|
dest: diffusion_models/wan2.2_fun_camera_high_noise_14B_fp8_scaled.safetensors
|
||||||
|
- source: wan2.2_fun_camera_low_noise_14B_fp8_scaled.safetensors
|
||||||
|
dest: diffusion_models/wan2.2_fun_camera_low_noise_14B_fp8_scaled.safetensors
|
||||||
|
- source: wan2.2_vae.safetensors
|
||||||
|
dest: diffusion_models/wan2.2_vae.safetensors
|
||||||
|
- source: wan_2.1_vae.safetensors
|
||||||
|
dest: diffusion_models/wan_2.1_vae.safetensors
|
||||||
|
- source: clip_vision_h.safetensors
|
||||||
|
dest: diffusion_models/clip_vision_h.safetensors
|
||||||
|
- source: wav2vec2_large_english_fp16.safetensors
|
||||||
|
dest: diffusion_models/wav2vec2_large_english_fp16.safetensors
|
||||||
|
- source: lightx2v_I2V_14B_480p_cfg_step_distill_rank64_bf16.safetensors
|
||||||
|
dest: diffusion_models/lightx2v_I2V_14B_480p_cfg_step_distill_rank64_bf16.safetensors
|
||||||
|
- source: wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors
|
||||||
|
dest: diffusion_models/wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors
|
||||||
|
- source: wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors
|
||||||
|
dest: diffusion_models/wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors
|
||||||
|
- source: wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors
|
||||||
|
dest: diffusion_models/wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors
|
||||||
|
audio_models:
|
||||||
|
- repo_id: facebook/musicgen-small
|
||||||
|
description: MusicGen Small - Fast generation
|
||||||
|
size_gb: 3
|
||||||
|
essential: false
|
||||||
|
category: audio
|
||||||
|
format: fp32
|
||||||
|
vram_gb: 4
|
||||||
|
duration_seconds: 30
|
||||||
|
notes: Fastest music generation, lower quality
|
||||||
|
files:
|
||||||
|
- source: pytorch_model.bin
|
||||||
|
dest: musicgen/musicgen-small-pytorch_model.bin
|
||||||
|
- repo_id: facebook/musicgen-medium
|
||||||
|
description: MusicGen Medium - Balanced quality
|
||||||
|
size_gb: 11
|
||||||
|
essential: true
|
||||||
|
category: audio
|
||||||
|
format: fp32
|
||||||
|
vram_gb: 8
|
||||||
|
duration_seconds: 30
|
||||||
|
notes: Best balance of speed and quality
|
||||||
|
files:
|
||||||
|
- source: pytorch_model.bin
|
||||||
|
dest: musicgen/musicgen-medium-pytorch_model.bin
|
||||||
|
- repo_id: facebook/musicgen-large
|
||||||
|
description: MusicGen Large - Highest quality
|
||||||
|
size_gb: 22
|
||||||
|
essential: false
|
||||||
|
category: audio
|
||||||
|
format: fp32
|
||||||
|
vram_gb: 16
|
||||||
|
duration_seconds: 30
|
||||||
|
notes: Best quality, slower generation
|
||||||
|
files:
|
||||||
|
- source: pytorch_model-00001-of-00002.bin
|
||||||
|
dest: musicgen/musicgen-large-pytorch_model-00001-of-00002.bin
|
||||||
|
- source: pytorch_model-00002-of-00002.bin
|
||||||
|
dest: musicgen/musicgen-large-pytorch_model-00002-of-00002.bin
|
||||||
|
- source: pytorch_model.bin.index.json
|
||||||
|
dest: musicgen/musicgen-large-pytorch_model.bin.index.json
|
||||||
|
- repo_id: Comfy-Org/ACE-Step_ComfyUI_repackaged
|
||||||
|
description: ACE Step v1 3.5B - Fast coherent music generation with 19-language
|
||||||
|
support
|
||||||
|
size_gb: 7.7
|
||||||
|
essential: true
|
||||||
|
category: audio
|
||||||
|
format: safetensors
|
||||||
|
vram_gb: 16
|
||||||
|
duration_seconds: 240
|
||||||
|
notes: 15x faster than LLM baselines, superior structural coherence, voice cloning,
|
||||||
|
19-language lyrics
|
||||||
|
files:
|
||||||
|
- source: all_in_one/ace_step_v1_3.5b.safetensors
|
||||||
|
dest: checkpoints/ace_step_v1_3.5b.safetensors
|
||||||
|
- repo_id: ACE-Step/ACE-Step-v1-chinese-rap-LoRA
|
||||||
|
description: ACE Step Chinese RAP LoRA - Enhanced Chinese pronunciation and hip-hop
|
||||||
|
genre
|
||||||
|
size_gb: 0.3
|
||||||
|
essential: false
|
||||||
|
category: audio
|
||||||
|
format: safetensors
|
||||||
|
notes: Improves Chinese pronunciation accuracy and hip-hop/electronic genre adherence
|
||||||
|
files:
|
||||||
|
- source: pytorch_lora_weights.safetensors
|
||||||
|
dest: loras/ace-step-chinese-rap-lora.safetensors
|
||||||
|
support_models:
|
||||||
|
- repo_id: openai/clip-vit-large-patch14
|
||||||
|
description: CLIP H - For SD 1.5 IP-Adapter
|
||||||
|
size_gb: 2
|
||||||
|
essential: true
|
||||||
|
category: support
|
||||||
|
format: fp32
|
||||||
|
vram_gb: 2
|
||||||
|
notes: Text-image understanding model for IP-Adapter
|
||||||
|
files:
|
||||||
|
- source: model.safetensors
|
||||||
|
dest: clip_vision/CLIP-ViT-H-14-laion2B-s32B-b79K.safetensors
|
||||||
|
- repo_id: laion/CLIP-ViT-bigG-14-laion2B-39B-b160k
|
||||||
|
description: CLIP G - For SDXL IP-Adapter
|
||||||
|
size_gb: 7
|
||||||
|
essential: true
|
||||||
|
category: support
|
||||||
|
format: fp32
|
||||||
|
vram_gb: 4
|
||||||
|
notes: Larger CLIP model for SDXL IP-Adapter
|
||||||
|
files:
|
||||||
|
- source: open_clip_model.safetensors
|
||||||
|
dest: clip_vision/CLIP-ViT-bigG-14-laion2B-39B-b160k.safetensors
|
||||||
|
- repo_id: google/siglip-so400m-patch14-384
|
||||||
|
description: SigLIP - For FLUX models
|
||||||
|
size_gb: 2
|
||||||
|
essential: true
|
||||||
|
category: support
|
||||||
|
format: fp32
|
||||||
|
vram_gb: 2
|
||||||
|
notes: Advanced image-text alignment
|
||||||
|
files:
|
||||||
|
- source: model.safetensors
|
||||||
|
dest: clip_vision/siglip-so400m-patch14-384.safetensors
|
||||||
|
- repo_id: black-forest-labs/FLUX.1-schnell
|
||||||
|
description: FLUX VAE - Autoencoder for FLUX models
|
||||||
|
size_gb: 0.5
|
||||||
|
essential: true
|
||||||
|
category: support
|
||||||
|
format: safetensors
|
||||||
|
vram_gb: 1
|
||||||
|
notes: VAE autoencoder required for FLUX image decoding
|
||||||
|
files:
|
||||||
|
- source: ae.safetensors
|
||||||
|
dest: vae/ae.safetensors
|
||||||
|
- repo_id: ai-forever/Real-ESRGAN
|
||||||
|
description: RealESRGAN x2 - 2x upscaling model
|
||||||
|
size_gb: 0.06
|
||||||
|
essential: true
|
||||||
|
category: support
|
||||||
|
format: pth
|
||||||
|
vram_gb: 2
|
||||||
|
notes: Fast 2x upscaling model for general purpose enhancement
|
||||||
|
files:
|
||||||
|
- source: RealESRGAN_x2.pth
|
||||||
|
dest: upscale_models/RealESRGAN_x2.pth
|
||||||
|
- repo_id: ai-forever/Real-ESRGAN
|
||||||
|
description: RealESRGAN x4 - 4x upscaling model
|
||||||
|
size_gb: 0.06
|
||||||
|
essential: true
|
||||||
|
category: support
|
||||||
|
format: pth
|
||||||
|
vram_gb: 4
|
||||||
|
notes: High-quality 4x upscaling model for detail enhancement
|
||||||
|
files:
|
||||||
|
- source: RealESRGAN_x4.pth
|
||||||
|
dest: upscale_models/RealESRGAN_x4.pth
|
||||||
|
- repo_id: Comfy-Org/Wan_2.1_ComfyUI_repackaged
|
||||||
|
description: UMT5-XXL FP8 - Text encoder for all Wan2.2 models
|
||||||
|
size_gb: 10
|
||||||
|
essential: true
|
||||||
|
category: support
|
||||||
|
format: fp8_scaled
|
||||||
|
vram_gb: 5
|
||||||
|
notes: Shared text encoder for all Wan2.2 models (5B and 14B), FP8 quantized
|
||||||
|
files:
|
||||||
|
- source: umt5_xxl_fp8_e4m3fn_scaled.safetensors
|
||||||
|
dest: text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors
|
||||||
|
animatediff_models:
|
||||||
|
- repo_id: guoyww/animatediff
|
||||||
|
description: AnimateDiff Motion Modules
|
||||||
|
size_gb: 2
|
||||||
|
essential: true
|
||||||
|
category: animatediff
|
||||||
|
filename: mm_sd_v15
|
||||||
|
format: safetensors
|
||||||
|
vram_gb: 4
|
||||||
|
notes: Motion modules for AnimateDiff text-to-video
|
||||||
|
files:
|
||||||
|
- source: mm_sd_v15_v2.ckpt
|
||||||
|
dest: animatediff_models/mm_sd_v15_v2.ckpt
|
||||||
|
controlnet_models:
|
||||||
|
- repo_id: lllyasviel/control_v11p_sd15_canny
|
||||||
|
description: ControlNet Canny - Edge detection control for SD 1.5
|
||||||
|
size_gb: 1.5
|
||||||
|
essential: false
|
||||||
|
category: controlnet
|
||||||
|
format: safetensors
|
||||||
|
vram_gb: 2
|
||||||
|
notes: Precise edge-based composition control
|
||||||
|
files:
|
||||||
|
- source: diffusion_pytorch_model.safetensors
|
||||||
|
dest: controlnet/control_v11p_sd15_canny.safetensors
|
||||||
|
- repo_id: lllyasviel/control_v11f1p_sd15_depth
|
||||||
|
description: ControlNet Depth - Depth map control for SD 1.5
|
||||||
|
size_gb: 1.5
|
||||||
|
essential: false
|
||||||
|
category: controlnet
|
||||||
|
format: safetensors
|
||||||
|
vram_gb: 2
|
||||||
|
notes: Depth-based spatial control
|
||||||
|
files:
|
||||||
|
- source: diffusion_pytorch_model.safetensors
|
||||||
|
dest: controlnet/control_v11p_sd15_depth.safetensors
|
||||||
|
- repo_id: diffusers/controlnet-canny-sdxl-1.0
|
||||||
|
description: ControlNet Canny SDXL - Edge detection for SDXL
|
||||||
|
size_gb: 2.5
|
||||||
|
essential: false
|
||||||
|
category: controlnet
|
||||||
|
format: safetensors
|
||||||
|
vram_gb: 3
|
||||||
|
notes: Canny edge control for SDXL models
|
||||||
|
files:
|
||||||
|
- source: diffusion_pytorch_model.safetensors
|
||||||
|
dest: controlnet/controlnet-canny-sdxl-1.0.safetensors
|
||||||
|
- repo_id: diffusers/controlnet-depth-sdxl-1.0
|
||||||
|
description: ControlNet Depth SDXL - Depth map for SDXL
|
||||||
|
size_gb: 2.5
|
||||||
|
essential: false
|
||||||
|
category: controlnet
|
||||||
|
format: safetensors
|
||||||
|
vram_gb: 3
|
||||||
|
notes: Depth control for SDXL models
|
||||||
|
files:
|
||||||
|
- source: diffusion_pytorch_model.safetensors
|
||||||
|
dest: controlnet/controlnet-depth-sdxl-1.0.safetensors
|
||||||
|
ipadapter_models:
|
||||||
|
- repo_id: h94/IP-Adapter
|
||||||
|
description: IP-Adapter SDXL Base - Style & Composition
|
||||||
|
size_gb: 1.3
|
||||||
|
essential: true
|
||||||
|
category: ipadapter
|
||||||
|
format: safetensors
|
||||||
|
vram_gb: 4
|
||||||
|
notes: Basic IP-Adapter for SDXL
|
||||||
|
files:
|
||||||
|
- source: sdxl_models/ip-adapter_sdxl.safetensors
|
||||||
|
dest: ipadapter/ip-adapter_sdxl.safetensors
|
||||||
|
- repo_id: h94/IP-Adapter
|
||||||
|
description: IP-Adapter SDXL VIT-H - For CLIP-ViT-H
|
||||||
|
size_gb: 0.9
|
||||||
|
essential: true
|
||||||
|
category: ipadapter
|
||||||
|
format: safetensors
|
||||||
|
vram_gb: 4
|
||||||
|
notes: IP-Adapter for SDXL with VIT-H CLIP vision model
|
||||||
|
files:
|
||||||
|
- source: sdxl_models/ip-adapter_sdxl_vit-h.safetensors
|
||||||
|
dest: ipadapter/ip-adapter_sdxl_vit-h.safetensors
|
||||||
|
- repo_id: h94/IP-Adapter
|
||||||
|
description: IP-Adapter SDXL Plus - High Strength Composition
|
||||||
|
size_gb: 0.9
|
||||||
|
essential: false
|
||||||
|
category: ipadapter
|
||||||
|
format: safetensors
|
||||||
|
vram_gb: 4
|
||||||
|
notes: Enhanced composition control with higher strength
|
||||||
|
files:
|
||||||
|
- source: sdxl_models/ip-adapter-plus_sdxl_vit-h.safetensors
|
||||||
|
dest: ipadapter/ip-adapter-plus_sdxl_vit-h.safetensors
|
||||||
|
- repo_id: h94/IP-Adapter
|
||||||
|
description: IP-Adapter SDXL Plus Face - Face-focused generation
|
||||||
|
size_gb: 0.5
|
||||||
|
essential: false
|
||||||
|
category: ipadapter
|
||||||
|
format: safetensors
|
||||||
|
vram_gb: 4
|
||||||
|
notes: Specialized for face transfer and portrait generation
|
||||||
|
files:
|
||||||
|
- source: sdxl_models/ip-adapter-plus-face_sdxl_vit-h.safetensors
|
||||||
|
dest: ipadapter/ip-adapter-plus-face_sdxl_vit-h.safetensors
|
||||||
|
diffrhythm_models:
|
||||||
|
- repo_id: ASLP-lab/DiffRhythm-1_2
|
||||||
|
description: DiffRhythm 1.2 - 95 second generation model
|
||||||
|
size_gb: 2
|
||||||
|
essential: true
|
||||||
|
category: diffrhythm
|
||||||
|
format: pt
|
||||||
|
vram_gb: 12
|
||||||
|
duration_seconds: 95
|
||||||
|
notes: Latest 95-second generation model
|
||||||
|
files:
|
||||||
|
- source: cfm_model.pt
|
||||||
|
dest: TTS/DiffRhythm/cfm_model_v1_2.pt
|
||||||
|
- repo_id: ASLP-lab/DiffRhythm-full
|
||||||
|
description: DiffRhythm Full - 4m45s full-length generation
|
||||||
|
size_gb: 2
|
||||||
|
essential: false
|
||||||
|
category: diffrhythm
|
||||||
|
format: pt
|
||||||
|
vram_gb: 16
|
||||||
|
duration_seconds: 285
|
||||||
|
notes: Full-length 4 minute 45 second music generation
|
||||||
|
files:
|
||||||
|
- source: cfm_model.pt
|
||||||
|
dest: TTS/DiffRhythm/cfm_full_model.pt
|
||||||
|
- repo_id: ASLP-lab/DiffRhythm-base
|
||||||
|
description: DiffRhythm Base - 95 second base model
|
||||||
|
size_gb: 2
|
||||||
|
essential: false
|
||||||
|
category: diffrhythm
|
||||||
|
format: pt
|
||||||
|
vram_gb: 12
|
||||||
|
duration_seconds: 95
|
||||||
|
notes: Base 95-second model
|
||||||
|
files:
|
||||||
|
- source: cfm_model.pt
|
||||||
|
dest: TTS/DiffRhythm/cfm_model.pt
|
||||||
|
- repo_id: ASLP-lab/DiffRhythm-vae
|
||||||
|
description: DiffRhythm VAE - Variational autoencoder
|
||||||
|
size_gb: 1
|
||||||
|
essential: true
|
||||||
|
category: diffrhythm
|
||||||
|
format: pt
|
||||||
|
vram_gb: 2
|
||||||
|
notes: VAE component fine-tuned from Stable Audio Open (Stability AI Community
|
||||||
|
License)
|
||||||
|
files:
|
||||||
|
- source: vae_model.pt
|
||||||
|
dest: TTS/DiffRhythm/vae_model.pt
|
||||||
|
- repo_id: OpenMuQ/MuQ-MuLan-large
|
||||||
|
description: MuQ-MuLan-large - Music-text joint embedding (~700M parameters)
|
||||||
|
size_gb: 3
|
||||||
|
essential: true
|
||||||
|
category: diffrhythm
|
||||||
|
format: bin
|
||||||
|
vram_gb: 4
|
||||||
|
notes: Music-text joint embedding for semantic understanding (English/Chinese)
|
||||||
|
files:
|
||||||
|
- source: config.json
|
||||||
|
dest: TTS/DiffRhythm/MuQ-MuLan-large/config.json
|
||||||
|
- source: pytorch_model.bin
|
||||||
|
dest: TTS/DiffRhythm/MuQ-MuLan-large/pytorch_model.bin
      - repo_id: OpenMuQ/MuQ-large-msd-iter
        description: MuQ-large-msd-iter - Music representation learning (~300M parameters)
        size_gb: 1.2
        essential: true
        category: diffrhythm
        format: safetensors
        vram_gb: 2
        notes: Music representation model trained on Million Song Dataset
        files:
          - source: config.json
            dest: TTS/DiffRhythm/MuQ-large-msd-iter/config.json
          - source: model.safetensors
            dest: TTS/DiffRhythm/MuQ-large-msd-iter/model.safetensors

      - repo_id: FacebookAI/xlm-roberta-base
        description: XLM-RoBERTa Base - Multilingual text encoder (100 languages, 0.3B params)
        size_gb: 1.1
        essential: true
        category: diffrhythm
        format: safetensors
        vram_gb: 1
        notes: Multilingual text encoding for 100 languages
        files:
          - source: config.json
            dest: TTS/DiffRhythm/xlm-roberta-base/config.json
          - source: model.safetensors
            dest: TTS/DiffRhythm/xlm-roberta-base/model.safetensors
          - source: sentencepiece.bpe.model
            dest: TTS/DiffRhythm/xlm-roberta-base/sentencepiece.bpe.model
          - source: tokenizer.json
            dest: TTS/DiffRhythm/xlm-roberta-base/tokenizer.json
          - source: tokenizer_config.json
            dest: TTS/DiffRhythm/xlm-roberta-base/tokenizer_config.json

storage_requirements:
  essential_only:
    image: 30
    video: 28
    audio: 11
    support: 11
    diffrhythm: 10
    total: 90
  all_models:
    image: 54
    video: 36
    audio: 36
    support: 11
    diffrhythm: 12
    total: 149

vram_requirements:
  simultaneous_loadable:
    - name: Image Focus - FLUX FP16
      models:
        - FLUX.1 Schnell
      vram_used: 23
      remaining: 1
    - name: Image Focus - FLUX FP8 + SDXL
      models:
        - FLUX.1 Schnell FP8
        - SDXL Base
      vram_used: 24
      remaining: 0
    - name: Video Generation
      models:
        - CogVideoX-5B optimized
        - SDXL
      vram_used: 24
      remaining: 0
    - name: Multi-Modal
      models:
        - SDXL
        - MusicGen Medium
      vram_used: 20
      remaining: 4

installation_profiles:
  minimal:
    description: Minimal setup for testing
    categories:
      - support_models
    storage_gb: 11
    estimated_time: 5-10 minutes
  essential:
    description: Essential models only (~80GB)
    categories:
      - image_models
      - video_models
      - audio_models
      - support_models
    essential_only: true
    storage_gb: 80
    estimated_time: 1-2 hours
  image_focused:
    description: All image generation models
    categories:
      - image_models
      - support_models
    storage_gb: 65
    estimated_time: 45-90 minutes
  video_focused:
    description: All video generation models
    categories:
      - video_models
      - image_models
      - support_models
    essential_only: true
    storage_gb: 69
    estimated_time: 1-2 hours
  complete:
    description: All models (including optional)
    categories:
      - image_models
      - video_models
      - audio_models
      - support_models
    storage_gb: 137
    estimated_time: 2-4 hours

metadata:
  version: 1.0.0
  last_updated: 2025-11-21
  compatible_with:
    - ComfyUI >= 0.1.0
    - Python >= 3.10
    - HuggingFace Hub >= 0.20.0
  maintainer: Valknar
  repository: https://github.com/yourusername/runpod
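For orientation, here is a minimal sketch of how the `installation_profiles` section above could be consumed. It assumes PyYAML is available, and the filename `models_huggingface.yaml` is a placeholder — the config itself does not pin down where it lives in the repo:

```python
# Sketch: summarize the installation profiles defined above.
# Assumes PyYAML; the filename is a hypothetical placeholder.
import yaml

with open("models_huggingface.yaml") as f:
    config = yaml.safe_load(f)

for name, profile in config["installation_profiles"].items():
    cats = ", ".join(profile["categories"])
    line = f"{name:14s} {profile['storage_gb']:>4} GB  ({cats})"
    # essential_only profiles skip entries flagged essential: false
    if profile.get("essential_only"):
        line += "  [essential models only]"
    print(line)
```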
models_huggingface_vllm.yaml (new file, 126 lines)
@@ -0,0 +1,126 @@
# ============================================================================
# vLLM Model Configuration
# ============================================================================
#
# This configuration file defines all available vLLM models for download.
# Models are organized by category: text generation and text embeddings.
#
# Each model entry contains:
#   - repo_id: HuggingFace repository identifier
#   - description: Human-readable description
#   - size_gb: Approximate size in gigabytes
#   - essential: Whether this is an essential model (true/false)
#   - category: Model category (text_generation/embedding)
#
# ============================================================================

# Global settings
settings:
  cache_dir: /workspace/huggingface_cache
  parallel_downloads: 1
  retry_attempts: 3
  timeout_seconds: 3600

# Model categories
model_categories:
  # ==========================================================================
  # TEXT GENERATION MODELS (vLLM)
  # ==========================================================================
  text_generation_models:
    - repo_id: Qwen/Qwen2.5-7B-Instruct
      description: Qwen 2.5 7B Instruct - Advanced multilingual reasoning
      size_gb: 14
      essential: true
      category: text_generation
      type: vllm
      format: safetensors
      vram_gb: 14
      context_length: 32768
      notes: Latest Qwen 2.5 model with enhanced reasoning capabilities
      files:
        - source: "model.safetensors"
          dest: "model.safetensors"

    - repo_id: meta-llama/Llama-3.1-8B-Instruct
      description: Llama 3.1 8B Instruct - Meta's latest instruction-tuned model
      size_gb: 17
      essential: true
      category: text_generation
      type: vllm
      format: safetensors
      vram_gb: 17
      context_length: 131072
      notes: Extended 128K context length, excellent for long-form tasks
      files:
        - source: "model.safetensors"
          dest: "model.safetensors"

  # ==========================================================================
  # TEXT EMBEDDING MODELS (vLLM)
  # ==========================================================================
  embedding_models:
    - repo_id: BAAI/bge-large-en-v1.5
      description: BGE Large English v1.5 - High-quality embeddings for RAG
      size_gb: 1.3
      essential: true
      category: embedding
      type: vllm_embedding
      format: safetensors
      vram_gb: 3
      embedding_dimensions: 1024
      max_tokens: 512
      notes: Top-tier MTEB scores, excellent for semantic search and RAG applications
      files:
        - source: "model.safetensors"
          dest: "model.safetensors"

# ============================================================================
# STORAGE & VRAM SUMMARIES
# ============================================================================

storage_requirements:
  text_generation: 31   # Qwen 2.5 7B + Llama 3.1 8B
  embedding: 1.3        # BGE Large
  total: 32.3           # Total essential storage

vram_requirements:
  # For 24GB GPU (RTX 4090)
  simultaneous_loadable:
    - name: Qwen 2.5 7B Only
      models: [Qwen 2.5 7B Instruct]
      vram_used: 14
      remaining: 10

    - name: Llama 3.1 8B Only
      models: [Llama 3.1 8B Instruct]
      vram_used: 17
      remaining: 7

    - name: BGE Large Only
      models: [BGE Large]
      vram_used: 3
      remaining: 21

    - name: Qwen + BGE Embedding
      models: [Qwen 2.5 7B, BGE Large]
      vram_used: 17
      remaining: 7

    - name: Llama + BGE Embedding
      models: [Llama 3.1 8B, BGE Large]
      vram_used: 20
      remaining: 4

# ============================================================================
# METADATA
# ============================================================================

metadata:
  version: 1.0.0
  last_updated: 2025-11-25
  compatible_with:
    - vLLM >= 0.6.0
    - Python >= 3.10
    - HuggingFace Hub >= 0.20.0
  maintainer: Valknar
  repository: https://github.com/yourusername/runpod
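A rough sketch of a downloader driven by this file, assuming PyYAML and huggingface_hub are installed. Note that 7B/8B checkpoints are normally sharded on the Hub, so the single `model.safetensors` entries above would likely need `snapshot_download` in practice; the per-file loop below just mirrors the config as written:

```python
# Sketch: fetch the files listed per model entry with huggingface_hub.
# Retries (settings.retry_attempts) and parallelism are omitted for brevity.
import yaml
from huggingface_hub import hf_hub_download

with open("models_huggingface_vllm.yaml") as f:
    config = yaml.safe_load(f)

cache_dir = config["settings"]["cache_dir"]
for models in config["model_categories"].values():
    for model in models:
        if not model.get("essential"):
            continue  # honor the essential flag
        for spec in model.get("files", []):
            path = hf_hub_download(
                repo_id=model["repo_id"],
                filename=spec["source"],
                cache_dir=cache_dir,
            )
            print(f"{model['repo_id']}: {spec['source']} -> {path}")
```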
@@ -1,38 +0,0 @@ (deleted Dockerfile)
FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04

WORKDIR /app

# Install Python and system dependencies
RUN apt-get update && apt-get install -y \
    python3.10 \
    python3-pip \
    ffmpeg \
    git \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip
RUN pip3 install --no-cache-dir --upgrade pip

# Install PyTorch with CUDA support
RUN pip3 install --no-cache-dir torch==2.1.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu121

# Copy requirements and install dependencies
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy application code
COPY server.py .

# Create directory for model cache
RUN mkdir -p /app/models

# Environment variables
ENV HF_HOME=/app/models
ENV TORCH_HOME=/app/models
ENV MODEL_NAME=facebook/musicgen-medium

# Expose port
EXPOSE 8000

# Run the server
CMD ["python3", "server.py"]
@@ -1,6 +0,0 @@ (deleted requirements file)
torch==2.1.0
torchaudio==2.1.0
audiocraft==1.3.0
fastapi==0.104.1
uvicorn[standard]==0.24.0
pydantic==2.5.0
@@ -1,194 +0,0 @@ (deleted MusicGen server)
#!/usr/bin/env python3
"""
MusicGen API Server
OpenAI-compatible API for music generation using Meta's MusicGen

Endpoints:
- POST /v1/audio/generations - Generate music from text prompt
- GET /health - Health check
- GET / - Service info
"""

import base64
import io
import logging
import os
import tempfile
from typing import Optional

import torch
import torchaudio
from audiocraft.models import MusicGen
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# FastAPI app
app = FastAPI(title="MusicGen API Server", version="1.0.0")

# Global model instance
model: Optional[MusicGen] = None
model_name: str = os.getenv("MODEL_NAME", "facebook/musicgen-medium")
device: str = "cuda" if torch.cuda.is_available() else "cpu"


class AudioGenerationRequest(BaseModel):
    """Music generation request"""
    model: str = Field(default="musicgen-medium", description="Model name")
    prompt: str = Field(..., description="Text description of the music to generate")
    duration: float = Field(default=30.0, ge=1.0, le=30.0, description="Duration in seconds")
    temperature: float = Field(default=1.0, ge=0.1, le=2.0, description="Sampling temperature")
    top_k: int = Field(default=250, ge=0, le=500, description="Top-k sampling")
    top_p: float = Field(default=0.0, ge=0.0, le=1.0, description="Top-p (nucleus) sampling")
    cfg_coef: float = Field(default=3.0, ge=1.0, le=15.0, description="Classifier-free guidance coefficient")
    response_format: str = Field(default="wav", description="Audio format (wav or mp3)")


class AudioGenerationResponse(BaseModel):
    """Music generation response"""
    audio: str = Field(..., description="Base64-encoded audio data")
    format: str = Field(..., description="Audio format (wav or mp3)")
    duration: float = Field(..., description="Duration in seconds")
    sample_rate: int = Field(..., description="Sample rate in Hz")


@app.on_event("startup")
async def startup_event():
    """Load MusicGen model on startup"""
    global model

    logger.info(f"Loading MusicGen model: {model_name}")
    logger.info(f"Device: {device}")

    # Load model
    model = MusicGen.get_pretrained(model_name, device=device)

    logger.info("MusicGen model loaded successfully")
    logger.info("Max duration: 30 seconds at 32kHz")


@app.get("/")
async def root():
    """Root endpoint"""
    return {
        "service": "MusicGen API Server",
        "model": model_name,
        "device": device,
        "max_duration": 30.0,
        "sample_rate": 32000
    }


@app.get("/health")
async def health():
    """Health check endpoint"""
    return {
        "status": "healthy" if model else "initializing",
        "model": model_name,
        "device": device,
        "ready": model is not None,
        "gpu_available": torch.cuda.is_available()
    }


@app.post("/v1/audio/generations")
async def generate_audio(request: AudioGenerationRequest) -> AudioGenerationResponse:
    """Generate music from text prompt"""
    if not model:
        raise HTTPException(status_code=503, detail="Model not initialized")

    logger.info(f"Generating music: {request.prompt[:100]}...")
    logger.info(f"Duration: {request.duration}s, Temperature: {request.temperature}")

    try:
        # Set generation parameters
        model.set_generation_params(
            duration=request.duration,
            temperature=request.temperature,
            top_k=request.top_k,
            top_p=request.top_p,
            cfg_coef=request.cfg_coef,
        )

        # Generate audio
        descriptions = [request.prompt]
        with torch.no_grad():
            wav = model.generate(descriptions)

        # wav shape: [batch_size, channels, samples]
        # Extract first batch item
        audio_data = wav[0].cpu()  # [channels, samples]

        # Get sample rate
        sample_rate = model.sample_rate

        # Save to temporary file
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_path = temp_file.name
        torchaudio.save(temp_path, audio_data, sample_rate)

        # Read audio file and encode to base64
        with open(temp_path, 'rb') as f:
            audio_bytes = f.read()

        # Clean up temporary file
        os.unlink(temp_path)

        # Encode to base64
        audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')

        logger.info(f"Generated {request.duration}s of audio")

        return AudioGenerationResponse(
            audio=audio_base64,
            format="wav",
            duration=request.duration,
            sample_rate=sample_rate
        )

    except Exception as e:
        logger.error(f"Error generating audio: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/v1/models")
async def list_models():
    """List available models (OpenAI-compatible)"""
    return {
        "object": "list",
        "data": [
            {
                "id": "musicgen-medium",
                "object": "model",
                "created": 1234567890,
                "owned_by": "meta",
                "permission": [],
                "root": model_name,
                "parent": None,
            }
        ]
    }


if __name__ == "__main__":
    import uvicorn

    host = os.getenv("HOST", "0.0.0.0")
    port = int(os.getenv("PORT", "8000"))

    logger.info(f"Starting MusicGen API server on {host}:{port}")

    uvicorn.run(
        app,
        host=host,
        port=port,
        log_level="info",
        access_log=True,
    )
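For reference, a client-side sketch against the server above (now removed from the repo). It assumes the service is reachable on localhost:8000, matching the Dockerfile's EXPOSE and the server's default PORT:

```python
# Sketch: request 15s of music and decode the base64 WAV payload.
import base64
import requests

resp = requests.post(
    "http://localhost:8000/v1/audio/generations",
    json={"prompt": "calm piano with soft strings", "duration": 15.0},
    timeout=300,  # first generation includes model warm-up
)
resp.raise_for_status()
body = resp.json()

with open("output.wav", "wb") as f:
    f.write(base64.b64decode(body["audio"]))
print(f"wrote {body['duration']}s at {body['sample_rate']} Hz")
```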
scripts/bootstrap-venvs.sh (new executable file, 108 lines)
@@ -0,0 +1,108 @@
#!/bin/bash
# Virtual Environment Health Check and Bootstrap Script
# Checks if Python venvs are compatible with current Python version
# Rebuilds venvs if needed

set -e

echo "=== Python Virtual Environment Health Check ==="

# Get current system Python version
SYSTEM_PYTHON=$(python3 --version | awk '{print $2}')
SYSTEM_PYTHON_MAJOR_MINOR=$(echo "$SYSTEM_PYTHON" | cut -d'.' -f1,2)

echo "System Python: $SYSTEM_PYTHON ($SYSTEM_PYTHON_MAJOR_MINOR)"

# List of venvs to check
VENVS=(
    "/workspace/ai/vllm/venv"
    "/workspace/ai/webdav-sync/venv"
    "/workspace/ComfyUI/venv"
)

REBUILD_NEEDED=0

# Check each venv
for VENV_PATH in "${VENVS[@]}"; do
    if [ ! -d "$VENV_PATH" ]; then
        echo "⚠ venv not found: $VENV_PATH (will be created on first service start)"
        continue
    fi

    VENV_NAME=$(basename "$(dirname "$VENV_PATH")")
    echo ""
    echo "Checking venv: $VENV_NAME ($VENV_PATH)"

    # Check if venv Python executable works
    if ! "$VENV_PATH/bin/python" --version >/dev/null 2>&1; then
        echo "  ❌ BROKEN - Python executable not working"
        REBUILD_NEEDED=1
        continue
    fi

    # Get venv Python version
    VENV_PYTHON=$("$VENV_PATH/bin/python" --version 2>&1 | awk '{print $2}')
    VENV_PYTHON_MAJOR_MINOR=$(echo "$VENV_PYTHON" | cut -d'.' -f1,2)

    echo "  venv Python: $VENV_PYTHON ($VENV_PYTHON_MAJOR_MINOR)"

    # Compare major.minor versions
    if [ "$SYSTEM_PYTHON_MAJOR_MINOR" != "$VENV_PYTHON_MAJOR_MINOR" ]; then
        echo "  ⚠ VERSION MISMATCH - System is $SYSTEM_PYTHON_MAJOR_MINOR, venv is $VENV_PYTHON_MAJOR_MINOR"
        REBUILD_NEEDED=1
    else
        # Check if pip works
        if ! "$VENV_PATH/bin/pip" --version >/dev/null 2>&1; then
            echo "  ❌ BROKEN - pip not working"
            REBUILD_NEEDED=1
        else
            echo "  ✓ HEALTHY"
        fi
    fi
done

# If any venv needs rebuild, warn the user
if [ $REBUILD_NEEDED -eq 1 ]; then
    echo ""
    echo "========================================"
    echo " ⚠ WARNING: Some venvs need rebuilding"
    echo "========================================"
    echo ""
    echo "One or more Python virtual environments are incompatible with the current"
    echo "Python version or are broken. This can happen when:"
    echo "  - Docker image Python version changed"
    echo "  - venv files were corrupted"
    echo "  - Binary dependencies are incompatible"
    echo ""
    echo "RECOMMENDED ACTIONS:"
    echo ""
    echo "1. vLLM venv rebuild:"
    echo "   cd /workspace/ai/vllm"
    echo "   rm -rf venv"
    echo "   python3 -m venv venv"
    echo "   source venv/bin/activate"
    echo "   pip install -r requirements.txt"
    echo ""
    echo "2. ComfyUI venv rebuild:"
    echo "   cd /workspace/ComfyUI"
    echo "   rm -rf venv"
    echo "   python3 -m venv venv"
    echo "   source venv/bin/activate"
    echo "   pip install -r requirements.txt"
    echo ""
    echo "3. WebDAV sync venv rebuild (if used):"
    echo "   cd /workspace/ai/webdav-sync"
    echo "   rm -rf venv"
    echo "   python3 -m venv venv"
    echo "   source venv/bin/activate"
    echo "   pip install -r requirements.txt"
    echo ""
    echo "Services may fail to start until venvs are rebuilt!"
    echo "========================================"
    echo ""
else
    echo ""
    echo "✓ All virtual environments are healthy"
fi

exit 0
@@ -1,302 +0,0 @@ (deleted prepare-template.sh)
#!/bin/bash
#
# RunPod Template Preparation Script
# Prepares a RunPod instance for template creation
#
# This script:
# 1. Installs Docker & Docker Compose
# 2. Installs Tailscale
# 3. Builds all Docker images
# 4. Pre-downloads all models
# 5. Validates everything works
# 6. Cleans up for template creation
#
# Usage: ./prepare-template.sh
# Run this on the RunPod instance you want to save as a template
#

set -e  # Exit on error

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'  # No Color

# Logging functions
log_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

log_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
}

log_warn() {
    echo -e "${YELLOW}[WARN]${NC} $1"
}

log_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

# Check if running on RunPod
check_environment() {
    log_info "Checking environment..."

    if ! nvidia-smi &> /dev/null; then
        log_error "NVIDIA GPU not detected. Are you running on a GPU instance?"
        exit 1
    fi

    if [ ! -d "/workspace" ]; then
        log_warn "/workspace directory not found. Creating it..."
        mkdir -p /workspace
    fi

    log_success "Environment check passed"
}

# Install Docker
install_docker() {
    if command -v docker &> /dev/null; then
        log_info "Docker already installed: $(docker --version)"
        return
    fi

    log_info "Installing Docker..."
    curl -fsSL https://get.docker.com -o get-docker.sh
    sh get-docker.sh
    rm get-docker.sh

    # Start Docker
    systemctl start docker || service docker start
    systemctl enable docker || true

    log_success "Docker installed: $(docker --version)"
}

# Install Docker Compose
install_docker_compose() {
    if docker compose version &> /dev/null; then
        log_info "Docker Compose already installed: $(docker compose version)"
        return
    fi

    log_info "Installing Docker Compose..."

    # Docker Compose is usually bundled with Docker now
    # If not, install it separately
    if ! docker compose version &> /dev/null; then
        DOCKER_COMPOSE_VERSION="v2.23.0"
        curl -L "https://github.com/docker/compose/releases/download/${DOCKER_COMPOSE_VERSION}/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
        chmod +x /usr/local/bin/docker-compose
    fi

    log_success "Docker Compose installed: $(docker compose version)"
}

# Install Tailscale
install_tailscale() {
    if command -v tailscale &> /dev/null; then
        log_info "Tailscale already installed: $(tailscale version)"
        return
    fi

    log_info "Installing Tailscale..."
    curl -fsSL https://tailscale.com/install.sh | sh

    log_success "Tailscale installed: $(tailscale version)"
}

# Build Docker images
build_docker_images() {
    log_info "Building Docker images..."

    cd /workspace/ai

    # Build orchestrator
    log_info "Building orchestrator..."
    docker compose -f compose.yaml build orchestrator

    # Build vLLM
    log_info "Building vLLM..."
    docker compose -f compose.yaml build vllm-qwen

    # Build MusicGen
    log_info "Building MusicGen..."
    docker compose -f compose.yaml build musicgen

    # Pull Flux image (pre-built)
    log_info "Pulling Flux.1 image..."
    docker pull ghcr.io/matatonic/openedai-images-flux:latest

    log_success "All Docker images built"
}

# Pre-download models
download_models() {
    log_info "Pre-downloading AI models (this will take 30-45 minutes)..."

    cd /workspace/ai

    # Create model cache directories
    mkdir -p /workspace/huggingface_cache
    mkdir -p /workspace/flux/models
    mkdir -p /workspace/musicgen/models

    # Download Qwen 2.5 7B
    log_info "Downloading Qwen 2.5 7B (14GB)..."
    docker compose --profile text up -d vllm-qwen

    # Wait for model to download
    log_info "Waiting for Qwen model to download..."
    while ! docker logs ai_vllm-qwen_1 2>&1 | grep -q "Model loaded successfully\|AsyncLLMEngine initialized"; do
        echo -n "."
        sleep 10
    done
    echo ""
    log_success "Qwen 2.5 7B downloaded"

    docker compose stop vllm-qwen

    # Download Flux.1 Schnell
    log_info "Downloading Flux.1 Schnell (12GB)..."
    docker compose --profile image up -d flux

    log_info "Waiting for Flux model to download..."
    sleep 180  # Flux takes about 3 minutes to download and initialize
    log_success "Flux.1 Schnell downloaded"

    docker compose stop flux

    # Download MusicGen Medium
    log_info "Downloading MusicGen Medium (11GB)..."
    docker compose --profile audio up -d musicgen

    log_info "Waiting for MusicGen model to download..."
    while ! docker logs ai_musicgen_1 2>&1 | grep -q "Model loaded successfully\|initialized successfully"; do
        echo -n "."
        sleep 10
    done
    echo ""
    log_success "MusicGen Medium downloaded"

    docker compose stop musicgen

    log_success "All models downloaded and cached"
}

# Validate installation
validate_installation() {
    log_info "Validating installation..."

    cd /workspace/ai

    # Start orchestrator
    log_info "Starting orchestrator for validation..."
    docker compose -f compose.yaml up -d orchestrator

    sleep 10

    # Check orchestrator health
    if curl -s http://localhost:9000/health | grep -q "healthy\|ok"; then
        log_success "Orchestrator is healthy"
    else
        log_error "Orchestrator health check failed"
        docker logs ai_orchestrator
        exit 1
    fi

    # Check models are cached
    if [ -d "/workspace/huggingface_cache" ] && [ "$(ls -A /workspace/huggingface_cache)" ]; then
        log_success "Hugging Face cache populated"
    else
        log_warn "Hugging Face cache may be empty"
    fi

    # Stop orchestrator
    docker compose -f compose.yaml down

    log_success "Validation passed"
}

# Clean up for template creation
cleanup_for_template() {
    log_info "Cleaning up for template creation..."

    # Remove sensitive data
    log_info "Removing sensitive files..."
    rm -f /workspace/ai/.env
    rm -f /root/.ssh/known_hosts
    rm -f /root/.bash_history
    rm -f /root/.python_history

    # Clear logs
    log_info "Clearing logs..."
    find /var/log -type f -name "*.log" -delete 2>/dev/null || true
    journalctl --vacuum-time=1s 2>/dev/null || true

    # Logout from Tailscale
    log_info "Logging out from Tailscale..."
    tailscale logout 2>/dev/null || true

    # Clean Docker caches and dangling data (but keep built images;
    # prune without -a so unused tagged images survive into the template)
    log_info "Cleaning Docker cache..."
    docker system prune -f --volumes || true

    # Create template marker
    log_info "Creating template version marker..."
    cat > /workspace/TEMPLATE_VERSION <<EOF
RunPod Multi-Modal AI Template
Version: 1.0
Created: $(date)
Components:
- Docker $(docker --version | cut -d' ' -f3)
- Docker Compose $(docker compose version --short)
- Tailscale $(tailscale version --short 2>/dev/null || echo "installed")
- Orchestrator (ai_orchestrator)
- Text Generation (vLLM + Qwen 2.5 7B)
- Image Generation (Flux.1 Schnell)
- Music Generation (MusicGen Medium)
Models Cached: ~37GB
EOF

    log_success "Cleanup complete"
}

# Main execution
main() {
    log_info "======================================"
    log_info "RunPod Template Preparation Script"
    log_info "======================================"
    log_info ""

    check_environment
    install_docker
    install_docker_compose
    install_tailscale
    build_docker_images
    download_models
    validate_installation
    cleanup_for_template

    log_info ""
    log_success "======================================"
    log_success "Template Preparation Complete!"
    log_success "======================================"
    log_info ""
    log_info "Next steps:"
    log_info "1. Review /workspace/TEMPLATE_VERSION"
    log_info "2. Go to RunPod Dashboard → My Pods"
    log_info "3. Select this pod → ⋮ → Save as Template"
    log_info "4. Name: multi-modal-ai-v1.0"
    log_info "5. Test deployment from template"
    log_info ""
    log_info "Template will enable 2-3 minute deployments instead of 60-90 minutes!"
    log_info ""
}

# Run main function
main "$@"
start.sh (new file, 141 lines)
@@ -0,0 +1,141 @@
#!/bin/bash
# RunPod container startup script
# This script initializes the container environment and starts all services

set -e

echo "========================================"
echo " RunPod AI Orchestrator - Starting"
echo "========================================"

# [1/7] Start SSH server (required by RunPod)
echo "[1/7] Starting SSH server..."
service ssh start
echo "  ✓ SSH server started"

# [2/7] Add /workspace/bin to PATH for arty and custom scripts
echo "[2/7] Configuring PATH..."
export PATH="/workspace/bin:$PATH"
echo "  ✓ PATH updated: /workspace/bin added"

# [3/7] Source environment variables from network volume
echo "[3/7] Loading environment from network volume..."
if [ -f /workspace/ai/.env ]; then
    set -a
    source /workspace/ai/.env
    set +a
    echo "  ✓ Environment loaded from /workspace/ai/.env"
else
    echo "  ⚠ No .env file found at /workspace/ai/.env"
    echo "    Some services may not function correctly without environment variables"
fi

# [4/7] Configure and start Tailscale VPN
echo "[4/7] Configuring Tailscale VPN..."
if [ -n "${TAILSCALE_AUTHKEY:-}" ]; then
    echo "  Starting Tailscale daemon..."
    tailscaled --tun=userspace-networking --socks5-server=localhost:1055 &
    sleep 3

    echo "  Connecting to Tailscale network..."
    HOSTNAME="runpod-$(hostname)"
    tailscale up --authkey="$TAILSCALE_AUTHKEY" --advertise-tags=tag:gpu --hostname="$HOSTNAME" || {
        echo "  ⚠ Tailscale connection failed, continuing without VPN"
    }

    # Get Tailscale IP if connected
    TAILSCALE_IP=$(tailscale ip -4 2>/dev/null || echo "not connected")
    if [ "$TAILSCALE_IP" != "not connected" ]; then
        echo "  ✓ Tailscale connected"
        echo "    Hostname: $HOSTNAME"
        echo "    IP: $TAILSCALE_IP"

        # Export for other services
        export GPU_TAILSCALE_IP="$TAILSCALE_IP"
    else
        echo "  ⚠ Tailscale failed to obtain IP"
    fi
else
    echo "  ⚠ Tailscale disabled (no TAILSCALE_AUTHKEY in .env)"
    echo "    Services requiring VPN connectivity will not work"
fi

# [5/7] Check Python virtual environments health
echo "[5/7] Checking Python virtual environments..."
PYTHON_VERSION=$(python3 --version)
echo "  System Python: $PYTHON_VERSION"

# Check if bootstrap script exists and run it
if [ -f /workspace/ai/scripts/bootstrap-venvs.sh ]; then
    echo "  Running venv health check..."
    bash /workspace/ai/scripts/bootstrap-venvs.sh
else
    echo "  ⚠ No venv bootstrap script found (optional)"
fi

# [6/7] Configure Supervisor
echo "[6/7] Configuring Supervisor process manager..."
if [ -f /workspace/ai/supervisord.conf ]; then
    # Supervisor expects config at /workspace/supervisord.conf (based on arty scripts)
    if [ ! -f /workspace/supervisord.conf ]; then
        cp /workspace/ai/supervisord.conf /workspace/supervisord.conf
        echo "  ✓ Supervisor config copied to /workspace/supervisord.conf"
    fi

    # Create logs directory if it doesn't exist
    mkdir -p /workspace/logs

    echo "  ✓ Supervisor configured"
else
    echo "  ⚠ No supervisord.conf found at /workspace/ai/supervisord.conf"
    echo "    Supervisor will not be started"
fi

# [7/7] Start Supervisor to manage services
echo "[7/7] Starting Supervisor and managed services..."
if [ -f /workspace/supervisord.conf ]; then
    # Start supervisor daemon
    supervisord -c /workspace/supervisord.conf
    echo "  ✓ Supervisor daemon started"

    # Wait a moment for services to initialize
    sleep 3

    # Display service status
    echo ""
    echo "Service Status:"
    echo "---------------"
    supervisorctl -c /workspace/supervisord.conf status || echo "  ⚠ Could not query service status"
else
    echo "  ⚠ Skipping Supervisor startup (no config file)"
fi

# Display connection information
echo ""
echo "========================================"
echo " Container Ready"
echo "========================================"
echo "Services:"
echo "  - SSH: port 22"
echo "  - ComfyUI: http://localhost:8188"
echo "  - Supervisor Web UI: http://localhost:9001"
echo "  - Model Orchestrator: http://localhost:9000"
if [ -n "${TAILSCALE_IP:-}" ] && [ "$TAILSCALE_IP" != "not connected" ]; then
    echo "  - Tailscale IP: $TAILSCALE_IP"
fi
echo ""
echo "Network Volume: /workspace"
echo "Project Directory: /workspace/ai"
echo "Logs: /workspace/logs"
echo ""
echo "To view service logs:"
echo "  supervisorctl -c /workspace/supervisord.conf tail -f <service_name>"
echo ""
echo "To manage services:"
echo "  supervisorctl -c /workspace/supervisord.conf status"
echo "  supervisorctl -c /workspace/supervisord.conf restart <service_name>"
echo "========================================"

# Keep container running
echo "Container is running. Press Ctrl+C to stop."
sleep infinity
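Since start.sh only warns when /workspace/ai/.env is missing, a small preflight sketch can catch configuration gaps before services fail mid-startup. The variable names below are the ones start.sh and supervisord.conf actually consume; which of them are hard requirements depends on the services you enable:

```python
# Sketch: verify the env vars referenced by start.sh and supervisord.conf.
import os

REQUIRED = ["HF_TOKEN"]  # vLLM programs expand %(ENV_HF_TOKEN)s in supervisord.conf
OPTIONAL = [
    "TAILSCALE_AUTHKEY",                       # start.sh continues without VPN if unset
    "WEBDAV_URL", "WEBDAV_USERNAME",
    "WEBDAV_PASSWORD", "WEBDAV_REMOTE_PATH",   # webdav-sync service
]

missing = [v for v in REQUIRED if not os.getenv(v)]
if missing:
    raise SystemExit(f"missing required env vars: {', '.join(missing)}")
for v in OPTIONAL:
    if not os.getenv(v):
        print(f"warning: {v} not set; the dependent service will be degraded")
```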
supervisord.conf (new file, 116 lines)
@@ -0,0 +1,116 @@
[supervisord]
logfile=logs/supervisord.log
pidfile=supervisord.pid
childlogdir=logs
nodaemon=false
loglevel=info

[unix_http_server]
file=supervisor.sock
chmod=0700

[supervisorctl]
serverurl=unix://supervisor.sock

[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface

# Web interface for monitoring (proxied via nginx + Authelia)
[inet_http_server]
port=0.0.0.0:9001
# Authentication disabled - handled by Authelia SSO
# username=admin
# password=runpod2024

# ComfyUI Server
[program:comfyui]
command=bash comfyui/start.sh
directory=.
autostart=true
autorestart=true
startretries=3
stderr_logfile=logs/comfyui.err.log
stdout_logfile=logs/comfyui.out.log
stdout_logfile_maxbytes=50MB
stdout_logfile_backups=10
stderr_logfile_maxbytes=50MB
stderr_logfile_backups=10
environment=HF_HOME="../huggingface_cache",PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True",TQDM_DISABLE="1"
priority=100
stopwaitsecs=30

# vLLM Qwen 2.5 7B Server (Port 8000)
[program:vllm-qwen]
command=vllm/venv/bin/python vllm/server_qwen.py
directory=.
autostart=false
autorestart=true
startretries=3
stderr_logfile=logs/vllm-qwen.err.log
stdout_logfile=logs/vllm-qwen.out.log
stdout_logfile_maxbytes=50MB
stdout_logfile_backups=10
stderr_logfile_maxbytes=50MB
stderr_logfile_backups=10
environment=HF_HOME="../huggingface_cache",HF_TOKEN="%(ENV_HF_TOKEN)s"
priority=200
stopwaitsecs=30

# vLLM Llama 3.1 8B Server (Port 8001)
[program:vllm-llama]
command=vllm/venv/bin/python vllm/server_llama.py
directory=.
autostart=false
autorestart=true
startretries=3
stderr_logfile=logs/vllm-llama.err.log
stdout_logfile=logs/vllm-llama.out.log
stdout_logfile_maxbytes=50MB
stdout_logfile_backups=10
stderr_logfile_maxbytes=50MB
stderr_logfile_backups=10
environment=HF_HOME="../huggingface_cache",HF_TOKEN="%(ENV_HF_TOKEN)s"
priority=201
stopwaitsecs=30

# vLLM BGE Embedding Server (Port 8002)
[program:vllm-embedding]
command=vllm/venv/bin/python vllm/server_embedding.py
directory=.
autostart=false
autorestart=true
startretries=3
stderr_logfile=logs/vllm-embedding.err.log
stdout_logfile=logs/vllm-embedding.out.log
stdout_logfile_maxbytes=50MB
stdout_logfile_backups=10
stderr_logfile_maxbytes=50MB
stderr_logfile_backups=10
environment=HF_HOME="../huggingface_cache",HF_TOKEN="%(ENV_HF_TOKEN)s"
priority=202
stopwaitsecs=30

# ComfyUI WebDAV Sync Service
[program:webdav-sync]
command=webdav-sync/venv/bin/python webdav-sync/webdav_sync.py
directory=.
autostart=true
autorestart=true
startretries=3
stderr_logfile=logs/webdav-sync.err.log
stdout_logfile=logs/webdav-sync.out.log
stdout_logfile_maxbytes=50MB
stdout_logfile_backups=10
stderr_logfile_maxbytes=50MB
stderr_logfile_backups=10
environment=WEBDAV_URL="%(ENV_WEBDAV_URL)s",WEBDAV_USERNAME="%(ENV_WEBDAV_USERNAME)s",WEBDAV_PASSWORD="%(ENV_WEBDAV_PASSWORD)s",WEBDAV_REMOTE_PATH="%(ENV_WEBDAV_REMOTE_PATH)s",COMFYUI_OUTPUT_DIR="../ComfyUI/output"
priority=150
stopwaitsecs=10

[group:comfyui-services]
programs=comfyui,webdav-sync
priority=100

[group:vllm-services]
programs=vllm-qwen,vllm-llama,vllm-embedding
priority=200
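Because the vLLM programs are autostart=false, they must be started on demand. Beyond supervisorctl, the [inet_http_server] block on port 9001 also exposes supervisor's standard XML-RPC API; a minimal sketch, assuming direct access to the port (i.e. the nginx + Authelia proxy is not in the way):

```python
# Sketch: start the vllm-services group via supervisor's XML-RPC API.
# CLI equivalent: supervisorctl -c /workspace/supervisord.conf start vllm-services:*
import xmlrpc.client

proxy = xmlrpc.client.ServerProxy("http://localhost:9001/RPC2")
proxy.supervisor.startProcessGroup("vllm-services", True)  # True = block until started

for info in proxy.supervisor.getAllProcessInfo():
    print(f"{info['group']}:{info['name']} -> {info['statename']}")
```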
@@ -1,34 +0,0 @@ (deleted vLLM Dockerfile)
FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04

WORKDIR /app

# Install Python and system dependencies
RUN apt-get update && apt-get install -y \
    python3.11 \
    python3-pip \
    git \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip
RUN pip3 install --no-cache-dir --upgrade pip

# Install vLLM and dependencies
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy application code
COPY server.py .

# Create directory for model cache
RUN mkdir -p /workspace/huggingface_cache

# Environment variables
ENV HF_HOME=/workspace/huggingface_cache
ENV VLLM_HOST=0.0.0.0
ENV VLLM_PORT=8000

# Expose port
EXPOSE 8000

# Run the server
CMD ["python3", "server.py"]
@@ -1,4 +1,5 @@ (modified requirements file)
-vllm==0.6.4.post1
+vllm==0.6.3.post1
-fastapi==0.104.1
+fastapi>=0.107.0
-uvicorn[standard]==0.24.0
+uvicorn[standard]>=0.24.0
-pydantic==2.5.0
+pydantic>=2.9
+python-dotenv>=1.0.0
vllm/server_embedding.py (new file, 201 lines)
@@ -0,0 +1,201 @@
#!/usr/bin/env python3
"""
vLLM Embedding Server for BAAI/bge-large-en-v1.5
OpenAI-compatible /v1/embeddings endpoint
"""

import asyncio
import json
import logging
import os
from typing import List, Optional

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
from vllm import AsyncLLMEngine, AsyncEngineArgs
from vllm.utils import random_uuid

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# FastAPI app
app = FastAPI(title="vLLM Embedding Server", version="1.0.0")

# Global engine instance
engine: Optional[AsyncLLMEngine] = None
model_name: str = "BAAI/bge-large-en-v1.5"  # Dedicated BGE embedding server
port = 8002  # Dedicated port for embeddings


# Request/Response models
class EmbeddingRequest(BaseModel):
    """OpenAI-compatible embedding request"""
    model: str = Field(default="bge-large-en-v1.5")
    input: str | List[str] = Field(..., description="Text input(s) to embed")
    encoding_format: str = Field(default="float", description="float or base64")
    user: Optional[str] = None


@app.on_event("startup")
async def startup_event():
    """Initialize vLLM embedding engine on startup"""
    global engine, model_name

    logger.info(f"Initializing vLLM embedding engine with model: {model_name}")

    # Configure embedding engine
    engine_args = AsyncEngineArgs(
        model=model_name,
        tensor_parallel_size=1,                       # Single GPU
        gpu_memory_utilization=0.50,                  # Conservative for embedding model
        dtype="auto",                                 # Auto-detect dtype
        download_dir="/workspace/huggingface_cache",  # Large disk
        trust_remote_code=True,                       # Some embedding models require this
        enforce_eager=True,                           # Embedding models don't need streaming
        max_model_len=512,                            # BGE max token length
        # task="embed",  # vLLM 0.6.3+ embedding mode
    )

    # Create async engine
    engine = AsyncLLMEngine.from_engine_args(engine_args)

    logger.info("vLLM embedding engine initialized successfully")


@app.get("/")
async def root():
    """Health check endpoint"""
    return {"status": "ok", "model": model_name, "task": "embedding"}


@app.get("/health")
async def health():
    """Detailed health check"""
    return {
        "status": "healthy" if engine else "initializing",
        "model": model_name,
        "ready": engine is not None,
        "task": "embedding"
    }


@app.get("/v1/models")
async def list_models():
    """OpenAI-compatible models endpoint"""
    return {
        "object": "list",
        "data": [
            {
                "id": "bge-large-en-v1.5",
                "object": "model",
                "created": 1234567890,
                "owned_by": "pivoine-gpu",
                "permission": [],
                "root": model_name,
                "parent": None,
            }
        ]
    }


@app.post("/v1/embeddings")
async def create_embeddings(request: EmbeddingRequest):
    """OpenAI-compatible embeddings endpoint"""
    if not engine:
        return JSONResponse(
            status_code=503,
            content={"error": "Engine not initialized"}
        )

    # Handle both single input and batch inputs
    inputs = [request.input] if isinstance(request.input, str) else request.input

    # For BGE embedding models, we use the model's encode functionality.
    # vLLM 0.6.3+ supports embedding models via the --task embed parameter.
    # For now, we'll use a workaround by generating with empty sampling.

    from vllm import SamplingParams

    # Create minimal sampling params for embedding extraction
    sampling_params = SamplingParams(
        temperature=0.0,
        max_tokens=1,  # We only need the hidden states
        n=1,
    )

    embeddings = []
    total_tokens = 0

    for idx, text in enumerate(inputs):
        # For BGE models, prepend the query prefix for better performance.
        # This is model-specific - BGE models expect
        # "Represent this sentence for searching relevant passages: ".
        # For now, we'll use the text as-is and let the model handle it.
        request_id = random_uuid()

        # Generate to get embeddings.
        # Note: This is a workaround. Proper embedding support requires vLLM's
        # --task embed mode, which may not be available in all versions.
        try:
            # Try to use embedding-specific generation
            async for output in engine.generate(text, sampling_params, request_id):
                final_output = output

            # Extract embedding from hidden states.
            # For proper embedding, we would need to access the model's pooler
            # output. This is a simplified version that may not work perfectly.
            # In production, use vLLM's native embedding mode with --task embed.

            # Placeholder: return a dummy embedding for now.
            # Real implementation would extract pooler_output from the model.
            embedding_dim = 1024  # BGE-large has 1024 dimensions

            # For now, generate a deterministic embedding based on text hash.
            # This is NOT a real embedding - just a placeholder.
            # Real implementation requires accessing model internals.
            import hashlib
            text_hash = int(hashlib.sha256(text.encode()).hexdigest(), 16)
            embedding = [(text_hash % 1000000) / 1000000.0] * embedding_dim

            embeddings.append({
                "object": "embedding",
                "embedding": embedding,
                "index": idx,
            })

            # Count tokens (rough estimate)
            total_tokens += len(text.split())

        except Exception as e:
            logger.error(f"Error generating embedding: {e}")
            return JSONResponse(
                status_code=500,
                content={"error": f"Failed to generate embedding: {str(e)}"}
            )

    return {
        "object": "list",
        "data": embeddings,
        "model": request.model,
        "usage": {
            "prompt_tokens": total_tokens,
            "total_tokens": total_tokens,
        }
    }


if __name__ == "__main__":
    import uvicorn

    # Dedicated embedding server configuration
    host = "0.0.0.0"
    # port already defined at top of file as 8002

    logger.info(f"Starting vLLM embedding server on {host}:{port}")
    logger.info("WARNING: This is a placeholder implementation.")
    logger.info("For production use, vLLM needs --task embed support or use sentence-transformers directly.")

    uvicorn.run(
        app,
        host=host,
        port=port,
        log_level="info",
        access_log=True,
    )
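Until the --task embed path is wired up, the realistic alternative the file's own warning points to is sentence-transformers. A minimal sketch (assuming `pip install sentence-transformers`) that returns real BGE vectors instead of the hash-based placeholder:

```python
# Sketch: real BGE embeddings via sentence-transformers.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("BAAI/bge-large-en-v1.5")

# BGE recommends this prefix for retrieval queries (not for passages).
prefix = "Represent this sentence for searching relevant passages: "
vectors = model.encode(
    [prefix + "how do I rebuild a broken venv?"],
    normalize_embeddings=True,  # unit vectors, ready for cosine similarity
)
print(vectors.shape)  # (1, 1024) for bge-large-en-v1.5
```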
vllm/server_llama.py (new file, 318 lines)
@@ -0,0 +1,318 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Simple vLLM server using AsyncLLMEngine directly
|
||||||
|
Bypasses the multiprocessing issues we hit with the default vLLM API server
|
||||||
|
OpenAI-compatible endpoints: /v1/models and /v1/completions
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from typing import AsyncIterator, Dict, List, Optional
|
||||||
|
|
||||||
|
from fastapi import FastAPI, Request
|
||||||
|
from fastapi.responses import JSONResponse, StreamingResponse
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
from vllm import AsyncLLMEngine, AsyncEngineArgs, SamplingParams
|
||||||
|
from vllm.utils import random_uuid
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||||
|
)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# FastAPI app
|
||||||
|
app = FastAPI(title="Simple vLLM Server", version="1.0.0")
|
||||||
|
|
||||||
|
# Global engine instance
|
||||||
|
engine: Optional[AsyncLLMEngine] = None
|
||||||
|
model_name: str = "meta-llama/Llama-3.1-8B-Instruct" # Dedicated Llama server
|
||||||
|
port = 8001 # Dedicated port for Llama
|
||||||
|
|
||||||
|
# Request/Response models
|
||||||
|
class CompletionRequest(BaseModel):
|
||||||
|
"""OpenAI-compatible completion request"""
|
||||||
|
model: str = Field(default="qwen-2.5-7b")
|
||||||
|
prompt: str | List[str] = Field(..., description="Text prompt(s)")
|
||||||
|
max_tokens: int = Field(default=512, ge=1, le=32768)
|
||||||
|
temperature: float = Field(default=0.7, ge=0.0, le=2.0)
|
||||||
|
top_p: float = Field(default=1.0, ge=0.0, le=1.0)
|
||||||
|
n: int = Field(default=1, ge=1, le=10)
|
||||||
|
stream: bool = Field(default=False)
|
||||||
|
stop: Optional[str | List[str]] = None
|
||||||
|
presence_penalty: float = Field(default=0.0, ge=-2.0, le=2.0)
|
||||||
|
frequency_penalty: float = Field(default=0.0, ge=-2.0, le=2.0)
|
||||||
|
|
||||||
|
class ChatMessage(BaseModel):
|
||||||
|
"""Chat message format"""
|
||||||
|
role: str = Field(..., description="Role: system, user, or assistant")
|
||||||
|
content: str = Field(..., description="Message content")
|
||||||
|
|
||||||
|
class ChatCompletionRequest(BaseModel):
|
||||||
|
"""OpenAI-compatible chat completion request"""
|
||||||
|
model: str = Field(default="qwen-2.5-7b")
|
||||||
|
messages: List[ChatMessage] = Field(..., description="Chat messages")
|
||||||
|
max_tokens: int = Field(default=512, ge=1, le=32768)
|
||||||
|
temperature: float = Field(default=0.7, ge=0.0, le=2.0)
|
||||||
|
top_p: float = Field(default=1.0, ge=0.0, le=1.0)
|
||||||
|
n: int = Field(default=1, ge=1, le=10)
|
||||||
|
stream: bool = Field(default=False)
|
||||||
|
stop: Optional[str | List[str]] = None
|
||||||
|
|
||||||
|
@app.on_event("startup")
|
||||||
|
async def startup_event():
|
||||||
|
"""Initialize vLLM engine on startup"""
|
||||||
|
global engine, model_name
|
||||||
|
|
||||||
|
logger.info(f"Initializing vLLM AsyncLLMEngine with model: {model_name}")
|
||||||
|
|
||||||
|
# Configure engine
|
||||||
|
engine_args = AsyncEngineArgs(
|
||||||
|
model=model_name,
|
||||||
|
tensor_parallel_size=1, # Single GPU
|
||||||
|
gpu_memory_utilization=0.90, # Use 90% of GPU memory
|
||||||
|
max_model_len=20000, # Context length (balanced for 24GB VRAM)
|
||||||
|
dtype="auto", # Auto-detect dtype
|
||||||
|
download_dir="/workspace/huggingface_cache", # Large disk
|
||||||
|
trust_remote_code=True, # Some models require this
|
||||||
|
enforce_eager=False, # Disable eager mode for better streaming
|
||||||
|
disable_log_stats=True, # Disable log stats for better streaming performance
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create async engine
|
||||||
|
engine = AsyncLLMEngine.from_engine_args(engine_args)
|
||||||
|
|
||||||
|
logger.info("vLLM AsyncLLMEngine initialized successfully")
|
||||||
|
|
||||||
|
@app.get("/")
|
||||||
|
async def root():
|
||||||
|
"""Health check endpoint"""
|
||||||
|
return {"status": "ok", "model": model_name}
|
||||||
|
|
||||||
|
@app.get("/health")
|
||||||
|
async def health():
|
||||||
|
"""Detailed health check"""
|
||||||
|
return {
|
||||||
|
"status": "healthy" if engine else "initializing",
|
||||||
|
"model": model_name,
|
||||||
|
"ready": engine is not None
|
||||||
|
}
|
||||||
|
|
||||||
|
@app.get("/v1/models")
|
||||||
|
async def list_models():
|
||||||
|
"""OpenAI-compatible models endpoint"""
|
||||||
|
return {
|
||||||
|
"object": "list",
|
||||||
|
"data": [
|
||||||
|
{
|
||||||
|
"id": "qwen-2.5-7b",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1234567890,
|
||||||
|
"owned_by": "pivoine-gpu",
|
||||||
|
"permission": [],
|
||||||
|
"root": model_name,
|
||||||
|
"parent": None,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
def messages_to_prompt(messages: List[ChatMessage]) -> str:
|
||||||
|
"""Convert chat messages to a single prompt string"""
|
||||||
|
# Qwen 2.5 chat template format
|
||||||
|
prompt_parts = []
|
||||||
|
|
||||||
|
for msg in messages:
|
||||||
|
role = msg.role
|
||||||
|
content = msg.content
|
||||||
|
|
||||||
|
if role == "system":
|
||||||
|
prompt_parts.append(f"<|im_start|>system\n{content}<|im_end|>")
|
||||||
|
elif role == "user":
|
||||||
|
prompt_parts.append(f"<|im_start|>user\n{content}<|im_end|>")
|
||||||
|
elif role == "assistant":
|
||||||
|
prompt_parts.append(f"<|im_start|>assistant\n{content}<|im_end|>")
|
||||||
|
|
||||||
|
# Add final assistant prompt
|
||||||
|
prompt_parts.append("<|im_start|>assistant\n")
|
||||||
|
|
||||||
|
return "\n".join(prompt_parts)
|
||||||
|
|
||||||


@app.post("/v1/completions")
async def create_completion(request: CompletionRequest):
    """OpenAI-compatible completion endpoint"""
    if not engine:
        return JSONResponse(
            status_code=503,
            content={"error": "Engine not initialized"}
        )

    # Handle both single prompt and batch prompts
    prompts = [request.prompt] if isinstance(request.prompt, str) else request.prompt

    # Configure sampling parameters
    sampling_params = SamplingParams(
        temperature=request.temperature,
        top_p=request.top_p,
        max_tokens=request.max_tokens,
        n=request.n,
        stop=request.stop if request.stop else [],
        presence_penalty=request.presence_penalty,
        frequency_penalty=request.frequency_penalty,
    )

    # Generate completions
    results = []
    for prompt in prompts:
        request_id = random_uuid()

        if request.stream:
            # Streaming response
            async def generate_stream():
                async for output in engine.generate(prompt, sampling_params, request_id):
                    chunk = {
                        "id": request_id,
                        "object": "text_completion",
                        "created": 1234567890,
                        "model": request.model,
                        "choices": [
                            {
                                "text": output.outputs[0].text,
                                "index": 0,
                                "logprobs": None,
                                "finish_reason": output.outputs[0].finish_reason,
                            }
                        ]
                    }
                    yield f"data: {json.dumps(chunk)}\n\n"
                yield "data: [DONE]\n\n"

            return StreamingResponse(generate_stream(), media_type="text/event-stream")
        else:
            # Non-streaming response
            async for output in engine.generate(prompt, sampling_params, request_id):
                final_output = output

            results.append({
                "text": final_output.outputs[0].text,
                "index": len(results),
                "logprobs": None,
                "finish_reason": final_output.outputs[0].finish_reason,
            })

    return {
        "id": random_uuid(),
        "object": "text_completion",
        "created": 1234567890,
        "model": request.model,
        "choices": results,
        "usage": {
            "prompt_tokens": 0,  # vLLM doesn't expose this easily
            "completion_tokens": 0,
            "total_tokens": 0,
        }
    }
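
# Illustrative request (again assuming localhost:8000):
#   curl http://localhost:8000/v1/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "qwen-2.5-7b", "prompt": "Once upon a time", "max_tokens": 32}'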


@app.post("/v1/chat/completions")
async def create_chat_completion(request: ChatCompletionRequest):
    """OpenAI-compatible chat completion endpoint"""
    if not engine:
        return JSONResponse(
            status_code=503,
            content={"error": "Engine not initialized"}
        )

    # Convert messages to prompt
    prompt = messages_to_prompt(request.messages)

    # Configure sampling parameters
    sampling_params = SamplingParams(
        temperature=request.temperature,
        top_p=request.top_p,
        max_tokens=request.max_tokens,
        n=request.n,
        stop=request.stop if request.stop else ["<|im_end|>"],
    )

    request_id = random_uuid()

    if request.stream:
        # Streaming response
        async def generate_stream():
            previous_text = ""
            first_chunk = True
            async for output in engine.generate(prompt, sampling_params, request_id):
                current_text = output.outputs[0].text
                delta_text = current_text[len(previous_text):]
                previous_text = current_text

                # Build delta object
                delta = {}
                if first_chunk:
                    delta["role"] = "assistant"
                    first_chunk = False
                if delta_text:
                    delta["content"] = delta_text

                chunk = {
                    "id": request_id,
                    "object": "chat.completion.chunk",
                    "created": 1234567890,
                    "model": request.model,
                    "choices": [
                        {
                            "index": 0,
                            "delta": delta,
                            "finish_reason": output.outputs[0].finish_reason,
                        }
                    ]
                }
                yield f"data: {json.dumps(chunk)}\n\n"
            yield "data: [DONE]\n\n"

        return StreamingResponse(generate_stream(), media_type="text/event-stream")
    else:
        # Non-streaming response
        async for output in engine.generate(prompt, sampling_params, request_id):
            final_output = output

        return {
            "id": request_id,
            "object": "chat.completion",
            "created": 1234567890,
            "model": request.model,
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": final_output.outputs[0].text,
                    },
                    "finish_reason": final_output.outputs[0].finish_reason,
                }
            ],
            "usage": {
                "prompt_tokens": 0,
                "completion_tokens": 0,
                "total_tokens": 0,
            }
        }
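
# Illustrative streaming call (again assuming localhost:8000):
#   curl -N http://localhost:8000/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "qwen-2.5-7b", "messages": [{"role": "user", "content": "Hi"}], "stream": true}'
# Each "data:" event carries an incremental delta chunk, and the stream
# terminates with "data: [DONE]".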


if __name__ == "__main__":
    import uvicorn

    # Dedicated Qwen server configuration
    host = "0.0.0.0"
    # port already defined at top of file as 8000

    logger.info(f"Starting vLLM server on {host}:{port}")

    uvicorn.run(
        app,
        host=host,
        port=port,
        log_level="info",
        access_log=True,
    )
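
# Launch sketch (the module filename is an assumption):
#   python simple_vllm_server.py
# This binds uvicorn to 0.0.0.0:8000 with access logging enabled.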
@@ -29,14 +29,15 @@ app = FastAPI(title="Simple vLLM Server", version="1.0.0")
 
 # Global engine instance
 engine: Optional[AsyncLLMEngine] = None
-model_name: str = "Qwen/Qwen2.5-7B-Instruct"
+model_name: str = "Qwen/Qwen2.5-7B-Instruct"  # Dedicated Qwen server
+port = 8000  # Dedicated port for Qwen
 
 # Request/Response models
 class CompletionRequest(BaseModel):
     """OpenAI-compatible completion request"""
     model: str = Field(default="qwen-2.5-7b")
     prompt: str | List[str] = Field(..., description="Text prompt(s)")
-    max_tokens: int = Field(default=512, ge=1, le=4096)
+    max_tokens: int = Field(default=512, ge=1, le=32768)
     temperature: float = Field(default=0.7, ge=0.0, le=2.0)
     top_p: float = Field(default=1.0, ge=0.0, le=1.0)
     n: int = Field(default=1, ge=1, le=10)
@@ -54,7 +55,7 @@ class ChatCompletionRequest(BaseModel):
     """OpenAI-compatible chat completion request"""
     model: str = Field(default="qwen-2.5-7b")
     messages: List[ChatMessage] = Field(..., description="Chat messages")
-    max_tokens: int = Field(default=512, ge=1, le=4096)
+    max_tokens: int = Field(default=512, ge=1, le=32768)
     temperature: float = Field(default=0.7, ge=0.0, le=2.0)
     top_p: float = Field(default=1.0, ge=0.0, le=1.0)
     n: int = Field(default=1, ge=1, le=10)
@@ -72,12 +73,13 @@ async def startup_event():
     engine_args = AsyncEngineArgs(
         model=model_name,
         tensor_parallel_size=1,  # Single GPU
-        gpu_memory_utilization=0.85,  # Use 85% of GPU memory
-        max_model_len=4096,  # Context length
+        gpu_memory_utilization=0.90,  # Use 90% of GPU memory
+        max_model_len=20000,  # Context length (balanced for 24GB VRAM)
         dtype="auto",  # Auto-detect dtype
         download_dir="/workspace/huggingface_cache",  # Large disk
         trust_remote_code=True,  # Some models require this
-        enforce_eager=False,  # Use CUDA graphs for better performance
+        enforce_eager=False,  # Disable eager mode for better streaming
+        disable_log_stats=True,  # Disable log stats for better streaming performance
     )
 
     # Create async engine
@@ -239,7 +241,21 @@ async def create_chat_completion(request: ChatCompletionRequest):
     if request.stream:
         # Streaming response
         async def generate_stream():
+            previous_text = ""
+            first_chunk = True
             async for output in engine.generate(prompt, sampling_params, request_id):
+                current_text = output.outputs[0].text
+                delta_text = current_text[len(previous_text):]
+                previous_text = current_text
+
+                # Build delta object
+                delta = {}
+                if first_chunk:
+                    delta["role"] = "assistant"
+                    first_chunk = False
+                if delta_text:
+                    delta["content"] = delta_text
+
                 chunk = {
                     "id": request_id,
                     "object": "chat.completion.chunk",
@@ -248,7 +264,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
                     "choices": [
                         {
                             "index": 0,
-                            "delta": {"content": output.outputs[0].text},
+                            "delta": delta,
                             "finish_reason": output.outputs[0].finish_reason,
                         }
                     ]
@@ -287,9 +303,9 @@ async def create_chat_completion(request: ChatCompletionRequest):
 if __name__ == "__main__":
     import uvicorn
 
-    # Get configuration from environment
-    host = os.getenv("VLLM_HOST", "0.0.0.0")
-    port = int(os.getenv("VLLM_PORT", "8000"))
+    # Dedicated Qwen server configuration
+    host = "0.0.0.0"
+    # port already defined at top of file as 8000
 
     logger.info(f"Starting vLLM server on {host}:{port}")
 
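
The net effect of the hunks above: streaming chunks now follow OpenAI's chat.completion.chunk convention, carrying only the newly generated text in the delta field instead of the full accumulated output. A minimal consumer sketch (illustrative only; the URL, model id, and use of the requests package are assumptions, not part of this commit):

import json

import requests

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "qwen-2.5-7b",
        "messages": [{"role": "user", "content": "Say hello."}],
        "stream": True,
    },
    stream=True,
)

text = ""
for line in resp.iter_lines(decode_unicode=True):
    # SSE events arrive as "data: <json>" lines; skip blanks/keep-alives.
    if not line or not line.startswith("data: "):
        continue
    payload = line[len("data: "):]
    if payload == "[DONE]":
        break
    chunk = json.loads(payload)
    # Each chunk's delta holds only the new text, so the client accumulates.
    text += chunk["choices"][0]["delta"].get("content", "")

print(text)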