Files
bin/artifact_huggingface_download.sh

1678 lines
59 KiB
Bash
Raw Normal View History

#!/bin/bash
#
# ComfyUI Model Downloader - A Beautiful CLI Tool
# Downloads AI models from HuggingFace and creates symlinks to ComfyUI directories
#
# Usage: ./artifact_huggingface_download.sh [COMMAND] [OPTIONS]
#
# Commands:
# download Download models from HuggingFace to cache directory
# link Create symlinks from cache to ComfyUI models directory
# both Download and link (default)
# verify Verify symlinks in ComfyUI models directory
#
# Options:
# -c, --config FILE Path to YAML configuration file
# --cache-dir DIR HuggingFace cache directory (default: auto-detect)
# --comfyui-dir DIR ComfyUI installation directory
# --filter-repo REPO... Only process specific repositories
# --category CAT1[,CAT2] Filter by category (comma-separated for multiple)
# --cleanup, --clean Remove unused cache files (link/both only)
# --dry-run, -n Show what would be done without making changes
#
# Examples:
# # Download and link all models from config
# ./artifact_huggingface_download.sh both -c models.yaml
#
# # Download only specific categories
# ./artifact_huggingface_download.sh download -c models.yaml --category image_models,video_models
#
# # Link with cleanup (remove unused cache files)
# ./artifact_huggingface_download.sh link -c models.yaml --cleanup
#
# # Dry-run to preview operations
# ./artifact_huggingface_download.sh both -c models.yaml --dry-run
#
# # Process only specific repositories
# ./artifact_huggingface_download.sh both -c models.yaml --filter-repo black-forest-labs/FLUX.1-schnell
#
set -euo pipefail
# ============================================================================
# COLOR PALETTE - Beautiful Terminal Colors
# ============================================================================
# Reset
RESET='\033[0m'
# Foreground Colors
BLACK='\033[0;30m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
MAGENTA='\033[0;35m'
CYAN='\033[0;36m'
WHITE='\033[0;37m'
# Bold
BOLD_BLACK='\033[1;30m'
BOLD_RED='\033[1;31m'
BOLD_GREEN='\033[1;32m'
BOLD_YELLOW='\033[1;33m'
BOLD_BLUE='\033[1;34m'
BOLD_MAGENTA='\033[1;35m'
BOLD_CYAN='\033[1;36m'
BOLD_WHITE='\033[1;37m'
# Background Colors
BG_BLACK='\033[40m'
BG_RED='\033[41m'
BG_GREEN='\033[42m'
BG_YELLOW='\033[43m'
BG_BLUE='\033[44m'
BG_MAGENTA='\033[45m'
BG_CYAN='\033[46m'
BG_WHITE='\033[47m'
# Styles
DIM='\033[2m'
ITALIC='\033[3m'
UNDERLINE='\033[4m'
BLINK='\033[5m'
REVERSE='\033[7m'
# ============================================================================
# UNICODE CHARACTERS - Make it Pretty
# ============================================================================
CHECK_MARK="✓"
CROSS_MARK="✗"
ROCKET="🚀"
PACKAGE="📦"
DOWNLOAD="⬇️"
SPARKLES="✨"
FIRE="🔥"
CLOCK="⏱️"
FOLDER="📁"
LINK="🔗"
STAR="⭐"
WARNING="⚠️"
INFO=""
ARROW_RIGHT="→"
DOUBLE_ARROW="»"
BOX_LIGHT="─"
BOX_HEAVY="━"
BOX_DOUBLE="═"
# ============================================================================
# CONFIGURATION
# ============================================================================
# Script directory
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"

# Read the HF_TOKEN value from a dotenv-style file.
# $1 - path to the .env file
# Prints the first HF_TOKEN value (surrounding quotes stripped), or nothing
# if the key is absent. Never fails (safe under 'set -euo pipefail').
_read_env_token() {
    local env_file="$1"
    # Anchor on 'HF_TOKEN=' so similarly named keys (e.g. HF_TOKEN_RO=...)
    # are not matched, and take only the first occurrence so the result is
    # guaranteed to be a single line. The original 'grep ^HF_TOKEN' matched
    # any key starting with that prefix and could emit multiple lines.
    grep -E '^HF_TOKEN=' "$env_file" 2>/dev/null | head -n1 | cut -d'=' -f2- | tr -d '"' | tr -d "'" || true
}

# Default configuration file path — first existing candidate wins.
if [[ -f "${HOME}/Projects/runpod/comfyui_models.yaml" ]]; then
    CONFIG_FILE="${HOME}/Projects/runpod/comfyui_models.yaml"
elif [[ -f "${PROJECT_ROOT}/comfyui_models.yaml" ]]; then
    CONFIG_FILE="${PROJECT_ROOT}/comfyui_models.yaml"
elif [[ -f "${SCRIPT_DIR}/comfyui_models.yaml" ]]; then
    CONFIG_FILE="${SCRIPT_DIR}/comfyui_models.yaml"
else
    CONFIG_FILE="" # No config file by default
fi
# Default cache directory - a /workspace mount marks a RunPod environment.
# Explicit CACHE_DIR/COMFYUI_DIR environment values always win.
if [[ -d "/workspace" ]]; then
    # RunPod environment
    CACHE_DIR="${CACHE_DIR:-/workspace/huggingface_cache}"
    COMFYUI_DIR="${COMFYUI_DIR:-/workspace/ComfyUI/models}"
else
    # Local environment
    CACHE_DIR="${CACHE_DIR:-${HOME}/.cache/huggingface}"
    COMFYUI_DIR="${COMFYUI_DIR:-${HOME}/ComfyUI/models}"
fi
# Default command
COMMAND="both"
# Feature flags
CATEGORY_FILTER="" # Empty = all categories, or comma-separated list
CLEANUP_MODE=false # Remove unused files from HuggingFace cache
DRY_RUN=false # Simulate operations without making changes
# HuggingFace token: environment wins; otherwise the first .env file that
# defines HF_TOKEN is used.
HF_TOKEN="${HF_TOKEN:-}"
for _env_file in "${PROJECT_ROOT}/ai/.env" "${PROJECT_ROOT}/.env" "/workspace/ai/.env"; do
    if [[ -z "${HF_TOKEN}" && -f "$_env_file" ]]; then
        HF_TOKEN=$(_read_env_token "$_env_file")
    fi
done
unset _env_file
# ============================================================================
# UTILITY FUNCTIONS - The Magic Happens Here
# ============================================================================
# Print functions with beautiful formatting
print_banner() {
    # Render a centered title between two double-line rules, 80 glyphs wide.
    local text="$1"
    local width=80
    local padding=$(( (width - ${#text} - 2) / 2 ))
    local rule spaces
    # '%.0s' consumes one argument per repetition and prints only the glyph
    # appended to the format, so this repeats the character width/padding times.
    rule="$(printf '%.0s'"${BOX_DOUBLE}" $(seq 1 $width))"
    spaces="$(printf '%.0s ' $(seq 1 $padding))"
    echo -e ""
    echo -e "${BOLD_CYAN}${BOX_DOUBLE}${rule}${BOX_DOUBLE}${RESET}"
    echo -e "${BOLD_CYAN}${BOX_DOUBLE}${spaces}${BOLD_MAGENTA}${text}${spaces}${BOLD_CYAN}${BOX_DOUBLE}${RESET}"
    echo -e "${BOLD_CYAN}${BOX_DOUBLE}${rule}${BOX_DOUBLE}${RESET}"
    echo -e ""
}
print_section() {
    # Print a highlighted section heading followed by an 80-char light rule.
    local title="$1"
    local rule
    rule="$(printf '%.0s'"${BOX_LIGHT}" $(seq 1 80))"
    echo -e "\n${BOLD_YELLOW}${DOUBLE_ARROW} ${title}${RESET}"
    echo -e "${CYAN}${rule}${RESET}"
}
# Status-line helpers. Each prints one formatted message; print_error writes
# to stderr, everything else to stdout. '%b' expands the stored '\033[...'
# color sequences exactly as 'echo -e' did.
print_success() {
    printf '%b\n' "${BOLD_GREEN}${CHECK_MARK} $1${RESET}"
}
print_error() {
    printf '%b\n' "${BOLD_RED}${CROSS_MARK} $1${RESET}" >&2
}
print_warning() {
    printf '%b\n' "${BOLD_YELLOW}${WARNING} $1${RESET}"
}
print_info() {
    printf '%b\n' "${BOLD_CYAN}${INFO} $1${RESET}"
}
print_step() {
    # "[current/total] ⬇ message" — progress prefix for multi-item loops.
    local idx="$1" count="$2" message="$3"
    printf '%b\n' "${BOLD_BLUE}[${idx}/${count}]${RESET} ${MAGENTA}${DOWNLOAD}${RESET} ${message}"
}
print_detail() {
    # Indented, dimmed detail line beneath a step.
    printf '%b\n' " ${DIM}${CYAN}${ARROW_RIGHT} $1${RESET}"
}
# Render an in-place progress bar: [█████░░░░░]  42% (current/total).
# $1 - items completed, $2 - total items.
# Output starts with '\r' and ends without a newline so repeated calls
# overwrite the same terminal line.
show_progress() {
    local current="$1"
    local total="$2"
    local width=50
    # Guard against division by zero when called with an empty work list.
    if (( total <= 0 )); then
        return 0
    fi
    local percentage=$((current * 100 / total))
    local filled=$((current * width / total))
    local empty=$((width - filled))
    # Build the bar segments in bash: the original piped spaces through
    # "tr ' ' '█'", but tr operates on single bytes and mangles multi-byte
    # UTF-8 glyphs like █/░ into garbage.
    local bar_done="" bar_todo="" i
    for (( i = 0; i < filled; i++ )); do bar_done+="█"; done
    for (( i = 0; i < empty; i++ )); do bar_todo+="░"; done
    printf "\r ${BOLD_CYAN}Progress: ${RESET}["
    # '%b' so the stored '\033[...' sequences are expanded, matching the
    # original's use of them inside printf format strings.
    printf '%b' "${BG_GREEN}${BOLD_WHITE}${bar_done}${RESET}"
    printf '%b' "${DIM}${bar_todo}${RESET}"
    printf "] ${BOLD_YELLOW}%3d%%${RESET} ${DIM}(%d/%d)${RESET}" "$percentage" "$current" "$total"
}
# Parse the models YAML config via an embedded Python helper.
# $1 - path to the YAML file
# $2 - selector:
#        'settings'   -> prints KEY=VALUE lines (CACHE_DIR, PARALLEL_DOWNLOADS)
#        'categories' -> prints one category name per line
#        <category>   -> prints 'repo_id|description|size_gb|essential|filename'
#                        per model in that category
# Exits non-zero for an unknown selector or a YAML parse error.
parse_yaml() {
    local yaml_file="$1"
    local category="$2"
    # Arguments are passed via argv; the quoted here-doc delimiter keeps the
    # Python source out of shell expansion entirely.
    python3 - "$yaml_file" "$category" <<'EOPYAML'
import yaml
import sys
yaml_file = sys.argv[1]
category = sys.argv[2]
try:
    with open(yaml_file, 'r') as f:
        config = yaml.safe_load(f)
    if category == 'settings':
        settings = config.get('settings', {})
        print("CACHE_DIR={0}".format(settings.get('cache_dir', '/workspace/huggingface_cache')))
        print("PARALLEL_DOWNLOADS={0}".format(settings.get('parallel_downloads', 1)))
    elif category == 'categories':
        for cat_name in config.get('model_categories', {}).keys():
            print(cat_name)
    elif category in config.get('model_categories', {}):
        models = config['model_categories'][category]
        for model in models:
            repo_id = model.get('repo_id', '')
            description = model.get('description', '')
            size_gb = model.get('size_gb', 0)
            essential = model.get('essential', False)
            filename = model.get('filename', '')
            print('{0}|{1}|{2}|{3}|{4}'.format(repo_id, description, size_gb, essential, filename))
    else:
        sys.exit(1)
except Exception as e:
    print("ERROR: {0}".format(e), file=sys.stderr)
    sys.exit(1)
EOPYAML
}
# Resolve the list of categories to process, one per line on stdout.
# $1 - path to the YAML config file.
# With no CATEGORY_FILTER, prints every category from the config; otherwise
# validates each comma-separated requested name and exits 1 (after listing
# the valid names) on the first unknown category.
# Reads globals: CATEGORY_FILTER.
validate_and_get_categories() {
    local config_file="$1"
    # Get all available categories
    local all_categories
    all_categories=$(parse_yaml "$config_file" "categories")
    # If no filter specified, return all categories
    if [[ -z "$CATEGORY_FILTER" ]]; then
        echo "$all_categories"
        return 0
    fi
    # Split comma-separated categories and validate each
    local requested_categories
    IFS=',' read -ra requested_categories <<< "$CATEGORY_FILTER"
    local validated_categories=()
    local requested
    for requested in "${requested_categories[@]}"; do
        # Trim surrounding whitespace with parameter expansion (no sed fork).
        requested="${requested#"${requested%%[![:space:]]*}"}"
        requested="${requested%"${requested##*[![:space:]]}"}"
        # -F -x: literal whole-line match — a category name containing regex
        # metacharacters must not be interpreted as a pattern (the original
        # used an unescaped "^name$" regex).
        if grep -Fxq -- "$requested" <<< "$all_categories"; then
            validated_categories+=("$requested")
        else
            print_error "Invalid category: '$requested'"
            echo ""
            echo "Available categories:"
            while IFS= read -r cat; do
                echo " - $cat"
            done <<< "$all_categories"
            exit 1
        fi
    done
    # Print validated categories (one per line)
    local cat
    for cat in "${validated_categories[@]}"; do
        echo "$cat"
    done
}
# Print the explicit file mappings for one model, one 'source|dest' line per
# entry (dest defaults to source when omitted in the YAML).
# $1 - YAML file, $2 - category name, $3 - repo_id to look up.
# Prints nothing when the model has no 'files' list. Exits 1 on YAML errors.
parse_file_mappings() {
    local yaml_file="$1"
    local category="$2"
    local repo_id="$3"
    # Quoted delimiter: the Python source is never shell-expanded.
    python3 - "$yaml_file" "$category" "$repo_id" <<'EOPYTHON'
import yaml
import sys
yaml_file = sys.argv[1]
category = sys.argv[2]
repo_id = sys.argv[3]
try:
    with open(yaml_file, 'r') as f:
        config = yaml.safe_load(f)
    if category in config.get('model_categories', {}):
        models = config['model_categories'][category]
        for model in models:
            if model.get('repo_id', '') == repo_id:
                files = model.get('files', [])
                if files:
                    for file_mapping in files:
                        source = file_mapping.get('source', '')
                        dest = file_mapping.get('dest', source)
                        if source:
                            print('{0}|{1}'.format(source, dest))
                sys.exit(0)
    # No file mappings found
    sys.exit(0)
except Exception as e:
    print("ERROR: {0}".format(e), file=sys.stderr)
    sys.exit(1)
EOPYTHON
}
# Verify python3/pip3 exist and that the required Python packages (pyyaml,
# huggingface_hub) are importable, installing the packages on demand.
# Exits 1 when the interpreters themselves are missing.
check_dependencies() {
    print_section "Checking Dependencies"
    local missing_deps=()
    # 'command -v' is the portable existence check.
    if ! command -v python3 &> /dev/null; then
        missing_deps+=("python3")
    fi
    if ! command -v pip3 &> /dev/null; then
        missing_deps+=("pip3")
    fi
    # Fail fast BEFORE attempting any imports or pip installs — the original
    # checked afterwards, so a missing python3 surfaced as a confusing
    # command-not-found from 'python3 -c' instead of this message.
    if [[ ${#missing_deps[@]} -gt 0 ]]; then
        print_error "Missing dependencies: ${missing_deps[*]}"
        exit 1
    fi
    # Install required Python packages on demand.
    if ! python3 -c "import yaml" 2>/dev/null; then
        print_warning "PyYAML not installed, installing..."
        pip3 install pyyaml -q
    fi
    if ! python3 -c "import huggingface_hub" 2>/dev/null; then
        print_warning "huggingface_hub not installed, installing..."
        pip3 install huggingface_hub -q
    fi
    print_success "All dependencies satisfied"
}
# Validate configuration
# Validates the runtime configuration for the selected COMMAND and exits 1
# on fatal problems (missing config file, missing HF_TOKEN, bad flag combo).
# Reads globals: COMMAND, CONFIG_FILE, HF_TOKEN, CLEANUP_MODE, CACHE_DIR,
# COMFYUI_DIR, DRY_RUN-independent. Side effect: creates CACHE_DIR
# (mkdir -p) for the download/both commands.
validate_config() {
    print_section "Validating Configuration"
    # Show current command
    print_info "Command: ${BOLD_CYAN}${COMMAND}${RESET}"
    if [[ -n "$CONFIG_FILE" ]]; then
        if [[ ! -f "$CONFIG_FILE" ]]; then
            print_error "Configuration file not found: $CONFIG_FILE"
            exit 1
        fi
        print_success "Configuration file found: ${CYAN}${CONFIG_FILE}${RESET}"
    else
        print_warning "No configuration file specified"
    fi
    # HF_TOKEN only required for download and both commands
    if [[ "$COMMAND" == "download" ]] || [[ "$COMMAND" == "both" ]]; then
        if [[ -z "$HF_TOKEN" ]]; then
            print_error "HF_TOKEN not set. Please set it in .env file or environment."
            exit 1
        fi
        # Only the first 10 characters are echoed so the full token never
        # lands in terminal scrollback or logs.
        print_success "HuggingFace token configured: ${DIM}${HF_TOKEN:0:10}...${RESET}"
    elif [[ "$COMMAND" == "verify" ]]; then
        print_info "Verify mode: HuggingFace token not required"
    fi
    # Validate flag combinations
    if [[ "$CLEANUP_MODE" == true ]] && [[ ! "$COMMAND" =~ ^(link|both)$ ]]; then
        print_error "--cleanup can only be used with 'link' or 'both' commands"
        exit 1
    fi
    # Cache directory: created for downloads, merely reported otherwise.
    if [[ "$COMMAND" == "download" ]] || [[ "$COMMAND" == "both" ]]; then
        if [[ ! -d "$CACHE_DIR" ]]; then
            print_info "Creating cache directory: ${CYAN}${CACHE_DIR}${RESET}"
            mkdir -p "$CACHE_DIR"
        fi
        print_success "Cache directory ready: ${CYAN}${CACHE_DIR}${RESET}"
    else
        # For link and verify commands, just show the directory
        if [[ -d "$CACHE_DIR" ]]; then
            print_success "Cache directory found: ${CYAN}${CACHE_DIR}${RESET}"
        else
            print_warning "Cache directory not found: ${CYAN}${CACHE_DIR}${RESET}"
        fi
    fi
    # ComfyUI directory
    if [[ "$COMMAND" == "link" ]] || [[ "$COMMAND" == "both" ]] || [[ "$COMMAND" == "verify" ]]; then
        if [[ -d "$COMFYUI_DIR" ]]; then
            print_success "ComfyUI directory found: ${CYAN}${COMFYUI_DIR}${RESET}"
        else
            if [[ "$COMMAND" == "verify" ]]; then
                print_warning "ComfyUI directory not found: ${CYAN}${COMFYUI_DIR}${RESET}"
            else
                # Not fatal for link/both: link_model mkdir -p's the target
                # directories on demand.
                print_info "ComfyUI directory: ${CYAN}${COMFYUI_DIR}${RESET}"
            fi
        fi
    fi
}
# List the files of a model's latest cache snapshot, one absolute path per
# line on stdout.
# $1 - repo_id (e.g. "org/name"), $2 - optional substring filter applied to
# file names ("" = all). Metadata files (.txt/.md/.gitattributes and most
# .json) are skipped except important configs and sharded-model indexes.
# Diagnostics go to stderr; exits 1 when the repo/snapshots are absent.
# Reads global: CACHE_DIR.
find_model_files() {
    local repo_id="$1"
    local filename_filter="$2"
    # Quoted delimiter: Python source is never shell-expanded; values travel
    # via argv.
    python3 - "$CACHE_DIR" "$repo_id" "$filename_filter" <<'EOPYFINDFIND'
import os
import sys
from pathlib import Path
cache_dir = sys.argv[1]
repo_id = sys.argv[2]
filename_filter = sys.argv[3]
# HuggingFace cache structure: cache_dir/models--org--name/snapshots/hash/
# Try both with and without 'hub/' subdirectory for compatibility
cache_path = Path(cache_dir)
repo_path = repo_id.replace('/', '--')
model_dir = cache_path / 'models--{0}'.format(repo_path)
model_dir_original = model_dir
if not model_dir.exists():
    # Fallback to hub/ subdirectory if direct path doesn't exist
    model_dir = cache_path / 'hub' / 'models--{0}'.format(repo_path)
    if not model_dir.exists():
        sys.stderr.write('ERROR: Model directory not found for {0}\n'.format(repo_id))
        sys.stderr.write(' Tried: {0}\n'.format(str(model_dir_original)))
        sys.stderr.write(' Tried: {0}\n'.format(str(model_dir)))
        sys.stderr.write(' Cache: {0}\n'.format(str(cache_path)))
        sys.exit(1)
# Find the latest snapshot
snapshots_dir = model_dir / 'snapshots'
if not snapshots_dir.exists():
    sys.stderr.write('ERROR: Snapshots directory not found for {0}\n'.format(repo_id))
    sys.stderr.write(' Model dir: {0}\n'.format(str(model_dir)))
    sys.stderr.write(' Expected: {0}\n'.format(str(snapshots_dir)))
    sys.exit(1)
# Get all snapshot directories sorted by modification time (newest first)
try:
    snapshots = sorted(snapshots_dir.iterdir(), key=lambda x: x.stat().st_mtime, reverse=True)
except Exception as e:
    sys.stderr.write('ERROR: Failed to list snapshots for {0}: {1}\n'.format(repo_id, str(e)))
    sys.exit(1)
if not snapshots:
    sys.stderr.write('ERROR: No snapshots found for {0}\n'.format(repo_id))
    sys.stderr.write(' Snapshots dir: {0}\n'.format(str(snapshots_dir)))
    sys.exit(1)
latest_snapshot = snapshots[0]
# Find model files
file_count = 0
for file_path in latest_snapshot.rglob('*'):
    if file_path.is_file():
        # If filename filter is specified, only match those files
        if filename_filter and filename_filter not in file_path.name:
            continue
        # Skip metadata files except important config files
        # Allow: config.json, tokenizer.json, tokenizer_config.json,
        # sentencepiece models, .index.json for sharded models
        important_files = ('config.json', 'tokenizer.json', 'tokenizer_config.json', '.model')
        if file_path.name.endswith(('.txt', '.md', '.gitattributes')):
            continue
        if file_path.name.endswith('.json') and not (file_path.name in important_files or file_path.name.endswith('.index.json')):
            continue
        print(str(file_path))
        file_count += 1
if file_count == 0:
    sys.stderr.write('WARN: No files matched filter for {0} (filter: {1})\n'.format(repo_id, filename_filter))
EOPYFINDFIND
}
# Download one HuggingFace repository snapshot into CACHE_DIR.
# $1 - repo_id, $2 - human-readable description (display only),
# $3 - approximate size in GB (display only).
# Honors DRY_RUN. Returns 0 on success, 1 on download failure.
# Reads globals: DRY_RUN, CACHE_DIR, HF_TOKEN.
download_model() {
    local repo_id="$1"
    local description="$2"
    local size_gb="$3"
    print_detail "Repository: ${BOLD_WHITE}${repo_id}${RESET}"
    print_detail "Description: ${description}"
    print_detail "Size: ${BOLD_YELLOW}${size_gb}GB${RESET}"
    # Dry-run mode: skip actual download
    if [[ "$DRY_RUN" == true ]]; then
        print_info "DRY-RUN: Would download ${BOLD_WHITE}${repo_id}${RESET} (~${size_gb} GB)"
        return 0
    fi
    # Values are passed via argv instead of being interpolated into the
    # Python source: interpolation broke on quotes in the values and embedded
    # HF_TOKEN in the program text. Running python3 as the 'if' condition
    # (instead of checking $? afterwards) keeps 'set -e' from aborting the
    # whole script on a single failed download — the old $? check was
    # unreachable on failure.
    if python3 - "$CACHE_DIR" "$HF_TOKEN" "$repo_id" <<'EOPYDOWNLOAD'
import os
import sys
from huggingface_hub import snapshot_download
cache_dir = sys.argv[1]
token = sys.argv[2]
repo_id = sys.argv[3]
os.environ['HF_HOME'] = cache_dir
try:
    snapshot_download(
        repo_id=repo_id,
        cache_dir=cache_dir,
        token=token,
        # NOTE(review): resume_download is deprecated (a no-op) in newer
        # huggingface_hub releases; kept for older installs — confirm.
        resume_download=True
    )
    print("SUCCESS")
except Exception as e:
    print("ERROR: {0}".format(e), file=sys.stderr)
    sys.exit(1)
EOPYDOWNLOAD
    then
        print_success "Downloaded ${BOLD_WHITE}${repo_id}${RESET}"
        return 0
    else
        print_error "Failed to download ${repo_id}"
        return 1
    fi
}
# Create symlinks from cached model files into the ComfyUI models tree.
# $1 - repo_id
# $2 - filename filter (used only by the automatic-prefix fallback)
# $3 - optional newline-separated "source|dest" mappings from the YAML
# Honors DRY_RUN. Returns 0 if at least one link was created, 1 otherwise.
# Reads globals: COMFYUI_DIR, DRY_RUN.
link_model() {
    local repo_id="$1"
    local filename_filter="$2"
    local file_mappings="$3" # Optional: explicit source|dest mappings
    local linked_count=0
    if [[ -n "$file_mappings" ]]; then
        print_detail "Using explicit file mappings from YAML"
        # Collect the distinct destination directories, purely for display.
        local target_dirs=()
        local source_pattern dest_path
        while IFS='|' read -r source_pattern dest_path; do
            if [[ -n "$dest_path" && "$dest_path" == *"/"* ]]; then
                target_dirs+=("${COMFYUI_DIR}/${dest_path%/*}")
            fi
        done <<< "$file_mappings"
        # De-duplicate with mapfile: the original's unquoted array expansion
        # word-split paths containing spaces; the length guard also protects
        # empty arrays under 'set -u' on older bash.
        local unique_dirs=()
        if [[ ${#target_dirs[@]} -gt 0 ]]; then
            mapfile -t unique_dirs < <(printf '%s\n' "${target_dirs[@]}" | sort -u)
        fi
        local target_dir
        for target_dir in "${unique_dirs[@]}"; do
            print_detail "Linking to: ${CYAN}${target_dir}/${RESET}"
        done
        # Dry-run mode: show what would be linked
        if [[ "$DRY_RUN" == true ]]; then
            local file_count
            file_count=$(grep -c . <<< "$file_mappings" || true)
            print_info "DRY-RUN: Would link ${BOLD_YELLOW}${file_count}${RESET} file(s)"
            print_detail "Files that would be linked:"
            while IFS='|' read -r source_pattern dest_path; do
                if [[ -n "$dest_path" ]]; then
                    print_detail " ${LINK} Would link: ${DIM}${dest_path}${RESET}"
                fi
            done <<< "$file_mappings"
            return 0
        fi
        # Find all cached files (no filter: we have explicit paths).
        # '|| true': find_model_files exits 1 when the repo is absent, which
        # would otherwise kill the script under 'set -e'.
        local model_files
        model_files=$(find_model_files "$repo_id" "" || true)
        if [[ -z "$model_files" ]]; then
            print_warning "No model files found in cache for ${repo_id}"
            return 1
        fi
        # Process each file mapping
        while IFS='|' read -r source_pattern dest_path; do
            if [[ -z "$source_pattern" || -z "$dest_path" ]]; then
                continue
            fi
            # Locate the cached file matching the source pattern.
            # '|| true': a no-match grep would abort under 'set -o pipefail'.
            local source_file
            source_file=$(grep -F -- "/$source_pattern" <<< "$model_files" | head -n1 || true)
            if [[ -z "$source_file" || ! -f "$source_file" ]]; then
                print_warning "Source file not found: ${source_pattern}"
                continue
            fi
            # dest_path may include subdirectories — create them as needed.
            local link_path="${COMFYUI_DIR}/${dest_path}"
            mkdir -p "$(dirname "$link_path")"
            # Replace stale symlinks; never clobber a real file.
            if [[ -L "$link_path" ]]; then
                rm -f "$link_path"
            elif [[ -e "$link_path" ]]; then
                print_warning "File already exists (not a symlink): ${dest_path}"
                continue
            fi
            ln -s "$source_file" "$link_path"
            print_detail "${LINK} Linked: ${DIM}${dest_path}${RESET}"
            linked_count=$((linked_count + 1))
        done <<< "$file_mappings"
    else
        # Fallback: no explicit mappings — link every cached file with the
        # model name prefixed for identification, e.g.
        # "pytorch_model.bin" -> "musicgen-medium-pytorch_model.bin".
        print_detail "No file mappings found, using automatic prefixing"
        local model_name="${repo_id##*/}"
        local model_files
        model_files=$(find_model_files "$repo_id" "$filename_filter" || true)
        # NOTE(review): the original fallback referenced an undefined
        # $target_dir and an undefined file list (unbound under 'set -u') and
        # built names with a corrupted expansion; linking into the models
        # root is an assumption — confirm the intended subdirectory.
        local target_dir="${COMFYUI_DIR}"
        mkdir -p "$target_dir"
        local source_file filename prefixed_filename link_path
        while IFS= read -r source_file; do
            [[ -f "$source_file" ]] || continue
            filename=$(basename "$source_file")
            prefixed_filename="${model_name}-${filename}"
            link_path="${target_dir}/${prefixed_filename}"
            # Replace stale symlinks; never clobber a real file.
            if [[ -L "$link_path" ]]; then
                rm -f "$link_path"
            elif [[ -e "$link_path" ]]; then
                print_warning "File already exists (not a symlink): ${prefixed_filename}"
                continue
            fi
            ln -s "$source_file" "$link_path"
            print_detail "${LINK} Linked: ${DIM}${prefixed_filename}${RESET}"
            linked_count=$((linked_count + 1))
        done <<< "$model_files"
    fi
    if [[ $linked_count -gt 0 ]]; then
        print_success "Linked ${linked_count} file(s) for ${BOLD_WHITE}${repo_id}${RESET}"
        return 0
    else
        print_error "Failed to link files for ${repo_id}"
        return 1
    fi
}
# Cleanup unused cache files that aren't symlinked
# For one repo, reclaims disk space in the HF cache by deleting:
#   (1) every snapshot directory except the most recent, and
#   (2) files in the latest snapshot that are neither in the YAML
#       file-mapping whitelist nor essential HF metadata.
# $1 repo_id, $2 cache dir, $3 ComfyUI models dir (passed to the Python
# helper, currently unused there), $4 newline-separated "source|dest" lines.
# Honors DRY_RUN (report only). Requires 'bc' for the MB conversions.
# Returns 0 when everything requested was removed (or nothing to do).
cleanup_unused_cache_files() {
    local repo_id="$1"
    local cache_dir="$2"
    local comfyui_dir="$3"
    local file_mappings="$4"
    # Find the latest snapshot directory for this repo
    local repo_cache_dir="${cache_dir}/models--${repo_id//\//--}"
    if [[ ! -d "$repo_cache_dir" ]]; then
        print_warning "Cache directory not found for $repo_id"
        return 1
    fi
    print_info "Analyzing cache for ${BOLD_WHITE}${repo_id}${RESET}..."
    # Use Python to clean up old snapshots AND non-whitelisted files in
    # latest snapshot. The helper prints a summary line
    # 'old_count|old_bytes|unwanted_count|unwanted_bytes', then old_count
    # snapshot paths, then unwanted_count file paths — parsed positionally
    # below, so the output order is load-bearing.
    local cleanup_result
    cleanup_result=$(python3 - "$repo_cache_dir" "$comfyui_dir" "$file_mappings" <<'EOPYCLEANUP'
import os
import sys
import shutil  # NOTE(review): imported but unused
from pathlib import Path
repo_cache = Path(sys.argv[1])
comfyui_dir = Path(sys.argv[2])
file_mappings_str = sys.argv[3] if len(sys.argv) > 3 else ""
# Parse whitelist from file_mappings (format: "source|dest\nsource|dest\n...")
whitelist_sources = set()
if file_mappings_str:
    for line in file_mappings_str.strip().split('\n'):
        if '|' in line:
            source = line.split('|')[0].strip()
            if source:
                whitelist_sources.add(source)
# Essential HuggingFace metadata files to always preserve
ESSENTIAL_FILES = {
    '.gitattributes',
    'README.md',
    'model_index.json',
    '.huggingface',
    'config.json'
}
# Find latest snapshot
snapshots_dir = repo_cache / 'snapshots'
if not snapshots_dir.exists():
    sys.exit(0)
snapshots = sorted(snapshots_dir.iterdir(), key=lambda x: x.stat().st_mtime, reverse=True)
if not snapshots:
    sys.exit(0)
latest_snapshot = snapshots[0]
old_snapshots = snapshots[1:]  # All snapshots except the latest
# Calculate size of old snapshot directories
old_snapshot_size = 0
old_snapshot_paths = []
for old_snap in old_snapshots:
    try:
        # Calculate size of old snapshot directory
        for file_path in old_snap.rglob('*'):
            if file_path.is_file():
                old_snapshot_size += file_path.stat().st_size
        old_snapshot_paths.append(str(old_snap))
    except Exception:
        pass
# Find non-whitelisted files in latest snapshot
unwanted_files = []
unwanted_size = 0
if whitelist_sources:
    for item in latest_snapshot.rglob('*'):
        if item.is_file():
            # Get relative path from snapshot root
            rel_path = str(item.relative_to(latest_snapshot))
            # Check if this file is in whitelist or is essential
            is_whitelisted = False
            # Check exact match first
            if rel_path in whitelist_sources:
                is_whitelisted = True
            else:
                # Check if any whitelist entry matches this file
                # (handles cases where whitelist has paths like
                # "split_files/diffusion_models/file.safetensors")
                for whitelisted in whitelist_sources:
                    if rel_path == whitelisted or rel_path.endswith('/' + whitelisted):
                        is_whitelisted = True
                        break
            # Check if it's an essential file
            if item.name in ESSENTIAL_FILES:
                is_whitelisted = True
            # If not whitelisted, mark for deletion
            if not is_whitelisted:
                unwanted_files.append(str(item))
                unwanted_size += item.stat().st_size
# Output results: old_snapshot_count|old_snapshot_size|unwanted_files_count|unwanted_size
print(f"{len(old_snapshot_paths)}|{old_snapshot_size}|{len(unwanted_files)}|{unwanted_size}")
for snap in old_snapshot_paths:
    print(snap)
for unwanted_file in unwanted_files:
    print(unwanted_file)
EOPYCLEANUP
)
    # Parse results
    local first_line
    first_line=$(echo "$cleanup_result" | head -n 1)
    local snapshot_count
    snapshot_count=$(echo "$first_line" | cut -d'|' -f1)
    local snapshot_bytes
    snapshot_bytes=$(echo "$first_line" | cut -d'|' -f2)
    local unwanted_count
    unwanted_count=$(echo "$first_line" | cut -d'|' -f3)
    local unwanted_bytes
    unwanted_bytes=$(echo "$first_line" | cut -d'|' -f4)
    # Check if there's anything to clean
    # NOTE(review): if the helper printed nothing, these [[ -eq ]] tests see
    # empty strings (evaluated as 0) and report "no cleanup needed" —
    # confirm that silent success is the intended failure mode.
    if [[ "$snapshot_count" -eq 0 ]] && [[ "$unwanted_count" -eq 0 ]]; then
        print_success "No cleanup needed - cache is optimal"
        return 0
    fi
    # Convert bytes to MB
    local snapshot_mb
    snapshot_mb=$(echo "scale=2; $snapshot_bytes / 1048576" | bc)
    local unwanted_mb
    unwanted_mb=$(echo "scale=2; $unwanted_bytes / 1048576" | bc)
    local total_mb
    total_mb=$(echo "scale=2; ($snapshot_bytes + $unwanted_bytes) / 1048576" | bc)
    # Get list of items to delete (skip first line which is summary)
    # First snapshot_count lines are old snapshots, remaining lines are unwanted files
    local all_items
    all_items=$(echo "$cleanup_result" | tail -n +2)
    local snapshots_to_delete
    if [[ "$snapshot_count" -gt 0 ]]; then
        snapshots_to_delete=$(echo "$all_items" | head -n "$snapshot_count")
    else
        snapshots_to_delete=""
    fi
    local files_to_delete
    if [[ "$unwanted_count" -gt 0 ]]; then
        files_to_delete=$(echo "$all_items" | tail -n "$unwanted_count")
    else
        files_to_delete=""
    fi
    # Dry-run: report what would be removed and stop.
    if [[ "$DRY_RUN" == true ]]; then
        if [[ "$snapshot_count" -gt 0 ]]; then
            print_warning "DRY-RUN: Would clean up ${BOLD_YELLOW}${snapshot_count}${RESET} old snapshot(s) (~${snapshot_mb} MB)"
            if [[ -n "$snapshots_to_delete" ]]; then
                print_detail "Old snapshots that would be deleted:"
                while IFS= read -r snapshot; do
                    local basename
                    basename=$(basename "$snapshot")
                    print_detail " ${CROSS_MARK} Would delete snapshot: ${DIM}${basename}${RESET}"
                done <<< "$snapshots_to_delete"
            fi
        fi
        if [[ "$unwanted_count" -gt 0 ]]; then
            print_warning "DRY-RUN: Would clean up ${BOLD_YELLOW}${unwanted_count}${RESET} non-whitelisted file(s) (~${unwanted_mb} MB)"
            if [[ -n "$files_to_delete" ]]; then
                print_detail "Non-whitelisted files that would be deleted (showing first 10):"
                # Runs in a pipeline subshell — fine here, no variables are
                # carried out of the loop.
                echo "$files_to_delete" | head -n 10 | while IFS= read -r file; do
                    local basename
                    basename=$(basename "$file")
                    print_detail " ${CROSS_MARK} Would delete file: ${DIM}${basename}${RESET}"
                done
                if [[ "$unwanted_count" -gt 10 ]]; then
                    print_detail " ${DIM}... and $((unwanted_count - 10)) more${RESET}"
                fi
            fi
        fi
        print_info "Total space that would be freed: ~${total_mb} MB"
        return 0
    fi
    # Actually delete items
    local deleted_snapshots=0
    local deleted_files=0
    # Delete old snapshot directories
    if [[ "$snapshot_count" -gt 0 ]]; then
        print_warning "Cleaning up ${BOLD_YELLOW}${snapshot_count}${RESET} old snapshot(s) (~${snapshot_mb} MB)..."
        while IFS= read -r snapshot; do
            if [[ -d "$snapshot" ]]; then
                rm -rf "$snapshot" && deleted_snapshots=$((deleted_snapshots+1))
            fi
        done <<< "$snapshots_to_delete"
    fi
    # Delete non-whitelisted files
    if [[ "$unwanted_count" -gt 0 ]]; then
        print_warning "Cleaning up ${BOLD_YELLOW}${unwanted_count}${RESET} non-whitelisted file(s) (~${unwanted_mb} MB)..."
        while IFS= read -r file; do
            if [[ -f "$file" ]]; then
                rm -f "$file" && deleted_files=$((deleted_files+1))
            fi
        done <<< "$files_to_delete"
    fi
    # Report results; partial deletion (count mismatch) is a soft failure.
    local success=true
    if [[ "$snapshot_count" -gt 0 ]] && [[ $deleted_snapshots -eq $snapshot_count ]]; then
        print_success "Cleaned up ${deleted_snapshots} old snapshot(s), freed ~${snapshot_mb} MB"
    elif [[ "$snapshot_count" -gt 0 ]]; then
        print_warning "Cleaned up ${deleted_snapshots}/${snapshot_count} old snapshots"
        success=false
    fi
    if [[ "$unwanted_count" -gt 0 ]] && [[ $deleted_files -eq $unwanted_count ]]; then
        print_success "Cleaned up ${deleted_files} non-whitelisted file(s), freed ~${unwanted_mb} MB"
    elif [[ "$unwanted_count" -gt 0 ]]; then
        print_warning "Cleaned up ${deleted_files}/${unwanted_count} non-whitelisted files"
        success=false
    fi
    if [[ "$snapshot_count" -gt 0 ]] || [[ "$unwanted_count" -gt 0 ]]; then
        print_info "Total space freed: ~${total_mb} MB"
    fi
    if $success; then
        return 0
    else
        return 1
    fi
}
# ============================================================================
# VERIFICATION FUNCTIONS - Model Status Checking
# ============================================================================
# Sum the on-disk sizes (bytes) of a newline-separated list of files.
# Follows symlinks (-L): HuggingFace snapshots are symlinks into blobs/.
# $1 - newline-separated file paths (may be empty).
# Prints the total byte count on stdout; unreadable files count as 0.
get_model_disk_usage() {
    local model_files="$1"
    if [[ -z "$model_files" ]]; then
        echo "0"
        return
    fi
    local total_bytes=0
    local file_path file_size
    while IFS= read -r file_path; do
        if [[ -f "$file_path" || -L "$file_path" ]]; then
            # One stat invocation per flavor: GNU (-c) first, BSD/macOS (-f)
            # as fallback — the original probed each flavor and then ran the
            # same command a second time to capture the value.
            if file_size=$(stat -L -c "%s" "$file_path" 2>/dev/null); then
                :
            elif file_size=$(stat -L -f "%z" "$file_path" 2>/dev/null); then
                :
            else
                file_size=0
            fi
            total_bytes=$((total_bytes + file_size))
        fi
    done <<< "$model_files"
    echo "$total_bytes"
}
# Format a byte count as a human-readable size: "N B", "N KB", or a
# truncated two-decimal "N.NN MB"/"N.NN GB" (matching bc's scale=2
# truncation the original produced).
# $1 - non-negative integer byte count.
format_bytes() {
    local bytes="$1"
    if (( bytes < 1024 )); then
        echo "${bytes} B"
    elif (( bytes < 1048576 )); then
        echo "$(( bytes / 1024 )) KB"
    elif (( bytes < 1073741824 )); then
        # Pure integer arithmetic (hundredths of a MB) instead of forking
        # bc; also adds the trailing newline the original printf omitted.
        local hundredths=$(( bytes * 100 / 1048576 ))
        printf '%d.%02d MB\n' $(( hundredths / 100 )) $(( hundredths % 100 ))
    else
        local hundredths=$(( bytes * 100 / 1073741824 ))
        printf '%d.%02d GB\n' $(( hundredths / 100 )) $(( hundredths % 100 ))
    fi
}
# Check whether a model is present in the HF cache.
# $1 - repo_id, $2 - expected size in GB (unused; kept for call
# compatibility), $3 - optional filename filter.
# stdout: "FOUND|bytes|file_count|cache_path|mod_time" or "NOT_FOUND|0|0|".
# Returns 0 when found, 1 otherwise. Diagnostics forwarded to stderr.
verify_model_download() {
    local repo_id="$1"
    local expected_size_gb="$2"
    local filename_filter="$3"
    # Capture stdout (paths) and stderr (diagnostics) together, then split.
    # '|| true' throughout: find_model_files exits 1 when the repo is absent
    # and grep exits 1 on "no match" — either would abort the whole script
    # under 'set -euo pipefail'.
    local find_output
    find_output=$(find_model_files "$repo_id" "$filename_filter" 2>&1 || true)
    local model_files
    model_files=$(grep -v -e '^ERROR:' -e '^WARN:' -e '^DEBUG:' <<< "$find_output" || true)
    local error_msgs
    error_msgs=$(grep -e '^ERROR:' -e '^WARN:' -e '^DEBUG:' <<< "$find_output" || true)
    if [[ -z "$model_files" ]]; then
        # Forward diagnostics to stderr so the caller's stdout stays parseable.
        if [[ -n "$error_msgs" ]]; then
            echo "$error_msgs" >&2
        fi
        echo "NOT_FOUND|0|0|"
        return 1
    fi
    # Count files
    local file_count
    file_count=$(wc -l <<< "$model_files" | tr -d ' ')
    # Get actual size
    local actual_bytes
    actual_bytes=$(get_model_disk_usage "$model_files")
    # Cache path = directory of the first file. dirname "$(...)" instead of
    # 'xargs dirname', which word-splits paths containing spaces.
    local cache_path first_file
    first_file=$(head -n1 <<< "$model_files")
    cache_path=$(dirname "$first_file")
    # Modification time of the first file: GNU stat first, then BSD/macOS.
    local mod_time="Unknown"
    if [[ -f "$first_file" ]]; then
        if stat -c "%y" "$first_file" >/dev/null 2>&1; then
            mod_time=$(stat -c "%y" "$first_file" 2>/dev/null | cut -d'.' -f1)
        elif stat -f "%Sm" "$first_file" >/dev/null 2>&1; then
            mod_time=$(stat -f "%Sm" -t "%Y-%m-%d %H:%M:%S" "$first_file" 2>/dev/null)
        fi
    fi
    echo "FOUND|${actual_bytes}|${file_count}|${cache_path}|${mod_time}"
    return 0
}
# Verify the symlink state of a model's files.
# $1 - repo_id, $2 - filename filter (fallback mode only), $3 - optional
# newline-separated "source|dest" mappings.
# stdout: "NOT_DOWNLOADED|0|0|0" (status 1) when nothing is cached, else
# "CHECKED|total|valid|broken" followed by one detail per expected link:
# "VALID|name|target", "BROKEN|name|target", or "MISSING|name|".
# Reads global: COMFYUI_DIR.
verify_model_links() {
    local repo_id="$1"
    local filename_filter="$2"
    local file_mappings="$3"
    local total_links=0
    local valid_links=0
    local broken_links=0
    local link_details=""
    if [[ -n "$file_mappings" ]]; then
        # Explicit mappings: confirm the cache exists, then check each dest.
        # '|| true': a find_model_files failure must not kill the script
        # under 'set -e'.
        local model_files
        model_files=$(find_model_files "$repo_id" "" 2>/dev/null || true)
        if [[ -z "$model_files" ]]; then
            echo "NOT_DOWNLOADED|0|0|0"
            return 1
        fi
        local source_pattern dest_path link_path link_target
        while IFS='|' read -r source_pattern dest_path; do
            if [[ -z "$source_pattern" || -z "$dest_path" ]]; then
                continue
            fi
            total_links=$((total_links + 1))
            link_path="${COMFYUI_DIR}/${dest_path}"
            if [[ -L "$link_path" ]]; then
                link_target=$(readlink "$link_path")
                # -e follows the symlink: true only when the target exists.
                if [[ -e "$link_path" ]]; then
                    valid_links=$((valid_links + 1))
                    link_details="${link_details}VALID|${dest_path}|${link_target}\n"
                else
                    broken_links=$((broken_links + 1))
                    link_details="${link_details}BROKEN|${dest_path}|${link_target}\n"
                fi
            else
                link_details="${link_details}MISSING|${dest_path}|\n"
            fi
        done <<< "$file_mappings"
    else
        # No explicit mappings: check the automatically prefixed filenames
        # that link_model's fallback creates.
        local model_files
        model_files=$(find_model_files "$repo_id" "$filename_filter" 2>/dev/null || true)
        if [[ -z "$model_files" ]]; then
            echo "NOT_DOWNLOADED|0|0|0"
            return 1
        fi
        local model_name="${repo_id##*/}"
        # NOTE(review): the original referenced an undefined $target_dir and
        # a corrupted filename expansion here; checking the models root
        # mirrors link_model's fallback — confirm the intended subdirectory.
        local target_dir="${COMFYUI_DIR}"
        local source_file filename prefixed_filename link_path link_target
        while IFS= read -r source_file; do
            [[ -f "$source_file" ]] || continue
            filename=$(basename "$source_file")
            prefixed_filename="${model_name}-${filename}"
            total_links=$((total_links + 1))
            link_path="${target_dir}/${prefixed_filename}"
            if [[ -L "$link_path" ]]; then
                link_target=$(readlink "$link_path")
                if [[ -e "$link_path" ]]; then
                    valid_links=$((valid_links + 1))
                    link_details="${link_details}VALID|${prefixed_filename}|${link_target}\n"
                else
                    broken_links=$((broken_links + 1))
                    link_details="${link_details}BROKEN|${prefixed_filename}|${link_target}\n"
                fi
            else
                link_details="${link_details}MISSING|${prefixed_filename}|\n"
            fi
        done <<< "$model_files"
    fi
    echo -e "CHECKED|${total_links}|${valid_links}|${broken_links}\n${link_details}"
    return 0
}
# Verify models by category
#######################################
# Verify download + symlink status for every model in one YAML category and
# print a per-model report plus a category summary.
# Globals:
#   CONFIG_FILE, COMFYUI_DIR (read)
# Arguments:
#   $1 - category          YAML key (e.g. image_models)
#   $2 - category_display  human-readable section heading
# Side effects:
#   Writes "/tmp/verify_stats_<category>" in the format
#   downloaded|missing|linked|broken|not_linked|total_size|expected_size,
#   consumed later by display_verification_summary.
#######################################
verify_category() {
  local category="$1"
  local category_display="$2"
  print_section "${category_display}"

  # Get models for this category: one "repo|desc|size_gb|essential|filename"
  # record per line.
  local models_data
  models_data=$(parse_yaml "$CONFIG_FILE" "$category")
  if [[ -z "$models_data" ]]; then
    print_warning "No models found in category: ${category}"
    return 0
  fi

  local total_models
  # tr strips the padding some wc implementations (BSD/macOS) prepend.
  total_models=$(echo "$models_data" | wc -l | tr -d '[:space:]')
  local current=0
  local models_downloaded=0
  local models_missing=0
  local models_linked=0
  local models_broken=0
  local models_not_linked=0
  local total_size_bytes=0
  local expected_size_bytes=0

  while IFS='|' read -r repo_id description size_gb essential filename; do
    current=$((current+1))
    echo ""
    print_step "$current" "$total_models" "${BOLD_MAGENTA}${description}${RESET}"
    print_detail "Repository: ${BOLD_WHITE}${repo_id}${RESET}"
    print_detail "Category: ${CYAN}${category}${RESET}"
    print_detail "Expected Size: ${BOLD_YELLOW}${size_gb} GB${RESET}"
    # GB -> bytes via bc (sizes may be fractional). Default a missing size to
    # 0 so bc never receives an empty operand.
    expected_size_bytes=$((expected_size_bytes + $(echo "${size_gb:-0} * 1073741824" | bc | cut -d'.' -f1)))

    # ---- Download status -------------------------------------------------
    echo ""
    local download_result
    download_result=$(verify_model_download "$repo_id" "$size_gb" "$filename")
    local download_status
    download_status=$(echo "$download_result" | cut -d'|' -f1)
    if [[ "$download_status" == "FOUND" ]]; then
      # Result format: FOUND|bytes|file_count|cache_path|mod_time
      local actual_bytes
      actual_bytes=$(echo "$download_result" | cut -d'|' -f2)
      local file_count
      file_count=$(echo "$download_result" | cut -d'|' -f3)
      local cache_path
      cache_path=$(echo "$download_result" | cut -d'|' -f4)
      local mod_time
      mod_time=$(echo "$download_result" | cut -d'|' -f5)
      total_size_bytes=$((total_size_bytes + actual_bytes))
      local actual_size_human
      actual_size_human=$(format_bytes "$actual_bytes")
      print_success "Download Status: ${BOLD_GREEN}DOWNLOADED${RESET}"
      print_detail "${DIM}Path: ${cache_path}${RESET}"
      print_detail "${DIM}Actual Size: ${actual_size_human} (${actual_bytes} bytes)${RESET}"
      print_detail "${DIM}Files: ${file_count} file(s)${RESET}"
      print_detail "${DIM}Modified: ${mod_time}${RESET}"

      # Warn when the cached size deviates >10% from the configured size.
      # Skip the check when no expected size is configured, otherwise bc
      # would divide by zero.
      local expected_bytes
      expected_bytes=$(echo "${size_gb:-0} * 1073741824" | bc | cut -d'.' -f1)
      if [[ "${expected_bytes:-0}" -gt 0 ]]; then
        local size_diff_pct
        # The sed expressions restore the leading zero bc drops (".5"/"-.5").
        size_diff_pct=$(echo "scale=2; (($actual_bytes - $expected_bytes) / $expected_bytes) * 100" | bc | sed -e 's/^\./0./' -e 's/^-\./-0./')
        local abs_size_diff_pct
        abs_size_diff_pct=${size_diff_pct#-}
        if (( $(echo "$abs_size_diff_pct > 10" | bc -l) )); then
          print_warning "Size mismatch: ${size_diff_pct}% difference from expected"
        fi
      fi
      models_downloaded=$((models_downloaded+1))

      # ---- Link status ---------------------------------------------------
      echo ""
      local file_mappings
      file_mappings=$(parse_file_mappings "$CONFIG_FILE" "$category" "$repo_id")
      local link_result
      link_result=$(verify_model_links "$repo_id" "$filename" "$file_mappings")
      local first_line
      first_line=$(echo -e "$link_result" | head -n1)
      local link_status
      link_status=$(echo "$first_line" | cut -d'|' -f1)
      if [[ "$link_status" == "CHECKED" ]]; then
        local total_links
        total_links=$(echo "$first_line" | cut -d'|' -f2)
        local valid_links
        valid_links=$(echo "$first_line" | cut -d'|' -f3)
        local broken_links
        broken_links=$(echo "$first_line" | cut -d'|' -f4)
        if [[ $broken_links -gt 0 ]]; then
          print_warning "Link Status: ${BOLD_YELLOW}${broken_links} BROKEN LINK(S)${RESET}"
          models_broken=$((models_broken+1))
        elif [[ $valid_links -eq $total_links ]] && [[ $total_links -gt 0 ]]; then
          print_success "Link Status: ${BOLD_GREEN}LINKED${RESET} (${valid_links}/${total_links})"
          models_linked=$((models_linked+1))
        else
          print_warning "Link Status: ${BOLD_YELLOW}PARTIALLY LINKED${RESET} (${valid_links}/${total_links})"
          models_not_linked=$((models_not_linked+1))
        fi
        # Per-link detail lines: STATE|name|target
        local link_details
        link_details=$(echo -e "$link_result" | tail -n +2)
        if [[ -n "$link_details" ]]; then
          while IFS='|' read -r link_state link_name link_target; do
            if [[ -z "$link_state" ]]; then
              continue
            fi
            case "$link_state" in
              VALID)
                print_detail "${LINK} ${BOLD_GREEN}${RESET} ${DIM}${link_name}${RESET}"
                ;;
              BROKEN)
                print_detail "${LINK} ${BOLD_RED}${RESET} ${DIM}${link_name}${RESET} ${BOLD_RED}(BROKEN)${RESET}"
                ;;
              MISSING)
                print_detail "${LINK} ${BOLD_YELLOW}${RESET} ${DIM}${link_name}${RESET} ${BOLD_YELLOW}(NOT LINKED)${RESET}"
                ;;
            esac
          done <<< "$link_details"
        fi
      else
        print_error "Link Status: ${BOLD_RED}NOT LINKED${RESET}"
        models_not_linked=$((models_not_linked+1))
      fi
    else
      print_error "Download Status: ${BOLD_RED}NOT DOWNLOADED${RESET}"
      models_missing=$((models_missing+1))
      echo ""
      print_info "Link Status: ${DIM}N/A (model not downloaded)${RESET}"
    fi
    show_progress "$current" "$total_models"
  done <<< "$models_data"
  echo -e "\n"

  # Category summary (total_models is always >= 1 here, so the percentage
  # divisions below cannot divide by zero).
  local total_size_human
  total_size_human=$(format_bytes "$total_size_bytes")
  local expected_size_human
  expected_size_human=$(format_bytes "$expected_size_bytes")
  print_info "Category Summary:"
  echo -e " ${BOLD_WHITE}Total Models:${RESET} ${total_models}"
  echo -e " ${BOLD_GREEN}✓ Downloaded:${RESET} ${models_downloaded} ($(( models_downloaded * 100 / total_models ))%)"
  echo -e " ${BOLD_RED}✗ Missing:${RESET} ${models_missing} ($(( models_missing * 100 / total_models ))%)"
  echo -e " ${BOLD_GREEN}✓ Properly Linked:${RESET} ${models_linked}"
  echo -e " ${BOLD_YELLOW}⚠ Broken Links:${RESET} ${models_broken}"
  echo -e " ${BOLD_YELLOW}○ Not Linked:${RESET} ${models_not_linked}"
  echo -e " ${BOLD_CYAN}📊 Disk Usage:${RESET} ${total_size_human} / ${expected_size_human} expected"
  # Return statistics for global summary (format: downloaded|missing|linked|broken|not_linked|total_size|expected_size)
  echo "${models_downloaded}|${models_missing}|${models_linked}|${models_broken}|${models_not_linked}|${total_size_bytes}|${expected_size_bytes}" > "/tmp/verify_stats_${category}"
}
# Process models by category
#######################################
# Download and/or link every model in one YAML category, depending on the
# global COMMAND (download | link | both).
# Globals:
#   CONFIG_FILE, COMMAND, CLEANUP_MODE, CACHE_DIR, COMFYUI_DIR (read)
# Arguments:
#   $1 - category          YAML key of the category
#   $2 - category_display  human-readable section heading
#######################################
process_category() {
  local category="$1"
  local category_display="$2"
  print_section "${category_display}"

  # Get models for this category
  local models_data
  models_data=$(parse_yaml "$CONFIG_FILE" "$category")
  if [[ -z "$models_data" ]]; then
    print_warning "No models found in category: ${category}"
    return 0
  fi

  local total_models
  # tr strips the padding some wc implementations (BSD/macOS) prepend.
  total_models=$(echo "$models_data" | wc -l | tr -d '[:space:]')
  local current=0
  local succeeded=0
  local failed=0

  # Note: a stray VCS timestamp line that bash would have executed as a
  # command (aborting under 'set -e') was removed from this loop body.
  while IFS='|' read -r repo_id description size_gb essential filename; do
    current=$((current+1))
    echo ""
    print_step "$current" "$total_models" "${BOLD_MAGENTA}${description}${RESET}"
    local success=true

    # Download if command is 'download' or 'both'
    if [[ "$COMMAND" == "download" ]] || [[ "$COMMAND" == "both" ]]; then
      if ! download_model "$repo_id" "$description" "$size_gb"; then
        success=false
      fi
    fi

    # Link if command is 'link' or 'both'; skipped when the download failed.
    if [[ "$COMMAND" == "link" ]] || [[ "$COMMAND" == "both" ]]; then
      if $success; then
        # Parse file mappings from YAML for this model
        local file_mappings
        file_mappings=$(parse_file_mappings "$CONFIG_FILE" "$category" "$repo_id")
        # Pass file mappings to link_model (empty string if no mappings found)
        if ! link_model "$repo_id" "$filename" "$file_mappings"; then
          success=false
        fi
        # Cleanup unused cache files if enabled
        if [[ "$CLEANUP_MODE" == true ]] && $success; then
          cleanup_unused_cache_files "$repo_id" "$CACHE_DIR" "$COMFYUI_DIR" "$file_mappings"
        fi
      fi
    fi

    if $success; then
      succeeded=$((succeeded+1))
    else
      failed=$((failed+1))
    fi
    show_progress "$current" "$total_models"
  done <<< "$models_data"

  echo -e "\n"
  print_info "Category Summary: ${BOLD_GREEN}${succeeded} succeeded${RESET}, ${BOLD_RED}${failed} failed${RESET}"
}
# Display verification summary
#######################################
# Aggregate the per-category stats files written by verify_category and
# print a global verification summary with actionable fix suggestions.
# Globals:
#   CACHE_DIR, COMFYUI_DIR, CONFIG_FILE (read)
# Arguments:
#   $1 - run start (epoch seconds)
#   $2 - run end (epoch seconds)
#   $3 - newline-separated list of category names
# Side effects:
#   Reads and then deletes /tmp/verify_stats_<category> for each category.
#   NOTE(review): these are predictable names in shared /tmp; mktemp-based
#   paths would be safer on multi-user hosts — confirm deployment context.
#######################################
display_verification_summary() {
local start_time="$1"
local end_time="$2"
local categories="$3"
local duration=$((end_time - start_time))
local minutes=$((duration / 60))
local seconds=$((duration % 60))
# Aggregate statistics from all categories
local total_models=0
local total_downloaded=0
local total_missing=0
local total_linked=0
local total_broken=0
local total_not_linked=0
local total_size_bytes=0
local expected_size_bytes=0
while IFS= read -r category; do
if [[ -f "/tmp/verify_stats_${category}" ]]; then
# Stats file format (written by verify_category):
# downloaded|missing|linked|broken|not_linked|total_size|expected_size
local stats
stats=$(cat "/tmp/verify_stats_${category}")
local downloaded
downloaded=$(echo "$stats" | cut -d'|' -f1)
local missing
missing=$(echo "$stats" | cut -d'|' -f2)
local linked
linked=$(echo "$stats" | cut -d'|' -f3)
local broken
broken=$(echo "$stats" | cut -d'|' -f4)
local not_linked
not_linked=$(echo "$stats" | cut -d'|' -f5)
local size_bytes
size_bytes=$(echo "$stats" | cut -d'|' -f6)
local expected_bytes
expected_bytes=$(echo "$stats" | cut -d'|' -f7)
total_models=$((total_models + downloaded + missing))
total_downloaded=$((total_downloaded + downloaded))
total_missing=$((total_missing + missing))
total_linked=$((total_linked + linked))
total_broken=$((total_broken + broken))
total_not_linked=$((total_not_linked + not_linked))
total_size_bytes=$((total_size_bytes + size_bytes))
expected_size_bytes=$((expected_size_bytes + expected_bytes))
# Stats file is single-use; remove it once folded into the totals.
rm -f "/tmp/verify_stats_${category}"
fi
done <<< "$categories"
local total_size_human
total_size_human=$(format_bytes "$total_size_bytes")
local expected_size_human
expected_size_human=$(format_bytes "$expected_size_bytes")
print_banner "VERIFICATION COMPLETE"
echo -e "${BOLD_CYAN}${STAR} Global Summary${RESET}"
# printf repeats the '%.0s<char>' format once per seq argument, emitting the
# box-drawing character 80 times (a horizontal rule).
echo -e "${CYAN}$(printf '%.0s'"${BOX_DOUBLE}" $(seq 1 80))${RESET}"
echo -e " ${BOLD_WHITE}Total Models:${RESET} ${total_models}"
echo ""
# Arithmetic ternaries guard the percentage math against total_models == 0.
echo -e " ${BOLD_GREEN}✓ Downloaded:${RESET} ${total_downloaded} ($(( total_models > 0 ? total_downloaded * 100 / total_models : 0 ))%)"
echo -e " ${BOLD_RED}✗ Missing:${RESET} ${total_missing} ($(( total_models > 0 ? total_missing * 100 / total_models : 0 ))%)"
echo ""
echo -e " ${BOLD_GREEN}✓ Properly Linked:${RESET} ${total_linked} ($(( total_models > 0 ? total_linked * 100 / total_models : 0 ))%)"
echo -e " ${BOLD_YELLOW}⚠ Broken Links:${RESET} ${total_broken} ($(( total_models > 0 ? total_broken * 100 / total_models : 0 ))%)"
echo -e " ${BOLD_YELLOW}○ Not Linked:${RESET} ${total_not_linked} ($(( total_models > 0 ? total_not_linked * 100 / total_models : 0 ))%)"
echo ""
echo -e " ${BOLD_CYAN}📊 Disk Space Used:${RESET} ${total_size_human} / ${expected_size_human} expected"
echo -e " ${BOLD_WHITE}Cache Directory:${RESET} ${CYAN}${CACHE_DIR}${RESET}"
echo -e " ${BOLD_WHITE}ComfyUI Directory:${RESET} ${CYAN}${COMFYUI_DIR}${RESET}"
echo -e " ${BOLD_WHITE}Duration:${RESET} ${BOLD_YELLOW}${minutes}m ${seconds}s${RESET}"
echo -e "${CYAN}$(printf '%.0s'"${BOX_DOUBLE}" $(seq 1 80))${RESET}"
# Provide actionable suggestions: tell the user which subcommand repairs
# each class of problem found above.
if [[ $total_missing -gt 0 ]] || [[ $total_broken -gt 0 ]] || [[ $total_not_linked -gt 0 ]]; then
echo -e "\n${BOLD_YELLOW}${WARNING} Issues Found - Suggested Actions:${RESET}\n"
if [[ $total_missing -gt 0 ]]; then
echo -e " ${BOLD_RED}${RESET} ${total_missing} model(s) not downloaded"
echo -e " ${DIM}Fix:${RESET} ${CYAN}$0 download -c ${CONFIG_FILE}${RESET}"
fi
if [[ $total_broken -gt 0 ]] || [[ $total_not_linked -gt 0 ]]; then
echo -e " ${BOLD_YELLOW}${RESET} $(( total_broken + total_not_linked )) model(s) with link issues"
echo -e " ${DIM}Fix:${RESET} ${CYAN}$0 link -c ${CONFIG_FILE}${RESET}"
fi
if [[ $total_missing -gt 0 ]] && [[ $(( total_broken + total_not_linked )) -gt 0 ]]; then
echo -e " ${BOLD_CYAN}${RESET} Fix everything in one go:"
echo -e " ${CYAN}$0 both -c ${CONFIG_FILE}${RESET}"
fi
echo ""
else
echo -e "\n${BOLD_GREEN}${SPARKLES} All models verified successfully! ${SPARKLES}${RESET}\n"
fi
}
# Display summary
#######################################
# Render the end-of-run summary for download/link mode: counts, cache
# location and wall-clock duration, followed by a success or warning note.
# Globals:
#   CACHE_DIR (read)
# Arguments:
#   $1 - run start (epoch seconds)
#   $2 - run end (epoch seconds)
#   $3 - number of models that succeeded
#   $4 - number of models that failed
#######################################
display_summary() {
  local began="$1" finished="$2"
  local ok_count="$3" bad_count="$4"
  local elapsed=$((finished - began))
  local mins=$((elapsed / 60))
  local secs=$((elapsed % 60))
  # Horizontal rule: printf repeats '%.0s<char>' once per seq argument,
  # printing the box-drawing character 80 times. Built once, used twice.
  local rule
  rule="${CYAN}$(printf '%.0s'"${BOX_LIGHT}" $(seq 1 80))${RESET}"
  print_banner "DOWNLOAD COMPLETE"
  echo -e "${BOLD_CYAN}${STAR} Summary${RESET}"
  echo -e "$rule"
  echo -e " ${BOLD_WHITE}Total Downloaded:${RESET} ${BOLD_GREEN}${ok_count}${RESET} models"
  echo -e " ${BOLD_WHITE}Total Failed:${RESET} ${BOLD_RED}${bad_count}${RESET} models"
  echo -e " ${BOLD_WHITE}Cache Directory:${RESET} ${CYAN}${CACHE_DIR}${RESET}"
  echo -e " ${BOLD_WHITE}Duration:${RESET} ${BOLD_YELLOW}${mins}m ${secs}s${RESET}"
  echo -e "$rule"
  if (( bad_count == 0 )); then
    echo -e "\n${BOLD_GREEN}${SPARKLES} All models downloaded successfully! ${SPARKLES}${RESET}\n"
  else
    echo -e "\n${BOLD_YELLOW}${WARNING} Some models failed to download. Check logs above.${RESET}\n"
  fi
}
# ============================================================================
# MAIN FUNCTION
# ============================================================================
#######################################
# Orchestrate the full run: banner, dependency/config validation, then
# either per-category verification or per-category download/link processing.
# Globals:
#   COMMAND, DRY_RUN, CONFIG_FILE (read)
#######################################
main() {
  local start_time
  start_time=$(date +%s)

  # Display beautiful banner
  if [[ "$COMMAND" == "verify" ]]; then
    print_banner "${SPARKLES} ComfyUI Model Verification ${SPARKLES}"
    echo -e "${BOLD_CYAN}Comprehensive Model Status Check${RESET}"
    echo -e "${DIM}Verify Downloads & Links ${LINK} Configuration-Driven ${STAR}${RESET}\n"
  else
    print_banner "${ROCKET} ComfyUI Model Downloader ${ROCKET}"
    echo -e "${BOLD_CYAN}A Beautiful CLI Tool for Downloading AI Models${RESET}"
    echo -e "${DIM}Powered by HuggingFace ${LINK} Configuration-Driven ${STAR}${RESET}\n"
  fi

  # Show dry-run warning banner
  if [[ "$DRY_RUN" == true ]]; then
    echo -e "${BOLD_YELLOW}${WARNING} DRY-RUN MODE - No changes will be made ${WARNING}${RESET}\n"
  fi

  # Check dependencies
  check_dependencies
  # Validate configuration
  validate_config

  if [[ -z "$CONFIG_FILE" ]]; then
    print_error "No configuration file specified. Use -c/--config to provide one."
    exit 1
  fi

  # Get all categories (or filtered categories if --category is specified)
  local categories
  categories=$(validate_and_get_categories "$CONFIG_FILE")
  if [[ -z "$categories" ]]; then
    print_error "No model categories found in configuration"
    exit 1
  fi

  if [[ "$COMMAND" == "verify" ]]; then
    # Process each category with verification
    while IFS= read -r category; do
      # Human-readable heading: "image_models" -> "Image Models". awk is used
      # instead of GNU sed's '\u' extension so this also works on BSD/macOS
      # (the script already relies on BSD 'stat -f' elsewhere).
      local category_display
      category_display=$(echo "$category" | tr '_' ' ' \
        | awk '{ for (i = 1; i <= NF; i++) $i = toupper(substr($i, 1, 1)) substr($i, 2) } 1')
      verify_category "$category" "$category_display"
    done <<< "$categories"
    # Display verification summary
    local end_time
    end_time=$(date +%s)
    display_verification_summary "$start_time" "$end_time" "$categories"
  else
    # Original download/link logic
    local total_succeeded=0
    local total_failed=0
    # Process each category
    while IFS= read -r category; do
      local category_display
      category_display=$(echo "$category" | tr '_' ' ' \
        | awk '{ for (i = 1; i <= NF; i++) $i = toupper(substr($i, 1, 1)) substr($i, 2) } 1')
      process_category "$category" "$category_display"
      # TODO: track real per-model counts; currently one "success" is
      # recorded per category regardless of individual model results.
      total_succeeded=$((total_succeeded+1))
    done <<< "$categories"
    # Display summary
    local end_time
    end_time=$(date +%s)
    display_summary "$start_time" "$end_time" "$total_succeeded" "$total_failed"
  fi
}
# ============================================================================
# ENTRY POINT
# ============================================================================
# Parse command line arguments.
# NOTE(review): the file header documents a --filter-repo option, but no case
# handles it here, so passing it hits the unknown-option error — confirm
# whether repository filtering was implemented elsewhere or dropped.
POSITIONAL_ARGS=()
while [[ $# -gt 0 ]]; do
  case $1 in
    -c|--config)
      # Guard value-taking options so a trailing flag fails with a clear
      # message instead of an unbound-variable error under 'set -u'.
      [[ $# -ge 2 ]] || { print_error "Option $1 requires an argument"; exit 1; }
      CONFIG_FILE="$2"
      shift 2
      ;;
    --cache-dir)
      [[ $# -ge 2 ]] || { print_error "Option $1 requires an argument"; exit 1; }
      CACHE_DIR="$2"
      shift 2
      ;;
    --comfyui-dir)
      [[ $# -ge 2 ]] || { print_error "Option $1 requires an argument"; exit 1; }
      COMFYUI_DIR="$2"
      shift 2
      ;;
    --category|--cat)
      [[ $# -ge 2 ]] || { print_error "Option $1 requires an argument"; exit 1; }
      CATEGORY_FILTER="$2"
      shift 2
      ;;
    --cleanup|--clean)
      CLEANUP_MODE=true
      shift
      ;;
    --dry-run|-n)
      DRY_RUN=true
      shift
      ;;
    download|link|both|verify)
      COMMAND="$1"
      shift
      ;;
    -h|--help)
      echo "Usage: $0 [COMMAND] [options]"
      echo ""
      echo "Commands:"
      echo " download Download models only"
      echo " link Create symlinks only (models must already be downloaded)"
      echo " both Download and create symlinks (default)"
      echo " verify Verify model downloads and links (read-only)"
      echo ""
      echo "Options:"
      echo " -c, --config FILE Configuration file (required)"
      echo " --cache-dir DIR Cache directory (default: auto-detect)"
      echo " RunPod: /workspace/huggingface_cache"
      echo " Local: ~/.cache/huggingface"
      echo " --comfyui-dir DIR ComfyUI models directory (default: auto-detect)"
      echo " RunPod: /workspace/ComfyUI/models"
      echo " Local: ~/ComfyUI/models"
      echo " --category CAT1[,CAT2] Only process the listed categories"
      echo " --cleanup, --clean Remove unused cache files (link/both only)"
      echo " --dry-run, -n Show what would be done without making changes"
      echo " -h, --help Show this help message"
      echo ""
      echo "Examples:"
      echo " $0 download -c models.yaml"
      echo " $0 link --comfyui-dir /opt/ComfyUI/models -c models.yaml"
      echo " $0 both -c models.yaml --cache-dir /data/hf-cache"
      echo " $0 verify -c models.yaml"
      echo ""
      echo "Verify Command:"
      echo " The verify command performs comprehensive checks on all models:"
      echo " - Download status (file existence, size, location)"
      echo " - Link status (valid, broken, or missing symlinks)"
      echo " - Size mismatch warnings (actual vs expected)"
      echo " - Disk space usage statistics"
      echo " - Actionable fix suggestions"
      exit 0
      ;;
    -*)
      print_error "Unknown option: $1"
      echo "Use --help for usage information"
      exit 1
      ;;
    *)
      POSITIONAL_ARGS+=("$1")
      shift
      ;;
  esac
done

# Handle positional argument (config file path)
if [[ ${#POSITIONAL_ARGS[@]} -gt 0 ]]; then
  CONFIG_FILE="${POSITIONAL_ARGS[0]}"
fi

# Run main function
main