Files
bin/artifact_huggingface_download.sh

1303 lines
45 KiB
Bash
Executable File
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
#
# ComfyUI Model Downloader - A Beautiful CLI Tool
# Downloads AI models from HuggingFace and creates symlinks to ComfyUI directories
#
# Usage: ./artifact_huggingface_download.sh [COMMAND] [options]
#
# Commands: download, link, verify, both (default)
#
set -euo pipefail
# ============================================================================
# COLOR PALETTE - ANSI escape sequences
# ============================================================================
# The values are stored as literal backslash sequences; they only become real
# escape bytes when printed through `echo -e` / printf.
# Every sequence starts with the same Control Sequence Introducer.
_CSI='\033['
# Reset
RESET="${_CSI}0m"
# Foreground Colors
BLACK="${_CSI}0;30m"
RED="${_CSI}0;31m"
GREEN="${_CSI}0;32m"
YELLOW="${_CSI}0;33m"
BLUE="${_CSI}0;34m"
MAGENTA="${_CSI}0;35m"
CYAN="${_CSI}0;36m"
WHITE="${_CSI}0;37m"
# Bold
BOLD_BLACK="${_CSI}1;30m"
BOLD_RED="${_CSI}1;31m"
BOLD_GREEN="${_CSI}1;32m"
BOLD_YELLOW="${_CSI}1;33m"
BOLD_BLUE="${_CSI}1;34m"
BOLD_MAGENTA="${_CSI}1;35m"
BOLD_CYAN="${_CSI}1;36m"
BOLD_WHITE="${_CSI}1;37m"
# Background Colors
BG_BLACK="${_CSI}40m"
BG_RED="${_CSI}41m"
BG_GREEN="${_CSI}42m"
BG_YELLOW="${_CSI}43m"
BG_BLUE="${_CSI}44m"
BG_MAGENTA="${_CSI}45m"
BG_CYAN="${_CSI}46m"
BG_WHITE="${_CSI}47m"
# Styles
DIM="${_CSI}2m"
ITALIC="${_CSI}3m"
UNDERLINE="${_CSI}4m"
BLINK="${_CSI}5m"
REVERSE="${_CSI}7m"
# The prefix is only needed while building the palette.
unset _CSI
# ============================================================================
# UNICODE CHARACTERS - Make it Pretty
# ============================================================================
# Glyphs used as prefixes/decoration by the print_* helpers.
CHECK_MARK="✓"
CROSS_MARK="✗"
ROCKET="🚀"
PACKAGE="📦"
DOWNLOAD="⬇️"
SPARKLES="✨"
FIRE="🔥"
CLOCK="⏱️"
FOLDER="📁"
LINK="🔗"
STAR="⭐"
WARNING="⚠️"
# INFO was an empty string, which made print_info emit a bare leading space
# with no icon, unlike every other print_* helper.
INFO="ℹ️"
ARROW_RIGHT="→"
DOUBLE_ARROW="»"
BOX_LIGHT="─"
BOX_HEAVY="━"
BOX_DOUBLE="═"
# ============================================================================
# CONFIGURATION
# ============================================================================
# Script directory
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"

# Default configuration file path: the first existing candidate wins.
# CONFIG_FILE stays empty when none of them exists.
CONFIG_FILE=""
for _candidate in \
  "${HOME}/Projects/runpod/comfyui_models.yaml" \
  "${PROJECT_ROOT}/comfyui_models.yaml" \
  "${SCRIPT_DIR}/comfyui_models.yaml"; do
  if [[ -f "$_candidate" ]]; then
    CONFIG_FILE="$_candidate"
    break
  fi
done

# Default cache directory - detect RunPod or use HuggingFace default
if [[ -d "/workspace" ]]; then
  # RunPod environment
  CACHE_DIR="${CACHE_DIR:-/workspace/huggingface_cache}"
  COMFYUI_DIR="${COMFYUI_DIR:-/workspace/ComfyUI/models}"
else
  # Local environment
  CACHE_DIR="${CACHE_DIR:-${HOME}/.cache/huggingface}"
  COMFYUI_DIR="${COMFYUI_DIR:-${HOME}/ComfyUI/models}"
fi

# Default command
COMMAND="both"

#######################################
# Print the HF_TOKEN value stored in a dotenv-style file.
# Only an exact `HF_TOKEN=` key (optionally prefixed with `export`) is
# accepted, so keys such as HF_TOKEN_OLD are ignored. When the key appears
# several times the last assignment wins. Quotes and a trailing CR are
# stripped from the value.
# Arguments: $1 - path to the .env file (may not exist)
# Outputs:   the token on stdout, or nothing when no token is found
# Returns:   0 always
#######################################
_read_hf_token_from_env_file() {
  local env_file="$1"
  local -r assignment_re='^(export[[:space:]]+)?HF_TOKEN=(.*)$'
  local -r dq='"' sq="'"
  local line value=""

  if [[ ! -f "$env_file" ]]; then
    return 0
  fi
  # `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    line=${line%$'\r'}
    if [[ "$line" =~ $assignment_re ]]; then
      value=${BASH_REMATCH[2]}
      value=${value//$dq/}
      value=${value//$sq/}
    fi
  done < "$env_file"
  printf '%s\n' "$value"
}

# HuggingFace token: the environment wins, otherwise the first .env file that
# yields a non-empty token is used.
HF_TOKEN="${HF_TOKEN:-}"
for _candidate in \
  "${PROJECT_ROOT}/ai/.env" \
  "${PROJECT_ROOT}/.env" \
  "/workspace/ai/.env"; do
  if [[ -n "${HF_TOKEN}" ]]; then
    break
  fi
  HF_TOKEN="$(_read_hf_token_from_env_file "$_candidate")"
done
unset _candidate
# ============================================================================
# UTILITY FUNCTIONS - The Magic Happens Here
# ============================================================================
# Print functions with beautiful formatting
#######################################
# Print a three-line boxed banner with the text centred.
# All three rows have the same width: 80 inner columns plus one border
# character on each side. Odd leftover padding goes to the right, and text
# longer than 80 characters is printed without padding.
# Globals:   BOLD_CYAN, BOLD_MAGENTA, BOX_DOUBLE, RESET (read)
# Arguments: $1 - banner text
# Outputs:   banner on stdout, surrounded by blank lines
#######################################
print_banner() {
  local text="$1"
  local -r inner_width=80
  local rule left_pad right_pad
  local left_len right_len

  # Build the border by padding with spaces and swapping them for the box
  # character; this avoids `seq`, which counts DOWN on BSD for `seq 1 0`.
  printf -v rule '%*s' "$((inner_width + 2))" ''
  rule=${rule// /${BOX_DOUBLE}}

  left_len=$(( (inner_width - ${#text}) / 2 ))
  if (( left_len < 0 )); then
    left_len=0
  fi
  right_len=$(( inner_width - ${#text} - left_len ))
  if (( right_len < 0 )); then
    right_len=0
  fi
  printf -v left_pad '%*s' "$left_len" ''
  printf -v right_pad '%*s' "$right_len" ''

  echo -e ""
  echo -e "${BOLD_CYAN}${rule}${RESET}"
  echo -e "${BOLD_CYAN}${BOX_DOUBLE}${left_pad}${BOLD_MAGENTA}${text}${right_pad}${BOLD_CYAN}${BOX_DOUBLE}${RESET}"
  echo -e "${BOLD_CYAN}${rule}${RESET}"
  echo -e ""
}
# Print a section heading followed by an 80-column horizontal rule.
# Arguments: $1 - heading text
print_section() {
  local heading="$1"
  local rule
  # 80 spaces, each replaced by the light box-drawing character.
  printf -v rule '%*s' 80 ''
  rule=${rule// /${BOX_LIGHT}}
  printf '%b\n' "\n${BOLD_YELLOW}${DOUBLE_ARROW} ${heading}${RESET}"
  printf '%b\n' "${CYAN}${rule}${RESET}"
}
# Print a success line (bold green, check-mark prefix) on stdout.
# Arguments: $1 - message; backslash escapes in it are expanded
print_success() {
  printf '%b\n' "${BOLD_GREEN}${CHECK_MARK} $1${RESET}"
}
# Print an error line (bold red, cross-mark prefix) on stderr.
# Arguments: $1 - message; backslash escapes in it are expanded
print_error() {
  printf '%b\n' "${BOLD_RED}${CROSS_MARK} $1${RESET}" >&2
}
# Print a warning line (bold yellow, warning-glyph prefix) on stdout.
# Arguments: $1 - message; backslash escapes in it are expanded
print_warning() {
  printf '%b\n' "${BOLD_YELLOW}${WARNING} $1${RESET}"
}
# Print an informational line (bold cyan, INFO-glyph prefix) on stdout.
# Arguments: $1 - message; backslash escapes in it are expanded
print_info() {
  printf '%b\n' "${BOLD_CYAN}${INFO} $1${RESET}"
}
# Print a "[current/total]" step header followed by the download glyph.
# Arguments: $1 - current step, $2 - total steps, $3 - step description
print_step() {
  local index="$1" count="$2" label="$3"
  printf '%b\n' "${BOLD_BLUE}[${index}/${count}]${RESET} ${MAGENTA}${DOWNLOAD}${RESET} ${label}"
}
# Print an indented, dimmed detail line prefixed with an arrow.
# Arguments: $1 - message; backslash escapes in it are expanded
print_detail() {
  printf '%b\n' " ${DIM}${CYAN}${ARROW_RIGHT} $1${RESET}"
}
#######################################
# Draw a single-line, 50-cell progress bar (no trailing newline; the line
# starts with a carriage return so repeated calls redraw in place).
# The bar is built with parameter substitution rather than `tr`, because
# GNU tr works on bytes and would replace each space with only the first
# byte of a multi-byte block character. A total of 0 renders as 0% instead
# of dividing by zero.
# Globals:   BOLD_CYAN, BG_GREEN, BOLD_WHITE, BOLD_YELLOW, DIM, RESET (read)
# Arguments: $1 - items completed, $2 - total items
#######################################
show_progress() {
  local current="$1"
  local total="$2"
  local -r width=50
  local percentage=0
  local filled=0
  local empty done_cells todo_cells

  if (( total > 0 )); then
    percentage=$(( current * 100 / total ))
    filled=$(( current * width / total ))
  fi
  # Keep the bar inside its frame even if current is out of range.
  if (( filled > width )); then
    filled=$width
  elif (( filled < 0 )); then
    filled=0
  fi
  empty=$(( width - filled ))

  printf -v done_cells '%*s' "$filled" ''
  printf -v todo_cells '%*s' "$empty" ''
  done_cells=${done_cells// /█}
  todo_cells=${todo_cells// /░}

  printf '%b' "\r ${BOLD_CYAN}Progress: ${RESET}[${BG_GREEN}${BOLD_WHITE}${done_cells}${RESET}${DIM}${todo_cells}${RESET}"
  printf '%b%3d%%%b %b(%d/%d)%b' \
    "] ${BOLD_YELLOW}" "$percentage" "${RESET}" "${DIM}" "$current" "$total" "${RESET}"
}
#######################################
# Query the models YAML through an embedded Python/PyYAML snippet.
# The heredoc delimiter is quoted so bash never expands `$` or backticks
# inside the Python source; inputs travel via argv only.
# Arguments:
#   $1 - path to the YAML configuration file
#   $2 - query: "settings", "categories", or the name of a model category
# Outputs:
#   settings   -> CACHE_DIR=... and PARALLEL_DOWNLOADS=... lines
#   categories -> one category name per line
#   <category> -> one line per model:
#                 repo_id|description|size_gb|essential|type|filename
# Returns: 0 on success; 1 for an unknown category or any read/parse error
#          (the error text goes to stderr)
#######################################
parse_yaml() {
  local yaml_file="$1"
  local category="$2"
  python3 - "$yaml_file" "$category" <<'EOPYAML'
import sys
import yaml

yaml_file = sys.argv[1]
category = sys.argv[2]
try:
    with open(yaml_file, 'r') as f:
        # An empty document loads as None; treat it as an empty mapping.
        config = yaml.safe_load(f) or {}
    model_categories = config.get('model_categories') or {}
    if category == 'settings':
        settings = config.get('settings') or {}
        print("CACHE_DIR={0}".format(settings.get('cache_dir', '/workspace/huggingface_cache')))
        print("PARALLEL_DOWNLOADS={0}".format(settings.get('parallel_downloads', 1)))
    elif category == 'categories':
        for cat_name in model_categories.keys():
            print(cat_name)
    elif category in model_categories:
        models = model_categories[category] or []
        for model in models:
            repo_id = model.get('repo_id', '')
            description = model.get('description', '')
            size_gb = model.get('size_gb', 0)
            essential = model.get('essential', False)
            model_type = model.get('type', 'checkpoints')
            filename = model.get('filename', '')
            print('{0}|{1}|{2}|{3}|{4}|{5}'.format(repo_id, description, size_gb, essential, model_type, filename))
    else:
        sys.exit(1)
except Exception as e:
    print("ERROR: {0}".format(e), file=sys.stderr)
    sys.exit(1)
EOPYAML
}
#######################################
# Print the explicit source->dest file mappings configured for one model.
# The heredoc delimiter is quoted so bash never expands anything inside
# the Python source. Output comes from the first model entry that matches
# the repo id AND has a non-empty `files` list.
# Arguments:
#   $1 - path to the YAML configuration file
#   $2 - model category name
#   $3 - repository id of the model
# Outputs: one "source|dest" line per mapping (dest defaults to source);
#          nothing when the model has no mappings
# Returns: 0 on success (including "no mappings"); 1 on read/parse error
#######################################
parse_file_mappings() {
  local yaml_file="$1"
  local category="$2"
  local repo_id="$3"
  python3 - "$yaml_file" "$category" "$repo_id" <<'EOPYTHON'
import sys
import yaml

yaml_file = sys.argv[1]
category = sys.argv[2]
repo_id = sys.argv[3]
try:
    with open(yaml_file, 'r') as f:
        # An empty document loads as None; treat it as an empty mapping.
        config = yaml.safe_load(f) or {}
    models = (config.get('model_categories') or {}).get(category) or []
    for model in models:
        if model.get('repo_id', '') != repo_id:
            continue
        files = model.get('files') or []
        if not files:
            continue
        for file_mapping in files:
            if isinstance(file_mapping, str):
                # Shorthand entry: a bare filename maps onto itself.
                source = dest = file_mapping
            else:
                source = file_mapping.get('source', '')
                dest = file_mapping.get('dest', source)
            if source:
                print('{0}|{1}'.format(source, dest))
        break
    # Reaching this point without output means: no file mappings found.
    sys.exit(0)
except Exception as e:
    print("ERROR: {0}".format(e), file=sys.stderr)
    sys.exit(1)
EOPYTHON
}
#######################################
# Verify that python3 and pip3 exist, then make sure the Python packages
# the script imports (PyYAML, huggingface_hub) are installed, installing
# them with pip3 when they are missing.
# The executable check runs FIRST: otherwise a missing python3/pip3 would
# be invoked by the package probes and, under `set -e`, kill the script
# with "command not found" before the summary message is printed.
# Outputs: progress messages via the print_* helpers
# Exits:   1 when python3 or pip3 is missing
#######################################
check_dependencies() {
  print_section "Checking Dependencies"
  local missing_deps=()
  local tool

  for tool in python3 pip3; do
    if ! command -v "$tool" &> /dev/null; then
      missing_deps+=("$tool")
    fi
  done
  if [[ ${#missing_deps[@]} -gt 0 ]]; then
    print_error "Missing dependencies: ${missing_deps[*]}"
    exit 1
  fi

  # Check required Python packages
  if ! python3 -c "import yaml" 2>/dev/null; then
    print_warning "PyYAML not installed, installing..."
    pip3 install pyyaml -q
  fi
  if ! python3 -c "import huggingface_hub" 2>/dev/null; then
    print_warning "huggingface_hub not installed, installing..."
    pip3 install huggingface_hub -q
  fi
  print_success "All dependencies satisfied"
}
# Validate the run configuration for the selected COMMAND and report it.
# Exits with status 1 when a configured config file is missing, or when a
# download is requested without HF_TOKEN. Creates CACHE_DIR for download runs.
validate_config() {
  # "download" and "both" are the only commands that fetch from HuggingFace.
  local wants_download=false
  case "$COMMAND" in
    download | both) wants_download=true ;;
  esac

  print_section "Validating Configuration"
  # Show current command
  print_info "Command: ${BOLD_CYAN}${COMMAND}${RESET}"

  # Configuration file (optional, but must exist when given)
  if [[ -z "$CONFIG_FILE" ]]; then
    print_warning "No configuration file specified"
  elif [[ -f "$CONFIG_FILE" ]]; then
    print_success "Configuration file found: ${CYAN}${CONFIG_FILE}${RESET}"
  else
    print_error "Configuration file not found: $CONFIG_FILE"
    exit 1
  fi

  if $wants_download; then
    # Token is mandatory for anything that downloads
    if [[ -z "$HF_TOKEN" ]]; then
      print_error "HF_TOKEN not set. Please set it in .env file or environment."
      exit 1
    fi
    print_success "HuggingFace token configured: ${DIM}${HF_TOKEN:0:10}...${RESET}"
    # Cache directory is created on demand
    if [[ ! -d "$CACHE_DIR" ]]; then
      print_info "Creating cache directory: ${CYAN}${CACHE_DIR}${RESET}"
      mkdir -p "$CACHE_DIR"
    fi
    print_success "Cache directory ready: ${CYAN}${CACHE_DIR}${RESET}"
  else
    if [[ "$COMMAND" == "verify" ]]; then
      print_info "Verify mode: HuggingFace token not required"
    fi
    # For link and verify commands, just show the directory
    if [[ -d "$CACHE_DIR" ]]; then
      print_success "Cache directory found: ${CYAN}${CACHE_DIR}${RESET}"
    else
      print_warning "Cache directory not found: ${CYAN}${CACHE_DIR}${RESET}"
    fi
  fi

  # ComfyUI directory (irrelevant for a pure download)
  case "$COMMAND" in
    link | both | verify)
      if [[ -d "$COMFYUI_DIR" ]]; then
        print_success "ComfyUI directory found: ${CYAN}${COMFYUI_DIR}${RESET}"
      elif [[ "$COMMAND" == "verify" ]]; then
        print_warning "ComfyUI directory not found: ${CYAN}${COMFYUI_DIR}${RESET}"
      else
        print_info "ComfyUI directory: ${CYAN}${COMFYUI_DIR}${RESET}"
      fi
      ;;
  esac
}
#######################################
# List the usable files of a model's newest snapshot in the HuggingFace cache.
# Looks for <CACHE_DIR>/models--<org>--<name> first and falls back to
# <CACHE_DIR>/hub/models--<org>--<name>. The newest snapshot is the
# snapshot directory with the most recent modification time.
# Files ending in .txt/.md/.gitattributes are skipped, as are .json files
# other than config.json, tokenizer.json and tokenizer_config.json.
# Globals:   CACHE_DIR (read)
# Arguments: $1 - repository id ("org/name")
#            $2 - optional substring the file NAME must contain
# Outputs:   matching absolute paths on stdout, one per line, sorted;
#            ERROR:/WARN:/DEBUG: diagnostics (plus indented detail lines)
#            on stderr
# Returns:   1 when the model or its snapshots cannot be found, else 0
#######################################
find_model_files() {
  local repo_id="$1"
  local filename_filter="${2:-}"
  # Quoted delimiter: the Python source is passed verbatim, inputs via argv.
  python3 - "$CACHE_DIR" "$repo_id" "$filename_filter" <<'EOPYFINDFIND'
import sys
from pathlib import Path

cache_dir = sys.argv[1]
repo_id = sys.argv[2]
filename_filter = sys.argv[3]

# HuggingFace cache structure: cache_dir/models--org--name/snapshots/hash/
# Try both with and without 'hub/' subdirectory for compatibility
cache_path = Path(cache_dir)
repo_path = repo_id.replace('/', '--')
model_dir = cache_path / 'models--{0}'.format(repo_path)

# First attempt: direct path
model_dir_original = model_dir
if not model_dir.exists():
    # Fallback to hub/ subdirectory if direct path doesn't exist
    model_dir = cache_path / 'hub' / 'models--{0}'.format(repo_path)
    if not model_dir.exists():
        sys.stderr.write('ERROR: Model directory not found for {0}\n'.format(repo_id))
        sys.stderr.write(' Tried: {0}\n'.format(str(model_dir_original)))
        sys.stderr.write(' Tried: {0}\n'.format(str(model_dir)))
        sys.stderr.write(' Cache: {0}\n'.format(str(cache_path)))
        sys.exit(1)

# Find the latest snapshot
snapshots_dir = model_dir / 'snapshots'
if not snapshots_dir.exists():
    sys.stderr.write('ERROR: Snapshots directory not found for {0}\n'.format(repo_id))
    sys.stderr.write(' Model dir: {0}\n'.format(str(model_dir)))
    sys.stderr.write(' Expected: {0}\n'.format(str(snapshots_dir)))
    sys.exit(1)

# Snapshot directories, newest modification time first. Stray regular
# files inside snapshots/ are ignored.
try:
    snapshots = sorted(
        (p for p in snapshots_dir.iterdir() if p.is_dir()),
        key=lambda p: p.stat().st_mtime,
        reverse=True,
    )
except Exception as e:
    sys.stderr.write('ERROR: Failed to list snapshots for {0}: {1}\n'.format(repo_id, str(e)))
    sys.exit(1)
if not snapshots:
    sys.stderr.write('ERROR: No snapshots found for {0}\n'.format(repo_id))
    sys.stderr.write(' Snapshots dir: {0}\n'.format(str(snapshots_dir)))
    sys.exit(1)
latest_snapshot = snapshots[0]
sys.stderr.write('DEBUG: Using snapshot {0}\n'.format(str(latest_snapshot)))

# The only .json files that are kept; every other .json is treated as metadata.
important_json = ('config.json', 'tokenizer.json', 'tokenizer_config.json')

# Sorted so the output order (and any "first file" choice made by callers)
# is stable between runs.
file_count = 0
for file_path in sorted(latest_snapshot.rglob('*')):
    if not file_path.is_file():
        continue
    name = file_path.name
    # If filename filter is specified, only match those files
    if filename_filter and filename_filter not in name:
        continue
    if name.endswith(('.txt', '.md', '.gitattributes')):
        continue
    if name.endswith('.json') and name not in important_json:
        continue
    print(str(file_path))
    file_count += 1

if file_count == 0:
    sys.stderr.write('WARN: No files matched filter for {0} (filter: {1})\n'.format(repo_id, filename_filter))
EOPYFINDFIND
}
#######################################
# Download one model repository into the HuggingFace cache.
# Values are handed to Python through argv (cache dir, repo id) and the
# process environment (token) instead of being pasted into the Python
# source, so quotes or other special characters in them can neither break
# nor inject code. The token is kept off the command line.
# Globals:   CACHE_DIR, HF_TOKEN (read)
# Arguments: $1 - repository id, $2 - description, $3 - size in GB
#            ($2 and $3 are only displayed)
# Returns:   0 when the download succeeded, 1 otherwise
#######################################
download_model() {
  local repo_id="$1"
  local description="$2"
  local size_gb="$3"
  print_detail "Repository: ${BOLD_WHITE}${repo_id}${RESET}"
  print_detail "Description: ${description}"
  print_detail "Size: ${BOLD_YELLOW}${size_gb}GB${RESET}"
  # Download using Python; the quoted delimiter keeps bash out of the source.
  if HF_TOKEN="${HF_TOKEN:-}" python3 - "$CACHE_DIR" "$repo_id" <<'EOPYDOWNLOAD'
import os
import sys
from huggingface_hub import snapshot_download

cache_dir = sys.argv[1]
repo_id = sys.argv[2]
# An empty token is passed as None rather than as an empty string.
token = os.environ.get('HF_TOKEN') or None
# NOTE(review): this is set after huggingface_hub was imported, so it may not
# influence defaults the library computed at import time - confirm.
os.environ['HF_HOME'] = cache_dir
try:
    snapshot_download(
        repo_id=repo_id,
        cache_dir=cache_dir,
        token=token,
        resume_download=True
    )
    print("SUCCESS")
except Exception as e:
    print("ERROR: {0}".format(e), file=sys.stderr)
    sys.exit(1)
EOPYDOWNLOAD
  then
    print_success "Downloaded ${BOLD_WHITE}${repo_id}${RESET}"
    return 0
  fi
  print_error "Failed to download ${repo_id}"
  return 1
}
#######################################
# Create (or replace) one symlink.
# An existing symlink at the destination is replaced; any other existing
# file is left untouched and reported.
# Arguments: $1 - link target (cached file), $2 - link path,
#            $3 - name shown in messages
# Returns:   0 when the link was created, 1 when it was skipped
#######################################
_link_model_file() {
  local source_file="$1"
  local link_path="$2"
  local display_name="$3"
  if [[ -L "$link_path" ]]; then
    rm -f -- "$link_path"
  elif [[ -e "$link_path" ]]; then
    print_warning "File already exists (not a symlink): ${display_name}"
    return 1
  fi
  # A mapped destination may contain sub-directories.
  mkdir -p -- "$(dirname -- "$link_path")"
  ln -s -- "$source_file" "$link_path"
  print_detail "${LINK} Linked: ${DIM}${display_name}${RESET}"
}

#######################################
# Symlink a model's cached files into ${COMFYUI_DIR}/<model_type>/.
# With explicit mappings each "source|dest" pair links the cached file
# whose path ends in "/source" under the name "dest". Without mappings
# every cached file is linked as "<repo-name>-<basename>".
# Globals:   COMFYUI_DIR (read)
# Arguments: $1 - repository id ("org/name")
#            $2 - ComfyUI model sub-directory (e.g. checkpoints)
#            $3 - filename filter forwarded to find_model_files
#            $4 - optional newline-separated "source|dest" mappings
# Returns:   0 when at least one file was linked, 1 otherwise
#######################################
link_model() {
  local repo_id="$1"
  local model_type="$2"
  local filename_filter="$3"
  local file_mappings="${4:-}" # Optional: explicit source|dest mappings
  local target_dir="${COMFYUI_DIR}/${model_type}"

  print_detail "Linking to: ${CYAN}${COMFYUI_DIR}/${model_type}/${RESET}"
  # Create ComfyUI subdirectory if it doesn't exist
  if [[ ! -d "$target_dir" ]]; then
    print_info "Creating directory: ${CYAN}${target_dir}${RESET}"
    mkdir -p "$target_dir"
  fi

  # Find model files in cache. A failing lookup is handled through the
  # empty result below rather than by aborting under `set -e`.
  local model_files
  model_files=$(find_model_files "$repo_id" "$filename_filter") || true
  if [[ -z "$model_files" ]]; then
    print_warning "No model files found in cache for ${repo_id}"
    return 1
  fi

  # Remove stale links that point into this repo's cache directory. The cache
  # directory is named "models--org--name" (double dashes), so that is what
  # the link target must be matched against. This runs only after the cache
  # lookup succeeded, so a failed lookup never wipes working links.
  local cache_dir_name="models--${repo_id//\//--}"
  find "$target_dir" -type l -lname "*${cache_dir_name}*" -delete 2>/dev/null || true

  local linked_count=0
  local source_file link_path candidate
  if [[ -n "$file_mappings" ]]; then
    print_detail "Using explicit file mappings from YAML"
    local source_pattern dest_filename
    while IFS='|' read -r source_pattern dest_filename; do
      if [[ -z "$source_pattern" ]]; then
        continue
      fi
      # Pick the first cached file whose path ENDS with /source_pattern; a
      # plain substring match would also accept e.g. "model.bin.index".
      source_file=""
      while IFS= read -r candidate; do
        if [[ "$candidate" == */"$source_pattern" ]]; then
          source_file="$candidate"
          break
        fi
      done <<< "$model_files"
      if [[ -z "$source_file" ]] || [[ ! -f "$source_file" ]]; then
        print_warning "Source file not found: ${source_pattern}"
        continue
      fi
      link_path="${target_dir}/${dest_filename}"
      if _link_model_file "$source_file" "$link_path" "$dest_filename"; then
        linked_count=$((linked_count + 1))
      fi
    done <<< "$file_mappings"
  else
    # Fallback: use automatic prefixing for files without explicit mappings
    print_detail "No file mappings found, using automatic prefixing"
    # e.g. "facebook/musicgen-medium" -> "musicgen-medium"
    local model_name="${repo_id##*/}"
    local prefixed_filename
    while IFS= read -r source_file; do
      if [[ ! -f "$source_file" ]]; then
        continue
      fi
      # e.g. "pytorch_model.bin" -> "musicgen-medium-pytorch_model.bin"
      prefixed_filename="${model_name}-${source_file##*/}"
      link_path="${target_dir}/${prefixed_filename}"
      if _link_model_file "$source_file" "$link_path" "$prefixed_filename"; then
        linked_count=$((linked_count + 1))
      fi
    done <<< "$model_files"
  fi

  if [[ $linked_count -gt 0 ]]; then
    print_success "Linked ${linked_count} file(s) for ${BOLD_WHITE}${repo_id}${RESET}"
    return 0
  fi
  print_error "Failed to link files for ${repo_id}"
  return 1
}
# ============================================================================
# VERIFICATION FUNCTIONS - Model Status Checking
# ============================================================================
# Sum the sizes (in bytes) of the regular files named in a newline-separated
# list and print the total. Entries that are not regular files are ignored;
# an empty list prints 0.
get_model_disk_usage() {
  local model_files="$1"
  local sum=0
  local path size

  if [[ -n "$model_files" ]]; then
    while IFS= read -r path; do
      [[ -f "$path" ]] || continue
      # BSD stat syntax first, then GNU; 0 when neither works.
      size=$(stat -f%z "$path" 2>/dev/null || stat -c%s "$path" 2>/dev/null || echo "0")
      sum=$((sum + size))
    done <<< "$model_files"
  fi
  echo "$sum"
}
#######################################
# Format a byte count as a human-readable size using 1024-based units.
# Pure integer arithmetic: no dependency on `bc`, and no locale-sensitive
# float parsing. Fractions are truncated to two decimals, not rounded.
# Arguments: $1 - non-negative integer byte count (empty is treated as 0)
# Outputs:   "<n> B", "<n> KB", "<n.nn> MB" or "<n.nn> GB" on stdout
#######################################
format_bytes() {
  local bytes="${1:-0}"
  local -r kib=1024 mib=1048576 gib=1073741824
  local unit label

  if (( bytes < kib )); then
    echo "${bytes} B"
    return
  elif (( bytes < mib )); then
    echo "$(( bytes / kib )) KB"
    return
  elif (( bytes < gib )); then
    unit=$mib
    label="MB"
  else
    unit=$gib
    label="GB"
  fi
  # Whole part and two-digit fractional part, both truncated.
  printf '%d.%02d %s\n' "$(( bytes / unit ))" "$(( bytes % unit * 100 / unit ))" "$label"
}
#######################################
# Report whether a model is present in the cache.
# stdout and stderr of find_model_files are captured SEPARATELY, and its exit
# status is honoured. Merging the streams let unprefixed diagnostic lines
# (e.g. " Tried: ...") pass the ERROR:/WARN:/DEBUG: filter and be counted as
# model files, so a missing model was reported as FOUND.
# Globals:   CACHE_DIR (read, for the debug line)
# Arguments: $1 - repository id
#            $2 - expected size in GB (accepted for interface
#                 compatibility; not used here)
#            $3 - filename filter forwarded to find_model_files
# Outputs:   one line on stdout:
#              FOUND|<bytes>|<file count>|<dir of first file>|<mtime>
#              NOT_FOUND|0|0|
#            debug lines, and lookup diagnostics on failure, on stderr
# Returns:   0 when found, 1 otherwise
#######################################
verify_model_download() {
  local repo_id="$1"
  local expected_size_gb="$2"
  local filename_filter="$3"
  # DEBUG: Show what we're checking
  echo "DEBUG_BASH: Checking repo_id='$repo_id' filter='$filename_filter' CACHE_DIR='$CACHE_DIR'" >&2

  local err_file model_files
  local find_rc=0
  err_file=$(mktemp 2>/dev/null) || err_file=/dev/null
  model_files=$(find_model_files "$repo_id" "$filename_filter" 2>"$err_file") || find_rc=$?
  # DEBUG: Show raw output
  echo "DEBUG_BASH: find_output length=${#model_files}" >&2
  echo "DEBUG_BASH: find_output first 200 chars: ${model_files:0:200}" >&2

  if (( find_rc != 0 )) || [[ -z "$model_files" ]]; then
    # Forward the lookup diagnostics so the caller can see why it failed.
    if [[ -s "$err_file" ]]; then
      cat "$err_file" >&2
    fi
    if [[ "$err_file" != /dev/null ]]; then
      rm -f -- "$err_file"
    fi
    echo "NOT_FOUND|0|0|"
    return 1
  fi
  if [[ "$err_file" != /dev/null ]]; then
    rm -f -- "$err_file"
  fi

  # Count files
  local file_count
  file_count=$(printf '%s\n' "$model_files" | wc -l | tr -d ' ')
  # Get actual size
  local actual_bytes
  actual_bytes=$(get_model_disk_usage "$model_files")
  # Cache path is the directory of the first file; dirname is called directly
  # so paths containing spaces or quotes survive.
  local first_file="${model_files%%$'\n'*}"
  local cache_path
  cache_path=$(dirname -- "$first_file")

  # Modification time of the first file. GNU syntax is tried first: on GNU
  # stat, `-f` means "filesystem info" and still prints to stdout while
  # failing, which would pollute the value if the BSD form ran first.
  local mod_time=""
  local gnu_time
  if [[ -f "$first_file" ]]; then
    if gnu_time=$(stat -c '%y' "$first_file" 2>/dev/null); then
      mod_time=${gnu_time%%.*}
    elif ! mod_time=$(stat -f '%Sm' -t '%Y-%m-%d %H:%M:%S' "$first_file" 2>/dev/null); then
      mod_time="Unknown"
    fi
  fi
  echo "FOUND|${actual_bytes}|${file_count}|${cache_path}|${mod_time}"
  return 0
}
# Check the symlinks a model is expected to have in ${COMFYUI_DIR}/<type>/.
# Prints "NOT_LINKED|0|0|0" (target dir missing) or "NOT_DOWNLOADED|0|0|0"
# (nothing in cache) and returns 1; otherwise prints
# "CHECKED|<total>|<valid>|<broken>" followed by one
# "<VALID|BROKEN|MISSING>|<name>|<link target>" line per expected link and
# returns 0.
# Arguments: $1 repo id, $2 model type, $3 filename filter,
#            $4 newline-separated "source|dest" mappings (may be empty)
verify_model_links() {
  local repo_id="$1"
  local model_type="$2"
  local filename_filter="$3"
  local file_mappings="$4"
  local target_dir="${COMFYUI_DIR}/${model_type}"

  # If target directory doesn't exist, nothing is linked
  if [[ ! -d "$target_dir" ]]; then
    echo "NOT_LINKED|0|0|0"
    return 1
  fi

  # Find model files in cache
  local model_files
  model_files=$(find_model_files "$repo_id" "$filename_filter" 2>/dev/null)
  if [[ -z "$model_files" ]]; then
    echo "NOT_DOWNLOADED|0|0|0"
    return 1
  fi

  # Step 1: work out which link names are expected.
  local -a expected_names=()
  local entry_source entry_dest cached_file
  if [[ -n "$file_mappings" ]]; then
    # Explicit mappings: the destination name of every non-empty entry.
    while IFS='|' read -r entry_source entry_dest; do
      if [[ -n "$entry_source" ]]; then
        expected_names+=("$entry_dest")
      fi
    done <<< "$file_mappings"
  else
    # Automatic naming: "<repo name>-<file name>" for every cached file.
    local repo_basename="${repo_id##*/}"
    while IFS= read -r cached_file; do
      if [[ -f "$cached_file" ]]; then
        expected_names+=("${repo_basename}-${cached_file##*/}")
      fi
    done <<< "$model_files"
  fi

  # Step 2: classify each expected link.
  local total=0 ok=0 dangling=0
  local report="" name path state
  if (( ${#expected_names[@]} > 0 )); then
    for name in "${expected_names[@]}"; do
      total=$((total + 1))
      path="${target_dir}/${name}"
      if [[ ! -L "$path" ]]; then
        report="${report}MISSING|${name}|\n"
        continue
      fi
      # -e follows the link, so it tells a live link from a dangling one.
      if [[ -e "$path" ]]; then
        state="VALID"
        ok=$((ok + 1))
      else
        state="BROKEN"
        dangling=$((dangling + 1))
      fi
      report="${report}${state}|${name}|$(readlink "$path")\n"
    done
  fi

  echo -e "CHECKED|${total}|${ok}|${dangling}\n${report}"
  return 0
}
#######################################
# Verify download and link status of every model in one category, print a
# per-model report plus a category summary, and store the counters for the
# global summary.
# Helper calls whose non-zero status is an expected answer ("not found",
# "not linked", "unknown category") are guarded, so that under `set -e`
# the script reports the problem instead of aborting.
# Globals:   CONFIG_FILE (read); colour and glyph constants (read)
# Arguments: $1 - category key in the YAML, $2 - heading to display
# Outputs:   report on stdout; writes
#            downloaded|missing|linked|broken|not_linked|size|expected_size
#            to /tmp/verify_stats_<category>, the fixed path that
#            display_verification_summary reads
# Returns:   0
#######################################
verify_category() {
  local category="$1"
  local category_display="$2"
  print_section "${category_display}"

  # Get models for this category
  local models_data
  models_data=$(parse_yaml "$CONFIG_FILE" "$category") || true
  if [[ -z "$models_data" ]]; then
    print_warning "No models found in category: ${category}"
    return 0
  fi

  # `tr` strips the padding BSD wc puts in front of the count.
  local total_models
  total_models=$(printf '%s\n' "$models_data" | wc -l | tr -d ' ')

  local current=0
  local models_downloaded=0 models_missing=0
  local models_linked=0 models_broken=0 models_not_linked=0
  local total_size_bytes=0 expected_size_bytes=0

  local repo_id description size_gb essential model_type filename
  local expected_bytes download_result download_status
  local actual_bytes file_count cache_path mod_time size_diff_pct
  local file_mappings link_result link_status link_details
  local total_links valid_links broken_links
  local link_state link_name link_target

  while IFS='|' read -r repo_id description size_gb essential model_type filename; do
    current=$((current + 1))
    echo ""
    print_step "$current" "$total_models" "${BOLD_MAGENTA}${description}${RESET}"
    print_detail "Repository: ${BOLD_WHITE}${repo_id}${RESET}"
    print_detail "Category: ${CYAN}${category}${RESET} ${ARROW_RIGHT} ${CYAN}${model_type}${RESET}"
    print_detail "Expected Size: ${BOLD_YELLOW}${size_gb} GB${RESET}"

    # size_gb may be fractional, so the conversion needs floating point;
    # awk is used for that.
    expected_bytes=$(LC_ALL=C awk -v gb="${size_gb:-0}" \
      'BEGIN { printf "%.0f", gb * 1073741824 }') || expected_bytes=0
    expected_bytes=${expected_bytes:-0}
    expected_size_bytes=$((expected_size_bytes + expected_bytes))

    # Verify download status (exit status 1 just means "not found")
    echo ""
    download_result=$(verify_model_download "$repo_id" "$size_gb" "$filename") || true
    IFS='|' read -r download_status actual_bytes file_count cache_path mod_time \
      <<< "$download_result" || true

    if [[ "$download_status" == "FOUND" ]]; then
      actual_bytes=${actual_bytes:-0}
      total_size_bytes=$((total_size_bytes + actual_bytes))
      print_success "Download Status: ${BOLD_GREEN}DOWNLOADED${RESET}"
      print_detail "${DIM}Path: ${cache_path}${RESET}"
      print_detail "${DIM}Actual Size: $(format_bytes "$actual_bytes") (${actual_bytes} bytes)${RESET}"
      print_detail "${DIM}Files: ${file_count} file(s)${RESET}"
      print_detail "${DIM}Modified: ${mod_time}${RESET}"

      # Check for size mismatch; skipped when no expected size is configured
      # (size_gb of 0) because the ratio would divide by zero.
      if (( expected_bytes > 0 )); then
        size_diff_pct=$(LC_ALL=C awk -v a="$actual_bytes" -v e="$expected_bytes" \
          'BEGIN { printf "%.2f", (a - e) / e * 100 }') || size_diff_pct=0
        if LC_ALL=C awk -v d="$size_diff_pct" 'BEGIN { exit !(d > 10 || d < -10) }'; then
          print_warning "Size mismatch: ${size_diff_pct}% difference from expected"
        fi
      fi
      models_downloaded=$((models_downloaded + 1))

      # Verify link status (exit status 1 just means "not linked")
      echo ""
      file_mappings=$(parse_file_mappings "$CONFIG_FILE" "$category" "$repo_id") || true
      link_result=$(verify_model_links "$repo_id" "$model_type" "$filename" "$file_mappings") || true
      IFS='|' read -r link_status total_links valid_links broken_links \
        <<< "$link_result" || true

      if [[ "$link_status" == "CHECKED" ]]; then
        total_links=${total_links:-0}
        valid_links=${valid_links:-0}
        broken_links=${broken_links:-0}
        if [[ $broken_links -gt 0 ]]; then
          print_warning "Link Status: ${BOLD_YELLOW}${broken_links} BROKEN LINK(S)${RESET}"
          models_broken=$((models_broken + 1))
        elif [[ $valid_links -eq $total_links ]] && [[ $total_links -gt 0 ]]; then
          print_success "Link Status: ${BOLD_GREEN}LINKED${RESET} (${valid_links}/${total_links})"
          models_linked=$((models_linked + 1))
        else
          print_warning "Link Status: ${BOLD_YELLOW}PARTIALLY LINKED${RESET} (${valid_links}/${total_links})"
          models_not_linked=$((models_not_linked + 1))
        fi

        # Show link details: everything after the first line of the result.
        link_details=""
        if [[ "$link_result" == *$'\n'* ]]; then
          link_details=${link_result#*$'\n'}
        fi
        if [[ -n "$link_details" ]]; then
          while IFS='|' read -r link_state link_name link_target; do
            # The coloured status span used to be empty; it now shows a glyph.
            case "$link_state" in
              VALID)
                print_detail "${LINK} ${BOLD_GREEN}${CHECK_MARK}${RESET} ${DIM}${link_name}${RESET}"
                ;;
              BROKEN)
                print_detail "${LINK} ${BOLD_RED}${CROSS_MARK}${RESET} ${DIM}${link_name}${RESET} ${BOLD_RED}(BROKEN)${RESET}"
                ;;
              MISSING)
                print_detail "${LINK} ${BOLD_YELLOW}○${RESET} ${DIM}${link_name}${RESET} ${BOLD_YELLOW}(NOT LINKED)${RESET}"
                ;;
            esac
          done <<< "$link_details"
        fi
      else
        print_error "Link Status: ${BOLD_RED}NOT LINKED${RESET}"
        models_not_linked=$((models_not_linked + 1))
      fi
    else
      print_error "Download Status: ${BOLD_RED}NOT DOWNLOADED${RESET}"
      models_missing=$((models_missing + 1))
      echo ""
      print_info "Link Status: ${DIM}N/A (model not downloaded)${RESET}"
    fi
    show_progress "$current" "$total_models"
  done <<< "$models_data"
  echo -e "\n"

  # Category summary
  local total_size_human expected_size_human
  total_size_human=$(format_bytes "$total_size_bytes")
  expected_size_human=$(format_bytes "$expected_size_bytes")
  print_info "Category Summary:"
  echo -e " ${BOLD_WHITE}Total Models:${RESET} ${total_models}"
  echo -e " ${BOLD_GREEN}✓ Downloaded:${RESET} ${models_downloaded} ($(( models_downloaded * 100 / total_models ))%)"
  echo -e " ${BOLD_RED}✗ Missing:${RESET} ${models_missing} ($(( models_missing * 100 / total_models ))%)"
  echo -e " ${BOLD_GREEN}✓ Properly Linked:${RESET} ${models_linked}"
  echo -e " ${BOLD_YELLOW}⚠ Broken Links:${RESET} ${models_broken}"
  echo -e " ${BOLD_YELLOW}○ Not Linked:${RESET} ${models_not_linked}"
  echo -e " ${BOLD_CYAN}📊 Disk Usage:${RESET} ${total_size_human} / ${expected_size_human} expected"

  # Statistics for the global summary
  # (format: downloaded|missing|linked|broken|not_linked|total_size|expected_size)
  echo "${models_downloaded}|${models_missing}|${models_linked}|${models_broken}|${models_not_linked}|${total_size_bytes}|${expected_size_bytes}" \
    > "/tmp/verify_stats_${category}"
}
#######################################
# Download and/or link every model of one category, depending on COMMAND.
# A failing parse_yaml (unknown category, unreadable config) is reported as
# "no models" and a failing parse_file_mappings as "no mappings"; neither
# aborts the script under `set -e`.
# Globals:   CONFIG_FILE, COMMAND (read)
# Arguments: $1 - category key in the YAML, $2 - heading to display
# Outputs:   per-model progress and a category summary on stdout
# Returns:   0
#######################################
process_category() {
  local category="$1"
  local category_display="$2"
  print_section "${category_display}"

  # Get models for this category
  local models_data
  models_data=$(parse_yaml "$CONFIG_FILE" "$category") || true
  if [[ -z "$models_data" ]]; then
    print_warning "No models found in category: ${category}"
    return 0
  fi

  # `tr` strips the padding BSD wc puts in front of the count.
  local total_models
  total_models=$(printf '%s\n' "$models_data" | wc -l | tr -d ' ')

  local current=0
  local succeeded=0
  local failed=0
  local repo_id description size_gb essential model_type filename
  local success file_mappings
  while IFS='|' read -r repo_id description size_gb essential model_type filename; do
    current=$((current + 1))
    echo ""
    print_step "$current" "$total_models" "${BOLD_MAGENTA}${description}${RESET}"
    success=true

    # Download if command is 'download' or 'both'
    if [[ "$COMMAND" == "download" ]] || [[ "$COMMAND" == "both" ]]; then
      if ! download_model "$repo_id" "$description" "$size_gb"; then
        success=false
      fi
    fi

    # Link if command is 'link' or 'both'; skipped after a failed download
    if [[ "$COMMAND" == "link" ]] || [[ "$COMMAND" == "both" ]]; then
      if $success; then
        # Parse file mappings from YAML for this model; fall back to
        # automatic naming (empty string) when they cannot be read.
        file_mappings=$(parse_file_mappings "$CONFIG_FILE" "$category" "$repo_id") || file_mappings=""
        if ! link_model "$repo_id" "$model_type" "$filename" "$file_mappings"; then
          success=false
        fi
      fi
    fi

    if $success; then
      succeeded=$((succeeded + 1))
    else
      failed=$((failed + 1))
    fi
    show_progress "$current" "$total_models"
  done <<< "$models_data"
  echo -e "\n"
  print_info "Category Summary: ${BOLD_GREEN}${succeeded} succeeded${RESET}, ${BOLD_RED}${failed} failed${RESET}"
}
# Aggregate the per-category statistics files written by the verification run,
# print the global summary and suggested fixes, and delete the files.
# Arguments: $1 - start time (epoch seconds), $2 - end time (epoch seconds),
#            $3 - newline-separated category names
display_verification_summary() {
  local start_time="$1"
  local end_time="$2"
  local categories="$3"
  local elapsed=$((end_time - start_time))
  local minutes=$((elapsed / 60))
  local seconds=$((elapsed % 60))

  # Aggregate statistics from all categories
  local total_models=0 total_downloaded=0 total_missing=0
  local total_linked=0 total_broken=0 total_not_linked=0
  local total_size_bytes=0 expected_size_bytes=0

  local category stats_file
  local n_down n_miss n_link n_broken n_unlinked n_size n_expected _extra
  while IFS= read -r category; do
    stats_file="/tmp/verify_stats_${category}"
    [[ -f "$stats_file" ]] || continue
    # Record layout: downloaded|missing|linked|broken|not_linked|size|expected
    IFS='|' read -r n_down n_miss n_link n_broken n_unlinked n_size n_expected _extra \
      < "$stats_file" || true
    total_models=$((total_models + n_down + n_miss))
    total_downloaded=$((total_downloaded + n_down))
    total_missing=$((total_missing + n_miss))
    total_linked=$((total_linked + n_link))
    total_broken=$((total_broken + n_broken))
    total_not_linked=$((total_not_linked + n_unlinked))
    total_size_bytes=$((total_size_bytes + n_size))
    expected_size_bytes=$((expected_size_bytes + n_expected))
    rm -f "$stats_file"
  done <<< "$categories"

  # Percentages of the model total; all zero when there are no models.
  local pct_downloaded=0 pct_missing=0 pct_linked=0 pct_broken=0 pct_not_linked=0
  if (( total_models > 0 )); then
    pct_downloaded=$((total_downloaded * 100 / total_models))
    pct_missing=$((total_missing * 100 / total_models))
    pct_linked=$((total_linked * 100 / total_models))
    pct_broken=$((total_broken * 100 / total_models))
    pct_not_linked=$((total_not_linked * 100 / total_models))
  fi

  local total_size_human expected_size_human
  total_size_human=$(format_bytes "$total_size_bytes")
  expected_size_human=$(format_bytes "$expected_size_bytes")

  # 80-column rule drawn above and below the summary body.
  local rule
  printf -v rule '%*s' 80 ''
  rule=${rule// /${BOX_DOUBLE}}

  print_banner "VERIFICATION COMPLETE"
  echo -e "${BOLD_CYAN}${STAR} Global Summary${RESET}"
  echo -e "${CYAN}${rule}${RESET}"
  echo -e " ${BOLD_WHITE}Total Models:${RESET} ${total_models}"
  echo ""
  echo -e " ${BOLD_GREEN}✓ Downloaded:${RESET} ${total_downloaded} (${pct_downloaded}%)"
  echo -e " ${BOLD_RED}✗ Missing:${RESET} ${total_missing} (${pct_missing}%)"
  echo ""
  echo -e " ${BOLD_GREEN}✓ Properly Linked:${RESET} ${total_linked} (${pct_linked}%)"
  echo -e " ${BOLD_YELLOW}⚠ Broken Links:${RESET} ${total_broken} (${pct_broken}%)"
  echo -e " ${BOLD_YELLOW}○ Not Linked:${RESET} ${total_not_linked} (${pct_not_linked}%)"
  echo ""
  echo -e " ${BOLD_CYAN}📊 Disk Space Used:${RESET} ${total_size_human} / ${expected_size_human} expected"
  echo -e " ${BOLD_WHITE}Cache Directory:${RESET} ${CYAN}${CACHE_DIR}${RESET}"
  echo -e " ${BOLD_WHITE}ComfyUI Directory:${RESET} ${CYAN}${COMFYUI_DIR}${RESET}"
  echo -e " ${BOLD_WHITE}Duration:${RESET} ${BOLD_YELLOW}${minutes}m ${seconds}s${RESET}"
  echo -e "${CYAN}${rule}${RESET}"

  # Provide actionable suggestions
  local link_issues=$((total_broken + total_not_linked))
  if (( total_missing > 0 || total_broken > 0 || total_not_linked > 0 )); then
    echo -e "\n${BOLD_YELLOW}${WARNING} Issues Found - Suggested Actions:${RESET}\n"
    if (( total_missing > 0 )); then
      echo -e " ${BOLD_RED}${RESET} ${total_missing} model(s) not downloaded"
      echo -e " ${DIM}Fix:${RESET} ${CYAN}$0 download -c ${CONFIG_FILE}${RESET}"
    fi
    if (( link_issues > 0 )); then
      echo -e " ${BOLD_YELLOW}${RESET} ${link_issues} model(s) with link issues"
      echo -e " ${DIM}Fix:${RESET} ${CYAN}$0 link -c ${CONFIG_FILE}${RESET}"
    fi
    if (( total_missing > 0 && link_issues > 0 )); then
      echo -e " ${BOLD_CYAN}${RESET} Fix everything in one go:"
      echo -e " ${CYAN}$0 both -c ${CONFIG_FILE}${RESET}"
    fi
    echo ""
  else
    echo -e "\n${BOLD_GREEN}${SPARKLES} All models verified successfully! ${SPARKLES}${RESET}\n"
  fi
}
# Print the closing report for a download run.
#
# Arguments:
#   $1  start timestamp, in seconds
#   $2  end timestamp, in seconds
#   $3  number reported as "Total Downloaded"
#   $4  number reported as "Total Failed"; non-zero selects the warning footer
# Reads the color/icon globals and CACHE_DIR; writes everything to stdout.
display_summary() {
  local start_time="$1"
  local end_time="$2"
  local total_downloaded="$3"
  local total_failed="$4"

  local elapsed=$((end_time - start_time))
  local mins=$((elapsed / 60))
  local secs=$((elapsed % 60))

  print_banner "DOWNLOAD COMPLETE"

  # One 80-column rule, built once and reused above and below the figures.
  # '%.0s' prints nothing for each argument, so only BOX_LIGHT is repeated.
  local rule
  rule="${CYAN}$(printf '%.0s'"${BOX_LIGHT}" {1..80})${RESET}"

  echo -e "${BOLD_CYAN}${STAR} Summary${RESET}"
  echo -e "${rule}"
  echo -e " ${BOLD_WHITE}Total Downloaded:${RESET} ${BOLD_GREEN}${total_downloaded}${RESET} models"
  echo -e " ${BOLD_WHITE}Total Failed:${RESET} ${BOLD_RED}${total_failed}${RESET} models"
  echo -e " ${BOLD_WHITE}Cache Directory:${RESET} ${CYAN}${CACHE_DIR}${RESET}"
  echo -e " ${BOLD_WHITE}Duration:${RESET} ${BOLD_YELLOW}${mins}m ${secs}s${RESET}"
  echo -e "${rule}"

  # Footer: warn if anything failed, otherwise celebrate.
  if [[ $total_failed -ne 0 ]]; then
    echo -e "\n${BOLD_YELLOW}${WARNING} Some models failed to download. Check logs above.${RESET}\n"
  else
    echo -e "\n${BOLD_GREEN}${SPARKLES} All models downloaded successfully! ${SPARKLES}${RESET}\n"
  fi
}
# ============================================================================
# MAIN FUNCTION
# ============================================================================
# Turn a category key into a display label: underscores become spaces and the
# first character after each word boundary is upper-cased.
# Relies on GNU sed's \b and \u extensions.
#   $1  category key, e.g. "lora_models" -> "Lora Models"
_category_display_name() {
  sed -e 's/_/ /g' -e 's/\b\(.\)/\u\1/g' <<< "$1"
}

# Top-level driver.
#
# Prints the banner, runs the dependency and configuration checks, reads the
# category list from the config file and then dispatches on COMMAND:
#   verify      -> verify_category per category, then display_verification_summary
#   otherwise   -> process_category per category, then display_summary
#
# Globals read: COMMAND, CONFIG_FILE, color/icon variables.
# Exits 1 when CONFIG_FILE is empty or the config yields no categories.
main() {
  local start_time end_time
  start_time=$(date +%s)

  # Display beautiful banner
  if [[ "$COMMAND" == "verify" ]]; then
    print_banner "${SPARKLES} ComfyUI Model Verification ${SPARKLES}"
    echo -e "${BOLD_CYAN}Comprehensive Model Status Check${RESET}"
    echo -e "${DIM}Verify Downloads & Links ${LINK} Configuration-Driven ${STAR}${RESET}\n"
  else
    print_banner "${ROCKET} ComfyUI Model Downloader ${ROCKET}"
    echo -e "${BOLD_CYAN}A Beautiful CLI Tool for Downloading AI Models${RESET}"
    echo -e "${DIM}Powered by HuggingFace ${LINK} Configuration-Driven ${STAR}${RESET}\n"
  fi

  # Pre-flight checks. The order (dependencies, config validation, then the
  # empty-path guard) is kept exactly as before.
  check_dependencies
  validate_config

  if [[ -z "$CONFIG_FILE" ]]; then
    print_error "No configuration file specified. Use -c/--config to provide one."
    exit 1
  fi

  # One category key per line.
  local categories
  categories=$(parse_yaml "$CONFIG_FILE" "categories")
  if [[ -z "$categories" ]]; then
    print_error "No model categories found in configuration"
    exit 1
  fi

  # Declared local so the loop variables do not leak into the global scope.
  local category category_display

  if [[ "$COMMAND" == "verify" ]]; then
    # Read-only pass: check every category, then print the global report.
    while IFS= read -r category; do
      category_display=$(_category_display_name "$category")
      verify_category "$category" "$category_display"
    done <<< "$categories"

    end_time=$(date +%s)
    display_verification_summary "$start_time" "$end_time" "$categories"
  else
    # Download/link pass.
    # NOTE: total_succeeded counts categories that process_category returned
    # from, not individual models, and total_failed is never incremented here.
    # Accurate per-model numbers would have to be reported by process_category.
    local total_succeeded=0
    local total_failed=0

    while IFS= read -r category; do
      category_display=$(_category_display_name "$category")
      process_category "$category" "$category_display"
      total_succeeded=$((total_succeeded + 1))
    done <<< "$categories"

    end_time=$(date +%s)
    display_summary "$start_time" "$end_time" "$total_succeeded" "$total_failed"
  fi
}
# ============================================================================
# ENTRY POINT
# ============================================================================
# Parse command line arguments

# Abort with a usage hint unless the option in $1 is followed by a value.
# Call as: _require_option_value "$@" (the remaining, unparsed arguments).
# Without this guard a trailing "-c" would die on an unbound "$2" under
# `set -u` instead of telling the user what is missing.
_require_option_value() {
  if [[ $# -lt 2 ]]; then
    print_error "Option $1 requires a value"
    echo "Use --help for usage information"
    exit 1
  fi
}

# Consume the script's arguments and fill in the globals that main reads.
#
# Globals written:
#   CONFIG_FILE      from -c/--config, or from the first bare positional
#                    argument (the positional form wins when both are given)
#   CACHE_DIR        from --cache-dir
#   COMFYUI_DIR      from --comfyui-dir
#   COMMAND          from one of: download | link | both | verify
#   POSITIONAL_ARGS  every argument that is neither an option nor a command
#
# Exits 0 after printing help (-h/--help); exits 1 on an unknown option or an
# option that is missing its value.
parse_args() {
  POSITIONAL_ARGS=()

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -c|--config)
        _require_option_value "$@"
        CONFIG_FILE="$2"
        shift 2
        ;;
      --cache-dir)
        _require_option_value "$@"
        CACHE_DIR="$2"
        shift 2
        ;;
      --comfyui-dir)
        _require_option_value "$@"
        COMFYUI_DIR="$2"
        shift 2
        ;;
      download|link|both|verify)
        COMMAND="$1"
        shift
        ;;
      -h|--help)
        echo "Usage: $0 [COMMAND] [options]"
        echo ""
        echo "Commands:"
        echo " download Download models only"
        echo " link Create symlinks only (models must already be downloaded)"
        echo " both Download and create symlinks (default)"
        echo " verify Verify model downloads and links (read-only)"
        echo ""
        echo "Options:"
        echo " -c, --config FILE Configuration file (required)"
        echo " --cache-dir DIR Cache directory (default: auto-detect)"
        echo " RunPod: /workspace/huggingface_cache"
        echo " Local: ~/.cache/huggingface"
        echo " --comfyui-dir DIR ComfyUI models directory (default: auto-detect)"
        echo " RunPod: /workspace/ComfyUI/models"
        echo " Local: ~/ComfyUI/models"
        echo " -h, --help Show this help message"
        echo ""
        echo "Examples:"
        echo " $0 download -c models.yaml"
        echo " $0 link --comfyui-dir /opt/ComfyUI/models -c models.yaml"
        echo " $0 both -c models.yaml --cache-dir /data/hf-cache"
        echo " $0 verify -c models.yaml"
        echo ""
        echo "Verify Command:"
        echo " The verify command performs comprehensive checks on all models:"
        echo " - Download status (file existence, size, location)"
        echo " - Link status (valid, broken, or missing symlinks)"
        echo " - Size mismatch warnings (actual vs expected)"
        echo " - Disk space usage statistics"
        echo " - Actionable fix suggestions"
        exit 0
        ;;
      -*)
        print_error "Unknown option: $1"
        echo "Use --help for usage information"
        exit 1
        ;;
      *)
        POSITIONAL_ARGS+=("$1")
        shift
        ;;
    esac
  done

  # Handle positional argument (config file path). Only the first one is
  # used; any further positional arguments are ignored.
  if [[ ${#POSITIONAL_ARGS[@]} -gt 0 ]]; then
    CONFIG_FILE="${POSITIONAL_ARGS[0]}"
  fi
}

parse_args "$@"

# Run main function
main