diff --git a/artifact_civitai_download.sh b/artifact_civitai_download.sh index 7fea697..14d23bf 100755 Binary files a/artifact_civitai_download.sh and b/artifact_civitai_download.sh differ diff --git a/artifact_huggingface_download.sh b/artifact_huggingface_download.sh index 7c2cf9e..5e4fbf5 100755 --- a/artifact_huggingface_download.sh +++ b/artifact_huggingface_download.sh @@ -1,40 +1,24 @@ #!/bin/bash # -# ComfyUI Model Downloader - A Beautiful CLI Tool -# Downloads AI models from HuggingFace and creates symlinks to ComfyUI directories +# HuggingFace Model Downloader - A Beautiful CLI Tool +# Downloads AI models from HuggingFace and creates symlinks to output directories # # Usage: ./artifact_huggingface_download.sh [COMMAND] [OPTIONS] # # Commands: -# download Download models from HuggingFace to cache directory -# link Create symlinks from cache to ComfyUI models directory -# both Download and link (default) -# verify Verify symlinks in ComfyUI models directory +# download Download models to cache directory (default) +# link Create symlinks from cache to output directory +# verify Verify symlinks in output directory # # Options: -# -c, --config FILE Path to YAML configuration file -# --cache-dir DIR HuggingFace cache directory (default: auto-detect) -# --comfyui-dir DIR ComfyUI installation directory -# --filter-repo REPO... Only process specific repositories -# --category CAT1[,CAT2] Filter by category (comma-separated for multiple) -# --cleanup, --clean Remove unused cache files (link/both only) -# --dry-run, -n Show what would be done without making changes -# -# Examples: -# # Download and link all models from config -# ./artifact_huggingface_download.sh both -c models.yaml -# -# # Download only specific categories -# ./artifact_huggingface_download.sh download -c models.yaml --category image_models,video_models -# -# # Link with cleanup (remove unused cache files) -# ./artifact_huggingface_download.sh link -c models.yaml --cleanup -# -# # Dry-run to preview operations -# ./artifact_huggingface_download.sh both -c models.yaml --dry-run -# -# # Process only specific repositories -# ./artifact_huggingface_download.sh both -c models.yaml --filter-repo black-forest-labs/FLUX.1-schnell +# -c, --config FILE Configuration YAML file (required) +# --cache-dir DIR Cache directory +# --output-dir DIR Output/installation directory +# --category CAT1,CAT2 Filter by category (comma-separated) +# --repo-id ID1,ID2 Filter by repo_id (comma-separated) +# --auth-token TOKEN HuggingFace token +# -n, --dry-run Show what would be done +# -h, --help Show help # set -euo pipefail @@ -43,21 +27,17 @@ set -euo pipefail # COLOR PALETTE - Beautiful Terminal Colors # ============================================================================ -# Reset RESET='\033[0m' # Foreground Colors -BLACK='\033[0;30m' RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[0;33m' BLUE='\033[0;34m' MAGENTA='\033[0;35m' CYAN='\033[0;36m' -WHITE='\033[0;37m' # Bold -BOLD_BLACK='\033[1;30m' BOLD_RED='\033[1;31m' BOLD_GREEN='\033[1;32m' BOLD_YELLOW='\033[1;33m' @@ -66,121 +46,98 @@ BOLD_MAGENTA='\033[1;35m' BOLD_CYAN='\033[1;36m' BOLD_WHITE='\033[1;37m' -# Background Colors -BG_BLACK='\033[40m' -BG_RED='\033[41m' -BG_GREEN='\033[42m' -BG_YELLOW='\033[43m' -BG_BLUE='\033[44m' -BG_MAGENTA='\033[45m' +# Background BG_CYAN='\033[46m' -BG_WHITE='\033[47m' # Styles DIM='\033[2m' -ITALIC='\033[3m' -UNDERLINE='\033[4m' -BLINK='\033[5m' -REVERSE='\033[7m' # ============================================================================ -# UNICODE CHARACTERS - Make it Pretty +# UNICODE CHARACTERS # ============================================================================ CHECK_MARK="โœ“" CROSS_MARK="โœ—" ROCKET="๐Ÿš€" -PACKAGE="๐Ÿ“ฆ" DOWNLOAD="โฌ‡๏ธ" -SPARKLES="โœจ" -FIRE="๐Ÿ”ฅ" -CLOCK="โฑ๏ธ" -FOLDER="๐Ÿ“" -LINK="๐Ÿ”—" -STAR="โญ" +LINK_ICON="๐Ÿ”—" WARNING="โš ๏ธ" INFO="โ„น๏ธ" +SPARKLES="โœจ" ARROW_RIGHT="โ†’" -DOUBLE_ARROW="ยป" BOX_LIGHT="โ”€" -BOX_HEAVY="โ”" BOX_DOUBLE="โ•" +PACKAGE="๐Ÿ“ฆ" # ============================================================================ # CONFIGURATION # ============================================================================ -# Script directory -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" +CONFIG_FILE="" +COMMAND="download" +DRY_RUN=false +CATEGORY_FILTER="" +REPO_ID_FILTER="" -# Default configuration file path -# Try multiple possible locations -if [[ -f "${HOME}/Projects/runpod/comfyui_models.yaml" ]]; then - CONFIG_FILE="${HOME}/Projects/runpod/comfyui_models.yaml" -elif [[ -f "${PROJECT_ROOT}/comfyui_models.yaml" ]]; then - CONFIG_FILE="${PROJECT_ROOT}/comfyui_models.yaml" -elif [[ -f "${SCRIPT_DIR}/comfyui_models.yaml" ]]; then - CONFIG_FILE="${SCRIPT_DIR}/comfyui_models.yaml" -else - CONFIG_FILE="" # No config file by default -fi - -# Default cache directory - detect RunPod or use HuggingFace default +# Default directories - detect RunPod or local if [[ -d "/workspace" ]]; then - # RunPod environment CACHE_DIR="${CACHE_DIR:-/workspace/huggingface_cache}" - COMFYUI_DIR="${COMFYUI_DIR:-/workspace/ComfyUI/models}" + OUTPUT_DIR="${OUTPUT_DIR:-/workspace/ComfyUI/models}" else - # Local environment - CACHE_DIR="${CACHE_DIR:-${HOME}/.cache/huggingface}" - COMFYUI_DIR="${COMFYUI_DIR:-${HOME}/ComfyUI/models}" + CACHE_DIR="${CACHE_DIR:-${HOME}/.cache/huggingface/hub}" + OUTPUT_DIR="${OUTPUT_DIR:-${HOME}/ComfyUI/models}" fi -# Default command -COMMAND="both" - -# Feature flags -CATEGORY_FILTER="" # Empty = all categories, or comma-separated list -CLEANUP_MODE=false # Remove unused files from HuggingFace cache -DRY_RUN=false # Simulate operations without making changes - -# HuggingFace token from environment or .env file -# Initialize HF_TOKEN if not set +# HuggingFace token from environment HF_TOKEN="${HF_TOKEN:-}" -# Try multiple locations for .env file -if [[ -z "${HF_TOKEN}" ]] && [[ -f "${PROJECT_ROOT}/ai/.env" ]]; then - HF_TOKEN=$(grep ^HF_TOKEN "${PROJECT_ROOT}/ai/.env" | cut -d'=' -f2- | tr -d '"' | tr -d "'" || true) -fi -if [[ -z "${HF_TOKEN}" ]] && [[ -f "${PROJECT_ROOT}/.env" ]]; then - HF_TOKEN=$(grep ^HF_TOKEN "${PROJECT_ROOT}/.env" | cut -d'=' -f2- | tr -d '"' | tr -d "'" || true) -fi -if [[ -z "${HF_TOKEN}" ]] && [[ -f "/workspace/ai/.env" ]]; then - HF_TOKEN=$(grep ^HF_TOKEN "/workspace/ai/.env" | cut -d'=' -f2- | tr -d '"' | tr -d "'" || true) -fi +# Try to load from .env files +load_env_token() { + if [[ -n "$HF_TOKEN" ]]; then + return 0 + fi + + local env_files=( + "${HOME}/.env" + "${HOME}/Projects/runpod/.env" + "${HOME}/Projects/runpod/ai/.env" + "/workspace/.env" + "/workspace/ai/.env" + ) + + for env_file in "${env_files[@]}"; do + if [[ -f "$env_file" ]]; then + local token + token=$(grep "^HF_TOKEN=" "$env_file" 2>/dev/null | cut -d'=' -f2- | tr -d '"' | tr -d "'" || true) + if [[ -n "$token" ]]; then + HF_TOKEN="$token" + return 0 + fi + fi + done +} # ============================================================================ -# UTILITY FUNCTIONS - The Magic Happens Here +# LOGGING FUNCTIONS # ============================================================================ -# Print functions with beautiful formatting print_banner() { local text="$1" - local width=80 - local padding=$(( (width - ${#text} - 2) / 2 )) + local width=70 + local text_len=${#text} + local padding=$(( (width - text_len) / 2 )) - echo -e "" - echo -e "${BOLD_CYAN}${BOX_DOUBLE}$(printf '%.0s'"${BOX_DOUBLE}" $(seq 1 $width))${BOX_DOUBLE}${RESET}" - echo -e "${BOLD_CYAN}${BOX_DOUBLE}$(printf '%.0s ' $(seq 1 $padding))${BOLD_MAGENTA}${text}$(printf '%.0s ' $(seq 1 $padding))${BOLD_CYAN}${BOX_DOUBLE}${RESET}" - echo -e "${BOLD_CYAN}${BOX_DOUBLE}$(printf '%.0s'"${BOX_DOUBLE}" $(seq 1 $width))${BOX_DOUBLE}${RESET}" - echo -e "" + echo "" + echo -e "${BOLD_CYAN}${BOX_DOUBLE}$(printf '%0.sโ•' $(seq 1 $width))${BOX_DOUBLE}${RESET}" + echo -e "${BOLD_CYAN}โ•‘$(printf '%*s' $padding '')${BOLD_MAGENTA}${text}$(printf '%*s' $((width - padding - text_len)) '')${BOLD_CYAN}โ•‘${RESET}" + echo -e "${BOLD_CYAN}${BOX_DOUBLE}$(printf '%0.sโ•' $(seq 1 $width))${BOX_DOUBLE}${RESET}" + echo "" } print_section() { local text="$1" - echo -e "\n${BOLD_YELLOW}${DOUBLE_ARROW} ${text}${RESET}" - echo -e "${CYAN}$(printf '%.0s'"${BOX_LIGHT}" $(seq 1 80))${RESET}" + echo -e "\n${BOLD_CYAN}ยป ${text}${RESET}" + echo -e "${CYAN}$(printf '%0.sโ”€' $(seq 1 70))${RESET}" } print_success() { @@ -203,1382 +160,555 @@ print_step() { local current="$1" local total="$2" local text="$3" - echo -e "${BOLD_BLUE}[${current}/${total}]${RESET} ${MAGENTA}${DOWNLOAD}${RESET} ${text}" + echo -e "${BOLD_BLUE}[${current}/${total}]${RESET} ${CYAN}${PACKAGE}${RESET} ${text}" } print_detail() { echo -e " ${DIM}${CYAN}${ARROW_RIGHT} $1${RESET}" } -# Progress bar function show_progress() { local current="$1" local total="$2" - local width=50 + local width=40 local percentage=$((current * 100 / total)) local filled=$((current * width / total)) local empty=$((width - filled)) printf "\r ${BOLD_CYAN}Progress: ${RESET}[" - printf "${BG_GREEN}${BOLD_WHITE}%${filled}s${RESET}" | tr ' ' 'โ–ˆ' + printf "${BG_CYAN}${BOLD_WHITE}%${filled}s${RESET}" | tr ' ' 'โ–ˆ' printf "${DIM}%${empty}s${RESET}" | tr ' ' 'โ–‘' printf "] ${BOLD_YELLOW}%3d%%${RESET} ${DIM}(%d/%d)${RESET}" "$percentage" "$current" "$total" } -# Parse YAML (simple implementation) -parse_yaml() { - local yaml_file="$1" - local category="$2" +# ============================================================================ +# YAML PARSING (using yq) +# ============================================================================ - python3 - "$yaml_file" "$category" < /dev/null; then - missing_deps+=("python3") - fi - - # Check pip - if ! command -v pip3 &> /dev/null; then - missing_deps+=("pip3") - fi - - # Check required Python packages - if ! python3 -c "import yaml" 2>/dev/null; then - print_warning "PyYAML not installed, installing..." - pip3 install pyyaml -q - fi - - if ! python3 -c "import huggingface_hub" 2>/dev/null; then - print_warning "huggingface_hub not installed, installing..." - pip3 install huggingface_hub -q - fi - - if [[ ${#missing_deps[@]} -gt 0 ]]; then - print_error "Missing dependencies: ${missing_deps[*]}" +check_yq() { + if ! command -v yq &>/dev/null; then + print_error "yq is not installed. Please install yq first." + print_info "Install: https://github.com/mikefarah/yq" exit 1 fi - - print_success "All dependencies satisfied" } -# Validate configuration -validate_config() { - print_section "Validating Configuration" +# Get total count of models +get_model_count() { + local config="$1" + yq eval '. | length' "$config" 2>/dev/null || echo "0" +} - # Show current command - print_info "Command: ${BOLD_CYAN}${COMMAND}${RESET}" +# Get model field at index +get_model_field() { + local config="$1" + local index="$2" + local field="$3" + local value + value=$(yq eval ".[$index].$field // \"\"" "$config" 2>/dev/null) + echo "$value" | sed 's/^"//;s/"$//' +} - if [[ -n "$CONFIG_FILE" ]]; then - if [[ ! -f "$CONFIG_FILE" ]]; then - print_error "Configuration file not found: $CONFIG_FILE" - exit 1 - fi - print_success "Configuration file found: ${CYAN}${CONFIG_FILE}${RESET}" - else - print_warning "No configuration file specified" - fi +# Get files array length for a model +get_files_count() { + local config="$1" + local index="$2" + yq eval ".[$index].files | length" "$config" 2>/dev/null || echo "0" +} - # HF_TOKEN only required for download and both commands - if [[ "$COMMAND" == "download" ]] || [[ "$COMMAND" == "both" ]]; then - if [[ -z "$HF_TOKEN" ]]; then - print_error "HF_TOKEN not set. Please set it in .env file or environment." - exit 1 - fi - print_success "HuggingFace token configured: ${DIM}${HF_TOKEN:0:10}...${RESET}" - elif [[ "$COMMAND" == "verify" ]]; then - print_info "Verify mode: HuggingFace token not required" - fi +# Get file mapping at index +get_file_field() { + local config="$1" + local model_index="$2" + local file_index="$3" + local field="$4" + local value + value=$(yq eval ".[$model_index].files[$file_index].$field // \"\"" "$config" 2>/dev/null) + echo "$value" | sed 's/^"//;s/"$//' +} - # Validate flag combinations - if [[ "$CLEANUP_MODE" == true ]] && [[ ! "$COMMAND" =~ ^(link|both)$ ]]; then - print_error "--cleanup can only be used with 'link' or 'both' commands" - exit 1 - fi +# Check if model matches filters +matches_filters() { + local repo_id="$1" + local category="$2" - # Cache directory - if [[ "$COMMAND" == "download" ]] || [[ "$COMMAND" == "both" ]]; then - if [[ ! -d "$CACHE_DIR" ]]; then - print_info "Creating cache directory: ${CYAN}${CACHE_DIR}${RESET}" - mkdir -p "$CACHE_DIR" - fi - print_success "Cache directory ready: ${CYAN}${CACHE_DIR}${RESET}" - else - # For link and verify commands, just show the directory - if [[ -d "$CACHE_DIR" ]]; then - print_success "Cache directory found: ${CYAN}${CACHE_DIR}${RESET}" - else - print_warning "Cache directory not found: ${CYAN}${CACHE_DIR}${RESET}" - fi - fi - - # ComfyUI directory - if [[ "$COMMAND" == "link" ]] || [[ "$COMMAND" == "both" ]] || [[ "$COMMAND" == "verify" ]]; then - if [[ -d "$COMFYUI_DIR" ]]; then - print_success "ComfyUI directory found: ${CYAN}${COMFYUI_DIR}${RESET}" - else - if [[ "$COMMAND" == "verify" ]]; then - print_warning "ComfyUI directory not found: ${CYAN}${COMFYUI_DIR}${RESET}" - else - print_info "ComfyUI directory: ${CYAN}${COMFYUI_DIR}${RESET}" + # Check category filter + if [[ -n "$CATEGORY_FILTER" ]]; then + local match=false + IFS=',' read -ra cats <<< "$CATEGORY_FILTER" + for cat in "${cats[@]}"; do + cat=$(echo "$cat" | xargs) + if [[ "$category" == "$cat" ]]; then + match=true + break fi - fi - fi -} - -# Find model files in HuggingFace cache -find_model_files() { - local repo_id="$1" - local filename_filter="$2" - - python3 - "$CACHE_DIR" "$repo_id" "$filename_filter" </dev/null | grep "/$filename_only$" | head -n1) - - if [[ -z "$source_file" ]] || [[ ! -f "$source_file" ]]; then - print_warning "Source file not found: ${source_pattern}" - continue - fi - - # Construct full link path with directory included in dest_path - local link_path="${COMFYUI_DIR}/${dest_path}" - local link_dir=$(dirname "$link_path") - - # Ensure directory exists - if [[ ! -d "$link_dir" ]]; then - mkdir -p "$link_dir" - fi - - # Remove existing symlink or file if it exists - if [[ -L "$link_path" ]]; then - rm -f "$link_path" - elif [[ -e "$link_path" ]]; then - print_warning "File already exists (not a symlink): ${dest_path}" - continue - fi - - # Create symlink - ln -s "$source_file" "$link_path" - print_detail "${LINK} Linked: ${DIM}${dest_path}${RESET}" - linked_count=$((linked_count+1)) - done <<< "$file_mappings" - else - # Fallback: use automatic prefixing for files without explicit mappings - print_detail "No file mappings found, using automatic prefixing" - - # Extract model name from repo_id for prefixing filenames - # e.g., "facebook/musicgen-medium" -> "musicgen-medium" - local model_name=$(echo "$repo_id" | sed 's/.*\///') - - while IFS= read -r source_file; do - if [[ -f "$source_file" ]]; then - local filename=$(basename "$source_file") - - # Add model name prefix to filename for better organization - # e.g., "pytorch_model.bin" -> "musicgen-medium-pytorch_model.bin" - local prefixed_filename="${model_name}-${filename}" - local link_path="${target_dir}/${prefixed_filename}" - - # Remove existing symlink or file if it exists - if [[ -L "$link_path" ]]; then - rm -f "$link_path" - elif [[ -e "$link_path" ]]; then - print_warning "File already exists (not a symlink): ${prefixed_filename}" - continue - fi - - # Create symlink - ln -s "$source_file" "$link_path" - print_detail "${LINK} Linked: ${DIM}${prefixed_filename}${RESET}" - linked_count=$((linked_count+1)) - fi - done <<< "$model_files" - fi - - if [[ $linked_count -gt 0 ]]; then - print_success "Linked ${linked_count} file(s) for ${BOLD_WHITE}${repo_id}${RESET}" - return 0 - else - print_error "Failed to link files for ${repo_id}" - return 1 - fi -} - -# Cleanup unused cache files that aren't symlinked -cleanup_unused_cache_files() { - local repo_id="$1" - local cache_dir="$2" - local comfyui_dir="$3" - local file_mappings="$4" - - # Find the latest snapshot directory for this repo - local repo_cache_dir="${cache_dir}/models--${repo_id//\//--}" - if [[ ! -d "$repo_cache_dir" ]]; then - print_warning "Cache directory not found for $repo_id" - return 1 - fi - - print_info "Analyzing cache for ${BOLD_WHITE}${repo_id}${RESET}..." - - # Use Python to clean up old snapshots AND non-whitelisted files in latest snapshot - local cleanup_result - cleanup_result=$(python3 - "$repo_cache_dir" "$comfyui_dir" "$file_mappings" <<'EOPYCLEANUP' -import os -import sys -import shutil -from pathlib import Path - -repo_cache = Path(sys.argv[1]) -comfyui_dir = Path(sys.argv[2]) -file_mappings_str = sys.argv[3] if len(sys.argv) > 3 else "" - -# Parse whitelist from file_mappings (format: "source|dest\nsource|dest\n...") -whitelist_sources = set() -if file_mappings_str: - for line in file_mappings_str.strip().split('\n'): - if '|' in line: - source = line.split('|')[0].strip() - if source: - whitelist_sources.add(source) - -# Essential HuggingFace metadata files to always preserve -ESSENTIAL_FILES = { - '.gitattributes', - 'README.md', - 'model_index.json', - '.huggingface', - 'config.json' -} - -# Find latest snapshot -snapshots_dir = repo_cache / 'snapshots' -if not snapshots_dir.exists(): - sys.exit(0) - -snapshots = sorted(snapshots_dir.iterdir(), key=lambda x: x.stat().st_mtime, reverse=True) -if not snapshots: - sys.exit(0) - -latest_snapshot = snapshots[0] -old_snapshots = snapshots[1:] # All snapshots except the latest - -# Calculate size of old snapshot directories -old_snapshot_size = 0 -old_snapshot_paths = [] -for old_snap in old_snapshots: - try: - # Calculate size of old snapshot directory - for file_path in old_snap.rglob('*'): - if file_path.is_file(): - old_snapshot_size += file_path.stat().st_size - old_snapshot_paths.append(str(old_snap)) - except Exception: - pass - -# Find non-whitelisted files in latest snapshot -unwanted_files = [] -unwanted_size = 0 -if whitelist_sources: - for item in latest_snapshot.rglob('*'): - if item.is_file(): - # Get relative path from snapshot root - rel_path = str(item.relative_to(latest_snapshot)) - - # Check if this file is in whitelist or is essential - is_whitelisted = False - - # Check exact match first - if rel_path in whitelist_sources: - is_whitelisted = True - else: - # Check if any whitelist entry matches this file - # (handles cases where whitelist has paths like "split_files/diffusion_models/file.safetensors") - for whitelisted in whitelist_sources: - if rel_path == whitelisted or rel_path.endswith('/' + whitelisted): - is_whitelisted = True - break - - # Check if it's an essential file - if item.name in ESSENTIAL_FILES: - is_whitelisted = True - - # If not whitelisted, mark for deletion - if not is_whitelisted: - unwanted_files.append(str(item)) - unwanted_size += item.stat().st_size - -# Output results: old_snapshot_count|old_snapshot_size|unwanted_files_count|unwanted_size -print(f"{len(old_snapshot_paths)}|{old_snapshot_size}|{len(unwanted_files)}|{unwanted_size}") -for snap in old_snapshot_paths: - print(snap) -for unwanted_file in unwanted_files: - print(unwanted_file) -EOPYCLEANUP -) - - # Parse results - local first_line - first_line=$(echo "$cleanup_result" | head -n 1) - local snapshot_count - snapshot_count=$(echo "$first_line" | cut -d'|' -f1) - local snapshot_bytes - snapshot_bytes=$(echo "$first_line" | cut -d'|' -f2) - local unwanted_count - unwanted_count=$(echo "$first_line" | cut -d'|' -f3) - local unwanted_bytes - unwanted_bytes=$(echo "$first_line" | cut -d'|' -f4) - - # Check if there's anything to clean - if [[ "$snapshot_count" -eq 0 ]] && [[ "$unwanted_count" -eq 0 ]]; then - print_success "No cleanup needed - cache is optimal" - return 0 - fi - - # Convert bytes to MB - local snapshot_mb - snapshot_mb=$(echo "scale=2; $snapshot_bytes / 1048576" | bc) - local unwanted_mb - unwanted_mb=$(echo "scale=2; $unwanted_bytes / 1048576" | bc) - local total_mb - total_mb=$(echo "scale=2; ($snapshot_bytes + $unwanted_bytes) / 1048576" | bc) - - # Get list of items to delete (skip first line which is summary) - # First snapshot_count lines are old snapshots, remaining lines are unwanted files - local all_items - all_items=$(echo "$cleanup_result" | tail -n +2) - - local snapshots_to_delete - if [[ "$snapshot_count" -gt 0 ]]; then - snapshots_to_delete=$(echo "$all_items" | head -n "$snapshot_count") - else - snapshots_to_delete="" - fi - - local files_to_delete - if [[ "$unwanted_count" -gt 0 ]]; then - files_to_delete=$(echo "$all_items" | tail -n "$unwanted_count") - else - files_to_delete="" - fi - - if [[ "$DRY_RUN" == true ]]; then - if [[ "$snapshot_count" -gt 0 ]]; then - print_warning "DRY-RUN: Would clean up ${BOLD_YELLOW}${snapshot_count}${RESET} old snapshot(s) (~${snapshot_mb} MB)" - if [[ -n "$snapshots_to_delete" ]]; then - print_detail "Old snapshots that would be deleted:" - while IFS= read -r snapshot; do - local basename - basename=$(basename "$snapshot") - print_detail " ${CROSS_MARK} Would delete snapshot: ${DIM}${basename}${RESET}" - done <<< "$snapshots_to_delete" - fi - fi - - if [[ "$unwanted_count" -gt 0 ]]; then - print_warning "DRY-RUN: Would clean up ${BOLD_YELLOW}${unwanted_count}${RESET} non-whitelisted file(s) (~${unwanted_mb} MB)" - if [[ -n "$files_to_delete" ]]; then - print_detail "Non-whitelisted files that would be deleted (showing first 10):" - echo "$files_to_delete" | head -n 10 | while IFS= read -r file; do - local basename - basename=$(basename "$file") - print_detail " ${CROSS_MARK} Would delete file: ${DIM}${basename}${RESET}" - done - if [[ "$unwanted_count" -gt 10 ]]; then - print_detail " ${DIM}... and $((unwanted_count - 10)) more${RESET}" - fi - fi - fi - - print_info "Total space that would be freed: ~${total_mb} MB" - return 0 - fi - - # Actually delete items - local deleted_snapshots=0 - local deleted_files=0 - - # Delete old snapshot directories - if [[ "$snapshot_count" -gt 0 ]]; then - print_warning "Cleaning up ${BOLD_YELLOW}${snapshot_count}${RESET} old snapshot(s) (~${snapshot_mb} MB)..." - while IFS= read -r snapshot; do - if [[ -d "$snapshot" ]]; then - rm -rf "$snapshot" && deleted_snapshots=$((deleted_snapshots+1)) - fi - done <<< "$snapshots_to_delete" - fi - - # Delete non-whitelisted files - if [[ "$unwanted_count" -gt 0 ]]; then - print_warning "Cleaning up ${BOLD_YELLOW}${unwanted_count}${RESET} non-whitelisted file(s) (~${unwanted_mb} MB)..." - while IFS= read -r file; do - if [[ -f "$file" ]]; then - rm -f "$file" && deleted_files=$((deleted_files+1)) - fi - done <<< "$files_to_delete" - fi - - # Report results - local success=true - if [[ "$snapshot_count" -gt 0 ]] && [[ $deleted_snapshots -eq $snapshot_count ]]; then - print_success "Cleaned up ${deleted_snapshots} old snapshot(s), freed ~${snapshot_mb} MB" - elif [[ "$snapshot_count" -gt 0 ]]; then - print_warning "Cleaned up ${deleted_snapshots}/${snapshot_count} old snapshots" - success=false - fi - - if [[ "$unwanted_count" -gt 0 ]] && [[ $deleted_files -eq $unwanted_count ]]; then - print_success "Cleaned up ${deleted_files} non-whitelisted file(s), freed ~${unwanted_mb} MB" - elif [[ "$unwanted_count" -gt 0 ]]; then - print_warning "Cleaned up ${deleted_files}/${unwanted_count} non-whitelisted files" - success=false - fi - - if [[ "$snapshot_count" -gt 0 ]] || [[ "$unwanted_count" -gt 0 ]]; then - print_info "Total space freed: ~${total_mb} MB" - fi - - if $success; then - return 0 - else - return 1 - fi -} - -# ============================================================================ -# VERIFICATION FUNCTIONS - Model Status Checking -# ============================================================================ - -# Get actual disk usage for a model's files -get_model_disk_usage() { - local model_files="$1" - - if [[ -z "$model_files" ]]; then - echo "0" - return - fi - - local total_bytes=0 - while IFS= read -r file_path; do - if [[ -f "$file_path" ]] || [[ -L "$file_path" ]]; then - local file_size - # Use -L to follow symlinks (HuggingFace uses symlinks to blobs) - # Try Linux stat first, then macOS stat - if stat -L -c "%s" "$file_path" >/dev/null 2>&1; then - file_size=$(stat -L -c "%s" "$file_path" 2>/dev/null) - elif stat -L -f "%z" "$file_path" >/dev/null 2>&1; then - file_size=$(stat -L -f "%z" "$file_path" 2>/dev/null) - else - file_size=0 - fi - total_bytes=$((total_bytes + file_size)) - fi - done <<< "$model_files" - - echo "$total_bytes" -} - -# Format bytes to human-readable size -format_bytes() { - local bytes="$1" - - if (( bytes < 1024 )); then - echo "${bytes} B" - elif (( bytes < 1048576 )); then - echo "$(( bytes / 1024 )) KB" - elif (( bytes < 1073741824 )); then - printf "%.2f MB" "$(bc <<< "scale=2; $bytes / 1048576")" - else - printf "%.2f GB" "$(bc <<< "scale=2; $bytes / 1073741824")" - fi -} - -# Verify if model is downloaded -verify_model_download() { - local repo_id="$1" - local expected_size_gb="$2" - local filename_filter="$3" - - # Find model files in cache - # Capture both stdout (file paths) and stderr (error messages) - local find_output - find_output=$(find_model_files "$repo_id" "$filename_filter" 2>&1) - - # Separate file paths from error/debug messages - local model_files - model_files=$(echo "$find_output" | grep -v "^ERROR:" | grep -v "^WARN:" | grep -v "^DEBUG:") - - # Extract error messages for logging - local error_msgs - error_msgs=$(echo "$find_output" | grep "^ERROR:\|^WARN:\|^DEBUG:") - - if [[ -z "$model_files" ]]; then - # Log error messages to stderr if they exist - if [[ -n "$error_msgs" ]]; then - echo "$error_msgs" >&2 - fi - echo "NOT_FOUND|0|0|" - return 1 - fi - - # Count files - local file_count - file_count=$(echo "$model_files" | wc -l | tr -d ' ') - - # Get actual size - local actual_bytes - actual_bytes=$(get_model_disk_usage "$model_files") - - # Get cache path (first file's directory) - local cache_path - cache_path=$(echo "$model_files" | head -n1 | xargs dirname) - - # Get modification time of first file - local mod_time="Unknown" - if [[ -n "$model_files" ]]; then - local first_file - first_file=$(echo "$model_files" | head -n1) - if [[ -f "$first_file" ]]; then - # Try Linux stat first (most common), then macOS stat - if stat -c "%y" "$first_file" >/dev/null 2>&1; then - mod_time=$(stat -c "%y" "$first_file" 2>/dev/null | cut -d'.' -f1) - elif stat -f "%Sm" "$first_file" >/dev/null 2>&1; then - mod_time=$(stat -f "%Sm" -t "%Y-%m-%d %H:%M:%S" "$first_file" 2>/dev/null) - fi - fi - fi - - echo "FOUND|${actual_bytes}|${file_count}|${cache_path}|${mod_time}" - return 0 -} - -# Verify model symlinks -verify_model_links() { - local repo_id="$1" - local filename_filter="$2" - local file_mappings="$3" - - local total_links=0 - local valid_links=0 - local broken_links=0 - local link_details="" - - # Check if explicit file mappings exist - if [[ -n "$file_mappings" ]]; then - # Process each file mapping to verify links - while IFS='|' read -r source_pattern dest_path; do - if [[ -z "$source_pattern" ]] || [[ -z "$dest_path" ]]; then - continue - fi - - total_links=$((total_links + 1)) - local link_path="${COMFYUI_DIR}/${dest_path}" - - if [[ -L "$link_path" ]]; then - # Symlink exists, check if it's valid - if [[ -e "$link_path" ]]; then - valid_links=$((valid_links + 1)) - local link_target - link_target=$(readlink "$link_path") - link_details="${link_details}VALID|${dest_path}|${link_target}\n" - else - broken_links=$((broken_links + 1)) - local link_target - link_target=$(readlink "$link_path") - link_details="${link_details}BROKEN|${dest_path}|${link_target}\n" - fi - else - link_details="${link_details}MISSING|${dest_path}|\n" - fi - done <<< "$file_mappings" - else - # No explicit mappings, check automatic prefixed filenames - local model_files - model_files=$(find_model_files "$repo_id" "$filename_filter" 2>/dev/null) - - if [[ -z "$model_files" ]]; then - echo "NOT_DOWNLOADED|0|0|0" + if [[ "$match" == false ]]; then + return 1 + fi + fi + + # Check repo_id filter + if [[ -n "$REPO_ID_FILTER" ]]; then + local match=false + IFS=',' read -ra repos <<< "$REPO_ID_FILTER" + for repo in "${repos[@]}"; do + repo=$(echo "$repo" | xargs) + if [[ "$repo_id" == "$repo" ]]; then + match=true + break + fi + done + if [[ "$match" == false ]]; then return 1 fi - - local model_name - model_name=$(echo "$repo_id" | sed 's/.*\///') - - while IFS= read -r source_file; do - if [[ -f "$source_file" ]]; then - local filename - filename=$(basename "$source_file") - local prefixed_filename="${model_name}-${filename}" - - total_links=$((total_links + 1)) - local link_path="${target_dir}/${prefixed_filename}" - - if [[ -L "$link_path" ]]; then - if [[ -e "$link_path" ]]; then - valid_links=$((valid_links + 1)) - local link_target - link_target=$(readlink "$link_path") - link_details="${link_details}VALID|${prefixed_filename}|${link_target}\n" - else - broken_links=$((broken_links + 1)) - local link_target - link_target=$(readlink "$link_path") - link_details="${link_details}BROKEN|${prefixed_filename}|${link_target}\n" - fi - else - link_details="${link_details}MISSING|${prefixed_filename}|\n" - fi - fi - done <<< "$model_files" fi - echo -e "CHECKED|${total_links}|${valid_links}|${broken_links}\n${link_details}" return 0 } -# Verify models by category -verify_category() { - local category="$1" - local category_display="$2" +# ============================================================================ +# DOWNLOAD FUNCTIONS +# ============================================================================ - print_section "${category_display}" +download_file() { + local repo_id="$1" + local source="$2" - # Get models for this category - local models_data - models_data=$(parse_yaml "$CONFIG_FILE" "$category") + # Convert repo_id to cache path (replace / with --) + local cache_repo_dir="${CACHE_DIR}/${repo_id}" + local source_dir + source_dir=$(dirname "$source") + local output_dir="${cache_repo_dir}" + if [[ "$source_dir" != "." ]]; then + output_dir="${cache_repo_dir}/${source_dir}" + fi + local filename + filename=$(basename "$source") + local output_path="${output_dir}/${filename}" - if [[ -z "$models_data" ]]; then - print_warning "No models found in category: ${category}" + print_detail "File: ${BOLD_WHITE}${source}${RESET}" + print_detail "Output: ${CYAN}${output_path}${RESET}" + + # Check if already exists + if [[ -f "$output_path" ]]; then + local size + size=$(du -h "$output_path" | cut -f1) + print_success "Already downloaded: ${filename} (${size})" return 0 fi - local total_models - total_models=$(echo "$models_data" | wc -l) - local current=0 - local models_downloaded=0 - local models_missing=0 - local models_linked=0 - local models_broken=0 - local models_not_linked=0 - local total_size_bytes=0 - local expected_size_bytes=0 + # Dry-run mode + if [[ "$DRY_RUN" == true ]]; then + print_info "DRY-RUN: Would download ${BOLD_WHITE}${source}${RESET}" + return 0 + fi - while IFS='|' read -r repo_id description size_gb essential filename; do - current=$((current+1)) + # Create output directory + mkdir -p "$output_dir" - echo "" - print_step "$current" "$total_models" "${BOLD_MAGENTA}${description}${RESET}" - print_detail "Repository: ${BOLD_WHITE}${repo_id}${RESET}" - print_detail "Category: ${CYAN}${category}${RESET}" - print_detail "Expected Size: ${BOLD_YELLOW}${size_gb} GB${RESET}" + # Build download URL + local url="https://huggingface.co/${repo_id}/resolve/main/${source}" + print_detail "Downloading from HuggingFace..." - expected_size_bytes=$((expected_size_bytes + $(echo "$size_gb * 1073741824" | bc | cut -d'.' -f1))) + # Download with curl (with resume support) + local curl_args=(-L -C - --progress-bar -o "$output_path") + if [[ -n "$HF_TOKEN" ]]; then + curl_args+=(-H "Authorization: Bearer ${HF_TOKEN}") + fi - # Verify download status - echo "" - local download_result - download_result=$(verify_model_download "$repo_id" "$size_gb" "$filename") - local download_status - download_status=$(echo "$download_result" | cut -d'|' -f1) - - if [[ "$download_status" == "FOUND" ]]; then - local actual_bytes - actual_bytes=$(echo "$download_result" | cut -d'|' -f2) - local file_count - file_count=$(echo "$download_result" | cut -d'|' -f3) - local cache_path - cache_path=$(echo "$download_result" | cut -d'|' -f4) - local mod_time - mod_time=$(echo "$download_result" | cut -d'|' -f5) - - total_size_bytes=$((total_size_bytes + actual_bytes)) - local actual_size_human - actual_size_human=$(format_bytes "$actual_bytes") - - print_success "Download Status: ${BOLD_GREEN}DOWNLOADED${RESET}" - print_detail "${DIM}Path: ${cache_path}${RESET}" - print_detail "${DIM}Actual Size: ${actual_size_human} (${actual_bytes} bytes)${RESET}" - print_detail "${DIM}Files: ${file_count} file(s)${RESET}" - print_detail "${DIM}Modified: ${mod_time}${RESET}" - - # Check for size mismatch - local expected_bytes - expected_bytes=$(echo "$size_gb * 1073741824" | bc | cut -d'.' -f1) - local size_diff_pct - size_diff_pct=$(echo "scale=2; (($actual_bytes - $expected_bytes) / $expected_bytes) * 100" | bc | sed 's/^\./0./') - local abs_size_diff_pct - abs_size_diff_pct=${size_diff_pct#-} - - if (( $(echo "$abs_size_diff_pct > 10" | bc -l) )); then - print_warning "Size mismatch: ${size_diff_pct}% difference from expected" - fi - - models_downloaded=$((models_downloaded+1)) - - # Verify link status - echo "" - local file_mappings - file_mappings=$(parse_file_mappings "$CONFIG_FILE" "$category" "$repo_id") - local link_result - link_result=$(verify_model_links "$repo_id" "$filename" "$file_mappings") - local first_line - first_line=$(echo -e "$link_result" | head -n1) - local link_status - link_status=$(echo "$first_line" | cut -d'|' -f1) - - if [[ "$link_status" == "CHECKED" ]]; then - local total_links - total_links=$(echo "$first_line" | cut -d'|' -f2) - local valid_links - valid_links=$(echo "$first_line" | cut -d'|' -f3) - local broken_links - broken_links=$(echo "$first_line" | cut -d'|' -f4) - - if [[ $broken_links -gt 0 ]]; then - print_warning "Link Status: ${BOLD_YELLOW}${broken_links} BROKEN LINK(S)${RESET}" - models_broken=$((models_broken+1)) - elif [[ $valid_links -eq $total_links ]] && [[ $total_links -gt 0 ]]; then - print_success "Link Status: ${BOLD_GREEN}LINKED${RESET} (${valid_links}/${total_links})" - models_linked=$((models_linked+1)) - else - print_warning "Link Status: ${BOLD_YELLOW}PARTIALLY LINKED${RESET} (${valid_links}/${total_links})" - models_not_linked=$((models_not_linked+1)) - fi - - # Show link details - local link_details - link_details=$(echo -e "$link_result" | tail -n +2) - if [[ -n "$link_details" ]]; then - while IFS='|' read -r link_state link_name link_target; do - if [[ -z "$link_state" ]]; then - continue - fi - - case "$link_state" in - VALID) - print_detail "${LINK} ${BOLD_GREEN}โœ“${RESET} ${DIM}${link_name}${RESET}" - ;; - BROKEN) - print_detail "${LINK} ${BOLD_RED}โœ—${RESET} ${DIM}${link_name}${RESET} ${BOLD_RED}(BROKEN)${RESET}" - ;; - MISSING) - print_detail "${LINK} ${BOLD_YELLOW}โ—‹${RESET} ${DIM}${link_name}${RESET} ${BOLD_YELLOW}(NOT LINKED)${RESET}" - ;; - esac - done <<< "$link_details" - fi - else - print_error "Link Status: ${BOLD_RED}NOT LINKED${RESET}" - models_not_linked=$((models_not_linked+1)) - fi - else - print_error "Download Status: ${BOLD_RED}NOT DOWNLOADED${RESET}" - models_missing=$((models_missing+1)) - echo "" - print_info "Link Status: ${DIM}N/A (model not downloaded)${RESET}" + if curl "${curl_args[@]}" "$url" 2>&1; then + if [[ -f "$output_path" ]] && [[ -s "$output_path" ]]; then + local size + size=$(du -h "$output_path" | cut -f1) + print_success "Downloaded ${BOLD_WHITE}${filename}${RESET} (${size})" + return 0 fi + fi - show_progress "$current" "$total_models" - done <<< "$models_data" - - echo -e "\n" - - # Category summary - local total_size_human - total_size_human=$(format_bytes "$total_size_bytes") - local expected_size_human - expected_size_human=$(format_bytes "$expected_size_bytes") - - print_info "Category Summary:" - echo -e " ${BOLD_WHITE}Total Models:${RESET} ${total_models}" - echo -e " ${BOLD_GREEN}โœ“ Downloaded:${RESET} ${models_downloaded} ($(( models_downloaded * 100 / total_models ))%)" - echo -e " ${BOLD_RED}โœ— Missing:${RESET} ${models_missing} ($(( models_missing * 100 / total_models ))%)" - echo -e " ${BOLD_GREEN}โœ“ Properly Linked:${RESET} ${models_linked}" - echo -e " ${BOLD_YELLOW}โš  Broken Links:${RESET} ${models_broken}" - echo -e " ${BOLD_YELLOW}โ—‹ Not Linked:${RESET} ${models_not_linked}" - echo -e " ${BOLD_CYAN}๐Ÿ“Š Disk Usage:${RESET} ${total_size_human} / ${expected_size_human} expected" - - # Return statistics for global summary (format: downloaded|missing|linked|broken|not_linked|total_size|expected_size) - echo "${models_downloaded}|${models_missing}|${models_linked}|${models_broken}|${models_not_linked}|${total_size_bytes}|${expected_size_bytes}" > /tmp/verify_stats_${category} + print_error "Failed to download ${source}" + rm -f "$output_path" 2>/dev/null || true + return 1 } -# Process models by category -process_category() { - local category="$1" - local category_display="$2" +download_model() { + local config="$1" + local index="$2" + local repo_id="$3" + local description="$4" - print_section "${category_display}" + print_detail "Repository: ${BOLD_WHITE}${repo_id}${RESET}" + [[ -n "$description" ]] && print_detail "Description: ${description}" - # Get models for this category - local models_data - models_data=$(parse_yaml "$CONFIG_FILE" "$category") + local files_count + files_count=$(get_files_count "$config" "$index") - if [[ -z "$models_data" ]]; then - print_warning "No models found in category: ${category}" - return 0 + if [[ "$files_count" == "0" ]]; then + print_warning "No files defined for ${repo_id}" + return 1 fi - local total_models - total_models=$(echo "$models_data" | wc -l) - local current=0 local succeeded=0 local failed=0 - while IFS='|' read -r repo_id description size_gb essential filename; do - current=$((current+1)) + for ((f=0; f 0 ? total_downloaded * 100 / total_models : 0 ))%)" - echo -e " ${BOLD_RED}โœ— Missing:${RESET} ${total_missing} ($(( total_models > 0 ? total_missing * 100 / total_models : 0 ))%)" - echo "" - echo -e " ${BOLD_GREEN}โœ“ Properly Linked:${RESET} ${total_linked} ($(( total_models > 0 ? total_linked * 100 / total_models : 0 ))%)" - echo -e " ${BOLD_YELLOW}โš  Broken Links:${RESET} ${total_broken} ($(( total_models > 0 ? total_broken * 100 / total_models : 0 ))%)" - echo -e " ${BOLD_YELLOW}โ—‹ Not Linked:${RESET} ${total_not_linked} ($(( total_models > 0 ? total_not_linked * 100 / total_models : 0 ))%)" - echo "" - echo -e " ${BOLD_CYAN}๐Ÿ“Š Disk Space Used:${RESET} ${total_size_human} / ${expected_size_human} expected" - echo -e " ${BOLD_WHITE}Cache Directory:${RESET} ${CYAN}${CACHE_DIR}${RESET}" - echo -e " ${BOLD_WHITE}ComfyUI Directory:${RESET} ${CYAN}${COMFYUI_DIR}${RESET}" - echo -e " ${BOLD_WHITE}Duration:${RESET} ${BOLD_YELLOW}${minutes}m ${seconds}s${RESET}" - echo -e "${CYAN}$(printf '%.0s'"${BOX_DOUBLE}" $(seq 1 80))${RESET}" + # Create symlink + ln -s "$source_path" "$link_path" + print_success "Linked: ${LINK_ICON} ${dest}" + return 0 +} - # Provide actionable suggestions - if [[ $total_missing -gt 0 ]] || [[ $total_broken -gt 0 ]] || [[ $total_not_linked -gt 0 ]]; then - echo -e "\n${BOLD_YELLOW}${WARNING} Issues Found - Suggested Actions:${RESET}\n" +link_model() { + local config="$1" + local index="$2" + local repo_id="$3" - if [[ $total_missing -gt 0 ]]; then - echo -e " ${BOLD_RED}โœ—${RESET} ${total_missing} model(s) not downloaded" - echo -e " ${DIM}Fix:${RESET} ${CYAN}$0 download -c ${CONFIG_FILE}${RESET}" + print_detail "Repository: ${BOLD_WHITE}${repo_id}${RESET}" + + local files_count + files_count=$(get_files_count "$config" "$index") + + if [[ "$files_count" == "0" ]]; then + print_warning "No files defined for ${repo_id}" + return 1 + fi + + local succeeded=0 + local failed=0 + + for ((f=0; f