feat: add category filter, cleanup mode, and dry-run support to HuggingFace downloader
Add three major features to artifact_huggingface_download.sh:

1. Category filtering (--category flag):
   - Filter models by category (single or comma-separated multiple)
   - Validates categories against YAML configuration
   - Works with all commands (download, link, both, verify)

2. Cleanup mode (--cleanup flag):
   - Removes unreferenced files from HuggingFace cache
   - Only deletes files not referenced by symlinks
   - Per-category cleanup (safe and isolated)
   - Works with link and both commands only

3. Dry-run mode (--dry-run/-n flag):
   - Preview operations without making changes
   - Shows what would be downloaded, linked, or cleaned
   - Includes file counts and size estimates
   - Warning banner to indicate dry-run mode

Additional improvements:
- Updated help text with comprehensive examples
- Added validation for invalid flag combinations
- Enhanced user feedback with detailed preview information

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -3,9 +3,38 @@
|
||||
# ComfyUI Model Downloader - A Beautiful CLI Tool
|
||||
# Downloads AI models from HuggingFace and creates symlinks to ComfyUI directories
|
||||
#
|
||||
# Usage: ./artifact_comfyui_download.sh [COMMAND] [options]
|
||||
# Usage: ./artifact_huggingface_download.sh [COMMAND] [OPTIONS]
|
||||
#
|
||||
# Commands: download, link, both (default)
|
||||
# Commands:
|
||||
# download Download models from HuggingFace to cache directory
|
||||
# link Create symlinks from cache to ComfyUI models directory
|
||||
# both Download and link (default)
|
||||
# verify Verify symlinks in ComfyUI models directory
|
||||
#
|
||||
# Options:
|
||||
# -c, --config FILE Path to YAML configuration file
|
||||
# --cache-dir DIR HuggingFace cache directory (default: auto-detect)
|
||||
# --comfyui-dir DIR ComfyUI installation directory
|
||||
# --filter-repo REPO... Only process specific repositories
|
||||
# --category CAT1[,CAT2] Filter by category (comma-separated for multiple)
|
||||
# --cleanup, --clean Remove unused cache files (link/both only)
|
||||
# --dry-run, -n Show what would be done without making changes
|
||||
#
|
||||
# Examples:
|
||||
# # Download and link all models from config
|
||||
# ./artifact_huggingface_download.sh both -c models.yaml
|
||||
#
|
||||
# # Download only specific categories
|
||||
# ./artifact_huggingface_download.sh download -c models.yaml --category image_models,video_models
|
||||
#
|
||||
# # Link with cleanup (remove unused cache files)
|
||||
# ./artifact_huggingface_download.sh link -c models.yaml --cleanup
|
||||
#
|
||||
# # Dry-run to preview operations
|
||||
# ./artifact_huggingface_download.sh both -c models.yaml --dry-run
|
||||
#
|
||||
# # Process only specific repositories
|
||||
# ./artifact_huggingface_download.sh both -c models.yaml --filter-repo black-forest-labs/FLUX.1-schnell
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
@@ -111,6 +140,11 @@ fi
|
||||
# Default command
|
||||
COMMAND="both"
|
||||
|
||||
# Feature flags
|
||||
CATEGORY_FILTER="" # Empty = all categories, or comma-separated list
|
||||
CLEANUP_MODE=false # Remove unused files from HuggingFace cache
|
||||
DRY_RUN=false # Simulate operations without making changes
|
||||
|
||||
# HuggingFace token from environment or .env file
|
||||
# Initialize HF_TOKEN if not set
|
||||
HF_TOKEN="${HF_TOKEN:-}"
|
||||
@@ -232,6 +266,49 @@ except Exception as e:
|
||||
EOPYAML
|
||||
}
|
||||
|
||||
# Validate and get categories to process
|
||||
#######################################
# Resolve the list of model categories to process.
#
# With no category filter set, every category reported by the configuration
# is returned. Otherwise each comma-separated entry of CATEGORY_FILTER is
# trimmed and checked against the configuration; the first unknown (or
# empty) entry aborts with a listing of the valid names.
#
# Globals:   CATEGORY_FILTER (read)
# Arguments: $1 - path to the YAML configuration file
# Outputs:   selected categories on stdout, one per line.
#            All diagnostics go to stderr, because callers capture stdout
#            with $(...) and would otherwise swallow the error text.
# Returns:   0 on success; exits with status 1 on an invalid category
#######################################
validate_and_get_categories() {
  local config_file="$1"

  # Every category the configuration defines, one per line.
  local all_categories
  all_categories=$(parse_yaml "$config_file" "categories")

  # No filter requested: pass the full list through unchanged.
  if [[ -z "$CATEGORY_FILTER" ]]; then
    printf '%s\n' "$all_categories"
    return 0
  fi

  local -a requested_categories=()
  IFS=',' read -ra requested_categories <<< "$CATEGORY_FILTER"

  local -a validated_categories=()
  local requested
  local available
  for requested in "${requested_categories[@]}"; do
    # Trim surrounding whitespace without spawning a subprocess.
    requested="${requested#"${requested%%[![:space:]]*}"}"
    requested="${requested%"${requested##*[![:space:]]}"}"

    # -F -x: literal, whole-line comparison, so characters such as "." or
    # "*" in the requested name are never interpreted as a regex.
    if [[ -n "$requested" ]] \
      && grep -Fxq -- "$requested" <<< "$all_categories"; then
      validated_categories+=("$requested")
      continue
    fi

    {
      print_error "Invalid category: '$requested'"
      echo ""
      echo "Available categories:"
      while IFS= read -r available; do
        echo " - $available"
      done <<< "$all_categories"
    } >&2
    exit 1
  done

  # Emit validated categories in the order the user requested them.
  if (( ${#validated_categories[@]} > 0 )); then
    printf '%s\n' "${validated_categories[@]}"
  fi
}
|
||||
|
||||
# Parse file mappings for a specific model
|
||||
parse_file_mappings() {
|
||||
local yaml_file="$1"
|
||||
@@ -334,6 +411,12 @@ validate_config() {
|
||||
print_info "Verify mode: HuggingFace token not required"
|
||||
fi
|
||||
|
||||
# Validate flag combinations
|
||||
if [[ "$CLEANUP_MODE" == true ]] && [[ ! "$COMMAND" =~ ^(link|both)$ ]]; then
|
||||
print_error "--cleanup can only be used with 'link' or 'both' commands"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Cache directory
|
||||
if [[ "$COMMAND" == "download" ]] || [[ "$COMMAND" == "both" ]]; then
|
||||
if [[ ! -d "$CACHE_DIR" ]]; then
|
||||
@@ -452,6 +535,12 @@ download_model() {
|
||||
print_detail "Description: ${description}"
|
||||
print_detail "Size: ${BOLD_YELLOW}${size_gb}GB${RESET}"
|
||||
|
||||
# Dry-run mode: skip actual download
|
||||
if [[ "$DRY_RUN" == true ]]; then
|
||||
print_info "DRY-RUN: Would download ${BOLD_WHITE}${repo_id}${RESET} (~${size_gb} GB)"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Download using Python
|
||||
python3 - <<EOPYDOWNLOAD
|
||||
import os
|
||||
@@ -515,6 +604,22 @@ link_model() {
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Count files
|
||||
local file_count
|
||||
file_count=$(echo "$model_files" | grep -c .)
|
||||
|
||||
# Dry-run mode: show what would be linked
|
||||
if [[ "$DRY_RUN" == true ]]; then
|
||||
print_info "DRY-RUN: Would link ${BOLD_YELLOW}${file_count}${RESET} file(s) to ${target_dir}/"
|
||||
print_detail "Files that would be linked:"
|
||||
while IFS= read -r file; do
|
||||
local basename_file
|
||||
basename_file=$(basename "$file")
|
||||
print_detail " ${LINK} Would link: ${DIM}${basename_file}${RESET}"
|
||||
done <<< "$model_files"
|
||||
return 0
|
||||
fi
|
||||
|
||||
local linked_count=0
|
||||
|
||||
# If explicit file mappings are provided, use them
|
||||
@@ -592,6 +697,130 @@ link_model() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Cleanup unused cache files that aren't symlinked
|
||||
#######################################
# Remove files from a repository's most recent HuggingFace cache snapshot
# that no symlink under the ComfyUI directory resolves to.
#
# References are compared by fully resolved path on both sides, because
# snapshot entries in the HuggingFace cache are usually symlinks into the
# repository's blobs/ directory. When an unreferenced snapshot entry is such
# a link, the blob it points to (if it lives inside this repository's cache
# folder) is removed as well, so the reported space is actually freed.
#
# Globals:   DRY_RUN (read); BOLD_WHITE, BOLD_YELLOW, DIM, RESET, CROSS (read)
# Arguments: $1 - repository id, e.g. "org/name"
#            $2 - HuggingFace cache directory
#            $3 - ComfyUI directory scanned for symlinks
# Outputs:   progress and results via the print_* helpers
# Returns:   0 if nothing needed cleaning, on dry-run, or if every file was
#            removed; 1 if the cache folder is missing, the analysis failed,
#            or some files could not be removed. The script runs under
#            `set -e`, so callers that want to continue after a failed
#            cleanup must guard the call.
#######################################
cleanup_unused_cache_files() {
  local repo_id="$1"
  local cache_dir="$2"
  local comfyui_dir="$3"

  # HuggingFace names repository cache folders "models--<org>--<name>":
  # every "/" in the repo id becomes "--".
  local repo_cache_dir="${cache_dir}/models--${repo_id//\//--}"
  if [[ ! -d "$repo_cache_dir" ]]; then
    # Fall back to the folder name this function looked for previously.
    local legacy_cache_dir="${cache_dir}/models--${repo_id/\//-}"
    if [[ -d "$legacy_cache_dir" ]]; then
      repo_cache_dir="$legacy_cache_dir"
    else
      print_warning "Cache directory not found for $repo_id"
      return 1
    fi
  fi

  print_info "Analyzing cache for ${BOLD_WHITE}${repo_id}${RESET}..."

  # The analyzer prints "<count>|<total bytes>" on the first line, then one
  # "<snapshot path><TAB><blob path or empty>" line per unreferenced file.
  # It prints nothing at all when the repository has no snapshots.
  local cleanup_result
  if ! cleanup_result=$(
    python3 - "$repo_cache_dir" "$comfyui_dir" <<'EOPYCLEANUP'
import sys
from pathlib import Path

repo_cache = Path(sys.argv[1])
comfyui_dir = Path(sys.argv[2])


def resolve_or_none(path):
    # Symlink loops and unreadable paths are treated as unresolvable.
    try:
        return path.resolve()
    except (OSError, RuntimeError):
        return None


snapshots_dir = repo_cache / "snapshots"
if not snapshots_dir.is_dir():
    sys.exit(0)

# Newest snapshot first, judged by modification time.
snapshots = sorted(
    (p for p in snapshots_dir.iterdir() if p.is_dir()),
    key=lambda p: p.stat().st_mtime,
    reverse=True,
)
if not snapshots:
    sys.exit(0)
latest_snapshot = snapshots[0]

repo_root = resolve_or_none(repo_cache)

# Walk the ComfyUI tree once and remember where every symlink ends up.
referenced = set()
if comfyui_dir.is_dir():
    for entry in comfyui_dir.rglob("*"):
        if entry.is_symlink():
            target = resolve_or_none(entry)
            if target is not None:
                referenced.add(target)

unreferenced = []
counted = set()
total_size = 0

for file_path in sorted(latest_snapshot.rglob("*")):
    if not file_path.is_file():
        continue

    real = resolve_or_none(file_path)
    # Keep anything that cannot be resolved or that a symlink points at.
    if real is None or real in referenced:
        continue

    # Also keep files below a directory that is itself symlinked.
    parent = resolve_or_none(file_path.parent)
    if parent is not None:
        if parent in referenced or any(d in referenced for d in parent.parents):
            continue

    # Report the backing blob only when it lives inside this repo cache.
    blob = ""
    if file_path.is_symlink() and repo_root is not None:
        if repo_root in real.parents:
            blob = str(real)

    # Count each underlying file once even if several entries share it.
    if real not in counted:
        counted.add(real)
        total_size += real.stat().st_size

    unreferenced.append((str(file_path), blob))

print(f"{len(unreferenced)}|{total_size}")
for path, blob in unreferenced:
    print(f"{path}\t{blob}")
EOPYCLEANUP
  ); then
    print_warning "Cache analysis failed for $repo_id"
    return 1
  fi

  # Parse the summary line; empty output means there was nothing to analyze.
  local file_count=0
  local total_bytes=0
  if [[ -n "$cleanup_result" ]]; then
    local summary="${cleanup_result%%$'\n'*}"
    local summary_re='^([0-9]+)\|([0-9]+)$'
    if [[ ! "$summary" =~ $summary_re ]]; then
      print_warning "Unexpected cache analysis output for $repo_id"
      return 1
    fi
    file_count="${BASH_REMATCH[1]}"
    total_bytes="${BASH_REMATCH[2]}"
  fi

  if (( file_count == 0 )); then
    print_success "No unreferenced files found - cache is optimal"
    return 0
  fi

  # Convert bytes to MB (awk avoids a dependency on bc).
  local size_mb
  size_mb=$(awk -v bytes="$total_bytes" 'BEGIN { printf "%.2f", bytes / 1048576 }')

  # Everything after the summary line is the list of entries to remove.
  local entries="${cleanup_result#*$'\n'}"
  local file
  local blob

  if [[ "$DRY_RUN" == true ]]; then
    print_warning "DRY-RUN: Would delete ${BOLD_YELLOW}${file_count}${RESET} unreferenced file(s) (~${size_mb} MB)"
    if [[ -n "$entries" ]]; then
      print_detail "Files that would be deleted:"
      while IFS=$'\t' read -r file blob; do
        [[ -n "$file" ]] || continue
        print_detail " ${CROSS} Would delete: ${DIM}${file##*/}${RESET}"
      done <<< "$entries"
    fi
    return 0
  fi

  # Actually delete files
  print_warning "Cleaning up ${BOLD_YELLOW}${file_count}${RESET} unreferenced file(s) (~${size_mb} MB)..."
  local deleted_count=0
  while IFS=$'\t' read -r file blob; do
    [[ -n "$file" ]] || continue
    # -L also matches a snapshot link whose blob has already been removed.
    [[ -e "$file" || -L "$file" ]] || continue
    if ! rm -f -- "$file"; then
      continue
    fi
    deleted_count=$((deleted_count + 1))
    if [[ -n "$blob" && -f "$blob" ]]; then
      rm -f -- "$blob" || print_warning "Could not remove cache blob: $blob"
    fi
  done <<< "$entries"

  if (( deleted_count == file_count )); then
    print_success "Cleaned up ${deleted_count} file(s), freed ~${size_mb} MB"
    return 0
  else
    print_warning "Cleaned up ${deleted_count}/${file_count} files"
    return 1
  fi
}
|
||||
|
||||
# ============================================================================
|
||||
# VERIFICATION FUNCTIONS - Model Status Checking
|
||||
# ============================================================================
|
||||
@@ -1006,6 +1235,11 @@ process_category() {
|
||||
if ! link_model "$repo_id" "$model_type" "$filename" "$file_mappings"; then
|
||||
success=false
|
||||
fi
|
||||
|
||||
# Cleanup unused cache files if enabled
|
||||
if [[ "$CLEANUP_MODE" == true ]] && $success; then
|
||||
cleanup_unused_cache_files "$repo_id" "$CACHE_DIR" "$COMFYUI_DIR"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
@@ -1169,20 +1403,25 @@ main() {
|
||||
echo -e "${DIM}Powered by HuggingFace ${LINK} Configuration-Driven ${STAR}${RESET}\n"
|
||||
fi
|
||||
|
||||
# Show dry-run warning banner
|
||||
if [[ "$DRY_RUN" == true ]]; then
|
||||
echo -e "${BOLD_YELLOW}${WARNING} DRY-RUN MODE - No changes will be made ${WARNING}${RESET}\n"
|
||||
fi
|
||||
|
||||
# Check dependencies
|
||||
check_dependencies
|
||||
|
||||
# Validate configuration
|
||||
validate_config
|
||||
|
||||
# Get all categories
|
||||
# Get all categories (or filtered categories if --category is specified)
|
||||
if [[ -z "$CONFIG_FILE" ]]; then
|
||||
print_error "No configuration file specified. Use -c/--config to provide one."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
local categories
|
||||
categories=$(parse_yaml "$CONFIG_FILE" "categories")
|
||||
categories=$(validate_and_get_categories "$CONFIG_FILE")
|
||||
|
||||
if [[ -z "$categories" ]]; then
|
||||
print_error "No model categories found in configuration"
|
||||
@@ -1248,6 +1487,18 @@ while [[ $# -gt 0 ]]; do
|
||||
COMFYUI_DIR="$2"
|
||||
shift 2
|
||||
;;
|
||||
--category|--cat)
|
||||
CATEGORY_FILTER="$2"
|
||||
shift 2
|
||||
;;
|
||||
--cleanup|--clean)
|
||||
CLEANUP_MODE=true
|
||||
shift
|
||||
;;
|
||||
--dry-run|-n)
|
||||
DRY_RUN=true
|
||||
shift
|
||||
;;
|
||||
download|link|both|verify)
|
||||
COMMAND="$1"
|
||||
shift
|
||||
|
||||
Reference in New Issue
Block a user