feat: add category filter, cleanup mode, and dry-run support to HuggingFace downloader
Add three major features to artifact_huggingface_download.sh: 1. Category filtering (--category flag): - Filter models by category (single or comma-separated multiple) - Validates categories against YAML configuration - Works with all commands (download, link, both, verify) 2. Cleanup mode (--cleanup flag): - Removes unreferenced files from HuggingFace cache - Only deletes files not referenced by symlinks - Per-category cleanup (safe and isolated) - Works with link and both commands only 3. Dry-run mode (--dry-run/-n flag): - Preview operations without making changes - Shows what would be downloaded, linked, or cleaned - Includes file counts and size estimates - Warning banner to indicate dry-run mode Additional improvements: - Updated help text with comprehensive examples - Added validation for invalid flag combinations - Enhanced user feedback with detailed preview information 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -3,9 +3,38 @@
|
|||||||
# ComfyUI Model Downloader - A Beautiful CLI Tool
|
# ComfyUI Model Downloader - A Beautiful CLI Tool
|
||||||
# Downloads AI models from HuggingFace and creates symlinks to ComfyUI directories
|
# Downloads AI models from HuggingFace and creates symlinks to ComfyUI directories
|
||||||
#
|
#
|
||||||
# Usage: ./artifact_comfyui_download.sh [COMMAND] [options]
|
# Usage: ./artifact_huggingface_download.sh [COMMAND] [OPTIONS]
|
||||||
#
|
#
|
||||||
# Commands: download, link, both (default)
|
# Commands:
|
||||||
|
# download Download models from HuggingFace to cache directory
|
||||||
|
# link Create symlinks from cache to ComfyUI models directory
|
||||||
|
# both Download and link (default)
|
||||||
|
# verify Verify symlinks in ComfyUI models directory
|
||||||
|
#
|
||||||
|
# Options:
|
||||||
|
# -c, --config FILE Path to YAML configuration file
|
||||||
|
# --cache-dir DIR HuggingFace cache directory (default: auto-detect)
|
||||||
|
# --comfyui-dir DIR ComfyUI installation directory
|
||||||
|
# --filter-repo REPO... Only process specific repositories
|
||||||
|
# --category CAT1[,CAT2] Filter by category (comma-separated for multiple)
|
||||||
|
# --cleanup, --clean Remove unused cache files (link/both only)
|
||||||
|
# --dry-run, -n Show what would be done without making changes
|
||||||
|
#
|
||||||
|
# Examples:
|
||||||
|
# # Download and link all models from config
|
||||||
|
# ./artifact_huggingface_download.sh both -c models.yaml
|
||||||
|
#
|
||||||
|
# # Download only specific categories
|
||||||
|
# ./artifact_huggingface_download.sh download -c models.yaml --category image_models,video_models
|
||||||
|
#
|
||||||
|
# # Link with cleanup (remove unused cache files)
|
||||||
|
# ./artifact_huggingface_download.sh link -c models.yaml --cleanup
|
||||||
|
#
|
||||||
|
# # Dry-run to preview operations
|
||||||
|
# ./artifact_huggingface_download.sh both -c models.yaml --dry-run
|
||||||
|
#
|
||||||
|
# # Process only specific repositories
|
||||||
|
# ./artifact_huggingface_download.sh both -c models.yaml --filter-repo black-forest-labs/FLUX.1-schnell
|
||||||
#
|
#
|
||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
@@ -111,6 +140,11 @@ fi
|
|||||||
# Default command
|
# Default command
|
||||||
COMMAND="both"
|
COMMAND="both"
|
||||||
|
|
||||||
|
# Feature flags
|
||||||
|
CATEGORY_FILTER="" # Empty = all categories, or comma-separated list
|
||||||
|
CLEANUP_MODE=false # Remove unused files from HuggingFace cache
|
||||||
|
DRY_RUN=false # Simulate operations without making changes
|
||||||
|
|
||||||
# HuggingFace token from environment or .env file
|
# HuggingFace token from environment or .env file
|
||||||
# Initialize HF_TOKEN if not set
|
# Initialize HF_TOKEN if not set
|
||||||
HF_TOKEN="${HF_TOKEN:-}"
|
HF_TOKEN="${HF_TOKEN:-}"
|
||||||
@@ -232,6 +266,49 @@ except Exception as e:
|
|||||||
EOPYAML
|
EOPYAML
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Validate and get categories to process
|
||||||
|
validate_and_get_categories() {
|
||||||
|
local config_file="$1"
|
||||||
|
|
||||||
|
# Get all available categories
|
||||||
|
local all_categories
|
||||||
|
all_categories=$(parse_yaml "$config_file" "categories")
|
||||||
|
|
||||||
|
# If no filter specified, return all categories
|
||||||
|
if [[ -z "$CATEGORY_FILTER" ]]; then
|
||||||
|
echo "$all_categories"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Split comma-separated categories and validate each
|
||||||
|
local requested_categories
|
||||||
|
IFS=',' read -ra requested_categories <<< "$CATEGORY_FILTER"
|
||||||
|
|
||||||
|
local validated_categories=()
|
||||||
|
for requested in "${requested_categories[@]}"; do
|
||||||
|
# Trim whitespace
|
||||||
|
requested=$(echo "$requested" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
|
||||||
|
|
||||||
|
# Check if category exists
|
||||||
|
if echo "$all_categories" | grep -q "^${requested}$"; then
|
||||||
|
validated_categories+=("$requested")
|
||||||
|
else
|
||||||
|
print_error "Invalid category: '$requested'"
|
||||||
|
echo ""
|
||||||
|
echo "Available categories:"
|
||||||
|
while IFS= read -r cat; do
|
||||||
|
echo " - $cat"
|
||||||
|
done <<< "$all_categories"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
# Print validated categories (one per line)
|
||||||
|
for cat in "${validated_categories[@]}"; do
|
||||||
|
echo "$cat"
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
# Parse file mappings for a specific model
|
# Parse file mappings for a specific model
|
||||||
parse_file_mappings() {
|
parse_file_mappings() {
|
||||||
local yaml_file="$1"
|
local yaml_file="$1"
|
||||||
@@ -334,6 +411,12 @@ validate_config() {
|
|||||||
print_info "Verify mode: HuggingFace token not required"
|
print_info "Verify mode: HuggingFace token not required"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Validate flag combinations
|
||||||
|
if [[ "$CLEANUP_MODE" == true ]] && [[ ! "$COMMAND" =~ ^(link|both)$ ]]; then
|
||||||
|
print_error "--cleanup can only be used with 'link' or 'both' commands"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
# Cache directory
|
# Cache directory
|
||||||
if [[ "$COMMAND" == "download" ]] || [[ "$COMMAND" == "both" ]]; then
|
if [[ "$COMMAND" == "download" ]] || [[ "$COMMAND" == "both" ]]; then
|
||||||
if [[ ! -d "$CACHE_DIR" ]]; then
|
if [[ ! -d "$CACHE_DIR" ]]; then
|
||||||
@@ -452,6 +535,12 @@ download_model() {
|
|||||||
print_detail "Description: ${description}"
|
print_detail "Description: ${description}"
|
||||||
print_detail "Size: ${BOLD_YELLOW}${size_gb}GB${RESET}"
|
print_detail "Size: ${BOLD_YELLOW}${size_gb}GB${RESET}"
|
||||||
|
|
||||||
|
# Dry-run mode: skip actual download
|
||||||
|
if [[ "$DRY_RUN" == true ]]; then
|
||||||
|
print_info "DRY-RUN: Would download ${BOLD_WHITE}${repo_id}${RESET} (~${size_gb} GB)"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
# Download using Python
|
# Download using Python
|
||||||
python3 - <<EOPYDOWNLOAD
|
python3 - <<EOPYDOWNLOAD
|
||||||
import os
|
import os
|
||||||
@@ -515,6 +604,22 @@ link_model() {
|
|||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Count files
|
||||||
|
local file_count
|
||||||
|
file_count=$(echo "$model_files" | grep -c .)
|
||||||
|
|
||||||
|
# Dry-run mode: show what would be linked
|
||||||
|
if [[ "$DRY_RUN" == true ]]; then
|
||||||
|
print_info "DRY-RUN: Would link ${BOLD_YELLOW}${file_count}${RESET} file(s) to ${target_dir}/"
|
||||||
|
print_detail "Files that would be linked:"
|
||||||
|
while IFS= read -r file; do
|
||||||
|
local basename_file
|
||||||
|
basename_file=$(basename "$file")
|
||||||
|
print_detail " ${LINK} Would link: ${DIM}${basename_file}${RESET}"
|
||||||
|
done <<< "$model_files"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
local linked_count=0
|
local linked_count=0
|
||||||
|
|
||||||
# If explicit file mappings are provided, use them
|
# If explicit file mappings are provided, use them
|
||||||
@@ -592,6 +697,130 @@ link_model() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Cleanup unused cache files that aren't symlinked
|
||||||
|
cleanup_unused_cache_files() {
|
||||||
|
local repo_id="$1"
|
||||||
|
local cache_dir="$2"
|
||||||
|
local comfyui_dir="$3"
|
||||||
|
|
||||||
|
# Find the latest snapshot directory for this repo
|
||||||
|
local repo_cache_dir="${cache_dir}/models--${repo_id/\//-}"
|
||||||
|
if [[ ! -d "$repo_cache_dir" ]]; then
|
||||||
|
print_warning "Cache directory not found for $repo_id"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
print_info "Analyzing cache for ${BOLD_WHITE}${repo_id}${RESET}..."
|
||||||
|
|
||||||
|
# Use Python to find unreferenced files
|
||||||
|
local cleanup_result
|
||||||
|
cleanup_result=$(python3 - "$repo_cache_dir" "$comfyui_dir" <<'EOPYCLEANUP'
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
repo_cache = Path(sys.argv[1])
|
||||||
|
comfyui_dir = Path(sys.argv[2])
|
||||||
|
|
||||||
|
# Find latest snapshot
|
||||||
|
snapshots_dir = repo_cache / 'snapshots'
|
||||||
|
if not snapshots_dir.exists():
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
|
snapshots = sorted(snapshots_dir.iterdir(), key=lambda x: x.stat().st_mtime, reverse=True)
|
||||||
|
if not snapshots:
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
|
latest_snapshot = snapshots[0]
|
||||||
|
|
||||||
|
# Find all files in snapshot
|
||||||
|
all_files = []
|
||||||
|
for file_path in latest_snapshot.rglob('*'):
|
||||||
|
if file_path.is_file():
|
||||||
|
all_files.append(file_path)
|
||||||
|
|
||||||
|
# Check which files have symlinks pointing to them
|
||||||
|
unreferenced_files = []
|
||||||
|
total_size = 0
|
||||||
|
|
||||||
|
for file_path in all_files:
|
||||||
|
# Search for symlinks in ComfyUI models that point to this file
|
||||||
|
has_symlink = False
|
||||||
|
|
||||||
|
# Check all possible symlinks in ComfyUI
|
||||||
|
for comfy_file in comfyui_dir.rglob('*'):
|
||||||
|
if comfy_file.is_symlink():
|
||||||
|
try:
|
||||||
|
if comfy_file.resolve() == file_path:
|
||||||
|
has_symlink = True
|
||||||
|
break
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if not has_symlink:
|
||||||
|
file_size = file_path.stat().st_size
|
||||||
|
unreferenced_files.append(str(file_path))
|
||||||
|
total_size += file_size
|
||||||
|
|
||||||
|
# Output results
|
||||||
|
print(f"{len(unreferenced_files)}|{total_size}")
|
||||||
|
for f in unreferenced_files:
|
||||||
|
print(f)
|
||||||
|
EOPYCLEANUP
|
||||||
|
)
|
||||||
|
|
||||||
|
# Parse results
|
||||||
|
local first_line
|
||||||
|
first_line=$(echo "$cleanup_result" | head -n 1)
|
||||||
|
local file_count
|
||||||
|
file_count=$(echo "$first_line" | cut -d'|' -f1)
|
||||||
|
local total_bytes
|
||||||
|
total_bytes=$(echo "$first_line" | cut -d'|' -f2)
|
||||||
|
|
||||||
|
if [[ "$file_count" -eq 0 ]]; then
|
||||||
|
print_success "No unreferenced files found - cache is optimal"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Convert bytes to MB
|
||||||
|
local size_mb
|
||||||
|
size_mb=$(echo "scale=2; $total_bytes / 1048576" | bc)
|
||||||
|
|
||||||
|
# Get list of files to delete (skip first line which is summary)
|
||||||
|
local files_to_delete
|
||||||
|
files_to_delete=$(echo "$cleanup_result" | tail -n +2)
|
||||||
|
|
||||||
|
if [[ "$DRY_RUN" == true ]]; then
|
||||||
|
print_warning "DRY-RUN: Would delete ${BOLD_YELLOW}${file_count}${RESET} unreferenced file(s) (~${size_mb} MB)"
|
||||||
|
if [[ -n "$files_to_delete" ]]; then
|
||||||
|
print_detail "Files that would be deleted:"
|
||||||
|
while IFS= read -r file; do
|
||||||
|
local basename
|
||||||
|
basename=$(basename "$file")
|
||||||
|
print_detail " ${CROSS} Would delete: ${DIM}${basename}${RESET}"
|
||||||
|
done <<< "$files_to_delete"
|
||||||
|
fi
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Actually delete files
|
||||||
|
print_warning "Cleaning up ${BOLD_YELLOW}${file_count}${RESET} unreferenced file(s) (~${size_mb} MB)..."
|
||||||
|
local deleted_count=0
|
||||||
|
while IFS= read -r file; do
|
||||||
|
if [[ -f "$file" ]]; then
|
||||||
|
rm -f "$file" && deleted_count=$((deleted_count+1))
|
||||||
|
fi
|
||||||
|
done <<< "$files_to_delete"
|
||||||
|
|
||||||
|
if [[ $deleted_count -eq $file_count ]]; then
|
||||||
|
print_success "Cleaned up ${deleted_count} file(s), freed ~${size_mb} MB"
|
||||||
|
return 0
|
||||||
|
else
|
||||||
|
print_warning "Cleaned up ${deleted_count}/${file_count} files"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# VERIFICATION FUNCTIONS - Model Status Checking
|
# VERIFICATION FUNCTIONS - Model Status Checking
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@@ -1006,6 +1235,11 @@ process_category() {
|
|||||||
if ! link_model "$repo_id" "$model_type" "$filename" "$file_mappings"; then
|
if ! link_model "$repo_id" "$model_type" "$filename" "$file_mappings"; then
|
||||||
success=false
|
success=false
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Cleanup unused cache files if enabled
|
||||||
|
if [[ "$CLEANUP_MODE" == true ]] && $success; then
|
||||||
|
cleanup_unused_cache_files "$repo_id" "$CACHE_DIR" "$COMFYUI_DIR"
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -1169,20 +1403,25 @@ main() {
|
|||||||
echo -e "${DIM}Powered by HuggingFace ${LINK} Configuration-Driven ${STAR}${RESET}\n"
|
echo -e "${DIM}Powered by HuggingFace ${LINK} Configuration-Driven ${STAR}${RESET}\n"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Show dry-run warning banner
|
||||||
|
if [[ "$DRY_RUN" == true ]]; then
|
||||||
|
echo -e "${BOLD_YELLOW}${WARNING} DRY-RUN MODE - No changes will be made ${WARNING}${RESET}\n"
|
||||||
|
fi
|
||||||
|
|
||||||
# Check dependencies
|
# Check dependencies
|
||||||
check_dependencies
|
check_dependencies
|
||||||
|
|
||||||
# Validate configuration
|
# Validate configuration
|
||||||
validate_config
|
validate_config
|
||||||
|
|
||||||
# Get all categories
|
# Get all categories (or filtered categories if --category is specified)
|
||||||
if [[ -z "$CONFIG_FILE" ]]; then
|
if [[ -z "$CONFIG_FILE" ]]; then
|
||||||
print_error "No configuration file specified. Use -c/--config to provide one."
|
print_error "No configuration file specified. Use -c/--config to provide one."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
local categories
|
local categories
|
||||||
categories=$(parse_yaml "$CONFIG_FILE" "categories")
|
categories=$(validate_and_get_categories "$CONFIG_FILE")
|
||||||
|
|
||||||
if [[ -z "$categories" ]]; then
|
if [[ -z "$categories" ]]; then
|
||||||
print_error "No model categories found in configuration"
|
print_error "No model categories found in configuration"
|
||||||
@@ -1248,6 +1487,18 @@ while [[ $# -gt 0 ]]; do
|
|||||||
COMFYUI_DIR="$2"
|
COMFYUI_DIR="$2"
|
||||||
shift 2
|
shift 2
|
||||||
;;
|
;;
|
||||||
|
--category|--cat)
|
||||||
|
CATEGORY_FILTER="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--cleanup|--clean)
|
||||||
|
CLEANUP_MODE=true
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
--dry-run|-n)
|
||||||
|
DRY_RUN=true
|
||||||
|
shift
|
||||||
|
;;
|
||||||
download|link|both|verify)
|
download|link|both|verify)
|
||||||
COMMAND="$1"
|
COMMAND="$1"
|
||||||
shift
|
shift
|
||||||
|
|||||||
Reference in New Issue
Block a user