feat: add category filter, cleanup mode, and dry-run support to HuggingFace downloader

Add three major features to artifact_huggingface_download.sh:

1. Category filtering (--category flag):
   - Filter models by category (single or comma-separated multiple)
   - Validates categories against YAML configuration
   - Works with all commands (download, link, both, verify)

2. Cleanup mode (--cleanup flag):
   - Removes unreferenced files from HuggingFace cache
   - Only deletes files not referenced by symlinks
   - Per-category cleanup (safe and isolated)
   - Works with link and both commands only

3. Dry-run mode (--dry-run/-n flag):
   - Preview operations without making changes
   - Shows what would be downloaded, linked, or cleaned
   - Includes file counts and size estimates
   - Warning banner to indicate dry-run mode

Additional improvements:
- Updated help text with comprehensive examples
- Added validation for invalid flag combinations
- Enhanced user feedback with detailed preview information

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-25 16:46:44 +01:00
parent f68ee52297
commit 2c71c49893

View File

@@ -3,9 +3,38 @@
# ComfyUI Model Downloader - A Beautiful CLI Tool
# Downloads AI models from HuggingFace and creates symlinks to ComfyUI directories
#
# Usage: ./artifact_comfyui_download.sh [COMMAND] [options]
# Usage: ./artifact_huggingface_download.sh [COMMAND] [OPTIONS]
#
# Commands: download, link, both (default)
# Commands:
# download Download models from HuggingFace to cache directory
# link Create symlinks from cache to ComfyUI models directory
# both Download and link (default)
# verify Verify symlinks in ComfyUI models directory
#
# Options:
# -c, --config FILE Path to YAML configuration file
# --cache-dir DIR HuggingFace cache directory (default: auto-detect)
# --comfyui-dir DIR ComfyUI installation directory
# --filter-repo REPO... Only process specific repositories
# --category CAT1[,CAT2] Filter by category (comma-separated for multiple)
# --cleanup, --clean Remove unused cache files (link/both only)
# --dry-run, -n Show what would be done without making changes
#
# Examples:
# # Download and link all models from config
# ./artifact_huggingface_download.sh both -c models.yaml
#
# # Download only specific categories
# ./artifact_huggingface_download.sh download -c models.yaml --category image_models,video_models
#
# # Link with cleanup (remove unused cache files)
# ./artifact_huggingface_download.sh link -c models.yaml --cleanup
#
# # Dry-run to preview operations
# ./artifact_huggingface_download.sh both -c models.yaml --dry-run
#
# # Process only specific repositories
# ./artifact_huggingface_download.sh both -c models.yaml --filter-repo black-forest-labs/FLUX.1-schnell
#
set -euo pipefail
@@ -111,6 +140,11 @@ fi
# Default command
COMMAND="both"
# Feature flags
CATEGORY_FILTER="" # Empty = all categories, or comma-separated list
CLEANUP_MODE=false # Remove unused files from HuggingFace cache
DRY_RUN=false # Simulate operations without making changes
# HuggingFace token from environment or .env file
# Initialize HF_TOKEN if not set
HF_TOKEN="${HF_TOKEN:-}"
@@ -232,6 +266,49 @@ except Exception as e:
EOPYAML
}
# Validate and get categories to process
validate_and_get_categories() {
local config_file="$1"
# Get all available categories
local all_categories
all_categories=$(parse_yaml "$config_file" "categories")
# If no filter specified, return all categories
if [[ -z "$CATEGORY_FILTER" ]]; then
echo "$all_categories"
return 0
fi
# Split comma-separated categories and validate each
local requested_categories
IFS=',' read -ra requested_categories <<< "$CATEGORY_FILTER"
local validated_categories=()
for requested in "${requested_categories[@]}"; do
# Trim whitespace
requested=$(echo "$requested" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
# Check if category exists
if echo "$all_categories" | grep -q "^${requested}$"; then
validated_categories+=("$requested")
else
print_error "Invalid category: '$requested'"
echo ""
echo "Available categories:"
while IFS= read -r cat; do
echo " - $cat"
done <<< "$all_categories"
exit 1
fi
done
# Print validated categories (one per line)
for cat in "${validated_categories[@]}"; do
echo "$cat"
done
}
# Parse file mappings for a specific model
parse_file_mappings() {
local yaml_file="$1"
@@ -334,6 +411,12 @@ validate_config() {
print_info "Verify mode: HuggingFace token not required"
fi
# Validate flag combinations
if [[ "$CLEANUP_MODE" == true ]] && [[ ! "$COMMAND" =~ ^(link|both)$ ]]; then
print_error "--cleanup can only be used with 'link' or 'both' commands"
exit 1
fi
# Cache directory
if [[ "$COMMAND" == "download" ]] || [[ "$COMMAND" == "both" ]]; then
if [[ ! -d "$CACHE_DIR" ]]; then
@@ -452,6 +535,12 @@ download_model() {
print_detail "Description: ${description}"
print_detail "Size: ${BOLD_YELLOW}${size_gb}GB${RESET}"
# Dry-run mode: skip actual download
if [[ "$DRY_RUN" == true ]]; then
print_info "DRY-RUN: Would download ${BOLD_WHITE}${repo_id}${RESET} (~${size_gb} GB)"
return 0
fi
# Download using Python
python3 - <<EOPYDOWNLOAD
import os
@@ -515,6 +604,22 @@ link_model() {
return 1
fi
# Count files
local file_count
file_count=$(echo "$model_files" | grep -c .)
# Dry-run mode: show what would be linked
if [[ "$DRY_RUN" == true ]]; then
print_info "DRY-RUN: Would link ${BOLD_YELLOW}${file_count}${RESET} file(s) to ${target_dir}/"
print_detail "Files that would be linked:"
while IFS= read -r file; do
local basename_file
basename_file=$(basename "$file")
print_detail " ${LINK} Would link: ${DIM}${basename_file}${RESET}"
done <<< "$model_files"
return 0
fi
local linked_count=0
# If explicit file mappings are provided, use them
@@ -592,6 +697,130 @@ link_model() {
fi
}
# Cleanup unused cache files that aren't symlinked
cleanup_unused_cache_files() {
local repo_id="$1"
local cache_dir="$2"
local comfyui_dir="$3"
# Find the latest snapshot directory for this repo
local repo_cache_dir="${cache_dir}/models--${repo_id/\//-}"
if [[ ! -d "$repo_cache_dir" ]]; then
print_warning "Cache directory not found for $repo_id"
return 1
fi
print_info "Analyzing cache for ${BOLD_WHITE}${repo_id}${RESET}..."
# Use Python to find unreferenced files
local cleanup_result
cleanup_result=$(python3 - "$repo_cache_dir" "$comfyui_dir" <<'EOPYCLEANUP'
import os
import sys
from pathlib import Path
repo_cache = Path(sys.argv[1])
comfyui_dir = Path(sys.argv[2])
# Find latest snapshot
snapshots_dir = repo_cache / 'snapshots'
if not snapshots_dir.exists():
sys.exit(0)
snapshots = sorted(snapshots_dir.iterdir(), key=lambda x: x.stat().st_mtime, reverse=True)
if not snapshots:
sys.exit(0)
latest_snapshot = snapshots[0]
# Find all files in snapshot
all_files = []
for file_path in latest_snapshot.rglob('*'):
if file_path.is_file():
all_files.append(file_path)
# Check which files have symlinks pointing to them
unreferenced_files = []
total_size = 0
for file_path in all_files:
# Search for symlinks in ComfyUI models that point to this file
has_symlink = False
# Check all possible symlinks in ComfyUI
for comfy_file in comfyui_dir.rglob('*'):
if comfy_file.is_symlink():
try:
if comfy_file.resolve() == file_path:
has_symlink = True
break
except:
pass
if not has_symlink:
file_size = file_path.stat().st_size
unreferenced_files.append(str(file_path))
total_size += file_size
# Output results
print(f"{len(unreferenced_files)}|{total_size}")
for f in unreferenced_files:
print(f)
EOPYCLEANUP
)
# Parse results
local first_line
first_line=$(echo "$cleanup_result" | head -n 1)
local file_count
file_count=$(echo "$first_line" | cut -d'|' -f1)
local total_bytes
total_bytes=$(echo "$first_line" | cut -d'|' -f2)
if [[ "$file_count" -eq 0 ]]; then
print_success "No unreferenced files found - cache is optimal"
return 0
fi
# Convert bytes to MB
local size_mb
size_mb=$(echo "scale=2; $total_bytes / 1048576" | bc)
# Get list of files to delete (skip first line which is summary)
local files_to_delete
files_to_delete=$(echo "$cleanup_result" | tail -n +2)
if [[ "$DRY_RUN" == true ]]; then
print_warning "DRY-RUN: Would delete ${BOLD_YELLOW}${file_count}${RESET} unreferenced file(s) (~${size_mb} MB)"
if [[ -n "$files_to_delete" ]]; then
print_detail "Files that would be deleted:"
while IFS= read -r file; do
local basename
basename=$(basename "$file")
print_detail " ${CROSS} Would delete: ${DIM}${basename}${RESET}"
done <<< "$files_to_delete"
fi
return 0
fi
# Actually delete files
print_warning "Cleaning up ${BOLD_YELLOW}${file_count}${RESET} unreferenced file(s) (~${size_mb} MB)..."
local deleted_count=0
while IFS= read -r file; do
if [[ -f "$file" ]]; then
rm -f "$file" && deleted_count=$((deleted_count+1))
fi
done <<< "$files_to_delete"
if [[ $deleted_count -eq $file_count ]]; then
print_success "Cleaned up ${deleted_count} file(s), freed ~${size_mb} MB"
return 0
else
print_warning "Cleaned up ${deleted_count}/${file_count} files"
return 1
fi
}
# ============================================================================
# VERIFICATION FUNCTIONS - Model Status Checking
# ============================================================================
@@ -1006,6 +1235,11 @@ process_category() {
if ! link_model "$repo_id" "$model_type" "$filename" "$file_mappings"; then
success=false
fi
# Cleanup unused cache files if enabled
if [[ "$CLEANUP_MODE" == true ]] && $success; then
cleanup_unused_cache_files "$repo_id" "$CACHE_DIR" "$COMFYUI_DIR"
fi
fi
fi
@@ -1169,20 +1403,25 @@ main() {
echo -e "${DIM}Powered by HuggingFace ${LINK} Configuration-Driven ${STAR}${RESET}\n"
fi
# Show dry-run warning banner
if [[ "$DRY_RUN" == true ]]; then
echo -e "${BOLD_YELLOW}${WARNING} DRY-RUN MODE - No changes will be made ${WARNING}${RESET}\n"
fi
# Check dependencies
check_dependencies
# Validate configuration
validate_config
# Get all categories
# Get all categories (or filtered categories if --category is specified)
if [[ -z "$CONFIG_FILE" ]]; then
print_error "No configuration file specified. Use -c/--config to provide one."
exit 1
fi
local categories
categories=$(parse_yaml "$CONFIG_FILE" "categories")
categories=$(validate_and_get_categories "$CONFIG_FILE")
if [[ -z "$categories" ]]; then
print_error "No model categories found in configuration"
@@ -1248,6 +1487,18 @@ while [[ $# -gt 0 ]]; do
COMFYUI_DIR="$2"
shift 2
;;
--category|--cat)
CATEGORY_FILTER="$2"
shift 2
;;
--cleanup|--clean)
CLEANUP_MODE=true
shift
;;
--dry-run|-n)
DRY_RUN=true
shift
;;
download|link|both|verify)
COMMAND="$1"
shift