feat: add comprehensive verify command to HuggingFace downloader

Added new 'verify' command to artifact_huggingface_download.sh that performs comprehensive health checks on all downloaded models and their symlinks.

Features:
- Download status verification (existence, size, location, timestamps)
- Link status verification (valid, broken, or missing symlinks)
- Size mismatch detection (warns if the actual size differs from the expected size by more than 10%)
- Per-model detailed logging with beautiful formatting
- Category-level and global statistics summaries
- Actionable fix suggestions for detected issues
- Disk space usage analysis

New Functions:
- get_model_disk_usage() - Calculate actual model file sizes
- format_bytes() - Human-readable size formatting
- verify_model_download() - Check model download status
- verify_model_links() - Verify symlink integrity
- verify_category() - Process category with verification
- display_verification_summary() - Show global results

Usage:
  artifact_huggingface_download.sh verify -c models.yaml

Output includes:
  ✓ Downloaded/Missing model counts
  ✓ Properly linked/Broken link statistics
  ✓ File sizes and locations
  ✓ Last modified timestamps
  ⚠️ Size mismatch warnings
  📊 Disk space usage per category
  💡 Fix suggestions with exact commands

🤖 Generated with Claude Code (https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-25 13:33:37 +01:00
parent 60fcc359a4
commit 3308349e78

View File

@@ -330,6 +330,8 @@ validate_config() {
exit 1
fi
print_success "HuggingFace token configured: ${DIM}${HF_TOKEN:0:10}...${RESET}"
elif [[ "$COMMAND" == "verify" ]]; then
print_info "Verify mode: HuggingFace token not required"
fi
# Cache directory
@@ -340,12 +342,25 @@ validate_config() {
fi
print_success "Cache directory ready: ${CYAN}${CACHE_DIR}${RESET}"
else
print_info "Cache directory: ${CYAN}${CACHE_DIR}${RESET}"
# For link and verify commands, just show the directory
if [[ -d "$CACHE_DIR" ]]; then
print_success "Cache directory found: ${CYAN}${CACHE_DIR}${RESET}"
else
print_warning "Cache directory not found: ${CYAN}${CACHE_DIR}${RESET}"
fi
fi
# ComfyUI directory
if [[ "$COMMAND" == "link" ]] || [[ "$COMMAND" == "both" ]]; then
print_info "ComfyUI directory: ${CYAN}${COMFYUI_DIR}${RESET}"
if [[ "$COMMAND" == "link" ]] || [[ "$COMMAND" == "both" ]] || [[ "$COMMAND" == "verify" ]]; then
if [[ -d "$COMFYUI_DIR" ]]; then
print_success "ComfyUI directory found: ${CYAN}${COMFYUI_DIR}${RESET}"
else
if [[ "$COMMAND" == "verify" ]]; then
print_warning "ComfyUI directory not found: ${CYAN}${COMFYUI_DIR}${RESET}"
else
print_info "ComfyUI directory: ${CYAN}${COMFYUI_DIR}${RESET}"
fi
fi
fi
}
@@ -555,6 +570,347 @@ link_model() {
fi
}
# ============================================================================
# VERIFICATION FUNCTIONS - Model Status Checking
# ============================================================================
# Get actual disk usage for a model's files
#
# Arguments:
#   $1 - newline-separated list of file paths (may be empty)
# Outputs:
#   Total size in bytes of every listed path that resolves to a regular file,
#   written to stdout; "0" when the list is empty or nothing in it exists.
get_model_disk_usage() {
    local model_files="$1"

    if [[ -z "$model_files" ]]; then
        echo "0"
        return
    fi

    local total_bytes=0
    local file_path file_size
    while IFS= read -r file_path; do
        # -f follows symlinks, so a link to a regular file passes this test.
        [[ -f "$file_path" ]] || continue

        # stat must follow the link too (-L); otherwise a symlinked cache entry
        # is reported as the length of the link itself instead of its target.
        # The GNU form is tried first because BSD stat rejects -c outright,
        # whereas GNU stat treats -f as "filesystem mode".
        file_size=$(stat -L -c %s -- "$file_path" 2>/dev/null) \
            || file_size=$(stat -L -f %z -- "$file_path" 2>/dev/null) \
            || file_size=0

        # Never feed anything but a plain integer into the arithmetic below.
        [[ "$file_size" =~ ^[0-9]+$ ]] || file_size=0
        total_bytes=$((total_bytes + file_size))
    done <<< "$model_files"

    echo "$total_bytes"
}
# Format bytes to human-readable size
#
# Arguments:
#   $1 - size in bytes (integer); empty or non-numeric input is treated as 0
# Outputs:
#   "<n> B", "<n> KB", "<n.nn> MB" or "<n.nn> GB" on stdout, newline-terminated.
#   KB is a whole number; MB/GB carry two decimals, truncated (not rounded).
format_bytes() {
    local bytes="${1:-0}"

    # Guard the arithmetic below against empty or garbage input.
    [[ "$bytes" =~ ^-?[0-9]+$ ]] || bytes=0

    local unit label
    if (( bytes < 1024 )); then
        printf '%s B\n' "$bytes"
        return
    elif (( bytes < 1048576 )); then
        printf '%s KB\n' "$(( bytes / 1024 ))"
        return
    elif (( bytes < 1073741824 )); then
        unit=1048576
        label="MB"
    else
        unit=1073741824
        label="GB"
    fi

    # Pure integer math: no external bc process, and no dependence on the
    # locale's decimal separator the way printf '%.2f' has.
    printf '%d.%02d %s\n' \
        "$(( bytes / unit ))" \
        "$(( (bytes % unit) * 100 / unit ))" \
        "$label"
}
# Verify if model is downloaded
#
# Arguments:
#   $1 - repository id, passed to find_model_files
#   $2 - expected size in GB (accepted for interface compatibility; not used here)
#   $3 - filename filter, passed to find_model_files
# Outputs (single line on stdout):
#   found:     FOUND|<bytes>|<file count>|<directory of first file>|<mtime of first file>
#   not found: NOT_FOUND|0|0|
# Returns:
#   0 when at least one file was found, 1 otherwise.
verify_model_download() {
    local repo_id="$1"
    local filename_filter="$3"

    # Find model files in cache. An empty result is handled just below, so a
    # non-zero status from the lookup must not abort the caller.
    local model_files
    model_files=$(find_model_files "$repo_id" "$filename_filter" 2>/dev/null) || true

    if [[ -z "$model_files" ]]; then
        echo "NOT_FOUND|0|0|"
        return 1
    fi

    # One path per line.
    local file_count
    file_count=$(wc -l <<< "$model_files" | tr -d ' ')

    local actual_bytes
    actual_bytes=$(get_model_disk_usage "$model_files")

    # The first listed file stands in for the model's location and timestamp.
    local first_file="${model_files%%$'\n'*}"

    # Call dirname directly: piping through xargs would split a path that
    # contains spaces or quotes.
    local cache_path
    cache_path=$(dirname -- "$first_file")

    local mod_time=""
    if [[ -f "$first_file" ]]; then
        # GNU form first. Trying the BSD form first on GNU stat switches it to
        # filesystem mode, which prints to stdout before failing and would end
        # up inside mod_time (and therefore inside the result line).
        if mod_time=$(stat -L -c '%y' -- "$first_file" 2>/dev/null); then
            # "2025-01-31 12:00:00.123456789 +0100" -> drop fraction and zone.
            mod_time="${mod_time%%.*}"
        elif mod_time=$(stat -L -f '%Sm' -t '%Y-%m-%d %H:%M:%S' -- "$first_file" 2>/dev/null); then
            :
        else
            mod_time="Unknown"
        fi
    fi

    echo "FOUND|${actual_bytes}|${file_count}|${cache_path}|${mod_time}"
    return 0
}
# Verify model symlinks
#
# Globals:
#   COMFYUI_DIR (read) - links are expected under ${COMFYUI_DIR}/<model_type>
# Arguments:
#   $1 - repository id
#   $2 - model type (sub-directory of COMFYUI_DIR)
#   $3 - filename filter, passed to find_model_files
#   $4 - explicit file mappings, one "source_pattern|dest_filename" per line;
#        when empty, link names are derived as "<repo basename>-<file basename>"
# Outputs (stdout):
#   NOT_LINKED|0|0|0       target directory does not exist
#   NOT_DOWNLOADED|0|0|0   no model files were found
#   otherwise a header line "CHECKED|<total>|<valid>|<broken>" followed by one
#   line per expected link: "VALID|name|target", "BROKEN|name|target" or
#   "MISSING|name|"
# Returns:
#   0 when links were checked, 1 for the two early-exit cases.
verify_model_links() {
    local repo_id="$1"
    local model_type="$2"
    local filename_filter="$3"
    local file_mappings="$4"
    local target_dir="${COMFYUI_DIR}/${model_type}"

    # If target directory doesn't exist, nothing is linked
    if [[ ! -d "$target_dir" ]]; then
        echo "NOT_LINKED|0|0|0"
        return 1
    fi

    # Find model files in cache; emptiness is checked right after, so the
    # lookup's own exit status is deliberately ignored.
    local model_files
    model_files=$(find_model_files "$repo_id" "$filename_filter" 2>/dev/null) || true

    if [[ -z "$model_files" ]]; then
        echo "NOT_DOWNLOADED|0|0|0"
        return 1
    fi

    # Step 1: work out which link names are expected.
    local -a expected_names=()
    if [[ -n "$file_mappings" ]]; then
        local source_pattern dest_filename
        while IFS='|' read -r source_pattern dest_filename; do
            [[ -n "$source_pattern" ]] || continue
            expected_names+=("$dest_filename")
        done <<< "$file_mappings"
    else
        # No explicit mappings, check automatic prefixed filenames
        local model_name="${repo_id##*/}"
        local source_file
        while IFS= read -r source_file; do
            [[ -f "$source_file" ]] || continue
            expected_names+=("${model_name}-${source_file##*/}")
        done <<< "$model_files"
    fi

    # Step 2: classify every expected link in one place.
    local total_links=0
    local valid_links=0
    local broken_links=0
    local link_details=""
    local link_name link_path
    # The ${arr[@]+...} form keeps an empty array from tripping `set -u` on
    # older bash releases.
    for link_name in ${expected_names[@]+"${expected_names[@]}"}; do
        link_path="${target_dir}/${link_name}"
        total_links=$((total_links + 1))

        if [[ ! -L "$link_path" ]]; then
            link_details+="MISSING|${link_name}|"$'\n'
        elif [[ -e "$link_path" ]]; then
            # -e follows the link, so this means the target exists.
            valid_links=$((valid_links + 1))
            link_details+="VALID|${link_name}|$(readlink "$link_path")"$'\n'
        else
            broken_links=$((broken_links + 1))
            link_details+="BROKEN|${link_name}|$(readlink "$link_path")"$'\n'
        fi
    done

    # Real newlines plus printf '%s': backslashes inside names or targets are
    # emitted verbatim instead of being interpreted as escapes by `echo -e`.
    printf 'CHECKED|%d|%d|%d\n%s' "$total_links" "$valid_links" "$broken_links" "$link_details"
    return 0
}
# Convert a decimal gigabyte figure (e.g. "6.5") to whole bytes, truncating.
# Prints 0 for empty or non-numeric input so callers can always do arithmetic
# on the result.
_verify_gb_to_bytes() {
    local gb="$1"

    if [[ ! "$gb" =~ ^([0-9]*)[.]?([0-9]*)$ ]]; then
        echo 0
        return
    fi

    local whole="${BASH_REMATCH[1]}"
    local frac="${BASH_REMATCH[2]}"
    if [[ -z "${whole}${frac}" ]]; then
        echo 0
        return
    fi

    # Nine fractional digits keep frac * 2^30 inside 64-bit range.
    frac="${frac:0:9}"
    local scale=1
    local i
    for (( i = 0; i < ${#frac}; i++ )); do
        scale=$((scale * 10))
    done

    # 10# forces base 10 so "08" is not rejected as an invalid octal number.
    echo $(( 10#${whole:-0} * 1073741824 + 10#${frac:-0} * 1073741824 / scale ))
}

# Verify models by category
#
# Globals:
#   CONFIG_FILE (read); colour/icon variables such as BOLD_GREEN, DIM, RESET,
#   CYAN, LINK, ARROW_RIGHT (read, used only for output decoration)
# Arguments:
#   $1 - category key passed to parse_yaml / parse_file_mappings
#   $2 - category title shown in the section header
# Outputs:
#   Per-model report and a category summary on stdout. When the category has
#   models, also writes one line
#   "downloaded|missing|linked|broken|not_linked|total_size|expected_size"
#   to /tmp/verify_stats_<category>, which display_verification_summary reads.
# Returns:
#   0
verify_category() {
    local category="$1"
    local category_display="$2"
    local stats_file="/tmp/verify_stats_${category}"

    # A stats file left behind by an interrupted earlier run must not be
    # mistaken for this run's result (e.g. if the category is now empty).
    rm -f -- "$stats_file"

    print_section "${category_display}"

    # Get models for this category. Each line is consumed below as
    # repo_id|description|size_gb|essential|model_type|filename.
    local models_data
    models_data=$(parse_yaml "$CONFIG_FILE" "$category")

    if [[ -z "$models_data" ]]; then
        print_warning "No models found in category: ${category}"
        return 0
    fi

    # Some wc implementations pad the count with spaces; strip them so the
    # value is clean for display as well as arithmetic.
    local total_models
    total_models=$(wc -l <<< "$models_data" | tr -d ' ')

    local current=0
    local models_downloaded=0
    local models_missing=0
    local models_linked=0
    local models_broken=0
    local models_not_linked=0
    local total_size_bytes=0
    local expected_size_bytes=0

    local repo_id description size_gb essential model_type filename
    local expected_bytes download_result download_status
    local actual_bytes file_count cache_path mod_time
    local diff_bp abs_bp sign size_diff_pct
    local file_mappings link_result first_line link_status link_details
    local total_links valid_links broken_links
    local link_state link_name link_target

    while IFS='|' read -r repo_id description size_gb essential model_type filename; do
        current=$((current + 1))

        echo ""
        print_step "$current" "$total_models" "${BOLD_MAGENTA}${description}${RESET}"
        print_detail "Repository: ${BOLD_WHITE}${repo_id}${RESET}"
        print_detail "Category: ${CYAN}${category}${RESET} ${ARROW_RIGHT} ${CYAN}${model_type}${RESET}"
        print_detail "Expected Size: ${BOLD_YELLOW}${size_gb} GB${RESET}"

        expected_bytes=$(_verify_gb_to_bytes "$size_gb")
        expected_size_bytes=$((expected_size_bytes + expected_bytes))

        # Verify download status. The outcome is carried in the first field, so
        # the helper's non-zero "not found" status must not abort under set -e.
        echo ""
        download_result=$(verify_model_download "$repo_id" "$size_gb" "$filename") || true
        IFS='|' read -r download_status actual_bytes file_count cache_path mod_time <<< "$download_result"

        if [[ "$download_status" == "FOUND" ]]; then
            [[ "$actual_bytes" =~ ^[0-9]+$ ]] || actual_bytes=0
            total_size_bytes=$((total_size_bytes + actual_bytes))

            print_success "Download Status: ${BOLD_GREEN}DOWNLOADED${RESET}"
            print_detail "${DIM}Path: ${cache_path}${RESET}"
            print_detail "${DIM}Actual Size: $(format_bytes "$actual_bytes") (${actual_bytes} bytes)${RESET}"
            print_detail "${DIM}Files: ${file_count} file(s)${RESET}"
            print_detail "${DIM}Modified: ${mod_time}${RESET}"

            # Check for size mismatch. Skipped when no usable expected size is
            # configured (avoids dividing by zero). The difference is computed
            # in hundredths of a percent so that e.g. 10.5% is not truncated to
            # 10% and silently accepted.
            if (( expected_bytes > 0 )); then
                diff_bp=$(( (actual_bytes - expected_bytes) * 10000 / expected_bytes ))
                abs_bp="${diff_bp#-}"
                if (( abs_bp > 1000 )); then
                    sign=""
                    if (( diff_bp < 0 )); then
                        sign="-"
                    fi
                    printf -v size_diff_pct '%s%d.%02d' "$sign" "$((abs_bp / 100))" "$((abs_bp % 100))"
                    print_warning "Size mismatch: ${size_diff_pct}% difference from expected"
                fi
            fi

            models_downloaded=$((models_downloaded + 1))

            # Verify link status
            echo ""
            file_mappings=$(parse_file_mappings "$CONFIG_FILE" "$category" "$repo_id") || true
            link_result=$(verify_model_links "$repo_id" "$model_type" "$filename" "$file_mappings") || true

            # Split header from detail lines with parameter expansion; running
            # the text through `echo -e` again would reinterpret backslashes.
            first_line="${link_result%%$'\n'*}"
            if [[ "$link_result" == *$'\n'* ]]; then
                link_details="${link_result#*$'\n'}"
            else
                link_details=""
            fi
            IFS='|' read -r link_status total_links valid_links broken_links <<< "$first_line"

            if [[ "$link_status" == "CHECKED" ]]; then
                total_links="${total_links:-0}"
                valid_links="${valid_links:-0}"
                broken_links="${broken_links:-0}"

                if (( broken_links > 0 )); then
                    print_warning "Link Status: ${BOLD_YELLOW}${broken_links} BROKEN LINK(S)${RESET}"
                    models_broken=$((models_broken + 1))
                elif (( total_links > 0 && valid_links == total_links )); then
                    print_success "Link Status: ${BOLD_GREEN}LINKED${RESET} (${valid_links}/${total_links})"
                    models_linked=$((models_linked + 1))
                elif (( valid_links > 0 )); then
                    print_warning "Link Status: ${BOLD_YELLOW}PARTIALLY LINKED${RESET} (${valid_links}/${total_links})"
                    models_not_linked=$((models_not_linked + 1))
                else
                    # Zero valid links is "not linked", not "partially linked".
                    print_error "Link Status: ${BOLD_RED}NOT LINKED${RESET} (${valid_links}/${total_links})"
                    models_not_linked=$((models_not_linked + 1))
                fi

                # Show link details
                if [[ -n "$link_details" ]]; then
                    while IFS='|' read -r link_state link_name link_target; do
                        [[ -n "$link_state" ]] || continue
                        case "$link_state" in
                            VALID)
                                print_detail "${LINK} ${BOLD_GREEN}${RESET} ${DIM}${link_name}${RESET}"
                                ;;
                            BROKEN)
                                print_detail "${LINK} ${BOLD_RED}${RESET} ${DIM}${link_name}${RESET} ${BOLD_RED}(BROKEN)${RESET}"
                                ;;
                            MISSING)
                                print_detail "${LINK} ${BOLD_YELLOW}${RESET} ${DIM}${link_name}${RESET} ${BOLD_YELLOW}(NOT LINKED)${RESET}"
                                ;;
                        esac
                    done <<< "$link_details"
                fi
            else
                print_error "Link Status: ${BOLD_RED}NOT LINKED${RESET}"
                models_not_linked=$((models_not_linked + 1))
            fi
        else
            print_error "Download Status: ${BOLD_RED}NOT DOWNLOADED${RESET}"
            models_missing=$((models_missing + 1))
            echo ""
            print_info "Link Status: ${DIM}N/A (model not downloaded)${RESET}"
        fi

        show_progress "$current" "$total_models"
    done <<< "$models_data"

    echo -e "\n"

    # Category summary
    local total_size_human
    total_size_human=$(format_bytes "$total_size_bytes")
    local expected_size_human
    expected_size_human=$(format_bytes "$expected_size_bytes")

    print_info "Category Summary:"
    echo -e " ${BOLD_WHITE}Total Models:${RESET} ${total_models}"
    echo -e " ${BOLD_GREEN}✓ Downloaded:${RESET} ${models_downloaded} ($(( models_downloaded * 100 / total_models ))%)"
    echo -e " ${BOLD_RED}✗ Missing:${RESET} ${models_missing} ($(( models_missing * 100 / total_models ))%)"
    echo -e " ${BOLD_GREEN}✓ Properly Linked:${RESET} ${models_linked}"
    echo -e " ${BOLD_YELLOW}⚠ Broken Links:${RESET} ${models_broken}"
    echo -e " ${BOLD_YELLOW}○ Not Linked:${RESET} ${models_not_linked}"
    echo -e " ${BOLD_CYAN}📊 Disk Usage:${RESET} ${total_size_human} / ${expected_size_human} expected"

    # Return statistics for global summary (format: downloaded|missing|linked|broken|not_linked|total_size|expected_size)
    # NOTE(review): a fixed path under /tmp is predictable; moving to mktemp
    # needs a matching change in display_verification_summary.
    printf '%s\n' "${models_downloaded}|${models_missing}|${models_linked}|${models_broken}|${models_not_linked}|${total_size_bytes}|${expected_size_bytes}" > "$stats_file"
}
# Process models by category
process_category() {
local category="$1"
@@ -619,6 +975,106 @@ process_category() {
print_info "Category Summary: ${BOLD_GREEN}${succeeded} succeeded${RESET}, ${BOLD_RED}${failed} failed${RESET}"
}
# Display verification summary
#
# Globals:
#   CACHE_DIR, COMFYUI_DIR, CONFIG_FILE, BOX_DOUBLE and colour/icon variables (read)
# Arguments:
#   $1 - start time, seconds since epoch
#   $2 - end time, seconds since epoch
#   $3 - newline-separated category names
# Behaviour:
#   Sums the per-category lines found in /tmp/verify_stats_<category>
#   (downloaded|missing|linked|broken|not_linked|size|expected), deletes each
#   file after reading it, prints the global totals and, when anything is
#   missing or badly linked, the commands that fix it.
display_verification_summary() {
    local start_time="$1"
    local end_time="$2"
    local categories="$3"

    local elapsed=$((end_time - start_time))
    local minutes=$((elapsed / 60))
    local seconds=$((elapsed % 60))

    # Running totals across all categories
    local total_models=0
    local total_downloaded=0
    local total_missing=0
    local total_linked=0
    local total_broken=0
    local total_not_linked=0
    local total_size_bytes=0
    local expected_size_bytes=0

    local stats_path stats_line
    local n_down n_miss n_link n_broken n_unlinked n_size n_expected
    while IFS= read -r category; do
        stats_path="/tmp/verify_stats_${category}"
        [[ -f "$stats_path" ]] || continue

        stats_line=$(cat "$stats_path")
        IFS='|' read -r n_down n_miss n_link n_broken n_unlinked n_size n_expected <<< "$stats_line"

        total_models=$((total_models + n_down + n_miss))
        total_downloaded=$((total_downloaded + n_down))
        total_missing=$((total_missing + n_miss))
        total_linked=$((total_linked + n_link))
        total_broken=$((total_broken + n_broken))
        total_not_linked=$((total_not_linked + n_unlinked))
        total_size_bytes=$((total_size_bytes + n_size))
        expected_size_bytes=$((expected_size_bytes + n_expected))

        rm -f "$stats_path"
    done <<< "$categories"

    local total_size_human
    total_size_human=$(format_bytes "$total_size_bytes")
    local expected_size_human
    expected_size_human=$(format_bytes "$expected_size_bytes")

    # Percentages stay at 0 when there were no models at all.
    local pct_downloaded=0 pct_missing=0 pct_linked=0 pct_broken=0 pct_not_linked=0
    if (( total_models > 0 )); then
        pct_downloaded=$((total_downloaded * 100 / total_models))
        pct_missing=$((total_missing * 100 / total_models))
        pct_linked=$((total_linked * 100 / total_models))
        pct_broken=$((total_broken * 100 / total_models))
        pct_not_linked=$((total_not_linked * 100 / total_models))
    fi

    # 80-column horizontal rule, built once and printed above and below.
    local rule_line
    rule_line=$(printf '%.0s'"${BOX_DOUBLE}" $(seq 1 80))

    print_banner "VERIFICATION COMPLETE"
    echo -e "${BOLD_CYAN}${STAR} Global Summary${RESET}"
    echo -e "${CYAN}${rule_line}${RESET}"
    echo -e " ${BOLD_WHITE}Total Models:${RESET} ${total_models}"
    echo ""
    echo -e " ${BOLD_GREEN}✓ Downloaded:${RESET} ${total_downloaded} (${pct_downloaded}%)"
    echo -e " ${BOLD_RED}✗ Missing:${RESET} ${total_missing} (${pct_missing}%)"
    echo ""
    echo -e " ${BOLD_GREEN}✓ Properly Linked:${RESET} ${total_linked} (${pct_linked}%)"
    echo -e " ${BOLD_YELLOW}⚠ Broken Links:${RESET} ${total_broken} (${pct_broken}%)"
    echo -e " ${BOLD_YELLOW}○ Not Linked:${RESET} ${total_not_linked} (${pct_not_linked}%)"
    echo ""
    echo -e " ${BOLD_CYAN}📊 Disk Space Used:${RESET} ${total_size_human} / ${expected_size_human} expected"
    echo -e " ${BOLD_WHITE}Cache Directory:${RESET} ${CYAN}${CACHE_DIR}${RESET}"
    echo -e " ${BOLD_WHITE}ComfyUI Directory:${RESET} ${CYAN}${COMFYUI_DIR}${RESET}"
    echo -e " ${BOLD_WHITE}Duration:${RESET} ${BOLD_YELLOW}${minutes}m ${seconds}s${RESET}"
    echo -e "${CYAN}${rule_line}${RESET}"

    # Provide actionable suggestions
    local link_issues=$((total_broken + total_not_linked))
    if (( total_missing == 0 && link_issues == 0 )); then
        echo -e "\n${BOLD_GREEN}${SPARKLES} All models verified successfully! ${SPARKLES}${RESET}\n"
        return
    fi

    echo -e "\n${BOLD_YELLOW}${WARNING} Issues Found - Suggested Actions:${RESET}\n"

    if (( total_missing > 0 )); then
        echo -e " ${BOLD_RED}${RESET} ${total_missing} model(s) not downloaded"
        echo -e " ${DIM}Fix:${RESET} ${CYAN}$0 download -c ${CONFIG_FILE}${RESET}"
    fi

    if (( link_issues > 0 )); then
        echo -e " ${BOLD_YELLOW}${RESET} ${link_issues} model(s) with link issues"
        echo -e " ${DIM}Fix:${RESET} ${CYAN}$0 link -c ${CONFIG_FILE}${RESET}"
    fi

    if (( total_missing > 0 && link_issues > 0 )); then
        echo -e " ${BOLD_CYAN}${RESET} Fix everything in one go:"
        echo -e " ${CYAN}$0 both -c ${CONFIG_FILE}${RESET}"
    fi

    echo ""
}
# Display summary
display_summary() {
local start_time="$1"
@@ -656,10 +1112,15 @@ main() {
start_time=$(date +%s)
# Display beautiful banner
print_banner "${ROCKET} ComfyUI Model Downloader ${ROCKET}"
echo -e "${BOLD_CYAN}A Beautiful CLI Tool for Downloading AI Models${RESET}"
echo -e "${DIM}Powered by HuggingFace ${LINK} Configuration-Driven ${STAR}${RESET}\n"
if [[ "$COMMAND" == "verify" ]]; then
print_banner "${SPARKLES} ComfyUI Model Verification ${SPARKLES}"
echo -e "${BOLD_CYAN}Comprehensive Model Status Check${RESET}"
echo -e "${DIM}Verify Downloads & Links ${LINK} Configuration-Driven ${STAR}${RESET}\n"
else
print_banner "${ROCKET} ComfyUI Model Downloader ${ROCKET}"
echo -e "${BOLD_CYAN}A Beautiful CLI Tool for Downloading AI Models${RESET}"
echo -e "${DIM}Powered by HuggingFace ${LINK} Configuration-Driven ${STAR}${RESET}\n"
fi
# Check dependencies
check_dependencies
@@ -681,25 +1142,43 @@ main() {
exit 1
fi
local total_succeeded=0
local total_failed=0
# Handle verify command separately
if [[ "$COMMAND" == "verify" ]]; then
# Process each category with verification
while IFS= read -r category; do
# Get category display name (capitalize and add spaces)
local category_display
category_display=$(echo "$category" | sed 's/_/ /g' | sed 's/\b\(.\)/\u\1/g')
# Process each category
while IFS= read -r category; do
# Get category display name (capitalize and add spaces)
local category_display
category_display=$(echo "$category" | sed 's/_/ /g' | sed 's/\b\(.\)/\u\1/g')
verify_category "$category" "$category_display"
done <<< "$categories"
process_category "$category" "$category_display"
# Display verification summary
local end_time
end_time=$(date +%s)
display_verification_summary "$start_time" "$end_time" "$categories"
else
# Original download/link logic
local total_succeeded=0
local total_failed=0
# Update counters (this is simplified, you'd need to track actual numbers)
total_succeeded=$((total_succeeded+1))
done <<< "$categories"
# Process each category
while IFS= read -r category; do
# Get category display name (capitalize and add spaces)
local category_display
category_display=$(echo "$category" | sed 's/_/ /g' | sed 's/\b\(.\)/\u\1/g')
# Display summary
local end_time
end_time=$(date +%s)
display_summary "$start_time" "$end_time" "$total_succeeded" "$total_failed"
process_category "$category" "$category_display"
# Update counters (this is simplified, you'd need to track actual numbers)
total_succeeded=$((total_succeeded+1))
done <<< "$categories"
# Display summary
local end_time
end_time=$(date +%s)
display_summary "$start_time" "$end_time" "$total_succeeded" "$total_failed"
fi
}
# ============================================================================
@@ -722,7 +1201,7 @@ while [[ $# -gt 0 ]]; do
COMFYUI_DIR="$2"
shift 2
;;
download|link|both)
download|link|both|verify)
COMMAND="$1"
shift
;;
@@ -730,12 +1209,13 @@ while [[ $# -gt 0 ]]; do
echo "Usage: $0 [COMMAND] [options]"
echo ""
echo "Commands:"
echo " download Download models only (default: both)"
echo " download Download models only"
echo " link Create symlinks only (models must already be downloaded)"
echo " both Download and create symlinks (default)"
echo " verify Verify model downloads and links (read-only)"
echo ""
echo "Options:"
echo " -c, --config FILE Configuration file (default: NONE)"
echo " -c, --config FILE Configuration file (required)"
echo " --cache-dir DIR Cache directory (default: auto-detect)"
echo " RunPod: /workspace/huggingface_cache"
echo " Local: ~/.cache/huggingface"
@@ -746,8 +1226,17 @@ while [[ $# -gt 0 ]]; do
echo ""
echo "Examples:"
echo " $0 download -c models.yaml"
echo " $0 link --comfyui-dir /opt/ComfyUI/models"
echo " $0 link --comfyui-dir /opt/ComfyUI/models -c models.yaml"
echo " $0 both -c models.yaml --cache-dir /data/hf-cache"
echo " $0 verify -c models.yaml"
echo ""
echo "Verify Command:"
echo " The verify command performs comprehensive checks on all models:"
echo " - Download status (file existence, size, location)"
echo " - Link status (valid, broken, or missing symlinks)"
echo " - Size mismatch warnings (actual vs expected)"
echo " - Disk space usage statistics"
echo " - Actionable fix suggestions"
exit 0
;;
-*)