diff --git a/artifact_huggingface_download.sh b/artifact_huggingface_download.sh
index c607a06..86a9e3c 100755
--- a/artifact_huggingface_download.sh
+++ b/artifact_huggingface_download.sh
@@ -255,9 +255,8 @@ try:
         description = model.get('description', '')
         size_gb = model.get('size_gb', 0)
         essential = model.get('essential', False)
-        model_type = model.get('type', 'checkpoints')
         filename = model.get('filename', '')
-        print('{0}|{1}|{2}|{3}|{4}|{5}'.format(repo_id, description, size_gb, essential, model_type, filename))
+        print('{0}|{1}|{2}|{3}|{4}'.format(repo_id, description, size_gb, essential, filename))
     else:
         sys.exit(1)
 except Exception as e:
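Reviewer note on the hunk above: the parser record narrows from six pipe-separated fields to five, and every `while IFS='|' read` call site further down is updated to match. A minimal sketch of the new contract, with a made-up record for illustration:

    # Hypothetical 5-field record in the new format (model_type removed)
    record='org/some-model|Example model|4.5|true|model.safetensors'
    while IFS='|' read -r repo_id description size_gb essential filename; do
        echo "repo=${repo_id} essential=${essential} file=${filename}"
    done <<< "$record"

A caller still reading six fields would silently shift `filename` into the dropped slot, which is why the `verify_category` and `process_category` loops change in the same patch.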
@@ -577,22 +576,42 @@ EOPYDOWNLOAD
 # Create symlink for a model
 link_model() {
     local repo_id="$1"
-    local model_type="$2"
-    local filename_filter="$3"
-    local file_mappings="$4"  # Optional: explicit source|dest mappings
+    local filename_filter="$2"
+    local file_mappings="$3"  # Optional: explicit source|dest mappings
 
-    print_detail "Linking to: ${CYAN}${COMFYUI_DIR}/${model_type}/${RESET}"
-
-    # Create ComfyUI subdirectory if it doesn't exist
-    local target_dir="${COMFYUI_DIR}/${model_type}"
-    if [[ ! -d "$target_dir" ]]; then
-        print_info "Creating directory: ${CYAN}${target_dir}${RESET}"
-        mkdir -p "$target_dir"
+    # Extract unique target directories from file mappings
+    local target_dirs=()
+    if [[ -n "$file_mappings" ]]; then
+        while IFS='|' read -r source_pattern dest_path; do
+            if [[ -n "$dest_path" && "$dest_path" == *"/"* ]]; then
+                local dir_path="${dest_path%/*}"
+                target_dirs+=("${COMFYUI_DIR}/${dir_path}")
+            fi
+        done <<< "$file_mappings"
     fi
 
-    # Clean existing symlinks for this repo in the target directory
-    # This ensures we start fresh and don't have stale links
-    find "$target_dir" -type l -lname "*${repo_id/\//-}*" -delete 2>/dev/null || true
+    # Remove duplicates from target_dirs array
+    local unique_dirs=($(printf '%s\n' "${target_dirs[@]}" | sort -u))
+
+    if [[ ${#unique_dirs[@]} -eq 0 ]]; then
+        print_warning "No target directories found in file mappings"
+        return 1
+    fi
+
+    # Display target directories
+    for target_dir in "${unique_dirs[@]}"; do
+        print_detail "Linking to: ${CYAN}${target_dir}/${RESET}"
+
+        # Create ComfyUI subdirectory if it doesn't exist
+        if [[ ! -d "$target_dir" ]]; then
+            print_info "Creating directory: ${CYAN}${target_dir}${RESET}"
+            mkdir -p "$target_dir"
+        fi
+
+        # Clean existing symlinks for this repo in the target directory
+        # This ensures we start fresh and don't have stale links
+        find "$target_dir" -type l -lname "*${repo_id/\//-}*" -delete 2>/dev/null || true
+    done
 
     # Find model files in cache
     local model_files
@@ -609,13 +628,13 @@ link_model() {
 
     # Dry-run mode: show what would be linked
     if [[ "$DRY_RUN" == true ]]; then
-        print_info "DRY-RUN: Would link ${BOLD_YELLOW}${file_count}${RESET} file(s) to ${target_dir}/"
+        print_info "DRY-RUN: Would link ${BOLD_YELLOW}${file_count}${RESET} file(s)"
         print_detail "Files that would be linked:"
-        while IFS= read -r file; do
-            local basename_file
-            basename_file=$(basename "$file")
-            print_detail "  ${LINK} Would link: ${DIM}${basename_file}${RESET}"
-        done <<< "$model_files"
+        while IFS='|' read -r source_pattern dest_path; do
+            if [[ -n "$dest_path" ]]; then
+                print_detail "  ${LINK} Would link: ${DIM}${dest_path}${RESET}"
+            fi
+        done <<< "$file_mappings"
         return 0
     fi
@@ -625,8 +644,8 @@ link_model() {
 
     if [[ -n "$file_mappings" ]]; then
         print_detail "Using explicit file mappings from YAML"
-        while IFS='|' read -r source_pattern dest_filename; do
-            if [[ -z "$source_pattern" ]]; then
+        while IFS='|' read -r source_pattern dest_path; do
+            if [[ -z "$source_pattern" ]] || [[ -z "$dest_path" ]]; then
                 continue
             fi
 
@@ -639,19 +658,26 @@ link_model() {
                 continue
             fi
 
-            local link_path="${target_dir}/${dest_filename}"
+            # Construct full link path with directory included in dest_path
+            local link_path="${COMFYUI_DIR}/${dest_path}"
+            local link_dir=$(dirname "$link_path")
+
+            # Ensure directory exists
+            if [[ ! -d "$link_dir" ]]; then
+                mkdir -p "$link_dir"
+            fi
 
             # Remove existing symlink or file if it exists
             if [[ -L "$link_path" ]]; then
                 rm -f "$link_path"
             elif [[ -e "$link_path" ]]; then
-                print_warning "File already exists (not a symlink): ${dest_filename}"
+                print_warning "File already exists (not a symlink): ${dest_path}"
                 continue
             fi
 
             # Create symlink
             ln -s "$source_file" "$link_path"
-            print_detail "${LINK} Linked: ${DIM}${dest_filename}${RESET}"
+            print_detail "${LINK} Linked: ${DIM}${dest_path}${RESET}"
             linked_count=$((linked_count+1))
         done <<< "$file_mappings"
     else
@@ -701,6 +727,7 @@ cleanup_unused_cache_files() {
     local repo_id="$1"
     local cache_dir="$2"
     local comfyui_dir="$3"
+    local file_mappings="$4"
 
     # Find the latest snapshot directory for this repo
     local repo_cache_dir="${cache_dir}/models--${repo_id//\//--}"
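Reviewer note: with `model_type` gone, the ComfyUI subdirectory now travels inside each mapping's destination, and `link_model` derives its target directories from the `dest` side of the `source|dest` lines. A minimal sketch of that derivation, using hypothetical mappings (the real string comes from `parse_file_mappings`):

    # Hypothetical mappings: one "source|dest" pair per line;
    # dest is relative to COMFYUI_DIR and carries its own subdirectory
    file_mappings=$'src/a.safetensors|vae/a.safetensors\nsrc/b.safetensors|diffusion_models/b.safetensors'
    while IFS='|' read -r source_pattern dest_path; do
        [[ -n "$dest_path" && "$dest_path" == *"/"* ]] && echo "${dest_path%/*}"
    done <<< "$file_mappings" | sort -u
    # prints "diffusion_models" and "vae" — the directories link_model will create

One caveat worth flagging: `local unique_dirs=($(printf ... | sort -u))` relies on word splitting, so a directory name containing spaces would break; that matches the script's existing style but is worth knowing.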
@@ -711,15 +738,35 @@
 
     print_info "Analyzing cache for ${BOLD_WHITE}${repo_id}${RESET}..."
 
-    # Use Python to find unreferenced files
+    # Use Python to clean up old snapshots AND non-whitelisted files in latest snapshot
     local cleanup_result
-    cleanup_result=$(python3 - "$repo_cache_dir" "$comfyui_dir" <<'EOPYCLEANUP'
+    cleanup_result=$(python3 - "$repo_cache_dir" "$comfyui_dir" "$file_mappings" <<'EOPYCLEANUP'
 import os
 import sys
+import shutil
 from pathlib import Path
 
 repo_cache = Path(sys.argv[1])
 comfyui_dir = Path(sys.argv[2])
+file_mappings_str = sys.argv[3] if len(sys.argv) > 3 else ""
+
+# Parse whitelist from file_mappings (format: "source|dest\nsource|dest\n...")
+whitelist_sources = set()
+if file_mappings_str:
+    for line in file_mappings_str.strip().split('\n'):
+        if '|' in line:
+            source = line.split('|')[0].strip()
+            if source:
+                whitelist_sources.add(source)
+
+# Essential HuggingFace metadata files to always preserve
+ESSENTIAL_FILES = {
+    '.gitattributes',
+    'README.md',
+    'model_index.json',
+    '.huggingface',
+    'config.json'
+}
 
 # Find latest snapshot
 snapshots_dir = repo_cache / 'snapshots'
@@ -731,91 +778,186 @@
 if not snapshots:
     sys.exit(0)
 
 latest_snapshot = snapshots[0]
+old_snapshots = snapshots[1:]  # All snapshots except the latest
 
-# Find all files in snapshot
-all_files = []
-for file_path in latest_snapshot.rglob('*'):
-    if file_path.is_file():
-        all_files.append(file_path)
+# Calculate size of old snapshot directories
+old_snapshot_size = 0
+old_snapshot_paths = []
+for old_snap in old_snapshots:
+    try:
+        # Calculate size of old snapshot directory
+        for file_path in old_snap.rglob('*'):
+            if file_path.is_file():
+                old_snapshot_size += file_path.stat().st_size
+        old_snapshot_paths.append(str(old_snap))
+    except Exception:
+        pass
 
-# Check which files have symlinks pointing to them
-unreferenced_files = []
-total_size = 0
+# Find non-whitelisted files in latest snapshot
+unwanted_files = []
+unwanted_size = 0
+if whitelist_sources:
+    for item in latest_snapshot.rglob('*'):
+        if item.is_file():
+            # Get relative path from snapshot root
+            rel_path = str(item.relative_to(latest_snapshot))
 
-for file_path in all_files:
-    # Search for symlinks in ComfyUI models that point to this file
-    has_symlink = False
+            # Check if this file is in whitelist or is essential
+            is_whitelisted = False
 
-    # Check all possible symlinks in ComfyUI
-    for comfy_file in comfyui_dir.rglob('*'):
-        if comfy_file.is_symlink():
-            try:
-                if comfy_file.resolve() == file_path:
-                    has_symlink = True
-                    break
-            except:
-                pass
+            # Check exact match first
+            if rel_path in whitelist_sources:
+                is_whitelisted = True
+            else:
+                # Check if any whitelist entry matches this file
+                # (handles cases where whitelist has paths like "split_files/diffusion_models/file.safetensors")
+                for whitelisted in whitelist_sources:
+                    if rel_path == whitelisted or rel_path.endswith('/' + whitelisted):
+                        is_whitelisted = True
+                        break
 
-    if not has_symlink:
-        file_size = file_path.stat().st_size
-        unreferenced_files.append(str(file_path))
-        total_size += file_size
+            # Check if it's an essential file
+            if item.name in ESSENTIAL_FILES:
+                is_whitelisted = True
 
-# Output results
-print(f"{len(unreferenced_files)}|{total_size}")
-for f in unreferenced_files:
-    print(f)
+            # If not whitelisted, mark for deletion
+            if not is_whitelisted:
+                unwanted_files.append(str(item))
+                unwanted_size += item.stat().st_size
+
+# Output results: old_snapshot_count|old_snapshot_size|unwanted_files_count|unwanted_size
+print(f"{len(old_snapshot_paths)}|{old_snapshot_size}|{len(unwanted_files)}|{unwanted_size}")
+for snap in old_snapshot_paths:
+    print(snap)
+for unwanted_file in unwanted_files:
+    print(unwanted_file)
 EOPYCLEANUP
 )
 
     # Parse results
     local first_line
     first_line=$(echo "$cleanup_result" | head -n 1)
-    local file_count
-    file_count=$(echo "$first_line" | cut -d'|' -f1)
-    local total_bytes
-    total_bytes=$(echo "$first_line" | cut -d'|' -f2)
+    local snapshot_count
+    snapshot_count=$(echo "$first_line" | cut -d'|' -f1)
+    local snapshot_bytes
+    snapshot_bytes=$(echo "$first_line" | cut -d'|' -f2)
+    local unwanted_count
+    unwanted_count=$(echo "$first_line" | cut -d'|' -f3)
+    local unwanted_bytes
+    unwanted_bytes=$(echo "$first_line" | cut -d'|' -f4)
 
-    if [[ "$file_count" -eq 0 ]]; then
-        print_success "No unreferenced files found - cache is optimal"
+    # Check if there's anything to clean
+    if [[ "$snapshot_count" -eq 0 ]] && [[ "$unwanted_count" -eq 0 ]]; then
+        print_success "No cleanup needed - cache is optimal"
         return 0
     fi
 
     # Convert bytes to MB
-    local size_mb
-    size_mb=$(echo "scale=2; $total_bytes / 1048576" | bc)
+    local snapshot_mb
+    snapshot_mb=$(echo "scale=2; $snapshot_bytes / 1048576" | bc)
+    local unwanted_mb
+    unwanted_mb=$(echo "scale=2; $unwanted_bytes / 1048576" | bc)
+    local total_mb
+    total_mb=$(echo "scale=2; ($snapshot_bytes + $unwanted_bytes) / 1048576" | bc)
+
+    # Get list of items to delete (skip first line which is summary)
+    # First snapshot_count lines are old snapshots, remaining lines are unwanted files
+    local all_items
+    all_items=$(echo "$cleanup_result" | tail -n +2)
+
+    local snapshots_to_delete
+    if [[ "$snapshot_count" -gt 0 ]]; then
+        snapshots_to_delete=$(echo "$all_items" | head -n "$snapshot_count")
+    else
+        snapshots_to_delete=""
+    fi
 
-    # Get list of files to delete (skip first line which is summary)
     local files_to_delete
-    files_to_delete=$(echo "$cleanup_result" | tail -n +2)
+    if [[ "$unwanted_count" -gt 0 ]]; then
+        files_to_delete=$(echo "$all_items" | tail -n "$unwanted_count")
+    else
+        files_to_delete=""
+    fi
 
     if [[ "$DRY_RUN" == true ]]; then
-        print_warning "DRY-RUN: Would delete ${BOLD_YELLOW}${file_count}${RESET} unreferenced file(s) (~${size_mb} MB)"
-        if [[ -n "$files_to_delete" ]]; then
-            print_detail "Files that would be deleted:"
-            while IFS= read -r file; do
-                local basename
-                basename=$(basename "$file")
-                print_detail "  ${CROSS_MARK} Would delete: ${DIM}${basename}${RESET}"
-            done <<< "$files_to_delete"
+        if [[ "$snapshot_count" -gt 0 ]]; then
+            print_warning "DRY-RUN: Would clean up ${BOLD_YELLOW}${snapshot_count}${RESET} old snapshot(s) (~${snapshot_mb} MB)"
+            if [[ -n "$snapshots_to_delete" ]]; then
+                print_detail "Old snapshots that would be deleted:"
+                while IFS= read -r snapshot; do
+                    local basename
+                    basename=$(basename "$snapshot")
+                    print_detail "  ${CROSS_MARK} Would delete snapshot: ${DIM}${basename}${RESET}"
+                done <<< "$snapshots_to_delete"
+            fi
         fi
+
+        if [[ "$unwanted_count" -gt 0 ]]; then
+            print_warning "DRY-RUN: Would clean up ${BOLD_YELLOW}${unwanted_count}${RESET} non-whitelisted file(s) (~${unwanted_mb} MB)"
+            if [[ -n "$files_to_delete" ]]; then
+                print_detail "Non-whitelisted files that would be deleted (showing first 10):"
+                echo "$files_to_delete" | head -n 10 | while IFS= read -r file; do
+                    local basename
+                    basename=$(basename "$file")
+                    print_detail "  ${CROSS_MARK} Would delete file: ${DIM}${basename}${RESET}"
+                done
+                if [[ "$unwanted_count" -gt 10 ]]; then
+                    print_detail "  ${DIM}... and $((unwanted_count - 10)) more${RESET}"
+                fi
+            fi
+        fi
+
+        print_info "Total space that would be freed: ~${total_mb} MB"
         return 0
     fi
 
-    # Actually delete files
-    print_warning "Cleaning up ${BOLD_YELLOW}${file_count}${RESET} unreferenced file(s) (~${size_mb} MB)..."
-    local deleted_count=0
-    while IFS= read -r file; do
-        if [[ -f "$file" ]]; then
-            rm -f "$file" && deleted_count=$((deleted_count+1))
-        fi
-    done <<< "$files_to_delete"
+    # Actually delete items
+    local deleted_snapshots=0
+    local deleted_files=0
 
-    if [[ $deleted_count -eq $file_count ]]; then
-        print_success "Cleaned up ${deleted_count} file(s), freed ~${size_mb} MB"
+    # Delete old snapshot directories
+    if [[ "$snapshot_count" -gt 0 ]]; then
+        print_warning "Cleaning up ${BOLD_YELLOW}${snapshot_count}${RESET} old snapshot(s) (~${snapshot_mb} MB)..."
+        while IFS= read -r snapshot; do
+            if [[ -d "$snapshot" ]]; then
+                rm -rf "$snapshot" && deleted_snapshots=$((deleted_snapshots+1))
+            fi
+        done <<< "$snapshots_to_delete"
+    fi
+
+    # Delete non-whitelisted files
+    if [[ "$unwanted_count" -gt 0 ]]; then
+        print_warning "Cleaning up ${BOLD_YELLOW}${unwanted_count}${RESET} non-whitelisted file(s) (~${unwanted_mb} MB)..."
+        while IFS= read -r file; do
+            if [[ -f "$file" ]]; then
+                rm -f "$file" && deleted_files=$((deleted_files+1))
+            fi
+        done <<< "$files_to_delete"
+    fi
+
+    # Report results
+    local success=true
+    if [[ "$snapshot_count" -gt 0 ]] && [[ $deleted_snapshots -eq $snapshot_count ]]; then
+        print_success "Cleaned up ${deleted_snapshots} old snapshot(s), freed ~${snapshot_mb} MB"
+    elif [[ "$snapshot_count" -gt 0 ]]; then
+        print_warning "Cleaned up ${deleted_snapshots}/${snapshot_count} old snapshots"
+        success=false
+    fi
+
+    if [[ "$unwanted_count" -gt 0 ]] && [[ $deleted_files -eq $unwanted_count ]]; then
+        print_success "Cleaned up ${deleted_files} non-whitelisted file(s), freed ~${unwanted_mb} MB"
+    elif [[ "$unwanted_count" -gt 0 ]]; then
+        print_warning "Cleaned up ${deleted_files}/${unwanted_count} non-whitelisted files"
+        success=false
+    fi
+
+    if [[ "$snapshot_count" -gt 0 ]] || [[ "$unwanted_count" -gt 0 ]]; then
+        print_info "Total space freed: ~${total_mb} MB"
+    fi
+
+    if $success; then
         return 0
     else
-        print_warning "Cleaned up ${deleted_count}/${file_count} files"
         return 1
     fi
 }
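Reviewer note: the Python-to-bash hand-off above is purely positional — one summary line `old_count|old_bytes|unwanted_count|unwanted_bytes`, then exactly `old_count` snapshot paths, then the unwanted files. A quick sketch of the slicing against a fabricated result, to make the head/tail arithmetic concrete:

    # Fabricated cleanup_result: 1 old snapshot, 2 non-whitelisted files
    cleanup_result=$'1|5242880|2|1048576\n/cache/snap-old\n/cache/snap-new/extra.bin\n/cache/snap-new/notes.txt'
    first_line=$(echo "$cleanup_result" | head -n 1)
    snapshot_count=$(echo "$first_line" | cut -d'|' -f1)   # 1
    unwanted_count=$(echo "$first_line" | cut -d'|' -f3)   # 2
    all_items=$(echo "$cleanup_result" | tail -n +2)
    echo "$all_items" | head -n "$snapshot_count"   # old snapshot dirs to rm -rf
    echo "$all_items" | tail -n "$unwanted_count"   # stray files to rm -f

Since the counts come from the same Python run that prints the paths, the two slices cannot drift apart as long as nothing else writes to stdout inside the heredoc.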
@@ -930,17 +1072,8 @@ verify_model_download() {
 # Verify model symlinks
 verify_model_links() {
     local repo_id="$1"
-    local model_type="$2"
-    local filename_filter="$3"
-    local file_mappings="$4"
-
-    local target_dir="${COMFYUI_DIR}/${model_type}"
-
-    # If target directory doesn't exist, nothing is linked
-    if [[ ! -d "$target_dir" ]]; then
-        echo "NOT_LINKED|0|0|0"
-        return 1
-    fi
+    local filename_filter="$2"
+    local file_mappings="$3"
 
     # Find model files in cache
     local model_files
@@ -958,13 +1091,13 @@ verify_model_links() {
 
     # Check if explicit file mappings exist
     if [[ -n "$file_mappings" ]]; then
-        while IFS='|' read -r source_pattern dest_filename; do
-            if [[ -z "$source_pattern" ]]; then
+        while IFS='|' read -r source_pattern dest_path; do
+            if [[ -z "$source_pattern" ]] || [[ -z "$dest_path" ]]; then
                 continue
             fi
 
             total_links=$((total_links + 1))
-            local link_path="${target_dir}/${dest_filename}"
+            local link_path="${COMFYUI_DIR}/${dest_path}"
 
             if [[ -L "$link_path" ]]; then
                 # Symlink exists, check if it's valid
@@ -972,15 +1105,15 @@
                 valid_links=$((valid_links + 1))
                 local link_target
                 link_target=$(readlink "$link_path")
-                link_details="${link_details}VALID|${dest_filename}|${link_target}\n"
+                link_details="${link_details}VALID|${dest_path}|${link_target}\n"
             else
                 broken_links=$((broken_links + 1))
                 local link_target
                 link_target=$(readlink "$link_path")
-                link_details="${link_details}BROKEN|${dest_filename}|${link_target}\n"
+                link_details="${link_details}BROKEN|${dest_path}|${link_target}\n"
             fi
         else
-            link_details="${link_details}MISSING|${dest_filename}|\n"
+            link_details="${link_details}MISSING|${dest_path}|\n"
         fi
     done <<< "$file_mappings"
 else
@@ -1047,13 +1180,13 @@ verify_category() {
     local total_size_bytes=0
     local expected_size_bytes=0
 
-    while IFS='|' read -r repo_id description size_gb essential model_type filename; do
+    while IFS='|' read -r repo_id description size_gb essential filename; do
         current=$((current+1))
 
         echo ""
         print_step "$current" "$total_models" "${BOLD_MAGENTA}${description}${RESET}"
         print_detail "Repository: ${BOLD_WHITE}${repo_id}${RESET}"
-        print_detail "Category: ${CYAN}${category}${RESET} ${ARROW_RIGHT} ${CYAN}${model_type}${RESET}"
+        print_detail "Category: ${CYAN}${category}${RESET}"
         print_detail "Expected Size: ${BOLD_YELLOW}${size_gb} GB${RESET}"
 
         expected_size_bytes=$((expected_size_bytes + $(echo "$size_gb * 1073741824" | bc | cut -d'.' -f1)))
@@ -1104,7 +1237,7 @@
         local file_mappings
         file_mappings=$(parse_file_mappings "$CONFIG_FILE" "$category" "$repo_id")
         local link_result
-        link_result=$(verify_model_links "$repo_id" "$model_type" "$filename" "$file_mappings")
+        link_result=$(verify_model_links "$repo_id" "$filename" "$file_mappings")
         local first_line
         first_line=$(echo -e "$link_result" | head -n1)
         local link_status
@@ -1208,7 +1341,7 @@ process_category() {
     local succeeded=0
     local failed=0
 
-    while IFS='|' read -r repo_id description size_gb essential model_type filename; do
+    while IFS='|' read -r repo_id description size_gb essential filename; do
         current=$((current+1))
 
         echo ""
@@ -1231,13 +1364,13 @@
                 file_mappings=$(parse_file_mappings "$CONFIG_FILE" "$category" "$repo_id")
 
                 # Pass file mappings to link_model (empty string if no mappings found)
-                if ! link_model "$repo_id" "$model_type" "$filename" "$file_mappings"; then
+                if ! link_model "$repo_id" "$filename" "$file_mappings"; then
                     success=false
                 fi
 
                 # Cleanup unused cache files if enabled
                 if [[ "$CLEANUP_MODE" == true ]] && $success; then
-                    cleanup_unused_cache_files "$repo_id" "$CACHE_DIR" "$COMFYUI_DIR"
+                    cleanup_unused_cache_files "$repo_id" "$CACHE_DIR" "$COMFYUI_DIR" "$file_mappings"
                 fi
             fi
         fi
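Reviewer note: `verify_model_links` now rebuilds each expected link path directly from the mapping's `dest` component (`${COMFYUI_DIR}/${dest_path}`), the same expression `link_model` uses, so linking and verification can no longer disagree about where a file belongs. A sketch of the per-mapping check with hypothetical paths — the `-e` validity test is an assumption, since the actual test lives in unchanged context outside this diff:

    COMFYUI_DIR=/tmp/comfyui-models   # hypothetical root
    dest_path='vae/a.safetensors'
    link_path="${COMFYUI_DIR}/${dest_path}"
    if [[ -L "$link_path" ]]; then
        # -e follows the symlink, separating VALID from BROKEN targets
        if [[ -e "$link_path" ]]; then status=VALID; else status=BROKEN; fi
        echo "${status}|${dest_path}|$(readlink "$link_path")"
    else
        echo "MISSING|${dest_path}|"
    fi

The removed `NOT_LINKED|0|0|0` early exit also becomes unnecessary: a missing target directory now simply yields a `MISSING` row per mapping instead of a special case.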