Files
bin/artifact_huggingface_download.sh
Sebastian Krüger 8291a3b662 feat: rewrite CivitAI and HuggingFace download scripts with curl
Complete rewrite of both model download scripts with:
- Beautiful colorful CLI output with progress indicators
- Pure bash/curl downloads (no Python dependencies for downloading)
- yq-based YAML parsing (consistent with arty.sh)
- Three commands: download, link, verify
- Filtering by --category and --repo-id (comma-separated)
- --dry-run mode for previewing operations
- Respects format field for file extensions (.safetensors, .pt, etc.)
- Uses type field for output subdirectories (checkpoints, embeddings, loras)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-26 15:01:27 +01:00

767 lines
21 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
#
# HuggingFace Model Downloader - A Beautiful CLI Tool
# Downloads AI models from HuggingFace and creates symlinks to output directories
#
# Usage: ./artifact_huggingface_download.sh [COMMAND] [OPTIONS]
#
# Commands:
#   download                Download models to cache directory (default)
#   link                    Create symlinks from cache to output directory
#   verify                  Verify model status (cache and symlinks)
#
# Options:
#   -c, --config FILE       Configuration YAML file (required)
#       --cache-dir DIR     Cache directory
#       --output-dir DIR    Output/installation directory
#       --category CAT1,CAT2  Filter by category (comma-separated)
#       --repo-id ID1,ID2   Filter by repo_id (comma-separated)
#       --auth-token TOKEN  HuggingFace token (or set HF_TOKEN env var)
#   -n, --dry-run           Show what would be done without making changes
#   -h, --help              Show help
#
set -euo pipefail

# ============================================================================
# COLOR PALETTE - ANSI escape sequences for terminal styling
# ============================================================================
# The values are literal backslash sequences (no raw ESC byte is stored); they
# turn into control codes when expanded by `echo -e` or a printf format string.

# Reset all attributes
RESET="\033[0m"

# Regular foreground colors
RED="\033[0;31m"      GREEN="\033[0;32m"   YELLOW="\033[0;33m"
BLUE="\033[0;34m"     MAGENTA="\033[0;35m" CYAN="\033[0;36m"

# Bold foreground colors
BOLD_RED="\033[1;31m"     BOLD_GREEN="\033[1;32m"   BOLD_YELLOW="\033[1;33m"
BOLD_BLUE="\033[1;34m"    BOLD_MAGENTA="\033[1;35m" BOLD_CYAN="\033[1;36m"
BOLD_WHITE="\033[1;37m"

# Background colors
BG_CYAN="\033[46m"

# Text styles
DIM="\033[2m"
# ============================================================================
# UNICODE CHARACTERS
# ============================================================================
# Glyphs used by the logging helpers. Every icon must be non-empty: the
# print_* helpers emit "<icon> <message>", so an empty icon shows up as a
# stray leading space.
CHECK_MARK="✓"
CROSS_MARK="✗"
ROCKET="🚀"
DOWNLOAD="⬇️"
LINK_ICON="🔗"
WARNING="⚠️"
# Was empty, which made print_info the only helper without an icon.
INFO="ℹ️"
SPARKLES="✨"
ARROW_RIGHT="→"
BOX_LIGHT="─"
BOX_DOUBLE="═"
PACKAGE="📦"
# ============================================================================
# CONFIGURATION
# ============================================================================
# Run-time settings; the argument parser at the bottom of the file overrides
# these from the command line.
CONFIG_FILE=""
COMMAND="download"
DRY_RUN=false
CATEGORY_FILTER=""
REPO_ID_FILTER=""

# Directory defaults: use the /workspace layout when that directory exists
# (RunPod), otherwise fall back to per-user locations under $HOME. Values
# already present in the environment always win.
if [[ -d /workspace ]]; then
    : "${CACHE_DIR:=/workspace/huggingface_cache}"
    : "${OUTPUT_DIR:=/workspace/ComfyUI/models}"
else
    : "${CACHE_DIR:=${HOME}/.cache/huggingface/hub}"
    : "${OUTPUT_DIR:=${HOME}/ComfyUI/models}"
fi

# HuggingFace token from the environment; defined as empty when absent so that
# later "$HF_TOKEN" expansions are safe under `set -u`.
: "${HF_TOKEN:=}"
# Populate HF_TOKEN from the first .env file that defines it.
# Globals:   HF_TOKEN (read, written), HOME (read)
# Arguments: none
# Returns:   0 always; HF_TOKEN is left untouched when it is already set or
#            when no candidate file provides a value.
load_env_token() {
    # An explicit token (environment or --auth-token) always wins.
    if [[ -n "$HF_TOKEN" ]]; then
        return 0
    fi

    local env_files=(
        "${HOME}/.env"
        "${HOME}/Projects/runpod/.env"
        "${HOME}/Projects/runpod/ai/.env"
        "/workspace/.env"
        "/workspace/ai/.env"
    )

    # Both loop variables are local so the function does not leak globals.
    local env_file token
    for env_file in "${env_files[@]}"; do
        if [[ ! -f "$env_file" ]]; then
            continue
        fi
        # -m 1: use only the first definition, so a file with duplicate
        # HF_TOKEN lines cannot yield a multi-line value.
        # tr: drop surrounding quotes and the CR left by CRLF line endings.
        # || true: "no match" must not abort the script under pipefail/errexit.
        token=$(grep -m 1 '^HF_TOKEN=' "$env_file" 2>/dev/null \
            | cut -d'=' -f2- \
            | tr -d "\"'\r" || true)
        if [[ -n "$token" ]]; then
            HF_TOKEN="$token"
            return 0
        fi
    done
    return 0
}
# ============================================================================
# LOGGING FUNCTIONS
# ============================================================================
# Print a centered title between two double-line rules.
# Arguments: $1 - banner text
# Outputs:   five lines on stdout (blank, rule, title, rule, blank)
print_banner() {
    local title="$1"
    local columns=70
    # ${#title} is the length in characters, which is what the padding is
    # computed from.
    local title_len=${#title}
    local left_width=$(( (columns - title_len) / 2 ))
    local right_width=$(( columns - left_width - title_len ))

    # Build the rule by padding to the target width and swapping the blanks.
    local rule left_pad right_pad
    printf -v rule '%*s' "$columns" ''
    rule="${BOX_DOUBLE}${rule// /═}${BOX_DOUBLE}"
    printf -v left_pad '%*s' "$left_width" ''
    printf -v right_pad '%*s' "$right_width" ''

    echo ""
    echo -e "${BOLD_CYAN}${rule}${RESET}"
    echo -e "${BOLD_CYAN}${left_pad}${BOLD_MAGENTA}${title}${right_pad}${BOLD_CYAN}${RESET}"
    echo -e "${BOLD_CYAN}${rule}${RESET}"
    echo ""
}
# Print a section heading followed by a 70-column light rule.
# Arguments: $1 - heading text
# Outputs:   a blank line, the heading and the rule on stdout
print_section() {
    local heading="$1"
    local underline
    # Pad to 70 blanks, then swap each blank for the box-drawing character.
    printf -v underline '%*s' 70 ''
    underline="${underline// /─}"
    echo -e "\n${BOLD_CYAN}» ${heading}${RESET}"
    echo -e "${CYAN}${underline}${RESET}"
}
# Print a success message prefixed with the check mark.
# Arguments: $1 - message
# Outputs:   one line on stdout
print_success() {
    local message="$1"
    echo -e "${BOLD_GREEN}${CHECK_MARK} ${message}${RESET}"
}
# Print an error message prefixed with the cross mark.
# Arguments: $1 - message
# Outputs:   one line on stderr (nothing on stdout)
print_error() {
    local message="$1"
    echo -e "${BOLD_RED}${CROSS_MARK} ${message}${RESET}" >&2
}
# Print a warning message prefixed with the warning icon.
# Arguments: $1 - message
# Outputs:   one line on stdout
print_warning() {
    local message="$1"
    echo -e "${BOLD_YELLOW}${WARNING} ${message}${RESET}"
}
# Print an informational message prefixed with the info icon.
# Arguments: $1 - message
# Outputs:   one line on stdout
print_info() {
    local message="$1"
    echo -e "${BOLD_CYAN}${INFO} ${message}${RESET}"
}
# Print a "[current/total] <icon> text" progress line for one model.
# Arguments: $1 - position of the current item
#            $2 - number of items
#            $3 - label to show
# Outputs:   one line on stdout
print_step() {
    local position="$1"
    local count="$2"
    local label="$3"
    local counter="${BOLD_BLUE}[${position}/${count}]${RESET}"
    local icon="${CYAN}${PACKAGE}${RESET}"
    echo -e "${counter} ${icon} ${label}"
}
# Print a dimmed detail line introduced by the arrow glyph.
# Arguments: $1 - detail text
# Outputs:   one line on stdout
print_detail() {
    local detail="$1"
    echo -e " ${DIM}${CYAN}${ARROW_RIGHT} ${detail}${RESET}"
}
# Draw a single-line progress bar (carriage return, no trailing newline).
# Arguments: $1 - number of completed items
#            $2 - total number of items
# Outputs:   the bar on stdout; nothing when total is not positive
# Returns:   0
show_progress() {
    local current="$1"
    local total="$2"
    local width=40

    # No bar can be drawn for an empty job; this also avoids dividing by zero.
    if (( total <= 0 )); then
        return 0
    fi
    # Clamp so the bar never under- or overflows its 40 cells.
    if (( current < 0 )); then
        current=0
    elif (( current > total )); then
        current=$total
    fi

    local percentage=$(( current * 100 / total ))
    local filled=$(( current * width / total ))
    local empty=$(( width - filled ))

    # Build both segments in the shell. The previous `tr ' ' '█'` is
    # byte-oriented in GNU tr and wrote only the first byte of the multi-byte
    # glyph, producing invalid UTF-8.
    local filled_bar empty_bar
    printf -v filled_bar '%*s' "$filled" ''
    printf -v empty_bar '%*s' "$empty" ''
    filled_bar="${filled_bar// /█}"
    empty_bar="${empty_bar// /░}"

    # Colors are passed as %b arguments instead of being spliced into the
    # format string.
    printf '\r %bProgress: %b[' "$BOLD_CYAN" "$RESET"
    printf '%b%s%b' "${BG_CYAN}${BOLD_WHITE}" "$filled_bar" "$RESET"
    printf '%b%s%b' "$DIM" "$empty_bar" "$RESET"
    printf '] %b%3d%%%b %b(%d/%d)%b' \
        "$BOLD_YELLOW" "$percentage" "$RESET" "$DIM" "$current" "$total" "$RESET"
}
# ============================================================================
# YAML PARSING (using yq)
# ============================================================================
# Abort the script when the `yq` command is not available.
# Outputs: an error on failure (via print_error / print_info)
# Returns: 0 when yq is found; otherwise exits the script with status 1
check_yq() {
    if command -v yq >/dev/null 2>&1; then
        return 0
    fi
    print_error "yq is not installed. Please install yq first."
    print_info "Install: https://github.com/mikefarah/yq"
    exit 1
}
# Print the number of top-level entries in the YAML config.
# Arguments: $1 - path to the YAML file
# Outputs:   the count on stdout; "0" is printed when yq fails
get_model_count() {
    local yaml_path="$1"
    if ! yq eval '. | length' "$yaml_path" 2>/dev/null; then
        echo "0"
    fi
}
# Print one field of the model entry at the given index.
# Arguments: $1 - path to the YAML file
#            $2 - zero-based model index
#            $3 - field name (e.g. repo_id, category, description)
# Outputs:   the field value on stdout, or an empty line when the field is
#            missing/null or yq fails
get_model_field() {
    local config="$1"
    local index="$2"
    local field="$3"
    local value
    # `// ""` maps a missing/null field to the empty string.
    value=$(yq eval ".[$index].$field // \"\"" "$config" 2>/dev/null) || value=""
    # Strip one surrounding pair of double quotes, if present.
    value="${value#\"}"
    value="${value%\"}"
    # printf instead of echo: a value such as "-n" or "-e" must be printed,
    # not interpreted as an echo option.
    printf '%s\n' "$value"
}
# Print the length of the `files` list of the model at the given index.
# Arguments: $1 - path to the YAML file
#            $2 - zero-based model index
# Outputs:   the count on stdout; "0" is printed when yq fails
get_files_count() {
    local yaml_path="$1"
    local model_idx="$2"
    local query=".[$model_idx].files | length"
    if ! yq eval "$query" "$yaml_path" 2>/dev/null; then
        echo "0"
    fi
}
# Print one field of a model's file mapping.
# Arguments: $1 - path to the YAML file
#            $2 - zero-based model index
#            $3 - zero-based index into that model's `files` list
#            $4 - field name (source or dest)
# Outputs:   the field value on stdout, or an empty line when the field is
#            missing/null or yq fails
get_file_field() {
    local config="$1"
    local model_index="$2"
    local file_index="$3"
    local field="$4"
    local value
    # `// ""` maps a missing/null field to the empty string.
    value=$(yq eval ".[$model_index].files[$file_index].$field // \"\"" "$config" 2>/dev/null) || value=""
    # Strip one surrounding pair of double quotes, if present.
    value="${value#\"}"
    value="${value%\"}"
    # printf instead of echo: a file name such as "-n" or "-e" must be
    # printed, not interpreted as an echo option.
    printf '%s\n' "$value"
}
# Test whether a value is accepted by a comma-separated filter list.
# Arguments: $1 - comma-separated filter (empty means "accept everything")
#            $2 - value to look up
# Returns:   0 when the filter is empty or contains the value, 1 otherwise
_filter_list_accepts() {
    local filter="$1"
    local value="$2"
    if [[ -z "$filter" ]]; then
        return 0
    fi
    local -a items=()
    local item
    IFS=',' read -ra items <<< "$filter"
    for item in "${items[@]}"; do
        # Trim surrounding whitespace in the shell; piping through xargs
        # fails on items that contain quote characters.
        item="${item#"${item%%[![:space:]]*}"}"
        item="${item%"${item##*[![:space:]]}"}"
        if [[ "$value" == "$item" ]]; then
            return 0
        fi
    done
    return 1
}

# Check whether a model passes the --category and --repo-id filters.
# Globals:   CATEGORY_FILTER, REPO_ID_FILTER (read)
# Arguments: $1 - repo_id of the model
#            $2 - category of the model
# Returns:   0 when every non-empty filter lists the model, 1 otherwise
matches_filters() {
    local repo_id="$1"
    local category="$2"
    if ! _filter_list_accepts "$CATEGORY_FILTER" "$category"; then
        return 1
    fi
    if ! _filter_list_accepts "$REPO_ID_FILTER" "$repo_id"; then
        return 1
    fi
    return 0
}
# ============================================================================
# DOWNLOAD FUNCTIONS
# ============================================================================
# Download one file of a HuggingFace repository into the cache.
# Globals:   CACHE_DIR, HF_TOKEN, DRY_RUN (read)
# Arguments: $1 - repo_id (organization/model-name)
#            $2 - path of the file inside the repository
# Outputs:   progress and status messages
# Returns:   0 when the file is already cached, was downloaded, or DRY_RUN is
#            true; 1 when the download failed
download_file() {
    local repo_id="$1"
    local source="$2"

    # The cache mirrors the repository layout: <CACHE_DIR>/<repo_id>/<source>.
    # link_file and verify_model build the same path, so it must stay in sync.
    local cache_repo_dir="${CACHE_DIR}/${repo_id}"
    local source_dir
    source_dir=$(dirname -- "$source")
    local output_dir="${cache_repo_dir}"
    if [[ "$source_dir" != "." ]]; then
        output_dir="${cache_repo_dir}/${source_dir}"
    fi
    local filename
    filename=$(basename -- "$source")
    local output_path="${output_dir}/${filename}"
    # Data is written here first and only renamed once complete, so an
    # interrupted run can never leave a truncated file that the "already
    # downloaded" check below would accept.
    local partial_path="${output_path}.part"
    local size

    print_detail "File: ${BOLD_WHITE}${source}${RESET}"
    print_detail "Output: ${CYAN}${output_path}${RESET}"

    # Check if already exists
    if [[ -f "$output_path" ]]; then
        size=$(du -h "$output_path" | cut -f1)
        print_success "Already downloaded: ${filename} (${size})"
        return 0
    fi

    # Dry-run mode
    if [[ "$DRY_RUN" == true ]]; then
        print_info "DRY-RUN: Would download ${BOLD_WHITE}${source}${RESET}"
        return 0
    fi

    mkdir -p "$output_dir"

    local url="https://huggingface.co/${repo_id}/resolve/main/${source}"
    print_detail "Downloading from HuggingFace..."

    # --fail: turn HTTP errors (401 on gated repos, 404, ...) into a non-zero
    #         exit instead of saving the error page as the model file.
    # -C -  : resume a previous partial download when one exists.
    local curl_args=(--fail -L -C - --progress-bar -o "$partial_path")
    if [[ -n "$HF_TOKEN" ]]; then
        # NOTE(review): the token is passed on the command line and is
        # therefore visible in process listings while curl runs.
        curl_args+=(-H "Authorization: Bearer ${HF_TOKEN}")
    fi

    local curl_status=0
    curl "${curl_args[@]}" "$url" 2>&1 || curl_status=$?

    if (( curl_status == 0 )) && [[ -s "$partial_path" ]] \
        && mv -f -- "$partial_path" "$output_path"; then
        size=$(du -h "$output_path" | cut -f1)
        print_success "Downloaded ${BOLD_WHITE}${filename}${RESET} (${size})"
        return 0
    fi

    print_error "Failed to download ${source}"
    # curl exits with 22 for HTTP-level failures under --fail; nothing worth
    # resuming was written then (and status 0 here means an empty result).
    # For any other error keep the partial file so the next run can resume.
    if (( curl_status == 0 || curl_status == 22 )); then
        rm -f -- "$partial_path" 2>/dev/null || true
    fi
    return 1
}
# Download every file listed for one model entry.
# Arguments: $1 - path to the YAML file
#            $2 - zero-based model index
#            $3 - repo_id of the model
#            $4 - description (may be empty)
# Returns:   0 when all files succeeded; 1 when the model lists no files or
#            at least one download failed
download_model() {
    local config="$1"
    local index="$2"
    local repo_id="$3"
    local description="$4"

    print_detail "Repository: ${BOLD_WHITE}${repo_id}${RESET}"
    if [[ -n "$description" ]]; then
        print_detail "Description: ${description}"
    fi

    local files_count
    files_count=$(get_files_count "$config" "$index")
    if [[ "$files_count" == "0" ]]; then
        print_warning "No files defined for ${repo_id}"
        return 1
    fi

    # The loop counter is local so it cannot clobber a caller's variable.
    local f source
    local failed=0
    for ((f = 0; f < files_count; f++)); do
        source=$(get_file_field "$config" "$index" "$f" "source")
        # Entries without a source are skipped, not counted as failures.
        if [[ -z "$source" ]]; then
            continue
        fi
        # Keep going after a failure so the remaining files are still fetched.
        if ! download_file "$repo_id" "$source"; then
            failed=$((failed + 1))
        fi
    done

    if [[ $failed -eq 0 ]]; then
        return 0
    fi
    return 1
}
# ============================================================================
# LINK FUNCTIONS
# ============================================================================
# Symlink one cached file into the output directory.
# Globals:   CACHE_DIR, OUTPUT_DIR, DRY_RUN (read)
# Arguments: $1 - repo_id of the model
#            $2 - path of the file inside the repository (cache-relative)
#            $3 - destination path relative to OUTPUT_DIR
# Returns:   0 when the link was created (or would be, in dry-run mode);
#            1 when the cached file is missing or the destination exists and
#            is not a symlink
link_file() {
    local repo_id="$1"
    local source="$2"
    local dest="$3"
    local cache_repo_dir="${CACHE_DIR}/${repo_id}"
    local source_path="${cache_repo_dir}/${source}"
    local link_path="${OUTPUT_DIR}/${dest}"
    local link_dir
    link_dir=$(dirname -- "$link_path")

    print_detail "Source: ${CYAN}${source_path}${RESET}"
    print_detail "Target: ${CYAN}${link_path}${RESET}"

    # Check if source exists
    if [[ ! -f "$source_path" ]]; then
        print_warning "Source file not found: ${source}"
        return 1
    fi

    # Dry-run mode. The arrow keeps source and destination apart; they used
    # to be printed back to back.
    if [[ "$DRY_RUN" == true ]]; then
        print_info "DRY-RUN: Would link ${BOLD_WHITE}${source}${RESET} ${ARROW_RIGHT} ${dest}"
        return 0
    fi

    mkdir -p "$link_dir"

    # Replace an existing symlink (-L also matches a dangling one), but never
    # overwrite a real file or directory.
    if [[ -L "$link_path" ]]; then
        rm -f -- "$link_path"
    elif [[ -e "$link_path" ]]; then
        print_warning "File exists (not a symlink): ${dest}"
        return 1
    fi

    ln -s -- "$source_path" "$link_path"
    print_success "Linked: ${LINK_ICON} ${dest}"
    return 0
}
link_model() {
local config="$1"
local index="$2"
local repo_id="$3"
print_detail "Repository: ${BOLD_WHITE}${repo_id}${RESET}"
local files_count
files_count=$(get_files_count "$config" "$index")
if [[ "$files_count" == "0" ]]; then
print_warning "No files defined for ${repo_id}"
return 1
fi
local succeeded=0
local failed=0
for ((f=0; f<files_count; f++)); do
local source dest
source=$(get_file_field "$config" "$index" "$f" "source")
dest=$(get_file_field "$config" "$index" "$f" "dest")
if [[ -z "$source" ]]; then
continue
fi
# If dest is empty, use source as dest
if [[ -z "$dest" ]]; then
dest="$source"
fi
if link_file "$repo_id" "$source" "$dest"; then
succeeded=$((succeeded+1))
else
failed=$((failed+1))
fi
done
if [[ $failed -eq 0 ]]; then
return 0
else
return 1
fi
}
# ============================================================================
# VERIFY FUNCTIONS
# ============================================================================
# Report cache and symlink status for every file of one model entry.
# Globals:   CACHE_DIR, OUTPUT_DIR (read)
# Arguments: $1 - path to the YAML file
#            $2 - zero-based model index
#            $3 - repo_id of the model
# Outputs:   one header line plus three lines per file on stdout
# Returns:   1 when the model lists no files, 0 otherwise. Missing cache
#            files or links are only reported; they do not change the status.
verify_model() {
    local config="$1"
    local index="$2"
    local repo_id="$3"

    echo -e " ${BOLD_WHITE}${repo_id}${RESET}"

    local files_count
    files_count=$(get_files_count "$config" "$index")
    if [[ "$files_count" == "0" ]]; then
        echo -e " ${BOLD_YELLOW}No files defined${RESET}"
        return 1
    fi

    # All per-file variables, including the loop counter, are local.
    local f source dest cache_path link_path cache_status link_status size filename
    for ((f = 0; f < files_count; f++)); do
        source=$(get_file_field "$config" "$index" "$f" "source")
        dest=$(get_file_field "$config" "$index" "$f" "dest")
        if [[ -z "$source" ]]; then
            continue
        fi
        # An empty dest means the link uses the same relative path as source.
        if [[ -z "$dest" ]]; then
            dest="$source"
        fi

        cache_path="${CACHE_DIR}/${repo_id}/${source}"
        link_path="${OUTPUT_DIR}/${dest}"
        # Both start as "missing" and are upgraded by the checks below.
        cache_status="${BOLD_RED}${CROSS_MARK}${RESET}"
        link_status="${BOLD_RED}${CROSS_MARK}${RESET}"

        # Check cache
        if [[ -f "$cache_path" ]]; then
            size=$(du -h "$cache_path" | cut -f1)
            cache_status="${BOLD_GREEN}${CHECK_MARK}${RESET} (${size})"
        fi

        # Check symlink: -L matches the link itself, -e follows it, so a
        # dangling link is reported as broken.
        if [[ -L "$link_path" ]]; then
            if [[ -e "$link_path" ]]; then
                link_status="${BOLD_GREEN}${CHECK_MARK}${RESET}"
            else
                link_status="${BOLD_YELLOW}${WARNING}${RESET} (broken)"
            fi
        fi

        filename=$(basename -- "$source")
        echo -e " ${DIM}${filename}${RESET}"
        echo -e " Cache: ${cache_status}"
        echo -e " Link: ${link_status}"
    done
    return 0
}
# ============================================================================
# MAIN WORKFLOW
# ============================================================================
# Run one action (download, link or verify) over every model that passes the
# filters, then print a summary.
# Globals:   CONFIG_FILE (read)
# Arguments: $1 - action: download | link | verify
# Outputs:   per-model progress and a final summary
# Returns:   0 (per-model failures are reported in the summary only)
process_models() {
    local action="$1"
    print_section "Processing Models (${action})"

    local total
    total=$(get_model_count "$CONFIG_FILE")
    if [[ "$total" == "0" ]]; then
        print_warning "No models found in configuration"
        return 0
    fi

    # Pass 1: pick the models to process. Selecting up front gives the step
    # counter a fixed, correct denominator; it used to be total-skipped, which
    # changed while iterating and counted entries without repo_id.
    local -a selected_index=() selected_repo=() selected_desc=()
    local i repo_id category description
    local skipped=0
    for ((i = 0; i < total; i++)); do
        repo_id=$(get_model_field "$CONFIG_FILE" "$i" "repo_id")
        # Skip entries without repo_id (might be malformed YAML)
        if [[ -z "$repo_id" ]]; then
            continue
        fi
        category=$(get_model_field "$CONFIG_FILE" "$i" "category")
        if ! matches_filters "$repo_id" "$category"; then
            skipped=$((skipped + 1))
            continue
        fi
        description=$(get_model_field "$CONFIG_FILE" "$i" "description")
        selected_index+=("$i")
        selected_repo+=("$repo_id")
        selected_desc+=("$description")
    done

    # Pass 2: run the action on each selected model.
    local selected_total=${#selected_index[@]}
    local n result
    local succeeded=0
    local failed=0
    for ((n = 0; n < selected_total; n++)); do
        i="${selected_index[n]}"
        repo_id="${selected_repo[n]}"
        description="${selected_desc[n]}"

        echo ""
        print_step "$((n + 1))" "$selected_total" "${BOLD_CYAN}${repo_id}${RESET}"

        # Every branch captures the status with `||`: a bare failing call
        # would terminate the whole script under `set -e`.
        result=0
        case "$action" in
            download)
                download_model "$CONFIG_FILE" "$i" "$repo_id" "$description" || result=1
                ;;
            link)
                link_model "$CONFIG_FILE" "$i" "$repo_id" || result=1
                ;;
            verify)
                verify_model "$CONFIG_FILE" "$i" "$repo_id" || result=1
                ;;
        esac

        if [[ $result -eq 0 ]]; then
            succeeded=$((succeeded + 1))
        else
            failed=$((failed + 1))
        fi
    done

    echo ""
    if [[ $skipped -gt 0 ]]; then
        print_info "Skipped ${skipped} model(s) (filtered)"
    fi
    # NOTE(review): the function still returns 0 when models failed, so the
    # script's exit status does not reflect failures - confirm whether callers
    # rely on that before changing it.
    print_info "Summary: ${BOLD_GREEN}${succeeded} succeeded${RESET}, ${BOLD_RED}${failed} failed${RESET}"
}
# Print the usage text on stdout.
# The here-document delimiter is quoted, so the text is emitted literally
# (no variable or command expansion). Columns are aligned and the YAML sample
# is indented so it is valid YAML that can be copied as-is.
show_help() {
    cat << 'EOF'
HuggingFace Model Downloader - A Beautiful CLI Tool

Usage: artifact_huggingface_download.sh [COMMAND] [OPTIONS]

Commands:
  download                Download models to cache directory (default)
  link                    Create symlinks from cache to output directory
  verify                  Verify model status (cache and symlinks)

Options:
  -c, --config FILE       Configuration YAML file (required)
      --cache-dir DIR     Cache directory
      --output-dir DIR    Output/installation directory
      --category CAT1,CAT2  Filter by category (comma-separated)
      --repo-id ID1,ID2   Filter by repo_id (comma-separated)
      --auth-token TOKEN  HuggingFace token (or set HF_TOKEN env var)
  -n, --dry-run           Show what would be done without making changes
  -h, --help              Show this help message

Environment Variables:
  HF_TOKEN                HuggingFace API token
  CACHE_DIR               Override default cache directory
  OUTPUT_DIR              Override default output directory

YAML Configuration Format:
  - repo_id: organization/model-name
    description: Model description
    category: image|video|llm|upscale
    files:
      - source: path/to/file.safetensors
        dest: subdir/output_filename.safetensors

Examples:
  # Download all models from config
  ./artifact_huggingface_download.sh download -c models_huggingface.yaml

  # Download only image models
  ./artifact_huggingface_download.sh download -c models_huggingface.yaml --category image

  # Download specific model
  ./artifact_huggingface_download.sh download -c models_huggingface.yaml --repo-id black-forest-labs/FLUX.1-schnell

  # Create symlinks for downloaded models
  ./artifact_huggingface_download.sh link -c models_huggingface.yaml

  # Verify all models
  ./artifact_huggingface_download.sh verify -c models_huggingface.yaml

  # Dry-run to preview operations
  ./artifact_huggingface_download.sh download -c models_huggingface.yaml --dry-run
EOF
}
# Entry point: check prerequisites, echo the effective configuration, run the
# selected command over the configured models and print the closing banner.
# Globals: CONFIG_FILE, CACHE_DIR, OUTPUT_DIR, COMMAND, DRY_RUN, HF_TOKEN,
#          CATEGORY_FILTER, REPO_ID_FILTER (read)
# Returns: exits with status 1 when the config file is not given or missing
main() {
    # Prerequisites and token lookup come first
    check_yq
    load_env_token

    print_banner "${ROCKET} HuggingFace Model Downloader ${SPARKLES}"

    # Make dry-run mode impossible to miss
    if [[ "$DRY_RUN" == true ]]; then
        echo -e "${BOLD_YELLOW}${WARNING} DRY-RUN MODE - No changes will be made ${WARNING}${RESET}\n"
    fi

    print_section "Configuration"

    # The config file is mandatory and must exist
    if [[ -z "$CONFIG_FILE" ]]; then
        print_error "Configuration file required. Use -c/--config"
        exit 1
    fi
    if [[ ! -f "$CONFIG_FILE" ]]; then
        print_error "Configuration file not found: $CONFIG_FILE"
        exit 1
    fi

    # Effective settings
    print_success "Config: ${CYAN}${CONFIG_FILE}${RESET}"
    print_success "Cache: ${CYAN}${CACHE_DIR}${RESET}"
    print_success "Output: ${CYAN}${OUTPUT_DIR}${RESET}"
    print_success "Command: ${BOLD_CYAN}${COMMAND}${RESET}"

    # Only the first 8 characters of the token are shown; a token is optional
    # because public repositories do not need one.
    if [[ -z "$HF_TOKEN" ]]; then
        print_info "HF Token: ${DIM}not set (public repos only)${RESET}"
    else
        print_success "HF Token: ${DIM}${HF_TOKEN:0:8}...${RESET}"
    fi

    # Active filters, if any
    if [[ -n "$CATEGORY_FILTER" ]]; then
        print_info "Category filter: ${BOLD_WHITE}${CATEGORY_FILTER}${RESET}"
    fi
    if [[ -n "$REPO_ID_FILTER" ]]; then
        print_info "Repo ID filter: ${BOLD_WHITE}${REPO_ID_FILTER}${RESET}"
    fi

    process_models "$COMMAND"

    echo ""
    print_banner "${SPARKLES} Complete ${SPARKLES}"
}
# ============================================================================
# ARGUMENT PARSING
# ============================================================================
# Abort with a usage hint when an option that needs a value is the last word
# on the command line.
# Arguments: the remaining command-line words, option first ("$@")
_require_option_value() {
    if (( $# < 2 )); then
        print_error "Option $1 requires a value"
        echo "Use --help for usage information"
        exit 1
    fi
}

# Parse the command line into the global settings.
# Globals:   CONFIG_FILE, CACHE_DIR, OUTPUT_DIR, CATEGORY_FILTER,
#            REPO_ID_FILTER, HF_TOKEN, DRY_RUN, COMMAND (written)
# Arguments: the script's command-line arguments ("$@")
# Returns:   exits 0 after printing help; exits 1 on an unknown option or an
#            option without its value
parse_args() {
    while (( $# > 0 )); do
        case "$1" in
            -c|--config)
                # Without this check a missing value dies with "unbound
                # variable" under `set -u` instead of a usable message.
                _require_option_value "$@"
                CONFIG_FILE="$2"
                shift 2
                ;;
            --cache-dir)
                _require_option_value "$@"
                CACHE_DIR="$2"
                shift 2
                ;;
            --output-dir)
                _require_option_value "$@"
                OUTPUT_DIR="$2"
                shift 2
                ;;
            --category)
                _require_option_value "$@"
                CATEGORY_FILTER="$2"
                shift 2
                ;;
            --repo-id)
                _require_option_value "$@"
                REPO_ID_FILTER="$2"
                shift 2
                ;;
            --auth-token)
                _require_option_value "$@"
                HF_TOKEN="$2"
                shift 2
                ;;
            -n|--dry-run)
                DRY_RUN=true
                shift
                ;;
            download|link|verify)
                COMMAND="$1"
                shift
                ;;
            -h|--help)
                show_help
                exit 0
                ;;
            -*)
                print_error "Unknown option: $1"
                echo "Use --help for usage information"
                exit 1
                ;;
            *)
                # Positional argument - treat as config file. Only the first
                # one is used; later ones are ignored.
                if [[ -z "$CONFIG_FILE" ]]; then
                    CONFIG_FILE="$1"
                fi
                shift
                ;;
        esac
    done
}

parse_args "$@"
# Run main
main