Files
bin/artifact_huggingface_download.sh

767 lines
21 KiB
Bash
Raw Permalink Normal View History

#!/bin/bash
#
# HuggingFace Model Downloader - A Beautiful CLI Tool
# Downloads AI models from HuggingFace and creates symlinks to output directories
#
# Usage: ./artifact_huggingface_download.sh [COMMAND] [OPTIONS]
#
# Commands:
# download Download models to cache directory (default)
# link Create symlinks from cache to output directory
# verify Verify model status (cache files and symlinks)
#
# Options:
# -c, --config FILE Configuration YAML file (required)
# --cache-dir DIR Cache directory
# --output-dir DIR Output/installation directory
# --category CAT1,CAT2 Filter by category (comma-separated)
# --repo-id ID1,ID2 Filter by repo_id (comma-separated)
# --auth-token TOKEN HuggingFace token
# -n, --dry-run Show what would be done
# -h, --help Show help
#
set -euo pipefail
# ============================================================================
# COLOR PALETTE - Beautiful Terminal Colors
# ============================================================================
# Each value is the literal text of an escape sequence (backslash, 0, 3, 3, ...),
# not a raw ESC byte. It only becomes a terminal control sequence when it is
# printed through `echo -e` or a printf format / %b argument.
# All palette and glyph values are constants, hence readonly.
readonly RESET='\033[0m'
# Foreground Colors
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[0;33m'
readonly BLUE='\033[0;34m'
readonly MAGENTA='\033[0;35m'
readonly CYAN='\033[0;36m'
# Bold
readonly BOLD_RED='\033[1;31m'
readonly BOLD_GREEN='\033[1;32m'
readonly BOLD_YELLOW='\033[1;33m'
readonly BOLD_BLUE='\033[1;34m'
readonly BOLD_MAGENTA='\033[1;35m'
readonly BOLD_CYAN='\033[1;36m'
readonly BOLD_WHITE='\033[1;37m'
# Background
readonly BG_CYAN='\033[46m'
# Styles
readonly DIM='\033[2m'
# ============================================================================
# UNICODE CHARACTERS
# ============================================================================
readonly CHECK_MARK="✓"
readonly CROSS_MARK="✗"
readonly ROCKET="🚀"
readonly DOWNLOAD="⬇️"
readonly LINK_ICON="🔗"
readonly WARNING="⚠️"
# NOTE(review): INFO is empty, so print_info shows no icon before its text.
# Confirm whether a glyph was intended here.
readonly INFO=""
readonly SPARKLES="✨"
readonly ARROW_RIGHT="→"
readonly BOX_LIGHT="─"
readonly BOX_DOUBLE="═"
readonly PACKAGE="📦"
# ============================================================================
# CONFIGURATION
# ============================================================================
# Runtime settings. The command-line parser at the bottom of the file may
# overwrite any of them before main runs.
CONFIG_FILE=""
COMMAND="download"
DRY_RUN=false
CATEGORY_FILTER=""
REPO_ID_FILTER=""

# Default directories. Values already present in the environment win; otherwise
# the defaults depend on whether a /workspace directory exists (RunPod layout)
# or not (local layout under $HOME).
if [[ ! -d "/workspace" ]]; then
  : "${CACHE_DIR:=${HOME}/.cache/huggingface/hub}"
  : "${OUTPUT_DIR:=${HOME}/ComfyUI/models}"
else
  : "${CACHE_DIR:=/workspace/huggingface_cache}"
  : "${OUTPUT_DIR:=/workspace/ComfyUI/models}"
fi

# HuggingFace token from the environment; becomes an empty string when absent
# so later "$HF_TOKEN" expansions are safe under `set -u`.
: "${HF_TOKEN:=}"
#######################################
# Populate HF_TOKEN from the first .env file that defines a non-empty value.
# Does nothing when HF_TOKEN is already non-empty.
# Globals:   HF_TOKEN (read, written), HOME (read)
# Arguments: none
# Returns:   0 always (a missing token is not an error)
#######################################
load_env_token() {
  if [[ -n "$HF_TOKEN" ]]; then
    return 0
  fi
  local -a env_files=(
    "${HOME}/.env"
    "${HOME}/Projects/runpod/.env"
    "${HOME}/Projects/runpod/ai/.env"
    "/workspace/.env"
    "/workspace/ai/.env"
  )
  local env_file token
  for env_file in "${env_files[@]}"; do
    [[ -f "$env_file" ]] || continue
    # When a file assigns HF_TOKEN more than once, keep only the last
    # assignment (what sourcing the file would leave behind) so the token never
    # spans several lines. Quotes and CR characters (CRLF files) are stripped.
    # `|| true`: grep exits 1 when nothing matches, which is expected here.
    token=$(grep '^HF_TOKEN=' "$env_file" 2>/dev/null \
      | tail -n 1 \
      | cut -d'=' -f2- \
      | tr -d "\"'\r" || true)
    if [[ -n "$token" ]]; then
      HF_TOKEN="$token"
      return 0
    fi
  done
  return 0
}
# ============================================================================
# LOGGING FUNCTIONS
# ============================================================================
# Print a banner: a blank line, a rule of box characters, the given text
# centred by character count within 70 columns, the rule again, and a blank line.
# Arguments: $1 - banner text (passed through `echo -e`)
print_banner() {
  local title="$1"
  local cols=70
  local title_len=${#title}
  local left_pad=$(( (cols - title_len) / 2 ))
  local right_pad=$(( cols - left_pad - title_len ))
  local rule lead trail
  # Build the runs of characters in-process instead of via seq/subshells.
  printf -v rule '%*s' "$cols" ''
  rule="${rule// /═}"
  printf -v lead '%*s' "$left_pad" ''
  printf -v trail '%*s' "$right_pad" ''
  local border="${BOLD_CYAN}${BOX_DOUBLE}${rule}${BOX_DOUBLE}${RESET}"
  echo ""
  echo -e "$border"
  echo -e "${BOLD_CYAN}${lead}${BOLD_MAGENTA}${title}${trail}${BOLD_CYAN}${RESET}"
  echo -e "$border"
  echo ""
}
# Print a section heading: a blank line, "» <text>", then a 70-character rule.
# Arguments: $1 - heading text (passed through `echo -e`)
print_section() {
  local heading="$1"
  local underline
  printf -v underline '%*s' 70 ''
  underline="${underline// /─}"
  echo -e "\n${BOLD_CYAN}» ${heading}${RESET}"
  echo -e "${CYAN}${underline}${RESET}"
}
# Print a success line (check mark + message) to stdout.
# Arguments: $1 - message (passed through `echo -e`)
print_success() {
  local line="${BOLD_GREEN}${CHECK_MARK} $1${RESET}"
  echo -e "$line"
}
# Print an error line (cross mark + message) to stderr.
# Arguments: $1 - message (passed through `echo -e`)
print_error() {
  local line="${BOLD_RED}${CROSS_MARK} $1${RESET}"
  echo -e "$line" >&2
}
# Print a warning line (warning glyph + message) to stdout.
# Arguments: $1 - message (passed through `echo -e`)
print_warning() {
  local line="${BOLD_YELLOW}${WARNING} $1${RESET}"
  echo -e "$line"
}
# Print an informational line (INFO glyph + message) to stdout.
# Arguments: $1 - message (passed through `echo -e`)
print_info() {
  local line="${BOLD_CYAN}${INFO} $1${RESET}"
  echo -e "$line"
}
# Print a "[current/total] <package glyph> text" progress line to stdout.
# Arguments: $1 - current step number
#            $2 - total number of steps
#            $3 - description (passed through `echo -e`)
print_step() {
  local step_no="$1" step_total="$2" label="$3"
  local counter="${BOLD_BLUE}[${step_no}/${step_total}]${RESET}"
  local icon="${CYAN}${PACKAGE}${RESET}"
  echo -e "${counter} ${icon} ${label}"
}
# Print a dimmed detail line (arrow glyph + message) to stdout.
# Arguments: $1 - message (passed through `echo -e`)
print_detail() {
  local line=" ${DIM}${CYAN}${ARROW_RIGHT} $1${RESET}"
  echo -e "$line"
}
#######################################
# Draw a single-line progress bar (carriage return, no trailing newline).
# Globals:   BOLD_CYAN, BG_CYAN, BOLD_WHITE, BOLD_YELLOW, DIM, RESET (read)
# Arguments: $1 - items completed so far
#            $2 - total number of items
# Outputs:   the bar, percentage and "(current/total)" on stdout
#######################################
show_progress() {
  local current="$1"
  local total="$2"
  local width=40
  local done_units="$current"
  local percentage=0
  local filled=0
  # A total of zero would divide by zero; show an empty bar at 0% instead.
  if (( total > 0 )); then
    # Clamp so the bar can never run past its width or go negative.
    if (( done_units < 0 )); then
      done_units=0
    elif (( done_units > total )); then
      done_units=$total
    fi
    percentage=$(( done_units * 100 / total ))
    filled=$(( done_units * width / total ))
  fi
  local empty=$(( width - filled ))
  local filled_bar empty_bar
  # Build the bar with parameter expansion. Piping through `tr` breaks the
  # multibyte block glyphs on byte-oriented tr implementations.
  printf -v filled_bar '%*s' "$filled" ''
  printf -v empty_bar '%*s' "$empty" ''
  filled_bar="${filled_bar// /█}"
  empty_bar="${empty_bar// /░}"
  # Colours are passed as %b arguments so their \033 escapes are expanded
  # without ever being treated as part of the format string.
  printf '\r %bProgress: %b[' "$BOLD_CYAN" "$RESET"
  printf '%b%s%b' "${BG_CYAN}${BOLD_WHITE}" "$filled_bar" "$RESET"
  printf '%b%s%b' "$DIM" "$empty_bar" "$RESET"
  printf '] %b%3d%%%b %b(%d/%d)%b' \
    "$BOLD_YELLOW" "$percentage" "$RESET" "$DIM" "$current" "$total" "$RESET"
}
# ============================================================================
# YAML PARSING (using yq)
# ============================================================================
# Abort the script with exit status 1 (after printing an error and an install
# hint) unless a `yq` command can be found.
check_yq() {
  command -v yq &>/dev/null && return 0
  print_error "yq is not installed. Please install yq first."
  print_info "Install: https://github.com/mikefarah/yq"
  exit 1
}
# Print the length of the top-level node of the YAML file, or "0" when yq
# reports failure.
# Arguments: $1 - path to the YAML configuration
get_model_count() {
  local yaml_path="$1"
  if ! yq eval '. | length' "$yaml_path" 2>/dev/null; then
    echo "0"
  fi
}
#######################################
# Print one field of the model entry at a given index.
# The yq expression falls back to an empty string when the field is missing,
# and one leading/trailing double quote is stripped from each output line.
# Arguments: $1 - path to the YAML configuration
#            $2 - zero-based model index
#            $3 - field name (interpolated into the yq expression)
# Outputs:   the field value on stdout (empty line when absent or yq fails)
#######################################
get_model_field() {
  local config="$1"
  local index="$2"
  local field="$3"
  local value
  value=$(yq eval ".[$index].$field // \"\"" "$config" 2>/dev/null) || value=""
  # printf rather than echo: echo would treat values such as "-n" or "-e" as
  # its own options and print nothing.
  printf '%s\n' "$value" | sed 's/^"//;s/"$//'
}
# Print the length of the `files` node of the model at the given index, or "0"
# when yq reports failure.
# Arguments: $1 - path to the YAML configuration
#            $2 - zero-based model index
get_files_count() {
  local yaml_path="$1"
  local model_idx="$2"
  if ! yq eval ".[$model_idx].files | length" "$yaml_path" 2>/dev/null; then
    echo "0"
  fi
}
#######################################
# Print one field of a file mapping (`files[<n>]`) of a model entry.
# The yq expression falls back to an empty string when the field is missing,
# and one leading/trailing double quote is stripped from each output line.
# Arguments: $1 - path to the YAML configuration
#            $2 - zero-based model index
#            $3 - zero-based index into that model's files list
#            $4 - field name (interpolated into the yq expression)
# Outputs:   the field value on stdout (empty line when absent or yq fails)
#######################################
get_file_field() {
  local config="$1"
  local model_index="$2"
  local file_index="$3"
  local field="$4"
  local value
  value=$(yq eval ".[$model_index].files[$file_index].$field // \"\"" "$config" 2>/dev/null) \
    || value=""
  # printf rather than echo: echo would treat values such as "-n" or "-e" as
  # its own options and print nothing.
  printf '%s\n' "$value" | sed 's/^"//;s/"$//'
}
# Return 0 when $1 equals one of the comma-separated items in $2, comparing
# after trimming leading/trailing whitespace from each item; 1 otherwise.
_filter_list_contains() {
  local needle="$1"
  local csv="$2"
  local -a items=()
  local item
  IFS=',' read -ra items <<< "$csv"
  # The ${arr[@]+...} form keeps an empty list safe under `set -u` on old bash.
  for item in ${items[@]+"${items[@]}"}; do
    # Pure-bash trim; avoids forking xargs per item, which also fails on
    # unbalanced quote characters.
    item="${item#"${item%%[![:space:]]*}"}"
    item="${item%"${item##*[![:space:]]}"}"
    if [[ "$needle" == "$item" ]]; then
      return 0
    fi
  done
  return 1
}

#######################################
# Check whether a model passes the active category and repo_id filters.
# An empty filter accepts everything; when both are set, both must match.
# Globals:   CATEGORY_FILTER, REPO_ID_FILTER (read)
# Arguments: $1 - repo_id of the model
#            $2 - category of the model
# Returns:   0 when the model passes, 1 when it is filtered out
#######################################
matches_filters() {
  local repo_id="$1"
  local category="$2"
  if [[ -n "$CATEGORY_FILTER" ]] \
    && ! _filter_list_contains "$category" "$CATEGORY_FILTER"; then
    return 1
  fi
  if [[ -n "$REPO_ID_FILTER" ]] \
    && ! _filter_list_contains "$repo_id" "$REPO_ID_FILTER"; then
    return 1
  fi
  return 0
}
# ============================================================================
# DOWNLOAD FUNCTIONS
# ============================================================================
#######################################
# Download one file of a HuggingFace repository into the cache directory.
# The file ends up at <CACHE_DIR>/<repo_id>/<source>; repo_id is used verbatim
# as a sub-path (no character substitution is applied).
# Globals:   CACHE_DIR, DRY_RUN, HF_TOKEN (read)
# Arguments: $1 - repository id
#            $2 - path of the file inside the repository
# Returns:   0 when the file is present (downloaded now or earlier) or in
#            dry-run mode; 1 when the download failed
#######################################
download_file() {
  local repo_id="$1"
  local source="$2"
  local cache_repo_dir="${CACHE_DIR}/${repo_id}"
  local source_dir
  source_dir=$(dirname "$source")
  local output_dir="${cache_repo_dir}"
  if [[ "$source_dir" != "." ]]; then
    output_dir="${cache_repo_dir}/${source_dir}"
  fi
  local filename
  filename=$(basename "$source")
  local output_path="${output_dir}/${filename}"
  # Data is written to a ".part" sibling and only renamed to the final name
  # after a complete transfer. An interrupted run therefore never leaves a
  # truncated file that the "already downloaded" check would accept.
  local partial_path="${output_path}.part"
  local size
  print_detail "File: ${BOLD_WHITE}${source}${RESET}"
  print_detail "Output: ${CYAN}${output_path}${RESET}"
  # Check if already exists
  if [[ -f "$output_path" ]]; then
    size=$(du -h "$output_path" | cut -f1)
    print_success "Already downloaded: ${filename} (${size})"
    return 0
  fi
  # Dry-run mode
  if [[ "$DRY_RUN" == true ]]; then
    print_info "DRY-RUN: Would download ${BOLD_WHITE}${source}${RESET}"
    return 0
  fi
  # Create output directory
  mkdir -p "$output_dir"
  # Build download URL
  local url="https://huggingface.co/${repo_id}/resolve/main/${source}"
  print_detail "Downloading from HuggingFace..."
  # --fail: make HTTP errors (401/403/404) a curl failure instead of saving the
  #         error page as if it were the model file.
  # -C -  : resume from whatever is already in the .part file.
  local curl_args=(--fail -L -C - --progress-bar -o "$partial_path")
  if [[ -n "$HF_TOKEN" ]]; then
    curl_args+=(-H "Authorization: Bearer ${HF_TOKEN}")
  fi
  if curl "${curl_args[@]}" "$url" 2>&1 \
    && [[ -s "$partial_path" ]] \
    && mv -f -- "$partial_path" "$output_path"; then
    size=$(du -h "$output_path" | cut -f1)
    print_success "Downloaded ${BOLD_WHITE}${filename}${RESET} (${size})"
    return 0
  fi
  print_error "Failed to download ${source}"
  # Keep a non-empty partial file so the next run can resume it; an empty one
  # carries no data and is removed.
  if [[ -f "$partial_path" && ! -s "$partial_path" ]]; then
    rm -f -- "$partial_path" 2>/dev/null || true
  fi
  return 1
}
#######################################
# Download every file listed for one model entry.
# Entries whose `source` is empty are skipped.
# Arguments: $1 - path to the YAML configuration
#            $2 - zero-based model index
#            $3 - repository id
#            $4 - description (may be empty)
# Returns:   0 when all files downloaded; 1 when the model lists no files or
#            at least one download failed
#######################################
download_model() {
  local config="$1"
  local index="$2"
  local repo_id="$3"
  local description="$4"
  print_detail "Repository: ${BOLD_WHITE}${repo_id}${RESET}"
  if [[ -n "$description" ]]; then
    print_detail "Description: ${description}"
  fi
  local files_count
  files_count=$(get_files_count "$config" "$index")
  if [[ "$files_count" == "0" ]]; then
    print_warning "No files defined for ${repo_id}"
    return 1
  fi
  # f is local so the loop index does not leak into the caller's scope.
  local f source
  local failed=0
  for ((f = 0; f < files_count; f++)); do
    source=$(get_file_field "$config" "$index" "$f" "source")
    if [[ -z "$source" ]]; then
      continue
    fi
    if ! download_file "$repo_id" "$source"; then
      failed=$((failed + 1))
    fi
  done
  [[ $failed -eq 0 ]]
}
# ============================================================================
# LINK FUNCTIONS
# ============================================================================
#######################################
# Symlink one cached file into the output directory.
# Links <OUTPUT_DIR>/<dest> to <CACHE_DIR>/<repo_id>/<source>. An existing
# symlink at the destination is replaced; any other existing file is left
# alone and reported.
# Globals:   CACHE_DIR, OUTPUT_DIR, DRY_RUN (read)
# Arguments: $1 - repository id
#            $2 - path of the file inside the repository
#            $3 - destination path relative to OUTPUT_DIR
# Returns:   0 on success or in dry-run mode; 1 when the cached file is
#            missing, the destination is a non-symlink, or linking failed
#######################################
link_file() {
  local repo_id="$1"
  local source="$2"
  local dest="$3"
  local cache_repo_dir="${CACHE_DIR}/${repo_id}"
  local source_path="${cache_repo_dir}/${source}"
  local link_path="${OUTPUT_DIR}/${dest}"
  local link_dir
  link_dir=$(dirname "$link_path")
  print_detail "Source: ${CYAN}${source_path}${RESET}"
  print_detail "Target: ${CYAN}${link_path}${RESET}"
  # Check if source exists
  if [[ ! -f "$source_path" ]]; then
    print_warning "Source file not found: ${source}"
    return 1
  fi
  # Dry-run mode
  if [[ "$DRY_RUN" == true ]]; then
    print_info "DRY-RUN: Would link ${BOLD_WHITE}${source}${RESET} ${ARROW_RIGHT} ${dest}"
    return 0
  fi
  # Create target directory
  if ! mkdir -p "$link_dir"; then
    print_error "Cannot create directory: ${link_dir}"
    return 1
  fi
  # Remove existing symlink
  if [[ -L "$link_path" ]]; then
    rm -f "$link_path"
  elif [[ -e "$link_path" ]]; then
    print_warning "File exists (not a symlink): ${dest}"
    return 1
  fi
  # A relative symlink target is resolved against the directory holding the
  # link, not the current directory, so a relative CACHE_DIR would produce a
  # dangling link. Anchor the target to an absolute path first.
  local link_target="$source_path"
  if [[ "$link_target" != /* ]]; then
    link_target="${PWD}/${link_target}"
  fi
  # Create symlink
  if ! ln -s -- "$link_target" "$link_path"; then
    print_error "Failed to create symlink: ${dest}"
    return 1
  fi
  print_success "Linked: ${LINK_ICON} ${dest}"
  return 0
}
#######################################
# Create symlinks for every file listed for one model entry.
# Entries whose `source` is empty are skipped; an empty `dest` defaults to the
# value of `source`.
# Arguments: $1 - path to the YAML configuration
#            $2 - zero-based model index
#            $3 - repository id
# Returns:   0 when all files were linked; 1 when the model lists no files or
#            at least one link failed
#######################################
link_model() {
  local config="$1"
  local index="$2"
  local repo_id="$3"
  print_detail "Repository: ${BOLD_WHITE}${repo_id}${RESET}"
  local files_count
  files_count=$(get_files_count "$config" "$index")
  if [[ "$files_count" == "0" ]]; then
    print_warning "No files defined for ${repo_id}"
    return 1
  fi
  # f is local so the loop index does not leak into the caller's scope.
  local f source dest
  local failed=0
  for ((f = 0; f < files_count; f++)); do
    source=$(get_file_field "$config" "$index" "$f" "source")
    dest=$(get_file_field "$config" "$index" "$f" "dest")
    if [[ -z "$source" ]]; then
      continue
    fi
    # If dest is empty, use source as dest
    if ! link_file "$repo_id" "$source" "${dest:-$source}"; then
      failed=$((failed + 1))
    fi
  done
  [[ $failed -eq 0 ]]
}
# ============================================================================
# VERIFY FUNCTIONS
# ============================================================================
#######################################
# Report, for every file of one model entry, whether the cached file exists
# and whether its symlink in the output directory exists and resolves.
# Entries whose `source` is empty are skipped; an empty `dest` defaults to the
# value of `source`.
# Globals:   CACHE_DIR, OUTPUT_DIR (read)
# Arguments: $1 - path to the YAML configuration
#            $2 - zero-based model index
#            $3 - repository id
# Outputs:   a per-file status report on stdout
# Returns:   1 when the model lists no files; missing cache files or links
#            are only reported and do not change the status
#######################################
verify_model() {
  local config="$1"
  local index="$2"
  local repo_id="$3"
  echo -e " ${BOLD_WHITE}${repo_id}${RESET}"
  local files_count
  files_count=$(get_files_count "$config" "$index")
  if [[ "$files_count" == "0" ]]; then
    echo -e " ${BOLD_YELLOW}No files defined${RESET}"
    return 1
  fi
  # f is local so the loop index does not leak into the caller's scope.
  local f source dest cache_path link_path cache_status link_status size filename
  for ((f = 0; f < files_count; f++)); do
    source=$(get_file_field "$config" "$index" "$f" "source")
    dest=$(get_file_field "$config" "$index" "$f" "dest")
    if [[ -z "$source" ]]; then
      continue
    fi
    if [[ -z "$dest" ]]; then
      dest="$source"
    fi
    cache_path="${CACHE_DIR}/${repo_id}/${source}"
    link_path="${OUTPUT_DIR}/${dest}"
    # Both statuses start as "missing" and are upgraded by the checks below.
    cache_status="${BOLD_RED}${CROSS_MARK}${RESET}"
    link_status="${BOLD_RED}${CROSS_MARK}${RESET}"
    # Check cache
    if [[ -f "$cache_path" ]]; then
      size=$(du -h "$cache_path" | cut -f1)
      cache_status="${BOLD_GREEN}${CHECK_MARK}${RESET} (${size})"
    fi
    # Check symlink: -L is true for any symlink, -e only if its target exists.
    if [[ -L "$link_path" ]]; then
      if [[ -e "$link_path" ]]; then
        link_status="${BOLD_GREEN}${CHECK_MARK}${RESET}"
      else
        link_status="${BOLD_YELLOW}${WARNING}${RESET} (broken)"
      fi
    fi
    filename=$(basename "$source")
    echo -e " ${DIM}${filename}${RESET}"
    echo -e " Cache: ${cache_status}"
    echo -e " Link: ${link_status}"
  done
}
# ============================================================================
# MAIN WORKFLOW
# ============================================================================
#######################################
# Run one action (download, link or verify) over every model in the
# configuration that has a repo_id and passes the active filters, then print
# a summary.
# Globals:   CONFIG_FILE (read)
# Arguments: $1 - action name: download | link | verify
# Returns:   0 (per-model failures are counted and reported, not returned)
#######################################
process_models() {
  local action="$1"
  print_section "Processing Models (${action})"
  local total
  total=$(get_model_count "$CONFIG_FILE")
  if [[ "$total" == "0" ]]; then
    print_warning "No models found in configuration"
    return 0
  fi

  # Pass 1: decide which entries will be processed, so the "[n/total]" counter
  # shows a fixed denominator instead of one that shrinks as entries are
  # filtered out along the way.
  local -a sel_index=() sel_repo=() sel_desc=()
  local i repo_id category description
  local skipped=0
  for ((i = 0; i < total; i++)); do
    repo_id=$(get_model_field "$CONFIG_FILE" "$i" "repo_id")
    # Skip entries without repo_id (might be malformed YAML)
    if [[ -z "$repo_id" ]]; then
      continue
    fi
    category=$(get_model_field "$CONFIG_FILE" "$i" "category")
    # Apply filters
    if ! matches_filters "$repo_id" "$category"; then
      skipped=$((skipped + 1))
      continue
    fi
    description=$(get_model_field "$CONFIG_FILE" "$i" "description")
    sel_index+=("$i")
    sel_repo+=("$repo_id")
    sel_desc+=("$description")
  done

  # Pass 2: run the action on each selected entry.
  local selected=${#sel_index[@]}
  local n result
  local succeeded=0
  local failed=0
  for ((n = 0; n < selected; n++)); do
    echo ""
    print_step "$((n + 1))" "$selected" "${BOLD_CYAN}${sel_repo[n]}${RESET}"
    result=0
    # Every call is guarded with `|| result=1`: under `set -e` an unguarded
    # non-zero return would terminate the whole script mid-run.
    case "$action" in
      download)
        download_model "$CONFIG_FILE" "${sel_index[n]}" "${sel_repo[n]}" "${sel_desc[n]}" \
          || result=1
        ;;
      link)
        link_model "$CONFIG_FILE" "${sel_index[n]}" "${sel_repo[n]}" || result=1
        ;;
      verify)
        verify_model "$CONFIG_FILE" "${sel_index[n]}" "${sel_repo[n]}" || result=1
        ;;
    esac
    if [[ $result -eq 0 ]]; then
      succeeded=$((succeeded + 1))
    else
      failed=$((failed + 1))
    fi
  done
  echo ""
  if [[ $skipped -gt 0 ]]; then
    print_info "Skipped ${skipped} model(s) (filtered)"
  fi
  print_info "Summary: ${BOLD_GREEN}${succeeded} succeeded${RESET}, ${BOLD_RED}${failed} failed${RESET}"
}
# Write the usage text (commands, options, environment variables, YAML format
# and examples) to stdout. The here-document delimiter is quoted, so the text
# is emitted literally with no expansion.
show_help() {
  cat <<'USAGE_TEXT'
HuggingFace Model Downloader - A Beautiful CLI Tool
Usage: artifact_huggingface_download.sh [COMMAND] [OPTIONS]
Commands:
download Download models to cache directory (default)
link Create symlinks from cache to output directory
verify Verify model status (cache and symlinks)
Options:
-c, --config FILE Configuration YAML file (required)
--cache-dir DIR Cache directory
--output-dir DIR Output/installation directory
--category CAT1,CAT2 Filter by category (comma-separated)
--repo-id ID1,ID2 Filter by repo_id (comma-separated)
--auth-token TOKEN HuggingFace token (or set HF_TOKEN env var)
-n, --dry-run Show what would be done without making changes
-h, --help Show this help message
Environment Variables:
HF_TOKEN HuggingFace API token
CACHE_DIR Override default cache directory
OUTPUT_DIR Override default output directory
YAML Configuration Format:
- repo_id: organization/model-name
description: Model description
category: image|video|llm|upscale
files:
- source: path/to/file.safetensors
dest: subdir/output_filename.safetensors
Examples:
# Download all models from config
./artifact_huggingface_download.sh download -c models_huggingface.yaml
# Download only image models
./artifact_huggingface_download.sh download -c models_huggingface.yaml --category image
# Download specific model
./artifact_huggingface_download.sh download -c models_huggingface.yaml --repo-id black-forest-labs/FLUX.1-schnell
# Create symlinks for downloaded models
./artifact_huggingface_download.sh link -c models_huggingface.yaml
# Verify all models
./artifact_huggingface_download.sh verify -c models_huggingface.yaml
# Dry-run to preview operations
./artifact_huggingface_download.sh download -c models_huggingface.yaml --dry-run
USAGE_TEXT
}
# Entry point: check prerequisites, show the banner and the effective
# configuration, run the selected command over the models, then show the
# closing banner. Exits with status 1 when no configuration file was given or
# the given path is not a regular file.
main() {
  # Prerequisites: yq must exist; the token may come from a .env file.
  check_yq
  load_env_token

  print_banner "${ROCKET} HuggingFace Model Downloader ${SPARKLES}"
  # Make dry-run mode impossible to miss.
  [[ "$DRY_RUN" != true ]] \
    || echo -e "${BOLD_YELLOW}${WARNING} DRY-RUN MODE - No changes will be made ${WARNING}${RESET}\n"

  # Validate configuration
  print_section "Configuration"
  [[ -n "$CONFIG_FILE" ]] || {
    print_error "Configuration file required. Use -c/--config"
    exit 1
  }
  [[ -f "$CONFIG_FILE" ]] || {
    print_error "Configuration file not found: $CONFIG_FILE"
    exit 1
  }

  # Echo the effective settings.
  print_success "Config: ${CYAN}${CONFIG_FILE}${RESET}"
  print_success "Cache: ${CYAN}${CACHE_DIR}${RESET}"
  print_success "Output: ${CYAN}${OUTPUT_DIR}${RESET}"
  print_success "Command: ${BOLD_CYAN}${COMMAND}${RESET}"

  # Token status (not required for public repos); only the first eight
  # characters of the token are displayed.
  if [[ -z "$HF_TOKEN" ]]; then
    print_info "HF Token: ${DIM}not set (public repos only)${RESET}"
  else
    print_success "HF Token: ${DIM}${HF_TOKEN:0:8}...${RESET}"
  fi

  # Active filters, if any.
  if [[ -n "$CATEGORY_FILTER" ]]; then
    print_info "Category filter: ${BOLD_WHITE}${CATEGORY_FILTER}${RESET}"
  fi
  if [[ -n "$REPO_ID_FILTER" ]]; then
    print_info "Repo ID filter: ${BOLD_WHITE}${REPO_ID_FILTER}${RESET}"
  fi

  # Do the work, then sign off.
  process_models "$COMMAND"
  echo ""
  print_banner "${SPARKLES} Complete ${SPARKLES}"
}
# ============================================================================
# ARGUMENT PARSING
# ============================================================================
#######################################
# Parse the command line into the global settings.
# Globals:   CONFIG_FILE, CACHE_DIR, OUTPUT_DIR, CATEGORY_FILTER,
#            REPO_ID_FILTER, HF_TOKEN, DRY_RUN, COMMAND (written)
# Arguments: the script's command-line arguments
# Returns:   0 on success. Exits 0 after --help; exits 1 on an unknown option
#            or an option that is missing its value.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -c|--config|--cache-dir|--output-dir|--category|--repo-id|--auth-token)
        # Under `set -u` a bare "$2" on a missing value aborts with a cryptic
        # "unbound variable" error, so check the argument count first.
        if [[ $# -lt 2 ]]; then
          print_error "Option $1 requires a value"
          echo "Use --help for usage information"
          exit 1
        fi
        case "$1" in
          -c|--config) CONFIG_FILE="$2" ;;
          --cache-dir) CACHE_DIR="$2" ;;
          --output-dir) OUTPUT_DIR="$2" ;;
          --category) CATEGORY_FILTER="$2" ;;
          --repo-id) REPO_ID_FILTER="$2" ;;
          --auth-token) HF_TOKEN="$2" ;;
        esac
        shift 2
        ;;
      -n|--dry-run)
        DRY_RUN=true
        shift
        ;;
      download|link|verify)
        COMMAND="$1"
        shift
        ;;
      -h|--help)
        show_help
        exit 0
        ;;
      -*)
        print_error "Unknown option: $1"
        echo "Use --help for usage information"
        exit 1
        ;;
      *)
        # Positional argument - treat as config file; only the first one is
        # used, and never when a config file has already been set.
        if [[ -z "$CONFIG_FILE" ]]; then
          CONFIG_FILE="$1"
        fi
        shift
        ;;
    esac
  done
}

parse_args "$@"
# Run main
main