From 450022894164d09a7efe788c6d51dea3665aad1f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?= <valknar@pivoine.art>
Date: Tue, 25 Nov 2025 14:45:57 +0100
Subject: [PATCH] fix: add comprehensive error logging to find_model_files for
 verify command
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The verify command was showing all models as "NOT DOWNLOADED", and find_model_files()
exited silently without any diagnostic output, which made the cause impossible to debug.

Changes:
- Added detailed error messages to the embedded Python script in find_model_files()
  - Reports which directories were checked and why they failed
  - Shows actual vs expected paths for model/snapshots directories
  - Includes DEBUG messages showing which snapshot is being used
  - Warns when no files match the filter

- Modified verify_model_download() to capture and display stderr
  - Changed from suppressing stderr (2>/dev/null) to capturing it together with stdout (2>&1)
  - Filters lines prefixed with ERROR:/WARN:/DEBUG: out of the list of file paths
  - Logs those diagnostic lines to stderr when no model files are found

This will help identify the actual cache structure mismatch causing verification failures.

🤖 Generated with Claude Code (https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 artifact_huggingface_download.sh | 41 +++++++++++++++++++++++++++++---
 1 file changed, 38 insertions(+), 3 deletions(-)

diff --git a/artifact_huggingface_download.sh b/artifact_huggingface_download.sh
index 968ddd5..b1f5ba0 100755
--- a/artifact_huggingface_download.sh
+++ b/artifact_huggingface_download.sh
@@ -384,26 +384,44 @@ cache_path = Path(cache_dir)
 repo_path = repo_id.replace('/', '--')
 model_dir = cache_path / 'models--{0}'.format(repo_path)
 
-# Fallback to hub/ subdirectory if direct path doesn't exist
+# First attempt: direct path
+model_dir_original = model_dir
 if not model_dir.exists():
+    # Fallback to hub/ subdirectory if direct path doesn't exist
     model_dir = cache_path / 'hub' / 'models--{0}'.format(repo_path)
 
 if not model_dir.exists():
+    sys.stderr.write('ERROR: Model directory not found for {0}\n'.format(repo_id))
+    sys.stderr.write('  Tried: {0}\n'.format(str(model_dir_original)))
+    sys.stderr.write('  Tried: {0}\n'.format(str(model_dir)))
+    sys.stderr.write('  Cache: {0}\n'.format(str(cache_path)))
     sys.exit(1)
 
 # Find the latest snapshot
 snapshots_dir = model_dir / 'snapshots'
 if not snapshots_dir.exists():
+    sys.stderr.write('ERROR: Snapshots directory not found for {0}\n'.format(repo_id))
+    sys.stderr.write('  Model dir: {0}\n'.format(str(model_dir)))
+    sys.stderr.write('  Expected: {0}\n'.format(str(snapshots_dir)))
     sys.exit(1)
 
 # Get all snapshot directories sorted by modification time
-snapshots = sorted(snapshots_dir.iterdir(), key=lambda x: x.stat().st_mtime, reverse=True)
+try:
+    snapshots = sorted(snapshots_dir.iterdir(), key=lambda x: x.stat().st_mtime, reverse=True)
+except Exception as e:
+    sys.stderr.write('ERROR: Failed to list snapshots for {0}: {1}\n'.format(repo_id, str(e)))
+    sys.exit(1)
+
 if not snapshots:
+    sys.stderr.write('ERROR: No snapshots found for {0}\n'.format(repo_id))
+    sys.stderr.write('  Snapshots dir: {0}\n'.format(str(snapshots_dir)))
     sys.exit(1)
 
 latest_snapshot = snapshots[0]
+sys.stderr.write('DEBUG: Using snapshot {0}\n'.format(str(latest_snapshot)))
 
 # Find model files
+file_count = 0
 for file_path in latest_snapshot.rglob('*'):
     if file_path.is_file():
         # If filename filter is specified, only match those files
@@ -417,6 +435,10 @@ for file_path in latest_snapshot.rglob('*'):
         if file_path.name.endswith('.json') and not file_path.name in important_files:
             continue
         print(str(file_path))
+        file_count += 1
+
+if file_count == 0:
+    sys.stderr.write('WARN: No files matched filter for {0} (filter: {1})\n'.format(repo_id, filename_filter))
 EOPYFINDFIND
 }
 
@@ -617,10 +639,23 @@ verify_model_download() {
     local filename_filter="$3"
 
     # Find model files in cache
+    # Capture both stdout (file paths) and stderr (error messages)
+    local find_output
+    find_output=$(find_model_files "$repo_id" "$filename_filter" 2>&1)
+
+    # Separate file paths from error/debug messages
     local model_files
-    model_files=$(find_model_files "$repo_id" "$filename_filter" 2>/dev/null)
+    model_files=$(echo "$find_output" | grep -v "^ERROR:" | grep -v "^WARN:" | grep -v "^DEBUG:")
+
+    # Extract error messages for logging
+    local error_msgs
+    error_msgs=$(echo "$find_output" | grep "^ERROR:\|^WARN:\|^DEBUG:")
 
     if [[ -z "$model_files" ]]; then
+        # Log error messages to stderr if they exist
+        if [[ -n "$error_msgs" ]]; then
+            echo "$error_msgs" >&2
+        fi
         echo "NOT_FOUND|0|0|"
         return 1
     fi