fix: add comprehensive error logging to find_model_files for verify command

The verify command was showing all models as "NOT DOWNLOADED" because
find_model_files() was exiting silently without diagnostic output. This
made debugging impossible.

Changes:
- Added detailed error messages to the Python script in find_model_files()
  - Reports which directories were checked and why they failed
  - Shows actual vs expected paths for model/snapshots directories
  - Includes DEBUG messages showing which snapshot is being used
  - Warns when no files match the filter
- Modified verify_model_download() to capture and display stderr
  - Changed from suppressing stderr (2>/dev/null) to capturing it (2>&1)
  - Filters lines prefixed with ERROR/WARN/DEBUG out of the file-path list
  - Logs diagnostic messages to stderr for visibility

This will help identify the actual cache structure mismatch causing
verification failures.

🤖 Generated with Claude Code (https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-25 14:45:57 +01:00
parent 3308349e78
commit 4500228941
1 changed files with 38 additions and 3 deletions
--- a/artifact_huggingface_download.sh
+++ b/artifact_huggingface_download.sh
@@ -384,26 +384,44 @@ cache_path = Path(cache_dir)
 repo_path = repo_id.replace('/', '--')
 model_dir = cache_path / 'models--{0}'.format(repo_path)

-# Fallback to hub/ subdirectory if direct path doesn't exist
+# First attempt: direct path
+model_dir_original = model_dir
 if not model_dir.exists():
+    # Fallback to hub/ subdirectory if direct path doesn't exist
    model_dir = cache_path / 'hub' / 'models--{0}'.format(repo_path)

 if not model_dir.exists():
+    sys.stderr.write('ERROR: Model directory not found for {0}\n'.format(repo_id))
+    sys.stderr.write('  Tried: {0}\n'.format(str(model_dir_original)))
+    sys.stderr.write('  Tried: {0}\n'.format(str(model_dir)))
+    sys.stderr.write('  Cache: {0}\n'.format(str(cache_path)))
    sys.exit(1)

 # Find the latest snapshot
 snapshots_dir = model_dir / 'snapshots'
 if not snapshots_dir.exists():
+    sys.stderr.write('ERROR: Snapshots directory not found for {0}\n'.format(repo_id))
+    sys.stderr.write('  Model dir: {0}\n'.format(str(model_dir)))
+    sys.stderr.write('  Expected: {0}\n'.format(str(snapshots_dir)))
    sys.exit(1)

 # Get all snapshot directories sorted by modification time
-snapshots = sorted(snapshots_dir.iterdir(), key=lambda x: x.stat().st_mtime, reverse=True)
+try:
+    snapshots = sorted(snapshots_dir.iterdir(), key=lambda x: x.stat().st_mtime, reverse=True)
+except Exception as e:
+    sys.stderr.write('ERROR: Failed to list snapshots for {0}: {1}\n'.format(repo_id, str(e)))
+    sys.exit(1)
+
 if not snapshots:
+    sys.stderr.write('ERROR: No snapshots found for {0}\n'.format(repo_id))
+    sys.stderr.write('  Snapshots dir: {0}\n'.format(str(snapshots_dir)))
    sys.exit(1)

 latest_snapshot = snapshots[0]
+sys.stderr.write('DEBUG: Using snapshot {0}\n'.format(str(latest_snapshot)))

 # Find model files
+file_count = 0
 for file_path in latest_snapshot.rglob('*'):
    if file_path.is_file():
        # If filename filter is specified, only match those files
@@ -417,6 +435,10 @@ for file_path in latest_snapshot.rglob('*'):
        if file_path.name.endswith('.json') and not file_path.name in important_files:
            continue
        print(str(file_path))
+        file_count += 1
+
+if file_count == 0:
+    sys.stderr.write('WARN: No files matched filter for {0} (filter: {1})\n'.format(repo_id, filename_filter))
 EOPYFINDFIND
 }

@@ -617,10 +639,23 @@ verify_model_download() {
    local filename_filter="$3"

    # Find model files in cache
+    # Capture both stdout (file paths) and stderr (error messages)
+    local find_output
+    find_output=$(find_model_files "$repo_id" "$filename_filter" 2>&1)
+
+    # Separate file paths from error/debug messages
    local model_files
-    model_files=$(find_model_files "$repo_id" "$filename_filter" 2>/dev/null)
+    model_files=$(echo "$find_output" | grep -v "^ERROR:" | grep -v "^WARN:" | grep -v "^DEBUG:")
+
+    # Extract error messages for logging
+    local error_msgs
+    error_msgs=$(echo "$find_output" | grep "^ERROR:\|^WARN:\|^DEBUG:")

    if [[ -z "$model_files" ]]; then
+        # Log error messages to stderr if they exist
+        if [[ -n "$error_msgs" ]]; then
+            echo "$error_msgs" >&2
+        fi
        echo "NOT_FOUND|0|0|"
        return 1
    fi