diff --git a/artifact_huggingface_download.sh b/artifact_huggingface_download.sh index 968ddd5..b1f5ba0 100755 --- a/artifact_huggingface_download.sh +++ b/artifact_huggingface_download.sh @@ -384,26 +384,44 @@ cache_path = Path(cache_dir) repo_path = repo_id.replace('/', '--') model_dir = cache_path / 'models--{0}'.format(repo_path) -# Fallback to hub/ subdirectory if direct path doesn't exist +# First attempt: direct path +model_dir_original = model_dir if not model_dir.exists(): + # Fallback to hub/ subdirectory if direct path doesn't exist model_dir = cache_path / 'hub' / 'models--{0}'.format(repo_path) if not model_dir.exists(): + sys.stderr.write('ERROR: Model directory not found for {0}\n'.format(repo_id)) + sys.stderr.write(' Tried: {0}\n'.format(str(model_dir_original))) + sys.stderr.write(' Tried: {0}\n'.format(str(model_dir))) + sys.stderr.write(' Cache: {0}\n'.format(str(cache_path))) sys.exit(1) # Find the latest snapshot snapshots_dir = model_dir / 'snapshots' if not snapshots_dir.exists(): + sys.stderr.write('ERROR: Snapshots directory not found for {0}\n'.format(repo_id)) + sys.stderr.write(' Model dir: {0}\n'.format(str(model_dir))) + sys.stderr.write(' Expected: {0}\n'.format(str(snapshots_dir))) sys.exit(1) # Get all snapshot directories sorted by modification time -snapshots = sorted(snapshots_dir.iterdir(), key=lambda x: x.stat().st_mtime, reverse=True) +try: + snapshots = sorted(snapshots_dir.iterdir(), key=lambda x: x.stat().st_mtime, reverse=True) +except Exception as e: + sys.stderr.write('ERROR: Failed to list snapshots for {0}: {1}\n'.format(repo_id, str(e))) + sys.exit(1) + if not snapshots: + sys.stderr.write('ERROR: No snapshots found for {0}\n'.format(repo_id)) + sys.stderr.write(' Snapshots dir: {0}\n'.format(str(snapshots_dir))) sys.exit(1) latest_snapshot = snapshots[0] +sys.stderr.write('DEBUG: Using snapshot {0}\n'.format(str(latest_snapshot))) # Find model files +file_count = 0 for file_path in latest_snapshot.rglob('*'): if file_path.is_file(): # If filename filter is specified, only match those files @@ -417,6 +435,10 @@ for file_path in latest_snapshot.rglob('*'): if file_path.name.endswith('.json') and not file_path.name in important_files: continue print(str(file_path)) + file_count += 1 + +if file_count == 0: + sys.stderr.write('WARN: No files matched filter for {0} (filter: {1})\n'.format(repo_id, filename_filter)) EOPYFINDFIND } @@ -617,10 +639,23 @@ verify_model_download() { local filename_filter="$3" # Find model files in cache + # Capture both stdout (file paths) and stderr (error messages) + local find_output + find_output=$(find_model_files "$repo_id" "$filename_filter" 2>&1) + + # Separate file paths from error/debug messages local model_files - model_files=$(find_model_files "$repo_id" "$filename_filter" 2>/dev/null) + model_files=$(echo "$find_output" | grep -v "^ERROR:" | grep -v "^WARN:" | grep -v "^DEBUG:") + + # Extract error messages for logging + local error_msgs + error_msgs=$(echo "$find_output" | grep "^ERROR:\|^WARN:\|^DEBUG:") if [[ -z "$model_files" ]]; then + # Log error messages to stderr if they exist + if [[ -n "$error_msgs" ]]; then + echo "$error_msgs" >&2 + fi echo "NOT_FOUND|0|0|" return 1 fi