diff --git a/artifact_huggingface_download.sh b/artifact_huggingface_download.sh index 3e92ad1..24912c7 100755 --- a/artifact_huggingface_download.sh +++ b/artifact_huggingface_download.sh @@ -428,11 +428,11 @@ for file_path in latest_snapshot.rglob('*'): if filename_filter and filename_filter not in file_path.name: continue # Skip metadata files except important config files - # Allow: config.json, tokenizer.json, tokenizer_config.json, sentencepiece models + # Allow: config.json, tokenizer.json, tokenizer_config.json, sentencepiece models, .index.json for sharded models important_files = ('config.json', 'tokenizer.json', 'tokenizer_config.json', '.model') if file_path.name.endswith(('.txt', '.md', '.gitattributes')): continue - if file_path.name.endswith('.json') and not file_path.name in important_files: + if file_path.name.endswith('.json') and not (file_path.name in important_files or file_path.name.endswith('.index.json')): continue print(str(file_path)) file_count += 1