diff --git a/artifact_huggingface_download.sh b/artifact_huggingface_download.sh
index 438f503..dd28400 100755
--- a/artifact_huggingface_download.sh
+++ b/artifact_huggingface_download.sh
@@ -394,8 +394,12 @@ for file_path in latest_snapshot.rglob('*'):
     # If filename filter is specified, only match those files
     if filename_filter and filename_filter not in file_path.name:
         continue
-    # Skip metadata files
-    if file_path.name.endswith(('.json', '.txt', '.md', '.gitattributes')):
+    # Skip metadata files except important config files.
+    # Only the JSON names below are kept; non-JSON files (e.g. SentencePiece '.model') match no skip rule.
+    important_files = ('config.json', 'tokenizer.json', 'tokenizer_config.json')
+    if file_path.name.endswith(('.txt', '.md', '.gitattributes')):
+        continue
+    if file_path.name.endswith('.json') and file_path.name not in important_files:
         continue
     print(str(file_path))
 EOPYFINDFIND