Skip to content

Commit

Permalink
[misc] Reduce number of config file requests to HuggingFace (vllm-project#12797)
Browse files Browse the repository at this point in the history

Signed-off-by: EC2 Default User <ec2-user@ip-172-31-20-117.us-west-2.compute.internal>
Signed-off-by: <>
Co-authored-by: EC2 Default User <ec2-user@ip-172-31-20-117.us-west-2.compute.internal>
  • Loading branch information
khluu and EC2 Default User authored Feb 6, 2025
1 parent c786e75 commit e152f29
Showing 1 changed file with 23 additions and 13 deletions.
36 changes: 23 additions & 13 deletions vllm/transformers_utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from typing import Any, Dict, Optional, Type, Union

import huggingface_hub
from huggingface_hub import (file_exists, hf_hub_download,
from huggingface_hub import (file_exists, hf_hub_download, list_repo_files,
try_to_load_from_cache)
from huggingface_hub.utils import (EntryNotFoundError, HfHubHTTPError,
LocalEntryNotFoundError,
Expand Down Expand Up @@ -395,18 +395,28 @@ def get_sentence_transformer_tokenizer_config(model: str,
- dict: A dictionary containing the configuration parameters
for the Sentence Transformer BERT model.
"""
for config_name in [
"sentence_bert_config.json",
"sentence_roberta_config.json",
"sentence_distilbert_config.json",
"sentence_camembert_config.json",
"sentence_albert_config.json",
"sentence_xlm-roberta_config.json",
"sentence_xlnet_config.json",
]:
encoder_dict = get_hf_file_to_dict(config_name, model, revision)
if encoder_dict:
break
sentence_transformer_config_files = [
"sentence_bert_config.json",
"sentence_roberta_config.json",
"sentence_distilbert_config.json",
"sentence_camembert_config.json",
"sentence_albert_config.json",
"sentence_xlm-roberta_config.json",
"sentence_xlnet_config.json",
]
try:
# If model is on HuggingfaceHub, get the repo files
repo_files = list_repo_files(model, revision=revision, token=HF_TOKEN)
except Exception as e:
logger.debug("Error getting repo files", e)
repo_files = []

encoder_dict = None
for config_name in sentence_transformer_config_files:
if config_name in repo_files or Path(model).exists():
encoder_dict = get_hf_file_to_dict(config_name, model, revision)
if encoder_dict:
break

if not encoder_dict:
return None
Expand Down

0 comments on commit e152f29

Please sign in to comment.