4 files changed: +8 −4 lines
@@ -208,7 +208,7 @@ def test_mistral_format(
     with vllm_runner(
         model,
         dtype=dtype,
-        tokenizer_mode="auto",
+        tokenizer_mode="hf",
         load_format="safetensors",
         config_format="hf",
     ) as hf_format_model:
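For reference, the same explicit setting through the offline API; a minimal sketch, not part of this diff (the model name and prompt are illustrative only):

from vllm import LLM

# Sketch: pin the tokenizer mode to "hf" instead of relying on "auto"
# resolution; extra kwargs such as config_format are forwarded to the
# engine arguments.
llm = LLM(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    tokenizer_mode="hf",
    load_format="safetensors",
    config_format="hf",
)
print(llm.generate(["Hello, my name is"])[0].outputs[0].text)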
@@ -155,6 +155,7 @@ def test_4bit_bnb_moe_model(
         quantization="bitsandbytes",
         enforce_eager=False,
         default_torch_num_threads=1,
+        tokenizer_mode="hf",
     ) as llm:
         vllm_outputs = llm.generate_greedy_logprobs(
             example_prompts, max_tokens=32, num_logprobs=5
@@ -204,6 +205,7 @@ def test_4bit_bnb_embedding_model(
         gpu_memory_utilization=0.5,
         quantization="bitsandbytes",
         default_torch_num_threads=1,
+        tokenizer_mode="hf",
     ) as vllm_model:
         vllm_outputs = vllm_model.embed(example_prompts)

@@ -256,6 +258,7 @@ def validate_generated_texts(
         tensor_parallel_size=vllm_tp_size,
         enforce_eager=False,
         default_torch_num_threads=1,
+        tokenizer_mode="hf",
     ) as llm:
         vllm_outputs = llm.generate_greedy(prompts, max_tokens)
         vllm_logs = log_generated_texts(prompts, vllm_outputs, "VllmRunner")
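The same pinning for a quantized embedding run; a hedged sketch mirroring the embedding test above (the model name and the task value are assumptions, not taken from this diff):

from vllm import LLM

# Sketch: 4-bit bitsandbytes checkpoint with the tokenizer mode pinned
# to "hf", used for embedding rather than generation.
llm = LLM(
    model="intfloat/e5-mistral-7b-instruct",
    task="embed",
    quantization="bitsandbytes",
    tokenizer_mode="hf",
)
embeddings = llm.embed(["example prompt"])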
@@ -128,7 +128,8 @@ class ModelConfig:
     name or path will be used."""
     tokenizer_mode: TokenizerMode = "auto"
     """Tokenizer mode:\n
-    - "auto" will use the fast tokenizer if available.\n
+    - "auto" will use the "hf" tokenizer if Mistral's tokenizer is not available.\n
+    - "hf" will use the fast tokenizer if available.\n
     - "slow" will always use the slow tokenizer.\n
     - "mistral" will always use the tokenizer from `mistral_common`.\n
     - "custom" will use --tokenizer to select the preregistered tokenizer."""
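One way to read the new "auto" behavior; a minimal sketch of the resolution order the docstring describes, not vLLM's actual implementation (the helper name is hypothetical):

def resolve_tokenizer_mode(mode: str, mistral_tokenizer_available: bool) -> str:
    # Hypothetical helper mirroring the docstring above: "auto" prefers
    # Mistral's tokenizer when it is present, otherwise falls back to "hf".
    if mode == "auto":
        return "mistral" if mistral_tokenizer_available else "hf"
    # "hf", "slow", "mistral", and "custom" pass through unchanged.
    return mode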
model_executor/model_loader
@@ -108,11 +108,11 @@ def _prepare_weights(
             )
         )
         > 0
-        else "auto"
+        else "hf"
     )

     # Some quantized models use .pt files for storing the weights.
-    if load_format in ["auto", "hf"]:
+    if load_format == "hf":
         allow_patterns = ["*.safetensors", "*.bin"]
     elif load_format == "safetensors" or load_format == "fastsafetensors":
         use_safetensors = True
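The branch above only chooses glob patterns for candidate weight files; a hedged sketch of that selection in isolation (the "pt" branch and the function name are assumptions, not shown in this diff):

def weight_file_patterns(load_format: str) -> list[str]:
    # Standalone rendering of the pattern choice in _prepare_weights.
    if load_format == "hf":
        # Hugging Face checkpoints: prefer safetensors, allow .bin fallback.
        return ["*.safetensors", "*.bin"]
    if load_format in ("safetensors", "fastsafetensors"):
        return ["*.safetensors"]
    if load_format == "pt":
        # Assumption: some quantized models store weights as .pt files.
        return ["*.pt"]
    raise ValueError(f"unsupported load_format: {load_format}")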