Your current environment
vllm 0.7.2
export VLLM_IMAGE_FETCH_TIMEOUT=100
export VLLM_ENGINE_ITERATION_TIMEOUT_S=600
export CUDA_VISIBLE_DEVICES=6,7
model=/llava-1.5-7b-hf
max_model_len=32768
sed -i "s/"max_position_embeddings": .*$/"max_position_embeddings": ${max_model_len},/g" ${model}/config.json
python -m vllm.entrypoints.openai.api_server \
    --model=${model} \
    --host=0.0.0.0 \
    --port=9999 \
    --max-num-seqs=256 \
    --max-model-len=${max_model_len} \
    --chat-template /vllm-gpu-0.6.3.post1/examples/template_llava.jinja \
    --dtype bfloat16 \
    --tensor-parallel-size 1 \
    --served-model-name llava-1.5-7b-hf \
    --gpu-memory-utilization=0.95 \
    --trust-remote-code \
    --enforce-eager
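
The crash below reproduces without starting the server at all. A minimal sketch, assuming the transformers version pulled in by vllm 0.7.2 and the same local /llava-1.5-7b-hf checkpoint as above:

```python
# Minimal repro sketch (no vLLM involved): on affected transformers versions,
# LlavaProcessor.patch_size loads as None for this checkpoint, and the
# processor then crashes on any image input.
from PIL import Image
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("/llava-1.5-7b-hf")
print("patch_size:", processor.patch_size)  # None on the affected checkpoint

image = Image.new("RGB", (336, 336))  # same dummy size vLLM uses for profiling
# Raises: TypeError: unsupported operand type(s) for //: 'int' and 'NoneType'
processor(text="<image>", images=[image], return_tensors="pt")
```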
🐛 Describe the bug
Process SpawnProcess-1:
Traceback (most recent call last):
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/inputs/registry.py", line 180, in call_hf_processor
return hf_processor(**data, **merged_kwargs, return_tensors="pt")
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/transformers/models/llava/processing_llava.py", line 160, in call
num_image_tokens = (height // self.patch_size) * (
TypeError: unsupported operand type(s) for //: 'int' and 'NoneType'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/root/anaconda3/envs/py310/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
self.run()
File "/root/anaconda3/envs/py310/lib/python3.10/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/engine/multiprocessing/engine.py", line 391, in run_mp_engine
raise e
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/engine/multiprocessing/engine.py", line 380, in run_mp_engine
engine = MQLLMEngine.from_engine_args(engine_args=engine_args,
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/engine/multiprocessing/engine.py", line 123, in from_engine_args
return cls(ipc_path=ipc_path,
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/engine/multiprocessing/engine.py", line 75, in init
self.engine = LLMEngine(*args, **kwargs)
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/engine/llm_engine.py", line 276, in init
self._initialize_kv_caches()
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/engine/llm_engine.py", line 416, in _initialize_kv_caches
self.model_executor.determine_num_available_blocks())
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/executor/executor_base.py", line 101, in determine_num_available_blocks
results = self.collective_rpc("determine_num_available_blocks")
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/executor/uniproc_executor.py", line 51, in collective_rpc
answer = run_method(self.driver_worker, method, args, kwargs)
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/utils.py", line 2220, in run_method
return func(*args, **kwargs)
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/worker/worker.py", line 229, in determine_num_available_blocks
self.model_runner.profile_run()
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/worker/model_runner.py", line 1235, in profile_run
self._dummy_run(max_num_batched_tokens, max_num_seqs)
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/worker/model_runner.py", line 1302, in _dummy_run
.dummy_data_for_profiling(self.model_config,
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/inputs/registry.py", line 353, in dummy_data_for_profiling
dummy_data = profiler.get_dummy_data(seq_len)
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/multimodal/profiling.py", line 164, in get_dummy_data
mm_inputs = self._get_dummy_mm_inputs(seq_len, mm_counts)
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/multimodal/profiling.py", line 141, in _get_dummy_mm_inputs
return self.processor.apply(
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/multimodal/processing.py", line 1220, in apply
prompt_ids, mm_kwargs = self._cached_apply_hf_processor(
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/multimodal/processing.py", line 996, in _cached_apply_hf_processor
prompt_ids, mm_missing_kwargs = self._apply_hf_processor_main(
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/multimodal/processing.py", line 946, in _apply_hf_processor_main
mm_missing_kwargs = self._apply_hf_processor_mm_only(
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/multimodal/processing.py", line 911, in _apply_hf_processor_mm_only
_, mm_kwargs = self._apply_hf_processor_text_mm(
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/multimodal/processing.py", line 845, in _apply_hf_processor_text_mm
processed_data = self._call_hf_processor(
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/multimodal/processing.py", line 827, in _call_hf_processor
return self.info.ctx.call_hf_processor(
File "/root/anaconda3/envs/py310/lib/python3.10/site-packages/vllm/inputs/registry.py", line 185, in call_hf_processor
raise RuntimeError(msg) from exc
RuntimeError: Failed to apply LlavaProcessor on data={'text': '', 'images': [<PIL.Image.Image image mode=RGB size=336x336 at 0x7F7318493640>]} with kwargs={}
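
The TypeError comes from self.patch_size being None inside LlavaProcessor: recent transformers releases expect patch_size and vision_feature_select_strategy in the checkpoint's processor config, and older local copies of llava-1.5-7b-hf don't ship them. A workaround sketch, not an official fix, that patches the local checkpoint once (patch_size=14 matches the CLIP ViT-L/14-336 vision tower of llava-1.5-7b-hf; please verify against the vision_config in your own config.json):

```python
# Workaround sketch: persist the missing processor fields into the local
# checkpoint so LlavaProcessor stops loading None for patch_size.
from transformers import AutoProcessor

model = "/llava-1.5-7b-hf"
processor = AutoProcessor.from_pretrained(model)
if getattr(processor, "patch_size", None) is None:
    processor.patch_size = 14  # assumes CLIP ViT-L/14; check vision_config.patch_size
if getattr(processor, "vision_feature_select_strategy", None) is None:
    processor.vision_feature_select_strategy = "default"
processor.save_pretrained(model)  # writes processor_config.json into the checkpoint
```

After patching, the server starts normally with the same launch command. A freshly downloaded copy of the checkpoint from the Hub may already include these fields.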
Before submitting a new issue...
- [x] Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.