-
-
Notifications
You must be signed in to change notification settings - Fork 10.9k
Closed
Labels
bug: Something isn't working
Description
Your current environment
Starting the vLLM container with Qwen3 Embedding works with 0.9.2 but fails with 0.10.0.
Model: Qwen3-Embedding-8B
🐛 Describe the bug
Defaulted container "qwen3-embedding-8b" out of: qwen3-embedding-8b, qwen3-embedding-8b-gpu-test (init)
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
ERROR 07-24 21:30:33 [core.py:632] EngineCore failed to start.
ERROR 07-24 21:30:33 [core.py:632] Traceback (most recent call last):
ERROR 07-24 21:30:33 [core.py:632] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 623, in run_engine_core
ERROR 07-24 21:30:33 [core.py:632] engine_core = EngineCoreProc(*args, **kwargs)
ERROR 07-24 21:30:33 [core.py:632] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-24 21:30:33 [core.py:632] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 441, in __init__
ERROR 07-24 21:30:33 [core.py:632] super().__init__(vllm_config, executor_class, log_stats,
ERROR 07-24 21:30:33 [core.py:632] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 77, in __init__
ERROR 07-24 21:30:33 [core.py:632] self.model_executor = executor_class(vllm_config)
ERROR 07-24 21:30:33 [core.py:632] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-24 21:30:33 [core.py:632] File "/usr/local/lib/python3.12/dist-packages/vllm/executor/executor_base.py", line 53, in __init__
ERROR 07-24 21:30:33 [core.py:632] self._init_executor()
ERROR 07-24 21:30:33 [core.py:632] File "/usr/local/lib/python3.12/dist-packages/vllm/executor/uniproc_executor.py", line 49, in _init_executor
ERROR 07-24 21:30:33 [core.py:632] self.collective_rpc("load_model")
ERROR 07-24 21:30:33 [core.py:632] File "/usr/local/lib/python3.12/dist-packages/vllm/executor/uniproc_executor.py", line 58, in collective_rpc
ERROR 07-24 21:30:33 [core.py:632] answer = run_method(self.driver_worker, method, args, kwargs)
ERROR 07-24 21:30:33 [core.py:632] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-24 21:30:33 [core.py:632] File "/usr/local/lib/python3.12/dist-packages/vllm/utils/__init__.py", line 2985, in run_method
ERROR 07-24 21:30:33 [core.py:632] return func(*args, **kwargs)
ERROR 07-24 21:30:33 [core.py:632] ^^^^^^^^^^^^^^^^^^^^^
ERROR 07-24 21:30:33 [core.py:632] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_worker.py", line 201, in load_model
ERROR 07-24 21:30:33 [core.py:632] self.model_runner.load_model(eep_scale_up=eep_scale_up)
ERROR 07-24 21:30:33 [core.py:632] File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_model_runner.py", line 1876, in load_model
ERROR 07-24 21:30:33 [core.py:632] self.model = model_loader.load_model(
ERROR 07-24 21:30:33 [core.py:632] ^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-24 21:30:33 [core.py:632] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/base_loader.py", line 49, in load_model
ERROR 07-24 21:30:33 [core.py:632] self.load_weights(model, model_config)
ERROR 07-24 21:30:33 [core.py:632] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/default_loader.py", line 259, in load_weights
Process EngineCore_0:
ERROR 07-24 21:30:33 [core.py:632] loaded_weights = model.load_weights(
ERROR 07-24 21:30:33 [core.py:632] ^^^^^^^^^^^^^^^^^^^
ERROR 07-24 21:30:33 [core.py:632] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/qwen3.py", line 321, in load_weights
ERROR 07-24 21:30:33 [core.py:632] return loader.load_weights(weights)
ERROR 07-24 21:30:33 [core.py:632] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-24 21:30:33 [core.py:632] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 291, in load_weights
ERROR 07-24 21:30:33 [core.py:632] autoloaded_weights = set(self._load_module("", self.module, weights))
ERROR 07-24 21:30:33 [core.py:632] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-24 21:30:33 [core.py:632] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 277, in _load_module
ERROR 07-24 21:30:33 [core.py:632] raise ValueError(msg)
ERROR 07-24 21:30:33 [core.py:632] ValueError: There is no module or parameter named 'layers' in Qwen3ForCausalLM
Traceback (most recent call last):
File "/usr/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
self.run()
File "/usr/lib/python3.12/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 636, in run_engine_core
raise e
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 623, in run_engine_core
engine_core = EngineCoreProc(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 441, in __init__
super().__init__(vllm_config, executor_class, log_stats,
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 77, in __init__
self.model_executor = executor_class(vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/executor/executor_base.py", line 53, in __init__
self._init_executor()
File "/usr/local/lib/python3.12/dist-packages/vllm/executor/uniproc_executor.py", line 49, in _init_executor
self.collective_rpc("load_model")
File "/usr/local/lib/python3.12/dist-packages/vllm/executor/uniproc_executor.py", line 58, in collective_rpc
answer = run_method(self.driver_worker, method, args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/utils/__init__.py", line 2985, in run_method
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_worker.py", line 201, in load_model
self.model_runner.load_model(eep_scale_up=eep_scale_up)
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_model_runner.py", line 1876, in load_model
self.model = model_loader.load_model(
^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/base_loader.py", line 49, in load_model
self.load_weights(model, model_config)
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/default_loader.py", line 259, in load_weights
loaded_weights = model.load_weights(
^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/qwen3.py", line 321, in load_weights
return loader.load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 291, in load_weights
autoloaded_weights = set(self._load_module("", self.module, weights))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 277, in _load_module
raise ValueError(msg)
ValueError: There is no module or parameter named 'layers' in Qwen3ForCausalLM
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
[rank0]:[W724 21:30:33.005093769 ProcessGroupNCCL.cpp:1476] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 1856, in <module>
uvloop.run(run_server(args))
File "/usr/local/lib/python3.12/dist-packages/uvloop/__init__.py", line 109, in run
return __asyncio.run(
^^^^^^^^^^^^^^
File "/usr/lib/python3.12/asyncio/runners.py", line 195, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
File "/usr/local/lib/python3.12/dist-packages/uvloop/__init__.py", line 61, in wrapper
return await main
^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 1791, in run_server
await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 1811, in run_server_worker
async with build_async_engine_client(args, client_config) as engine_client:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/contextlib.py", line 210, in __aenter__
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 158, in build_async_engine_client
async with build_async_engine_client_from_engine_args(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/contextlib.py", line 210, in __aenter__
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 194, in build_async_engine_client_from_engine_args
async_llm = AsyncLLM.from_vllm_config(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/async_llm.py", line 163, in from_vllm_config
return cls(
^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/async_llm.py", line 117, in __init__
self.engine_core = EngineCoreClient.make_async_mp_client(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core_client.py", line 98, in make_async_mp_client
return AsyncMPClient(*client_args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core_client.py", line 677, in __init__
super().__init__(
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core_client.py", line 408, in __init__
with launch_core_engines(vllm_config, executor_class,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/contextlib.py", line 144, in __exit__
next(self.gen)
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/utils.py", line 697, in launch_core_engines
wait_for_engine_startup(
File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/utils.py", line 750, in wait_for_engine_startup
raise RuntimeError("Engine core initialization failed. "
RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {}
Before submitting a new issue...
- Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.
Metadata
Metadata
Assignees
Labels
bug: Something isn't working