-
-
Notifications
You must be signed in to change notification settings - Fork 10.8k
Closed
Labels
bug: Something isn't working
Description
Your current environment
The output of `python collect_env.py`
ERROR 04-18 10:16:33 [core.py:390] Traceback (most recent call last):
ERROR 04-18 10:16:33 [core.py:390] File "/home/user/vllm/vllm/v1/engine/core.py", line 381, in run_engine_core
ERROR 04-18 10:16:33 [core.py:390] engine_core = EngineCoreProc(*args, **kwargs)
ERROR 04-18 10:16:33 [core.py:390] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-18 10:16:33 [core.py:390] File "/home/user/vllm/vllm/v1/engine/core.py", line 323, in __init__
ERROR 04-18 10:16:33 [core.py:390] super().__init__(vllm_config, executor_class, log_stats,
ERROR 04-18 10:16:33 [core.py:390] File "/home/user/vllm/vllm/v1/engine/core.py", line 63, in __init__
ERROR 04-18 10:16:33 [core.py:390] self.model_executor = executor_class(vllm_config)
ERROR 04-18 10:16:33 [core.py:390] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-18 10:16:33 [core.py:390] File "/home/user/vllm/vllm/executor/executor_base.py", line 52, in __init__
ERROR 04-18 10:16:33 [core.py:390] self._init_executor()
ERROR 04-18 10:16:33 [core.py:390] File "/home/user/vllm/vllm/executor/uniproc_executor.py", line 45, in _init_executor
ERROR 04-18 10:16:33 [core.py:390] self.collective_rpc("init_worker", args=([kwargs], ))
ERROR 04-18 10:16:33 [core.py:390] File "/home/user/vllm/vllm/executor/uniproc_executor.py", line 56, in collective_rpc
ERROR 04-18 10:16:33 [core.py:390] answer = run_method(self.driver_worker, method, args, kwargs)
ERROR 04-18 10:16:33 [core.py:390] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-18 10:16:33 [core.py:390] File "/home/user/vllm/vllm/utils.py", line 2428, in run_method
ERROR 04-18 10:16:33 [core.py:390] return func(*args, **kwargs)
ERROR 04-18 10:16:33 [core.py:390] ^^^^^^^^^^^^^^^^^^^^^
ERROR 04-18 10:16:33 [core.py:390] File "/home/user/vllm/vllm/worker/worker_base.py", line 558, in init_worker
ERROR 04-18 10:16:33 [core.py:390] worker_class = resolve_obj_by_qualname(
ERROR 04-18 10:16:33 [core.py:390] ^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-18 10:16:33 [core.py:390] File "/home/user/vllm/vllm/utils.py", line 2059, in resolve_obj_by_qualname
ERROR 04-18 10:16:33 [core.py:390] module = importlib.import_module(module_name)
ERROR 04-18 10:16:33 [core.py:390] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-18 10:16:33 [core.py:390] File "/home/reid/miniconda3/envs/vllm/lib/python3.12/importlib/__init__.py", line 90, in import_module
ERROR 04-18 10:16:33 [core.py:390] return _bootstrap._gcd_import(name[level:], package, level)
ERROR 04-18 10:16:33 [core.py:390] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-18 10:16:33 [core.py:390] File "<frozen importlib._bootstrap>", line 1387, in _gcd_import
ERROR 04-18 10:16:33 [core.py:390] File "<frozen importlib._bootstrap>", line 1360, in _find_and_load
ERROR 04-18 10:16:33 [core.py:390] File "<frozen importlib._bootstrap>", line 1331, in _find_and_load_unlocked
ERROR 04-18 10:16:33 [core.py:390] File "<frozen importlib._bootstrap>", line 935, in _load_unlocked
ERROR 04-18 10:16:33 [core.py:390] File "<frozen importlib._bootstrap_external>", line 999, in exec_module
ERROR 04-18 10:16:33 [core.py:390] File "<frozen importlib._bootstrap>", line 488, in _call_with_frames_removed
ERROR 04-18 10:16:33 [core.py:390] File "/home/user/vllm/vllm/v1/worker/gpu_worker.py", line 26, in <module>
ERROR 04-18 10:16:33 [core.py:390] from vllm.v1.worker.gpu_model_runner import GPUModelRunner
ERROR 04-18 10:16:33 [core.py:390] File "/home/user/vllm/vllm/v1/worker/gpu_model_runner.py", line 32, in <module>
ERROR 04-18 10:16:33 [core.py:390] from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata
ERROR 04-18 10:16:33 [core.py:390] File "/home/user/vllm/vllm/v1/attention/backends/flash_attn.py", line 26, in <module>
ERROR 04-18 10:16:33 [core.py:390] from vllm.vllm_flash_attn import (flash_attn_varlen_func,
ERROR 04-18 10:16:33 [core.py:390] ImportError: cannot import name 'get_scheduler_metadata' from 'vllm.vllm_flash_attn' (/home/user/vllm/vllm/vllm_flash_attn/__init__.py)
Process EngineCore_0:
Traceback (most recent call last):
File "/home/reid/miniconda3/envs/vllm/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
self.run()
File "/home/reid/miniconda3/envs/vllm/lib/python3.12/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/home/user/vllm/vllm/v1/engine/core.py", line 394, in run_engine_core
raise e
File "/home/user/vllm/vllm/v1/engine/core.py", line 381, in run_engine_core
engine_core = EngineCoreProc(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/vllm/vllm/v1/engine/core.py", line 323, in __init__
super().__init__(vllm_config, executor_class, log_stats,
File "/home/user/vllm/vllm/v1/engine/core.py", line 63, in __init__
self.model_executor = executor_class(vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/vllm/vllm/executor/executor_base.py", line 52, in __init__
self._init_executor()
File "/home/user/vllm/vllm/executor/uniproc_executor.py", line 45, in _init_executor
self.collective_rpc("init_worker", args=([kwargs], ))
File "/home/user/vllm/vllm/executor/uniproc_executor.py", line 56, in collective_rpc
answer = run_method(self.driver_worker, method, args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/vllm/vllm/utils.py", line 2428, in run_method
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/user/vllm/vllm/worker/worker_base.py", line 558, in init_worker
worker_class = resolve_obj_by_qualname(
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/vllm/vllm/utils.py", line 2059, in resolve_obj_by_qualname
module = importlib.import_module(module_name)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/reid/miniconda3/envs/vllm/lib/python3.12/importlib/__init__.py", line 90, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "<frozen importlib._bootstrap>", line 1387, in _gcd_import
File "<frozen importlib._bootstrap>", line 1360, in _find_and_load
File "<frozen importlib._bootstrap>", line 1331, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 935, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 999, in exec_module
File "<frozen importlib._bootstrap>", line 488, in _call_with_frames_removed
File "/home/user/vllm/vllm/v1/worker/gpu_worker.py", line 26, in <module>
from vllm.v1.worker.gpu_model_runner import GPUModelRunner
File "/home/user/vllm/vllm/v1/worker/gpu_model_runner.py", line 32, in <module>
from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata
File "/home/user/vllm/vllm/v1/attention/backends/flash_attn.py", line 26, in <module>
from vllm.vllm_flash_attn import (flash_attn_varlen_func,
ImportError: cannot import name 'get_scheduler_metadata' from 'vllm.vllm_flash_attn' (/home/user/vllm/vllm/vllm_flash_attn/__init__.py)
Traceback (most recent call last):
File "/home/reid/miniconda3/envs/vllm/bin/vllm", line 8, in <module>
sys.exit(main())
^^^^^^
File "/home/user/vllm/vllm/entrypoints/cli/main.py", line 51, in main
args.dispatch_function(args)
File "/home/user/vllm/vllm/entrypoints/cli/serve.py", line 27, in cmd
uvloop.run(run_server(args))
File "/home/reid/miniconda3/envs/vllm/lib/python3.12/site-packages/uvloop/__init__.py", line 109, in run
return __asyncio.run(
^^^^^^^^^^^^^^
File "/home/reid/miniconda3/envs/vllm/lib/python3.12/asyncio/runners.py", line 195, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/home/reid/miniconda3/envs/vllm/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
File "/home/reid/miniconda3/envs/vllm/lib/python3.12/site-packages/uvloop/__init__.py", line 61, in wrapper
return await main
^^^^^^^^^^
File "/home/user/vllm/vllm/entrypoints/openai/api_server.py", line 1078, in run_server
async with build_async_engine_client(args) as engine_client:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/reid/miniconda3/envs/vllm/lib/python3.12/contextlib.py", line 210, in __aenter__
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File "/home/user/vllm/vllm/entrypoints/openai/api_server.py", line 146, in build_async_engine_client
async with build_async_engine_client_from_engine_args(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/reid/miniconda3/envs/vllm/lib/python3.12/contextlib.py", line 210, in __aenter__
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File "/home/user/vllm/vllm/entrypoints/openai/api_server.py", line 178, in build_async_engine_client_from_engine_args
async_llm = AsyncLLM.from_vllm_config(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/vllm/vllm/v1/engine/async_llm.py", line 141, in from_vllm_config
return cls(
^^^^
File "/home/user/vllm/vllm/v1/engine/async_llm.py", line 103, in __init__
self.engine_core = core_client_class(
^^^^^^^^^^^^^^^^^^
File "/home/user/vllm/vllm/v1/engine/core_client.py", line 620, in __init__
super().__init__(
File "/home/user/vllm/vllm/v1/engine/core_client.py", line 395, in __init__
self._wait_for_engine_startup()
File "/home/user/vllm/vllm/v1/engine/core_client.py", line 421, in _wait_for_engine_startup
raise RuntimeError("Engine core initialization failed. "
RuntimeError: Engine core initialization failed. See root cause above.
🐛 Describe the bug
ImportError: cannot import name 'get_scheduler_metadata' from 'vllm.vllm_flash_attn'
Before submitting a new issue...
- Make sure you have already searched for relevant issues and asked the chatbot in the bottom-right corner of the documentation page, which can answer many frequently asked questions.
Abatom
Metadata
Metadata
Assignees
Labels
bug: Something isn't working