Skip to content

[Bug]: ImportError: cannot import name 'get_scheduler_metadata' from 'vllm.vllm_flash_attn' #16813

@reidliu41

Description

@reidliu41

Your current environment

The output of `python collect_env.py`

ERROR 04-18 10:16:33 [core.py:390] Traceback (most recent call last):
ERROR 04-18 10:16:33 [core.py:390]   File "/home/user/vllm/vllm/v1/engine/core.py", line 381, in run_engine_core
ERROR 04-18 10:16:33 [core.py:390]     engine_core = EngineCoreProc(*args, **kwargs)
ERROR 04-18 10:16:33 [core.py:390]                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-18 10:16:33 [core.py:390]   File "/home/user/vllm/vllm/v1/engine/core.py", line 323, in __init__
ERROR 04-18 10:16:33 [core.py:390]     super().__init__(vllm_config, executor_class, log_stats,
ERROR 04-18 10:16:33 [core.py:390]   File "/home/user/vllm/vllm/v1/engine/core.py", line 63, in __init__
ERROR 04-18 10:16:33 [core.py:390]     self.model_executor = executor_class(vllm_config)
ERROR 04-18 10:16:33 [core.py:390]                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-18 10:16:33 [core.py:390]   File "/home/user/vllm/vllm/executor/executor_base.py", line 52, in __init__
ERROR 04-18 10:16:33 [core.py:390]     self._init_executor()
ERROR 04-18 10:16:33 [core.py:390]   File "/home/user/vllm/vllm/executor/uniproc_executor.py", line 45, in _init_executor
ERROR 04-18 10:16:33 [core.py:390]     self.collective_rpc("init_worker", args=([kwargs], ))
ERROR 04-18 10:16:33 [core.py:390]   File "/home/user/vllm/vllm/executor/uniproc_executor.py", line 56, in collective_rpc
ERROR 04-18 10:16:33 [core.py:390]     answer = run_method(self.driver_worker, method, args, kwargs)
ERROR 04-18 10:16:33 [core.py:390]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-18 10:16:33 [core.py:390]   File "/home/user/vllm/vllm/utils.py", line 2428, in run_method
ERROR 04-18 10:16:33 [core.py:390]     return func(*args, **kwargs)
ERROR 04-18 10:16:33 [core.py:390]            ^^^^^^^^^^^^^^^^^^^^^
ERROR 04-18 10:16:33 [core.py:390]   File "/home/user/vllm/vllm/worker/worker_base.py", line 558, in init_worker
ERROR 04-18 10:16:33 [core.py:390]     worker_class = resolve_obj_by_qualname(
ERROR 04-18 10:16:33 [core.py:390]                    ^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-18 10:16:33 [core.py:390]   File "/home/user/vllm/vllm/utils.py", line 2059, in resolve_obj_by_qualname
ERROR 04-18 10:16:33 [core.py:390]     module = importlib.import_module(module_name)
ERROR 04-18 10:16:33 [core.py:390]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-18 10:16:33 [core.py:390]   File "/home/reid/miniconda3/envs/vllm/lib/python3.12/importlib/__init__.py", line 90, in import_module
ERROR 04-18 10:16:33 [core.py:390]     return _bootstrap._gcd_import(name[level:], package, level)
ERROR 04-18 10:16:33 [core.py:390]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-18 10:16:33 [core.py:390]   File "<frozen importlib._bootstrap>", line 1387, in _gcd_import
ERROR 04-18 10:16:33 [core.py:390]   File "<frozen importlib._bootstrap>", line 1360, in _find_and_load
ERROR 04-18 10:16:33 [core.py:390]   File "<frozen importlib._bootstrap>", line 1331, in _find_and_load_unlocked
ERROR 04-18 10:16:33 [core.py:390]   File "<frozen importlib._bootstrap>", line 935, in _load_unlocked
ERROR 04-18 10:16:33 [core.py:390]   File "<frozen importlib._bootstrap_external>", line 999, in exec_module
ERROR 04-18 10:16:33 [core.py:390]   File "<frozen importlib._bootstrap>", line 488, in _call_with_frames_removed
ERROR 04-18 10:16:33 [core.py:390]   File "/home/user/vllm/vllm/v1/worker/gpu_worker.py", line 26, in <module>
ERROR 04-18 10:16:33 [core.py:390]     from vllm.v1.worker.gpu_model_runner import GPUModelRunner
ERROR 04-18 10:16:33 [core.py:390]   File "/home/user/vllm/vllm/v1/worker/gpu_model_runner.py", line 32, in <module>
ERROR 04-18 10:16:33 [core.py:390]     from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata
ERROR 04-18 10:16:33 [core.py:390]   File "/home/user/vllm/vllm/v1/attention/backends/flash_attn.py", line 26, in <module>
ERROR 04-18 10:16:33 [core.py:390]     from vllm.vllm_flash_attn import (flash_attn_varlen_func,
ERROR 04-18 10:16:33 [core.py:390] ImportError: cannot import name 'get_scheduler_metadata' from 'vllm.vllm_flash_attn' (/home/user/vllm/vllm/vllm_flash_attn/__init__.py)
Process EngineCore_0:
Traceback (most recent call last):
  File "/home/reid/miniconda3/envs/vllm/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/home/reid/miniconda3/envs/vllm/lib/python3.12/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/user/vllm/vllm/v1/engine/core.py", line 394, in run_engine_core
    raise e
  File "/home/user/vllm/vllm/v1/engine/core.py", line 381, in run_engine_core
    engine_core = EngineCoreProc(*args, **kwargs)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/vllm/vllm/v1/engine/core.py", line 323, in __init__
    super().__init__(vllm_config, executor_class, log_stats,
  File "/home/user/vllm/vllm/v1/engine/core.py", line 63, in __init__
    self.model_executor = executor_class(vllm_config)
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/vllm/vllm/executor/executor_base.py", line 52, in __init__
    self._init_executor()
  File "/home/user/vllm/vllm/executor/uniproc_executor.py", line 45, in _init_executor
    self.collective_rpc("init_worker", args=([kwargs], ))
  File "/home/user/vllm/vllm/executor/uniproc_executor.py", line 56, in collective_rpc
    answer = run_method(self.driver_worker, method, args, kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/vllm/vllm/utils.py", line 2428, in run_method
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/vllm/vllm/worker/worker_base.py", line 558, in init_worker
    worker_class = resolve_obj_by_qualname(
                   ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/vllm/vllm/utils.py", line 2059, in resolve_obj_by_qualname
    module = importlib.import_module(module_name)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/reid/miniconda3/envs/vllm/lib/python3.12/importlib/__init__.py", line 90, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<frozen importlib._bootstrap>", line 1387, in _gcd_import
  File "<frozen importlib._bootstrap>", line 1360, in _find_and_load
  File "<frozen importlib._bootstrap>", line 1331, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 935, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 999, in exec_module
  File "<frozen importlib._bootstrap>", line 488, in _call_with_frames_removed
  File "/home/user/vllm/vllm/v1/worker/gpu_worker.py", line 26, in <module>
    from vllm.v1.worker.gpu_model_runner import GPUModelRunner
  File "/home/user/vllm/vllm/v1/worker/gpu_model_runner.py", line 32, in <module>
    from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata
  File "/home/user/vllm/vllm/v1/attention/backends/flash_attn.py", line 26, in <module>
    from vllm.vllm_flash_attn import (flash_attn_varlen_func,
ImportError: cannot import name 'get_scheduler_metadata' from 'vllm.vllm_flash_attn' (/home/user/vllm/vllm/vllm_flash_attn/__init__.py)
Traceback (most recent call last):
  File "/home/reid/miniconda3/envs/vllm/bin/vllm", line 8, in <module>
    sys.exit(main())
             ^^^^^^
  File "/home/user/vllm/vllm/entrypoints/cli/main.py", line 51, in main
    args.dispatch_function(args)
  File "/home/user/vllm/vllm/entrypoints/cli/serve.py", line 27, in cmd
    uvloop.run(run_server(args))
  File "/home/reid/miniconda3/envs/vllm/lib/python3.12/site-packages/uvloop/__init__.py", line 109, in run
    return __asyncio.run(
           ^^^^^^^^^^^^^^
  File "/home/reid/miniconda3/envs/vllm/lib/python3.12/asyncio/runners.py", line 195, in run
    return runner.run(main)
           ^^^^^^^^^^^^^^^^
  File "/home/reid/miniconda3/envs/vllm/lib/python3.12/asyncio/runners.py", line 118, in run
    return self._loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
  File "/home/reid/miniconda3/envs/vllm/lib/python3.12/site-packages/uvloop/__init__.py", line 61, in wrapper
    return await main
           ^^^^^^^^^^
  File "/home/user/vllm/vllm/entrypoints/openai/api_server.py", line 1078, in run_server
    async with build_async_engine_client(args) as engine_client:
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/reid/miniconda3/envs/vllm/lib/python3.12/contextlib.py", line 210, in __aenter__
    return await anext(self.gen)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/vllm/vllm/entrypoints/openai/api_server.py", line 146, in build_async_engine_client
    async with build_async_engine_client_from_engine_args(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/reid/miniconda3/envs/vllm/lib/python3.12/contextlib.py", line 210, in __aenter__
    return await anext(self.gen)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/vllm/vllm/entrypoints/openai/api_server.py", line 178, in build_async_engine_client_from_engine_args
    async_llm = AsyncLLM.from_vllm_config(
                ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/vllm/vllm/v1/engine/async_llm.py", line 141, in from_vllm_config
    return cls(
           ^^^^
  File "/home/user/vllm/vllm/v1/engine/async_llm.py", line 103, in __init__
    self.engine_core = core_client_class(
                       ^^^^^^^^^^^^^^^^^^
  File "/home/user/vllm/vllm/v1/engine/core_client.py", line 620, in __init__
    super().__init__(
  File "/home/user/vllm/vllm/v1/engine/core_client.py", line 395, in __init__
    self._wait_for_engine_startup()
  File "/home/user/vllm/vllm/v1/engine/core_client.py", line 421, in _wait_for_engine_startup
    raise RuntimeError("Engine core initialization failed. "
RuntimeError: Engine core initialization failed. See root cause above.

🐛 Describe the bug

ImportError: cannot import name 'get_scheduler_metadata' from 'vllm.vllm_flash_attn'

Before submitting a new issue...

  • Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug — Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions