
[Bug]: Dynamo Unsupported due to BasevLLMParameter.__torch_function__ calling disabled super() #25604

@yewentao256

Description


Your current environment

The output of python collect_env.py
Your output of `python collect_env.py` here

🐛 Describe the bug

vllm bench throughput --model Qwen/Qwen3-30B-A3B-FP8 --load-format dummy --input-len 1000 --output-len 100 --trust_remote_code --enable-expert-parallel

(EngineCore_DP0 pid=2439401)     self.run()
(EngineCore_DP0 pid=2439401)   File "/usr/lib/python3.12/multiprocessing/process.py", line 108, in run
(EngineCore_DP0 pid=2439401)     self._target(*self._args, **self._kwargs)
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/v1/engine/core.py", line 712, in run_engine_core
(EngineCore_DP0 pid=2439401)     raise e
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/v1/engine/core.py", line 695, in run_engine_core
(EngineCore_DP0 pid=2439401)     engine_core = DPEngineCoreProc(*args, **kwargs)
(EngineCore_DP0 pid=2439401)                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/v1/engine/core.py", line 965, in __init__
(EngineCore_DP0 pid=2439401)     super().__init__(vllm_config, local_client, handshake_address,
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/v1/engine/core.py", line 498, in __init__
(EngineCore_DP0 pid=2439401)     super().__init__(vllm_config, executor_class, log_stats,
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/v1/engine/core.py", line 92, in __init__
(EngineCore_DP0 pid=2439401)     self._initialize_kv_caches(vllm_config)
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/v1/engine/core.py", line 190, in _initialize_kv_caches
(EngineCore_DP0 pid=2439401)     self.model_executor.determine_available_memory())
(EngineCore_DP0 pid=2439401)     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/v1/executor/abstract.py", line 85, in determine_available_memory
(EngineCore_DP0 pid=2439401)     return self.collective_rpc("determine_available_memory")
(EngineCore_DP0 pid=2439401)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/executor/uniproc_executor.py", line 83, in collective_rpc
(EngineCore_DP0 pid=2439401)     return [run_method(self.driver_worker, method, args, kwargs)]
(EngineCore_DP0 pid=2439401)             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/utils/__init__.py", line 3049, in run_method
(EngineCore_DP0 pid=2439401)     return func(*args, **kwargs)
(EngineCore_DP0 pid=2439401)            ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2439401)   File "/home/wentao/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(EngineCore_DP0 pid=2439401)     return func(*args, **kwargs)
(EngineCore_DP0 pid=2439401)            ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/v1/worker/gpu_worker.py", line 271, in determine_available_memory
(EngineCore_DP0 pid=2439401)     self.model_runner.profile_run()
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/v1/worker/gpu_model_runner.py", line 3292, in profile_run
(EngineCore_DP0 pid=2439401)     = self._dummy_run(self.max_num_tokens, is_profile=True)
(EngineCore_DP0 pid=2439401)       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2439401)   File "/home/wentao/.venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(EngineCore_DP0 pid=2439401)     return func(*args, **kwargs)
(EngineCore_DP0 pid=2439401)            ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/v1/worker/gpu_model_runner.py", line 3069, in _dummy_run
(EngineCore_DP0 pid=2439401)     outputs = self.model(
(EngineCore_DP0 pid=2439401)               ^^^^^^^^^^^
(EngineCore_DP0 pid=2439401)   File "/home/wentao/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
(EngineCore_DP0 pid=2439401)     return self._call_impl(*args, **kwargs)
(EngineCore_DP0 pid=2439401)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2439401)   File "/home/wentao/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
(EngineCore_DP0 pid=2439401)     return forward_call(*args, **kwargs)
(EngineCore_DP0 pid=2439401)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/model_executor/models/deepseek_v2.py", line 907, in forward
(EngineCore_DP0 pid=2439401)     hidden_states = self.model(input_ids, positions, intermediate_tensors,
(EngineCore_DP0 pid=2439401)                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/compilation/decorators.py", line 310, in __call__
(EngineCore_DP0 pid=2439401)     output = self.compiled_callable(*args, **kwargs)
(EngineCore_DP0 pid=2439401)              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2439401)   File "/home/wentao/.venv/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 659, in _fn
(EngineCore_DP0 pid=2439401)     raise e.with_traceback(None) from None
(EngineCore_DP0 pid=2439401) torch._dynamo.exc.Unsupported: non-function or method super: <built-in function _disabled_torch_function_impl>
(EngineCore_DP0 pid=2439401) 
(EngineCore_DP0 pid=2439401) from user code:
(EngineCore_DP0 pid=2439401)    File "/home/wentao/vllm-source/vllm/model_executor/models/deepseek_v2.py", line 783, in forward
(EngineCore_DP0 pid=2439401)     hidden_states, residual = layer(positions, hidden_states, residual)
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/model_executor/models/deepseek_v2.py", line 711, in forward
(EngineCore_DP0 pid=2439401)     hidden_states = self.mlp(hidden_states)
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/model_executor/models/deepseek_v2.py", line 285, in forward
(EngineCore_DP0 pid=2439401)     router_logits, _ = self.gate(hidden_states)
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/model_executor/layers/linear.py", line 381, in forward
(EngineCore_DP0 pid=2439401)     output = self.quant_method.apply(self, x, bias)
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/model_executor/layers/linear.py", line 236, in apply
(EngineCore_DP0 pid=2439401)     return dispatch_unquantized_gemm()(layer, x, layer.weight, bias)
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/model_executor/layers/utils.py", line 92, in default_unquantized_gemm
(EngineCore_DP0 pid=2439401)     return torch.nn.functional.linear(x, weight, bias)
(EngineCore_DP0 pid=2439401)   File "/home/wentao/vllm-source/vllm/model_executor/parameter.py", line 119, in __torch_function__
(EngineCore_DP0 pid=2439401)     return super().__torch_function__(func, types, args, kwargs)
(EngineCore_DP0 pid=2439401)
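For context, here is a minimal sketch (not vLLM code) of the pattern that appears to trigger this. The assumption is that `torch.nn.Parameter.__torch_function__` resolves to `torch._C._disabled_torch_function_impl`, so when a `Parameter` subclass overrides `__torch_function__` and forwards to `super().__torch_function__(...)` inside a compiled region, Dynamo hits the disabled built-in and raises `Unsupported`. `MyParameter` and `TinyLinear` below are hypothetical stand-ins for `BasevLLMParameter` and the gate linear layer; whether this raises or merely graph-breaks may depend on the PyTorch version.

```python
# Hypothetical minimal reproduction of the reported Dynamo failure pattern.
import torch
from torch.nn import Parameter


class MyParameter(Parameter):
    # Simplified mirror of the structure in vllm/model_executor/parameter.py:
    # an override that forwards to super().__torch_function__.
    @classmethod
    def __torch_function__(cls, func, types, args=(), kwargs=None):
        if kwargs is None:
            kwargs = {}
        # Parameter.__torch_function__ is the disabled built-in implementation,
        # which works in eager but is what Dynamo reports as unsupported here.
        return super().__torch_function__(func, types, args, kwargs)


class TinyLinear(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.weight = MyParameter(torch.randn(8, 8))

    def forward(self, x):
        # Same call site as in the traceback: torch.nn.functional.linear on a
        # Parameter subclass that defines __torch_function__.
        return torch.nn.functional.linear(x, self.weight)


model = TinyLinear()
compiled = torch.compile(model, fullgraph=True)
compiled(torch.randn(4, 8))  # expected to raise torch._dynamo.exc.Unsupported on affected versions
```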

Before submitting a new issue...

  • Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.

Labels: bug (Something isn't working)