We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent ba8beaf commit d03ead8 (Copy full SHA for d03ead8)
vllm/model_executor/layers/mamba/mamba_mixer2.py
@@ -397,7 +397,6 @@ def forward_cuda(
397
has_prefill = num_prefills > 0
398
has_decode = num_decodes > 0
399
400
- seq_len, _ = hidden_states.shape
401
groups_time_state_size = self.n_groups * self.ssm_state_size
402
403
# 1. Gated MLP's linear projection
0 commit comments