@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 """Inference-only Bamba model."""
 # Added by the IBM Team, 2024
-import math
 from typing import Iterable, Optional, Set, Tuple
 
 import torch
@@ -21,9 +20,9 @@
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
 from vllm.model_executor.layers.mamba.mamba_mixer2 import (
     MambaMixer2, extra_groups_for_head_shards)
-from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.layers.mamba.ops.ssd_chunk_scan import (
     seq_idx_to_chunk_indices_offsets)
+from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -258,6 +257,7 @@ def forward(
     "mamba": BambaMixerDecoderLayer
 }
 
+
 class BambaModel(nn.Module):
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
@@ -329,10 +329,10 @@ def forward(
                 seq_idx[srt:end] = i
             seq_idx.unsqueeze_(0)
 
-            # compute metadata for chunked prefill.
-            # actually this is only needed if there are
+            # compute metadata for chunked prefill.
+            # actually this is only needed if there are
             # initial states, but this is determinable
-            # only from attention metadata yet
+            # only from attention metadata yet
             # unavailable from the current top-level forward.
             # Rather than complicating things to extract said
             # metadata, we simply just compute redundently and
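For readers new to this code path, here is a minimal, self-contained sketch of the seq_idx tensor that the loop in the hunk above builds for a chunked-prefill batch. It uses plain PyTorch with hard-coded prompt boundaries (the real model derives them from its attention metadata, which is not shown in this diff), and only illustrates the shape and contents of the tensor that is later handed to chunk-scan helpers such as the seq_idx_to_chunk_indices_offsets import added above:

import torch

# Hypothetical prefill batch: two prompts of lengths 3 and 2, flattened
# into a single token stream; cumulative start locations are [0, 3, 5].
query_start_loc = [0, 3, 5]

num_prefill_tokens = query_start_loc[-1]
seq_idx = torch.zeros(num_prefill_tokens, dtype=torch.int32)

# Mirror the loop shown in the hunk above: tag every token with the
# index of the prefill sequence it belongs to.
for i, (srt, end) in enumerate(zip(query_start_loc, query_start_loc[1:])):
    seq_idx[srt:end] = i

# Add a leading batch dimension, as the model code does.
seq_idx.unsqueeze_(0)

print(seq_idx)  # tensor([[0, 0, 0, 1, 1]], dtype=torch.int32)

As the comment block in the hunk notes, the chunked-prefill metadata derived from this tensor is only strictly needed when there are initial states, but it is computed unconditionally here because that condition is not visible at this point in the forward pass.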