@@ -822,21 +822,21 @@ def test_hybrid_attention_mamba_tensor_shapes(monkeypatch):
         assert attn_shape[0] % num_blocks == 0
         block_split_ratio = attn_shape[0] // num_blocks
 
-        # Use small blocks for testing to avoid memory issues
+        # use small blocks for testing to avoid memory issues
         test_block_size = min(2, len(blocks0), len(blocks1))
 
-        # Use non-overlapping blocks to avoid data contamination
-        # Split physical blocks: first half for attention, second half for mamba
+        # use non-overlapping blocks to avoid data contamination
+        # Split kernel blocks: first half for attention, second half for mamba
         mid_point = num_blocks // 2
 
-        # Attention uses physical blocks from first half (mapped to logical blocks)
+        # attention uses kernel blocks from first half (mapped to logical blocks)
         kv_blocks_for_attention = np.array([0, 1])[:test_block_size]
 
-        # Mamba uses physical blocks from second half
+        # mamba uses kernel blocks from second half
         kv_blocks_for_mamba = np.array([mid_point, mid_point + 1])[:test_block_size]
 
-        # Create small constant tensors for testing with corrected shapes
-        # Attention: [block_size, ...] starting from dimension 2
+        # create small constant tensors for testing with corrected shapes
+        # attention: [block_size, ...] starting from dimension 2
         attn_constant_shape = attn_shape[2:]
         conv_constant_shape = conv_shape[1:]
         ssm_constant_shape = ssm_shape[1:]
@@ -859,14 +859,14 @@ def test_hybrid_attention_mamba_tensor_shapes(monkeypatch):
             for i, kernel_block in enumerate(kernel_blocks_for_attention):
                 vllm_ctx[layer].kv_cache[0][kernel_block, :] = attn_blocks_constant[i]
 
-        # Fill mamba blocks with constants using physical block indices
+        # fill mamba blocks with constants using kernel block indices
         for layer in [layer_2, layer_3, layer_4, layer_5]:
-            # mamba: kv_cache[0][component][physical_block_idx, ...]
+            # mamba: kv_cache[0][component][kernel_block_idx, ...]
             for i, kv_block in enumerate(kv_blocks_for_mamba):
                 vllm_ctx[layer].kv_cache[0][0][kv_block, :] = conv_blocks_constant[i]
                 vllm_ctx[layer].kv_cache[0][1][kv_block, :] = ssm_blocks_constant[i]
 
-        # Verify attention and mamba contents are correct
+        # verify attention and mamba contents are correct
         for layer in [layer_0, layer_1]:
             for i, kernel_block in enumerate(kernel_blocks_for_attention):
                 actual_kv = vllm_ctx[layer].kv_cache[0][kernel_block, :]
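
As an aside, a minimal standalone sketch of the non-overlapping split these comments describe, with a hypothetical pool size and cache shape (the real test derives both from the model config and vllm_ctx): the first half of the block pool is written by attention, the second half by mamba, and each selected block is filled with a distinct constant so any cross-contamination would show up on readback.

import numpy as np
import torch

num_blocks = 8          # hypothetical pool size
test_block_size = 2
mid_point = num_blocks // 2

# non-overlapping halves: attention takes the first half, mamba the second
kv_blocks_for_attention = np.array([0, 1])[:test_block_size]
kv_blocks_for_mamba = np.array([mid_point, mid_point + 1])[:test_block_size]
assert not set(kv_blocks_for_attention) & set(kv_blocks_for_mamba)

# fill each selected block with a distinct constant, then verify it round-trips
kv_cache = torch.zeros(num_blocks, 16, 4)  # hypothetical [num_blocks, block_size, head_dim]
for i, blk in enumerate(kv_blocks_for_attention):
    kv_cache[blk, :] = float(i + 1)
for i, blk in enumerate(kv_blocks_for_attention):
    assert torch.all(kv_cache[blk, :] == float(i + 1))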