@@ -4242,9 +4242,14 @@ def _prepare_kernel_block_sizes(self, kv_cache_config: KVCacheConfig) -> list[in
42424242        for  kv_cache_group_id , kv_cache_group  in  enumerate (
42434243            kv_cache_config .kv_cache_groups 
42444244        ):
4245-             if  isinstance (kv_cache_group .kv_cache_spec , EncoderOnlyAttentionSpec ):
4245+             kv_cache_spec  =  kv_cache_group .kv_cache_spec 
4246+             if  isinstance (kv_cache_spec , UniformTypeKVCacheSpecs ):
4247+                 # All layers in the UniformTypeKVCacheSpecs have the same type, 
4248+                 # Pick an arbitrary one to dispatch. 
4249+                 kv_cache_spec  =  next (iter (kv_cache_spec .kv_cache_specs .values ()))
4250+             if  isinstance (kv_cache_spec , EncoderOnlyAttentionSpec ):
42464251                continue 
4247-             elif  isinstance (kv_cache_group . kv_cache_spec , AttentionSpec ):
4252+             elif  isinstance (kv_cache_spec , AttentionSpec ):
42484253                # This is an attention backend that supports virtual 
42494254                # block splitting. Get the supported block sizes from 
42504255                # all backends in the group. 
@@ -4254,10 +4259,10 @@ def _prepare_kernel_block_sizes(self, kv_cache_config: KVCacheConfig) -> list[in
42544259                    kv_manager_block_size , attn_groups 
42554260                )
42564261                kernel_block_sizes .append (selected_kernel_size )
4257-             elif  isinstance (kv_cache_group . kv_cache_spec , MambaSpec ):
4262+             elif  isinstance (kv_cache_spec , MambaSpec ):
42584263                # This is likely Mamba or other non-attention cache, 
42594264                # no splitting. 
4260-                 kernel_block_sizes .append (kv_cache_group . kv_cache_spec .block_size )
4265+                 kernel_block_sizes .append (kv_cache_spec .block_size )
42614266            else :
42624267                raise  NotImplementedError (
42634268                    f"unknown kv cache spec { kv_cache_group .kv_cache_spec }  
0 commit comments