File tree: 2 files changed, +3 -1 lines changed
v1/attention/backends/mla: 2 files changed, +3 -1 lines changed
Columns: original file line number | diff line number | diff line change
@@ -21,6 +21,7 @@ class _Backend(enum.Enum):
2121    TRITON_MLA  =  enum .auto ()
2222    CUTLASS_MLA  =  enum .auto ()
2323    FLASHMLA  =  enum .auto ()
24+     FLASHMLA_SPARSE  =  enum .auto ()
2425    FLASH_ATTN_MLA  =  enum .auto ()
2526    PALLAS  =  enum .auto ()
2627    IPEX  =  enum .auto ()
@@ -43,6 +44,7 @@ class _Backend(enum.Enum):
4344    _Backend .TRITON_MLA : "vllm.v1.attention.backends.mla.triton_mla.TritonMLABackend" ,  # noqa: E501 
4445    _Backend .CUTLASS_MLA : "vllm.v1.attention.backends.mla.cutlass_mla.CutlassMLABackend" ,  # noqa: E501 
4546    _Backend .FLASHMLA : "vllm.v1.attention.backends.mla.flashmla.FlashMLABackend" ,  # noqa: E501 
47+     _Backend .FLASHMLA_SPARSE : "vllm.v1.attention.backends.mla.flashmla_sparse.FlashMLASparseBackend" ,  # noqa: E501 
4648    _Backend .FLASH_ATTN_MLA : "vllm.v1.attention.backends.mla.flashattn_mla.FlashAttnMLABackend" ,  # noqa: E501 
4749    _Backend .PALLAS : "vllm.v1.attention.backends.pallas.PallasAttentionBackend" ,  # noqa: E501 
4850    _Backend .FLEX_ATTENTION : "vllm.v1.attention.backends.flex_attention.FlexAttentionBackend" ,  # noqa: E501 
Columns: original file line number | diff line number | diff line change
@@ -55,7 +55,7 @@ class FlashMLASparseBackend(AttentionBackend):
5555
5656    @staticmethod  
5757    def  get_name () ->  str :
58-         return  "FLASHMLA_SPARSE_VLLM_V1 " 
58+         return  "FLASHMLA_SPARSE " 
5959
6060    @staticmethod  
6161    def  get_metadata_cls () ->  type [AttentionMetadata ]:
    
 
   
 
     
   
   
          
     
  
    
     
 
    
      
     
 
     
    You can’t perform that action at this time.
  
 
    
  
     
    
      
        
     
 
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments