File tree Expand file tree Collapse file tree 1 file changed +2
-1
lines changed Expand file tree Collapse file tree 1 file changed +2
-1
lines changed Original file line number Diff line number Diff line change @@ -143,6 +143,8 @@ def __init__(
143143        # the backends) 
144144        if  envs .VLLM_USE_V1 :
145145            self .use_irope  =  extra_impl_args .pop ("use_irope" , False )
146+         else :
147+             self .use_irope  =  extra_impl_args .get ("use_irope" , False )
146148
147149        quant_method  =  quant_config .get_quant_method (
148150            self , prefix = prefix ) if  quant_config  else  None 
@@ -177,7 +179,6 @@ def __init__(
177179                             kv_sharing_target_layer_name , ** extra_impl_args )
178180        self .backend  =  backend_name_to_enum (attn_backend .get_name ())
179181        self .dtype  =  dtype 
180-         self .use_irope  =  extra_impl_args .get ("use_irope" , False )
181182
182183        # For cuda-alike (CUDA and ROCM) and cpu platforms, we control how 
183184        # torch.compile works by registering the attention as one giant 
 
 
   
 
     
   
   
          
    
    
     
    
      
     
     
    You can’t perform that action at this time.
  
 
    
  
    
      
        
     
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments