File tree Expand file tree Collapse file tree 2 files changed +4
-3
lines changed 
vllm/model_executor/layers/fused_moe Expand file tree Collapse file tree 2 files changed +4
-3
lines changed Original file line number Diff line number Diff line change @@ -12,7 +12,7 @@ ARG PYTORCH_REPO="https://github.com/pytorch/pytorch.git"
1212ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git"
1313ARG FA_BRANCH="1a7f4dfa"
1414ARG FA_REPO="https://github.com/Dao-AILab/flash-attention.git"
15- ARG AITER_BRANCH="c1debd8 "
15+ ARG AITER_BRANCH="6487649 "
1616ARG AITER_REPO="https://github.com/ROCm/aiter.git"
1717
1818FROM ${BASE_IMAGE} AS base
Original file line number Diff line number Diff line change @@ -22,8 +22,9 @@ class QuantMethod(IntEnum):
2222    NO  =  0   # a16w16 
2323    PER_TENSOR  =  1   # w8a8 (per_Tensor) 
2424    PER_TOKEN  =  2   # w8a8/w8a4 (per_Token) 
25-     BLOCK_1X128  =  3   # block quantized w8a8 (per_1x128) 
26-     BLOCK_128x128  =  4   # block quantized w8a8 (per_128x128) 
25+     BLOCK_1X32  =  3   # fp4x2 
26+     BLOCK_1X128  =  4   # block quantized w8a8 (per_1x128) 
27+     BLOCK_128x128  =  5   # block quantized w8a8 (per_128x128) 
2728
2829
2930class  ActivationMethod (IntEnum ):
    
 
   
 
     
   
   
          
     
  
    
     
 
    
      
     
 
     
    You can’t perform that action at this time.
  
 
    
  
     
    
      
        
     
 
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments