
Commit b7998ae

Zerohertz authored and committed, with co-authors hmellor and yewentao256
[Docs] Fix warnings in mkdocs build (continued) (vllm-project#24092)
Signed-off-by: Zerohertz <ohg3417@gmail.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Signed-off-by: xuebwang-amd <xuebwang@amd.com>
1 parent eb10e18 commit b7998ae

File tree

10 files changed, +337 -342 lines changed


vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 1 addition & 1 deletion
@@ -755,7 +755,7 @@ class FusedMoE(CustomOp):
         intermediate_size: Intermediate size of the experts
         params_dtype: Data type for the parameters.
         reduce_results: Whether to all all_reduce on the output of the layer
-        renomalize: Whether to renormalize the logits in the fused_moe kernel
+        renormalize: Whether to renormalize the logits in the fused_moe kernel
         quant_config: Quantization configure.
         enable_eplb: Whether to enable expert parallelism load balancer.
     """

vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py

Lines changed: 2 additions & 3 deletions
@@ -420,9 +420,8 @@ def shuffle_weights(
 
     Args:
         *tensors: Variable number of torch.Tensor objects.
-        layout: A pair of integers specifying the
-            block sizes used to divide the tensors during shuffling.
-            Default is (16, 16).
+        layout: A pair of integers specifying the block sizes used to divide
+            the tensors during shuffling. Default is (16, 16).
 
     Returns:
         A Tuple of shuffled tensors.
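
Editor's note: as context for the layout argument documented here, it sets the block granularity at which a weight tensor is tiled before shuffling. A self-contained sketch of (16, 16) blocking (illustrative only; the real shuffle_weights applies an AITER-specific permutation, and block_view is a hypothetical helper):

import torch

def block_view(t: torch.Tensor,
               layout: tuple[int, int] = (16, 16)) -> torch.Tensor:
    # Split a 2-D tensor into blocks of shape `layout`; assumes the
    # dimensions divide evenly, as block-shuffling kernels require.
    br, bc = layout
    rows, cols = t.shape
    assert rows % br == 0 and cols % bc == 0
    return t.reshape(rows // br, br, cols // bc, bc).permute(0, 2, 1, 3)

w = torch.arange(32 * 32, dtype=torch.float32).reshape(32, 32)
print(block_view(w).shape)  # torch.Size([2, 2, 16, 16])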

vllm/model_executor/layers/fused_moe/routing_simulator.py

Lines changed: 5 additions & 3 deletions
@@ -10,7 +10,7 @@
 """
 
 from abc import ABC, abstractmethod
-from typing import Optional
+from typing import Any, Optional
 
 import torch
 

@@ -50,7 +50,9 @@ class DistributionBasedRouting(RoutingStrategy):
     distributions for testing different routing patterns.
     """
 
-    def __init__(self, distribution: str = "uniform", **distribution_params):
+    def __init__(self,
+                 distribution: str = "uniform",
+                 **distribution_params: Any):
         """
         Initialize distribution-based routing.
 

@@ -244,7 +246,7 @@ def register_strategy(cls, name: str, strategy: RoutingStrategy):
         cls._routing_strategies[name] = strategy
 
     @classmethod
-    def get_available_strategies(cls):
+    def get_available_strategies(cls) -> list[str]:
         """
         Get list of available routing strategy names.
 
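
Editor's note: the two annotated classmethods follow a simple class-level registry pattern. A minimal sketch of that pattern using the same names (the method bodies here are assumptions for illustration, not vLLM's code):

from abc import ABC, abstractmethod

class RoutingStrategy(ABC):
    @abstractmethod
    def route(self, logits): ...

class RoutingSimulator:
    _routing_strategies: dict[str, RoutingStrategy] = {}

    @classmethod
    def register_strategy(cls, name: str, strategy: RoutingStrategy) -> None:
        # Register a strategy instance under a lookup name.
        cls._routing_strategies[name] = strategy

    @classmethod
    def get_available_strategies(cls) -> list[str]:
        # The return annotation added by this commit documents this type.
        return list(cls._routing_strategies)

class UniformRouting(RoutingStrategy):
    def route(self, logits):
        return logits  # placeholder

RoutingSimulator.register_strategy("uniform", UniformRouting())
print(RoutingSimulator.get_available_strategies())  # ['uniform']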

vllm/model_executor/layers/quantization/bitblas.py

Lines changed: 5 additions & 5 deletions
@@ -202,7 +202,7 @@ def create_weights_gptq(
         output_size: int,
         params_dtype: torch.dtype,
         **extra_weight_attrs,
-    ):
+    ) -> None:
         """Creates quantized weights for use in linear operations.
 
         The function initializes and returns a dictionary containing quantized

@@ -211,7 +211,7 @@ def create_weights_gptq(
 
         Args:
             input_size_per_partition: The size of the input partition.
-            output_size_per_partition: The size of the output partition.
+            output_partition_sizes: List of output partition sizes.
             input_size: The total size of the input (unused).
             output_size: The total size of the output (unused).
             params_dtype:

@@ -222,9 +222,9 @@ def create_weights_gptq(
             scales ('scales'), and zeros ('zeros').
 
         Raises:
-            ValueError: If `params_dtype` is not `torch.float16` or if the
-                input size per partition is not divisible by the group size in
-                `quant_config`.
+            ValueError: If `params_dtype` is not `torch.float16` or if the input
+                size per partition is not divisible by the group size
+                in `quant_config`.
         """
         del input_size, output_size  # Unused arguments.
         weight_loader = extra_weight_attrs["weight_loader"]
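
Editor's note: the docstring rewraps above (and the matching one in gptq_bitblas.py below) follow the Google-style convention that mkdocstrings' parser expects: continuation lines of an Args/Raises entry are indented one level past the entry itself so they attach to that entry rather than triggering a build warning. A generic before-the-eyes sketch of the convention (not the vLLM source):

def halve(x: float) -> float:
    """Halve a positive number.

    Args:
        x: The value to halve. Continuation lines like this one are
            indented one extra level, so the parser attaches them to `x`
            instead of warning about a malformed section.

    Raises:
        ValueError: If `x` is not positive.
    """
    if x <= 0:
        raise ValueError("x must be positive")
    return x / 2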

vllm/model_executor/layers/quantization/gptq_bitblas.py

Lines changed: 3 additions & 3 deletions
@@ -265,9 +265,9 @@ def create_weights(
             scales ('scales'), and zeros ('zeros').
 
         Raises:
-            ValueError: If `params_dtype` is not `torch.float16` or
-                if the input size per partition is not divisible by the
-                group size in `quant_config`.
+            ValueError: If `params_dtype` is not `torch.float16` or if the input
+                size per partition is not divisible by the group size
+                in `quant_config`.
         """
         if params_dtype != torch.float16:
             raise ValueError("Parameter data type must be torch.float16, "

vllm/model_executor/layers/quantization/kernels/mixed_precision/__init__.py

Lines changed: 4 additions & 4 deletions
@@ -46,11 +46,11 @@ def choose_mp_linear_kernel(
     performance.
 
     Args:
-        config (MPLinearLayerConfig): Description of the linear layer to be
-          implemented.
+        config (MPLinearLayerConfig): Description of the linear layer to be
+            implemented.
         compute_capability (Optional[int], optional): The compute capability of
-            the target device, if None uses `current_platform` to get the compute
-            capability. Defaults to None.
+            the target device, if None uses `current_platform` to get
+            the compute capability. Defaults to None.
 
     Raises:
         ValueError: If no kernel can implement the given config.
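
Editor's note: for context on the compute_capability parameter documented above, kernel choosers of this shape walk a preference list and return the first kernel whose minimum capability is satisfied, raising otherwise. A hypothetical sketch of the pattern (the kernel names, thresholds, and fallback value are assumptions for illustration, not vLLM's actual selection logic):

from typing import Optional

# Hypothetical preference list: (minimum compute capability, kernel name).
_KERNELS: list[tuple[int, str]] = [(90, "machete"), (80, "marlin"), (0, "exllama")]

def choose_kernel(compute_capability: Optional[int] = None) -> str:
    if compute_capability is None:
        # Stand-in for querying current_platform, as the docstring describes.
        compute_capability = 80
    for min_cc, name in _KERNELS:
        if compute_capability >= min_cc:
            return name
    raise ValueError("no kernel can implement the given config")

print(choose_kernel(86))  # -> 'marlin'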
