
Commit b7998ae

Zerohertz authored and committed, with co-authors hmellor and yewentao256
[Docs] Fix warnings in mkdocs build (continued) (vllm-project#24092)
Signed-off-by: Zerohertz <ohg3417@gmail.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Signed-off-by: xuebwang-amd <xuebwang@amd.com>
1 parent eb10e18 commit b7998ae

File tree

10 files changed, +337 -342 lines changed


vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 1 addition & 1 deletion
@@ -755,7 +755,7 @@ class FusedMoE(CustomOp):
         intermediate_size: Intermediate size of the experts
         params_dtype: Data type for the parameters.
         reduce_results: Whether to all all_reduce on the output of the layer
-        renomalize: Whether to renormalize the logits in the fused_moe kernel
+        renormalize: Whether to renormalize the logits in the fused_moe kernel
         quant_config: Quantization configure.
         enable_eplb: Whether to enable expert parallelism load balancer.
     """

vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py

Lines changed: 2 additions & 3 deletions
@@ -420,9 +420,8 @@ def shuffle_weights(
 
     Args:
         *tensors: Variable number of torch.Tensor objects.
-        layout: A pair of integers specifying the
-            block sizes used to divide the tensors during shuffling.
-            Default is (16, 16).
+        layout: A pair of integers specifying the block sizes used to divide
+            the tensors during shuffling. Default is (16, 16).
 
     Returns:
         A Tuple of shuffled tensors.
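
Editor's note: as context for the layout argument documented here, it sets the block granularity at which a weight tensor is tiled before shuffling. A self-contained sketch of (16, 16) blocking (illustrative only; the real shuffle_weights applies an AITER-specific permutation, and block_view is a hypothetical helper):

import torch

def block_view(t: torch.Tensor,
               layout: tuple[int, int] = (16, 16)) -> torch.Tensor:
    # Split a 2-D tensor into blocks of shape `layout`; assumes the
    # dimensions divide evenly, as block-shuffling kernels require.
    br, bc = layout
    rows, cols = t.shape
    assert rows % br == 0 and cols % bc == 0
    return t.reshape(rows // br, br, cols // bc, bc).permute(0, 2, 1, 3)

w = torch.arange(32 * 32, dtype=torch.float32).reshape(32, 32)
print(block_view(w).shape)  # torch.Size([2, 2, 16, 16])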

vllm/model_executor/layers/fused_moe/routing_simulator.py

Lines changed: 5 additions & 3 deletions
@@ -10,7 +10,7 @@
 """
 
 from abc import ABC, abstractmethod
-from typing import Optional
+from typing import Any, Optional
 
 import torch
 

@@ -50,7 +50,9 @@ class DistributionBasedRouting(RoutingStrategy):
     distributions for testing different routing patterns.
     """
 
-    def __init__(self, distribution: str = "uniform", **distribution_params):
+    def __init__(self,
+                 distribution: str = "uniform",
+                 **distribution_params: Any):
         """
         Initialize distribution-based routing.
 

@@ -244,7 +246,7 @@ def register_strategy(cls, name: str, strategy: RoutingStrategy):
         cls._routing_strategies[name] = strategy
 
     @classmethod
-    def get_available_strategies(cls):
+    def get_available_strategies(cls) -> list[str]:
         """
         Get list of available routing strategy names.
 
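
Editor's note: the two annotated classmethods follow a simple class-level registry pattern. A minimal sketch of that pattern using the same names (the method bodies here are assumptions for illustration, not vLLM's code):

from abc import ABC, abstractmethod

class RoutingStrategy(ABC):
    @abstractmethod
    def route(self, logits): ...

class RoutingSimulator:
    _routing_strategies: dict[str, RoutingStrategy] = {}

    @classmethod
    def register_strategy(cls, name: str, strategy: RoutingStrategy) -> None:
        # Register a strategy instance under a lookup name.
        cls._routing_strategies[name] = strategy

    @classmethod
    def get_available_strategies(cls) -> list[str]:
        # The return annotation added by this commit documents this type.
        return list(cls._routing_strategies)

class UniformRouting(RoutingStrategy):
    def route(self, logits):
        return logits  # placeholder

RoutingSimulator.register_strategy("uniform", UniformRouting())
print(RoutingSimulator.get_available_strategies())  # ['uniform']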

vllm/model_executor/layers/quantization/bitblas.py

Lines changed: 5 additions & 5 deletions
@@ -202,7 +202,7 @@ def create_weights_gptq(
         output_size: int,
         params_dtype: torch.dtype,
         **extra_weight_attrs,
-    ):
+    ) -> None:
         """Creates quantized weights for use in linear operations.
 
         The function initializes and returns a dictionary containing quantized

@@ -211,7 +211,7 @@ def create_weights_gptq(
 
         Args:
             input_size_per_partition: The size of the input partition.
-            output_size_per_partition: The size of the output partition.
+            output_partition_sizes: List of output partition sizes.
             input_size: The total size of the input (unused).
             output_size: The total size of the output (unused).
             params_dtype:

@@ -222,9 +222,9 @@ def create_weights_gptq(
             scales ('scales'), and zeros ('zeros').
 
         Raises:
-            ValueError: If `params_dtype` is not `torch.float16` or if the
-                input size per partition is not divisible by the group size in
-                `quant_config`.
+            ValueError: If `params_dtype` is not `torch.float16` or if the input
+                size per partition is not divisible by the group size
+                in `quant_config`.
         """
         del input_size, output_size  # Unused arguments.
         weight_loader = extra_weight_attrs["weight_loader"]
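
Editor's note: the docstring rewraps above (and the matching one in gptq_bitblas.py below) follow the Google-style convention that mkdocstrings' parser expects: continuation lines of an Args/Raises entry are indented one level past the entry itself so they attach to that entry rather than triggering a build warning. A generic before-the-eyes sketch of the convention (not the vLLM source):

def halve(x: float) -> float:
    """Halve a positive number.

    Args:
        x: The value to halve. Continuation lines like this one are
            indented one extra level, so the parser attaches them to `x`
            instead of warning about a malformed section.

    Raises:
        ValueError: If `x` is not positive.
    """
    if x <= 0:
        raise ValueError("x must be positive")
    return x / 2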

vllm/model_executor/layers/quantization/gptq_bitblas.py

Lines changed: 3 additions & 3 deletions
@@ -265,9 +265,9 @@ def create_weights(
             scales ('scales'), and zeros ('zeros').
 
         Raises:
-            ValueError: If `params_dtype` is not `torch.float16` or
-                if the input size per partition is not divisible by the
-                group size in `quant_config`.
+            ValueError: If `params_dtype` is not `torch.float16` or if the input
+                size per partition is not divisible by the group size
+                in `quant_config`.
         """
         if params_dtype != torch.float16:
             raise ValueError("Parameter data type must be torch.float16, "

vllm/model_executor/layers/quantization/kernels/mixed_precision/__init__.py

Lines changed: 4 additions & 4 deletions
@@ -46,11 +46,11 @@ def choose_mp_linear_kernel(
     performance.
 
     Args:
-        config (MPLinearLayerConfig): Description of the linear layer to be
-          implemented.
+        config (MPLinearLayerConfig): Description of the linear layer to be
+            implemented.
         compute_capability (Optional[int], optional): The compute capability of
-            the target device, if None uses `current_platform` to get the compute
-            capability. Defaults to None.
+            the target device, if None uses `current_platform` to get
+            the compute capability. Defaults to None.
 
     Raises:
         ValueError: If no kernel can implement the given config.
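
Editor's note: for context on the compute_capability parameter documented above, kernel choosers of this shape walk a preference list and return the first kernel whose minimum capability is satisfied, raising otherwise. A hypothetical sketch of the pattern (the kernel names, thresholds, and fallback value are assumptions for illustration, not vLLM's actual selection logic):

from typing import Optional

# Hypothetical preference list: (minimum compute capability, kernel name).
_KERNELS: list[tuple[int, str]] = [(90, "machete"), (80, "marlin"), (0, "exllama")]

def choose_kernel(compute_capability: Optional[int] = None) -> str:
    if compute_capability is None:
        # Stand-in for querying current_platform, as the docstring describes.
        compute_capability = 80
    for min_cc, name in _KERNELS:
        if compute_capability >= min_cc:
            return name
    raise ValueError("no kernel can implement the given config")

print(choose_kernel(86))  # -> 'marlin'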
