
Commit 1d40f7f

fix inplace
Signed-off-by: Bill Nell <bnell@redhat.com>
1 parent ea3ecbf commit 1d40f7f

File tree

4 files changed: +30 -1 lines changed


vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 18 additions & 1 deletion
```diff
@@ -289,6 +289,10 @@ def get_fused_moe_quant_config(
     def supports_eplb(self) -> bool:
         return False
 
+    @property
+    def allow_inplace(self) -> bool:
+        return False
+
     @abstractmethod
     def apply(
         self,
```
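
This hunk gives the MoE method base class a conservative `allow_inplace` default that backends can override. A minimal sketch of the pattern, using made-up class names rather than the actual vLLM classes:

```python
# Sketch only: ExampleMoEMethodBase / ExampleTritonMoEMethod are illustrative
# names, not the vLLM classes touched by this commit.
from abc import ABC, abstractmethod

import torch


class ExampleMoEMethodBase(ABC):
    @property
    def allow_inplace(self) -> bool:
        # Conservative default: assume the fused kernel must not overwrite
        # its input activations.
        return False

    @abstractmethod
    def apply(self, x: torch.Tensor) -> torch.Tensor: ...


class ExampleTritonMoEMethod(ExampleMoEMethodBase):
    @property
    def allow_inplace(self) -> bool:
        # A backend whose kernel can safely reuse the input buffer opts in
        # by overriding the property, as the later hunks do.
        return True

    def apply(self, x: torch.Tensor) -> torch.Tensor:
        return x  # stand-in for the real fused-experts call
```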
```diff
@@ -333,6 +337,7 @@ def __init__(
         self.disable_expert_map = not fused_experts.supports_expert_map()
         self.old_method_name = old_moe_method.__class__.__name__
         self._supports_eplb = old_moe_method.supports_eplb
+        self._allow_inplace = old_moe_method.allow_inplace
         if isinstance(old_moe_method, torch.nn.Module):
             self.load_state_dict(old_moe_method.state_dict())
         logger.debug("Swapping out %s", self.old_method_name)
```
```diff
@@ -341,6 +346,10 @@ def __init__(
     def supports_eplb(self) -> bool:
         return self._supports_eplb
 
+    @property
+    def allow_inplace(self) -> bool:
+        return self._allow_inplace
+
     def create_weights(
         self,
         layer: torch.nn.Module,
```
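
Together with the `__init__` hunk above, this makes the swapped-in method simply forward the wrapped method's capability. A compressed sketch of that delegation, with a placeholder class name:

```python
# Placeholder name; only the delegation pattern mirrors the hunks above.
class ExampleSwappedMoEMethod:
    def __init__(self, old_moe_method) -> None:
        # Capture the wrapped method's capabilities at construction time so
        # later calls can honor them.
        self._supports_eplb = old_moe_method.supports_eplb
        self._allow_inplace = old_moe_method.allow_inplace

    @property
    def supports_eplb(self) -> bool:
        return self._supports_eplb

    @property
    def allow_inplace(self) -> bool:
        return self._allow_inplace
```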
```diff
@@ -426,7 +435,7 @@ def apply(
             w2=layer.w2_weight,
             topk_weights=topk_weights,
             topk_ids=topk_ids,
-            inplace=True,  # TODO(bnell): make sure this is handled properly
+            inplace=self.allow_inplace,
             activation=activation,
             global_num_experts=global_num_experts,
             apply_router_weight_on_input=apply_router_weight_on_input,
```
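
Instead of hard-coding `inplace=True`, the call now follows the wrapped method's declaration. A rough illustration of what the flag controls, using a toy stand-in rather than the vLLM kernel:

```python
import torch


def fake_fused_experts(hidden_states: torch.Tensor, inplace: bool) -> torch.Tensor:
    # Toy stand-in: with inplace=True the "kernel" writes its result back into
    # the caller's buffer; otherwise it allocates a fresh output tensor.
    out = hidden_states if inplace else torch.empty_like(hidden_states)
    out.copy_(hidden_states * 2.0)  # pretend expert computation
    return out


x = torch.randn(4, 8)
y = fake_fused_experts(x, inplace=False)
assert y.data_ptr() != x.data_ptr()  # input buffer left intact for the caller
z = fake_fused_experts(x, inplace=True)
assert z.data_ptr() == x.data_ptr()  # output aliases the input buffer
```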
```diff
@@ -496,6 +505,14 @@ def __init__(self, moe: FusedMoEConfig):
             )
             self.flashinfer_cutlass_moe = None  # type: ignore
 
+    @property
+    def supports_eplb(self) -> bool:
+        return True
+
+    @property
+    def allow_inplace(self) -> bool:
+        return True
+
     def maybe_make_prepare_finalize(self) -> FusedMoEPrepareAndFinalize | None:
         if self.rocm_aiter_moe_enabled:
             return None
```

vllm/model_executor/layers/quantization/fp8.py

Lines changed: 4 additions & 0 deletions
```diff
@@ -1179,6 +1179,10 @@ def get_fused_moe_quant_config(
     def supports_eplb(self) -> bool:
         return True
 
+    @property
+    def allow_inplace(self) -> bool:
+        return True
+
     def apply(
         self,
         layer: torch.nn.Module,
```

vllm/model_executor/layers/quantization/mxfp4.py

Lines changed: 4 additions & 0 deletions
```diff
@@ -812,6 +812,10 @@ def select_gemm_impl(
         else:
             return OAITritonExperts(self.moe_quant_config)
 
+    @property
+    def allow_inplace(self) -> bool:
+        return True
+
     def apply(
         self,
         layer: torch.nn.Module,
```

vllm/model_executor/layers/quantization/quark/quark_moe.py

Lines changed: 4 additions & 0 deletions
```diff
@@ -596,6 +596,10 @@ def get_fused_moe_quant_config(
             block_shape=None,
         )
 
+    @property
+    def allow_inplace(self) -> bool:
+        return True
+
     def apply(
         self,
         layer: torch.nn.Module,
```
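
The fp8, mxfp4, and quark hunks all apply the same one-property recipe. A hedged usage sketch of the resulting contract, with a stand-in class rather than any real vLLM backend:

```python
import torch


class ExampleQuantMoEMethod:
    """Stand-in for a backend that opts in, mirroring the hunks above."""

    @property
    def allow_inplace(self) -> bool:
        return True

    def apply(self, hidden_states: torch.Tensor) -> torch.Tensor:
        # Caller-facing contract: only reuse the input buffer when the backend
        # has declared that doing so is safe.
        out = hidden_states if self.allow_inplace else hidden_states.clone()
        out.mul_(2.0)  # pretend expert computation
        return out


method = ExampleQuantMoEMethod()
x = torch.ones(2, 4)
y = method.apply(x)
assert y.data_ptr() == x.data_ptr()  # in-place path taken
```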
