Add a kernel config tuning path to get better performance. (#681)
hiworldwzj authored Dec 25, 2024
1 parent e3eea7d commit 6a42960
Showing 18 changed files with 2,297 additions and 260 deletions.
@@ -50,7 +50,10 @@ def experts(self, input_tensor, router_logits, top_k, renormalize, use_grouped_t
         w1, w1_scale = self.w1
         w2, w2_scale = self.w2
         use_fp8_w8a8 = self.quant_method is not None
-        fused_experts(
+
+        from lightllm.common.fused_moe.grouped_fused_moe import fused_experts_impl
+
+        fused_experts_impl(
             hidden_states=input_tensor,
             w1=w1,
             w2=w2,
@@ -61,6 +64,7 @@ def experts(self, input_tensor, router_logits, top_k, renormalize, use_grouped_t
             w1_scale=w1_scale,
             w2_scale=w2_scale,
         )
+        return

     def _fuse(self):
         with self.lock:
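For context on the commit title: lightllm's actual tuning mechanism is not visible in this excerpt, but Triton-based fused-MoE kernels (in lightllm, vLLM, and similar projects) commonly select launch parameters from per-shape JSON config files produced by an offline benchmark sweep. The sketch below is a hypothetical illustration of that lookup pattern; the file-name scheme, directory layout, keys, and default values are assumptions, not lightllm's implementation.

```python
# Hypothetical sketch of a tuned kernel-config lookup; names and file layout
# are assumptions, not lightllm's actual code.
import functools
import json
import os
from typing import Any, Dict


@functools.lru_cache(maxsize=None)
def load_tuned_config(expert_num: int, hidden_dim: int, dtype: str) -> Dict[int, Dict[str, Any]]:
    """Return tuned Triton launch params for this MoE shape, keyed by token count."""
    # Config files are assumed to be tuned offline, one per (E, N, dtype) shape.
    fname = f"E={expert_num},N={hidden_dim},dtype={dtype}.json"
    path = os.path.join(os.path.dirname(__file__), "configs", fname)
    if os.path.exists(path):
        with open(path) as f:
            # JSON keys are batch sizes (token counts) stored as strings.
            return {int(k): v for k, v in json.load(f).items()}
    # Fallback defaults when no tuned config was shipped for this shape.
    return {0: {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "num_warps": 4}}


def pick_config(configs: Dict[int, Dict[str, Any]], num_tokens: int) -> Dict[str, Any]:
    # Choose the tuned entry whose benchmarked batch size is closest to the
    # actual number of tokens in this forward pass.
    best_key = min(configs, key=lambda bs: abs(bs - num_tokens))
    return configs[best_key]
```

The selected dictionary (block sizes, `num_warps`, and so on) is then passed into the Triton kernel launch, which is why a well-tuned config file can noticeably improve throughput without any change to the kernel itself.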