Skip to content

Commit 59ecee0

Browse files
committed
fix ep
1 parent da672b2 commit 59ecee0

File tree

1 file changed: 3 additions (+3), 3 deletions (-3)

torchtitan/experiments/gpt_oss/infra/parallelize.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from torchtitan.models.llama4.infra.parallelize import apply_fsdp
3030
from torchtitan.tools.logging import logger
3131

32-
from .expert_parallel import GptossExpertTensorParallel
32+
from .expert_parallel import GptossExpertTensorParallel, GptossTensorParallel
3333

3434

3535
# for selective op activation checkpointing
@@ -304,11 +304,11 @@ def apply_moe_ep_tp(
304304
if ep_mesh is None:
305305
experts_mesh = tp_mesh
306306
# input Replicate, output Partial
307-
experts_plan = TensorParallel()
307+
experts_plan = GptossTensorParallel()
308308
elif tp_mesh is None:
309309
experts_mesh = ep_mesh
310310
# input / output sharding on the batch / tokens dim
311-
experts_plan = GptossExpertParallel()
311+
experts_plan = ExpertParallel()
312312
elif etp_enabled:
313313
experts_mesh = ep_tp_mesh
314314
experts_plan = GptossExpertTensorParallel(tp_mesh=tp_mesh, ep_mesh=ep_mesh)

Comments: 0 commit comments