Skip to content

Commit

Permalink
fix microsoft#2240: wrong time unit in flops_profiler
Browse files: browse the repository at this point in the history
  • Loading branch information
yzs981130 committed Aug 19, 2022
1 parent fda6343 commit 3875b33
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 12 deletions.
8 changes: 4 additions & 4 deletions deepspeed/moe/sharded_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ def forward(

if self.wall_clock_breakdown:
self.timers('TopKGate').stop()
- self.gate_time = self.timers('TopKGate').elapsed(reset=False) * 1000
+ self.gate_time = self.timers('TopKGate').elapsed(reset=False)

return gate_output

Expand Down Expand Up @@ -535,7 +535,7 @@ def forward(self, *input: Tensor, **kwargs: Any) -> Tensor:

if self.wall_clock_breakdown:
self.timers('falltoall').stop()
- self.time_falltoall = self.timers('falltoall').elapsed(reset=False) * 1000
+ self.time_falltoall = self.timers('falltoall').elapsed(reset=False)

# Re-shape after all-to-all: ecm -> gecm
dispatched_input = dispatched_input.reshape(self.ep_size,
Expand All @@ -552,7 +552,7 @@ def forward(self, *input: Tensor, **kwargs: Any) -> Tensor:

if self.wall_clock_breakdown:
self.timers('salltoall').stop()
- self.time_salltoall = self.timers('salltoall').elapsed(reset=False) * 1000
+ self.time_salltoall = self.timers('salltoall').elapsed(reset=False)

# Re-shape back: gecm -> ecm
expert_output = expert_output.reshape(self.ep_size * self.num_local_experts,
Expand All @@ -576,6 +576,6 @@ def forward(self, *input: Tensor, **kwargs: Any) -> Tensor:

if self.wall_clock_breakdown:
self.timers('moe').stop()
- self.time_moe = self.timers('moe').elapsed(reset=False) * 1000
+ self.time_moe = self.timers('moe').elapsed(reset=False)

return a
14 changes: 6 additions & 8 deletions deepspeed/runtime/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -2049,7 +2049,7 @@ def step(self, lr_kwargs=None):

if self.has_moe_layers:
fwd_time = self.timers(FORWARD_GLOBAL_TIMER).elapsed(
- reset=False) * 1000
+ reset=False)
self.print_forward_breakdown(fwd_time=fwd_time)

self.timers.log(self.engine_timers.global_timers)
Expand Down Expand Up @@ -2093,29 +2093,27 @@ def _write_monitor(self):
self.summary_events = [
(
f"Train/Samples/elapsed_time_ms_forward",
- self.timers(FORWARD_GLOBAL_TIMER).elapsed(reset=False) * 1000.0,
+ self.timers(FORWARD_GLOBAL_TIMER).elapsed(reset=False),
self.global_samples,
),
(
f"Train/Samples/elapsed_time_ms_backward",
- self.timers(BACKWARD_GLOBAL_TIMER).elapsed(reset=False) * 1000.0,
+ self.timers(BACKWARD_GLOBAL_TIMER).elapsed(reset=False),
self.global_samples,
),
(
f"Train/Samples/elapsed_time_ms_backward_inner",
- self.timers(BACKWARD_INNER_GLOBAL_TIMER).elapsed(reset=False) *
- 1000.0,
+ self.timers(BACKWARD_INNER_GLOBAL_TIMER).elapsed(reset=False),
self.global_samples,
),
(
f"Train/Samples/elapsed_time_ms_backward_allreduce",
- self.timers(BACKWARD_REDUCE_GLOBAL_TIMER).elapsed(reset=False) *
- 1000.0,
+ self.timers(BACKWARD_REDUCE_GLOBAL_TIMER).elapsed(reset=False),
self.global_samples,
),
(
f"Train/Samples/elapsed_time_ms_step",
- self.timers(STEP_GLOBAL_TIMER).elapsed(reset=False) * 1000.0,
+ self.timers(STEP_GLOBAL_TIMER).elapsed(reset=False),
self.global_samples,
),
]
Expand Down

0 comments on commit 3875b33

Please sign in to comment.