Fix the behavior of collecting 'num_input_tokens_seen' (huggingface#29099)

Fix the behavior of collecting 'num_input_tokens_seen'. See huggingface#28791 for more details.
hovnatan · Mar 27, 2024 · 4bd2d04
1 parent 8cd2909
commit 4bd2d04
Showing 1 changed file with 6 additions and 1 deletion.
diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
@@ -2097,7 +2097,12 @@ def _inner_training_loop(
                             "a `main_input_name` attribute to the model class you are using."
                         )
                     else:
-                        self.state.num_input_tokens_seen += self.accelerator.gather(inputs[main_input_name]).numel()
+                        input_device = inputs[main_input_name].device
+                        self.state.num_input_tokens_seen += torch.sum(
+                            self.accelerator.gather(
+                                torch.tensor(inputs[main_input_name].numel(), device=input_device, dtype=torch.int64)
+                            )
+                        ).item()
                 if rng_to_sync:
                     self._load_rng_state(resume_from_checkpoint)
                     rng_to_sync = False