From 4bd2d048eb55dbe5c5878c3da2beceda6eeb3694 Mon Sep 17 00:00:00 2001
From: yhuang <56789071+YouliangHUANG@users.noreply.github.com>
Date: Mon, 25 Mar 2024 17:43:46 +0800
Subject: [PATCH] Fix the behavior of collecting 'num_input_tokens_seen'
 (#29099)

Fix the behavior of collecting 'num_input_tokens_seen'.

See https://github.com/huggingface/transformers/issues/28791 for more details.
---
 src/transformers/trainer.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index 276c08788a1391..1bf69da039ff0c 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -2097,7 +2097,12 @@ def _inner_training_loop(
                             "a `main_input_name` attribute to the model class you are using."
                         )
                     else:
-                        self.state.num_input_tokens_seen += self.accelerator.gather(inputs[main_input_name]).numel()
+                        input_device = inputs[main_input_name].device
+                        self.state.num_input_tokens_seen += torch.sum(
+                            self.accelerator.gather(
+                                torch.tensor(inputs[main_input_name].numel(), device=input_device, dtype=torch.int64)
+                            )
+                        ).item()
                 if rng_to_sync:
                     self._load_rng_state(resume_from_checkpoint)
                     rng_to_sync = False
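
Note on the fix: `Accelerator.gather` concatenates tensors across processes and
requires every rank to contribute a tensor of the same shape, so gathering the
raw `inputs[main_input_name]` batch (the old behavior) can break when ranks hold
batches with different sequence lengths. Gathering a single int64 scalar per
rank, the local token count, is always shape-uniform. Below is a minimal
standalone sketch of the same pattern, assuming a script launched with
`accelerate`; `count_tokens_this_step`, `inputs`, and `main_input_name` are
illustrative stand-ins for the Trainer's internals, not part of the patch.

    import torch
    from accelerate import Accelerator

    accelerator = Accelerator()

    def count_tokens_this_step(inputs, main_input_name="input_ids"):
        # Build a 0-dim int64 tensor holding this rank's local token count.
        # A scalar has the same shape on every process, so gather() cannot
        # hit the shape mismatch that raw input tensors can.
        input_device = inputs[main_input_name].device
        local_count = torch.tensor(
            inputs[main_input_name].numel(), device=input_device, dtype=torch.int64
        )
        # gather() returns one entry per process; sum for the global total.
        return torch.sum(accelerator.gather(local_count)).item()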