diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index 99792019846210..056f7a2ca96e34 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -2011,7 +2011,10 @@ def _inner_training_loop(
                         is_accelerate_available()
                         and self.accelerator.distributed_type == DistributedType.DEEPSPEED
                     ):
-                        grad_norm = model.get_global_grad_norm().item()
+                        grad_norm = model.get_global_grad_norm()
+                        # In some cases the grad norm may not return a float
+                        if hasattr(grad_norm, "item"):
+                            grad_norm = grad_norm.item()
                     else:
                         grad_norm = _grad_norm.item() if _grad_norm is not None else None
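
For context, here is a minimal standalone sketch of the guard pattern this patch introduces. Depending on the DeepSpeed version and configuration, `get_global_grad_norm()` can return either a `torch.Tensor` or a plain Python float; calling `.item()` unconditionally raises `AttributeError` in the float case. The helper name `_to_float` below is hypothetical, for illustration only, and is not part of the Trainer:

    import torch

    def _to_float(grad_norm):
        # grad_norm may be a torch.Tensor (which has .item()) or already
        # a plain float (which does not). Only unwrap when possible.
        if hasattr(grad_norm, "item"):
            grad_norm = grad_norm.item()
        return grad_norm

    # Both cases yield a plain float suitable for logging:
    assert _to_float(torch.tensor(2.5)) == 2.5
    assert _to_float(2.5) == 2.5

The `hasattr` check keeps the fix version-agnostic: it avoids importing or inspecting DeepSpeed types directly and simply normalizes whatever the call returns.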