
Commit ed511f2

Revert "remove dtensors, not explicit (#39840)"
This reverts commit 6dfd561.
1 parent d2303c7 commit ed511f2

File tree

1 file changed: +3 -10 lines changed


src/transformers/modeling_utils.py

Lines changed: 3 additions & 10 deletions
@@ -4122,16 +4122,9 @@ def save_pretrained(
         for shard_file, tensors in filename_to_tensors:
             shard = {}
             for tensor in tensors:
-                if _is_dtensor_available and getattr(self, "_device_mesh", None) is not None:
-                    plan = _get_parameter_tp_plan(tensor, self._tp_plan)
-                    full_tensor = state_dict[tensor]
-                    if isinstance(state_dict[tensor], DTensor):
-                        full_tensor = full_tensor.full_tensor()
-                    elif plan is not None:
-                        shard_dim = -1 if "rowwise" in plan else 0
-                        gather_list = [torch.empty_like(full_tensor) for _ in range(self._device_mesh.size())]
-                        torch.distributed.all_gather(gather_list, full_tensor)
-                        full_tensor = torch.cat(gather_list, dim=shard_dim)
+                if _is_dtensor_available and isinstance(state_dict[tensor], DTensor):
+                    full_tensor = state_dict[tensor].full_tensor()
+                    # to get the correctly ordered tensor we need to repack if packed
                     if _get_parameter_tp_plan(tensor, self._tp_plan) in ("local_packed_rowwise",):
                         full_tensor = repack_weights(full_tensor, -1, self._tp_size, 2)
                     shard[tensor] = full_tensor.contiguous()  # only do contiguous after it's permuted correctly
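
The restored branch relies on DTensor to re-assemble each tensor-parallel shard before it is written, whereas the removed branch gathered shards by hand with torch.distributed.all_gather and a torch.cat along the shard dimension. Below is a minimal sketch, not the transformers implementation, contrasting the two approaches; it assumes a recent PyTorch (2.4+) where DTensor is exposed under torch.distributed.tensor, and the 2-process gloo setup, the script name, and the example weight are illustrative only.

# Minimal sketch, assuming PyTorch >= 2.4 (DTensor under torch.distributed.tensor).
# Run with: torchrun --nproc_per_node=2 sketch.py
import torch
import torch.distributed as dist
from torch.distributed.device_mesh import init_device_mesh
from torch.distributed.tensor import Shard, distribute_tensor

dist.init_process_group("gloo")
world_size = dist.get_world_size()
mesh = init_device_mesh("cpu", (world_size,))

# Illustrative weight; row-shard it across ranks the way a tensor-parallel plan would.
weight = torch.arange(8 * 4, dtype=torch.float32).reshape(8, 4)
dtensor = distribute_tensor(weight, mesh, [Shard(0)])

# Restored path: the DTensor already knows its placements, so one call gathers the full weight.
full_from_dtensor = dtensor.full_tensor()

# Removed path: gather the local shards explicitly and concatenate along the shard dimension.
local = dtensor.to_local()
gather_list = [torch.empty_like(local) for _ in range(world_size)]
dist.all_gather(gather_list, local)
full_from_all_gather = torch.cat(gather_list, dim=0)

assert torch.equal(full_from_dtensor, weight)
assert torch.equal(full_from_all_gather, weight)
dist.destroy_process_group()

On evenly sharded weights both paths yield the same full tensor; full_tensor() simply keeps the gather logic inside DTensor instead of duplicating it in save_pretrained.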
