Skip to content

Commit

Permalink
Fix Windows error with checkpoint race conditions (#28637)
Browse files Browse the repository at this point in the history
Fix windows err
  • Loading branch information
muellerzr authored and Ita Zaporozhets committed May 14, 2024
1 parent c4bdb6d commit 7636f1d
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions src/transformers/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2415,9 +2415,11 @@ def _save_checkpoint(self, model, trial, metrics=None):
os.rename(staging_output_dir, output_dir)

# Ensure rename completed in cases where os.rename is not atomic
fd = os.open(output_dir, os.O_RDONLY)
os.fsync(fd)
os.close(fd)
# This directory fsync is only possible on non-Windows systems: os.open() cannot open a directory on Windows
if os.name != "nt":
fd = os.open(output_dir, os.O_RDONLY)
os.fsync(fd)
os.close(fd)

# Maybe delete some older checkpoints.
if self.args.should_save:
Expand Down

0 comments on commit 7636f1d

Please sign in to comment.