Skip to content

Commit

Permalink
🔨 [FIX] Fixes memory leak (#83)
Browse files Browse the repository at this point in the history
* Fixes memory leak

* Changed total_loss to accumulate plain Python floats and collect each loss value via .item()

---------

Co-authored-by: Martin Cerman <m.cerman@enlite.ai>
  • Loading branch information
mcerman and Martin Cerman authored Oct 2, 2024
1 parent 8228669 commit dea5a8a
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions yolo/tools/solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def train_one_batch(self, images: Tensor, targets: Tensor):

def train_one_epoch(self, dataloader):
self.model.train()
- total_loss = defaultdict(lambda: torch.tensor(0.0, device=self.device))
+ total_loss = defaultdict(float)
total_samples = 0
self.optimizer.next_epoch(len(dataloader))
for batch_size, images, targets, *_ in dataloader:
Expand All @@ -96,7 +96,7 @@ def train_one_epoch(self, dataloader):
for loss_name, loss_val in loss_each.items():
if self.use_ddp: # collecting loss for each batch
distributed.all_reduce(loss_val, op=distributed.ReduceOp.AVG)
- total_loss[loss_name] += loss_val * batch_size
+ total_loss[loss_name] += loss_val.item() * batch_size
total_samples += batch_size
self.progress.one_batch(loss_each)

Expand Down

0 comments on commit dea5a8a

Please sign in to comment.