Fix module.training for neva in FusedAttn backward (NVIDIA#8877)
Signed-off-by: yaoyu-33 <yaoyu.094@gmail.com>
Co-authored-by: Pablo Garay <palenq@gmail.com>
Authored by yaoyu-33 and pablo-garay on Apr 15, 2024
1 parent: d8ea1bc · commit: e0bbc54
Showing 2 changed files with 1 addition and 4 deletions.
nemo/collections/multimodal/data/neva/neva_dataset.py (1 addition, 1 deletion)
@@ -782,7 +782,7 @@ class DataCollatorForSupervisedDataset(object):

     def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
         max_len = max(instance['tokens'].shape[0] for instance in instances)
-        max_len = (max_len - 1) // 4 * 4 + 4
+        max_len = (max_len - 1) // 64 * 64 + 64
         for instance in instances:
             pad_len = max_len - instance['tokens'].shape[0]
             instance['tokens'] = F.pad(instance['tokens'], (0, pad_len), 'constant', 0)
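For context, the changed expression rounds the longest sequence length in the batch up to the next multiple of 64 instead of 4, which presumably matches the sequence-length alignment the fused attention kernels expect; the commit itself does not state the rationale. A minimal standalone sketch of the rounding and padding (the helper name and sample lengths below are illustrative, not part of the commit):

import torch
import torch.nn.functional as F

def round_up_to_multiple(n: int, multiple: int = 64) -> int:
    # Hypothetical helper: (n - 1) // multiple * multiple + multiple
    # equals ceil(n / multiple) * multiple for n >= 1.
    return (n - 1) // multiple * multiple + multiple

# Exact multiples stay put, everything else is bumped up to the next multiple of 64.
assert round_up_to_multiple(64) == 64
assert round_up_to_multiple(65) == 128
assert round_up_to_multiple(1) == 64

# Right-padding a 1-D token tensor to the rounded length, as the collator does:
tokens = torch.ones(70, dtype=torch.long)
max_len = round_up_to_multiple(tokens.shape[0])                      # 128
padded = F.pad(tokens, (0, max_len - tokens.shape[0]), 'constant', 0)
assert padded.shape[0] == 128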
Second changed file (3 deletions):
@@ -387,9 +387,6 @@ def __init__(
     def freeze_llm(self, mm_cfg):
         for param in chain(self.embedding.parameters(), self.decoder.parameters(), self.output_layer.parameters(),):
             param.requires_grad = False
-        self.embedding = self.embedding.eval()
-        self.decoder = self.decoder.eval()
-        self.output_layer = self.output_layer.eval()

     def forward(
         self, *args, **kwargs,
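The deleted lines switched the frozen submodules to eval mode as a side effect of freezing them. Calling .eval() flips module.training to False recursively, which is a separate concern from stopping gradient updates and, per the commit title, interfered with the FusedAttn backward path. A minimal sketch of the distinction, using a plain nn.Linear as a stand-in for the NeMo submodules:

import torch
from torch import nn

layer = nn.Linear(8, 8)  # stand-in for self.embedding / self.decoder / self.output_layer

# Freezing: stop gradient updates, but leave the train/eval flag alone.
for param in layer.parameters():
    param.requires_grad = False
assert layer.training is True   # training-dependent ops still see training mode

# .eval() is a different switch: it flips module.training to False recursively.
layer.eval()
assert layer.training is False  # now behaves as inference for training-dependent ops

# The commit keeps only the first behavior, so a frozen LLM inside a model that is
# still training continues to report training=True to code that branches on it.
layer.train()
assert layer.training is True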
