Fix dreambooth data sampler issue (#8400) (#8413)
* Turn on drop last

* Some neva fixes

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: yaoyu-33 <yaoyu.094@gmail.com>
Co-authored-by: yaoyu-33 <54727607+yaoyu-33@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Signed-off-by: Michal Futrega <mfutrega@nvidia.com>
3 people authored and michal2409 committed Feb 22, 2024
1 parent a28e153 commit e89f8be
Showing 2 changed files with 4 additions and 1 deletion.
@@ -487,7 +487,7 @@ def setup_training_data(self, cfg):
global_batch_size=self.cfg.global_batch_size,
data_parallel_rank=parallel_state.get_data_parallel_rank(),
data_parallel_size=parallel_state.get_data_parallel_world_size(),
- drop_last=False,
+ drop_last=True,
)

self._train_dl = torch.utils.data.DataLoader(
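For context on the drop_last change above: when the dataset size is not divisible by the data-parallel world size, keeping the last incomplete batch leaves ranks with padded or repeated samples and a final batch smaller than the configured global batch size. A minimal sketch of the effect, using PyTorch's stock DistributedSampler rather than the Megatron sampler configured above, with a hypothetical 10-sample dataset split across 3 data-parallel ranks:

# Illustrative sketch only; the dataset, rank count, and batch size are hypothetical.
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.distributed import DistributedSampler

dataset = TensorDataset(torch.arange(10))

for drop_last in (False, True):
    samples_per_rank = []
    for rank in range(3):
        sampler = DistributedSampler(
            dataset, num_replicas=3, rank=rank, shuffle=False, drop_last=drop_last
        )
        loader = DataLoader(dataset, batch_size=2, sampler=sampler)
        samples_per_rank.append(sum(batch[0].numel() for batch in loader))
    print(f"drop_last={drop_last}: samples per rank = {samples_per_rank}")

With drop_last=False each rank is padded to 4 samples (two of them repeated); with drop_last=True every rank is trimmed to 3, so all ranks step through the same number of full batches.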
3 changes: 3 additions & 0 deletions nemo/collections/multimodal/parts/utils.py
@@ -22,9 +22,11 @@
from pytorch_lightning.plugins.environments import TorchElasticEnvironment
from transformers import CLIPImageProcessor

from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel
from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector
from nemo.collections.nlp.parts.peft_config import PEFT_CONFIG_MAP
from nemo.utils import AppState, logging
from nemo.utils.model_utils import inject_model_parallel_rank

try:
from megatron.core import dist_checkpointing
@@ -361,6 +363,7 @@ def create_neva_model_and_processor(cfg):
neva_cfg.activations_checkpoint_method = None
neva_cfg.precision = trainer.precision
neva_cfg.mm_cfg.llm.from_pretrained = cfg.get('base_model_file', None)
+ neva_cfg.apply_rope_fusion = False
# neva_cfg.mm_cfg.vision_encoder.from_pretrained = None

model = MegatronNevaModel.restore_from(
