Commit 0723cb7

Multimodal r1.23.0 bug fix (NVIDIA#8315) (NVIDIA#8339)
* Rename quick-gelu

* ddpm config guard

* Fix ddpm edit api

* Fix insert_image_token cfg issue

* neva updates

* reformat

* Add back jenkins

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* Fix jenkins

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* Fix bugs

* Update default neva template
---------

Signed-off-by: yaoyu-33 <yaoyu.094@gmail.com>
Co-authored-by: yaoyu-33 <54727607+yaoyu-33@users.noreply.github.com>
Co-authored-by: Eric Harper <complex451@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
4 people authored Feb 16, 2024
1 parent 9216219 commit 0723cb7
Showing 8 changed files with 202 additions and 197 deletions.
366 changes: 184 additions & 182 deletions Jenkinsfile

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions examples/multimodal/multimodal_llm/neva/neva_evaluation.py

@@ -71,15 +71,16 @@ def main(cfg) -> None:
     with open(cfg.prompt_file, 'r') as f:
         lines = f.readlines()
 
+    insert_image_token = cfg.inference.get("insert_image_token", None)
     final_prompts = []
     for line in lines:
         prompt_dict = json.loads(line)
         assert 'prompt' in prompt_dict or 'text' in prompt_dict
         if 'prompt' not in prompt_dict:
             prompt_dict['prompt'] = prompt_dict['text']
-        if cfg.inference.insert_image_token == 'left':
+        if insert_image_token == 'left':
             prompt_dict['prompt'] = '<image>' + prompt_dict['prompt']
-        elif cfg.inference.insert_image_token == 'right':
+        elif insert_image_token == 'right':
             prompt_dict['prompt'] = prompt_dict['prompt'] + '<image>'
         if 'image' in prompt_dict:
             prompt_dict['image_path'] = prompt_dict['image']
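Note on the fix above: direct attribute access is replaced with a guarded lookup so the script tolerates configs that omit insert_image_token. A minimal sketch of the difference, assuming an OmegaConf config in struct mode (how Hydra hands configs to these scripts); the config contents are illustrative:

    from omegaconf import OmegaConf

    # An inference config that does not define insert_image_token.
    cfg = OmegaConf.create({"inference": {"greedy": True}})
    OmegaConf.set_struct(cfg, True)  # Hydra-loaded configs behave like this

    # Direct attribute access raises when the key is absent.
    try:
        _ = cfg.inference.insert_image_token
    except Exception as err:  # OmegaConf raises a ConfigAttributeError here
        print(f"attribute access failed: {type(err).__name__}")

    # The guarded lookup falls back to a default instead, so the prompt loop
    # simply skips image-token insertion when the key is unset.
    insert_image_token = cfg.inference.get("insert_image_token", None)
    print(insert_image_token)  # -> None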
@@ -79,7 +79,7 @@ model:
   openai_gelu: False
   bias_activation_fusion: False
   megatron_legacy: True
-  activation: quick-gelu
+  activation: approx-gelu
 
 
@@ -144,7 +144,7 @@ model:
   fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor
   fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history
   use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False.
-  activation: quick-gelu
+  activation: approx-gelu
 
 # Megatron O2-style half-precision
 megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters
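For reference, the two GELU approximations commonly behind these config names; mapping quick-gelu to the sigmoid form and approx-gelu to the tanh form is an assumption about NeMo's naming, not something the diff states:

    import math

    def quick_gelu(x: float) -> float:
        # Sigmoid-based GELU approximation: x * sigmoid(1.702 * x).
        return x / (1.0 + math.exp(-1.702 * x))

    def approx_gelu(x: float) -> float:
        # Tanh-based GELU approximation (Hendrycks & Gimpel).
        return 0.5 * x * (1.0 + math.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x ** 3)))

    for x in (-2.0, -0.5, 0.0, 0.5, 2.0):
        print(f"x={x:+.1f}  quick={quick_gelu(x):+.4f}  approx={approx_gelu(x):+.4f}")

The two curves track exact GELU closely, so under that naming assumption the rename reads as a config-key cleanup rather than a change in model math.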
@@ -117,7 +117,7 @@ model:
     openai_gelu: false
     bias_activation_fusion: false
     megatron_legacy: true
-    activation: quick-gelu
+    activation: approx-gelu
 
   text:
     precision: ${trainer.precision}
@@ -171,7 +171,7 @@ model:
     fp8_amax_history_len: 1
     fp8_amax_compute_algo: most_recent
     use_emha: false
-    activation: quick-gelu
+    activation: approx-gelu
 
   # Megatron O2-style half-precision
   megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters
2 changes: 1 addition & 1 deletion nemo/collections/multimodal/data/neva/neva_dataset.py

@@ -397,7 +397,7 @@ def preprocess_nvgpt(sources: dict, tokenizer, cfg,) -> Dict:
             if 'label' not in turn:
                 turn[
                     'label'
-                ] = "quality:6,toxicity:0,humor:0,creativity:0,violence:0,helpfulness:6,not_appropriate:0"
+                ] = "quality:4,toxicity:0,humor:0,creativity:0,helpfulness:4,correctness:4,coherence:4,complexity:4,verbosity:4"
             value = DEFAULT_LABELS_TOKEN + turn['label'] + '\n' + turn['value']
             conv.append_message(turn['from'], value)
             if not turn["value"]:
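The default turn label moves from the old attribute set to a larger one (adding correctness, coherence, complexity, and verbosity). A quick illustration of the label format; parse_label is a hypothetical helper, not code from the repo:

    def parse_label(label: str) -> dict:
        # Split comma-separated "name:value" pairs into a dict of ints.
        return {k: int(v) for k, v in (item.split(":") for item in label.split(","))}

    new_default = (
        "quality:4,toxicity:0,humor:0,creativity:0,"
        "helpfulness:4,correctness:4,coherence:4,complexity:4,verbosity:4"
    )
    print(parse_label(new_default))
    # {'quality': 4, 'toxicity': 0, 'humor': 0, 'creativity': 0, 'helpfulness': 4,
    #  'correctness': 4, 'coherence': 4, 'complexity': 4, 'verbosity': 4}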
@@ -612,8 +612,8 @@ def forward(self, tokens, text_position_ids, attention_mask, labels, media=None)
         output_tensor = self.model(**forward_args)
         return output_tensor
 
-    def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only):
-        return MegatronGPTModel.fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only)
+    def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only, first_val_step=None):
+        return MegatronGPTModel.fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only, first_val_step)
 
     def training_step(self, dataloader_iter, batch_idx):
         """
@@ -623,7 +623,7 @@ def training_step(self, dataloader_iter, batch_idx):
         """
         return MegatronGPTModel.training_step(self, dataloader_iter, batch_idx)
 
-    def get_forward_output_and_loss_func(self, validation_step=False):
+    def get_forward_output_and_loss_func(self, validation_step=False, tuning=False):
         def loss_func(output_tensor, loss_mask):
             loss_for_ub = self.loss_func(loss_mask, output_tensor)
             if validation_step and not self.cfg.data.get('validation_drop_last', True):
@@ -921,7 +921,7 @@ def list_available_models(cls) -> Optional[PretrainedModelInfo]:
         Returns:
             List of available pre-trained models.
         """
-        return []
+        return None
 
     def setup_test_data(self, cfg):
         pass
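These overrides delegate straight to MegatronGPTModel, so their signatures have to track the parent API, which evidently gained first_val_step and tuning parameters. A stripped-down sketch of the delegation pattern (Parent and Child are stand-ins, not NeMo classes):

    class Parent:
        def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only, first_val_step=None):
            return ("parent", first_val_step)

    class Child(Parent):
        # Mirror the parent's full signature and forward every argument, so a
        # caller passing first_val_step does not hit a TypeError in the override.
        def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only, first_val_step=None):
            return Parent.fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only, first_val_step)

    print(Child().fwd_bwd_step(None, 0, False, first_val_step=True))  # -> ('parent', True)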
@@ -40,7 +40,9 @@
 
 
 class LatentDiffusionEdit(LatentDiffusion):
-    def init_from_ckpt(self, path, ignore_keys=list(), only_model=False):
+    def init_from_ckpt(
+        self, path, ignore_keys=list(), only_model=False, load_vae=True, load_unet=True, load_encoder=True,
+    ):
         pl_sd = torch.load(path, map_location="cpu")
         if "state_dict" in list(pl_sd.keys()):
             pl_sd = pl_sd["state_dict"]
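The widened init_from_ckpt signature points at selective weight restoration for the VAE, UNet, and text encoder. A hedged sketch of prefix-based state-dict filtering; the prefixes follow the usual latent-diffusion layout and are an assumption about this codebase:

    import torch

    def filter_state_dict(state_dict, load_vae=True, load_unet=True, load_encoder=True):
        # Drop the weight groups the caller opted out of, keyed on assumed prefixes.
        skip = []
        if not load_vae:
            skip.append("first_stage_model.")      # VAE (assumed prefix)
        if not load_unet:
            skip.append("model.diffusion_model.")  # UNet (assumed prefix)
        if not load_encoder:
            skip.append("cond_stage_model.")       # text encoder (assumed prefix)
        return {k: v for k, v in state_dict.items() if not any(k.startswith(p) for p in skip)}

    sd = {"first_stage_model.w": torch.zeros(1), "model.diffusion_model.w": torch.zeros(1)}
    print(sorted(filter_state_dict(sd, load_unet=False)))  # ['first_stage_model.w']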
@@ -557,9 +557,9 @@ def __init__(self, cfg, model_parallel_config):
 
         self.restarted_from_ckpt = False
         if ckpt_path is not None:
-            load_vae = True if cfg.load_vae is None else cfg.load_vae
-            load_unet = True if cfg.load_unet is None else cfg.load_unet
-            load_encoder = True if cfg.load_encoder is None else cfg.load_encoder
+            load_vae = True if cfg.get("load_vae", None) is None else cfg.load_vae
+            load_unet = True if cfg.get("load_unet", None) is None else cfg.load_unet
+            load_encoder = True if cfg.get("load_encoder", None) is None else cfg.load_encoder
 
             self.init_from_ckpt(
                 ckpt_path, ignore_keys, load_vae=load_vae, load_unet=load_unet, load_encoder=load_encoder,
