Multimodal r1.23.0 bug fix #8315

Merged
14 commits merged on Feb 6, 2024
366 changes: 184 additions & 182 deletions Jenkinsfile

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions examples/multimodal/multimodal_llm/neva/neva_evaluation.py
@@ -71,15 +71,16 @@ def main(cfg) -> None:
     with open(cfg.prompt_file, 'r') as f:
         lines = f.readlines()

+    insert_image_token = cfg.inference.get("insert_image_token", None)
     final_prompts = []
     for line in lines:
         prompt_dict = json.loads(line)
         assert 'prompt' in prompt_dict or 'text' in prompt_dict
         if 'prompt' not in prompt_dict:
             prompt_dict['prompt'] = prompt_dict['text']
-        if cfg.inference.insert_image_token == 'left':
+        if insert_image_token == 'left':
             prompt_dict['prompt'] = '<image>' + prompt_dict['prompt']
-        elif cfg.inference.insert_image_token == 'right':
+        elif insert_image_token == 'right':
             prompt_dict['prompt'] = prompt_dict['prompt'] + '<image>'
         if 'image' in prompt_dict:
             prompt_dict['image_path'] = prompt_dict['image']
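The switch from direct attribute access to `cfg.inference.get(...)` makes `insert_image_token` genuinely optional. A minimal sketch of the difference, using an illustrative OmegaConf config that omits the key (this `cfg` is not the one the script builds):

```python
from omegaconf import OmegaConf

# Illustrative config that does not define insert_image_token.
cfg = OmegaConf.create({"inference": {"greedy": True}})

# Safe lookup: returns None when the key is absent, so the prompt is left unchanged.
insert_image_token = cfg.inference.get("insert_image_token", None)

prompt = "Describe the scene."
if insert_image_token == 'left':
    prompt = '<image>' + prompt
elif insert_image_token == 'right':
    prompt = prompt + '<image>'
# With direct attribute access (cfg.inference.insert_image_token), a config
# written without this key can raise instead of falling through cleanly.
```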
@@ -79,7 +79,7 @@ model:
 openai_gelu: False
 bias_activation_fusion: False
 megatron_legacy: True
-activation: quick-gelu
+activation: approx-gelu



@@ -144,7 +144,7 @@ model:
 fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor
 fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history
 use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False.
-activation: quick-gelu
+activation: approx-gelu

 # Megatron O2-style half-precision
 megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters
@@ -117,7 +117,7 @@ model:
 openai_gelu: false
 bias_activation_fusion: false
 megatron_legacy: true
-activation: quick-gelu
+activation: approx-gelu

 text:
   precision: ${trainer.precision}
@@ -171,7 +171,7 @@ model:
 fp8_amax_history_len: 1
 fp8_amax_compute_algo: most_recent
 use_emha: false
-activation: quick-gelu
+activation: approx-gelu

 # Megatron O2-style half-precision
 megatron_amp_O2: False # Enable O2-level automatic mixed precision using main parameters
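These config hunks change `activation` from `quick-gelu` to `approx-gelu`. For context, a minimal sketch of the two functions these names conventionally refer to; the exact callables NeMo binds to these config strings are an assumption here, not something this diff shows:

```python
import torch

def quick_gelu(x: torch.Tensor) -> torch.Tensor:
    # Sigmoid-based approximation popularized by CLIP: x * sigmoid(1.702 * x).
    return x * torch.sigmoid(1.702 * x)

def approx_gelu(x: torch.Tensor) -> torch.Tensor:
    # Tanh-based GELU approximation: 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x**3))).
    return torch.nn.functional.gelu(x, approximate="tanh")

x = torch.linspace(-3, 3, 7)
print(quick_gelu(x))
print(approx_gelu(x))
```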
2 changes: 1 addition & 1 deletion nemo/collections/multimodal/data/neva/neva_dataset.py
@@ -397,7 +397,7 @@ def preprocess_nvgpt(sources: dict, tokenizer, cfg,) -> Dict:
         if 'label' not in turn:
             turn[
                 'label'
-            ] = "quality:6,toxicity:0,humor:0,creativity:0,violence:0,helpfulness:6,not_appropriate:0"
+            ] = "quality:4,toxicity:0,humor:0,creativity:0,helpfulness:4,correctness:4,coherence:4,complexity:4,verbosity:4"
         value = DEFAULT_LABELS_TOKEN + turn['label'] + '\n' + turn['value']
         conv.append_message(turn['from'], value)
         if not turn["value"]:
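The fallback label string now uses the newer attribute set (helpfulness, correctness, coherence, complexity, verbosity on a 0-4 scale) instead of the old quality/violence/not_appropriate set. A small illustrative helper (not part of the dataset code) for reading that string back into attribute scores:

```python
# Default label string taken from the hunk above.
DEFAULT_LABEL = (
    "quality:4,toxicity:0,humor:0,creativity:0,"
    "helpfulness:4,correctness:4,coherence:4,complexity:4,verbosity:4"
)

def parse_label_string(label: str) -> dict:
    # "name:value,name:value,..." -> {"name": int(value), ...}
    pairs = (item.split(":") for item in label.split(","))
    return {name: int(value) for name, value in pairs}

print(parse_label_string(DEFAULT_LABEL))
# {'quality': 4, 'toxicity': 0, ..., 'verbosity': 4}
```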
@@ -612,8 +612,8 @@ def forward(self, tokens, text_position_ids, attention_mask, labels, media=None):
         output_tensor = self.model(**forward_args)
         return output_tensor

-    def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only):
-        return MegatronGPTModel.fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only)
+    def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only, first_val_step=None):
+        return MegatronGPTModel.fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only, first_val_step)

     def training_step(self, dataloader_iter, batch_idx):
         """
@@ -623,7 +623,7 @@ def training_step(self, dataloader_iter, batch_idx):
         """
         return MegatronGPTModel.training_step(self, dataloader_iter, batch_idx)

-    def get_forward_output_and_loss_func(self, validation_step=False):
+    def get_forward_output_and_loss_func(self, validation_step=False, tuning=False):
         def loss_func(output_tensor, loss_mask):
             loss_for_ub = self.loss_func(loss_mask, output_tensor)
             if validation_step and not self.cfg.data.get('validation_drop_last', True):
@@ -921,7 +921,7 @@ def list_available_models(cls) -> Optional[PretrainedModelInfo]:
         Returns:
             List of available pre-trained models.
         """
-        return []
+        return None

     def setup_test_data(self, cfg):
         pass
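These overrides are brought back in line with the current `MegatronGPTModel` signatures (`first_val_step` and `tuning`), and `list_available_models` now returns `None` to match its `Optional` return type. A minimal sketch, with stand-in classes whose names are purely illustrative, of why a stale override signature breaks as soon as a caller passes the new argument:

```python
# Stand-in classes to show why override signatures must track the parent API.
class Base:
    def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only, first_val_step=None):
        return ("base", first_val_step)

class StaleOverride(Base):
    # Signature frozen at the old parent API.
    def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only):
        return Base.fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only)

class SyncedOverride(Base):
    def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only, first_val_step=None):
        return Base.fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only, first_val_step)

caller_kwargs = dict(dataloader_iter=iter([]), batch_idx=0, forward_only=True, first_val_step=True)
print(SyncedOverride().fwd_bwd_step(**caller_kwargs))   # works: ('base', True)
try:
    StaleOverride().fwd_bwd_step(**caller_kwargs)        # TypeError: unexpected keyword argument
except TypeError as err:
    print(err)
```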
@@ -40,7 +40,9 @@


 class LatentDiffusionEdit(LatentDiffusion):
-    def init_from_ckpt(self, path, ignore_keys=list(), only_model=False):
+    def init_from_ckpt(
+        self, path, ignore_keys=list(), only_model=False, load_vae=True, load_unet=True, load_encoder=True,
+    ):
         pl_sd = torch.load(path, map_location="cpu")
         if "state_dict" in list(pl_sd.keys()):
             pl_sd = pl_sd["state_dict"]
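`init_from_ckpt` gains `load_vae`, `load_unet`, and `load_encoder` switches for loading only part of a checkpoint. A hedged sketch of the kind of prefix-based state-dict filtering such flags typically gate; the key prefixes below are assumptions for illustration, not taken from this diff:

```python
import torch

def filter_state_dict(state_dict: dict, load_vae: bool, load_unet: bool, load_encoder: bool) -> dict:
    # Hypothetical latent-diffusion key prefixes; the real checkpoint keys may differ.
    skip_prefixes = []
    if not load_vae:
        skip_prefixes.append("first_stage_model.")
    if not load_unet:
        skip_prefixes.append("model.diffusion_model.")
    if not load_encoder:
        skip_prefixes.append("cond_stage_model.")
    return {k: v for k, v in state_dict.items() if not any(k.startswith(p) for p in skip_prefixes)}

# Usage sketch: drop the UNet weights while keeping VAE and conditioning encoder.
sd = {"first_stage_model.w": torch.zeros(1), "model.diffusion_model.w": torch.zeros(1)}
print(filter_state_dict(sd, load_vae=True, load_unet=False, load_encoder=True).keys())
```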
@@ -557,9 +557,9 @@ def __init__(self, cfg, model_parallel_config):

         self.restarted_from_ckpt = False
         if ckpt_path is not None:
-            load_vae = True if cfg.load_vae is None else cfg.load_vae
-            load_unet = True if cfg.load_unet is None else cfg.load_unet
-            load_encoder = True if cfg.load_encoder is None else cfg.load_encoder
+            load_vae = True if cfg.get("load_vae", None) is None else cfg.load_vae
+            load_unet = True if cfg.get("load_unet", None) is None else cfg.load_unet
+            load_encoder = True if cfg.get("load_encoder", None) is None else cfg.load_encoder

             self.init_from_ckpt(
                 ckpt_path, ignore_keys, load_vae=load_vae, load_unet=load_unet, load_encoder=load_encoder,
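Using `cfg.get("load_vae", None)` keeps older configs that predate these keys working while still defaulting the flags to True. A tiny truth-table sketch with a plain-dict stand-in for the config (illustrative only, not the OmegaConf object itself):

```python
# Plain-dict stand-in for the config, just to show the defaulting rule used above.
def resolve_flag(cfg: dict, name: str) -> bool:
    # True when the key is absent or explicitly null, otherwise the configured value.
    return True if cfg.get(name, None) is None else cfg[name]

print(resolve_flag({}, "load_vae"))                   # True  (key missing -> default)
print(resolve_flag({"load_vae": None}, "load_vae"))   # True  (explicit null -> default)
print(resolve_flag({"load_vae": False}, "load_vae"))  # False (explicit value wins)
```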