Fix the names of two sets of weight and bias in mcore_to_nemo_mapping (#9628)

These two sets of weight and bias have been renamed according to the `module_name_rewrite_list` given in
https://github.com/NVIDIA/Megatron-LM/blob/e33c8f78a35765d5aa37475a144da60e8a2349d1/megatron/core/inference/gpt/state_dict_hooks.py#L116-L119

Also change the hyphens in the two command-line options to underscores to follow the convention in the
checkpoint_converters folder.
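
For readers who don't want to follow the link: a module-name rewrite of this kind is just substring substitution on state_dict keys. A minimal illustrative sketch (the function name and matching logic are invented here, not copied from Megatron-LM; the rewrite directions below are the ones reflected in this commit's diff):

```python
# Illustrative only: apply (old, new) substring rewrites to a state_dict key,
# in the spirit of Megatron-LM's module_name_rewrite_list.
def rewrite_key(key, rewrite_list):
    for old, new in rewrite_list:
        key = key.replace(old, new)
    return key

# The two renames tracked by this commit, as they appear in the diff below.
rewrites = [
    ("self_attention.linear_qkv.layer_norm_", "input_layernorm."),
    ("mlp.linear_fc1.layer_norm_", "pre_mlp_layernorm."),
]
assert (
    rewrite_key("decoder.layers.0.self_attention.linear_qkv.layer_norm_weight", rewrites)
    == "decoder.layers.0.input_layernorm.weight"
)
```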

Signed-off-by: Wei Ren <renwei2004@gmail.com>
Co-authored-by: Wei Ren <wrn@amazon.com>
Co-authored-by: Yu Yao <54727607+yaoyu-33@users.noreply.github.com>
3 people authored Aug 15, 2024
1 parent 8922d68 commit 6784db5
Showing 1 changed file with 4 additions and 4 deletions.
scripts/checkpoint_converters/convert_gpt_nemo_to_mcore.py
@@ -62,7 +62,7 @@ def get_args():
         help="Path to output mcore weights file (ends in .nemo).",
     )
     parser.add_argument(
-        "--cpu-only",
+        "--cpu_only",
         action="store_true",
         help="Load model in cpu only. Useful if the model cannot fit in GPU memory, "
         "but this option makes the conversion script significantly slower.",
@@ -73,7 +73,7 @@ def get_args():
         help="Run conversion again and overwrite output file when the output file already exists",
     )
     parser.add_argument(
-        "--ignore-if-missing",
+        "--ignore_if_missing",
         default="rotary_pos_emb.inv_freq",
         help="comma-separated list of state_dict keys that are known to be missing in mcore and can be safely ignored",
     )
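
Since `--ignore_if_missing` takes a comma-separated string, the script presumably splits it into individual key names before comparing against the state dict; a hypothetical sketch of that step (the helper name is invented, not taken from the script):

```python
# Hypothetical helper: normalize the comma-separated --ignore_if_missing
# value into a set of state_dict key names.
def parse_ignore_if_missing(value):
    return {key.strip() for key in value.split(",") if key.strip()}

assert parse_ignore_if_missing("rotary_pos_emb.inv_freq") == {"rotary_pos_emb.inv_freq"}
```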
@@ -158,8 +158,8 @@ def build_key_mapping(nemo_cfg):
         for wb in ('weight', 'bias') if has_layernorm_bias else ('weight',):
             mcore_to_nemo_mapping.update(
                 {
-                    f"{mcore_prefix}.{i}.self_attention.linear_qkv.layer_norm_{wb}": f"{nemo_prefix}.{i}.input_layernorm.{wb}",
-                    f"{mcore_prefix}.{i}.mlp.linear_fc1.layer_norm_{wb}": f"{nemo_prefix}.{i}.post_attention_layernorm.{wb}",
+                    f"{mcore_prefix}.{i}.input_layernorm.{wb}": f"{nemo_prefix}.{i}.input_layernorm.{wb}",
+                    f"{mcore_prefix}.{i}.pre_mlp_layernorm.{wb}": f"{nemo_prefix}.{i}.post_attention_layernorm.{wb}",
                 }
             )
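
Conceptually, the mapping built here pairs each mcore key with its NeMo counterpart, and conversion walks those pairs to copy tensors across. A hedged sketch of that consuming step (names invented for illustration; the script's actual loop may differ):

```python
# Illustrative only: build an mcore-style state dict from a NeMo one by
# walking the mcore->nemo key mapping produced by build_key_mapping.
def convert_state_dict(nemo_state_dict, mcore_to_nemo_mapping):
    return {
        mcore_key: nemo_state_dict[nemo_key]
        for mcore_key, nemo_key in mcore_to_nemo_mapping.items()
        if nemo_key in nemo_state_dict
    }
```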
