Commit
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci

Signed-off-by: Alexandros Koumparoulis <akoumparouli@nvidia.com>
pre-commit-ci[bot] authored and akoumpa committed Dec 19, 2023
1 parent 4eb7ce4 commit 6fe099f
Showing 1 changed file with 6 additions and 9 deletions.
15 changes: 6 additions & 9 deletions scripts/nlp_language_modeling/convert_mistral_7b_to_nemo.py
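The changes below are the typical output of formatting hooks: standalone import lines are merged into sorted import groups and the blank lines around top-level definitions are normalized to two. As a minimal sketch (assuming the repository's pre-commit config runs an import sorter such as isort, which this page does not show), the same shape of re-ordering can be reproduced with isort's Python API:

# Hypothetical reproduction of the import re-ordering seen in the hunks below.
# Assumption: an isort-style hook is among the configured pre-commit hooks;
# the actual hook list is not visible on this page.
import isort

before = (
    "import torch\n"
    "from omegaconf import OmegaConf\n"
    "\n"
    "import torch.nn\n"
    "import json\n"
)

# isort.code() returns the source with imports grouped (stdlib first, then
# third-party) and sorted, merging the stray standalone imports into those
# groups -- the same kind of change applied to convert_mistral_7b_to_nemo.py
# in this commit.
after = isort.code(before)
print(after)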
@@ -21,21 +21,19 @@
[--fast-swiglu\
"""

import json
import os
from argparse import ArgumentParser
from collections import OrderedDict

import torch
import torch.nn
from omegaconf import OmegaConf
from pytorch_lightning.core.saving import _load_state as ptl_load_state
from pytorch_lightning.trainer.trainer import Trainer

import torch.nn
import json
from sentencepiece import SentencePieceProcessor

from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel

from nemo.collections.nlp.parts.nlp_overrides import (
GradScaler,
MegatronHalfPrecisionPlugin,
@@ -46,7 +44,6 @@
from nemo.utils import logging



def get_args():
parser = ArgumentParser()
parser.add_argument(
@@ -128,6 +125,7 @@ def load_config(mistral_config, tokenizer_path):

return nemo_config


def load_mistral_ckpt(dir):
params_file = os.path.join(dir, 'params.json')
assert os.path.exists(params_file)
@@ -143,6 +141,7 @@ def load_mistral_ckpt(dir):
assert tokenizer.get_piece_size() == model_args['vocab_size']
return model_args, ckpt, tokenizer


def convert(args):
logging.info(f"loading checkpoint {args.in_file}")

@@ -224,8 +223,6 @@ def convert(args):
if mcore_gpt:
assert nemo_config.activation.startswith('fast-'), 'mcore only supports fast version of gated linear unit.'



for l in range(int(num_layers)):
print(f"converting layer {l}")
old_tensor_shape = ckpt[f'layers.{l}.attention.wq.weight'].size()
@@ -274,7 +271,6 @@ def convert(args):
mlp_up_base_name = f'model.language_model.encoder.layers.{l}.mlp.dense_4h_to_h.weight'
checkpoint['state_dict'][mlp_up_base_name] = param_to_weights(mlp_up_weight)


# LayerNorm
input_ln_weight = ckpt[f'layers.{l}.attention_norm.weight']

@@ -327,6 +323,7 @@ def convert(args):
model.save_to(args.out_file)
logging.info(f'NeMo model saved to: {args.out_file}')


if __name__ == '__main__':
args = get_args()
convert(args)
convert(args)
