checkpoint_converter_fsdp_hf.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
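# Example invocation (a sketch; the paths below are placeholders, and fire.Fire
# exposes main()'s keyword arguments as command-line flags):
#   python checkpoint_converter_fsdp_hf.py \
#     --fsdp_checkpoint_path /path/to/fsdp_sharded_checkpoints \
#     --consolidated_model_path /path/to/save/hf_model \
#     --HF_model_path_or_name meta-llama/Llama-2-7b-chat-hf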
# from accelerate import init_empty_weights, load_checkpoint_and_dispatch
import os
import sys

import fire
import yaml

from llama_recipes.inference.model_utils import load_llama_from_config
from transformers import AutoConfig, AutoTokenizer, MllamaProcessor

# Get the current file's directory
current_directory = os.path.dirname(os.path.abspath(__file__))

# Get the parent directory
parent_directory = os.path.dirname(current_directory)

# Append the parent directory to sys.path
sys.path.append(parent_directory)

from model_checkpointing import load_sharded_model_single_gpu
def main(
    fsdp_checkpoint_path="",  # Path to FSDP sharded model checkpoints
    consolidated_model_path="",  # Path to save the HF converted model checkpoints
    HF_model_path_or_name="",  # Path/name of the HF model that includes config.json and tokenizer_config.json (e.g. meta-llama/Llama-2-7b-chat-hf)
):
    try:
        file_name = "train_params.yaml"
        # Combine the directory and file name to create the full path
        train_params_path = os.path.join(fsdp_checkpoint_path, file_name)
        # Open the file
        with open(train_params_path, "r") as file:
            # Load the YAML data
            data = yaml.safe_load(file)

            # Access the 'model_name' field
            HF_model_path_or_name = data.get("model_name")
            print(f"Model name: {HF_model_path_or_name}")
    except FileNotFoundError:
        print(f"The file {train_params_path} does not exist.")
        HF_model_path_or_name = input("Please enter the model name: ")
        print(f"Model name: {HF_model_path_or_name}")
    except Exception as e:
        print(f"An error occurred: {e}")

    # Load the HF model definition from config
    model_def = load_llama_from_config(HF_model_path_or_name)
    print("model is loaded from config")

    # Load the FSDP sharded checkpoints into the model
    model = load_sharded_model_single_gpu(model_def, fsdp_checkpoint_path)
    print("model is loaded from FSDP checkpoints")

    # Load the config from the model path to determine the model type
    config = AutoConfig.from_pretrained(HF_model_path_or_name)

    # Save the processor and config for mllama models
    if config.model_type == "mllama":
        processor = MllamaProcessor.from_pretrained(HF_model_path_or_name)
        processor.save_pretrained(consolidated_model_path)
        print(
            f"HuggingFace mllama processor has been saved in {consolidated_model_path}"
        )
    else:
        # Save the tokenizer for llama models
        tokenizer = AutoTokenizer.from_pretrained(HF_model_path_or_name)
        tokenizer.save_pretrained(consolidated_model_path)
        print(
            f"HuggingFace llama tokenizer has been saved in {consolidated_model_path}"
        )

    # Save the FSDP sharded checkpoints in HF format
    model.save_pretrained(consolidated_model_path)
    print(f"HuggingFace model checkpoints have been saved in {consolidated_model_path}")


if __name__ == "__main__":
    fire.Fire(main)
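# After conversion, the consolidated checkpoint can be loaded with the standard
# Hugging Face API. A minimal sketch for text-only llama models (the path below
# stands for whatever was passed as consolidated_model_path):
#   from transformers import AutoModelForCausalLM, AutoTokenizer
#   model = AutoModelForCausalLM.from_pretrained("/path/to/consolidated_model")
#   tokenizer = AutoTokenizer.from_pretrained("/path/to/consolidated_model")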