eval_pretrain.py
"""
Sample from the trained model with MindSpore
"""
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '6'
import json
from contextlib import nullcontext
import mindspore
# from pretrain import init_model  # unused here: a local init_model() is defined below
from model import ModelArgs, Transformer
from chatglm_tokenizer.tokenization_chatglm import ChatGLMTokenizer
import numpy as np
def init_model():
    # model init
    model_args = dict(
        dim=dim,
        n_layers=n_layers,
        n_heads=n_heads,
        n_kv_heads=n_heads,
        vocab_size=64793,
        multiple_of=multiple_of,
        max_seq_len=max_seq_len,
        dropout=dropout,
    )  # start with model_args from the module-level settings below
    if init_from == "scratch":
        # init a new model from scratch
        print("Initializing a new model from scratch")
        gptconf = ModelArgs(**model_args)
        model = Transformer(gptconf)
    elif init_from == "resume":
        print(f"Resuming training from {out_dir}")
        # resume training from a checkpoint.
        ckpt_path = os.path.join(out_dir, "pretrain/baby_llama2_4.ckpt")
        state_dict = mindspore.load_checkpoint(ckpt_path)
        # checkpoint_model_args = checkpoint["model_args"]
        # # force these config attributes to be equal otherwise we can't even resume training
        # # the rest of the attributes (e.g. dropout) can stay as desired from command line
        # for k in ["dim", "n_layers", "n_heads", "n_kv_heads", "vocab_size", "multiple_of", "max_seq_len"]:
        #     model_args[k] = checkpoint_model_args[k]
        # create the model
        gptconf = ModelArgs(**model_args)
        model = Transformer(gptconf)
        # fix the keys of the state dictionary :(
        # honestly no idea how checkpoints sometimes get this prefix, have to debug more
        unwanted_prefix = "_orig_mod."
        for k, v in list(state_dict.items()):
            if k.startswith(unwanted_prefix):
                state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
        mindspore.load_param_into_net(model, state_dict)
        # iter_num = checkpoint["iter_num"]
        # best_val_loss = checkpoint["best_val_loss"]
    return model
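# Note: init_model() above reads dim, n_layers, n_heads, multiple_of,
# max_seq_len, dropout, init_from and out_dir from module-level globals that
# are only assigned further down, so it must be called after those assignments.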
# def compute_bleu(labels, preds, weights=None):
# from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
# weights = weights or (0.25, 0.25, 0.25, 0.25)
# return np.mean([sentence_bleu(references=[label],
# hypothesis=pred,
# smoothing_function=SmoothingFunction().method1,
# weights=weights) for label, pred in zip(labels, preds)])
# -----------------------------------------------------------------------------
out_dir = 'baby-llama2/Baby_Llama2/out' # ignored if init_from is not 'resume'
start = "" # or "<|endoftext|>" or etc. Can also specify a file, use as: "FILE:prompt.txt"
num_samples = 1 # number of samples to draw
max_new_tokens = 100 # number of tokens generated in each sample
temperature = 1.0 # 1.0 = no change, < 1.0 = less random, > 1.0 = more random, in predictions
top_k = 30 # retain only the top_k most likely tokens, clamp others to have 0 probability
seed = 1337
# device = 'cuda' if torch.cuda.is_available() else 'cpu' # examples: 'cpu', 'cuda', 'cuda:0', 'cuda:1', etc.
#dtype = 'bfloat16' if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else 'float16' # 'float32' or 'bfloat16' or 'float16'
dtype = "float32"
compile = False # leftover from the PyTorch version; model compilation is not used in this MindSpore port
#exec(open('configurator.py').read()) # overrides from command line or config file
# -----------------------------------------------------------------------------
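# Illustration only (never called): a rough NumPy sketch of what the
# `temperature` and `top_k` knobs above are assumed to do inside
# `model.generate` for a single step -- scale the logits, keep only the top-k
# candidates, and sample from the renormalised distribution.
def _sample_next_token_sketch(logits, temperature=1.0, top_k=None):
    logits = np.asarray(logits, dtype=np.float64) / max(temperature, 1e-5)  # <1.0 sharpens, >1.0 flattens
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()
    if top_k is not None:
        cutoff = np.sort(probs)[-top_k]                # k-th largest probability
        probs = np.where(probs < cutoff, 0.0, probs)   # drop everything below the cut
        probs /= probs.sum()
    return int(np.random.choice(len(probs), p=probs))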
# max_seq_len = 512
# dim = 512
# n_layers = 8
# n_heads = 8
max_seq_len = 512
dim = 512
n_layers = 8
n_heads = 8
multiple_of = 32
dropout = 0.0
init_from = 'scratch'
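# These architecture values are assumed to match the checkpoint loaded below;
# if they differ, the pretrained parameters will not load cleanly.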
# model_args = dict(
# dim=dim,
# n_layers=n_layers,
# n_heads=n_heads,
# n_kv_heads=n_heads,
# vocab_size=64793,#64793,
# multiple_of=multiple_of,
# max_seq_len=max_seq_len,
# dropout=dropout,
# ) # s
mindspore.set_seed(seed)
np.random.seed(seed)
# torch.manual_seed(seed)
# torch.cuda.manual_seed(seed)
# torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul
# torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn
# device_type = 'cuda' if 'cuda' in device else 'cpu' # for later use in torch.autocast
ptdtype = {'float32': mindspore.float32, 'bfloat16': mindspore.bfloat16, 'float16': mindspore.float16}[dtype]
# ctx = nullcontext() if device_type == 'cpu' else torch.cuda.amp.autocast()
# init from a model saved in a specific directory
ckpt_path = 'baby-llama2/Baby_Llama2/out/pretrain/baby_llama2_3.ckpt'
state_dict = mindspore.load_checkpoint(ckpt_path)
model = init_model()
# gptconf = ModelArgs(**model_args)
# model = Transformer(gptconf)
# for key, param in model.parameters_and_names():
# print(param.name)
unwanted_prefix = '_orig_mod.'
for k, v in list(state_dict.items()):
    if k.startswith(unwanted_prefix):
        # strip the compile-wrapper prefix so parameter names match the network
        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
param_not_load = mindspore.load_param_into_net(model, state_dict)
print(f"Param_not_load:{param_not_load}")  # network parameters that found no match in the checkpoint
model.set_train(False)  # inference mode (disables dropout)
# load the tokenizer
tokenizer = ChatGLMTokenizer(vocab_file='./baby-llama2/Baby_Llama2/chatglm_tokenizer/tokenizer.model')
#
# data = []
# with open('./test_data/test.json','r') as f:
# for line in f:
# data.append(json.loads(line))
# three Chinese test prompts: a Tang-poem completion, a fairy-tale request, and a book blurb
data = [
    {"question": "床前明月光,疑是地上霜。举头望明月,"},
    {"question": "请你讲一个童话故事:"},
    {"question": "《小王子》是一本畅销童话书,它讲述了:"},
]
ans_lst = []
target_lst = []
for p in data[:100]:
    # run generation
    prompt = p['question']
    x = tokenizer.encode(prompt, add_special_tokens=False)
    x = mindspore.tensor(x, dtype=mindspore.int64)[None, ...]  # add a batch dimension
    y = model.generate(x, 2, max_new_tokens, temperature=temperature, top_k=top_k)
    # decode and strip the prompt so only the generated continuation remains
    answer = tokenizer.decode(y[0].tolist())
    answer = answer.replace(prompt, '')
    ans_lst.append(answer)
    print('[prompt]:', prompt)
    print('[answer]:', answer)
    print('---------------')
#
# import jieba
# target_lst=[jieba.lcut(result.lower()) for result in target_lst]
# preds_lst=[jieba.lcut(result.lower()) for result in ans_lst]
# scores = compute_bleu(preds_lst, target_lst)
# print(scores)
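# Hedged sketch (not called) of how the commented-out BLEU scoring above could
# be wired up if each test item also carried a reference answer, which the
# three prompts used here do not. Requires nltk and jieba; names are illustrative.
def _bleu_sketch(references, hypotheses, weights=(0.25, 0.25, 0.25, 0.25)):
    from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
    import jieba
    refs = [jieba.lcut(r.lower()) for r in references]
    hyps = [jieba.lcut(h.lower()) for h in hypotheses]
    return float(np.mean([
        sentence_bleu(references=[ref], hypothesis=hyp,
                      smoothing_function=SmoothingFunction().method1,
                      weights=weights)
        for ref, hyp in zip(refs, hyps)
    ]))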