@@ -130,28 +130,76 @@ def make_tensors_list() -> List[str]:
 TENSORS_SET = set(TENSORS_LIST)
 
 
+def find_n_mult(n_ff: int, n_embd: int) -> int:
+    # hardcoded magic range
+    for n_mult in range(256, 1, -1):
+        calc_ff = (((8 * n_embd) // 3 + n_mult - 1) // n_mult) * n_mult
+        if calc_ff == n_ff:
+            return n_mult
+    return 1
+
 @dataclass
 class Params:
     n_vocab: int
     n_embd:  int
     n_mult:  int
     n_head:  int
     n_layer: int
-    file_type: GGMLFileType
 
     @staticmethod
-    def guessed(model: 'LazyModel', file_type: GGMLFileType) -> 'Params':
-        n_vocab, n_embd = model["tok_embeddings.weight"].shape
+    def guessed(model: 'LazyModel') -> 'Params':
+        # try transformer naming first
+        n_vocab, n_embd = model["model.embed_tokens.weight"].shape if "model.embed_tokens.weight" in model else model["tok_embeddings.weight"].shape
+
+        # try transformer naming first
+        if "model.layers.0.self_attn.q_proj.weight" in model:
+            n_layer = next(i for i in itertools.count() if f"model.layers.{i}.self_attn.q_proj.weight" not in model)
+        else:
+            n_layer = next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
+
+        n_head = n_embd // 128  # guessed
 
         return Params(
             n_vocab = n_vocab,
             n_embd  = n_embd,
             n_mult  = 256,
-            n_head  = n_embd // 128,
-            n_layer = next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model),
-            file_type = file_type,
+            n_head  = n_head,
+            n_layer = n_layer,
         )
 
+    @staticmethod
+    def loadHFTransformerJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
+        config = json.load(open(config_path))
+
+        n_vocab = config["vocab_size"];
+        n_embd  = config["hidden_size"];
+        n_head  = config["num_attention_heads"];
+        n_layer = config["num_hidden_layers"];
+        n_ff    = config["intermediate_size"];
+
+        n_mult = find_n_mult(n_ff, n_embd);
+
+        return Params(
+            n_vocab = n_vocab,
+            n_embd  = n_embd,
+            n_mult  = n_mult,
+            n_head  = n_head,
+            n_layer = n_layer,
+        )
+
+    @staticmethod
+    def load(model_plus: 'ModelPlus') -> 'Params':
+        orig_config_path = model_plus.paths[0].parent / "params.json"
+        hf_transformer_config_path = model_plus.paths[0].parent / "config.json"
+
+        if hf_transformer_config_path.exists():
+            params = Params.loadHFTransformerJson(model_plus.model, hf_transformer_config_path)
+        else:
+            params = Params.guessed(model_plus.model)
+
+        print(f'params: n_vocab:{params.n_vocab} n_embd:{params.n_embd} n_mult:{params.n_mult} n_head:{params.n_head} n_layer:{params.n_layer}')
+        return params
+
 
 class SentencePieceVocab:
     def __init__(self, fname_tokenizer: Path, fname_added_tokens: Optional[Path]) -> None:
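
Note: a quick sanity check of the n_mult recovery in the hunk above, using the values a LLaMA-7B config.json supplies (hidden_size 4096, intermediate_size 11008). The snippet simply copies find_n_mult() and is an illustration, not part of the patch:

# Illustration only: find_n_mult() recovers n_mult = 256 for LLaMA-7B.
def find_n_mult(n_ff: int, n_embd: int) -> int:
    for n_mult in range(256, 1, -1):
        calc_ff = (((8 * n_embd) // 3 + n_mult - 1) // n_mult) * n_mult
        if calc_ff == n_ff:
            return n_mult
    return 1

n_embd, n_ff = 4096, 11008   # hidden_size / intermediate_size for LLaMA-7B
# (8 * 4096) // 3 == 10922, rounded up to a multiple of 256 -> 43 * 256 == 11008
assert find_n_mult(n_ff, n_embd) == 256
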
@@ -595,18 +643,17 @@ def load() -> Tensor:
     return LazyTensor(load, lazy_tensor.shape, lazy_tensor.data_type, f'permute({n_head}) ' + lazy_tensor.description)
 
 
-def convert_transformers_to_orig(model: LazyModel) -> LazyModel:
+def convert_transformers_to_orig(model: LazyModel, params: Params) -> LazyModel:
     out: LazyModel = {}
     out["tok_embeddings.weight"] = model["model.embed_tokens.weight"]
     out["norm.weight"] = model["model.norm.weight"]
     out["output.weight"] = model["lm_head.weight"]
 
-    n_head = model["model.layers.0.self_attn.q_proj.weight"].shape[1] // 128
     for i in itertools.count():
         if f"model.layers.{i}.self_attn.q_proj.weight" not in model:
             break
-        out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], n_head)
-        out[f"layers.{i}.attention.wk.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], n_head)
+        out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head)
+        out[f"layers.{i}.attention.wk.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head)
         out[f"layers.{i}.attention.wv.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
         out[f"layers.{i}.attention.wo.weight"] = model[f"model.layers.{i}.self_attn.o_proj.weight"]
 
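
Note: passing params.n_head here replaces the old guess derived from the projection width (shape[1] // 128), which silently assumes a head dimension of 128. That happens to hold for the original LLaMA checkpoints, but it would miscount heads for any checkpoint with a different head size, which is why the config-derived value is preferable when config.json is available. A small illustration, not part of the patch; the 2048-dim model below is made up:

# Illustration only: why deriving n_head from the weight width can go wrong.
embd_7b = 4096                                     # LLaMA-7B: 32 heads of size 128
print(embd_7b // 128)                              # 32 -> the old guess is correct here

embd_hypothetical, heads_hypothetical = 2048, 32   # made-up model with head size 64
print(embd_hypothetical // 128)                    # 16 -> wrong; params.n_head avoids the guess
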
@@ -920,7 +967,7 @@ class OutputFile:
     def __init__(self, fname_out: Path) -> None:
         self.fout = open(fname_out, "wb")
 
-    def write_file_header(self, params: Params) -> None:
+    def write_file_header(self, params: Params, file_type: GGMLFileType) -> None:
         self.fout.write(b"ggjt"[::-1])  # magic
         values = [
             1,  # file version
@@ -930,7 +977,7 @@ def write_file_header(self, params: Params) -> None:
             params.n_head,
             params.n_layer,
             params.n_embd // params.n_head,  # rot (obsolete)
-            params.file_type.value,
+            file_type.value,
         ]
         self.fout.write(struct.pack("i" * len(values), *values))
 
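
Note: write_file_header() now receives the file type as its own argument instead of reading it from Params. For context, a sketch of reading the resulting ggjt header back; the three fields that fall between the two hunks above are assumed to be n_vocab, n_embd and n_mult, as in the upstream convert script, so treat the exact layout as an assumption rather than something this diff shows:

# Sketch only, not part of the patch: unpack the header emitted by write_file_header().
# Assumes the fields not visible in the hunks are n_vocab, n_embd, n_mult.
import struct

def read_ggjt_header(path: str) -> dict:
    with open(path, "rb") as f:
        magic = f.read(4)                          # the file stores b"ggjt"[::-1]
        fields = struct.unpack("i" * 8, f.read(4 * 8))
    version, n_vocab, n_embd, n_mult, n_head, n_layer, rot, ftype = fields
    return {"magic": magic[::-1], "version": version, "n_vocab": n_vocab,
            "n_embd": n_embd, "n_mult": n_mult, "n_head": n_head,
            "n_layer": n_layer, "rot": rot, "ftype": ftype}
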
@@ -958,10 +1005,10 @@ def write_vocab_only(fname_out: Path, vocab: Vocab) -> None:
         of.fout.close()
 
     @staticmethod
-    def write_all(fname_out: Path, params: Params, model: LazyModel, vocab: Vocab) -> None:
+    def write_all(fname_out: Path, params: Params, file_type: GGMLFileType, model: LazyModel, vocab: Vocab) -> None:
         check_vocab_size(params, vocab)
         of = OutputFile(fname_out)
-        of.write_file_header(params)
+        of.write_file_header(params, file_type)
         print("Writing vocab...")
         of.write_vocab(vocab)
 
@@ -997,11 +1044,11 @@ def pick_output_type(model: LazyModel, output_type_str: Optional[str]) -> GGMLFileType:
     raise Exception(f"Unexpected combination of types: {name_to_type}")
 
 
-def do_necessary_conversions(model: LazyModel) -> LazyModel:
+def do_necessary_conversions(model: LazyModel, params: Params) -> LazyModel:
     model = handle_quantization(model)
 
     if "lm_head.weight" in model:
-        model = convert_transformers_to_orig(model)
+        model = convert_transformers_to_orig(model, params)
     model = filter_and_sort_tensors(model)
 
     return model
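
Note: the "lm_head.weight" probe is what distinguishes a HuggingFace-transformers LLaMA export (model.embed_tokens.weight, model.layers.N.*, lm_head.weight) from the original tensor naming (tok_embeddings.weight, layers.N.*, output.weight), so only transformers-style checkpoints go through convert_transformers_to_orig(). A toy illustration with placeholder dicts, not part of the patch:

# Toy illustration only: the key probe that decides whether the rename runs.
hf_style   = {"model.embed_tokens.weight": 0, "lm_head.weight": 0}
orig_style = {"tok_embeddings.weight": 0, "output.weight": 0}

for name, tensors in (("transformers", hf_style), ("original", orig_style)):
    print(name, "needs rename:", "lm_head.weight" in tensors)
# transformers needs rename: True
# original needs rename: False
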
@@ -1107,14 +1154,14 @@ def load_vocab(path: Path) -> SentencePieceVocab:
     return SentencePieceVocab(path, added_tokens_path if added_tokens_path.exists() else None)
 
 
-def default_outfile(model_paths: List[Path], params: Params) -> Path:
+def default_outfile(model_paths: List[Path], file_type: GGMLFileType) -> Path:
     namestr = {
         GGMLFileType.AllF32: "f32",
         GGMLFileType.MostlyF16: "f16",
         GGMLFileType.MostlyQ4_0: "q4_0",
         GGMLFileType.MostlyQ4_1: "q4_1",
         GGMLFileType.PerLayerIsQ4_1: "q4_1",
-    }[params.file_type]
+    }[file_type]
     ret = model_paths[0].parent / f"ggml-model-{namestr}.bin"
     if ret in model_paths:
         sys.stderr.write(
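
Note: with the file type passed in directly, the default output name depends only on the chosen output type and the input location. A small example, not part of the patch, using hypothetical paths and mirroring the namestr lookup above:

# Example only: what default_outfile() would produce for an f16 conversion.
from pathlib import Path

model_paths = [Path("models/7B/consolidated.00.pth")]   # hypothetical input layout
namestr = "f16"                                          # e.g. GGMLFileType.MostlyF16
print(model_paths[0].parent / f"ggml-model-{namestr}.bin")
# models/7B/ggml-model-f16.bin
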
@@ -1164,13 +1211,13 @@ def main(args_in: Optional[List[str]] = None) -> None:
     else:
         vocab_dir = args.vocab_dir if args.vocab_dir else model_plus.paths[0].parent
         vocab = load_vocab(vocab_dir)
+    params = Params.load(model_plus)
     model = model_plus.model
-    model = do_necessary_conversions(model)
+    model = do_necessary_conversions(model, params)
     output_type = pick_output_type(model, args.outtype)
     model = convert_to_output_type(model, output_type)
-    params = Params.guessed(model, output_type)
-    outfile = args.outfile or default_outfile(model_plus.paths, params)
-    OutputFile.write_all(outfile, params, model, vocab)
+    outfile = args.outfile or default_outfile(model_plus.paths, output_type)
+    OutputFile.write_all(outfile, params, output_type, model, vocab)
     print(f"Wrote {outfile}")
 
 
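
Note: with these changes, main() fixes the hyperparameters via Params.load() before any tensor renaming or quantization, preferring a config.json next to the checkpoint over shape-based guessing, and the output file type travels as its own argument. A minimal config.json containing only the five keys loadHFTransformerJson() reads, filled with the LLaMA-7B values (the directory path is hypothetical), could be produced like this; it is an example, not part of the patch:

# Example only: the five config.json keys read by Params.loadHFTransformerJson().
import json
from pathlib import Path

config = {
    "vocab_size": 32000,
    "hidden_size": 4096,
    "num_attention_heads": 32,
    "num_hidden_layers": 32,
    "intermediate_size": 11008,
}
model_dir = Path("models/7B-hf")        # hypothetical checkpoint directory
model_dir.mkdir(parents=True, exist_ok=True)
(model_dir / "config.json").write_text(json.dumps(config, indent=2))
# Params.load() will now use this config.json instead of guessing from tensor shapes.
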