@@ -78,7 +78,7 @@ def parse_args() -> argparse.Namespace:
 with open(dir_model / "config.json", "r", encoding="utf-8") as f:
     hparams = json.load(f)
 
-if hparams["architectures"][0] != "FalconForCausalLM":
+if hparams["architectures"][0] not in ("RWForCausalLM", "FalconForCausalLM"):
     print("Model architecture not supported: " + hparams["architectures"][0])
 
     sys.exit(1)
@@ -97,19 +97,26 @@ def parse_args() -> argparse.Namespace:
 
 print("gguf: get model metadata")
 
-block_count = hparams["num_hidden_layers"]
+block_count = hparams.get("num_hidden_layers")
+if block_count is None:
+    block_count = hparams["n_layer"]  # old name
+
+n_head = hparams.get("num_attention_heads")
+if n_head is None:
+    n_head = hparams["n_head"]  # old name
+
+n_head_kv = hparams.get("num_kv_heads")
+if n_head_kv is None:
+    n_head_kv = hparams.get("n_head_kv", 1)  # old name
 
 gguf_writer.add_name("Falcon")
 gguf_writer.add_context_length(2048)  # not in config.json
 gguf_writer.add_tensor_data_layout("jploski")  # qkv tensor transform
 gguf_writer.add_embedding_length(hparams["hidden_size"])
 gguf_writer.add_feed_forward_length(4 * hparams["hidden_size"])
 gguf_writer.add_block_count(block_count)
-gguf_writer.add_head_count(hparams["num_attention_heads"])
-if "num_kv_heads" in hparams:
-    gguf_writer.add_head_count_kv(hparams["num_kv_heads"])
-else:
-    gguf_writer.add_head_count_kv(1)
+gguf_writer.add_head_count(n_head)
+gguf_writer.add_head_count_kv(n_head_kv)
 gguf_writer.add_layer_norm_eps(hparams["layer_norm_epsilon"])
 gguf_writer.add_file_type(ftype)
 
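A minimal sketch of the same new-name/old-name fallback pattern added above, assuming the config.json field names used in this hunk; the get_hparam helper is hypothetical and not part of this change:

    import json
    from pathlib import Path

    def get_hparam(hparams: dict, *names: str, default=None):
        # Return the value of the first key present in hparams, else the default.
        for name in names:
            if name in hparams:
                return hparams[name]
        return default

    hparams = json.loads(Path("config.json").read_text(encoding="utf-8"))

    block_count = get_hparam(hparams, "num_hidden_layers", "n_layer")         # new name, old name
    n_head      = get_hparam(hparams, "num_attention_heads", "n_head")
    n_head_kv   = get_hparam(hparams, "num_kv_heads", "n_head_kv", default=1)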
@@ -152,10 +159,6 @@ def parse_args() -> argparse.Namespace:
 
 tensor_map = gguf.get_tensor_name_map(ARCH, block_count)
 
-# params for qkv transform
-n_head = hparams["num_attention_heads"]
-n_head_kv = hparams["num_kv_heads"] if "num_kv_heads" in hparams else 1
-
 head_dim = hparams["hidden_size"] // n_head
 
 # tensor info
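A quick sanity check of the head_dim computation above, using hidden sizes and head counts assumed from the published Falcon model configs rather than read from this repo:

    # Falcon-7B : hidden_size = 4544, num_attention_heads = 71  -> head_dim = 64
    # Falcon-40B: hidden_size = 8192, num_attention_heads = 128 -> head_dim = 64
    assert 4544 // 71 == 64
    assert 8192 // 128 == 64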