@@ -209,6 +209,8 @@ def from_model_architecture(model_architecture):
             return InternLM2Model
         if model_architecture == "MiniCPMForCausalLM":
             return MiniCPMModel
+        if model_architecture == "BertModel":
+            return BertModel
         return Model

     def _is_model_safetensors(self) -> bool:
@@ -264,6 +266,8 @@ def _get_model_architecture(self) -> gguf.MODEL_ARCH:
             return gguf.MODEL_ARCH.INTERNLM2
         if arch == "MiniCPMForCausalLM":
             return gguf.MODEL_ARCH.MINICPM
+        if arch == "BertModel":
+            return gguf.MODEL_ARCH.BERT

         raise NotImplementedError(f'Architecture "{arch}" not supported!')

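For context: the two hunks above key the new BertModel path off the `architectures` field of the checkpoint's Hugging Face config.json, which for a stock BERT encoder is typically ["BertModel"]. A minimal sketch of how that string could be obtained from a model directory (hypothetical standalone helper, not part of this commit; the converter itself reads it from its already-loaded hparams):

import json
from pathlib import Path

def read_model_architecture(model_dir: Path) -> str:
    # config.json for a BERT checkpoint usually contains: "architectures": ["BertModel"]
    with open(model_dir / "config.json", encoding="utf-8") as f:
        hparams = json.load(f)
    return hparams["architectures"][0]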
@@ -1629,6 +1633,96 @@ def write_tensors(self):
             self.post_write_tensors(tensor_map, name, data_torch)


+class BertModel(Model):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.block_count = self.hparams["num_hidden_layers"]
+
+    def set_gguf_parameters(self):
+        # TODO(cebtenzzre): merge with parent class
+        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
+        self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
+        self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
+        self.gguf_writer.add_block_count(self.block_count)
+        self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
+        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"])
+        self.gguf_writer.add_causal_attention(False)
+        self.gguf_writer.add_file_type(self.ftype)
+
+    def set_vocab(self):
+        path = self.dir_model
+        added_tokens_path = self.dir_model if self.dir_model.exists() else None
+
+        # use huggingface vocab to get all tokens
+        vocab = HfVocab(path, added_tokens_path)
+        tokens, scores, toktypes = zip(*vocab.all_tokens())
+        assert len(tokens) == vocab.vocab_size
+
+        # we need this to validate the size of the token_type embeddings
+        # though currently we are passing all zeros to the token_type embeddings
+        n_token_types = len(set(toktypes))
+        self.gguf_writer.add_token_type_count(n_token_types)
+
+        # convert to phantom space vocab
+        def phantom(tok, typ):
+            if tok.startswith(b"[") and tok.endswith(b"]"):
+                return tok
+            if tok.startswith(b"##"):
+                return tok[2:]
+            return b"\xe2\x96\x81" + tok
+        tokens = [phantom(t, y) for t, y in zip(tokens, toktypes)]
+
+        # set up bos and eos tokens (cls and sep)
+        self.gguf_writer.add_bos_token_id(vocab.tokenizer.cls_token_id)
+        self.gguf_writer.add_eos_token_id(vocab.tokenizer.sep_token_id)
+
+        # add vocab to gguf
+        self.gguf_writer.add_tokenizer_model("bert")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+
+        # handle special tokens
+        special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def write_tensors(self):
+        tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
+        tensors = dict(self.get_tensors())
+        for name, data_torch in tensors.items():
+            # we are only using BERT for embeddings so we don't need the pooling layer
+            if name in ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias"):
+                continue  # we don't need these
+
+            # map tensor names
+            new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
+            if new_name is None:
+                print(f"Can not map tensor {name!r}")
+                sys.exit()
+
+            data = data_torch.squeeze().numpy()
+            n_dims = len(data.shape)
+            new_dtype: type[np.floating[Any]]
+
+            if (
+                self.ftype == 1 and name.endswith(".weight") and n_dims == 2
+                and name != "embeddings.token_type_embeddings.weight"  # not used with get_rows, must be F32
+            ):
+                # if f16 desired, convert any float32 2-dim weight tensors to float16
+                new_dtype = np.float16
+            else:
+                # if f32 desired, convert any float16 to float32
+                new_dtype = np.float32
+
+            print(f"{new_name}, n_dims = {n_dims}, {data_torch.dtype} --> {new_dtype}")
+
+            if data.dtype != new_dtype:
+                data = data.astype(new_dtype)
+
+            self.gguf_writer.add_tensor(new_name, data)
+
+
 ###### CONVERSION LOGIC ######
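Note on the "phantom space" conversion in set_vocab: BERT's WordPiece vocabulary marks word-continuation pieces with a leading "##", while llama.cpp's tokenizer convention (inherited from SentencePiece) marks word-initial pieces with a leading U+2581 ("▁"). The helper therefore leaves bracketed special tokens such as [CLS] and [SEP] untouched, strips "##" from continuation pieces, and prepends the U+2581 byte sequence (0xE2 0x96 0x81) to everything else. A standalone sketch of the same mapping, assuming byte-string tokens as HfVocab yields them (the unused token-type argument is dropped here):

def phantom(tok: bytes) -> bytes:
    if tok.startswith(b"[") and tok.endswith(b"]"):
        return tok                    # special tokens, e.g. [CLS], [SEP], [MASK]
    if tok.startswith(b"##"):
        return tok[2:]                # continuation piece: drop the WordPiece marker
    return b"\xe2\x96\x81" + tok      # word-initial piece: prepend U+2581 "▁"

assert phantom(b"[CLS]") == b"[CLS]"
assert phantom(b"##ing") == b"ing"
assert phantom(b"hello") == "\u2581hello".encode("utf-8")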
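The dtype choice in write_tensors can likewise be read as a small pure rule: when F16 output is requested (self.ftype == 1), two-dimensional *.weight tensors are stored as F16, except embeddings.token_type_embeddings.weight, which is not read through get_rows and must remain F32; everything else is written as F32. A sketch of that rule in isolation (hypothetical helper restating the condition above, with Hugging Face BERT tensor names as examples):

import numpy as np

def choose_dtype(name: str, n_dims: int, ftype: int) -> type:
    # ftype == 1 means F16 output was requested
    if (
        ftype == 1 and name.endswith(".weight") and n_dims == 2
        and name != "embeddings.token_type_embeddings.weight"
    ):
        return np.float16
    return np.float32

assert choose_dtype("encoder.layer.0.attention.self.query.weight", 2, 1) is np.float16
assert choose_dtype("embeddings.token_type_embeddings.weight", 2, 1) is np.float32
assert choose_dtype("encoder.layer.0.output.LayerNorm.bias", 1, 1) is np.float32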