@@ -46,7 +46,6 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
     SDVersion version = VERSION_SD1;
     PMVersion pm_version = PM_VERSION_1;
     CLIPTokenizer tokenizer;
-    ggml_type wtype;
     std::shared_ptr<CLIPTextModelRunner> text_model;
     std::shared_ptr<CLIPTextModelRunner> text_model2;
 
@@ -57,25 +56,25 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
     std::vector<std::string> readed_embeddings;
 
     FrozenCLIPEmbedderWithCustomWords(ggml_backend_t backend,
-                                      ggml_type wtype,
+                                      std::map<std::string, enum ggml_type>& tensor_types,
                                       const std::string& embd_dir,
                                       SDVersion version = VERSION_SD1,
                                       PMVersion pv = PM_VERSION_1,
                                       int clip_skip = -1)
-        : version(version), pm_version(pv), tokenizer(version == VERSION_SD2 ? 0 : 49407), embd_dir(embd_dir), wtype(wtype) {
+        : version(version), pm_version(pv), tokenizer(version == VERSION_SD2 ? 0 : 49407), embd_dir(embd_dir) {
         if (clip_skip <= 0) {
             clip_skip = 1;
             if (version == VERSION_SD2 || version == VERSION_SDXL) {
                 clip_skip = 2;
             }
         }
         if (version == VERSION_SD1) {
-            text_model = std::make_shared<CLIPTextModelRunner>(backend, wtype, OPENAI_CLIP_VIT_L_14, clip_skip);
+            text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, clip_skip);
         } else if (version == VERSION_SD2) {
-            text_model = std::make_shared<CLIPTextModelRunner>(backend, wtype, OPEN_CLIP_VIT_H_14, clip_skip);
+            text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPEN_CLIP_VIT_H_14, clip_skip);
         } else if (version == VERSION_SDXL) {
-            text_model  = std::make_shared<CLIPTextModelRunner>(backend, wtype, OPENAI_CLIP_VIT_L_14, clip_skip, false);
-            text_model2 = std::make_shared<CLIPTextModelRunner>(backend, wtype, OPEN_CLIP_VIT_BIGG_14, clip_skip, false);
+            text_model  = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, clip_skip, false);
+            text_model2 = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.1.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, clip_skip, false);
         }
     }
 
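Note: this hunk swaps the single runner-wide weight type for a per-tensor type map plus a string prefix naming the runner's subtree of the checkpoint, so differently quantized tensors can coexist in one model. A minimal sketch of how a runner might resolve a weight's type from that map follows; the helper name, lookup key, and F32 fallback are illustrative assumptions, not code from this PR.

    #include <map>
    #include <string>
    #include "ggml.h"

    // Hypothetical helper: return the type recorded for a tensor at load
    // time, falling back to a default when the checkpoint did not list it.
    static enum ggml_type lookup_tensor_type(const std::map<std::string, enum ggml_type>& tensor_types,
                                             const std::string& name,
                                             enum ggml_type fallback) {
        auto it = tensor_types.find(name);
        return it != tensor_types.end() ? it->second : fallback;
    }

    // e.g. with prefix = "cond_stage_model.transformer.text_model":
    // enum ggml_type t = lookup_tensor_type(tensor_types,
    //                                       prefix + ".embeddings.token_embedding.weight",
    //                                       GGML_TYPE_F32);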
@@ -138,14 +137,14 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
                     LOG_DEBUG("embedding wrong hidden size, got %i, expected %i", tensor_storage.ne[0], hidden_size);
                     return false;
                 }
-                embd        = ggml_new_tensor_2d(embd_ctx, wtype, hidden_size, tensor_storage.n_dims > 1 ? tensor_storage.ne[1] : 1);
+                embd        = ggml_new_tensor_2d(embd_ctx, tensor_storage.type, hidden_size, tensor_storage.n_dims > 1 ? tensor_storage.ne[1] : 1);
                 *dst_tensor = embd;
                 return true;
             };
             model_loader.load_tensors(on_load, NULL);
             readed_embeddings.push_back(embd_name);
             token_embed_custom.resize(token_embed_custom.size() + ggml_nbytes(embd));
-            memcpy((void*)(token_embed_custom.data() + num_custom_embeddings * hidden_size * ggml_type_size(wtype)),
+            memcpy((void*)(token_embed_custom.data() + num_custom_embeddings * hidden_size * ggml_type_size(embd->type)),
                    embd->data,
                    ggml_nbytes(embd));
             for (int i = 0; i < embd->ne[1]; i++) {
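Note: the embedding tensor is now allocated with the type recorded in the embedding file (tensor_storage.type) instead of the runner-wide wtype, and the memcpy offset is derived from embd->type to match. A quick check of the offset arithmetic with illustrative numbers (not code from this PR):

    // Assume hidden_size = 768 and an F16 embedding, so
    // ggml_type_size(GGML_TYPE_F16) == 2 bytes per element.
    size_t row_bytes = 768 * ggml_type_size(GGML_TYPE_F16);  // 1536 bytes per token row
    size_t offset    = 2 * row_bytes;                        // with 2 embeddings stored, the next starts at byte 3072
    // The offset stays valid as long as every loaded embedding shares one
    // element size, i.e. all embedding files use the same tensor type.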
@@ -590,9 +589,9 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
 struct FrozenCLIPVisionEmbedder : public GGMLRunner {
     CLIPVisionModelProjection vision_model;
 
-    FrozenCLIPVisionEmbedder(ggml_backend_t backend, ggml_type wtype)
-        : vision_model(OPEN_CLIP_VIT_H_14, true), GGMLRunner(backend, wtype) {
-        vision_model.init(params_ctx, wtype);
+    FrozenCLIPVisionEmbedder(ggml_backend_t backend, std::map<std::string, enum ggml_type>& tensor_types)
+        : vision_model(OPEN_CLIP_VIT_H_14, true), GGMLRunner(backend) {
+        vision_model.init(params_ctx, tensor_types, "cond_stage_model.transformer");
     }
 
     std::string get_desc() {
@@ -627,7 +626,6 @@ struct FrozenCLIPVisionEmbedder : public GGMLRunner {
 };
 
 struct SD3CLIPEmbedder : public Conditioner {
-    ggml_type wtype;
     CLIPTokenizer clip_l_tokenizer;
     CLIPTokenizer clip_g_tokenizer;
     T5UniGramTokenizer t5_tokenizer;
@@ -636,15 +634,15 @@ struct SD3CLIPEmbedder : public Conditioner {
     std::shared_ptr<T5Runner> t5;
 
     SD3CLIPEmbedder(ggml_backend_t backend,
-                    ggml_type wtype,
+                    std::map<std::string, enum ggml_type>& tensor_types,
                     int clip_skip = -1)
-        : wtype(wtype), clip_g_tokenizer(0) {
+        : clip_g_tokenizer(0) {
         if (clip_skip <= 0) {
             clip_skip = 2;
         }
-        clip_l = std::make_shared<CLIPTextModelRunner>(backend, wtype, OPENAI_CLIP_VIT_L_14, clip_skip, false);
-        clip_g = std::make_shared<CLIPTextModelRunner>(backend, wtype, OPEN_CLIP_VIT_BIGG_14, clip_skip, false);
-        t5     = std::make_shared<T5Runner>(backend, wtype);
+        clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, clip_skip, false);
+        clip_g = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_g.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, clip_skip, false);
+        t5     = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
     }
 
     void set_clip_skip(int clip_skip) {
@@ -974,21 +972,19 @@ struct SD3CLIPEmbedder : public Conditioner {
 };
 
 struct FluxCLIPEmbedder : public Conditioner {
-    ggml_type wtype;
     CLIPTokenizer clip_l_tokenizer;
     T5UniGramTokenizer t5_tokenizer;
     std::shared_ptr<CLIPTextModelRunner> clip_l;
     std::shared_ptr<T5Runner> t5;
 
     FluxCLIPEmbedder(ggml_backend_t backend,
-                     ggml_type wtype,
-                     int clip_skip = -1)
-        : wtype(wtype) {
+                     std::map<std::string, enum ggml_type>& tensor_types,
+                     int clip_skip = -1) {
         if (clip_skip <= 0) {
             clip_skip = 2;
         }
-        clip_l = std::make_shared<CLIPTextModelRunner>(backend, wtype, OPENAI_CLIP_VIT_L_14, clip_skip, true);
-        t5     = std::make_shared<T5Runner>(backend, wtype);
+        clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, clip_skip, true);
+        t5     = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
     }
 
     void set_clip_skip(int clip_skip) {
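Note: for reference, a call site under the new signatures might look like the sketch below; the map contents are hypothetical and would normally be filled in by the model loader while it scans the checkpoint.

    #include <map>
    #include <memory>
    #include "ggml.h"

    // Hypothetical usage: one entry per tensor name found in the checkpoint.
    std::map<std::string, enum ggml_type> tensor_types;
    tensor_types["text_encoders.clip_l.transformer.text_model.embeddings.token_embedding.weight"] = GGML_TYPE_F16;

    auto cond = std::make_shared<FluxCLIPEmbedder>(backend, tensor_types);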