@@ -15925,19 +15925,20 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_IQ4_XS;
             else new_type = GGML_TYPE_Q4_K;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS ||
+                 ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL ||
+                 ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) {
             if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
             else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q4_K;
             else new_type = GGML_TYPE_Q5_K;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS ||
-                 ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M ||
-                 ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) {
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
             if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
             else new_type = GGML_TYPE_Q5_K;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
-            if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q5_K;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
+            if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
+            else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q5_K;
             else new_type = GGML_TYPE_Q6_K;
         }
         else if (new_type != GGML_TYPE_Q8_0) {
@@ -15970,10 +15971,13 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
             new_type = GGML_TYPE_IQ2_S;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
             if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_IQ2_S;
             else new_type = GGML_TYPE_IQ3_XXS;
         }
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
+            new_type = GGML_TYPE_IQ3_XXS;
+        }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
             if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_IQ3_XXS;
             else new_type = GGML_TYPE_IQ3_S;
@@ -16018,10 +16022,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         else if ((ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) &&
                  difquant_half_tensors(qs.i_attention_wv, qs.n_attention_wv)) new_type = GGML_TYPE_Q6_K;
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S && qs.i_attention_wv < 4) new_type = GGML_TYPE_Q5_K;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
-            new_type = (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_XXS;
-        }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M ||
+                 ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
             new_type = (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_XXS;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
@@ -16035,7 +16037,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M ||
                  ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL) {
             if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q5_K;
-            else new_type = GGML_TYPE_IQ4_XS;
+            else new_type = GGML_TYPE_Q4_K;
         }
         else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) &&
                  (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
@@ -16072,11 +16074,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                  ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M) && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
             new_type = GGML_TYPE_Q5_K;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
-            if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ2_XS;
-            else new_type = GGML_TYPE_IQ2_XXS;
-        }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS) {
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S) {
             if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ2_S;
             else if (qs.model.hparams.n_gqa() >= 2) new_type = GGML_TYPE_IQ2_XXS;
             else new_type = GGML_TYPE_IQ1_M;
@@ -16204,21 +16202,24 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             // same quantization as before imatrix stuff, and b) Q4_1/Q5_1 do go crazy on ffn_down without an imatrix.
             new_type = ftype == LLAMA_FTYPE_MOSTLY_Q4_0 ? GGML_TYPE_Q4_1 : GGML_TYPE_Q5_1;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS && (i_layer < n_layer/8)) new_type = GGML_TYPE_IQ2_XXS;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
-            if (difquant_half_tensors(i_layer, n_layer)) new_type = GGML_TYPE_IQ2_XXS;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS && (difquant_init_end_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_S) {
+            if (difquant_three_eights_tensors(i_layer, n_layer)) new_type = GGML_TYPE_IQ2_XXS;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL) new_type = GGML_TYPE_IQ2_XXS;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS && (difquant_half_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XS;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
+            if (difquant_three_eights_tensors(i_layer, n_layer)) new_type = GGML_TYPE_IQ2_XXS;
+        }
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL && (difquant_six_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS && (difquant_three_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
-            if (i_layer < n_layer/8) new_type = GGML_TYPE_IQ2_S;
+            if (difquant_three_eights_tensors(i_layer, n_layer)) new_type = GGML_TYPE_IQ2_S;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S && (difquant_half_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_S;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S && (difquant_six_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_S;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
             new_type = difquant_init_end_tensors(i_layer, n_layer) ? GGML_TYPE_IQ3_XXS : GGML_TYPE_IQ2_S;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
-            new_type = difquant_half_tensors(i_layer, n_layer) ? GGML_TYPE_IQ3_XXS : GGML_TYPE_IQ2_S;
+            new_type = difquant_six_eights_tensors(i_layer, n_layer) ? GGML_TYPE_IQ3_XXS : GGML_TYPE_IQ2_S;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
             new_type = difquant_init_tensors(i_layer, n_layer) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
@@ -16254,24 +16255,37 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     } else if (name.find("attn_output.weight") != std::string::npos) {
         if (arch != LLM_ARCH_FALCON) {
             if (qs.model.hparams.n_expert >= 4) {
-                if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS ||
+                if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S ||
+                    ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M ||
+                    ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S ||
+                    ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
+                    new_type = GGML_TYPE_Q4_K;
+                }
+                else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS ||
                     ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL ||
                     ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL ||
                     ftype == LLAMA_FTYPE_MOSTLY_Q3_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL ||
                     ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S ||
                     ftype == LLAMA_FTYPE_MOSTLY_IQ4_XSR ||
-                    ftype == LLAMA_FTYPE_MOSTLY_IQ3_M || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) new_type = GGML_TYPE_Q5_K;
-            }
-            else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S ||
-                     ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M ||
-                     ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S) {
-                if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q4_K;
-                else {
-                    if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S ||
-                        ftype == LLAMA_FTYPE_MOSTLY_IQ1_M || ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL) new_type = GGML_TYPE_IQ2_XXS;
-                    else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S) new_type = GGML_TYPE_IQ2_S;
-                    else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) new_type = GGML_TYPE_IQ3_XXS;
+                    ftype == LLAMA_FTYPE_MOSTLY_IQ3_M || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) {
+                    new_type = GGML_TYPE_Q5_K;
+                }
+                else if (ftype == LLAMA_FTYPE_MOSTLY_Q5_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) new_type = GGML_TYPE_Q5_K;
+                else new_type = GGML_TYPE_Q8_0;
+            }
+            else if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2) {
+                if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S ||
+                    ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) new_type = GGML_TYPE_IQ2_XS;
+                else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS ||
+                         ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_S) new_type = GGML_TYPE_IQ2_S;
+                else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) new_type = GGML_TYPE_IQ3_XXS;
             }
+            else if (qs.model.hparams.n_gqa() >= 2) {
+                if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S ||
+                    ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) new_type = GGML_TYPE_IQ2_XXS;
+                else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS) new_type = GGML_TYPE_IQ2_XS;
+                else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_S) new_type = GGML_TYPE_IQ2_S;
+                else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) new_type = GGML_TYPE_IQ3_XXS;
         } else {
             if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) new_type = GGML_TYPE_Q3_K;
             else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) new_type = GGML_TYPE_Q4_K;
@@ -16323,10 +16337,10 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS && (difquant_init_end_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ1_M;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_S && (difquant_three_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_M && (difquant_three_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL && (difquant_half_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL && (difquant_six_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS && (difquant_three_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XS;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS && (difquant_init_end_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XS;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S && (difquant_half_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_S;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS && (difquant_three_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_S;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S && (difquant_six_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_S;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_M && (difquant_init_end_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ3_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL && (difquant_six_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ3_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && (difquant_init_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ3_S;
@@ -16343,10 +16357,10 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS && (difquant_init_end_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ1_M;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_S && (difquant_three_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_M && (difquant_three_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL && (difquant_half_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL && (difquant_six_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS && (difquant_three_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XS;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS && (difquant_init_end_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XS;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S && (difquant_half_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_S;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS && (difquant_three_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_S;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S && (difquant_six_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_S;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_M && (difquant_init_end_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ3_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL && (difquant_six_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ3_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && (difquant_init_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ3_S;
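
Note on the difquant_* predicates this patch leans on: they are fork-specific helpers, not part of mainline llama.cpp, and their definitions sit outside these hunks. Judging by the names and by the `i_layer < n_layer/8` checks they replace, they appear to mark a fixed fraction of a tensor family's layers for a one-step quant bump. A minimal sketch of what such selectors could look like, under the assumption that the selected fraction is biased toward the first and last layers (the fork's actual layer choice may differ):

// Hypothetical sketches of the difquant_* selectors, for illustration only.
// Each returns true for roughly the named fraction of layers; the bias toward
// the ends of the model is an assumption, not the fork's confirmed layout.
static bool difquant_init_end_tensors(int i_layer, int n_layer) {
    // ~2/8 of layers: the first eighth and the last eighth.
    return i_layer < n_layer/8 || i_layer >= 7*n_layer/8;
}
static bool difquant_three_eights_tensors(int i_layer, int n_layer) {
    // ~3/8 of layers: the first eighth and the last quarter.
    return i_layer < n_layer/8 || i_layer >= 6*n_layer/8;
}
static bool difquant_six_eights_tensors(int i_layer, int n_layer) {
    // ~6/8 of layers: everything except the middle quarter.
    return i_layer < 3*n_layer/8 || i_layer >= 5*n_layer/8;
}

Under that reading, the net effect on ffn_down is a rebalance of how many layers get the bump: IQ1_S/IQ1_M and IQ2_XXS shrink from half of the layers to three eighths, IQ2_S and IQ2_XL grow from half to six eighths, and IQ1_XL drops from all layers to six eighths.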