@@ -5239,11 +5239,12 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
         case LLAMA_FTYPE_MOSTLY_IQ4_NL: return "IQ4_NL - 4.5 bpw";
         case LLAMA_FTYPE_MOSTLY_IQ4_XS: return "IQ4_XS - 4.25 bpw";
         case LLAMA_FTYPE_MOSTLY_IQ3_S: return "IQ3_S - 3.4375 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_M: return "IQ3_S mix - 3.70 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_ML: return "IQ3_S mix - 3.80 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_XL: return "IQ3_S mix - 3.90 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_XXL: return "IQ3_S mix - 4.00 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_XXXL: return "IQ3_S mix - 4.10 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_M: return "IQ3_S mix - 3.60 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_ML: return "IQ3_S mix - 3.75 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_XL: return "IQ3_S mix - 3.85 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_XXL: return "IQ3_S mix - 3.95 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_XXXL: return "IQ3_S mix - 4.05 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_UXL: return "IQ3_S mix - 4.15 bpw";
         case LLAMA_FTYPE_MOSTLY_IQ4_XSR: return "IQ4_XS mix - 4.xx bpw";
         case LLAMA_FTYPE_MOSTLY_Q4_0_4_4: return "Q4_0_4_4";
         case LLAMA_FTYPE_MOSTLY_Q4_0_4_8: return "Q4_0_4_8";
@@ -17648,7 +17649,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS ||
                      ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL ||
                      ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_ML ||
-                     ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL) new_type = GGML_TYPE_IQ4_XS;
+                     ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) new_type = GGML_TYPE_IQ4_XS;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) {
             if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_Q2_K;
@@ -17673,7 +17674,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_IQ3_XXS;
             else new_type = GGML_TYPE_IQ3_S;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL) {
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL ||
+                 ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
             if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_IQ3_S;
             else if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ4_XS;
         }
@@ -17698,7 +17700,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L || ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M ||
             ftype == LLAMA_FTYPE_MOSTLY_Q5_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q6_K ||
             ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS ||
-            ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL) {
+            ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
             new_type = GGML_TYPE_Q8_0;
         }
         else if (new_type != GGML_TYPE_Q8_0) new_type = GGML_TYPE_Q6_K;
@@ -17856,6 +17858,11 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                 new_type = difquant_six_eights_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_Q6_K : GGML_TYPE_Q5_K;
             else new_type = GGML_TYPE_Q5_K;
         }
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
+            if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
+                new_type = difquant_seven_eights_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_Q6_K : GGML_TYPE_Q5_K;
+            else new_type = GGML_TYPE_Q5_K;
+        }
         // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
         //     if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
         //         new_type = difquant_first_last_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
@@ -17870,7 +17877,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         //     if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q5_K;
         //     else new_type = GGML_TYPE_Q4_K;
         // }
-        // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL) {
+        // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL ||
+        //          ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
         //     if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q5_K;
         //     else new_type = GGML_TYPE_Q4_K;
         // }
@@ -17903,7 +17911,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L || ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M ||
             ftype == LLAMA_FTYPE_MOSTLY_Q5_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q6_K ||
             ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS ||
-            ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL) {
+            ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
             new_type = GGML_TYPE_Q8_0;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
@@ -18097,6 +18105,11 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                 new_type = difquant_six_eights_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_Q6_K : GGML_TYPE_Q5_K;
             else new_type = GGML_TYPE_IQ4_XS;
         }
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
+            if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
+                new_type = difquant_seven_eights_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_Q6_K : GGML_TYPE_Q5_K;
+            else new_type = GGML_TYPE_IQ4_XS;
+        }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_XSR) {
             if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) {
                 new_type = qs.i_attention_wk < qs.n_attention_wk/8 ? GGML_TYPE_Q6_K :
@@ -18254,9 +18267,14 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         //         new_type = difquant_six_eights_tensors(qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
         //     else new_type = GGML_TYPE_IQ3_S;
         // }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S ||
-                 ftype == LLAMA_FTYPE_MOSTLY_IQ3_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL ||
-                 ftype == LLAMA_FTYPE_MOSTLY_IQ3_ML || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL) {
+        // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
+        //     if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
+        //         new_type = difquant_seven_eights_tensors(qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
+        //     else new_type = GGML_TYPE_IQ3_S;
+        // }
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S ||
+                 ftype == LLAMA_FTYPE_MOSTLY_IQ3_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL ||
+                 ftype == LLAMA_FTYPE_MOSTLY_IQ3_ML || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
             if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_IQ4_XS;
             else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) new_type = GGML_TYPE_IQ2_S;
             // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) new_type = GGML_TYPE_IQ3_XXS;
@@ -18406,6 +18424,11 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                 new_type = (difquant_six_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
             else new_type = GGML_TYPE_IQ4_XS;
         }
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
+            if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
+                new_type = (difquant_seven_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
+            else new_type = GGML_TYPE_IQ4_XS;
+        }
         else if (i_layer < n_layer/8 && (ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) && !qs.has_imatrix) {
             new_type = GGML_TYPE_Q5_K;
         }
@@ -18518,7 +18541,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS ||
             ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL ||
             ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XSR || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS ||
-            ftype == LLAMA_FTYPE_MOSTLY_IQ3_ML || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL) {
+            ftype == LLAMA_FTYPE_MOSTLY_IQ3_ML || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
             new_type = GGML_TYPE_Q5_K;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q5_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) new_type = GGML_TYPE_Q6_K;
@@ -18582,10 +18605,16 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                 new_type = difquant_six_eights_tensors(qs.i_attention_wo, qs.n_attention_wo) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
             else new_type = GGML_TYPE_IQ4_XS;
         }
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
+            if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
+                new_type = difquant_seven_eights_tensors(qs.i_attention_wo, qs.n_attention_wo) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
+            else new_type = GGML_TYPE_IQ4_XS;
+        }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS)
             new_type = GGML_TYPE_IQ3_S;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_ML || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL ||
-                 ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL) new_type = GGML_TYPE_IQ3_S;
+                 ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL)
+            new_type = GGML_TYPE_IQ3_S;
     } else {
         if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) new_type = GGML_TYPE_Q3_K;
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_XL)
@@ -18756,10 +18785,16 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                 new_type = difquant_six_eights_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
             else new_type = GGML_TYPE_Q5_K;
         }
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
+            if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
+                new_type = difquant_seven_eights_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
+            else new_type = GGML_TYPE_Q5_K;
+        }
         // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) new_type = GGML_TYPE_IQ3_S;
         // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_ML)
         //     new_type = GGML_TYPE_IQ4_XS;
-        // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL)
+        // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL ||
+        //          ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL)
         //     new_type = GGML_TYPE_Q4_K;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) new_type = GGML_TYPE_Q4_K;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_XSR) {
@@ -18884,6 +18919,11 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                 new_type = (difquant_six_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
             else new_type = (difquant_half_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
         }
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
+            if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
+                new_type = (difquant_seven_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
+            else new_type = (difquant_five_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
+        }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_XSR) {
             if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
                 new_type = (difquant_fl_more_tensors(i_layer, n_layer)) ? GGML_TYPE_Q5_K : GGML_TYPE_IQ4_XS;
@@ -19004,6 +19044,11 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                 new_type = (difquant_six_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
             else new_type = (difquant_half_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
         }
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
+            if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
+                new_type = (difquant_seven_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
+            else new_type = (difquant_five_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
+        }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_XSR) {
             if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
                 new_type = (difquant_fl_more_tensors(i_layer, n_layer)) ? GGML_TYPE_Q5_K : GGML_TYPE_IQ4_XS;
@@ -19168,8 +19213,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         case LLAMA_FTYPE_MOSTLY_IQ3_M: default_type = GGML_TYPE_IQ3_S; break;
         case LLAMA_FTYPE_MOSTLY_IQ3_ML: default_type = GGML_TYPE_IQ3_S; break;
         case LLAMA_FTYPE_MOSTLY_IQ3_XL: default_type = GGML_TYPE_IQ3_S; break;
-        case LLAMA_FTYPE_MOSTLY_IQ3_XXL: default_type = GGML_TYPE_IQ3_S; break;
+        case LLAMA_FTYPE_MOSTLY_IQ3_XXL:  default_type = GGML_TYPE_IQ3_S; break;
         case LLAMA_FTYPE_MOSTLY_IQ3_XXXL: default_type = GGML_TYPE_IQ3_S; break;
+        case LLAMA_FTYPE_MOSTLY_IQ3_UXL: default_type = GGML_TYPE_IQ3_S; break;
         case LLAMA_FTYPE_MOSTLY_Q4_0_4_4: default_type = GGML_TYPE_Q4_0_4_4; break;
         case LLAMA_FTYPE_MOSTLY_Q4_0_4_8: default_type = GGML_TYPE_Q4_0_4_8; break;
         case LLAMA_FTYPE_MOSTLY_Q4_0_8_8: default_type = GGML_TYPE_Q4_0_8_8; break;
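Every new IQ3_UXL branch in this diff keys off the difquant_seven_eights_tensors(i, n) gate to decide which slice of a tensor family gets the larger type in the ternary. The helper's definition is outside these hunks; the sketch below is an assumption-labeled approximation of its contract (select roughly seven-eighths of the index range), not the tree's actual code:

// Hypothetical sketch — NOT the difquant_seven_eights_tensors defined in this
// tree. It only illustrates the assumed contract visible at the call sites:
// return true for ~7/8 of [0, n_layers), so those tensors take the first
// (larger) quant type of the ternary and the remaining ~1/8 take the fallback.
static bool difquant_seven_eights_tensors_sketch(int i_layer, int n_layers) {
    return n_layers > 0 && i_layer < (n_layers * 7) / 8;
}

// Under that assumption, a call site such as
//   new_type = difquant_seven_eights_tensors(qs.i_attention_wv, qs.n_attention_wv)
//                  ? GGML_TYPE_Q6_K : GGML_TYPE_Q5_K;
// bumps roughly 7/8 of the attention-V tensors to Q6_K and leaves the rest at Q5_K,
// which is what makes UXL sit just above XXXL in the bpw ladder.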
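For completeness, a minimal sketch of exercising the new ftype end to end through the public quantize API. It assumes LLAMA_FTYPE_MOSTLY_IQ3_UXL is exposed in llama.h alongside its IQ3_* siblings (not shown in this diff) and uses only the standard llama_model_quantize entry points; the file names are placeholders:

#include "llama.h"

int main() {
    // Start from the library defaults, then select the IQ3_UXL mix.
    llama_model_quantize_params params = llama_model_quantize_default_params();
    params.ftype = LLAMA_FTYPE_MOSTLY_IQ3_UXL; // assumed to be in llama.h

    // Returns 0 on success; routes through llama_model_quantize_internal,
    // where the default_type switch above maps IQ3_UXL to GGML_TYPE_IQ3_S.
    return llama_model_quantize("model-f16.gguf", "model-iq3_uxl.gguf", &params);
}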