@@ -18744,7 +18744,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1874418744 }
1874518745 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
1874618746 if (qs.model.hparams.n_vocab >= 127999 && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2))
18747- new_type = difquant_seven_eights_tensors (qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_S;
18747+ new_type = difquant_first_last_tensors (qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
1874818748 else new_type = GGML_TYPE_Q3_K;
1874918749 }
1875018750 // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_MR) {
@@ -18933,7 +18933,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1893318933 }
1893418934 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
1893518935 if (qs.model.hparams.n_vocab >= 127999 && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2))
18936- new_type = (difquant_seven_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S ;
18936+ new_type = (difquant_seven_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ4_XS ;
1893718937 else new_type = GGML_TYPE_IQ4_XS;
1893818938 }
1893918939 else if (i_layer < n_layer/8 && (ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) && !qs.has_imatrix) {
@@ -19133,11 +19133,11 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1913319133 // new_type = GGML_TYPE_Q3_K;
1913419134 // else new_type = difquant_seven_eights_tensors(qs.i_attention_wo, qs.n_attention_wo) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
1913519135 // }
19136- // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
19137- // if (qs.model.hparams.n_vocab >= 127999 && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2))
19138- // new_type = difquant_seven_eights_tensors(qs.i_attention_wo, qs.n_attention_wo) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S ;
19139- // else new_type = GGML_TYPE_Q3_K;
19140- // }
19136+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
19137+ if (qs.model.hparams.n_vocab >= 127999 && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2))
19138+ new_type = difquant_seven_eights_tensors(qs.i_attention_wo, qs.n_attention_wo) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ4_XS ;
19139+ else new_type = GGML_TYPE_Q3_K;
19140+ }
1914119141 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_MR) {
1914219142 if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) {
1914319143 new_type = qs.i_attention_wq < qs.n_attention_wq/8 ? GGML_TYPE_Q5_K :
@@ -19496,7 +19496,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1949619496 }
1949719497 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
1949819498 if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
19499- new_type = (difquant_seven_eights_tensors (i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
19499+ new_type = (difquant_first_last_tensors (i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
1950019500 else new_type = (difquant_five_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
1950119501 }
1950219502 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_MR) {
@@ -19640,7 +19640,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1964019640 }
1964119641 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_UXL) {
1964219642 if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
19643- new_type = (difquant_seven_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S ;
19643+ new_type = (difquant_seven_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ4_XS ;
1964419644 else new_type = (difquant_five_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
1964519645 }
1964619646 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_MR) {
0 commit comments