@@ -17801,7 +17801,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1780117801 new_type = (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) ? GGML_TYPE_Q4_K : GGML_TYPE_IQ3_S;
1780217802 }
1780317803 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
17804- if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q5_K ;
17804+ if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q4_K ;
1780517805 else if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q4_K;
1780617806 else new_type = GGML_TYPE_IQ3_S;
1780717807 }
@@ -18006,24 +18006,24 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1800618006 else new_type = difquant_half_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_IQ2_S : GGML_TYPE_IQ2_XS;
1800718007 }
1800818008 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
18009- if (qs.model.hparams.n_gqa() >= 8 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q4_K ;
18009+ if (qs.model.hparams.n_gqa() >= 8 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ4_XS ;
1801018010 else if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ3_S;
1801118011 else new_type = GGML_TYPE_IQ3_XXS;
1801218012 }
1801318013 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
18014- if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q5_K ;
18014+ if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q4_K ;
1801518015 else if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2)
1801618016 new_type = difquant_first_last_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
1801718017 else new_type = difquant_fl_more_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
1801818018 }
1801918019 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
18020- if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q5_K ;
18020+ if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q4_K ;
1802118021 else if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
1802218022 new_type = difquant_five_eights_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
1802318023 else new_type = GGML_TYPE_IQ3_S;
1802418024 }
1802518025 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
18026- if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q6_K ;
18026+ if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q5_K ;
1802718027 else if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
1802818028 new_type = difquant_first_last_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_Q5_K : GGML_TYPE_IQ4_XS;
1802918029 else new_type = difquant_fl_more_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
0 commit comments