@@ -17601,23 +17601,22 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1760117601 else if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q4_K;
1760217602 else new_type = GGML_TYPE_IQ4_XS;
1760317603 }
17604- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS ||
17605- ftype == LLAMA_FTYPE_MOSTLY_IQ2_S) {
17604+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
1760617605 if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
1760717606 else if (qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_IQ4_XS;
1760817607 else new_type = GGML_TYPE_Q4_K;
1760917608 }
17610- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL ) {
17609+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M ) {
1761117610 if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
17612- else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q4_K;
17611+ else if (qs.model.hparams.n_vocab >= 127999 && qs.model.hparams.n_gqa() < 12 ) new_type = GGML_TYPE_Q4_K;
1761317612 else new_type = GGML_TYPE_Q5_K;
1761417613 }
17615- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
17616- if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
17614+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
17615+ if (qs.model.hparams.n_expert >= 4 && qs.model.hparams.n_gqa() >= 12 ) new_type = GGML_TYPE_Q6_K;
1761717616 else new_type = GGML_TYPE_Q5_K;
1761817617 }
1761917618 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_ML) {
17620- if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
17619+ if (qs.model.hparams.n_expert >= 4 && qs.model.hparams.n_gqa() >= 12 ) new_type = GGML_TYPE_Q6_K;
1762117620 else if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_Q5_K;
1762217621 else new_type = GGML_TYPE_Q6_K;
1762317622 }
0 commit comments