
Commit 6263649

Revert variable V below Q5_K

1 parent eb4a69e

src/llama.cpp: 9 additions, 13 deletions
@@ -17801,18 +17801,15 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             new_type = (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) ? GGML_TYPE_Q4_K : GGML_TYPE_IQ3_S;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
-            if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ4_XS;
-            // new_type = difquant_six_eights_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
+            if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q5_K;
             else new_type = GGML_TYPE_IQ3_S;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
-            if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2)
-                new_type = difquant_first_last_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
+            if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q5_K;
             else new_type = GGML_TYPE_Q4_K;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
-            if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
-                new_type = difquant_five_eights_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
+            if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q5_K;
             else new_type = GGML_TYPE_Q4_K;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
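For illustration, the rule this hunk settles on for the attn_v tensors reads as a flat GQA/MoE gate in place of the per-layer difquant schedule. A minimal standalone sketch of the IQ3_XXS case, with simplified names that are not the real llama.cpp API:

#include <cstdio>

// Standalone sketch (simplified names, not the real llama.cpp API) of the
// attn_v rule after this commit for LLAMA_FTYPE_MOSTLY_IQ3_XXS: a flat
// Q5_K bump for GQA/MoE models instead of a per-layer schedule.
static const char * attn_v_type_iq3_xxs(int n_gqa, int n_expert) {
    // Under GQA the V projection is shared across query heads, so each
    // tensor carries more information and receives the higher quant type.
    if (n_gqa >= 4 || n_expert >= 2) return "Q5_K";
    return "Q4_K";
}

int main() {
    printf("%s\n", attn_v_type_iq3_xxs(4, 0)); // GQA model   -> Q5_K
    printf("%s\n", attn_v_type_iq3_xxs(1, 0)); // dense model -> Q4_K
    return 0;
}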
@@ -17853,9 +17850,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         // if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q5_K;
         // else new_type = GGML_TYPE_Q4_K;
         // }
-        else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) &&
-            (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
-            new_type = GGML_TYPE_Q6_K;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) {
+            if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q6_K;
             else new_type = GGML_TYPE_Q5_K;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_XSR) {
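This hunk hoists the ftype test out of the combined condition, so the GQA/MoE check becomes an inner if/else with an explicit Q5_K fallback for dense models. A minimal sketch of the resulting shape (simplified, hypothetical names, not the actual code):

// Sketch of the restructured IQ4_NL / IQ4_XS branch (hypothetical names).
static const char * attn_v_type_iq4(int n_gqa, int n_expert) {
    if (n_gqa >= 2 || n_expert >= 2) {
        return "Q6_K"; // GQA/MoE models keep the Q6_K bump
    }
    return "Q5_K";     // explicit fallback made visible by the restructure
}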
@@ -18038,13 +18034,13 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             else new_type = GGML_TYPE_IQ3_XXS;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
-            if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2)
-                new_type = difquant_first_last_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
+            if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q5_K;
+            // new_type = difquant_first_last_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
             else new_type = difquant_fl_more_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
-            if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
-                new_type = difquant_five_eights_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
+            if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q5_K;
+            // new_type = difquant_five_eights_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
             else new_type = GGML_TYPE_IQ3_S;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
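The difquant_* helpers are commented out for the GQA/MoE path here, but one of them, difquant_fl_more_tensors, stays live in the non-GQA branch. Judging by the call sites, each takes a running tensor index and a tensor count and returns true for the subset of layers that should get the higher type. Their bodies are not shown in this diff; the following is a hypothetical reconstruction of the pattern, with the layer fractions guessed from the names:

// Hypothetical reconstruction (bodies guessed from names; only the call
// sites appear in this diff). Each helper selects the layers that get the
// higher quantization type.
static bool difquant_first_last_tensors(int i_layer, int n_layers) {
    // Guess: bump only the first and the last layer.
    return i_layer == 0 || i_layer == n_layers - 1;
}

static bool difquant_five_eights_tensors(int i_layer, int n_layers) {
    // Guess: bump roughly the first five eighths of the layers.
    return i_layer < (n_layers * 5) / 8;
}

static bool difquant_fl_more_tensors(int i_layer, int n_layers) {
    // Guess: first/last layers plus a thin band at each end.
    return i_layer <= n_layers / 8 || i_layer >= n_layers - 1 - n_layers / 8;
}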
