
Commit 503048a

Correct IQ3_M
1 parent ddb1373 commit 503048a

File tree

1 file changed (+4, -5 lines)

src/llama.cpp

Lines changed: 4 additions & 5 deletions
@@ -16045,8 +16045,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
             if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
-                new_type = use_some_bits(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_Q5_K : GGML_TYPE_IQ4_XS;
-            else new_type = use_some_bits(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
+                new_type = use_few_bits(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_Q5_K : GGML_TYPE_IQ4_XS;
+            else new_type = use_few_bits(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL) {
             if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
@@ -16123,9 +16123,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                      : arch != LLM_ARCH_FALCON || use_more_bits(i_layer, n_layer) ? GGML_TYPE_Q4_K
                      : GGML_TYPE_Q3_K;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M && (use_some_bits(i_layer, n_layer) ||
-                 (qs.model.hparams.n_expert >= 4 && use_more_bits(i_layer, n_layer)))) {
-            new_type = GGML_TYPE_Q4_K;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
+            new_type = use_few_bits(i_layer, n_layer) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL) {
             new_type = use_some_bits(i_layer, n_layer) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
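
For context on the gates being swapped: the helpers' bodies are not part of this diff. Upstream llama.cpp defines use_more_bits as a predicate on the layer index that selects the first and last eighth of the layers plus every third layer in between; use_few_bits and use_some_bits are this fork's additions and are not shown here. A minimal sketch of the pattern follows; the use_few_bits body is a hypothetical illustration, not the fork's actual definition:

// Upstream llama.cpp's layer-index gate: true for the first and last
// eighth of the layers, plus every third layer in the middle band.
static bool use_more_bits(int i_layer, int n_layers) {
    return i_layer < n_layers/8 || i_layer >= 7*n_layers/8 || (i_layer - n_layers/8)%3 == 2;
}

// Hypothetical sketch of use_few_bits in the same style (assumption:
// the real helper is defined elsewhere in this fork and not shown in
// this diff). A "few bits" gate would fire on fewer layers, so fewer
// tensors are promoted to the wider type.
static bool use_few_bits(int i_layer, int n_layers) {
    return i_layer < n_layers/8 || i_layer >= 7*n_layers/8;
}

Read against the hunks above: both IQ3_M branches now gate on use_few_bits instead of use_some_bits, and the second branch no longer promotes to Q4_K at all, falling back to the same IQ4_XS/IQ3_S split that the IQ3_XL branch uses.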
