Skip to content

Commit 090b291

Browse files
ikawrakow, Kawrakow
authored and committed
iq3_xxs: guards for the no-imatrix situation (ggml-org#5334)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
1 parent 2578cb3 commit 090b291

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

llama.cpp

+5 -5
Original file line number | Diff line number | Diff line change
@@ -9456,8 +9456,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
94569456
else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && qs.model.hparams.n_gqa() >= 4) {
94579457
new_type = GGML_TYPE_Q4_K;
94589458
}
9459-
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && qs.model.hparams.n_gqa() >= 4) {
9460-
new_type = GGML_TYPE_Q4_K;
9459+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
9460+
new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : !qs.has_imatrix ? GGML_TYPE_Q3_K : GGML_TYPE_IQ3_XXS;
94619461
}
94629462
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) {
94639463
new_type = qs.i_attention_wv < 2 ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
@@ -9496,9 +9496,9 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
94969496
else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_XS) {
94979497
if (i_layer < n_layer/8) new_type = GGML_TYPE_Q4_K;
94989498
}
9499-
//else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
9500-
// if (i_layer < n_layer/8) new_type = GGML_TYPE_Q5_K;
9501-
//}
9499+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && !qs.has_imatrix) {
9500+
new_type = i_layer < n_layer/8 ? GGML_TYPE_Q4_K : GGML_TYPE_Q3_K;
9501+
}
95029502
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) {
95039503
new_type = i_layer < n_layer/16 ? GGML_TYPE_Q5_K
95049504
: arch != LLM_ARCH_FALCON || use_more_bits(i_layer, n_layer) ? GGML_TYPE_Q4_K

0 commit comments

Comments
 (0)