File tree 1 file changed +5
-5
lines changed
1 file changed +5
-5
lines changed Original file line number Diff line number Diff line change @@ -9456,8 +9456,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
9456
9456
else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && qs.model.hparams.n_gqa() >= 4) {
9457
9457
new_type = GGML_TYPE_Q4_K;
9458
9458
}
9459
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && qs.model.hparams.n_gqa() >= 4 ) {
9460
- new_type = GGML_TYPE_Q4_K;
9459
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
9460
+ new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : !qs.has_imatrix ? GGML_TYPE_Q3_K : GGML_TYPE_IQ3_XXS ;
9461
9461
}
9462
9462
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) {
9463
9463
new_type = qs.i_attention_wv < 2 ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
@@ -9496,9 +9496,9 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
9496
9496
else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_XS) {
9497
9497
if (i_layer < n_layer/8) new_type = GGML_TYPE_Q4_K;
9498
9498
}
9499
- // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
9500
- // if ( i_layer < n_layer/8) new_type = GGML_TYPE_Q5_K ;
9501
- // }
9499
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && !qs.has_imatrix ) {
9500
+ new_type = i_layer < n_layer/8 ? GGML_TYPE_Q4_K : GGML_TYPE_Q3_K ;
9501
+ }
9502
9502
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) {
9503
9503
new_type = i_layer < n_layer/16 ? GGML_TYPE_Q5_K
9504
9504
: arch != LLM_ARCH_FALCON || use_more_bits(i_layer, n_layer) ? GGML_TYPE_Q4_K
You can’t perform that action at this time.
0 commit comments