@@ -15334,14 +15334,10 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1533415334 ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
1533515335 new_type = GGML_TYPE_Q2_K;
1533615336 }
15337- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
15337+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS ) {
1533815338 new_type = GGML_TYPE_IQ3_S;
1533915339 }
15340- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
15341- new_type = GGML_TYPE_IQ3_S;
15342- }
15343- else if (new_type == GGML_TYPE_Q4_0_4_4 || new_type == GGML_TYPE_Q4_0_4_8 ||
15344- new_type == GGML_TYPE_Q4_0_8_8) {
15340+ else if (new_type == GGML_TYPE_Q4_0_4_4 || new_type == GGML_TYPE_Q4_0_4_8 || new_type == GGML_TYPE_Q4_0_8_8) {
1534515341 new_type = GGML_TYPE_Q4_0;
1534615342 }
1534715343 }
@@ -15381,12 +15377,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1538115377 else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
1538215378 new_type = GGML_TYPE_Q4_K;
1538315379 }
15384- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) {
15385- new_type = GGML_TYPE_Q4_K;
15386- }
15387- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
15388- new_type = GGML_TYPE_Q5_K;
15389- }
15380+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) new_type = GGML_TYPE_Q4_K;
15381+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) new_type = GGML_TYPE_Q5_K;
1539015382 else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) {
1539115383 new_type = qs.i_attention_wv < 2 ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
1539215384 }
@@ -15418,16 +15410,12 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1541815410 new_type = GGML_TYPE_IQ2_S;
1541915411 }
1542015412 } else if (name.find("attn_q.weight") != std::string::npos) {
15421- if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
15422- new_type = GGML_TYPE_IQ3_XXS;
15423- }
15413+ if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) new_type = GGML_TYPE_IQ3_XXS;
15414+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) new_type = GGML_TYPE_IQ2_S;
1542415415 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S ||
1542515416 ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
1542615417 if (qs.model.hparams.n_expert >= 8) new_type = GGML_TYPE_Q3_K;
1542715418 }
15428- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
15429- new_type = GGML_TYPE_IQ2_S;
15430- }
1543115419 } else if (name.find("ffn_down") != std::string::npos) {
1543215420 auto info = layer_info(qs.i_ffn_down, qs.n_ffn_down, name.c_str());
1543315421 int i_layer = info.first, n_layer = info.second;
0 commit comments