@@ -17839,7 +17839,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1783917839 // }
1784017840 // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
1784117841 // if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
17842- // new_type = difquant_five_eights_tensors (qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
17842+ // new_type = difquant_six_eights_tensors (qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
1784317843 // else new_type = GGML_TYPE_Q4_K;
1784417844 // }
1784517845 // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
@@ -18040,7 +18040,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1804018040 }
1804118041 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
1804218042 if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q5_K;
18043- // new_type = difquant_five_eights_tensors (qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
18043+ // new_type = difquant_six_eights_tensors (qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
1804418044 else new_type = GGML_TYPE_IQ3_S;
1804518045 }
1804618046 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
@@ -18187,7 +18187,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1818718187 // }
1818818188 // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
1818918189 // if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
18190- // new_type = difquant_five_eights_tensors (qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_IQ3_XXS : GGML_TYPE_IQ2_S;
18190+ // new_type = difquant_six_eights_tensors (qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_IQ3_XXS : GGML_TYPE_IQ2_S;
1819118191 // else new_type = GGML_TYPE_IQ3_XXS;
1819218192 // }
1819318193 // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
@@ -18328,7 +18328,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1832818328 }
1832918329 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
1833018330 if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
18331- new_type = (difquant_five_eights_tensors (i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
18331+ new_type = (difquant_six_eights_tensors (i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
1833218332 else new_type = GGML_TYPE_IQ3_S;
1833318333 }
1833418334 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) {
@@ -18493,7 +18493,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1849318493 // }
1849418494 // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
1849518495 // if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
18496- // new_type = difquant_five_eights_tensors (qs.i_attention_wo, qs.n_attention_wo) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
18496+ // new_type = difquant_six_eights_tensors (qs.i_attention_wo, qs.n_attention_wo) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
1849718497 // else new_type = GGML_TYPE_IQ3_S;
1849818498 // }
1849918499 // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
@@ -18656,7 +18656,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1865618656 }
1865718657 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
1865818658 if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
18659- new_type = difquant_five_eights_tensors (qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
18659+ new_type = difquant_six_eights_tensors (qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
1866018660 else new_type = GGML_TYPE_IQ4_XS;
1866118661 }
1866218662 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
@@ -18773,8 +18773,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1877318773 }
1877418774 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
1877518775 if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
18776- new_type = (difquant_five_eights_tensors (i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
18777- else new_type = (difquant_three_eights_tensors (i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
18776+ new_type = (difquant_six_eights_tensors (i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
18777+ else new_type = (difquant_half_tensors (i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
1877818778 }
1877918779 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) {
1878018780 if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
@@ -18883,8 +18883,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1888318883 }
1888418884 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
1888518885 if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
18886- new_type = (difquant_five_eights_tensors (i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
18887- else new_type = (difquant_three_eights_tensors (i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
18886+ new_type = (difquant_six_eights_tensors (i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
18887+ else new_type = (difquant_half_tensors (i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
1888818888 }
1888918889 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) {
1889018890 if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
0 commit comments