From 4f88a5185e00c790f5fd1889810662652a5e7527 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 12 Jan 2026 07:18:37 +0000
Subject: [PATCH 1/2] Initial plan

From 6e75f5045e7547aadd4e55c352eff43b59d6d879 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 12 Jan 2026 07:23:33 +0000
Subject: [PATCH 2/2] Add iq4_nl quantization support to whisper.cpp

Co-authored-by: lhpqaq <63844184+lhpqaq@users.noreply.github.com>
---
 examples/common-ggml.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/examples/common-ggml.cpp b/examples/common-ggml.cpp
index c42b644fedd..e7658523ba0 100644
--- a/examples/common-ggml.cpp
+++ b/examples/common-ggml.cpp
@@ -14,6 +14,7 @@ static const std::map<std::string, enum ggml_ftype> GGML_FTYPE_MAP = {
     {"q4_k", GGML_FTYPE_MOSTLY_Q4_K},
     {"q5_k", GGML_FTYPE_MOSTLY_Q5_K},
     {"q6_k", GGML_FTYPE_MOSTLY_Q6_K},
+    {"iq4_nl", GGML_FTYPE_MOSTLY_IQ4_NL},
 };
 
 void ggml_print_ftypes(FILE * fp) {
@@ -24,7 +25,7 @@ void ggml_print_ftypes(FILE * fp) {
 
 enum ggml_ftype ggml_parse_ftype(const char * str) {
     enum ggml_ftype ftype;
-    if (str[0] == 'q') {
+    if (str[0] == 'q' || str[0] == 'i') {
         const auto it = GGML_FTYPE_MAP.find(str);
         if (it == GGML_FTYPE_MAP.end()) {
             fprintf(stderr, "%s: unknown ftype '%s'\n", __func__, str);
@@ -58,6 +59,7 @@ bool ggml_common_quantize_0(
         case GGML_FTYPE_MOSTLY_Q4_K: qtype = GGML_TYPE_Q4_K; break;
         case GGML_FTYPE_MOSTLY_Q5_K: qtype = GGML_TYPE_Q5_K; break;
         case GGML_FTYPE_MOSTLY_Q6_K: qtype = GGML_TYPE_Q6_K; break;
+        case GGML_FTYPE_MOSTLY_IQ4_NL: qtype = GGML_TYPE_IQ4_NL; break;
         case GGML_FTYPE_UNKNOWN:
         case GGML_FTYPE_ALL_F32:
         case GGML_FTYPE_MOSTLY_F16:
@@ -68,7 +70,6 @@ bool ggml_common_quantize_0(
         case GGML_FTYPE_MOSTLY_IQ3_XXS:
         case GGML_FTYPE_MOSTLY_IQ3_S:
         case GGML_FTYPE_MOSTLY_IQ1_S:
-        case GGML_FTYPE_MOSTLY_IQ4_NL:
         case GGML_FTYPE_MOSTLY_IQ4_XS:
         case GGML_FTYPE_MOSTLY_IQ1_M:
         case GGML_FTYPE_MOSTLY_BF16:
@@ -188,6 +189,7 @@ bool ggml_common_quantize_0(
                 case GGML_TYPE_Q4_K:
                 case GGML_TYPE_Q5_K:
                 case GGML_TYPE_Q6_K:
+                case GGML_TYPE_IQ4_NL:
                     {
                         cur_size = ggml_quantize_chunk((ggml_type) ttype, data_f32.data(), work.data(), 0, nelements/ne[0], ne[0], nullptr);
                     } break;
@@ -206,7 +208,6 @@ bool ggml_common_quantize_0(
                 case GGML_TYPE_IQ3_XXS:
                 case GGML_TYPE_IQ3_S:
                 case GGML_TYPE_IQ1_S:
-                case GGML_TYPE_IQ4_NL:
                 case GGML_TYPE_IQ4_XS:
                 case GGML_TYPE_IQ1_M:
                 case GGML_TYPE_BF16: