@@ -24,13 +24,16 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
2424 { " IQ2_XS" , LLAMA_FTYPE_MOSTLY_IQ2_XS, " 2.31 bpw quantization" , },
2525 { " IQ2_S" , LLAMA_FTYPE_MOSTLY_IQ2_S, " 2.5 bpw quantization" , },
2626 { " IQ2_M" , LLAMA_FTYPE_MOSTLY_IQ2_M, " 2.7 bpw quantization" , },
27+ { " IQ2_XL" , LLAMA_FTYPE_MOSTLY_IQ2_XL, " 2.85 bpw quantization mix" , },
2728 { " IQ1_S" , LLAMA_FTYPE_MOSTLY_IQ1_S, " 1.56 bpw quantization" , },
2829 { " IQ1_M" , LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.75 bpw quantization" , },
2930 { " Q2_K" , LLAMA_FTYPE_MOSTLY_Q2_K, " 2.96G, +3.5199 ppl @ Llama-3-8B" , },
3031 { " Q2_K_S" , LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.96G, +3.1836 ppl @ Llama-3-8B" , },
32+ { " Q2_K_L" , LLAMA_FTYPE_MOSTLY_Q2_K_L, " 2.96G, +3.1836 ppl @ Llama-3-8B" , },
3133 { " IQ3_XXS" , LLAMA_FTYPE_MOSTLY_IQ3_XXS, " 3.06 bpw quantization" , },
3234 { " IQ3_S" , LLAMA_FTYPE_MOSTLY_IQ3_S, " 3.44 bpw quantization" , },
3335 { " IQ3_M" , LLAMA_FTYPE_MOSTLY_IQ3_M, " 3.66 bpw quantization mix" , },
36+ { " IQ3_XL" , LLAMA_FTYPE_MOSTLY_IQ3_XL, " 3.85 bpw quantization mix" , },
3437 { " Q3_K" , LLAMA_FTYPE_MOSTLY_Q3_K_M, " alias for Q3_K_M" },
3538 { " IQ3_XS" , LLAMA_FTYPE_MOSTLY_IQ3_XS, " 3.3 bpw quantization" , },
3639 { " Q3_K_S" , LLAMA_FTYPE_MOSTLY_Q3_K_S, " 3.41G, +1.6321 ppl @ Llama-3-8B" , },
0 commit comments