Continue Q5_K mixes: rename the XSR/SR/ML/XL file types to XS1R/S2R/M3L/X4L

Nexesenex · Nexesenex · commit cf8375c3c457 · 2024-10-16T17:48:29.000+02:00
diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
@@ -62,10 +62,10 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
     { "Q5_K",     LLAMA_FTYPE_MOSTLY_Q5_K_M,   "alias for Q5_K_M",                  },
     { "Q5_K_S",   LLAMA_FTYPE_MOSTLY_Q5_K_S,   " 5.21G, +0.1049 ppl @ Llama-3-8B",  },
     { "Q5_K_M",   LLAMA_FTYPE_MOSTLY_Q5_K_M,   " 5.33G, +0.0569 ppl @ Llama-3-8B",  },
-    { "Q5_K_XSR", LLAMA_FTYPE_MOSTLY_Q5_K_XL,  " 5.4 bpw quantization mix",         },
-    { "Q5_K_SR",  LLAMA_FTYPE_MOSTLY_Q5_K_XL,  " 5.6 bpw quantization mix",         },
-    { "Q5_K_ML",  LLAMA_FTYPE_MOSTLY_Q5_K_XL,  " 5.8 bpw quantization mix",         },
-    { "Q5_K_XL",  LLAMA_FTYPE_MOSTLY_Q5_K_XL,  " 6 bpw quantization mix",           },
+    { "Q5_K_XS1R", LLAMA_FTYPE_MOSTLY_Q5_K_XS1R, " 5.4 bpw quantization mix",       },
+    { "Q5_K_S2R",  LLAMA_FTYPE_MOSTLY_Q5_K_S2R,  " 5.6 bpw quantization mix",       },
+    { "Q5_K_M3L",  LLAMA_FTYPE_MOSTLY_Q5_K_M3L,  " 5.8 bpw quantization mix",       },
+    { "Q5_K_X4L",  LLAMA_FTYPE_MOSTLY_Q5_K_X4L,  " 6 bpw quantization mix",         },
     { "Q6_K",     LLAMA_FTYPE_MOSTLY_Q6_K,     " 6.14G, +0.0217 ppl @ Llama-3-8B",  },
     { "Q8_0",     LLAMA_FTYPE_MOSTLY_Q8_0,     " 7.96G, +0.0026 ppl @ Llama-3-8B",  },
     { "Q4_0_4_4", LLAMA_FTYPE_MOSTLY_Q4_0_4_4, " 4.34G, +0.4685 ppl @ Llama-3-8B",  },
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
@@ -1466,10 +1466,10 @@ class LlamaFileType(IntEnum):
     MOSTLY_IQ4_XSR       = 113 # except 1d tensors
     MOSTLY_IQ4_MR        = 114 # except 1d tensors
     MOSTLY_IQ4_LR        = 115 # except 1d tensors
-    MOSTLY_Q5_K_XSR      = 116 # except 1d tensors
-    MOSTLY_Q5_K_SR       = 117 # except 1d tensors
-    MOSTLY_Q5_K_ML       = 118 # except 1d tensors
-    MOSTLY_Q5_K_XL       = 119 # except 1d tensors
+    MOSTLY_Q5_K_XS1R     = 116 # except 1d tensors
+    MOSTLY_Q5_K_S2R      = 117 # except 1d tensors
+    MOSTLY_Q5_K_M3L      = 118 # except 1d tensors
+    MOSTLY_Q5_K_X4L      = 119 # except 1d tensors
     MOSTLY_CQS           = 199 # except 1d tensors
 
     GUESSED              = 1024  # not specified in the model file
diff --git a/include/llama.h b/include/llama.h
@@ -191,10 +191,10 @@ extern "C" {
         LLAMA_FTYPE_MOSTLY_IQ4_XSR       = 113, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_IQ4_MR        = 114, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_IQ4_LR        = 115, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q5_K_XSR      = 116, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q5_K_SR       = 117, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q5_K_ML       = 118, // except 1d tensors
-        LLAMA_FTYPE_MOSTLY_Q5_K_XL       = 119, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q5_K_XS1R     = 116, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q5_K_S2R      = 117, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q5_K_M3L      = 118, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q5_K_X4L      = 119, // except 1d tensors
         LLAMA_FTYPE_CQS                  = 199, // except 1d tensors
 
         LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
diff --git a/src/llama.cpp b/src/llama.cpp