Skip to content

Commit 6c51f39

Browse files
committed
IQ3_XXXXL, EXL and renaming >=IQ3_ML scheme
Tested on Mistral Large; under the renamed scheme, IQ3_XL becomes IQ3_X5L, and so on.
1 parent 64bfe69 commit 6c51f39

File tree

4 files changed

+273
-147
lines changed

4 files changed

+273
-147
lines changed

examples/quantize/quantize.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,13 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
3737
{ "IQ3_XXS", LLAMA_FTYPE_MOSTLY_IQ3_XXS, " 3.06 bpw quantization", },
3838
{ "IQ3_S", LLAMA_FTYPE_MOSTLY_IQ3_S, " 3.44 bpw quantization", },
3939
{ "IQ3_M", LLAMA_FTYPE_MOSTLY_IQ3_M, " 3.60 bpw quantization mix", },
40-
{ "IQ3_ML", LLAMA_FTYPE_MOSTLY_IQ3_ML, " 3.75 bpw quantization mix", },
41-
{ "IQ3_XL", LLAMA_FTYPE_MOSTLY_IQ3_XL, " 3.85 bpw quantization mix", },
42-
{ "IQ3_XXL", LLAMA_FTYPE_MOSTLY_IQ3_XXL, " 3.95 bpw quantization mix", },
43-
{ "IQ3_XXXL", LLAMA_FTYPE_MOSTLY_IQ3_XXXL, " 4.05 bpw quantization mix", },
44-
{ "IQ3_UXL", LLAMA_FTYPE_MOSTLY_IQ3_UXL, " 4.15 bpw quantization mix", },
40+
{ "IQ3_M3L", LLAMA_FTYPE_MOSTLY_IQ3_M3L, " 3.70 bpw quantization mix", },
41+
{ "IQ3_X4L", LLAMA_FTYPE_MOSTLY_IQ3_X4L, " 3.80 bpw quantization mix", },
42+
{ "IQ3_X5L", LLAMA_FTYPE_MOSTLY_IQ3_X5L, " 3.90 bpw quantization mix", },
43+
{ "IQ3_X6L", LLAMA_FTYPE_MOSTLY_IQ3_X6L, " 4.00 bpw quantization mix", },
44+
{ "IQ3_X7L", LLAMA_FTYPE_MOSTLY_IQ3_X7L, " 4.10 bpw quantization mix", },
45+
{ "IQ3_EXL", LLAMA_FTYPE_MOSTLY_IQ3_EXL, " 4.15 bpw quantization mix", },
46+
{ "IQ3_UXL", LLAMA_FTYPE_MOSTLY_IQ3_UXL, " 4.20 bpw quantization mix", },
4547
{ "Q3_K", LLAMA_FTYPE_MOSTLY_Q3_K_M, "alias for Q3_K_M" },
4648
{ "IQ3_XS", LLAMA_FTYPE_MOSTLY_IQ3_XS, " 3.3 bpw quantization", },
4749
{ "Q3_K_S", LLAMA_FTYPE_MOSTLY_Q3_K_S, " 3.41G, +1.6321 ppl @ Llama-3-8B", },

gguf-py/gguf/constants.py

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1450,22 +1450,24 @@ class LlamaFileType(IntEnum):
14501450
MOSTLY_Q4_0_8_8 = 35 # except 1d tensors
14511451
MOSTLY_TQ1_0 = 36 # except 1d tensors
14521452
MOSTLY_TQ2_0 = 37 # except 1d tensors
1453-
MOSTLY_IQ2_XL = 38 # except 1d tensors
1454-
MOSTLY_IQ3_XL = 39 # except 1d tensors
1455-
MOSTLY_Q2_K_L = 40 # except 1d tensors
1456-
MOSTLY_IQ1_XS = 41 # except 1d tensors
1457-
MOSTLY_IQ1_XL = 42 # except 1d tensors
1458-
MOSTLY_IQ3_XXL = 43 # except 1d tensors
1459-
MOSTLY_Q3_K_XL = 44 # except 1d tensors
1460-
MOSTLY_IQ3_ML = 45 # except 1d tensors
1461-
MOSTLY_IQ3_XXXL = 46 # except 1d tensors
1462-
MOSTLY_IQ3_UXL = 47 # except 1d tensors
1463-
MOSTLY_IQ4_XXSR = 48 # except 1d tensors
1464-
MOSTLY_IQ4_XSR = 49 # except 1d tensors
1465-
MOSTLY_IQ4_MR = 50 # except 1d tensors
1466-
MOSTLY_IQ4_LR = 51 # except 1d tensors
1467-
MOSTLY_Q5_K_XL = 52, # except 1d tensors
1468-
MOSTLY_CQS = 99 # except 1d tensors
1453+
MOSTLY_IQ2_XL = 100 # except 1d tensors
1454+
MOSTLY_Q2_K_L = 101 # except 1d tensors
1455+
MOSTLY_IQ1_XS = 102 # except 1d tensors
1456+
MOSTLY_IQ1_XL = 103 # except 1d tensors
1457+
MOSTLY_IQ3_M3L = 104 # except 1d tensors
1458+
MOSTLY_IQ3_X4L = 105 # except 1d tensors
1459+
MOSTLY_IQ3_X5L = 106 # except 1d tensors
1460+
MOSTLY_IQ3_X6L = 107 # except 1d tensors
1461+
MOSTLY_IQ3_X7L = 108 # except 1d tensors
1462+
MOSTLY_IQ3_EXL = 109 # except 1d tensors
1463+
MOSTLY_IQ3_UXL = 110 # except 1d tensors
1464+
MOSTLY_Q3_K_XL = 111 # except 1d tensors
1465+
MOSTLY_IQ4_XXSR = 112 # except 1d tensors
1466+
MOSTLY_IQ4_XSR = 113 # except 1d tensors
1467+
MOSTLY_IQ4_MR = 114 # except 1d tensors
1468+
MOSTLY_IQ4_LR = 115 # except 1d tensors
1469+
MOSTLY_Q5_K_XL = 116 # except 1d tensors
1470+
MOSTLY_CQS = 199 # except 1d tensors
14691471

14701472
GUESSED = 1024 # not specified in the model file
14711473

include/llama.h

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -175,22 +175,24 @@ extern "C" {
175175
LLAMA_FTYPE_MOSTLY_Q4_0_8_8 = 35, // except 1d tensors
176176
LLAMA_FTYPE_MOSTLY_TQ1_0 = 36, // except 1d tensors
177177
LLAMA_FTYPE_MOSTLY_TQ2_0 = 37, // except 1d tensors
178-
LLAMA_FTYPE_MOSTLY_IQ2_XL = 38, // except 1d tensors
179-
LLAMA_FTYPE_MOSTLY_IQ3_XL = 39, // except 1d tensors
180-
LLAMA_FTYPE_MOSTLY_Q2_K_L = 40, // except 1d tensors
181-
LLAMA_FTYPE_MOSTLY_IQ1_XS = 41, // except 1d tensors
182-
LLAMA_FTYPE_MOSTLY_IQ1_XL = 42, // except 1d tensors
183-
LLAMA_FTYPE_MOSTLY_IQ3_XXL = 43, // except 1d tensors
184-
LLAMA_FTYPE_MOSTLY_Q3_K_XL = 44, // except 1d tensors
185-
LLAMA_FTYPE_MOSTLY_IQ3_ML = 45, // except 1d tensors
186-
LLAMA_FTYPE_MOSTLY_IQ3_XXXL = 46, // except 1d tensors
187-
LLAMA_FTYPE_MOSTLY_IQ3_UXL = 47, // except 1d tensors
188-
LLAMA_FTYPE_MOSTLY_IQ4_XXSR = 48, // except 1d tensors
189-
LLAMA_FTYPE_MOSTLY_IQ4_XSR = 49, // except 1d tensors
190-
LLAMA_FTYPE_MOSTLY_IQ4_MR = 50, // except 1d tensors
191-
LLAMA_FTYPE_MOSTLY_IQ4_LR = 51, // except 1d tensors
192-
LLAMA_FTYPE_MOSTLY_Q5_K_XL = 52, // except 1d tensors
193-
LLAMA_FTYPE_CQS = 99, // except 1d tensors
178+
LLAMA_FTYPE_MOSTLY_IQ2_XL = 100, // except 1d tensors
179+
LLAMA_FTYPE_MOSTLY_Q2_K_L = 101, // except 1d tensors
180+
LLAMA_FTYPE_MOSTLY_IQ1_XS = 102, // except 1d tensors
181+
LLAMA_FTYPE_MOSTLY_IQ1_XL = 103, // except 1d tensors
182+
LLAMA_FTYPE_MOSTLY_IQ3_M3L = 104, // except 1d tensors
183+
LLAMA_FTYPE_MOSTLY_IQ3_X4L = 105, // except 1d tensors
184+
LLAMA_FTYPE_MOSTLY_IQ3_X5L = 106, // except 1d tensors
185+
LLAMA_FTYPE_MOSTLY_IQ3_X6L = 107, // except 1d tensors
186+
LLAMA_FTYPE_MOSTLY_IQ3_X7L = 108, // except 1d tensors
187+
LLAMA_FTYPE_MOSTLY_IQ3_EXL = 109, // except 1d tensors
188+
LLAMA_FTYPE_MOSTLY_IQ3_UXL = 110, // except 1d tensors
189+
LLAMA_FTYPE_MOSTLY_Q3_K_XL = 111, // except 1d tensors
190+
LLAMA_FTYPE_MOSTLY_IQ4_XXSR = 112, // except 1d tensors
191+
LLAMA_FTYPE_MOSTLY_IQ4_XSR = 113, // except 1d tensors
192+
LLAMA_FTYPE_MOSTLY_IQ4_MR = 114, // except 1d tensors
193+
LLAMA_FTYPE_MOSTLY_IQ4_LR = 115, // except 1d tensors
194+
LLAMA_FTYPE_MOSTLY_Q5_K_XL = 116, // except 1d tensors
195+
LLAMA_FTYPE_CQS = 199, // except 1d tensors
194196

195197
LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
196198
};

0 commit comments

Comments (0)