Skip to content

Commit 1e7e816

Browse files
committed
Add IQ3_ML, reinstate IQ3_XXXL
1 parent 7b0dc30 commit 1e7e816

File tree

4 files changed

+143
-45
lines changed

4 files changed

+143
-45
lines changed

examples/quantize/quantize.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,10 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
     { "IQ3_XXS", LLAMA_FTYPE_MOSTLY_IQ3_XXS, " 3.06 bpw quantization", },
     { "IQ3_S", LLAMA_FTYPE_MOSTLY_IQ3_S, " 3.44 bpw quantization", },
     { "IQ3_M", LLAMA_FTYPE_MOSTLY_IQ3_M, " 3.70 bpw quantization mix", },
+    { "IQ3_ML", LLAMA_FTYPE_MOSTLY_IQ3_ML, " 3.80 bpw quantization mix", },
     { "IQ3_XL", LLAMA_FTYPE_MOSTLY_IQ3_XL, " 3.90 bpw quantization mix", },
-    { "IQ3_XXL", LLAMA_FTYPE_MOSTLY_IQ3_XXL, " 4.10 bpw quantization mix", },
+    { "IQ3_XXL", LLAMA_FTYPE_MOSTLY_IQ3_XXL, " 4.00 bpw quantization mix", },
+    { "IQ3_XXXL", LLAMA_FTYPE_MOSTLY_IQ3_XXXL, " 4.10 bpw quantization mix", },
     { "Q3_K", LLAMA_FTYPE_MOSTLY_Q3_K_M, "alias for Q3_K_M" },
     { "IQ3_XS", LLAMA_FTYPE_MOSTLY_IQ3_XS, " 3.3 bpw quantization", },
     { "Q3_K_S", LLAMA_FTYPE_MOSTLY_Q3_K_S, " 3.41G, +1.6321 ppl @ Llama-3-8B", },

gguf-py/gguf/constants.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1370,7 +1370,7 @@ class LlamaFileType(IntEnum):
     MOSTLY_Q2_K = 10  # except 1d tensors
     MOSTLY_Q3_K_S = 11  # except 1d tensors
     MOSTLY_Q3_K_M = 12  # except 1d tensors
-    MOSTLY_Q3_K_XL = 13  # except 1d tensors
+    MOSTLY_Q3_K_L = 13  # except 1d tensors
     MOSTLY_Q4_K_S = 14  # except 1d tensors
     MOSTLY_Q4_K_M = 15  # except 1d tensors
     MOSTLY_Q5_K_S = 16  # except 1d tensors
@@ -1402,7 +1402,9 @@ class LlamaFileType(IntEnum):
     MOSTLY_IQ1_XL = 42  # except 1d tensors
     MOSTLY_IQ4_XSR = 43  # except 1d tensors
     MOSTLY_IQ3_XXL = 44  # except 1d tensors
-    MOSTLY_Q3_K_L = 45  # except 1d tensors
+    MOSTLY_Q3_K_XL = 45  # except 1d tensors
+    MOSTLY_IQ3_ML = 46  # except 1d tensors
+    MOSTLY_IQ3_XXXL = 47  # except 1d tensors

     GUESSED = 1024  # not specified in the model file


include/llama.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ extern "C" {
     LLAMA_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
     LLAMA_FTYPE_MOSTLY_Q3_K_S = 11, // except 1d tensors
     LLAMA_FTYPE_MOSTLY_Q3_K_M = 12, // except 1d tensors
-    LLAMA_FTYPE_MOSTLY_Q3_K_XL = 13, // except 1d tensors
+    LLAMA_FTYPE_MOSTLY_Q3_K_L = 13, // except 1d tensors
     LLAMA_FTYPE_MOSTLY_Q4_K_S = 14, // except 1d tensors
     LLAMA_FTYPE_MOSTLY_Q4_K_M = 15, // except 1d tensors
     LLAMA_FTYPE_MOSTLY_Q5_K_S = 16, // except 1d tensors
@@ -181,7 +181,10 @@ extern "C" {
     LLAMA_FTYPE_MOSTLY_IQ1_XL = 42, // except 1d tensors
     LLAMA_FTYPE_MOSTLY_IQ4_XSR = 43, // except 1d tensors
     LLAMA_FTYPE_MOSTLY_IQ3_XXL = 44, // except 1d tensors
-    LLAMA_FTYPE_MOSTLY_Q3_K_L = 45, // except 1d tensors
+    LLAMA_FTYPE_MOSTLY_Q3_K_XL = 45, // except 1d tensors
+    LLAMA_FTYPE_MOSTLY_IQ3_ML = 46, // except 1d tensors
+    LLAMA_FTYPE_MOSTLY_IQ3_XXXL = 47, // except 1d tensors
+    LLAMA_FTYPE_CQS = 99, // except 1d tensors

     LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
 };

0 commit comments

Comments (0)