Skip to content

Commit 75b8800

Browse files
committed
More overhaul for IQ4_XSR and new IQ4_MR
1 parent 167a3c5 commit 75b8800

File tree

4 files changed

+82
-64
lines changed

4 files changed

+82
-64
lines changed

examples/quantize/quantize.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
51 51
{ "IQ4_NL", LLAMA_FTYPE_MOSTLY_IQ4_NL, " 4.50 bpw non-linear quantization", },
52 52
{ "IQ4_XS", LLAMA_FTYPE_MOSTLY_IQ4_XS, " 4.25 bpw non-linear quantization", },
53 53
{ "IQ4_XSR", LLAMA_FTYPE_MOSTLY_IQ4_XSR, " 4.xx bpw non-linear quantization", },
54+
{ "IQ4_MR", LLAMA_FTYPE_MOSTLY_IQ4_MR, " 4.xx bpw non-linear quantization", },
54 55
{ "Q4_K", LLAMA_FTYPE_MOSTLY_Q4_K_M, "alias for Q4_K_M", },
55 56
{ "Q4_K_S", LLAMA_FTYPE_MOSTLY_Q4_K_S, " 4.37G, +0.2689 ppl @ Llama-3-8B", },
56 57
{ "Q4_K_M", LLAMA_FTYPE_MOSTLY_Q4_K_M, " 4.58G, +0.1754 ppl @ Llama-3-8B", },

gguf-py/gguf/constants.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1448,12 +1448,14 @@ class LlamaFileType(IntEnum):
1448 1448
MOSTLY_Q2_K_L = 40 # except 1d tensors
1449 1449
MOSTLY_IQ1_XS = 41 # except 1d tensors
1450 1450
MOSTLY_IQ1_XL = 42 # except 1d tensors
1451-
MOSTLY_IQ4_XSR = 43 # except 1d tensors
1452-
MOSTLY_IQ3_XXL = 44 # except 1d tensors
1453-
MOSTLY_Q3_K_XL = 45 # except 1d tensors
1454-
MOSTLY_IQ3_ML = 46 # except 1d tensors
1455-
MOSTLY_IQ3_XXXL = 47 # except 1d tensors
1456-
MOSTLY_IQ3_UXL = 48 # except 1d tensors
1451+
MOSTLY_IQ3_XXL = 43 # except 1d tensors
1452+
MOSTLY_Q3_K_XL = 44 # except 1d tensors
1453+
MOSTLY_IQ3_ML = 45 # except 1d tensors
1454+
MOSTLY_IQ3_XXXL = 46 # except 1d tensors
1455+
MOSTLY_IQ3_UXL = 47 # except 1d tensors
1456+
MOSTLY_IQ4_XSR = 48 # except 1d tensors
1457+
MOSTLY_IQ4_MR = 49 # except 1d tensors
1458+
MOSTLY_CQS = 99 # except 1d tensors
1457 1459

1458 1460
GUESSED = 1024 # not specified in the model file
1459 1461

include/llama.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -180,12 +180,13 @@ extern "C" {
180 180
LLAMA_FTYPE_MOSTLY_Q2_K_L = 40, // except 1d tensors
181 181
LLAMA_FTYPE_MOSTLY_IQ1_XS = 41, // except 1d tensors
182 182
LLAMA_FTYPE_MOSTLY_IQ1_XL = 42, // except 1d tensors
183-
LLAMA_FTYPE_MOSTLY_IQ4_XSR = 43, // except 1d tensors
184-
LLAMA_FTYPE_MOSTLY_IQ3_XXL = 44, // except 1d tensors
185-
LLAMA_FTYPE_MOSTLY_Q3_K_XL = 45, // except 1d tensors
186-
LLAMA_FTYPE_MOSTLY_IQ3_ML = 46, // except 1d tensors
187-
LLAMA_FTYPE_MOSTLY_IQ3_XXXL = 47, // except 1d tensors
188-
LLAMA_FTYPE_MOSTLY_IQ3_UXL = 48, // except 1d tensors
183+
LLAMA_FTYPE_MOSTLY_IQ3_XXL = 43, // except 1d tensors
184+
LLAMA_FTYPE_MOSTLY_Q3_K_XL = 44, // except 1d tensors
185+
LLAMA_FTYPE_MOSTLY_IQ3_ML = 45, // except 1d tensors
186+
LLAMA_FTYPE_MOSTLY_IQ3_XXXL = 46, // except 1d tensors
187+
LLAMA_FTYPE_MOSTLY_IQ3_UXL = 47, // except 1d tensors
188+
LLAMA_FTYPE_MOSTLY_IQ4_XSR = 48, // except 1d tensors
189+
LLAMA_FTYPE_MOSTLY_IQ4_MR = 49, // except 1d tensors
189 190
LLAMA_FTYPE_CQS = 99, // except 1d tensors
190 191

191 192
LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file

0 commit comments

Comments
 (0)