Skip to content

Commit cf8375c

Browse files
committed
continue Q5_K mixes
1 parent 2d052f7 commit cf8375c

File tree

4 files changed: +89 additions, -79 deletions

examples/quantize/quantize.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -62,10 +62,10 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
6262
{ "Q5_K", LLAMA_FTYPE_MOSTLY_Q5_K_M, "alias for Q5_K_M", },
6363
{ "Q5_K_S", LLAMA_FTYPE_MOSTLY_Q5_K_S, " 5.21G, +0.1049 ppl @ Llama-3-8B", },
6464
{ "Q5_K_M", LLAMA_FTYPE_MOSTLY_Q5_K_M, " 5.33G, +0.0569 ppl @ Llama-3-8B", },
65-
{ "Q5_K_XSR", LLAMA_FTYPE_MOSTLY_Q5_K_XL, " 5.4 bpw quantization mix", },
66-
{ "Q5_K_SR", LLAMA_FTYPE_MOSTLY_Q5_K_XL, " 5.6 bpw quantization mix", },
67-
{ "Q5_K_ML", LLAMA_FTYPE_MOSTLY_Q5_K_XL, " 5.8 bpw quantization mix", },
68-
{ "Q5_K_XL", LLAMA_FTYPE_MOSTLY_Q5_K_XL, " 6 bpw quantization mix", },
65+
{ "Q5_K_XS1R", LLAMA_FTYPE_MOSTLY_Q5_K_XS1R, " 5.4 bpw quantization mix", },
66+
{ "Q5_K_S2R", LLAMA_FTYPE_MOSTLY_Q5_K_S2R, " 5.6 bpw quantization mix", },
67+
{ "Q5_K_M3L", LLAMA_FTYPE_MOSTLY_Q5_K_M3L, " 5.8 bpw quantization mix", },
68+
{ "Q5_K_X4L", LLAMA_FTYPE_MOSTLY_Q5_K_X4L, " 6 bpw quantization mix", },
6969
{ "Q6_K", LLAMA_FTYPE_MOSTLY_Q6_K, " 6.14G, +0.0217 ppl @ Llama-3-8B", },
7070
{ "Q8_0", LLAMA_FTYPE_MOSTLY_Q8_0, " 7.96G, +0.0026 ppl @ Llama-3-8B", },
7171
{ "Q4_0_4_4", LLAMA_FTYPE_MOSTLY_Q4_0_4_4, " 4.34G, +0.4685 ppl @ Llama-3-8B", },

gguf-py/gguf/constants.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1466,10 +1466,10 @@ class LlamaFileType(IntEnum):
14661466
MOSTLY_IQ4_XSR = 113 # except 1d tensors
14671467
MOSTLY_IQ4_MR = 114 # except 1d tensors
14681468
MOSTLY_IQ4_LR = 115 # except 1d tensors
1469-
MOSTLY_Q5_K_XSR = 116 # except 1d tensors
1470-
MOSTLY_Q5_K_SR = 117 # except 1d tensors
1471-
MOSTLY_Q5_K_ML = 118 # except 1d tensors
1472-
MOSTLY_Q5_K_XL = 119 # except 1d tensors
1469+
MOSTLY_Q5_K_XS1R = 116 # except 1d tensors
1470+
MOSTLY_Q5_K_S2R = 117 # except 1d tensors
1471+
MOSTLY_Q5_K_M3L = 118 # except 1d tensors
1472+
MOSTLY_Q5_K_X4L = 119 # except 1d tensors
14731473
MOSTLY_CQS = 199 # except 1d tensors
14741474

14751475
GUESSED = 1024 # not specified in the model file

include/llama.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -191,10 +191,10 @@ extern "C" {
191191
LLAMA_FTYPE_MOSTLY_IQ4_XSR = 113, // except 1d tensors
192192
LLAMA_FTYPE_MOSTLY_IQ4_MR = 114, // except 1d tensors
193193
LLAMA_FTYPE_MOSTLY_IQ4_LR = 115, // except 1d tensors
194-
LLAMA_FTYPE_MOSTLY_Q5_K_XSR = 116, // except 1d tensors
195-
LLAMA_FTYPE_MOSTLY_Q5_K_SR = 117, // except 1d tensors
196-
LLAMA_FTYPE_MOSTLY_Q5_K_ML = 118, // except 1d tensors
197-
LLAMA_FTYPE_MOSTLY_Q5_K_XL = 119, // except 1d tensors
194+
LLAMA_FTYPE_MOSTLY_Q5_K_XS1R = 116, // except 1d tensors
195+
LLAMA_FTYPE_MOSTLY_Q5_K_S2R = 117, // except 1d tensors
196+
LLAMA_FTYPE_MOSTLY_Q5_K_M3L = 118, // except 1d tensors
197+
LLAMA_FTYPE_MOSTLY_Q5_K_X4L = 119, // except 1d tensors
198198
LLAMA_FTYPE_CQS = 199, // except 1d tensors
199199

200200
LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file

0 commit comments

Comments (0)