|
12 | 12 |
|
13 | 13 | import os |
14 | 14 | if 'NO_LOCAL_GGUF' not in os.environ: |
15 | | - sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf')) |
| 15 | + sys.path.insert(1, str(Path(__file__).parent / 'gguf-py')) |
16 | 16 | import gguf |
17 | 17 |
|
18 | | -# Note: Does not support GGML_QKK_64 |
19 | | -QK_K = 256 |
20 | | -# Items here are (block size, type size) |
21 | | -GGML_QUANT_SIZES = { |
22 | | - gguf.GGMLQuantizationType.F32 : (1, 4), |
23 | | - gguf.GGMLQuantizationType.F16 : (1, 2), |
24 | | - gguf.GGMLQuantizationType.Q4_0 : (32, 2 + 16), |
25 | | - gguf.GGMLQuantizationType.Q4_1 : (32, 2 + 2 + 16), |
26 | | - gguf.GGMLQuantizationType.Q5_0 : (32, 2 + 4 + 16), |
27 | | - gguf.GGMLQuantizationType.Q5_1 : (32, 2 + 2 + 4 + 16), |
28 | | - gguf.GGMLQuantizationType.Q8_0 : (32, 2 + 32), |
29 | | - gguf.GGMLQuantizationType.Q8_1 : (32, 4 + 4 + 32), |
30 | | - gguf.GGMLQuantizationType.Q2_K : (256, 2 + 2 + QK_K // 16 + QK_K // 4), |
31 | | - gguf.GGMLQuantizationType.Q3_K : (256, 2 + QK_K // 4 + QK_K // 8 + 12), |
32 | | - gguf.GGMLQuantizationType.Q4_K : (256, 2 + 2 + QK_K // 2 + 12), |
33 | | - gguf.GGMLQuantizationType.Q5_K : (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12), |
34 | | - gguf.GGMLQuantizationType.Q6_K : (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16), |
35 | | - gguf.GGMLQuantizationType.Q8_K : (256, 4 + QK_K + QK_K // 8), |
36 | | -} |
37 | | - |
38 | 18 | class GGMLFormat(IntEnum): |
39 | 19 | GGML = 0 |
40 | 20 | GGMF = 1 |
@@ -125,7 +105,7 @@ def load(self, data, offset): |
125 | 105 | (n_dims, name_len, dtype) = struct.unpack('<3I', data[offset:offset + 12]) |
126 | 106 | assert n_dims >= 0 and n_dims <= 4, f'Invalid tensor dimensions {n_dims}' |
127 | 107 | assert name_len < 4096, 'Absurd tensor name length' |
128 | | - quant = GGML_QUANT_SIZES.get(dtype) |
| 108 | + quant = gguf.GGML_QUANT_SIZES.get(dtype) |
129 | 109 | assert quant is not None, 'Unknown tensor type' |
130 | 110 | (blksize, tysize) = quant |
131 | 111 | offset += 12 |
|
0 commit comments