Skip to content

Commit 8de8e45

Browse files
monatis, ikawrakow, and Kawrakow
authored and committed
wip : implement GGUF (#2397)
* Add LLAMA_DEFAULT_RMS_EPS so we can change the default (#2384) Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com> * WIP: python class to write GGUF, incomplete C API for reading --------- Co-authored-by: Kawrakow <48489457+ikawrakow@users.noreply.github.com> Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
1 parent 367a4a2 commit 8de8e45

File tree

3 files changed

+481
-0
lines changed

3 files changed

+481
-0
lines changed

constants.py

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# File identification values written at the very start of a GGUF file.
GGUF_MAGIC = 0x47475546
GGUF_VERSION = 1

# general
# Metadata keys that do not depend on the model architecture.
KEY_GENERAL_ARCHITECTURE = "general.architecture"
KEY_GENERAL_QUANTIZATION_VERSION = "general.quantization_version"
KEY_GENERAL_NAME = "general.name"
KEY_GENERAL_AUTHOR = "general.author"
KEY_GENERAL_URL = "general.url"
KEY_GENERAL_DESCRIPTION = "general.description"
KEY_GENERAL_FILE_TYPE = "general.file_type"
KEY_GENERAL_LICENSE = "general.license"
KEY_GENERAL_SOURCE_URL = "general.source.url"
# Spelling fixed: the key previously read "hugginface", which does not match
# the "huggingface" spelling used by the GGUF key naming scheme.
KEY_GENERAL_SOURCE_HF_REPO = "general.source.huggingface.repository"

# LLM
# The "{llm}" placeholder is a str.format field to be filled with the
# architecture name, e.g. KEY_LLM_CONTEXT_LENGTH.format(llm="llama").
KEY_LLM_CONTEXT_LENGTH = "{llm}.context_length"
KEY_LLM_EMBEDDING_LENGTH = "{llm}.embedding_length"
KEY_LLM_LAYER_COUNT = "{llm}.layer_count"
KEY_LLM_FEED_FORWARD_LENGTH = "{llm}.feed_forward_length"
KEY_LLM_USE_PARALLEL_RESIDUAL = "{llm}.use_parallel_residual"
KEY_LLM_TENSOR_DATA_LAYOUT = "{llm}.tensor_data_layout"

# attention
KEY_ATTENTION_HEAD_COUNT = "{llm}.attention.head_count"
KEY_ATTENTION_HEAD_COUNT_KV = "{llm}.attention.head_count_kv"
KEY_ATTENTION_MAX_ALIBI_BIAS = "{llm}.attention.max_alibi_bias"
KEY_ATTENTION_CLAMP_KQV = "{llm}.attention.clamp_kqv"

# RoPE
KEY_ROPE_DIMENSION_COUNT = "{llm}.rope.dimension_count"
KEY_ROPE_SCALE = "{llm}.rope.scale"

gguf.c

+192
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
// TODO: split this into a proper gguf.h / gguf.c pair; for now the priority is to iterate quickly,
2+
// so everything is kept in this single file for quick debugging.
3+
4+
#include <stdio.h>
5+
#include <stdint.h>
6+
#include <stdlib.h>
7+
#include <stdbool.h>
8+
9+
10+
// Tensor element types. Values 4 and 5 are intentionally unused:
// they belonged to Q4_2 and Q4_3, whose support has been removed.
enum ggml_type {
    // floating point
    GGML_TYPE_F32   = 0,
    GGML_TYPE_F16   = 1,

    // block quantizations
    GGML_TYPE_Q4_0  = 2,
    GGML_TYPE_Q4_1  = 3,
    GGML_TYPE_Q5_0  = 6,
    GGML_TYPE_Q5_1  = 7,
    GGML_TYPE_Q8_0  = 8,
    GGML_TYPE_Q8_1  = 9,

    // k-quantizations
    GGML_TYPE_Q2_K  = 10,
    GGML_TYPE_Q3_K  = 11,
    GGML_TYPE_Q4_K  = 12,
    GGML_TYPE_Q5_K  = 13,
    GGML_TYPE_Q6_K  = 14,
    GGML_TYPE_Q8_K  = 15,

    // integers (values continue directly after Q8_K)
    GGML_TYPE_I8    = 16,
    GGML_TYPE_I16   = 17,
    GGML_TYPE_I32   = 18,

    // one past the highest type value
    GGML_TYPE_COUNT = 19,
};
33+
34+
// Type tag of a metadata value. The reader in this file loads it from the
// file as a 32-bit integer, so every value is spelled out explicitly.
enum gguf_metadata_value_type {
    // fixed-width integers
    GGUF_METADATA_VALUE_TYPE_UINT8   = 0,
    GGUF_METADATA_VALUE_TYPE_INT8    = 1,
    GGUF_METADATA_VALUE_TYPE_UINT16  = 2,
    GGUF_METADATA_VALUE_TYPE_INT16   = 3,
    GGUF_METADATA_VALUE_TYPE_UINT32  = 4,
    GGUF_METADATA_VALUE_TYPE_INT32   = 5,

    // other scalars
    GGUF_METADATA_VALUE_TYPE_FLOAT32 = 6,
    GGUF_METADATA_VALUE_TYPE_BOOL    = 7,

    // variable-length values
    GGUF_METADATA_VALUE_TYPE_STRING  = 8,
    GGUF_METADATA_VALUE_TYPE_ARRAY   = 9,
};
46+
47+
// Length-prefixed string as used for metadata keys and tensor names.
struct gguf_string_t {
    uint32_t len;     // number of bytes in `string`
    char *   string;  // character data
};
51+
52+
union gguf_metadata_value_t;
53+
54+
// Union definition for gguf_metadata_value_t
55+
union gguf_metadata_value_t {
56+
uint8_t uint8;
57+
int8_t int8;
58+
uint16_t uint16;
59+
int16_t int16;
60+
uint32_t uint32;
61+
int32_t int32;
62+
float float32;
63+
bool bool_;
64+
struct gguf_string_t string;
65+
struct {
66+
uint32_t len;
67+
enum gguf_metadata_value_type type;
68+
union gguf_metadata_value_t * array;
69+
} array;
70+
};
71+
72+
73+
struct gguf_metadata_kv_t {
74+
struct gguf_string_t key;
75+
uint32_t value_len;
76+
enum gguf_metadata_value_type value_type;
77+
union gguf_metadata_value_t* value;
78+
};
79+
80+
// File header: four 32-bit fields read from the start of the file, followed by
// the in-memory table of metadata key/value pairs.
struct gguf_header_t {
    uint32_t magic;              // file magic, checked against 0x47475546 by the reader
    uint32_t version;            // format version, the reader accepts only 1
    uint32_t tensor_count;       // number of tensors announced by the file
    uint32_t metadata_kv_count;  // number of entries in `metadata_kv`

    struct gguf_metadata_kv_t * metadata_kv;  // heap-allocated array, released by gguf_free()
};
87+
88+
struct gguf_tensor_info_t {
89+
struct gguf_string_t name;
90+
uint32_t n_dimensions;
91+
uint32_t dimensions[];
92+
};
93+
94+
struct gguf_file_t {
95+
struct gguf_header_t header;
96+
uint8_t tensor_data[];
97+
};
98+
99+
void read_gguf_file(const char * file_path, struct gguf_file_t * gguf_file) {
100+
FILE* file = fopen(file_path, "rb");
101+
if (file == NULL) {
102+
printf("Error opening the file.\n");
103+
return;
104+
}
105+
106+
fread(&gguf_file->header.magic, sizeof(uint32_t), 1, file);
107+
108+
// Verify magic and version
109+
if (gguf_file->header.magic != 0x47475546) {
110+
printf("Invalid magic number. Not a valid GGUF file.\n");
111+
fclose(file);
112+
return;
113+
}
114+
115+
fread(&gguf_file->header.version, sizeof(uint32_t), 1, file);
116+
117+
if (gguf_file->header.version != 1) {
118+
printf("Unsupported version. Expected version 1.\n");
119+
fclose(file);
120+
return;
121+
}
122+
123+
fread(&gguf_file->header.tensor_count, sizeof(uint32_t), 1, file);
124+
fread(&gguf_file->header.metadata_kv_count, sizeof(uint32_t), 1, file);
125+
126+
printf("Magic: %x\n", gguf_file->header.magic);
127+
printf("Version: %d\n", gguf_file->header.version);
128+
printf("Tensor Count: %d\n", gguf_file->header.tensor_count);
129+
printf("Metadata Key-Value Count: %d\n", gguf_file->header.metadata_kv_count);
130+
131+
gguf_file->header.metadata_kv = (struct gguf_metadata_kv_t*)malloc(gguf_file->header.metadata_kv_count * sizeof(struct gguf_metadata_kv_t));
132+
133+
for (int i = 0; i < gguf_file->header.metadata_kv_count; i++) {
134+
struct gguf_metadata_kv_t* kv = &gguf_file->header.metadata_kv[i];
135+
fread(&kv->key.len, sizeof(uint32_t), 1, file);
136+
kv->key.string = (char*)malloc(kv->key.len ); // Allocate memory for the key string
137+
fread(kv->key.string, sizeof(char), kv->key.len, file);
138+
//kv->key.string[kv->key.len] = '\0'; // Null-terminate the key string
139+
140+
fread(&kv->value_type, sizeof(uint32_t), 1, file);
141+
142+
printf("Metadata Value Type: %d\n", kv->value_type);
143+
printf("Metadata Key: %s\n", kv->key.string);
144+
145+
// Read metadata value according to its type using reinterpret_cast
146+
switch (kv->value_type) {
147+
case GGUF_METADATA_VALUE_TYPE_UINT32:
148+
kv->value = (uint32_t *) malloc(sizeof(uint32_t));
149+
fread(kv->value, sizeof(uint32_t), 1, file);
150+
printf("value: %d\n", kv->value->uint32);
151+
break;
152+
case GGUF_METADATA_VALUE_TYPE_FLOAT32:
153+
kv->value = (float *)malloc(sizeof(float));
154+
fread(kv->value, sizeof(float), 1, file);
155+
printf("value: %f\n", (float)kv->value->float32);
156+
break;
157+
case GGUF_METADATA_VALUE_TYPE_STRING:
158+
fread(&kv->value_len, sizeof(uint32_t), 1, file);
159+
printf("value len: %d\n", kv->value_len);
160+
kv->value = (char *)malloc(sizeof(char) * kv->value_len); // Allocate memory for the value string
161+
fread(kv->value, sizeof(char), kv->value_len, file);
162+
printf("value: %s\n", (char *)kv->value);
163+
break;
164+
// ... (handle other types in a similar manner)
165+
default:
166+
printf("Unsupported metadata value type.\n");
167+
fclose(file);
168+
return;
169+
}
170+
}
171+
172+
// TODO: handle reading tensor data
173+
174+
fclose(file);
175+
}
176+
177+
void gguf_free(struct gguf_file_t * gguf_file) {
178+
// Free allocated memory for key strings avd values
179+
for (int i = 0; i < gguf_file->header.metadata_kv_count; i++) {
180+
free(gguf_file->header.metadata_kv[i].key.string);
181+
free(gguf_file->header.metadata_kv[i].value);
182+
}
183+
free(gguf_file->header.metadata_kv);
184+
}
185+
186+
int main() {
187+
const char* file_path = "example.gguf";
188+
struct gguf_file_t gguf_file;
189+
read_gguf_file(file_path, &gguf_file);
190+
gguf_free(&gguf_file);
191+
return 0;
192+
}

0 commit comments

Comments
 (0)