@@ -86,6 +86,11 @@ void GGUFHandler::OpenFile(const std::string& file_path) {
8686#endif
8787}
8888
89+ void GGUFHandler::CheckOffset (int offset) const {
90+ if (offset > file_size_)
91+ throw std::runtime_error (" Unexpected EOF" );
92+ }
93+
8994void GGUFHandler::CloseFile () {
9095#ifdef _WIN32
9196 if (data_ != nullptr ) {
@@ -101,14 +106,11 @@ void GGUFHandler::CloseFile() {
101106
102107std::pair<std::size_t , std::string> GGUFHandler::ReadString (
103108 std::size_t offset) const {
109+ CheckOffset (offset + 8 );
104110 uint64_t length;
105111 std::memcpy (&length, data_ + offset, sizeof (uint64_t ));
106112
107- if (offset + 8 + length > file_size_) {
108- throw std::runtime_error (
109- " GGUF metadata string length exceeds file size.\n " );
110- }
111-
113+ CheckOffset (offset + 8 + length);
112114 std::string value (reinterpret_cast <const char *>(data_ + offset + 8 ), length);
113115 return {8 + static_cast <std::size_t >(length), value};
114116}
@@ -117,29 +119,37 @@ size_t GGUFHandler::ReadMetadataValue(int type, std::size_t offset,
117119 const std::string& key) {
118120 switch (type) {
119121 case 0 : // UINT8
122+ CheckOffset (offset + 1 );
120123 metadata_uint8_[key] = data_[offset];
121124 return 1 ;
122125 case 1 : // INT8
126+ CheckOffset (offset + 1 );
123127 metadata_int8_[key] = static_cast <int8_t >(data_[offset]);
124128 return 1 ;
125129 case 2 : // UINT16
130+ CheckOffset (offset + 2 );
126131 metadata_uint16_[key] =
127132 *reinterpret_cast <const uint16_t *>(data_ + offset);
128133 return 2 ;
129134 case 3 : // INT16
135+ CheckOffset (offset + 2 );
130136 metadata_int16_[key] = *reinterpret_cast <const int16_t *>(data_ + offset);
131137 return 2 ;
132138 case 4 : // UINT32
139+ CheckOffset (offset + 4 );
133140 metadata_uint32_[key] =
134141 *reinterpret_cast <const uint32_t *>(data_ + offset);
135142 return 4 ;
136143 case 5 : // INT32
144+ CheckOffset (offset + 4 );
137145 metadata_int32_[key] = *reinterpret_cast <const int32_t *>(data_ + offset);
138146 return 4 ;
139147 case 6 : // FLOAT32
148+ CheckOffset (offset + 4 );
140149 metadata_float_[key] = *reinterpret_cast <const float *>(data_ + offset);
141150 return 4 ;
142151 case 7 : // BOOL
152+ CheckOffset (offset + 1 );
143153 metadata_bool_[key] = data_[offset] != 0 ;
144154 return 1 ;
145155 case 8 : // STRING
@@ -152,13 +162,16 @@ size_t GGUFHandler::ReadMetadataValue(int type, std::size_t offset,
152162
153163 return ReadArray (offset, key);
154164 case 10 : // UINT64
165+ CheckOffset (offset + 8 );
155166 metadata_uint64_[key] =
156167 *reinterpret_cast <const uint64_t *>(data_ + offset);
157168 return 8 ;
158169 case 11 : // INT64
170+ CheckOffset (offset + 8 );
159171 metadata_int64_[key] = *reinterpret_cast <const int64_t *>(data_ + offset);
160172 return 8 ;
161173 case 12 : // FLOAT64
174+ CheckOffset (offset + 8 );
162175 metadata_double_[key] = *reinterpret_cast <const double *>(data_ + offset);
163176 return 8 ;
164177 default :
@@ -168,9 +181,11 @@ size_t GGUFHandler::ReadMetadataValue(int type, std::size_t offset,
168181}
169182
170183size_t GGUFHandler::ReadArray (std::size_t offset, const std::string& key) {
184+ CheckOffset (offset + 4 );
171185 uint32_t array_type = *reinterpret_cast <const uint32_t *>(data_ + offset);
172186 // std::memcpy(&array_type, data_ + offset, sizeof(uint32_t));
173187
188+ CheckOffset (offset + 4 + 8 );
174189 uint64_t array_length =
175190 *reinterpret_cast <const uint64_t *>(data_ + offset + 4 );
176191 // std::memcpy(&array_length, data_ + offset + 4, sizeof(uint64_t));
@@ -199,53 +214,62 @@ size_t GGUFHandler::ReadArray(std::size_t offset, const std::string& key) {
199214 // assume that array ony has 2 type string and int
200215 switch (array_type) {
201216 case 0 :
217+ CheckOffset (offset + array_offset + 1 );
202218 uint8_value = data_[offset + array_offset];
203219 length = 1 ;
204220 array_values_float.push_back (static_cast <float >(uint8_value));
205221 break ;
206222 case 1 : {
223+ CheckOffset (offset + array_offset + 1 );
207224 int8_value = static_cast <int8_t >(data_[offset + array_offset]);
208225 length = 1 ;
209226 array_values_float.push_back (static_cast <float >(int8_value));
210227 }
211228
212229 break ;
213230 case 2 :
231+ CheckOffset (offset + array_offset + 2 );
214232 uint16_value =
215233 *reinterpret_cast <const uint16_t *>(data_ + offset + array_offset);
216234 length = 2 ;
217235 array_values_float.push_back (static_cast <float >(uint16_value));
218236 break ;
219237 case 3 :
238+ CheckOffset (offset + array_offset + 2 );
220239 int16_value =
221240 *reinterpret_cast <const int16_t *>(data_ + offset + array_offset);
222241 length = 2 ;
223242 array_values_float.push_back (static_cast <float >(int16_value));
224243 break ;
225244 case 4 :
245+ CheckOffset (offset + array_offset + 4 );
226246 uint32_value =
227247 *reinterpret_cast <const uint32_t *>(data_ + offset + array_offset);
228248 length = 4 ;
229249 array_values_float.push_back (static_cast <float >(uint32_value));
230250 break ;
231251 case 5 :
252+ CheckOffset (offset + array_offset + 4 );
232253 int32_value =
233254 *reinterpret_cast <const int32_t *>(data_ + offset + array_offset);
234255 length = 4 ;
235256 array_values_float.push_back (static_cast <float >(int32_value));
236257 break ;
237258 case 6 :
259+ CheckOffset (offset + array_offset + 4 );
238260 float_value =
239261 *reinterpret_cast <const float *>(data_ + offset + array_offset);
240262 length = 4 ;
241263 array_values_float.push_back (static_cast <float >(float_value));
242264 break ;
243265 case 7 :
266+ CheckOffset (offset + array_offset + 1 );
244267 bool_value = data_[offset + array_offset] != 0 ;
245268 length = 1 ;
246269 array_values_float.push_back (static_cast <float >(bool_value));
247270 break ;
248271 case 8 : {
272+ CheckOffset (offset + array_offset + 8 );
249273 uint64_t length_ =
250274 *reinterpret_cast <const uint64_t *>(data_ + offset + array_offset);
251275 std::string value (
@@ -255,18 +279,21 @@ size_t GGUFHandler::ReadArray(std::size_t offset, const std::string& key) {
255279 array_values_string.push_back (value);
256280 } break ;
257281 case 10 :
282+ CheckOffset (offset + array_offset + 8 );
258283 uint64_value =
259284 *reinterpret_cast <const uint64_t *>(data_ + offset + array_offset);
260285 length = 8 ;
261286 array_values_float.push_back (static_cast <float >(uint64_value));
262287 break ;
263288 case 11 :
289+ CheckOffset (offset + array_offset + 8 );
264290 int64_value =
265291 *reinterpret_cast <const int64_t *>(data_ + offset + array_offset);
266292 length = 8 ;
267293 array_values_float.push_back (static_cast <float >(int64_value));
268294 break ;
269295 case 12 :
296+ CheckOffset (offset + array_offset + 8 );
270297 double_value =
271298 *reinterpret_cast <const double *>(data_ + offset + array_offset);
272299 length = 8 ;
@@ -279,9 +306,6 @@ size_t GGUFHandler::ReadArray(std::size_t offset, const std::string& key) {
279306 }
280307
281308 array_offset += length;
282- if (offset + array_offset > file_size_) {
283- throw std::runtime_error (" GGUF Parser Array exceeded file size.\n " );
284- }
285309 }
286310 if (array_values_string.size () > 0 )
287311 metadata_array_string_[key] = array_values_string;
@@ -290,8 +314,11 @@ size_t GGUFHandler::ReadArray(std::size_t offset, const std::string& key) {
290314 return array_offset;
291315}
292316
317+ // https://github.com/ggml-org/ggml/blob/master/docs/gguf.md
293318void GGUFHandler::Parse (const std::string& file_path) {
294319 OpenFile (file_path);
320+ CheckOffset (4 + 4 + 8 + 8 );
321+
295322 LOG_INFO << " GGUF magic number: " << *reinterpret_cast <const uint32_t *>(data_)
296323 << " \n " ;
297324 if (*reinterpret_cast <const uint32_t *>(data_) != GGUF_MAGIC_NUMBER) {
@@ -311,6 +338,7 @@ void GGUFHandler::Parse(const std::string& file_path) {
311338 auto [key_byte_length, key] = ReadString (offset);
312339 offset += key_byte_length;
313340 LOG_INFO << " key: " << key << " \n " ;
341+ CheckOffset (offset + 4 );
314342 uint32_t value_type = *reinterpret_cast <const uint32_t *>(data_ + offset);
315343 offset += 4 ;
316344 LOG_INFO << " value type number: " << value_type << " \n " ;
0 commit comments