feat: add new GGUFValueType.OBJ virtual type

The content of the OBJ type is actually a list of all key names of the object.

* Python
  * `gguf_writer.py`:
    * Added `def add_kv(self, key: str, val: Any) -> None`: automatically determines the appropriate value type based on `val`.
    * Added `def add_dict(self, key: str, val: dict, excludes: Sequence[str] = []) -> None`: adds object (dict) values; it recursively adds all subkeys.
    * Added `add_array_ex` to support nested and mixed-type arrays.
  * `constants.py`:
    * Added `GGUFValueType.get_type_ex(val)`: adds support for numpy's integer and floating-point types, and selects the bit width of a plain integer according to its magnitude.
  * `gguf_reader.py`:
    * Added the ability to retrieve values from specific fields using the `ReaderField.get()` method.
  * Unit test added
* CPP
  * `ggml`:
    * Added `GGUF_TYPE_OBJ` to the `gguf_type` enum type.
    * Use `gguf_get_arr_n` and `gguf_get_arr_str` to get the subkey names of a `GGUF_TYPE_OBJ`.
    * Added the `gguf_set_obj_str` function to set the subkey names of an object.
    * Added the `gguf_set_arr_obj` function to set the element count of an array of objects.
    * Added the `gguf_set_arr_arr` function to set the element count of a nested array.
  * `llama`:
    * Modified `gguf_kv_to_str`.
    * Added the `LLAMA_API char * gguf_kv_to_c_str` function to get a value as a JSON-formatted C string.
      * Maybe this API should be moved into `ggml` as `gguf_get_val_json`. (The problem is that ggml.c is written in C, whereas this code makes heavy use of C++ features.)
    * Added basic support for `GGUF_TYPE_OBJ` and nested arrays.
  * Unit test added

feat: add basic support to GGUF_TYPE_OBJ on cpp
feat(gguf.py): add OBJ and mixed-type array support to GGUF ARRAY
feat: add OBJ and mixed-type array support to GGUF ARRAY (CPP)
feat: add nested array support
feat:
* Subkey name convention in OBJ types:
  * If the first character of the subkey name is "/", the name is a reference to the full name of another key.
  * If the name contains a ":" delimiter, the string after the colon is the subkey name within this object; otherwise the subkey name of the referenced key is used.
feat: add LLAMA_API gguf_kv_to_c_str to llama.h
test: write test gguf file to tests folder directly (py)
test: add test-gguf-meta.cpp
feat: Key convention: "." indicates that the key is a subkey, not an independent key.
feat: add excludes argument to add_dict (gguf_writer.py)
feat: add add_array_ex to support nested and mixed-type arrays, and keep add_array unchanged
fix(constants.py): roll back the get_type function and add the new get_type_ex
test: add test compatibility
fix: use GGML_MALLOC instead of malloc
ggerganov · Feb 3, 2024 · fe25927 · fe25927
1 parent a305dba
commit fe25927
Show file tree

Hide file tree

Showing 14 changed files with 574 additions and 89 deletions.
diff --git a/Makefile b/Makefile
@@ -769,3 +769,6 @@ tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o te
 
 tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
+tests/test-gguf-meta: tests/test-gguf-meta.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
@@ -6,6 +6,7 @@
 #include "ggml.h"
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
+#include "llama.h"
 
 #ifdef GGML_USE_CUBLAS
 #include "ggml-cuda.h"
@@ -148,24 +149,6 @@ static std::string get_ftype(int ftype) {
     return ggml_type_name(static_cast<ggml_type>(ftype));
 }
 
-static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) {
-    switch (type) {
-        case GGUF_TYPE_UINT8:   return std::to_string(((const uint8_t  *)data)[i]);
-        case GGUF_TYPE_INT8:    return std::to_string(((const int8_t   *)data)[i]);
-        case GGUF_TYPE_UINT16:  return std::to_string(((const uint16_t *)data)[i]);
-        case GGUF_TYPE_INT16:   return std::to_string(((const int16_t  *)data)[i]);
-        case GGUF_TYPE_UINT32:  return std::to_string(((const uint32_t *)data)[i]);
-        case GGUF_TYPE_INT32:   return std::to_string(((const int32_t  *)data)[i]);
-        case GGUF_TYPE_UINT64:  return std::to_string(((const uint64_t *)data)[i]);
-        case GGUF_TYPE_INT64:   return std::to_string(((const int64_t  *)data)[i]);
-        case GGUF_TYPE_FLOAT32: return std::to_string(((const float    *)data)[i]);
-        case GGUF_TYPE_FLOAT64: return std::to_string(((const double   *)data)[i]);
-        case GGUF_TYPE_BOOL:    return ((const bool *)data)[i] ? "true" : "false";
-        default:                return format("unknown type %d", type);
-    }
-}
-
-
 static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
     std::string result;
     for (size_t pos = 0; ; pos += search.length()) {
@@ -180,43 +163,6 @@ static void replace_all(std::string & s, const std::string & search, const std::
     s = std::move(result);
 }
 
-static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
-    const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
-
-    switch (type) {
-        case GGUF_TYPE_STRING:
-            return gguf_get_val_str(ctx_gguf, i);
-        case GGUF_TYPE_ARRAY:
-            {
-                const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
-                int arr_n = gguf_get_arr_n(ctx_gguf, i);
-                const void * data = gguf_get_arr_data(ctx_gguf, i);
-                std::stringstream ss;
-                ss << "[";
-                for (int j = 0; j < arr_n; j++) {
-                    if (arr_type == GGUF_TYPE_STRING) {
-                        std::string val = gguf_get_arr_str(ctx_gguf, i, j);
-                        // escape quotes
-                        replace_all(val, "\\", "\\\\");
-                        replace_all(val, "\"", "\\\"");
-                        ss << '"' << val << '"';
-                    } else if (arr_type == GGUF_TYPE_ARRAY) {
-                        ss << "???";
-                    } else {
-                        ss << gguf_data_to_str(arr_type, data, j);
-                    }
-                    if (j < arr_n - 1) {
-                        ss << ", ";
-                    }
-                }
-                ss << "]";
-                return ss.str();
-            }
-        default:
-            return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0);
-    }
-}
-
 static void print_tensor_info(const ggml_tensor* tensor, const char* prefix = "") {
     size_t tensor_size = ggml_nbytes(tensor);
     printf("%s: n_dims = %d, name = %s, tensor_size=%zu, shape:[%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "], type = %s\n",
@@ -784,11 +730,12 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
             const char * name           = gguf_get_key(ctx, i);
             const enum gguf_type type   = gguf_get_kv_type(ctx, i);
             const std::string type_name =
-                type == GGUF_TYPE_ARRAY
+                type == GGUF_TYPE_ARRAY || type == GGUF_TYPE_OBJ
                 ? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx, i)), gguf_get_arr_n(ctx, i))
                 : gguf_type_name(type);
 
-            std::string value          = gguf_kv_to_str(ctx, i);
+            char * v                   = gguf_kv_to_c_str(ctx, i, name);
+            std::string value          = v;
             const size_t MAX_VALUE_LEN = 40;
             if (value.size() > MAX_VALUE_LEN) {
                 value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str());

diff --git a/ggml.c b/ggml.c
@@ -19281,8 +19281,9 @@ static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
     [GGUF_TYPE_INT64]   = sizeof(int64_t),
     [GGUF_TYPE_FLOAT64] = sizeof(double),
     [GGUF_TYPE_ARRAY]   = 0, // undefined
+    [GGUF_TYPE_OBJ]     = 0, // undefined
 };
-static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
+static_assert(GGUF_TYPE_COUNT == 14, "GGUF_TYPE_COUNT != 14");
 
 static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
     [GGUF_TYPE_UINT8]   = "u8",
@@ -19298,8 +19299,9 @@ static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
     [GGUF_TYPE_UINT64]  = "u64",
     [GGUF_TYPE_INT64]   = "i64",
     [GGUF_TYPE_FLOAT64] = "f64",
+    [GGUF_TYPE_OBJ]     = "obj",
 };
-static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
+static_assert(GGUF_TYPE_COUNT == 14, "GGUF_TYPE_COUNT != 14");
 
 union gguf_value {
     uint8_t  uint8;
@@ -19522,6 +19524,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                 case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
                 case GGUF_TYPE_BOOL:    ok = ok && gguf_fread_el (file, &kv->value.bool_,   sizeof(kv->value.bool_),   &offset); break;
                 case GGUF_TYPE_STRING:  ok = ok && gguf_fread_str(file, &kv->value.str,                                &offset); break;
+                case GGUF_TYPE_OBJ:
                 case GGUF_TYPE_ARRAY:
                     {
                         ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
@@ -19568,7 +19571,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                                         ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
                                     }
                                 } break;
-                            case GGUF_TYPE_ARRAY:
+                            case GGUF_TYPE_OBJ:
+                            case GGUF_TYPE_ARRAY: break;
                             default: GGML_ASSERT(false && "invalid type"); break;
                         }
                     } break;
@@ -19775,7 +19779,7 @@ void gguf_free(struct gguf_context * ctx) {
                 }
             }
 
-            if (kv->type == GGUF_TYPE_ARRAY) {
+            if (kv->type == GGUF_TYPE_ARRAY || kv->type == GGUF_TYPE_OBJ) {
                 if (kv->value.arr.data) {
                     if (kv->value.arr.type == GGUF_TYPE_STRING) {
                         for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
@@ -19860,7 +19864,7 @@ enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int key_id) {
 
 enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id) {
     GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
+    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY || ctx->kv[key_id].type == GGUF_TYPE_OBJ);
     return ctx->kv[key_id].value.arr.type;
 }
 
@@ -19872,15 +19876,15 @@ const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id) {
 
 const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) {
     GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
+    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY || ctx->kv[key_id].type == GGUF_TYPE_OBJ);
     struct gguf_kv * kv = &ctx->kv[key_id];
     struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
     return str->data;
 }
 
 int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) {
     GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
-    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
+    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY || ctx->kv[key_id].type == GGUF_TYPE_OBJ);
     return ctx->kv[key_id].value.arr.n;
 }
 
@@ -19959,6 +19963,7 @@ const char * gguf_get_val_str(const struct gguf_context * ctx, int key_id) {
 const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id) {
     GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
     GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_ARRAY);
+    GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_OBJ);
     GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_STRING);
     return &ctx->kv[key_id].value;
 }
@@ -20103,6 +20108,10 @@ void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_ty
     ctx->kv[idx].type           = GGUF_TYPE_ARRAY;
     ctx->kv[idx].value.arr.type = type;
     ctx->kv[idx].value.arr.n    = n;
+    if (data == NULL) {
+        ctx->kv[idx].value.arr.data = NULL;
+        return;
+    }
     ctx->kv[idx].value.arr.data = GGML_MALLOC(n*gguf_type_size(type));
     memcpy(ctx->kv[idx].value.arr.data, data, n*gguf_type_size(type));
 }
@@ -20121,6 +20130,38 @@ void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char **
     }
 }
 
+void gguf_set_arr_obj(struct gguf_context * ctx, const char * key, int n) {
+    const int idx = gguf_get_or_add_key(ctx, key);
+
+    ctx->kv[idx].type           = GGUF_TYPE_ARRAY;
+    ctx->kv[idx].value.arr.type = GGUF_TYPE_OBJ;
+    ctx->kv[idx].value.arr.n    = n;
+    ctx->kv[idx].value.arr.data = NULL;
+}
+
+void gguf_set_arr_arr(struct gguf_context * ctx, const char * key, int n) {
+    const int idx = gguf_get_or_add_key(ctx, key);
+
+    ctx->kv[idx].type           = GGUF_TYPE_ARRAY;
+    ctx->kv[idx].value.arr.type = GGUF_TYPE_ARRAY;
+    ctx->kv[idx].value.arr.n    = n;
+    ctx->kv[idx].value.arr.data = NULL;
+}
+
+void gguf_set_obj_str(struct gguf_context * ctx, const char * key, const char ** data, int n) {
+    const int idx = gguf_get_or_add_key(ctx, key);
+
+    ctx->kv[idx].type           = GGUF_TYPE_OBJ;
+    ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING;
+    ctx->kv[idx].value.arr.n    = n;
+    ctx->kv[idx].value.arr.data = GGML_MALLOC(n*sizeof(struct gguf_str));
+    for (int i = 0; i < n; i++) {
+        struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
+        str->n    = strlen(data[i]);
+        str->data = strdup(data[i]);
+    }
+}
+
 // set or add KV pairs from another context
 void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
     for (uint32_t i = 0; i < src->header.n_kv; i++) {
@@ -20137,6 +20178,15 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
             case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64);  break;
             case GGUF_TYPE_BOOL:    gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_);    break;
             case GGUF_TYPE_STRING:  gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
+            case GGUF_TYPE_OBJ:
+                {
+                    const char ** data = malloc(src->kv[i].value.arr.n*sizeof(char *));
+                    for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
+                        data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
+                    }
+                    gguf_set_obj_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
+                    free((void *)data);
+                } break;
             case GGUF_TYPE_ARRAY:
                 {
                     if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) {
@@ -20146,8 +20196,6 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
                         }
                         gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
                         GGML_FREE((void *)data);
-                    } else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
-                        GGML_ASSERT(false && "nested arrays not supported");
                     } else {
                         gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
                     }
@@ -20301,6 +20349,7 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
             case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
             case GGUF_TYPE_BOOL:    gguf_bwrite_el (buf, &kv->value.bool_,   sizeof(kv->value.bool_)  ); break;
             case GGUF_TYPE_STRING:  gguf_bwrite_str(buf, &kv->value.str                               ); break;
+            case GGUF_TYPE_OBJ:
             case GGUF_TYPE_ARRAY:
                 {
                     gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type));
@@ -20327,7 +20376,8 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
                                     gguf_bwrite_str(buf, &((struct gguf_str *) kv->value.arr.data)[j]);
                                 }
                             } break;
-                        case GGUF_TYPE_ARRAY:
+                        case GGUF_TYPE_OBJ:
+                        case GGUF_TYPE_ARRAY: break;
                         default: GGML_ASSERT(false && "invalid type"); break;
                     }
                 } break;

diff --git a/ggml.h b/ggml.h
@@ -2141,6 +2141,7 @@ extern "C" {
         GGUF_TYPE_UINT64  = 10,
         GGUF_TYPE_INT64   = 11,
         GGUF_TYPE_FLOAT64 = 12,
+        GGUF_TYPE_OBJ     = 13,
         GGUF_TYPE_COUNT,       // marks the end of the enum
     };
 
@@ -2212,6 +2213,9 @@ extern "C" {
     GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
     GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
     GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
+    GGML_API void gguf_set_arr_obj (struct gguf_context * ctx, const char * key, int n);
+    GGML_API void gguf_set_arr_arr (struct gguf_context * ctx, const char * key, int n);
+    GGML_API void gguf_set_obj_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
 
     // set or add KV pairs from another context
     GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);

diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
@@ -3,6 +3,7 @@
 import sys
 from enum import Enum, IntEnum, auto
 from typing import Any
+import numpy as np
 
 #
 # constants
@@ -550,6 +551,64 @@ class GGUFValueType(IntEnum):
     UINT64  = 10
     INT64   = 11
     FLOAT64 = 12
+    OBJ     = 13
+
+    @staticmethod
+    def get_type_ex(val: Any) -> GGUFValueType:
+        if isinstance(val, (str, bytes, bytearray)):
+            return GGUFValueType.STRING
+        elif isinstance(val, list):
+            return GGUFValueType.ARRAY
+        elif isinstance(val, np.float32):
+            return GGUFValueType.FLOAT32
+        elif isinstance(val, np.float64):
+            return GGUFValueType.FLOAT64
+        elif isinstance(val, float):
+            return GGUFValueType.FLOAT32
+        elif isinstance(val, bool):
+            return GGUFValueType.BOOL
+        elif isinstance(val, np.uint8):
+            return GGUFValueType.UINT8
+        elif isinstance(val, np.uint16):
+            return GGUFValueType.UINT16
+        elif isinstance(val, np.uint32):
+            return GGUFValueType.UINT32
+        elif isinstance(val, np.uint64):
+            return GGUFValueType.UINT64
+        elif isinstance(val, np.int8):
+            return GGUFValueType.INT8
+        elif isinstance(val, np.int16):
+            return GGUFValueType.INT16
+        elif isinstance(val, np.int32):
+            return GGUFValueType.INT32
+        elif isinstance(val, np.int64):
+            return GGUFValueType.INT64
+        elif isinstance(val, int):
+            if val >=0 and val <= np.iinfo(np.uint8).max:
+                return GGUFValueType.UINT8
+            elif val >=0 and val <= np.iinfo(np.uint16).max:
+                return GGUFValueType.UINT16
+            elif val >=0 and val <= np.iinfo(np.uint32).max:
+                return GGUFValueType.UINT32
+            elif val >=0 and val <= np.iinfo(np.uint64).max:
+                return GGUFValueType.UINT64
+            elif val >=np.iinfo(np.int8).min and val <= np.iinfo(np.int8).max:
+                return GGUFValueType.INT8
+            elif val >=np.iinfo(np.int16).min and val <= np.iinfo(np.int16).max:
+                return GGUFValueType.INT16
+            elif val >=np.iinfo(np.int32).min and val <= np.iinfo(np.int32).max:
+                return GGUFValueType.INT32
+            elif val >=np.iinfo(np.int64).min and val <= np.iinfo(np.int64).max:
+                return GGUFValueType.INT64
+            else:
+                print("The integer exceed limit:", val)
+                sys.exit()
+        elif isinstance(val, dict):
+            return GGUFValueType.OBJ
+        # TODO: need help with 64-bit types in Python
+        else:
+            print("Unknown type:", type(val))
+            sys.exit()
 
     @staticmethod
     def get_type(val: Any) -> GGUFValueType: