Skip to content

Commit

Permalink
feat: add new GGUFValueType.OBJ virtual type
Browse files Browse the repository at this point in the history
The content of the OBJ type is actually a list of all key names of the object.

* Python
  * `gguf_writer.py`:
    * Added `def add_kv(self, key: str, val: Any) -> None`: Automatically determines the appropriate value type based on `val`.
    * Added `def add_dict(self, key: str, val: dict, excludes: Sequence[str] = []) -> None`: Adds an object (dict) value; all subkeys are added recursively.
    * Added `add_array_ex` to support nested and mixed-type arrays.
  * `constants.py`:
    * Added `GGUFValueType.get_type_ex(val)`: Adds support for numpy integer and floating-point types, and selects the bit width of a plain Python integer according to its magnitude.
  * `gguf_reader.py`:
    * Added functionality to retrieve values from specific fields using `ReaderField.get()` method.
  * Unit test added
* CPP
  * `ggml`:
    * Added `GGUF_TYPE_OBJ` to the `gguf_type` enum type.
    * Use `gguf_get_arr_n` and `gguf_get_arr_str` to get the subKey names of `GGUF_TYPE_OBJ`.
    * Added `gguf_set_obj_str` function to set object subkey names
    * Added `gguf_set_arr_obj` function to set object array count
    * Added `gguf_set_arr_arr` function to set nested array count
  * `llama`:
    * Modified `gguf_kv_to_str`
    * Added `LLAMA_API char * gguf_kv_to_c_str` function to get the c_str value as JSON format.
      * Maybe this API should be moved into `ggml` as `gguf_get_val_json`. (The problem is that ggml.c is written in C, while this code makes heavy use of C++ features.)
    * Added basic support for `GGUF_TYPE_OBJ` and nested arrays
  * Unit test added

feat: add basic support to GGUF_TYPE_OBJ on cpp
feat(gguf.py): add OBJ and mixed-type array supports to GGUF ARRAY
feat: add OBJ and mixed-type array supports to GGUF ARRAY(CPP)
feat: add nested array support
feat: * Subkey name convention in OBJ types:
  * If the first letter of the subkey name is "/", it means referencing the full name of other keys.
  * If there is a ":" colon delimiter, it means that the string after the colon represents the subkey name in this object, otherwise the referencing subkey name is used.
feat: add LLAMA_API gguf_kv_to_c_str to llama.h
test: write test gguf file to tests folder directly(py)
test: add test-gguf-meta.cpp
feat: Key convention: "." indicates that the key is a subkey, not an independent key.
feat: add excludes argument to add_dict(gguf_write.py)
feat: add add_array_ex to support nested and mixed-type arrays, and keep add_array unchanged
fix(constant.py): rollback the get_type function and add the new get_type_ex
test: add test compatibility
fix: use GGML_MALLOC instead of malloc
  • Loading branch information
snowyu committed Feb 3, 2024
1 parent a305dba commit fe25927
Show file tree
Hide file tree
Showing 14 changed files with 574 additions and 89 deletions.
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -769,3 +769,6 @@ tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o te

tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

tests/test-gguf-meta: tests/test-gguf-meta.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
61 changes: 4 additions & 57 deletions examples/llava/clip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "llama.h"

#ifdef GGML_USE_CUBLAS
#include "ggml-cuda.h"
Expand Down Expand Up @@ -148,24 +149,6 @@ static std::string get_ftype(int ftype) {
return ggml_type_name(static_cast<ggml_type>(ftype));
}

// Render element `i` of a typed GGUF buffer as text.
//
// `data` is reinterpreted as an array of the C type that corresponds to `type`
// and the i-th element is converted with std::to_string (booleans become
// "true"/"false"). Any type without a case below yields "unknown type <id>".
static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) {
    std::string text;

    switch (type) {
        case GGUF_TYPE_UINT8:   text = std::to_string(static_cast<const uint8_t  *>(data)[i]); break;
        case GGUF_TYPE_INT8:    text = std::to_string(static_cast<const int8_t   *>(data)[i]); break;
        case GGUF_TYPE_UINT16:  text = std::to_string(static_cast<const uint16_t *>(data)[i]); break;
        case GGUF_TYPE_INT16:   text = std::to_string(static_cast<const int16_t  *>(data)[i]); break;
        case GGUF_TYPE_UINT32:  text = std::to_string(static_cast<const uint32_t *>(data)[i]); break;
        case GGUF_TYPE_INT32:   text = std::to_string(static_cast<const int32_t  *>(data)[i]); break;
        case GGUF_TYPE_UINT64:  text = std::to_string(static_cast<const uint64_t *>(data)[i]); break;
        case GGUF_TYPE_INT64:   text = std::to_string(static_cast<const int64_t  *>(data)[i]); break;
        case GGUF_TYPE_FLOAT32: text = std::to_string(static_cast<const float    *>(data)[i]); break;
        case GGUF_TYPE_FLOAT64: text = std::to_string(static_cast<const double   *>(data)[i]); break;
        case GGUF_TYPE_BOOL:    text = static_cast<const bool *>(data)[i] ? "true" : "false";  break;
        default:                text = format("unknown type %d", type);                        break;
    }

    return text;
}


static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
std::string result;
for (size_t pos = 0; ; pos += search.length()) {
Expand All @@ -180,43 +163,6 @@ static void replace_all(std::string & s, const std::string & search, const std::
s = std::move(result);
}

// Produce a printable representation of the value stored under key index `i`.
//
//  - string values are returned verbatim;
//  - arrays are rendered as "[a, b, c]": string elements are double-quoted with
//    backslashes and quotes escaped, nested arrays are shown as "???", and all
//    other element types go through gguf_data_to_str;
//  - every other value type is formatted by gguf_data_to_str.
static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
    const enum gguf_type kv_type = gguf_get_kv_type(ctx_gguf, i);

    if (kv_type == GGUF_TYPE_STRING) {
        return gguf_get_val_str(ctx_gguf, i);
    }
    if (kv_type != GGUF_TYPE_ARRAY) {
        return gguf_data_to_str(kv_type, gguf_get_val_data(ctx_gguf, i), 0);
    }

    const enum gguf_type elem_type = gguf_get_arr_type(ctx_gguf, i);
    const int            count     = gguf_get_arr_n(ctx_gguf, i);
    const void *         raw       = gguf_get_arr_data(ctx_gguf, i);

    std::stringstream out;
    out << "[";
    for (int idx = 0; idx < count; idx++) {
        // separator goes between elements, never after the last one
        if (idx > 0) {
            out << ", ";
        }

        if (elem_type == GGUF_TYPE_STRING) {
            std::string item = gguf_get_arr_str(ctx_gguf, i, idx);
            // escape backslashes first so the quote escapes are not doubled
            replace_all(item, "\\", "\\\\");
            replace_all(item, "\"", "\\\"");
            out << '"' << item << '"';
        } else if (elem_type == GGUF_TYPE_ARRAY) {
            // nested arrays are not expanded by this helper
            out << "???";
        } else {
            out << gguf_data_to_str(elem_type, raw, idx);
        }
    }
    out << "]";

    return out.str();
}

static void print_tensor_info(const ggml_tensor* tensor, const char* prefix = "") {
size_t tensor_size = ggml_nbytes(tensor);
printf("%s: n_dims = %d, name = %s, tensor_size=%zu, shape:[%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "], type = %s\n",
Expand Down Expand Up @@ -784,11 +730,12 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
const char * name = gguf_get_key(ctx, i);
const enum gguf_type type = gguf_get_kv_type(ctx, i);
const std::string type_name =
type == GGUF_TYPE_ARRAY
type == GGUF_TYPE_ARRAY || type == GGUF_TYPE_OBJ
? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx, i)), gguf_get_arr_n(ctx, i))
: gguf_type_name(type);

std::string value = gguf_kv_to_str(ctx, i);
char * v = gguf_kv_to_c_str(ctx, i, name);
std::string value = v;
const size_t MAX_VALUE_LEN = 40;
if (value.size() > MAX_VALUE_LEN) {
value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str());
Expand Down
70 changes: 60 additions & 10 deletions ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -19281,8 +19281,9 @@ static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_INT64] = sizeof(int64_t),
[GGUF_TYPE_FLOAT64] = sizeof(double),
[GGUF_TYPE_ARRAY] = 0, // undefined
[GGUF_TYPE_OBJ] = 0, // undefined
};
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
static_assert(GGUF_TYPE_COUNT == 14, "GGUF_TYPE_COUNT != 14");

static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_UINT8] = "u8",
Expand All @@ -19298,8 +19299,9 @@ static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_UINT64] = "u64",
[GGUF_TYPE_INT64] = "i64",
[GGUF_TYPE_FLOAT64] = "f64",
[GGUF_TYPE_OBJ] = "obj",
};
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
static_assert(GGUF_TYPE_COUNT == 14, "GGUF_TYPE_COUNT != 14");

union gguf_value {
uint8_t uint8;
Expand Down Expand Up @@ -19522,6 +19524,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
case GGUF_TYPE_OBJ:
case GGUF_TYPE_ARRAY:
{
ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
Expand Down Expand Up @@ -19568,7 +19571,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
}
} break;
case GGUF_TYPE_ARRAY:
case GGUF_TYPE_OBJ:
case GGUF_TYPE_ARRAY: break;
default: GGML_ASSERT(false && "invalid type"); break;
}
} break;
Expand Down Expand Up @@ -19775,7 +19779,7 @@ void gguf_free(struct gguf_context * ctx) {
}
}

if (kv->type == GGUF_TYPE_ARRAY) {
if (kv->type == GGUF_TYPE_ARRAY || kv->type == GGUF_TYPE_OBJ) {
if (kv->value.arr.data) {
if (kv->value.arr.type == GGUF_TYPE_STRING) {
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
Expand Down Expand Up @@ -19860,7 +19864,7 @@ enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int key_id) {

// Return the element type recorded in the array payload of key `key_id`.
//
// Valid for keys of type GGUF_TYPE_ARRAY and GGUF_TYPE_OBJ (an OBJ key stores
// its subkey names in the same array payload). Aborts via GGML_ASSERT when
// `key_id` is out of range or the key has any other type.
enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id) {
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
    // A single check covering both payload-carrying types; an additional
    // ARRAY-only assert here would reject OBJ keys before this one is reached.
    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY || ctx->kv[key_id].type == GGUF_TYPE_OBJ);
    return ctx->kv[key_id].value.arr.type;
}

Expand All @@ -19872,15 +19876,15 @@ const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id) {

// Return the i-th string stored in the array payload of key `key_id`.
//
// Valid for GGUF_TYPE_ARRAY keys and for GGUF_TYPE_OBJ keys, whose payload is
// the list of subkey names. The payload is read as an array of gguf_str;
// `i` is not range-checked here, so the caller must keep it below
// gguf_get_arr_n(ctx, key_id).
const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) {
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
    // A single check covering both payload-carrying types; an additional
    // ARRAY-only assert here would reject OBJ keys before this one is reached.
    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY || ctx->kv[key_id].type == GGUF_TYPE_OBJ);
    struct gguf_kv * kv = &ctx->kv[key_id];
    struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
    return str->data;
}

// Return the element count recorded in the array payload of key `key_id`.
//
// Valid for GGUF_TYPE_ARRAY keys and for GGUF_TYPE_OBJ keys (where the count is
// the number of subkey names). Aborts via GGML_ASSERT when `key_id` is out of
// range or the key has any other type.
int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) {
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
    // A single check covering both payload-carrying types; an additional
    // ARRAY-only assert here would reject OBJ keys before this one is reached.
    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY || ctx->kv[key_id].type == GGUF_TYPE_OBJ);
    return ctx->kv[key_id].value.arr.n;
}

Expand Down Expand Up @@ -19959,6 +19963,7 @@ const char * gguf_get_val_str(const struct gguf_context * ctx, int key_id) {
// Return a pointer to the raw value storage of key `key_id`.
// Only meaningful for scalar values: ARRAY, OBJ and STRING keys are rejected
// by the asserts below, and an out-of-range `key_id` aborts as well.
const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id) {
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_ARRAY);
// OBJ keys use the same array payload as ARRAY keys, so they are excluded too
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_OBJ);
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_STRING);
// address of the value union itself
return &ctx->kv[key_id].value;
}
Expand Down Expand Up @@ -20103,6 +20108,10 @@ void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_ty
ctx->kv[idx].type = GGUF_TYPE_ARRAY;
ctx->kv[idx].value.arr.type = type;
ctx->kv[idx].value.arr.n = n;
if (data == NULL) {
ctx->kv[idx].value.arr.data = NULL;
return;
}
ctx->kv[idx].value.arr.data = GGML_MALLOC(n*gguf_type_size(type));
memcpy(ctx->kv[idx].value.arr.data, data, n*gguf_type_size(type));
}
Expand All @@ -20121,6 +20130,38 @@ void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char **
}
}

void gguf_set_arr_obj(struct gguf_context * ctx, const char * key, int n) {
const int idx = gguf_get_or_add_key(ctx, key);

ctx->kv[idx].type = GGUF_TYPE_ARRAY;
ctx->kv[idx].value.arr.type = GGUF_TYPE_OBJ;
ctx->kv[idx].value.arr.n = n;
ctx->kv[idx].value.arr.data = NULL;
}

void gguf_set_arr_arr(struct gguf_context * ctx, const char * key, int n) {
const int idx = gguf_get_or_add_key(ctx, key);

ctx->kv[idx].type = GGUF_TYPE_ARRAY;
ctx->kv[idx].value.arr.type = GGUF_TYPE_ARRAY;
ctx->kv[idx].value.arr.n = n;
ctx->kv[idx].value.arr.data = NULL;
}

void gguf_set_obj_str(struct gguf_context * ctx, const char * key, const char ** data, int n) {
const int idx = gguf_get_or_add_key(ctx, key);

ctx->kv[idx].type = GGUF_TYPE_OBJ;
ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING;
ctx->kv[idx].value.arr.n = n;
ctx->kv[idx].value.arr.data = GGML_MALLOC(n*sizeof(struct gguf_str));
for (int i = 0; i < n; i++) {
struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
str->n = strlen(data[i]);
str->data = strdup(data[i]);
}
}

// set or add KV pairs from another context
void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
for (uint32_t i = 0; i < src->header.n_kv; i++) {
Expand All @@ -20137,6 +20178,15 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break;
case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break;
case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
case GGUF_TYPE_OBJ:
{
const char ** data = malloc(src->kv[i].value.arr.n*sizeof(char *));
for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
}
gguf_set_obj_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
free((void *)data);
} break;
case GGUF_TYPE_ARRAY:
{
if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) {
Expand All @@ -20146,8 +20196,6 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
}
gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
GGML_FREE((void *)data);
} else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
GGML_ASSERT(false && "nested arrays not supported");
} else {
gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
}
Expand Down Expand Up @@ -20301,6 +20349,7 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break;
case GGUF_TYPE_OBJ:
case GGUF_TYPE_ARRAY:
{
gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type));
Expand All @@ -20327,7 +20376,8 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
gguf_bwrite_str(buf, &((struct gguf_str *) kv->value.arr.data)[j]);
}
} break;
case GGUF_TYPE_ARRAY:
case GGUF_TYPE_OBJ:
case GGUF_TYPE_ARRAY: break;
default: GGML_ASSERT(false && "invalid type"); break;
}
} break;
Expand Down
4 changes: 4 additions & 0 deletions ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -2141,6 +2141,7 @@ extern "C" {
GGUF_TYPE_UINT64 = 10,
GGUF_TYPE_INT64 = 11,
GGUF_TYPE_FLOAT64 = 12,
GGUF_TYPE_OBJ = 13,
GGUF_TYPE_COUNT, // marks the end of the enum
};

Expand Down Expand Up @@ -2212,6 +2213,9 @@ extern "C" {
GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
// declare `key` as an array of n objects (element type GGUF_TYPE_OBJ); no element data is stored
GGML_API void gguf_set_arr_obj (struct gguf_context * ctx, const char * key, int n);
// declare `key` as an array of n nested arrays (element type GGUF_TYPE_ARRAY); no element data is stored
GGML_API void gguf_set_arr_arr (struct gguf_context * ctx, const char * key, int n);
// set `key` to a GGUF_TYPE_OBJ value whose payload is the n subkey names in `data` (the strings are copied)
GGML_API void gguf_set_obj_str (struct gguf_context * ctx, const char * key, const char ** data, int n);

// set or add KV pairs from another context
GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
Expand Down
59 changes: 59 additions & 0 deletions gguf-py/gguf/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import sys
from enum import Enum, IntEnum, auto
from typing import Any
import numpy as np

#
# constants
Expand Down Expand Up @@ -550,6 +551,64 @@ class GGUFValueType(IntEnum):
UINT64 = 10
INT64 = 11
FLOAT64 = 12
OBJ = 13

@staticmethod
def get_type_ex(val: Any) -> GGUFValueType:
    """Pick the GGUF value type that matches ``val``.

    Strings/bytes map to STRING, lists to ARRAY, dicts to OBJ and bools to
    BOOL. Numpy scalars keep their exact width; a plain ``float`` becomes
    FLOAT32. A plain ``int`` gets the narrowest type that can hold it:
    unsigned widths are tried first, then signed ones.

    Prints a message and calls ``sys.exit()`` when the value has an
    unsupported type or is an integer outside the 64-bit range.
    """
    # Checked top to bottom. Order matters: np.float64 is also a ``float`` and
    # ``bool`` is also an ``int``, so the more specific entry must come first.
    direct_matches = (
        ((str, bytes, bytearray), GGUFValueType.STRING),
        (list, GGUFValueType.ARRAY),
        (np.float32, GGUFValueType.FLOAT32),
        (np.float64, GGUFValueType.FLOAT64),
        (float, GGUFValueType.FLOAT32),
        (bool, GGUFValueType.BOOL),
        (np.uint8, GGUFValueType.UINT8),
        (np.uint16, GGUFValueType.UINT16),
        (np.uint32, GGUFValueType.UINT32),
        (np.uint64, GGUFValueType.UINT64),
        (np.int8, GGUFValueType.INT8),
        (np.int16, GGUFValueType.INT16),
        (np.int32, GGUFValueType.INT32),
        (np.int64, GGUFValueType.INT64),
    )
    for candidate, gguf_type in direct_matches:
        if isinstance(val, candidate):
            return gguf_type

    if isinstance(val, int):
        # Unsigned ranges start at 0, so negative values fall through to the
        # signed entries; the first range containing the value wins.
        int_widths = (
            (np.uint8, GGUFValueType.UINT8),
            (np.uint16, GGUFValueType.UINT16),
            (np.uint32, GGUFValueType.UINT32),
            (np.uint64, GGUFValueType.UINT64),
            (np.int8, GGUFValueType.INT8),
            (np.int16, GGUFValueType.INT16),
            (np.int32, GGUFValueType.INT32),
            (np.int64, GGUFValueType.INT64),
        )
        for np_type, gguf_type in int_widths:
            bounds = np.iinfo(np_type)
            if bounds.min <= val <= bounds.max:
                return gguf_type
        print("The integer exceed limit:", val)
        sys.exit()

    if isinstance(val, dict):
        return GGUFValueType.OBJ

    print("Unknown type:", type(val))
    sys.exit()

@staticmethod
def get_type(val: Any) -> GGUFValueType:
Expand Down
Loading

0 comments on commit fe25927

Please sign in to comment.