Allow s390x to load little endian models unmodified #11234

Open. Wants to merge 12 commits into master (base branch).
10 changes: 6 additions & 4 deletions common/common.cpp
@@ -1418,8 +1418,9 @@ struct llama_model * common_load_model_from_url(
     int n_split = 0;
     {
         struct gguf_init_params gguf_params = {
-            /*.no_alloc = */ true,
-            /*.ctx      = */ NULL,
+            /*.no_alloc           = */ true,
+            /*.ctx                = */ NULL,
+            /*.allow_byteswapping = */ true,
         };
         auto * ctx_gguf = gguf_init_from_file(local_path.c_str(), gguf_params);
         if (!ctx_gguf) {
@@ -2063,8 +2064,9 @@ static common_control_vector_data common_control_vector_load_one(const common_co
 
     ggml_context * ctx = nullptr;
     struct gguf_init_params meta_gguf_params = {
-        /* .no_alloc = */ false,
-        /* .ctx      = */ &ctx,
+        /* .no_alloc           = */ false,
+        /* .ctx                = */ &ctx,
+        /* .allow_byteswapping = */ true,
    };
    struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
    if (!ctx_gguf) {
5 changes: 3 additions & 2 deletions examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -533,8 +533,9 @@ static void load_vocab(const char * filename, const Config * config, struct my_l
     struct ggml_context * ctx_data = NULL;
 
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ &ctx_data,
+        /*.no_alloc           = */ false,
+        /*.ctx                = */ &ctx_data,
+        /*.allow_byteswapping = */ true,
     };
 
     struct gguf_context * ctx = gguf_init_from_file(filename, params);
5 changes: 3 additions & 2 deletions examples/export-lora/export-lora.cpp
@@ -48,8 +48,9 @@ static std::string ggml_ne_string(const ggml_tensor * t) {
 
 static struct gguf_context * load_gguf(std::string & fname, struct ggml_context ** ctx_ggml) {
     struct gguf_init_params params = {
-        /*.no_alloc = */ true,
-        /*.ctx      = */ ctx_ggml,
+        /*.no_alloc           = */ true,
+        /*.ctx                = */ ctx_ggml,
+        /*.allow_byteswapping = */ true,
     };
     struct gguf_context * ctx_gguf = gguf_init_from_file(fname.c_str(), params);
     if (!ctx_gguf) {
5 changes: 3 additions & 2 deletions examples/gguf-hash/gguf-hash.cpp
@@ -288,8 +288,9 @@ static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
     struct ggml_context * ctx_data = NULL;
 
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ &ctx_data,
+        /*.no_alloc           = */ false,
+        /*.ctx                = */ &ctx_data,
+        /*.allow_byteswapping = */ true,
     };
 
     // xxh64 init
32 changes: 26 additions & 6 deletions examples/gguf-split/gguf-split.cpp
@@ -328,14 +328,20 @@ struct split_strategy {
             const char * t_name = gguf_get_tensor_name(ctx_out, i);
             struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
             auto n_bytes = ggml_nbytes(t);
+            auto n_elements = ggml_nelements(t) / ggml_blck_size(t->type);
             read_buf.resize(n_bytes);
 
             // calculate offset
             auto i_tensor_in = gguf_find_tensor(ctx_gguf, t_name); // idx of tensor in the input file
             auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor_in);
 
+            ggml_byteswap_t byteswap_func = nullptr;
+            if (gguf_needs_byteswap(ctx_gguf)) {
+                byteswap_func = ggml_get_type_traits(t->type)->byteswap;
+            }
+
             // copy tensor from input to output file
-            copy_file_to_file(f_input, fout, offset, n_bytes);
+            copy_file_to_file(f_input, fout, offset, n_bytes, n_elements, byteswap_func);
             zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
         }
 
@@ -346,13 +352,18 @@ struct split_strategy {
         }
     }
 
-    void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len) {
+    void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len, const size_t elements, ggml_byteswap_t byteswap_func) {
        // TODO: detect OS and use copy_file_range() here for better performance
        if (read_buf.size() < len) {
            read_buf.resize(len);
        }
        f_in.seekg(in_offset);
        f_in.read((char *)read_buf.data(), len);
+
+        if (byteswap_func != nullptr) {
+            byteswap_func(read_buf.data(), elements);
+        }
+
        f_out.write((const char *)read_buf.data(), len);
    }
 };
@@ -361,8 +372,9 @@ static void gguf_split(const split_params & split_params) {
     struct ggml_context * ctx_meta = NULL;
 
     struct gguf_init_params params = {
-        /*.no_alloc = */ true,
-        /*.ctx      = */ &ctx_meta,
+        /*.no_alloc           = */ true,
+        /*.ctx                = */ &ctx_meta,
+        /*.allow_byteswapping = */ true,
     };
 
     std::ifstream f_input(split_params.input.c_str(), std::ios::binary);
@@ -426,8 +438,9 @@ static void gguf_merge(const split_params & split_params) {
     struct ggml_context * ctx_meta = NULL;
 
     struct gguf_init_params params = {
-        /*.no_alloc = */ true,
-        /*.ctx      = */ &ctx_meta,
+        /*.no_alloc           = */ true,
+        /*.ctx                = */ &ctx_meta,
+        /*.allow_byteswapping = */ true,
    };
 
    if (i_split > 0) {
@@ -541,6 +554,13 @@ static void gguf_merge(const split_params & split_params) {
             f_input.seekg(offset);
             f_input.read((char *)read_data.data(), n_bytes);
 
+            if (gguf_needs_byteswap(ctx_gguf)) {
+                auto byteswap = ggml_get_type_traits(t->type)->byteswap;
+                if (byteswap != nullptr) {
+                    byteswap(read_data.data(), ggml_nelements(t) / ggml_blck_size(t->type));
+                }
+            }
+
             // write tensor data + padding
             fout.write((const char *)read_data.data(), n_bytes);
             zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
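The count handed to the byteswap callback above is ggml_nelements(t) / ggml_blck_size(t->type), i.e. the number of type-level blocks rather than individual scalars, which lets quantized types swap block-by-block. As a rough illustration of what such a callback could look like (a sketch, not code from this PR; the struct mirrors ggml's Q8_0 block layout but the names here are made up):

#include <stdint.h>
#include <stddef.h>

// Hypothetical Q8_0-style block: a 2-byte fp16 scale followed by 32 one-byte
// quants. Only the multi-byte scale is endian-sensitive; int8 values are not.
struct block_q8_0_like {
    uint16_t d;       // fp16 scale bits
    int8_t   qs[32];  // quantized values
};

// Matches the ggml_byteswap_t signature: "elements" counts blocks, not scalars.
static void byteswap_q8_0_like(void * buffer, size_t elements) {
    struct block_q8_0_like * blocks = (struct block_q8_0_like *) buffer;
    for (size_t i = 0; i < elements; ++i) {
        const uint16_t d = blocks[i].d;
        blocks[i].d = (uint16_t) ((d >> 8) | (d << 8));
    }
}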
10 changes: 6 additions & 4 deletions examples/gguf/gguf.cpp
@@ -85,8 +85,9 @@ static bool gguf_ex_write(const std::string & fname) {
 // just read tensor info
 static bool gguf_ex_read_0(const std::string & fname) {
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ NULL,
+        /*.no_alloc           = */ false,
+        /*.ctx                = */ NULL,
+        /*.allow_byteswapping = */ true,
     };
 
     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
@@ -151,8 +152,9 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
     struct ggml_context * ctx_data = NULL;
 
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ &ctx_data,
+        /*.no_alloc           = */ false,
+        /*.ctx                = */ &ctx_data,
+        /*.allow_byteswapping = */ true,
     };
 
     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
5 changes: 3 additions & 2 deletions examples/llava/clip.cpp
@@ -1122,8 +1122,9 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     struct ggml_context * meta = NULL;
 
     struct gguf_init_params params = {
-        /*.no_alloc = */ true,
-        /*.ctx      = */ &meta,
+        /*.no_alloc           = */ true,
+        /*.ctx                = */ &meta,
+        /*.allow_byteswapping = */ true,
     };
 
     struct gguf_context * ctx = gguf_init_from_file(fname, params);
2 changes: 2 additions & 0 deletions ggml/include/ggml.h
@@ -2144,6 +2144,7 @@ extern "C" {
 #endif
     typedef void (*ggml_to_float_t)  (const void  * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
     typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void  * GGML_RESTRICT y, int64_t k);
+    typedef void (*ggml_byteswap_t)  (      void  * GGML_RESTRICT buffer, size_t elements);
 
     struct ggml_type_traits {
         const char * type_name;
@@ -2153,6 +2154,7 @@ extern "C" {
         bool              is_quantized;
         ggml_to_float_t   to_float;
         ggml_from_float_t from_float_ref;
+        ggml_byteswap_t   byteswap;
     };
 
     GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
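With the new trait in place, callers fetch the per-type callback through the existing ggml_get_type_traits() lookup, as the gguf-split changes above do. A minimal sketch of that pattern (the helper name is ours; a NULL byteswap entry is taken to mean the type has no swap routine):

#include "ggml.h"

// Swap a raw tensor buffer in place if the type provides a byteswap routine.
// n_blocks should be ggml_nelements(t) / ggml_blck_size(type).
static void byteswap_buffer(enum ggml_type type, void * data, size_t n_blocks) {
    const struct ggml_type_traits * traits = ggml_get_type_traits(type);
    if (traits->byteswap != NULL) {
        traits->byteswap(data, n_blocks);
    }
}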
5 changes: 5 additions & 0 deletions ggml/include/gguf.h
@@ -74,6 +74,8 @@ extern "C" {
 
         // if not NULL, create a ggml_context and allocate the tensor data in it
         struct ggml_context ** ctx;
+
+        bool allow_byteswapping;
     };
 
     GGML_API struct gguf_context * gguf_init_empty(void);
@@ -197,6 +199,9 @@ extern "C" {
     // writes the meta data to pointer "data"
     GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
 
+    // returns true if gguf file needs byteswapping when reading. byteswapping for writing not implemented
+    GGML_API bool gguf_needs_byteswap(const struct gguf_context * ctx);
+
 #ifdef __cplusplus
 }
 #endif
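Putting the two additions together, a reader opts in through the init flag and can then ask whether the file's byte order differs from the host's. A minimal sketch (the file name and error handling are illustrative only):

#include "gguf.h"
#include <stdio.h>

int main(void) {
    struct gguf_init_params params = {
        /*.no_alloc           = */ true,
        /*.ctx                = */ NULL,
        /*.allow_byteswapping = */ true,  // opt in to loading cross-endian files
    };
    struct gguf_context * ctx = gguf_init_from_file("model.gguf", params);
    if (ctx == NULL) {
        fprintf(stderr, "failed to load gguf file\n");
        return 1;
    }
    if (gguf_needs_byteswap(ctx)) {
        // tensor data read straight from this file must be byteswapped
        // (e.g. via the per-type traits) before it is interpreted
        printf("file endianness differs from host\n");
    }
    gguf_free(ctx);
    return 0;
}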
43 changes: 43 additions & 0 deletions ggml/src/ggml-impl.h
@@ -28,6 +28,14 @@
 #include <immintrin.h>
 #endif
 
+#if defined(__gnu_linux__)
+#include <byteswap.h>
+#else // defined(__gnu_linux__)
+#define bswap_16(x) (x)
+#define bswap_32(x) (x)
+#define bswap_64(x) (x)
+#endif // defined(__gnu_linux__)
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -553,12 +561,47 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
 #define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
 #define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
 
+// endianness conversion
+static inline void ggml_bswap16(void * value) {
+    *((uint16_t*)value) = bswap_16(*((uint16_t*)value));
+}
+
+static inline void ggml_bswap32(void * value) {
+    *((uint32_t*)value) = bswap_32(*((uint32_t*)value));
+}
+
+static inline void ggml_bswap64(void * value) {
+    *((uint64_t*)value) = bswap_64(*((uint64_t*)value));
+}
 
 #ifdef __cplusplus
 }
 #endif
 
+#ifdef __cplusplus
+#include <vector>
+#include <type_traits>
+
+template <typename T, std::enable_if_t<sizeof(T) == 1, int> = 0>
+static inline void ggml_bswap(T * value) {
+    GGML_UNUSED(value);
+}
+
+template <typename T, std::enable_if_t<sizeof(T) == 2, int> = 0>
+static inline void ggml_bswap(T * value) {
+    ggml_bswap16(value);
+}
+
+template <typename T, std::enable_if_t<sizeof(T) == 4, int> = 0>
+static inline void ggml_bswap(T * value) {
+    ggml_bswap32(value);
+}
+
+template <typename T, std::enable_if_t<sizeof(T) == 8, int> = 0>
+static inline void ggml_bswap(T * value) {
+    ggml_bswap64(value);
+}
+
 // expose GGUF internals for test code
 GGML_API size_t gguf_type_size(enum gguf_type type);
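The C++ overload set dispatches purely on sizeof(T), so one ggml_bswap() call handles any fixed-width scalar, and 1-byte types compile to a no-op. A standalone sanity check of that dispatch (our test, not part of the PR; it assumes ggml-impl.h and its dependencies are on the include path):

#include <cstdint>
#include <cstdio>
#include "ggml-impl.h"  // for the ggml_bswap templates above

int main() {
    uint16_t h = 0x1234;
    uint32_t w = 0x12345678u;
    uint64_t d = 0x0102030405060708ull;
    ggml_bswap(&h);  // sizeof(T) == 2 -> ggml_bswap16
    ggml_bswap(&w);  // sizeof(T) == 4 -> ggml_bswap32
    ggml_bswap(&d);  // sizeof(T) == 8 -> ggml_bswap64
    printf("%04x %08x %016llx\n", (unsigned) h, (unsigned) w, (unsigned long long) d);
    return 0;
}

Note that with the non-Linux fallback above, bswap_16/32/64 are identity macros, so on such platforms these calls leave the values unchanged; the helpers only perform real swaps where <byteswap.h> is available.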