Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow s390x to load little endian models unmodified #11234

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
22 changes: 20 additions & 2 deletions examples/gguf-split/gguf-split.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,14 +328,20 @@ struct split_strategy {
const char * t_name = gguf_get_tensor_name(ctx_out, i);
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
auto n_bytes = ggml_nbytes(t);
auto n_elements = ggml_nelements(t) / ggml_blck_size(t->type);
read_buf.resize(n_bytes);

// calculate offset
auto i_tensor_in = gguf_find_tensor(ctx_gguf, t_name); // idx of tensor in the input file
auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor_in);

ggml_byteswap_t byteswap_func = nullptr;
if (gguf_needs_byteswap(ctx_gguf)) {
byteswap_func = ggml_get_type_traits(t->type)->byteswap;
}

// copy tensor from input to output file
copy_file_to_file(f_input, fout, offset, n_bytes);
copy_file_to_file(f_input, fout, offset, n_bytes, n_elements, byteswap_func);
zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
}

Expand All @@ -346,13 +352,18 @@ struct split_strategy {
}
}

void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len) {
// Copy `len` bytes of tensor data from `f_in` (starting at `in_offset`) into
// `f_out`, staging the bytes through the shared `read_buf` member. When
// `byteswap_func` is non-null, the staged data is byteswapped in place as
// `elements` typed elements before being written — used when the input file's
// endianness differs from the host's (see gguf_needs_byteswap at the call site).
void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len, const size_t elements, ggml_byteswap_t byteswap_func) {
    // TODO: detect OS and use copy_file_range() here for better performance
    if (read_buf.size() < len) {
        read_buf.resize(len);
    }

    auto * staging = reinterpret_cast<char *>(read_buf.data());

    f_in.seekg(in_offset);
    f_in.read(staging, len);

    // convert endianness in place before writing, if required by the input file
    if (byteswap_func != nullptr) {
        byteswap_func(read_buf.data(), elements);
    }

    f_out.write(staging, len);
}
};
Expand Down Expand Up @@ -541,6 +552,13 @@ static void gguf_merge(const split_params & split_params) {
f_input.seekg(offset);
f_input.read((char *)read_data.data(), n_bytes);

if (gguf_needs_byteswap(ctx_gguf)) {
auto byteswap = ggml_get_type_traits(t->type)->byteswap;
if (byteswap != nullptr) {
byteswap(read_data.data(), ggml_nelements(t) / ggml_blck_size(t->type));
}
}

// write tensor data + padding
fout.write((const char *)read_data.data(), n_bytes);
zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
Expand Down
2 changes: 2 additions & 0 deletions ggml/include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -2144,6 +2144,7 @@ extern "C" {
#endif
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
typedef void (*ggml_byteswap_t) ( void * GGML_RESTRICT buffer, size_t elements);

struct ggml_type_traits {
const char * type_name;
Expand All @@ -2153,6 +2154,7 @@ extern "C" {
bool is_quantized;
ggml_to_float_t to_float;
ggml_from_float_t from_float_ref;
ggml_byteswap_t byteswap;
};

GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
Expand Down
3 changes: 3 additions & 0 deletions ggml/include/gguf.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,9 @@ extern "C" {
// writes the meta data to pointer "data"
GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);

// returns true if the GGUF file needs byteswapping when it is read; byteswapping for writing is not implemented
GGML_API bool gguf_needs_byteswap(const struct gguf_context * ctx);

#ifdef __cplusplus
}
#endif
43 changes: 43 additions & 0 deletions ggml/src/ggml-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@
#include <immintrin.h>
#endif

// bswap_16/32/64: glibc provides these byte-swapping macros in <byteswap.h>.
#if defined(__gnu_linux__)
#include <byteswap.h>
#else // defined(__gnu_linux__)
// NOTE(review): on non-glibc platforms these fall back to the identity, i.e.
// no byteswapping happens at all. That is only correct if such targets never
// need a swap (little-endian hosts reading little-endian files) — confirm for
// any big-endian non-Linux target before relying on this path.
#define bswap_16(x) (x)
#define bswap_32(x) (x)
#define bswap_64(x) (x)
#endif // defined(__gnu_linux__)

#ifdef __cplusplus
extern "C" {
#endif
Expand Down Expand Up @@ -553,12 +561,47 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
#define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
#define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)

// endianness conversion
// Swap the two bytes of a 16-bit value in place.
static inline void ggml_bswap16(void * value) {
    uint16_t * v16 = (uint16_t *) value;
    *v16 = bswap_16(*v16);
}

// Swap the four bytes of a 32-bit value in place.
static inline void ggml_bswap32(void * value) {
    uint32_t * v32 = (uint32_t *) value;
    *v32 = bswap_32(*v32);
}

// Swap the eight bytes of a 64-bit value in place.
static inline void ggml_bswap64(void * value) {
    uint64_t * v64 = (uint64_t *) value;
    *v64 = bswap_64(*v64);
}

#ifdef __cplusplus
}
#endif

#ifdef __cplusplus
#include <vector>
#include <type_traits>

// Generic in-place byteswap, dispatching on the element width at compile time.
// Single-byte values are left untouched; 2/4/8-byte values are forwarded to the
// fixed-width helpers above. Any other size is rejected at compile time, just
// as the original overload set had no viable candidate for it.
template <typename T>
static inline void ggml_bswap(T * value) {
    static_assert(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8,
                  "ggml_bswap: unsupported element size");
    if constexpr (sizeof(T) == 2) {
        ggml_bswap16(value);
    } else if constexpr (sizeof(T) == 4) {
        ggml_bswap32(value);
    } else if constexpr (sizeof(T) == 8) {
        ggml_bswap64(value);
    } else {
        // sizeof(T) == 1: nothing to swap
        (void) value;
    }
}

// expose GGUF internals for test code
GGML_API size_t gguf_type_size(enum gguf_type type);
Expand Down
Loading
Loading