Skip to content

Commit

Permalink
ggml : fix q4xx mat mul, increase ggml_aligned_malloc alignment (#10167)
Browse files Browse the repository at this point in the history
  • Loading branch information
slaren authored Nov 4, 2024
1 parent 9e0ecfb commit 401558b
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
5 changes: 2 additions & 3 deletions ggml/src/ggml-cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
.nrows = 1,
},
[GGML_TYPE_Q8_0] = {
+ .from_float_to_mat = quantize_mat_q8_0,
.vec_dot = ggml_vec_dot_q8_0_q8_0,
.vec_dot_type = GGML_TYPE_Q8_0,
#if defined (__ARM_FEATURE_MATMUL_INT8)
Expand Down Expand Up @@ -13692,9 +13693,7 @@ void ggml_cpu_init(void) {
uint16_t u16;
ggml_fp16_t fp16;
} u = {i};
- // FIXME: this table is used in conversion functions outside of compute
- // current code depends on ggml_init initializing this table
- float f = ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
+ float f = GGML_FP16_TO_FP32(u.fp16);
ggml_table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f));
}
Expand Down
9 changes: 6 additions & 3 deletions ggml/src/ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -220,17 +220,20 @@ void ggml_log_callback_default(enum ggml_log_level level, const char * text, voi


void * ggml_aligned_malloc(size_t size) {
+ const int alignment = 64;
+
#if defined(_MSC_VER) || defined(__MINGW32__)
- return _aligned_malloc(size, TENSOR_ALIGNMENT);
+ return _aligned_malloc(size, alignment);
#else
if (size == 0) {
GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
return NULL;
}
void * aligned_memory = NULL;
#ifdef GGML_USE_CPU_HBM
- int result = hbw_posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size);
+ int result = hbw_posix_memalign(&aligned_memory, alignment, size);
#elif TARGET_OS_OSX
+ GGML_UNUSED(alignment);
kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
int result = EFAULT;
switch (alloc_status) {
Expand All @@ -248,7 +251,7 @@ void * ggml_aligned_malloc(size_t size) {
break;
}
#else
- int result = posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size);
+ int result = posix_memalign(&aligned_memory, alignment, size);
#endif
if (result != 0) {
// Handle allocation failure
Expand Down

0 comments on commit 401558b

Please sign in to comment.