Skip to content

Commit d98608e

Browse files
ggerganov authored and fm240223 committed
metal : avoid using Metal's gpuAddress property (ggml-org#16576)
* metal : avoid using Metal's gpuAddress property
* metal : fix rope kernels buffer check
1 parent 7756d7e commit d98608e

File tree

4 files changed

+20
-14
lines changed

4 files changed

+20
-14
lines changed

ggml/src/ggml-metal/ggml-metal-device.m

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
#include <Metal/Metal.h>
99

10+
#include <stdatomic.h>
11+
1012
#ifndef TARGET_OS_VISION
1113
#define TARGET_OS_VISION 0
1214
#endif
@@ -22,6 +24,9 @@
2224
// overload of MTLGPUFamilyMetal3 (not available in some environments)
2325
static const NSInteger MTLGPUFamilyMetal3_GGML = 5001;
2426

27+
// virtual address for GPU memory allocations
28+
static atomic_uintptr_t g_addr_device = 0x000000400ULL;
29+
2530
#if !GGML_METAL_EMBED_LIBRARY
2631
// Here to assist with NSBundle Path Hack
2732
@interface GGMLMetalClass : NSObject
@@ -827,7 +832,7 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
827832
};
828833

829834
struct ggml_metal_buffer {
830-
void * all_data; // TODO: https://github.com/ggml-org/llama.cpp/pull/15985
835+
void * all_data;
831836
size_t all_size;
832837

833838
// if false, the Metal buffer data is allocated in private GPU memory and is not shared with the host
@@ -965,14 +970,15 @@ ggml_metal_buffer_t ggml_metal_buffer_init(ggml_metal_device_t dev, size_t size,
965970
if (shared) {
966971
res->all_data = ggml_metal_host_malloc(size_aligned);
967972
res->is_shared = true;
968-
res->owned = true;
969973
} else {
970-
// dummy, non-NULL value - we'll populate this after creating the Metal buffer below
971-
res->all_data = (void *) 0x000000400ULL;
974+
// use virtual address from g_addr_device counter
975+
res->all_data = (void *) atomic_fetch_add_explicit(&g_addr_device, size_aligned, memory_order_relaxed);
972976
res->is_shared = false;
973977
}
974978
res->all_size = size_aligned;
975979

980+
res->owned = true;
981+
976982
res->device = ggml_metal_device_get_obj(dev);
977983
res->queue = ggml_metal_device_get_queue(dev);
978984

@@ -983,15 +989,13 @@ ggml_metal_buffer_t ggml_metal_buffer_init(ggml_metal_device_t dev, size_t size,
983989
res->buffers[0].metal = nil;
984990

985991
if (size_aligned > 0) {
986-
if (props_dev->use_shared_buffers &&shared) {
992+
if (props_dev->use_shared_buffers && shared) {
987993
res->buffers[0].metal = [res->device newBufferWithBytesNoCopy:res->all_data
988994
length:size_aligned
989995
options:MTLResourceStorageModeShared
990996
deallocator:nil];
991997
} else {
992998
res->buffers[0].metal = [res->device newBufferWithLength:size_aligned options:MTLResourceStorageModePrivate];
993-
994-
res->all_data = (void *) (res->buffers[0].metal.gpuAddress);
995999
}
9961000
}
9971001

@@ -1139,7 +1143,7 @@ bool ggml_metal_buffer_is_shared(ggml_metal_buffer_t buf) {
11391143

11401144
void ggml_metal_buffer_memset_tensor(ggml_metal_buffer_t buf, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
11411145
if (buf->is_shared) {
1142-
memset((char *)tensor->data + offset, value, size);
1146+
memset((char *) tensor->data + offset, value, size);
11431147
return;
11441148
}
11451149

@@ -1168,7 +1172,7 @@ void ggml_metal_buffer_memset_tensor(ggml_metal_buffer_t buf, struct ggml_tensor
11681172

11691173
void ggml_metal_buffer_set_tensor(ggml_metal_buffer_t buf, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
11701174
if (buf->is_shared) {
1171-
memcpy((char *)tensor->data + offset, data, size);
1175+
memcpy((char *) tensor->data + offset, data, size);
11721176
return;
11731177
}
11741178

@@ -1223,7 +1227,7 @@ void ggml_metal_buffer_set_tensor(ggml_metal_buffer_t buf, struct ggml_tensor *
12231227

12241228
void ggml_metal_buffer_get_tensor(ggml_metal_buffer_t buf, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
12251229
if (buf->is_shared) {
1226-
memcpy(data, (const char *)tensor->data + offset, size);
1230+
memcpy(data, (const char *) tensor->data + offset, size);
12271231
return;
12281232
}
12291233

ggml/src/ggml-metal/ggml-metal-impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ typedef struct {
251251
int32_t sect_1;
252252
int32_t sect_2;
253253
int32_t sect_3;
254+
bool src2;
254255
} ggml_metal_kargs_rope;
255256

256257
typedef struct {

ggml/src/ggml-metal/ggml-metal-ops.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2969,6 +2969,7 @@ int ggml_metal_op_rope(ggml_metal_op_t ctx, int idx) {
29692969
/* sect_1 =*/ sect_1,
29702970
/* sect_2 =*/ sect_2,
29712971
/* sect_3 =*/ sect_3,
2972+
/* src2 =*/ op->src[2] != nullptr,
29722973
};
29732974

29742975
ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_rope(lib, op);

ggml/src/ggml-metal/ggml-metal.metal

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3748,7 +3748,7 @@ kernel void kernel_rope_norm(
37483748

37493749
const float theta = theta_base * pow(args.freq_base, inv_ndims*i0);
37503750

3751-
const float freq_factor = src2 != src0 ? ((device const float *) src2)[ic] : 1.0f;
3751+
const float freq_factor = args.src2 ? ((device const float *) src2)[ic] : 1.0f;
37523752

37533753
rope_yarn(theta/freq_factor, args.freq_scale, corr_dims, i0, args.ext_factor, args.attn_factor, &cos_theta, &sin_theta);
37543754

@@ -3801,7 +3801,7 @@ kernel void kernel_rope_neox(
38013801

38023802
const float theta = theta_base * pow(args.freq_base, inv_ndims*i0);
38033803

3804-
const float freq_factor = src2 != src0 ? ((device const float *) src2)[ic] : 1.0f;
3804+
const float freq_factor = args.src2 ? ((device const float *) src2)[ic] : 1.0f;
38053805

38063806
rope_yarn(theta/freq_factor, args.freq_scale, corr_dims, i0, args.ext_factor, args.attn_factor, &cos_theta, &sin_theta);
38073807

@@ -3872,7 +3872,7 @@ kernel void kernel_rope_multi(
38723872

38733873
const float theta = theta_base * pow(args.freq_base, inv_ndims*i0);
38743874

3875-
const float freq_factor = src2 != src0 ? ((device const float *) src2)[ic] : 1.0f;
3875+
const float freq_factor = args.src2 ? ((device const float *) src2)[ic] : 1.0f;
38763876

38773877
rope_yarn(theta/freq_factor, args.freq_scale, corr_dims, i0, args.ext_factor, args.attn_factor, &cos_theta, &sin_theta);
38783878

@@ -3939,7 +3939,7 @@ kernel void kernel_rope_vision(
39393939
const float theta = theta_base * pow(args.freq_base, 2.0f * inv_ndims * p);
39403940
// end of mrope
39413941

3942-
const float freq_factor = src2 != src0 ? ((device const float *) src2)[ic] : 1.0f;
3942+
const float freq_factor = args.src2 ? ((device const float *) src2)[ic] : 1.0f;
39433943

39443944
rope_yarn(theta/freq_factor, args.freq_scale, corr_dims, i0, args.ext_factor, args.attn_factor, &cos_theta, &sin_theta);
39453945

0 commit comments

Comments (0)