@@ -7,6 +7,8 @@
 
 #include <Metal/Metal.h>
 
+#include <stdatomic.h>
+
 #ifndef TARGET_OS_VISION
 #define TARGET_OS_VISION 0
 #endif
@@ -22,6 +24,9 @@
 // overload of MTLGPUFamilyMetal3 (not available in some environments)
 static const NSInteger MTLGPUFamilyMetal3_GGML = 5001;
 
+// virtual address for GPU memory allocations
+static atomic_uintptr_t g_addr_device = 0x000000400ULL;
+
 #if !GGML_METAL_EMBED_LIBRARY
 // Here to assist with NSBundle Path Hack
 @interface GGMLMetalClass : NSObject
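The new g_addr_device counter acts as a simple bump allocator for synthetic device addresses: each private allocation claims a unique, non-overlapping range without ever querying the GPU. A minimal standalone sketch of the idea (plain C; the allocator name is hypothetical):

    #include <stdatomic.h>
    #include <stddef.h>
    #include <stdio.h>

    // start above NULL so a returned address is never 0
    static atomic_uintptr_t g_addr = 0x000000400ULL;

    // fetch_add returns the previous value, so concurrent callers
    // always receive disjoint [addr, addr + size) ranges
    static void * alloc_virtual_range(size_t size) {
        return (void *) atomic_fetch_add_explicit(&g_addr, size, memory_order_relaxed);
    }

    int main(void) {
        void * a = alloc_virtual_range(1024);
        void * b = alloc_virtual_range(4096);
        printf("a = %p\nb = %p\n", a, b); // b == (char *) a + 1024
        return 0;
    }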
@@ -827,7 +832,7 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
 };
 
 struct ggml_metal_buffer {
-    void * all_data; // TODO: https://github.com/ggml-org/llama.cpp/pull/15985
+    void * all_data;
     size_t all_size;
 
     // if false, the Metal buffer data is allocated in private GPU memory and is not shared with the host
@@ -965,14 +970,15 @@ ggml_metal_buffer_t ggml_metal_buffer_init(ggml_metal_device_t dev, size_t size,
     if (shared) {
         res->all_data = ggml_metal_host_malloc(size_aligned);
         res->is_shared = true;
-        res->owned = true;
     } else {
-        // dummy, non-NULL value - we'll populate this after creating the Metal buffer below
-        res->all_data = (void *) 0x000000400ULL;
+        // use virtual address from g_addr_device counter
+        res->all_data = (void *) atomic_fetch_add_explicit(&g_addr_device, size_aligned, memory_order_relaxed);
         res->is_shared = false;
     }
     res->all_size = size_aligned;
 
+    res->owned = true;
+
     res->device = ggml_metal_device_get_obj(dev);
     res->queue = ggml_metal_device_get_queue(dev);
 
@@ -983,15 +989,13 @@ ggml_metal_buffer_t ggml_metal_buffer_init(ggml_metal_device_t dev, size_t size,
     res->buffers[0].metal = nil;
 
     if (size_aligned > 0) {
-        if (props_dev->use_shared_buffers &&shared) {
+        if (props_dev->use_shared_buffers && shared) {
             res->buffers[0].metal = [res->device newBufferWithBytesNoCopy:res->all_data
                                                                    length:size_aligned
                                                                   options:MTLResourceStorageModeShared
                                                               deallocator:nil];
         } else {
             res->buffers[0].metal = [res->device newBufferWithLength:size_aligned options:MTLResourceStorageModePrivate];
-
-            res->all_data = (void *) (res->buffers[0].metal.gpuAddress);
         }
     }
 
@@ -1139,7 +1143,7 @@ bool ggml_metal_buffer_is_shared(ggml_metal_buffer_t buf) {
 
 void ggml_metal_buffer_memset_tensor(ggml_metal_buffer_t buf, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
     if (buf->is_shared) {
-        memset((char *)tensor->data + offset, value, size);
+        memset((char *) tensor->data + offset, value, size);
         return;
     }
 
@@ -1168,7 +1172,7 @@ void ggml_metal_buffer_memset_tensor(ggml_metal_buffer_t buf, struct ggml_tensor
 
 void ggml_metal_buffer_set_tensor(ggml_metal_buffer_t buf, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
     if (buf->is_shared) {
-        memcpy((char *)tensor->data + offset, data, size);
+        memcpy((char *) tensor->data + offset, data, size);
         return;
     }
 
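The memcpy above is only the shared-storage fast path: with is_shared set, tensor->data points into host-visible memory returned by ggml_metal_host_malloc. For private buffers the data instead has to be staged through a shared buffer and copied on the GPU with a blit pass. A hedged Objective-C sketch of such a host-to-private copy (standard Metal API; the helper name is hypothetical and error handling is omitted):

    #import <Metal/Metal.h>

    static void copy_host_to_private(id<MTLDevice> device, id<MTLCommandQueue> queue,
                                     id<MTLBuffer> dst, size_t dst_offset,
                                     const void * src, size_t size) {
        // shared staging buffer initialized with the host data
        id<MTLBuffer> staging = [device newBufferWithBytes:src
                                                    length:size
                                                   options:MTLResourceStorageModeShared];

        id<MTLCommandBuffer> cmd = [queue commandBuffer];
        id<MTLBlitCommandEncoder> blit = [cmd blitCommandEncoder];

        // GPU-side copy from the staging buffer into the private buffer
        [blit copyFromBuffer:staging
                sourceOffset:0
                    toBuffer:dst
           destinationOffset:dst_offset
                        size:size];

        [blit endEncoding];
        [cmd commit];
        [cmd waitUntilCompleted];
    }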
@@ -1223,7 +1227,7 @@ void ggml_metal_buffer_set_tensor(ggml_metal_buffer_t buf, struct ggml_tensor *
12231227
12241228void ggml_metal_buffer_get_tensor (ggml_metal_buffer_t buf, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
12251229 if (buf->is_shared ) {
1226- memcpy (data, (const char *)tensor->data + offset, size);
1230+ memcpy (data, (const char *) tensor->data + offset, size);
12271231 return ;
12281232 }
12291233