Skip to content

Commit c83ad6d

Browse files
ggml-backend : add device and backend reg interfaces (#9707)
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
1 parent a39ab21 commit c83ad6d

28 files changed

+1769
-1263
lines changed

.github/workflows/bench.yml.disabled

+2-2
Original file line number | Diff line number | Diff line change
@@ -27,10 +27,10 @@ on:
2727
push:
2828
branches:
2929
- master
30-
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
30+
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
3131
pull_request_target:
3232
types: [opened, synchronize, reopened]
33-
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
33+
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
3434
schedule:
3535
- cron: '04 2 * * *'
3636

Makefile

+3-2
Original file line number | Diff line number | Diff line change
@@ -1054,10 +1054,11 @@ ggml/src/ggml-alloc.o: \
10541054
$(CC) $(CFLAGS) -c $< -o $@
10551055

10561056
ggml/src/ggml-backend.o: \
1057-
ggml/src/ggml-backend.c \
1057+
ggml/src/ggml-backend.cpp \
1058+
ggml/src/ggml-backend-impl.h \
10581059
ggml/include/ggml.h \
10591060
ggml/include/ggml-backend.h
1060-
$(CC) $(CFLAGS) -c $< -o $@
1061+
$(CXX) $(CXXFLAGS) -c $< -o $@
10611062

10621063
ggml/src/ggml-quants.o: \
10631064
ggml/src/ggml-quants.c \

Package.swift

+1-1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ var sources = [
1111
"src/unicode-data.cpp",
1212
"ggml/src/ggml.c",
1313
"ggml/src/ggml-alloc.c",
14-
"ggml/src/ggml-backend.c",
14+
"ggml/src/ggml-backend.cpp",
1515
"ggml/src/ggml-quants.c",
1616
"ggml/src/ggml-aarch64.c",
1717
]

ggml/include/ggml-backend.h

+144-59
Large diffs are not rendered by default.

ggml/include/ggml-blas.h

+3-3
Original file line number | Diff line number | Diff line change
@@ -9,13 +9,13 @@ extern "C" {
99
#endif
1010

1111
// backend API
12-
GGML_API GGML_CALL ggml_backend_t ggml_backend_blas_init(void);
12+
GGML_API ggml_backend_t ggml_backend_blas_init(void);
1313

14-
GGML_API GGML_CALL bool ggml_backend_is_blas(ggml_backend_t backend);
14+
GGML_API bool ggml_backend_is_blas(ggml_backend_t backend);
1515

1616
// number of threads used for conversion to float
1717
// for openblas and blis, this will also set the number of threads used for blas operations
18-
GGML_API GGML_CALL void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
18+
GGML_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
1919

2020

2121
#ifdef __cplusplus

ggml/include/ggml-cann.h

+9-9
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ extern "C" {
4444
* @param device The index of the device to initialize.
4545
* @return A pointer to the initialized backend instance, or nullptr on failure.
4646
*/
47-
GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);
47+
GGML_API ggml_backend_t ggml_backend_cann_init(int32_t device);
4848

4949
/**
5050
* @brief Checks if a given backend is a CANN backend.
@@ -55,7 +55,7 @@ GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);
5555
* @param backend The backend instance to check.
5656
* @return True if the backend is a CANN backend, false otherwise.
5757
*/
58-
GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend);
58+
GGML_API bool ggml_backend_is_cann(ggml_backend_t backend);
5959

6060
/**
6161
* @brief Retrieves the CANN buffer type for a specified device.
@@ -67,7 +67,7 @@ GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend);
6767
* @return A pointer to the buffer type interface for the specified device, or
6868
* nullptr if the device index is out of range.
6969
*/
70-
GGML_API GGML_CALL ggml_backend_buffer_type_t
70+
GGML_API ggml_backend_buffer_type_t
7171
ggml_backend_cann_buffer_type(int32_t device);
7272

7373
/**
@@ -78,14 +78,14 @@ ggml_backend_cann_buffer_type(int32_t device);
7878
*
7979
* @return The number of CANN devices available.
8080
*/
81-
GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
81+
GGML_API int32_t ggml_backend_cann_get_device_count(void);
8282

8383
/**
8484
* @brief pinned host buffer for use with the CPU backend for faster copies between CPU and NPU.
8585
*
8686
* @return A pointer to the host buffer type interface.
8787
*/
88-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
88+
GGML_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
8989

9090
/**
9191
* @brief Retrieves the description of a specific CANN device.
@@ -97,7 +97,7 @@ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type
9797
* @param description Pointer to a buffer where the description will be written.
9898
* @param description_size Size of the description buffer.
9999
*/
100-
GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
100+
GGML_API void ggml_backend_cann_get_device_description(
101101
int32_t device, char* description, size_t description_size);
102102

103103
/**
@@ -112,9 +112,9 @@ GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
112112
* @param total Pointer to a variable where the total memory size will be
113113
* stored.
114114
*/
115-
GGML_API GGML_CALL void ggml_backend_cann_get_device_memory(int32_t device,
116-
size_t* free,
117-
size_t* total);
115+
GGML_API void ggml_backend_cann_get_device_memory(int32_t device,
116+
size_t* free,
117+
size_t* total);
118118

119119
/**
120120
* @brief Set the logging callback for GGML.

ggml/include/ggml-cuda.h

+17-15
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,10 @@
33
#include "ggml.h"
44
#include "ggml-backend.h"
55

6+
#ifdef __cplusplus
7+
extern "C" {
8+
#endif
9+
610
#ifdef GGML_USE_HIPBLAS
711
#define GGML_CUDA_NAME "ROCm"
812
#define GGML_CUBLAS_NAME "hipBLAS"
@@ -13,35 +17,33 @@
1317
#define GGML_CUDA_NAME "CUDA"
1418
#define GGML_CUBLAS_NAME "cuBLAS"
1519
#endif
16-
17-
#ifdef __cplusplus
18-
extern "C" {
19-
#endif
20-
2120
#define GGML_CUDA_MAX_DEVICES 16
2221

2322
// backend API
24-
GGML_API GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device);
23+
GGML_API ggml_backend_t ggml_backend_cuda_init(int device);
2524

26-
GGML_API GGML_CALL bool ggml_backend_is_cuda(ggml_backend_t backend);
25+
GGML_API bool ggml_backend_is_cuda(ggml_backend_t backend);
2726

2827
// device buffer
29-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
28+
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
3029

3130
// split tensor buffer that splits matrices by rows across multiple devices
32-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);
31+
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);
3332

3433
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
35-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
34+
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
3635

37-
GGML_API GGML_CALL int ggml_backend_cuda_get_device_count(void);
38-
GGML_API GGML_CALL void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
39-
GGML_API GGML_CALL void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
36+
GGML_API int ggml_backend_cuda_get_device_count(void);
37+
GGML_API void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
38+
GGML_API void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
4039

41-
GGML_API GGML_CALL bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
42-
GGML_API GGML_CALL void ggml_backend_cuda_unregister_host_buffer(void * buffer);
40+
GGML_API bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
41+
GGML_API void ggml_backend_cuda_unregister_host_buffer(void * buffer);
4342

4443
GGML_API void ggml_backend_cuda_log_set_callback(ggml_log_callback log_callback, void * user_data);
44+
45+
GGML_API ggml_backend_reg_t ggml_backend_cuda_reg(void);
46+
4547
#ifdef __cplusplus
4648
}
4749
#endif

ggml/include/ggml-metal.h

+4-2
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
// Note: this description is outdated
2+
//
13
// An interface allowing to compute ggml_cgraph with Metal
24
//
35
// This is a fully functional interface that extends ggml with GPU support for Apple devices.
@@ -43,11 +45,11 @@ GGML_API ggml_backend_t ggml_backend_metal_init(void);
4345

4446
GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
4547

46-
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);
48+
GGML_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);
4749

4850
GGML_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
4951

50-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
52+
GGML_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
5153

5254
// helper to check if the device supports a specific family
5355
// ideally, the user code should be doing these checks

ggml/include/ggml-rpc.h

+5-5
Original file line number | Diff line number | Diff line change
@@ -10,14 +10,14 @@ extern "C" {
1010
#define GGML_RPC_MAX_SERVERS 16
1111

1212
// backend API
13-
GGML_API GGML_CALL ggml_backend_t ggml_backend_rpc_init(const char * endpoint);
14-
GGML_API GGML_CALL bool ggml_backend_is_rpc(ggml_backend_t backend);
13+
GGML_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint);
14+
GGML_API bool ggml_backend_is_rpc(ggml_backend_t backend);
1515

16-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint);
16+
GGML_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint);
1717

18-
GGML_API GGML_CALL void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
18+
GGML_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
1919

20-
GGML_API GGML_CALL void start_rpc_server(ggml_backend_t backend, const char * endpoint, size_t free_mem, size_t total_mem);
20+
GGML_API void start_rpc_server(ggml_backend_t backend, const char * endpoint, size_t free_mem, size_t total_mem);
2121

2222
#ifdef __cplusplus
2323
}

ggml/include/ggml-sycl.h

+8-8
Original file line number | Diff line number | Diff line change
@@ -23,20 +23,20 @@ GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
2323
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
2424

2525
// split tensor buffer that splits matrices by rows across multiple devices
26-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
26+
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
2727

2828
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
2929
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
3030

31-
GGML_API void ggml_backend_sycl_print_sycl_devices(void);
32-
GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len);
33-
GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
34-
GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
35-
GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
31+
GGML_API void ggml_backend_sycl_print_sycl_devices(void);
32+
GGML_API void ggml_sycl_get_gpu_list(int *id_list, int max_len);
33+
GGML_API void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
34+
GGML_API int ggml_backend_sycl_get_device_count();
35+
GGML_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
3636

3737
// SYCL doesn't support registering host memory, keep here for reference
38-
// GGML_API GGML_CALL bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
39-
// GGML_API GGML_CALL void ggml_backend_sycl_unregister_host_buffer(void * buffer);
38+
// GGML_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
39+
// GGML_API void ggml_backend_sycl_unregister_host_buffer(void * buffer);
4040
#ifdef __cplusplus
4141
}
4242
#endif

ggml/include/ggml-vulkan.h

+7-7
Original file line number | Diff line number | Diff line change
@@ -13,16 +13,16 @@ extern "C" {
1313
GGML_API void ggml_vk_instance_init(void);
1414

1515
// backend API
16-
GGML_API GGML_CALL ggml_backend_t ggml_backend_vk_init(size_t dev_num);
16+
GGML_API ggml_backend_t ggml_backend_vk_init(size_t dev_num);
1717

18-
GGML_API GGML_CALL bool ggml_backend_is_vk(ggml_backend_t backend);
19-
GGML_API GGML_CALL int ggml_backend_vk_get_device_count(void);
20-
GGML_API GGML_CALL void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size);
21-
GGML_API GGML_CALL void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total);
18+
GGML_API bool ggml_backend_is_vk(ggml_backend_t backend);
19+
GGML_API int ggml_backend_vk_get_device_count(void);
20+
GGML_API void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size);
21+
GGML_API void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total);
2222

23-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num);
23+
GGML_API ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num);
2424
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
25-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
25+
GGML_API ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
2626

2727
#ifdef __cplusplus
2828
}

ggml/include/ggml.h

+29-39
Original file line number | Diff line number | Diff line change
@@ -187,16 +187,6 @@
187187
# define GGML_API
188188
#endif
189189

190-
#ifdef GGML_MULTIPLATFORM
191-
# if defined(_WIN32)
192-
# define GGML_CALL
193-
# else
194-
# define GGML_CALL __attribute__((__ms_abi__))
195-
# endif
196-
#else
197-
# define GGML_CALL
198-
#endif
199-
200190
// TODO: support for clang
201191
#ifdef __GNUC__
202192
# define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
@@ -340,7 +330,7 @@ extern "C" {
340330
};
341331

342332
// get ggml_status name string
343-
GGML_API GGML_CALL const char * ggml_status_to_string(enum ggml_status status);
333+
GGML_API const char * ggml_status_to_string(enum ggml_status status);
344334

345335
// ieee 754-2008 half-precision float16
346336
// todo: make this not an integral type
@@ -716,46 +706,46 @@ extern "C" {
716706
GGML_API void ggml_print_object (const struct ggml_object * obj);
717707
GGML_API void ggml_print_objects(const struct ggml_context * ctx);
718708

719-
GGML_API GGML_CALL int64_t ggml_nelements (const struct ggml_tensor * tensor);
720-
GGML_API GGML_CALL int64_t ggml_nrows (const struct ggml_tensor * tensor);
721-
GGML_API GGML_CALL size_t ggml_nbytes (const struct ggml_tensor * tensor);
722-
GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
709+
GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
710+
GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
711+
GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
712+
GGML_API size_t ggml_nbytes_pad(const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
723713

724-
GGML_API GGML_CALL int64_t ggml_blck_size(enum ggml_type type);
725-
GGML_API GGML_CALL size_t ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block
726-
GGML_API GGML_CALL size_t ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row
714+
GGML_API int64_t ggml_blck_size(enum ggml_type type);
715+
GGML_API size_t ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block
716+
GGML_API size_t ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row
727717

728718
GGML_DEPRECATED(
729719
GGML_API double ggml_type_sizef(enum ggml_type type), // ggml_type_size()/ggml_blck_size() as float
730720
"use ggml_row_size() instead");
731721

732-
GGML_API GGML_CALL const char * ggml_type_name(enum ggml_type type);
733-
GGML_API GGML_CALL const char * ggml_op_name (enum ggml_op op);
734-
GGML_API const char * ggml_op_symbol(enum ggml_op op);
722+
GGML_API const char * ggml_type_name(enum ggml_type type);
723+
GGML_API const char * ggml_op_name (enum ggml_op op);
724+
GGML_API const char * ggml_op_symbol(enum ggml_op op);
735725

736-
GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
737-
GGML_API GGML_CALL const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
726+
GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
727+
GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
738728

739-
GGML_API GGML_CALL size_t ggml_element_size(const struct ggml_tensor * tensor);
729+
GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
740730

741-
GGML_API GGML_CALL bool ggml_is_quantized(enum ggml_type type);
731+
GGML_API bool ggml_is_quantized(enum ggml_type type);
742732

743733
// TODO: temporary until model loading of ggml examples is refactored
744734
GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
745735

746-
GGML_API GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor);
747-
GGML_API GGML_CALL bool ggml_is_permuted (const struct ggml_tensor * tensor);
748-
GGML_API GGML_CALL bool ggml_is_empty (const struct ggml_tensor * tensor);
749-
GGML_API bool ggml_is_scalar (const struct ggml_tensor * tensor);
750-
GGML_API bool ggml_is_vector (const struct ggml_tensor * tensor);
751-
GGML_API bool ggml_is_matrix (const struct ggml_tensor * tensor);
752-
GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
753-
GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
736+
GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
737+
GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
738+
GGML_API bool ggml_is_empty (const struct ggml_tensor * tensor);
739+
GGML_API bool ggml_is_scalar (const struct ggml_tensor * tensor);
740+
GGML_API bool ggml_is_vector (const struct ggml_tensor * tensor);
741+
GGML_API bool ggml_is_matrix (const struct ggml_tensor * tensor);
742+
GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
743+
GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
754744

755-
GGML_API GGML_CALL bool ggml_is_contiguous (const struct ggml_tensor * tensor);
756-
GGML_API GGML_CALL bool ggml_is_contiguous_0(const struct ggml_tensor * tensor); // same as ggml_is_contiguous()
757-
GGML_API GGML_CALL bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1
758-
GGML_API GGML_CALL bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2
745+
GGML_API bool ggml_is_contiguous (const struct ggml_tensor * tensor);
746+
GGML_API bool ggml_is_contiguous_0(const struct ggml_tensor * tensor); // same as ggml_is_contiguous()
747+
GGML_API bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1
748+
GGML_API bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2
759749

760750
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
761751
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
@@ -847,7 +837,7 @@ extern "C" {
847837
GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
848838
GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
849839

850-
GGML_API GGML_CALL enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
840+
GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
851841

852842
GGML_API const char * ggml_get_name (const struct ggml_tensor * tensor);
853843
GGML_API struct ggml_tensor * ggml_set_name ( struct ggml_tensor * tensor, const char * name);
@@ -1561,7 +1551,7 @@ extern "C" {
15611551
"use ggml_rope_ext_inplace instead");
15621552

15631553
// compute correction dims for YaRN RoPE scaling
1564-
GGML_CALL void ggml_rope_yarn_corr_dims(
1554+
void ggml_rope_yarn_corr_dims(
15651555
int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]);
15661556

15671557
// rotary position embedding backward, i.e compute dx from dy

ggml/src/CMakeLists.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -1325,7 +1325,7 @@ add_library(ggml
13251325
../include/ggml-backend.h
13261326
ggml.c
13271327
ggml-alloc.c
1328-
ggml-backend.c
1328+
ggml-backend.cpp
13291329
ggml-quants.c
13301330
ggml-quants.h
13311331
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}

0 commit comments

Comments
 (0)