@@ -41,8 +41,13 @@ if (NOT MSVC)
41
41
endif ()
42
42
43
43
# 3rd party libs and GPU backend options (cuBLAS / CUDA dmmv kernel tuning, hipBLAS, k-quants)
44
- option (LLAMA_CUBLAS "llama: use cuBLAS" ON )
45
-
44
+ option (LLAMA_CUBLAS "llama: use cuBLAS" OFF )
45
+ set (LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels" )
46
+ set (LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels" )
47
+ option (LLAMA_CUDA_DMMV_F16 "llama: use 16 bit floats for dmmv CUDA kernels" OFF )
48
+ set (LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K" )
49
+ option (LLAMA_HIPBLAS "llama: use hipBLAS" OFF )
50
+ option (LLAMA_K_QUANTS "llama: use k-quants" ON )
46
51
47
52
48
53
#
@@ -72,6 +77,12 @@ if (LLAMA_CUBLAS)
72
77
set (GGML_V2_LEGACY_CUDA_SOURCES otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h)
73
78
74
79
add_compile_definitions (GGML_USE_CUBLAS)
80
+ add_compile_definitions (GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X} )
81
+ add_compile_definitions (GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y} )
82
+ if (LLAMA_CUDA_DMMV_F16)
83
+ add_compile_definitions (GGML_CUDA_DMMV_F16)
84
+ endif ()
85
+ add_compile_definitions (K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER} )
75
86
76
87
if (LLAMA_STATIC)
77
88
set (LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
@@ -84,7 +95,37 @@ if (LLAMA_CUBLAS)
84
95
endif ()
85
96
endif ()
86
97
98
+ # Optional HIP/ROCm backend: when LLAMA_HIPBLAS is ON, build ggml-cuda.cu as the
+ # `ggml-rocm` object library, link it against HIP + hipBLAS, and append it to
+ # LLAMA_EXTRA_LIBS. If either package is missing, only a warning is emitted.
+ if (LLAMA_HIPBLAS)
99
+ list (APPEND CMAKE_PREFIX_PATH /opt/rocm)
100
+
101
+ # Pass the variable *name* to if(): it dereferences the name itself. Expanding
+ # ${...} unquoted breaks when the compiler ID is empty (MATCHES loses its left
+ # operand) and may be dereferenced a second time when it is not.
+ if (NOT CMAKE_C_COMPILER_ID MATCHES "Clang" )
102
+ message (WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang" )
103
+ endif ()
104
+ if (NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang" )
105
+ message (WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++" )
106
+ endif ()
107
+
108
+ # Deliberately not REQUIRED: the not-found case is handled by the else() branch below.
+ find_package (hip)
109
+ find_package (hipblas)
87
110
111
+ # Bare names again, so an unset *_FOUND variable evaluates to false instead of
+ # producing a malformed condition.
+ if (hipblas_FOUND AND hip_FOUND)
112
+ message (STATUS "HIP and hipBLAS found" )
113
+ # Directory-wide defines: GGML_USE_CUBLAS is set together with GGML_USE_HIPBLAS
+ # (presumably so the shared CUDA code paths are enabled for HIP builds - confirm in the sources).
+ add_compile_definitions (GGML_USE_HIPBLAS GGML_USE_CUBLAS)
114
+ add_library (ggml-rocm OBJECT ggml-cuda.cu ggml-cuda.h)
115
+ target_compile_definitions (ggml-rocm PRIVATE GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X} )
116
+ target_compile_definitions (ggml-rocm PRIVATE GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y} )
117
+ target_compile_definitions (ggml-rocm PRIVATE K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER} )
118
+ # Compile the .cu file with the C++ compiler rather than as a CUDA source.
+ set_source_files_properties (ggml-cuda.cu PROPERTIES LANGUAGE CXX)
119
+ target_link_libraries (ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::hipblas)
120
+
121
+ if (LLAMA_STATIC)
122
+ message (FATAL_ERROR "Static linking not supported for HIP/ROCm" )
123
+ endif ()
124
+ set (LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ggml-rocm)
125
+ else ()
126
+ message (WARNING "hipBLAS or HIP not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm" )
127
+ endif ()
128
+ endif ()
88
129
89
130
if (LLAMA_ALL_WARNINGS)
90
131
if (NOT MSVC )
0 commit comments