Skip to content

Commit 2741ffb

Browse files
committed
move hipblas definitions to header files
1 parent bf49a93 commit 2741ffb

File tree

4 files changed

+114
-109
lines changed

4 files changed

+114
-109
lines changed

otherarch/ggml_v2-cuda-legacy.cu

+1 -54
Original file line number | Diff line number | Diff line change
@@ -4,60 +4,7 @@
44
#include <stdio.h>
55
#include <atomic>
66

7-
#if defined(GGML_USE_HIPBLAS)
8-
#include <hip/hip_runtime.h>
9-
#include <hipblas/hipblas.h>
10-
#include <hip/hip_fp16.h>
11-
#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F
12-
#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F
13-
#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
14-
#define CUBLAS_OP_N HIPBLAS_OP_N
15-
#define CUBLAS_OP_T HIPBLAS_OP_T
16-
#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS
17-
#define CUBLAS_TF32_TENSOR_OP_MATH 0
18-
#define CUDA_R_16F HIPBLAS_R_16F
19-
#define CUDA_R_32F HIPBLAS_R_32F
20-
#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
21-
#define cublasCreate hipblasCreate
22-
#define cublasGemmEx hipblasGemmEx
23-
#define cublasHandle_t hipblasHandle_t
24-
#define cublasSetMathMode(handle, mode) CUBLAS_STATUS_SUCCESS
25-
#define cublasSetStream hipblasSetStream
26-
#define cublasSgemm hipblasSgemm
27-
#define cublasStatus_t hipblasStatus_t
28-
#define cudaDeviceProp hipDeviceProp_t
29-
#define cudaDeviceSynchronize hipDeviceSynchronize
30-
#define cudaError_t hipError_t
31-
#define cudaEventCreateWithFlags hipEventCreateWithFlags
32-
#define cudaEventDisableTiming hipEventDisableTiming
33-
#define cudaEventRecord hipEventRecord
34-
#define cudaEvent_t hipEvent_t
35-
#define cudaFree hipFree
36-
#define cudaFreeHost hipHostFree
37-
#define cudaGetDevice hipGetDevice
38-
#define cudaGetDeviceCount hipGetDeviceCount
39-
#define cudaGetDeviceProperties hipGetDeviceProperties
40-
#define cudaGetErrorString hipGetErrorString
41-
#define cudaGetLastError hipGetLastError
42-
#define cudaMalloc hipMalloc
43-
#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault)
44-
#define cudaMemcpy hipMemcpy
45-
#define cudaMemcpy2DAsync hipMemcpy2DAsync
46-
#define cudaMemcpyAsync hipMemcpyAsync
47-
#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice
48-
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
49-
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
50-
#define cudaMemcpyKind hipMemcpyKind
51-
#define cudaMemset hipMemset
52-
#define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize
53-
#define cudaSetDevice hipSetDevice
54-
#define cudaStreamCreateWithFlags hipStreamCreateWithFlags
55-
#define cudaStreamNonBlocking hipStreamNonBlocking
56-
#define cudaStreamSynchronize hipStreamSynchronize
57-
#define cudaStreamWaitEvent hipStreamWaitEvent
58-
#define cudaStream_t hipStream_t
59-
#define cudaSuccess hipSuccess
60-
#else
7+
#ifndef GGML_USE_HIPBLAS
618
#include <cuda_runtime.h>
629
#include <cublas_v2.h>
6310
#include <cuda_fp16.h>

otherarch/ggml_v2-cuda-legacy.h

+56
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,61 @@
11
#include "ggml_v2.h"
22

3+
#if defined(GGML_USE_HIPBLAS)
4+
#include <hip/hip_runtime.h>
5+
#include <hipblas/hipblas.h>
6+
#include <hip/hip_fp16.h>
7+
8+
#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F
9+
#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F
10+
#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
11+
#define CUBLAS_OP_N HIPBLAS_OP_N
12+
#define CUBLAS_OP_T HIPBLAS_OP_T
13+
#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS
14+
#define CUBLAS_TF32_TENSOR_OP_MATH 0
15+
#define CUDA_R_16F HIPBLAS_R_16F
16+
#define CUDA_R_32F HIPBLAS_R_32F
17+
#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
18+
#define cublasCreate hipblasCreate
19+
#define cublasGemmEx hipblasGemmEx
20+
#define cublasHandle_t hipblasHandle_t
21+
#define cublasSetMathMode(handle, mode) CUBLAS_STATUS_SUCCESS
22+
#define cublasSetStream hipblasSetStream
23+
#define cublasSgemm hipblasSgemm
24+
#define cublasStatus_t hipblasStatus_t
25+
#define cudaDeviceProp hipDeviceProp_t
26+
#define cudaDeviceSynchronize hipDeviceSynchronize
27+
#define cudaError_t hipError_t
28+
#define cudaEventCreateWithFlags hipEventCreateWithFlags
29+
#define cudaEventDisableTiming hipEventDisableTiming
30+
#define cudaEventRecord hipEventRecord
31+
#define cudaEvent_t hipEvent_t
32+
#define cudaFree hipFree
33+
#define cudaFreeHost hipHostFree
34+
#define cudaGetDevice hipGetDevice
35+
#define cudaGetDeviceCount hipGetDeviceCount
36+
#define cudaGetDeviceProperties hipGetDeviceProperties
37+
#define cudaGetErrorString hipGetErrorString
38+
#define cudaGetLastError hipGetLastError
39+
#define cudaMalloc hipMalloc
40+
#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault)
41+
#define cudaMemcpy hipMemcpy
42+
#define cudaMemcpy2DAsync hipMemcpy2DAsync
43+
#define cudaMemcpyAsync hipMemcpyAsync
44+
#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice
45+
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
46+
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
47+
#define cudaMemcpyKind hipMemcpyKind
48+
#define cudaMemset hipMemset
49+
#define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize
50+
#define cudaSetDevice hipSetDevice
51+
#define cudaStreamCreateWithFlags hipStreamCreateWithFlags
52+
#define cudaStreamNonBlocking hipStreamNonBlocking
53+
#define cudaStreamSynchronize hipStreamSynchronize
54+
#define cudaStreamWaitEvent hipStreamWaitEvent
55+
#define cudaStream_t hipStream_t
56+
#define cudaSuccess hipSuccess
57+
#endif
58+
359
#ifdef __cplusplus
460
extern "C" {
561
#endif

otherarch/ggml_v2-cuda.cu

+1 -55
Original file line number | Diff line number | Diff line change
@@ -4,64 +4,10 @@
44
#include <stdio.h>
55
#include <atomic>
66

7-
#if defined(GGML_USE_HIPBLAS)
8-
#include <hip/hip_runtime.h>
9-
#include <hipblas/hipblas.h>
10-
#include <hip/hip_fp16.h>
11-
#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F
12-
#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F
13-
#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
14-
#define CUBLAS_OP_N HIPBLAS_OP_N
15-
#define CUBLAS_OP_T HIPBLAS_OP_T
16-
#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS
17-
#define CUBLAS_TF32_TENSOR_OP_MATH 0
18-
#define CUDA_R_16F HIPBLAS_R_16F
19-
#define CUDA_R_32F HIPBLAS_R_32F
20-
#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
21-
#define cublasCreate hipblasCreate
22-
#define cublasGemmEx hipblasGemmEx
23-
#define cublasHandle_t hipblasHandle_t
24-
#define cublasSetMathMode(handle, mode) CUBLAS_STATUS_SUCCESS
25-
#define cublasSetStream hipblasSetStream
26-
#define cublasSgemm hipblasSgemm
27-
#define cublasStatus_t hipblasStatus_t
28-
#define cudaDeviceProp hipDeviceProp_t
29-
#define cudaDeviceSynchronize hipDeviceSynchronize
30-
#define cudaError_t hipError_t
31-
#define cudaEventCreateWithFlags hipEventCreateWithFlags
32-
#define cudaEventDisableTiming hipEventDisableTiming
33-
#define cudaEventRecord hipEventRecord
34-
#define cudaEvent_t hipEvent_t
35-
#define cudaFree hipFree
36-
#define cudaFreeHost hipHostFree
37-
#define cudaGetDevice hipGetDevice
38-
#define cudaGetDeviceCount hipGetDeviceCount
39-
#define cudaGetDeviceProperties hipGetDeviceProperties
40-
#define cudaGetErrorString hipGetErrorString
41-
#define cudaGetLastError hipGetLastError
42-
#define cudaMalloc hipMalloc
43-
#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault)
44-
#define cudaMemcpy hipMemcpy
45-
#define cudaMemcpy2DAsync hipMemcpy2DAsync
46-
#define cudaMemcpyAsync hipMemcpyAsync
47-
#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice
48-
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
49-
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
50-
#define cudaMemcpyKind hipMemcpyKind
51-
#define cudaMemset hipMemset
52-
#define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize
53-
#define cudaSetDevice hipSetDevice
54-
#define cudaStreamCreateWithFlags hipStreamCreateWithFlags
55-
#define cudaStreamNonBlocking hipStreamNonBlocking
56-
#define cudaStreamSynchronize hipStreamSynchronize
57-
#define cudaStreamWaitEvent hipStreamWaitEvent
58-
#define cudaStream_t hipStream_t
59-
#define cudaSuccess hipSuccess
60-
#else
7+
#ifndef GGML_USE_HIPBLAS
618
#include <cuda_runtime.h>
629
#include <cublas_v2.h>
6310
#include <cuda_fp16.h>
64-
6511
#endif
6612

6713
#include "ggml_v2-cuda.h"

otherarch/ggml_v2-cuda.h

+56
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,61 @@
11
#include "ggml_v2.h"
22

3+
#if defined(GGML_USE_HIPBLAS)
4+
#include <hip/hip_runtime.h>
5+
#include <hipblas/hipblas.h>
6+
#include <hip/hip_fp16.h>
7+
8+
#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F
9+
#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F
10+
#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
11+
#define CUBLAS_OP_N HIPBLAS_OP_N
12+
#define CUBLAS_OP_T HIPBLAS_OP_T
13+
#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS
14+
#define CUBLAS_TF32_TENSOR_OP_MATH 0
15+
#define CUDA_R_16F HIPBLAS_R_16F
16+
#define CUDA_R_32F HIPBLAS_R_32F
17+
#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
18+
#define cublasCreate hipblasCreate
19+
#define cublasGemmEx hipblasGemmEx
20+
#define cublasHandle_t hipblasHandle_t
21+
#define cublasSetMathMode(handle, mode) CUBLAS_STATUS_SUCCESS
22+
#define cublasSetStream hipblasSetStream
23+
#define cublasSgemm hipblasSgemm
24+
#define cublasStatus_t hipblasStatus_t
25+
#define cudaDeviceProp hipDeviceProp_t
26+
#define cudaDeviceSynchronize hipDeviceSynchronize
27+
#define cudaError_t hipError_t
28+
#define cudaEventCreateWithFlags hipEventCreateWithFlags
29+
#define cudaEventDisableTiming hipEventDisableTiming
30+
#define cudaEventRecord hipEventRecord
31+
#define cudaEvent_t hipEvent_t
32+
#define cudaFree hipFree
33+
#define cudaFreeHost hipHostFree
34+
#define cudaGetDevice hipGetDevice
35+
#define cudaGetDeviceCount hipGetDeviceCount
36+
#define cudaGetDeviceProperties hipGetDeviceProperties
37+
#define cudaGetErrorString hipGetErrorString
38+
#define cudaGetLastError hipGetLastError
39+
#define cudaMalloc hipMalloc
40+
#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault)
41+
#define cudaMemcpy hipMemcpy
42+
#define cudaMemcpy2DAsync hipMemcpy2DAsync
43+
#define cudaMemcpyAsync hipMemcpyAsync
44+
#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice
45+
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
46+
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
47+
#define cudaMemcpyKind hipMemcpyKind
48+
#define cudaMemset hipMemset
49+
#define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize
50+
#define cudaSetDevice hipSetDevice
51+
#define cudaStreamCreateWithFlags hipStreamCreateWithFlags
52+
#define cudaStreamNonBlocking hipStreamNonBlocking
53+
#define cudaStreamSynchronize hipStreamSynchronize
54+
#define cudaStreamWaitEvent hipStreamWaitEvent
55+
#define cudaStream_t hipStream_t
56+
#define cudaSuccess hipSuccess
57+
#endif
58+
359
#ifdef __cplusplus
460
extern "C" {
561
#endif

0 commit comments

Comments
 (0)