Cleanup cublas comments
slaren committed Apr 18, 2023
1 parent 5fc6799 commit efa97ce
Showing 2 changed files with 14 additions and 15 deletions.
4 changes: 3 additions & 1 deletion CMakeLists.txt
@@ -1,4 +1,4 @@
- cmake_minimum_required(VERSION 3.17) # Don't bump this version for no reason
+ cmake_minimum_required(VERSION 3.12) # Don't bump this version for no reason
project("llama.cpp" C CXX)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@@ -144,6 +144,8 @@ if (LLAMA_OPENBLAS)
endif()

if (LLAMA_CUBLAS)
+ cmake_minimum_required(VERSION 3.17)

find_package(CUDAToolkit)
if (CUDAToolkit_FOUND)
message(STATUS "cuBLAS found")
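Note: find_package(CUDAToolkit) relies on the FindCUDAToolkit module that first shipped with CMake 3.17, so the higher minimum is only enforced inside the LLAMA_CUBLAS branch; CPU-only builds can keep using CMake 3.12.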
25 changes: 11 additions & 14 deletions ggml.c
@@ -172,14 +172,14 @@ static cublasHandle_t cublasH = NULL;
static cudaStream_t cudaStream = NULL;
static void init_cublas(void) {
if (cublasH == NULL) {
- /* step 1: create cublas handle, bind a stream */
+ // create cublas handle, bind a stream
CUBLAS_CHECK(cublasCreate(&cublasH));

CUDA_CHECK(cudaStreamCreateWithFlags(&cudaStream, cudaStreamNonBlocking));
CUBLAS_CHECK(cublasSetStream(cublasH, cudaStream));

// configure logging to stdout
- //CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, NULL));
+ // CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, NULL));
}
}
#endif
@@ -7336,19 +7336,19 @@ static void ggml_compute_forward_mul_mat_f32(
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);

#if defined(GGML_USE_CUBLAS)
- /* step 2: copy data to device */
+ // copy data to device
CUDA_CHECK(cudaMemcpyAsync(d_X, x, sizeof(float) * x_ne, cudaMemcpyHostToDevice, cudaStream));
CUDA_CHECK(cudaMemcpyAsync(d_Y, y, sizeof(float) * y_ne, cudaMemcpyHostToDevice, cudaStream));

- /* step 3: compute */
+ // compute
CUBLAS_CHECK(
cublasSgemm(cublasH, CUBLAS_OP_T, CUBLAS_OP_N,
ne01, ne11, ne10,
&alpha, d_X, ne00,
d_Y, ne10,
&beta, d_D, ne01));

- /* step 4: copy data to host */
+ // copy data to host
CUDA_CHECK(cudaMemcpyAsync(d, d_D, sizeof(float) * d_ne, cudaMemcpyDeviceToHost, cudaStream));
CUDA_CHECK(cudaStreamSynchronize(cudaStream));
#else
@@ -7362,7 +7362,6 @@ static void ggml_compute_forward_mul_mat_f32(
}
}
#if defined(GGML_USE_CUBLAS)
- /* free resources */
CUDA_CHECK(cudaFree(d_X));
CUDA_CHECK(cudaFree(d_Y));
CUDA_CHECK(cudaFree(d_D));
@@ -7559,11 +7558,11 @@ static void ggml_compute_forward_mul_mat_f16_f32(

float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);

- /* step 2: copy data to device */
+ // copy data to device
CUDA_CHECK(cudaMemcpyAsync(d_X, x, sizeof(ggml_fp16_t) * x_ne, cudaMemcpyHostToDevice, cudaStream));
CUDA_CHECK(cudaMemcpyAsync(d_Y, y, sizeof(ggml_fp16_t) * y_ne, cudaMemcpyHostToDevice, cudaStream));

- /* step 3: compute */
+ // compute
CUBLAS_CHECK(
cublasGemmEx(cublasH, CUBLAS_OP_T, CUBLAS_OP_N,
ne01, ne11, ne10,
@@ -7573,7 +7572,7 @@ static void ggml_compute_forward_mul_mat_f16_f32(
CUBLAS_COMPUTE_32F,
CUBLAS_GEMM_DEFAULT));

- /* step 4: copy data to host */
+ // copy data to host
CUDA_CHECK(cudaMemcpyAsync(d, d_D, sizeof(float) * d_ne, cudaMemcpyDeviceToHost, cudaStream));
CUDA_CHECK(cudaStreamSynchronize(cudaStream));
#else
@@ -7593,7 +7592,6 @@ static void ggml_compute_forward_mul_mat_f16_f32(
}

#if defined(GGML_USE_CUBLAS)
- /* free resources */
CUDA_CHECK(cudaFree(d_X));
CUDA_CHECK(cudaFree(d_Y));
CUDA_CHECK(cudaFree(d_D));
@@ -7797,19 +7795,19 @@ static void ggml_compute_forward_mul_mat_q_f32(
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);

#if defined(GGML_USE_CUBLAS)
- /* step 2: copy data to device */
+ // copy data to device
CUDA_CHECK(cudaMemcpyAsync(d_X, x, sizeof(float) * x_ne, cudaMemcpyHostToDevice, cudaStream));
CUDA_CHECK(cudaMemcpyAsync(d_Y, y, sizeof(float) * y_ne, cudaMemcpyHostToDevice, cudaStream));

- /* step 3: compute */
+ // compute
CUBLAS_CHECK(
cublasSgemm(cublasH, CUBLAS_OP_T, CUBLAS_OP_N,
ne01, ne11, ne10,
&alpha, d_X, ne00,
d_Y, ne10,
&beta, d_D, ne01));

- /* step 4: copy data to host */
+ // copy data to host
CUDA_CHECK(cudaMemcpyAsync(d, d_D, sizeof(float) * d_ne, cudaMemcpyDeviceToHost, cudaStream));
CUDA_CHECK(cudaStreamSynchronize(cudaStream));
#else
@@ -7824,7 +7822,6 @@ static void ggml_compute_forward_mul_mat_q_f32(
}

#if defined(GGML_USE_CUBLAS)
- /* free resources */
CUDA_CHECK(cudaFree(d_X));
CUDA_CHECK(cudaFree(d_Y));
CUDA_CHECK(cudaFree(d_D));
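All three mat-mul paths touched above follow the same copy-to-device / GEMM / copy-to-host round trip on one non-blocking stream. Below is a minimal standalone sketch of that flow; the function name gemm_on_device and the buffer layout are illustrative only (not part of ggml), and the CUDA_CHECK / CUBLAS_CHECK error handling used in ggml.c is omitted for brevity.

#include <cuda_runtime.h>
#include <cublas_v2.h>

// Sketch: computes D = X^T * Y, with X stored as (k x m) and Y as (k x n),
// both column-major, mirroring the CUBLAS_OP_T / CUBLAS_OP_N calls above.
static void gemm_on_device(cublasHandle_t handle, cudaStream_t stream,
                           const float * x, const float * y, float * d,
                           int m, int n, int k) {
    const float alpha = 1.0f;
    const float beta  = 0.0f;
    float * d_X = NULL;
    float * d_Y = NULL;
    float * d_D = NULL;

    cudaMalloc((void **) &d_X, sizeof(float) * k * m);
    cudaMalloc((void **) &d_Y, sizeof(float) * k * n);
    cudaMalloc((void **) &d_D, sizeof(float) * m * n);

    // copy data to device
    cudaMemcpyAsync(d_X, x, sizeof(float) * k * m, cudaMemcpyHostToDevice, stream);
    cudaMemcpyAsync(d_Y, y, sizeof(float) * k * n, cudaMemcpyHostToDevice, stream);

    // compute: D (m x n) = X^T (m x k) * Y (k x n)
    cublasSetStream(handle, stream);
    cublasSgemm(handle, CUBLAS_OP_T, CUBLAS_OP_N,
                m, n, k,
                &alpha, d_X, k,
                        d_Y, k,
                &beta,  d_D, m);

    // copy data to host and wait for the stream to finish
    cudaMemcpyAsync(d, d_D, sizeof(float) * m * n, cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);

    cudaFree(d_X);
    cudaFree(d_Y);
    cudaFree(d_D);
}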
