Skip to content

Commit

Permalink
Auto-format code changes (#645)
Browse files Browse the repository at this point in the history
Auto-format code using Clang-Format

Co-authored-by: GitHub Actions <actions@github.com>
  • Loading branch information
github-actions[bot] and actions-user committed Sep 4, 2024
1 parent 93dba08 commit 7c5f4bf
Show file tree
Hide file tree
Showing 10 changed files with 404 additions and 107 deletions.
8 changes: 7 additions & 1 deletion include/micm/cuda/util/cuda_dense_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,13 @@ namespace micm
{
// the cudaMemset function only works for integer types and is an asynchronous function:
// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1gf7338650f7683c51ee26aadc6973c63a
CHECK_CUDA_ERROR(cudaMemsetAsync(this->param_.d_data_, val, sizeof(T) * this->param_.number_of_elements_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaMemset");
CHECK_CUDA_ERROR(
cudaMemsetAsync(
this->param_.d_data_,
val,
sizeof(T) * this->param_.number_of_elements_,
micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMemset");
}
else
{
Expand Down
13 changes: 6 additions & 7 deletions include/micm/cuda/util/cuda_util.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
#include <cuda_runtime.h>

#include <map>
#include <string>
#include <memory>
#include <mutex>
#include <string>

#define CHECK_CUDA_ERROR(err, msg) micm::cuda::CheckCudaError(err, __FILE__, __LINE__, msg)
#define CHECK_CUBLAS_ERROR(err, msg) micm::cuda::CheckCublasError(err, __FILE__, __LINE__, msg)
Expand Down Expand Up @@ -54,24 +54,23 @@ namespace micm
/// @brief Singleton class to manage CUDA streams
class CudaStreamSingleton
{
public:

public:
~CudaStreamSingleton() = default;

CudaStreamSingleton(const CudaStreamSingleton&) = delete;

CudaStreamSingleton& operator=(const CudaStreamSingleton&) = delete;

// Get the only one instance of the singleton class
static CudaStreamSingleton& GetInstance();

// Get the CUDA stream given a stream ID
cudaStream_t& GetCudaStream(std::size_t stream_id);

// Empty the map variable to clean up all CUDA streams
void CleanUp();
private:

private:
// Private constructor to prevent direct instantiation
CudaStreamSingleton() = default;

Expand Down
101 changes: 83 additions & 18 deletions src/process/process_set.cu
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,32 @@ namespace micm
ProcessSetParam devstruct;

/// Allocate memory space on the device
CHECK_CUDA_ERROR(cudaMallocAsync(&(devstruct.number_of_reactants_), number_of_reactants_bytes, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMallocAsync(&(devstruct.reactant_ids_), reactant_ids_bytes, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMallocAsync(&(devstruct.number_of_products_), number_of_products_bytes, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMallocAsync(&(devstruct.product_ids_), product_ids_bytes, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMallocAsync(&(devstruct.yields_), yields_bytes, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaMalloc");
CHECK_CUDA_ERROR(
cudaMallocAsync(
&(devstruct.number_of_reactants_),
number_of_reactants_bytes,
micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMalloc");
CHECK_CUDA_ERROR(
cudaMallocAsync(
&(devstruct.reactant_ids_),
reactant_ids_bytes,
micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMalloc");
CHECK_CUDA_ERROR(
cudaMallocAsync(
&(devstruct.number_of_products_),
number_of_products_bytes,
micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMalloc");
CHECK_CUDA_ERROR(
cudaMallocAsync(
&(devstruct.product_ids_), product_ids_bytes, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMalloc");
CHECK_CUDA_ERROR(
cudaMallocAsync(
&(devstruct.yields_), yields_bytes, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMalloc");

/// Copy the data from host to device
CHECK_CUDA_ERROR(
Expand All @@ -152,7 +173,12 @@ namespace micm
micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMemcpy");
CHECK_CUDA_ERROR(
cudaMemcpyAsync(devstruct.reactant_ids_, hoststruct.reactant_ids_, reactant_ids_bytes, cudaMemcpyHostToDevice, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
cudaMemcpyAsync(
devstruct.reactant_ids_,
hoststruct.reactant_ids_,
reactant_ids_bytes,
cudaMemcpyHostToDevice,
micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMemcpy");
CHECK_CUDA_ERROR(
cudaMemcpyAsync(
Expand All @@ -163,10 +189,21 @@ namespace micm
micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMemcpy");
CHECK_CUDA_ERROR(
cudaMemcpyAsync(devstruct.product_ids_, hoststruct.product_ids_, product_ids_bytes, cudaMemcpyHostToDevice, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
cudaMemcpyAsync(
devstruct.product_ids_,
hoststruct.product_ids_,
product_ids_bytes,
cudaMemcpyHostToDevice,
micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMemcpy");
CHECK_CUDA_ERROR(
cudaMemcpyAsync(devstruct.yields_, hoststruct.yields_, yields_bytes, cudaMemcpyHostToDevice, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaMemcpy");
cudaMemcpyAsync(
devstruct.yields_,
hoststruct.yields_,
yields_bytes,
cudaMemcpyHostToDevice,
micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMemcpy");

devstruct.number_of_reactants_size_ = hoststruct.number_of_reactants_size_;
devstruct.reactant_ids_size_ = hoststruct.reactant_ids_size_;
Expand All @@ -186,12 +223,21 @@ namespace micm
size_t jacobian_flat_ids_bytes = sizeof(size_t) * hoststruct.jacobian_flat_ids_size_;

/// Allocate memory space on the device
CHECK_CUDA_ERROR(cudaMallocAsync(&(devstruct.jacobian_flat_ids_), jacobian_flat_ids_bytes, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaMalloc");
CHECK_CUDA_ERROR(
cudaMallocAsync(
&(devstruct.jacobian_flat_ids_),
jacobian_flat_ids_bytes,
micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMalloc");

/// Copy the data from host to device
CHECK_CUDA_ERROR(
cudaMemcpyAsync(
devstruct.jacobian_flat_ids_, hoststruct.jacobian_flat_ids_, jacobian_flat_ids_bytes, cudaMemcpyHostToDevice, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
devstruct.jacobian_flat_ids_,
hoststruct.jacobian_flat_ids_,
jacobian_flat_ids_bytes,
cudaMemcpyHostToDevice,
micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMemcpy");

devstruct.jacobian_flat_ids_size_ = hoststruct.jacobian_flat_ids_size_;
Expand All @@ -202,18 +248,29 @@ namespace micm
void FreeConstData(ProcessSetParam& devstruct)
{
if (devstruct.number_of_reactants_ != nullptr)
CHECK_CUDA_ERROR(cudaFreeAsync(devstruct.number_of_reactants_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaFree");
CHECK_CUDA_ERROR(
cudaFreeAsync(devstruct.number_of_reactants_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaFree");
if (devstruct.reactant_ids_ != nullptr)
CHECK_CUDA_ERROR(cudaFreeAsync(devstruct.reactant_ids_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaFree");
CHECK_CUDA_ERROR(
cudaFreeAsync(devstruct.reactant_ids_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaFree");
if (devstruct.number_of_products_ != nullptr)
CHECK_CUDA_ERROR(cudaFreeAsync(devstruct.number_of_products_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaFree");
CHECK_CUDA_ERROR(
cudaFreeAsync(devstruct.number_of_products_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaFree");
if (devstruct.product_ids_ != nullptr)
CHECK_CUDA_ERROR(cudaFreeAsync(devstruct.product_ids_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaFree");
CHECK_CUDA_ERROR(
cudaFreeAsync(devstruct.product_ids_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaFree");
if (devstruct.yields_ != nullptr)
CHECK_CUDA_ERROR(cudaFreeAsync(devstruct.yields_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaFree");
CHECK_CUDA_ERROR(
cudaFreeAsync(devstruct.yields_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaFree");
if (devstruct.jacobian_flat_ids_ != nullptr)
{
CHECK_CUDA_ERROR(cudaFreeAsync(devstruct.jacobian_flat_ids_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaFree");
CHECK_CUDA_ERROR(
cudaFreeAsync(devstruct.jacobian_flat_ids_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaFree");
}
}

Expand All @@ -224,7 +281,11 @@ namespace micm
const ProcessSetParam& devstruct)
{
size_t number_of_blocks = (rate_constants_param.number_of_grid_cells_ + BLOCK_SIZE - 1) / BLOCK_SIZE;
SubtractJacobianTermsKernel<<<number_of_blocks, BLOCK_SIZE, 0, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)>>>(
SubtractJacobianTermsKernel<<<
number_of_blocks,
BLOCK_SIZE,
0,
micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)>>>(
rate_constants_param, state_variables_param, jacobian_param, devstruct);
} // end of SubtractJacobianTermsKernelDriver
Expand All @@ -235,7 +296,11 @@ namespace micm
const ProcessSetParam& devstruct)
{
size_t number_of_blocks = (rate_constants_param.number_of_grid_cells_ + BLOCK_SIZE - 1) / BLOCK_SIZE;
AddForcingTermsKernel<<<number_of_blocks, BLOCK_SIZE, 0, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)>>>(
AddForcingTermsKernel<<<
number_of_blocks,
BLOCK_SIZE,
0,
micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)>>>(
rate_constants_param, state_variables_param, forcing_param, devstruct);
} // end of AddForcingTermsKernelDriver
} // namespace cuda
Expand Down
67 changes: 54 additions & 13 deletions src/solver/linear_solver.cu
Original file line number Diff line number Diff line change
Expand Up @@ -89,20 +89,56 @@ namespace micm

/// Create a struct whose members contain the addresses in the device memory.
LinearSolverParam devstruct;
CHECK_CUDA_ERROR(cudaMallocAsync(&(devstruct.nLij_Lii_), nLij_Lii_bytes, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMallocAsync(&(devstruct.Lij_yj_), Lij_yj_bytes, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMallocAsync(&(devstruct.nUij_Uii_), nUij_Uii_bytes, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMallocAsync(&(devstruct.Uij_xj_), Uij_xj_bytes, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaMalloc");
CHECK_CUDA_ERROR(
cudaMallocAsync(
&(devstruct.nLij_Lii_), nLij_Lii_bytes, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMalloc");
CHECK_CUDA_ERROR(
cudaMallocAsync(
&(devstruct.Lij_yj_), Lij_yj_bytes, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMalloc");
CHECK_CUDA_ERROR(
cudaMallocAsync(
&(devstruct.nUij_Uii_), nUij_Uii_bytes, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMalloc");
CHECK_CUDA_ERROR(
cudaMallocAsync(
&(devstruct.Uij_xj_), Uij_xj_bytes, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMalloc");

/// Copy the data from host to device
CHECK_CUDA_ERROR(
cudaMemcpyAsync(devstruct.nLij_Lii_, hoststruct.nLij_Lii_, nLij_Lii_bytes, cudaMemcpyHostToDevice, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaMemcpy");
cudaMemcpyAsync(
devstruct.nLij_Lii_,
hoststruct.nLij_Lii_,
nLij_Lii_bytes,
cudaMemcpyHostToDevice,
micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMemcpy");
CHECK_CUDA_ERROR(
cudaMemcpyAsync(devstruct.Lij_yj_, hoststruct.Lij_yj_, Lij_yj_bytes, cudaMemcpyHostToDevice, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaMemcpy");
cudaMemcpyAsync(
devstruct.Lij_yj_,
hoststruct.Lij_yj_,
Lij_yj_bytes,
cudaMemcpyHostToDevice,
micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMemcpy");
CHECK_CUDA_ERROR(
cudaMemcpyAsync(devstruct.nUij_Uii_, hoststruct.nUij_Uii_, nUij_Uii_bytes, cudaMemcpyHostToDevice, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaMemcpy");
cudaMemcpyAsync(
devstruct.nUij_Uii_,
hoststruct.nUij_Uii_,
nUij_Uii_bytes,
cudaMemcpyHostToDevice,
micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMemcpy");
CHECK_CUDA_ERROR(
cudaMemcpyAsync(devstruct.Uij_xj_, hoststruct.Uij_xj_, Uij_xj_bytes, cudaMemcpyHostToDevice, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaMemcpy");
cudaMemcpyAsync(
devstruct.Uij_xj_,
hoststruct.Uij_xj_,
Uij_xj_bytes,
cudaMemcpyHostToDevice,
micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)),
"cudaMemcpy");

devstruct.nLij_Lii_size_ = hoststruct.nLij_Lii_size_;
devstruct.Lij_yj_size_ = hoststruct.Lij_yj_size_;
Expand All @@ -117,13 +153,17 @@ namespace micm
void FreeConstData(LinearSolverParam& devstruct)
{
if (devstruct.nLij_Lii_ != nullptr)
CHECK_CUDA_ERROR(cudaFreeAsync(devstruct.nLij_Lii_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaFree");
CHECK_CUDA_ERROR(
cudaFreeAsync(devstruct.nLij_Lii_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaFree");
if (devstruct.Lij_yj_ != nullptr)
CHECK_CUDA_ERROR(cudaFreeAsync(devstruct.Lij_yj_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaFree");
CHECK_CUDA_ERROR(
cudaFreeAsync(devstruct.Lij_yj_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaFree");
if (devstruct.nUij_Uii_ != nullptr)
CHECK_CUDA_ERROR(cudaFreeAsync(devstruct.nUij_Uii_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaFree");
CHECK_CUDA_ERROR(
cudaFreeAsync(devstruct.nUij_Uii_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaFree");
if (devstruct.Uij_xj_ != nullptr)
CHECK_CUDA_ERROR(cudaFreeAsync(devstruct.Uij_xj_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaFree");
CHECK_CUDA_ERROR(
cudaFreeAsync(devstruct.Uij_xj_, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)), "cudaFree");
}

void SolveKernelDriver(
Expand All @@ -133,7 +173,8 @@ namespace micm
const LinearSolverParam& devstruct)
{
size_t number_of_blocks = (x_param.number_of_grid_cells_ + BLOCK_SIZE - 1) / BLOCK_SIZE;
SolveKernel<<<number_of_blocks, BLOCK_SIZE, 0, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)>>>(x_param, L_param, U_param, devstruct);
SolveKernel<<<number_of_blocks, BLOCK_SIZE, 0, micm::cuda::CudaStreamSingleton::GetInstance().GetCudaStream(0)>>>(
x_param, L_param, U_param, devstruct);
}
} // namespace cuda
} // namespace micm
Loading

0 comments on commit 7c5f4bf

Please sign in to comment.