Skip to content

Commit

Permalink
updating CUDA code
Browse files Browse the repository at this point in the history
  • Loading branch information
akielaries committed Dec 10, 2023
1 parent e1ed879 commit 8ceb04b
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 12 deletions.
2 changes: 0 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,13 @@ OBJDIR = $(PROJDIR)/obj

# check if nvcc (CUDA compiler) is available
ifeq ($(shell command -v nvcc -V 2> /dev/null),)
CUDA_SRC =
HAS_NVCC =
OBJ_CUDA =

# has NVCC
else
# set CUDA defs
CUDA = $(shell find $(PROJDIR)/src -name '*.cu')
CUDA_SRC = $(CUDA)
HAS_NVCC = -D__HAS_NVCC__
NVCC_FLGS = -pg -g -Wno-deprecated-gpu-targets
OBJ_CUDA = $(patsubst $(SRCDIR)/%.cu,$(OBJDIR)/%.o,$(CUDA))
Expand Down
57 changes: 52 additions & 5 deletions benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,14 @@
#include "lib/primes.hpp" // prime number methods
#include "lib/sys.hpp" // system information methods
#include "lib/threadpool.hpp" // threadpool methods

#ifdef __HAS_NVCC__

#include "lib/montecarlo.cuh" // CUDA monte carlo methods
#include "lib/primes.cuh" // CUDA prime number methods
#include "lib/primes.cuh" // CUDA prime number methods
#include <cuda.h> // CUDA C header
#include <curand_kernel.h> // RAND for CUDA devices
#include <stdio.h> // C STD IO

#endif

Expand Down Expand Up @@ -158,7 +162,47 @@ void bench_monte_carlo() {
#ifdef __HAS_NVCC__

void gpu_bench_monte_carlo() {
std::cout << "This should be a call to some GPU monte carlo method...\n";
clock_t start, stop;
float host[BLOCKS * THREADS];
float *dev;
curandState *devStates;

printf("# of trials per thread = %d, # of blocks = %d, # of threads/block "
"= %d.\n",
TRIALS_PER_THREAD,
BLOCKS,
THREADS);

start = clock();

cudaMalloc((void **)&dev,
BLOCKS * THREADS *
sizeof(float)); // allocate device mem. for counts

cudaMalloc((void **)&devStates, THREADS * BLOCKS * sizeof(curandState));

//gpu_monte_carlo<<<BLOCKS, THREADS>>>(dev, devStates);
run_gpu_monte_carlo(dev, devStates);

cudaMemcpy(host,
dev,
BLOCKS * THREADS * sizeof(float),
cudaMemcpyDeviceToHost); // return results

float pi_gpu;
for (int i = 0; i < BLOCKS * THREADS; i++) {
pi_gpu += host[i];
}

pi_gpu /= (BLOCKS * THREADS);

stop = clock();

printf("GPU pi calculated in %f s.\n",
(stop - start) / (float)CLOCKS_PER_SEC);

printf("CUDA estimate of PI = %f [error of %f]\n", pi_gpu, pi_gpu - PI);

}

#endif
Expand Down Expand Up @@ -287,6 +331,7 @@ int main(int argc, char *argv[]) {

// if -b cpu
if (mode == "cpu") {
std::cout << "<--------- CPU TESTS --------->";
// display CPU information and number of running processes from
// `ps`
sys.cpu_info();
Expand All @@ -301,9 +346,7 @@ int main(int argc, char *argv[]) {
// TODO FIXME I am thinking this system info should be logged
// from a separate thread

std::cout
<< "Starting with primality testing using the Miller-Rabin "
"algorithm...\n";
std::cout << "Primality tests with Miller-Rabin algorithm...\n";

bench_naive_primes(nums);
bench_threadpool_primes(nums);
Expand All @@ -315,6 +358,8 @@ int main(int argc, char *argv[]) {
sys.mem_info();
sys.cpu_temp();

std::cout << "Pi estimation using Monte Carlo methods...\n";
bench_monte_carlo();
sys.cpu_usage();
sys.mem_info();
sys.cpu_temp();
Expand All @@ -326,6 +371,8 @@ int main(int argc, char *argv[]) {

#ifdef __HAS_NVCC__

std::cout << "<--------- GPU TESTS --------->";

std::cout << "NVIDIA DEVICE!\n";
gpu_bench_monte_carlo();

Expand Down
8 changes: 8 additions & 0 deletions lib/montecarlo.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@
#ifndef __MONTECARLO_CUH__
#define __MONTECARLO_CUH__

// Constants shared by the CUDA Monte Carlo kernel source and the benchmark.
// The benchmark sizes its host/device buffers as BLOCKS * THREADS elements.
#define TRIALS_PER_THREAD 4096 // trials per GPU thread (as reported by the benchmark)
#define BLOCKS 256 // number of blocks in the kernel launch
#define THREADS 256 // number of threads per block in the kernel launch
#define PI 3.1415926535 // known value of pi

#include <cuda.h>
#include <curand_kernel.h>

/**
 * Host-side wrapper that launches the gpu_monte_carlo kernel with
 * <<<BLOCKS, THREADS>>> (defined in src/montecarlo_gpu.cu).
 *
 * @param dev       pointer forwarded to the kernel as its `estimate` argument;
 *                  the benchmark allocates BLOCKS * THREADS floats for it
 * @param devStates pointer forwarded to the kernel as its `states` argument;
 *                  the benchmark allocates BLOCKS * THREADS curandState for it
 */
void run_gpu_monte_carlo(float *dev, curandState *devStates);

#endif
11 changes: 6 additions & 5 deletions src/montecarlo_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,6 @@
#include <stdlib.h>
#include <time.h>

#define TRIALS_PER_THREAD 4096
#define BLOCKS 256
#define THREADS 256
#define PI 3.1415926535 // known value of pi

__global__ void gpu_monte_carlo(float *estimate, curandState *states) {
unsigned int thread_id = threadIdx.x + blockDim.x * blockIdx.x;
int points_in_circle = 0;
Expand Down Expand Up @@ -39,6 +34,12 @@ float host_monte_carlo(long trials) {
return 4.0f * points_in_circle / trials;
}

/**
 * Launch the gpu_monte_carlo kernel on a 1D grid of BLOCKS blocks with
 * THREADS threads each.
 *
 * The function only issues the launch: it neither synchronizes with the
 * device nor inspects the launch status, so callers are responsible for both.
 *
 * @param dev       forwarded to the kernel as its `estimate` output pointer
 * @param devStates forwarded to the kernel as its `states` pointer
 */
void run_gpu_monte_carlo(float *dev, curandState *devStates) {
    const dim3 grid_dim(BLOCKS);
    const dim3 block_dim(THREADS);

    gpu_monte_carlo<<<grid_dim, block_dim>>>(dev, devStates);
}

/*
int main(int argc, char *argv[]) {
clock_t start, stop;
Expand Down

0 comments on commit 8ceb04b

Please sign in to comment.