Skip to content

Commit

Permalink
updates to timers
Browse files Browse the repository at this point in the history
  • Loading branch information
akielaries committed Dec 11, 2023
1 parent 5c8f48b commit a1aead1
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 26 deletions.
39 changes: 13 additions & 26 deletions benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,16 +161,17 @@ void bench_monte_carlo() {
// if host has NVCC installed
#ifdef __HAS_NVCC__

// NOTE: this GPU version does not achieve the same accuracy as the CPU-based Miller-Rabin test
void gpu_bench_miller_rabin() {
clock_t start, stop;
int n = nums.size();
int iters = 120000;

// Allocate device memory for input and output arrays
uint32_t *d_input;
bool *d_output;

std::chrono::steady_clock::time_point start_time =
std::chrono::steady_clock::now();
start = clock();

cudaMalloc((void **)&d_input, n * sizeof(uint32_t));
cudaMalloc((void **)&d_output, n * sizeof(bool));
Expand All @@ -195,8 +196,8 @@ void gpu_bench_miller_rabin() {
bool *results = new bool[n];
cudaMemcpy(results, d_output, n * sizeof(bool), cudaMemcpyDeviceToHost);

std::chrono::steady_clock::time_point end_time =
std::chrono::steady_clock::now();
stop = clock();


// print results
for (int i = 0; i < n; ++i) {
Expand All @@ -212,11 +213,8 @@ void gpu_bench_miller_rabin() {
cudaFree(d_output);
delete[] results;

std::cout << "Time elapsed: "
<< std::chrono::duration_cast<std::chrono::milliseconds>(
end_time - start_time)
.count()
<< " ms" << std::endl;
printf("Finished in %f s.\n",
(stop - start) / (float)CLOCKS_PER_SEC);

}

Expand All @@ -225,13 +223,15 @@ void gpu_bench_monte_carlo() {
float host[BLOCKS * THREADS];
float *dev;
curandState *devStates;
int total_trials = TRIALS_PER_THREAD * BLOCKS * THREADS;

printf("# of trials per thread = %d, # of blocks = %d, # of threads/block "
"= %d.\n",
"= %d, total trials %d.\n",
TRIALS_PER_THREAD,
BLOCKS,
THREADS);

THREADS,
total_trials);

start = clock();

cudaMalloc((void **)&dev,
Expand All @@ -240,15 +240,9 @@ void gpu_bench_monte_carlo() {

cudaMalloc((void **)&devStates, THREADS * BLOCKS * sizeof(curandState));

//gpu_monte_carlo<<<BLOCKS, THREADS>>>(dev, devStates);
start = clock();

std::chrono::steady_clock::time_point start_time =
std::chrono::steady_clock::now();

run_gpu_monte_carlo(dev, devStates);
std::chrono::steady_clock::time_point end_time =
std::chrono::steady_clock::now();

cudaMemcpy(host,
dev,
Expand All @@ -269,12 +263,6 @@ void gpu_bench_monte_carlo() {

printf("CUDA estimate of PI = %f [error of %f]\n", pi_gpu, pi_gpu - PI);

std::cout << "Time elapsed: "
<< std::chrono::duration_cast<std::chrono::milliseconds>(
end_time - start_time)
.count()
<< " ms" << std::endl;


}

Expand Down Expand Up @@ -460,10 +448,9 @@ int main(int argc, char *argv[]) {
else if (mode == "gpu") {
// if host has NVCC installed

std::cout << "<--------- GPU TESTS --------->\n";
#ifdef __HAS_NVCC__

std::cout << "<--------- GPU TESTS --------->\n";

std::cout << "NVIDIA device found!\n";
gpu_bench_monte_carlo();
gpu_bench_miller_rabin();
Expand Down
5 changes: 5 additions & 0 deletions lib/sys.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,11 @@ class System {
/** GPU MAX grid size */
int max_grid_size[3];

int _ConvertSMVer2Cores(int major, int minor);

void getGPUInfo();

void printGPUInfo();

#endif

Expand Down
6 changes: 6 additions & 0 deletions src/sys.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@
#include <vector>
#include <stdexcept>

#ifdef __HAS_NVCC__

#include <cuda_runtime_api.h>

#endif

// function to read and return the contents of a file
std::string System::read_file(const std::string &filename) {
std::ifstream file(filename);
Expand Down

0 comments on commit a1aead1

Please sign in to comment.