From cf24880e25c50811956d43e6dac5b5772e6c8c7e Mon Sep 17 00:00:00 2001 From: "Yuhsiang M. Tsai" Date: Fri, 27 Nov 2020 21:50:42 +0100 Subject: [PATCH] Review update - fix gpu_timer in script - use int64_t - add const to function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Aditya Kashi Co-authored-by: Pratik Nayak Co-authored-by: Thomas Grützmacher --- benchmark/run_all_benchmarks.sh | 8 ++++---- benchmark/utils/timer.hpp | 34 +++++++++++++++++---------------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/benchmark/run_all_benchmarks.sh b/benchmark/run_all_benchmarks.sh index 24bea8efbe2..d8bb6c6a88b 100644 --- a/benchmark/run_all_benchmarks.sh +++ b/benchmark/run_all_benchmarks.sh @@ -149,7 +149,7 @@ run_conversion_benchmarks() { cp "$1" "$1.imd" # make sure we're not loosing the original input ./conversions/conversions --backup="$1.bkp" --double_buffer="$1.bkp2" \ --executor="${EXECUTOR}" --formats="${FORMATS}" \ - --device_id="${DEVICE_ID}" --gpu_time=${GPU_TIMER} \ + --device_id="${DEVICE_ID}" --gpu_timer=${GPU_TIMER} \ <"$1.imd" 2>&1 >"$1" keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd" } @@ -165,7 +165,7 @@ run_spmv_benchmarks() { cp "$1" "$1.imd" # make sure we're not loosing the original input ./spmv/spmv --backup="$1.bkp" --double_buffer="$1.bkp2" \ --executor="${EXECUTOR}" --formats="${FORMATS}" \ - --device_id="${DEVICE_ID}" --gpu_time=${GPU_TIMER} \ + --device_id="${DEVICE_ID}" --gpu_timer=${GPU_TIMER} \ <"$1.imd" 2>&1 >"$1" keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd" } @@ -183,7 +183,7 @@ run_solver_benchmarks() { --executor="${EXECUTOR}" --solvers="${SOLVERS}" \ --preconditioners="${PRECONDS}" \ --max_iters=${SOLVERS_MAX_ITERATIONS} --rel_res_goal=${SOLVERS_PRECISION} \ - ${SOLVERS_RHS_FLAG} ${DETAILED_STR} --device_id="${DEVICE_ID}" --gpu_time=${GPU_TIMER} \ + ${SOLVERS_RHS_FLAG} ${DETAILED_STR} --device_id="${DEVICE_ID}" --gpu_timer=${GPU_TIMER} \ <"$1.imd" 2>&1 >"$1" 
keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd" } @@ -209,7 +209,7 @@ run_preconditioner_benchmarks() { --executor="${EXECUTOR}" --preconditioners="jacobi" \ --jacobi_max_block_size="${bsize}" \ --jacobi_storage="${prec}" \ - --device_id="${DEVICE_ID}" --gpu_time=${GPU_TIMER} \ + --device_id="${DEVICE_ID}" --gpu_timer=${GPU_TIMER} \ <"$1.imd" 2>&1 >"$1" keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd" done diff --git a/benchmark/utils/timer.hpp b/benchmark/utils/timer.hpp index ffd39919337..e9aabb12c0f 100644 --- a/benchmark/utils/timer.hpp +++ b/benchmark/utils/timer.hpp @@ -62,7 +62,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "hip/base/device_guard.hip.hpp" -#endif // HAS_CUDA +#endif // HAS_HIP DEFINE_bool(gpu_timer, false, @@ -79,7 +79,7 @@ class Timer { tic_called_ = true; } - std::size_t toc() + std::int64_t toc() { assert(tic_called_ == true); auto ns = this->toc_impl(); @@ -88,16 +88,18 @@ class Timer { return ns; } - std::size_t get_total_time() { return total_duration_ns_; } + std::int64_t get_total_time() const { return total_duration_ns_; } - std::size_t get_tictoc_num() { return duration_ns_.size(); } + std::int64_t get_num_repetitions() const { return duration_ns_.size(); } - double get_average_time() + double get_average_time() const { return static_cast<double>(this->get_total_time()) / - this->get_tictoc_num(); + this->get_num_repetitions(); } + std::vector<std::int64_t> get_time_detail() const{ return duration_ns_; } + void clear() { duration_ns_.clear(); @@ -108,7 +110,7 @@ class Timer { Timer() : tic_called_(false), total_duration_ns_(0) {} protected: - void add_record(std::size_t ns) + void add_record(std::int64_t ns) { // add the result; duration_ns_.emplace_back(ns); @@ -117,12 +119,12 @@ class Timer { virtual void tic_impl() = 0; - virtual std::size_t toc_impl() = 0; + virtual std::int64_t toc_impl() = 0; private: - std::vector<std::size_t> duration_ns_; + std::vector<std::int64_t> duration_ns_; bool tic_called_; - std::size_t total_duration_ns_; +
std::int64_t total_duration_ns_; }; @@ -138,14 +140,14 @@ class CpuTimer : public Timer { start_ = std::chrono::steady_clock::now(); } - std::size_t toc_impl() override + std::int64_t toc_impl() override { exec_->synchronize(); auto stop = std::chrono::steady_clock::now(); auto duration_time = std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start_) .count(); - return static_cast<std::size_t>(duration_time); + return static_cast<std::int64_t>(duration_time); } private: @@ -181,7 +183,7 @@ class CudaTimer : public Timer { GKO_ASSERT_NO_CUDA_ERRORS(cudaEventRecord(start_)); } - std::size_t toc_impl() override + std::int64_t toc_impl() override { gko::cuda::device_guard g{id_}; // Currently, gko::CudaExecutor always use default stream. @@ -192,7 +194,7 @@ class CudaTimer : public Timer { // resolution of around 0.5 microseconds GKO_ASSERT_NO_CUDA_ERRORS( cudaEventElapsedTime(&duration_time, start_, stop_)); - return static_cast<std::size_t>(duration_time * 1e6); + return static_cast<std::int64_t>(duration_time * 1e6); } private: @@ -233,7 +235,7 @@ class HipTimer : public Timer { GKO_ASSERT_NO_HIP_ERRORS(hipEventRecord(start_)); } - std::size_t toc_impl() override + std::int64_t toc_impl() override { gko::hip::device_guard g{id_}; // Currently, gko::HipExecutor always use default stream. @@ -244,7 +246,7 @@ class HipTimer : public Timer { // resolution of around 0.5 microseconds GKO_ASSERT_NO_HIP_ERRORS( hipEventElapsedTime(&duration_time, start_, stop_)); - return static_cast<std::size_t>(duration_time * 1e6); + return static_cast<std::int64_t>(duration_time * 1e6); } private: