Skip to content

Commit

Permalink
Review update
Browse files Browse the repository at this point in the history
- fix gpu_timer in script
- use int64_t
- add const to function
- update documentation
- add get_latest_time

Co-authored-by: Aditya Kashi <aditya.kashi@kit.edu>
Co-authored-by: Pratik Nayak <pratikvn@protonmail.com>
Co-authored-by: Terry Cojean <terry.cojean@kit.edu>
Co-authored-by: Thomas Grützmacher <thomas.gruetzmacher@kit.edu>
  • Loading branch information
5 people committed Dec 7, 2020
1 parent d8a3775 commit 3ecb031
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 24 deletions.
2 changes: 2 additions & 0 deletions BENCHMARKING.md
Original file line number Diff line number Diff line change
Expand Up @@ -305,3 +305,5 @@ The supported environment variables are described in the following list:
values as the right-hand side in solver benchmarks. Default is `unit`.
* `DETAILED={0,1}` - selects whether detailed benchmarks should be run for the
solver benchmarks, can be either `0` (off) or `1` (on). The default is `0`.
* `GPU_TIMER={true, false}` - If set to `true`, use the event-based GPU timer to
measure the timing. It is only valid for the `cuda` and `hip` executors. Default is `false`.
8 changes: 4 additions & 4 deletions benchmark/run_all_benchmarks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ run_conversion_benchmarks() {
cp "$1" "$1.imd" # make sure we're not loosing the original input
./conversions/conversions --backup="$1.bkp" --double_buffer="$1.bkp2" \
--executor="${EXECUTOR}" --formats="${FORMATS}" \
--device_id="${DEVICE_ID}" --gpu_time=${GPU_TIMER} \
--device_id="${DEVICE_ID}" --gpu_timer=${GPU_TIMER} \
<"$1.imd" 2>&1 >"$1"
keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd"
}
Expand All @@ -165,7 +165,7 @@ run_spmv_benchmarks() {
cp "$1" "$1.imd" # make sure we're not loosing the original input
./spmv/spmv --backup="$1.bkp" --double_buffer="$1.bkp2" \
--executor="${EXECUTOR}" --formats="${FORMATS}" \
--device_id="${DEVICE_ID}" --gpu_time=${GPU_TIMER} \
--device_id="${DEVICE_ID}" --gpu_timer=${GPU_TIMER} \
<"$1.imd" 2>&1 >"$1"
keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd"
}
Expand All @@ -183,7 +183,7 @@ run_solver_benchmarks() {
--executor="${EXECUTOR}" --solvers="${SOLVERS}" \
--preconditioners="${PRECONDS}" \
--max_iters=${SOLVERS_MAX_ITERATIONS} --rel_res_goal=${SOLVERS_PRECISION} \
${SOLVERS_RHS_FLAG} ${DETAILED_STR} --device_id="${DEVICE_ID}" --gpu_time=${GPU_TIMER} \
${SOLVERS_RHS_FLAG} ${DETAILED_STR} --device_id="${DEVICE_ID}" --gpu_timer=${GPU_TIMER} \
<"$1.imd" 2>&1 >"$1"
keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd"
}
Expand All @@ -209,7 +209,7 @@ run_preconditioner_benchmarks() {
--executor="${EXECUTOR}" --preconditioners="jacobi" \
--jacobi_max_block_size="${bsize}" \
--jacobi_storage="${prec}" \
--device_id="${DEVICE_ID}" --gpu_time=${GPU_TIMER} \
--device_id="${DEVICE_ID}" --gpu_timer=${GPU_TIMER} \
<"$1.imd" 2>&1 >"$1"
keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd"
done
Expand Down
143 changes: 123 additions & 20 deletions benchmark/utils/timer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,72 +62,142 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "hip/base/device_guard.hip.hpp"


#endif // HAS_CUDA
#endif // HAS_HIP


// Command-line arguments
// Boolean flag `gpu_timer`, off by default. benchmark/run_all_benchmarks.sh
// forwards the GPU_TIMER environment variable to it as --gpu_timer=<value>.
DEFINE_bool(gpu_timer, false,
"use gpu timer based on event. It is valid only when "
"executor is cuda or hip");


/**
 * Timer collects the durations of repeated tic()/toc() measurements.
 *
 * The measurement itself is delegated to the tic_impl()/toc_impl() hooks
 * implemented by the concrete timers; this class only checks the call order
 * and stores the results. All durations are expressed in nanoseconds.
 */
class Timer {
public:
    /**
     * Creates a timer without any recorded measurement.
     */
    Timer() : tic_called_(false), total_duration_ns_(0) {}

    /**
     * Virtual destructor, because timers are handed out and destroyed through
     * pointers to this base class.
     */
    virtual ~Timer() = default;

    /**
     * Starts a measurement. It must be finished with toc() before tic() is
     * called again (checked with an assertion).
     */
    void tic()
    {
        assert(tic_called_ == false);
        this->tic_impl();
        tic_called_ = true;
    }

    /**
     * Finishes the measurement started by the preceding tic() (checked with an
     * assertion) and records its duration. Use get_latest_time() to query the
     * value that was just recorded.
     */
    void toc()
    {
        assert(tic_called_ == true);
        const auto ns = this->toc_impl();
        tic_called_ = false;
        this->add_record(ns);
    }

    /**
     * Gets the sum of all recorded durations in nanoseconds.
     *
     * @return the accumulated time in nanoseconds
     */
    std::int64_t get_total_time() const { return total_duration_ns_; }

    /**
     * Gets the number of recorded tic()/toc() repetitions.
     *
     * @return the number of repetitions
     */
    std::int64_t get_num_repetitions() const
    {
        // size() is unsigned; convert explicitly to the signed return type.
        return static_cast<std::int64_t>(duration_ns_.size());
    }

    /**
     * Gets the average duration of the recorded repetitions in nanoseconds.
     * If there is no result yet, returns 0 (instead of dividing by zero),
     * matching get_latest_time().
     *
     * @return the average time in nanoseconds
     */
    double get_average_time() const
    {
        const auto num_repetitions = this->get_num_repetitions();
        if (num_repetitions == 0) {
            return 0.0;
        }
        return static_cast<double>(this->get_total_time()) /
               static_cast<double>(num_repetitions);
    }

    /**
     * Gets a copy of the vector containing every recorded duration in
     * nanoseconds, in the order they were recorded.
     *
     * @return the vector of the individual results in nanoseconds
     */
    std::vector<std::int64_t> get_time_detail() const { return duration_ns_; }

    /**
     * Gets the latest result in nanoseconds. If there is no result yet,
     * returns 0.
     *
     * @return the latest result in nanoseconds
     */
    std::int64_t get_latest_time() const
    {
        return duration_ns_.empty() ? 0 : duration_ns_.back();
    }

    /**
     * Clears all recorded results and resets the tic/toc state.
     */
    void clear()
    {
        duration_ns_.clear();
        total_duration_ns_ = 0;
        tic_called_ = false;
    }

protected:
    /**
     * Stores one result and adds it to the accumulated total.
     *
     * @param ns  the duration in nanoseconds
     */
    void add_record(std::int64_t ns)
    {
        duration_ns_.push_back(ns);
        total_duration_ns_ += ns;
    }

    /**
     * The implementation of tic.
     */
    virtual void tic_impl() = 0;

    /**
     * The implementation of toc.
     *
     * @return the duration since the matching tic_impl() in nanoseconds
     */
    virtual std::int64_t toc_impl() = 0;

private:
    std::vector<std::int64_t> duration_ns_;
    bool tic_called_;
    std::int64_t total_duration_ns_;
};


/**
* CpuTimer uses the executor's synchronize() and std::chrono to measure the
* timing.
*/
class CpuTimer : public Timer {
public:
/**
* Create a CpuTimer
*
* @param exec Executor associated to the timer
*/
CpuTimer(std::shared_ptr<const gko::Executor> exec) : Timer(), exec_(exec)
{}

Expand All @@ -138,14 +208,14 @@ class CpuTimer : public Timer {
start_ = std::chrono::steady_clock::now();
}

std::size_t toc_impl() override
std::int64_t toc_impl() override
{
exec_->synchronize();
auto stop = std::chrono::steady_clock::now();
auto duration_time =
std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start_)
.count();
return static_cast<std::size_t>(duration_time);
return duration_time;
}

private:
Expand All @@ -157,12 +227,25 @@ class CpuTimer : public Timer {
#ifdef HAS_CUDA


/**
* CudaTimer uses cuda executor and cudaEvent to measure the timing.
*/
class CudaTimer : public Timer {
public:
/**
* Create a CudaTimer.
*
* @param exec Executor, which must actually be a CudaExecutor
*/
CudaTimer(std::shared_ptr<const gko::Executor> exec)
: CudaTimer(std::dynamic_pointer_cast<const gko::CudaExecutor>(exec))
{}

/**
* Create a CudaTimer.
*
* @param exec CudaExecutor associated to the timer
*/
CudaTimer(std::shared_ptr<const gko::CudaExecutor> exec) : Timer()
{
assert(exec != nullptr);
Expand All @@ -181,7 +264,7 @@ class CudaTimer : public Timer {
GKO_ASSERT_NO_CUDA_ERRORS(cudaEventRecord(start_));
}

std::size_t toc_impl() override
std::int64_t toc_impl() override
{
gko::cuda::device_guard g{id_};
// Currently, gko::CudaExecutor always use default stream.
Expand All @@ -192,7 +275,7 @@ class CudaTimer : public Timer {
// resolution of around 0.5 microseconds
GKO_ASSERT_NO_CUDA_ERRORS(
cudaEventElapsedTime(&duration_time, start_, stop_));
return static_cast<std::size_t>(duration_time * 1e6);
return static_cast<std::int64_t>(duration_time * 1e6);
}

private:
Expand All @@ -209,12 +292,25 @@ class CudaTimer : public Timer {
#ifdef HAS_HIP


/**
* HipTimer uses hip executor and hipEvent to measure the timing.
*/
class HipTimer : public Timer {
public:
/**
* Create a HipTimer.
*
* @param exec Executor, which must actually be a HipExecutor
*/
HipTimer(std::shared_ptr<const gko::Executor> exec)
: HipTimer(std::dynamic_pointer_cast<const gko::HipExecutor>(exec))
{}

/**
* Create a HipTimer.
*
* @param exec HipExecutor associated to the timer
*/
HipTimer(std::shared_ptr<const gko::HipExecutor> exec) : Timer()
{
assert(exec != nullptr);
Expand All @@ -233,7 +329,7 @@ class HipTimer : public Timer {
GKO_ASSERT_NO_HIP_ERRORS(hipEventRecord(start_));
}

std::size_t toc_impl() override
std::int64_t toc_impl() override
{
gko::hip::device_guard g{id_};
// Currently, gko::HipExecutor always use default stream.
Expand All @@ -244,7 +340,7 @@ class HipTimer : public Timer {
// resolution of around 0.5 microseconds
GKO_ASSERT_NO_HIP_ERRORS(
hipEventElapsedTime(&duration_time, start_, stop_));
return static_cast<std::size_t>(duration_time * 1e6);
return static_cast<std::int64_t>(duration_time * 1e6);
}

private:
Expand All @@ -258,6 +354,13 @@ class HipTimer : public Timer {
#endif // HAS_HIP


/**
* Get the timer. If the executor does not support the gpu timer, the cpu timer
* is returned instead.
*
* @param exec Executor associated to the timer
* @param use_gpu_timer whether to use the gpu timer
*/
std::shared_ptr<Timer> get_timer(std::shared_ptr<const gko::Executor> exec,
bool use_gpu_timer)
{
Expand All @@ -276,6 +379,6 @@ std::shared_ptr<Timer> get_timer(std::shared_ptr<const gko::Executor> exec,
}
#endif // HAS_HIP
}
// Not use gpu_timer or not cuda/hip executor
// No cuda/hip executor available or no gpu_timer used
return std::make_shared<CpuTimer>(exec);
}

0 comments on commit 3ecb031

Please sign in to comment.