Skip to content

Commit

Permalink
Merge pull request #1429 from kokkos/half-precision
Browse files Browse the repository at this point in the history
Minor changes for half precision paper
  • Loading branch information
lucbv authored Jun 9, 2022
2 parents 09dfb65 + bae78fb commit 1369cdd
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 3 deletions.
4 changes: 2 additions & 2 deletions perf_test/blas/blas3/KokkosBatched_BatchedGemm_benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ export KOKKOS_SRC_DIR=$(realpath $KOKKOS_SRC_DIR)
export KOKKOS_SHA=${KOKKOS_SHA:-"tags/3.6.00"}
export KOKKOSKERNELS_SRC_DIR=${KOKKOSKERNELS_SRC_DIR:-"$HOME/KOKKOS.base/kokkos-kernels"}
export KOKKOSKERNELS_SRC_DIR=$(realpath $KOKKOSKERNELS_SRC_DIR)
export KOKKOSKERNELS_SHA=${KOKKOSKERNELS_SHA:-"half_examples"}
export KOKKOSKERNELS_SHA=${KOKKOSKERNELS_SHA:-"tags/papers/us-rse-escience-2022"}
envprint KOKKOS_SRC_DIR KOKKOS_SHA KOKKOSKERNELS_SRC_DIR KOKKOSKERNELS_SHA

dry_run="off"
Expand Down Expand Up @@ -208,7 +208,7 @@ echo "cd $benchmark_dir" >> $KOKKOSKERNELS_BUILD_DIR/bench.sh
echo "$KOKKOSKERNELS_BUILD_DIR/perf_test/blas/blas3/KokkosBlas3_perf_test \
--test=batched_heuristic --routines=gemm --loop_type=parallel --batch_size_last_dim=0 \
--matrix_size_start=2x2,2x2,2x2 --matrix_size_stop=64x64,64x64,64x64 \
--matrix_size_step=2 --batch_size=1024 \
--matrix_size_step=2 --batch_size=$((32*1024)) \
--warm_up_loop=10 --iter=20 --verify=1 \
${use_simd} \
--csv=${benchmark_dir}/${precision}_bench.csv" \
Expand Down
4 changes: 3 additions & 1 deletion perf_test/blas/blas3/KokkosBlas3_gemm_perf_test.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,9 +263,11 @@ static std::string gemm_csv_header_str =
// Flop count formula from lapack working note 41:
// http://www.icl.utk.edu/~mgates3/docs/lawn41.pdf
static inline double __gemm_flop_count(double a_m, double a_n, double b_n) {
// TODO: if not Kokkos::complex.
if (std::is_same<double, default_scalar>::value ||
std::is_same<float, default_scalar>::value ||
std::is_same<Kokkos::Experimental::half_t, default_scalar>::value)
std::is_same<Kokkos::Experimental::half_t, default_scalar>::value ||
std::is_same<Kokkos::Experimental::bhalf_t, default_scalar>::value)
return 2 * a_m * b_n * a_n;
else
// For complex, we need to count 2 flops for each add and 6 flops for each
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
## To reproduce the half-precision results for batched-GEMM:
```bash
git clone https://github.com/kokkos/kokkos.git
git clone https://github.com/kokkos/kokkos-kernels.git
cd kokkos-kernels
git checkout tags/papers/us-rse-escience-2022
cd perf_test/blas/blas3
export KOKKOS_SRC_DIR=/path/to/kokkos
export KOKKOSKERNELS_SRC_DIR=/path/to/kokkos-kernels
```

### On V100
```bash
./KokkosBatched_BatchedGemm_benchmark.sh double SNB VOLTA70
./KokkosBatched_BatchedGemm_benchmark.sh float SNB VOLTA70
./KokkosBatched_BatchedGemm_benchmark.sh half SNB VOLTA70
./KokkosBatched_BatchedGemm_benchmark.sh bhalf SNB VOLTA70
```

### On A100
```bash
./KokkosBatched_BatchedGemm_benchmark.sh double DEFAULT AMPERE80
./KokkosBatched_BatchedGemm_benchmark.sh float DEFAULT AMPERE80
./KokkosBatched_BatchedGemm_benchmark.sh half DEFAULT AMPERE80
./KokkosBatched_BatchedGemm_benchmark.sh bhalf DEFAULT AMPERE80
```

0 comments on commit 1369cdd

Please sign in to comment.