From 69145fd857e39a4abe108bf0ee9a0dbc0f266dfe Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Tue, 13 Sep 2022 16:23:10 -0600 Subject: [PATCH 1/5] ROTG: adding Givens rotation to our BLAS implementation This provides a basic implementation for Givens rotations but does not include TPLs backend or the ability to run on GPUs. These might be added later if required. --- blas/CMakeLists.txt | 7 + .../KokkosBlas1_rotg_eti_spec_inst.cpp.in | 54 +++ .../KokkosBlas1_rotg_eti_spec_avail.hpp.in | 51 +++ .../KokkosBlas1_rotg_eti_spec_decl.hpp.in | 51 +++ blas/impl/KokkosBlas1_rotg_impl.hpp | 117 +++++++ blas/impl/KokkosBlas1_rotg_spec.hpp | 141 ++++++++ blas/src/KokkosBlas1_rotg.hpp | 70 ++++ blas/tpls/KokkosBlas1_rotg_tpl_spec_avail.hpp | 93 ++++++ blas/tpls/KokkosBlas1_rotg_tpl_spec_decl.hpp | 308 ++++++++++++++++++ blas/unit_test/Test_Blas.hpp | 1 + blas/unit_test/Test_Blas1_rotg.hpp | 75 +++++ 11 files changed, 968 insertions(+) create mode 100644 blas/eti/generated_specializations_cpp/rotg/KokkosBlas1_rotg_eti_spec_inst.cpp.in create mode 100644 blas/eti/generated_specializations_hpp/KokkosBlas1_rotg_eti_spec_avail.hpp.in create mode 100644 blas/eti/generated_specializations_hpp/KokkosBlas1_rotg_eti_spec_decl.hpp.in create mode 100644 blas/impl/KokkosBlas1_rotg_impl.hpp create mode 100644 blas/impl/KokkosBlas1_rotg_spec.hpp create mode 100644 blas/src/KokkosBlas1_rotg.hpp create mode 100644 blas/tpls/KokkosBlas1_rotg_tpl_spec_avail.hpp create mode 100644 blas/tpls/KokkosBlas1_rotg_tpl_spec_decl.hpp create mode 100644 blas/unit_test/Test_Blas1_rotg.hpp diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt index 02be1d20a7..6c27145188 100644 --- a/blas/CMakeLists.txt +++ b/blas/CMakeLists.txt @@ -248,6 +248,13 @@ KOKKOSKERNELS_GENERATE_ETI(Blas1_reciprocal_mv reciprocal TYPE_LISTS FLOATS LAYOUTS DEVICES ) +KOKKOSKERNELS_GENERATE_ETI(Blas1_rotg rotg + COMPONENTS blas + HEADER_LIST ETI_HEADERS + SOURCE_LIST SOURCES + TYPE_LISTS FLOATS +) + KOKKOSKERNELS_GENERATE_ETI(Blas2_gemv gemv COMPONENTS blas HEADER_LIST ETI_HEADERS diff --git a/blas/eti/generated_specializations_cpp/rotg/KokkosBlas1_rotg_eti_spec_inst.cpp.in b/blas/eti/generated_specializations_cpp/rotg/KokkosBlas1_rotg_eti_spec_inst.cpp.in new file mode 100644 index 0000000000..2a6a441a70 --- /dev/null +++ b/blas/eti/generated_specializations_cpp/rotg/KokkosBlas1_rotg_eti_spec_inst.cpp.in @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true +#include "KokkosKernels_config.h" +#include "KokkosBlas1_rotg_spec.hpp" + +namespace KokkosBlas { +namespace Impl { +@BLAS1_ROTG_ETI_INST_BLOCK@ +} //IMPL +} //Kokkos diff --git a/blas/eti/generated_specializations_hpp/KokkosBlas1_rotg_eti_spec_avail.hpp.in b/blas/eti/generated_specializations_hpp/KokkosBlas1_rotg_eti_spec_avail.hpp.in new file mode 100644 index 0000000000..6a451d6afe --- /dev/null +++ b/blas/eti/generated_specializations_hpp/KokkosBlas1_rotg_eti_spec_avail.hpp.in @@ -0,0 +1,51 @@ +/* +//@HEADER +// ************************************************************************ +// +// KokkosKernels 0.9: Linear Algebra and Graph Kernels +// Copyright 2017 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef KOKKOSBLAS1_ROTG_ETI_SPEC_AVAIL_HPP_ +#define KOKKOSBLAS1_ROTG_ETI_SPEC_AVAIL_HPP_ + +namespace KokkosBlas { +namespace Impl { +@BLAS1_ROTG_ETI_AVAIL_BLOCK@ + } //IMPL +} //Kokkos +#endif diff --git a/blas/eti/generated_specializations_hpp/KokkosBlas1_rotg_eti_spec_decl.hpp.in b/blas/eti/generated_specializations_hpp/KokkosBlas1_rotg_eti_spec_decl.hpp.in new file mode 100644 index 0000000000..cb59ead97e --- /dev/null +++ b/blas/eti/generated_specializations_hpp/KokkosBlas1_rotg_eti_spec_decl.hpp.in @@ -0,0 +1,51 @@ +/* +//@HEADER +// ************************************************************************ +// +// KokkosKernels 0.9: Linear Algebra and Graph Kernels +// Copyright 2017 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef KOKKOSBLAS1_ROTG_ETI_SPEC_DECL_HPP_ +#define KOKKOSBLAS1_ROTG_ETI_SPEC_DECL_HPP_ + +namespace KokkosBlas { +namespace Impl { +@BLAS1_ROTG_ETI_DECL_BLOCK@ + } //IMPL +} //Kokkos +#endif diff --git a/blas/impl/KokkosBlas1_rotg_impl.hpp b/blas/impl/KokkosBlas1_rotg_impl.hpp new file mode 100644 index 0000000000..dad4fdb3ec --- /dev/null +++ b/blas/impl/KokkosBlas1_rotg_impl.hpp @@ -0,0 +1,117 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef KOKKOSBLAS1_ROTG_IMPL_HPP_ +#define KOKKOSBLAS1_ROTG_IMPL_HPP_ + +#include +#include +#include + +namespace KokkosBlas { +namespace Impl { + +/// \brief Compute Givens rotation coefficients. +template ::is_complex, + bool>::type = true> +void Rotg_Invoke(Scalar& a, Scalar& b, Scalar& c, Scalar& s) { + const Scalar one = Kokkos::ArithTraits::one(); + const Scalar zero = Kokkos::ArithTraits::zero(); + + const Scalar numerical_scaling = Kokkos::abs(a) + Kokkos::abs(b); + if (numerical_scaling == zero) { + c = one; + s = zero; + a = zero; + b = zero; + } else { + const Scalar scaled_a = a / numerical_scaling; + const Scalar scaled_b = b / numerical_scaling; + Scalar norm = Kokkos::sqrt(scaled_a * scaled_a + scaled_b * scaled_b) * + numerical_scaling; + Scalar sign = Kokkos::abs(a) > Kokkos::abs(b) ? a : b; + norm = Kokkos::copysign(norm, sign); + c = a / norm; + s = b / norm; + + Scalar z = one; + if (Kokkos::abs(a) > Kokkos::abs(b)) { + z = s; + } + if ((Kokkos::abs(b) >= Kokkos::abs(a)) && (c != zero)) { + z = one / c; + } + a = norm; + b = z; + } +} + +template ::is_complex, + bool>::type = true> +void Rotg_Invoke(Scalar& a, Scalar& b, Scalar& c, Scalar& s) { + const Scalar one = Kokkos::ArithTraits::one(); + const Scalar zero = Kokkos::ArithTraits::zero(); + + const Scalar numerical_scaling = Kokkos::abs(a) + Kokkos::abs(b); + if (Kokkos::abs(a) == zero) { + c = zero; + s = one; + a = b; + } else { + const Scalar scaled_a = Kokkos::abs(a / numerical_scaling); + const Scalar scaled_b = Kokkos::abs(b / numerical_scaling); + Scalar norm = Kokkos::sqrt(scaled_a * scaled_a + scaled_b * scaled_b) * + numerical_scaling; + Scalar unit_a = a / Kokkos::abs(a); + c = Kokkos::abs(a) / norm; + s = unit_a * Kokkos::conj(b) / norm; + a = unit_a * norm; + } +} + +} // namespace Impl +} // namespace KokkosBlas + +#endif // KOKKOSBLAS1_ROTG_IMPL_HPP_ diff --git a/blas/impl/KokkosBlas1_rotg_spec.hpp b/blas/impl/KokkosBlas1_rotg_spec.hpp new file mode 100644 index 0000000000..7a41a070dc --- /dev/null +++ b/blas/impl/KokkosBlas1_rotg_spec.hpp @@ -0,0 +1,141 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef KOKKOSBLAS1_ROTG_SPEC_HPP_ +#define KOKKOSBLAS1_ROTG_SPEC_HPP_ + +#include +#include +#include + +// Include the actual functors +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#include +#endif + +namespace KokkosBlas { +namespace Impl { +// Specialization struct which defines whether a specialization exists +template +struct rotg_eti_spec_avail { + enum : bool { value = false }; +}; +} // namespace Impl +} // namespace KokkosBlas + +// +// Macro for declaration of full specialization availability +// KokkosBlas::Impl::Rotg. This is NOT for users!!! All +// the declarations of full specializations go in this header file. +// We may spread out definitions (see _INST macro below) across one or +// more .cpp files. +// +#define KOKKOSBLAS1_ROTG_ETI_SPEC_AVAIL(SCALAR) \ + template <> \ + struct rotg_eti_spec_avail { \ + enum : bool { value = true }; \ + }; + +// Include the actual specialization declarations +#include +#include + +namespace KokkosBlas { +namespace Impl { + +// Unification layer +template ::value, + bool eti_spec_avail = rotg_eti_spec_avail::value> +struct Rotg { + static void rotg(Scalar& a, Scalar& b, Scalar& c, Scalar& s); +}; + +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +//! Full specialization of Rotg. +template +struct Rotg { + static void rotg(Scalar& a, Scalar& b, Scalar& c, Scalar& s) { + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY + ? "KokkosBlas::rotg[ETI]" + : "KokkosBlas::rotg[noETI]"); +#ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION + if (KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) + printf("KokkosBlas1::rotg<> ETI specialization for < %s >\n", + typeid(Scalar).name()); + else { + printf("KokkosBlas1::rotg<> non-ETI specialization for < %s >\n", + typeid(Scalar).name()); + } +#endif + Rotg_Invoke(a, b, c, s); + Kokkos::Profiling::popRegion(); + } +}; +#endif + +} // namespace Impl +} // namespace KokkosBlas + +// +// Macro for declaration of full specialization of +// KokkosBlas::Impl::Rotg. This is NOT for users!!! All +// the declarations of full specializations go in this header file. +// We may spread out definitions (see _DEF macro below) across one or +// more .cpp files. +// +#define KOKKOSBLAS1_ROTG_ETI_SPEC_DECL(SCALAR) \ + extern template struct Rotg; + +// +// Macro for definition of full specialization of +// KokkosBlas::Impl::Rotg. This is NOT for users!!! We +// use this macro in one or more .cpp files in this directory. +// +#define KOKKOSBLAS1_ROTG_ETI_SPEC_INST(SCALAR) \ + template struct Rotg; + +#include +#include + +#endif // KOKKOSBLAS1_ROTG_SPEC_HPP_ diff --git a/blas/src/KokkosBlas1_rotg.hpp b/blas/src/KokkosBlas1_rotg.hpp new file mode 100644 index 0000000000..783b823247 --- /dev/null +++ b/blas/src/KokkosBlas1_rotg.hpp @@ -0,0 +1,70 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSBLAS1_ROTG_HPP_ +#define KOKKOSBLAS1_ROTG_HPP_ + +#include +#include + +namespace KokkosBlas { + +/// \brief Compute the coefficient to apply a Givens rotation. +/// +/// \tparam Scalar data type of inputs and outputs +/// +/// \param a [in/out] on input one of the values to rotate, on output the +/// rotated value \param b [in/out] on input one of the values to rotate, on +/// output the rotated value \param c [out] cosine value associated with the +/// rotation \param s [out] sine value associated with the rotation +template +void rotg(Scalar& a, Scalar& b, Scalar& c, Scalar& s) { + Kokkos::Profiling::pushRegion("KokkosBlas::rotg"); + Impl::Rotg::rotg(a, b, c, s); + Kokkos::Profiling::popRegion(); +} + +} // namespace KokkosBlas + +#endif // KOKKOSBLAS1_ROTG_HPP_ diff --git a/blas/tpls/KokkosBlas1_rotg_tpl_spec_avail.hpp b/blas/tpls/KokkosBlas1_rotg_tpl_spec_avail.hpp new file mode 100644 index 0000000000..1b0bb898f3 --- /dev/null +++ b/blas/tpls/KokkosBlas1_rotg_tpl_spec_avail.hpp @@ -0,0 +1,93 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_HPP_ +#define KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_HPP_ + +namespace KokkosBlas { +namespace Impl { +// Specialization struct which defines whether a specialization exists +template +struct rotg_tpl_spec_avail { + enum : bool { value = false }; +}; +} // namespace Impl +} // namespace KokkosBlas + +namespace KokkosBlas { +namespace Impl { + +// // Generic Host side BLAS (could be MKL or whatever) +// #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS +// // double +// #define KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(SCALAR) \ +// struct rotg_tpl_spec_avail { \ +// enum : bool { value = true }; \ +// }; + +// KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(double) +// KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(float) +// KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex) +// KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex) + +// #endif + +// // cuBLAS +// #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS +// // double +// #define KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(SCALAR) \ +// struct nrm1_tpl_spec_avail { \ +// enum : bool { value = true }; \ +// }; + +// KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(double) +// KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(float) +// KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex) +// KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex) + +// #endif + +} // namespace Impl +} // namespace KokkosBlas +#endif diff --git a/blas/tpls/KokkosBlas1_rotg_tpl_spec_decl.hpp b/blas/tpls/KokkosBlas1_rotg_tpl_spec_decl.hpp new file mode 100644 index 0000000000..724ccfd93d --- /dev/null +++ b/blas/tpls/KokkosBlas1_rotg_tpl_spec_decl.hpp @@ -0,0 +1,308 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSBLAS1_ROTG_TPL_SPEC_DECL_HPP_ +#define KOKKOSBLAS1_ROTG_TPL_SPEC_DECL_HPP_ + +namespace KokkosBlas { +namespace Impl { + +namespace { +template +inline void rotg_print_specialization() { +#ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION + printf("KokkosBlas1::rotg<> TPL Blas specialization for < %s >\n", + typeid(Scalar).name()); +#endif +} +} // namespace +} // namespace Impl +} // namespace KokkosBlas + +// Generic Host side BLAS (could be MKL or whatever) +#ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS +#include "KokkosBlas_Host_tpl.hpp" + +namespace KokkosBlas { +namespace Impl { + +// #define KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(ETI_SPEC_AVAIL) \ +// struct Rotg { \ +// \ +// static void rotg(double& a, double& b, double& c, double& s) { \ +// Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_BLAS,double]"); \ +// Kokkos::Profiling::popRegion(); \ +// } \ +// }; + +// #define KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(ETI_SPEC_AVAIL) \ +// struct Rotg { \ +// \ +// static void rotg(float& a, float& b, float& c, float& s) { \ +// Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_BLAS,float]"); \ +// Kokkos::Profiling::popRegion(); \ +// } \ +// }; + +// #define KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(ETI_SPEC_AVAIL) \ +// struct Rotg { \ +// \ +// static void rotg(Kokkos::complex& a, \ +// Kokkos::complex& b, \ +// Kokkos::complex& c, Kokkos::complex& s) { \ +// Kokkos::Profiling::pushRegion( \ +// "KokkosBlas::rotg[TPL_BLAS,complex]"); \ +// Kokkos::Profiling::popRegion(); \ +// } \ +// }; + +// #define KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_BLAS(ETI_SPEC_AVAIL) \ +// struct Rotg, true, ETI_SPEC_AVAIL> { \ +// \ +// static void rotg(Kokkos::complex& a, \ +// Kokkos::complex& b, \ +// Kokkos::complex& c, \ +// Kokkos::complex& s) { \ +// Kokkos::Profiling::pushRegion( \ +// "KokkosBlas::rotg[TPL_BLAS,complex]"); \ +// Kokkos::Profiling::popRegion(); \ +// } \ +// }; + +// KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_BLAS(true) +// KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_BLAS(false) + +// KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_BLAS(true) +// KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_BLAS(false) + +// KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_BLAS(true) +// KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_BLAS(false) + +// KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_BLAS(true) +// KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_BLAS(false) + +} // namespace Impl +} // namespace KokkosBlas + +#endif + +// cuBLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS +#include + +namespace KokkosBlas { +namespace Impl { + +#define KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_CUBLAS(LAYOUT, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct Nrm1< \ + Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View > \ + RV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef typename XV::size_type size_type; \ + \ + static void nrm1(RV& R, const XV& X) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_CUBLAS,double]"); \ + const size_type numElems = X.extent(0); \ + if (numElems < static_cast(INT_MAX)) { \ + nrm1_print_specialization(); \ + const int N = static_cast(numElems); \ + constexpr int one = 1; \ + KokkosBlas::Impl::CudaBlasSingleton& s = \ + KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasDasum(s.handle, N, X.data(), one, R.data()); \ + } else { \ + Nrm1::nrm1(R, X); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_CUBLAS(LAYOUT, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct Nrm1< \ + Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View > \ + RV; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef typename XV::size_type size_type; \ + \ + static void nrm1(RV& R, const XV& X) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_CUBLAS,float]"); \ + const size_type numElems = X.extent(0); \ + if (numElems < static_cast(INT_MAX)) { \ + nrm1_print_specialization(); \ + const int N = static_cast(numElems); \ + constexpr int one = 1; \ + KokkosBlas::Impl::CudaBlasSingleton& s = \ + KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasSasum(s.handle, N, X.data(), one, R.data()); \ + } else { \ + Nrm1::nrm1(R, X); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_CUBLAS(LAYOUT, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct Nrm1 >, \ + Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View > \ + RV; \ + typedef Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef typename XV::size_type size_type; \ + \ + static void nrm1(RV& R, const XV& X) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::nrm1[TPL_CUBLAS,complex]"); \ + const size_type numElems = X.extent(0); \ + if (numElems < static_cast(INT_MAX)) { \ + nrm1_print_specialization(); \ + const int N = static_cast(numElems); \ + constexpr int one = 1; \ + KokkosBlas::Impl::CudaBlasSingleton& s = \ + KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasDzasum(s.handle, N, \ + reinterpret_cast(X.data()), one, \ + R.data()); \ + } else { \ + Nrm1::nrm1(R, X); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_CUBLAS(LAYOUT, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct Nrm1 >, \ + Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + 1, true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::View > \ + RV; \ + typedef Kokkos::View*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits > \ + XV; \ + typedef typename XV::size_type size_type; \ + \ + static void nrm1(RV& R, const XV& X) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::nrm1[TPL_CUBLAS,complex]"); \ + const size_type numElems = X.extent(0); \ + if (numElems < static_cast(INT_MAX)) { \ + nrm1_print_specialization(); \ + const int N = static_cast(numElems); \ + constexpr int one = 1; \ + KokkosBlas::Impl::CudaBlasSingleton& s = \ + KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasScasum(s.handle, N, \ + reinterpret_cast(X.data()), one, \ + R.data()); \ + } else { \ + Nrm1::nrm1(R, X); \ + } \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, + true) +KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, + false) + +KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, + true) +KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, + false) + +KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, + true) +KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, + false) + +KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, + true) +KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, + false) + +} // namespace Impl +} // namespace KokkosBlas + +#endif + +#endif diff --git a/blas/unit_test/Test_Blas.hpp b/blas/unit_test/Test_Blas.hpp index b6d4f88314..b794d74bde 100644 --- a/blas/unit_test/Test_Blas.hpp +++ b/blas/unit_test/Test_Blas.hpp @@ -19,6 +19,7 @@ #include "Test_Blas1_nrm2w.hpp" #include "Test_Blas1_nrminf.hpp" #include "Test_Blas1_reciprocal.hpp" +#include "Test_Blas1_rotg.hpp" #include "Test_Blas1_scal.hpp" #include "Test_Blas1_sum.hpp" #include "Test_Blas1_update.hpp" diff --git a/blas/unit_test/Test_Blas1_rotg.hpp b/blas/unit_test/Test_Blas1_rotg.hpp new file mode 100644 index 0000000000..0be1a96f41 --- /dev/null +++ b/blas/unit_test/Test_Blas1_rotg.hpp @@ -0,0 +1,75 @@ +#include + +namespace Test { +template +void test_rotg_impl(const Scalar a_in, const Scalar b_in) { + using magnitude_type = typename Kokkos::ArithTraits::mag_type; + const magnitude_type eps = Kokkos::ArithTraits::eps(); + const Scalar zero = Kokkos::ArithTraits::zero(); + + // Initialize inputs/outputs + Scalar a = a_in; + Scalar b = b_in; + Scalar c = zero, s = zero; + + KokkosBlas::rotg(a, b, c, s); + + // Check that a*c - b*s == 0 + // and a == sqrt(a*a + b*b) + EXPECT_NEAR_KK(a_in * s - b_in * c, zero, 10 * eps); + EXPECT_NEAR_KK(Kokkos::sqrt(a_in * a_in + b_in * b_in), a, 10 * eps); +} +} // namespace Test + +template +int test_rotg() { + const Scalar zero = Kokkos::ArithTraits::zero(); + const Scalar one = Kokkos::ArithTraits::one(); + const Scalar two = one + one; + + Test::test_rotg_impl(one, zero); + Test::test_rotg_impl(one / two, one / two); + Test::test_rotg_impl(2.1 * one, 1.3 * one); + + return 1; +} + +#if defined(KOKKOSKERNELS_INST_FLOAT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, rotg_float) { + Kokkos::Profiling::pushRegion("KokkosBlas::Test::rotg"); + test_rotg(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, rotg_double) { + Kokkos::Profiling::pushRegion("KokkosBlas::Test::rotg"); + test_rotg(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, rotg_complex_float) { + Kokkos::Profiling::pushRegion("KokkosBlas::Test::rotg"); + test_rotg, TestExecSpace>(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, rotg_complex_double) { + Kokkos::Profiling::pushRegion("KokkosBlas::Test::rotg"); + test_rotg, TestExecSpace>(); + Kokkos::Profiling::popRegion(); +} +#endif From c331650caf0a1e76c90fc6c19510d5f6bb64e93f Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Wed, 14 Sep 2022 10:07:13 -0600 Subject: [PATCH 2/5] ROTG: adding support for TPLs Adding support for host BLAS, cuBLAS and rocBLAS --- blas/impl/KokkosBlas1_rotg_impl.hpp | 18 +- blas/impl/KokkosBlas1_rotg_spec.hpp | 8 +- blas/src/KokkosBlas1_rotg.hpp | 3 +- blas/tpls/KokkosBlas1_rotg_tpl_spec_avail.hpp | 60 ++- blas/tpls/KokkosBlas1_rotg_tpl_spec_decl.hpp | 423 +++++++++--------- blas/tpls/KokkosBlas_Host_tpl.cpp | 37 ++ blas/tpls/KokkosBlas_Host_tpl.hpp | 2 + 7 files changed, 306 insertions(+), 245 deletions(-) diff --git a/blas/impl/KokkosBlas1_rotg_impl.hpp b/blas/impl/KokkosBlas1_rotg_impl.hpp index dad4fdb3ec..ea93da605e 100644 --- a/blas/impl/KokkosBlas1_rotg_impl.hpp +++ b/blas/impl/KokkosBlas1_rotg_impl.hpp @@ -90,20 +90,24 @@ void Rotg_Invoke(Scalar& a, Scalar& b, Scalar& c, Scalar& s) { template ::is_complex, bool>::type = true> -void Rotg_Invoke(Scalar& a, Scalar& b, Scalar& c, Scalar& s) { - const Scalar one = Kokkos::ArithTraits::one(); - const Scalar zero = Kokkos::ArithTraits::zero(); +void Rotg_Invoke(Scalar& a, Scalar& b, + typename Kokkos::ArithTraits::mag_type& c, Scalar& s) { + using mag_type = typename Kokkos::ArithTraits::mag_type; - const Scalar numerical_scaling = Kokkos::abs(a) + Kokkos::abs(b); + const Scalar one = Kokkos::ArithTraits::one(); + const Scalar zero = Kokkos::ArithTraits::zero(); + const mag_type mag_zero = Kokkos::ArithTraits::zero(); + + const mag_type numerical_scaling = Kokkos::abs(a) + Kokkos::abs(b); if (Kokkos::abs(a) == zero) { - c = zero; + c = mag_zero; s = one; a = b; } else { const Scalar scaled_a = Kokkos::abs(a / numerical_scaling); const Scalar scaled_b = Kokkos::abs(b / numerical_scaling); - Scalar norm = Kokkos::sqrt(scaled_a * scaled_a + scaled_b * scaled_b) * - numerical_scaling; + mag_type norm = Kokkos::sqrt(scaled_a * scaled_a + scaled_b * scaled_b) * + numerical_scaling; Scalar unit_a = a / Kokkos::abs(a); c = Kokkos::abs(a) / norm; s = unit_a * Kokkos::conj(b) / norm; diff --git a/blas/impl/KokkosBlas1_rotg_spec.hpp b/blas/impl/KokkosBlas1_rotg_spec.hpp index 7a41a070dc..1b13095eea 100644 --- a/blas/impl/KokkosBlas1_rotg_spec.hpp +++ b/blas/impl/KokkosBlas1_rotg_spec.hpp @@ -88,14 +88,18 @@ template ::value, bool eti_spec_avail = rotg_eti_spec_avail::value> struct Rotg { - static void rotg(Scalar& a, Scalar& b, Scalar& c, Scalar& s); + static void rotg(Scalar& a, Scalar& b, + typename Kokkos::ArithTraits::mag_type& c, + Scalar& s); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of Rotg. template struct Rotg { - static void rotg(Scalar& a, Scalar& b, Scalar& c, Scalar& s) { + static void rotg(Scalar& a, Scalar& b, + typename Kokkos::ArithTraits::mag_type& c, + Scalar& s) { Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::rotg[ETI]" : "KokkosBlas::rotg[noETI]"); diff --git a/blas/src/KokkosBlas1_rotg.hpp b/blas/src/KokkosBlas1_rotg.hpp index 783b823247..3e030b9e87 100644 --- a/blas/src/KokkosBlas1_rotg.hpp +++ b/blas/src/KokkosBlas1_rotg.hpp @@ -59,7 +59,8 @@ namespace KokkosBlas { /// output the rotated value \param c [out] cosine value associated with the /// rotation \param s [out] sine value associated with the rotation template -void rotg(Scalar& a, Scalar& b, Scalar& c, Scalar& s) { +void rotg(Scalar& a, Scalar& b, + typename Kokkos::ArithTraits::mag_type& c, Scalar& s) { Kokkos::Profiling::pushRegion("KokkosBlas::rotg"); Impl::Rotg::rotg(a, b, c, s); Kokkos::Profiling::popRegion(); diff --git a/blas/tpls/KokkosBlas1_rotg_tpl_spec_avail.hpp b/blas/tpls/KokkosBlas1_rotg_tpl_spec_avail.hpp index 1b0bb898f3..b3d762887f 100644 --- a/blas/tpls/KokkosBlas1_rotg_tpl_spec_avail.hpp +++ b/blas/tpls/KokkosBlas1_rotg_tpl_spec_avail.hpp @@ -58,35 +58,47 @@ struct rotg_tpl_spec_avail { namespace KokkosBlas { namespace Impl { -// // Generic Host side BLAS (could be MKL or whatever) -// #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS -// // double -// #define KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(SCALAR) \ -// struct rotg_tpl_spec_avail { \ -// enum : bool { value = true }; \ -// }; +// Generic Host side BLAS (could be MKL or whatever) +#ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS +#define KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(SCALAR) \ + template <> \ + struct rotg_tpl_spec_avail { \ + enum : bool { value = true }; \ + }; -// KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(double) -// KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(float) -// KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex) -// KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(double) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(float) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_BLAS(Kokkos::complex) +#endif -// #endif +// cuBLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS +#define KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(SCALAR) \ + template <> \ + struct rotg_tpl_spec_avail { \ + enum : bool { value = true }; \ + }; -// // cuBLAS -// #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS -// // double -// #define KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(SCALAR) \ -// struct nrm1_tpl_spec_avail { \ -// enum : bool { value = true }; \ -// }; +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(double) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(float) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex) +#endif -// KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(double) -// KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(float) -// KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex) -// KOKKOSBLAS1_NRM1_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex) +// rocBLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS +#define KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(SCALAR) \ + template <> \ + struct rotg_tpl_spec_avail { \ + enum : bool { value = true }; \ + }; -// #endif +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(double) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(float) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex) +KOKKOSBLAS1_ROTG_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex) +#endif } // namespace Impl } // namespace KokkosBlas diff --git a/blas/tpls/KokkosBlas1_rotg_tpl_spec_decl.hpp b/blas/tpls/KokkosBlas1_rotg_tpl_spec_decl.hpp index 724ccfd93d..f1002c42a3 100644 --- a/blas/tpls/KokkosBlas1_rotg_tpl_spec_decl.hpp +++ b/blas/tpls/KokkosBlas1_rotg_tpl_spec_decl.hpp @@ -67,60 +67,67 @@ inline void rotg_print_specialization() { namespace KokkosBlas { namespace Impl { -// #define KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(ETI_SPEC_AVAIL) \ -// struct Rotg { \ -// \ -// static void rotg(double& a, double& b, double& c, double& s) { \ -// Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_BLAS,double]"); \ -// Kokkos::Profiling::popRegion(); \ -// } \ -// }; - -// #define KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(ETI_SPEC_AVAIL) \ -// struct Rotg { \ -// \ -// static void rotg(float& a, float& b, float& c, float& s) { \ -// Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_BLAS,float]"); \ -// Kokkos::Profiling::popRegion(); \ -// } \ -// }; - -// #define KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(ETI_SPEC_AVAIL) \ -// struct Rotg { \ -// \ -// static void rotg(Kokkos::complex& a, \ -// Kokkos::complex& b, \ -// Kokkos::complex& c, Kokkos::complex& s) { \ -// Kokkos::Profiling::pushRegion( \ -// "KokkosBlas::rotg[TPL_BLAS,complex]"); \ -// Kokkos::Profiling::popRegion(); \ -// } \ -// }; - -// #define KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_BLAS(ETI_SPEC_AVAIL) \ -// struct Rotg, true, ETI_SPEC_AVAIL> { \ -// \ -// static void rotg(Kokkos::complex& a, \ -// Kokkos::complex& b, \ -// Kokkos::complex& c, \ -// Kokkos::complex& s) { \ -// Kokkos::Profiling::pushRegion( \ -// "KokkosBlas::rotg[TPL_BLAS,complex]"); \ -// Kokkos::Profiling::popRegion(); \ -// } \ -// }; - -// KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_BLAS(true) -// KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_BLAS(false) - -// KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_BLAS(true) -// KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_BLAS(false) - -// KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_BLAS(true) -// KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_BLAS(false) - -// KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_BLAS(true) -// KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_BLAS(false) +#define KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg { \ + static void rotg(double& a, double& b, double& c, double& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_BLAS,double]"); \ + HostBlas::rotg(&a, &b, &c, &s); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg { \ + static void rotg(float& a, float& b, float& c, float& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rotg[TPL_BLAS,float]"); \ + HostBlas::rotg(&a, &b, &c, &s); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg, true, ETI_SPEC_AVAIL> { \ + static void rotg(Kokkos::complex& a, Kokkos::complex& b, \ + double& c, Kokkos::complex& s) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::rotg[TPL_BLAS,complex]"); \ + HostBlas >::rotg( \ + reinterpret_cast*>(&a), \ + reinterpret_cast*>(&b), &c, \ + reinterpret_cast*>(&s)); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg, true, ETI_SPEC_AVAIL> { \ + static void rotg(Kokkos::complex& a, Kokkos::complex& b, \ + float& c, Kokkos::complex& s) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::rotg[TPL_BLAS,complex]"); \ + HostBlas >::rotg( \ + reinterpret_cast*>(&a), \ + reinterpret_cast*>(&b), &c, \ + reinterpret_cast*>(&s)); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(true) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_BLAS(false) + +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(true) +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_BLAS(false) + +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(true) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_BLAS(false) + +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(true) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_BLAS(false) } // namespace Impl } // namespace KokkosBlas @@ -134,171 +141,165 @@ namespace Impl { namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_CUBLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct Nrm1< \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrm1(RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_CUBLAS,double]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - nrm1_print_specialization(); \ - const int N = static_cast(numElems); \ - constexpr int one = 1; \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - cublasDasum(s.handle, N, X.data(), one, R.data()); \ - } else { \ - Nrm1::nrm1(R, X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg { \ + static void rotg(double& a, double& b, double& c, double& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_CUBLAS,double]"); \ + rotg_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = \ + KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasDrotg(singleton.handle, &a, &b, &c, &s); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg { \ + static void rotg(float& a, float& b, float& c, float& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_CUBLAS,float]"); \ + rotg_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = \ + KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasSrotg(singleton.handle, &a, &b, &c, &s); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg, true, ETI_SPEC_AVAIL> { \ + static void rotg(Kokkos::complex& a, Kokkos::complex& b, \ + double& c, Kokkos::complex& s) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::nrm1[TPL_CUBLAS,complex]"); \ + rotg_print_specialization >(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = \ + KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasZrotg(singleton.handle, reinterpret_cast(&a), \ + reinterpret_cast(&b), &c, \ + reinterpret_cast(&s)); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg, true, ETI_SPEC_AVAIL> { \ + static void rotg(Kokkos::complex& a, Kokkos::complex& b, \ + float& c, Kokkos::complex& s) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::nrm1[TPL_CUBLAS,complex]"); \ + rotg_print_specialization >(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = \ + KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + cublasCrotg(singleton.handle, reinterpret_cast(&a), \ + reinterpret_cast(&b), &c, \ + reinterpret_cast(&s)); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(true) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_CUBLAS(false) + +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(true) +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_CUBLAS(false) + +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(true) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_CUBLAS(false) + +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(true) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_CUBLAS(false) + +} // namespace Impl +} // namespace KokkosBlas + +#endif + +// rocBLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS +#include + +namespace KokkosBlas { +namespace Impl { + +#define KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_ROCBLAS(ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg { \ + static void rotg(double& a, double& b, double& c, double& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_ROCBLAS,double]"); \ + rotg_print_specialization(); \ + KokkosBlas::Impl::RocBlasSingleton& singleton = \ + KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ + rocblas_drotg(singleton.handle, &a, &b, &c, &s)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_CUBLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct Nrm1< \ - Kokkos::View >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrm1(RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_CUBLAS,float]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - nrm1_print_specialization(); \ - const int N = static_cast(numElems); \ - constexpr int one = 1; \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - cublasSasum(s.handle, N, X.data(), one, R.data()); \ - } else { \ - Nrm1::nrm1(R, X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_ROCBLAS(ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg { \ + static void rotg(float& a, float& b, float& c, float& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::nrm1[TPL_ROCBLAS,float]"); \ + rotg_print_specialization(); \ + KokkosBlas::Impl::RocBlasSingleton& singleton = \ + KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( \ + rocblas_srotg(singleton.handle, &a, &b, &c, &s)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_CUBLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct Nrm1 >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrm1(RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::nrm1[TPL_CUBLAS,complex]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - nrm1_print_specialization(); \ - const int N = static_cast(numElems); \ - constexpr int one = 1; \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - cublasDzasum(s.handle, N, \ - reinterpret_cast(X.data()), one, \ - R.data()); \ - } else { \ - Nrm1::nrm1(R, X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_ROCBLAS(ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg, true, ETI_SPEC_AVAIL> { \ + static void rotg(Kokkos::complex& a, Kokkos::complex& b, \ + double& c, Kokkos::complex& s) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::nrm1[TPL_ROCBLAS,complex]"); \ + rotg_print_specialization >(); \ + KokkosBlas::Impl::RocBlasSingleton& singleton = \ + KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_zrotg( \ + singleton.handle, reinterpret_cast(&a), \ + reinterpret_cast(&b), &c, \ + reinterpret_cast(&s))); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_CUBLAS(LAYOUT, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct Nrm1 >, \ - Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - 1, true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::View > \ - RV; \ - typedef Kokkos::View*, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits > \ - XV; \ - typedef typename XV::size_type size_type; \ - \ - static void nrm1(RV& R, const XV& X) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::nrm1[TPL_CUBLAS,complex]"); \ - const size_type numElems = X.extent(0); \ - if (numElems < static_cast(INT_MAX)) { \ - nrm1_print_specialization(); \ - const int N = static_cast(numElems); \ - constexpr int one = 1; \ - KokkosBlas::Impl::CudaBlasSingleton& s = \ - KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - cublasScasum(s.handle, N, \ - reinterpret_cast(X.data()), one, \ - R.data()); \ - } else { \ - Nrm1::nrm1(R, X); \ - } \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_ROCBLAS(ETI_SPEC_AVAIL) \ + template <> \ + struct Rotg, true, ETI_SPEC_AVAIL> { \ + static void rotg(Kokkos::complex& a, Kokkos::complex& b, \ + float& c, Kokkos::complex& s) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::nrm1[TPL_ROCBLAS,complex]"); \ + rotg_print_specialization >(); \ + KokkosBlas::Impl::RocBlasSingleton& singleton = \ + KokkosBlas::Impl::RocBlasSingleton::singleton(); \ + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_crotg( \ + singleton.handle, reinterpret_cast(&a), \ + reinterpret_cast(&b), &c, \ + reinterpret_cast(&s))); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, - true) -KOKKOSBLAS1_DNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, - false) - -KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, - true) -KOKKOSBLAS1_SNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, - false) - -KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, - true) -KOKKOSBLAS1_ZNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, - false) - -KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, - true) -KOKKOSBLAS1_CNRM1_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::CudaSpace, - false) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_ROCBLAS(true) +KOKKOSBLAS1_DROTG_TPL_SPEC_DECL_ROCBLAS(false) + +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_ROCBLAS(true) +KOKKOSBLAS1_SROTG_TPL_SPEC_DECL_ROCBLAS(false) + +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_ROCBLAS(true) +KOKKOSBLAS1_ZROTG_TPL_SPEC_DECL_ROCBLAS(false) + +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_ROCBLAS(true) +KOKKOSBLAS1_CROTG_TPL_SPEC_DECL_ROCBLAS(false) } // namespace Impl } // namespace KokkosBlas diff --git a/blas/tpls/KokkosBlas_Host_tpl.cpp b/blas/tpls/KokkosBlas_Host_tpl.cpp index b834e4fcf0..d2e8435fdf 100644 --- a/blas/tpls/KokkosBlas_Host_tpl.cpp +++ b/blas/tpls/KokkosBlas_Host_tpl.cpp @@ -127,6 +127,18 @@ void F77_BLAS_MANGLE(zaxpy, const std::complex* x, const int* x_inc, /* */ std::complex* y, const int* y_inc); +/// +/// rotg +/// +void F77_BLAS_MANGLE(srotg, SROTG)(float* a, float* b, float* c, float* s); +void F77_BLAS_MANGLE(drotg, DROTG)(double* a, double* b, double* c, double* s); +void F77_BLAS_MANGLE(crotg, CROTG)(std::complex* a, + std::complex* b, float* c, + std::complex* s); +void F77_BLAS_MANGLE(zrotg, ZROTG)(std::complex* a, + std::complex* b, double* c, + std::complex* s); + /// /// Gemv /// @@ -339,6 +351,11 @@ void F77_BLAS_MANGLE(zscal, #define F77_FUNC_CAXPY F77_BLAS_MANGLE(caxpy, CAXPY) #define F77_FUNC_ZAXPY F77_BLAS_MANGLE(zaxpy, ZAXPY) +#define F77_FUNC_SROTG F77_BLAS_MANGLE(srotg, SROTG) +#define F77_FUNC_DROTG F77_BLAS_MANGLE(drotg, DROTG) +#define F77_FUNC_CROTG F77_BLAS_MANGLE(crotg, CROTG) +#define F77_FUNC_ZROTG F77_BLAS_MANGLE(zrotg, ZROTG) + #define F77_FUNC_SGEMV F77_BLAS_MANGLE(sgemv, SGEMV) #define F77_FUNC_DGEMV F77_BLAS_MANGLE(dgemv, DGEMV) #define F77_FUNC_CGEMV F77_BLAS_MANGLE(cgemv, CGEMV) @@ -414,6 +431,10 @@ void HostBlas::axpy(int n, const float alpha, const float* x, int x_inc, F77_FUNC_SAXPY(&n, &alpha, x, &x_inc, y, &y_inc); } template <> +void HostBlas::rotg(float* a, float* b, float* c, float* s) { + F77_FUNC_SROTG(a, b, c, s); +} +template <> void HostBlas::gemv(const char trans, int m, int n, const float alpha, const float* a, int lda, const float* b, int ldb, const float beta, @@ -503,6 +524,10 @@ void HostBlas::axpy(int n, const double alpha, const double* x, F77_FUNC_DAXPY(&n, &alpha, x, &x_inc, y, &y_inc); } template <> +void HostBlas::rotg(double* a, double* b, double* c, double* s) { + F77_FUNC_DROTG(a, b, c, s); +} +template <> void HostBlas::gemv(const char trans, int m, int n, const double alpha, const double* a, int lda, const double* b, int ldb, const double beta, @@ -606,6 +631,12 @@ void HostBlas >::axpy(int n, int y_inc) { F77_FUNC_CAXPY(&n, &alpha, x, &x_inc, y, &y_inc); } +template <> +void HostBlas >::rotg(std::complex* a, + std::complex* b, float* c, + std::complex* s) { + F77_FUNC_CROTG(a, b, c, s); +} template <> void HostBlas >::gemv(const char trans, int m, int n, @@ -740,6 +771,12 @@ void HostBlas >::axpy(int n, int y_inc) { F77_FUNC_ZAXPY(&n, &alpha, x, &x_inc, y, &y_inc); } +template <> +void HostBlas >::rotg(std::complex* a, + std::complex* b, double* c, + std::complex* s) { + F77_FUNC_ZROTG(a, b, c, s); +} template <> void HostBlas >::gemv( diff --git a/blas/tpls/KokkosBlas_Host_tpl.hpp b/blas/tpls/KokkosBlas_Host_tpl.hpp index efde4db81a..b598cd1556 100644 --- a/blas/tpls/KokkosBlas_Host_tpl.hpp +++ b/blas/tpls/KokkosBlas_Host_tpl.hpp @@ -76,6 +76,8 @@ struct HostBlas { static void axpy(int n, const T alpha, const T *x, int x_inc, /* */ T *y, int y_inc); + static void rotg(T *a, T *b, mag_type *c, T *s); + static void gemv(const char trans, int m, int n, const T alpha, const T *a, int lda, const T *b, int ldb, const T beta, /* */ T *c, int ldc); From 0f03ff5ee0f3725b55f7df6c0ac3b7f91eea2749 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Wed, 14 Sep 2022 18:12:24 -0600 Subject: [PATCH 3/5] ROTG: fix issue with return type of sqrt Kokkos::sqrt(Kokkos::complex<>) returns a Kokkos::complex<> so in this case we need to take the module of that since we want a magnitude_type. --- blas/impl/KokkosBlas1_rotg_impl.hpp | 5 +++-- blas/unit_test/Test_Blas1_rotg.hpp | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/blas/impl/KokkosBlas1_rotg_impl.hpp b/blas/impl/KokkosBlas1_rotg_impl.hpp index ea93da605e..dcf259f841 100644 --- a/blas/impl/KokkosBlas1_rotg_impl.hpp +++ b/blas/impl/KokkosBlas1_rotg_impl.hpp @@ -106,8 +106,9 @@ void Rotg_Invoke(Scalar& a, Scalar& b, } else { const Scalar scaled_a = Kokkos::abs(a / numerical_scaling); const Scalar scaled_b = Kokkos::abs(b / numerical_scaling); - mag_type norm = Kokkos::sqrt(scaled_a * scaled_a + scaled_b * scaled_b) * - numerical_scaling; + mag_type norm = + Kokkos::abs(Kokkos::sqrt(scaled_a * scaled_a + scaled_b * scaled_b)) * + numerical_scaling; Scalar unit_a = a / Kokkos::abs(a); c = Kokkos::abs(a) / norm; s = unit_a * Kokkos::conj(b) / norm; diff --git a/blas/unit_test/Test_Blas1_rotg.hpp b/blas/unit_test/Test_Blas1_rotg.hpp index 0be1a96f41..bee64ee167 100644 --- a/blas/unit_test/Test_Blas1_rotg.hpp +++ b/blas/unit_test/Test_Blas1_rotg.hpp @@ -10,7 +10,8 @@ void test_rotg_impl(const Scalar a_in, const Scalar b_in) { // Initialize inputs/outputs Scalar a = a_in; Scalar b = b_in; - Scalar c = zero, s = zero; + magnitude_type c = Kokkos::ArithTraits::zero(); + Scalar s = zero; KokkosBlas::rotg(a, b, c, s); From 29a8d821bcad065ec9a61d83b906f082e2f0c32b Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Thu, 15 Sep 2022 09:01:21 -0600 Subject: [PATCH 4/5] ROTG: apply clang-format --- blas/unit_test/Test_Blas1_rotg.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/blas/unit_test/Test_Blas1_rotg.hpp b/blas/unit_test/Test_Blas1_rotg.hpp index bee64ee167..5e54ee671e 100644 --- a/blas/unit_test/Test_Blas1_rotg.hpp +++ b/blas/unit_test/Test_Blas1_rotg.hpp @@ -8,10 +8,10 @@ void test_rotg_impl(const Scalar a_in, const Scalar b_in) { const Scalar zero = Kokkos::ArithTraits::zero(); // Initialize inputs/outputs - Scalar a = a_in; - Scalar b = b_in; + Scalar a = a_in; + Scalar b = b_in; magnitude_type c = Kokkos::ArithTraits::zero(); - Scalar s = zero; + Scalar s = zero; KokkosBlas::rotg(a, b, c, s); From 0b5e4fba27aacb374a3f5e8259702a9f5e030f8a Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Thu, 15 Sep 2022 14:08:43 -0600 Subject: [PATCH 5/5] ROTG: rebase on develop and fix typo in documentation --- blas/src/KokkosBlas1_rotg.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blas/src/KokkosBlas1_rotg.hpp b/blas/src/KokkosBlas1_rotg.hpp index 3e030b9e87..ff349444eb 100644 --- a/blas/src/KokkosBlas1_rotg.hpp +++ b/blas/src/KokkosBlas1_rotg.hpp @@ -50,7 +50,7 @@ namespace KokkosBlas { -/// \brief Compute the coefficient to apply a Givens rotation. +/// \brief Compute the coefficients to apply a Givens rotation. /// /// \tparam Scalar data type of inputs and outputs ///