Merge 'trilinos/Trilinos:develop' (a5c6a8b) into 'tcad-charon/Trilino…

…s:develop' (a7632d8). * trilinos-develop: Tpetra,kokkos-kernels: Fix trilinos#6633 Reformulates the driver in a residual form. That is, we invoke Vcycles to correct the existing solution using the residual as the right hand side. We then removes a few matvecs and some setup stages that are not actually needed. In particular, we take advantage of the fact that the initial guess is zero on the presmoothing side of the MG cycle. Also removed the smoother setup for the coarsest region level when we have not requested to smooth on this level. Tpetra: Add debug output to sparse matrix-matrix multiply test Belos: Fix test build failure on Mac Clang (missing header) Tpetra: Fix Scalar=float BCRS test failure on Mac Clang Anasazi: Fix trilinos#6612 Tpetra: Fix trilinos#6611
tcad-charon · Jan 24, 2020 · 2bfd2c7 · 2bfd2c7
2 parents a7632d8 + a5c6a8b
commit 2bfd2c7
Show file tree

Hide file tree

Showing 13 changed files with 266 additions and 64 deletions.
diff --git a/packages/anasazi/src/AnasaziMVOPTester.hpp b/packages/anasazi/src/AnasaziMVOPTester.hpp
@@ -862,8 +862,8 @@ namespace Anasazi {
       std::vector<MagType> normsB1(p), normsB2(p),
                            normsC1(p), normsC2(p),
                            normsD1(p), normsD2(p);
-      ScalarType alpha = 0.5 * SCT::one(),
-                  beta = 0.33 * SCT::one();
+      ScalarType alpha = MagType(0.5) * SCT::one();
+      ScalarType beta = MagType(0.33) * SCT::one();
 
       B = MVT::Clone(*A,p);
       C = MVT::Clone(*A,p);

diff --git a/packages/belos/tpetra/test/BlockGmres/test_bl_gmres_hb_df.cpp b/packages/belos/tpetra/test/BlockGmres/test_bl_gmres_hb_df.cpp
@@ -60,6 +60,8 @@
 #include <Tpetra_CrsMatrix.hpp>
 #include <Teuchos_TypeNameTraits.hpp>
 
+#include <fstream>
+
 using namespace Teuchos;
 using namespace Belos;
 using Tpetra::Operator;

diff --git a/packages/kokkos-kernels/src/Kokkos_InnerProductSpaceTraits.hpp b/packages/kokkos-kernels/src/Kokkos_InnerProductSpaceTraits.hpp
@@ -160,8 +160,8 @@ class InnerProductSpaceTraits {
   /// complex.  In that case, see the partial specialization for
   /// Kokkos::complex below to see our convention for which input gets
   /// conjugated.
-  static KOKKOS_FORCEINLINE_FUNCTION dot_type
-  dot (const val_type& x, const val_type& y) {
+  static KOKKOS_FORCEINLINE_FUNCTION
+  dot_type dot (const val_type& x, const val_type& y) {
     return x * y;
   }
 };
@@ -196,8 +196,8 @@ class InnerProductSpaceTraits<Kokkos::complex<T> > {
   mag_type norm (const val_type& x) {
     return ArithTraits<val_type>::abs (x);
   }
-  static KOKKOS_FORCEINLINE_FUNCTION dot_type
-  dot (const val_type& x, const val_type& y) {
+  static KOKKOS_FORCEINLINE_FUNCTION
+  dot_type dot (const val_type& x, const val_type& y) {
     return Kokkos::conj (x) * y;
   }
 };
@@ -291,6 +291,66 @@ struct InnerProductSpaceTraits<qd_real>
 };
 #endif // HAVE_KOKKOS_QD
 
+template<class ResultType, class InputType1, class InputType2>
+KOKKOS_INLINE_FUNCTION void
+updateDot(ResultType& sum, const InputType1& x, const InputType2& y)
+{
+  // FIXME (mfh 22 Jan 2020) We should actually pick the type with the
+  // greater precision.
+  sum += InnerProductSpaceTraits<InputType1>::dot(x, y);
+}
+
+KOKKOS_INLINE_FUNCTION void
+updateDot(double& sum, const double x, const double y)
+{
+  sum += x * y;
+}
+
+KOKKOS_INLINE_FUNCTION void
+updateDot(double& sum, const float x, const float y)
+{
+  sum += x * y;
+}
+
+// This exists because complex<float> += complex<double> is not defined.
+KOKKOS_INLINE_FUNCTION void
+updateDot(Kokkos::complex<double>& sum,
+          const Kokkos::complex<float> x,
+          const Kokkos::complex<float> y)
+{
+  const auto tmp = Kokkos::conj(x) * y;
+  sum += Kokkos::complex<double>(tmp.real(), tmp.imag());
+}
+
+// This exists in case people call the overload of KokkosBlas::dot
+// that takes an output View, and the output View has element type
+// Kokkos::complex<float>.
+KOKKOS_INLINE_FUNCTION void
+updateDot(Kokkos::complex<float>& sum,
+          const Kokkos::complex<float> x,
+          const Kokkos::complex<float> y)
+{
+  sum += Kokkos::conj(x) * y;
+}
+
+// This exists because Kokkos::complex<double> =
+// Kokkos::complex<float> is not defined.
+template<class Out, class In>
+struct CastPossiblyComplex {
+  static Out cast(const In& x) {
+    return x;
+  }
+};
+
+template<class OutReal, class InReal>
+struct CastPossiblyComplex<Kokkos::complex<OutReal>, Kokkos::complex<InReal>>
+{
+  static Kokkos::complex<OutReal>
+  cast (const Kokkos::complex<InReal>& x) {
+    return {x.real(), x.imag()};
+  }
+};
+
 } // namespace Details
 } // namespace Kokkos
 

diff --git a/packages/kokkos-kernels/src/blas/KokkosBlas1_dot.hpp b/packages/kokkos-kernels/src/blas/KokkosBlas1_dot.hpp
@@ -90,19 +90,39 @@ dot (const XVector& x, const YVector& y)
     typename YVector::device_type,
     Kokkos::MemoryTraits<Kokkos::Unmanaged> > YVector_Internal;
 
-  typedef Kokkos::View<typename XVector::non_const_value_type,
+  using dot_type =
+    typename Kokkos::Details::InnerProductSpaceTraits<
+      typename XVector::non_const_value_type>::dot_type;
+  // Some platforms, such as Mac Clang, seem to get poor accuracy with
+  // float and complex<float>.  Work around some Trilinos test
+  // failures by using a higher-precision type for intermediate dot
+  // product sums.
+  constexpr bool is_complex_float =
+    std::is_same<dot_type, Kokkos::complex<float>>::value;
+  constexpr bool is_real_float = std::is_same<dot_type, float>::value;
+  using result_type = typename std::conditional<is_complex_float,
+    Kokkos::complex<double>,
+    typename std::conditional<is_real_float,
+      double,
+      dot_type
+      >::type
+    >::type;
+  using RVector_Internal = Kokkos::View<result_type,
     Kokkos::LayoutLeft,
     Kokkos::HostSpace,
-    Kokkos::MemoryTraits<Kokkos::Unmanaged> > RVector_Internal;
+    Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
 
-  typename XVector::non_const_value_type result = 0;
+  result_type result {};
   RVector_Internal R = RVector_Internal(&result);
   XVector_Internal X = x;
   YVector_Internal Y = y;
 
   Impl::Dot<RVector_Internal,XVector_Internal,YVector_Internal>::dot (R,X,Y);
   Kokkos::fence();
-  return result;
+  // mfh 22 Jan 2020: We need the line below because
+  // Kokkos::complex<T> lacks a constructor that takes a
+  // Kokkos::complex<U> with U != T.
+  return Kokkos::Details::CastPossiblyComplex<dot_type, result_type>::cast(result);
 }
 
 /// \brief Compute the column-wise dot products of two multivectors.

diff --git a/packages/kokkos-kernels/src/blas/impl/KokkosBlas1_dot_impl.hpp b/packages/kokkos-kernels/src/blas/impl/KokkosBlas1_dot_impl.hpp
@@ -81,7 +81,7 @@ struct DotFunctor
   KOKKOS_FORCEINLINE_FUNCTION void
   operator() (const size_type &i, value_type& sum) const
   {
-    sum += IPT::dot (m_x(i), m_y(i));  // m_x(i) * m_y(i)
+    Kokkos::Details::updateDot(sum, m_x(i), m_y(i)); // sum += m_x(i) * m_y(i)
   }
 
   KOKKOS_INLINE_FUNCTION void

diff --git a/packages/muelu/research/mmayr/composite_to_regions/src/SetupRegionHierarchy_def.hpp b/packages/muelu/research/mmayr/composite_to_regions/src/SetupRegionHierarchy_def.hpp
@@ -775,7 +775,10 @@ void createRegionHierarchy(const int maxRegPerProc,
                               regRowImporters,
                               quasiRegRowMaps);
 
-  for(int levelIdx = 0; levelIdx < numLevels; ++levelIdx) {
+  int noCoarseSmoothing = 1;
+  if (coarseSolverType == "smoother") noCoarseSmoothing = 0;
+
+  for(int levelIdx = 0; levelIdx < numLevels-noCoarseSmoothing; ++levelIdx) {
     smootherSetup(smootherParams[levelIdx], regRowMaps[levelIdx],
                   regMatrices[levelIdx], regInterfaceScalings[levelIdx],
                   regRowImporters[levelIdx]);
@@ -845,6 +848,7 @@ void vCycle(const int l, ///< ID of current level
             Array<std::vector<RCP<Xpetra::Import<LocalOrdinal, GlobalOrdinal, Node> > > > regRowImporters, ///< regional row importers
             Array<Array<RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > > > regInterfaceScalings, ///< regional interface scaling factors
             Array<RCP<Teuchos::ParameterList> > smootherParams, ///< region smoother parameter list
+	    bool& zeroInitGuess,
             RCP<ParameterList> coarseSolverData = Teuchos::null,
             RCP<ParameterList> hierarchyData = Teuchos::null)
 {
@@ -875,7 +879,7 @@ void vCycle(const int l, ///< ID of current level
 
     // pre-smoothing
     smootherApply(smootherParams[l], fineRegX, fineRegB, regMatrices[l],
-                  regRowMaps[l], regRowImporters[l]);
+                  regRowMaps[l], regRowImporters[l], zeroInitGuess);
 
     tm = Teuchos::null;
     tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 2 - compute residual")));
@@ -915,12 +919,13 @@ void vCycle(const int l, ///< ID of current level
     sumInterfaceValues(coarseRegB, regRowMaps[l+1], regRowImporters[l+1]);
 
     tm = Teuchos::null;
+    bool coarseZeroInitGuess = true; 
 
     // Call V-cycle recursively
     vCycle(l+1, numLevels,
            coarseRegX, coarseRegB, regMatrices, regProlong, compRowMaps,
            quasiRegRowMaps, regRowMaps, regRowImporters, regInterfaceScalings,
-           smootherParams, coarseSolverData, hierarchyData);
+           smootherParams, coarseZeroInitGuess, coarseSolverData, hierarchyData);
 
     tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 6 - transfer coarse to fine")));
 
@@ -940,6 +945,7 @@ void vCycle(const int l, ///< ID of current level
     for (int j = 0; j < maxRegPerProc; j++) {
       fineRegX[j]->update(SC_ONE, *regCorrection[j], SC_ONE);
     }
+    if (coarseZeroInitGuess) zeroInitGuess = true;
 
 //    std::cout << "level: " << l << std::endl;
 
@@ -948,7 +954,7 @@ void vCycle(const int l, ///< ID of current level
 
     // post-smoothing
     smootherApply(smootherParams[l], fineRegX, fineRegB, regMatrices[l],
-                  regRowMaps[l], regRowImporters[l]);
+                  regRowMaps[l], regRowImporters[l], zeroInitGuess);
 
     tm = Teuchos::null;
 
@@ -964,8 +970,9 @@ void vCycle(const int l, ///< ID of current level
     const std::string coarseSolverType = coarseSolverData->get<std::string>("coarse solver type");
     if (coarseSolverType == "smoother") {
       smootherApply(smootherParams[l], fineRegX, fineRegB, regMatrices[l],
-                  regRowMaps[l], regRowImporters[l]);
+                  regRowMaps[l], regRowImporters[l], zeroInitGuess);
     } else {
+      zeroInitGuess = false;
       // First get the Xpetra vectors from region to composite format
       RCP<const Map> coarseRowMap = coarseSolverData->get<RCP<const Map> >("compCoarseRowMap");
       RCP<Vector> compX = VectorFactory::Build(coarseRowMap, true);
@@ -1032,7 +1039,8 @@ void vCycle(const int l, ///< ID of current level
         RCP<Hierarchy> amgHierarchy = coarseSolverData->get<RCP<Hierarchy>>("amg hierarchy object");
 
         // Run a single V-cycle
-        amgHierarchy->Iterate(*compRhs, *compX, 1);
+
+        amgHierarchy->Iterate(*compRhs, *compX, true, 1);
       }
       else
       {