Skip to content

Commit

Permalink
Merge 'trilinos/Trilinos:develop' (a5c6a8b) into 'tcad-charon/Trilino…
Browse files Browse the repository at this point in the history
…s:develop' (a7632d8).

* trilinos-develop:
  Tpetra,kokkos-kernels: Fix trilinos#6633
  Reformulates the driver in a residual form. That is, we invoke Vcycles to correct the existing solution using the residual as the right hand side.  We then removes a few matvecs and some setup stages that are not actually needed. In particular, we take advantage of the fact that the initial guess is zero on the presmoothing side of the MG cycle. Also removed the smoother setup for the coarsest region level when we have not requested to smooth on this level.
  Tpetra: Add debug output to sparse matrix-matrix multiply test
  Belos: Fix test build failure on Mac Clang (missing header)
  Tpetra: Fix Scalar=float BCRS test failure on Mac Clang
  Anasazi: Fix trilinos#6612
  Tpetra: Fix trilinos#6611
  • Loading branch information
Jenkins Pipeline committed Jan 24, 2020
2 parents a7632d8 + a5c6a8b commit 2bfd2c7
Show file tree
Hide file tree
Showing 13 changed files with 266 additions and 64 deletions.
4 changes: 2 additions & 2 deletions packages/anasazi/src/AnasaziMVOPTester.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -862,8 +862,8 @@ namespace Anasazi {
std::vector<MagType> normsB1(p), normsB2(p),
normsC1(p), normsC2(p),
normsD1(p), normsD2(p);
ScalarType alpha = 0.5 * SCT::one(),
beta = 0.33 * SCT::one();
ScalarType alpha = MagType(0.5) * SCT::one();
ScalarType beta = MagType(0.33) * SCT::one();

B = MVT::Clone(*A,p);
C = MVT::Clone(*A,p);
Expand Down
2 changes: 2 additions & 0 deletions packages/belos/tpetra/test/BlockGmres/test_bl_gmres_hb_df.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@
#include <Tpetra_CrsMatrix.hpp>
#include <Teuchos_TypeNameTraits.hpp>

#include <fstream>

using namespace Teuchos;
using namespace Belos;
using Tpetra::Operator;
Expand Down
68 changes: 64 additions & 4 deletions packages/kokkos-kernels/src/Kokkos_InnerProductSpaceTraits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,8 @@ class InnerProductSpaceTraits {
/// complex. In that case, see the partial specialization for
/// Kokkos::complex below to see our convention for which input gets
/// conjugated.
static KOKKOS_FORCEINLINE_FUNCTION dot_type
dot (const val_type& x, const val_type& y) {
static KOKKOS_FORCEINLINE_FUNCTION
dot_type dot (const val_type& x, const val_type& y) {
return x * y;
}
};
Expand Down Expand Up @@ -196,8 +196,8 @@ class InnerProductSpaceTraits<Kokkos::complex<T> > {
mag_type norm (const val_type& x) {
return ArithTraits<val_type>::abs (x);
}
static KOKKOS_FORCEINLINE_FUNCTION dot_type
dot (const val_type& x, const val_type& y) {
static KOKKOS_FORCEINLINE_FUNCTION
dot_type dot (const val_type& x, const val_type& y) {
return Kokkos::conj (x) * y;
}
};
Expand Down Expand Up @@ -291,6 +291,66 @@ struct InnerProductSpaceTraits<qd_real>
};
#endif // HAVE_KOKKOS_QD

template<class ResultType, class InputType1, class InputType2>
KOKKOS_INLINE_FUNCTION void
updateDot(ResultType& sum, const InputType1& x, const InputType2& y)
{
// FIXME (mfh 22 Jan 2020) We should actually pick the type with the
// greater precision.
sum += InnerProductSpaceTraits<InputType1>::dot(x, y);
}

KOKKOS_INLINE_FUNCTION void
updateDot(double& sum, const double x, const double y)
{
sum += x * y;
}

KOKKOS_INLINE_FUNCTION void
updateDot(double& sum, const float x, const float y)
{
sum += x * y;
}

// This exists because complex<float> += complex<double> is not defined.
KOKKOS_INLINE_FUNCTION void
updateDot(Kokkos::complex<double>& sum,
const Kokkos::complex<float> x,
const Kokkos::complex<float> y)
{
const auto tmp = Kokkos::conj(x) * y;
sum += Kokkos::complex<double>(tmp.real(), tmp.imag());
}

// This exists in case people call the overload of KokkosBlas::dot
// that takes an output View, and the output View has element type
// Kokkos::complex<float>.
KOKKOS_INLINE_FUNCTION void
updateDot(Kokkos::complex<float>& sum,
const Kokkos::complex<float> x,
const Kokkos::complex<float> y)
{
sum += Kokkos::conj(x) * y;
}

// This exists because Kokkos::complex<double> =
// Kokkos::complex<float> is not defined.
template<class Out, class In>
struct CastPossiblyComplex {
static Out cast(const In& x) {
return x;
}
};

template<class OutReal, class InReal>
struct CastPossiblyComplex<Kokkos::complex<OutReal>, Kokkos::complex<InReal>>
{
static Kokkos::complex<OutReal>
cast (const Kokkos::complex<InReal>& x) {
return {x.real(), x.imag()};
}
};

} // namespace Details
} // namespace Kokkos

Expand Down
28 changes: 24 additions & 4 deletions packages/kokkos-kernels/src/blas/KokkosBlas1_dot.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,19 +90,39 @@ dot (const XVector& x, const YVector& y)
typename YVector::device_type,
Kokkos::MemoryTraits<Kokkos::Unmanaged> > YVector_Internal;

typedef Kokkos::View<typename XVector::non_const_value_type,
using dot_type =
typename Kokkos::Details::InnerProductSpaceTraits<
typename XVector::non_const_value_type>::dot_type;
// Some platforms, such as Mac Clang, seem to get poor accuracy with
// float and complex<float>. Work around some Trilinos test
// failures by using a higher-precision type for intermediate dot
// product sums.
constexpr bool is_complex_float =
std::is_same<dot_type, Kokkos::complex<float>>::value;
constexpr bool is_real_float = std::is_same<dot_type, float>::value;
using result_type = typename std::conditional<is_complex_float,
Kokkos::complex<double>,
typename std::conditional<is_real_float,
double,
dot_type
>::type
>::type;
using RVector_Internal = Kokkos::View<result_type,
Kokkos::LayoutLeft,
Kokkos::HostSpace,
Kokkos::MemoryTraits<Kokkos::Unmanaged> > RVector_Internal;
Kokkos::MemoryTraits<Kokkos::Unmanaged>>;

typename XVector::non_const_value_type result = 0;
result_type result {};
RVector_Internal R = RVector_Internal(&result);
XVector_Internal X = x;
YVector_Internal Y = y;

Impl::Dot<RVector_Internal,XVector_Internal,YVector_Internal>::dot (R,X,Y);
Kokkos::fence();
return result;
// mfh 22 Jan 2020: We need the line below because
// Kokkos::complex<T> lacks a constructor that takes a
// Kokkos::complex<U> with U != T.
return Kokkos::Details::CastPossiblyComplex<dot_type, result_type>::cast(result);
}

/// \brief Compute the column-wise dot products of two multivectors.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ struct DotFunctor
KOKKOS_FORCEINLINE_FUNCTION void
operator() (const size_type &i, value_type& sum) const
{
sum += IPT::dot (m_x(i), m_y(i)); // m_x(i) * m_y(i)
Kokkos::Details::updateDot(sum, m_x(i), m_y(i)); // sum += m_x(i) * m_y(i)
}

KOKKOS_INLINE_FUNCTION void
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,10 @@ void createRegionHierarchy(const int maxRegPerProc,
regRowImporters,
quasiRegRowMaps);

for(int levelIdx = 0; levelIdx < numLevels; ++levelIdx) {
int noCoarseSmoothing = 1;
if (coarseSolverType == "smoother") noCoarseSmoothing = 0;

for(int levelIdx = 0; levelIdx < numLevels-noCoarseSmoothing; ++levelIdx) {
smootherSetup(smootherParams[levelIdx], regRowMaps[levelIdx],
regMatrices[levelIdx], regInterfaceScalings[levelIdx],
regRowImporters[levelIdx]);
Expand Down Expand Up @@ -845,6 +848,7 @@ void vCycle(const int l, ///< ID of current level
Array<std::vector<RCP<Xpetra::Import<LocalOrdinal, GlobalOrdinal, Node> > > > regRowImporters, ///< regional row importers
Array<Array<RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > > > regInterfaceScalings, ///< regional interface scaling factors
Array<RCP<Teuchos::ParameterList> > smootherParams, ///< region smoother parameter list
bool& zeroInitGuess,
RCP<ParameterList> coarseSolverData = Teuchos::null,
RCP<ParameterList> hierarchyData = Teuchos::null)
{
Expand Down Expand Up @@ -875,7 +879,7 @@ void vCycle(const int l, ///< ID of current level

// pre-smoothing
smootherApply(smootherParams[l], fineRegX, fineRegB, regMatrices[l],
regRowMaps[l], regRowImporters[l]);
regRowMaps[l], regRowImporters[l], zeroInitGuess);

tm = Teuchos::null;
tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 2 - compute residual")));
Expand Down Expand Up @@ -915,12 +919,13 @@ void vCycle(const int l, ///< ID of current level
sumInterfaceValues(coarseRegB, regRowMaps[l+1], regRowImporters[l+1]);

tm = Teuchos::null;
bool coarseZeroInitGuess = true;

// Call V-cycle recursively
vCycle(l+1, numLevels,
coarseRegX, coarseRegB, regMatrices, regProlong, compRowMaps,
quasiRegRowMaps, regRowMaps, regRowImporters, regInterfaceScalings,
smootherParams, coarseSolverData, hierarchyData);
smootherParams, coarseZeroInitGuess, coarseSolverData, hierarchyData);

tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 6 - transfer coarse to fine")));

Expand All @@ -940,6 +945,7 @@ void vCycle(const int l, ///< ID of current level
for (int j = 0; j < maxRegPerProc; j++) {
fineRegX[j]->update(SC_ONE, *regCorrection[j], SC_ONE);
}
if (coarseZeroInitGuess) zeroInitGuess = true;

// std::cout << "level: " << l << std::endl;

Expand All @@ -948,7 +954,7 @@ void vCycle(const int l, ///< ID of current level

// post-smoothing
smootherApply(smootherParams[l], fineRegX, fineRegB, regMatrices[l],
regRowMaps[l], regRowImporters[l]);
regRowMaps[l], regRowImporters[l], zeroInitGuess);

tm = Teuchos::null;

Expand All @@ -964,8 +970,9 @@ void vCycle(const int l, ///< ID of current level
const std::string coarseSolverType = coarseSolverData->get<std::string>("coarse solver type");
if (coarseSolverType == "smoother") {
smootherApply(smootherParams[l], fineRegX, fineRegB, regMatrices[l],
regRowMaps[l], regRowImporters[l]);
regRowMaps[l], regRowImporters[l], zeroInitGuess);
} else {
zeroInitGuess = false;
// First get the Xpetra vectors from region to composite format
RCP<const Map> coarseRowMap = coarseSolverData->get<RCP<const Map> >("compCoarseRowMap");
RCP<Vector> compX = VectorFactory::Build(coarseRowMap, true);
Expand Down Expand Up @@ -1032,7 +1039,8 @@ void vCycle(const int l, ///< ID of current level
RCP<Hierarchy> amgHierarchy = coarseSolverData->get<RCP<Hierarchy>>("amg hierarchy object");

// Run a single V-cycle
amgHierarchy->Iterate(*compRhs, *compX, 1);

amgHierarchy->Iterate(*compRhs, *compX, true, 1);
}
else
{
Expand Down
Loading

0 comments on commit 2bfd2c7

Please sign in to comment.