Skip to content

Commit

Permalink
Merge pull request #485 from kokkos/update-testing
Browse files Browse the repository at this point in the history
Update testing and KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND
  • Loading branch information
ndellingwood authored Oct 25, 2019
2 parents 648b2a1 + 57a0ecc commit 5d42fb8
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 8 deletions.
10 changes: 10 additions & 0 deletions perf_test/graph/KokkosGraph_color_d2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,16 @@ int main(int argc, char *argv[])
}
#endif

// Run the experiment on the Kokkos::Threads backend. This section is compiled
// in only when KOKKOS_ENABLE_THREADS is defined, and executes only when the
// caller requested it through params.use_threads.
#if defined(KOKKOS_ENABLE_THREADS)
if(params.use_threads)
{
// The driver is invoked only when use_multi_mem is false; there is no else
// branch, so nothing is run for Threads when use_multi_mem is set.
if(!use_multi_mem)
{
// Instantiate the driver with Kokkos::Threads and its own memory_space.
KokkosKernels::Experiment::experiment_driver<kk_size_type, kk_lno_t, Kokkos::Threads, Kokkos::Threads::memory_space>(params);
}
}
#endif

#if defined(KOKKOS_ENABLE_CUDA)
if(params.use_cuda)
{
Expand Down
67 changes: 66 additions & 1 deletion scripts/test_all_sandia
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ if [[ "$HOSTNAME" == apollo\.* ]]; then
MACHINE=apollo
fi

# Any hostname beginning with "kokkos-dev-2" selects the kokkos-dev-2 machine
# configuration; MACHINE is compared against this value later in the script.
if [[ "$HOSTNAME" == kokkos-dev-2* ]]; then
MACHINE=kokkos-dev-2
fi

if [[ "$HOSTNAME" == mayer\.* ]]; then
MACHINE=mayer
# module load git
Expand Down Expand Up @@ -69,7 +73,8 @@ CUDA_IBM_BUILD_LIST="Cuda_OpenMP,Cuda_Serial"

GCC_WARNING_FLAGS="-Werror,-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized"
IBM_WARNING_FLAGS="-Werror,-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CLANG_WARNING_FLAGS="-Werror,-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized"
#CLANG_WARNING_FLAGS="-Werror,-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized"
INTEL_WARNING_FLAGS="-Werror,-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized"
PGI_WARNING_FLAGS=""
Expand Down Expand Up @@ -283,6 +288,64 @@ elif [ "$MACHINE" = "kokkos-dev" ]; then
"cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
)
fi
elif [ "$MACHINE" = "kokkos-dev-2" ]; then
  # kokkos-dev-2: source the SEMS module init script, add the local modulefile
  # tree, and start from a purged module environment before loading the
  # common tools.
  source /projects/sems/modulefiles/utils/sems-modules-init.sh
  module use /home/projects/x86-64/modulefiles/local
  module purge
  module load sems-env
  module load kokkos-env

  module load sems-git
  module load sems-tex
  module load sems-cmake/3.12.2
  module load sems-gdb

  SKIP_HWLOC=True

  # Module-list templates. <COMPILER_NAME>/<COMPILER_VERSION> are placeholders;
  # presumably they are substituted from the first field of each COMPILERS
  # entry by code outside this branch.
  #   BASE   - compilers provided as "sems-<name>/<version>" modules.
  #   GCC91  - compiler module without the "sems-" prefix.
  #   NVCC   - CUDA toolkit module plus sems-gcc/7.3.0.
  BASE_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,kokkos-hwloc/1.10.1/base,sems-<COMPILER_NAME>/<COMPILER_VERSION>"
  GCC91_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,kokkos-hwloc/1.10.1/base,<COMPILER_NAME>/<COMPILER_VERSION>"
  NVCC_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,kokkos-hwloc/1.10.1/base,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/7.3.0"

  #   CLANG  - "sems-" clang module plus sems-gcc/6.1.0.
  #   CLANG8 - un-prefixed clang module plus cuda/10.0 (the only clang list
  #            that loads a CUDA toolkit).
  CLANG_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/6.1.0"
  CLANG8_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/10.0"

  # Build lists specific to this machine.
  BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_Pthread"
  BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_OpenMP"
  BUILD_LIST_CLANG="Serial,Pthread,OpenMP"

  if [ "$SPOT_CHECK" = "True" ]; then
    # Format: (compiler module-list build-list exe-name warning-flag)
    COMPILERS=("gcc/7.3.0 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS"
               "gcc/9.1 $GCC91_MODULE_LIST "OpenMP,Serial" g++ $GCC_WARNING_FLAGS"
               "intel/18.0.5 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
               "clang/8.0 $CLANG8_MODULE_LIST "Cuda_OpenMP,Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
               "cuda/10.1 $NVCC_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
    )
  else
    # Format: (compiler module-list build-list exe-name warning-flag)
    #
    # The cuda/* rows use NVCC_MODULE_LIST: this branch defines no
    # CUDA_MODULE_LIST, and an unset variable would expand to nothing and
    # shift every later field of the entry by one position.
    # The clang/8.0 rows use CLANG8_MODULE_LIST, matching the spot-check list
    # above; it is also the only clang list that loads cuda/10.0, which the
    # Cuda_* builds need.
    # NOTE(review): CLANG_MODULE_LIST is now unreferenced in this branch -
    # confirm whether one of the sems clang versions below was meant to use it.
    COMPILERS=("cuda/10.0 $NVCC_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
               "cuda/10.1 $NVCC_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
               "clang/8.0 $CLANG8_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
               "clang/8.0 $CLANG8_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
               "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
               "gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
               "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
               "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
               "gcc/7.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
               "gcc/7.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
               "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
               "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
               "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
               "intel/18.0.5 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
               "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
               "clang/5.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
               "clang/7.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
    )
  fi

  # Default architecture flags, applied only when the caller did not supply
  # ARCH_FLAG.
  if [ -z "$ARCH_FLAG" ]; then
    ARCH_FLAG="--arch=SNB,Volta70"
  fi
elif [ "$MACHINE" = "white" ]; then
source /etc/profile.d/modules.sh
SKIP_HWLOC=True
Expand All @@ -305,6 +368,7 @@ elif [ "$MACHINE" = "white" ]; then
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/6.4.0 $BASE_MODULE_LIST "OpenMP_Serial" g++ $GCC_WARNING_FLAGS"
"gcc/7.2.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/7.4.0 $BASE_MODULE_LIST "OpenMP" g++ $GCC_WARNING_FLAGS"
"ibm/16.1.0 $IBM_MODULE_LIST "Serial" xlC $IBM_WARNING_FLAGS"
"cuda/9.2.88 $CUDA_MODULE_LIST "Cuda_OpenMP" ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/10.0.130 $CUDA10_MODULE_LIST "Cuda_Serial" ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
Expand All @@ -313,6 +377,7 @@ elif [ "$MACHINE" = "white" ]; then
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/6.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/7.2.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/7.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"ibm/16.1.0 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
"ibm/16.1.1 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
"cuda/9.2.88 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
Expand Down
2 changes: 1 addition & 1 deletion src/blas/impl/KokkosBlas3_gemm_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@

#ifdef KOKKOS_ENABLE_CXX14
#ifdef KOKKOS_COMPILER_GNU
#if KOKKOS_COMPILER_GNU<=720
#if KOKKOS_COMPILER_GNU<=740
#define KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND
#endif
#endif
Expand Down
12 changes: 6 additions & 6 deletions src/sparse/impl/KokkosSparse_spmv_struct_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -530,15 +530,15 @@ struct SPMV_Struct_Functor {
KOKKOS_INLINE_FUNCTION
void operator() (const exterior1DTag&, const ordinal_type& exteriorIdx) const
{
typedef typename YVector::non_const_value_type y_value_type;
typedef typename YVector::non_const_value_type y_value_type_;

ordinal_type rowIdx = exteriorIdx*(ni - 1);

const size_type rowOffset = m_A.graph.row_map(rowIdx);
const ordinal_type row_length = static_cast<ordinal_type> (m_A.graph.row_map(rowIdx + 1) - rowOffset);
const value_type* value_ptr = &(m_A.values(rowOffset));
const ordinal_type* column_ptr = &(m_A.graph.entries(rowOffset));
y_value_type sum = 0;
y_value_type_ sum = 0;
for(ordinal_type entryIdx = 0; entryIdx < row_length; ++entryIdx) {
sum += (*(value_ptr + entryIdx))*m_x(*(column_ptr + entryIdx));
}
Expand All @@ -548,7 +548,7 @@ struct SPMV_Struct_Functor {
KOKKOS_INLINE_FUNCTION
void operator() (const exterior2DTag&, const ordinal_type& exteriorIdx) const
{
typedef typename YVector::non_const_value_type y_value_type;
typedef typename YVector::non_const_value_type y_value_type_;
const ordinal_type topFlag = exteriorIdx / (ni + 2*nj - 4);
const ordinal_type bottomFlag = static_cast<ordinal_type>((exteriorIdx / ni) == 0);

Expand All @@ -568,7 +568,7 @@ struct SPMV_Struct_Functor {
const ordinal_type row_length = static_cast<ordinal_type> (m_A.graph.row_map(rowIdx + 1) - rowOffset);
const value_type* value_ptr = &(m_A.values(rowOffset));
const ordinal_type* column_ptr = &(m_A.graph.entries(rowOffset));
y_value_type sum = 0;
y_value_type_ sum = 0;
for(ordinal_type entryIdx = 0; entryIdx < row_length; ++entryIdx) {
sum += (*(value_ptr + entryIdx))*m_x(*(column_ptr + entryIdx));
}
Expand All @@ -578,7 +578,7 @@ struct SPMV_Struct_Functor {
KOKKOS_INLINE_FUNCTION
void operator() (const exterior3DTag&, const ordinal_type& exteriorIdx) const
{
typedef typename YVector::non_const_value_type y_value_type;
typedef typename YVector::non_const_value_type y_value_type_;
const ordinal_type topFlag = static_cast<ordinal_type>(numExterior - exteriorIdx - 1 < ni*nj);
const ordinal_type bottomFlag = static_cast<ordinal_type>(exteriorIdx / (ni*nj) == 0);

Expand Down Expand Up @@ -612,7 +612,7 @@ struct SPMV_Struct_Functor {
const ordinal_type row_length = static_cast<ordinal_type> (m_A.graph.row_map(rowIdx + 1) - rowOffset);
const value_type* value_ptr = &(m_A.values(rowOffset));
const ordinal_type* column_ptr = &(m_A.graph.entries(rowOffset));
y_value_type sum = 0;
y_value_type_ sum = 0;
for(ordinal_type entryIdx = 0; entryIdx < row_length; ++entryIdx) {
sum += (*(value_ptr + entryIdx))*m_x(*(column_ptr + entryIdx));
}
Expand Down

0 comments on commit 5d42fb8

Please sign in to comment.