diff --git a/perf_test/graph/KokkosGraph_color_d2.cpp b/perf_test/graph/KokkosGraph_color_d2.cpp index 130466825c..ee19481d58 100644 --- a/perf_test/graph/KokkosGraph_color_d2.cpp +++ b/perf_test/graph/KokkosGraph_color_d2.cpp @@ -683,6 +683,16 @@ int main(int argc, char *argv[]) } #endif + #if defined(KOKKOS_ENABLE_THREADS) + if(params.use_threads) + { + if(!use_multi_mem) + { + KokkosKernels::Experiment::experiment_driver(params); + } + } + #endif + #if defined(KOKKOS_ENABLE_CUDA) if(params.use_cuda) { diff --git a/scripts/test_all_sandia b/scripts/test_all_sandia index efe5ed3b95..75cf4c2854 100755 --- a/scripts/test_all_sandia +++ b/scripts/test_all_sandia @@ -31,6 +31,10 @@ if [[ "$HOSTNAME" == apollo\.* ]]; then MACHINE=apollo fi +if [[ "$HOSTNAME" == kokkos-dev-2* ]]; then + MACHINE=kokkos-dev-2 +fi + if [[ "$HOSTNAME" == mayer\.* ]]; then MACHINE=mayer # module load git @@ -69,7 +73,8 @@ CUDA_IBM_BUILD_LIST="Cuda_OpenMP,Cuda_Serial" GCC_WARNING_FLAGS="-Werror,-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized" IBM_WARNING_FLAGS="-Werror,-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" -CLANG_WARNING_FLAGS="-Werror,-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" +CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" +#CLANG_WARNING_FLAGS="-Werror,-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" INTEL_WARNING_FLAGS="-Werror,-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" PGI_WARNING_FLAGS="" @@ -283,6 +288,64 @@ elif [ "$MACHINE" = "kokkos-dev" ]; then "cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) fi +elif [ "$MACHINE" = "kokkos-dev-2" ]; then + source /projects/sems/modulefiles/utils/sems-modules-init.sh + module use 
/home/projects/x86-64/modulefiles/local + module purge + module load sems-env + module load kokkos-env + + module load sems-git + module load sems-tex + module load sems-cmake/3.12.2 + module load sems-gdb + + SKIP_HWLOC=True + + BASE_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,kokkos-hwloc/1.10.1/base,sems-<COMPILER_NAME>/<COMPILER_VERSION>" + GCC91_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,kokkos-hwloc/1.10.1/base,<COMPILER_NAME>/<COMPILER_VERSION>" + NVCC_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,kokkos-hwloc/1.10.1/base,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/7.3.0" + + CLANG_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/6.1.0" + CLANG8_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/10.0" + + BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_Pthread" + BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_OpenMP" + BUILD_LIST_CLANG="Serial,Pthread,OpenMP" + + if [ "$SPOT_CHECK" = "True" ]; then + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/7.3.0 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS" + "gcc/9.1 $GCC91_MODULE_LIST "OpenMP,Serial" g++ $GCC_WARNING_FLAGS" + "intel/18.0.5 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" + "clang/8.0 $CLANG8_MODULE_LIST "Cuda_OpenMP,Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" + "cuda/10.1 $NVCC_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + ) + else + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("cuda/10.0 $CUDA_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/10.1 $CUDA_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "clang/8.0 $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS" + "clang/8.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS" + "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ 
$GCC_WARNING_FLAGS" + "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/7.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/7.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/18.0.5 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/5.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/7.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + ) + fi + + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=SNB,Volta70" + fi elif [ "$MACHINE" = "white" ]; then source /etc/profile.d/modules.sh SKIP_HWLOC=True @@ -305,6 +368,7 @@ elif [ "$MACHINE" = "white" ]; then # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("gcc/6.4.0 $BASE_MODULE_LIST "OpenMP_Serial" g++ $GCC_WARNING_FLAGS" "gcc/7.2.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/7.4.0 $BASE_MODULE_LIST "OpenMP" g++ $GCC_WARNING_FLAGS" "ibm/16.1.0 $IBM_MODULE_LIST "Serial" xlC $IBM_WARNING_FLAGS" "cuda/9.2.88 $CUDA_MODULE_LIST "Cuda_OpenMP" ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" "cuda/10.0.130 $CUDA10_MODULE_LIST "Cuda_Serial" ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" @@ -313,6 +377,7 @@ elif [ "$MACHINE" = "white" ]; then # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("gcc/6.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/7.2.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/7.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" "ibm/16.1.0 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" "ibm/16.1.1 $IBM_MODULE_LIST 
$IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" "cuda/9.2.88 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" diff --git a/src/blas/impl/KokkosBlas3_gemm_impl.hpp b/src/blas/impl/KokkosBlas3_gemm_impl.hpp index e68d03116c..da8a6a6de6 100644 --- a/src/blas/impl/KokkosBlas3_gemm_impl.hpp +++ b/src/blas/impl/KokkosBlas3_gemm_impl.hpp @@ -48,7 +48,7 @@ #ifdef KOKKOS_ENABLE_CXX14 #ifdef KOKKOS_COMPILER_GNU -#if KOKKOS_COMPILER_GNU<=720 +#if KOKKOS_COMPILER_GNU<=740 #define KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND #endif #endif diff --git a/src/sparse/impl/KokkosSparse_spmv_struct_impl.hpp b/src/sparse/impl/KokkosSparse_spmv_struct_impl.hpp index 65b7dfbf87..b7c9012d5b 100644 --- a/src/sparse/impl/KokkosSparse_spmv_struct_impl.hpp +++ b/src/sparse/impl/KokkosSparse_spmv_struct_impl.hpp @@ -530,7 +530,7 @@ struct SPMV_Struct_Functor { KOKKOS_INLINE_FUNCTION void operator() (const exterior1DTag&, const ordinal_type& exteriorIdx) const { - typedef typename YVector::non_const_value_type y_value_type; + typedef typename YVector::non_const_value_type y_value_type_; ordinal_type rowIdx = exteriorIdx*(ni - 1); @@ -538,7 +538,7 @@ struct SPMV_Struct_Functor { const ordinal_type row_length = static_cast<ordinal_type> (m_A.graph.row_map(rowIdx + 1) - rowOffset); const value_type* value_ptr = &(m_A.values(rowOffset)); const ordinal_type* column_ptr = &(m_A.graph.entries(rowOffset)); - y_value_type sum = 0; + y_value_type_ sum = 0; for(ordinal_type entryIdx = 0; entryIdx < row_length; ++entryIdx) { sum += (*(value_ptr + entryIdx))*m_x(*(column_ptr + entryIdx)); } @@ -548,7 +548,7 @@ struct SPMV_Struct_Functor { KOKKOS_INLINE_FUNCTION void operator() (const exterior2DTag&, const ordinal_type& exteriorIdx) const { - typedef typename YVector::non_const_value_type y_value_type; + typedef typename YVector::non_const_value_type y_value_type_; const ordinal_type topFlag = exteriorIdx / (ni + 2*nj - 4); const ordinal_type bottomFlag = static_cast<ordinal_type>((exteriorIdx / ni) 
== 0); @@ -568,7 +568,7 @@ struct SPMV_Struct_Functor { const ordinal_type row_length = static_cast<ordinal_type> (m_A.graph.row_map(rowIdx + 1) - rowOffset); const value_type* value_ptr = &(m_A.values(rowOffset)); const ordinal_type* column_ptr = &(m_A.graph.entries(rowOffset)); - y_value_type sum = 0; + y_value_type_ sum = 0; for(ordinal_type entryIdx = 0; entryIdx < row_length; ++entryIdx) { sum += (*(value_ptr + entryIdx))*m_x(*(column_ptr + entryIdx)); } @@ -578,7 +578,7 @@ struct SPMV_Struct_Functor { KOKKOS_INLINE_FUNCTION void operator() (const exterior3DTag&, const ordinal_type& exteriorIdx) const { - typedef typename YVector::non_const_value_type y_value_type; + typedef typename YVector::non_const_value_type y_value_type_; const ordinal_type topFlag = static_cast<ordinal_type>(numExterior - exteriorIdx - 1 < ni*nj); const ordinal_type bottomFlag = static_cast<ordinal_type>(exteriorIdx / (ni*nj) == 0); @@ -612,7 +612,7 @@ struct SPMV_Struct_Functor { const ordinal_type row_length = static_cast<ordinal_type> (m_A.graph.row_map(rowIdx + 1) - rowOffset); const value_type* value_ptr = &(m_A.values(rowOffset)); const ordinal_type* column_ptr = &(m_A.graph.entries(rowOffset)); - y_value_type sum = 0; + y_value_type_ sum = 0; for(ordinal_type entryIdx = 0; entryIdx < row_length; ++entryIdx) { sum += (*(value_ptr + entryIdx))*m_x(*(column_ptr + entryIdx)); }