From 1ff37f4186031243712577b5bed50fd741c8cacd Mon Sep 17 00:00:00 2001 From: Robert Chen Date: Tue, 3 Sep 2024 16:43:23 -0700 Subject: [PATCH 1/6] Fix memory leak in Launch OpenMP backend. --- include/RAJA/policy/openmp/launch.hpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/include/RAJA/policy/openmp/launch.hpp b/include/RAJA/policy/openmp/launch.hpp index f3aabcf579..82394709b0 100644 --- a/include/RAJA/policy/openmp/launch.hpp +++ b/include/RAJA/policy/openmp/launch.hpp @@ -77,6 +77,9 @@ struct LaunchExecute { ctx.shared_mem_ptr = (char*) malloc(launch_params.shared_mem_size); expt::invoke_body(f_params, loop_body.get_priv(), ctx); + + free(ctx.shared_mem_ptr); + ctx.shared_mem_ptr = nullptr; } expt::ParamMultiplexer::resolve(f_params); @@ -97,6 +100,7 @@ struct LoopExecute { BODY const &body) { + printf( "RCC omp_parallel_for_exec 1\n" ); int len = segment.end() - segment.begin(); RAJA::region([&]() { using RAJA::internal::thread_privatize; @@ -116,6 +120,7 @@ struct LoopExecute { BODY const &body) { + printf( "RCC omp_parallel_for_exec 2\n" ); const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -143,6 +148,7 @@ struct LoopExecute { BODY const &body) { + printf( "RCC omp_parallel_for_exec 3\n" ); const int len2 = segment2.end() - segment2.begin(); const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -175,6 +181,7 @@ struct LoopExecute { BODY const &body) { + //printf( "RCC omp_for_exec 1\n" ); int len = segment.end() - segment.begin(); #pragma omp for for (int i = 0; i < len; i++) { @@ -191,6 +198,7 @@ struct LoopExecute { BODY const &body) { + printf( "RCC omp_for_exec 2\n" ); const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -212,6 +220,7 @@ struct LoopExecute { BODY const &body) { + printf( "RCC omp_for_exec 3\n" ); const int len2 = segment2.end() - segment2.begin(); 
const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -242,6 +251,7 @@ struct LoopICountExecute { BODY const &body) { + printf( "RCC omp_for_exec icount 1\n" ); int len = segment.end() - segment.begin(); #pragma omp for @@ -258,6 +268,7 @@ struct LoopICountExecute { BODY const &body) { + printf( "RCC omp_for_exec icount 2\n" ); const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -282,6 +293,7 @@ struct LoopICountExecute { BODY const &body) { + printf( "RCC omp_for_exec icount 3\n" ); const int len2 = segment2.end() - segment2.begin(); const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -316,6 +328,7 @@ struct LoopExecute { BODY const &body) { + printf( "RCC omp_parallel_nested_for_exec 1\n" ); const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -343,6 +356,7 @@ struct LoopExecute { BODY const &body) { + printf( "RCC omp_parallel_nested_for_exec 2\n" ); const int len2 = segment2.end() - segment2.begin(); const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -377,6 +391,7 @@ struct LoopICountExecute { BODY const &body) { + printf( "RCC omp_parallel_nested_for_exec icount 1\n" ); const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -406,6 +421,7 @@ struct LoopICountExecute { BODY const &body) { + printf( "RCC omp_parallel_nested_for_exec icount 2\n" ); const int len2 = segment2.end() - segment2.begin(); const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -443,6 +459,7 @@ struct TileExecute { BODY const &body) { + printf( "RCC omp_parallel_for_exec tile 1\n" ); int len = segment.end() - segment.begin(); RAJA::region([&]() { @@ -468,6 +485,7 @@ struct TileTCountExecute { BODY const &body) { + 
printf( "RCC omp_parallel_for_exec tile 2\n" ); const int len = segment.end() - segment.begin(); const int numTiles = (len - 1) / tile_size + 1; @@ -495,6 +513,7 @@ struct TileExecute { BODY const &body) { + printf( "RCC omp_for_exec tile seg 1\n" ); int len = segment.end() - segment.begin(); #pragma omp for for (int i = 0; i < len; i += tile_size) { @@ -514,6 +533,7 @@ struct TileTCountExecute { BODY const &body) { + printf( "RCC omp_for_exec tile tcount seg 1\n" ); const int len = segment.end() - segment.begin(); const int numTiles = (len - 1) / tile_size + 1; From 2468bb74082a09db78dbe5c75086c7a0006e42ff Mon Sep 17 00:00:00 2001 From: Robert Chen Date: Tue, 3 Sep 2024 16:43:56 -0700 Subject: [PATCH 2/6] Updates to Intel script paths. --- scripts/lc-builds/toss4_icpc-classic.sh | 2 +- scripts/lc-builds/toss4_icpx.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/lc-builds/toss4_icpc-classic.sh b/scripts/lc-builds/toss4_icpc-classic.sh index 248d8050ef..447cf35ac8 100755 --- a/scripts/lc-builds/toss4_icpc-classic.sh +++ b/scripts/lc-builds/toss4_icpc-classic.sh @@ -55,6 +55,6 @@ echo echo " Please note that you may need to add some intel openmp libraries to your" echo " LD_LIBRARY_PATH to run with openmp." 
echo -echo " LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/usr/tce/packages/intel/intel-${COMP_VER}/compiler/lib/intel64_lin" +echo " LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/usr/tce/packages/intel-classic/intel-classic-${COMP_VER}/compiler/lib/intel64_lin" echo echo "***********************************************************************" diff --git a/scripts/lc-builds/toss4_icpx.sh b/scripts/lc-builds/toss4_icpx.sh index c97479da9a..d6fe0d867b 100755 --- a/scripts/lc-builds/toss4_icpx.sh +++ b/scripts/lc-builds/toss4_icpx.sh @@ -39,8 +39,8 @@ source /usr/tce/packages/intel/intel-${COMP_VER}/setvars.sh cmake \ -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_CXX_COMPILER=/usr/tce/packages/intel/intel-${COMP_VER}/compiler/${COMP_VER}/linux/bin/icpx \ - -DCMAKE_C_COMPILER=/usr/tce/packages/intel/intel-${COMP_VER}/compiler/${COMP_VER}/linux/bin/icx \ + -DCMAKE_CXX_COMPILER=/usr/tce/packages/intel/intel-${COMP_VER}/bin/icpx \ + -DCMAKE_C_COMPILER=/usr/tce/packages/intel/intel-${COMP_VER}/bin/icx \ -DBLT_CXX_STD=c++14 \ -C ../host-configs/lc-builds/toss4/icpx_X.cmake \ -DRAJA_ENABLE_FORCEINLINE_RECURSIVE=Off \ From bd1cc80b0c4a6e27865a819fbf1475af1067163b Mon Sep 17 00:00:00 2001 From: Robert Chen Date: Tue, 3 Sep 2024 17:16:03 -0700 Subject: [PATCH 3/6] TOSS4 sanitizer builds for clang. ASAN and UBSAN only. 
--- .../lc-builds/toss4/clang_X_asan.cmake | 14 +++++ .../lc-builds/toss4/clang_X_ubsan.cmake | 14 +++++ scripts/lc-builds/toss4_clang_san.sh | 57 +++++++++++++++++++ 3 files changed, 85 insertions(+) create mode 100644 host-configs/lc-builds/toss4/clang_X_asan.cmake create mode 100644 host-configs/lc-builds/toss4/clang_X_ubsan.cmake create mode 100755 scripts/lc-builds/toss4_clang_san.sh diff --git a/host-configs/lc-builds/toss4/clang_X_asan.cmake b/host-configs/lc-builds/toss4/clang_X_asan.cmake new file mode 100644 index 0000000000..ca6d88a222 --- /dev/null +++ b/host-configs/lc-builds/toss4/clang_X_asan.cmake @@ -0,0 +1,14 @@ +############################################################################### +# Copyright (c) 2016-24, Lawrence Livermore National Security, LLC +# and RAJA project contributors. See the RAJA/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +set(RAJA_COMPILER "RAJA_COMPILER_CLANG" CACHE STRING "") + +set(CMAKE_CXX_FLAGS_RELEASE "--gcc-toolchain=/usr/tce/packages/gcc/gcc-10.3.1 -O3 -march=native -funroll-loops -finline-functions" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "--gcc-toolchain=/usr/tce/packages/gcc/gcc-10.3.1 -O3 -g -march=native -funroll-loops -finline-functions" CACHE STRING "") +set(CMAKE_CXX_FLAGS_DEBUG "--gcc-toolchain=/usr/tce/packages/gcc/gcc-10.3.1 -O0 -g -fsanitize=address -fno-omit-frame-pointer" CACHE STRING "") + +set(RAJA_HOST_CONFIG_LOADED On CACHE BOOL "") diff --git a/host-configs/lc-builds/toss4/clang_X_ubsan.cmake b/host-configs/lc-builds/toss4/clang_X_ubsan.cmake new file mode 100644 index 0000000000..47016823ba --- /dev/null +++ b/host-configs/lc-builds/toss4/clang_X_ubsan.cmake @@ -0,0 +1,14 @@ +############################################################################### +# Copyright (c) 2016-24, Lawrence Livermore National Security, LLC +# and RAJA project contributors. 
See the RAJA/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +set(RAJA_COMPILER "RAJA_COMPILER_CLANG" CACHE STRING "") + +set(CMAKE_CXX_FLAGS_RELEASE "--gcc-toolchain=/usr/tce/packages/gcc/gcc-10.3.1 -O3 -march=native -funroll-loops -finline-functions" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "--gcc-toolchain=/usr/tce/packages/gcc/gcc-10.3.1 -O3 -g -march=native -funroll-loops -finline-functions" CACHE STRING "") +set(CMAKE_CXX_FLAGS_DEBUG "--gcc-toolchain=/usr/tce/packages/gcc/gcc-10.3.1 -O0 -g -fsanitize=undefined -fno-omit-frame-pointer -fsanitize=integer" CACHE STRING "") + +set(RAJA_HOST_CONFIG_LOADED On CACHE BOOL "") diff --git a/scripts/lc-builds/toss4_clang_san.sh b/scripts/lc-builds/toss4_clang_san.sh new file mode 100755 index 0000000000..e7501c09cd --- /dev/null +++ b/scripts/lc-builds/toss4_clang_san.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash + +############################################################################### +# Copyright (c) 2016-24, Lawrence Livermore National Security, LLC +# and RAJA project contributors. See the RAJA/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +if [[ $# -lt 2 ]]; then # both the compiler version and the sanitizer name are required + echo + echo "You must pass 2 arguments to the script (in this order): " + echo " 1) compiler version number for clang" + echo " 2) sanitizer version (one of 2 options: asan, or ubsan)" + echo + echo "For example: " + echo " toss4_clang_san.sh 14.0.6-magic asan" + exit 1 # non-zero status so callers can detect the usage error +fi + +COMP_VER=$1 +SAN_VER=$2 +shift 2 + +if [[ ( ${SAN_VER} != "asan" ) && ( ${SAN_VER} != "ubsan" ) ]] ; then + echo "Sanitizer version must be \"asan\" or \"ubsan\". Exiting!" 
; exit 1 +fi + +BUILD_SUFFIX=lc_toss4-clang-${COMP_VER}-${SAN_VER} + +echo +echo "Creating build directory build_${BUILD_SUFFIX} and generating configuration in it" +echo "Configuration extra arguments:" +echo " $@" +echo + +rm -rf build_${BUILD_SUFFIX} 2>/dev/null +mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} || exit 1 + +module load cmake/3.23.1 + +cmake \ + -DCMAKE_BUILD_TYPE=Debug \ + -DCMAKE_CXX_COMPILER=/usr/tce/packages/clang/clang-${COMP_VER}/bin/clang++ \ + -DBLT_CXX_STD=c++14 \ + -C ../host-configs/lc-builds/toss4/clang_X_${SAN_VER}.cmake \ + -DENABLE_OPENMP=On \ + -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ + "$@" \ + .. + +if [[ ( ${SAN_VER} = "ubsan" ) ]] ; then + echo "To view ubsan output, set the following environment variable: " + echo " UBSAN_OPTIONS=log_path=/path/to/ubsan_log_prefix" + echo + echo "Each test will create a ubsan output file with the prefix \"ubsan_log_prefix\"." +fi From 13d640a7305f772f556b9604033e520c8765f89e Mon Sep 17 00:00:00 2001 From: Robert Chen Date: Tue, 3 Sep 2024 17:22:02 -0700 Subject: [PATCH 4/6] Remove debugging prints. 
--- include/RAJA/policy/openmp/launch.hpp | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/include/RAJA/policy/openmp/launch.hpp b/include/RAJA/policy/openmp/launch.hpp index 82394709b0..7856bd6fda 100644 --- a/include/RAJA/policy/openmp/launch.hpp +++ b/include/RAJA/policy/openmp/launch.hpp @@ -100,7 +100,6 @@ struct LoopExecute { BODY const &body) { - printf( "RCC omp_parallel_for_exec 1\n" ); int len = segment.end() - segment.begin(); RAJA::region([&]() { using RAJA::internal::thread_privatize; @@ -120,7 +119,6 @@ struct LoopExecute { BODY const &body) { - printf( "RCC omp_parallel_for_exec 2\n" ); const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -148,7 +146,6 @@ struct LoopExecute { BODY const &body) { - printf( "RCC omp_parallel_for_exec 3\n" ); const int len2 = segment2.end() - segment2.begin(); const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -181,7 +178,6 @@ struct LoopExecute { BODY const &body) { - //printf( "RCC omp_for_exec 1\n" ); int len = segment.end() - segment.begin(); #pragma omp for for (int i = 0; i < len; i++) { @@ -198,7 +194,6 @@ struct LoopExecute { BODY const &body) { - printf( "RCC omp_for_exec 2\n" ); const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -220,7 +215,6 @@ struct LoopExecute { BODY const &body) { - printf( "RCC omp_for_exec 3\n" ); const int len2 = segment2.end() - segment2.begin(); const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -251,7 +245,6 @@ struct LoopICountExecute { BODY const &body) { - printf( "RCC omp_for_exec icount 1\n" ); int len = segment.end() - segment.begin(); #pragma omp for @@ -268,7 +261,6 @@ struct LoopICountExecute { BODY const &body) { - printf( "RCC omp_for_exec icount 2\n" ); const int len1 = segment1.end() - segment1.begin(); const int len0 = 
segment0.end() - segment0.begin(); @@ -293,7 +285,6 @@ struct LoopICountExecute { BODY const &body) { - printf( "RCC omp_for_exec icount 3\n" ); const int len2 = segment2.end() - segment2.begin(); const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -328,7 +319,6 @@ struct LoopExecute { BODY const &body) { - printf( "RCC omp_parallel_nested_for_exec 1\n" ); const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -356,7 +346,6 @@ struct LoopExecute { BODY const &body) { - printf( "RCC omp_parallel_nested_for_exec 2\n" ); const int len2 = segment2.end() - segment2.begin(); const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -391,7 +380,6 @@ struct LoopICountExecute { BODY const &body) { - printf( "RCC omp_parallel_nested_for_exec icount 1\n" ); const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -421,7 +409,6 @@ struct LoopICountExecute { BODY const &body) { - printf( "RCC omp_parallel_nested_for_exec icount 2\n" ); const int len2 = segment2.end() - segment2.begin(); const int len1 = segment1.end() - segment1.begin(); const int len0 = segment0.end() - segment0.begin(); @@ -459,7 +446,6 @@ struct TileExecute { BODY const &body) { - printf( "RCC omp_parallel_for_exec tile 1\n" ); int len = segment.end() - segment.begin(); RAJA::region([&]() { @@ -485,7 +471,6 @@ struct TileTCountExecute { BODY const &body) { - printf( "RCC omp_parallel_for_exec tile 2\n" ); const int len = segment.end() - segment.begin(); const int numTiles = (len - 1) / tile_size + 1; @@ -513,7 +498,6 @@ struct TileExecute { BODY const &body) { - printf( "RCC omp_for_exec tile seg 1\n" ); int len = segment.end() - segment.begin(); #pragma omp for for (int i = 0; i < len; i += tile_size) { @@ -533,7 +517,6 @@ struct TileTCountExecute { BODY const &body) { - printf( "RCC omp_for_exec tile 
tcount seg 1\n" ); const int len = segment.end() - segment.begin(); const int numTiles = (len - 1) / tile_size + 1; From ffa8397e26e872aa7de641923ba2312dadc5fb1b Mon Sep 17 00:00:00 2001 From: Robert Chen Date: Tue, 3 Sep 2024 17:26:20 -0700 Subject: [PATCH 5/6] clang_X_ubsan.cmake --- host-configs/lc-builds/toss4/clang_X_ubsan.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/host-configs/lc-builds/toss4/clang_X_ubsan.cmake b/host-configs/lc-builds/toss4/clang_X_ubsan.cmake index 47016823ba..3cda001f04 100644 --- a/host-configs/lc-builds/toss4/clang_X_ubsan.cmake +++ b/host-configs/lc-builds/toss4/clang_X_ubsan.cmake @@ -7,8 +7,8 @@ set(RAJA_COMPILER "RAJA_COMPILER_CLANG" CACHE STRING "") -set(CMAKE_CXX_FLAGS_RELEASE "--gcc-toolchain=/usr/tce/packages/gcc/gcc-10.3.1 -O3 -march=native -funroll-loops -finline-functions" CACHE STRING "") -set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "--gcc-toolchain=/usr/tce/packages/gcc/gcc-10.3.1 -O3 -g -march=native -funroll-loops -finline-functions" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELEASE "--gcc-toolchain=/usr/tce/packages/gcc/gcc-10.3.1 -O3 -march=native -funroll-loops -finline-functions -fsanitize=undefined -fno-omit-frame-pointer -fsanitize=integer" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "--gcc-toolchain=/usr/tce/packages/gcc/gcc-10.3.1 -O3 -g -march=native -funroll-loops -finline-functions -fsanitize=undefined -fno-omit-frame-pointer -fsanitize=integer" CACHE STRING "") set(CMAKE_CXX_FLAGS_DEBUG "--gcc-toolchain=/usr/tce/packages/gcc/gcc-10.3.1 -O0 -g -fsanitize=undefined -fno-omit-frame-pointer -fsanitize=integer" CACHE STRING "") set(RAJA_HOST_CONFIG_LOADED On CACHE BOOL "") From a222c7fbfedaa77e5a7eb8e26839357c4e72d651 Mon Sep 17 00:00:00 2001 From: Robert Chen Date: Tue, 3 Sep 2024 17:26:32 -0700 Subject: [PATCH 6/6] clang_X_asan.cmake --- host-configs/lc-builds/toss4/clang_X_asan.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/host-configs/lc-builds/toss4/clang_X_asan.cmake b/host-configs/lc-builds/toss4/clang_X_asan.cmake index ca6d88a222..fa2daa95b0 100644 --- a/host-configs/lc-builds/toss4/clang_X_asan.cmake +++ b/host-configs/lc-builds/toss4/clang_X_asan.cmake @@ -7,8 +7,8 @@ set(RAJA_COMPILER "RAJA_COMPILER_CLANG" CACHE STRING "") -set(CMAKE_CXX_FLAGS_RELEASE "--gcc-toolchain=/usr/tce/packages/gcc/gcc-10.3.1 -O3 -march=native -funroll-loops -finline-functions" CACHE STRING "") -set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "--gcc-toolchain=/usr/tce/packages/gcc/gcc-10.3.1 -O3 -g -march=native -funroll-loops -finline-functions" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELEASE "--gcc-toolchain=/usr/tce/packages/gcc/gcc-10.3.1 -O3 -march=native -funroll-loops -finline-functions -fsanitize=address -fno-omit-frame-pointer" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "--gcc-toolchain=/usr/tce/packages/gcc/gcc-10.3.1 -O3 -g -march=native -funroll-loops -finline-functions -fsanitize=address -fno-omit-frame-pointer" CACHE STRING "") set(CMAKE_CXX_FLAGS_DEBUG "--gcc-toolchain=/usr/tce/packages/gcc/gcc-10.3.1 -O0 -g -fsanitize=address -fno-omit-frame-pointer" CACHE STRING "") set(RAJA_HOST_CONFIG_LOADED On CACHE BOOL "")