From e01058370cb006fb619420164f0b3575b8b28c3e Mon Sep 17 00:00:00 2001 From: Anderson Chauphan Date: Tue, 3 Dec 2024 16:03:13 -0600 Subject: [PATCH 01/33] Update default get_dependencies case GenConfig is open-sourced now, point default get_dependencies to the GitHub repository and remove the container specific cases. Signed-off-by: Anderson Chauphan --- packages/framework/get_dependencies.sh | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/packages/framework/get_dependencies.sh b/packages/framework/get_dependencies.sh index 74c26e4eb601..d07bec01f206 100755 --- a/packages/framework/get_dependencies.sh +++ b/packages/framework/get_dependencies.sh @@ -3,7 +3,8 @@ ini_file_option=$1 script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)" # Data that needs to be updated when GenConfig changes! -genconfig_sha1=924a08af66f0a0573b5dd1128179731489339aec +genconfig_sha1=HEAD + # The following code contains no changing data @@ -59,7 +60,7 @@ function tril_genconfig_clone_or_update_repo() { if [[ "${has_submodules}" == "has-submodules" ]] ; then echo echo "STATUS: ${sub_dir}: Update submodules" - cmd="git submodule update --force --init --recursive" + cmd="git submodule update --force --init" retry_command "${cmd}" cd - > /dev/null elif [[ "${has_submodules}" != "" ]] ; then @@ -70,15 +71,10 @@ function tril_genconfig_clone_or_update_repo() { popd &> /dev/null } -# Clone or update the repos -if [[ "$ini_file_option" == "--container" ]] ; then - echo "In a container it is assumed that GenConfig is already in the container at /GenConfig" -else - #Clone GenConfig from gitlab-ex - tril_genconfig_clone_or_update_repo \ - git@gitlab-ex.sandia.gov:trilinos-devops-consolidation/code/GenConfig.git \ - GenConfig has-submodules ${genconfig_sha1} -fi +# Clone GenConfig from GitHub +tril_genconfig_clone_or_update_repo \ + https://github.com/sandialabs/GenConfig.git \ + GenConfig has-submodules ${genconfig_sha1} if [[ 
"$ini_file_option" == "--srn" ]] ; then #Clone srn-ini-files from cee-gitlab @@ -92,10 +88,6 @@ elif [[ "$ini_file_option" == "--son" ]] ; then git@gitlab-ex.sandia.gov:trilinos-project/son-ini-files.git \ son-ini-files -elif [[ "$ini_file_option" == "--container" ]] ; then - #Copy Genconfig into place from /GenConfig - cp -R /GenConfig ${script_dir} - elif [[ "$ini_file_option" != "" ]] ; then echo "ERROR: Option '${ini_file_option}' not allowed! Must select '--son', '--srn' or ''." exit 1 From 57b04b65ad3597509e95deaaa5dfea623523931f Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Mon, 16 Dec 2024 13:35:19 -0700 Subject: [PATCH 02/33] Correct Spack CUDA TPL specs Realized that most of the "SEMS_CUDA_TPL_LOCATIONS" were actually general to a default installation of those TPLs using Spack. So roll that into CUDA_TPL_ENABLES and retitle it SPACK_CUDA_TPLS. Separate out the part that does the BLAS/LAPACK from the system, since _that_ is SEMS-specific, and include it into SEMS_COMMON_CUDA. Signed-off-by: Samuel E. 
Browne --- packages/framework/ini-files/config-specs.ini | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index a59a18d50591..8faf62e8e79a 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -1181,7 +1181,7 @@ opt-set-cmake-var Trilinos_WARNINGS_MODE STRING : WARN [COMPILER|INTEL] opt-set-cmake-var MPI_EXEC FILEPATH : mpirun -[CUDA_TPL_ENABLES] +[SPACK_CUDA_TPLS] opt-set-cmake-var TPL_ENABLE_BLAS BOOL FORCE : ON opt-set-cmake-var TPL_ENABLE_BinUtils BOOL FORCE : OFF opt-set-cmake-var TPL_ENABLE_Boost BOOL FORCE : ON @@ -1206,16 +1206,12 @@ opt-set-cmake-var EpetraExt_ENABLE_HDF5 BOOL FORCE : OFF opt-set-cmake-var Kokkos_ENABLE_CUDA BOOL FORCE : ON opt-set-cmake-var Zoltan_ENABLE_Scotch BOOL FORCE : OFF -[SEMS_CUDA_TPL_LOCATIONS] opt-set-cmake-var TPL_BoostLib_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a opt-set-cmake-var TPL_Boost_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a opt-set-cmake-var TPL_DLlib_LIBRARIES FILEPATH FORCE : "-ldl" opt-set-cmake-var TPL_HDF5_LIBRARIES STRING FORCE : ${HDF5_LIB|ENV}/libhdf5_hl.so;${HDF5_LIB|ENV}/libhdf5.a;${ZLIB_LIB|ENV}/libz.a;-ldl opt-set-cmake-var TPL_METIS_LIBRARIES STRING FORCE : ${METIS_LIB|ENV}/libmetis.so opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : -L${NETCDF_C_ROOT|ENV}/lib64;${NETCDF_C_ROOT|ENV}/lib/libnetcdf.a;${PARALLEL_NETCDF_ROOT|ENV}/lib/libpnetcdf.a;${TPL_HDF5_LIBRARIES|CMAKE} -# see https://github.com/trilinos/Trilinos/issues/11109#issuecomment-1272146298 -opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : /lib64/libblas.so.3 -opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : /lib64/liblapack.so.3 opt-set-cmake-var Netcdf_INCLUDE_DIRS STRING FORCE : ${NETCDF_C_INC|ENV} opt-set-cmake-var 
ParMETIS_INCLUDE_DIRS STRING FORCE : ${PARMETIS_INC|ENV} @@ -1226,7 +1222,7 @@ opt-set-cmake-var SuperLU_INCLUDE_DIRS STRING FORCE : ${SUPERLU_INC|ENV} opt-set-cmake-var SuperLU_LIBRARY_DIRS STRING FORCE : ${SUPERLU_LIB|ENV} [CUDA] -use CUDA_TPL_ENABLES +use SPACK_CUDA_TPLS #CXX Settings opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -fPIC -Wall -Warray-bounds -Wchar-subscripts -Wcomment -Wenum-compare -Wformat -Wuninitialized -Wmaybe-uninitialized -Wmain -Wnarrowing -Wnonnull -Wreorder -Wreturn-type -Wsequence-point -Wtrigraphs -Wunused-function -Wunused-but-set-variable -Wwrite-strings @@ -1243,7 +1239,9 @@ opt-set-cmake-var Tpetra_INST_SERIAL BOOL FORCE : ON [SEMS_COMMON_CUDA] use CUDA -use SEMS_CUDA_TPL_LOCATIONS +# see https://github.com/trilinos/Trilinos/issues/11109#issuecomment-1272146298 +opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : /lib64/libblas.so.3 +opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : /lib64/liblapack.so.3 [CUDA-RUN-SERIAL-TESTS] opt-set-cmake-var Kokkos_CoreUnitTest_Cuda1_SET_RUN_SERIAL BOOL FORCE : ON From b9c325845f34cd947789bbe404ada934b564df28 Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Mon, 16 Dec 2024 13:39:38 -0700 Subject: [PATCH 03/33] Retitle SEMS_COMMON_CUDA to SEMS_CUDA Nothing COMMON about it. Signed-off-by: Samuel E. 
Browne --- packages/framework/ini-files/config-specs.ini | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index 8faf62e8e79a..a00f0b6cf4cf 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -1237,7 +1237,7 @@ opt-set-cmake-var Phalanx_KOKKOS_DEVICE_TYPE STRING FORCE : CUDA opt-set-cmake-var Sacado_ENABLE_HIERARCHICAL_DFAD BOOL FORCE : ON opt-set-cmake-var Tpetra_INST_SERIAL BOOL FORCE : ON -[SEMS_COMMON_CUDA] +[SEMS_CUDA] use CUDA # see https://github.com/trilinos/Trilinos/issues/11109#issuecomment-1272146298 opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : /lib64/libblas.so.3 @@ -1441,7 +1441,7 @@ use USE-UVM|NO use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS -use SEMS_COMMON_CUDA +use SEMS_CUDA # TPL ENABLE/DISABLE settings opt-set-cmake-var TPL_ENABLE_BLAS BOOL FORCE : ON @@ -1554,7 +1554,7 @@ use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use PACKAGE-ENABLES|NO-EPETRA use COMMON_SPACK_TPLS -use SEMS_COMMON_CUDA +use SEMS_CUDA use CUDA-RUN-SERIAL-TESTS opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : OFF @@ -1589,7 +1589,7 @@ use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS -use SEMS_COMMON_CUDA +use SEMS_CUDA # TPL ENABLE/DISABLE settings opt-set-cmake-var TPL_ENABLE_BLAS BOOL FORCE : ON From f729ee3bd8b0a8e206e5364ca71dcffc9ca80eaa Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Mon, 16 Dec 2024 13:42:50 -0700 Subject: [PATCH 04/33] Consolidate CUDA-RUN-SERIAL-TESTS If it is what it says it is, we want it any time we have CUDA. Signed-off-by: Samuel E. 
Browne --- packages/framework/ini-files/config-specs.ini | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index a00f0b6cf4cf..6a482f660472 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -1237,6 +1237,8 @@ opt-set-cmake-var Phalanx_KOKKOS_DEVICE_TYPE STRING FORCE : CUDA opt-set-cmake-var Sacado_ENABLE_HIERARCHICAL_DFAD BOOL FORCE : ON opt-set-cmake-var Tpetra_INST_SERIAL BOOL FORCE : ON +use CUDA-RUN-SERIAL-TESTS + [SEMS_CUDA] use CUDA # see https://github.com/trilinos/Trilinos/issues/11109#issuecomment-1272146298 @@ -1526,8 +1528,6 @@ opt-set-cmake-var Adelus_vector_random_npr4_rhs1_MPI_4_DISABLE BOOL : ON use PACKAGE-ENABLES|NO-EPETRA -use CUDA-RUN-SERIAL-TESTS - [rhel8_sems-cuda-11.4.2-sems-gnu-10.1.0-sems-openmpi-4.1.4_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_all] # uses sems-v2 modules use rhel8_sems-cuda-11.4.2-sems-gnu-10.1.0-sems-openmpi-4.1.4_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_no-package-enables @@ -1555,7 +1555,6 @@ use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use PACKAGE-ENABLES|NO-EPETRA use COMMON_SPACK_TPLS use SEMS_CUDA -use CUDA-RUN-SERIAL-TESTS opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : OFF @@ -1566,7 +1565,6 @@ use PACKAGE-ENABLES|ALL-NO-EPETRA [rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_all-no-epetra] use rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_all -use CUDA-RUN-SERIAL-TESTS opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : ON [rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_no-package-enables] @@ -1673,8 +1671,6 @@ 
opt-set-cmake-var Adelus_vector_random_npr4_rhs1_MPI_4_DISABLE BOOL : ON use PACKAGE-ENABLES|NO-EPETRA -use CUDA-RUN-SERIAL-TESTS - [rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_all] # uses sems-v2 modules use rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_no-package-enables @@ -2101,7 +2097,6 @@ use USE-UVM|NO use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-EPETRA use CUDA -use CUDA-RUN-SERIAL-TESTS opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL : ON opt-set-cmake-var TPL_ENABLE_X11 BOOL : OFF @@ -2128,7 +2123,6 @@ use USE-UVM|YES use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-EPETRA use CUDA -use CUDA-RUN-SERIAL-TESTS opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : OFF opt-set-cmake-var Kokkos_ENABLE_TESTS BOOL FORCE : ON From 286d1cab4675feb760a218bf47c7b3e250d74c98 Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Mon, 16 Dec 2024 14:06:25 -0700 Subject: [PATCH 05/33] Remove CUDA setting duplication Signed-off-by: Samuel E. 
Browne --- packages/framework/ini-files/config-specs.ini | 108 ------------------ 1 file changed, 108 deletions(-) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index 6a482f660472..6e0536851330 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -1445,60 +1445,6 @@ use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS use SEMS_CUDA -# TPL ENABLE/DISABLE settings -opt-set-cmake-var TPL_ENABLE_BLAS BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_BinUtils BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_Boost BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_CGNS BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_CUDA BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_CUSPARSE BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_DLlib BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_HDF5 BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_HWLOC BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_LAPACK BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_METIS BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_Matio BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_MPI BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_Netcdf BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_ParMETIS BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_Pthread BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_Scotch BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_SuperLU BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_SuperLUDist BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_Zlib BOOL FORCE : ON - -#TPL_*_LIBRARIES -opt-set-cmake-var TPL_BoostLib_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a -opt-set-cmake-var TPL_Boost_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a -opt-set-cmake-var TPL_DLlib_LIBRARIES FILEPATH FORCE : "-ldl" -opt-set-cmake-var TPL_HDF5_LIBRARIES STRING FORCE : 
${HDF5_LIB|ENV}/libhdf5_hl.so;${HDF5_LIB|ENV}/libhdf5.a;${ZLIB_LIB|ENV}/libz.a;-ldl -opt-set-cmake-var TPL_METIS_LIBRARIES STRING FORCE : ${METIS_LIB|ENV}/libmetis.so -opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : -L${NETCDF_C_ROOT|ENV}/lib64;${NETCDF_C_ROOT|ENV}/lib/libnetcdf.a;${PARALLEL_NETCDF_ROOT|ENV}/lib/libpnetcdf.a;${TPL_HDF5_LIBRARIES|CMAKE} - -#TPL_[INCLUDE|LIBRARY]_DIRS -opt-set-cmake-var Netcdf_INCLUDE_DIRS STRING FORCE : ${NETCDF_C_INC|ENV} -opt-set-cmake-var ParMETIS_INCLUDE_DIRS STRING FORCE : ${PARMETIS_INC|ENV} -opt-set-cmake-var ParMETIS_LIBRARY_DIRS STRING FORCE : ${PARMETIS_LIB|ENV} -opt-set-cmake-var Scotch_INCLUDE_DIRS STRING FORCE : ${SCOTCH_INC|ENV} -opt-set-cmake-var Scotch_LIBRARY_DIRS STRING FORCE : ${SCOTCH_LIB|ENV} -opt-set-cmake-var SuperLU_INCLUDE_DIRS STRING FORCE : ${SUPERLU_INC|ENV} -opt-set-cmake-var SuperLU_LIBRARY_DIRS STRING FORCE : ${SUPERLU_LIB|ENV} - -#CXX Settings -opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -fPIC -Wall -Warray-bounds -Wchar-subscripts -Wcomment -Wenum-compare -Wformat -Wuninitialized -Wmaybe-uninitialized -Wmain -Wnarrowing -Wnonnull -Wreorder -Wreturn-type -Wsequence-point -Wtrigraphs -Wunused-function -Wunused-but-set-variable -Wwrite-strings - -#Package Options -opt-set-cmake-var EpetraExt_ENABLE_HDF5 BOOL FORCE : OFF -opt-set-cmake-var Kokkos_ENABLE_CUDA BOOL FORCE : ON -opt-set-cmake-var Kokkos_ENABLE_CUDA_LAMBDA BOOL FORCE : ON -opt-set-cmake-var Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA BOOL FORCE : ON -#opt-set-cmake-var Kokkos_ENABLE_Debug_Bounds_Check BOOL FORCE : ON -opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none -opt-set-cmake-var Panzer_FADTYPE STRING FORCE : "Sacado::Fad::DFad" -opt-set-cmake-var Phalanx_KOKKOS_DEVICE_TYPE STRING FORCE : CUDA -opt-set-cmake-var Sacado_ENABLE_HIERARCHICAL_DFAD BOOL FORCE : ON -opt-set-cmake-var Tpetra_INST_SERIAL BOOL FORCE : ON -opt-set-cmake-var Zoltan_ENABLE_Scotch BOOL FORCE : OFF opt-set-cmake-var 
Trilinos_ENABLE_TrilinosFrameworkTests BOOL FORCE : OFF opt-set-cmake-var Trilinos_ENABLE_TrilinosBuildStats BOOL FORCE : OFF @@ -1589,60 +1535,6 @@ use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS use SEMS_CUDA -# TPL ENABLE/DISABLE settings -opt-set-cmake-var TPL_ENABLE_BLAS BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_BinUtils BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_Boost BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_CGNS BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_CUDA BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_CUSPARSE BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_DLlib BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_HDF5 BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_HWLOC BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_LAPACK BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_METIS BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_Matio BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_MPI BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_Netcdf BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_ParMETIS BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_Pthread BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_Scotch BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_SuperLU BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_SuperLUDist BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_Zlib BOOL FORCE : ON - -#TPL_*_LIBRARIES -opt-set-cmake-var TPL_BoostLib_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a -opt-set-cmake-var TPL_Boost_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a -opt-set-cmake-var TPL_DLlib_LIBRARIES FILEPATH FORCE : "-ldl" -opt-set-cmake-var TPL_HDF5_LIBRARIES STRING FORCE : ${HDF5_LIB|ENV}/libhdf5_hl.so;${HDF5_LIB|ENV}/libhdf5.a;${ZLIB_LIB|ENV}/libz.a;-ldl -opt-set-cmake-var TPL_METIS_LIBRARIES STRING FORCE : ${METIS_LIB|ENV}/libmetis.so -opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : 
-L${NETCDF_C_ROOT|ENV}/lib64;${NETCDF_C_ROOT|ENV}/lib/libnetcdf.a;${PARALLEL_NETCDF_ROOT|ENV}/lib/libpnetcdf.a;${TPL_HDF5_LIBRARIES|CMAKE} - -#TPL_[INCLUDE|LIBRARY]_DIRS -opt-set-cmake-var Netcdf_INCLUDE_DIRS STRING FORCE : ${NETCDF_C_INC|ENV} -opt-set-cmake-var ParMETIS_INCLUDE_DIRS STRING FORCE : ${PARMETIS_INC|ENV} -opt-set-cmake-var ParMETIS_LIBRARY_DIRS STRING FORCE : ${PARMETIS_LIB|ENV} -opt-set-cmake-var Scotch_INCLUDE_DIRS STRING FORCE : ${SCOTCH_INC|ENV} -opt-set-cmake-var Scotch_LIBRARY_DIRS STRING FORCE : ${SCOTCH_LIB|ENV} -opt-set-cmake-var SuperLU_INCLUDE_DIRS STRING FORCE : ${SUPERLU_INC|ENV} -opt-set-cmake-var SuperLU_LIBRARY_DIRS STRING FORCE : ${SUPERLU_LIB|ENV} - -#CXX Settings -opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -fPIC -Wall -Warray-bounds -Wchar-subscripts -Wcomment -Wenum-compare -Wformat -Wuninitialized -Wmaybe-uninitialized -Wmain -Wnarrowing -Wnonnull -Wreorder -Wreturn-type -Wsequence-point -Wtrigraphs -Wunused-function -Wunused-but-set-variable -Wwrite-strings - -#Package Options -opt-set-cmake-var EpetraExt_ENABLE_HDF5 BOOL FORCE : OFF -opt-set-cmake-var Kokkos_ENABLE_CUDA BOOL FORCE : ON -opt-set-cmake-var Kokkos_ENABLE_CUDA_LAMBDA BOOL FORCE : ON -opt-set-cmake-var Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA BOOL FORCE : ON -#opt-set-cmake-var Kokkos_ENABLE_Debug_Bounds_Check BOOL FORCE : ON -opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none -opt-set-cmake-var Panzer_FADTYPE STRING FORCE : "Sacado::Fad::DFad" -opt-set-cmake-var Phalanx_KOKKOS_DEVICE_TYPE STRING FORCE : CUDA -opt-set-cmake-var Sacado_ENABLE_HIERARCHICAL_DFAD BOOL FORCE : ON -opt-set-cmake-var Tpetra_INST_SERIAL BOOL FORCE : ON -opt-set-cmake-var Zoltan_ENABLE_Scotch BOOL FORCE : OFF opt-set-cmake-var Trilinos_ENABLE_TrilinosFrameworkTests BOOL FORCE : OFF opt-set-cmake-var Trilinos_ENABLE_TrilinosBuildStats BOOL FORCE : OFF From e7a53ddc89d0e4e83c7acdb32f0a7871c1989fb9 Mon Sep 17 00:00:00 2001 From: "Samuel E. 
Browne" Date: Mon, 16 Dec 2024 15:13:13 -0700 Subject: [PATCH 06/33] Add -lm to link flags when SuperLU is used Can be removed if we use `find_package(superlu)` since the -lm will get picked up from SuperLU's build system. Signed-off-by: Samuel E. Browne --- packages/framework/ini-files/config-specs.ini | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index 6e0536851330..b65b29b919c6 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -440,6 +440,9 @@ opt-set-cmake-var SuperLU_INCLUDE_DIRS PATH FORCE : ${SUPERLU_INC|ENV} opt-set-cmake-var SuperLU_INCLUDE_DIRS STRING FORCE : ${SUPERLU_INC|ENV} opt-set-cmake-var SuperLU_LIBRARY_DIRS PATH FORCE : ${SUPERLU_LIB|ENV} opt-set-cmake-var SuperLU_LIBRARY_DIRS STRING FORCE : ${SUPERLU_LIB|ENV} +# FIXME: Wouldn't need this if we used find_package(superlu) +opt-set-cmake-var Trilinos_EXTRA_LINK_FLAGS STRING FORCE : -lm + # Metis opt-set-cmake-var TPL_METIS_LIBRARIES STRING FORCE : ${METIS_LIB|ENV}/libmetis.so @@ -460,8 +463,6 @@ use COMMON_SPACK_TPLS opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : "" opt-set-cmake-var TPL_HDF5_LIBRARIES STRING FORCE : "" -opt-set-cmake-var SuperLU_LIBRARY_NAMES STRING FORCE : superlu;m - [COMMON_USE-MPI|NO] use COMMON opt-set-cmake-var TPL_ENABLE_ParMETIS BOOL FORCE : OFF @@ -1220,6 +1221,8 @@ opt-set-cmake-var Scotch_INCLUDE_DIRS STRING FORCE : ${SCOTCH_INC|ENV} opt-set-cmake-var Scotch_LIBRARY_DIRS STRING FORCE : ${SCOTCH_LIB|ENV} opt-set-cmake-var SuperLU_INCLUDE_DIRS STRING FORCE : ${SUPERLU_INC|ENV} opt-set-cmake-var SuperLU_LIBRARY_DIRS STRING FORCE : ${SUPERLU_LIB|ENV} +# FIXME: Wouldn't need this if we used find_package(superlu) +opt-set-cmake-var Trilinos_EXTRA_LINK_FLAGS STRING FORCE : -lm [CUDA] use SPACK_CUDA_TPLS @@ -1340,7 +1343,6 @@ opt-set-cmake-var 
Teko_DISABLE_LSCSTABALIZED_TPETRA_ALPAH_INV_D BOOL : ON use TEST_DISABLES|CLANG -opt-set-cmake-var SuperLU_LIBRARY_NAMES STRING : superlu;m opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF opt-set-cmake-var Pliris_vector_random_MPI_3_DISABLE BOOL : ON @@ -1588,7 +1590,6 @@ use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS use SPACK_NETLIB_BLAS_LAPACK -opt-set-cmake-var SuperLU_LIBRARY_NAMES STRING : superlu;m opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF opt-set-cmake-var TPL_ENABLE_ParMETIS BOOL FORCE : OFF opt-set-cmake-var TPL_ENABLE_Scotch BOOL FORCE : OFF @@ -1660,7 +1661,6 @@ use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS use SPACK_NETLIB_BLAS_LAPACK -opt-set-cmake-var SuperLU_LIBRARY_NAMES STRING FORCE : superlu;m opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none --mca btl vader,self From 03b6ba8dfe8941c476758d8091c3ec0790ff26d2 Mon Sep 17 00:00:00 2001 From: malphil Date: Mon, 16 Dec 2024 15:19:40 -0700 Subject: [PATCH 07/33] Use KLU as default coarse grid solver Signed-off-by: malphil --- packages/muelu/src/MueCentral/MueLu_MasterList.cpp | 2 +- packages/muelu/src/Smoothers/MueLu_Amesos2Smoother_def.hpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/muelu/src/MueCentral/MueLu_MasterList.cpp b/packages/muelu/src/MueCentral/MueLu_MasterList.cpp index 658ab2275839..8fc641feeb2e 100644 --- a/packages/muelu/src/MueCentral/MueLu_MasterList.cpp +++ b/packages/muelu/src/MueCentral/MueLu_MasterList.cpp @@ -161,7 +161,7 @@ namespace MueLu { "" "" "" - "" + "" "" "" "" diff --git a/packages/muelu/src/Smoothers/MueLu_Amesos2Smoother_def.hpp b/packages/muelu/src/Smoothers/MueLu_Amesos2Smoother_def.hpp index a78cc24697e5..d6f0ad05c03d 100644 --- a/packages/muelu/src/Smoothers/MueLu_Amesos2Smoother_def.hpp +++ b/packages/muelu/src/Smoothers/MueLu_Amesos2Smoother_def.hpp @@ -110,10 +110,10 @@ Amesos2Smoother::Amesos2Smoother(cons 
// TODO: It would be great is Amesos2 provides directly this kind of logic for us if (type_ == "" || Amesos2::query(type_) == false) { std::string oldtype = type_; -#if defined(HAVE_AMESOS2_SUPERLU) - type_ = "Superlu"; -#elif defined(HAVE_AMESOS2_KLU2) +#if defined(HAVE_AMESOS2_KLU2) type_ = "Klu"; +#elif defined(HAVE_AMESOS2_SUPERLU) + type_ = "Superlu"; #elif defined(HAVE_AMESOS2_SUPERLUDIST) type_ = "Superludist"; #elif defined(HAVE_AMESOS2_BASKER) From 99e45853576f278cfd2755035242ed7a81318af6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2024 23:03:14 +0000 Subject: [PATCH 08/33] Bump github/codeql-action from 3.27.6 to 3.27.9 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.27.6 to 3.27.9. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/aa578102511db1f4524ed59b8cc2bae4f6e88195...df409f7d9260372bd5f19e5b04e83cb3c43714ae) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- .github/workflows/codeql.yml | 4 ++-- .github/workflows/scorecards.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 5a5e701def17..95f3c5b21fbe 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -45,7 +45,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Initialize CodeQL - uses: github/codeql-action/init@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6 + uses: github/codeql-action/init@df409f7d9260372bd5f19e5b04e83cb3c43714ae # v3.27.9 with: languages: ${{ matrix.language }} build-mode: ${{ matrix.build-mode }} @@ -108,6 +108,6 @@ jobs: ninja -j 16 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6 + uses: github/codeql-action/analyze@df409f7d9260372bd5f19e5b04e83cb3c43714ae # v3.27.9 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 88c2a1fcf484..cbe8a034c3a2 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -66,6 +66,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. 
- name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6 + uses: github/codeql-action/upload-sarif@df409f7d9260372bd5f19e5b04e83cb3c43714ae # v3.27.9 with: sarif_file: results.sarif From 284ac7245c11107e7ce4ff6efb73c919eb0eb52b Mon Sep 17 00:00:00 2001 From: Anderson Chauphan Date: Mon, 16 Dec 2024 21:36:41 -0600 Subject: [PATCH 09/33] Update GenConfig SHA1 and recursive submodule Signed-off-by: Anderson Chauphan --- packages/framework/get_dependencies.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/framework/get_dependencies.sh b/packages/framework/get_dependencies.sh index d07bec01f206..956857ecaf50 100755 --- a/packages/framework/get_dependencies.sh +++ b/packages/framework/get_dependencies.sh @@ -3,7 +3,7 @@ ini_file_option=$1 script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)" # Data that needs to be updated when GenConfig changes! -genconfig_sha1=HEAD +genconfig_sha1=88c44e347c0377a170ec9ca45a47732a9630b4ec # The following code contains no changing data @@ -60,7 +60,7 @@ function tril_genconfig_clone_or_update_repo() { if [[ "${has_submodules}" == "has-submodules" ]] ; then echo echo "STATUS: ${sub_dir}: Update submodules" - cmd="git submodule update --force --init" + cmd="git submodule update --force --init --recursive" retry_command "${cmd}" cd - > /dev/null elif [[ "${has_submodules}" != "" ]] ; then From 1d967f0fc7f6dd0556fa8552c8f48e9c0f12e9cc Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Tue, 17 Dec 2024 01:32:47 -0700 Subject: [PATCH 10/33] Tacho : rval on device Signed-off-by: iyamazaki --- .../src/impl/Tacho_NumericTools_LevelSet.hpp | 48 +++++++++++++------ 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp index 25068de037d7..813d6c386d70 100644 --- 
a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp @@ -2190,8 +2190,13 @@ class NumericToolsLevelSet : public NumericToolsBase { // const ordinal_type team_size_factor[2] = { 16, 16 }, vector_size_factor[2] = { 32, 32}; const ordinal_type team_size_factor[2] = {64, 64}, vector_size_factor[2] = {8, 4}; const ordinal_type team_size_update[2] = {16, 8}, vector_size_update[2] = {32, 32}; + // returned value from team Chol + colind_view d_rval("rval",1); + auto h_rval = Kokkos::create_mirror_view(host_memory_space(), d_rval); { typedef TeamFunctor_FactorizeChol functor_type; + functor_type functor(_info, _factorize_mode, _level_sids, _buf, d_rval.data()); + #if defined(TACHO_TEST_LEVELSET_TOOLS_KERNEL_OVERHEAD) typedef Kokkos::TeamPolicy, exec_space, typename functor_type::DummyTag> team_policy_factorize; @@ -2204,11 +2209,8 @@ class NumericToolsLevelSet : public NumericToolsBase { typedef Kokkos::TeamPolicy, exec_space, typename functor_type::UpdateTag> team_policy_update; #endif - - int rval = 0; team_policy_factor policy_factor(1, 1, 1); team_policy_update policy_update(1, 1, 1); - functor_type functor(_info, _factorize_mode, _level_sids, _buf, &rval); // get max vector size const ordinal_type vmax = policy_factor.vector_length_max(); @@ -2259,10 +2261,14 @@ class NumericToolsLevelSet : public NumericToolsBase { Kokkos::fence(); time_device += tick.seconds(); tick.reset(); } - Kokkos::fence(); + Kokkos::deep_copy(h_rval, d_rval); + int rval = h_rval(0); if (rval != 0) { TACHO_TEST_FOR_EXCEPTION(rval, std::runtime_error, "POTRF (team) returns non-zero error code."); } + //if (_status != 0) { + // TACHO_TEST_FOR_EXCEPTION(rval, std::runtime_error, "POTRF (device) returns non-zero error code."); + //} Kokkos::parallel_for("update factor", policy_update, functor); if (verbose) { @@ -3926,8 +3932,13 @@ class NumericToolsLevelSet : public NumericToolsBase { const 
ordinal_type team_size_factor[2] = {64, 64}, vector_size_factor[2] = {8, 4}; #endif const ordinal_type team_size_update[2] = {16, 8}, vector_size_update[2] = {32, 32}; + // returned value from team LDL + colind_view d_rval("rval",1); + auto h_rval = Kokkos::create_mirror_view(host_memory_space(), d_rval); { typedef TeamFunctor_FactorizeLDL functor_type; + functor_type functor(_info, _factorize_mode, _level_sids, _piv, _diag, _buf, d_rval.data()); + #if defined(TACHO_TEST_LEVELSET_TOOLS_KERNEL_OVERHEAD) typedef Kokkos::TeamPolicy, exec_space, typename functor_type::DummyTag> team_policy_factorize; @@ -3940,12 +3951,10 @@ class NumericToolsLevelSet : public NumericToolsBase { typedef Kokkos::TeamPolicy, exec_space, typename functor_type::UpdateTag> team_policy_update; #endif - int rval = 0; - team_policy_factor policy_factor(1, 1, 1); - team_policy_update policy_update(1, 1, 1); - functor_type functor(_info, _factorize_mode, _level_sids, _piv, _diag, _buf, &rval); // get max vector length + team_policy_factor policy_factor(1, 1, 1); + team_policy_update policy_update(1, 1, 1); const ordinal_type vmax = policy_factor.vector_length_max(); { for (ordinal_type lvl = (_team_serial_level_cut - 1); lvl >= 0; --lvl) { @@ -3994,10 +4003,14 @@ class NumericToolsLevelSet : public NumericToolsBase { Kokkos::fence(); time_device += tick.seconds(); tick.reset(); } - Kokkos::fence(); + Kokkos::deep_copy(h_rval, d_rval); + int rval = h_rval(0); if (rval != 0) { TACHO_TEST_FOR_EXCEPTION(rval, std::runtime_error, "SYTRF (team) returns non-zero error code."); } + //if (_status != 0) { + // TACHO_TEST_FOR_EXCEPTION(rval, std::runtime_error, "SYTRF (device) returns non-zero error code."); + //} Kokkos::parallel_for("update factor", policy_update, functor); if (verbose) { @@ -4271,6 +4284,10 @@ class NumericToolsLevelSet : public NumericToolsBase { const ordinal_type team_size_factor[2] = {64, 64}, vector_size_factor[2] = {8, 4}; #endif const ordinal_type team_size_update[2] = {16, 8}, 
vector_size_update[2] = {32, 32}; + + // returned value from team LU + colind_view d_rval("rval",1); + auto h_rval = Kokkos::create_mirror_view(host_memory_space(), d_rval); { typedef TeamFunctor_FactorizeLU functor_type; #if defined(TACHO_TEST_LEVELSET_TOOLS_KERNEL_OVERHEAD) @@ -4285,10 +4302,9 @@ class NumericToolsLevelSet : public NumericToolsBase { typedef Kokkos::TeamPolicy, exec_space, typename functor_type::UpdateTag> team_policy_update; #endif - int rval = 0; team_policy_factor policy_factor(1, 1, 1); team_policy_update policy_update(1, 1, 1); - functor_type functor(_info, _factorize_mode, _level_sids, _piv, _buf, &rval); + functor_type functor(_info, _factorize_mode, _level_sids, _piv, _buf, d_rval.data()); if (pivot_tol > 0.0) { functor.setDiagPertubationTol(pivot_tol); } @@ -4342,13 +4358,15 @@ class NumericToolsLevelSet : public NumericToolsBase { Kokkos::fence(); time_device += tick.seconds(); tick.reset(); } - Kokkos::fence(); + Kokkos::deep_copy(h_rval, d_rval); + int rval = h_rval(0); if (rval != 0) { TACHO_TEST_FOR_EXCEPTION(rval, std::runtime_error, "GETRF (team) returns non-zero error code."); } - if (_status != 0) { - TACHO_TEST_FOR_EXCEPTION(rval, std::runtime_error, "GETRF (device) returns non-zero error code."); - } + //if (_status != 0) { + // TACHO_TEST_FOR_EXCEPTION(rval, std::runtime_error, "GETRF (device) returns non-zero error code."); + //} + Kokkos::parallel_for("update factor", policy_update, functor); if (verbose) { Kokkos::fence(); time_update += tick.seconds(); From bfbe7ac70c0415e7f7140e89ffa7fe0d80138b2b Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Tue, 17 Dec 2024 06:10:00 -0700 Subject: [PATCH 11/33] Serialize Intrepid2 unit test Frequent timeouts on GCC/OpenMPI Debug Shared configuration in PR testing, and before it was marked serial for CUDA had repeated failures there as well (though those were not timeouts). Signed-off-by: Samuel E. 
Browne --- packages/framework/ini-files/config-specs.ini | 1 - packages/intrepid2/unit-test/MonolithicExecutable/CMakeLists.txt | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index a59a18d50591..74fc1cd93849 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -1251,7 +1251,6 @@ opt-set-cmake-var Kokkos_CoreUnitTest_CudaTimingBased_SET_RUN_SERIAL BOOL FORCE opt-set-cmake-var Kokkos_CoreUnitTest_Default_SET_RUN_SERIAL BOOL FORCE : ON opt-set-cmake-var KokkosKernels_sparse_cuda_MPI_1_SET_RUN_SERIAL BOOL FORCE : ON opt-set-cmake-var KokkosKernels_batched_dla_cuda_MPI_1_SET_RUN_SERIAL BOOL FORCE : ON -opt-set-cmake-var Intrepid2_unit-test_MonolithicExecutable_Intrepid2_Tests_MPI_1_SET_RUN_SERIAL BOOL FORCE : ON # Full configurations intended to be loaded. diff --git a/packages/intrepid2/unit-test/MonolithicExecutable/CMakeLists.txt b/packages/intrepid2/unit-test/MonolithicExecutable/CMakeLists.txt index 4a6393836293..0d47249d9ad8 100644 --- a/packages/intrepid2/unit-test/MonolithicExecutable/CMakeLists.txt +++ b/packages/intrepid2/unit-test/MonolithicExecutable/CMakeLists.txt @@ -13,6 +13,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( NUM_MPI_PROCS 1 PASS_REGULAR_EXPRESSION "TEST PASSED" ADD_DIR_TO_NAME + RUN_SERIAL ) # add single-group tests; allows for easier targeted builds and debugging (especially useful under CUDA) From 6517be9824d4755c061982ba3254c4c49796f508 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 17 Dec 2024 07:31:10 -0700 Subject: [PATCH 12/33] Teko: Examples with Epetra=OFF Signed-off-by: Christian Glusa --- packages/teko/CMakeLists.txt | 4 +- .../BuildPreconditioner/step1/CMakeLists.txt | 14 ++++--- packages/teko/examples/CMakeLists.txt | 41 ++++++++++--------- 3 files changed, 31 insertions(+), 28 deletions(-) diff --git a/packages/teko/CMakeLists.txt 
b/packages/teko/CMakeLists.txt index 245c3484a32f..c8c219d6962a 100644 --- a/packages/teko/CMakeLists.txt +++ b/packages/teko/CMakeLists.txt @@ -70,9 +70,7 @@ ENDIF() ADD_SUBDIRECTORY(src) TRIBITS_ADD_TEST_DIRECTORIES(tests) -IF(TEKO_HAVE_EPETRA) - TRIBITS_ADD_EXAMPLE_DIRECTORIES(examples) -ENDIF() +TRIBITS_ADD_EXAMPLE_DIRECTORIES(examples) TRIBITS_EXCLUDE_FILES( tests/data/nsjac.mm diff --git a/packages/teko/examples/BuildPreconditioner/step1/CMakeLists.txt b/packages/teko/examples/BuildPreconditioner/step1/CMakeLists.txt index 8b4c1d802fbe..50943256f7bf 100644 --- a/packages/teko/examples/BuildPreconditioner/step1/CMakeLists.txt +++ b/packages/teko/examples/BuildPreconditioner/step1/CMakeLists.txt @@ -1,9 +1,11 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +IF (TEKO_ENABLE_EPETRA) -TRIBITS_ADD_EXECUTABLE( - BuilderPreconditioner_Step1_Example - SOURCES - example-test.cpp - COMM mpi - ) + TRIBITS_ADD_EXECUTABLE( + BuilderPreconditioner_Step1_Example + SOURCES + example-test.cpp + COMM mpi + ) +ENDIF() diff --git a/packages/teko/examples/CMakeLists.txt b/packages/teko/examples/CMakeLists.txt index 360f396e2853..18406fd59b4a 100644 --- a/packages/teko/examples/CMakeLists.txt +++ b/packages/teko/examples/CMakeLists.txt @@ -1,25 +1,28 @@ -ADD_SUBDIRECTORY(StridedSolve) -ADD_SUBDIRECTORY(AddMultiplyPrecs) ADD_SUBDIRECTORY(BuildPreconditioner) -ADD_SUBDIRECTORY(BasicExamples) +IF (TEKO_ENABLE_EPETRA) + ADD_SUBDIRECTORY(StridedSolve) + ADD_SUBDIRECTORY(AddMultiplyPrecs) + ADD_SUBDIRECTORY(BasicExamples) -IF (ML_ENABLE_TekoSmoothers) - ADD_SUBDIRECTORY(ML-Teko-Coupling) -ENDIF() - -TRIBITS_COPY_FILES_TO_BINARY_DIR(example_copyfiles - SOURCE_FILES nslhs_test.mm - nsrhs_test.mm - SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/data" - DEST_DIR "${CMAKE_CURRENT_BINARY_DIR}/data" - EXEDEPS StridedSolve_Example - ) - -IF(${PROJECT_NAME}_ENABLE_DEVELOPMENT_MODE) - TRIBITS_COPY_FILES_TO_BINARY_DIR(example_copyfiles_from_tests - SOURCE_FILES nsjac.mm - SOURCE_DIR 
"${CMAKE_CURRENT_SOURCE_DIR}/../tests/data" + TRIBITS_COPY_FILES_TO_BINARY_DIR(example_copyfiles + SOURCE_FILES nslhs_test.mm + nsrhs_test.mm + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/data" DEST_DIR "${CMAKE_CURRENT_BINARY_DIR}/data" EXEDEPS StridedSolve_Example + ) + + IF(${PROJECT_NAME}_ENABLE_DEVELOPMENT_MODE) + TRIBITS_COPY_FILES_TO_BINARY_DIR(example_copyfiles_from_tests + SOURCE_FILES nsjac.mm + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../tests/data" + DEST_DIR "${CMAKE_CURRENT_BINARY_DIR}/data" + EXEDEPS StridedSolve_Example ) + ENDIF() + +ENDIF() + +IF (ML_ENABLE_TekoSmoothers) + ADD_SUBDIRECTORY(ML-Teko-Coupling) ENDIF() From 5910651db934b2247caaf713729465ef7751bcd4 Mon Sep 17 00:00:00 2001 From: malphil Date: Tue, 17 Dec 2024 08:42:13 -0700 Subject: [PATCH 13/33] Swap default coarse grid solver in masterList.xml, run update_params.sh script. Signed-off-by: malphil --- packages/muelu/doc/UsersGuide/masterList.xml | 2 +- packages/muelu/doc/UsersGuide/options_smoothing_and_coarse.tex | 2 +- packages/muelu/doc/UsersGuide/paramlist.tex | 2 +- packages/muelu/doc/UsersGuide/paramlist_hidden.tex | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/muelu/doc/UsersGuide/masterList.xml b/packages/muelu/doc/UsersGuide/masterList.xml index d9e273f00443..e4c00ee95a40 100644 --- a/packages/muelu/doc/UsersGuide/masterList.xml +++ b/packages/muelu/doc/UsersGuide/masterList.xml @@ -353,7 +353,7 @@ coarse: type needs special treatment in ML string - "SuperLU" + "KLU" Coarse solver. Possible values: see Table~\ref{tab:coarse_solvers}. diff --git a/packages/muelu/doc/UsersGuide/options_smoothing_and_coarse.tex b/packages/muelu/doc/UsersGuide/options_smoothing_and_coarse.tex index 9e260be65b4a..10c501c10e50 100644 --- a/packages/muelu/doc/UsersGuide/options_smoothing_and_coarse.tex +++ b/packages/muelu/doc/UsersGuide/options_smoothing_and_coarse.tex @@ -21,7 +21,7 @@ \cbb{coarse: max size}{int}{2000}{Maximum dimension of a coarse grid. 
\muelu will stop coarsening once it is achieved.} -\cbb{coarse: type}{string}{"SuperLU"}{Coarse solver. Possible values: see Table~\ref{tab:coarse_solvers}.} +\cbb{coarse: type}{string}{"KLU"}{Coarse solver. Possible values: see Table~\ref{tab:coarse_solvers}.} \cba{coarse: params}{\parameterlist}{Coarse solver parameters. \muelu passes them directly to the appropriate package library.} diff --git a/packages/muelu/doc/UsersGuide/paramlist.tex b/packages/muelu/doc/UsersGuide/paramlist.tex index f9346acebbf1..86249ef58d12 100644 --- a/packages/muelu/doc/UsersGuide/paramlist.tex +++ b/packages/muelu/doc/UsersGuide/paramlist.tex @@ -46,7 +46,7 @@ \cbb{coarse: max size}{int}{2000}{Maximum dimension of a coarse grid. \muelu will stop coarsening once it is achieved.} -\cbb{coarse: type}{string}{"SuperLU"}{Coarse solver. Possible values: see Table~\ref{tab:coarse_solvers}.} +\cbb{coarse: type}{string}{"KLU"}{Coarse solver. Possible values: see Table~\ref{tab:coarse_solvers}.} \cba{coarse: params}{\parameterlist}{Coarse solver parameters. \muelu passes them directly to the appropriate package library.} diff --git a/packages/muelu/doc/UsersGuide/paramlist_hidden.tex b/packages/muelu/doc/UsersGuide/paramlist_hidden.tex index cbc68a68e2f9..0d14c4917a68 100644 --- a/packages/muelu/doc/UsersGuide/paramlist_hidden.tex +++ b/packages/muelu/doc/UsersGuide/paramlist_hidden.tex @@ -56,7 +56,7 @@ \cbb{coarse: max size}{int}{2000}{Maximum dimension of a coarse grid. \muelu will stop coarsening once it is achieved.} -\cbb{coarse: type}{string}{"SuperLU"}{Coarse solver. Possible values: see Table~\ref{tab:coarse_solvers}.} +\cbb{coarse: type}{string}{"KLU"}{Coarse solver. Possible values: see Table~\ref{tab:coarse_solvers}.} \cba{coarse: params}{\parameterlist}{Coarse solver parameters. 
\muelu passes them directly to the appropriate package library.} From 782c2d28e03b32b1f4a353ca0cb4fe657df22165 Mon Sep 17 00:00:00 2001 From: Anderson Chauphan Date: Tue, 17 Dec 2024 10:29:58 -0600 Subject: [PATCH 14/33] Remove --container from AT2 get_dependencies.sh calls Signed-off-by: Anderson Chauphan --- .github/workflows/AT2.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/AT2.yml b/.github/workflows/AT2.yml index c72e311ad456..53e68eca9564 100644 --- a/.github/workflows/AT2.yml +++ b/.github/workflows/AT2.yml @@ -74,7 +74,7 @@ jobs: - name: get dependencies working-directory: ./packages/framework run: | - bash -l -c "./get_dependencies.sh --container" + bash -l -c "./get_dependencies.sh" - name: PullRequestLinuxDriverTest.py shell: bash -l {0} working-directory: /home/Trilinos/build @@ -167,7 +167,7 @@ jobs: - name: get dependencies working-directory: ./packages/framework run: | - bash -l -c "./get_dependencies.sh --container" + bash -l -c "./get_dependencies.sh" - name: PullRequestLinuxDriverTest.py shell: bash -l {0} working-directory: /home/Trilinos/build @@ -260,7 +260,7 @@ jobs: - name: get dependencies working-directory: ./packages/framework run: | - bash -l -c "./get_dependencies.sh --container" + bash -l -c "./get_dependencies.sh" - name: PullRequestLinuxDriverTest.py shell: bash -l {0} working-directory: /home/Trilinos/build @@ -353,7 +353,7 @@ jobs: - name: get dependencies working-directory: ./packages/framework run: | - bash -l -c "./get_dependencies.sh --container" + bash -l -c "./get_dependencies.sh" - name: PullRequestLinuxDriverTest.py shell: bash -l {0} working-directory: /home/Trilinos/build From 746a444aac4ea4188aad85ce424025b3e0dc3436 Mon Sep 17 00:00:00 2001 From: reuterb Date: Tue, 17 Dec 2024 09:43:02 -0700 Subject: [PATCH 15/33] Panzer tangents: ScatterDirichletResiduals Tpetra (#13641) * Panzer Tangents :: Add Dirichlet residual scatters Signed-off-by: Bryan Reuter --- 
.../test/evaluator_tests/CMakeLists.txt | 7 + ...tra_blocked_scatter_dirichlet_residual.cpp | 1145 ++++++++++++++++ .../tpetra_scatter_dirichlet_residual.cpp | 1216 +++++++++++------ ...ScatterDirichletResidual_BlockedTpetra.hpp | 100 ++ ...erDirichletResidual_BlockedTpetra_impl.hpp | 254 +++- ...r_ScatterDirichletResidual_Tpetra_decl.hpp | 17 +- ...r_ScatterDirichletResidual_Tpetra_impl.hpp | 469 ++++--- 7 files changed, 2613 insertions(+), 595 deletions(-) create mode 100644 packages/panzer/adapters-stk/test/evaluator_tests/tpetra_blocked_scatter_dirichlet_residual.cpp diff --git a/packages/panzer/adapters-stk/test/evaluator_tests/CMakeLists.txt b/packages/panzer/adapters-stk/test/evaluator_tests/CMakeLists.txt index 079cdad0848b..c1cd8c62f902 100644 --- a/packages/panzer/adapters-stk/test/evaluator_tests/CMakeLists.txt +++ b/packages/panzer/adapters-stk/test/evaluator_tests/CMakeLists.txt @@ -71,6 +71,13 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( NUM_MPI_PROCS 2 ) +TRIBITS_ADD_EXECUTABLE_AND_TEST( + tScatterDirichletResidual_BlockedTpetra + SOURCES tpetra_blocked_scatter_dirichlet_residual.cpp ${UNIT_TEST_DRIVER} + COMM serial mpi + NUM_MPI_PROCS 2 + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( tBasisTimesVector SOURCES basis_times_vector.cpp ${UNIT_TEST_DRIVER} diff --git a/packages/panzer/adapters-stk/test/evaluator_tests/tpetra_blocked_scatter_dirichlet_residual.cpp b/packages/panzer/adapters-stk/test/evaluator_tests/tpetra_blocked_scatter_dirichlet_residual.cpp new file mode 100644 index 000000000000..37ffb58f2698 --- /dev/null +++ b/packages/panzer/adapters-stk/test/evaluator_tests/tpetra_blocked_scatter_dirichlet_residual.cpp @@ -0,0 +1,1145 @@ +// @HEADER +// ***************************************************************************** +// Panzer: A partial differential equation assembly +// engine for strongly coupled complex multiphysics systems +// +// Copyright 2011 NTESS and the Panzer contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +#include +#include +#include +#include + +using Teuchos::RCP; +using Teuchos::rcp; + +#include "Teuchos_DefaultComm.hpp" +#include "Teuchos_GlobalMPISession.hpp" + +#include "Panzer_FieldManagerBuilder.hpp" +#include "Panzer_BlockedDOFManager.hpp" +#include "Panzer_BlockedTpetraLinearObjFactory.hpp" +#include "Panzer_PureBasis.hpp" +#include "Panzer_BasisIRLayout.hpp" +#include "Panzer_Workset.hpp" +#include "Panzer_GatherOrientation.hpp" +#include "Panzer_ScatterDirichletResidual_BlockedTpetra.hpp" +#include "Panzer_GatherSolution_BlockedTpetra.hpp" +#include "Panzer_LOCPair_GlobalEvaluationData.hpp" +#include "Panzer_GlobalEvaluationDataContainer.hpp" +#include "Panzer_ParameterList_GlobalEvaluationData.hpp" + +#include "Panzer_STK_Version.hpp" +#include "PanzerAdaptersSTK_config.hpp" +#include "Panzer_STK_Interface.hpp" +#include "Panzer_STK_SquareQuadMeshFactory.hpp" +#include "Panzer_STK_SetupUtilities.hpp" +#include "Panzer_STKConnManager.hpp" + +#include "Teuchos_DefaultMpiComm.hpp" +#include "Teuchos_OpaqueWrapper.hpp" + +#include "Thyra_VectorStdOps.hpp" +#include "Thyra_ProductVectorBase.hpp" +#include "Thyra_SpmdVectorBase.hpp" + +#include "Tpetra_CrsMatrix.hpp" +#include "Tpetra_Map.hpp" + +#include "user_app_EquationSetFactory.hpp" + +#include // for get char +#include +#include + +#include "Panzer_Evaluator_WithBaseImpl.hpp" + +namespace panzer +{ + typedef Teuchos::ArrayRCP::size_type size_type; + + using TpetraBlockedLinObjFactoryType = panzer::BlockedTpetraLinearObjFactory; + using TpetraBlockedLinObjContainerType = panzer::BlockedTpetraLinearObjContainer; + using Tpetra_CrsMatrix = Tpetra::CrsMatrix; + using Thyra_TpetraLinearOp = Thyra::TpetraLinearOp; + + Teuchos::RCP buildBasis(std::size_t worksetSize, const std::string &basisName); + void testInitialization(const Teuchos::RCP &ipb); + Teuchos::RCP 
buildMesh(int elemX, int elemY); + + TEUCHOS_UNIT_TEST(block_assembly, scatter_dirichlet_residual) + { + +#ifdef HAVE_MPI + Teuchos::RCP> tComm = Teuchos::rcp(new Teuchos::MpiComm(MPI_COMM_WORLD)); +#else + NOPE_PANZER_DOESNT_SUPPORT_SERIAL +#endif + + int myRank = tComm->getRank(); + + const std::size_t workset_size = 4; + const std::string fieldName1_q1 = "U"; + const std::string fieldName2_q1 = "V"; + const std::string fieldName_qedge1 = "B"; + + Teuchos::RCP mesh = buildMesh(2, 2); + + // build input physics block + Teuchos::RCP basis_q1 = buildBasis(workset_size, "Q1"); + Teuchos::RCP basis_qedge1 = buildBasis(workset_size, "QEdge1"); + + Teuchos::RCP ipb = Teuchos::parameterList(); + testInitialization(ipb); + + const int default_int_order = 1; + std::string eBlockID = "eblock-0_0"; + Teuchos::RCP eqset_factory = Teuchos::rcp(new user_app::MyFactory); + panzer::CellData cellData(workset_size, mesh->getCellTopology("eblock-0_0")); + Teuchos::RCP gd = panzer::createGlobalData(); + Teuchos::RCP physicsBlock = + Teuchos::rcp(new PhysicsBlock(ipb, eBlockID, default_int_order, cellData, eqset_factory, gd, false)); + + Teuchos::RCP> work_sets = panzer_stk::buildWorksets(*mesh, physicsBlock->elementBlockID(), + physicsBlock->getWorksetNeeds()); + TEST_EQUALITY(work_sets->size(), 1); + + // build connection manager and field manager + const Teuchos::RCP conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh)); + RCP dofManager = Teuchos::rcp(new panzer::BlockedDOFManager(conn_manager, MPI_COMM_WORLD)); + + dofManager->addField(fieldName1_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName2_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName_qedge1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_qedge1->getIntrepid2Basis()))); + + std::vector> fieldOrder(3); + fieldOrder[0].push_back(fieldName1_q1); + 
fieldOrder[1].push_back(fieldName_qedge1); + fieldOrder[2].push_back(fieldName2_q1); + dofManager->setFieldOrder(fieldOrder); + + // dofManager->setOrientationsRequired(true); + dofManager->buildGlobalUnknowns(); + + // setup linear object factory + ///////////////////////////////////////////////////////////// + + Teuchos::RCP bt_lof = Teuchos::rcp(new TpetraBlockedLinObjFactoryType(tComm.getConst(), dofManager)); + Teuchos::RCP> lof = bt_lof; + Teuchos::RCP dd_loc = bt_lof->buildGhostedLinearObjContainer(); + Teuchos::RCP loc = bt_lof->buildGhostedLinearObjContainer(); + bt_lof->initializeGhostedContainer(LinearObjContainer::F, *dd_loc); + dd_loc->initialize(); + + bt_lof->initializeGhostedContainer(LinearObjContainer::X | LinearObjContainer::F, *loc); + loc->initialize(); + + Teuchos::RCP b_dd_loc = Teuchos::rcp_dynamic_cast(dd_loc); + Teuchos::RCP b_loc = Teuchos::rcp_dynamic_cast(loc); + Teuchos::RCP> p_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_x()); + Thyra::assign(p_vec->getNonconstVectorBlock(0).ptr(), 123.0 + myRank); + Thyra::assign(p_vec->getNonconstVectorBlock(1).ptr(), 456.0 + myRank); + Thyra::assign(p_vec->getNonconstVectorBlock(2).ptr(), 789.0 + myRank); + + // setup field manager, add evaluator under test + ///////////////////////////////////////////////////////////// + + PHX::FieldManager fm; + + std::string resName = ""; + Teuchos::RCP> names_map = + Teuchos::rcp(new std::map); + names_map->insert(std::make_pair(fieldName1_q1, resName + fieldName1_q1)); + names_map->insert(std::make_pair(fieldName2_q1, resName + fieldName2_q1)); + names_map->insert(std::make_pair(fieldName_qedge1, resName + fieldName_qedge1)); + + // evaluators under test + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName1_q1); + names->push_back(resName + fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Scatter Name", "ScatterQ1"); + pl.set("Basis", basis_q1); + pl.set("Dependent Names", names); + 
pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", false); + + Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm.registerEvaluator(evaluator); + fm.requireField(*evaluator->evaluatedFields()[0]); + } + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Scatter Name", "ScatterQEdge1"); + pl.set("Basis", basis_qedge1); + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", false); + + Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm.registerEvaluator(evaluator); + fm.requireField(*evaluator->evaluatedFields()[0]); + } + + // support evaluators + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName1_q1); + names->push_back(fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_q1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + fm.registerEvaluator(evaluator); + } + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_qedge1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + fm.registerEvaluator(evaluator); + } + + std::vector derivative_dimensions; + derivative_dimensions.push_back(12); + fm.setKokkosExtendedDataTypeDimensions(derivative_dimensions); + + panzer::Traits::SD sd; + sd.worksets_ = work_sets; + + fm.postRegistrationSetup(sd); + + // panzer::Traits::PED ped; + // 
ped.dirichletData.ghostedCounter = dd_loc; + // fm.preEvaluate(ped); + panzer::Traits::PED ped; + ped.gedc->addDataObject("Dirichlet Counter", dd_loc); + ped.gedc->addDataObject("Solution Gather Container", loc); + ped.gedc->addDataObject("Residual Scatter Container", loc); + fm.preEvaluate(ped); + + // run tests + ///////////////////////////////////////////////////////////// + + panzer::Workset &workset = (*work_sets)[0]; + workset.alpha = 0.0; + workset.beta = 2.0; // derivatives multiplied by 2 + workset.time = 0.0; + workset.evaluate_transient_terms = false; + + fm.evaluateFields(workset); + + // test Residual fields + panzer::index_t dd_count(0); + Teuchos::ArrayRCP data, dd_data; + Teuchos::RCP> f_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_f()); + Teuchos::RCP> dd_vec = Teuchos::rcp_dynamic_cast>(b_dd_loc->get_f()); + + // check all the residual values. This is kind of crappy test since it simply checks twice the target + // value and the target. Its this way because you add two entries across elements. 
+ + Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(data)); + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(0)->getLocalNumElements()); + TEST_EQUALITY(data.size(), dd_data.size()); + dd_count = 0; + for (int i = 0; i < data.size(); i++) + { + + double target = 123.0 + myRank; + if (dd_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dd_count++; + } + } + TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared + + Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(data)); + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(1)->getLocalNumElements()); + TEST_EQUALITY(data.size(), dd_data.size()); + dd_count = 0; + for (int i = 0; i < data.size(); i++) + { + + double target = 456.0 + myRank; + if (dd_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dd_count++; + } + } + TEST_EQUALITY(dd_count, workset.num_cells); // there are 2 nodes on the side and the sides are not shared + + Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(data)); + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(2)->getLocalNumElements()); + TEST_EQUALITY(data.size(), dd_data.size()); + dd_count = 0; + for (int i = 0; i < data.size(); i++) + { + + double target = 789.0 + myRank; + if (dd_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dd_count++; + } + } + TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side 
and the sides are not shared + } + + TEUCHOS_UNIT_TEST(block_assembly, scatter_dirichlet_jacobian) + { + +#ifdef HAVE_MPI + Teuchos::RCP> tComm = Teuchos::rcp(new Teuchos::MpiComm(MPI_COMM_WORLD)); +#else + NOPE_PANZER_DOESNT_SUPPORT_SERIAL +#endif + + int myRank = tComm->getRank(); + + const std::size_t workset_size = 4; + const std::string fieldName1_q1 = "U"; + const std::string fieldName2_q1 = "V"; + const std::string fieldName_qedge1 = "B"; + + Teuchos::RCP mesh = buildMesh(2, 2); + + // build input physics block + Teuchos::RCP basis_q1 = buildBasis(workset_size, "Q1"); + Teuchos::RCP basis_qedge1 = buildBasis(workset_size, "QEdge1"); + + Teuchos::RCP ipb = Teuchos::parameterList(); + testInitialization(ipb); + + const int default_int_order = 1; + std::string eBlockID = "eblock-0_0"; + Teuchos::RCP eqset_factory = Teuchos::rcp(new user_app::MyFactory); + panzer::CellData cellData(workset_size, mesh->getCellTopology("eblock-0_0")); + Teuchos::RCP gd = panzer::createGlobalData(); + Teuchos::RCP physicsBlock = + Teuchos::rcp(new PhysicsBlock(ipb, eBlockID, default_int_order, cellData, eqset_factory, gd, false)); + + Teuchos::RCP> work_sets = panzer_stk::buildWorksets(*mesh, physicsBlock->elementBlockID(), + physicsBlock->getWorksetNeeds()); + TEST_EQUALITY(work_sets->size(), 1); + + // build connection manager and field manager + const Teuchos::RCP conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh)); + RCP dofManager = Teuchos::rcp(new panzer::BlockedDOFManager(conn_manager, MPI_COMM_WORLD)); + + dofManager->addField(fieldName1_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName2_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName_qedge1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_qedge1->getIntrepid2Basis()))); + + std::vector> fieldOrder(3); + fieldOrder[0].push_back(fieldName1_q1); + 
fieldOrder[1].push_back(fieldName_qedge1); + fieldOrder[2].push_back(fieldName2_q1); + dofManager->setFieldOrder(fieldOrder); + + // dofManager->setOrientationsRequired(true); + dofManager->buildGlobalUnknowns(); + + // setup linear object factory + ///////////////////////////////////////////////////////////// + + Teuchos::RCP bt_lof = Teuchos::rcp(new TpetraBlockedLinObjFactoryType(tComm.getConst(), dofManager)); + Teuchos::RCP> lof = bt_lof; + Teuchos::RCP dd_loc = bt_lof->buildGhostedLinearObjContainer(); + Teuchos::RCP loc = bt_lof->buildGhostedLinearObjContainer(); + bt_lof->initializeGhostedContainer(LinearObjContainer::F, *dd_loc); + dd_loc->initialize(); + + bt_lof->initializeGhostedContainer(LinearObjContainer::X | LinearObjContainer::F | LinearObjContainer::Mat, *loc); + loc->initialize(); + + Teuchos::RCP b_dd_loc = Teuchos::rcp_dynamic_cast(dd_loc); + Teuchos::RCP b_loc = Teuchos::rcp_dynamic_cast(loc); + Teuchos::RCP> p_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_x()); + Thyra::assign(p_vec->getNonconstVectorBlock(0).ptr(), 123.0 + myRank); + Thyra::assign(p_vec->getNonconstVectorBlock(1).ptr(), 456.0 + myRank); + Thyra::assign(p_vec->getNonconstVectorBlock(2).ptr(), 789.0 + myRank); + + auto blk_A = Teuchos::rcp_dynamic_cast>(b_loc->get_A()); + double values[] = {123.0 + myRank, 456.0 + myRank, 789.0 + myRank}; + + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + { + auto thyraOp = Teuchos::rcp_dynamic_cast(blk_A->getNonconstBlock(i, j), false); + auto tpetraCrsMatrix = Teuchos::rcp_dynamic_cast(thyraOp->getTpetraOperator(), true); + tpetraCrsMatrix->setAllToScalar(values[i] * values[j]); + } + + // setup field manager, add evaluator under test + ///////////////////////////////////////////////////////////// + + PHX::FieldManager fm; + + std::string resName = ""; + Teuchos::RCP> names_map = + Teuchos::rcp(new std::map); + names_map->insert(std::make_pair(fieldName1_q1, resName + fieldName1_q1)); + 
names_map->insert(std::make_pair(fieldName2_q1, resName + fieldName2_q1)); + names_map->insert(std::make_pair(fieldName_qedge1, resName + fieldName_qedge1)); + + // evaluators under test + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName1_q1); + names->push_back(resName + fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Scatter Name", "ScatterQ1"); + pl.set("Basis", basis_q1); + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", false); + + Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm.registerEvaluator(evaluator); + fm.requireField(*evaluator->evaluatedFields()[0]); + } + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Scatter Name", "ScatterQEdge1"); + pl.set("Basis", basis_qedge1); + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", false); + + Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm.registerEvaluator(evaluator); + fm.requireField(*evaluator->evaluatedFields()[0]); + } + + // support evaluators + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName1_q1); + names->push_back(fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_q1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + fm.registerEvaluator(evaluator); + } + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + 
names->push_back(fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_qedge1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + fm.registerEvaluator(evaluator); + } + + std::vector derivative_dimensions; + derivative_dimensions.push_back(12); + fm.setKokkosExtendedDataTypeDimensions(derivative_dimensions); + + panzer::Traits::SD sd; + sd.worksets_ = work_sets; + + fm.postRegistrationSetup(sd); + + // panzer::Traits::PED ped; + // ped.dirichletData.ghostedCounter = dd_loc; + // fm.preEvaluate(ped); + panzer::Traits::PED ped; + ped.gedc->addDataObject("Dirichlet Counter", dd_loc); + ped.gedc->addDataObject("Solution Gather Container", loc); + ped.gedc->addDataObject("Residual Scatter Container", loc); + fm.preEvaluate(ped); + + // run tests + ///////////////////////////////////////////////////////////// + + panzer::Workset &workset = (*work_sets)[0]; + workset.alpha = 0.0; + workset.beta = 2.0; // derivatives multiplied by 2 + workset.time = 0.0; + workset.evaluate_transient_terms = false; + + fm.evaluateFields(workset); + + // test Residual fields + panzer::index_t dd_count(0); + Teuchos::ArrayRCP data, dd_data; + Teuchos::RCP> f_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_f()); + Teuchos::RCP> dd_vec = Teuchos::rcp_dynamic_cast>(b_dd_loc->get_f()); + + // check all the residual values. This is kind of crappy test since it simply checks twice the target + // value and the target. Its this way because you add two entries across elements. 
+ + Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(data)); + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(0)->getLocalNumElements()); + TEST_EQUALITY(data.size(), dd_data.size()); + dd_count = 0; + for (int i = 0; i < data.size(); i++) + { + + double target = 123.0 + myRank; + if (dd_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dd_count++; + } + } + TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared + + Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(data)); + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(1)->getLocalNumElements()); + TEST_EQUALITY(data.size(), dd_data.size()); + dd_count = 0; + for (int i = 0; i < data.size(); i++) + { + + double target = 456.0 + myRank; + if (dd_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dd_count++; + } + } + TEST_EQUALITY(dd_count, workset.num_cells); // there are 2 nodes on the side and the sides are not shared + + Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(data)); + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(2)->getLocalNumElements()); + TEST_EQUALITY(data.size(), dd_data.size()); + dd_count = 0; + for (int i = 0; i < data.size(); i++) + { + + double target = 789.0 + myRank; + if (dd_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dd_count++; + } + } + TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side 
and the sides are not shared + } + + TEUCHOS_UNIT_TEST(block_assembly, scatter_dirichlet_tangent) + { + +#ifdef HAVE_MPI + Teuchos::RCP> tComm = Teuchos::rcp(new Teuchos::MpiComm(MPI_COMM_WORLD)); +#else + NOPE_PANZER_DOESNT_SUPPORT_SERIAL +#endif + + int myRank = tComm->getRank(); + + const std::size_t workset_size = 4; + const std::string fieldName1_q1 = "U"; + const std::string fieldName2_q1 = "V"; + const std::string fieldName_qedge1 = "B"; + const std::size_t numParams = 2; + + Teuchos::RCP mesh = buildMesh(2, 2); + + // build input physics block + Teuchos::RCP basis_q1 = buildBasis(workset_size, "Q1"); + Teuchos::RCP basis_qedge1 = buildBasis(workset_size, "QEdge1"); + + Teuchos::RCP ipb = Teuchos::parameterList(); + testInitialization(ipb); + + const int default_int_order = 1; + std::string eBlockID = "eblock-0_0"; + Teuchos::RCP eqset_factory = Teuchos::rcp(new user_app::MyFactory); + panzer::CellData cellData(workset_size, mesh->getCellTopology("eblock-0_0")); + Teuchos::RCP gd = panzer::createGlobalData(); + Teuchos::RCP physicsBlock = + Teuchos::rcp(new PhysicsBlock(ipb, eBlockID, default_int_order, cellData, eqset_factory, gd, false)); + + Teuchos::RCP> work_sets = panzer_stk::buildWorksets(*mesh, physicsBlock->elementBlockID(), + physicsBlock->getWorksetNeeds()); + TEST_EQUALITY(work_sets->size(), 1); + + std::vector scatter_IC_vec = {false,true}; + + for (const bool scatter_IC : scatter_IC_vec) { + // build connection manager and field manager + const Teuchos::RCP conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh)); + RCP dofManager = Teuchos::rcp(new panzer::BlockedDOFManager(conn_manager, MPI_COMM_WORLD)); + + dofManager->addField(fieldName1_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName2_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName_qedge1, Teuchos::rcp(new 
panzer::Intrepid2FieldPattern(basis_qedge1->getIntrepid2Basis()))); + + std::vector> fieldOrder(3); + fieldOrder[0].push_back(fieldName1_q1); + fieldOrder[1].push_back(fieldName_qedge1); + fieldOrder[2].push_back(fieldName2_q1); + dofManager->setFieldOrder(fieldOrder); + + // dofManager->setOrientationsRequired(true); + dofManager->buildGlobalUnknowns(); + + // setup linear object factory + ///////////////////////////////////////////////////////////// + + Teuchos::RCP bt_lof = Teuchos::rcp(new TpetraBlockedLinObjFactoryType(tComm.getConst(), dofManager)); + Teuchos::RCP> lof = bt_lof; + Teuchos::RCP dd_loc = bt_lof->buildGhostedLinearObjContainer(); + Teuchos::RCP loc = bt_lof->buildGhostedLinearObjContainer(); + bt_lof->initializeGhostedContainer(LinearObjContainer::F, *dd_loc); + dd_loc->initialize(); + + bt_lof->initializeGhostedContainer(LinearObjContainer::X | LinearObjContainer::F, *loc); + loc->initialize(); + + Teuchos::RCP b_dd_loc = Teuchos::rcp_dynamic_cast(dd_loc); + Teuchos::RCP b_loc = Teuchos::rcp_dynamic_cast(loc); + Teuchos::RCP> p_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_x()); + Thyra::assign(p_vec->getNonconstVectorBlock(0).ptr(), 123.0 + myRank); + Thyra::assign(p_vec->getNonconstVectorBlock(1).ptr(), 456.0 + myRank); + Thyra::assign(p_vec->getNonconstVectorBlock(2).ptr(), 789.0 + myRank); + + std::vector> tangentContainers; + + using LOCPair = panzer::LOCPair_GlobalEvaluationData; + using Teuchos::rcp_dynamic_cast; + + // generate tangent data + for (std::size_t i=0;i(locPair->getGlobalLOC()); + Teuchos::RCP> global_p_vec = Teuchos::rcp_dynamic_cast>(global_bt_loc->get_x()); + Thyra::assign(global_p_vec->getNonconstVectorBlock(0).ptr(), 0.123 + myRank + i); + Thyra::assign(global_p_vec->getNonconstVectorBlock(1).ptr(), 0.456 + myRank + i); + Thyra::assign(global_p_vec->getNonconstVectorBlock(2).ptr(), 0.789 + myRank + i); + + auto ghosted_bt_loc = rcp_dynamic_cast(locPair->getGhostedLOC()); + Teuchos::RCP> ghosted_p_vec = 
Teuchos::rcp_dynamic_cast>(ghosted_bt_loc->get_x()); + Thyra::assign(ghosted_p_vec->getNonconstVectorBlock(0).ptr(), 0.123 + myRank + i); + Thyra::assign(ghosted_p_vec->getNonconstVectorBlock(1).ptr(), 0.456 + myRank + i); + Thyra::assign(ghosted_p_vec->getNonconstVectorBlock(2).ptr(), 0.789 + myRank + i); + + tangentContainers.push_back(locPair); + } + + // setup field manager, add evaluator under test + ///////////////////////////////////////////////////////////// + + auto fm = Teuchos::rcp(new PHX::FieldManager); + + std::vector derivative_dimensions; + derivative_dimensions.push_back(numParams); + fm->setKokkosExtendedDataTypeDimensions(derivative_dimensions); + + std::string resName = ""; + Teuchos::RCP> names_map = + Teuchos::rcp(new std::map); + names_map->insert(std::make_pair(fieldName1_q1, resName + fieldName1_q1)); + names_map->insert(std::make_pair(fieldName2_q1, resName + fieldName2_q1)); + names_map->insert(std::make_pair(fieldName_qedge1, resName + fieldName_qedge1)); + + // evaluators under test + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName1_q1); + names->push_back(resName + fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Scatter Name", "ScatterQ1"); + if (scatter_IC) { + pl.set("Basis", basis_q1.getConst()); + } else { + pl.set("Basis", basis_q1); + } + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", false); + pl.set("Scatter Initial Condition", scatter_IC); + + Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm->registerEvaluator(evaluator); + fm->requireField(*evaluator->evaluatedFields()[0]); + } + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName_qedge1); + + Teuchos::ParameterList pl; + 
pl.set("Scatter Name", "ScatterQEdge1"); + if (scatter_IC) { + pl.set("Basis", basis_qedge1.getConst()); + } else { + pl.set("Basis", basis_qedge1); + } + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", false); + pl.set("Scatter Initial Condition", scatter_IC); + + Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm->registerEvaluator(evaluator); + fm->requireField(*evaluator->evaluatedFields()[0]); + } + + // support evaluators + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName1_q1); + names->push_back(fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_q1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + Teuchos::RCP>> tangent_names = + Teuchos::rcp(new std::vector>(2)); + for (std::size_t i = 0; i < numParams; ++i) + { + std::stringstream ss1, ss2; + ss1 << fieldName1_q1 << " Tangent " << i; + ss2 << fieldName2_q1 << " Tangent " << i; + (*tangent_names)[0].push_back(ss1.str()); + (*tangent_names)[1].push_back(ss2.str()); + } + pl.set("Tangent Names", tangent_names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + fm->registerEvaluator(evaluator); + } + for (std::size_t i = 0; i < numParams; ++i) { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + RCP> tangent_names = rcp(new std::vector); + names->push_back(fieldName1_q1); + names->push_back(fieldName2_q1); + { + std::stringstream ss1, ss2; + ss1 << fieldName1_q1 << " Tangent " << i; + ss2 << fieldName2_q1 << " Tangent " << i; + tangent_names->push_back(ss1.str()); + tangent_names->push_back(ss2.str()); + } + + Teuchos::ParameterList pl; + pl.set("Basis", basis_q1); + pl.set("DOF Names", tangent_names); + pl.set("Indexer Names", names); + + std::stringstream ss; + ss << "Tangent 
Container " << i; + pl.set("Global Data Key", ss.str()); + + Teuchos::RCP> evaluator = + lof->buildGatherTangent(pl); + + fm->registerEvaluator(evaluator); + } + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_qedge1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + Teuchos::RCP>> tangent_names = + Teuchos::rcp(new std::vector>(1)); + for (std::size_t i = 0; i < numParams; ++i) + { + std::stringstream ss; + ss << fieldName_qedge1 << " Tangent " << i; + (*tangent_names)[0].push_back(ss.str()); + } + pl.set("Tangent Names", tangent_names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + fm->registerEvaluator(evaluator); + } + for (std::size_t i = 0; i < numParams; ++i) { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + RCP> tangent_names = rcp(new std::vector); + names->push_back(fieldName_qedge1); + { + std::stringstream ss; + ss << fieldName_qedge1 << " Tangent " << i; + tangent_names->push_back(ss.str()); + } + + Teuchos::ParameterList pl; + pl.set("Basis", basis_qedge1); + pl.set("DOF Names", tangent_names); + pl.set("Indexer Names", names); + + std::stringstream ss; + ss << "Tangent Container " << i; + pl.set("Global Data Key", ss.str()); + + Teuchos::RCP> evaluator = + lof->buildGatherTangent(pl); + + fm->registerEvaluator(evaluator); + } + + panzer::Traits::SD sd; + sd.worksets_ = work_sets; + + fm->postRegistrationSetup(sd); + + panzer::Traits::PED ped; + ped.gedc->addDataObject("Dirichlet Counter", dd_loc); + ped.gedc->addDataObject("Solution Gather Container", loc); + ped.gedc->addDataObject("Residual Scatter Container", loc); + for (size_t i=0; iaddDataObject(ss.str(), tangentContainers[i]); + } + std::vector params; + std::vector> paramContainers; + for (std::size_t i = 0; iaddDataObject(ss.str(),paramContainer->getGhostedLOC()); + 
paramContainers.push_back(paramContainer); + } + Teuchos::RCP activeParams = + Teuchos::rcp(new panzer::ParameterList_GlobalEvaluationData(params)); + ped.gedc->addDataObject("PARAMETER_NAMES",activeParams); + + fm->preEvaluate(ped); + + // run tests + ///////////////////////////////////////////////////////////// + + panzer::Workset &workset = (*work_sets)[0]; + workset.alpha = 0.0; + workset.beta = 2.0; // derivatives multiplied by 2 + workset.time = 0.0; + workset.evaluate_transient_terms = false; + + fm->evaluateFields(workset); + + fm = Teuchos::null; + + // test Residual fields + panzer::index_t dd_count(0); + Teuchos::ArrayRCP data, dd_data; + Teuchos::RCP> x_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_x()); + Teuchos::RCP> f_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_f()); + Teuchos::RCP> dd_vec = Teuchos::rcp_dynamic_cast>(b_dd_loc->get_f()); + + // check all the residual values. + + if (scatter_IC) { + Teuchos::rcp_dynamic_cast>(x_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(data)); + } else { + Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(data)); + } + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(0)->getLocalNumElements()); + TEST_EQUALITY(data.size(), dd_data.size()); + dd_count = 0; + for (int i = 0; i < data.size(); i++) + { + + double target = 123.0 + myRank; + if (dd_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dd_count++; + } + } + if (scatter_IC) { + TEST_EQUALITY(dd_count, data.size()); // filled everywhere + } else { + TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared + } + + if (scatter_IC) { + Teuchos::rcp_dynamic_cast>(x_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(data)); + } else { + 
Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(data)); + } + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(1)->getLocalNumElements()); + TEST_EQUALITY(data.size(), dd_data.size()); + dd_count = 0; + for (int i = 0; i < data.size(); i++) + { + + double target = 456.0 + myRank; + if (dd_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dd_count++; + } + } + if (scatter_IC) { + TEST_EQUALITY(dd_count, data.size()); // filled everywhere + } else { + TEST_EQUALITY(dd_count, workset.num_cells); // there are 1 edge on the side and the sides are not shared + } + + if (scatter_IC) { + Teuchos::rcp_dynamic_cast>(x_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(data)); + } else { + Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(data)); + } + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(2)->getLocalNumElements()); + TEST_EQUALITY(data.size(), dd_data.size()); + dd_count = 0; + for (int i = 0; i < data.size(); i++) + { + + double target = 789.0 + myRank; + if (dd_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dd_count++; + } + } + if (scatter_IC) { + TEST_EQUALITY(dd_count, data.size()); // filled everywhere + } else { + TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared + } + + // now test tangents + for (std::size_t i=0; i> param_f_vec = + Teuchos::rcp_dynamic_cast>( + Teuchos::rcp_dynamic_cast(paramContainers[i]->getGhostedLOC())->get_f()); + + Teuchos::rcp_dynamic_cast>(param_f_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(data)); + 
Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(0)->getLocalNumElements()); + for (size_type j = 0; j < data.size(); j++) + { + double target = .123 + myRank + i; + if (dd_data[j] == 0.0) + { + TEST_EQUALITY(data[j],0.0); + } + else + { + TEST_EQUALITY(data[j],target); + } + } + Teuchos::rcp_dynamic_cast>(param_f_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(data)); + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(1)->getLocalNumElements()); + for (size_type j = 0; j < data.size(); j++) + { + double target = .456 + myRank + i; + if (dd_data[j] == 0.0) + { + TEST_EQUALITY(data[j],0.0); + } + else + { + TEST_EQUALITY(data[j],target); + } + } + Teuchos::rcp_dynamic_cast>(param_f_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(data)); + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(2)->getLocalNumElements()); + for (size_type j = 0; j < data.size(); j++) + { + double target = .789 + myRank + i; + if (dd_data[j] == 0.0) + { + TEST_EQUALITY(data[j],0.0); + } + else + { + TEST_EQUALITY(data[j],target); + } + } + } + } + } + Teuchos::RCP buildBasis(std::size_t worksetSize, const std::string &basisName) + { + Teuchos::RCP topo = + Teuchos::rcp(new shards::CellTopology(shards::getCellTopologyData>())); + + panzer::CellData cellData(worksetSize, topo); + return Teuchos::rcp(new panzer::PureBasis(basisName, 1, cellData)); + } + + Teuchos::RCP buildMesh(int elemX, int elemY) + { + RCP pl = rcp(new Teuchos::ParameterList); + pl->set("X Blocks", 1); + pl->set("Y Blocks", 1); + pl->set("X Elements", elemX); + pl->set("Y Elements", elemY); + + panzer_stk::SquareQuadMeshFactory factory; + factory.setParameterList(pl); + 
RCP mesh = factory.buildUncommitedMesh(MPI_COMM_WORLD); + factory.completeMeshConstruction(*mesh, MPI_COMM_WORLD); + + return mesh; + } + + void testInitialization(const Teuchos::RCP &ipb) + { + // Physics block + ipb->setName("test physics"); + { + Teuchos::ParameterList &p = ipb->sublist("a"); + p.set("Type", "Energy"); + p.set("Prefix", ""); + p.set("Model ID", "solid"); + p.set("Basis Type", "HGrad"); + p.set("Basis Order", 1); + p.set("Integration Order", 1); + } + { + Teuchos::ParameterList &p = ipb->sublist("b"); + p.set("Type", "Energy"); + p.set("Prefix", "ION_"); + p.set("Model ID", "solid"); + p.set("Basis Type", "HCurl"); + p.set("Basis Order", 1); + p.set("Integration Order", 1); + } + } + +} diff --git a/packages/panzer/adapters-stk/test/evaluator_tests/tpetra_scatter_dirichlet_residual.cpp b/packages/panzer/adapters-stk/test/evaluator_tests/tpetra_scatter_dirichlet_residual.cpp index ff01ebb4f976..158e0d5ddbe2 100644 --- a/packages/panzer/adapters-stk/test/evaluator_tests/tpetra_scatter_dirichlet_residual.cpp +++ b/packages/panzer/adapters-stk/test/evaluator_tests/tpetra_scatter_dirichlet_residual.cpp @@ -20,15 +20,16 @@ using Teuchos::rcp; #include "Teuchos_GlobalMPISession.hpp" #include "Panzer_FieldManagerBuilder.hpp" -#include "Panzer_BlockedDOFManager.hpp" -#include "Panzer_BlockedTpetraLinearObjFactory.hpp" +#include "Panzer_DOFManager.hpp" +#include "Panzer_TpetraLinearObjFactory.hpp" #include "Panzer_PureBasis.hpp" #include "Panzer_BasisIRLayout.hpp" #include "Panzer_Workset.hpp" #include "Panzer_GatherOrientation.hpp" -#include "Panzer_ScatterResidual_BlockedTpetra.hpp" -#include "Panzer_GatherSolution_BlockedTpetra.hpp" +#include "Panzer_ScatterDirichletResidual_Tpetra.hpp" #include "Panzer_GlobalEvaluationDataContainer.hpp" +#include "Panzer_LOCPair_GlobalEvaluationData.hpp" +#include "Panzer_ParameterList_GlobalEvaluationData.hpp" #include "Panzer_STK_Version.hpp" #include "PanzerAdaptersSTK_config.hpp" @@ -43,7 +44,6 @@ using 
Teuchos::rcp; #include "Thyra_VectorStdOps.hpp" #include "Thyra_ProductVectorBase.hpp" #include "Thyra_SpmdVectorBase.hpp" -//#include "Thyra_get_Epetra_Operator.hpp" #include "Tpetra_CrsMatrix.hpp" #include "Tpetra_Map.hpp" @@ -59,8 +59,8 @@ using Teuchos::rcp; namespace panzer { - using TpetraBlockedLinObjFactoryType = panzer::BlockedTpetraLinearObjFactory; - using TpetraBlockedLinObjContainerType = panzer::BlockedTpetraLinearObjContainer; + using TpetraLinObjFactoryType = panzer::TpetraLinearObjFactory; + using TpetraLinObjContainerType = panzer::TpetraLinearObjContainer; using Tpetra_CrsMatrix = Tpetra::CrsMatrix; using Thyra_TpetraLinearOp = Thyra::TpetraLinearOp; @@ -68,7 +68,7 @@ namespace panzer void testInitialization(const Teuchos::RCP &ipb); Teuchos::RCP buildMesh(int elemX, int elemY); - TEUCHOS_UNIT_TEST(block_assembly, scatter_dirichlet_residual) + TEUCHOS_UNIT_TEST(assembly, scatter_dirichlet_residual) { #ifdef HAVE_MPI @@ -105,238 +105,225 @@ namespace panzer physicsBlock->getWorksetNeeds()); TEST_EQUALITY(work_sets->size(), 1); - // build connection manager and field manager - const Teuchos::RCP conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh)); - RCP dofManager = Teuchos::rcp(new panzer::BlockedDOFManager(conn_manager, MPI_COMM_WORLD)); + std::vector scatter_IC_vec = {false,true}; - dofManager->addField(fieldName1_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); - dofManager->addField(fieldName2_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); - dofManager->addField(fieldName_qedge1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_qedge1->getIntrepid2Basis()))); + for (const bool scatter_IC : scatter_IC_vec) { + // build connection manager and field manager + const Teuchos::RCP conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh)); + RCP dofManager = Teuchos::rcp(new panzer::DOFManager(conn_manager, MPI_COMM_WORLD)); - std::vector> 
fieldOrder(3); - fieldOrder[0].push_back(fieldName1_q1); - fieldOrder[1].push_back(fieldName_qedge1); - fieldOrder[2].push_back(fieldName2_q1); - dofManager->setFieldOrder(fieldOrder); + dofManager->addField(fieldName1_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName2_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName_qedge1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_qedge1->getIntrepid2Basis()))); - // dofManager->setOrientationsRequired(true); - dofManager->buildGlobalUnknowns(); + std::vector fieldOrder; + fieldOrder.push_back(fieldName1_q1); + fieldOrder.push_back(fieldName_qedge1); + fieldOrder.push_back(fieldName2_q1); + dofManager->setFieldOrder(fieldOrder); - // setup linear object factory - ///////////////////////////////////////////////////////////// + dofManager->buildGlobalUnknowns(); - Teuchos::RCP bt_lof = Teuchos::rcp(new TpetraBlockedLinObjFactoryType(tComm.getConst(), dofManager)); - Teuchos::RCP> lof = bt_lof; - Teuchos::RCP dd_loc = bt_lof->buildGhostedLinearObjContainer(); - Teuchos::RCP loc = bt_lof->buildGhostedLinearObjContainer(); - bt_lof->initializeGhostedContainer(LinearObjContainer::F, *dd_loc); - dd_loc->initialize(); + // setup linear object factory + ///////////////////////////////////////////////////////////// + Teuchos::RCP t_lof = Teuchos::rcp(new TpetraLinObjFactoryType(tComm.getConst(), dofManager)); + Teuchos::RCP> lof = t_lof; + Teuchos::RCP loc = t_lof->buildGhostedLinearObjContainer(); + Teuchos::RCP dc_loc = t_lof->buildGhostedLinearObjContainer(); - bt_lof->initializeGhostedContainer(LinearObjContainer::X | LinearObjContainer::F, *loc); - loc->initialize(); + t_lof->initializeGhostedContainer(LinearObjContainer::X | LinearObjContainer::F, *loc); + loc->initialize(); - Teuchos::RCP b_dd_loc = Teuchos::rcp_dynamic_cast(dd_loc); - Teuchos::RCP b_loc = Teuchos::rcp_dynamic_cast(loc); 
- Teuchos::RCP> p_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_x()); - Thyra::assign(p_vec->getNonconstVectorBlock(0).ptr(), 123.0 + myRank); - Thyra::assign(p_vec->getNonconstVectorBlock(1).ptr(), 456.0 + myRank); - Thyra::assign(p_vec->getNonconstVectorBlock(2).ptr(), 789.0 + myRank); + t_lof->initializeGhostedContainer(LinearObjContainer::F, *dc_loc); + dc_loc->initialize(); + Teuchos::RCP t_dc_loc = Teuchos::rcp_dynamic_cast(dc_loc); - // setup field manager, add evaluator under test - ///////////////////////////////////////////////////////////// + Teuchos::RCP t_loc = Teuchos::rcp_dynamic_cast(loc); - PHX::FieldManager fm; + Teuchos::RCP> x_vec = t_loc->get_x_th(); + Thyra::assign(x_vec.ptr(), 123.0 + myRank); - std::string resName = ""; - Teuchos::RCP> names_map = - Teuchos::rcp(new std::map); - names_map->insert(std::make_pair(fieldName1_q1, resName + fieldName1_q1)); - names_map->insert(std::make_pair(fieldName2_q1, resName + fieldName2_q1)); - names_map->insert(std::make_pair(fieldName_qedge1, resName + fieldName_qedge1)); + // setup field manager, add evaluator under test + ///////////////////////////////////////////////////////////// - // evaluators under test - { - using Teuchos::RCP; - using Teuchos::rcp; - RCP> names = rcp(new std::vector); - names->push_back(resName + fieldName1_q1); - names->push_back(resName + fieldName2_q1); - - Teuchos::ParameterList pl; - pl.set("Scatter Name", "ScatterQ1"); - pl.set("Basis", basis_q1); - pl.set("Dependent Names", names); - pl.set("Dependent Map", names_map); - pl.set("Side Subcell Dimension", 1); - pl.set("Local Side ID", 2); - pl.set("Check Apply BC", false); - - Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); - - TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); - - fm.registerEvaluator(evaluator); - fm.requireField(*evaluator->evaluatedFields()[0]); - } - { - using Teuchos::RCP; - using Teuchos::rcp; - RCP> names = rcp(new std::vector); - names->push_back(resName + fieldName_qedge1); - - 
Teuchos::ParameterList pl; - pl.set("Scatter Name", "ScatterQEdge1"); - pl.set("Basis", basis_qedge1); - pl.set("Dependent Names", names); - pl.set("Dependent Map", names_map); - pl.set("Side Subcell Dimension", 1); - pl.set("Local Side ID", 2); - pl.set("Check Apply BC", false); - - Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); - - TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); - - fm.registerEvaluator(evaluator); - fm.requireField(*evaluator->evaluatedFields()[0]); - } - - // support evaluators - { - using Teuchos::RCP; - using Teuchos::rcp; - RCP> names = rcp(new std::vector); - names->push_back(fieldName1_q1); - names->push_back(fieldName2_q1); - - Teuchos::ParameterList pl; - pl.set("Basis", basis_q1); - pl.set("DOF Names", names); - pl.set("Indexer Names", names); + PHX::FieldManager fm; - Teuchos::RCP> evaluator = lof->buildGather(pl); - - fm.registerEvaluator(evaluator); - } - { - using Teuchos::RCP; - using Teuchos::rcp; - RCP> names = rcp(new std::vector); - names->push_back(fieldName_qedge1); - - Teuchos::ParameterList pl; - pl.set("Basis", basis_qedge1); - pl.set("DOF Names", names); - pl.set("Indexer Names", names); - - Teuchos::RCP> evaluator = lof->buildGather(pl); - - fm.registerEvaluator(evaluator); - } + std::string resName = ""; + Teuchos::RCP> names_map = + Teuchos::rcp(new std::map); + names_map->insert(std::make_pair(fieldName1_q1, resName + fieldName1_q1)); + names_map->insert(std::make_pair(fieldName2_q1, resName + fieldName2_q1)); + names_map->insert(std::make_pair(fieldName_qedge1, resName + fieldName_qedge1)); - std::vector derivative_dimensions; - derivative_dimensions.push_back(12); - fm.setKokkosExtendedDataTypeDimensions(derivative_dimensions); - - panzer::Traits::SD sd; - sd.worksets_ = work_sets; - - fm.postRegistrationSetup(sd); - - // panzer::Traits::PED ped; - // ped.dirichletData.ghostedCounter = dd_loc; - // fm.preEvaluate(ped); - panzer::Traits::PED ped; - ped.gedc->addDataObject("Dirichlet Counter", 
dd_loc); - ped.gedc->addDataObject("Solution Gather Container", loc); - ped.gedc->addDataObject("Residual Scatter Container", loc); - fm.preEvaluate(ped); - - // run tests - ///////////////////////////////////////////////////////////// - - panzer::Workset &workset = (*work_sets)[0]; - workset.alpha = 0.0; - workset.beta = 2.0; // derivatives multiplied by 2 - workset.time = 0.0; - workset.evaluate_transient_terms = false; - - fm.evaluateFields(workset); - - // test Residual fields - panzer::index_t dd_count(0); - Teuchos::ArrayRCP data, dd_data; - Teuchos::RCP> f_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_f()); - Teuchos::RCP> dd_vec = Teuchos::rcp_dynamic_cast>(b_dd_loc->get_f()); - - // check all the residual values. This is kind of crappy test since it simply checks twice the target - // value and the target. Its this way because you add two entries across elements. - - Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(data)); - Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(dd_data)); - TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(0)->getLocalNumElements()); - TEST_EQUALITY(data.size(), dd_data.size()); - dd_count = 0; - for (int i = 0; i < data.size(); i++) - { - - double target = 123.0 + myRank; - if (dd_data[i] == 0.0) + // evaluators under test { - TEST_EQUALITY(data[i], 0.0); + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName1_q1); + names->push_back(resName + fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Scatter Name", "ScatterQ1"); + if (scatter_IC) { + pl.set("Basis", basis_q1.getConst()); + } else { + pl.set("Basis", basis_q1); + } + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", false); + pl.set("Scatter Initial Condition", scatter_IC); + + Teuchos::RCP> 
evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm.registerEvaluator(evaluator); + fm.requireField(*evaluator->evaluatedFields()[0]); } - else { - TEST_EQUALITY(data[i], target); - dd_count++; + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Scatter Name", "ScatterQEdge1"); + if (scatter_IC) { + pl.set("Basis", basis_qedge1.getConst()); + } else { + pl.set("Basis", basis_qedge1); + } + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", false); + pl.set("Scatter Initial Condition", scatter_IC); + + Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm.registerEvaluator(evaluator); + fm.requireField(*evaluator->evaluatedFields()[0]); } - } - TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared - - Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(data)); - Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(dd_data)); - TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(1)->getLocalNumElements()); - TEST_EQUALITY(data.size(), dd_data.size()); - dd_count = 0; - for (int i = 0; i < data.size(); i++) - { - double target = 456.0 + myRank; - if (dd_data[i] == 0.0) + // support evaluators { - TEST_EQUALITY(data[i], 0.0); + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName1_q1); + names->push_back(fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_q1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + 
fm.registerEvaluator(evaluator); } - else { - TEST_EQUALITY(data[i], target); - dd_count++; - } - } - TEST_EQUALITY(dd_count, workset.num_cells); // there are 2 nodes on the side and the sides are not shared - - Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(data)); - Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(dd_data)); - TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(2)->getLocalNumElements()); - TEST_EQUALITY(data.size(), dd_data.size()); - dd_count = 0; - for (int i = 0; i < data.size(); i++) - { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName_qedge1); - double target = 789.0 + myRank; - if (dd_data[i] == 0.0) - { - TEST_EQUALITY(data[i], 0.0); + Teuchos::ParameterList pl; + pl.set("Basis", basis_qedge1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + fm.registerEvaluator(evaluator); } - else - { - TEST_EQUALITY(data[i], target); - dd_count++; + + panzer::Traits::SD sd; + sd.worksets_ = work_sets; + + fm.postRegistrationSetup(sd); + + panzer::Traits::PED ped; + ped.gedc->addDataObject("Dirichlet Counter", dc_loc); + ped.gedc->addDataObject("Solution Gather Container", loc); + ped.gedc->addDataObject("Residual Scatter Container", loc); + fm.preEvaluate(ped); + + // run tests + ///////////////////////////////////////////////////////////// + + panzer::Workset &workset = (*work_sets)[0]; + workset.alpha = 0.0; + workset.beta = 2.0; // derivatives multiplied by 2 + workset.time = 0.0; + workset.evaluate_transient_terms = false; + + fm.evaluateFields(workset); + fm.postEvaluate(0); + + // test Residual fields + panzer::index_t dc_count(0); + Teuchos::ArrayRCP data, dc_data; + Teuchos::RCP> f_vec = t_loc->get_f_th(); + Teuchos::RCP> dc_vec = t_dc_loc->get_f_th(); + + // check all the residual values and the count + + 
Teuchos::rcp_dynamic_cast>(dc_vec)->getLocalData(Teuchos::ptrFromRef(dc_data)); + if (scatter_IC) { + Teuchos::rcp_dynamic_cast>(x_vec)->getLocalData(Teuchos::ptrFromRef(data)); + + TEST_EQUALITY(static_cast(data.size()), t_lof->getGhostedMap()->getLocalNumElements()); + TEST_EQUALITY(data.size(), dc_data.size()); + dc_count = 0; + for (int i = 0; i < data.size(); i++) + { + double target = 123.0 + myRank; + if (dc_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dc_count++; + } + } + // Filled everywhere + TEST_EQUALITY(dc_count, data.size()); + } else { + Teuchos::rcp_dynamic_cast>(f_vec)->getLocalData(Teuchos::ptrFromRef(data)); + TEST_EQUALITY(static_cast(data.size()), t_lof->getGhostedMap()->getLocalNumElements()); + TEST_EQUALITY(data.size(), dc_data.size()); + dc_count = 0; + for (int i = 0; i < data.size(); i++) + { + double target = 123.0 + myRank; + if (dc_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dc_count++; + } + } + // there are 2 nodes or 1 edge on the side and the sides are not shared. 
+ // 2 nodal functions, 1 edge function + TEST_EQUALITY(dc_count, 5 * workset.num_cells); } } - TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared } - TEUCHOS_UNIT_TEST(block_assembly, scatter_dirichlet_jacobian) + TEUCHOS_UNIT_TEST(assembly, scatter_dirichlet_tangent) { #ifdef HAVE_MPI @@ -351,6 +338,7 @@ namespace panzer const std::string fieldName1_q1 = "U"; const std::string fieldName2_q1 = "V"; const std::string fieldName_qedge1 = "B"; + const std::size_t numParams = 3; Teuchos::RCP mesh = buildMesh(2, 2); @@ -373,247 +361,637 @@ namespace panzer physicsBlock->getWorksetNeeds()); TEST_EQUALITY(work_sets->size(), 1); - // build connection manager and field manager - const Teuchos::RCP conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh)); - RCP dofManager = Teuchos::rcp(new panzer::BlockedDOFManager(conn_manager, MPI_COMM_WORLD)); + std::vector scatter_IC_vec = {false,true}; - dofManager->addField(fieldName1_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); - dofManager->addField(fieldName2_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); - dofManager->addField(fieldName_qedge1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_qedge1->getIntrepid2Basis()))); + for (const bool scatter_IC : scatter_IC_vec) { + // build connection manager and field manager + const Teuchos::RCP conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh)); + RCP dofManager = Teuchos::rcp(new panzer::DOFManager(conn_manager, MPI_COMM_WORLD)); - std::vector> fieldOrder(3); - fieldOrder[0].push_back(fieldName1_q1); - fieldOrder[1].push_back(fieldName_qedge1); - fieldOrder[2].push_back(fieldName2_q1); - dofManager->setFieldOrder(fieldOrder); + dofManager->addField(fieldName1_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName2_q1, Teuchos::rcp(new 
panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName_qedge1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_qedge1->getIntrepid2Basis()))); - // dofManager->setOrientationsRequired(true); - dofManager->buildGlobalUnknowns(); + std::vector fieldOrder; + fieldOrder.push_back(fieldName1_q1); + fieldOrder.push_back(fieldName_qedge1); + fieldOrder.push_back(fieldName2_q1); + dofManager->setFieldOrder(fieldOrder); - // setup linear object factory - ///////////////////////////////////////////////////////////// + dofManager->buildGlobalUnknowns(); - Teuchos::RCP bt_lof = Teuchos::rcp(new TpetraBlockedLinObjFactoryType(tComm.getConst(), dofManager)); - Teuchos::RCP> lof = bt_lof; - Teuchos::RCP dd_loc = bt_lof->buildGhostedLinearObjContainer(); - Teuchos::RCP loc = bt_lof->buildGhostedLinearObjContainer(); - bt_lof->initializeGhostedContainer(LinearObjContainer::F, *dd_loc); - dd_loc->initialize(); + // setup linear object factory + ///////////////////////////////////////////////////////////// + Teuchos::RCP t_lof = Teuchos::rcp(new TpetraLinObjFactoryType(tComm.getConst(), dofManager)); + Teuchos::RCP> lof = t_lof; + Teuchos::RCP loc = t_lof->buildGhostedLinearObjContainer(); + Teuchos::RCP dc_loc = t_lof->buildGhostedLinearObjContainer(); - bt_lof->initializeGhostedContainer(LinearObjContainer::X | LinearObjContainer::F | LinearObjContainer::Mat, *loc); - loc->initialize(); + t_lof->initializeGhostedContainer(LinearObjContainer::X | LinearObjContainer::F, *loc); + loc->initialize(); - Teuchos::RCP b_dd_loc = Teuchos::rcp_dynamic_cast(dd_loc); - Teuchos::RCP b_loc = Teuchos::rcp_dynamic_cast(loc); - Teuchos::RCP> p_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_x()); - Thyra::assign(p_vec->getNonconstVectorBlock(0).ptr(), 123.0 + myRank); - Thyra::assign(p_vec->getNonconstVectorBlock(1).ptr(), 456.0 + myRank); - Thyra::assign(p_vec->getNonconstVectorBlock(2).ptr(), 789.0 + myRank); + 
t_lof->initializeGhostedContainer(LinearObjContainer::F, *dc_loc); + dc_loc->initialize(); + Teuchos::RCP t_dc_loc = Teuchos::rcp_dynamic_cast(dc_loc); - auto blk_A = Teuchos::rcp_dynamic_cast>(b_loc->get_A()); - double values[] = {123.0 + myRank, 456.0 + myRank, 789.0 + myRank}; + Teuchos::RCP t_loc = Teuchos::rcp_dynamic_cast(loc); - for (int i = 0; i < 3; i++) - for (int j = 0; j < 3; j++) - { - auto thyraOp = Teuchos::rcp_dynamic_cast(blk_A->getNonconstBlock(i, j), false); - auto tpetraCrsMatrix = Teuchos::rcp_dynamic_cast(thyraOp->getTpetraOperator(), true); - tpetraCrsMatrix->setAllToScalar(values[i] * values[j]); - } + Teuchos::RCP> x_vec = t_loc->get_x_th(); + Thyra::assign(x_vec.ptr(), 123.0 + myRank); - // setup field manager, add evaluator under test - ///////////////////////////////////////////////////////////// + std::vector> tangentContainers; - PHX::FieldManager fm; + using LOCPair = panzer::LOCPair_GlobalEvaluationData; + using Teuchos::rcp_dynamic_cast; - std::string resName = ""; - Teuchos::RCP> names_map = - Teuchos::rcp(new std::map); - names_map->insert(std::make_pair(fieldName1_q1, resName + fieldName1_q1)); - names_map->insert(std::make_pair(fieldName2_q1, resName + fieldName2_q1)); - names_map->insert(std::make_pair(fieldName_qedge1, resName + fieldName_qedge1)); + // generate tangent data + for (std::size_t i=0;i> names = rcp(new std::vector); - names->push_back(resName + fieldName1_q1); - names->push_back(resName + fieldName2_q1); - - Teuchos::ParameterList pl; - pl.set("Scatter Name", "ScatterQ1"); - pl.set("Basis", basis_q1); - pl.set("Dependent Names", names); - pl.set("Dependent Map", names_map); - pl.set("Side Subcell Dimension", 1); - pl.set("Local Side ID", 2); - pl.set("Check Apply BC", false); - - Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); - - TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); - - fm.registerEvaluator(evaluator); - fm.requireField(*evaluator->evaluatedFields()[0]); - } - { - using Teuchos::RCP; 
- using Teuchos::rcp; - RCP> names = rcp(new std::vector); - names->push_back(resName + fieldName_qedge1); - - Teuchos::ParameterList pl; - pl.set("Scatter Name", "ScatterQEdge1"); - pl.set("Basis", basis_qedge1); - pl.set("Dependent Names", names); - pl.set("Dependent Map", names_map); - pl.set("Side Subcell Dimension", 1); - pl.set("Local Side ID", 2); - pl.set("Check Apply BC", false); - - Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); - - TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); - - fm.registerEvaluator(evaluator); - fm.requireField(*evaluator->evaluatedFields()[0]); - } - - // support evaluators - { - using Teuchos::RCP; - using Teuchos::rcp; - RCP> names = rcp(new std::vector); - names->push_back(fieldName1_q1); - names->push_back(fieldName2_q1); + auto global_t_loc = rcp_dynamic_cast(locPair->getGlobalLOC()); + Teuchos::RCP> global_x_vec = global_t_loc->get_x_th(); + Thyra::assign(global_x_vec.ptr(), 0.123 + myRank + i); - Teuchos::ParameterList pl; - pl.set("Basis", basis_q1); - pl.set("DOF Names", names); - pl.set("Indexer Names", names); + auto ghosted_t_loc = rcp_dynamic_cast(locPair->getGhostedLOC()); + Teuchos::RCP> ghosted_x_vec = ghosted_t_loc->get_x_th(); + Thyra::assign(ghosted_x_vec.ptr(), 0.123 + myRank + i); - Teuchos::RCP> evaluator = lof->buildGather(pl); - - fm.registerEvaluator(evaluator); - } - { - using Teuchos::RCP; - using Teuchos::rcp; - RCP> names = rcp(new std::vector); - names->push_back(fieldName_qedge1); + tangentContainers.push_back(locPair); + } - Teuchos::ParameterList pl; - pl.set("Basis", basis_qedge1); - pl.set("DOF Names", names); - pl.set("Indexer Names", names); + // setup field manager, add evaluator under test + ///////////////////////////////////////////////////////////// - Teuchos::RCP> evaluator = lof->buildGather(pl); + auto fm = Teuchos::rcp(new PHX::FieldManager); - fm.registerEvaluator(evaluator); - } + std::string resName = ""; + Teuchos::RCP> names_map = + Teuchos::rcp(new std::map); + 
names_map->insert(std::make_pair(fieldName1_q1, resName + fieldName1_q1)); + names_map->insert(std::make_pair(fieldName2_q1, resName + fieldName2_q1)); + names_map->insert(std::make_pair(fieldName_qedge1, resName + fieldName_qedge1)); - std::vector derivative_dimensions; - derivative_dimensions.push_back(12); - fm.setKokkosExtendedDataTypeDimensions(derivative_dimensions); - - panzer::Traits::SD sd; - sd.worksets_ = work_sets; - - fm.postRegistrationSetup(sd); - - // panzer::Traits::PED ped; - // ped.dirichletData.ghostedCounter = dd_loc; - // fm.preEvaluate(ped); - panzer::Traits::PED ped; - ped.gedc->addDataObject("Dirichlet Counter", dd_loc); - ped.gedc->addDataObject("Solution Gather Container", loc); - ped.gedc->addDataObject("Residual Scatter Container", loc); - fm.preEvaluate(ped); - - // run tests - ///////////////////////////////////////////////////////////// - - panzer::Workset &workset = (*work_sets)[0]; - workset.alpha = 0.0; - workset.beta = 2.0; // derivatives multiplied by 2 - workset.time = 0.0; - workset.evaluate_transient_terms = false; - - fm.evaluateFields(workset); - - // test Residual fields - panzer::index_t dd_count(0); - Teuchos::ArrayRCP data, dd_data; - Teuchos::RCP> f_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_f()); - Teuchos::RCP> dd_vec = Teuchos::rcp_dynamic_cast>(b_dd_loc->get_f()); - - // check all the residual values. This is kind of crappy test since it simply checks twice the target - // value and the target. Its this way because you add two entries across elements. 
- - Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(data)); - Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(dd_data)); - TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(0)->getLocalNumElements()); - TEST_EQUALITY(data.size(), dd_data.size()); - dd_count = 0; - for (int i = 0; i < data.size(); i++) - { + std::vector derivative_dimensions; + derivative_dimensions.push_back(numParams); + fm->setKokkosExtendedDataTypeDimensions(derivative_dimensions); - double target = 123.0 + myRank; - if (dd_data[i] == 0.0) + // evaluators under test { - TEST_EQUALITY(data[i], 0.0); + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName1_q1); + names->push_back(resName + fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Scatter Name", "ScatterQ1"); + if (scatter_IC) { + pl.set("Basis", basis_q1.getConst()); + } else { + pl.set("Basis", basis_q1); + } + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", false); + pl.set("Scatter Initial Condition", scatter_IC); + + Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm->registerEvaluator(evaluator); + fm->requireField(*evaluator->evaluatedFields()[0]); } - else { - TEST_EQUALITY(data[i], target); - dd_count++; + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Scatter Name", "ScatterQEdge1"); + if (scatter_IC) { + pl.set("Basis", basis_qedge1.getConst()); + } else { + pl.set("Basis", basis_qedge1); + } + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", 
false); + pl.set("Scatter Initial Condition", scatter_IC); + + Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm->registerEvaluator(evaluator); + fm->requireField(*evaluator->evaluatedFields()[0]); } - } - TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared - - Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(data)); - Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(dd_data)); - TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(1)->getLocalNumElements()); - TEST_EQUALITY(data.size(), dd_data.size()); - dd_count = 0; - for (int i = 0; i < data.size(); i++) - { - double target = 456.0 + myRank; - if (dd_data[i] == 0.0) + // support evaluators { - TEST_EQUALITY(data[i], 0.0); + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName1_q1); + names->push_back(fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_q1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + Teuchos::RCP>> tangent_names = + Teuchos::rcp(new std::vector>(2)); + for (std::size_t i = 0; i < numParams; ++i) + { + std::stringstream ss1, ss2; + ss1 << fieldName1_q1 << " Tangent " << i; + ss2 << fieldName2_q1 << " Tangent " << i; + (*tangent_names)[0].push_back(ss1.str()); + (*tangent_names)[1].push_back(ss2.str()); + } + pl.set("Tangent Names", tangent_names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + fm->registerEvaluator(evaluator); + } + for (std::size_t i = 0; i < numParams; ++i) { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + RCP> tangent_names = rcp(new std::vector); + names->push_back(fieldName1_q1); + names->push_back(fieldName2_q1); + { + std::stringstream ss1, ss2; + ss1 << fieldName1_q1 << " Tangent " << i; + ss2 << fieldName2_q1 << " 
Tangent " << i; + tangent_names->push_back(ss1.str()); + tangent_names->push_back(ss2.str()); + } + + Teuchos::ParameterList pl; + pl.set("Basis", basis_q1); + pl.set("DOF Names", tangent_names); + pl.set("Indexer Names", names); + + std::stringstream ss; + ss << "Tangent Container " << i; + pl.set("Global Data Key", ss.str()); + + Teuchos::RCP> evaluator = + lof->buildGatherTangent(pl); + + fm->registerEvaluator(evaluator); } - else { - TEST_EQUALITY(data[i], target); - dd_count++; + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_qedge1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + Teuchos::RCP>> tangent_names = + Teuchos::rcp(new std::vector>(1)); + for (std::size_t i = 0; i < numParams; ++i) + { + std::stringstream ss; + ss << fieldName_qedge1 << " Tangent " << i; + (*tangent_names)[0].push_back(ss.str()); + } + pl.set("Tangent Names", tangent_names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + fm->registerEvaluator(evaluator); + } + for (std::size_t i = 0; i < numParams; ++i) { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + RCP> tangent_names = rcp(new std::vector); + names->push_back(fieldName_qedge1); + { + std::stringstream ss; + ss << fieldName_qedge1 << " Tangent " << i; + tangent_names->push_back(ss.str()); + } + + Teuchos::ParameterList pl; + pl.set("Basis", basis_qedge1); + pl.set("DOF Names", tangent_names); + pl.set("Indexer Names", names); + + std::stringstream ss; + ss << "Tangent Container " << i; + pl.set("Global Data Key", ss.str()); + + Teuchos::RCP> evaluator = + lof->buildGatherTangent(pl); + + fm->registerEvaluator(evaluator); } - } - TEST_EQUALITY(dd_count, workset.num_cells); // there are 2 nodes on the side and the sides are not shared - - Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(data)); - 
Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(dd_data)); - TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(2)->getLocalNumElements()); - TEST_EQUALITY(data.size(), dd_data.size()); - dd_count = 0; - for (int i = 0; i < data.size(); i++) - { - double target = 789.0 + myRank; - if (dd_data[i] == 0.0) - { - TEST_EQUALITY(data[i], 0.0); + panzer::Traits::SD sd; + sd.worksets_ = work_sets; + + fm->postRegistrationSetup(sd); + + panzer::Traits::PED ped; + ped.gedc->addDataObject("Dirichlet Counter", dc_loc); + ped.gedc->addDataObject("Solution Gather Container", loc); + ped.gedc->addDataObject("Residual Scatter Container", loc); + for (size_t i=0; iaddDataObject(ss.str(), tangentContainers[i]); + } + std::vector params; + std::vector> paramContainers; + for (std::size_t i = 0; iaddDataObject(ss.str(),paramContainer->getGhostedLOC()); + paramContainers.push_back(paramContainer); } - else + Teuchos::RCP activeParams = + Teuchos::rcp(new panzer::ParameterList_GlobalEvaluationData(params)); + ped.gedc->addDataObject("PARAMETER_NAMES",activeParams); + fm->preEvaluate(ped); + + // run tests + ///////////////////////////////////////////////////////////// + + panzer::Workset &workset = (*work_sets)[0]; + workset.alpha = 0.0; + workset.beta = 2.0; // derivatives multiplied by 2 + workset.time = 0.0; + workset.evaluate_transient_terms = false; + + fm->evaluateFields(workset); + fm->postEvaluate(0); + + fm = Teuchos::null; + + // test Tangent fields + panzer::index_t dc_count(0); + Teuchos::ArrayRCP data, dc_data; + Teuchos::RCP> f_vec = t_loc->get_f_th(); + Teuchos::RCP> dc_vec = t_dc_loc->get_f_th(); + + // check all the residual values and the count + + Teuchos::rcp_dynamic_cast>(dc_vec)->getLocalData(Teuchos::ptrFromRef(dc_data)); + if (scatter_IC) { + Teuchos::rcp_dynamic_cast>(x_vec)->getLocalData(Teuchos::ptrFromRef(data)); + + TEST_EQUALITY(static_cast(data.size()), t_lof->getGhostedMap()->getLocalNumElements()); + 
TEST_EQUALITY(data.size(), dc_data.size()); + dc_count = 0; + for (int i = 0; i < data.size(); i++) + { + double target = 123.0 + myRank; + if (dc_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dc_count++; + } + } + // Filled everywhere + TEST_EQUALITY(dc_count, data.size()); + } else { + Teuchos::rcp_dynamic_cast>(f_vec)->getLocalData(Teuchos::ptrFromRef(data)); + TEST_EQUALITY(static_cast(data.size()), t_lof->getGhostedMap()->getLocalNumElements()); + TEST_EQUALITY(data.size(), dc_data.size()); + dc_count = 0; + for (int i = 0; i < data.size(); i++) + { + double target = 123.0 + myRank; + if (dc_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dc_count++; + } + } + // there are 2 nodes or 1 edge on the side and the sides are not shared. + // 2 nodal functions, 1 edge function + TEST_EQUALITY(dc_count, 5 * workset.num_cells); + } + for (std::size_t i=0; i tan_data; + Teuchos::RCP> tan_vec = Teuchos::rcp_dynamic_cast(paramContainers[i]->getGhostedLOC())->get_f_th(); + Teuchos::rcp_dynamic_cast>(tan_vec)->getLocalData(Teuchos::ptrFromRef(tan_data)); + + for (int j = 0; j < tan_data.size(); ++j) + { + if (dc_data[j] == 0.) 
{ + TEST_EQUALITY(data[j],0.0); + } else { + const double target = .123 + myRank + i; + TEST_EQUALITY(tan_data[j],target); + } + } } } - TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared } +// TEUCHOS_UNIT_TEST(assembly, scatter_dirichlet_jacobian) +// { +// +//#ifdef HAVE_MPI +// Teuchos::RCP> tComm = Teuchos::rcp(new Teuchos::MpiComm(MPI_COMM_WORLD)); +//#else +// NOPE_PANZER_DOESNT_SUPPORT_SERIAL +//#endif +// +// int myRank = tComm->getRank(); +// +// const std::size_t workset_size = 4; +// const std::string fieldName1_q1 = "U"; +// const std::string fieldName2_q1 = "V"; +// const std::string fieldName_qedge1 = "B"; +// +// Teuchos::RCP mesh = buildMesh(2, 2); +// +// // build input physics block +// Teuchos::RCP basis_q1 = buildBasis(workset_size, "Q1"); +// Teuchos::RCP basis_qedge1 = buildBasis(workset_size, "QEdge1"); +// +// Teuchos::RCP ipb = Teuchos::parameterList(); +// testInitialization(ipb); +// +// const int default_int_order = 1; +// std::string eBlockID = "eblock-0_0"; +// Teuchos::RCP eqset_factory = Teuchos::rcp(new user_app::MyFactory); +// panzer::CellData cellData(workset_size, mesh->getCellTopology("eblock-0_0")); +// Teuchos::RCP gd = panzer::createGlobalData(); +// Teuchos::RCP physicsBlock = +// Teuchos::rcp(new PhysicsBlock(ipb, eBlockID, default_int_order, cellData, eqset_factory, gd, false)); +// +// Teuchos::RCP> work_sets = panzer_stk::buildWorksets(*mesh, physicsBlock->elementBlockID(), +// physicsBlock->getWorksetNeeds()); +// TEST_EQUALITY(work_sets->size(), 1); +// +// // build connection manager and field manager +// const Teuchos::RCP conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh)); +// RCP dofManager = Teuchos::rcp(new panzer::BlockedDOFManager(conn_manager, MPI_COMM_WORLD)); +// +// dofManager->addField(fieldName1_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); +// dofManager->addField(fieldName2_q1, 
Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); +// dofManager->addField(fieldName_qedge1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_qedge1->getIntrepid2Basis()))); +// +// std::vector> fieldOrder(3); +// fieldOrder[0].push_back(fieldName1_q1); +// fieldOrder[1].push_back(fieldName_qedge1); +// fieldOrder[2].push_back(fieldName2_q1); +// dofManager->setFieldOrder(fieldOrder); +// +// // dofManager->setOrientationsRequired(true); +// dofManager->buildGlobalUnknowns(); +// +// // setup linear object factory +// ///////////////////////////////////////////////////////////// +// +// Teuchos::RCP bt_lof = Teuchos::rcp(new TpetraBlockedLinObjFactoryType(tComm.getConst(), dofManager)); +// Teuchos::RCP> lof = bt_lof; +// Teuchos::RCP dd_loc = bt_lof->buildGhostedLinearObjContainer(); +// Teuchos::RCP loc = bt_lof->buildGhostedLinearObjContainer(); +// bt_lof->initializeGhostedContainer(LinearObjContainer::F, *dd_loc); +// dd_loc->initialize(); +// +// bt_lof->initializeGhostedContainer(LinearObjContainer::X | LinearObjContainer::F | LinearObjContainer::Mat, *loc); +// loc->initialize(); +// +// Teuchos::RCP b_dd_loc = Teuchos::rcp_dynamic_cast(dd_loc); +// Teuchos::RCP b_loc = Teuchos::rcp_dynamic_cast(loc); +// Teuchos::RCP> p_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_x()); +// Thyra::assign(p_vec->getNonconstVectorBlock(0).ptr(), 123.0 + myRank); +// Thyra::assign(p_vec->getNonconstVectorBlock(1).ptr(), 456.0 + myRank); +// Thyra::assign(p_vec->getNonconstVectorBlock(2).ptr(), 789.0 + myRank); +// +// auto blk_A = Teuchos::rcp_dynamic_cast>(b_loc->get_A()); +// double values[] = {123.0 + myRank, 456.0 + myRank, 789.0 + myRank}; +// +// for (int i = 0; i < 3; i++) +// for (int j = 0; j < 3; j++) +// { +// auto thyraOp = Teuchos::rcp_dynamic_cast(blk_A->getNonconstBlock(i, j), false); +// auto tpetraCrsMatrix = Teuchos::rcp_dynamic_cast(thyraOp->getTpetraOperator(), true); +// tpetraCrsMatrix->setAllToScalar(values[i] * 
values[j]); +// } +// +// // setup field manager, add evaluator under test +// ///////////////////////////////////////////////////////////// +// +// PHX::FieldManager fm; +// +// std::string resName = ""; +// Teuchos::RCP> names_map = +// Teuchos::rcp(new std::map); +// names_map->insert(std::make_pair(fieldName1_q1, resName + fieldName1_q1)); +// names_map->insert(std::make_pair(fieldName2_q1, resName + fieldName2_q1)); +// names_map->insert(std::make_pair(fieldName_qedge1, resName + fieldName_qedge1)); +// +// // evaluators under test +// { +// using Teuchos::RCP; +// using Teuchos::rcp; +// RCP> names = rcp(new std::vector); +// names->push_back(resName + fieldName1_q1); +// names->push_back(resName + fieldName2_q1); +// +// Teuchos::ParameterList pl; +// pl.set("Scatter Name", "ScatterQ1"); +// pl.set("Basis", basis_q1); +// pl.set("Dependent Names", names); +// pl.set("Dependent Map", names_map); +// pl.set("Side Subcell Dimension", 1); +// pl.set("Local Side ID", 2); +// pl.set("Check Apply BC", false); +// +// Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); +// +// TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); +// +// fm.registerEvaluator(evaluator); +// fm.requireField(*evaluator->evaluatedFields()[0]); +// } +// { +// using Teuchos::RCP; +// using Teuchos::rcp; +// RCP> names = rcp(new std::vector); +// names->push_back(resName + fieldName_qedge1); +// +// Teuchos::ParameterList pl; +// pl.set("Scatter Name", "ScatterQEdge1"); +// pl.set("Basis", basis_qedge1); +// pl.set("Dependent Names", names); +// pl.set("Dependent Map", names_map); +// pl.set("Side Subcell Dimension", 1); +// pl.set("Local Side ID", 2); +// pl.set("Check Apply BC", false); +// +// Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); +// +// TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); +// +// fm.registerEvaluator(evaluator); +// fm.requireField(*evaluator->evaluatedFields()[0]); +// } +// +// // support evaluators +// { +// using Teuchos::RCP; +// 
using Teuchos::rcp; +// RCP> names = rcp(new std::vector); +// names->push_back(fieldName1_q1); +// names->push_back(fieldName2_q1); +// +// Teuchos::ParameterList pl; +// pl.set("Basis", basis_q1); +// pl.set("DOF Names", names); +// pl.set("Indexer Names", names); +// +// Teuchos::RCP> evaluator = lof->buildGather(pl); +// +// fm.registerEvaluator(evaluator); +// } +// { +// using Teuchos::RCP; +// using Teuchos::rcp; +// RCP> names = rcp(new std::vector); +// names->push_back(fieldName_qedge1); +// +// Teuchos::ParameterList pl; +// pl.set("Basis", basis_qedge1); +// pl.set("DOF Names", names); +// pl.set("Indexer Names", names); +// +// Teuchos::RCP> evaluator = lof->buildGather(pl); +// +// fm.registerEvaluator(evaluator); +// } +// +// std::vector derivative_dimensions; +// derivative_dimensions.push_back(12); +// fm.setKokkosExtendedDataTypeDimensions(derivative_dimensions); +// +// panzer::Traits::SD sd; +// sd.worksets_ = work_sets; +// +// fm.postRegistrationSetup(sd); +// +// // panzer::Traits::PED ped; +// // ped.dirichletData.ghostedCounter = dd_loc; +// // fm.preEvaluate(ped); +// panzer::Traits::PED ped; +// ped.gedc->addDataObject("Dirichlet Counter", dd_loc); +// ped.gedc->addDataObject("Solution Gather Container", loc); +// ped.gedc->addDataObject("Residual Scatter Container", loc); +// fm.preEvaluate(ped); +// +// // run tests +// ///////////////////////////////////////////////////////////// +// +// panzer::Workset &workset = (*work_sets)[0]; +// workset.alpha = 0.0; +// workset.beta = 2.0; // derivatives multiplied by 2 +// workset.time = 0.0; +// workset.evaluate_transient_terms = false; +// +// fm.evaluateFields(workset); +// +// // test Residual fields +// panzer::index_t dd_count(0); +// Teuchos::ArrayRCP data, dd_data; +// Teuchos::RCP> f_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_f()); +// Teuchos::RCP> dd_vec = Teuchos::rcp_dynamic_cast>(b_dd_loc->get_f()); +// +// // check all the residual values. 
This is kind of crappy test since it simply checks twice the target +// // value and the target. Its this way because you add two entries across elements. +// +// Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(data)); +// Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(dd_data)); +// TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(0)->getLocalNumElements()); +// TEST_EQUALITY(data.size(), dd_data.size()); +// dd_count = 0; +// for (int i = 0; i < data.size(); i++) +// { +// +// double target = 123.0 + myRank; +// if (dd_data[i] == 0.0) +// { +// TEST_EQUALITY(data[i], 0.0); +// } +// else +// { +// TEST_EQUALITY(data[i], target); +// dd_count++; +// } +// } +// TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared +// +// Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(data)); +// Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(dd_data)); +// TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(1)->getLocalNumElements()); +// TEST_EQUALITY(data.size(), dd_data.size()); +// dd_count = 0; +// for (int i = 0; i < data.size(); i++) +// { +// +// double target = 456.0 + myRank; +// if (dd_data[i] == 0.0) +// { +// TEST_EQUALITY(data[i], 0.0); +// } +// else +// { +// TEST_EQUALITY(data[i], target); +// dd_count++; +// } +// } +// TEST_EQUALITY(dd_count, workset.num_cells); // there are 2 nodes on the side and the sides are not shared +// +// Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(data)); +// Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(dd_data)); +// TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(2)->getLocalNumElements()); +// TEST_EQUALITY(data.size(), dd_data.size()); +// dd_count = 0; +// for (int i = 0; i < data.size(); i++) 
+// { +// +// double target = 789.0 + myRank; +// if (dd_data[i] == 0.0) +// { +// TEST_EQUALITY(data[i], 0.0); +// } +// else +// { +// TEST_EQUALITY(data[i], target); +// dd_count++; +// } +// } +// TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared +// } Teuchos::RCP buildBasis(std::size_t worksetSize, const std::string &basisName) { diff --git a/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_BlockedTpetra.hpp b/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_BlockedTpetra.hpp index 266eab9cc25c..6210f66a950c 100644 --- a/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_BlockedTpetra.hpp +++ b/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_BlockedTpetra.hpp @@ -14,6 +14,7 @@ #include "Phalanx_config.hpp" #include "Phalanx_Evaluator_Macros.hpp" #include "Phalanx_MDField.hpp" +#include "Phalanx_KokkosViewOfViews.hpp" #include "Teuchos_ParameterList.hpp" @@ -260,6 +261,105 @@ class ScatterDirichletResidual_BlockedTpetra +class ScatterDirichletResidual_BlockedTpetra + : public panzer::EvaluatorWithBaseImpl, + public PHX::EvaluatorDerived, + public panzer::CloneableEvaluator { + +public: + ScatterDirichletResidual_BlockedTpetra(const Teuchos::RCP & indexer) + : globalIndexer_(indexer) {} + + ScatterDirichletResidual_BlockedTpetra(const Teuchos::RCP & indexer, + const Teuchos::ParameterList& p); + + void postRegistrationSetup(typename TRAITS::SetupData d, + PHX::FieldManager& vm); + + void preEvaluate(typename TRAITS::PreEvalData d); + + void evaluateFields(typename TRAITS::EvalData workset); + + virtual Teuchos::RCP clone(const Teuchos::ParameterList & pl) const + { return Teuchos::rcp(new ScatterDirichletResidual_BlockedTpetra(globalIndexer_,pl)); } + +private: + typedef typename panzer::Traits::Tangent::ScalarT ScalarT; + typedef typename TRAITS::RealType RealType; + + typedef BlockedTpetraLinearObjContainer 
ContainerType; + typedef Tpetra::Vector VectorType; + typedef Tpetra::CrsMatrix CrsMatrixType; + typedef Tpetra::CrsGraph CrsGraphType; + typedef Tpetra::Map MapType; + typedef Tpetra::Import ImportType; + typedef Tpetra::Export ExportType; + + // dummy field so that the evaluator will have something to do + Teuchos::RCP scatterHolder_; + + // fields that need to be scattered will be put in this vector + std::vector< PHX::MDField > scatterFields_; + + // maps the local (field,element,basis) triplet to a global ID + // for scattering + Teuchos::RCP globalIndexer_; + + //! Vector of global indexers, one for each scattered field + //! respectively. This is the global indexer for the Thyra + //! ProductVector sub-block. + std::vector> fieldGlobalIndexers_; + + //! Field IDs in the local product vector block (not global field id) + std::vector fieldIds_; + + //! Returns the index into the Thyra ProductVector sub-block. Size + //! of number of fields to scatter. + std::vector productVectorBlockIndex_; + + // This maps the scattered field names to the DOF manager field + // For instance a Navier-Stokes map might look like + // fieldMap_["RESIDUAL_Velocity"] --> "Velocity" + // fieldMap_["RESIDUAL_Pressure"] --> "Pressure" + Teuchos::RCP > fieldMap_; + + //! Local indices for unknowns + PHX::View worksetLIDs_; + + //! Offset into the cell lids for each field + std::vector> fieldOffsets_; + + //! The local basis index corresponding to the fieldOffset_. Used to + //! index into the basis index of MDFields. This is only required + //! for tangent/normal BCs. + std::vector> basisIndexForMDFieldOffsets_; + + std::size_t side_subcell_dim_; + std::size_t local_side_id_; + + Teuchos::RCP > dirichletCounter_; + std::string globalDataKey_; // what global data does this fill? + Teuchos::RCP > blockedContainer_; + + //! 
If set to true, allows runtime disabling of dirichlet BCs on node-by-node basis + bool checkApplyBC_; + + // If set to true, scattering an initial condition + bool scatterIC_; + + // Allows runtime disabling of dirichlet BCs on node-by-node basis + std::vector< PHX::MDField > applyBC_; + + // Storage for the tangent data + PHX::ViewOfViews<2,Kokkos::View> dfdpFieldsVoV_; + + ScatterDirichletResidual_BlockedTpetra() {} +}; + } #ifdef Panzer_BUILD_HESSIAN_SUPPORT diff --git a/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_BlockedTpetra_impl.hpp b/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_BlockedTpetra_impl.hpp index 8af54d7c30aa..a975889e5a0b 100644 --- a/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_BlockedTpetra_impl.hpp +++ b/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_BlockedTpetra_impl.hpp @@ -24,6 +24,7 @@ #include "Panzer_BlockedDOFManager.hpp" #include "Panzer_PureBasis.hpp" #include "Panzer_BlockedTpetraLinearObjContainer.hpp" +#include "Panzer_ParameterList_GlobalEvaluationData.hpp" #include "Panzer_GlobalEvaluationDataContainer.hpp" #include "Phalanx_DataLayout_MDALayout.hpp" @@ -125,7 +126,7 @@ ScatterDirichletResidual_BlockedTpetra(const Teuchos::RCP("Global Data Key")) globalDataKey_ = p.get("Global Data Key"); - this->setName(scatterName+" Scatter Residual"); + this->setName(scatterName+" Scatter Dirichlet Residual"); } // ********************************************************************** @@ -339,7 +340,7 @@ ScatterDirichletResidual_BlockedTpetra(const Teuchos::RCP("Global Data Key")) globalDataKey_ = p.get("Global Data Key"); - this->setName(scatterName+" Scatter Residual (Jacobian)"); + this->setName(scatterName+" Scatter Dirichlet Residual (Jacobian)"); } // ********************************************************************** @@ -617,4 +618,253 @@ evaluateFields(typename TRAITS::EvalData workset) // 
********************************************************************** +// ********************************************************************** +// Specialization: Tangent +// ********************************************************************** + + +template +panzer::ScatterDirichletResidual_BlockedTpetra:: +ScatterDirichletResidual_BlockedTpetra(const Teuchos::RCP & indexer, + const Teuchos::ParameterList& p) + : globalIndexer_(indexer) + , globalDataKey_("Residual Scatter Container") +{ + std::string scatterName = p.get("Scatter Name"); + scatterHolder_ = + Teuchos::rcp(new PHX::Tag(scatterName,Teuchos::rcp(new PHX::MDALayout(0)))); + + // get names to be evaluated + const std::vector& names = + *(p.get< Teuchos::RCP< std::vector > >("Dependent Names")); + + // grab map from evaluated names to field names + fieldMap_ = p.get< Teuchos::RCP< std::map > >("Dependent Map"); + + // determine if we are scattering an initial condition + scatterIC_ = p.isParameter("Scatter Initial Condition") ? p.get("Scatter Initial Condition") : false; + + Teuchos::RCP dl = (!scatterIC_) ? + p.get< Teuchos::RCP >("Basis")->functional : + p.get< Teuchos::RCP >("Basis")->functional; + if (!scatterIC_) { + side_subcell_dim_ = p.get("Side Subcell Dimension"); + local_side_id_ = p.get("Local Side ID"); + } + + // build the vector of fields that this is dependent on + scatterFields_.resize(names.size()); + for (std::size_t eq = 0; eq < names.size(); ++eq) { + scatterFields_[eq] = PHX::MDField(names[eq],dl); + + // tell the field manager that we depend on this field + this->addDependentField(scatterFields_[eq]); + } + + checkApplyBC_ = p.isParameter("Check Apply BC") ? 
p.get("Check Apply BC") : false; + applyBC_.resize(names.size()); // must allocate (even if not used) to support lambda capture + if (checkApplyBC_) { + for (std::size_t eq = 0; eq < names.size(); ++eq) { + applyBC_[eq] = PHX::MDField(std::string("APPLY_BC_")+fieldMap_->find(names[eq])->second,dl); + this->addDependentField(applyBC_[eq]); + } + } + + // this is what this evaluator provides + this->addEvaluatedField(*scatterHolder_); + + if (p.isType("Global Data Key")) + globalDataKey_ = p.get("Global Data Key"); + + this->setName(scatterName+" Scatter Dirichlet Residual"); +} + +// ********************************************************************** +template +void panzer::ScatterDirichletResidual_BlockedTpetra:: +postRegistrationSetup(typename TRAITS::SetupData d, + PHX::FieldManager& /* fm */) +{ + const Workset & workset_0 = (*d.worksets_)[0]; + const std::string blockId = this->wda(workset_0).block_id; + + fieldIds_.resize(scatterFields_.size()); + fieldOffsets_.resize(scatterFields_.size()); + basisIndexForMDFieldOffsets_.resize(scatterFields_.size()); + fieldGlobalIndexers_.resize(scatterFields_.size()); + productVectorBlockIndex_.resize(scatterFields_.size()); + int maxElementBlockGIDCount = -1; + for(std::size_t fd=0;fdfind(scatterFields_[fd].fieldTag().name())->second; + + const int globalFieldNum = globalIndexer_->getFieldNum(fieldName); // Field number in the aggregate BlockDOFManager + productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum); + fieldGlobalIndexers_[fd] = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]]; + fieldIds_[fd] = fieldGlobalIndexers_[fd]->getFieldNum(fieldName); // Field number in the sub-global-indexer + + // Offsets and basisIndex depend on whether scattering IC or Dirichlet BC + if (!scatterIC_) { + const auto& offsetPair = fieldGlobalIndexers_[fd]->getGIDFieldOffsets_closure(blockId,fieldIds_[fd],side_subcell_dim_,local_side_id_); + { + const auto& offsets = offsetPair.first; + 
fieldOffsets_[fd] = PHX::View("ScatterDirichletResidual_BlockedTpetra(Tangent):fieldOffsets",offsets.size()); + auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]); + for (std::size_t i=0; i < offsets.size(); ++i) + hostOffsets(i) = offsets[i]; + Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets); + } + { + const auto& basisIndex = offsetPair.second; + basisIndexForMDFieldOffsets_[fd] = PHX::View("ScatterDirichletResidual_BlockedTpetra(Tangent):basisIndexForMDFieldOffsets",basisIndex.size()); + auto hostBasisIndex = Kokkos::create_mirror_view(basisIndexForMDFieldOffsets_[fd]); + for (std::size_t i=0; i < basisIndex.size(); ++i) + hostBasisIndex(i) = basisIndex[i]; + Kokkos::deep_copy(basisIndexForMDFieldOffsets_[fd], hostBasisIndex); + } + } + else { + // For ICs, only need offsets, not basisIndex + const std::vector& offsets = fieldGlobalIndexers_[fd]->getGIDFieldOffsets(blockId,fieldIds_[fd]); + fieldOffsets_[fd] = PHX::View("ScatterDirichletResidual_BlockedTpetra(Tangent):fieldOffsets",offsets.size()); + auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]); + for (std::size_t i=0; i < offsets.size(); ++i) + hostOffsets(i) = offsets[i]; + Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets); + } + + maxElementBlockGIDCount = std::max(fieldGlobalIndexers_[fd]->getElementBlockGIDCount(blockId),maxElementBlockGIDCount); + } + + // We will use one workset lid view for all fields, but has to be + // sized big enough to hold the largest elementBlockGIDCount in the + // ProductVector. 
+ worksetLIDs_ = PHX::View("ScatterResidual_BlockedTpetra(Tangent):worksetLIDs", + scatterFields_[0].extent(0), + maxElementBlockGIDCount); +} + +// ********************************************************************** +template +void panzer::ScatterDirichletResidual_BlockedTpetra:: +preEvaluate(typename TRAITS::PreEvalData d) +{ + + // this is the list of parameters and their names that this scatter has to account for + std::vector activeParameters = + Teuchos::rcp_dynamic_cast(d.gedc->getDataObject("PARAMETER_NAMES"))->getActiveParameters(); + + const int numBlocks = static_cast(globalIndexer_->getFieldDOFManagers().size()); + + dfdpFieldsVoV_.initialize("ScatterResidual_Tpetra::dfdpFieldsVoV_",activeParameters.size(),numBlocks); + + for(std::size_t i=0;i paramBlockedContainer = Teuchos::rcp_dynamic_cast(d.gedc->getDataObject(activeParameters[i]),true); + Teuchos::RCP> productVector = + Teuchos::rcp_dynamic_cast>(paramBlockedContainer->get_f(),true); + for(int j=0;j>(productVector->getNonconstVectorBlock(j),true))->getTpetraVector()); + const auto& dfdp_view = tpetraBlock.getLocalViewDevice(Tpetra::Access::ReadWrite); + dfdpFieldsVoV_.addView(dfdp_view,i,j); + } + } + + dfdpFieldsVoV_.syncHostToDevice(); + + // extract dirichlet counter from container + Teuchos::RCP blockContainer + = Teuchos::rcp_dynamic_cast(d.gedc->getDataObject("Dirichlet Counter"),true); + + dirichletCounter_ = Teuchos::rcp_dynamic_cast >(blockContainer->get_f(),true); + TEUCHOS_ASSERT(!Teuchos::is_null(dirichletCounter_)); + + // extract linear object container + blockedContainer_ = Teuchos::rcp_dynamic_cast(d.gedc->getDataObject(globalDataKey_),true); + TEUCHOS_ASSERT(!Teuchos::is_null(blockedContainer_)); +} + +// ********************************************************************** +template +void panzer::ScatterDirichletResidual_BlockedTpetra:: +evaluateFields(typename TRAITS::EvalData workset) +{ + using Teuchos::RCP; + using Teuchos::rcp_dynamic_cast; + using Thyra::VectorBase; + 
using Thyra::ProductVectorBase; + + const auto& localCellIds = this->wda(workset).cell_local_ids_k; + + RCP > thyraScatterTarget = (!scatterIC_) ? + rcp_dynamic_cast >(blockedContainer_->get_f(),true) : + rcp_dynamic_cast >(blockedContainer_->get_x(),true); + + // Loop over scattered fields + int currentWorksetLIDSubBlock = -1; + for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) { + // workset LIDs only change for different sub blocks + if (productVectorBlockIndex_[fieldIndex] != currentWorksetLIDSubBlock) { + fieldGlobalIndexers_[fieldIndex]->getElementLIDs(localCellIds,worksetLIDs_); + currentWorksetLIDSubBlock = productVectorBlockIndex_[fieldIndex]; + } + + // Get Scatter target block + auto& tpetraScatterTarget = *((rcp_dynamic_cast>(thyraScatterTarget->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),true))->getTpetraVector()); + const auto& kokkosScatterTarget = tpetraScatterTarget.getLocalViewDevice(Tpetra::Access::ReadWrite); + + // Get dirichlet counter block + auto& tpetraDirichletCounter = *((rcp_dynamic_cast>(dirichletCounter_->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),true))->getTpetraVector()); + const auto& kokkosDirichletCounter = tpetraDirichletCounter.getLocalViewDevice(Tpetra::Access::ReadWrite); + + // Class data fields for lambda capture + const auto fieldOffsets = fieldOffsets_[fieldIndex]; + const auto basisIndices = basisIndexForMDFieldOffsets_[fieldIndex]; + const auto worksetLIDs = worksetLIDs_; + const auto fieldValues = scatterFields_[fieldIndex].get_static_view(); + const auto applyBC = applyBC_[fieldIndex].get_static_view(); + const bool checkApplyBC = checkApplyBC_; + const auto& tangentFieldsDevice = dfdpFieldsVoV_.getViewDevice(); + const auto& kokkosTangents = Kokkos::subview(tangentFieldsDevice,Kokkos::ALL(),productVectorBlockIndex_[fieldIndex]); + const double num_params = Kokkos::dimension_scalar(fieldValues)-1; + + if (!scatterIC_) { + + 
Kokkos::parallel_for(Kokkos::RangePolicy(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) { + for (int basis=0; basis < static_cast(fieldOffsets.size()); ++basis) { + const int lid = worksetLIDs(cell,fieldOffsets(basis)); + if (lid < 0) // not on this processor! + continue; + const int basisIndex = basisIndices(basis); + + // Possible warp divergence for hierarchic + if (checkApplyBC) + if (!applyBC(cell,basisIndex)) + continue; + + kokkosScatterTarget(lid,0) = fieldValues(cell,basisIndex).val(); + for(int i_param=0; i_param(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) { + for (int basis=0; basis < static_cast(fieldOffsets.size()); ++basis) { + const int lid = worksetLIDs(cell,fieldOffsets(basis)); + if (lid < 0) // not on this processor! + continue; + kokkosScatterTarget(lid,0) = fieldValues(cell,basis).val(); + for(int i_param=0; i_param "Pressure" Teuchos::RCP > fieldMap_; - std::size_t num_nodes; - std::size_t side_subcell_dim_; std::size_t local_side_id_; @@ -116,6 +115,10 @@ class ScatterDirichletResidual_Tpetra > applyBC_; + + PHX::View scratch_lids_; + std::vector > scratch_offsets_; + std::vector > scratch_basisIds_; }; // ************************************************************** @@ -146,6 +149,7 @@ class ScatterDirichletResidual_Tpetra LOC; // dummy field so that the evaluator will have something to do @@ -165,8 +169,6 @@ class ScatterDirichletResidual_Tpetra "Pressure" Teuchos::RCP > fieldMap_; - std::size_t num_nodes; - std::size_t side_subcell_dim_; std::size_t local_side_id_; @@ -186,6 +188,13 @@ class ScatterDirichletResidual_Tpetra > applyBC_; + + PHX::View scratch_lids_; + std::vector > scratch_offsets_; + std::vector > scratch_basisIds_; + + /// Storage for the tangent data + PHX::ViewOfViews<1,Kokkos::View> dfdpFieldsVoV_; }; // ************************************************************** diff --git a/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_Tpetra_impl.hpp 
b/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_Tpetra_impl.hpp index dd307eb2fb40..f45a3e539b23 100644 --- a/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_Tpetra_impl.hpp +++ b/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_Tpetra_impl.hpp @@ -59,10 +59,12 @@ ScatterDirichletResidual_Tpetra(const Teuchos::RCP & indexe if (!scatterIC_) { side_subcell_dim_ = p.get("Side Subcell Dimension"); local_side_id_ = p.get("Local Side ID"); + scratch_basisIds_.resize(names.size()); } // build the vector of fields that this is dependent on scatterFields_.resize(names.size()); + scratch_offsets_.resize(names.size()); for (std::size_t eq = 0; eq < names.size(); ++eq) { scatterFields_[eq] = PHX::MDField(names[eq],dl); @@ -91,20 +93,40 @@ ScatterDirichletResidual_Tpetra(const Teuchos::RCP & indexe // ********************************************************************** template void panzer::ScatterDirichletResidual_Tpetra:: -postRegistrationSetup(typename TRAITS::SetupData /* d */, +postRegistrationSetup(typename TRAITS::SetupData d, PHX::FieldManager& /* fm */) { fieldIds_.resize(scatterFields_.size()); + const Workset & workset_0 = (*d.worksets_)[0]; + std::string blockId = this->wda(workset_0).block_id; // load required field numbers for fast use for(std::size_t fd=0;fdfind(scatterFields_[fd].fieldTag().name())->second; fieldIds_[fd] = globalIndexer_->getFieldNum(fieldName); - } - // get the number of nodes (Should be renamed basis) - num_nodes = scatterFields_[0].extent(1); + if (!scatterIC_) { + const std::pair,std::vector > & indicePair + = globalIndexer_->getGIDFieldOffsets_closure(blockId,fieldIds_[fd], side_subcell_dim_, local_side_id_); + const std::vector & offsets = indicePair.first; + const std::vector & basisIdMap = indicePair.second; + + scratch_offsets_[fd] = PHX::View("offsets",offsets.size()); + Kokkos::deep_copy(scratch_offsets_[fd], Kokkos::View(offsets.data(), offsets.size())); + + 
scratch_basisIds_[fd] = PHX::View("basisIds",basisIdMap.size()); + Kokkos::deep_copy(scratch_basisIds_[fd], Kokkos::View(basisIdMap.data(), basisIdMap.size())); + + } else { + const std::vector & offsets = globalIndexer_->getGIDFieldOffsets(blockId,fieldIds_[fd]); + scratch_offsets_[fd] = PHX::View("offsets",offsets.size()); + Kokkos::deep_copy(scratch_offsets_[fd], Kokkos::View(offsets.data(), offsets.size())); + } + } + + scratch_lids_ = PHX::View("lids",scatterFields_[0].extent(0), + globalIndexer_->getElementBlockGIDCount(blockId)); } // ********************************************************************** @@ -132,6 +154,85 @@ preEvaluate(typename TRAITS::PreEvalData d) } } +// ********************************************************************** +namespace panzer { +namespace { + +template +class ScatterDirichletResidual_Residual_Functor { +public: + typedef typename PHX::Device execution_space; + typedef PHX::MDField ScalarFieldType; + typedef PHX::MDField BoolFieldType; + + Kokkos::View r_data; + Kokkos::View dirichlet_counter; + + PHX::View lids; // local indices for unknowns + PHX::View offsets; // how to get a particular field + PHX::View basisIds; + ScalarFieldType field; + BoolFieldType applyBC; + + bool checkApplyBC; + + KOKKOS_INLINE_FUNCTION + void operator()(const unsigned int cell) const + { + + // loop over the basis functions (currently they are nodes) + for(std::size_t basis=0; basis < offsets.extent(0); basis++) { + int offset = offsets(basis); + LO lid = lids(cell,offset); + if (lid<0) continue; // not on this processor + + int basisId = basisIds(basis); + if (checkApplyBC) + if(!applyBC(cell,basisId)) continue; + + r_data(lid,0) = field(cell,basisId); + + // record that you set a dirichlet condition + dirichlet_counter(lid,0) = 1.0; + + } // end basis + } +}; + +template +class ScatterDirichletResidualIC_Residual_Functor { +public: + typedef typename PHX::Device execution_space; + typedef PHX::MDField FieldType; + + Kokkos::View r_data; + 
Kokkos::View dirichlet_counter; + + PHX::View lids; // local indices for unknowns + PHX::View offsets; // how to get a particular field + FieldType field; + + KOKKOS_INLINE_FUNCTION + void operator()(const unsigned int cell) const + { + + // loop over the basis functions (currently they are nodes) + for(std::size_t basis=0; basis < offsets.extent(0); basis++) { + int offset = offsets(basis); + LO lid = lids(cell,offset); + if (lid<0) continue; // not on this processor + + r_data(lid,0) = field(cell,basis); + + // record that you set a dirichlet condition + dirichlet_counter(lid,0) = 1.0; + + } // end basis + } +}; +} +} + // ********************************************************************** template void panzer::ScatterDirichletResidual_Tpetra:: @@ -142,83 +243,44 @@ evaluateFields(typename TRAITS::EvalData workset) // for convenience pull out some objects from workset std::string blockId = this->wda(workset).block_id; - const std::vector & localCellIds = this->wda(workset).cell_local_ids; - - Teuchos::RCP r = (!scatterIC_) ? - tpetraContainer_->get_f() : - tpetraContainer_->get_x(); - Teuchos::ArrayRCP r_array = r->get1dViewNonConst(); - Teuchos::ArrayRCP dc_array = dirichletCounter_->get1dViewNonConst(); + globalIndexer_->getElementLIDs(this->wda(workset).cell_local_ids_k,scratch_lids_); - // NOTE: A reordering of these loops will likely improve performance - // The "getGIDFieldOffsets may be expensive. However the - // "getElementGIDs" can be cheaper. However the lookup for LIDs - // may be more expensive! + Teuchos::RCP r = (!scatterIC_) ? 
+ tpetraContainer_->get_f() : + tpetraContainer_->get_x(); + if (scatterIC_) { + ScatterDirichletResidualIC_Residual_Functor functor; + functor.r_data = r->getLocalViewDevice(Tpetra::Access::ReadWrite); + functor.lids = scratch_lids_; + functor.dirichlet_counter = dirichletCounter_->getLocalViewDevice(Tpetra::Access::ReadWrite); + // for each field, do a parallel for loop + for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) { + functor.offsets = scratch_offsets_[fieldIndex]; + functor.field = scatterFields_[fieldIndex]; - // loop over each field to be scattered - for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) { - int fieldNum = fieldIds_[fieldIndex]; - auto scatterFields_h = Kokkos::create_mirror_view(scatterFields_[fieldIndex].get_static_view()); - Kokkos::deep_copy(scatterFields_h, scatterFields_[fieldIndex].get_static_view()); - - // scatter operation for each cell in workset - for(std::size_t worksetCellIndex=0;worksetCellIndexgetElementGIDs(cellLocalId,GIDs); + Kokkos::parallel_for(workset.num_cells,functor); + } + } else { + ScatterDirichletResidual_Residual_Functor functor; + functor.r_data = r->getLocalViewDevice(Tpetra::Access::ReadWrite); + functor.lids = scratch_lids_; + functor.dirichlet_counter = dirichletCounter_->getLocalViewDevice(Tpetra::Access::ReadWrite); + + // for each field, do a parallel for loop + for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) { + functor.offsets = scratch_offsets_[fieldIndex]; + functor.field = scatterFields_[fieldIndex]; + if (checkApplyBC_) functor.applyBC = applyBC_[fieldIndex]; + functor.checkApplyBC = checkApplyBC_; + functor.basisIds = scratch_basisIds_[fieldIndex]; + + Kokkos::parallel_for(workset.num_cells,functor); + } + } - // caculate the local IDs for this element - LIDs.resize(GIDs.size()); - for(std::size_t i=0;igetMap()->getLocalElement(GIDs[i]); - - if (!scatterIC_) { - // this call "should" get the right 
ordering according to the Intrepid2 basis - const std::pair,std::vector > & indicePair - = globalIndexer_->getGIDFieldOffsets_closure(blockId,fieldNum, side_subcell_dim_, local_side_id_); - const std::vector & elmtOffset = indicePair.first; - const std::vector & basisIdMap = indicePair.second; - - // loop over basis functions - for(std::size_t basis=0;basis & elmtOffset = globalIndexer_->getGIDFieldOffsets(blockId,fieldNum); - - // loop over basis functions - for(std::size_t basis=0;basis & indexe if (!scatterIC_) { side_subcell_dim_ = p.get("Side Subcell Dimension"); local_side_id_ = p.get("Local Side ID"); + scratch_basisIds_.resize(names.size()); } // build the vector of fields that this is dependent on scatterFields_.resize(names.size()); + scratch_offsets_.resize(names.size()); for (std::size_t eq = 0; eq < names.size(); ++eq) { scatterFields_[eq] = PHX::MDField(names[eq],dl); @@ -285,20 +349,41 @@ ScatterDirichletResidual_Tpetra(const Teuchos::RCP & indexe // ********************************************************************** template void panzer::ScatterDirichletResidual_Tpetra:: -postRegistrationSetup(typename TRAITS::SetupData /* d */, +postRegistrationSetup(typename TRAITS::SetupData d, PHX::FieldManager& /* fm */) { fieldIds_.resize(scatterFields_.size()); + const Workset & workset_0 = (*d.worksets_)[0]; + std::string blockId = this->wda(workset_0).block_id; // load required field numbers for fast use for(std::size_t fd=0;fdfind(scatterFields_[fd].fieldTag().name())->second; fieldIds_[fd] = globalIndexer_->getFieldNum(fieldName); - } - // get the number of nodes (Should be renamed basis) - num_nodes = scatterFields_[0].extent(1); + if (!scatterIC_) { + const std::pair,std::vector > & indicePair + = globalIndexer_->getGIDFieldOffsets_closure(blockId,fieldIds_[fd], side_subcell_dim_, local_side_id_); + const std::vector & offsets = indicePair.first; + const std::vector & basisIdMap = indicePair.second; + + scratch_offsets_[fd] = 
PHX::View("offsets",offsets.size()); + Kokkos::deep_copy(scratch_offsets_[fd], Kokkos::View(offsets.data(), offsets.size())); + + scratch_basisIds_[fd] = PHX::View("basisIds",basisIdMap.size()); + Kokkos::deep_copy(scratch_basisIds_[fd], Kokkos::View(basisIdMap.data(), basisIdMap.size())); + + } else { + const std::vector & offsets = globalIndexer_->getGIDFieldOffsets(blockId,fieldIds_[fd]); + scratch_offsets_[fd] = PHX::View("offsets",offsets.size()); + Kokkos::deep_copy(scratch_offsets_[fd], Kokkos::View(offsets.data(), offsets.size())); + } + } + + scratch_lids_ = PHX::View("lids",scatterFields_[0].extent(0), + globalIndexer_->getElementBlockGIDCount(blockId)); + } // ********************************************************************** @@ -332,15 +417,111 @@ preEvaluate(typename TRAITS::PreEvalData d) std::vector activeParameters = rcp_dynamic_cast(d.gedc->getDataObject("PARAMETER_NAMES"))->getActiveParameters(); - // ETP 02/03/16: This code needs to be updated to properly handle scatterIC_ - TEUCHOS_ASSERT(!scatterIC_); - dfdp_vectors_.clear(); + dfdpFieldsVoV_.initialize("ScatterResidual_Tpetra::dfdpFieldsVoV_",activeParameters.size()); + for(std::size_t i=0;i vec = rcp_dynamic_cast(d.gedc->getDataObject(activeParameters[i]),true)->get_f(); - Teuchos::ArrayRCP vec_array = vec->get1dViewNonConst(); - dfdp_vectors_.push_back(vec_array); + auto dfdp_view = vec->getLocalViewDevice(Tpetra::Access::ReadWrite); + + dfdpFieldsVoV_.addView(dfdp_view,i); + } + + dfdpFieldsVoV_.syncHostToDevice(); + +} + +// ********************************************************************** +namespace panzer { +namespace { + +template +class ScatterDirichletResidual_Tangent_Functor { +public: + typedef typename PHX::Device execution_space; + typedef PHX::MDField ScalarFieldType; + typedef PHX::MDField BoolFieldType; + + Kokkos::View r_data; + Kokkos::View dirichlet_counter; + + Kokkos::View*> dfdp_fields; // tangent fields + double num_params; + + PHX::View lids; // local indices 
for unknowns + PHX::View offsets; // how to get a particular field + PHX::View basisIds; + ScalarFieldType field; + BoolFieldType applyBC; + + bool checkApplyBC; + + KOKKOS_INLINE_FUNCTION + void operator()(const unsigned int cell) const + { + + // loop over the basis functions (currently they are nodes) + for(std::size_t basis=0; basis < offsets.extent(0); basis++) { + int offset = offsets(basis); + LO lid = lids(cell,offset); + if (lid<0) continue; // not on this processor + + int basisId = basisIds(basis); + if (checkApplyBC) + if(!applyBC(cell,basisId)) continue; + + r_data(lid,0) = field(cell,basisId).val(); + + // loop over the tangents + for(int i_param=0; i_param +class ScatterDirichletResidualIC_Tangent_Functor { +public: + typedef typename PHX::Device execution_space; + typedef PHX::MDField FieldType; + + Kokkos::View r_data; + Kokkos::View dirichlet_counter; + + Kokkos::View*> dfdp_fields; // tangent fields + double num_params; + + PHX::View lids; // local indices for unknowns + PHX::View offsets; // how to get a particular field + FieldType field; + + KOKKOS_INLINE_FUNCTION + void operator()(const unsigned int cell) const + { + + // loop over the basis functions (currently they are nodes) + for(std::size_t basis=0; basis < offsets.extent(0); basis++) { + int offset = offsets(basis); + LO lid = lids(cell,offset); + if (lid<0) continue; // not on this processor + + r_data(lid,0) = field(cell,basis).val(); + + // loop over the tangents + for(int i_param=0; i_paramwda(workset).block_id; - const std::vector & localCellIds = this->wda(workset).cell_local_ids; + + globalIndexer_->getElementLIDs(this->wda(workset).cell_local_ids_k,scratch_lids_); Teuchos::RCP r = (!scatterIC_) ? 
tpetraContainer_->get_f() : tpetraContainer_->get_x(); - Teuchos::ArrayRCP r_array = r->get1dViewNonConst(); - Teuchos::ArrayRCP dc_array = dirichletCounter_->get1dViewNonConst(); + if (scatterIC_) { + ScatterDirichletResidualIC_Tangent_Functor functor; + functor.r_data = r->getLocalViewDevice(Tpetra::Access::ReadWrite); + functor.lids = scratch_lids_; + functor.dirichlet_counter = dirichletCounter_->getLocalViewDevice(Tpetra::Access::ReadWrite); + functor.dfdp_fields = dfdpFieldsVoV_.getViewDevice(); - // NOTE: A reordering of these loops will likely improve performance - // The "getGIDFieldOffsets may be expensive. However the - // "getElementGIDs" can be cheaper. However the lookup for LIDs - // may be more expensive! + // for each field, do a parallel for loop + for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) { + functor.offsets = scratch_offsets_[fieldIndex]; + functor.field = scatterFields_[fieldIndex]; + functor.num_params = Kokkos::dimension_scalar(scatterFields_[fieldIndex].get_view())-1; + Kokkos::parallel_for(workset.num_cells,functor); + } + } else { + ScatterDirichletResidual_Tangent_Functor functor; + functor.r_data = r->getLocalViewDevice(Tpetra::Access::ReadWrite); + functor.lids = scratch_lids_; + functor.dirichlet_counter = dirichletCounter_->getLocalViewDevice(Tpetra::Access::ReadWrite); + functor.dfdp_fields = dfdpFieldsVoV_.getViewDevice(); + + // for each field, do a parallel for loop + for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) { + functor.offsets = scratch_offsets_[fieldIndex]; + functor.field = scatterFields_[fieldIndex]; + if (checkApplyBC_) functor.applyBC = applyBC_[fieldIndex]; + functor.checkApplyBC = checkApplyBC_; + functor.basisIds = scratch_basisIds_[fieldIndex]; + functor.num_params = Kokkos::dimension_scalar(scatterFields_[fieldIndex].get_view())-1; + + Kokkos::parallel_for(workset.num_cells,functor); + } + } - // scatter operation for each cell in workset - 
for(std::size_t worksetCellIndex=0;worksetCellIndexgetElementGIDs(cellLocalId,GIDs); - - // caculate the local IDs for this element - LIDs.resize(GIDs.size()); - for(std::size_t i=0;igetMap()->getLocalElement(GIDs[i]); - - // loop over each field to be scattered - for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) { - int fieldNum = fieldIds_[fieldIndex]; - - if (!scatterIC_) { - // this call "should" get the right ordering according to the Intrepid2 basis - const std::pair,std::vector > & indicePair - = globalIndexer_->getGIDFieldOffsets_closure(blockId,fieldNum, side_subcell_dim_, local_side_id_); - const std::vector & elmtOffset = indicePair.first; - const std::vector & basisIdMap = indicePair.second; - - // loop over basis functions - for(std::size_t basis=0;basis & elmtOffset = globalIndexer_->getGIDFieldOffsets(blockId,fieldNum); - - // loop over basis functions - for(std::size_t basis=0;basis Date: Tue, 17 Dec 2024 12:13:36 -0700 Subject: [PATCH 16/33] MueLu CoalesceDrop_kokkos: Move serial sort to DroppingCommon Signed-off-by: Christian Glusa --- .../MatrixTransformation/MueLu_CutDrop.hpp | 33 +------------------ .../MueLu_DroppingCommon.hpp | 31 +++++++++++++++++ 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp index 1bb2fa1b1648..087a3de61020 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp @@ -423,37 +423,6 @@ class ScaledDistanceLaplacianComparison { } }; -template -KOKKOS_INLINE_FUNCTION void serialHeapSort(view_type& v, comparator_type comparator) { - auto N = v.extent(0); - size_t start = N / 2; - size_t end = N; - while (end > 1) { - if (start > 0) - start = start - 1; - else { - end = end - 1; - auto temp = v(0); - v(0) = v(end); - v(end) = temp; - } - size_t root = start; - 
while (2 * root + 1 < end) { - size_t child = 2 * root + 1; - if ((child + 1 < end) and (comparator(v(child), v(child + 1)))) - ++child; - - if (comparator(v(root), v(child))) { - auto temp = v(root); - v(root) = v(child); - v(child) = temp; - root = child; - } else - break; - } - } -} - /*! @class CutDropFunctor @brief Order each row by a criterion, compare the ratio of values and drop all entries once the ratio is below the threshold. @@ -499,7 +468,7 @@ class CutDropFunctor { for (size_t i = 0; i < nnz; ++i) { row_permutation(i) = i; } - serialHeapSort(row_permutation, comparator); + Misc::serialHeapSort(row_permutation, comparator); size_t keepStart = 0; size_t dropStart = nnz; diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp index dd371c124fcd..94a6b7da8e03 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp @@ -402,6 +402,37 @@ class SymmetrizeFunctor { } }; +template +KOKKOS_INLINE_FUNCTION void serialHeapSort(view_type& v, comparator_type comparator) { + auto N = v.extent(0); + size_t start = N / 2; + size_t end = N; + while (end > 1) { + if (start > 0) + start = start - 1; + else { + end = end - 1; + auto temp = v(0); + v(0) = v(end); + v(end) = temp; + } + size_t root = start; + while (2 * root + 1 < end) { + size_t child = 2 * root + 1; + if ((child + 1 < end) and (comparator(v(child), v(child + 1)))) + ++child; + + if (comparator(v(root), v(child))) { + auto temp = v(root); + v(root) = v(child); + v(child) = temp; + root = child; + } else + break; + } + } +} + } // namespace Misc } // namespace MueLu From 11a3c9158c2b55867652148bcd40be211ffc5f00 Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Tue, 17 Dec 2024 09:38:13 -0700 Subject: [PATCH 17/33] MueLu: Initial cleanup of MueLu Matlab (plus some C++ deprecation fixes and 
Stratimikos/Thyra support fixes) Signed-off-by: Chris Siefert --- packages/muelu/CMakeLists.txt | 4 --- packages/muelu/matlab/bin/CMakeLists.txt | 36 +++++++++---------- packages/muelu/matlab/bin/muemex.cpp | 29 +++++++++++---- packages/muelu/matlab/bin/muemex.h | 14 +++++++- .../matlab/src/MueLu_MatlabSmoother_def.hpp | 3 +- .../muelu/matlab/src/MueLu_MatlabUtils.cpp | 12 +++++-- .../matlab/src/MueLu_MatlabUtils_decl.hpp | 20 ++++++++--- .../matlab/src/MueLu_MatlabUtils_def.hpp | 13 +++++-- .../MueLu_SingleLevelMatlabFactory_decl.hpp | 2 -- .../MueLu_SingleLevelMatlabFactory_def.hpp | 2 -- .../src/MueLu_TwoLevelMatlabFactory_decl.hpp | 6 ++-- .../src/MueLu_TwoLevelMatlabFactory_def.hpp | 2 -- packages/muelu/src/CMakeLists.txt | 22 ++++++------ .../src/Headers/MueLu_UseShortNamesScalar.hpp | 10 +++--- .../muelu/src/Headers/gen_UseShortNames.sh | 11 ------ .../Interface/MueLu_FactoryFactory_decl.hpp | 6 ++++ .../Interface/MueLu_FactoryFactory_def.hpp | 18 ++++------ .../src/Utils/ClassList/SC-LO-GO-NO.classList | 3 ++ .../ETI_SC_LO_GO_NO_classes.cmake | 4 ++- .../MueLu_MatlabSmootherFactory_fwd.hpp | 27 ++++++++++++++ .../MueLu_MatlabSmoother_fwd.hpp | 5 +++ .../MueLu_SingleLevelMatlabFactory_fwd.hpp | 5 +++ .../MueLu_TwoLevelMatlabFactory_fwd.hpp | 5 +++ 23 files changed, 172 insertions(+), 87 deletions(-) create mode 100644 packages/muelu/src/Utils/ForwardDeclaration/MueLu_MatlabSmootherFactory_fwd.hpp diff --git a/packages/muelu/CMakeLists.txt b/packages/muelu/CMakeLists.txt index 6f27ca3b36b5..8ebedd2fb637 100644 --- a/packages/muelu/CMakeLists.txt +++ b/packages/muelu/CMakeLists.txt @@ -425,10 +425,6 @@ if(TPL_ENABLE_MATLAB) IF (NOT ${PROJECT_NAME}_ENABLE_EXPLICIT_INSTANTIATION) MESSAGE(FATAL_ERROR "Muemex interfaces require \"-D${PROJECT_NAME}_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON\".") ENDIF() - - IF(NOT ${PACKAGE_NAME}_ENABLE_Epetra) - MESSAGE(FATAL_ERROR "Muemex interfaces require \"-D${PACKAGE_NAME}_ENABLE_Epetra:BOOL=ON\".") - ENDIF() ENDIF() 
if(TPL_ENABLE_MATLAB) diff --git a/packages/muelu/matlab/bin/CMakeLists.txt b/packages/muelu/matlab/bin/CMakeLists.txt index 57c534d6c569..9b43885df783 100644 --- a/packages/muelu/matlab/bin/CMakeLists.txt +++ b/packages/muelu/matlab/bin/CMakeLists.txt @@ -15,8 +15,8 @@ if(TPL_ENABLE_MATLAB) TRIBITS_SET_AND_INC_DIRS(DIR ${CMAKE_CURRENT_BINARY_DIR}) # Force the code into R2017b compatibility mode - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMX_COMPAT_64 -fPIC -DMATLAB_MEXCMD_RELEASE=R2017b") - SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMX_COMPAT_64 -fPIC -DMATLAB_MEXCMD_RELEASE=R2017b") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMX_COMPAT_64 -fPIC -DMATLAB_MEXCMD_RELEASE=R2017b") + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMX_COMPAT_64 -fPIC -DMATLAB_MEXCMD_RELEASE=R2017b") # Debugging information @@ -31,29 +31,30 @@ if(TPL_ENABLE_MATLAB) APPEND_SET(SOURCES_EXEC muemex.cpp) # First, grab the package's own libraries - SET(LINK_LIBS) - APPEND_SET(LINK_LIBS ${${PACKAGE_NAME}_LIBRARIES}) + + #SET(LINK_LIBS) + #APPEND_SET(LINK_LIBS ${${PACKAGE_NAME}_LIBRARIES}) # Third, add test dependent package libraries - TRIBITS_GATHER_ENABLED_ITEMS(${PACKAGE_NAME} TEST PACKAGES ALL_DEP_PACKAGES) - TRIBITS_SORT_AND_APPEND_INCLUDE_AND_LINK_DIRS_AND_LIBS("${${PROJECT_NAME}_REVERSE_PACKAGES}" - "${ALL_DEP_PACKAGES}" "" LINK_LIBS "") + #TRIBITS_GATHER_ENABLED_ITEMS(${PACKAGE_NAME} TEST PACKAGES ALL_DEP_PACKAGES) + #TRIBITS_SORT_AND_APPEND_INCLUDE_AND_LINK_DIRS_AND_LIBS("${${PROJECT_NAME}_REVERSE_PACKAGES}" + # "${ALL_DEP_PACKAGES}" "" LINK_LIBS "") # Fourth, add dependent test TPL libraries - TRIBITS_GATHER_ENABLED_ITEMS(${PACKAGE_NAME} TEST TPLS ALL_TPLS) - TRIBITS_SORT_AND_APPEND_INCLUDE_AND_LINK_DIRS_AND_LIBS("${${PROJECT_NAME}_REVERSE_TPLS}" "${ALL_TPLS}" - TPL_ LINK_LIBS "") + #TRIBITS_GATHER_ENABLED_ITEMS(${PACKAGE_NAME} TEST TPLS ALL_TPLS) + #TRIBITS_SORT_AND_APPEND_INCLUDE_AND_LINK_DIRS_AND_LIBS("${${PROJECT_NAME}_REVERSE_TPLS}" "${ALL_TPLS}" + # TPL_ LINK_LIBS "") # Fifth, add 
matlab-specific libs - SET(LINK_LIBS ${LINK_LIBS} "mx" "mex" "mat") + #SET(LINK_LIBS ${LINK_LIBS} "mx" "mex" "mat") # Last, add last_lib to get extra link options on the link linee - IF (${PROJECT_NAME}_EXTRA_LINK_FLAGS) - APPEND_SET(LINK_LIBS ${last_lib}) - ENDIF() - IF (${PROJECT_NAME}_VERBOSE_CONFIGURE) - PRINT_VAR(LINK_LIBS) - ENDIF() + #IF (${PROJECT_NAME}_EXTRA_LINK_FLAGS) + # APPEND_SET(LINK_LIBS ${last_lib}) + #ENDIF() + #IF (${PROJECT_NAME}_VERBOSE_CONFIGURE) + # PRINT_VAR(LINK_LIBS) + #ENDIF() # Manually drop in options from the mex script (R2009b) on a linux platform. # g++ -O -pthread -shared -Wl,--version-script,/usr/local/matlab/7.9/extern/lib/glnxa64/mexFunction.map -Wl,--no-undefined -o "mlmex.mexa64" "mlmex-mlmex.o" -lm -Wl,-rpath-link,/usr/local/matlab/7.9/bin/glnxa64 -L/usr/local/matlab/7.9/bin/glnxa64 -lmx -lmex -lmat -lm @@ -83,4 +84,3 @@ if(TPL_ENABLE_MATLAB) CONFIGURE_FILE(matlab ${CMAKE_CURRENT_BINARY_DIR}/matlab @ONLY) ENDIF() - diff --git a/packages/muelu/matlab/bin/muemex.cpp b/packages/muelu/matlab/bin/muemex.cpp index 03068adff0e4..3a8e6b5cffba 100644 --- a/packages/muelu/matlab/bin/muemex.cpp +++ b/packages/muelu/matlab/bin/muemex.cpp @@ -14,8 +14,8 @@ #define MUEMEX_ERROR -1 // Do not compile MueMex if any of these aren't available -#if !defined HAVE_MUELU_EPETRA || !defined HAVE_MUELU_MATLAB -#error "MueMex requires Epetra, Tpetra and MATLAB." +#if !defined HAVE_MUELU_TPETRA || !defined HAVE_MUELU_MATLAB +#error "MueMex requires Tpetra and MATLAB." 
#endif #include @@ -295,11 +295,13 @@ template <> RCP getDatapackHierarchy(MuemexSystem* dp) { RCP> hier; switch (dp->type) { - case EPETRA: { +#ifdef HAVE_MUELU_EPETRA + case EPETRA: { EpetraSystem* pack = (EpetraSystem*)dp; hier = pack->getHierarchy(); break; } +#endif case TPETRA: { TpetraSystem* pack = (TpetraSystem*)dp; hier = pack->getHierarchy(); @@ -322,10 +324,13 @@ RCP getDatapackHierarchy(MuemexSystem* dp) { template void setHierarchyData(MuemexSystem* problem, int levelID, T& data, string& dataName) { RCP level; +#ifdef HAVE_MUELU_EPETRA if (problem->type == EPETRA) { RCP> hier = ((EpetraSystem*)problem)->getHierarchy(); level = hier->GetLevel(levelID); - } else if (problem->type == TPETRA) { + } else +#endif + if (problem->type == TPETRA) { RCP> hier = ((TpetraSystem*)problem)->getHierarchy(); level = hier->GetLevel(levelID); } else if (problem->type == TPETRA_COMPLEX) { @@ -493,6 +498,7 @@ mxArray* MuemexSystem::getHierarchyData(string dataName, MuemexType dataType, in return output; } +#ifdef HAVE_MUELU_EPETRA // EpetraSystem impl EpetraSystem::EpetraSystem() @@ -608,6 +614,7 @@ RCP EpetraSystem::getHierarchy() { throw runtime_error("Hierarchy from Epetra problem was null."); return hier; } +#endif // tpetra_double_data_pack implementation @@ -1038,9 +1045,12 @@ void parse_list_item(RCP List, char* option_name, const mxArray* opt_str = opt_char; List->set(option_name, opt_str); if (strcmp(option_name, MUEMEX_INTERFACE) == 0) { +#ifdef HAVE_MUELU_EPETRA if (strcmp(opt_str.c_str(), "epetra") == 0) useEpetra = true; - else if (strcmp(opt_str.c_str(), "tpetra") == 0) + else +#endif + if (strcmp(opt_str.c_str(), "tpetra") == 0) useEpetra = false; } mxFree(opt_char); @@ -1254,6 +1264,7 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { } intf = List->get(MUEMEX_INTERFACE, "tpetra"); List->remove(MUEMEX_INTERFACE); // no longer need this parameter +#ifdef HAVE_MUELU_EPETRA if (intf == "epetra") { if (mxIsComplex(prhs[1])) { 
mexPrintf("Error: Attempting to use complex-valued matrix with Epetra, which is unsupported.\n"); @@ -1265,7 +1276,9 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { dp->setup(prhs[1], haveCoords, haveCoords ? prhs[2] : (mxArray*)NULL); oc = dp->operatorComplexity; D = rcp_implicit_cast(dp); - } else if (intf == "tpetra") { + } else +#endif + if (intf == "tpetra") { // infer scalar type from prhs (can be double or complex) if (mxIsComplex(prhs[1])) { #ifdef HAVE_COMPLEX_SCALARS @@ -1353,6 +1366,7 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { // get pointer to MATLAB array that will be "B" or "rhs" multivector const mxArray* rhs = reuse ? prhs[2] : prhs[3]; switch (dp->type) { +#ifdef HAVE_MUELU_EPETRA case EPETRA: { RCP esys = rcp_static_cast(dp); RCP matrix; @@ -1363,6 +1377,7 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { plhs[0] = esys->solve(List, matrix, rhs, iters); break; } +#endif case TPETRA: { RCP> tsys = rcp_static_cast, MuemexSystem>(dp); RCP matrix; @@ -1410,11 +1425,13 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { // get pointer to MATLAB array that will be "B" or "rhs" multivector const mxArray* rhs = prhs[2]; switch (dp->type) { +#ifdef HAVE_MUELU_EPETRA case EPETRA: { RCP esys = rcp_static_cast(dp); plhs[0] = esys->apply(rhs); break; } +#endif case TPETRA: { RCP> tsys = rcp_static_cast, MuemexSystem>(dp); plhs[0] = tsys->apply(rhs); diff --git a/packages/muelu/matlab/bin/muemex.h b/packages/muelu/matlab/bin/muemex.h index fb66654a13de..0951e247b84f 100644 --- a/packages/muelu/matlab/bin/muemex.h +++ b/packages/muelu/matlab/bin/muemex.h @@ -21,21 +21,31 @@ #include "Teuchos_RCP.hpp" #include "MueLu_config.hpp" #include "MueLu.hpp" +#ifdef HAVE_MUELU_EPETRA #include "MueLu_EpetraOperator.hpp" +#endif #include "MueLu_TpetraOperator.hpp" #include "MueLu_Hierarchy.hpp" #include "MueLu_MatlabUtils.hpp" +#ifdef 
HAVE_MUELU_EPETRA #include "MueLu_CreateEpetraPreconditioner.hpp" +#endif #include "MueLu_CreateTpetraPreconditioner.hpp" +#ifdef HAVE_MUELU_EPETRA #include "Epetra_SerialComm.h" #include "Epetra_Map.h" #include "Epetra_MultiVector.h" #include "Epetra_CrsMatrix.h" #include "Epetra_LinearProblem.h" +#endif #include "Tpetra_CrsMatrix.hpp" +#ifdef HAVE_MUELU_EPETRA #include "Xpetra_EpetraCrsMatrix.hpp" +#endif #include "BelosSolverFactory.hpp" +#ifdef HAVE_MUELU_EPETRA #include "BelosEpetraAdapter.hpp" +#endif #include "BelosTpetraAdapter.hpp" #include "BelosPseudoBlockGmresSolMgr.hpp" #include "BelosBlockGmresSolMgr.hpp" @@ -89,6 +99,7 @@ class MuemexSystem mxArray* getHierarchyData(std::string dataName, MuemexType dataType, int levelID); //Works for all dp types }; +#ifdef HAVE_MUELU_EPETRA class EpetraSystem : public MuemexSystem { public: @@ -120,6 +131,7 @@ class EpetraSystem : public MuemexSystem Teuchos::RCP A; Teuchos::RCP prec; }; +#endif //Scalar can be double or std::complex (complex_t) //Note: DataPackType is either TPETRA or TPETRA_COMPLEX @@ -140,7 +152,7 @@ class TpetraSystem : public MuemexSystem mxArray* apply(const mxArray* rhs); //note: I typedef'd mm_node_t at the top of this file as the Kokkos default type Teuchos::RCP GetMatrix() - { + { return A; } Teuchos::RCP GetPrec() diff --git a/packages/muelu/matlab/src/MueLu_MatlabSmoother_def.hpp b/packages/muelu/matlab/src/MueLu_MatlabSmoother_def.hpp index eaf8160a3cef..426875bea237 100644 --- a/packages/muelu/matlab/src/MueLu_MatlabSmoother_def.hpp +++ b/packages/muelu/matlab/src/MueLu_MatlabSmoother_def.hpp @@ -6,10 +6,9 @@ // SPDX-License-Identifier: BSD-3-Clause // ***************************************************************************** // @HEADER - -#include "MueLu_MatlabSmoother_decl.hpp" #ifndef MUELU_MATLABSMOOTHER_DEF_HPP #define MUELU_MATLABSMOOTHER_DEF_HPP +#include "MueLu_MatlabSmoother_decl.hpp" #include "MueLu_MatlabUtils_decl.hpp" #if defined(HAVE_MUELU_MATLAB) diff --git 
a/packages/muelu/matlab/src/MueLu_MatlabUtils.cpp b/packages/muelu/matlab/src/MueLu_MatlabUtils.cpp index 9d0e4444069b..3df02f3543f9 100644 --- a/packages/muelu/matlab/src/MueLu_MatlabUtils.cpp +++ b/packages/muelu/matlab/src/MueLu_MatlabUtils.cpp @@ -9,8 +9,8 @@ #include "MueLu_MatlabUtils_def.hpp" -#if !defined(HAVE_MUELU_MATLAB) || !defined(HAVE_MUELU_EPETRA) -#error "Muemex types require MATLAB, Epetra and Tpetra." +#if !defined(HAVE_MUELU_MATLAB) || !defined(HAVE_MUELU_TPETRA) +#error "Muemex types require MATLAB and Tpetra." #else /* Stuff for MATLAB R2006b vs. previous versions */ @@ -38,7 +38,9 @@ template class MuemexData; template class MuemexData; template class MuemexData > >; template class MuemexData > >; +#ifdef HAVE_MUELU_EPETRA template class MuemexData >; +#endif template class MuemexData > >; template class MuemexData > >; template class MuemexData > >; @@ -147,12 +149,14 @@ std::vector > callMatlab(std::string function, int numOutputs, st case XPETRA_MULTIVECTOR_COMPLEX: matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; +#ifdef HAVE_MUELU_EPETRA case EPETRA_CRSMATRIX: matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; case EPETRA_MULTIVECTOR: matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; +#endif case AGGREGATES: matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; @@ -380,8 +384,10 @@ template RCP loadDataFromMatlab template RCP loadDataFromMatlab >(const mxArray* mxa); template RCP loadDataFromMatlab >(const mxArray* mxa); template RCP loadDataFromMatlab >(const mxArray* mxa); +#ifdef HAVE_MUELU_EPETRA template RCP loadDataFromMatlab >(const mxArray* mxa); template RCP loadDataFromMatlab >(const mxArray* mxa); +#endif template RCP loadDataFromMatlab >(const mxArray* mxa); template RCP loadDataFromMatlab >(const mxArray* mxa); @@ -399,8 +405,10 @@ template mxArray* saveDataToMatlab(RCP& data); template mxArray* 
saveDataToMatlab(RCP& data); template mxArray* saveDataToMatlab(RCP& data); template mxArray* saveDataToMatlab(RCP& data); +#ifdef HAVE_MUELU_EPETRA template mxArray* saveDataToMatlab(RCP& data); template mxArray* saveDataToMatlab(RCP& data); +#endif template mxArray* saveDataToMatlab(RCP& data); template mxArray* saveDataToMatlab(RCP& data); diff --git a/packages/muelu/matlab/src/MueLu_MatlabUtils_decl.hpp b/packages/muelu/matlab/src/MueLu_MatlabUtils_decl.hpp index ddcdf3ba6ef1..10765d409829 100644 --- a/packages/muelu/matlab/src/MueLu_MatlabUtils_decl.hpp +++ b/packages/muelu/matlab/src/MueLu_MatlabUtils_decl.hpp @@ -12,11 +12,15 @@ #include "MueLu_ConfigDefs.hpp" -#if !defined(HAVE_MUELU_MATLAB) || !defined(HAVE_MUELU_EPETRA) +#if !defined(HAVE_MUELU_MATLAB) || !defined(HAVE_MUELU_TPETRA) #error "Muemex requires MATLAB, Epetra and Tpetra." #else -#include "mex.h" +// Matlab fwd style declarations +struct mxArray_tag; +typedef struct mxArray_tag mxArray; +typedef size_t mwIndex; + #include #include #include @@ -28,11 +32,15 @@ #include "MueLu_Aggregates_decl.hpp" #include "MueLu_AmalgamationInfo_decl.hpp" #include "MueLu_Utilities_decl.hpp" -#include "MueLu_Graph_decl.hpp" +#include "MueLu_Graph_fwd.hpp" +#ifdef HAVE_MUELU_EPETRA #include "Epetra_MultiVector.h" #include "Epetra_CrsMatrix.h" +#endif #include "Tpetra_CrsMatrix_decl.hpp" +#ifdef HAVE_MUELU_EPETRA #include "Xpetra_EpetraCrsMatrix.hpp" +#endif #include "Xpetra_MapFactory.hpp" #include "Xpetra_CrsGraph.hpp" #include "Xpetra_VectorFactory.hpp" @@ -58,8 +66,10 @@ enum MuemexType { XPETRA_MATRIX_COMPLEX, XPETRA_MULTIVECTOR_DOUBLE, XPETRA_MULTIVECTOR_COMPLEX, +#ifdef HAVE_MUELU_EPETRA EPETRA_CRSMATRIX, EPETRA_MULTIVECTOR, +#endif AGGREGATES, AMALGAMATION_INFO, GRAPH @@ -163,13 +173,13 @@ Teuchos::RCP convertMatlabVar(const mxArray* mxa); // trim from start static inline std::string& ltrim(std::string& s) { - s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(std::isspace)))); + 
s.erase(0,s.find_first_not_of(" ")); return s; } // trim from end static inline std::string& rtrim(std::string& s) { - s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), s.end()); + s.erase(s.find_last_not_of(" "),std::string::npos); return s; } diff --git a/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp b/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp index bb70471b1066..93831c6614e6 100644 --- a/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp +++ b/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp @@ -11,9 +11,10 @@ #define MUELU_MATLABUTILS_DEF_HPP #include "MueLu_MatlabUtils_decl.hpp" +#include -#if !defined(HAVE_MUELU_MATLAB) || !defined(HAVE_MUELU_EPETRA) -#error "Muemex types require MATLAB, Epetra and Tpetra." +#if !defined(HAVE_MUELU_MATLAB) || !defined(HAVE_MUELU_TPETRA) +#error "Muemex types require MATLAB and Tpetra." #else using Teuchos::RCP; @@ -103,6 +104,7 @@ MuemexType getMuemexType(const RCP& data) { return XPETRA template <> MuemexType getMuemexType >() { return XPETRA_MATRIX_COMPLEX; } +#ifdef HAVE_MUELU_EPETRA template <> MuemexType getMuemexType(const RCP& data) { return EPETRA_CRSMATRIX; } template <> @@ -112,6 +114,7 @@ template <> MuemexType getMuemexType(const RCP& data) { return EPETRA_MULTIVECTOR; } template <> MuemexType getMuemexType >() { return EPETRA_MULTIVECTOR; } +#endif template <> MuemexType getMuemexType(const RCP& data) { return AGGREGATES; } @@ -445,6 +448,7 @@ RCP > loadD return MueLu::TpetraMultiVector_To_XpetraMultiVector(tpetraMV); } + #ifdef HAVE_MUELU_EPETRA template <> RCP loadDataFromMatlab >(const mxArray* mxa) { RCP matrix; @@ -492,6 +496,7 @@ RCP loadDataFromMatlab >(const mxArr Epetra_BlockMap map(nr * nc, 1, 0, Comm); return rcp(new Epetra_MultiVector(Epetra_DataAccess::Copy, map, mxGetPr(mxa), nr, nc)); } +#endif template <> RCP loadDataFromMatlab >(const mxArray* mxa) { @@ -606,7 +611,7 @@ RCP loadDataFromMatlab >(const mxArray* mxa) { 
tgraph->insertGlobalIndices((mm_GlobalOrd)i, cols(rows[i], entriesPerRow[i])); } tgraph->fillComplete(map, map); - RCP mgraph = rcp(new MueLu::Graph(tgraph)); + RCP mgraph = rcp(new MueLu::LWGraph(tgraph)); // Set boundary nodes int numBoundaryNodes = mxGetNumberOfElements(boundaryNodes); bool* boundaryFlags = new bool[nRows]; @@ -1005,6 +1010,7 @@ mxArray* saveDataToMatlab(RCP mxArray* saveDataToMatlab(RCP& data) { RCP xmat = EpetraCrs_To_XpetraMatrix(data); @@ -1018,6 +1024,7 @@ mxArray* saveDataToMatlab(RCP& data) { data->ExtractCopy(dataPtr, data->GlobalLength()); return output; } +#endif template <> mxArray* saveDataToMatlab(RCP& data) { diff --git a/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_decl.hpp b/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_decl.hpp index 8f6b966e7bf1..0321a8ba5994 100644 --- a/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_decl.hpp +++ b/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_decl.hpp @@ -27,8 +27,6 @@ #include "MueLu_Utilities_fwd.hpp" #ifdef HAVE_MUELU_MATLAB -#include "mex.h" - namespace MueLu { /*! 
@class SingleLevelMatlabFactory diff --git a/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_def.hpp b/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_def.hpp index e5da86652aec..1fb49d3e7b41 100644 --- a/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_def.hpp +++ b/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_def.hpp @@ -19,7 +19,6 @@ #include "MueLu_MatlabUtils_decl.hpp" #ifdef HAVE_MUELU_MATLAB -#include "mex.h" namespace MueLu { @@ -79,7 +78,6 @@ std::string SingleLevelMatlabFactory: } // namespace MueLu -#define MUELU_SINGLELEVELMATLABFACTORY_SHORT #endif // HAVE_MUELU_MATLAB #endif // MUELU_SINGLELEVELMATLABFACTORY_DEF_HPP diff --git a/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_decl.hpp b/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_decl.hpp index 1e29e6d46665..8a8890e1f065 100644 --- a/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_decl.hpp +++ b/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_decl.hpp @@ -28,7 +28,6 @@ #include "MueLu_Utilities_fwd.hpp" #ifdef HAVE_MUELU_MATLAB -#include "mex.h" namespace MueLu { /*! 
@@ -36,7 +35,10 @@ namespace MueLu { @ingroup MueMexClasses @brief Factory for interacting with Matlab */ -template +template class TwoLevelMatlabFactory : public TwoLevelFactoryBase { #undef MUELU_TWOLEVELMATLABFACTORY_SHORT #include "MueLu_UseShortNames.hpp" diff --git a/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_def.hpp b/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_def.hpp index a9b02e873e33..cf4f94611853 100644 --- a/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_def.hpp +++ b/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_def.hpp @@ -21,7 +21,6 @@ #include #ifdef HAVE_MUELU_MATLAB -#include "mex.h" namespace MueLu { @@ -96,7 +95,6 @@ std::string TwoLevelMatlabFactory::de } // namespace MueLu -#define MUELU_TWOLEVELMATLABFACTORY_SHORT #endif // HAVE_MUELU_MATLAB #endif // MUELU_TWOLEVELMATLABFACTORY_DEF_HPP diff --git a/packages/muelu/src/CMakeLists.txt b/packages/muelu/src/CMakeLists.txt index 99b81b1109c6..74a9eaecce87 100644 --- a/packages/muelu/src/CMakeLists.txt +++ b/packages/muelu/src/CMakeLists.txt @@ -59,6 +59,12 @@ TRIBITS_INCLUDE_DIRECTORIES(${DIR}/Utils) TRIBITS_INCLUDE_DIRECTORIES(${DIR}/Utils/ForwardDeclaration) TRIBITS_INCLUDE_DIRECTORIES(${DIR}/../adapters/xpetra) TRIBITS_INCLUDE_DIRECTORIES(${DIR}/../adapters/tpetra) +IF (${PACKAGE_NAME}_ENABLE_Stratimikos) + TRIBITS_INCLUDE_DIRECTORIES(${DIR}/../adapters/stratimikos) +ENDIF() +IF (${PACKAGE_NAME}_ENABLE_Thyra) + TRIBITS_INCLUDE_DIRECTORIES(${DIR}/../adapters/thyra) +ENDIF() IF (${PACKAGE_NAME}_ENABLE_Epetra) TRIBITS_INCLUDE_DIRECTORIES(${DIR}/../adapters/epetra) ENDIF() @@ -72,7 +78,9 @@ IF (${PACKAGE_NAME}_ENABLE_Experimental) TRIBITS_INCLUDE_DIRECTORIES(${DIR}/../research/regionMG/src) ENDIF() IF (${PACKAGE_NAME}_ENABLE_Stratimikos) - TRIBITS_INCLUDE_DIRECTORIES(${DIR}/../adapters/stratimikos) + ENDIF() +IF (TPL_ENABLE_MATLAB) + TRIBITS_INCLUDE_DIRECTORIES(${DIR}/../matlab/src) ENDIF() # Function to generate ETI (explicit template instantiation) files @@ 
-397,6 +405,9 @@ TRILINOS_CREATE_CLIENT_TEMPLATE_HEADERS(${DIR}/../adapters/tpetra NOSIERRABJAM) IF (${PACKAGE_NAME}_ENABLE_Intrepid2) TRILINOS_CREATE_CLIENT_TEMPLATE_HEADERS(${DIR}/Transfers/PCoarsen NOSIERRABJAM) ENDIF() +IF (TPL_ENABLE_MATLAB) + TRIBITS_CREATE_CLIENT_TEMPLATE_HEADERS(${DIR}/../matlab/src NOSIERRABJAM) +ENDIF() # Cpp file IF(${PACKAGE_NAME}_ENABLE_EXPLICIT_INSTANTIATION) @@ -425,12 +436,3 @@ TRIBITS_ADD_LIBRARY( ) # touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration # touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration -# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration -# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration -# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration -# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration -# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration -# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration -# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration -# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration -# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration diff --git a/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp b/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp index a5bf3f68b544..97a7123ada8e 100644 --- a/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp +++ b/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp @@ -455,12 
+455,12 @@ using MultiPhys [[maybe_unused]] = MueLu::MultiPhys; #endif -#ifdef MUELU_TWOLEVELMATLABFACTORY_SHORT -typedef MueLu::TwoLevelMatlabFactory TwoLevelMatlabFactory; -#endif #ifdef MUELU_SINGLELEVELMATLABFACTORY_SHORT -typedef MueLu::SingleLevelMatlabFactory SingleLevelMatlabFactory; +using SingleLevelMatlabFactory [[maybe_unused]] = MueLu::SingleLevelMatlabFactory; +#endif +#ifdef MUELU_TWOLEVELMATLABFACTORY_SHORT +using TwoLevelMatlabFactory [[maybe_unused]] = MueLu::TwoLevelMatlabFactory; #endif #ifdef MUELU_MATLABSMOOTHER_SHORT -typedef MueLu::MatlabSmoother MatlabSmoother; +using MatlabSmoother [[maybe_unused]] = MueLu::MatlabSmoother; #endif diff --git a/packages/muelu/src/Headers/gen_UseShortNames.sh b/packages/muelu/src/Headers/gen_UseShortNames.sh index 7bbe12dc5e1e..58158a4dd86f 100755 --- a/packages/muelu/src/Headers/gen_UseShortNames.sh +++ b/packages/muelu/src/Headers/gen_UseShortNames.sh @@ -67,14 +67,3 @@ echo "#endif" >> MueLu_UseShortNamesOrdinal.hpp echo "#ifdef MUELU_IFPACKSMOOTHER_SHORT" >> MueLu_UseShortNamesOrdinal.hpp echo "typedef MueLu::IfpackSmoother IfpackSmoother;" >> MueLu_UseShortNamesOrdinal.hpp echo "#endif" >> MueLu_UseShortNamesOrdinal.hpp - -# Add the matlab utilities to end of file -echo "#ifdef MUELU_TWOLEVELMATLABFACTORY_SHORT" >> MueLu_UseShortNamesScalar.hpp -echo "typedef MueLu::TwoLevelMatlabFactory TwoLevelMatlabFactory;" >> MueLu_UseShortNamesScalar.hpp -echo "#endif" >> MueLu_UseShortNamesScalar.hpp -echo "#ifdef MUELU_SINGLELEVELMATLABFACTORY_SHORT" >> MueLu_UseShortNamesScalar.hpp -echo "typedef MueLu::SingleLevelMatlabFactory SingleLevelMatlabFactory;" >> MueLu_UseShortNamesScalar.hpp -echo "#endif" >> MueLu_UseShortNamesScalar.hpp -echo "#ifdef MUELU_MATLABSMOOTHER_SHORT" >> MueLu_UseShortNamesScalar.hpp -echo "typedef MueLu::MatlabSmoother MatlabSmoother;" >> MueLu_UseShortNamesScalar.hpp -echo "#endif" >> MueLu_UseShortNamesScalar.hpp diff --git a/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp 
b/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp index 5176bc2fb677..bde0c5e4eaee 100644 --- a/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp +++ b/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp @@ -134,6 +134,12 @@ #include "MueLu_MatrixFreeTentativePFactory_fwd.hpp" #include "MueLu_RegionRFactory_kokkos_fwd.hpp" +#ifdef HAVE_MUELU_MATLAB +#include "MueLu_SingleLevelMatlabFactory_fwd.hpp" +#include "MueLu_TwoLevelMatlabFactory_fwd.hpp" +#include "MueLu_MatlabSmoother_fwd.hpp" +#endif + #ifdef HAVE_MUELU_INTREPID2 #include "MueLu_IntrepidPCoarsenFactory_fwd.hpp" #endif diff --git a/packages/muelu/src/Interface/MueLu_FactoryFactory_def.hpp b/packages/muelu/src/Interface/MueLu_FactoryFactory_def.hpp index 0694928ceceb..1b959f055f8a 100644 --- a/packages/muelu/src/Interface/MueLu_FactoryFactory_def.hpp +++ b/packages/muelu/src/Interface/MueLu_FactoryFactory_def.hpp @@ -114,13 +114,9 @@ #include "MueLu_RegionRFactory_kokkos.hpp" #ifdef HAVE_MUELU_MATLAB -// This is distasteful, but (sadly) neccesary due to peculiarities in MueLu's build system. 
-#include "../matlab/src/MueLu_SingleLevelMatlabFactory_decl.hpp" -#include "../matlab/src/MueLu_SingleLevelMatlabFactory_def.hpp" -#include "../matlab/src/MueLu_TwoLevelMatlabFactory_decl.hpp" -#include "../matlab/src/MueLu_TwoLevelMatlabFactory_def.hpp" -#include "../matlab/src/MueLu_MatlabSmoother_decl.hpp" -#include "../matlab/src/MueLu_MatlabSmoother_def.hpp" +#include "MueLu_SingleLevelMatlabFactory.hpp" +#include "MueLu_TwoLevelMatlabFactory.hpp" +#include "MueLu_MatlabSmoother.hpp" #endif #ifdef HAVE_MUELU_INTREPID2 @@ -293,8 +289,8 @@ RCP FactoryFactory // Matlab factories #ifdef HAVE_MUELU_MATLAB - if (factoryName == "TwoLevelMatlabFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); if (factoryName == "SingleLevelMatlabFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "TwoLevelMatlabFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); if (factoryName == "MatlabSmoother") return BuildMatlabSmoother(paramList, factoryMapIn, factoryManagersIn); #endif @@ -592,9 +588,9 @@ RCP FactoryFactory::Buil } #ifdef HAVE_MUELU_MATLAB -FactoryFactory:: - RCP - FactoryFactory::BuildMatlabSmoother(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { +template +RCP +FactoryFactory::BuildMatlabSmoother(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { if (paramList.begin() == paramList.end()) return rcp(new SmootherFactory(rcp(new MatlabSmoother()))); diff --git a/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList b/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList index c606287c8edf..495bda4df6a1 100644 --- a/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList +++ b/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList @@ -146,3 +146,6 @@ RefMaxwell Maxwell1 MultiPhys Maxwell_Utils +SingleLevelMatlabFactory - #if 
defined(HAVE_MUELU_MATLAB) +TwoLevelMatlabFactory - #if defined(HAVE_MUELU_MATLAB) +MatlabSmoother - #if defined(HAVE_MUELU_MATLAB) \ No newline at end of file diff --git a/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake b/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake index 20dc4095f4f0..f38b99ced3c6 100644 --- a/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake +++ b/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake @@ -48,7 +48,6 @@ APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::GeometricInterpolationPFactory ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::GeometricInterpolationPFactory_kokkos ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::GMRESSolver ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::Hierarchy ) -APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::HierarchyManager ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::HierarchyUtils ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::InterfaceAggregationFactory ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::Ifpack2Smoother-.?if.defined[HAVE_MUELU_IFPACK2] ) @@ -141,3 +140,6 @@ APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::RefMaxwell ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::Maxwell1 ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::MultiPhys ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::Maxwell_Utils ) +APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::SingleLevelMatlabFactory-.?if.defined[HAVE_MUELU_MATLAB] ) +APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::TwoLevelMatlabFactory-.?if.defined[HAVE_MUELU_MATLAB] ) +APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::MatlabSmoother-.?if.defined[HAVE_MUELU_MATLAB] ) diff --git a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MatlabSmootherFactory_fwd.hpp b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MatlabSmootherFactory_fwd.hpp new file mode 100644 index 000000000000..2589b90bfe5c --- /dev/null +++ 
b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MatlabSmootherFactory_fwd.hpp @@ -0,0 +1,27 @@ +// @HEADER +// ***************************************************************************** +// MueLu: A package for multigrid based preconditioning +// +// Copyright 2012 NTESS and the MueLu contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +#ifndef MUELU_MATLABSMOOTHERFACTORY_FWD_HPP +#define MUELU_MATLABSMOOTHERFACTORY_FWD_HPP + +#include "MueLu_ConfigDefs.hpp" +#if defined(HAVE_MUELU_MATLAB) + +namespace MueLu { +template +class MatlabSmootherFactory; +} + +#ifndef MUELU_MATLABSMOOTHERFACTORY_SHORT +#define MUELU_MATLABSMOOTHERFACTORY_SHORT +#endif + +#endif + +#endif // MUELU_MATLABSMOOTHERFACTORY_FWD_HPP diff --git a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MatlabSmoother_fwd.hpp b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MatlabSmoother_fwd.hpp index 47720187baa0..4ecd2a792fa1 100644 --- a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MatlabSmoother_fwd.hpp +++ b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MatlabSmoother_fwd.hpp @@ -10,6 +10,9 @@ #ifndef MUELU_MATLABSMOOTHER_FWD_HPP #define MUELU_MATLABSMOOTHER_FWD_HPP +#include "MueLu_ConfigDefs.hpp" +#if defined(HAVE_MUELU_MATLAB) + namespace MueLu { template class MatlabSmoother; @@ -19,4 +22,6 @@ class MatlabSmoother; #define MUELU_MATLABSMOOTHER_SHORT #endif +#endif + #endif // MUELU_MATLABSMOOTHER_FWD_HPP diff --git a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_SingleLevelMatlabFactory_fwd.hpp b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_SingleLevelMatlabFactory_fwd.hpp index ef3c9dba35dc..b8a5644661dd 100644 --- a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_SingleLevelMatlabFactory_fwd.hpp +++ b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_SingleLevelMatlabFactory_fwd.hpp @@ -10,6 +10,9 @@ #ifndef MUELU_SINGLELEVELMATLABFACTORY_FWD_HPP #define 
MUELU_SINGLELEVELMATLABFACTORY_FWD_HPP +#include "MueLu_ConfigDefs.hpp" +#if defined(HAVE_MUELU_MATLAB) + namespace MueLu { template class SingleLevelMatlabFactory; @@ -19,4 +22,6 @@ class SingleLevelMatlabFactory; #define MUELU_SINGLELEVELMATLABFACTORY_SHORT #endif +#endif + #endif // MUELU_SINGLELEVELMATLABFACTORY_FWD_HPP diff --git a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_TwoLevelMatlabFactory_fwd.hpp b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_TwoLevelMatlabFactory_fwd.hpp index bd0a64152247..035ff6bfe317 100644 --- a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_TwoLevelMatlabFactory_fwd.hpp +++ b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_TwoLevelMatlabFactory_fwd.hpp @@ -10,6 +10,9 @@ #ifndef MUELU_TWOLEVELMATLABFACTORY_FWD_HPP #define MUELU_TWOLEVELMATLABFACTORY_FWD_HPP +#include "MueLu_ConfigDefs.hpp" +#if defined(HAVE_MUELU_MATLAB) + namespace MueLu { template class TwoLevelMatlabFactory; @@ -19,4 +22,6 @@ class TwoLevelMatlabFactory; #define MUELU_TWOLEVELMATLABFACTORY_SHORT #endif +#endif + #endif // MUELU_TWOLEVELMATLABFACTORY_FWD_HPP From 23305340c329d977cfe60d36a5a5ddc0cd3ccf31 Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Tue, 17 Dec 2024 13:06:04 -0700 Subject: [PATCH 18/33] MueLu: Finishing matlab fixes Signed-off-by: Chris Siefert --- packages/muelu/CMakeLists.txt | 2 +- packages/muelu/matlab/bin/CMakeLists.txt | 10 ++++------ packages/muelu/matlab/bin/muemex.cpp | 2 ++ packages/muelu/matlab/bin/muemex.h | 2 ++ packages/muelu/matlab/src/CMakeLists.txt | 7 +++---- .../muelu/matlab/src/MueLu_MatlabUtils_def.hpp | 16 ++++++++-------- packages/muelu/src/CMakeLists.txt | 2 +- 7 files changed, 21 insertions(+), 20 deletions(-) diff --git a/packages/muelu/CMakeLists.txt b/packages/muelu/CMakeLists.txt index 8ebedd2fb637..c0cd4cee8777 100644 --- a/packages/muelu/CMakeLists.txt +++ b/packages/muelu/CMakeLists.txt @@ -427,10 +427,10 @@ if(TPL_ENABLE_MATLAB) ENDIF() ENDIF() +ADD_SUBDIRECTORY(adapters) if(TPL_ENABLE_MATLAB) 
ADD_SUBDIRECTORY(matlab) ENDIF() -ADD_SUBDIRECTORY(adapters) if(TPL_ENABLE_MATLAB) ADD_SUBDIRECTORY(matlab/bin) ADD_SUBDIRECTORY(matlab/tests) diff --git a/packages/muelu/matlab/bin/CMakeLists.txt b/packages/muelu/matlab/bin/CMakeLists.txt index 9b43885df783..177af9b7963f 100644 --- a/packages/muelu/matlab/bin/CMakeLists.txt +++ b/packages/muelu/matlab/bin/CMakeLists.txt @@ -20,12 +20,10 @@ if(TPL_ENABLE_MATLAB) # Debugging information - IF (${PROJECT_NAME}_VERBOSE_CONFIGURE) - MESSAGE("MEX_COMPILER = " ${MEX_COMPILER}) - MESSAGE("MEX_MEXEXT = " ${MEX_MEXEXT}) - MESSAGE("MEX_EXTENSION = " ${MEX_EXTENSION}) - MESSAGE("MATLAB_MEX_DIR = " ${MATLAB_MEX_DIR}) - ENDIF() + MESSAGE("MEX_COMPILER = " ${MEX_COMPILER}) + MESSAGE("MEX_MEXEXT = " ${MEX_MEXEXT}) + MESSAGE("MEX_EXTENSION = " ${MEX_EXTENSION}) + MESSAGE("MATLAB_MEX_DIR = " ${MATLAB_MEX_DIR}) APPEND_SET(HEADERS_EXEC muemex.h) APPEND_SET(SOURCES_EXEC muemex.cpp) diff --git a/packages/muelu/matlab/bin/muemex.cpp b/packages/muelu/matlab/bin/muemex.cpp index 3a8e6b5cffba..87f5002b1ebf 100644 --- a/packages/muelu/matlab/bin/muemex.cpp +++ b/packages/muelu/matlab/bin/muemex.cpp @@ -379,7 +379,9 @@ mxArray* MuemexSystem::getHierarchyData(string dataName, MuemexType dataType, in // Otherwise would break getting A and P when 'keep' is off needFMB = false; switch (this->type) { +#ifdef HAVE_MUELU_EPETRA case EPETRA: +#endif case TPETRA: { RCP> hier = rcp_static_cast>(getDatapackHierarchy(this)); level = hier->GetLevel(levelID); diff --git a/packages/muelu/matlab/bin/muemex.h b/packages/muelu/matlab/bin/muemex.h index 0951e247b84f..ab8c30788cc2 100644 --- a/packages/muelu/matlab/bin/muemex.h +++ b/packages/muelu/matlab/bin/muemex.h @@ -63,7 +63,9 @@ namespace MueLu typedef enum { +#ifdef HAVE_MUELU_EPETRA EPETRA, +#endif TPETRA, TPETRA_COMPLEX } DataPackType; diff --git a/packages/muelu/matlab/src/CMakeLists.txt b/packages/muelu/matlab/src/CMakeLists.txt index 967f19485e8c..a58e94a69715 100644 --- 
a/packages/muelu/matlab/src/CMakeLists.txt +++ b/packages/muelu/matlab/src/CMakeLists.txt @@ -15,8 +15,8 @@ if(TPL_ENABLE_MATLAB) TRIBITS_SET_AND_INC_DIRS(DIR ${CMAKE_CURRENT_BINARY_DIR}) # Force the code into R2017b compatibility mode - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMX_COMPAT_64 -DMATLAB_MEXCMD_RELEASE=R2017b") - SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMX_COMPAT_64 -DMATLAB_MEXCMD_RELEASE=R2017b") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMX_COMPAT_64 -DMATLAB_MEXCMD_RELEASE=R2017b") + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMX_COMPAT_64 -DMATLAB_MEXCMD_RELEASE=R2017b") APPEND_GLOB(HEADERS_LIB *.hpp) APPEND_SET(SOURCES_LIB MueLu_SingleLevelMatlabFactory.cpp MueLu_TwoLevelMatlabFactory.cpp MueLu_MatlabSmoother.cpp MueLu_MatlabUtils.cpp) @@ -29,7 +29,6 @@ if(TPL_ENABLE_MATLAB) muelu-matlab HEADERS ${HEADERS_LIB} SOURCES ${SOURCES_LIB} -# DEPLIBS muelu muelu-adapters + DEPLIBS muelu muelu-adapters ) ENDIF() - diff --git a/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp b/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp index 93831c6614e6..5608edc1e855 100644 --- a/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp +++ b/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp @@ -614,15 +614,15 @@ RCP loadDataFromMatlab >(const mxArray* mxa) { RCP mgraph = rcp(new MueLu::LWGraph(tgraph)); // Set boundary nodes int numBoundaryNodes = mxGetNumberOfElements(boundaryNodes); - bool* boundaryFlags = new bool[nRows]; + Kokkos::View boundaryFlags("boundaryFlags",nRows); + // NOTE: This will not work correctly for non-CPU Node types for (int i = 0; i < nRows; i++) { boundaryFlags[i] = false; } for (int i = 0; i < numBoundaryNodes; i++) { boundaryFlags[boundaryList[i]] = true; } - ArrayRCP boundaryNodesInput(boundaryFlags, 0, nRows, true); - mgraph->SetBoundaryNodeMap(boundaryNodesInput); + mgraph->SetBoundaryNodeMap(boundaryFlags); return mgraph; } @@ -1092,7 +1092,7 @@ mxArray* saveDataToMatlab(RCP& data) { } dataIn[4] = mxCreateNumericArray(1, aggArrayDims, 
mxINT32_CLASS, mxREAL); int* as = (int*)mxGetData(dataIn[4]); // list of aggregate sizes - ArrayRCP aggSizes = data->ComputeAggregateSizes(); + auto aggSizes = data->ComputeAggregateSizes(); for (int i = 0; i < numAggs; i++) { as[i] = aggSizes[i]; } @@ -1126,7 +1126,7 @@ mxArray* saveDataToMatlab(RCP& data) { entriesPerCol[i] = 0; } for (int i = 0; i < numRows; i++) { - ArrayView neighbors = data->getNeighborVertices(i); // neighbors has the column indices for row i + ArrayView neighbors = data->getNeighborVertices_av(i); // neighbors has the column indices for row i memcpy(iter, neighbors.getRawPtr(), sizeof(mm_LocalOrd) * neighbors.size()); entriesPerRow[i] = neighbors.size(); for (int j = 0; j < neighbors.size(); j++) { @@ -1173,9 +1173,9 @@ mxArray* saveDataToMatlab(RCP& data) { delete[] entriesPerRow; delete[] entriesPerCol; // Construct list of boundary nodes - const ArrayRCP boundaryFlags = data->GetBoundaryNodeMap(); + auto boundaryFlags = data->GetBoundaryNodeMap(); int numBoundaryNodes = 0; - for (int i = 0; i < boundaryFlags.size(); i++) { + for (int i = 0; i < (int)boundaryFlags.size(); i++) { if (boundaryFlags[i]) numBoundaryNodes++; } @@ -1184,7 +1184,7 @@ mxArray* saveDataToMatlab(RCP& data) { mxArray* boundaryList = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL); int* dest = (int*)mxGetData(boundaryList); int* destIter = dest; - for (int i = 0; i < boundaryFlags.size(); i++) { + for (int i = 0; i < (int) boundaryFlags.size(); i++) { if (boundaryFlags[i]) { *destIter = i; destIter++; diff --git a/packages/muelu/src/CMakeLists.txt b/packages/muelu/src/CMakeLists.txt index 74a9eaecce87..6719fb231b1d 100644 --- a/packages/muelu/src/CMakeLists.txt +++ b/packages/muelu/src/CMakeLists.txt @@ -406,7 +406,7 @@ IF (${PACKAGE_NAME}_ENABLE_Intrepid2) TRILINOS_CREATE_CLIENT_TEMPLATE_HEADERS(${DIR}/Transfers/PCoarsen NOSIERRABJAM) ENDIF() IF (TPL_ENABLE_MATLAB) - TRIBITS_CREATE_CLIENT_TEMPLATE_HEADERS(${DIR}/../matlab/src NOSIERRABJAM) + 
TRIBITS_CREATE_CLIENT_TEMPLATE_HEADERS(${DIR}/../matlab/src) ENDIF() # Cpp file From 17600fa75f8afef773b9c70766840495b7c18aaf Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Tue, 17 Dec 2024 13:10:36 -0700 Subject: [PATCH 19/33] MueLu: Clang! Signed-off-by: Chris Siefert --- packages/muelu/matlab/bin/muemex.cpp | 8 ++++---- .../muelu/matlab/src/MueLu_MatlabUtils_decl.hpp | 4 ++-- .../muelu/matlab/src/MueLu_MatlabUtils_def.hpp | 14 +++++++------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/packages/muelu/matlab/bin/muemex.cpp b/packages/muelu/matlab/bin/muemex.cpp index 87f5002b1ebf..786c8c789b08 100644 --- a/packages/muelu/matlab/bin/muemex.cpp +++ b/packages/muelu/matlab/bin/muemex.cpp @@ -296,7 +296,7 @@ RCP getDatapackHierarchy(MuemexSystem* dp) { RCP> hier; switch (dp->type) { #ifdef HAVE_MUELU_EPETRA - case EPETRA: { + case EPETRA: { EpetraSystem* pack = (EpetraSystem*)dp; hier = pack->getHierarchy(); break; @@ -330,7 +330,7 @@ void setHierarchyData(MuemexSystem* problem, int levelID, T& data, string& dataN level = hier->GetLevel(levelID); } else #endif - if (problem->type == TPETRA) { + if (problem->type == TPETRA) { RCP> hier = ((TpetraSystem*)problem)->getHierarchy(); level = hier->GetLevel(levelID); } else if (problem->type == TPETRA_COMPLEX) { @@ -1052,7 +1052,7 @@ void parse_list_item(RCP List, char* option_name, const mxArray* useEpetra = true; else #endif - if (strcmp(opt_str.c_str(), "tpetra") == 0) + if (strcmp(opt_str.c_str(), "tpetra") == 0) useEpetra = false; } mxFree(opt_char); @@ -1280,7 +1280,7 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { D = rcp_implicit_cast(dp); } else #endif - if (intf == "tpetra") { + if (intf == "tpetra") { // infer scalar type from prhs (can be double or complex) if (mxIsComplex(prhs[1])) { #ifdef HAVE_COMPLEX_SCALARS diff --git a/packages/muelu/matlab/src/MueLu_MatlabUtils_decl.hpp b/packages/muelu/matlab/src/MueLu_MatlabUtils_decl.hpp index 
10765d409829..408eb343ba74 100644 --- a/packages/muelu/matlab/src/MueLu_MatlabUtils_decl.hpp +++ b/packages/muelu/matlab/src/MueLu_MatlabUtils_decl.hpp @@ -173,13 +173,13 @@ Teuchos::RCP convertMatlabVar(const mxArray* mxa); // trim from start static inline std::string& ltrim(std::string& s) { - s.erase(0,s.find_first_not_of(" ")); + s.erase(0, s.find_first_not_of(" ")); return s; } // trim from end static inline std::string& rtrim(std::string& s) { - s.erase(s.find_last_not_of(" "),std::string::npos); + s.erase(s.find_last_not_of(" "), std::string::npos); return s; } diff --git a/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp b/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp index 5608edc1e855..069cbbe930a3 100644 --- a/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp +++ b/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp @@ -448,7 +448,7 @@ RCP > loadD return MueLu::TpetraMultiVector_To_XpetraMultiVector(tpetraMV); } - #ifdef HAVE_MUELU_EPETRA +#ifdef HAVE_MUELU_EPETRA template <> RCP loadDataFromMatlab >(const mxArray* mxa) { RCP matrix; @@ -614,7 +614,7 @@ RCP loadDataFromMatlab >(const mxArray* mxa) { RCP mgraph = rcp(new MueLu::LWGraph(tgraph)); // Set boundary nodes int numBoundaryNodes = mxGetNumberOfElements(boundaryNodes); - Kokkos::View boundaryFlags("boundaryFlags",nRows); + Kokkos::View boundaryFlags("boundaryFlags", nRows); // NOTE: This will not work correctly for non-CPU Node types for (int i = 0; i < nRows; i++) { boundaryFlags[i] = false; @@ -1090,8 +1090,8 @@ mxArray* saveDataToMatlab(RCP& data) { throw runtime_error("Cannot store invalid aggregates in MATLAB - fewer root nodes than aggregates."); } } - dataIn[4] = mxCreateNumericArray(1, aggArrayDims, mxINT32_CLASS, mxREAL); - int* as = (int*)mxGetData(dataIn[4]); // list of aggregate sizes + dataIn[4] = mxCreateNumericArray(1, aggArrayDims, mxINT32_CLASS, mxREAL); + int* as = (int*)mxGetData(dataIn[4]); // list of aggregate sizes auto aggSizes = data->ComputeAggregateSizes(); for 
(int i = 0; i < numAggs; i++) { as[i] = aggSizes[i]; @@ -1173,8 +1173,8 @@ mxArray* saveDataToMatlab(RCP& data) { delete[] entriesPerRow; delete[] entriesPerCol; // Construct list of boundary nodes - auto boundaryFlags = data->GetBoundaryNodeMap(); - int numBoundaryNodes = 0; + auto boundaryFlags = data->GetBoundaryNodeMap(); + int numBoundaryNodes = 0; for (int i = 0; i < (int)boundaryFlags.size(); i++) { if (boundaryFlags[i]) numBoundaryNodes++; @@ -1184,7 +1184,7 @@ mxArray* saveDataToMatlab(RCP& data) { mxArray* boundaryList = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL); int* dest = (int*)mxGetData(boundaryList); int* destIter = dest; - for (int i = 0; i < (int) boundaryFlags.size(); i++) { + for (int i = 0; i < (int)boundaryFlags.size(); i++) { if (boundaryFlags[i]) { *destIter = i; destIter++; From e321f6fd4cf7f39003458a5886deeec643a1d164 Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Tue, 17 Dec 2024 13:23:04 -0700 Subject: [PATCH 20/33] MueLu: Fixes as per @cgcgcg Signed-off-by: Chris Siefert --- packages/muelu/matlab/bin/CMakeLists.txt | 29 +----------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/packages/muelu/matlab/bin/CMakeLists.txt b/packages/muelu/matlab/bin/CMakeLists.txt index 177af9b7963f..46ea7c7f0935 100644 --- a/packages/muelu/matlab/bin/CMakeLists.txt +++ b/packages/muelu/matlab/bin/CMakeLists.txt @@ -17,8 +17,7 @@ if(TPL_ENABLE_MATLAB) # Force the code into R2017b compatibility mode SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMX_COMPAT_64 -fPIC -DMATLAB_MEXCMD_RELEASE=R2017b") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMX_COMPAT_64 -fPIC -DMATLAB_MEXCMD_RELEASE=R2017b") - - +DW # Debugging information MESSAGE("MEX_COMPILER = " ${MEX_COMPILER}) MESSAGE("MEX_MEXEXT = " ${MEX_MEXEXT}) @@ -28,32 +27,6 @@ if(TPL_ENABLE_MATLAB) APPEND_SET(HEADERS_EXEC muemex.h) APPEND_SET(SOURCES_EXEC muemex.cpp) - # First, grab the package's own libraries - - #SET(LINK_LIBS) - #APPEND_SET(LINK_LIBS ${${PACKAGE_NAME}_LIBRARIES}) - 
- # Third, add test dependent package libraries - #TRIBITS_GATHER_ENABLED_ITEMS(${PACKAGE_NAME} TEST PACKAGES ALL_DEP_PACKAGES) - #TRIBITS_SORT_AND_APPEND_INCLUDE_AND_LINK_DIRS_AND_LIBS("${${PROJECT_NAME}_REVERSE_PACKAGES}" - # "${ALL_DEP_PACKAGES}" "" LINK_LIBS "") - - # Fourth, add dependent test TPL libraries - #TRIBITS_GATHER_ENABLED_ITEMS(${PACKAGE_NAME} TEST TPLS ALL_TPLS) - #TRIBITS_SORT_AND_APPEND_INCLUDE_AND_LINK_DIRS_AND_LIBS("${${PROJECT_NAME}_REVERSE_TPLS}" "${ALL_TPLS}" - # TPL_ LINK_LIBS "") - - # Fifth, add matlab-specific libs - #SET(LINK_LIBS ${LINK_LIBS} "mx" "mex" "mat") - - # Last, add last_lib to get extra link options on the link linee - #IF (${PROJECT_NAME}_EXTRA_LINK_FLAGS) - # APPEND_SET(LINK_LIBS ${last_lib}) - #ENDIF() - #IF (${PROJECT_NAME}_VERBOSE_CONFIGURE) - # PRINT_VAR(LINK_LIBS) - #ENDIF() - # Manually drop in options from the mex script (R2009b) on a linux platform. # g++ -O -pthread -shared -Wl,--version-script,/usr/local/matlab/7.9/extern/lib/glnxa64/mexFunction.map -Wl,--no-undefined -o "mlmex.mexa64" "mlmex-mlmex.o" -lm -Wl,-rpath-link,/usr/local/matlab/7.9/bin/glnxa64 -L/usr/local/matlab/7.9/bin/glnxa64 -lmx -lmex -lmat -lm if (NOT APPLE) From cda0d80370309b809323b566e0f51f836b892003 Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Tue, 17 Dec 2024 13:36:35 -0700 Subject: [PATCH 21/33] Update CMakeLists.txt Fixing. 
Signed-off-by: Chris Siefert --- packages/muelu/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/muelu/CMakeLists.txt b/packages/muelu/CMakeLists.txt index c0cd4cee8777..7770c0a164ba 100644 --- a/packages/muelu/CMakeLists.txt +++ b/packages/muelu/CMakeLists.txt @@ -430,8 +430,6 @@ ENDIF() ADD_SUBDIRECTORY(adapters) if(TPL_ENABLE_MATLAB) ADD_SUBDIRECTORY(matlab) -ENDIF() -if(TPL_ENABLE_MATLAB) ADD_SUBDIRECTORY(matlab/bin) ADD_SUBDIRECTORY(matlab/tests) ENDIF() From a3327f9f3b1a67cf08ab4be5eddaa17f89788381 Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Tue, 17 Dec 2024 14:05:29 -0700 Subject: [PATCH 22/33] MueLu: Fixing comment Signed-off-by: Chris Siefert --- packages/muelu/CMakeLists.txt | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/packages/muelu/CMakeLists.txt b/packages/muelu/CMakeLists.txt index 7770c0a164ba..2d916631ad6d 100644 --- a/packages/muelu/CMakeLists.txt +++ b/packages/muelu/CMakeLists.txt @@ -404,17 +404,6 @@ ENDIF () # C) Add the libraries, tests, and examples # -# The build has to be in a very specific order because of the existence of Matlab interface: -# 1. Build the majority of the code in src/ [except src/Interface; src/CMakeLists.txt is responsible for ignoring that] -# 2. Build matlab factories [located in matlab/; matlab/bin is ignored in matlab/CMakeLists.txt] -# 3. Build the interpreter [requires all factories] -# 4. Build the adapters [required for Matlab] -# 5. Build the rest of matlab code -# This order required splitting of the src/ library into two: muelu and muelu-interface, as steps 1. and 3. are separated. 
-# A simple diagram explains this: -# / muelu -# matlab/bin - muelu-adapters - muelu-interface | -# \ matlab ADD_SUBDIRECTORY(src) if(TPL_ENABLE_MATLAB) @@ -427,6 +416,7 @@ if(TPL_ENABLE_MATLAB) ENDIF() ENDIF() +# Note: adapters needs to come before matlab ADD_SUBDIRECTORY(adapters) if(TPL_ENABLE_MATLAB) ADD_SUBDIRECTORY(matlab) From 9570e405029f5e276cf4ec1615f33d730dd46fe3 Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Tue, 17 Dec 2024 17:36:05 -0700 Subject: [PATCH 23/33] Update CMakeLists.txt Signed-off-by: Chris Siefert --- packages/muelu/matlab/bin/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/muelu/matlab/bin/CMakeLists.txt b/packages/muelu/matlab/bin/CMakeLists.txt index 46ea7c7f0935..55660f49ecb4 100644 --- a/packages/muelu/matlab/bin/CMakeLists.txt +++ b/packages/muelu/matlab/bin/CMakeLists.txt @@ -17,7 +17,6 @@ if(TPL_ENABLE_MATLAB) # Force the code into R2017b compatibility mode SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMX_COMPAT_64 -fPIC -DMATLAB_MEXCMD_RELEASE=R2017b") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMX_COMPAT_64 -fPIC -DMATLAB_MEXCMD_RELEASE=R2017b") -DW # Debugging information MESSAGE("MEX_COMPILER = " ${MEX_COMPILER}) MESSAGE("MEX_MEXEXT = " ${MEX_MEXEXT}) From 06072472c78da163f9f7a9c5327123742085a5ad Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 17 Dec 2024 12:14:34 -0700 Subject: [PATCH 24/33] MueLu CoalesceDrop_kokkos: rewrite vector counting and matrix fill functors Signed-off-by: Christian Glusa Co-authored-by: Jonathan Hu Co-authored-by: Jonathan Hu --- .../MueLu_DroppingCommon.hpp | 2 +- .../MueLu_MatrixConstruction.hpp | 259 +++++++++++------- 2 files changed, 166 insertions(+), 95 deletions(-) diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp index 94a6b7da8e03..1b45ff16d18d 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp +++
b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp @@ -21,7 +21,7 @@ namespace MueLu { Once we are done with dropping, we should have no UNDECIDED entries left. Normally, both DROP and BOUNDARY entries will be dropped, but we distinguish them in case we want to keep boundaries. */ -enum DecisionType { +enum DecisionType : char { UNDECIDED = 0, // no decision has been taken yet, used for initialization KEEP = 1, // keeep the entry DROP = 2, // drop it diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp index 1a5f2729c72e..ead5b407e2f8 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp @@ -360,6 +360,55 @@ class PointwiseFillNoReuseFunctor { } }; +template +class BlockRowComparison { + public: + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using block_indices_view_type = Kokkos::View; + + local_matrix_type A; + local_ordinal_type bsize; + block_indices_view_type ghosted_point_to_block; + + public: + BlockRowComparison(local_matrix_type& A_, local_ordinal_type bsize_, block_indices_view_type ghosted_point_to_block_) + : A(A_) + , bsize(bsize_) + , ghosted_point_to_block(ghosted_point_to_block_) {} + + template + struct Comparator { + private: + using local_ordinal_type = typename local_matrix_type2::ordinal_type; + using memory_space = typename local_matrix_type2::memory_space; + using block_indices_view_type = Kokkos::View; + + const local_matrix_type2 A; + const local_ordinal_type offset; + const block_indices_view_type ghosted_point_to_block; + + public: + KOKKOS_INLINE_FUNCTION + Comparator(const local_matrix_type2& A_, local_ordinal_type bsize_, local_ordinal_type brlid_, block_indices_view_type ghosted_point_to_block_) + : 
A(A_) + , offset(A_.graph.row_map(bsize_ * brlid_)) + , ghosted_point_to_block(ghosted_point_to_block_) {} + + KOKKOS_INLINE_FUNCTION + bool operator()(size_t x, size_t y) const { + return ghosted_point_to_block(A.graph.entries(offset + x)) < ghosted_point_to_block(A.graph.entries(offset + y)); + } + }; + + using comparator_type = Comparator; + + KOKKOS_INLINE_FUNCTION + comparator_type getComparator(local_ordinal_type brlid) const { + return comparator_type(A, bsize, brlid, ghosted_point_to_block); + } +}; + /*! @class VectorCountingFunctor @brief Functor that executes a sequence of sub-functors on each block of rows. @@ -380,6 +429,7 @@ class VectorCountingFunctor { using memory_space = typename local_matrix_type::memory_space; using results_view = Kokkos::View; using block_indices_view_type = Kokkos::View; + using permutation_type = Kokkos::View; using rowptr_type = typename local_matrix_type::row_map_type::non_const_type; using ATS = Kokkos::ArithTraits; @@ -392,6 +442,10 @@ class VectorCountingFunctor { rowptr_type graph_rowptr; functor_type functor; + + BlockRowComparison comparison; + permutation_type permutation; + VectorCountingFunctor remainingFunctors; std::vector functorNames; @@ -405,7 +459,9 @@ class VectorCountingFunctor { , filtered_rowptr(filtered_rowptr_) , graph_rowptr(graph_rowptr_) , functor(functor_) + , comparison(BlockRowComparison(A, blockSize_, ghosted_point_to_block)) , remainingFunctors(A_, blockSize_, ghosted_point_to_block_, results_, filtered_rowptr_, graph_rowptr_, remainingFunctors_...) 
{ + permutation = permutation_type("permutation", A.nnz()); #ifdef MUELU_COALESCE_DROP_DEBUG std::string mangledFunctorName = typeid(decltype(functor)).name(); int status = 0; @@ -495,40 +551,41 @@ class VectorCountingFunctor { Kokkos::printf("Done with block row %d\nGraph indices ", brlid); #endif - local_ordinal_type* nextIndices = new local_ordinal_type[blockSize]; - for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { - nextIndices[block_index] = 0; - } + // column lids for all rows in the block + auto block_clids = Kokkos::subview(A.graph.entries, Kokkos::make_pair(A.graph.row_map(blockSize * brlid), + A.graph.row_map(blockSize * (brlid + 1)))); + // set up a permutation index + auto block_permutation = Kokkos::subview(permutation, Kokkos::make_pair(A.graph.row_map(blockSize * brlid), + A.graph.row_map(blockSize * (brlid + 1)))); + for (size_t i = 0; i < block_permutation.extent(0); ++i) + block_permutation(i) = i; + // get permutation for sorted column indices of the entire block + auto comparator = comparison.getComparator(brlid); + Misc::serialHeapSort(block_permutation, comparator); + local_ordinal_type prev_bclid = -1; - while (true) { - local_ordinal_type min_block_index = -1; - local_ordinal_type min_clid = ATS::max(); - local_ordinal_type min_offset = -1; - for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { - auto rlid = blockSize * brlid + block_index; - auto offset = A.graph.row_map(rlid) + nextIndices[block_index]; - if (offset == A.graph.row_map(rlid + 1)) - continue; - auto clid = A.graph.entries(offset); - if (clid < min_clid) { - min_block_index = block_index; - min_clid = clid; - min_offset = offset; - } - } - if (min_block_index == -1) - break; - ++nextIndices[min_block_index]; - auto bclid = ghosted_point_to_block(min_clid); - if (prev_bclid < bclid) { - if (results(min_offset) == KEEP) { - ++(*nnz_graph); + bool alreadyAdded = false; + + // loop over all sorted entries in block 
+ auto offset = A.graph.row_map(blockSize * brlid); + for (size_t i = 0; i < block_permutation.extent(0); ++i) { + auto idx = offset + block_permutation(i); + auto clid = A.graph.entries(idx); + auto bclid = ghosted_point_to_block(clid); + + // unseen block column index + if (bclid > prev_bclid) + alreadyAdded = false; + + // add entry to graph + if (!alreadyAdded && (results(idx) == KEEP)) { + ++(*nnz_graph); + alreadyAdded = true; #ifdef MUELU_COALESCE_DROP_DEBUG - Kokkos::printf("%5d ", bclid); + Kokkos::printf("%5d ", bclid); #endif - prev_bclid = bclid; - } } + prev_bclid = bclid; } #ifdef MUELU_COALESCE_DROP_DEBUG Kokkos::printf("\n"); @@ -547,6 +604,7 @@ class VectorCountingFunctor { using memory_space = typename local_matrix_type::memory_space; using results_view = Kokkos::View; using block_indices_view_type = Kokkos::View; + using permutation_type = Kokkos::View; using rowptr_type = typename local_matrix_type::row_map_type::non_const_type; using ATS = Kokkos::ArithTraits; @@ -563,6 +621,9 @@ class VectorCountingFunctor { std::vector functorNames; + BlockRowComparison comparison; + permutation_type permutation; + public: VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, functor_type& functor_) : A(A_) @@ -571,7 +632,9 @@ class VectorCountingFunctor { , results(results_) , filtered_rowptr(filtered_rowptr_) , graph_rowptr(graph_rowptr_) - , functor(functor_) { + , functor(functor_) + , comparison(BlockRowComparison(A, blockSize_, ghosted_point_to_block)) { + permutation = permutation_type("permutation", A.nnz()); #ifdef MUELU_COALESCE_DROP_DEBUG std::string mangledFunctorName = typeid(decltype(functor)).name(); int status = 0; @@ -659,40 +722,41 @@ class VectorCountingFunctor { Kokkos::printf("Done with block row %d\nGraph indices ", brlid); #endif - local_ordinal_type* nextIndices = new 
local_ordinal_type[blockSize]; - for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { - nextIndices[block_index] = 0; - } + // column lids for all rows in the block + auto block_clids = Kokkos::subview(A.graph.entries, Kokkos::make_pair(A.graph.row_map(blockSize * brlid), + A.graph.row_map(blockSize * (brlid + 1)))); + // set up a permutation index + auto block_permutation = Kokkos::subview(permutation, Kokkos::make_pair(A.graph.row_map(blockSize * brlid), + A.graph.row_map(blockSize * (brlid + 1)))); + for (size_t i = 0; i < block_permutation.extent(0); ++i) + block_permutation(i) = i; + // get permutation for sorted column indices of the entire block + auto comparator = comparison.getComparator(brlid); + Misc::serialHeapSort(block_permutation, comparator); + local_ordinal_type prev_bclid = -1; - while (true) { - local_ordinal_type min_block_index = -1; - local_ordinal_type min_clid = ATS::max(); - local_ordinal_type min_offset = -1; - for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { - auto rlid = blockSize * brlid + block_index; - auto offset = A.graph.row_map(rlid) + nextIndices[block_index]; - if (offset == A.graph.row_map(rlid + 1)) - continue; - auto clid = A.graph.entries(offset); - if (clid < min_clid) { - min_block_index = block_index; - min_clid = clid; - min_offset = offset; - } - } - if (min_block_index == -1) - break; - ++nextIndices[min_block_index]; - auto bclid = ghosted_point_to_block(min_clid); - if (prev_bclid < bclid) { - if (results(min_offset) == KEEP) { - ++(*nnz_graph); + bool alreadyAdded = false; + + // loop over all sorted entries in block + auto offset = A.graph.row_map(blockSize * brlid); + for (size_t i = 0; i < block_permutation.extent(0); ++i) { + auto idx = offset + block_permutation(i); + auto clid = A.graph.entries(idx); + auto bclid = ghosted_point_to_block(clid); + + // unseen block column index + if (bclid > prev_bclid) + alreadyAdded = false; + + // add entry to 
graph + if (!alreadyAdded && (results(idx) == KEEP)) { + ++(*nnz_graph); + alreadyAdded = true; #ifdef MUELU_COALESCE_DROP_DEBUG - Kokkos::printf("%5d ", bclid); + Kokkos::printf("%5d ", bclid); #endif - prev_bclid = bclid; - } } + prev_bclid = bclid; } #ifdef MUELU_COALESCE_DROP_DEBUG Kokkos::printf("\n"); @@ -720,6 +784,7 @@ class VectorFillFunctor { using ATS = Kokkos::ArithTraits; using OTS = Kokkos::ArithTraits; using block_indices_view_type = Kokkos::View; + using permutation_type = Kokkos::View; local_matrix_type A; local_ordinal_type blockSize; @@ -729,6 +794,9 @@ class VectorFillFunctor { local_graph_type graph; const scalar_type zero = ATS::zero(); + BlockRowComparison comparison; + permutation_type permutation; + public: VectorFillFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, local_matrix_type& filteredA_, local_graph_type& graph_) : A(A_) @@ -736,7 +804,10 @@ class VectorFillFunctor { , ghosted_point_to_block(ghosted_point_to_block_) , results(results_) , filteredA(filteredA_) - , graph(graph_) {} + , graph(graph_) + , comparison(BlockRowComparison(A, blockSize_, ghosted_point_to_block)) { + permutation = permutation_type("permutation", A.nnz()); + } KOKKOS_INLINE_FUNCTION void operator()(const local_ordinal_type brlid) const { @@ -776,40 +847,40 @@ class VectorFillFunctor { } } - local_ordinal_type* nextIndices = new local_ordinal_type[blockSize]; - for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { - nextIndices[block_index] = 0; - } - local_ordinal_type prev_bclid = -1; + // column lids for all rows in the block + auto block_clids = Kokkos::subview(A.graph.entries, Kokkos::make_pair(A.graph.row_map(blockSize * brlid), + A.graph.row_map(blockSize * (brlid + 1)))); + // set up a permutation index + auto block_permutation = Kokkos::subview(permutation, Kokkos::make_pair(A.graph.row_map(blockSize * brlid), + A.graph.row_map(blockSize 
* (brlid + 1)))); + for (size_t i = 0; i < block_permutation.extent(0); ++i) + block_permutation(i) = i; + // get permutation for sorted column indices of the entire block + auto comparator = comparison.getComparator(brlid); + Misc::serialHeapSort(block_permutation, comparator); - local_ordinal_type j = graph.row_map(brlid); - while (true) { - local_ordinal_type min_block_index = -1; - local_ordinal_type min_clid = OTS::max(); - local_ordinal_type min_offset = -1; - for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { - auto rlid = blockSize * brlid + block_index; - auto offset = A.graph.row_map(rlid) + nextIndices[block_index]; - if (offset == A.graph.row_map(rlid + 1)) - continue; - auto clid = A.graph.entries(offset); - if (clid < min_clid) { - min_block_index = block_index; - min_clid = clid; - min_offset = offset; - } - } - if (min_block_index == -1) - break; - ++nextIndices[min_block_index]; - auto bclid = ghosted_point_to_block(min_clid); - if (prev_bclid < bclid) { - if (results(min_offset) == KEEP) { - graph.entries(j) = bclid; - ++j; - prev_bclid = bclid; - } + local_ordinal_type prev_bclid = -1; + bool alreadyAdded = false; + local_ordinal_type j = graph.row_map(brlid); + + // loop over all sorted entries in block + auto offset = A.graph.row_map(blockSize * brlid); + for (size_t i = 0; i < block_permutation.extent(0); ++i) { + auto idx = offset + block_permutation(i); + auto clid = A.graph.entries(idx); + auto bclid = ghosted_point_to_block(clid); + + // unseen block column index + if (bclid > prev_bclid) + alreadyAdded = false; + + // add entry to graph + if (!alreadyAdded && (results(idx) == KEEP)) { + graph.entries(j) = bclid; + ++j; + alreadyAdded = true; } + prev_bclid = bclid; } } }; From cbe67432ac876a841ad81de90abd2842cf7fb11f Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Mon, 16 Dec 2024 15:16:18 -0700 Subject: [PATCH 25/33] Turn off SuperLU for ML anywhere we enable SuperLU Signed-off-by: Samuel E. 
Browne --- packages/framework/ini-files/config-specs.ini | 22 ++----------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index b65b29b919c6..7f70ebe97982 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -337,6 +337,7 @@ opt-set-cmake-var TPL_ENABLE_Zlib BOOL : ON opt-set-cmake-var TPL_ENABLE_HDF5 BOOL : ON opt-set-cmake-var TPL_ENABLE_Netcdf BOOL : ON opt-set-cmake-var TPL_ENABLE_SuperLU BOOL : ON +opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF opt-set-cmake-var Trilinos_TRACE_ADD_TEST BOOL : ON opt-set-cmake-var TPL_ENABLE_Scotch BOOL : ON @@ -1343,8 +1344,6 @@ opt-set-cmake-var Teko_DISABLE_LSCSTABALIZED_TPETRA_ALPAH_INV_D BOOL : ON use TEST_DISABLES|CLANG -opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF - opt-set-cmake-var Pliris_vector_random_MPI_3_DISABLE BOOL : ON opt-set-cmake-var Pliris_vector_random_MPI_4_DISABLE BOOL : ON @@ -1383,10 +1382,6 @@ opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --b opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : -L${NETCDF_C_LIB|ENV}/lib;${NETCDF_C_LIB|ENV}/libnetcdf.so;${PARALLEL_NETCDF_LIB|ENV}/libpnetcdf.a opt-set-cmake-var TPL_HDF5_LIBRARIES STRING FORCE : ${HDF5_LIB|ENV}/libhdf5_hl.so;${HDF5_LIB|ENV}/libhdf5.a;${ZLIB_LIB|ENV}/libz.a;-ldl -# I get the following error if I do not disable ML_ENABLE_SuperLU: -# ML CONFIGURATION ERROR: SuperLU_5.0 detected - only SuperLU version < 5.0 currently supported for this package. 
-opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF - opt-set-cmake-var Zoltan_ch_simple_parmetis_parallel_DISABLE BOOL FORCE : ON opt-set-cmake-var Zoltan_ch_7944_parmetis_parallel_DISABLE BOOL FORCE : ON opt-set-cmake-var Zoltan_ch_simple_scotch_parallel_DISABLE BOOL FORCE : ON @@ -1590,7 +1585,6 @@ use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS use SPACK_NETLIB_BLAS_LAPACK -opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF opt-set-cmake-var TPL_ENABLE_ParMETIS BOOL FORCE : OFF opt-set-cmake-var TPL_ENABLE_Scotch BOOL FORCE : OFF opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : ${NETCDF_C_LIB|ENV}/libnetcdf.so @@ -1631,10 +1625,7 @@ opt-set-cmake-var CMAKE_CXX_EXTENSIONS BOOL : O opt-set-cmake-var Teko_DISABLE_LSCSTABALIZED_TPETRA_ALPAH_INV_D BOOL : ON opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -fno-strict-aliasing -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-inline -Wno-nonnull-compare -Wno-address -# TPL_BLAS_LIBRARIES is redefined here with libm for SuperLU to properly link -opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : -L${BLAS_ROOT|ENV}/lib;-lblas;-lgfortran;-lgomp;-lm opt-set-cmake-var TPL_HDF5_LIBRARIES STRING : ${HDF5_LIB|ENV}/libhdf5_hl.so;${HDF5_LIB|ENV}/libhdf5.so;${ZLIB_LIB|ENV}/libz.so;-ldl -opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF [rhel8_sems-gnu-8.5.0-openmpi-4.1.6-openmp_release-debug_static_no-kokkos-arch_no-asan_no-complex_no-fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_all] use rhel8_sems-gnu-8.5.0-openmpi-4.1.6-openmp_release-debug_static_no-kokkos-arch_no-asan_no-complex_no-fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_no-package-enables @@ -1661,8 +1652,6 @@ use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS use SPACK_NETLIB_BLAS_LAPACK -opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF - 
opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none --mca btl vader,self opt-set-cmake-var Teko_DISABLE_LSCSTABALIZED_TPETRA_ALPAH_INV_D BOOL : ON opt-set-cmake-var KokkosKernels_blas_serial_MPI_1_DISABLE BOOL : ON @@ -1772,7 +1761,6 @@ opt-set-cmake-var Amesos_ENABLE_SuperLU BOOL FORCE : OFF opt-set-cmake-var Amesos_ENABLE_SuperLUDist BOOL FORCE : OFF opt-set-cmake-var Amesos2_ENABLE_SuperLU BOOL FORCE : OFF opt-set-cmake-var Amesos2_ENABLE_SuperLUDist BOOL FORCE : OFF -opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF opt-set-cmake-var Teko_DISABLE_LSCSTABALIZED_TPETRA_ALPAH_INV_D BOOL : ON opt-set-cmake-var KokkosKernels_blas_serial_MPI_1_DISABLE BOOL : ON @@ -1823,9 +1811,6 @@ opt-set-cmake-var TPL_ENABLE_Pnetcdf BOOL FORCE : OFF opt-set-cmake-var Trilinos_ENABLE_TrilinosFrameworkTests BOOL FORCE : OFF opt-set-cmake-var Trilinos_ENABLE_TrilinosBuildStats BOOL FORCE : OFF -# Turned off to bypass: ML CONFIGURATION ERROR: SuperLU_5.0 detected - only SuperLU version < 5.0 currently supported for this package. -opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF - use RHEL8_POST [rhel8_aue-gcc-openmpi_debug_shared_no-kokkos-arch_no-asan_complex_no-fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_no-package-enables] @@ -1863,9 +1848,6 @@ opt-set-cmake-var TPL_ENABLE_SuperLUDist BOOL FORCE: OFF opt-set-cmake-var Trilinos_ENABLE_TrilinosFrameworkTests BOOL FORCE : OFF opt-set-cmake-var Trilinos_ENABLE_TrilinosBuildStats BOOL FORCE : OFF -# Turned off to bypass: ML CONFIGURATION ERROR: SuperLU_5.0 detected - only SuperLU version < 5.0 currently supported for this package. 
-opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF - use RHEL8_POST [rhel8_gcc-openmpi_debug_shared_no-kokkos-arch_no-asan_complex_no-fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_all] @@ -1923,6 +1905,7 @@ opt-set-cmake-var TPL_ENABLE_HDF5 BOOL : ON opt-set-cmake-var TPL_HDF5_LIBRARIES STRING : "${HDF5_LIB|ENV}/libhdf5_hl.so;${HDF5_LIB|ENV}/libhdf5.so;${ZLIB_LIB|ENV}/libz.so" opt-set-cmake-var TPL_ENABLE_Netcdf BOOL : ON opt-set-cmake-var TPL_ENABLE_SuperLU BOOL : ON +opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF opt-set-cmake-var TPL_ENABLE_Scotch BOOL : OFF opt-set-cmake-var CMAKE_C_COMPILER FILEPATH : ${MPICC|ENV} @@ -1947,7 +1930,6 @@ opt-set-cmake-var Trilinos_ENABLE_Komplex BOOL : OFF opt-set-cmake-var Trilinos_ENABLE_TriKota BOOL : OFF opt-set-cmake-var Trilinos_ENABLE_Moertel BOOL : OFF opt-set-cmake-var Trilinos_ENABLE_Domi BOOL : OFF -opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF [ubuntu_gnu_release-debug_shared_no-kokkos-arch_no-asan_no-complex_fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_all] use BUILD-TYPE|RELEASE-DEBUG From 7f938383bb6e53572bf32b42273334e20b3df7f2 Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Mon, 16 Dec 2024 20:01:11 -0700 Subject: [PATCH 26/33] Revert "Use default finds for BLAS and LAPACK" This reverts commit 773e0f9fcd4fd4421afc6ebdbdf2673c9e1b6d2d. Signed-off-by: Samuel E. 
Browne --- packages/framework/ini-files/config-specs.ini | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index 7f70ebe97982..17a0b27e6b24 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -393,6 +393,8 @@ opt-set-cmake-var Scotch_LIBRARY_DIRS PATH : ${SEMS_SCOTCH_LIBRARY_PATH|ENV} # Explicit libraries opt-set-cmake-var TPL_DLlib_LIBRARIES PATH : ${DL_LIBRARIES|ENV} opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING : ${SEMS_NETCDF_LIBRARY_PATH|ENV}/libnetcdf.so;${SEMS_NETCDF_LIBRARY_PATH|ENV}/libpnetcdf.a +opt-set-cmake-var TPL_BLAS_LIBRARIES STRING : ${BLAS_LIBRARIES|ENV} +opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING : ${LAPACK_LIBRARIES|ENV} [SPACK_NETLIB_BLAS_LAPACK] opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : -L${BLAS_ROOT|ENV}/lib;-lblas;-lgfortran;-lgomp @@ -400,6 +402,12 @@ opt-set-cmake-var TPL_BLAS_LIBRARY_DIRS STRING FORCE : ${BLAS_ROOT|ENV}/lib opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : -L${BLAS_ROOT|ENV}/lib;-llapack;-lgfortran;-lgomp opt-set-cmake-var TPL_LAPACK_LIBRARY_DIRS STRING FORCE : ${BLAS_ROOT|ENV}/lib +[SPACK_OPENBLAS_BLAS_LAPACK] +opt-set-cmake-var TPL_BLAS_LIBRARY_DIRS STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib +opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-L${OPENBLAS_ROOT|ENV}/lib;-lgfortran;-lgomp;-lm +opt-set-cmake-var TPL_LAPACK_LIBRARY_DIRS STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib +opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-L${OPENBLAS_ROOT|ENV}/lib;-lgfortran;-lgomp;-lm + [COMMON_SPACK_TPLS] use COMMON @@ -1751,6 +1759,7 @@ use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS +use SPACK_OPENBLAS_BLAS_LAPACK opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none --mca btl vader,self opt-set-cmake-var 
CMAKE_CXX_FLAGS STRING FORCE : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-nonnull-compare -Wno-address -Wno-inline @@ -1800,9 +1809,11 @@ use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS +use SPACK_OPENBLAS_BLAS_LAPACK opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-nonnull-compare -Wno-address -Wno-inline -Wno-unused-but-set-variable -Wno-unused-label + opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : "" opt-set-cmake-var TPL_ENABLE_ParMETIS BOOL FORCE : OFF @@ -1971,6 +1982,7 @@ use USE-UVM|NO use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-EPETRA use CUDA +use SPACK_OPENBLAS_BLAS_LAPACK opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL : ON opt-set-cmake-var TPL_ENABLE_X11 BOOL : OFF @@ -1997,6 +2009,7 @@ use USE-UVM|YES use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-EPETRA use CUDA +use SPACK_OPENBLAS_BLAS_LAPACK opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : OFF opt-set-cmake-var Kokkos_ENABLE_TESTS BOOL FORCE : ON From 4e76058dd43277e0a48cbf0939c510db6cd39a4b Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Mon, 16 Dec 2024 20:06:35 -0700 Subject: [PATCH 27/33] Re-specify static OpenBLAS libraries Static libopenblas.a still needs (for now) shared libgfortran and libm. So do it the "hacky" way, but isolate it to where we need static libopenblas. Signed-off-by: Samuel E. 
Browne --- packages/framework/ini-files/config-specs.ini | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index 17a0b27e6b24..413a97326dfd 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -402,11 +402,10 @@ opt-set-cmake-var TPL_BLAS_LIBRARY_DIRS STRING FORCE : ${BLAS_ROOT|ENV}/lib opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : -L${BLAS_ROOT|ENV}/lib;-llapack;-lgfortran;-lgomp opt-set-cmake-var TPL_LAPACK_LIBRARY_DIRS STRING FORCE : ${BLAS_ROOT|ENV}/lib -[SPACK_OPENBLAS_BLAS_LAPACK] -opt-set-cmake-var TPL_BLAS_LIBRARY_DIRS STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib -opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-L${OPENBLAS_ROOT|ENV}/lib;-lgfortran;-lgomp;-lm -opt-set-cmake-var TPL_LAPACK_LIBRARY_DIRS STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib -opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-L${OPENBLAS_ROOT|ENV}/lib;-lgfortran;-lgomp;-lm +[SPACK_SERIAL_OPENBLAS] +# Static OpenBLAS, but shared m and gfortran +opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-lgfortran;-lm +opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-lgfortran;-lm [COMMON_SPACK_TPLS] use COMMON @@ -1759,7 +1758,6 @@ use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS -use SPACK_OPENBLAS_BLAS_LAPACK opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none --mca btl vader,self opt-set-cmake-var CMAKE_CXX_FLAGS STRING FORCE : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-nonnull-compare -Wno-address -Wno-inline @@ 
-1809,11 +1807,9 @@ use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS -use SPACK_OPENBLAS_BLAS_LAPACK opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-nonnull-compare -Wno-address -Wno-inline -Wno-unused-but-set-variable -Wno-unused-label - opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : "" opt-set-cmake-var TPL_ENABLE_ParMETIS BOOL FORCE : OFF @@ -1982,7 +1978,7 @@ use USE-UVM|NO use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-EPETRA use CUDA -use SPACK_OPENBLAS_BLAS_LAPACK +use SPACK_SERIAL_OPENBLAS opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL : ON opt-set-cmake-var TPL_ENABLE_X11 BOOL : OFF @@ -2009,7 +2005,7 @@ use USE-UVM|YES use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-EPETRA use CUDA -use SPACK_OPENBLAS_BLAS_LAPACK +use SPACK_SERIAL_OPENBLAS opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : OFF opt-set-cmake-var Kokkos_ENABLE_TESTS BOOL FORCE : ON From b31f7123507133873f4fe6449b936216bf7d5be6 Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Mon, 16 Dec 2024 20:09:04 -0700 Subject: [PATCH 28/33] Use default finds for BLAS and LAPACK Or rather, do not specify TPL__LIBRARIES for the default case. Signed-off-by: Samuel E. 
Browne --- packages/framework/ini-files/config-specs.ini | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index 413a97326dfd..b3a2b1a40726 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -393,8 +393,6 @@ opt-set-cmake-var Scotch_LIBRARY_DIRS PATH : ${SEMS_SCOTCH_LIBRARY_PATH|ENV} # Explicit libraries opt-set-cmake-var TPL_DLlib_LIBRARIES PATH : ${DL_LIBRARIES|ENV} opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING : ${SEMS_NETCDF_LIBRARY_PATH|ENV}/libnetcdf.so;${SEMS_NETCDF_LIBRARY_PATH|ENV}/libpnetcdf.a -opt-set-cmake-var TPL_BLAS_LIBRARIES STRING : ${BLAS_LIBRARIES|ENV} -opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING : ${LAPACK_LIBRARIES|ENV} [SPACK_NETLIB_BLAS_LAPACK] opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : -L${BLAS_ROOT|ENV}/lib;-lblas;-lgfortran;-lgomp From 3e07177acae0d1acde6147e3c71ad695eef0602b Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Wed, 18 Dec 2024 13:09:22 -0700 Subject: [PATCH 29/33] Rename AT2 builds to indicate production-readiness Change main workflow name from AT2-EXPERIMENTAL back to AT2. Mark those individual runs that are anticipated to be transitioned to required as no longer EXPERIMENTAL. Signed-off-by: Samuel E. 
Browne --- .github/workflows/AT2.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/AT2.yml b/.github/workflows/AT2.yml index d4575ba6457d..3c1d753bd0b4 100644 --- a/.github/workflows/AT2.yml +++ b/.github/workflows/AT2.yml @@ -1,4 +1,4 @@ -name: AT2-EXPERIMENTAL +name: AT2 on: pull_request: @@ -128,7 +128,7 @@ jobs: echo "https://github.com/trilinos/Trilinos/wiki/Containers" >> $GITHUB_STEP_SUMMARY echo "https://gitlab-ex.sandia.gov/trilinos-project/trilinos-containers/-/wikis/Containers-at-Sandia" >> $GITHUB_STEP_SUMMARY - gcc830-serial-EXPERIMENTAL: + gcc830: needs: pre-checks runs-on: [self-hosted, gcc-8.3.0_serial] if: ${{ needs.pre-checks.outputs.should_skip != 'true' && (github.event.action == 'synchronize' || github.event.action == 'opened' || github.event.review.state == 'APPROVED') }} @@ -314,7 +314,7 @@ jobs: echo "https://github.com/trilinos/Trilinos/wiki/Containers" >> $GITHUB_STEP_SUMMARY echo "https://gitlab-ex.sandia.gov/trilinos-project/trilinos-containers/-/wikis/Containers-at-Sandia" >> $GITHUB_STEP_SUMMARY - framework-tests-EXPERIMENTAL: + framework-tests: needs: pre-checks runs-on: [self-hosted, python-3.9] if: ${{ needs.pre-checks.outputs.should_skip != 'true' && (github.event.action == 'synchronize' || github.event.action == 'opened' || github.event.review.state == 'APPROVED') }} From 0bac0656115507c7f08972ece5fb42fa10eb2f2d Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Tue, 17 Dec 2024 11:42:10 -0700 Subject: [PATCH 30/33] Snapshot of kokkos-kernels.git from commit 42593705e42e661fe68151415df607cbf6f89e47 From repository at git@github.com:kokkos/kokkos-kernels.git At commit: commit 42593705e42e661fe68151415df607cbf6f89e47 Author: Nathan Ellingwood Date: Tue Dec 17 11:39:48 2024 -0700 update master_history.txt Signed-off-by: Nathan Ellingwood --- packages/kokkos-kernels/CHANGELOG.md | 6 ++++++ packages/kokkos-kernels/CMakeLists.txt | 4 ++-- packages/kokkos-kernels/master_history.txt | 1 + 3 
files changed, 9 insertions(+), 2 deletions(-) diff --git a/packages/kokkos-kernels/CHANGELOG.md b/packages/kokkos-kernels/CHANGELOG.md index 58695228e4ef..37c25f8525fe 100644 --- a/packages/kokkos-kernels/CHANGELOG.md +++ b/packages/kokkos-kernels/CHANGELOG.md @@ -1,5 +1,11 @@ # Change Log +## [4.5.01](https://github.com/kokkos/kokkos-kernels/tree/4.5.01) +[Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/4.5.00...4.5.01) + +### Bug Fixes: +- Fix the package version [\#2460](https://github.com/kokkos/kokkos-kernels/pull/2460) + ## [4.5.00](https://github.com/kokkos/kokkos-kernels/tree/4.5.00) [Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/4.4.01...4.5.00) diff --git a/packages/kokkos-kernels/CMakeLists.txt b/packages/kokkos-kernels/CMakeLists.txt index c766cdf18713..5c8f6b0dc955 100644 --- a/packages/kokkos-kernels/CMakeLists.txt +++ b/packages/kokkos-kernels/CMakeLists.txt @@ -11,7 +11,7 @@ SET(KOKKOSKERNELS_TOP_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) SET(KokkosKernels_VERSION_MAJOR 4) SET(KokkosKernels_VERSION_MINOR 5) -SET(KokkosKernels_VERSION_PATCH 0) +SET(KokkosKernels_VERSION_PATCH 1) SET(KokkosKernels_VERSION "${KokkosKernels_VERSION_MAJOR}.${KokkosKernels_VERSION_MINOR}.${KokkosKernels_VERSION_PATCH}") #Set variables for config file @@ -32,7 +32,7 @@ IF(NOT KOKKOSKERNELS_HAS_TRILINOS) ENDIF() ENDIF() IF(NOT DEFINED ${PROJECT_NAME}) - PROJECT(KokkosKernels CXX) + PROJECT(KokkosKernels VERSION ${KokkosKernels_VERSION} LANGUAGES CXX) ENDIF() ENDIF() diff --git a/packages/kokkos-kernels/master_history.txt b/packages/kokkos-kernels/master_history.txt index a02c157740d6..ef872959d470 100644 --- a/packages/kokkos-kernels/master_history.txt +++ b/packages/kokkos-kernels/master_history.txt @@ -29,3 +29,4 @@ tag: 4.3.01 date: 05/07/2024 master: 1b0a15f5 release: 58785c1b tag: 4.4.00 date: 08/08/2024 master: d1a91b8a release: 1145f529 tag: 4.4.01 date: 09/12/2024 master: 0608a337 release: 6b340287 tag: 4.5.00 date: 11/11/2024 
master: 0b43169e release: 4a7590af +tag: 4.5.01 date: 12/17/2024 master: 957ac849 release: 0b3d5a3b From a748c40f7fc159a6c27b43a10066182121fabeac Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Thu, 19 Dec 2024 11:27:29 -0700 Subject: [PATCH 31/33] Snapshot of kokkos.git from commit ff977240c493f8b5a170cb81c7dc111cd2fb1861 From repository at git@github.com:kokkos/kokkos.git At commit: commit ff977240c493f8b5a170cb81c7dc111cd2fb1861 Author: Nathan Ellingwood Date: Thu Dec 19 11:22:34 2024 -0700 update master_history.txt Signed-off-by: Nathan Ellingwood --- packages/kokkos/CHANGELOG.md | 11 ++ packages/kokkos/CMakeLists.txt | 2 +- packages/kokkos/Makefile.kokkos | 11 +- packages/kokkos/README.md | 6 +- .../containers/src/Kokkos_DynRankView.hpp | 127 +++++++++++++++++- .../kokkos/core/unit_test/TestAtomicViews.hpp | 8 +- .../core/unit_test/TestViewBadAlloc.hpp | 6 + packages/kokkos/master_history.txt | 1 + packages/kokkos/scripts/docker/Dockerfile.gcc | 2 +- .../kokkos/scripts/docker/Dockerfile.hipcc | 2 +- .../docker/Dockerfile.kokkosllvmproject | 2 +- .../kokkos/scripts/docker/Dockerfile.nvcc | 2 +- .../kokkos/scripts/docker/Dockerfile.nvhpc | 2 +- .../scripts/docker/Dockerfile.openmptarget | 2 +- .../kokkos/scripts/docker/Dockerfile.sycl | 4 +- .../experimental/__p0009_bits/config.hpp | 8 +- .../__p2630_bits/submdspan_mapping.hpp | 17 ++- 17 files changed, 182 insertions(+), 31 deletions(-) diff --git a/packages/kokkos/CHANGELOG.md b/packages/kokkos/CHANGELOG.md index 6c237ebca867..84bbd03585bd 100644 --- a/packages/kokkos/CHANGELOG.md +++ b/packages/kokkos/CHANGELOG.md @@ -1,5 +1,16 @@ # CHANGELOG +## 4.5.01 + +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.5.00...4.5.01) + +### Bug Fixes + +* Fix re-builds after cleaning the binary tree when doing `add_subdirectory` on the Kokkos source [\#7557](https://github.com/kokkos/kokkos/pull/7557) +* Update mdspan to include fix for submdspan and bracket operator with clang 15&16 
[\#7559](https://github.com/kokkos/kokkos/pull/7559) +* Fix DynRankView performance regression by re-introducing shortcut operator() impls [\#7606](https://github.com/kokkos/kokkos/pull/7606) +* Add missing MI300A (`GFX942_APU`) option to Makefile build-system + ## 4.5.00 [Full Changelog](https://github.com/kokkos/kokkos/compare/4.4.01...4.5.00) diff --git a/packages/kokkos/CMakeLists.txt b/packages/kokkos/CMakeLists.txt index f0bf8e3634a9..6a70bea14973 100644 --- a/packages/kokkos/CMakeLists.txt +++ b/packages/kokkos/CMakeLists.txt @@ -149,7 +149,7 @@ endif() set(Kokkos_VERSION_MAJOR 4) set(Kokkos_VERSION_MINOR 5) -set(Kokkos_VERSION_PATCH 0) +set(Kokkos_VERSION_PATCH 1) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") message(STATUS "Kokkos version: ${Kokkos_VERSION}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") diff --git a/packages/kokkos/Makefile.kokkos b/packages/kokkos/Makefile.kokkos index 9e6ad3241564..f67eadf241f3 100644 --- a/packages/kokkos/Makefile.kokkos +++ b/packages/kokkos/Makefile.kokkos @@ -2,7 +2,7 @@ KOKKOS_VERSION_MAJOR = 4 KOKKOS_VERSION_MINOR = 5 -KOKKOS_VERSION_PATCH = 0 +KOKKOS_VERSION_PATCH = 1 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial @@ -13,7 +13,7 @@ KOKKOS_DEVICES ?= "Threads" # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90 # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace # IBM: Power8,Power9 -# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103 +# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX942_APU,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 # 
Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC KOKKOS_ARCH ?= "" @@ -454,6 +454,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 0) endif KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942_APU) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030) @@ -468,6 +469,7 @@ KOKKOS_INTERNAL_USE_ARCH_AMD := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX9 + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942) \ + + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103)) @@ -1196,6 +1198,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx942\"") KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx942 endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX942_APU") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx942\"") + KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx942 +endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1030") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx1030\"") diff --git a/packages/kokkos/README.md b/packages/kokkos/README.md index 0ea07f9ea2f6..56159b35c29b 100644 --- a/packages/kokkos/README.md +++ b/packages/kokkos/README.md @@ -30,12 +30,12 @@ To 
start learning about Kokkos: The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest). -The current release is [4.5.00](https://github.com/kokkos/kokkos/releases/tag/4.5.00). +The current release is [4.5.01](https://github.com/kokkos/kokkos/releases/tag/4.5.01). ```bash -curl -OJ -L https://github.com/kokkos/kokkos/releases/download/4.5.00/kokkos-4.5.00.tar.gz +curl -OJ -L https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.tar.gz # Or with wget -wget https://github.com/kokkos/kokkos/releases/download/4.5.00/kokkos-4.5.00.tar.gz +wget https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.tar.gz ``` To clone the latest development version of Kokkos from GitHub: diff --git a/packages/kokkos/containers/src/Kokkos_DynRankView.hpp b/packages/kokkos/containers/src/Kokkos_DynRankView.hpp index 2f2f4433e7ca..b8603595264c 100644 --- a/packages/kokkos/containers/src/Kokkos_DynRankView.hpp +++ b/packages/kokkos/containers/src/Kokkos_DynRankView.hpp @@ -524,7 +524,10 @@ class DynRankView : private View { std::is_same_v, is_default_map = std::is_void_v && - (is_layout_left || is_layout_right || is_layout_stride) + (is_layout_left || is_layout_right || is_layout_stride), + + is_default_access = + is_default_map && std::is_same_v }; // Bounds checking macros @@ -574,12 +577,134 @@ class DynRankView : private View { using view_type::stride_7; // FIXME: not tested using view_type::use_count; +#ifdef KOKKOS_ENABLE_CUDA KOKKOS_FUNCTION reference_type operator()(index_type i0 = 0, index_type i1 = 0, index_type i2 = 0, index_type i3 = 0, index_type i4 = 0, index_type i5 = 0, index_type i6 = 0) const { return view_type::operator()(i0, i1, i2, i3, i4, i5, i6); } +#else + // Adding shortcut operators for rank-0 to rank-3 for default layouts + // and access modalities. + // This removes performance overhead for always using rank-7 mapping. 
+ // See https://github.com/kokkos/kokkos/issues/7604 + // When boundschecking is enabled we still go through the underlying + // rank-7 View to leverage the error checks there. + + KOKKOS_FUNCTION reference_type operator()() const { +#ifdef KOKKOS_ENABLE_DEBUG + if (rank() != 0u) + Kokkos::abort( + "DynRankView rank 0 operator() called with invalid number of " + "arguments."); +#endif +#ifndef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK + if constexpr (is_default_access) { + return view_type::data()[0]; + } else +#endif + return view_type::operator()(0, 0, 0, 0, 0, 0, 0); + } + + KOKKOS_FUNCTION reference_type operator()(index_type i0) const { +#ifdef KOKKOS_ENABLE_DEBUG + // FIXME: Should be equal, only access(...) allows mismatch of rank and + // index args + if (rank() > 1u) + Kokkos::abort( + "DynRankView rank 1 operator() called with invalid number of " + "arguments."); +#endif +#ifndef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK + if constexpr (is_default_access) { + if constexpr (is_layout_stride) { + return view_type::data()[i0 * view_type::stride(0)]; + } else { + return view_type::data()[i0]; + } + } else +#endif + return view_type::operator()(i0, 0, 0, 0, 0, 0, 0); +#if defined KOKKOS_COMPILER_INTEL || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif + } + + KOKKOS_FUNCTION reference_type operator()(index_type i0, + index_type i1) const { +#ifdef KOKKOS_ENABLE_DEBUG + // FIXME: Should be equal, only access(...) 
allows mismatch of rank and + // index args + if (rank() > 2u) + Kokkos::abort( + "DynRankView rank 2 operator() called with invalid number of " + "arguments."); +#endif +#ifndef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK + if constexpr (is_default_access) { + if constexpr (is_layout_left) { + return view_type::data()[i0 + i1 * view_type::stride(1)]; + } else if constexpr (is_layout_right) { + return view_type::data()[i0 * view_type::extent(1) + i1]; + } else { + return view_type::data()[i0 * view_type::stride(0) + + i1 * view_type::stride(1)]; + } + } else +#endif + return view_type::operator()(i0, i1, 0, 0, 0, 0, 0); +#if defined KOKKOS_COMPILER_INTEL || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif + } + + KOKKOS_FUNCTION reference_type operator()(index_type i0, index_type i1, + index_type i2) const { +#ifdef KOKKOS_ENABLE_DEBUG + // FIXME: Should be equal, only access(...) allows mismatch of rank and + // index args + if (rank() > 3u) + Kokkos::abort( + "DynRankView rank 3 operator() called with invalid number of " + "arguments."); +#endif +#ifndef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK + if constexpr (is_default_access) { + if constexpr (is_layout_left) { + return view_type::data()[i0 + view_type::stride(1) * + (i1 + i2 * view_type::extent(1))]; + } else if constexpr (is_layout_right) { + return view_type::data()[(i0 * view_type::extent(1) + i1) * + view_type::extent(2) + + i2]; + } else { + return view_type::data()[i0 * view_type::stride(0) + + i1 * view_type::stride(1) + + i2 * view_type::stride(2)]; + } + } else +#endif + return view_type::operator()(i0, i1, i2, 0, 0, 0, 0); +#if defined KOKKOS_COMPILER_INTEL || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif + } + + KOKKOS_FUNCTION reference_type operator()(index_type i0, index_type i1, + index_type i2, index_type i3, + index_type i4 = 
0, + index_type i5 = 0, + index_type i6 = 0) const { + return view_type::operator()(i0, i1, i2, i3, i4, i5, i6); + } +#endif // This is an accomodation for Phalanx, that is usint the operator[] to access // all elements in a linear fashion even when the rank is not 1 diff --git a/packages/kokkos/core/unit_test/TestAtomicViews.hpp b/packages/kokkos/core/unit_test/TestAtomicViews.hpp index fa72e0b0cfdb..55ff62822bcc 100644 --- a/packages/kokkos/core/unit_test/TestAtomicViews.hpp +++ b/packages/kokkos/core/unit_test/TestAtomicViews.hpp @@ -1065,13 +1065,9 @@ T AndEqualAtomicViewCheck(const int64_t input_length) { const int64_t N = input_length; T result[2] = {1}; for (int64_t i = 0; i < N; ++i) { - if (N % 2 == 0) { - result[0] &= (T)i; - } else { - result[1] &= (T)i; - } + int64_t idx = N % 2; + result[idx] &= (T)i; } - return (result[0]); } diff --git a/packages/kokkos/core/unit_test/TestViewBadAlloc.hpp b/packages/kokkos/core/unit_test/TestViewBadAlloc.hpp index c876ceb787e8..1707a9d5d25d 100644 --- a/packages/kokkos/core/unit_test/TestViewBadAlloc.hpp +++ b/packages/kokkos/core/unit_test/TestViewBadAlloc.hpp @@ -66,6 +66,12 @@ TEST(TEST_CATEGORY, view_bad_alloc) { } #endif +#if defined(_WIN32) && defined(KOKKOS_ENABLE_CUDA) + if (std::is_same_v) { + GTEST_SKIP() << "MSVC/CUDA segfaults when allocating too much memory"; + } +#endif + test_view_bad_alloc(); constexpr bool execution_space_is_device = diff --git a/packages/kokkos/master_history.txt b/packages/kokkos/master_history.txt index 3f9e4c6e159a..c9e454c1af0f 100644 --- a/packages/kokkos/master_history.txt +++ b/packages/kokkos/master_history.txt @@ -40,3 +40,4 @@ tag: 4.3.01 date: 05:07:2024 master: 486cc745 release: 262d2d6e tag: 4.4.00 date: 08:08:2024 master: 6ecdf605 release: 6068673c tag: 4.4.01 date: 09:12:2024 master: 08ceff92 release: 2d60c039 tag: 4.5.00 date: 11:11:2024 master: 15dc143e release: 5164f2f6 +tag: 4.5.01 date: 12:19:2024 master: 09e775bf release: e0d656f9 diff --git 
a/packages/kokkos/scripts/docker/Dockerfile.gcc b/packages/kokkos/scripts/docker/Dockerfile.gcc index b93c7452b09c..3bca9834b524 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.gcc +++ b/packages/kokkos/scripts/docker/Dockerfile.gcc @@ -18,7 +18,7 @@ RUN echo "deb http://dk.archive.ubuntu.com/ubuntu/ xenial main" >> /etc/apt/sour apt-get clean && rm -rf /var/lib/apt/lists/* -RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \ +RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \ KEYDUMP_FILE=keydump && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.hipcc b/packages/kokkos/scripts/docker/Dockerfile.hipcc index 909c6a3d25f8..f8d3851d749b 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.hipcc +++ b/packages/kokkos/scripts/docker/Dockerfile.hipcc @@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y \ ENV PATH=/opt/rocm/bin:$PATH -RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \ +RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \ KEYDUMP_FILE=keydump && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject b/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject index 7f4af6468d30..2a29e0041c4c 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject +++ b/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject @@ -22,7 +22,7 @@ RUN apt-get update && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \ +RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \ KEYDUMP_FILE=keydump && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.nvcc b/packages/kokkos/scripts/docker/Dockerfile.nvcc index 
11e926fe091f..e87f7dcb055b 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.nvcc +++ b/packages/kokkos/scripts/docker/Dockerfile.nvcc @@ -14,7 +14,7 @@ RUN apt-get update && apt-get install -y \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \ +RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \ KEYDUMP_FILE=keydump && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.nvhpc b/packages/kokkos/scripts/docker/Dockerfile.nvhpc index 88e59de2827a..5f611e98d847 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.nvhpc +++ b/packages/kokkos/scripts/docker/Dockerfile.nvhpc @@ -1,7 +1,7 @@ ARG BASE=nvcr.io/nvidia/nvhpc:23.7-devel-cuda12.2-ubuntu20.04 FROM $BASE -RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \ +RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \ KEYDUMP_FILE=keydump && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.openmptarget b/packages/kokkos/scripts/docker/Dockerfile.openmptarget index a555b29dd8fb..0d278972aef7 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.openmptarget +++ b/packages/kokkos/scripts/docker/Dockerfile.openmptarget @@ -15,7 +15,7 @@ RUN apt-get update && apt-get install -y \ ARG NPROC=8 -RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \ +RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \ KEYDUMP_FILE=keydump && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.sycl b/packages/kokkos/scripts/docker/Dockerfile.sycl index b2d4ab8a9e67..1e653e0878c7 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.sycl +++ b/packages/kokkos/scripts/docker/Dockerfile.sycl @@ -15,7 +15,7 @@ RUN apt-get update && apt-get 
install -y \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \ +RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \ KEYDUMP_FILE=keydump && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \ @@ -46,7 +46,7 @@ RUN wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCT apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN wget https://cloud.cees.ornl.gov/download/oneapi-for-nvidia-gpus-2023.0.0-linux.sh && \ +RUN wget https://cloud1.cees.ornl.gov/download/oneapi-for-nvidia-gpus-2023.0.0-linux.sh && \ echo "3416721faf83e5858e65795231bae47bb51ff91d4e8738613d498674f1636f72 oneapi-for-nvidia-gpus-2023.0.0-linux.sh" | sha256sum --check && \ chmod +x oneapi-for-nvidia-gpus-2023.0.0-linux.sh && \ ./oneapi-for-nvidia-gpus-2023.0.0-linux.sh -y && \ diff --git a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/config.hpp b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/config.hpp index 24166462e7ab..e8cacf40d601 100644 --- a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/config.hpp +++ b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/config.hpp @@ -240,7 +240,13 @@ static_assert(_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_14, "mdspan requires C++14 or #ifndef MDSPAN_USE_BRACKET_OPERATOR # if defined(__cpp_multidimensional_subscript) -# define MDSPAN_USE_BRACKET_OPERATOR 1 +// The following if/else is necessary to workaround a clang issue +// relative to using a parameter pack inside a bracket operator in C++2b/C++23 mode +# if defined(_MDSPAN_COMPILER_CLANG) && ((__clang_major__ == 15) || (__clang_major__ == 16)) +# define MDSPAN_USE_BRACKET_OPERATOR 0 +# else +# define MDSPAN_USE_BRACKET_OPERATOR 1 +# endif # else # define MDSPAN_USE_BRACKET_OPERATOR 0 # endif diff --git a/packages/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp 
b/packages/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp index 2a2cdf76b923..46ccbaadebe0 100644 --- a/packages/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/packages/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -252,7 +252,7 @@ layout_left::mapping::submdspan_mapping_impl( *this, inv_map, // HIP needs deduction guides to have markups so we need to be explicit // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have -// the issue But Clang-CUDA also doesn't accept the use of deduction guide so +// the issue but Clang-CUDA also doesn't accept the use of deduction guide so // disable it for CUDA altogether #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) detail::tuple{ @@ -330,7 +330,7 @@ MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::mapping{ @@ -485,7 +485,7 @@ layout_right::mapping::submdspan_mapping_impl( *this, inv_map, // HIP needs deduction guides to have markups so we need to be explicit // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have -// the issue But Clang-CUDA also doesn't accept the use of deduction guide so +// the issue but Clang-CUDA also doesn't accept the use of deduction guide so // disable it for CUDA altogether #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple{ @@ -555,7 +555,7 @@ MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::mapping{ @@ -603,12 +603,11 @@ layout_stride::mapping::submdspan_mapping_impl( *this, inv_map, // HIP needs deduction guides to have markups so we need to be explicit // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have -// the issue -#if defined(_MDSPAN_HAS_HIP) || \ - (defined(__NVCC__) && \ - (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10) < 1120) +// the issue but Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA altogether
+#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple( - detail::stride_of(slices)...).values)), + detail::stride_of(slices)...)).values), #else MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple(detail::stride_of(slices)...)).values), #endif From d78b31dd7c668cf548696c963200d5be27dd475d Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Tue, 17 Dec 2024 11:42:51 -0700 Subject: [PATCH 32/33] tpetra: update supported kokkos version to 4.5.1 Signed-off-by: Nathan Ellingwood --- packages/tpetra/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tpetra/CMakeLists.txt b/packages/tpetra/CMakeLists.txt index 4c470608119f..f2f928cfca35 100644 --- a/packages/tpetra/CMakeLists.txt +++ b/packages/tpetra/CMakeLists.txt @@ -24,7 +24,7 @@ TRIBITS_ADD_OPTION_AND_DEFINE( # Supported Kokkos version in Trilinos # NOTE: When we snapshot Kokkos into Trilinos, we have to update these numbers to maintain # compatibility with external Kokkos -SET(Tpetra_SUPPORTED_KOKKOS_VERSION "4.5.0") +SET(Tpetra_SUPPORTED_KOKKOS_VERSION "4.5.1") # Option to allow developers to ignore incompatible Kokkos versions From 7f976674d62c2ba874bc271da5529cfc679286ad Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Thu, 19 Dec 2024 19:25:26 -0700 Subject: [PATCH 33/33] intrepid2: disable 7 tests with intel-2021.3 job The following tests have diff vs tol issues in the intel-2021.3 job Intrepid2_unit-test_Discretization_Basis_HCURL_TET_In_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1 Intrepid2_unit-test_Discretization_Basis_HCURL_TRI_In_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1 Intrepid2_unit-test_Discretization_Basis_HDIV_TET_In_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1 Intrepid2_unit-test_Discretization_Basis_HDIV_TRI_In_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1 Intrepid2_unit-test_Discretization_Basis_HGRAD_TET_Cn_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1 
Intrepid2_unit-test_Discretization_Basis_HGRAD_TRI_Cn_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1 Intrepid2_unit-test_Discretization_Basis_HVOL_TET_Cn_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1 Signed-off-by: Nathan Ellingwood --- packages/framework/ini-files/config-specs.ini | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index a59a18d50591..8947d5e75156 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -1417,6 +1417,15 @@ opt-set-cmake-var Tempus_IMEX_RK_Staggered_FSA_Tangent_MPI_1_DISABLE BOOL FORCE opt-set-cmake-var Tempus_Newmark_MPI_1_DISABLE BOOL FORCE : ON opt-set-cmake-var Tempus_Test_NewmarkImplicitAForm_HarmonicOscillator_Damped_FirstOrder_MPI_1_DISABLE BOOL FORCE : ON +# These intrepid2 tests have diff vs tol issues with this job, will be reevaluated +opt-set-cmake-var Intrepid2_unit-test_Discretization_Basis_HCURL_TET_In_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1_DISABLE BOOL FORCE : ON +opt-set-cmake-var Intrepid2_unit-test_Discretization_Basis_HCURL_TRI_In_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1_DISABLE BOOL FORCE : ON +opt-set-cmake-var Intrepid2_unit-test_Discretization_Basis_HDIV_TET_In_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1_DISABLE BOOL FORCE : ON +opt-set-cmake-var Intrepid2_unit-test_Discretization_Basis_HDIV_TRI_In_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1_DISABLE BOOL FORCE : ON +opt-set-cmake-var Intrepid2_unit-test_Discretization_Basis_HGRAD_TET_Cn_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1_DISABLE BOOL FORCE : ON +opt-set-cmake-var Intrepid2_unit-test_Discretization_Basis_HGRAD_TRI_Cn_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1_DISABLE BOOL FORCE : ON +opt-set-cmake-var Intrepid2_unit-test_Discretization_Basis_HVOL_TET_Cn_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1_DISABLE BOOL FORCE : ON + opt-set-cmake-var Trilinos_ENABLE_TrilinosFrameworkTests BOOL FORCE : OFF opt-set-cmake-var
Trilinos_ENABLE_TrilinosBuildStats BOOL FORCE : OFF