diff --git a/.github/workflows/AT2.yml b/.github/workflows/AT2.yml index d4575ba6457d..8855f236ee1f 100644 --- a/.github/workflows/AT2.yml +++ b/.github/workflows/AT2.yml @@ -1,4 +1,4 @@ -name: AT2-EXPERIMENTAL +name: AT2 on: pull_request: @@ -74,7 +74,7 @@ jobs: - name: get dependencies working-directory: ./packages/framework run: | - bash -l -c "./get_dependencies.sh --container" + bash -l -c "./get_dependencies.sh" - name: PullRequestLinuxDriverTest.py shell: bash -l {0} working-directory: /home/Trilinos/build @@ -128,7 +128,7 @@ jobs: echo "https://github.com/trilinos/Trilinos/wiki/Containers" >> $GITHUB_STEP_SUMMARY echo "https://gitlab-ex.sandia.gov/trilinos-project/trilinos-containers/-/wikis/Containers-at-Sandia" >> $GITHUB_STEP_SUMMARY - gcc830-serial-EXPERIMENTAL: + gcc830: needs: pre-checks runs-on: [self-hosted, gcc-8.3.0_serial] if: ${{ needs.pre-checks.outputs.should_skip != 'true' && (github.event.action == 'synchronize' || github.event.action == 'opened' || github.event.review.state == 'APPROVED') }} @@ -167,7 +167,7 @@ jobs: - name: get dependencies working-directory: ./packages/framework run: | - bash -l -c "./get_dependencies.sh --container" + bash -l -c "./get_dependencies.sh" - name: PullRequestLinuxDriverTest.py shell: bash -l {0} working-directory: /home/Trilinos/build @@ -260,7 +260,7 @@ jobs: - name: get dependencies working-directory: ./packages/framework run: | - bash -l -c "./get_dependencies.sh --container" + bash -l -c "./get_dependencies.sh" - name: PullRequestLinuxDriverTest.py shell: bash -l {0} working-directory: /home/Trilinos/build @@ -314,7 +314,7 @@ jobs: echo "https://github.com/trilinos/Trilinos/wiki/Containers" >> $GITHUB_STEP_SUMMARY echo "https://gitlab-ex.sandia.gov/trilinos-project/trilinos-containers/-/wikis/Containers-at-Sandia" >> $GITHUB_STEP_SUMMARY - framework-tests-EXPERIMENTAL: + framework-tests: needs: pre-checks runs-on: [self-hosted, python-3.9] if: ${{ needs.pre-checks.outputs.should_skip != 'true' && 
(github.event.action == 'synchronize' || github.event.action == 'opened' || github.event.review.state == 'APPROVED') }} @@ -353,7 +353,7 @@ jobs: - name: get dependencies working-directory: ./packages/framework run: | - bash -l -c "./get_dependencies.sh --container" + bash -l -c "./get_dependencies.sh" - name: PullRequestLinuxDriverTest.py shell: bash -l {0} working-directory: /home/Trilinos/build diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 5a5e701def17..95f3c5b21fbe 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -45,7 +45,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Initialize CodeQL - uses: github/codeql-action/init@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6 + uses: github/codeql-action/init@df409f7d9260372bd5f19e5b04e83cb3c43714ae # v3.27.9 with: languages: ${{ matrix.language }} build-mode: ${{ matrix.build-mode }} @@ -108,6 +108,6 @@ jobs: ninja -j 16 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6 + uses: github/codeql-action/analyze@df409f7d9260372bd5f19e5b04e83cb3c43714ae # v3.27.9 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 88c2a1fcf484..cbe8a034c3a2 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -66,6 +66,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. 
- name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6 + uses: github/codeql-action/upload-sarif@df409f7d9260372bd5f19e5b04e83cb3c43714ae # v3.27.9 with: sarif_file: results.sarif diff --git a/packages/framework/get_dependencies.sh b/packages/framework/get_dependencies.sh index 74c26e4eb601..956857ecaf50 100755 --- a/packages/framework/get_dependencies.sh +++ b/packages/framework/get_dependencies.sh @@ -3,7 +3,8 @@ ini_file_option=$1 script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)" # Data that needs to be updated when GenConfig changes! -genconfig_sha1=924a08af66f0a0573b5dd1128179731489339aec +genconfig_sha1=88c44e347c0377a170ec9ca45a47732a9630b4ec + # The following code contains no changing data @@ -70,15 +71,10 @@ function tril_genconfig_clone_or_update_repo() { popd &> /dev/null } -# Clone or update the repos -if [[ "$ini_file_option" == "--container" ]] ; then - echo "In a container it is assumed that GenConfig is already in the container at /GenConfig" -else - #Clone GenConfig from gitlab-ex - tril_genconfig_clone_or_update_repo \ - git@gitlab-ex.sandia.gov:trilinos-devops-consolidation/code/GenConfig.git \ - GenConfig has-submodules ${genconfig_sha1} -fi +# Clone GenConfig from GitHub +tril_genconfig_clone_or_update_repo \ + https://github.com/sandialabs/GenConfig.git \ + GenConfig has-submodules ${genconfig_sha1} if [[ "$ini_file_option" == "--srn" ]] ; then #Clone srn-ini-files from cee-gitlab @@ -92,10 +88,6 @@ elif [[ "$ini_file_option" == "--son" ]] ; then git@gitlab-ex.sandia.gov:trilinos-project/son-ini-files.git \ son-ini-files -elif [[ "$ini_file_option" == "--container" ]] ; then - #Copy Genconfig into place from /GenConfig - cp -R /GenConfig ${script_dir} - elif [[ "$ini_file_option" != "" ]] ; then echo "ERROR: Option '${ini_file_option}' not allowed! Must select '--son', '--srn' or ''." 
exit 1 diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index a59a18d50591..e6fd483513dc 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -337,6 +337,7 @@ opt-set-cmake-var TPL_ENABLE_Zlib BOOL : ON opt-set-cmake-var TPL_ENABLE_HDF5 BOOL : ON opt-set-cmake-var TPL_ENABLE_Netcdf BOOL : ON opt-set-cmake-var TPL_ENABLE_SuperLU BOOL : ON +opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF opt-set-cmake-var Trilinos_TRACE_ADD_TEST BOOL : ON opt-set-cmake-var TPL_ENABLE_Scotch BOOL : ON @@ -399,6 +400,11 @@ opt-set-cmake-var TPL_BLAS_LIBRARY_DIRS STRING FORCE : ${BLAS_ROOT|ENV}/lib opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : -L${BLAS_ROOT|ENV}/lib;-llapack;-lgfortran;-lgomp opt-set-cmake-var TPL_LAPACK_LIBRARY_DIRS STRING FORCE : ${BLAS_ROOT|ENV}/lib +[SPACK_SERIAL_OPENBLAS] +# Static OpenBLAS, but shared m and gfortran +opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-lgfortran;-lm +opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-lgfortran;-lm + [COMMON_SPACK_TPLS] use COMMON @@ -440,6 +446,9 @@ opt-set-cmake-var SuperLU_INCLUDE_DIRS PATH FORCE : ${SUPERLU_INC|ENV} opt-set-cmake-var SuperLU_INCLUDE_DIRS STRING FORCE : ${SUPERLU_INC|ENV} opt-set-cmake-var SuperLU_LIBRARY_DIRS PATH FORCE : ${SUPERLU_LIB|ENV} opt-set-cmake-var SuperLU_LIBRARY_DIRS STRING FORCE : ${SUPERLU_LIB|ENV} +# FIXME: Wouldn't need this if we used find_package(superlu) +opt-set-cmake-var Trilinos_EXTRA_LINK_FLAGS STRING FORCE : -lm + # Metis opt-set-cmake-var TPL_METIS_LIBRARIES STRING FORCE : ${METIS_LIB|ENV}/libmetis.so @@ -460,8 +469,6 @@ use COMMON_SPACK_TPLS opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : "" opt-set-cmake-var TPL_HDF5_LIBRARIES STRING FORCE : "" -opt-set-cmake-var SuperLU_LIBRARY_NAMES STRING FORCE : superlu;m - [COMMON_USE-MPI|NO] use COMMON 
opt-set-cmake-var TPL_ENABLE_ParMETIS BOOL FORCE : OFF @@ -1181,7 +1188,7 @@ opt-set-cmake-var Trilinos_WARNINGS_MODE STRING : WARN [COMPILER|INTEL] opt-set-cmake-var MPI_EXEC FILEPATH : mpirun -[CUDA_TPL_ENABLES] +[SPACK_CUDA_TPLS] opt-set-cmake-var TPL_ENABLE_BLAS BOOL FORCE : ON opt-set-cmake-var TPL_ENABLE_BinUtils BOOL FORCE : OFF opt-set-cmake-var TPL_ENABLE_Boost BOOL FORCE : ON @@ -1206,16 +1213,12 @@ opt-set-cmake-var EpetraExt_ENABLE_HDF5 BOOL FORCE : OFF opt-set-cmake-var Kokkos_ENABLE_CUDA BOOL FORCE : ON opt-set-cmake-var Zoltan_ENABLE_Scotch BOOL FORCE : OFF -[SEMS_CUDA_TPL_LOCATIONS] opt-set-cmake-var TPL_BoostLib_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a opt-set-cmake-var TPL_Boost_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a opt-set-cmake-var TPL_DLlib_LIBRARIES FILEPATH FORCE : "-ldl" opt-set-cmake-var TPL_HDF5_LIBRARIES STRING FORCE : ${HDF5_LIB|ENV}/libhdf5_hl.so;${HDF5_LIB|ENV}/libhdf5.a;${ZLIB_LIB|ENV}/libz.a;-ldl opt-set-cmake-var TPL_METIS_LIBRARIES STRING FORCE : ${METIS_LIB|ENV}/libmetis.so opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : -L${NETCDF_C_ROOT|ENV}/lib64;${NETCDF_C_ROOT|ENV}/lib/libnetcdf.a;${PARALLEL_NETCDF_ROOT|ENV}/lib/libpnetcdf.a;${TPL_HDF5_LIBRARIES|CMAKE} -# see https://github.com/trilinos/Trilinos/issues/11109#issuecomment-1272146298 -opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : /lib64/libblas.so.3 -opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : /lib64/liblapack.so.3 opt-set-cmake-var Netcdf_INCLUDE_DIRS STRING FORCE : ${NETCDF_C_INC|ENV} opt-set-cmake-var ParMETIS_INCLUDE_DIRS STRING FORCE : ${PARMETIS_INC|ENV} @@ -1224,9 +1227,11 @@ opt-set-cmake-var Scotch_INCLUDE_DIRS STRING FORCE : ${SCOTCH_INC|ENV} opt-set-cmake-var Scotch_LIBRARY_DIRS STRING FORCE : ${SCOTCH_LIB|ENV} opt-set-cmake-var SuperLU_INCLUDE_DIRS STRING FORCE : ${SUPERLU_INC|ENV} opt-set-cmake-var 
SuperLU_LIBRARY_DIRS STRING FORCE : ${SUPERLU_LIB|ENV} +# FIXME: Wouldn't need this if we used find_package(superlu) +opt-set-cmake-var Trilinos_EXTRA_LINK_FLAGS STRING FORCE : -lm [CUDA] -use CUDA_TPL_ENABLES +use SPACK_CUDA_TPLS #CXX Settings opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -fPIC -Wall -Warray-bounds -Wchar-subscripts -Wcomment -Wenum-compare -Wformat -Wuninitialized -Wmaybe-uninitialized -Wmain -Wnarrowing -Wnonnull -Wreorder -Wreturn-type -Wsequence-point -Wtrigraphs -Wunused-function -Wunused-but-set-variable -Wwrite-strings @@ -1241,9 +1246,13 @@ opt-set-cmake-var Phalanx_KOKKOS_DEVICE_TYPE STRING FORCE : CUDA opt-set-cmake-var Sacado_ENABLE_HIERARCHICAL_DFAD BOOL FORCE : ON opt-set-cmake-var Tpetra_INST_SERIAL BOOL FORCE : ON -[SEMS_COMMON_CUDA] +use CUDA-RUN-SERIAL-TESTS + +[SEMS_CUDA] use CUDA -use SEMS_CUDA_TPL_LOCATIONS +# see https://github.com/trilinos/Trilinos/issues/11109#issuecomment-1272146298 +opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : /lib64/libblas.so.3 +opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : /lib64/liblapack.so.3 [CUDA-RUN-SERIAL-TESTS] opt-set-cmake-var Kokkos_CoreUnitTest_Cuda1_SET_RUN_SERIAL BOOL FORCE : ON @@ -1251,7 +1260,6 @@ opt-set-cmake-var Kokkos_CoreUnitTest_CudaTimingBased_SET_RUN_SERIAL BOOL FORCE opt-set-cmake-var Kokkos_CoreUnitTest_Default_SET_RUN_SERIAL BOOL FORCE : ON opt-set-cmake-var KokkosKernels_sparse_cuda_MPI_1_SET_RUN_SERIAL BOOL FORCE : ON opt-set-cmake-var KokkosKernels_batched_dla_cuda_MPI_1_SET_RUN_SERIAL BOOL FORCE : ON -opt-set-cmake-var Intrepid2_unit-test_MonolithicExecutable_Intrepid2_Tests_MPI_1_SET_RUN_SERIAL BOOL FORCE : ON # Full configurations intended to be loaded. 
@@ -1340,9 +1348,6 @@ opt-set-cmake-var Teko_DISABLE_LSCSTABALIZED_TPETRA_ALPAH_INV_D BOOL : ON use TEST_DISABLES|CLANG -opt-set-cmake-var SuperLU_LIBRARY_NAMES STRING : superlu;m -opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF - opt-set-cmake-var Pliris_vector_random_MPI_3_DISABLE BOOL : ON opt-set-cmake-var Pliris_vector_random_MPI_4_DISABLE BOOL : ON @@ -1381,10 +1386,6 @@ opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --b opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : -L${NETCDF_C_LIB|ENV}/lib;${NETCDF_C_LIB|ENV}/libnetcdf.so;${PARALLEL_NETCDF_LIB|ENV}/libpnetcdf.a opt-set-cmake-var TPL_HDF5_LIBRARIES STRING FORCE : ${HDF5_LIB|ENV}/libhdf5_hl.so;${HDF5_LIB|ENV}/libhdf5.a;${ZLIB_LIB|ENV}/libz.a;-ldl -# I get the following error if I do not disable ML_ENABLE_SuperLU: -# ML CONFIGURATION ERROR: SuperLU_5.0 detected - only SuperLU version < 5.0 currently supported for this package. -opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF - opt-set-cmake-var Zoltan_ch_simple_parmetis_parallel_DISABLE BOOL FORCE : ON opt-set-cmake-var Zoltan_ch_7944_parmetis_parallel_DISABLE BOOL FORCE : ON opt-set-cmake-var Zoltan_ch_simple_scotch_parallel_DISABLE BOOL FORCE : ON @@ -1417,6 +1418,15 @@ opt-set-cmake-var Tempus_IMEX_RK_Staggered_FSA_Tangent_MPI_1_DISABLE BOOL FORCE opt-set-cmake-var Tempus_Newmark_MPI_1_DISABLE BOOL FORCE : ON opt-set-cmake-var Tempus_Test_NewmarkImplicitAForm_HarmonicOscillator_Damped_FirstOrder_MPI_1_DISABLE BOOL FORCE : ON +# These intrepid2 tests have diff vs tol issues with this job , will be reevaluated +opt-set-cmake-var Intrepid2_unit-test_Discretization_Basis_HCURL_TET_In_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1_DISABLE BOOL FORCE : ON +opt-set-cmake-var Intrepid2_unit-test_Discretization_Basis_HCURL_TRI_In_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1_DISABLE BOOL FORCE : ON +opt-set-cmake-var Intrepid2_unit-test_Discretization_Basis_HDIV_TET_In_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1_DISABLE BOOL FORCE : ON +opt-set-cmake-var 
Intrepid2_unit-test_Discretization_Basis_HDIV_TRI_In_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1_DISABLE BOOL FORCE : ON +opt-set-cmake-var Intrepid2_unit-test_Discretization_Basis_HGRAD_TET_Cn_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1_DISABLE BOOL FORCE : ON +opt-set-cmake-var Intrepid2_unit-test_Discretization_Basis_HGRAD_TRI_Cn_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1_DISABLE BOOL FORCE : ON +opt-set-cmake-var Intrepid2_unit-test_Discretization_Basis_HVOL_TET_Cn_FEM_test_02_Serial_DOUBLE_DOUBLE_MPI_1_DISABLE BOOL FORCE : ON + opt-set-cmake-var Trilinos_ENABLE_TrilinosFrameworkTests BOOL FORCE : OFF opt-set-cmake-var Trilinos_ENABLE_TrilinosBuildStats BOOL FORCE : OFF @@ -1443,62 +1453,8 @@ use USE-UVM|NO use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS -use SEMS_COMMON_CUDA - -# TPL ENABLE/DISABLE settings -opt-set-cmake-var TPL_ENABLE_BLAS BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_BinUtils BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_Boost BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_CGNS BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_CUDA BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_CUSPARSE BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_DLlib BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_HDF5 BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_HWLOC BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_LAPACK BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_METIS BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_Matio BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_MPI BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_Netcdf BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_ParMETIS BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_Pthread BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_Scotch BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_SuperLU BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_SuperLUDist BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_Zlib BOOL FORCE : ON - -#TPL_*_LIBRARIES -opt-set-cmake-var TPL_BoostLib_LIBRARIES STRING FORCE : 
${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a -opt-set-cmake-var TPL_Boost_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a -opt-set-cmake-var TPL_DLlib_LIBRARIES FILEPATH FORCE : "-ldl" -opt-set-cmake-var TPL_HDF5_LIBRARIES STRING FORCE : ${HDF5_LIB|ENV}/libhdf5_hl.so;${HDF5_LIB|ENV}/libhdf5.a;${ZLIB_LIB|ENV}/libz.a;-ldl -opt-set-cmake-var TPL_METIS_LIBRARIES STRING FORCE : ${METIS_LIB|ENV}/libmetis.so -opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : -L${NETCDF_C_ROOT|ENV}/lib64;${NETCDF_C_ROOT|ENV}/lib/libnetcdf.a;${PARALLEL_NETCDF_ROOT|ENV}/lib/libpnetcdf.a;${TPL_HDF5_LIBRARIES|CMAKE} - -#TPL_[INCLUDE|LIBRARY]_DIRS -opt-set-cmake-var Netcdf_INCLUDE_DIRS STRING FORCE : ${NETCDF_C_INC|ENV} -opt-set-cmake-var ParMETIS_INCLUDE_DIRS STRING FORCE : ${PARMETIS_INC|ENV} -opt-set-cmake-var ParMETIS_LIBRARY_DIRS STRING FORCE : ${PARMETIS_LIB|ENV} -opt-set-cmake-var Scotch_INCLUDE_DIRS STRING FORCE : ${SCOTCH_INC|ENV} -opt-set-cmake-var Scotch_LIBRARY_DIRS STRING FORCE : ${SCOTCH_LIB|ENV} -opt-set-cmake-var SuperLU_INCLUDE_DIRS STRING FORCE : ${SUPERLU_INC|ENV} -opt-set-cmake-var SuperLU_LIBRARY_DIRS STRING FORCE : ${SUPERLU_LIB|ENV} - -#CXX Settings -opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -fPIC -Wall -Warray-bounds -Wchar-subscripts -Wcomment -Wenum-compare -Wformat -Wuninitialized -Wmaybe-uninitialized -Wmain -Wnarrowing -Wnonnull -Wreorder -Wreturn-type -Wsequence-point -Wtrigraphs -Wunused-function -Wunused-but-set-variable -Wwrite-strings +use SEMS_CUDA -#Package Options -opt-set-cmake-var EpetraExt_ENABLE_HDF5 BOOL FORCE : OFF -opt-set-cmake-var Kokkos_ENABLE_CUDA BOOL FORCE : ON -opt-set-cmake-var Kokkos_ENABLE_CUDA_LAMBDA BOOL FORCE : ON -opt-set-cmake-var Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA BOOL FORCE : ON -#opt-set-cmake-var Kokkos_ENABLE_Debug_Bounds_Check BOOL FORCE : ON -opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none -opt-set-cmake-var 
Panzer_FADTYPE STRING FORCE : "Sacado::Fad::DFad" -opt-set-cmake-var Phalanx_KOKKOS_DEVICE_TYPE STRING FORCE : CUDA -opt-set-cmake-var Sacado_ENABLE_HIERARCHICAL_DFAD BOOL FORCE : ON -opt-set-cmake-var Tpetra_INST_SERIAL BOOL FORCE : ON -opt-set-cmake-var Zoltan_ENABLE_Scotch BOOL FORCE : OFF opt-set-cmake-var Trilinos_ENABLE_TrilinosFrameworkTests BOOL FORCE : OFF opt-set-cmake-var Trilinos_ENABLE_TrilinosBuildStats BOOL FORCE : OFF @@ -1528,8 +1484,6 @@ opt-set-cmake-var Adelus_vector_random_npr4_rhs1_MPI_4_DISABLE BOOL : ON use PACKAGE-ENABLES|NO-EPETRA -use CUDA-RUN-SERIAL-TESTS - [rhel8_sems-cuda-11.4.2-sems-gnu-10.1.0-sems-openmpi-4.1.4_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_all] # uses sems-v2 modules use rhel8_sems-cuda-11.4.2-sems-gnu-10.1.0-sems-openmpi-4.1.4_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_no-package-enables @@ -1556,8 +1510,7 @@ use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use PACKAGE-ENABLES|NO-EPETRA use COMMON_SPACK_TPLS -use SEMS_COMMON_CUDA -use CUDA-RUN-SERIAL-TESTS +use SEMS_CUDA opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : OFF @@ -1568,7 +1521,6 @@ use PACKAGE-ENABLES|ALL-NO-EPETRA [rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_all-no-epetra] use rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_all -use CUDA-RUN-SERIAL-TESTS opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : ON [rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_no-package-enables] @@ -1591,62 +1543,8 @@ use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS -use SEMS_COMMON_CUDA - -# TPL ENABLE/DISABLE settings -opt-set-cmake-var TPL_ENABLE_BLAS BOOL FORCE : ON -opt-set-cmake-var 
TPL_ENABLE_BinUtils BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_Boost BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_CGNS BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_CUDA BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_CUSPARSE BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_DLlib BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_HDF5 BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_HWLOC BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_LAPACK BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_METIS BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_Matio BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_MPI BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_Netcdf BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_ParMETIS BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_Pthread BOOL FORCE : ON -opt-set-cmake-var TPL_ENABLE_Scotch BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_SuperLU BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_SuperLUDist BOOL FORCE : OFF -opt-set-cmake-var TPL_ENABLE_Zlib BOOL FORCE : ON +use SEMS_CUDA -#TPL_*_LIBRARIES -opt-set-cmake-var TPL_BoostLib_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a -opt-set-cmake-var TPL_Boost_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a -opt-set-cmake-var TPL_DLlib_LIBRARIES FILEPATH FORCE : "-ldl" -opt-set-cmake-var TPL_HDF5_LIBRARIES STRING FORCE : ${HDF5_LIB|ENV}/libhdf5_hl.so;${HDF5_LIB|ENV}/libhdf5.a;${ZLIB_LIB|ENV}/libz.a;-ldl -opt-set-cmake-var TPL_METIS_LIBRARIES STRING FORCE : ${METIS_LIB|ENV}/libmetis.so -opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : -L${NETCDF_C_ROOT|ENV}/lib64;${NETCDF_C_ROOT|ENV}/lib/libnetcdf.a;${PARALLEL_NETCDF_ROOT|ENV}/lib/libpnetcdf.a;${TPL_HDF5_LIBRARIES|CMAKE} - -#TPL_[INCLUDE|LIBRARY]_DIRS -opt-set-cmake-var Netcdf_INCLUDE_DIRS STRING FORCE : ${NETCDF_C_INC|ENV} -opt-set-cmake-var ParMETIS_INCLUDE_DIRS STRING FORCE : ${PARMETIS_INC|ENV} -opt-set-cmake-var ParMETIS_LIBRARY_DIRS 
STRING FORCE : ${PARMETIS_LIB|ENV} -opt-set-cmake-var Scotch_INCLUDE_DIRS STRING FORCE : ${SCOTCH_INC|ENV} -opt-set-cmake-var Scotch_LIBRARY_DIRS STRING FORCE : ${SCOTCH_LIB|ENV} -opt-set-cmake-var SuperLU_INCLUDE_DIRS STRING FORCE : ${SUPERLU_INC|ENV} -opt-set-cmake-var SuperLU_LIBRARY_DIRS STRING FORCE : ${SUPERLU_LIB|ENV} - -#CXX Settings -opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -fPIC -Wall -Warray-bounds -Wchar-subscripts -Wcomment -Wenum-compare -Wformat -Wuninitialized -Wmaybe-uninitialized -Wmain -Wnarrowing -Wnonnull -Wreorder -Wreturn-type -Wsequence-point -Wtrigraphs -Wunused-function -Wunused-but-set-variable -Wwrite-strings - -#Package Options -opt-set-cmake-var EpetraExt_ENABLE_HDF5 BOOL FORCE : OFF -opt-set-cmake-var Kokkos_ENABLE_CUDA BOOL FORCE : ON -opt-set-cmake-var Kokkos_ENABLE_CUDA_LAMBDA BOOL FORCE : ON -opt-set-cmake-var Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA BOOL FORCE : ON -#opt-set-cmake-var Kokkos_ENABLE_Debug_Bounds_Check BOOL FORCE : ON -opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none -opt-set-cmake-var Panzer_FADTYPE STRING FORCE : "Sacado::Fad::DFad" -opt-set-cmake-var Phalanx_KOKKOS_DEVICE_TYPE STRING FORCE : CUDA -opt-set-cmake-var Sacado_ENABLE_HIERARCHICAL_DFAD BOOL FORCE : ON -opt-set-cmake-var Tpetra_INST_SERIAL BOOL FORCE : ON -opt-set-cmake-var Zoltan_ENABLE_Scotch BOOL FORCE : OFF opt-set-cmake-var Trilinos_ENABLE_TrilinosFrameworkTests BOOL FORCE : OFF opt-set-cmake-var Trilinos_ENABLE_TrilinosBuildStats BOOL FORCE : OFF @@ -1675,8 +1573,6 @@ opt-set-cmake-var Adelus_vector_random_npr4_rhs1_MPI_4_DISABLE BOOL : ON use PACKAGE-ENABLES|NO-EPETRA -use CUDA-RUN-SERIAL-TESTS - [rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_all] # uses sems-v2 modules use rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_no-package-enables @@ -1702,8 +1598,6 @@ use 
PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS use SPACK_NETLIB_BLAS_LAPACK -opt-set-cmake-var SuperLU_LIBRARY_NAMES STRING : superlu;m -opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF opt-set-cmake-var TPL_ENABLE_ParMETIS BOOL FORCE : OFF opt-set-cmake-var TPL_ENABLE_Scotch BOOL FORCE : OFF opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : ${NETCDF_C_LIB|ENV}/libnetcdf.so @@ -1744,10 +1638,7 @@ opt-set-cmake-var CMAKE_CXX_EXTENSIONS BOOL : O opt-set-cmake-var Teko_DISABLE_LSCSTABALIZED_TPETRA_ALPAH_INV_D BOOL : ON opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -fno-strict-aliasing -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-inline -Wno-nonnull-compare -Wno-address -# TPL_BLAS_LIBRARIES is redefined here with libm for SuperLU to properly link -opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : -L${BLAS_ROOT|ENV}/lib;-lblas;-lgfortran;-lgomp;-lm opt-set-cmake-var TPL_HDF5_LIBRARIES STRING : ${HDF5_LIB|ENV}/libhdf5_hl.so;${HDF5_LIB|ENV}/libhdf5.so;${ZLIB_LIB|ENV}/libz.so;-ldl -opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF [rhel8_sems-gnu-8.5.0-openmpi-4.1.6-openmp_release-debug_static_no-kokkos-arch_no-asan_no-complex_no-fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_all] use rhel8_sems-gnu-8.5.0-openmpi-4.1.6-openmp_release-debug_static_no-kokkos-arch_no-asan_no-complex_no-fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_no-package-enables @@ -1774,9 +1665,6 @@ use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS use SPACK_NETLIB_BLAS_LAPACK -opt-set-cmake-var SuperLU_LIBRARY_NAMES STRING FORCE : superlu;m -opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF - opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none --mca btl vader,self opt-set-cmake-var Teko_DISABLE_LSCSTABALIZED_TPETRA_ALPAH_INV_D BOOL : ON opt-set-cmake-var 
KokkosKernels_blas_serial_MPI_1_DISABLE BOOL : ON @@ -1886,7 +1774,6 @@ opt-set-cmake-var Amesos_ENABLE_SuperLU BOOL FORCE : OFF opt-set-cmake-var Amesos_ENABLE_SuperLUDist BOOL FORCE : OFF opt-set-cmake-var Amesos2_ENABLE_SuperLU BOOL FORCE : OFF opt-set-cmake-var Amesos2_ENABLE_SuperLUDist BOOL FORCE : OFF -opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF opt-set-cmake-var Teko_DISABLE_LSCSTABALIZED_TPETRA_ALPAH_INV_D BOOL : ON opt-set-cmake-var KokkosKernels_blas_serial_MPI_1_DISABLE BOOL : ON @@ -1937,9 +1824,6 @@ opt-set-cmake-var TPL_ENABLE_Pnetcdf BOOL FORCE : OFF opt-set-cmake-var Trilinos_ENABLE_TrilinosFrameworkTests BOOL FORCE : OFF opt-set-cmake-var Trilinos_ENABLE_TrilinosBuildStats BOOL FORCE : OFF -# Turned off to bypass: ML CONFIGURATION ERROR: SuperLU_5.0 detected - only SuperLU version < 5.0 currently supported for this package. -opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF - use RHEL8_POST [rhel8_aue-gcc-openmpi_debug_shared_no-kokkos-arch_no-asan_complex_no-fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_no-package-enables] @@ -1977,9 +1861,6 @@ opt-set-cmake-var TPL_ENABLE_SuperLUDist BOOL FORCE: OFF opt-set-cmake-var Trilinos_ENABLE_TrilinosFrameworkTests BOOL FORCE : OFF opt-set-cmake-var Trilinos_ENABLE_TrilinosBuildStats BOOL FORCE : OFF -# Turned off to bypass: ML CONFIGURATION ERROR: SuperLU_5.0 detected - only SuperLU version < 5.0 currently supported for this package. 
-opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF - use RHEL8_POST [rhel8_gcc-openmpi_debug_shared_no-kokkos-arch_no-asan_complex_no-fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_all] @@ -2037,6 +1918,7 @@ opt-set-cmake-var TPL_ENABLE_HDF5 BOOL : ON opt-set-cmake-var TPL_HDF5_LIBRARIES STRING : "${HDF5_LIB|ENV}/libhdf5_hl.so;${HDF5_LIB|ENV}/libhdf5.so;${ZLIB_LIB|ENV}/libz.so" opt-set-cmake-var TPL_ENABLE_Netcdf BOOL : ON opt-set-cmake-var TPL_ENABLE_SuperLU BOOL : ON +opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF opt-set-cmake-var TPL_ENABLE_Scotch BOOL : OFF opt-set-cmake-var CMAKE_C_COMPILER FILEPATH : ${MPICC|ENV} @@ -2061,7 +1943,6 @@ opt-set-cmake-var Trilinos_ENABLE_Komplex BOOL : OFF opt-set-cmake-var Trilinos_ENABLE_TriKota BOOL : OFF opt-set-cmake-var Trilinos_ENABLE_Moertel BOOL : OFF opt-set-cmake-var Trilinos_ENABLE_Domi BOOL : OFF -opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF [ubuntu_gnu_release-debug_shared_no-kokkos-arch_no-asan_no-complex_fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_all] use BUILD-TYPE|RELEASE-DEBUG @@ -2103,7 +1984,7 @@ use USE-UVM|NO use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-EPETRA use CUDA -use CUDA-RUN-SERIAL-TESTS +use SPACK_SERIAL_OPENBLAS opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL : ON opt-set-cmake-var TPL_ENABLE_X11 BOOL : OFF @@ -2130,7 +2011,7 @@ use USE-UVM|YES use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-EPETRA use CUDA -use CUDA-RUN-SERIAL-TESTS +use SPACK_SERIAL_OPENBLAS opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : OFF opt-set-cmake-var Kokkos_ENABLE_TESTS BOOL FORCE : ON diff --git a/packages/intrepid2/unit-test/MonolithicExecutable/CMakeLists.txt b/packages/intrepid2/unit-test/MonolithicExecutable/CMakeLists.txt index 4a6393836293..0d47249d9ad8 100644 --- a/packages/intrepid2/unit-test/MonolithicExecutable/CMakeLists.txt +++ b/packages/intrepid2/unit-test/MonolithicExecutable/CMakeLists.txt @@ -13,6 +13,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( NUM_MPI_PROCS 1 PASS_REGULAR_EXPRESSION "TEST 
PASSED" ADD_DIR_TO_NAME + RUN_SERIAL ) # add single-group tests; allows for easier targeted builds and debugging (especially useful under CUDA) diff --git a/packages/kokkos-kernels/CHANGELOG.md b/packages/kokkos-kernels/CHANGELOG.md index 58695228e4ef..37c25f8525fe 100644 --- a/packages/kokkos-kernels/CHANGELOG.md +++ b/packages/kokkos-kernels/CHANGELOG.md @@ -1,5 +1,11 @@ # Change Log +## [4.5.01](https://github.com/kokkos/kokkos-kernels/tree/4.5.01) +[Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/4.5.00...4.5.01) + +### Bug Fixes: +- Fix the package version [\#2460](https://github.com/kokkos/kokkos-kernels/pull/2460) + ## [4.5.00](https://github.com/kokkos/kokkos-kernels/tree/4.5.00) [Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/4.4.01...4.5.00) diff --git a/packages/kokkos-kernels/CMakeLists.txt b/packages/kokkos-kernels/CMakeLists.txt index c766cdf18713..5c8f6b0dc955 100644 --- a/packages/kokkos-kernels/CMakeLists.txt +++ b/packages/kokkos-kernels/CMakeLists.txt @@ -11,7 +11,7 @@ SET(KOKKOSKERNELS_TOP_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) SET(KokkosKernels_VERSION_MAJOR 4) SET(KokkosKernels_VERSION_MINOR 5) -SET(KokkosKernels_VERSION_PATCH 0) +SET(KokkosKernels_VERSION_PATCH 1) SET(KokkosKernels_VERSION "${KokkosKernels_VERSION_MAJOR}.${KokkosKernels_VERSION_MINOR}.${KokkosKernels_VERSION_PATCH}") #Set variables for config file @@ -32,7 +32,7 @@ IF(NOT KOKKOSKERNELS_HAS_TRILINOS) ENDIF() ENDIF() IF(NOT DEFINED ${PROJECT_NAME}) - PROJECT(KokkosKernels CXX) + PROJECT(KokkosKernels VERSION ${KokkosKernels_VERSION} LANGUAGES CXX) ENDIF() ENDIF() diff --git a/packages/kokkos-kernels/master_history.txt b/packages/kokkos-kernels/master_history.txt index a02c157740d6..ef872959d470 100644 --- a/packages/kokkos-kernels/master_history.txt +++ b/packages/kokkos-kernels/master_history.txt @@ -29,3 +29,4 @@ tag: 4.3.01 date: 05/07/2024 master: 1b0a15f5 release: 58785c1b tag: 4.4.00 date: 08/08/2024 master: d1a91b8a release: 1145f529 
tag: 4.4.01 date: 09/12/2024 master: 0608a337 release: 6b340287 tag: 4.5.00 date: 11/11/2024 master: 0b43169e release: 4a7590af +tag: 4.5.01 date: 12/17/2024 master: 957ac849 release: 0b3d5a3b diff --git a/packages/kokkos/CHANGELOG.md b/packages/kokkos/CHANGELOG.md index 6c237ebca867..84bbd03585bd 100644 --- a/packages/kokkos/CHANGELOG.md +++ b/packages/kokkos/CHANGELOG.md @@ -1,5 +1,16 @@ # CHANGELOG +## 4.5.01 + +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.5.00...4.5.01) + +### Bug Fixes + +* Fix re-builds after cleaning the binary tree when doing `add_subdirectory` on the Kokkos source [\#7557](https://github.com/kokkos/kokkos/pull/7557) +* Update mdspan to include fix for submdspan and bracket operator with clang 15&16 [\#7559](https://github.com/kokkos/kokkos/pull/7559) +* Fix DynRankView performance regression by re-introducing shortcut operator() impls [\#7606](https://github.com/kokkos/kokkos/pull/7606) +* Add missing MI300A (`GFX942_APU`) option to Makefile build-system + ## 4.5.00 [Full Changelog](https://github.com/kokkos/kokkos/compare/4.4.01...4.5.00) diff --git a/packages/kokkos/CMakeLists.txt b/packages/kokkos/CMakeLists.txt index f0bf8e3634a9..6a70bea14973 100644 --- a/packages/kokkos/CMakeLists.txt +++ b/packages/kokkos/CMakeLists.txt @@ -149,7 +149,7 @@ endif() set(Kokkos_VERSION_MAJOR 4) set(Kokkos_VERSION_MINOR 5) -set(Kokkos_VERSION_PATCH 0) +set(Kokkos_VERSION_PATCH 1) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") message(STATUS "Kokkos version: ${Kokkos_VERSION}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") diff --git a/packages/kokkos/Makefile.kokkos b/packages/kokkos/Makefile.kokkos index 9e6ad3241564..f67eadf241f3 100644 --- a/packages/kokkos/Makefile.kokkos +++ b/packages/kokkos/Makefile.kokkos @@ -2,7 +2,7 @@ KOKKOS_VERSION_MAJOR = 4 KOKKOS_VERSION_MINOR = 5 -KOKKOS_VERSION_PATCH = 0 
+KOKKOS_VERSION_PATCH = 1 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial @@ -13,7 +13,7 @@ KOKKOS_DEVICES ?= "Threads" # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90 # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace # IBM: Power8,Power9 -# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103 +# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX942_APU,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 # Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC KOKKOS_ARCH ?= "" @@ -454,6 +454,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 0) endif KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942_APU) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030) @@ -468,6 +469,7 @@ KOKKOS_INTERNAL_USE_ARCH_AMD := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX9 + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942) \ + + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103)) @@ -1196,6 +1198,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942), 1) tmp := $(call kokkos_append_header,"$H""define 
KOKKOS_ARCH_AMD_GPU \"gfx942\"") KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx942 endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX942_APU") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx942\"") + KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx942 +endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1030") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx1030\"") diff --git a/packages/kokkos/README.md b/packages/kokkos/README.md index 0ea07f9ea2f6..56159b35c29b 100644 --- a/packages/kokkos/README.md +++ b/packages/kokkos/README.md @@ -30,12 +30,12 @@ To start learning about Kokkos: The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest). -The current release is [4.5.00](https://github.com/kokkos/kokkos/releases/tag/4.5.00). +The current release is [4.5.01](https://github.com/kokkos/kokkos/releases/tag/4.5.01). 
```bash -curl -OJ -L https://github.com/kokkos/kokkos/releases/download/4.5.00/kokkos-4.5.00.tar.gz +curl -OJ -L https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.tar.gz # Or with wget -wget https://github.com/kokkos/kokkos/releases/download/4.5.00/kokkos-4.5.00.tar.gz +wget https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.tar.gz ``` To clone the latest development version of Kokkos from GitHub: diff --git a/packages/kokkos/containers/src/Kokkos_DynRankView.hpp b/packages/kokkos/containers/src/Kokkos_DynRankView.hpp index 2f2f4433e7ca..b8603595264c 100644 --- a/packages/kokkos/containers/src/Kokkos_DynRankView.hpp +++ b/packages/kokkos/containers/src/Kokkos_DynRankView.hpp @@ -524,7 +524,10 @@ class DynRankView : private View { std::is_same_v, is_default_map = std::is_void_v && - (is_layout_left || is_layout_right || is_layout_stride) + (is_layout_left || is_layout_right || is_layout_stride), + + is_default_access = + is_default_map && std::is_same_v }; // Bounds checking macros @@ -574,12 +577,134 @@ class DynRankView : private View { using view_type::stride_7; // FIXME: not tested using view_type::use_count; +#ifdef KOKKOS_ENABLE_CUDA KOKKOS_FUNCTION reference_type operator()(index_type i0 = 0, index_type i1 = 0, index_type i2 = 0, index_type i3 = 0, index_type i4 = 0, index_type i5 = 0, index_type i6 = 0) const { return view_type::operator()(i0, i1, i2, i3, i4, i5, i6); } +#else + // Adding shortcut operators for rank-0 to rank-3 for default layouts + // and access modalities. + // This removes performance overhead for always using rank-7 mapping. + // See https://github.com/kokkos/kokkos/issues/7604 + // When boundschecking is enabled we still go through the underlying + // rank-7 View to leverage the error checks there. 
+ + KOKKOS_FUNCTION reference_type operator()() const { +#ifdef KOKKOS_ENABLE_DEBUG + if (rank() != 0u) + Kokkos::abort( + "DynRankView rank 0 operator() called with invalid number of " + "arguments."); +#endif +#ifndef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK + if constexpr (is_default_access) { + return view_type::data()[0]; + } else +#endif + return view_type::operator()(0, 0, 0, 0, 0, 0, 0); + } + + KOKKOS_FUNCTION reference_type operator()(index_type i0) const { +#ifdef KOKKOS_ENABLE_DEBUG + // FIXME: Should be equal, only access(...) allows mismatch of rank and + // index args + if (rank() > 1u) + Kokkos::abort( + "DynRankView rank 1 operator() called with invalid number of " + "arguments."); +#endif +#ifndef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK + if constexpr (is_default_access) { + if constexpr (is_layout_stride) { + return view_type::data()[i0 * view_type::stride(0)]; + } else { + return view_type::data()[i0]; + } + } else +#endif + return view_type::operator()(i0, 0, 0, 0, 0, 0, 0); +#if defined KOKKOS_COMPILER_INTEL || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif + } + + KOKKOS_FUNCTION reference_type operator()(index_type i0, + index_type i1) const { +#ifdef KOKKOS_ENABLE_DEBUG + // FIXME: Should be equal, only access(...) 
allows mismatch of rank and + // index args + if (rank() > 2u) + Kokkos::abort( + "DynRankView rank 2 operator() called with invalid number of " + "arguments."); +#endif +#ifndef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK + if constexpr (is_default_access) { + if constexpr (is_layout_left) { + return view_type::data()[i0 + i1 * view_type::stride(1)]; + } else if constexpr (is_layout_right) { + return view_type::data()[i0 * view_type::extent(1) + i1]; + } else { + return view_type::data()[i0 * view_type::stride(0) + + i1 * view_type::stride(1)]; + } + } else +#endif + return view_type::operator()(i0, i1, 0, 0, 0, 0, 0); +#if defined KOKKOS_COMPILER_INTEL || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif + } + + KOKKOS_FUNCTION reference_type operator()(index_type i0, index_type i1, + index_type i2) const { +#ifdef KOKKOS_ENABLE_DEBUG + // FIXME: Should be equal, only access(...) allows mismatch of rank and + // index args + if (rank() > 3u) + Kokkos::abort( + "DynRankView rank 3 operator() called with invalid number of " + "arguments."); +#endif +#ifndef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK + if constexpr (is_default_access) { + if constexpr (is_layout_left) { + return view_type::data()[i0 + view_type::stride(1) * + (i1 + i2 * view_type::extent(1))]; + } else if constexpr (is_layout_right) { + return view_type::data()[(i0 * view_type::extent(1) + i1) * + view_type::extent(2) + + i2]; + } else { + return view_type::data()[i0 * view_type::stride(0) + + i1 * view_type::stride(1) + + i2 * view_type::stride(2)]; + } + } else +#endif + return view_type::operator()(i0, i1, i2, 0, 0, 0, 0); +#if defined KOKKOS_COMPILER_INTEL || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif + } + + KOKKOS_FUNCTION reference_type operator()(index_type i0, index_type i1, + index_type i2, index_type i3, + index_type i4 = 
0, + index_type i5 = 0, + index_type i6 = 0) const { + return view_type::operator()(i0, i1, i2, i3, i4, i5, i6); + } +#endif // This is an accomodation for Phalanx, that is usint the operator[] to access // all elements in a linear fashion even when the rank is not 1 diff --git a/packages/kokkos/core/unit_test/TestAtomicViews.hpp b/packages/kokkos/core/unit_test/TestAtomicViews.hpp index fa72e0b0cfdb..55ff62822bcc 100644 --- a/packages/kokkos/core/unit_test/TestAtomicViews.hpp +++ b/packages/kokkos/core/unit_test/TestAtomicViews.hpp @@ -1065,13 +1065,9 @@ T AndEqualAtomicViewCheck(const int64_t input_length) { const int64_t N = input_length; T result[2] = {1}; for (int64_t i = 0; i < N; ++i) { - if (N % 2 == 0) { - result[0] &= (T)i; - } else { - result[1] &= (T)i; - } + int64_t idx = N % 2; + result[idx] &= (T)i; } - return (result[0]); } diff --git a/packages/kokkos/core/unit_test/TestViewBadAlloc.hpp b/packages/kokkos/core/unit_test/TestViewBadAlloc.hpp index c876ceb787e8..1707a9d5d25d 100644 --- a/packages/kokkos/core/unit_test/TestViewBadAlloc.hpp +++ b/packages/kokkos/core/unit_test/TestViewBadAlloc.hpp @@ -66,6 +66,12 @@ TEST(TEST_CATEGORY, view_bad_alloc) { } #endif +#if defined(_WIN32) && defined(KOKKOS_ENABLE_CUDA) + if (std::is_same_v) { + GTEST_SKIP() << "MSVC/CUDA segfaults when allocating too much memory"; + } +#endif + test_view_bad_alloc(); constexpr bool execution_space_is_device = diff --git a/packages/kokkos/master_history.txt b/packages/kokkos/master_history.txt index 3f9e4c6e159a..c9e454c1af0f 100644 --- a/packages/kokkos/master_history.txt +++ b/packages/kokkos/master_history.txt @@ -40,3 +40,4 @@ tag: 4.3.01 date: 05:07:2024 master: 486cc745 release: 262d2d6e tag: 4.4.00 date: 08:08:2024 master: 6ecdf605 release: 6068673c tag: 4.4.01 date: 09:12:2024 master: 08ceff92 release: 2d60c039 tag: 4.5.00 date: 11:11:2024 master: 15dc143e release: 5164f2f6 +tag: 4.5.01 date: 12:19:2024 master: 09e775bf release: e0d656f9 diff --git 
a/packages/kokkos/scripts/docker/Dockerfile.gcc b/packages/kokkos/scripts/docker/Dockerfile.gcc index b93c7452b09c..3bca9834b524 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.gcc +++ b/packages/kokkos/scripts/docker/Dockerfile.gcc @@ -18,7 +18,7 @@ RUN echo "deb http://dk.archive.ubuntu.com/ubuntu/ xenial main" >> /etc/apt/sour apt-get clean && rm -rf /var/lib/apt/lists/* -RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \ +RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \ KEYDUMP_FILE=keydump && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.hipcc b/packages/kokkos/scripts/docker/Dockerfile.hipcc index 909c6a3d25f8..f8d3851d749b 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.hipcc +++ b/packages/kokkos/scripts/docker/Dockerfile.hipcc @@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y \ ENV PATH=/opt/rocm/bin:$PATH -RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \ +RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \ KEYDUMP_FILE=keydump && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject b/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject index 7f4af6468d30..2a29e0041c4c 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject +++ b/packages/kokkos/scripts/docker/Dockerfile.kokkosllvmproject @@ -22,7 +22,7 @@ RUN apt-get update && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \ +RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \ KEYDUMP_FILE=keydump && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.nvcc b/packages/kokkos/scripts/docker/Dockerfile.nvcc index 
11e926fe091f..e87f7dcb055b 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.nvcc +++ b/packages/kokkos/scripts/docker/Dockerfile.nvcc @@ -14,7 +14,7 @@ RUN apt-get update && apt-get install -y \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \ +RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \ KEYDUMP_FILE=keydump && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.nvhpc b/packages/kokkos/scripts/docker/Dockerfile.nvhpc index 88e59de2827a..5f611e98d847 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.nvhpc +++ b/packages/kokkos/scripts/docker/Dockerfile.nvhpc @@ -1,7 +1,7 @@ ARG BASE=nvcr.io/nvidia/nvhpc:23.7-devel-cuda12.2-ubuntu20.04 FROM $BASE -RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \ +RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \ KEYDUMP_FILE=keydump && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.openmptarget b/packages/kokkos/scripts/docker/Dockerfile.openmptarget index a555b29dd8fb..0d278972aef7 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.openmptarget +++ b/packages/kokkos/scripts/docker/Dockerfile.openmptarget @@ -15,7 +15,7 @@ RUN apt-get update && apt-get install -y \ ARG NPROC=8 -RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \ +RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \ KEYDUMP_FILE=keydump && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \ diff --git a/packages/kokkos/scripts/docker/Dockerfile.sycl b/packages/kokkos/scripts/docker/Dockerfile.sycl index b2d4ab8a9e67..1e653e0878c7 100644 --- a/packages/kokkos/scripts/docker/Dockerfile.sycl +++ b/packages/kokkos/scripts/docker/Dockerfile.sycl @@ -15,7 +15,7 @@ RUN apt-get update && apt-get 
install -y \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \ +RUN KEYDUMP_URL=https://cloud1.cees.ornl.gov/download && \ KEYDUMP_FILE=keydump && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \ wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \ @@ -46,7 +46,7 @@ RUN wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCT apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN wget https://cloud.cees.ornl.gov/download/oneapi-for-nvidia-gpus-2023.0.0-linux.sh && \ +RUN wget https://cloud1.cees.ornl.gov/download/oneapi-for-nvidia-gpus-2023.0.0-linux.sh && \ echo "3416721faf83e5858e65795231bae47bb51ff91d4e8738613d498674f1636f72 oneapi-for-nvidia-gpus-2023.0.0-linux.sh" | sha256sum --check && \ chmod +x oneapi-for-nvidia-gpus-2023.0.0-linux.sh && \ ./oneapi-for-nvidia-gpus-2023.0.0-linux.sh -y && \ diff --git a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/config.hpp b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/config.hpp index 24166462e7ab..e8cacf40d601 100644 --- a/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/config.hpp +++ b/packages/kokkos/tpls/mdspan/include/experimental/__p0009_bits/config.hpp @@ -240,7 +240,13 @@ static_assert(_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_14, "mdspan requires C++14 or #ifndef MDSPAN_USE_BRACKET_OPERATOR # if defined(__cpp_multidimensional_subscript) -# define MDSPAN_USE_BRACKET_OPERATOR 1 +// The following if/else is necessary to workaround a clang issue +// relative to using a parameter pack inside a bracket operator in C++2b/C++23 mode +# if defined(_MDSPAN_COMPILER_CLANG) && ((__clang_major__ == 15) || (__clang_major__ == 16)) +# define MDSPAN_USE_BRACKET_OPERATOR 0 +# else +# define MDSPAN_USE_BRACKET_OPERATOR 1 +# endif # else # define MDSPAN_USE_BRACKET_OPERATOR 0 # endif diff --git a/packages/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp 
b/packages/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp index 2a2cdf76b923..46ccbaadebe0 100644 --- a/packages/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/packages/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -252,7 +252,7 @@ layout_left::mapping::submdspan_mapping_impl( *this, inv_map, // HIP needs deduction guides to have markups so we need to be explicit // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have -// the issue But Clang-CUDA also doesn't accept the use of deduction guide so +// the issue but Clang-CUDA also doesn't accept the use of deduction guide so // disable it for CUDA altogether #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) detail::tuple{ @@ -330,7 +330,7 @@ MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::mapping{ @@ -485,7 +485,7 @@ layout_right::mapping::submdspan_mapping_impl( *this, inv_map, // HIP needs deduction guides to have markups so we need to be explicit // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have -// the issue But Clang-CUDA also doesn't accept the use of deduction guide so +// the issue but Clang-CUDA also doesn't accept the use of deduction guide so // disable it for CUDA altogether #if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple{ @@ -555,7 +555,7 @@ MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::mapping{ @@ -603,12 +603,11 @@ layout_stride::mapping::submdspan_mapping_impl( *this, inv_map, // HIP needs deduction guides to have markups so we need to be explicit // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have -// the issue -#if defined(_MDSPAN_HAS_HIP) || \ - (defined(__NVCC__) && \ - (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10) < 1120) +// the issue but Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA altogether 
+#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple( - detail::stride_of(slices)...).values)), + detail::stride_of(slices)...)).values), #else MDSPAN_IMPL_STANDARD_NAMESPACE::detail::tuple(detail::stride_of(slices)...)).values), #endif diff --git a/packages/muelu/CMakeLists.txt b/packages/muelu/CMakeLists.txt index 6f27ca3b36b5..2d916631ad6d 100644 --- a/packages/muelu/CMakeLists.txt +++ b/packages/muelu/CMakeLists.txt @@ -404,17 +404,6 @@ ENDIF () # C) Add the libraries, tests, and examples # -# The build has to be in a very specific order because of the existence of Matlab interface: -# 1. Build the majority of the code in src/ [except src/Interface; src/CMakeLists.txt is responsible for ignoring that] -# 2. Build matlab factories [located in matlab/; matlab/bin is ignored in matlab/CMakeLists.txt] -# 3. Build the interpreter [requires all factories] -# 4. Build the adapters [required for Matlab] -# 5. Build the rest of matlab code -# This order required splitting of the src/ library into two: muelu and muelu-interface, as steps 1. and 3. are separated. 
-# A simple diagram explains this: -# / muelu -# matlab/bin - muelu-adapters - muelu-interface | -# \ matlab ADD_SUBDIRECTORY(src) if(TPL_ENABLE_MATLAB) @@ -425,17 +414,12 @@ if(TPL_ENABLE_MATLAB) IF (NOT ${PROJECT_NAME}_ENABLE_EXPLICIT_INSTANTIATION) MESSAGE(FATAL_ERROR "Muemex interfaces require \"-D${PROJECT_NAME}_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON\".") ENDIF() - - IF(NOT ${PACKAGE_NAME}_ENABLE_Epetra) - MESSAGE(FATAL_ERROR "Muemex interfaces require \"-D${PACKAGE_NAME}_ENABLE_Epetra:BOOL=ON\".") - ENDIF() ENDIF() -if(TPL_ENABLE_MATLAB) - ADD_SUBDIRECTORY(matlab) -ENDIF() +# Note: adapters needs to come before matlab ADD_SUBDIRECTORY(adapters) if(TPL_ENABLE_MATLAB) + ADD_SUBDIRECTORY(matlab) ADD_SUBDIRECTORY(matlab/bin) ADD_SUBDIRECTORY(matlab/tests) ENDIF() diff --git a/packages/muelu/doc/UsersGuide/masterList.xml b/packages/muelu/doc/UsersGuide/masterList.xml index d9e273f00443..e4c00ee95a40 100644 --- a/packages/muelu/doc/UsersGuide/masterList.xml +++ b/packages/muelu/doc/UsersGuide/masterList.xml @@ -353,7 +353,7 @@ coarse: type needs special treatment in ML string - "SuperLU" + "KLU" Coarse solver. Possible values: see Table~\ref{tab:coarse_solvers}. diff --git a/packages/muelu/doc/UsersGuide/options_smoothing_and_coarse.tex b/packages/muelu/doc/UsersGuide/options_smoothing_and_coarse.tex index 9e260be65b4a..10c501c10e50 100644 --- a/packages/muelu/doc/UsersGuide/options_smoothing_and_coarse.tex +++ b/packages/muelu/doc/UsersGuide/options_smoothing_and_coarse.tex @@ -21,7 +21,7 @@ \cbb{coarse: max size}{int}{2000}{Maximum dimension of a coarse grid. \muelu will stop coarsening once it is achieved.} -\cbb{coarse: type}{string}{"SuperLU"}{Coarse solver. Possible values: see Table~\ref{tab:coarse_solvers}.} +\cbb{coarse: type}{string}{"KLU"}{Coarse solver. Possible values: see Table~\ref{tab:coarse_solvers}.} \cba{coarse: params}{\parameterlist}{Coarse solver parameters. 
\muelu passes them directly to the appropriate package library.} diff --git a/packages/muelu/doc/UsersGuide/paramlist.tex b/packages/muelu/doc/UsersGuide/paramlist.tex index f9346acebbf1..86249ef58d12 100644 --- a/packages/muelu/doc/UsersGuide/paramlist.tex +++ b/packages/muelu/doc/UsersGuide/paramlist.tex @@ -46,7 +46,7 @@ \cbb{coarse: max size}{int}{2000}{Maximum dimension of a coarse grid. \muelu will stop coarsening once it is achieved.} -\cbb{coarse: type}{string}{"SuperLU"}{Coarse solver. Possible values: see Table~\ref{tab:coarse_solvers}.} +\cbb{coarse: type}{string}{"KLU"}{Coarse solver. Possible values: see Table~\ref{tab:coarse_solvers}.} \cba{coarse: params}{\parameterlist}{Coarse solver parameters. \muelu passes them directly to the appropriate package library.} diff --git a/packages/muelu/doc/UsersGuide/paramlist_hidden.tex b/packages/muelu/doc/UsersGuide/paramlist_hidden.tex index cbc68a68e2f9..0d14c4917a68 100644 --- a/packages/muelu/doc/UsersGuide/paramlist_hidden.tex +++ b/packages/muelu/doc/UsersGuide/paramlist_hidden.tex @@ -56,7 +56,7 @@ \cbb{coarse: max size}{int}{2000}{Maximum dimension of a coarse grid. \muelu will stop coarsening once it is achieved.} -\cbb{coarse: type}{string}{"SuperLU"}{Coarse solver. Possible values: see Table~\ref{tab:coarse_solvers}.} +\cbb{coarse: type}{string}{"KLU"}{Coarse solver. Possible values: see Table~\ref{tab:coarse_solvers}.} \cba{coarse: params}{\parameterlist}{Coarse solver parameters. 
\muelu passes them directly to the appropriate package library.} diff --git a/packages/muelu/matlab/bin/CMakeLists.txt b/packages/muelu/matlab/bin/CMakeLists.txt index 57c534d6c569..55660f49ecb4 100644 --- a/packages/muelu/matlab/bin/CMakeLists.txt +++ b/packages/muelu/matlab/bin/CMakeLists.txt @@ -15,46 +15,17 @@ if(TPL_ENABLE_MATLAB) TRIBITS_SET_AND_INC_DIRS(DIR ${CMAKE_CURRENT_BINARY_DIR}) # Force the code into R2017b compatibility mode - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMX_COMPAT_64 -fPIC -DMATLAB_MEXCMD_RELEASE=R2017b") - SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMX_COMPAT_64 -fPIC -DMATLAB_MEXCMD_RELEASE=R2017b") - - + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMX_COMPAT_64 -fPIC -DMATLAB_MEXCMD_RELEASE=R2017b") + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMX_COMPAT_64 -fPIC -DMATLAB_MEXCMD_RELEASE=R2017b") # Debugging information - IF (${PROJECT_NAME}_VERBOSE_CONFIGURE) - MESSAGE("MEX_COMPILER = " ${MEX_COMPILER}) - MESSAGE("MEX_MEXEXT = " ${MEX_MEXEXT}) - MESSAGE("MEX_EXTENSION = " ${MEX_EXTENSION}) - MESSAGE("MATLAB_MEX_DIR = " ${MATLAB_MEX_DIR}) - ENDIF() + MESSAGE("MEX_COMPILER = " ${MEX_COMPILER}) + MESSAGE("MEX_MEXEXT = " ${MEX_MEXEXT}) + MESSAGE("MEX_EXTENSION = " ${MEX_EXTENSION}) + MESSAGE("MATLAB_MEX_DIR = " ${MATLAB_MEX_DIR}) APPEND_SET(HEADERS_EXEC muemex.h) APPEND_SET(SOURCES_EXEC muemex.cpp) - # First, grab the package's own libraries - SET(LINK_LIBS) - APPEND_SET(LINK_LIBS ${${PACKAGE_NAME}_LIBRARIES}) - - # Third, add test dependent package libraries - TRIBITS_GATHER_ENABLED_ITEMS(${PACKAGE_NAME} TEST PACKAGES ALL_DEP_PACKAGES) - TRIBITS_SORT_AND_APPEND_INCLUDE_AND_LINK_DIRS_AND_LIBS("${${PROJECT_NAME}_REVERSE_PACKAGES}" - "${ALL_DEP_PACKAGES}" "" LINK_LIBS "") - - # Fourth, add dependent test TPL libraries - TRIBITS_GATHER_ENABLED_ITEMS(${PACKAGE_NAME} TEST TPLS ALL_TPLS) - TRIBITS_SORT_AND_APPEND_INCLUDE_AND_LINK_DIRS_AND_LIBS("${${PROJECT_NAME}_REVERSE_TPLS}" "${ALL_TPLS}" - TPL_ LINK_LIBS "") - - # Fifth, add matlab-specific libs - 
SET(LINK_LIBS ${LINK_LIBS} "mx" "mex" "mat") - - # Last, add last_lib to get extra link options on the link linee - IF (${PROJECT_NAME}_EXTRA_LINK_FLAGS) - APPEND_SET(LINK_LIBS ${last_lib}) - ENDIF() - IF (${PROJECT_NAME}_VERBOSE_CONFIGURE) - PRINT_VAR(LINK_LIBS) - ENDIF() - # Manually drop in options from the mex script (R2009b) on a linux platform. # g++ -O -pthread -shared -Wl,--version-script,/usr/local/matlab/7.9/extern/lib/glnxa64/mexFunction.map -Wl,--no-undefined -o "mlmex.mexa64" "mlmex-mlmex.o" -lm -Wl,-rpath-link,/usr/local/matlab/7.9/bin/glnxa64 -L/usr/local/matlab/7.9/bin/glnxa64 -lmx -lmex -lmat -lm if (NOT APPLE) @@ -83,4 +54,3 @@ if(TPL_ENABLE_MATLAB) CONFIGURE_FILE(matlab ${CMAKE_CURRENT_BINARY_DIR}/matlab @ONLY) ENDIF() - diff --git a/packages/muelu/matlab/bin/muemex.cpp b/packages/muelu/matlab/bin/muemex.cpp index 03068adff0e4..786c8c789b08 100644 --- a/packages/muelu/matlab/bin/muemex.cpp +++ b/packages/muelu/matlab/bin/muemex.cpp @@ -14,8 +14,8 @@ #define MUEMEX_ERROR -1 // Do not compile MueMex if any of these aren't available -#if !defined HAVE_MUELU_EPETRA || !defined HAVE_MUELU_MATLAB -#error "MueMex requires Epetra, Tpetra and MATLAB." +#if !defined HAVE_MUELU_TPETRA || !defined HAVE_MUELU_MATLAB +#error "MueMex requires Tpetra and MATLAB." 
#endif #include @@ -295,11 +295,13 @@ template <> RCP getDatapackHierarchy(MuemexSystem* dp) { RCP> hier; switch (dp->type) { +#ifdef HAVE_MUELU_EPETRA case EPETRA: { EpetraSystem* pack = (EpetraSystem*)dp; hier = pack->getHierarchy(); break; } +#endif case TPETRA: { TpetraSystem* pack = (TpetraSystem*)dp; hier = pack->getHierarchy(); @@ -322,10 +324,13 @@ RCP getDatapackHierarchy(MuemexSystem* dp) { template void setHierarchyData(MuemexSystem* problem, int levelID, T& data, string& dataName) { RCP level; +#ifdef HAVE_MUELU_EPETRA if (problem->type == EPETRA) { RCP> hier = ((EpetraSystem*)problem)->getHierarchy(); level = hier->GetLevel(levelID); - } else if (problem->type == TPETRA) { + } else +#endif + if (problem->type == TPETRA) { RCP> hier = ((TpetraSystem*)problem)->getHierarchy(); level = hier->GetLevel(levelID); } else if (problem->type == TPETRA_COMPLEX) { @@ -374,7 +379,9 @@ mxArray* MuemexSystem::getHierarchyData(string dataName, MuemexType dataType, in // Otherwise would break getting A and P when 'keep' is off needFMB = false; switch (this->type) { +#ifdef HAVE_MUELU_EPETRA case EPETRA: +#endif case TPETRA: { RCP> hier = rcp_static_cast>(getDatapackHierarchy(this)); level = hier->GetLevel(levelID); @@ -493,6 +500,7 @@ mxArray* MuemexSystem::getHierarchyData(string dataName, MuemexType dataType, in return output; } +#ifdef HAVE_MUELU_EPETRA // EpetraSystem impl EpetraSystem::EpetraSystem() @@ -608,6 +616,7 @@ RCP EpetraSystem::getHierarchy() { throw runtime_error("Hierarchy from Epetra problem was null."); return hier; } +#endif // tpetra_double_data_pack implementation @@ -1038,9 +1047,12 @@ void parse_list_item(RCP List, char* option_name, const mxArray* opt_str = opt_char; List->set(option_name, opt_str); if (strcmp(option_name, MUEMEX_INTERFACE) == 0) { +#ifdef HAVE_MUELU_EPETRA if (strcmp(opt_str.c_str(), "epetra") == 0) useEpetra = true; - else if (strcmp(opt_str.c_str(), "tpetra") == 0) + else +#endif + if (strcmp(opt_str.c_str(), "tpetra") == 0) 
useEpetra = false; } mxFree(opt_char); @@ -1254,6 +1266,7 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { } intf = List->get(MUEMEX_INTERFACE, "tpetra"); List->remove(MUEMEX_INTERFACE); // no longer need this parameter +#ifdef HAVE_MUELU_EPETRA if (intf == "epetra") { if (mxIsComplex(prhs[1])) { mexPrintf("Error: Attempting to use complex-valued matrix with Epetra, which is unsupported.\n"); @@ -1265,7 +1278,9 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { dp->setup(prhs[1], haveCoords, haveCoords ? prhs[2] : (mxArray*)NULL); oc = dp->operatorComplexity; D = rcp_implicit_cast(dp); - } else if (intf == "tpetra") { + } else +#endif + if (intf == "tpetra") { // infer scalar type from prhs (can be double or complex) if (mxIsComplex(prhs[1])) { #ifdef HAVE_COMPLEX_SCALARS @@ -1353,6 +1368,7 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { // get pointer to MATLAB array that will be "B" or "rhs" multivector const mxArray* rhs = reuse ? 
prhs[2] : prhs[3]; switch (dp->type) { +#ifdef HAVE_MUELU_EPETRA case EPETRA: { RCP esys = rcp_static_cast(dp); RCP matrix; @@ -1363,6 +1379,7 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { plhs[0] = esys->solve(List, matrix, rhs, iters); break; } +#endif case TPETRA: { RCP> tsys = rcp_static_cast, MuemexSystem>(dp); RCP matrix; @@ -1410,11 +1427,13 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { // get pointer to MATLAB array that will be "B" or "rhs" multivector const mxArray* rhs = prhs[2]; switch (dp->type) { +#ifdef HAVE_MUELU_EPETRA case EPETRA: { RCP esys = rcp_static_cast(dp); plhs[0] = esys->apply(rhs); break; } +#endif case TPETRA: { RCP> tsys = rcp_static_cast, MuemexSystem>(dp); plhs[0] = tsys->apply(rhs); diff --git a/packages/muelu/matlab/bin/muemex.h b/packages/muelu/matlab/bin/muemex.h index fb66654a13de..ab8c30788cc2 100644 --- a/packages/muelu/matlab/bin/muemex.h +++ b/packages/muelu/matlab/bin/muemex.h @@ -21,21 +21,31 @@ #include "Teuchos_RCP.hpp" #include "MueLu_config.hpp" #include "MueLu.hpp" +#ifdef HAVE_MUELU_EPETRA #include "MueLu_EpetraOperator.hpp" +#endif #include "MueLu_TpetraOperator.hpp" #include "MueLu_Hierarchy.hpp" #include "MueLu_MatlabUtils.hpp" +#ifdef HAVE_MUELU_EPETRA #include "MueLu_CreateEpetraPreconditioner.hpp" +#endif #include "MueLu_CreateTpetraPreconditioner.hpp" +#ifdef HAVE_MUELU_EPETRA #include "Epetra_SerialComm.h" #include "Epetra_Map.h" #include "Epetra_MultiVector.h" #include "Epetra_CrsMatrix.h" #include "Epetra_LinearProblem.h" +#endif #include "Tpetra_CrsMatrix.hpp" +#ifdef HAVE_MUELU_EPETRA #include "Xpetra_EpetraCrsMatrix.hpp" +#endif #include "BelosSolverFactory.hpp" +#ifdef HAVE_MUELU_EPETRA #include "BelosEpetraAdapter.hpp" +#endif #include "BelosTpetraAdapter.hpp" #include "BelosPseudoBlockGmresSolMgr.hpp" #include "BelosBlockGmresSolMgr.hpp" @@ -53,7 +63,9 @@ namespace MueLu typedef enum { +#ifdef HAVE_MUELU_EPETRA EPETRA, +#endif 
TPETRA, TPETRA_COMPLEX } DataPackType; @@ -89,6 +101,7 @@ class MuemexSystem mxArray* getHierarchyData(std::string dataName, MuemexType dataType, int levelID); //Works for all dp types }; +#ifdef HAVE_MUELU_EPETRA class EpetraSystem : public MuemexSystem { public: @@ -120,6 +133,7 @@ class EpetraSystem : public MuemexSystem Teuchos::RCP A; Teuchos::RCP prec; }; +#endif //Scalar can be double or std::complex (complex_t) //Note: DataPackType is either TPETRA or TPETRA_COMPLEX @@ -140,7 +154,7 @@ class TpetraSystem : public MuemexSystem mxArray* apply(const mxArray* rhs); //note: I typedef'd mm_node_t at the top of this file as the Kokkos default type Teuchos::RCP GetMatrix() - { + { return A; } Teuchos::RCP GetPrec() diff --git a/packages/muelu/matlab/src/CMakeLists.txt b/packages/muelu/matlab/src/CMakeLists.txt index 967f19485e8c..a58e94a69715 100644 --- a/packages/muelu/matlab/src/CMakeLists.txt +++ b/packages/muelu/matlab/src/CMakeLists.txt @@ -15,8 +15,8 @@ if(TPL_ENABLE_MATLAB) TRIBITS_SET_AND_INC_DIRS(DIR ${CMAKE_CURRENT_BINARY_DIR}) # Force the code into R2017b compatibility mode - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMX_COMPAT_64 -DMATLAB_MEXCMD_RELEASE=R2017b") - SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMX_COMPAT_64 -DMATLAB_MEXCMD_RELEASE=R2017b") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMX_COMPAT_64 -DMATLAB_MEXCMD_RELEASE=R2017b") + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMX_COMPAT_64 -DMATLAB_MEXCMD_RELEASE=R2017b") APPEND_GLOB(HEADERS_LIB *.hpp) APPEND_SET(SOURCES_LIB MueLu_SingleLevelMatlabFactory.cpp MueLu_TwoLevelMatlabFactory.cpp MueLu_MatlabSmoother.cpp MueLu_MatlabUtils.cpp) @@ -29,7 +29,6 @@ if(TPL_ENABLE_MATLAB) muelu-matlab HEADERS ${HEADERS_LIB} SOURCES ${SOURCES_LIB} -# DEPLIBS muelu muelu-adapters + DEPLIBS muelu muelu-adapters ) ENDIF() - diff --git a/packages/muelu/matlab/src/MueLu_MatlabSmoother_def.hpp b/packages/muelu/matlab/src/MueLu_MatlabSmoother_def.hpp index eaf8160a3cef..426875bea237 100644 --- 
a/packages/muelu/matlab/src/MueLu_MatlabSmoother_def.hpp +++ b/packages/muelu/matlab/src/MueLu_MatlabSmoother_def.hpp @@ -6,10 +6,9 @@ // SPDX-License-Identifier: BSD-3-Clause // ***************************************************************************** // @HEADER - -#include "MueLu_MatlabSmoother_decl.hpp" #ifndef MUELU_MATLABSMOOTHER_DEF_HPP #define MUELU_MATLABSMOOTHER_DEF_HPP +#include "MueLu_MatlabSmoother_decl.hpp" #include "MueLu_MatlabUtils_decl.hpp" #if defined(HAVE_MUELU_MATLAB) diff --git a/packages/muelu/matlab/src/MueLu_MatlabUtils.cpp b/packages/muelu/matlab/src/MueLu_MatlabUtils.cpp index 9d0e4444069b..3df02f3543f9 100644 --- a/packages/muelu/matlab/src/MueLu_MatlabUtils.cpp +++ b/packages/muelu/matlab/src/MueLu_MatlabUtils.cpp @@ -9,8 +9,8 @@ #include "MueLu_MatlabUtils_def.hpp" -#if !defined(HAVE_MUELU_MATLAB) || !defined(HAVE_MUELU_EPETRA) -#error "Muemex types require MATLAB, Epetra and Tpetra." +#if !defined(HAVE_MUELU_MATLAB) || !defined(HAVE_MUELU_TPETRA) +#error "Muemex types require MATLAB and Tpetra." #else /* Stuff for MATLAB R2006b vs. 
previous versions */ @@ -38,7 +38,9 @@ template class MuemexData; template class MuemexData; template class MuemexData > >; template class MuemexData > >; +#ifdef HAVE_MUELU_EPETRA template class MuemexData >; +#endif template class MuemexData > >; template class MuemexData > >; template class MuemexData > >; @@ -147,12 +149,14 @@ std::vector > callMatlab(std::string function, int numOutputs, st case XPETRA_MULTIVECTOR_COMPLEX: matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; +#ifdef HAVE_MUELU_EPETRA case EPETRA_CRSMATRIX: matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; case EPETRA_MULTIVECTOR: matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; +#endif case AGGREGATES: matlabArgs[i] = rcp_static_cast >, MuemexArg>(args[i])->convertToMatlab(); break; @@ -380,8 +384,10 @@ template RCP loadDataFromMatlab template RCP loadDataFromMatlab >(const mxArray* mxa); template RCP loadDataFromMatlab >(const mxArray* mxa); template RCP loadDataFromMatlab >(const mxArray* mxa); +#ifdef HAVE_MUELU_EPETRA template RCP loadDataFromMatlab >(const mxArray* mxa); template RCP loadDataFromMatlab >(const mxArray* mxa); +#endif template RCP loadDataFromMatlab >(const mxArray* mxa); template RCP loadDataFromMatlab >(const mxArray* mxa); @@ -399,8 +405,10 @@ template mxArray* saveDataToMatlab(RCP& data); template mxArray* saveDataToMatlab(RCP& data); template mxArray* saveDataToMatlab(RCP& data); template mxArray* saveDataToMatlab(RCP& data); +#ifdef HAVE_MUELU_EPETRA template mxArray* saveDataToMatlab(RCP& data); template mxArray* saveDataToMatlab(RCP& data); +#endif template mxArray* saveDataToMatlab(RCP& data); template mxArray* saveDataToMatlab(RCP& data); diff --git a/packages/muelu/matlab/src/MueLu_MatlabUtils_decl.hpp b/packages/muelu/matlab/src/MueLu_MatlabUtils_decl.hpp index ddcdf3ba6ef1..408eb343ba74 100644 --- a/packages/muelu/matlab/src/MueLu_MatlabUtils_decl.hpp +++ 
b/packages/muelu/matlab/src/MueLu_MatlabUtils_decl.hpp @@ -12,11 +12,15 @@ #include "MueLu_ConfigDefs.hpp" -#if !defined(HAVE_MUELU_MATLAB) || !defined(HAVE_MUELU_EPETRA) +#if !defined(HAVE_MUELU_MATLAB) || !defined(HAVE_MUELU_TPETRA) #error "Muemex requires MATLAB, Epetra and Tpetra." #else -#include "mex.h" +// Matlab fwd style declarations +struct mxArray_tag; +typedef struct mxArray_tag mxArray; +typedef size_t mwIndex; + #include #include #include @@ -28,11 +32,15 @@ #include "MueLu_Aggregates_decl.hpp" #include "MueLu_AmalgamationInfo_decl.hpp" #include "MueLu_Utilities_decl.hpp" -#include "MueLu_Graph_decl.hpp" +#include "MueLu_Graph_fwd.hpp" +#ifdef HAVE_MUELU_EPETRA #include "Epetra_MultiVector.h" #include "Epetra_CrsMatrix.h" +#endif #include "Tpetra_CrsMatrix_decl.hpp" +#ifdef HAVE_MUELU_EPETRA #include "Xpetra_EpetraCrsMatrix.hpp" +#endif #include "Xpetra_MapFactory.hpp" #include "Xpetra_CrsGraph.hpp" #include "Xpetra_VectorFactory.hpp" @@ -58,8 +66,10 @@ enum MuemexType { XPETRA_MATRIX_COMPLEX, XPETRA_MULTIVECTOR_DOUBLE, XPETRA_MULTIVECTOR_COMPLEX, +#ifdef HAVE_MUELU_EPETRA EPETRA_CRSMATRIX, EPETRA_MULTIVECTOR, +#endif AGGREGATES, AMALGAMATION_INFO, GRAPH @@ -163,13 +173,13 @@ Teuchos::RCP convertMatlabVar(const mxArray* mxa); // trim from start static inline std::string& ltrim(std::string& s) { - s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(std::isspace)))); + s.erase(0, s.find_first_not_of(" \t\n\v\f\r")); // same whitespace set std::isspace matched; npos count clears an all-whitespace string return s; } // trim from end static inline std::string& rtrim(std::string& s) { - s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), s.end()); + s.erase(s.find_last_not_of(" \t\n\v\f\r") + 1); // +1 keeps the last non-whitespace char; npos + 1 wraps to 0 and clears an all-whitespace string return s; } diff --git a/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp b/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp index bb70471b1066..069cbbe930a3 100644 --- a/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp +++ b/packages/muelu/matlab/src/MueLu_MatlabUtils_def.hpp @@ -11,9
+11,10 @@ #define MUELU_MATLABUTILS_DEF_HPP #include "MueLu_MatlabUtils_decl.hpp" +#include -#if !defined(HAVE_MUELU_MATLAB) || !defined(HAVE_MUELU_EPETRA) -#error "Muemex types require MATLAB, Epetra and Tpetra." +#if !defined(HAVE_MUELU_MATLAB) || !defined(HAVE_MUELU_TPETRA) +#error "Muemex types require MATLAB and Tpetra." #else using Teuchos::RCP; @@ -103,6 +104,7 @@ MuemexType getMuemexType(const RCP& data) { return XPETRA template <> MuemexType getMuemexType >() { return XPETRA_MATRIX_COMPLEX; } +#ifdef HAVE_MUELU_EPETRA template <> MuemexType getMuemexType(const RCP& data) { return EPETRA_CRSMATRIX; } template <> @@ -112,6 +114,7 @@ template <> MuemexType getMuemexType(const RCP& data) { return EPETRA_MULTIVECTOR; } template <> MuemexType getMuemexType >() { return EPETRA_MULTIVECTOR; } +#endif template <> MuemexType getMuemexType(const RCP& data) { return AGGREGATES; } @@ -445,6 +448,7 @@ RCP > loadD return MueLu::TpetraMultiVector_To_XpetraMultiVector(tpetraMV); } +#ifdef HAVE_MUELU_EPETRA template <> RCP loadDataFromMatlab >(const mxArray* mxa) { RCP matrix; @@ -492,6 +496,7 @@ RCP loadDataFromMatlab >(const mxArr Epetra_BlockMap map(nr * nc, 1, 0, Comm); return rcp(new Epetra_MultiVector(Epetra_DataAccess::Copy, map, mxGetPr(mxa), nr, nc)); } +#endif template <> RCP loadDataFromMatlab >(const mxArray* mxa) { @@ -606,18 +611,18 @@ RCP loadDataFromMatlab >(const mxArray* mxa) { tgraph->insertGlobalIndices((mm_GlobalOrd)i, cols(rows[i], entriesPerRow[i])); } tgraph->fillComplete(map, map); - RCP mgraph = rcp(new MueLu::Graph(tgraph)); + RCP mgraph = rcp(new MueLu::LWGraph(tgraph)); // Set boundary nodes int numBoundaryNodes = mxGetNumberOfElements(boundaryNodes); - bool* boundaryFlags = new bool[nRows]; + Kokkos::View boundaryFlags("boundaryFlags", nRows); + // NOTE: This will not work correctly for non-CPU Node types for (int i = 0; i < nRows; i++) { boundaryFlags[i] = false; } for (int i = 0; i < numBoundaryNodes; i++) { boundaryFlags[boundaryList[i]] = 
true; } - ArrayRCP boundaryNodesInput(boundaryFlags, 0, nRows, true); - mgraph->SetBoundaryNodeMap(boundaryNodesInput); + mgraph->SetBoundaryNodeMap(boundaryFlags); return mgraph; } @@ -1005,6 +1010,7 @@ mxArray* saveDataToMatlab(RCP mxArray* saveDataToMatlab(RCP& data) { RCP xmat = EpetraCrs_To_XpetraMatrix(data); @@ -1018,6 +1024,7 @@ mxArray* saveDataToMatlab(RCP& data) { data->ExtractCopy(dataPtr, data->GlobalLength()); return output; } +#endif template <> mxArray* saveDataToMatlab(RCP& data) { @@ -1083,9 +1090,9 @@ mxArray* saveDataToMatlab(RCP& data) { throw runtime_error("Cannot store invalid aggregates in MATLAB - fewer root nodes than aggregates."); } } - dataIn[4] = mxCreateNumericArray(1, aggArrayDims, mxINT32_CLASS, mxREAL); - int* as = (int*)mxGetData(dataIn[4]); // list of aggregate sizes - ArrayRCP aggSizes = data->ComputeAggregateSizes(); + dataIn[4] = mxCreateNumericArray(1, aggArrayDims, mxINT32_CLASS, mxREAL); + int* as = (int*)mxGetData(dataIn[4]); // list of aggregate sizes + auto aggSizes = data->ComputeAggregateSizes(); for (int i = 0; i < numAggs; i++) { as[i] = aggSizes[i]; } @@ -1119,7 +1126,7 @@ mxArray* saveDataToMatlab(RCP& data) { entriesPerCol[i] = 0; } for (int i = 0; i < numRows; i++) { - ArrayView neighbors = data->getNeighborVertices(i); // neighbors has the column indices for row i + ArrayView neighbors = data->getNeighborVertices_av(i); // neighbors has the column indices for row i memcpy(iter, neighbors.getRawPtr(), sizeof(mm_LocalOrd) * neighbors.size()); entriesPerRow[i] = neighbors.size(); for (int j = 0; j < neighbors.size(); j++) { @@ -1166,9 +1173,9 @@ mxArray* saveDataToMatlab(RCP& data) { delete[] entriesPerRow; delete[] entriesPerCol; // Construct list of boundary nodes - const ArrayRCP boundaryFlags = data->GetBoundaryNodeMap(); - int numBoundaryNodes = 0; - for (int i = 0; i < boundaryFlags.size(); i++) { + auto boundaryFlags = data->GetBoundaryNodeMap(); + int numBoundaryNodes = 0; + for (int i = 0; i < 
(int)boundaryFlags.size(); i++) { if (boundaryFlags[i]) numBoundaryNodes++; } @@ -1177,7 +1184,7 @@ mxArray* saveDataToMatlab(RCP& data) { mxArray* boundaryList = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL); int* dest = (int*)mxGetData(boundaryList); int* destIter = dest; - for (int i = 0; i < boundaryFlags.size(); i++) { + for (int i = 0; i < (int)boundaryFlags.size(); i++) { if (boundaryFlags[i]) { *destIter = i; destIter++; diff --git a/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_decl.hpp b/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_decl.hpp index 8f6b966e7bf1..0321a8ba5994 100644 --- a/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_decl.hpp +++ b/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_decl.hpp @@ -27,8 +27,6 @@ #include "MueLu_Utilities_fwd.hpp" #ifdef HAVE_MUELU_MATLAB -#include "mex.h" - namespace MueLu { /*! @class SingleLevelMatlabFactory diff --git a/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_def.hpp b/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_def.hpp index e5da86652aec..1fb49d3e7b41 100644 --- a/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_def.hpp +++ b/packages/muelu/matlab/src/MueLu_SingleLevelMatlabFactory_def.hpp @@ -19,7 +19,6 @@ #include "MueLu_MatlabUtils_decl.hpp" #ifdef HAVE_MUELU_MATLAB -#include "mex.h" namespace MueLu { @@ -79,7 +78,6 @@ std::string SingleLevelMatlabFactory: } // namespace MueLu -#define MUELU_SINGLELEVELMATLABFACTORY_SHORT #endif // HAVE_MUELU_MATLAB #endif // MUELU_SINGLELEVELMATLABFACTORY_DEF_HPP diff --git a/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_decl.hpp b/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_decl.hpp index 1e29e6d46665..8a8890e1f065 100644 --- a/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_decl.hpp +++ b/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_decl.hpp @@ -28,7 +28,6 @@ #include "MueLu_Utilities_fwd.hpp" #ifdef HAVE_MUELU_MATLAB -#include "mex.h" namespace MueLu { 
/*! @@ -36,7 +35,10 @@ namespace MueLu { @ingroup MueMexClasses @brief Factory for interacting with Matlab */ -template +template class TwoLevelMatlabFactory : public TwoLevelFactoryBase { #undef MUELU_TWOLEVELMATLABFACTORY_SHORT #include "MueLu_UseShortNames.hpp" diff --git a/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_def.hpp b/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_def.hpp index a9b02e873e33..cf4f94611853 100644 --- a/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_def.hpp +++ b/packages/muelu/matlab/src/MueLu_TwoLevelMatlabFactory_def.hpp @@ -21,7 +21,6 @@ #include #ifdef HAVE_MUELU_MATLAB -#include "mex.h" namespace MueLu { @@ -96,7 +95,6 @@ std::string TwoLevelMatlabFactory::de } // namespace MueLu -#define MUELU_TWOLEVELMATLABFACTORY_SHORT #endif // HAVE_MUELU_MATLAB #endif // MUELU_TWOLEVELMATLABFACTORY_DEF_HPP diff --git a/packages/muelu/src/CMakeLists.txt b/packages/muelu/src/CMakeLists.txt index 99b81b1109c6..6719fb231b1d 100644 --- a/packages/muelu/src/CMakeLists.txt +++ b/packages/muelu/src/CMakeLists.txt @@ -59,6 +59,12 @@ TRIBITS_INCLUDE_DIRECTORIES(${DIR}/Utils) TRIBITS_INCLUDE_DIRECTORIES(${DIR}/Utils/ForwardDeclaration) TRIBITS_INCLUDE_DIRECTORIES(${DIR}/../adapters/xpetra) TRIBITS_INCLUDE_DIRECTORIES(${DIR}/../adapters/tpetra) +IF (${PACKAGE_NAME}_ENABLE_Stratimikos) + TRIBITS_INCLUDE_DIRECTORIES(${DIR}/../adapters/stratimikos) +ENDIF() +IF (${PACKAGE_NAME}_ENABLE_Thyra) + TRIBITS_INCLUDE_DIRECTORIES(${DIR}/../adapters/thyra) +ENDIF() IF (${PACKAGE_NAME}_ENABLE_Epetra) TRIBITS_INCLUDE_DIRECTORIES(${DIR}/../adapters/epetra) ENDIF() @@ -72,7 +78,9 @@ IF (${PACKAGE_NAME}_ENABLE_Experimental) TRIBITS_INCLUDE_DIRECTORIES(${DIR}/../research/regionMG/src) ENDIF() IF (${PACKAGE_NAME}_ENABLE_Stratimikos) - TRIBITS_INCLUDE_DIRECTORIES(${DIR}/../adapters/stratimikos) + ENDIF() +IF (TPL_ENABLE_MATLAB) + TRIBITS_INCLUDE_DIRECTORIES(${DIR}/../matlab/src) ENDIF() # Function to generate ETI (explicit template instantiation) files 
@@ -397,6 +405,9 @@ TRILINOS_CREATE_CLIENT_TEMPLATE_HEADERS(${DIR}/../adapters/tpetra NOSIERRABJAM) IF (${PACKAGE_NAME}_ENABLE_Intrepid2) TRILINOS_CREATE_CLIENT_TEMPLATE_HEADERS(${DIR}/Transfers/PCoarsen NOSIERRABJAM) ENDIF() +IF (TPL_ENABLE_MATLAB) + TRIBITS_CREATE_CLIENT_TEMPLATE_HEADERS(${DIR}/../matlab/src) +ENDIF() # Cpp file IF(${PACKAGE_NAME}_ENABLE_EXPLICIT_INSTANTIATION) @@ -425,12 +436,3 @@ TRIBITS_ADD_LIBRARY( ) # touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration # touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration -# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration -# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration -# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration -# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration -# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration -# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration -# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration -# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration -# touch CMakeLists.txt because a new file was created in Utils/ExplicitInstantiation of Utils/ForwardDeclaration diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp index 1bb2fa1b1648..087a3de61020 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp +++ 
b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CutDrop.hpp @@ -423,37 +423,6 @@ class ScaledDistanceLaplacianComparison { } }; -template -KOKKOS_INLINE_FUNCTION void serialHeapSort(view_type& v, comparator_type comparator) { - auto N = v.extent(0); - size_t start = N / 2; - size_t end = N; - while (end > 1) { - if (start > 0) - start = start - 1; - else { - end = end - 1; - auto temp = v(0); - v(0) = v(end); - v(end) = temp; - } - size_t root = start; - while (2 * root + 1 < end) { - size_t child = 2 * root + 1; - if ((child + 1 < end) and (comparator(v(child), v(child + 1)))) - ++child; - - if (comparator(v(root), v(child))) { - auto temp = v(root); - v(root) = v(child); - v(child) = temp; - root = child; - } else - break; - } - } -} - /*! @class CutDropFunctor @brief Order each row by a criterion, compare the ratio of values and drop all entries once the ratio is below the threshold. @@ -499,7 +468,7 @@ class CutDropFunctor { for (size_t i = 0; i < nnz; ++i) { row_permutation(i) = i; } - serialHeapSort(row_permutation, comparator); + Misc::serialHeapSort(row_permutation, comparator); size_t keepStart = 0; size_t dropStart = nnz; diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp index dd371c124fcd..1b45ff16d18d 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_DroppingCommon.hpp @@ -21,7 +21,7 @@ namespace MueLu { Once we are done with dropping, we should have no UNDECIDED entries left. Normally, both DROP and BOUNDARY entries will be dropped, but we distinguish them in case we want to keep boundaries. 
*/ -enum DecisionType { +enum DecisionType : char { UNDECIDED = 0, // no decision has been taken yet, used for initialization KEEP = 1, // keeep the entry DROP = 2, // drop it @@ -402,6 +402,37 @@ class SymmetrizeFunctor { } }; +template +KOKKOS_INLINE_FUNCTION void serialHeapSort(view_type& v, comparator_type comparator) { + auto N = v.extent(0); + size_t start = N / 2; + size_t end = N; + while (end > 1) { + if (start > 0) + start = start - 1; + else { + end = end - 1; + auto temp = v(0); + v(0) = v(end); + v(end) = temp; + } + size_t root = start; + while (2 * root + 1 < end) { + size_t child = 2 * root + 1; + if ((child + 1 < end) and (comparator(v(child), v(child + 1)))) + ++child; + + if (comparator(v(root), v(child))) { + auto temp = v(root); + v(root) = v(child); + v(child) = temp; + root = child; + } else + break; + } + } +} + } // namespace Misc } // namespace MueLu diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp index 1a5f2729c72e..ead5b407e2f8 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_MatrixConstruction.hpp @@ -360,6 +360,55 @@ class PointwiseFillNoReuseFunctor { } }; +template +class BlockRowComparison { + public: + using local_ordinal_type = typename local_matrix_type::ordinal_type; + using memory_space = typename local_matrix_type::memory_space; + using block_indices_view_type = Kokkos::View; + + local_matrix_type A; + local_ordinal_type bsize; + block_indices_view_type ghosted_point_to_block; + + public: + BlockRowComparison(local_matrix_type& A_, local_ordinal_type bsize_, block_indices_view_type ghosted_point_to_block_) + : A(A_) + , bsize(bsize_) + , ghosted_point_to_block(ghosted_point_to_block_) {} + + template + struct Comparator { + private: + using local_ordinal_type = typename local_matrix_type2::ordinal_type; + using 
memory_space = typename local_matrix_type2::memory_space; + using block_indices_view_type = Kokkos::View; + + const local_matrix_type2 A; + const local_ordinal_type offset; + const block_indices_view_type ghosted_point_to_block; + + public: + KOKKOS_INLINE_FUNCTION + Comparator(const local_matrix_type2& A_, local_ordinal_type bsize_, local_ordinal_type brlid_, block_indices_view_type ghosted_point_to_block_) + : A(A_) + , offset(A_.graph.row_map(bsize_ * brlid_)) + , ghosted_point_to_block(ghosted_point_to_block_) {} + + KOKKOS_INLINE_FUNCTION + bool operator()(size_t x, size_t y) const { + return ghosted_point_to_block(A.graph.entries(offset + x)) < ghosted_point_to_block(A.graph.entries(offset + y)); + } + }; + + using comparator_type = Comparator; + + KOKKOS_INLINE_FUNCTION + comparator_type getComparator(local_ordinal_type brlid) const { + return comparator_type(A, bsize, brlid, ghosted_point_to_block); + } +}; + /*! @class VectorCountingFunctor @brief Functor that executes a sequence of sub-functors on each block of rows. @@ -380,6 +429,7 @@ class VectorCountingFunctor { using memory_space = typename local_matrix_type::memory_space; using results_view = Kokkos::View; using block_indices_view_type = Kokkos::View; + using permutation_type = Kokkos::View; using rowptr_type = typename local_matrix_type::row_map_type::non_const_type; using ATS = Kokkos::ArithTraits; @@ -392,6 +442,10 @@ class VectorCountingFunctor { rowptr_type graph_rowptr; functor_type functor; + + BlockRowComparison comparison; + permutation_type permutation; + VectorCountingFunctor remainingFunctors; std::vector functorNames; @@ -405,7 +459,9 @@ class VectorCountingFunctor { , filtered_rowptr(filtered_rowptr_) , graph_rowptr(graph_rowptr_) , functor(functor_) + , comparison(BlockRowComparison(A, blockSize_, ghosted_point_to_block)) , remainingFunctors(A_, blockSize_, ghosted_point_to_block_, results_, filtered_rowptr_, graph_rowptr_, remainingFunctors_...) 
{ + permutation = permutation_type("permutation", A.nnz()); #ifdef MUELU_COALESCE_DROP_DEBUG std::string mangledFunctorName = typeid(decltype(functor)).name(); int status = 0; @@ -495,40 +551,41 @@ class VectorCountingFunctor { Kokkos::printf("Done with block row %d\nGraph indices ", brlid); #endif - local_ordinal_type* nextIndices = new local_ordinal_type[blockSize]; - for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { - nextIndices[block_index] = 0; - } + // column lids for all rows in the block + auto block_clids = Kokkos::subview(A.graph.entries, Kokkos::make_pair(A.graph.row_map(blockSize * brlid), + A.graph.row_map(blockSize * (brlid + 1)))); + // set up a permutation index + auto block_permutation = Kokkos::subview(permutation, Kokkos::make_pair(A.graph.row_map(blockSize * brlid), + A.graph.row_map(blockSize * (brlid + 1)))); + for (size_t i = 0; i < block_permutation.extent(0); ++i) + block_permutation(i) = i; + // get permutation for sorted column indices of the entire block + auto comparator = comparison.getComparator(brlid); + Misc::serialHeapSort(block_permutation, comparator); + local_ordinal_type prev_bclid = -1; - while (true) { - local_ordinal_type min_block_index = -1; - local_ordinal_type min_clid = ATS::max(); - local_ordinal_type min_offset = -1; - for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { - auto rlid = blockSize * brlid + block_index; - auto offset = A.graph.row_map(rlid) + nextIndices[block_index]; - if (offset == A.graph.row_map(rlid + 1)) - continue; - auto clid = A.graph.entries(offset); - if (clid < min_clid) { - min_block_index = block_index; - min_clid = clid; - min_offset = offset; - } - } - if (min_block_index == -1) - break; - ++nextIndices[min_block_index]; - auto bclid = ghosted_point_to_block(min_clid); - if (prev_bclid < bclid) { - if (results(min_offset) == KEEP) { - ++(*nnz_graph); + bool alreadyAdded = false; + + // loop over all sorted entries in block
+ auto offset = A.graph.row_map(blockSize * brlid); + for (size_t i = 0; i < block_permutation.extent(0); ++i) { + auto idx = offset + block_permutation(i); + auto clid = A.graph.entries(idx); + auto bclid = ghosted_point_to_block(clid); + + // unseen block column index + if (bclid > prev_bclid) + alreadyAdded = false; + + // add entry to graph + if (!alreadyAdded && (results(idx) == KEEP)) { + ++(*nnz_graph); + alreadyAdded = true; #ifdef MUELU_COALESCE_DROP_DEBUG - Kokkos::printf("%5d ", bclid); + Kokkos::printf("%5d ", bclid); #endif - prev_bclid = bclid; - } } + prev_bclid = bclid; } #ifdef MUELU_COALESCE_DROP_DEBUG Kokkos::printf("\n"); @@ -547,6 +604,7 @@ class VectorCountingFunctor { using memory_space = typename local_matrix_type::memory_space; using results_view = Kokkos::View; using block_indices_view_type = Kokkos::View; + using permutation_type = Kokkos::View; using rowptr_type = typename local_matrix_type::row_map_type::non_const_type; using ATS = Kokkos::ArithTraits; @@ -563,6 +621,9 @@ class VectorCountingFunctor { std::vector functorNames; + BlockRowComparison comparison; + permutation_type permutation; + public: VectorCountingFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, rowptr_type& filtered_rowptr_, rowptr_type& graph_rowptr_, functor_type& functor_) : A(A_) @@ -571,7 +632,9 @@ class VectorCountingFunctor { , results(results_) , filtered_rowptr(filtered_rowptr_) , graph_rowptr(graph_rowptr_) - , functor(functor_) { + , functor(functor_) + , comparison(BlockRowComparison(A, blockSize_, ghosted_point_to_block)) { + permutation = permutation_type("permutation", A.nnz()); #ifdef MUELU_COALESCE_DROP_DEBUG std::string mangledFunctorName = typeid(decltype(functor)).name(); int status = 0; @@ -659,40 +722,41 @@ class VectorCountingFunctor { Kokkos::printf("Done with block row %d\nGraph indices ", brlid); #endif - local_ordinal_type* nextIndices = new 
local_ordinal_type[blockSize]; - for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { - nextIndices[block_index] = 0; - } + // column lids for all rows in the block + auto block_clids = Kokkos::subview(A.graph.entries, Kokkos::make_pair(A.graph.row_map(blockSize * brlid), + A.graph.row_map(blockSize * (brlid + 1)))); + // set up a permutation index + auto block_permutation = Kokkos::subview(permutation, Kokkos::make_pair(A.graph.row_map(blockSize * brlid), + A.graph.row_map(blockSize * (brlid + 1)))); + for (size_t i = 0; i < block_permutation.extent(0); ++i) + block_permutation(i) = i; + // get permutation for sorted column indices of the entire block + auto comparator = comparison.getComparator(brlid); + Misc::serialHeapSort(block_permutation, comparator); + local_ordinal_type prev_bclid = -1; - while (true) { - local_ordinal_type min_block_index = -1; - local_ordinal_type min_clid = ATS::max(); - local_ordinal_type min_offset = -1; - for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { - auto rlid = blockSize * brlid + block_index; - auto offset = A.graph.row_map(rlid) + nextIndices[block_index]; - if (offset == A.graph.row_map(rlid + 1)) - continue; - auto clid = A.graph.entries(offset); - if (clid < min_clid) { - min_block_index = block_index; - min_clid = clid; - min_offset = offset; - } - } - if (min_block_index == -1) - break; - ++nextIndices[min_block_index]; - auto bclid = ghosted_point_to_block(min_clid); - if (prev_bclid < bclid) { - if (results(min_offset) == KEEP) { - ++(*nnz_graph); + bool alreadyAdded = false; + + // loop over all sorted entries in block + auto offset = A.graph.row_map(blockSize * brlid); + for (size_t i = 0; i < block_permutation.extent(0); ++i) { + auto idx = offset + block_permutation(i); + auto clid = A.graph.entries(idx); + auto bclid = ghosted_point_to_block(clid); + + // unseen block column index + if (bclid > prev_bclid) + alreadyAdded = false; + + // add entry to 
graph + if (!alreadyAdded && (results(idx) == KEEP)) { + ++(*nnz_graph); + alreadyAdded = true; #ifdef MUELU_COALESCE_DROP_DEBUG - Kokkos::printf("%5d ", bclid); + Kokkos::printf("%5d ", bclid); #endif - prev_bclid = bclid; - } } + prev_bclid = bclid; } #ifdef MUELU_COALESCE_DROP_DEBUG Kokkos::printf("\n"); @@ -720,6 +784,7 @@ class VectorFillFunctor { using ATS = Kokkos::ArithTraits; using OTS = Kokkos::ArithTraits; using block_indices_view_type = Kokkos::View; + using permutation_type = Kokkos::View; local_matrix_type A; local_ordinal_type blockSize; @@ -729,6 +794,9 @@ class VectorFillFunctor { local_graph_type graph; const scalar_type zero = ATS::zero(); + BlockRowComparison comparison; + permutation_type permutation; + public: VectorFillFunctor(local_matrix_type& A_, local_ordinal_type blockSize_, block_indices_view_type ghosted_point_to_block_, results_view& results_, local_matrix_type& filteredA_, local_graph_type& graph_) : A(A_) @@ -736,7 +804,10 @@ class VectorFillFunctor { , ghosted_point_to_block(ghosted_point_to_block_) , results(results_) , filteredA(filteredA_) - , graph(graph_) {} + , graph(graph_) + , comparison(BlockRowComparison(A, blockSize_, ghosted_point_to_block)) { + permutation = permutation_type("permutation", A.nnz()); + } KOKKOS_INLINE_FUNCTION void operator()(const local_ordinal_type brlid) const { @@ -776,40 +847,40 @@ class VectorFillFunctor { } } - local_ordinal_type* nextIndices = new local_ordinal_type[blockSize]; - for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { - nextIndices[block_index] = 0; - } - local_ordinal_type prev_bclid = -1; + // column lids for all rows in the block + auto block_clids = Kokkos::subview(A.graph.entries, Kokkos::make_pair(A.graph.row_map(blockSize * brlid), + A.graph.row_map(blockSize * (brlid + 1)))); + // set up a permutation index + auto block_permutation = Kokkos::subview(permutation, Kokkos::make_pair(A.graph.row_map(blockSize * brlid), + A.graph.row_map(blockSize
* (brlid + 1)))); + for (size_t i = 0; i < block_permutation.extent(0); ++i) + block_permutation(i) = i; + // get permutation for sorted column indices of the entire block + auto comparator = comparison.getComparator(brlid); + Misc::serialHeapSort(block_permutation, comparator); - local_ordinal_type j = graph.row_map(brlid); - while (true) { - local_ordinal_type min_block_index = -1; - local_ordinal_type min_clid = OTS::max(); - local_ordinal_type min_offset = -1; - for (local_ordinal_type block_index = 0; block_index < blockSize; ++block_index) { - auto rlid = blockSize * brlid + block_index; - auto offset = A.graph.row_map(rlid) + nextIndices[block_index]; - if (offset == A.graph.row_map(rlid + 1)) - continue; - auto clid = A.graph.entries(offset); - if (clid < min_clid) { - min_block_index = block_index; - min_clid = clid; - min_offset = offset; - } - } - if (min_block_index == -1) - break; - ++nextIndices[min_block_index]; - auto bclid = ghosted_point_to_block(min_clid); - if (prev_bclid < bclid) { - if (results(min_offset) == KEEP) { - graph.entries(j) = bclid; - ++j; - prev_bclid = bclid; - } + local_ordinal_type prev_bclid = -1; + bool alreadyAdded = false; + local_ordinal_type j = graph.row_map(brlid); + + // loop over all sorted entries in block + auto offset = A.graph.row_map(blockSize * brlid); + for (size_t i = 0; i < block_permutation.extent(0); ++i) { + auto idx = offset + block_permutation(i); + auto clid = A.graph.entries(idx); + auto bclid = ghosted_point_to_block(clid); + + // unseen block column index + if (bclid > prev_bclid) + alreadyAdded = false; + + // add entry to graph + if (!alreadyAdded && (results(idx) == KEEP)) { + graph.entries(j) = bclid; + ++j; + alreadyAdded = true; } + prev_bclid = bclid; } } }; diff --git a/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp b/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp index a5bf3f68b544..97a7123ada8e 100644 --- a/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp +++ 
b/packages/muelu/src/Headers/MueLu_UseShortNamesScalar.hpp @@ -455,12 +455,12 @@ using MultiPhys [[maybe_unused]] = MueLu::MultiPhys; #endif -#ifdef MUELU_TWOLEVELMATLABFACTORY_SHORT -typedef MueLu::TwoLevelMatlabFactory TwoLevelMatlabFactory; -#endif #ifdef MUELU_SINGLELEVELMATLABFACTORY_SHORT -typedef MueLu::SingleLevelMatlabFactory SingleLevelMatlabFactory; +using SingleLevelMatlabFactory [[maybe_unused]] = MueLu::SingleLevelMatlabFactory; +#endif +#ifdef MUELU_TWOLEVELMATLABFACTORY_SHORT +using TwoLevelMatlabFactory [[maybe_unused]] = MueLu::TwoLevelMatlabFactory; #endif #ifdef MUELU_MATLABSMOOTHER_SHORT -typedef MueLu::MatlabSmoother MatlabSmoother; +using MatlabSmoother [[maybe_unused]] = MueLu::MatlabSmoother; #endif diff --git a/packages/muelu/src/Headers/gen_UseShortNames.sh b/packages/muelu/src/Headers/gen_UseShortNames.sh index 7bbe12dc5e1e..58158a4dd86f 100755 --- a/packages/muelu/src/Headers/gen_UseShortNames.sh +++ b/packages/muelu/src/Headers/gen_UseShortNames.sh @@ -67,14 +67,3 @@ echo "#endif" >> MueLu_UseShortNamesOrdinal.hpp echo "#ifdef MUELU_IFPACKSMOOTHER_SHORT" >> MueLu_UseShortNamesOrdinal.hpp echo "typedef MueLu::IfpackSmoother IfpackSmoother;" >> MueLu_UseShortNamesOrdinal.hpp echo "#endif" >> MueLu_UseShortNamesOrdinal.hpp - -# Add the matlab utilities to end of file -echo "#ifdef MUELU_TWOLEVELMATLABFACTORY_SHORT" >> MueLu_UseShortNamesScalar.hpp -echo "typedef MueLu::TwoLevelMatlabFactory TwoLevelMatlabFactory;" >> MueLu_UseShortNamesScalar.hpp -echo "#endif" >> MueLu_UseShortNamesScalar.hpp -echo "#ifdef MUELU_SINGLELEVELMATLABFACTORY_SHORT" >> MueLu_UseShortNamesScalar.hpp -echo "typedef MueLu::SingleLevelMatlabFactory SingleLevelMatlabFactory;" >> MueLu_UseShortNamesScalar.hpp -echo "#endif" >> MueLu_UseShortNamesScalar.hpp -echo "#ifdef MUELU_MATLABSMOOTHER_SHORT" >> MueLu_UseShortNamesScalar.hpp -echo "typedef MueLu::MatlabSmoother MatlabSmoother;" >> MueLu_UseShortNamesScalar.hpp -echo "#endif" >> MueLu_UseShortNamesScalar.hpp 
diff --git a/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp b/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp index 5176bc2fb677..bde0c5e4eaee 100644 --- a/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp +++ b/packages/muelu/src/Interface/MueLu_FactoryFactory_decl.hpp @@ -134,6 +134,12 @@ #include "MueLu_MatrixFreeTentativePFactory_fwd.hpp" #include "MueLu_RegionRFactory_kokkos_fwd.hpp" +#ifdef HAVE_MUELU_MATLAB +#include "MueLu_SingleLevelMatlabFactory_fwd.hpp" +#include "MueLu_TwoLevelMatlabFactory_fwd.hpp" +#include "MueLu_MatlabSmoother_fwd.hpp" +#endif + #ifdef HAVE_MUELU_INTREPID2 #include "MueLu_IntrepidPCoarsenFactory_fwd.hpp" #endif diff --git a/packages/muelu/src/Interface/MueLu_FactoryFactory_def.hpp b/packages/muelu/src/Interface/MueLu_FactoryFactory_def.hpp index 0694928ceceb..1b959f055f8a 100644 --- a/packages/muelu/src/Interface/MueLu_FactoryFactory_def.hpp +++ b/packages/muelu/src/Interface/MueLu_FactoryFactory_def.hpp @@ -114,13 +114,9 @@ #include "MueLu_RegionRFactory_kokkos.hpp" #ifdef HAVE_MUELU_MATLAB -// This is distasteful, but (sadly) neccesary due to peculiarities in MueLu's build system. 
-#include "../matlab/src/MueLu_SingleLevelMatlabFactory_decl.hpp" -#include "../matlab/src/MueLu_SingleLevelMatlabFactory_def.hpp" -#include "../matlab/src/MueLu_TwoLevelMatlabFactory_decl.hpp" -#include "../matlab/src/MueLu_TwoLevelMatlabFactory_def.hpp" -#include "../matlab/src/MueLu_MatlabSmoother_decl.hpp" -#include "../matlab/src/MueLu_MatlabSmoother_def.hpp" +#include "MueLu_SingleLevelMatlabFactory.hpp" +#include "MueLu_TwoLevelMatlabFactory.hpp" +#include "MueLu_MatlabSmoother.hpp" #endif #ifdef HAVE_MUELU_INTREPID2 @@ -293,8 +289,8 @@ RCP FactoryFactory // Matlab factories #ifdef HAVE_MUELU_MATLAB - if (factoryName == "TwoLevelMatlabFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); if (factoryName == "SingleLevelMatlabFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); + if (factoryName == "TwoLevelMatlabFactory") return Build2(paramList, factoryMapIn, factoryManagersIn); if (factoryName == "MatlabSmoother") return BuildMatlabSmoother(paramList, factoryMapIn, factoryManagersIn); #endif @@ -592,9 +588,9 @@ RCP FactoryFactory::Buil } #ifdef HAVE_MUELU_MATLAB -FactoryFactory:: - RCP - FactoryFactory::BuildMatlabSmoother(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { +template +RCP +FactoryFactory::BuildMatlabSmoother(const Teuchos::ParameterList& paramList, const FactoryMap& factoryMapIn, const FactoryManagerMap& factoryManagersIn) const { if (paramList.begin() == paramList.end()) return rcp(new SmootherFactory(rcp(new MatlabSmoother()))); diff --git a/packages/muelu/src/MueCentral/MueLu_MasterList.cpp b/packages/muelu/src/MueCentral/MueLu_MasterList.cpp index 658ab2275839..8fc641feeb2e 100644 --- a/packages/muelu/src/MueCentral/MueLu_MasterList.cpp +++ b/packages/muelu/src/MueCentral/MueLu_MasterList.cpp @@ -161,7 +161,7 @@ namespace MueLu { "" "" "" - "" + "" "" "" "" diff --git a/packages/muelu/src/Smoothers/MueLu_Amesos2Smoother_def.hpp 
b/packages/muelu/src/Smoothers/MueLu_Amesos2Smoother_def.hpp index a78cc24697e5..d6f0ad05c03d 100644 --- a/packages/muelu/src/Smoothers/MueLu_Amesos2Smoother_def.hpp +++ b/packages/muelu/src/Smoothers/MueLu_Amesos2Smoother_def.hpp @@ -110,10 +110,10 @@ Amesos2Smoother::Amesos2Smoother(cons // TODO: It would be great is Amesos2 provides directly this kind of logic for us if (type_ == "" || Amesos2::query(type_) == false) { std::string oldtype = type_; -#if defined(HAVE_AMESOS2_SUPERLU) - type_ = "Superlu"; -#elif defined(HAVE_AMESOS2_KLU2) +#if defined(HAVE_AMESOS2_KLU2) type_ = "Klu"; +#elif defined(HAVE_AMESOS2_SUPERLU) + type_ = "Superlu"; #elif defined(HAVE_AMESOS2_SUPERLUDIST) type_ = "Superludist"; #elif defined(HAVE_AMESOS2_BASKER) diff --git a/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList b/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList index c606287c8edf..495bda4df6a1 100644 --- a/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList +++ b/packages/muelu/src/Utils/ClassList/SC-LO-GO-NO.classList @@ -146,3 +146,6 @@ RefMaxwell Maxwell1 MultiPhys Maxwell_Utils +SingleLevelMatlabFactory - #if defined(HAVE_MUELU_MATLAB) +TwoLevelMatlabFactory - #if defined(HAVE_MUELU_MATLAB) +MatlabSmoother - #if defined(HAVE_MUELU_MATLAB) \ No newline at end of file diff --git a/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake b/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake index 20dc4095f4f0..f38b99ced3c6 100644 --- a/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake +++ b/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake @@ -48,7 +48,6 @@ APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::GeometricInterpolationPFactory ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::GeometricInterpolationPFactory_kokkos ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::GMRESSolver ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::Hierarchy ) 
-APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::HierarchyManager ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::HierarchyUtils ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::InterfaceAggregationFactory ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::Ifpack2Smoother-.?if.defined[HAVE_MUELU_IFPACK2] ) @@ -141,3 +140,6 @@ APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::RefMaxwell ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::Maxwell1 ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::MultiPhys ) APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::Maxwell_Utils ) +APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::SingleLevelMatlabFactory-.?if.defined[HAVE_MUELU_MATLAB] ) +APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::TwoLevelMatlabFactory-.?if.defined[HAVE_MUELU_MATLAB] ) +APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::MatlabSmoother-.?if.defined[HAVE_MUELU_MATLAB] ) diff --git a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MatlabSmootherFactory_fwd.hpp b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MatlabSmootherFactory_fwd.hpp new file mode 100644 index 000000000000..2589b90bfe5c --- /dev/null +++ b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MatlabSmootherFactory_fwd.hpp @@ -0,0 +1,27 @@ +// @HEADER +// ***************************************************************************** +// MueLu: A package for multigrid based preconditioning +// +// Copyright 2012 NTESS and the MueLu contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +#ifndef MUELU_MATLABSMOOTHERFACTORY_FWD_HPP +#define MUELU_MATLABSMOOTHERFACTORY_FWD_HPP + +#include "MueLu_ConfigDefs.hpp" +#if defined(HAVE_MUELU_MATLAB) + +namespace MueLu { +template +class MatlabSmootherFactory; +} + +#ifndef MUELU_MATLABSMOOTHERFACTORY_SHORT +#define MUELU_MATLABSMOOTHERFACTORY_SHORT +#endif + +#endif + +#endif // MUELU_MATLABSMOOTHERFACTORY_FWD_HPP diff --git a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MatlabSmoother_fwd.hpp b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MatlabSmoother_fwd.hpp index 47720187baa0..4ecd2a792fa1 100644 --- a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MatlabSmoother_fwd.hpp +++ b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_MatlabSmoother_fwd.hpp @@ -10,6 +10,9 @@ #ifndef MUELU_MATLABSMOOTHER_FWD_HPP #define MUELU_MATLABSMOOTHER_FWD_HPP +#include "MueLu_ConfigDefs.hpp" +#if defined(HAVE_MUELU_MATLAB) + namespace MueLu { template class MatlabSmoother; @@ -19,4 +22,6 @@ class MatlabSmoother; #define MUELU_MATLABSMOOTHER_SHORT #endif +#endif + #endif // MUELU_MATLABSMOOTHER_FWD_HPP diff --git a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_SingleLevelMatlabFactory_fwd.hpp b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_SingleLevelMatlabFactory_fwd.hpp index ef3c9dba35dc..b8a5644661dd 100644 --- a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_SingleLevelMatlabFactory_fwd.hpp +++ b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_SingleLevelMatlabFactory_fwd.hpp @@ -10,6 +10,9 @@ #ifndef MUELU_SINGLELEVELMATLABFACTORY_FWD_HPP #define MUELU_SINGLELEVELMATLABFACTORY_FWD_HPP +#include "MueLu_ConfigDefs.hpp" +#if defined(HAVE_MUELU_MATLAB) + namespace MueLu { template class SingleLevelMatlabFactory; @@ -19,4 +22,6 @@ class SingleLevelMatlabFactory; #define MUELU_SINGLELEVELMATLABFACTORY_SHORT #endif +#endif + #endif // 
MUELU_SINGLELEVELMATLABFACTORY_FWD_HPP diff --git a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_TwoLevelMatlabFactory_fwd.hpp b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_TwoLevelMatlabFactory_fwd.hpp index bd0a64152247..035ff6bfe317 100644 --- a/packages/muelu/src/Utils/ForwardDeclaration/MueLu_TwoLevelMatlabFactory_fwd.hpp +++ b/packages/muelu/src/Utils/ForwardDeclaration/MueLu_TwoLevelMatlabFactory_fwd.hpp @@ -10,6 +10,9 @@ #ifndef MUELU_TWOLEVELMATLABFACTORY_FWD_HPP #define MUELU_TWOLEVELMATLABFACTORY_FWD_HPP +#include "MueLu_ConfigDefs.hpp" +#if defined(HAVE_MUELU_MATLAB) + namespace MueLu { template class TwoLevelMatlabFactory; @@ -19,4 +22,6 @@ class TwoLevelMatlabFactory; #define MUELU_TWOLEVELMATLABFACTORY_SHORT #endif +#endif + #endif // MUELU_TWOLEVELMATLABFACTORY_FWD_HPP diff --git a/packages/panzer/adapters-stk/test/evaluator_tests/CMakeLists.txt b/packages/panzer/adapters-stk/test/evaluator_tests/CMakeLists.txt index 079cdad0848b..c1cd8c62f902 100644 --- a/packages/panzer/adapters-stk/test/evaluator_tests/CMakeLists.txt +++ b/packages/panzer/adapters-stk/test/evaluator_tests/CMakeLists.txt @@ -71,6 +71,13 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( NUM_MPI_PROCS 2 ) +TRIBITS_ADD_EXECUTABLE_AND_TEST( + tScatterDirichletResidual_BlockedTpetra + SOURCES tpetra_blocked_scatter_dirichlet_residual.cpp ${UNIT_TEST_DRIVER} + COMM serial mpi + NUM_MPI_PROCS 2 + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( tBasisTimesVector SOURCES basis_times_vector.cpp ${UNIT_TEST_DRIVER} diff --git a/packages/panzer/adapters-stk/test/evaluator_tests/tpetra_blocked_scatter_dirichlet_residual.cpp b/packages/panzer/adapters-stk/test/evaluator_tests/tpetra_blocked_scatter_dirichlet_residual.cpp new file mode 100644 index 000000000000..37ffb58f2698 --- /dev/null +++ b/packages/panzer/adapters-stk/test/evaluator_tests/tpetra_blocked_scatter_dirichlet_residual.cpp @@ -0,0 +1,1145 @@ +// @HEADER +// ***************************************************************************** +// 
Panzer: A partial differential equation assembly +// engine for strongly coupled complex multiphysics systems +// +// Copyright 2011 NTESS and the Panzer contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +#include +#include +#include +#include + +using Teuchos::RCP; +using Teuchos::rcp; + +#include "Teuchos_DefaultComm.hpp" +#include "Teuchos_GlobalMPISession.hpp" + +#include "Panzer_FieldManagerBuilder.hpp" +#include "Panzer_BlockedDOFManager.hpp" +#include "Panzer_BlockedTpetraLinearObjFactory.hpp" +#include "Panzer_PureBasis.hpp" +#include "Panzer_BasisIRLayout.hpp" +#include "Panzer_Workset.hpp" +#include "Panzer_GatherOrientation.hpp" +#include "Panzer_ScatterDirichletResidual_BlockedTpetra.hpp" +#include "Panzer_GatherSolution_BlockedTpetra.hpp" +#include "Panzer_LOCPair_GlobalEvaluationData.hpp" +#include "Panzer_GlobalEvaluationDataContainer.hpp" +#include "Panzer_ParameterList_GlobalEvaluationData.hpp" + +#include "Panzer_STK_Version.hpp" +#include "PanzerAdaptersSTK_config.hpp" +#include "Panzer_STK_Interface.hpp" +#include "Panzer_STK_SquareQuadMeshFactory.hpp" +#include "Panzer_STK_SetupUtilities.hpp" +#include "Panzer_STKConnManager.hpp" + +#include "Teuchos_DefaultMpiComm.hpp" +#include "Teuchos_OpaqueWrapper.hpp" + +#include "Thyra_VectorStdOps.hpp" +#include "Thyra_ProductVectorBase.hpp" +#include "Thyra_SpmdVectorBase.hpp" + +#include "Tpetra_CrsMatrix.hpp" +#include "Tpetra_Map.hpp" + +#include "user_app_EquationSetFactory.hpp" + +#include // for get char +#include +#include + +#include "Panzer_Evaluator_WithBaseImpl.hpp" + +namespace panzer +{ + typedef Teuchos::ArrayRCP::size_type size_type; + + using TpetraBlockedLinObjFactoryType = panzer::BlockedTpetraLinearObjFactory; + using TpetraBlockedLinObjContainerType = panzer::BlockedTpetraLinearObjContainer; + using Tpetra_CrsMatrix = Tpetra::CrsMatrix; + using Thyra_TpetraLinearOp = 
Thyra::TpetraLinearOp; + + Teuchos::RCP buildBasis(std::size_t worksetSize, const std::string &basisName); + void testInitialization(const Teuchos::RCP &ipb); + Teuchos::RCP buildMesh(int elemX, int elemY); + + TEUCHOS_UNIT_TEST(block_assembly, scatter_dirichlet_residual) + { + +#ifdef HAVE_MPI + Teuchos::RCP> tComm = Teuchos::rcp(new Teuchos::MpiComm(MPI_COMM_WORLD)); +#else + NOPE_PANZER_DOESNT_SUPPORT_SERIAL +#endif + + int myRank = tComm->getRank(); + + const std::size_t workset_size = 4; + const std::string fieldName1_q1 = "U"; + const std::string fieldName2_q1 = "V"; + const std::string fieldName_qedge1 = "B"; + + Teuchos::RCP mesh = buildMesh(2, 2); + + // build input physics block + Teuchos::RCP basis_q1 = buildBasis(workset_size, "Q1"); + Teuchos::RCP basis_qedge1 = buildBasis(workset_size, "QEdge1"); + + Teuchos::RCP ipb = Teuchos::parameterList(); + testInitialization(ipb); + + const int default_int_order = 1; + std::string eBlockID = "eblock-0_0"; + Teuchos::RCP eqset_factory = Teuchos::rcp(new user_app::MyFactory); + panzer::CellData cellData(workset_size, mesh->getCellTopology("eblock-0_0")); + Teuchos::RCP gd = panzer::createGlobalData(); + Teuchos::RCP physicsBlock = + Teuchos::rcp(new PhysicsBlock(ipb, eBlockID, default_int_order, cellData, eqset_factory, gd, false)); + + Teuchos::RCP> work_sets = panzer_stk::buildWorksets(*mesh, physicsBlock->elementBlockID(), + physicsBlock->getWorksetNeeds()); + TEST_EQUALITY(work_sets->size(), 1); + + // build connection manager and field manager + const Teuchos::RCP conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh)); + RCP dofManager = Teuchos::rcp(new panzer::BlockedDOFManager(conn_manager, MPI_COMM_WORLD)); + + dofManager->addField(fieldName1_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName2_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName_qedge1, 
Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_qedge1->getIntrepid2Basis()))); + + std::vector> fieldOrder(3); + fieldOrder[0].push_back(fieldName1_q1); + fieldOrder[1].push_back(fieldName_qedge1); + fieldOrder[2].push_back(fieldName2_q1); + dofManager->setFieldOrder(fieldOrder); + + // dofManager->setOrientationsRequired(true); + dofManager->buildGlobalUnknowns(); + + // setup linear object factory + ///////////////////////////////////////////////////////////// + + Teuchos::RCP bt_lof = Teuchos::rcp(new TpetraBlockedLinObjFactoryType(tComm.getConst(), dofManager)); + Teuchos::RCP> lof = bt_lof; + Teuchos::RCP dd_loc = bt_lof->buildGhostedLinearObjContainer(); + Teuchos::RCP loc = bt_lof->buildGhostedLinearObjContainer(); + bt_lof->initializeGhostedContainer(LinearObjContainer::F, *dd_loc); + dd_loc->initialize(); + + bt_lof->initializeGhostedContainer(LinearObjContainer::X | LinearObjContainer::F, *loc); + loc->initialize(); + + Teuchos::RCP b_dd_loc = Teuchos::rcp_dynamic_cast(dd_loc); + Teuchos::RCP b_loc = Teuchos::rcp_dynamic_cast(loc); + Teuchos::RCP> p_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_x()); + Thyra::assign(p_vec->getNonconstVectorBlock(0).ptr(), 123.0 + myRank); + Thyra::assign(p_vec->getNonconstVectorBlock(1).ptr(), 456.0 + myRank); + Thyra::assign(p_vec->getNonconstVectorBlock(2).ptr(), 789.0 + myRank); + + // setup field manager, add evaluator under test + ///////////////////////////////////////////////////////////// + + PHX::FieldManager fm; + + std::string resName = ""; + Teuchos::RCP> names_map = + Teuchos::rcp(new std::map); + names_map->insert(std::make_pair(fieldName1_q1, resName + fieldName1_q1)); + names_map->insert(std::make_pair(fieldName2_q1, resName + fieldName2_q1)); + names_map->insert(std::make_pair(fieldName_qedge1, resName + fieldName_qedge1)); + + // evaluators under test + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName1_q1); + 
names->push_back(resName + fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Scatter Name", "ScatterQ1"); + pl.set("Basis", basis_q1); + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", false); + + Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm.registerEvaluator(evaluator); + fm.requireField(*evaluator->evaluatedFields()[0]); + } + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Scatter Name", "ScatterQEdge1"); + pl.set("Basis", basis_qedge1); + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", false); + + Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm.registerEvaluator(evaluator); + fm.requireField(*evaluator->evaluatedFields()[0]); + } + + // support evaluators + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName1_q1); + names->push_back(fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_q1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + fm.registerEvaluator(evaluator); + } + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_qedge1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + fm.registerEvaluator(evaluator); + } + + std::vector derivative_dimensions; + derivative_dimensions.push_back(12); + 
fm.setKokkosExtendedDataTypeDimensions(derivative_dimensions); + + panzer::Traits::SD sd; + sd.worksets_ = work_sets; + + fm.postRegistrationSetup(sd); + + // panzer::Traits::PED ped; + // ped.dirichletData.ghostedCounter = dd_loc; + // fm.preEvaluate(ped); + panzer::Traits::PED ped; + ped.gedc->addDataObject("Dirichlet Counter", dd_loc); + ped.gedc->addDataObject("Solution Gather Container", loc); + ped.gedc->addDataObject("Residual Scatter Container", loc); + fm.preEvaluate(ped); + + // run tests + ///////////////////////////////////////////////////////////// + + panzer::Workset &workset = (*work_sets)[0]; + workset.alpha = 0.0; + workset.beta = 2.0; // derivatives multiplied by 2 + workset.time = 0.0; + workset.evaluate_transient_terms = false; + + fm.evaluateFields(workset); + + // test Residual fields + panzer::index_t dd_count(0); + Teuchos::ArrayRCP data, dd_data; + Teuchos::RCP> f_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_f()); + Teuchos::RCP> dd_vec = Teuchos::rcp_dynamic_cast>(b_dd_loc->get_f()); + + // check all the residual values. This is kind of crappy test since it simply checks twice the target + // value and the target. Its this way because you add two entries across elements. 
+ + Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(data)); + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(0)->getLocalNumElements()); + TEST_EQUALITY(data.size(), dd_data.size()); + dd_count = 0; + for (int i = 0; i < data.size(); i++) + { + + double target = 123.0 + myRank; + if (dd_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dd_count++; + } + } + TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared + + Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(data)); + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(1)->getLocalNumElements()); + TEST_EQUALITY(data.size(), dd_data.size()); + dd_count = 0; + for (int i = 0; i < data.size(); i++) + { + + double target = 456.0 + myRank; + if (dd_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dd_count++; + } + } + TEST_EQUALITY(dd_count, workset.num_cells); // there are 2 nodes on the side and the sides are not shared + + Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(data)); + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(2)->getLocalNumElements()); + TEST_EQUALITY(data.size(), dd_data.size()); + dd_count = 0; + for (int i = 0; i < data.size(); i++) + { + + double target = 789.0 + myRank; + if (dd_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dd_count++; + } + } + TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side 
and the sides are not shared + } + + TEUCHOS_UNIT_TEST(block_assembly, scatter_dirichlet_jacobian) + { + +#ifdef HAVE_MPI + Teuchos::RCP> tComm = Teuchos::rcp(new Teuchos::MpiComm(MPI_COMM_WORLD)); +#else + NOPE_PANZER_DOESNT_SUPPORT_SERIAL +#endif + + int myRank = tComm->getRank(); + + const std::size_t workset_size = 4; + const std::string fieldName1_q1 = "U"; + const std::string fieldName2_q1 = "V"; + const std::string fieldName_qedge1 = "B"; + + Teuchos::RCP mesh = buildMesh(2, 2); + + // build input physics block + Teuchos::RCP basis_q1 = buildBasis(workset_size, "Q1"); + Teuchos::RCP basis_qedge1 = buildBasis(workset_size, "QEdge1"); + + Teuchos::RCP ipb = Teuchos::parameterList(); + testInitialization(ipb); + + const int default_int_order = 1; + std::string eBlockID = "eblock-0_0"; + Teuchos::RCP eqset_factory = Teuchos::rcp(new user_app::MyFactory); + panzer::CellData cellData(workset_size, mesh->getCellTopology("eblock-0_0")); + Teuchos::RCP gd = panzer::createGlobalData(); + Teuchos::RCP physicsBlock = + Teuchos::rcp(new PhysicsBlock(ipb, eBlockID, default_int_order, cellData, eqset_factory, gd, false)); + + Teuchos::RCP> work_sets = panzer_stk::buildWorksets(*mesh, physicsBlock->elementBlockID(), + physicsBlock->getWorksetNeeds()); + TEST_EQUALITY(work_sets->size(), 1); + + // build connection manager and field manager + const Teuchos::RCP conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh)); + RCP dofManager = Teuchos::rcp(new panzer::BlockedDOFManager(conn_manager, MPI_COMM_WORLD)); + + dofManager->addField(fieldName1_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName2_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName_qedge1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_qedge1->getIntrepid2Basis()))); + + std::vector> fieldOrder(3); + fieldOrder[0].push_back(fieldName1_q1); + 
fieldOrder[1].push_back(fieldName_qedge1); + fieldOrder[2].push_back(fieldName2_q1); + dofManager->setFieldOrder(fieldOrder); + + // dofManager->setOrientationsRequired(true); + dofManager->buildGlobalUnknowns(); + + // setup linear object factory + ///////////////////////////////////////////////////////////// + + Teuchos::RCP bt_lof = Teuchos::rcp(new TpetraBlockedLinObjFactoryType(tComm.getConst(), dofManager)); + Teuchos::RCP> lof = bt_lof; + Teuchos::RCP dd_loc = bt_lof->buildGhostedLinearObjContainer(); + Teuchos::RCP loc = bt_lof->buildGhostedLinearObjContainer(); + bt_lof->initializeGhostedContainer(LinearObjContainer::F, *dd_loc); + dd_loc->initialize(); + + bt_lof->initializeGhostedContainer(LinearObjContainer::X | LinearObjContainer::F | LinearObjContainer::Mat, *loc); + loc->initialize(); + + Teuchos::RCP b_dd_loc = Teuchos::rcp_dynamic_cast(dd_loc); + Teuchos::RCP b_loc = Teuchos::rcp_dynamic_cast(loc); + Teuchos::RCP> p_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_x()); + Thyra::assign(p_vec->getNonconstVectorBlock(0).ptr(), 123.0 + myRank); + Thyra::assign(p_vec->getNonconstVectorBlock(1).ptr(), 456.0 + myRank); + Thyra::assign(p_vec->getNonconstVectorBlock(2).ptr(), 789.0 + myRank); + + auto blk_A = Teuchos::rcp_dynamic_cast>(b_loc->get_A()); + double values[] = {123.0 + myRank, 456.0 + myRank, 789.0 + myRank}; + + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + { + auto thyraOp = Teuchos::rcp_dynamic_cast(blk_A->getNonconstBlock(i, j), false); + auto tpetraCrsMatrix = Teuchos::rcp_dynamic_cast(thyraOp->getTpetraOperator(), true); + tpetraCrsMatrix->setAllToScalar(values[i] * values[j]); + } + + // setup field manager, add evaluator under test + ///////////////////////////////////////////////////////////// + + PHX::FieldManager fm; + + std::string resName = ""; + Teuchos::RCP> names_map = + Teuchos::rcp(new std::map); + names_map->insert(std::make_pair(fieldName1_q1, resName + fieldName1_q1)); + 
names_map->insert(std::make_pair(fieldName2_q1, resName + fieldName2_q1)); + names_map->insert(std::make_pair(fieldName_qedge1, resName + fieldName_qedge1)); + + // evaluators under test + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName1_q1); + names->push_back(resName + fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Scatter Name", "ScatterQ1"); + pl.set("Basis", basis_q1); + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", false); + + Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm.registerEvaluator(evaluator); + fm.requireField(*evaluator->evaluatedFields()[0]); + } + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Scatter Name", "ScatterQEdge1"); + pl.set("Basis", basis_qedge1); + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", false); + + Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm.registerEvaluator(evaluator); + fm.requireField(*evaluator->evaluatedFields()[0]); + } + + // support evaluators + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName1_q1); + names->push_back(fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_q1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + fm.registerEvaluator(evaluator); + } + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + 
names->push_back(fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_qedge1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + fm.registerEvaluator(evaluator); + } + + std::vector derivative_dimensions; + derivative_dimensions.push_back(12); + fm.setKokkosExtendedDataTypeDimensions(derivative_dimensions); + + panzer::Traits::SD sd; + sd.worksets_ = work_sets; + + fm.postRegistrationSetup(sd); + + // panzer::Traits::PED ped; + // ped.dirichletData.ghostedCounter = dd_loc; + // fm.preEvaluate(ped); + panzer::Traits::PED ped; + ped.gedc->addDataObject("Dirichlet Counter", dd_loc); + ped.gedc->addDataObject("Solution Gather Container", loc); + ped.gedc->addDataObject("Residual Scatter Container", loc); + fm.preEvaluate(ped); + + // run tests + ///////////////////////////////////////////////////////////// + + panzer::Workset &workset = (*work_sets)[0]; + workset.alpha = 0.0; + workset.beta = 2.0; // derivatives multiplied by 2 + workset.time = 0.0; + workset.evaluate_transient_terms = false; + + fm.evaluateFields(workset); + + // test Residual fields + panzer::index_t dd_count(0); + Teuchos::ArrayRCP data, dd_data; + Teuchos::RCP> f_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_f()); + Teuchos::RCP> dd_vec = Teuchos::rcp_dynamic_cast>(b_dd_loc->get_f()); + + // check all the residual values. This is kind of crappy test since it simply checks twice the target + // value and the target. Its this way because you add two entries across elements. 
+ + Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(data)); + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(0)->getLocalNumElements()); + TEST_EQUALITY(data.size(), dd_data.size()); + dd_count = 0; + for (int i = 0; i < data.size(); i++) + { + + double target = 123.0 + myRank; + if (dd_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dd_count++; + } + } + TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared + + Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(data)); + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(1)->getLocalNumElements()); + TEST_EQUALITY(data.size(), dd_data.size()); + dd_count = 0; + for (int i = 0; i < data.size(); i++) + { + + double target = 456.0 + myRank; + if (dd_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dd_count++; + } + } + TEST_EQUALITY(dd_count, workset.num_cells); // there are 2 nodes on the side and the sides are not shared + + Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(data)); + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(2)->getLocalNumElements()); + TEST_EQUALITY(data.size(), dd_data.size()); + dd_count = 0; + for (int i = 0; i < data.size(); i++) + { + + double target = 789.0 + myRank; + if (dd_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dd_count++; + } + } + TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side 
and the sides are not shared + } + + TEUCHOS_UNIT_TEST(block_assembly, scatter_dirichlet_tangent) + { + +#ifdef HAVE_MPI + Teuchos::RCP> tComm = Teuchos::rcp(new Teuchos::MpiComm(MPI_COMM_WORLD)); +#else + NOPE_PANZER_DOESNT_SUPPORT_SERIAL +#endif + + int myRank = tComm->getRank(); + + const std::size_t workset_size = 4; + const std::string fieldName1_q1 = "U"; + const std::string fieldName2_q1 = "V"; + const std::string fieldName_qedge1 = "B"; + const std::size_t numParams = 2; + + Teuchos::RCP mesh = buildMesh(2, 2); + + // build input physics block + Teuchos::RCP basis_q1 = buildBasis(workset_size, "Q1"); + Teuchos::RCP basis_qedge1 = buildBasis(workset_size, "QEdge1"); + + Teuchos::RCP ipb = Teuchos::parameterList(); + testInitialization(ipb); + + const int default_int_order = 1; + std::string eBlockID = "eblock-0_0"; + Teuchos::RCP eqset_factory = Teuchos::rcp(new user_app::MyFactory); + panzer::CellData cellData(workset_size, mesh->getCellTopology("eblock-0_0")); + Teuchos::RCP gd = panzer::createGlobalData(); + Teuchos::RCP physicsBlock = + Teuchos::rcp(new PhysicsBlock(ipb, eBlockID, default_int_order, cellData, eqset_factory, gd, false)); + + Teuchos::RCP> work_sets = panzer_stk::buildWorksets(*mesh, physicsBlock->elementBlockID(), + physicsBlock->getWorksetNeeds()); + TEST_EQUALITY(work_sets->size(), 1); + + std::vector scatter_IC_vec = {false,true}; + + for (const bool scatter_IC : scatter_IC_vec) { + // build connection manager and field manager + const Teuchos::RCP conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh)); + RCP dofManager = Teuchos::rcp(new panzer::BlockedDOFManager(conn_manager, MPI_COMM_WORLD)); + + dofManager->addField(fieldName1_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName2_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName_qedge1, Teuchos::rcp(new 
panzer::Intrepid2FieldPattern(basis_qedge1->getIntrepid2Basis()))); + + std::vector> fieldOrder(3); + fieldOrder[0].push_back(fieldName1_q1); + fieldOrder[1].push_back(fieldName_qedge1); + fieldOrder[2].push_back(fieldName2_q1); + dofManager->setFieldOrder(fieldOrder); + + // dofManager->setOrientationsRequired(true); + dofManager->buildGlobalUnknowns(); + + // setup linear object factory + ///////////////////////////////////////////////////////////// + + Teuchos::RCP bt_lof = Teuchos::rcp(new TpetraBlockedLinObjFactoryType(tComm.getConst(), dofManager)); + Teuchos::RCP> lof = bt_lof; + Teuchos::RCP dd_loc = bt_lof->buildGhostedLinearObjContainer(); + Teuchos::RCP loc = bt_lof->buildGhostedLinearObjContainer(); + bt_lof->initializeGhostedContainer(LinearObjContainer::F, *dd_loc); + dd_loc->initialize(); + + bt_lof->initializeGhostedContainer(LinearObjContainer::X | LinearObjContainer::F, *loc); + loc->initialize(); + + Teuchos::RCP b_dd_loc = Teuchos::rcp_dynamic_cast(dd_loc); + Teuchos::RCP b_loc = Teuchos::rcp_dynamic_cast(loc); + Teuchos::RCP> p_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_x()); + Thyra::assign(p_vec->getNonconstVectorBlock(0).ptr(), 123.0 + myRank); + Thyra::assign(p_vec->getNonconstVectorBlock(1).ptr(), 456.0 + myRank); + Thyra::assign(p_vec->getNonconstVectorBlock(2).ptr(), 789.0 + myRank); + + std::vector> tangentContainers; + + using LOCPair = panzer::LOCPair_GlobalEvaluationData; + using Teuchos::rcp_dynamic_cast; + + // generate tangent data + for (std::size_t i=0;i(locPair->getGlobalLOC()); + Teuchos::RCP> global_p_vec = Teuchos::rcp_dynamic_cast>(global_bt_loc->get_x()); + Thyra::assign(global_p_vec->getNonconstVectorBlock(0).ptr(), 0.123 + myRank + i); + Thyra::assign(global_p_vec->getNonconstVectorBlock(1).ptr(), 0.456 + myRank + i); + Thyra::assign(global_p_vec->getNonconstVectorBlock(2).ptr(), 0.789 + myRank + i); + + auto ghosted_bt_loc = rcp_dynamic_cast(locPair->getGhostedLOC()); + Teuchos::RCP> ghosted_p_vec = 
Teuchos::rcp_dynamic_cast>(ghosted_bt_loc->get_x()); + Thyra::assign(ghosted_p_vec->getNonconstVectorBlock(0).ptr(), 0.123 + myRank + i); + Thyra::assign(ghosted_p_vec->getNonconstVectorBlock(1).ptr(), 0.456 + myRank + i); + Thyra::assign(ghosted_p_vec->getNonconstVectorBlock(2).ptr(), 0.789 + myRank + i); + + tangentContainers.push_back(locPair); + } + + // setup field manager, add evaluator under test + ///////////////////////////////////////////////////////////// + + auto fm = Teuchos::rcp(new PHX::FieldManager); + + std::vector derivative_dimensions; + derivative_dimensions.push_back(numParams); + fm->setKokkosExtendedDataTypeDimensions(derivative_dimensions); + + std::string resName = ""; + Teuchos::RCP> names_map = + Teuchos::rcp(new std::map); + names_map->insert(std::make_pair(fieldName1_q1, resName + fieldName1_q1)); + names_map->insert(std::make_pair(fieldName2_q1, resName + fieldName2_q1)); + names_map->insert(std::make_pair(fieldName_qedge1, resName + fieldName_qedge1)); + + // evaluators under test + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName1_q1); + names->push_back(resName + fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Scatter Name", "ScatterQ1"); + if (scatter_IC) { + pl.set("Basis", basis_q1.getConst()); + } else { + pl.set("Basis", basis_q1); + } + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", false); + pl.set("Scatter Initial Condition", scatter_IC); + + Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm->registerEvaluator(evaluator); + fm->requireField(*evaluator->evaluatedFields()[0]); + } + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName_qedge1); + + Teuchos::ParameterList pl; + 
pl.set("Scatter Name", "ScatterQEdge1"); + if (scatter_IC) { + pl.set("Basis", basis_qedge1.getConst()); + } else { + pl.set("Basis", basis_qedge1); + } + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", false); + pl.set("Scatter Initial Condition", scatter_IC); + + Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm->registerEvaluator(evaluator); + fm->requireField(*evaluator->evaluatedFields()[0]); + } + + // support evaluators + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName1_q1); + names->push_back(fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_q1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + Teuchos::RCP>> tangent_names = + Teuchos::rcp(new std::vector>(2)); + for (std::size_t i = 0; i < numParams; ++i) + { + std::stringstream ss1, ss2; + ss1 << fieldName1_q1 << " Tangent " << i; + ss2 << fieldName2_q1 << " Tangent " << i; + (*tangent_names)[0].push_back(ss1.str()); + (*tangent_names)[1].push_back(ss2.str()); + } + pl.set("Tangent Names", tangent_names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + fm->registerEvaluator(evaluator); + } + for (std::size_t i = 0; i < numParams; ++i) { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + RCP> tangent_names = rcp(new std::vector); + names->push_back(fieldName1_q1); + names->push_back(fieldName2_q1); + { + std::stringstream ss1, ss2; + ss1 << fieldName1_q1 << " Tangent " << i; + ss2 << fieldName2_q1 << " Tangent " << i; + tangent_names->push_back(ss1.str()); + tangent_names->push_back(ss2.str()); + } + + Teuchos::ParameterList pl; + pl.set("Basis", basis_q1); + pl.set("DOF Names", tangent_names); + pl.set("Indexer Names", names); + + std::stringstream ss; + ss << "Tangent 
Container " << i; + pl.set("Global Data Key", ss.str()); + + Teuchos::RCP> evaluator = + lof->buildGatherTangent(pl); + + fm->registerEvaluator(evaluator); + } + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_qedge1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + Teuchos::RCP>> tangent_names = + Teuchos::rcp(new std::vector>(1)); + for (std::size_t i = 0; i < numParams; ++i) + { + std::stringstream ss; + ss << fieldName_qedge1 << " Tangent " << i; + (*tangent_names)[0].push_back(ss.str()); + } + pl.set("Tangent Names", tangent_names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + fm->registerEvaluator(evaluator); + } + for (std::size_t i = 0; i < numParams; ++i) { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + RCP> tangent_names = rcp(new std::vector); + names->push_back(fieldName_qedge1); + { + std::stringstream ss; + ss << fieldName_qedge1 << " Tangent " << i; + tangent_names->push_back(ss.str()); + } + + Teuchos::ParameterList pl; + pl.set("Basis", basis_qedge1); + pl.set("DOF Names", tangent_names); + pl.set("Indexer Names", names); + + std::stringstream ss; + ss << "Tangent Container " << i; + pl.set("Global Data Key", ss.str()); + + Teuchos::RCP> evaluator = + lof->buildGatherTangent(pl); + + fm->registerEvaluator(evaluator); + } + + panzer::Traits::SD sd; + sd.worksets_ = work_sets; + + fm->postRegistrationSetup(sd); + + panzer::Traits::PED ped; + ped.gedc->addDataObject("Dirichlet Counter", dd_loc); + ped.gedc->addDataObject("Solution Gather Container", loc); + ped.gedc->addDataObject("Residual Scatter Container", loc); + for (size_t i=0; iaddDataObject(ss.str(), tangentContainers[i]); + } + std::vector params; + std::vector> paramContainers; + for (std::size_t i = 0; iaddDataObject(ss.str(),paramContainer->getGhostedLOC()); + 
paramContainers.push_back(paramContainer); + } + Teuchos::RCP activeParams = + Teuchos::rcp(new panzer::ParameterList_GlobalEvaluationData(params)); + ped.gedc->addDataObject("PARAMETER_NAMES",activeParams); + + fm->preEvaluate(ped); + + // run tests + ///////////////////////////////////////////////////////////// + + panzer::Workset &workset = (*work_sets)[0]; + workset.alpha = 0.0; + workset.beta = 2.0; // derivatives multiplied by 2 + workset.time = 0.0; + workset.evaluate_transient_terms = false; + + fm->evaluateFields(workset); + + fm = Teuchos::null; + + // test Residual fields + panzer::index_t dd_count(0); + Teuchos::ArrayRCP data, dd_data; + Teuchos::RCP> x_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_x()); + Teuchos::RCP> f_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_f()); + Teuchos::RCP> dd_vec = Teuchos::rcp_dynamic_cast>(b_dd_loc->get_f()); + + // check all the residual values. + + if (scatter_IC) { + Teuchos::rcp_dynamic_cast>(x_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(data)); + } else { + Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(data)); + } + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(0)->getLocalNumElements()); + TEST_EQUALITY(data.size(), dd_data.size()); + dd_count = 0; + for (int i = 0; i < data.size(); i++) + { + + double target = 123.0 + myRank; + if (dd_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dd_count++; + } + } + if (scatter_IC) { + TEST_EQUALITY(dd_count, data.size()); // filled everywhere + } else { + TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared + } + + if (scatter_IC) { + Teuchos::rcp_dynamic_cast>(x_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(data)); + } else { + 
Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(data)); + } + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(1)->getLocalNumElements()); + TEST_EQUALITY(data.size(), dd_data.size()); + dd_count = 0; + for (int i = 0; i < data.size(); i++) + { + + double target = 456.0 + myRank; + if (dd_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dd_count++; + } + } + if (scatter_IC) { + TEST_EQUALITY(dd_count, data.size()); // filled everywhere + } else { + TEST_EQUALITY(dd_count, workset.num_cells); // there are 1 edge on the side and the sides are not shared + } + + if (scatter_IC) { + Teuchos::rcp_dynamic_cast>(x_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(data)); + } else { + Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(data)); + } + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(2)->getLocalNumElements()); + TEST_EQUALITY(data.size(), dd_data.size()); + dd_count = 0; + for (int i = 0; i < data.size(); i++) + { + + double target = 789.0 + myRank; + if (dd_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dd_count++; + } + } + if (scatter_IC) { + TEST_EQUALITY(dd_count, data.size()); // filled everywhere + } else { + TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared + } + + // now test tangents + for (std::size_t i=0; i> param_f_vec = + Teuchos::rcp_dynamic_cast>( + Teuchos::rcp_dynamic_cast(paramContainers[i]->getGhostedLOC())->get_f()); + + Teuchos::rcp_dynamic_cast>(param_f_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(data)); + 
Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(0)->getLocalNumElements()); + for (size_type j = 0; j < data.size(); j++) + { + double target = .123 + myRank + i; + if (dd_data[j] == 0.0) + { + TEST_EQUALITY(data[j],0.0); + } + else + { + TEST_EQUALITY(data[j],target); + } + } + Teuchos::rcp_dynamic_cast>(param_f_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(data)); + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(1)->getLocalNumElements()); + for (size_type j = 0; j < data.size(); j++) + { + double target = .456 + myRank + i; + if (dd_data[j] == 0.0) + { + TEST_EQUALITY(data[j],0.0); + } + else + { + TEST_EQUALITY(data[j],target); + } + } + Teuchos::rcp_dynamic_cast>(param_f_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(data)); + Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(dd_data)); + TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(2)->getLocalNumElements()); + for (size_type j = 0; j < data.size(); j++) + { + double target = .789 + myRank + i; + if (dd_data[j] == 0.0) + { + TEST_EQUALITY(data[j],0.0); + } + else + { + TEST_EQUALITY(data[j],target); + } + } + } + } + } + Teuchos::RCP buildBasis(std::size_t worksetSize, const std::string &basisName) + { + Teuchos::RCP topo = + Teuchos::rcp(new shards::CellTopology(shards::getCellTopologyData>())); + + panzer::CellData cellData(worksetSize, topo); + return Teuchos::rcp(new panzer::PureBasis(basisName, 1, cellData)); + } + + Teuchos::RCP buildMesh(int elemX, int elemY) + { + RCP pl = rcp(new Teuchos::ParameterList); + pl->set("X Blocks", 1); + pl->set("Y Blocks", 1); + pl->set("X Elements", elemX); + pl->set("Y Elements", elemY); + + panzer_stk::SquareQuadMeshFactory factory; + factory.setParameterList(pl); + 
RCP mesh = factory.buildUncommitedMesh(MPI_COMM_WORLD); + factory.completeMeshConstruction(*mesh, MPI_COMM_WORLD); + + return mesh; + } + + void testInitialization(const Teuchos::RCP &ipb) + { + // Physics block + ipb->setName("test physics"); + { + Teuchos::ParameterList &p = ipb->sublist("a"); + p.set("Type", "Energy"); + p.set("Prefix", ""); + p.set("Model ID", "solid"); + p.set("Basis Type", "HGrad"); + p.set("Basis Order", 1); + p.set("Integration Order", 1); + } + { + Teuchos::ParameterList &p = ipb->sublist("b"); + p.set("Type", "Energy"); + p.set("Prefix", "ION_"); + p.set("Model ID", "solid"); + p.set("Basis Type", "HCurl"); + p.set("Basis Order", 1); + p.set("Integration Order", 1); + } + } + +} diff --git a/packages/panzer/adapters-stk/test/evaluator_tests/tpetra_scatter_dirichlet_residual.cpp b/packages/panzer/adapters-stk/test/evaluator_tests/tpetra_scatter_dirichlet_residual.cpp index ff01ebb4f976..158e0d5ddbe2 100644 --- a/packages/panzer/adapters-stk/test/evaluator_tests/tpetra_scatter_dirichlet_residual.cpp +++ b/packages/panzer/adapters-stk/test/evaluator_tests/tpetra_scatter_dirichlet_residual.cpp @@ -20,15 +20,16 @@ using Teuchos::rcp; #include "Teuchos_GlobalMPISession.hpp" #include "Panzer_FieldManagerBuilder.hpp" -#include "Panzer_BlockedDOFManager.hpp" -#include "Panzer_BlockedTpetraLinearObjFactory.hpp" +#include "Panzer_DOFManager.hpp" +#include "Panzer_TpetraLinearObjFactory.hpp" #include "Panzer_PureBasis.hpp" #include "Panzer_BasisIRLayout.hpp" #include "Panzer_Workset.hpp" #include "Panzer_GatherOrientation.hpp" -#include "Panzer_ScatterResidual_BlockedTpetra.hpp" -#include "Panzer_GatherSolution_BlockedTpetra.hpp" +#include "Panzer_ScatterDirichletResidual_Tpetra.hpp" #include "Panzer_GlobalEvaluationDataContainer.hpp" +#include "Panzer_LOCPair_GlobalEvaluationData.hpp" +#include "Panzer_ParameterList_GlobalEvaluationData.hpp" #include "Panzer_STK_Version.hpp" #include "PanzerAdaptersSTK_config.hpp" @@ -43,7 +44,6 @@ using 
Teuchos::rcp; #include "Thyra_VectorStdOps.hpp" #include "Thyra_ProductVectorBase.hpp" #include "Thyra_SpmdVectorBase.hpp" -//#include "Thyra_get_Epetra_Operator.hpp" #include "Tpetra_CrsMatrix.hpp" #include "Tpetra_Map.hpp" @@ -59,8 +59,8 @@ using Teuchos::rcp; namespace panzer { - using TpetraBlockedLinObjFactoryType = panzer::BlockedTpetraLinearObjFactory; - using TpetraBlockedLinObjContainerType = panzer::BlockedTpetraLinearObjContainer; + using TpetraLinObjFactoryType = panzer::TpetraLinearObjFactory; + using TpetraLinObjContainerType = panzer::TpetraLinearObjContainer; using Tpetra_CrsMatrix = Tpetra::CrsMatrix; using Thyra_TpetraLinearOp = Thyra::TpetraLinearOp; @@ -68,7 +68,7 @@ namespace panzer void testInitialization(const Teuchos::RCP &ipb); Teuchos::RCP buildMesh(int elemX, int elemY); - TEUCHOS_UNIT_TEST(block_assembly, scatter_dirichlet_residual) + TEUCHOS_UNIT_TEST(assembly, scatter_dirichlet_residual) { #ifdef HAVE_MPI @@ -105,238 +105,225 @@ namespace panzer physicsBlock->getWorksetNeeds()); TEST_EQUALITY(work_sets->size(), 1); - // build connection manager and field manager - const Teuchos::RCP conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh)); - RCP dofManager = Teuchos::rcp(new panzer::BlockedDOFManager(conn_manager, MPI_COMM_WORLD)); + std::vector scatter_IC_vec = {false,true}; - dofManager->addField(fieldName1_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); - dofManager->addField(fieldName2_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); - dofManager->addField(fieldName_qedge1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_qedge1->getIntrepid2Basis()))); + for (const bool scatter_IC : scatter_IC_vec) { + // build connection manager and field manager + const Teuchos::RCP conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh)); + RCP dofManager = Teuchos::rcp(new panzer::DOFManager(conn_manager, MPI_COMM_WORLD)); - std::vector> 
fieldOrder(3); - fieldOrder[0].push_back(fieldName1_q1); - fieldOrder[1].push_back(fieldName_qedge1); - fieldOrder[2].push_back(fieldName2_q1); - dofManager->setFieldOrder(fieldOrder); + dofManager->addField(fieldName1_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName2_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName_qedge1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_qedge1->getIntrepid2Basis()))); - // dofManager->setOrientationsRequired(true); - dofManager->buildGlobalUnknowns(); + std::vector fieldOrder; + fieldOrder.push_back(fieldName1_q1); + fieldOrder.push_back(fieldName_qedge1); + fieldOrder.push_back(fieldName2_q1); + dofManager->setFieldOrder(fieldOrder); - // setup linear object factory - ///////////////////////////////////////////////////////////// + dofManager->buildGlobalUnknowns(); - Teuchos::RCP bt_lof = Teuchos::rcp(new TpetraBlockedLinObjFactoryType(tComm.getConst(), dofManager)); - Teuchos::RCP> lof = bt_lof; - Teuchos::RCP dd_loc = bt_lof->buildGhostedLinearObjContainer(); - Teuchos::RCP loc = bt_lof->buildGhostedLinearObjContainer(); - bt_lof->initializeGhostedContainer(LinearObjContainer::F, *dd_loc); - dd_loc->initialize(); + // setup linear object factory + ///////////////////////////////////////////////////////////// + Teuchos::RCP t_lof = Teuchos::rcp(new TpetraLinObjFactoryType(tComm.getConst(), dofManager)); + Teuchos::RCP> lof = t_lof; + Teuchos::RCP loc = t_lof->buildGhostedLinearObjContainer(); + Teuchos::RCP dc_loc = t_lof->buildGhostedLinearObjContainer(); - bt_lof->initializeGhostedContainer(LinearObjContainer::X | LinearObjContainer::F, *loc); - loc->initialize(); + t_lof->initializeGhostedContainer(LinearObjContainer::X | LinearObjContainer::F, *loc); + loc->initialize(); - Teuchos::RCP b_dd_loc = Teuchos::rcp_dynamic_cast(dd_loc); - Teuchos::RCP b_loc = Teuchos::rcp_dynamic_cast(loc); 
- Teuchos::RCP> p_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_x()); - Thyra::assign(p_vec->getNonconstVectorBlock(0).ptr(), 123.0 + myRank); - Thyra::assign(p_vec->getNonconstVectorBlock(1).ptr(), 456.0 + myRank); - Thyra::assign(p_vec->getNonconstVectorBlock(2).ptr(), 789.0 + myRank); + t_lof->initializeGhostedContainer(LinearObjContainer::F, *dc_loc); + dc_loc->initialize(); + Teuchos::RCP t_dc_loc = Teuchos::rcp_dynamic_cast(dc_loc); - // setup field manager, add evaluator under test - ///////////////////////////////////////////////////////////// + Teuchos::RCP t_loc = Teuchos::rcp_dynamic_cast(loc); - PHX::FieldManager fm; + Teuchos::RCP> x_vec = t_loc->get_x_th(); + Thyra::assign(x_vec.ptr(), 123.0 + myRank); - std::string resName = ""; - Teuchos::RCP> names_map = - Teuchos::rcp(new std::map); - names_map->insert(std::make_pair(fieldName1_q1, resName + fieldName1_q1)); - names_map->insert(std::make_pair(fieldName2_q1, resName + fieldName2_q1)); - names_map->insert(std::make_pair(fieldName_qedge1, resName + fieldName_qedge1)); + // setup field manager, add evaluator under test + ///////////////////////////////////////////////////////////// - // evaluators under test - { - using Teuchos::RCP; - using Teuchos::rcp; - RCP> names = rcp(new std::vector); - names->push_back(resName + fieldName1_q1); - names->push_back(resName + fieldName2_q1); - - Teuchos::ParameterList pl; - pl.set("Scatter Name", "ScatterQ1"); - pl.set("Basis", basis_q1); - pl.set("Dependent Names", names); - pl.set("Dependent Map", names_map); - pl.set("Side Subcell Dimension", 1); - pl.set("Local Side ID", 2); - pl.set("Check Apply BC", false); - - Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); - - TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); - - fm.registerEvaluator(evaluator); - fm.requireField(*evaluator->evaluatedFields()[0]); - } - { - using Teuchos::RCP; - using Teuchos::rcp; - RCP> names = rcp(new std::vector); - names->push_back(resName + fieldName_qedge1); - - 
Teuchos::ParameterList pl; - pl.set("Scatter Name", "ScatterQEdge1"); - pl.set("Basis", basis_qedge1); - pl.set("Dependent Names", names); - pl.set("Dependent Map", names_map); - pl.set("Side Subcell Dimension", 1); - pl.set("Local Side ID", 2); - pl.set("Check Apply BC", false); - - Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); - - TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); - - fm.registerEvaluator(evaluator); - fm.requireField(*evaluator->evaluatedFields()[0]); - } - - // support evaluators - { - using Teuchos::RCP; - using Teuchos::rcp; - RCP> names = rcp(new std::vector); - names->push_back(fieldName1_q1); - names->push_back(fieldName2_q1); - - Teuchos::ParameterList pl; - pl.set("Basis", basis_q1); - pl.set("DOF Names", names); - pl.set("Indexer Names", names); + PHX::FieldManager fm; - Teuchos::RCP> evaluator = lof->buildGather(pl); - - fm.registerEvaluator(evaluator); - } - { - using Teuchos::RCP; - using Teuchos::rcp; - RCP> names = rcp(new std::vector); - names->push_back(fieldName_qedge1); - - Teuchos::ParameterList pl; - pl.set("Basis", basis_qedge1); - pl.set("DOF Names", names); - pl.set("Indexer Names", names); - - Teuchos::RCP> evaluator = lof->buildGather(pl); - - fm.registerEvaluator(evaluator); - } + std::string resName = ""; + Teuchos::RCP> names_map = + Teuchos::rcp(new std::map); + names_map->insert(std::make_pair(fieldName1_q1, resName + fieldName1_q1)); + names_map->insert(std::make_pair(fieldName2_q1, resName + fieldName2_q1)); + names_map->insert(std::make_pair(fieldName_qedge1, resName + fieldName_qedge1)); - std::vector derivative_dimensions; - derivative_dimensions.push_back(12); - fm.setKokkosExtendedDataTypeDimensions(derivative_dimensions); - - panzer::Traits::SD sd; - sd.worksets_ = work_sets; - - fm.postRegistrationSetup(sd); - - // panzer::Traits::PED ped; - // ped.dirichletData.ghostedCounter = dd_loc; - // fm.preEvaluate(ped); - panzer::Traits::PED ped; - ped.gedc->addDataObject("Dirichlet Counter", 
dd_loc); - ped.gedc->addDataObject("Solution Gather Container", loc); - ped.gedc->addDataObject("Residual Scatter Container", loc); - fm.preEvaluate(ped); - - // run tests - ///////////////////////////////////////////////////////////// - - panzer::Workset &workset = (*work_sets)[0]; - workset.alpha = 0.0; - workset.beta = 2.0; // derivatives multiplied by 2 - workset.time = 0.0; - workset.evaluate_transient_terms = false; - - fm.evaluateFields(workset); - - // test Residual fields - panzer::index_t dd_count(0); - Teuchos::ArrayRCP data, dd_data; - Teuchos::RCP> f_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_f()); - Teuchos::RCP> dd_vec = Teuchos::rcp_dynamic_cast>(b_dd_loc->get_f()); - - // check all the residual values. This is kind of crappy test since it simply checks twice the target - // value and the target. Its this way because you add two entries across elements. - - Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(data)); - Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(dd_data)); - TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(0)->getLocalNumElements()); - TEST_EQUALITY(data.size(), dd_data.size()); - dd_count = 0; - for (int i = 0; i < data.size(); i++) - { - - double target = 123.0 + myRank; - if (dd_data[i] == 0.0) + // evaluators under test { - TEST_EQUALITY(data[i], 0.0); + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName1_q1); + names->push_back(resName + fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Scatter Name", "ScatterQ1"); + if (scatter_IC) { + pl.set("Basis", basis_q1.getConst()); + } else { + pl.set("Basis", basis_q1); + } + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", false); + pl.set("Scatter Initial Condition", scatter_IC); + + Teuchos::RCP> 
evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm.registerEvaluator(evaluator); + fm.requireField(*evaluator->evaluatedFields()[0]); } - else { - TEST_EQUALITY(data[i], target); - dd_count++; + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Scatter Name", "ScatterQEdge1"); + if (scatter_IC) { + pl.set("Basis", basis_qedge1.getConst()); + } else { + pl.set("Basis", basis_qedge1); + } + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", false); + pl.set("Scatter Initial Condition", scatter_IC); + + Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm.registerEvaluator(evaluator); + fm.requireField(*evaluator->evaluatedFields()[0]); } - } - TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared - - Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(data)); - Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(dd_data)); - TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(1)->getLocalNumElements()); - TEST_EQUALITY(data.size(), dd_data.size()); - dd_count = 0; - for (int i = 0; i < data.size(); i++) - { - double target = 456.0 + myRank; - if (dd_data[i] == 0.0) + // support evaluators { - TEST_EQUALITY(data[i], 0.0); + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName1_q1); + names->push_back(fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_q1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + 
fm.registerEvaluator(evaluator); } - else { - TEST_EQUALITY(data[i], target); - dd_count++; - } - } - TEST_EQUALITY(dd_count, workset.num_cells); // there are 2 nodes on the side and the sides are not shared - - Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(data)); - Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(dd_data)); - TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(2)->getLocalNumElements()); - TEST_EQUALITY(data.size(), dd_data.size()); - dd_count = 0; - for (int i = 0; i < data.size(); i++) - { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName_qedge1); - double target = 789.0 + myRank; - if (dd_data[i] == 0.0) - { - TEST_EQUALITY(data[i], 0.0); + Teuchos::ParameterList pl; + pl.set("Basis", basis_qedge1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + fm.registerEvaluator(evaluator); } - else - { - TEST_EQUALITY(data[i], target); - dd_count++; + + panzer::Traits::SD sd; + sd.worksets_ = work_sets; + + fm.postRegistrationSetup(sd); + + panzer::Traits::PED ped; + ped.gedc->addDataObject("Dirichlet Counter", dc_loc); + ped.gedc->addDataObject("Solution Gather Container", loc); + ped.gedc->addDataObject("Residual Scatter Container", loc); + fm.preEvaluate(ped); + + // run tests + ///////////////////////////////////////////////////////////// + + panzer::Workset &workset = (*work_sets)[0]; + workset.alpha = 0.0; + workset.beta = 2.0; // derivatives multiplied by 2 + workset.time = 0.0; + workset.evaluate_transient_terms = false; + + fm.evaluateFields(workset); + fm.postEvaluate(0); + + // test Residual fields + panzer::index_t dc_count(0); + Teuchos::ArrayRCP data, dc_data; + Teuchos::RCP> f_vec = t_loc->get_f_th(); + Teuchos::RCP> dc_vec = t_dc_loc->get_f_th(); + + // check all the residual values and the count + + 
Teuchos::rcp_dynamic_cast>(dc_vec)->getLocalData(Teuchos::ptrFromRef(dc_data)); + if (scatter_IC) { + Teuchos::rcp_dynamic_cast>(x_vec)->getLocalData(Teuchos::ptrFromRef(data)); + + TEST_EQUALITY(static_cast(data.size()), t_lof->getGhostedMap()->getLocalNumElements()); + TEST_EQUALITY(data.size(), dc_data.size()); + dc_count = 0; + for (int i = 0; i < data.size(); i++) + { + double target = 123.0 + myRank; + if (dc_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dc_count++; + } + } + // Filled everywhere + TEST_EQUALITY(dc_count, data.size()); + } else { + Teuchos::rcp_dynamic_cast>(f_vec)->getLocalData(Teuchos::ptrFromRef(data)); + TEST_EQUALITY(static_cast(data.size()), t_lof->getGhostedMap()->getLocalNumElements()); + TEST_EQUALITY(data.size(), dc_data.size()); + dc_count = 0; + for (int i = 0; i < data.size(); i++) + { + double target = 123.0 + myRank; + if (dc_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dc_count++; + } + } + // there are 2 nodes or 1 edge on the side and the sides are not shared. 
+ // 2 nodal functions, 1 edge function + TEST_EQUALITY(dc_count, 5 * workset.num_cells); } } - TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared } - TEUCHOS_UNIT_TEST(block_assembly, scatter_dirichlet_jacobian) + TEUCHOS_UNIT_TEST(assembly, scatter_dirichlet_tangent) { #ifdef HAVE_MPI @@ -351,6 +338,7 @@ namespace panzer const std::string fieldName1_q1 = "U"; const std::string fieldName2_q1 = "V"; const std::string fieldName_qedge1 = "B"; + const std::size_t numParams = 3; Teuchos::RCP mesh = buildMesh(2, 2); @@ -373,247 +361,637 @@ namespace panzer physicsBlock->getWorksetNeeds()); TEST_EQUALITY(work_sets->size(), 1); - // build connection manager and field manager - const Teuchos::RCP conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh)); - RCP dofManager = Teuchos::rcp(new panzer::BlockedDOFManager(conn_manager, MPI_COMM_WORLD)); + std::vector scatter_IC_vec = {false,true}; - dofManager->addField(fieldName1_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); - dofManager->addField(fieldName2_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); - dofManager->addField(fieldName_qedge1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_qedge1->getIntrepid2Basis()))); + for (const bool scatter_IC : scatter_IC_vec) { + // build connection manager and field manager + const Teuchos::RCP conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh)); + RCP dofManager = Teuchos::rcp(new panzer::DOFManager(conn_manager, MPI_COMM_WORLD)); - std::vector> fieldOrder(3); - fieldOrder[0].push_back(fieldName1_q1); - fieldOrder[1].push_back(fieldName_qedge1); - fieldOrder[2].push_back(fieldName2_q1); - dofManager->setFieldOrder(fieldOrder); + dofManager->addField(fieldName1_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName2_q1, Teuchos::rcp(new 
panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(fieldName_qedge1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_qedge1->getIntrepid2Basis()))); - // dofManager->setOrientationsRequired(true); - dofManager->buildGlobalUnknowns(); + std::vector fieldOrder; + fieldOrder.push_back(fieldName1_q1); + fieldOrder.push_back(fieldName_qedge1); + fieldOrder.push_back(fieldName2_q1); + dofManager->setFieldOrder(fieldOrder); - // setup linear object factory - ///////////////////////////////////////////////////////////// + dofManager->buildGlobalUnknowns(); - Teuchos::RCP bt_lof = Teuchos::rcp(new TpetraBlockedLinObjFactoryType(tComm.getConst(), dofManager)); - Teuchos::RCP> lof = bt_lof; - Teuchos::RCP dd_loc = bt_lof->buildGhostedLinearObjContainer(); - Teuchos::RCP loc = bt_lof->buildGhostedLinearObjContainer(); - bt_lof->initializeGhostedContainer(LinearObjContainer::F, *dd_loc); - dd_loc->initialize(); + // setup linear object factory + ///////////////////////////////////////////////////////////// + Teuchos::RCP t_lof = Teuchos::rcp(new TpetraLinObjFactoryType(tComm.getConst(), dofManager)); + Teuchos::RCP> lof = t_lof; + Teuchos::RCP loc = t_lof->buildGhostedLinearObjContainer(); + Teuchos::RCP dc_loc = t_lof->buildGhostedLinearObjContainer(); - bt_lof->initializeGhostedContainer(LinearObjContainer::X | LinearObjContainer::F | LinearObjContainer::Mat, *loc); - loc->initialize(); + t_lof->initializeGhostedContainer(LinearObjContainer::X | LinearObjContainer::F, *loc); + loc->initialize(); - Teuchos::RCP b_dd_loc = Teuchos::rcp_dynamic_cast(dd_loc); - Teuchos::RCP b_loc = Teuchos::rcp_dynamic_cast(loc); - Teuchos::RCP> p_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_x()); - Thyra::assign(p_vec->getNonconstVectorBlock(0).ptr(), 123.0 + myRank); - Thyra::assign(p_vec->getNonconstVectorBlock(1).ptr(), 456.0 + myRank); - Thyra::assign(p_vec->getNonconstVectorBlock(2).ptr(), 789.0 + myRank); + 
t_lof->initializeGhostedContainer(LinearObjContainer::F, *dc_loc); + dc_loc->initialize(); + Teuchos::RCP t_dc_loc = Teuchos::rcp_dynamic_cast(dc_loc); - auto blk_A = Teuchos::rcp_dynamic_cast>(b_loc->get_A()); - double values[] = {123.0 + myRank, 456.0 + myRank, 789.0 + myRank}; + Teuchos::RCP t_loc = Teuchos::rcp_dynamic_cast(loc); - for (int i = 0; i < 3; i++) - for (int j = 0; j < 3; j++) - { - auto thyraOp = Teuchos::rcp_dynamic_cast(blk_A->getNonconstBlock(i, j), false); - auto tpetraCrsMatrix = Teuchos::rcp_dynamic_cast(thyraOp->getTpetraOperator(), true); - tpetraCrsMatrix->setAllToScalar(values[i] * values[j]); - } + Teuchos::RCP> x_vec = t_loc->get_x_th(); + Thyra::assign(x_vec.ptr(), 123.0 + myRank); - // setup field manager, add evaluator under test - ///////////////////////////////////////////////////////////// + std::vector> tangentContainers; - PHX::FieldManager fm; + using LOCPair = panzer::LOCPair_GlobalEvaluationData; + using Teuchos::rcp_dynamic_cast; - std::string resName = ""; - Teuchos::RCP> names_map = - Teuchos::rcp(new std::map); - names_map->insert(std::make_pair(fieldName1_q1, resName + fieldName1_q1)); - names_map->insert(std::make_pair(fieldName2_q1, resName + fieldName2_q1)); - names_map->insert(std::make_pair(fieldName_qedge1, resName + fieldName_qedge1)); + // generate tangent data + for (std::size_t i=0;i> names = rcp(new std::vector); - names->push_back(resName + fieldName1_q1); - names->push_back(resName + fieldName2_q1); - - Teuchos::ParameterList pl; - pl.set("Scatter Name", "ScatterQ1"); - pl.set("Basis", basis_q1); - pl.set("Dependent Names", names); - pl.set("Dependent Map", names_map); - pl.set("Side Subcell Dimension", 1); - pl.set("Local Side ID", 2); - pl.set("Check Apply BC", false); - - Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); - - TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); - - fm.registerEvaluator(evaluator); - fm.requireField(*evaluator->evaluatedFields()[0]); - } - { - using Teuchos::RCP; 
- using Teuchos::rcp; - RCP> names = rcp(new std::vector); - names->push_back(resName + fieldName_qedge1); - - Teuchos::ParameterList pl; - pl.set("Scatter Name", "ScatterQEdge1"); - pl.set("Basis", basis_qedge1); - pl.set("Dependent Names", names); - pl.set("Dependent Map", names_map); - pl.set("Side Subcell Dimension", 1); - pl.set("Local Side ID", 2); - pl.set("Check Apply BC", false); - - Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); - - TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); - - fm.registerEvaluator(evaluator); - fm.requireField(*evaluator->evaluatedFields()[0]); - } - - // support evaluators - { - using Teuchos::RCP; - using Teuchos::rcp; - RCP> names = rcp(new std::vector); - names->push_back(fieldName1_q1); - names->push_back(fieldName2_q1); + auto global_t_loc = rcp_dynamic_cast(locPair->getGlobalLOC()); + Teuchos::RCP> global_x_vec = global_t_loc->get_x_th(); + Thyra::assign(global_x_vec.ptr(), 0.123 + myRank + i); - Teuchos::ParameterList pl; - pl.set("Basis", basis_q1); - pl.set("DOF Names", names); - pl.set("Indexer Names", names); + auto ghosted_t_loc = rcp_dynamic_cast(locPair->getGhostedLOC()); + Teuchos::RCP> ghosted_x_vec = ghosted_t_loc->get_x_th(); + Thyra::assign(ghosted_x_vec.ptr(), 0.123 + myRank + i); - Teuchos::RCP> evaluator = lof->buildGather(pl); - - fm.registerEvaluator(evaluator); - } - { - using Teuchos::RCP; - using Teuchos::rcp; - RCP> names = rcp(new std::vector); - names->push_back(fieldName_qedge1); + tangentContainers.push_back(locPair); + } - Teuchos::ParameterList pl; - pl.set("Basis", basis_qedge1); - pl.set("DOF Names", names); - pl.set("Indexer Names", names); + // setup field manager, add evaluator under test + ///////////////////////////////////////////////////////////// - Teuchos::RCP> evaluator = lof->buildGather(pl); + auto fm = Teuchos::rcp(new PHX::FieldManager); - fm.registerEvaluator(evaluator); - } + std::string resName = ""; + Teuchos::RCP> names_map = + Teuchos::rcp(new std::map); + 
names_map->insert(std::make_pair(fieldName1_q1, resName + fieldName1_q1)); + names_map->insert(std::make_pair(fieldName2_q1, resName + fieldName2_q1)); + names_map->insert(std::make_pair(fieldName_qedge1, resName + fieldName_qedge1)); - std::vector derivative_dimensions; - derivative_dimensions.push_back(12); - fm.setKokkosExtendedDataTypeDimensions(derivative_dimensions); - - panzer::Traits::SD sd; - sd.worksets_ = work_sets; - - fm.postRegistrationSetup(sd); - - // panzer::Traits::PED ped; - // ped.dirichletData.ghostedCounter = dd_loc; - // fm.preEvaluate(ped); - panzer::Traits::PED ped; - ped.gedc->addDataObject("Dirichlet Counter", dd_loc); - ped.gedc->addDataObject("Solution Gather Container", loc); - ped.gedc->addDataObject("Residual Scatter Container", loc); - fm.preEvaluate(ped); - - // run tests - ///////////////////////////////////////////////////////////// - - panzer::Workset &workset = (*work_sets)[0]; - workset.alpha = 0.0; - workset.beta = 2.0; // derivatives multiplied by 2 - workset.time = 0.0; - workset.evaluate_transient_terms = false; - - fm.evaluateFields(workset); - - // test Residual fields - panzer::index_t dd_count(0); - Teuchos::ArrayRCP data, dd_data; - Teuchos::RCP> f_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_f()); - Teuchos::RCP> dd_vec = Teuchos::rcp_dynamic_cast>(b_dd_loc->get_f()); - - // check all the residual values. This is kind of crappy test since it simply checks twice the target - // value and the target. Its this way because you add two entries across elements. 
- - Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(data)); - Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(dd_data)); - TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(0)->getLocalNumElements()); - TEST_EQUALITY(data.size(), dd_data.size()); - dd_count = 0; - for (int i = 0; i < data.size(); i++) - { + std::vector derivative_dimensions; + derivative_dimensions.push_back(numParams); + fm->setKokkosExtendedDataTypeDimensions(derivative_dimensions); - double target = 123.0 + myRank; - if (dd_data[i] == 0.0) + // evaluators under test { - TEST_EQUALITY(data[i], 0.0); + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName1_q1); + names->push_back(resName + fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Scatter Name", "ScatterQ1"); + if (scatter_IC) { + pl.set("Basis", basis_q1.getConst()); + } else { + pl.set("Basis", basis_q1); + } + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", false); + pl.set("Scatter Initial Condition", scatter_IC); + + Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm->registerEvaluator(evaluator); + fm->requireField(*evaluator->evaluatedFields()[0]); } - else { - TEST_EQUALITY(data[i], target); - dd_count++; + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(resName + fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Scatter Name", "ScatterQEdge1"); + if (scatter_IC) { + pl.set("Basis", basis_qedge1.getConst()); + } else { + pl.set("Basis", basis_qedge1); + } + pl.set("Dependent Names", names); + pl.set("Dependent Map", names_map); + pl.set("Side Subcell Dimension", 1); + pl.set("Local Side ID", 2); + pl.set("Check Apply BC", 
false); + pl.set("Scatter Initial Condition", scatter_IC); + + Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm->registerEvaluator(evaluator); + fm->requireField(*evaluator->evaluatedFields()[0]); } - } - TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared - - Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(data)); - Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(dd_data)); - TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(1)->getLocalNumElements()); - TEST_EQUALITY(data.size(), dd_data.size()); - dd_count = 0; - for (int i = 0; i < data.size(); i++) - { - double target = 456.0 + myRank; - if (dd_data[i] == 0.0) + // support evaluators { - TEST_EQUALITY(data[i], 0.0); + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName1_q1); + names->push_back(fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_q1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + Teuchos::RCP>> tangent_names = + Teuchos::rcp(new std::vector>(2)); + for (std::size_t i = 0; i < numParams; ++i) + { + std::stringstream ss1, ss2; + ss1 << fieldName1_q1 << " Tangent " << i; + ss2 << fieldName2_q1 << " Tangent " << i; + (*tangent_names)[0].push_back(ss1.str()); + (*tangent_names)[1].push_back(ss2.str()); + } + pl.set("Tangent Names", tangent_names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + fm->registerEvaluator(evaluator); + } + for (std::size_t i = 0; i < numParams; ++i) { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + RCP> tangent_names = rcp(new std::vector); + names->push_back(fieldName1_q1); + names->push_back(fieldName2_q1); + { + std::stringstream ss1, ss2; + ss1 << fieldName1_q1 << " Tangent " << i; + ss2 << fieldName2_q1 << " 
Tangent " << i; + tangent_names->push_back(ss1.str()); + tangent_names->push_back(ss2.str()); + } + + Teuchos::ParameterList pl; + pl.set("Basis", basis_q1); + pl.set("DOF Names", tangent_names); + pl.set("Indexer Names", names); + + std::stringstream ss; + ss << "Tangent Container " << i; + pl.set("Global Data Key", ss.str()); + + Teuchos::RCP> evaluator = + lof->buildGatherTangent(pl); + + fm->registerEvaluator(evaluator); } - else { - TEST_EQUALITY(data[i], target); - dd_count++; + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_qedge1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + Teuchos::RCP>> tangent_names = + Teuchos::rcp(new std::vector>(1)); + for (std::size_t i = 0; i < numParams; ++i) + { + std::stringstream ss; + ss << fieldName_qedge1 << " Tangent " << i; + (*tangent_names)[0].push_back(ss.str()); + } + pl.set("Tangent Names", tangent_names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + fm->registerEvaluator(evaluator); + } + for (std::size_t i = 0; i < numParams; ++i) { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + RCP> tangent_names = rcp(new std::vector); + names->push_back(fieldName_qedge1); + { + std::stringstream ss; + ss << fieldName_qedge1 << " Tangent " << i; + tangent_names->push_back(ss.str()); + } + + Teuchos::ParameterList pl; + pl.set("Basis", basis_qedge1); + pl.set("DOF Names", tangent_names); + pl.set("Indexer Names", names); + + std::stringstream ss; + ss << "Tangent Container " << i; + pl.set("Global Data Key", ss.str()); + + Teuchos::RCP> evaluator = + lof->buildGatherTangent(pl); + + fm->registerEvaluator(evaluator); } - } - TEST_EQUALITY(dd_count, workset.num_cells); // there are 2 nodes on the side and the sides are not shared - - Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(data)); - 
Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(dd_data)); - TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(2)->getLocalNumElements()); - TEST_EQUALITY(data.size(), dd_data.size()); - dd_count = 0; - for (int i = 0; i < data.size(); i++) - { - double target = 789.0 + myRank; - if (dd_data[i] == 0.0) - { - TEST_EQUALITY(data[i], 0.0); + panzer::Traits::SD sd; + sd.worksets_ = work_sets; + + fm->postRegistrationSetup(sd); + + panzer::Traits::PED ped; + ped.gedc->addDataObject("Dirichlet Counter", dc_loc); + ped.gedc->addDataObject("Solution Gather Container", loc); + ped.gedc->addDataObject("Residual Scatter Container", loc); + for (size_t i=0; iaddDataObject(ss.str(), tangentContainers[i]); + } + std::vector params; + std::vector> paramContainers; + for (std::size_t i = 0; iaddDataObject(ss.str(),paramContainer->getGhostedLOC()); + paramContainers.push_back(paramContainer); } - else + Teuchos::RCP activeParams = + Teuchos::rcp(new panzer::ParameterList_GlobalEvaluationData(params)); + ped.gedc->addDataObject("PARAMETER_NAMES",activeParams); + fm->preEvaluate(ped); + + // run tests + ///////////////////////////////////////////////////////////// + + panzer::Workset &workset = (*work_sets)[0]; + workset.alpha = 0.0; + workset.beta = 2.0; // derivatives multiplied by 2 + workset.time = 0.0; + workset.evaluate_transient_terms = false; + + fm->evaluateFields(workset); + fm->postEvaluate(0); + + fm = Teuchos::null; + + // test Tangent fields + panzer::index_t dc_count(0); + Teuchos::ArrayRCP data, dc_data; + Teuchos::RCP> f_vec = t_loc->get_f_th(); + Teuchos::RCP> dc_vec = t_dc_loc->get_f_th(); + + // check all the residual values and the count + + Teuchos::rcp_dynamic_cast>(dc_vec)->getLocalData(Teuchos::ptrFromRef(dc_data)); + if (scatter_IC) { + Teuchos::rcp_dynamic_cast>(x_vec)->getLocalData(Teuchos::ptrFromRef(data)); + + TEST_EQUALITY(static_cast(data.size()), t_lof->getGhostedMap()->getLocalNumElements()); + 
TEST_EQUALITY(data.size(), dc_data.size()); + dc_count = 0; + for (int i = 0; i < data.size(); i++) + { + double target = 123.0 + myRank; + if (dc_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dc_count++; + } + } + // Filled everywhere + TEST_EQUALITY(dc_count, data.size()); + } else { + Teuchos::rcp_dynamic_cast>(f_vec)->getLocalData(Teuchos::ptrFromRef(data)); + TEST_EQUALITY(static_cast(data.size()), t_lof->getGhostedMap()->getLocalNumElements()); + TEST_EQUALITY(data.size(), dc_data.size()); + dc_count = 0; + for (int i = 0; i < data.size(); i++) + { + double target = 123.0 + myRank; + if (dc_data[i] == 0.0) + { + TEST_EQUALITY(data[i], 0.0); + } + else + { + TEST_EQUALITY(data[i], target); + dc_count++; + } + } + // there are 2 nodes or 1 edge on the side and the sides are not shared. + // 2 nodal functions, 1 edge function + TEST_EQUALITY(dc_count, 5 * workset.num_cells); + } + for (std::size_t i=0; i tan_data; + Teuchos::RCP> tan_vec = Teuchos::rcp_dynamic_cast(paramContainers[i]->getGhostedLOC())->get_f_th(); + Teuchos::rcp_dynamic_cast>(tan_vec)->getLocalData(Teuchos::ptrFromRef(tan_data)); + + for (int j = 0; j < tan_data.size(); ++j) + { + if (dc_data[j] == 0.) 
{ + TEST_EQUALITY(data[j],0.0); + } else { + const double target = .123 + myRank + i; + TEST_EQUALITY(tan_data[j],target); + } + } } } - TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared } +// TEUCHOS_UNIT_TEST(assembly, scatter_dirichlet_jacobian) +// { +// +//#ifdef HAVE_MPI +// Teuchos::RCP> tComm = Teuchos::rcp(new Teuchos::MpiComm(MPI_COMM_WORLD)); +//#else +// NOPE_PANZER_DOESNT_SUPPORT_SERIAL +//#endif +// +// int myRank = tComm->getRank(); +// +// const std::size_t workset_size = 4; +// const std::string fieldName1_q1 = "U"; +// const std::string fieldName2_q1 = "V"; +// const std::string fieldName_qedge1 = "B"; +// +// Teuchos::RCP mesh = buildMesh(2, 2); +// +// // build input physics block +// Teuchos::RCP basis_q1 = buildBasis(workset_size, "Q1"); +// Teuchos::RCP basis_qedge1 = buildBasis(workset_size, "QEdge1"); +// +// Teuchos::RCP ipb = Teuchos::parameterList(); +// testInitialization(ipb); +// +// const int default_int_order = 1; +// std::string eBlockID = "eblock-0_0"; +// Teuchos::RCP eqset_factory = Teuchos::rcp(new user_app::MyFactory); +// panzer::CellData cellData(workset_size, mesh->getCellTopology("eblock-0_0")); +// Teuchos::RCP gd = panzer::createGlobalData(); +// Teuchos::RCP physicsBlock = +// Teuchos::rcp(new PhysicsBlock(ipb, eBlockID, default_int_order, cellData, eqset_factory, gd, false)); +// +// Teuchos::RCP> work_sets = panzer_stk::buildWorksets(*mesh, physicsBlock->elementBlockID(), +// physicsBlock->getWorksetNeeds()); +// TEST_EQUALITY(work_sets->size(), 1); +// +// // build connection manager and field manager +// const Teuchos::RCP conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh)); +// RCP dofManager = Teuchos::rcp(new panzer::BlockedDOFManager(conn_manager, MPI_COMM_WORLD)); +// +// dofManager->addField(fieldName1_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); +// dofManager->addField(fieldName2_q1, 
Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); +// dofManager->addField(fieldName_qedge1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_qedge1->getIntrepid2Basis()))); +// +// std::vector> fieldOrder(3); +// fieldOrder[0].push_back(fieldName1_q1); +// fieldOrder[1].push_back(fieldName_qedge1); +// fieldOrder[2].push_back(fieldName2_q1); +// dofManager->setFieldOrder(fieldOrder); +// +// // dofManager->setOrientationsRequired(true); +// dofManager->buildGlobalUnknowns(); +// +// // setup linear object factory +// ///////////////////////////////////////////////////////////// +// +// Teuchos::RCP bt_lof = Teuchos::rcp(new TpetraBlockedLinObjFactoryType(tComm.getConst(), dofManager)); +// Teuchos::RCP> lof = bt_lof; +// Teuchos::RCP dd_loc = bt_lof->buildGhostedLinearObjContainer(); +// Teuchos::RCP loc = bt_lof->buildGhostedLinearObjContainer(); +// bt_lof->initializeGhostedContainer(LinearObjContainer::F, *dd_loc); +// dd_loc->initialize(); +// +// bt_lof->initializeGhostedContainer(LinearObjContainer::X | LinearObjContainer::F | LinearObjContainer::Mat, *loc); +// loc->initialize(); +// +// Teuchos::RCP b_dd_loc = Teuchos::rcp_dynamic_cast(dd_loc); +// Teuchos::RCP b_loc = Teuchos::rcp_dynamic_cast(loc); +// Teuchos::RCP> p_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_x()); +// Thyra::assign(p_vec->getNonconstVectorBlock(0).ptr(), 123.0 + myRank); +// Thyra::assign(p_vec->getNonconstVectorBlock(1).ptr(), 456.0 + myRank); +// Thyra::assign(p_vec->getNonconstVectorBlock(2).ptr(), 789.0 + myRank); +// +// auto blk_A = Teuchos::rcp_dynamic_cast>(b_loc->get_A()); +// double values[] = {123.0 + myRank, 456.0 + myRank, 789.0 + myRank}; +// +// for (int i = 0; i < 3; i++) +// for (int j = 0; j < 3; j++) +// { +// auto thyraOp = Teuchos::rcp_dynamic_cast(blk_A->getNonconstBlock(i, j), false); +// auto tpetraCrsMatrix = Teuchos::rcp_dynamic_cast(thyraOp->getTpetraOperator(), true); +// tpetraCrsMatrix->setAllToScalar(values[i] * 
values[j]); +// } +// +// // setup field manager, add evaluator under test +// ///////////////////////////////////////////////////////////// +// +// PHX::FieldManager fm; +// +// std::string resName = ""; +// Teuchos::RCP> names_map = +// Teuchos::rcp(new std::map); +// names_map->insert(std::make_pair(fieldName1_q1, resName + fieldName1_q1)); +// names_map->insert(std::make_pair(fieldName2_q1, resName + fieldName2_q1)); +// names_map->insert(std::make_pair(fieldName_qedge1, resName + fieldName_qedge1)); +// +// // evaluators under test +// { +// using Teuchos::RCP; +// using Teuchos::rcp; +// RCP> names = rcp(new std::vector); +// names->push_back(resName + fieldName1_q1); +// names->push_back(resName + fieldName2_q1); +// +// Teuchos::ParameterList pl; +// pl.set("Scatter Name", "ScatterQ1"); +// pl.set("Basis", basis_q1); +// pl.set("Dependent Names", names); +// pl.set("Dependent Map", names_map); +// pl.set("Side Subcell Dimension", 1); +// pl.set("Local Side ID", 2); +// pl.set("Check Apply BC", false); +// +// Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); +// +// TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); +// +// fm.registerEvaluator(evaluator); +// fm.requireField(*evaluator->evaluatedFields()[0]); +// } +// { +// using Teuchos::RCP; +// using Teuchos::rcp; +// RCP> names = rcp(new std::vector); +// names->push_back(resName + fieldName_qedge1); +// +// Teuchos::ParameterList pl; +// pl.set("Scatter Name", "ScatterQEdge1"); +// pl.set("Basis", basis_qedge1); +// pl.set("Dependent Names", names); +// pl.set("Dependent Map", names_map); +// pl.set("Side Subcell Dimension", 1); +// pl.set("Local Side ID", 2); +// pl.set("Check Apply BC", false); +// +// Teuchos::RCP> evaluator = lof->buildScatterDirichlet(pl); +// +// TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); +// +// fm.registerEvaluator(evaluator); +// fm.requireField(*evaluator->evaluatedFields()[0]); +// } +// +// // support evaluators +// { +// using Teuchos::RCP; +// 
using Teuchos::rcp; +// RCP> names = rcp(new std::vector); +// names->push_back(fieldName1_q1); +// names->push_back(fieldName2_q1); +// +// Teuchos::ParameterList pl; +// pl.set("Basis", basis_q1); +// pl.set("DOF Names", names); +// pl.set("Indexer Names", names); +// +// Teuchos::RCP> evaluator = lof->buildGather(pl); +// +// fm.registerEvaluator(evaluator); +// } +// { +// using Teuchos::RCP; +// using Teuchos::rcp; +// RCP> names = rcp(new std::vector); +// names->push_back(fieldName_qedge1); +// +// Teuchos::ParameterList pl; +// pl.set("Basis", basis_qedge1); +// pl.set("DOF Names", names); +// pl.set("Indexer Names", names); +// +// Teuchos::RCP> evaluator = lof->buildGather(pl); +// +// fm.registerEvaluator(evaluator); +// } +// +// std::vector derivative_dimensions; +// derivative_dimensions.push_back(12); +// fm.setKokkosExtendedDataTypeDimensions(derivative_dimensions); +// +// panzer::Traits::SD sd; +// sd.worksets_ = work_sets; +// +// fm.postRegistrationSetup(sd); +// +// // panzer::Traits::PED ped; +// // ped.dirichletData.ghostedCounter = dd_loc; +// // fm.preEvaluate(ped); +// panzer::Traits::PED ped; +// ped.gedc->addDataObject("Dirichlet Counter", dd_loc); +// ped.gedc->addDataObject("Solution Gather Container", loc); +// ped.gedc->addDataObject("Residual Scatter Container", loc); +// fm.preEvaluate(ped); +// +// // run tests +// ///////////////////////////////////////////////////////////// +// +// panzer::Workset &workset = (*work_sets)[0]; +// workset.alpha = 0.0; +// workset.beta = 2.0; // derivatives multiplied by 2 +// workset.time = 0.0; +// workset.evaluate_transient_terms = false; +// +// fm.evaluateFields(workset); +// +// // test Residual fields +// panzer::index_t dd_count(0); +// Teuchos::ArrayRCP data, dd_data; +// Teuchos::RCP> f_vec = Teuchos::rcp_dynamic_cast>(b_loc->get_f()); +// Teuchos::RCP> dd_vec = Teuchos::rcp_dynamic_cast>(b_dd_loc->get_f()); +// +// // check all the residual values. 
This is kind of crappy test since it simply checks twice the target +// // value and the target. Its this way because you add two entries across elements. +// +// Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(data)); +// Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(0))->getLocalData(Teuchos::ptrFromRef(dd_data)); +// TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(0)->getLocalNumElements()); +// TEST_EQUALITY(data.size(), dd_data.size()); +// dd_count = 0; +// for (int i = 0; i < data.size(); i++) +// { +// +// double target = 123.0 + myRank; +// if (dd_data[i] == 0.0) +// { +// TEST_EQUALITY(data[i], 0.0); +// } +// else +// { +// TEST_EQUALITY(data[i], target); +// dd_count++; +// } +// } +// TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared +// +// Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(data)); +// Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(1))->getLocalData(Teuchos::ptrFromRef(dd_data)); +// TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(1)->getLocalNumElements()); +// TEST_EQUALITY(data.size(), dd_data.size()); +// dd_count = 0; +// for (int i = 0; i < data.size(); i++) +// { +// +// double target = 456.0 + myRank; +// if (dd_data[i] == 0.0) +// { +// TEST_EQUALITY(data[i], 0.0); +// } +// else +// { +// TEST_EQUALITY(data[i], target); +// dd_count++; +// } +// } +// TEST_EQUALITY(dd_count, workset.num_cells); // there are 2 nodes on the side and the sides are not shared +// +// Teuchos::rcp_dynamic_cast>(f_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(data)); +// Teuchos::rcp_dynamic_cast>(dd_vec->getVectorBlock(2))->getLocalData(Teuchos::ptrFromRef(dd_data)); +// TEST_EQUALITY(static_cast(data.size()), b_loc->getMapForBlock(2)->getLocalNumElements()); +// TEST_EQUALITY(data.size(), dd_data.size()); +// dd_count = 0; +// for (int i = 0; i < data.size(); i++) 
+// { +// +// double target = 789.0 + myRank; +// if (dd_data[i] == 0.0) +// { +// TEST_EQUALITY(data[i], 0.0); +// } +// else +// { +// TEST_EQUALITY(data[i], target); +// dd_count++; +// } +// } +// TEST_EQUALITY(dd_count, 2 * workset.num_cells); // there are 2 nodes on the side and the sides are not shared +// } Teuchos::RCP buildBasis(std::size_t worksetSize, const std::string &basisName) { diff --git a/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_BlockedTpetra.hpp b/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_BlockedTpetra.hpp index 266eab9cc25c..6210f66a950c 100644 --- a/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_BlockedTpetra.hpp +++ b/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_BlockedTpetra.hpp @@ -14,6 +14,7 @@ #include "Phalanx_config.hpp" #include "Phalanx_Evaluator_Macros.hpp" #include "Phalanx_MDField.hpp" +#include "Phalanx_KokkosViewOfViews.hpp" #include "Teuchos_ParameterList.hpp" @@ -260,6 +261,105 @@ class ScatterDirichletResidual_BlockedTpetra +class ScatterDirichletResidual_BlockedTpetra + : public panzer::EvaluatorWithBaseImpl, + public PHX::EvaluatorDerived, + public panzer::CloneableEvaluator { + +public: + ScatterDirichletResidual_BlockedTpetra(const Teuchos::RCP & indexer) + : globalIndexer_(indexer) {} + + ScatterDirichletResidual_BlockedTpetra(const Teuchos::RCP & indexer, + const Teuchos::ParameterList& p); + + void postRegistrationSetup(typename TRAITS::SetupData d, + PHX::FieldManager& vm); + + void preEvaluate(typename TRAITS::PreEvalData d); + + void evaluateFields(typename TRAITS::EvalData workset); + + virtual Teuchos::RCP clone(const Teuchos::ParameterList & pl) const + { return Teuchos::rcp(new ScatterDirichletResidual_BlockedTpetra(globalIndexer_,pl)); } + +private: + typedef typename panzer::Traits::Tangent::ScalarT ScalarT; + typedef typename TRAITS::RealType RealType; + + typedef BlockedTpetraLinearObjContainer 
ContainerType; + typedef Tpetra::Vector VectorType; + typedef Tpetra::CrsMatrix CrsMatrixType; + typedef Tpetra::CrsGraph CrsGraphType; + typedef Tpetra::Map MapType; + typedef Tpetra::Import ImportType; + typedef Tpetra::Export ExportType; + + // dummy field so that the evaluator will have something to do + Teuchos::RCP scatterHolder_; + + // fields that need to be scattered will be put in this vector + std::vector< PHX::MDField > scatterFields_; + + // maps the local (field,element,basis) triplet to a global ID + // for scattering + Teuchos::RCP globalIndexer_; + + //! Vector of global indexers, one for each scattered field + //! respectively. This is the global indexer for the Thyra + //! ProductVector sub-block. + std::vector> fieldGlobalIndexers_; + + //! Field IDs in the local product vector block (not global field id) + std::vector fieldIds_; + + //! Returns the index into the Thyra ProductVector sub-block. Size + //! of number of fields to scatter. + std::vector productVectorBlockIndex_; + + // This maps the scattered field names to the DOF manager field + // For instance a Navier-Stokes map might look like + // fieldMap_["RESIDUAL_Velocity"] --> "Velocity" + // fieldMap_["RESIDUAL_Pressure"] --> "Pressure" + Teuchos::RCP > fieldMap_; + + //! Local indices for unknowns + PHX::View worksetLIDs_; + + //! Offset into the cell lids for each field + std::vector> fieldOffsets_; + + //! The local basis index corresponding to the fieldOffset_. Used to + //! index into the basis index of MDFields. This is only required + //! for tangent/normal BCs. + std::vector> basisIndexForMDFieldOffsets_; + + std::size_t side_subcell_dim_; + std::size_t local_side_id_; + + Teuchos::RCP > dirichletCounter_; + std::string globalDataKey_; // what global data does this fill? + Teuchos::RCP > blockedContainer_; + + //! 
If set to true, allows runtime disabling of dirichlet BCs on node-by-node basis + bool checkApplyBC_; + + // If set to true, scattering an initial condition + bool scatterIC_; + + // Allows runtime disabling of dirichlet BCs on node-by-node basis + std::vector< PHX::MDField > applyBC_; + + // Storage for the tangent data + PHX::ViewOfViews<2,Kokkos::View> dfdpFieldsVoV_; + + ScatterDirichletResidual_BlockedTpetra() {} +}; + } #ifdef Panzer_BUILD_HESSIAN_SUPPORT diff --git a/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_BlockedTpetra_impl.hpp b/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_BlockedTpetra_impl.hpp index 8af54d7c30aa..a975889e5a0b 100644 --- a/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_BlockedTpetra_impl.hpp +++ b/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_BlockedTpetra_impl.hpp @@ -24,6 +24,7 @@ #include "Panzer_BlockedDOFManager.hpp" #include "Panzer_PureBasis.hpp" #include "Panzer_BlockedTpetraLinearObjContainer.hpp" +#include "Panzer_ParameterList_GlobalEvaluationData.hpp" #include "Panzer_GlobalEvaluationDataContainer.hpp" #include "Phalanx_DataLayout_MDALayout.hpp" @@ -125,7 +126,7 @@ ScatterDirichletResidual_BlockedTpetra(const Teuchos::RCP("Global Data Key")) globalDataKey_ = p.get("Global Data Key"); - this->setName(scatterName+" Scatter Residual"); + this->setName(scatterName+" Scatter Dirichlet Residual"); } // ********************************************************************** @@ -339,7 +340,7 @@ ScatterDirichletResidual_BlockedTpetra(const Teuchos::RCP("Global Data Key")) globalDataKey_ = p.get("Global Data Key"); - this->setName(scatterName+" Scatter Residual (Jacobian)"); + this->setName(scatterName+" Scatter Dirichlet Residual (Jacobian)"); } // ********************************************************************** @@ -617,4 +618,253 @@ evaluateFields(typename TRAITS::EvalData workset) // 
********************************************************************** +// ********************************************************************** +// Specialization: Tangent +// ********************************************************************** + + +template +panzer::ScatterDirichletResidual_BlockedTpetra:: +ScatterDirichletResidual_BlockedTpetra(const Teuchos::RCP & indexer, + const Teuchos::ParameterList& p) + : globalIndexer_(indexer) + , globalDataKey_("Residual Scatter Container") +{ + std::string scatterName = p.get("Scatter Name"); + scatterHolder_ = + Teuchos::rcp(new PHX::Tag(scatterName,Teuchos::rcp(new PHX::MDALayout(0)))); + + // get names to be evaluated + const std::vector& names = + *(p.get< Teuchos::RCP< std::vector > >("Dependent Names")); + + // grab map from evaluated names to field names + fieldMap_ = p.get< Teuchos::RCP< std::map > >("Dependent Map"); + + // determine if we are scattering an initial condition + scatterIC_ = p.isParameter("Scatter Initial Condition") ? p.get("Scatter Initial Condition") : false; + + Teuchos::RCP dl = (!scatterIC_) ? + p.get< Teuchos::RCP >("Basis")->functional : + p.get< Teuchos::RCP >("Basis")->functional; + if (!scatterIC_) { + side_subcell_dim_ = p.get("Side Subcell Dimension"); + local_side_id_ = p.get("Local Side ID"); + } + + // build the vector of fields that this is dependent on + scatterFields_.resize(names.size()); + for (std::size_t eq = 0; eq < names.size(); ++eq) { + scatterFields_[eq] = PHX::MDField(names[eq],dl); + + // tell the field manager that we depend on this field + this->addDependentField(scatterFields_[eq]); + } + + checkApplyBC_ = p.isParameter("Check Apply BC") ? 
p.get("Check Apply BC") : false; + applyBC_.resize(names.size()); // must allocate (even if not used) to support lambda capture + if (checkApplyBC_) { + for (std::size_t eq = 0; eq < names.size(); ++eq) { + applyBC_[eq] = PHX::MDField(std::string("APPLY_BC_")+fieldMap_->find(names[eq])->second,dl); + this->addDependentField(applyBC_[eq]); + } + } + + // this is what this evaluator provides + this->addEvaluatedField(*scatterHolder_); + + if (p.isType("Global Data Key")) + globalDataKey_ = p.get("Global Data Key"); + + this->setName(scatterName+" Scatter Dirichlet Residual"); +} + +// ********************************************************************** +template +void panzer::ScatterDirichletResidual_BlockedTpetra:: +postRegistrationSetup(typename TRAITS::SetupData d, + PHX::FieldManager& /* fm */) +{ + const Workset & workset_0 = (*d.worksets_)[0]; + const std::string blockId = this->wda(workset_0).block_id; + + fieldIds_.resize(scatterFields_.size()); + fieldOffsets_.resize(scatterFields_.size()); + basisIndexForMDFieldOffsets_.resize(scatterFields_.size()); + fieldGlobalIndexers_.resize(scatterFields_.size()); + productVectorBlockIndex_.resize(scatterFields_.size()); + int maxElementBlockGIDCount = -1; + for(std::size_t fd=0;fdfind(scatterFields_[fd].fieldTag().name())->second; + + const int globalFieldNum = globalIndexer_->getFieldNum(fieldName); // Field number in the aggregate BlockDOFManager + productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum); + fieldGlobalIndexers_[fd] = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]]; + fieldIds_[fd] = fieldGlobalIndexers_[fd]->getFieldNum(fieldName); // Field number in the sub-global-indexer + + // Offsets and basisIndex depend on whether scattering IC or Dirichlet BC + if (!scatterIC_) { + const auto& offsetPair = fieldGlobalIndexers_[fd]->getGIDFieldOffsets_closure(blockId,fieldIds_[fd],side_subcell_dim_,local_side_id_); + { + const auto& offsets = offsetPair.first; + 
fieldOffsets_[fd] = PHX::View("ScatterDirichletResidual_BlockedTpetra(Tangent):fieldOffsets",offsets.size()); + auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]); + for (std::size_t i=0; i < offsets.size(); ++i) + hostOffsets(i) = offsets[i]; + Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets); + } + { + const auto& basisIndex = offsetPair.second; + basisIndexForMDFieldOffsets_[fd] = PHX::View("ScatterDirichletResidual_BlockedTpetra(Tangent):basisIndexForMDFieldOffsets",basisIndex.size()); + auto hostBasisIndex = Kokkos::create_mirror_view(basisIndexForMDFieldOffsets_[fd]); + for (std::size_t i=0; i < basisIndex.size(); ++i) + hostBasisIndex(i) = basisIndex[i]; + Kokkos::deep_copy(basisIndexForMDFieldOffsets_[fd], hostBasisIndex); + } + } + else { + // For ICs, only need offsets, not basisIndex + const std::vector& offsets = fieldGlobalIndexers_[fd]->getGIDFieldOffsets(blockId,fieldIds_[fd]); + fieldOffsets_[fd] = PHX::View("ScatterDirichletResidual_BlockedTpetra(Tangent):fieldOffsets",offsets.size()); + auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]); + for (std::size_t i=0; i < offsets.size(); ++i) + hostOffsets(i) = offsets[i]; + Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets); + } + + maxElementBlockGIDCount = std::max(fieldGlobalIndexers_[fd]->getElementBlockGIDCount(blockId),maxElementBlockGIDCount); + } + + // We will use one workset lid view for all fields, but has to be + // sized big enough to hold the largest elementBlockGIDCount in the + // ProductVector. 
+ worksetLIDs_ = PHX::View("ScatterResidual_BlockedTpetra(Tangent):worksetLIDs", + scatterFields_[0].extent(0), + maxElementBlockGIDCount); +} + +// ********************************************************************** +template +void panzer::ScatterDirichletResidual_BlockedTpetra:: +preEvaluate(typename TRAITS::PreEvalData d) +{ + + // this is the list of parameters and their names that this scatter has to account for + std::vector activeParameters = + Teuchos::rcp_dynamic_cast(d.gedc->getDataObject("PARAMETER_NAMES"))->getActiveParameters(); + + const int numBlocks = static_cast(globalIndexer_->getFieldDOFManagers().size()); + + dfdpFieldsVoV_.initialize("ScatterResidual_Tpetra::dfdpFieldsVoV_",activeParameters.size(),numBlocks); + + for(std::size_t i=0;i paramBlockedContainer = Teuchos::rcp_dynamic_cast(d.gedc->getDataObject(activeParameters[i]),true); + Teuchos::RCP> productVector = + Teuchos::rcp_dynamic_cast>(paramBlockedContainer->get_f(),true); + for(int j=0;j>(productVector->getNonconstVectorBlock(j),true))->getTpetraVector()); + const auto& dfdp_view = tpetraBlock.getLocalViewDevice(Tpetra::Access::ReadWrite); + dfdpFieldsVoV_.addView(dfdp_view,i,j); + } + } + + dfdpFieldsVoV_.syncHostToDevice(); + + // extract dirichlet counter from container + Teuchos::RCP blockContainer + = Teuchos::rcp_dynamic_cast(d.gedc->getDataObject("Dirichlet Counter"),true); + + dirichletCounter_ = Teuchos::rcp_dynamic_cast >(blockContainer->get_f(),true); + TEUCHOS_ASSERT(!Teuchos::is_null(dirichletCounter_)); + + // extract linear object container + blockedContainer_ = Teuchos::rcp_dynamic_cast(d.gedc->getDataObject(globalDataKey_),true); + TEUCHOS_ASSERT(!Teuchos::is_null(blockedContainer_)); +} + +// ********************************************************************** +template +void panzer::ScatterDirichletResidual_BlockedTpetra:: +evaluateFields(typename TRAITS::EvalData workset) +{ + using Teuchos::RCP; + using Teuchos::rcp_dynamic_cast; + using Thyra::VectorBase; + 
using Thyra::ProductVectorBase; + + const auto& localCellIds = this->wda(workset).cell_local_ids_k; + + RCP > thyraScatterTarget = (!scatterIC_) ? + rcp_dynamic_cast >(blockedContainer_->get_f(),true) : + rcp_dynamic_cast >(blockedContainer_->get_x(),true); + + // Loop over scattered fields + int currentWorksetLIDSubBlock = -1; + for (std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) { + // workset LIDs only change for different sub blocks + if (productVectorBlockIndex_[fieldIndex] != currentWorksetLIDSubBlock) { + fieldGlobalIndexers_[fieldIndex]->getElementLIDs(localCellIds,worksetLIDs_); + currentWorksetLIDSubBlock = productVectorBlockIndex_[fieldIndex]; + } + + // Get Scatter target block + auto& tpetraScatterTarget = *((rcp_dynamic_cast>(thyraScatterTarget->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),true))->getTpetraVector()); + const auto& kokkosScatterTarget = tpetraScatterTarget.getLocalViewDevice(Tpetra::Access::ReadWrite); + + // Get dirichlet counter block + auto& tpetraDirichletCounter = *((rcp_dynamic_cast>(dirichletCounter_->getNonconstVectorBlock(productVectorBlockIndex_[fieldIndex]),true))->getTpetraVector()); + const auto& kokkosDirichletCounter = tpetraDirichletCounter.getLocalViewDevice(Tpetra::Access::ReadWrite); + + // Class data fields for lambda capture + const auto fieldOffsets = fieldOffsets_[fieldIndex]; + const auto basisIndices = basisIndexForMDFieldOffsets_[fieldIndex]; + const auto worksetLIDs = worksetLIDs_; + const auto fieldValues = scatterFields_[fieldIndex].get_static_view(); + const auto applyBC = applyBC_[fieldIndex].get_static_view(); + const bool checkApplyBC = checkApplyBC_; + const auto& tangentFieldsDevice = dfdpFieldsVoV_.getViewDevice(); + const auto& kokkosTangents = Kokkos::subview(tangentFieldsDevice,Kokkos::ALL(),productVectorBlockIndex_[fieldIndex]); + const double num_params = Kokkos::dimension_scalar(fieldValues)-1; + + if (!scatterIC_) { + + 
Kokkos::parallel_for(Kokkos::RangePolicy(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) { + for (int basis=0; basis < static_cast(fieldOffsets.size()); ++basis) { + const int lid = worksetLIDs(cell,fieldOffsets(basis)); + if (lid < 0) // not on this processor! + continue; + const int basisIndex = basisIndices(basis); + + // Possible warp divergence for hierarchic + if (checkApplyBC) + if (!applyBC(cell,basisIndex)) + continue; + + kokkosScatterTarget(lid,0) = fieldValues(cell,basisIndex).val(); + for(int i_param=0; i_param(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) { + for (int basis=0; basis < static_cast(fieldOffsets.size()); ++basis) { + const int lid = worksetLIDs(cell,fieldOffsets(basis)); + if (lid < 0) // not on this processor! + continue; + kokkosScatterTarget(lid,0) = fieldValues(cell,basis).val(); + for(int i_param=0; i_param "Pressure" Teuchos::RCP > fieldMap_; - std::size_t num_nodes; - std::size_t side_subcell_dim_; std::size_t local_side_id_; @@ -116,6 +115,10 @@ class ScatterDirichletResidual_Tpetra > applyBC_; + + PHX::View scratch_lids_; + std::vector > scratch_offsets_; + std::vector > scratch_basisIds_; }; // ************************************************************** @@ -146,6 +149,7 @@ class ScatterDirichletResidual_Tpetra LOC; // dummy field so that the evaluator will have something to do @@ -165,8 +169,6 @@ class ScatterDirichletResidual_Tpetra "Pressure" Teuchos::RCP > fieldMap_; - std::size_t num_nodes; - std::size_t side_subcell_dim_; std::size_t local_side_id_; @@ -186,6 +188,13 @@ class ScatterDirichletResidual_Tpetra > applyBC_; + + PHX::View scratch_lids_; + std::vector > scratch_offsets_; + std::vector > scratch_basisIds_; + + /// Storage for the tangent data + PHX::ViewOfViews<1,Kokkos::View> dfdpFieldsVoV_; }; // ************************************************************** diff --git a/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_Tpetra_impl.hpp 
b/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_Tpetra_impl.hpp index dd307eb2fb40..f45a3e539b23 100644 --- a/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_Tpetra_impl.hpp +++ b/packages/panzer/disc-fe/src/evaluators/Panzer_ScatterDirichletResidual_Tpetra_impl.hpp @@ -59,10 +59,12 @@ ScatterDirichletResidual_Tpetra(const Teuchos::RCP & indexe if (!scatterIC_) { side_subcell_dim_ = p.get("Side Subcell Dimension"); local_side_id_ = p.get("Local Side ID"); + scratch_basisIds_.resize(names.size()); } // build the vector of fields that this is dependent on scatterFields_.resize(names.size()); + scratch_offsets_.resize(names.size()); for (std::size_t eq = 0; eq < names.size(); ++eq) { scatterFields_[eq] = PHX::MDField(names[eq],dl); @@ -91,20 +93,40 @@ ScatterDirichletResidual_Tpetra(const Teuchos::RCP & indexe // ********************************************************************** template void panzer::ScatterDirichletResidual_Tpetra:: -postRegistrationSetup(typename TRAITS::SetupData /* d */, +postRegistrationSetup(typename TRAITS::SetupData d, PHX::FieldManager& /* fm */) { fieldIds_.resize(scatterFields_.size()); + const Workset & workset_0 = (*d.worksets_)[0]; + std::string blockId = this->wda(workset_0).block_id; // load required field numbers for fast use for(std::size_t fd=0;fdfind(scatterFields_[fd].fieldTag().name())->second; fieldIds_[fd] = globalIndexer_->getFieldNum(fieldName); - } - // get the number of nodes (Should be renamed basis) - num_nodes = scatterFields_[0].extent(1); + if (!scatterIC_) { + const std::pair,std::vector > & indicePair + = globalIndexer_->getGIDFieldOffsets_closure(blockId,fieldIds_[fd], side_subcell_dim_, local_side_id_); + const std::vector & offsets = indicePair.first; + const std::vector & basisIdMap = indicePair.second; + + scratch_offsets_[fd] = PHX::View("offsets",offsets.size()); + Kokkos::deep_copy(scratch_offsets_[fd], Kokkos::View(offsets.data(), offsets.size())); + + 
scratch_basisIds_[fd] = PHX::View("basisIds",basisIdMap.size()); + Kokkos::deep_copy(scratch_basisIds_[fd], Kokkos::View(basisIdMap.data(), basisIdMap.size())); + + } else { + const std::vector & offsets = globalIndexer_->getGIDFieldOffsets(blockId,fieldIds_[fd]); + scratch_offsets_[fd] = PHX::View("offsets",offsets.size()); + Kokkos::deep_copy(scratch_offsets_[fd], Kokkos::View(offsets.data(), offsets.size())); + } + } + + scratch_lids_ = PHX::View("lids",scatterFields_[0].extent(0), + globalIndexer_->getElementBlockGIDCount(blockId)); } // ********************************************************************** @@ -132,6 +154,85 @@ preEvaluate(typename TRAITS::PreEvalData d) } } +// ********************************************************************** +namespace panzer { +namespace { + +template +class ScatterDirichletResidual_Residual_Functor { +public: + typedef typename PHX::Device execution_space; + typedef PHX::MDField ScalarFieldType; + typedef PHX::MDField BoolFieldType; + + Kokkos::View r_data; + Kokkos::View dirichlet_counter; + + PHX::View lids; // local indices for unknowns + PHX::View offsets; // how to get a particular field + PHX::View basisIds; + ScalarFieldType field; + BoolFieldType applyBC; + + bool checkApplyBC; + + KOKKOS_INLINE_FUNCTION + void operator()(const unsigned int cell) const + { + + // loop over the basis functions (currently they are nodes) + for(std::size_t basis=0; basis < offsets.extent(0); basis++) { + int offset = offsets(basis); + LO lid = lids(cell,offset); + if (lid<0) continue; // not on this processor + + int basisId = basisIds(basis); + if (checkApplyBC) + if(!applyBC(cell,basisId)) continue; + + r_data(lid,0) = field(cell,basisId); + + // record that you set a dirichlet condition + dirichlet_counter(lid,0) = 1.0; + + } // end basis + } +}; + +template +class ScatterDirichletResidualIC_Residual_Functor { +public: + typedef typename PHX::Device execution_space; + typedef PHX::MDField FieldType; + + Kokkos::View r_data; + 
Kokkos::View dirichlet_counter; + + PHX::View lids; // local indices for unknowns + PHX::View offsets; // how to get a particular field + FieldType field; + + KOKKOS_INLINE_FUNCTION + void operator()(const unsigned int cell) const + { + + // loop over the basis functions (currently they are nodes) + for(std::size_t basis=0; basis < offsets.extent(0); basis++) { + int offset = offsets(basis); + LO lid = lids(cell,offset); + if (lid<0) continue; // not on this processor + + r_data(lid,0) = field(cell,basis); + + // record that you set a dirichlet condition + dirichlet_counter(lid,0) = 1.0; + + } // end basis + } +}; +} +} + // ********************************************************************** template void panzer::ScatterDirichletResidual_Tpetra:: @@ -142,83 +243,44 @@ evaluateFields(typename TRAITS::EvalData workset) // for convenience pull out some objects from workset std::string blockId = this->wda(workset).block_id; - const std::vector & localCellIds = this->wda(workset).cell_local_ids; - - Teuchos::RCP r = (!scatterIC_) ? - tpetraContainer_->get_f() : - tpetraContainer_->get_x(); - Teuchos::ArrayRCP r_array = r->get1dViewNonConst(); - Teuchos::ArrayRCP dc_array = dirichletCounter_->get1dViewNonConst(); + globalIndexer_->getElementLIDs(this->wda(workset).cell_local_ids_k,scratch_lids_); - // NOTE: A reordering of these loops will likely improve performance - // The "getGIDFieldOffsets may be expensive. However the - // "getElementGIDs" can be cheaper. However the lookup for LIDs - // may be more expensive! + Teuchos::RCP r = (!scatterIC_) ? 
+ tpetraContainer_->get_f() : + tpetraContainer_->get_x(); + if (scatterIC_) { + ScatterDirichletResidualIC_Residual_Functor functor; + functor.r_data = r->getLocalViewDevice(Tpetra::Access::ReadWrite); + functor.lids = scratch_lids_; + functor.dirichlet_counter = dirichletCounter_->getLocalViewDevice(Tpetra::Access::ReadWrite); + // for each field, do a parallel for loop + for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) { + functor.offsets = scratch_offsets_[fieldIndex]; + functor.field = scatterFields_[fieldIndex]; - // loop over each field to be scattered - for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) { - int fieldNum = fieldIds_[fieldIndex]; - auto scatterFields_h = Kokkos::create_mirror_view(scatterFields_[fieldIndex].get_static_view()); - Kokkos::deep_copy(scatterFields_h, scatterFields_[fieldIndex].get_static_view()); - - // scatter operation for each cell in workset - for(std::size_t worksetCellIndex=0;worksetCellIndexgetElementGIDs(cellLocalId,GIDs); + Kokkos::parallel_for(workset.num_cells,functor); + } + } else { + ScatterDirichletResidual_Residual_Functor functor; + functor.r_data = r->getLocalViewDevice(Tpetra::Access::ReadWrite); + functor.lids = scratch_lids_; + functor.dirichlet_counter = dirichletCounter_->getLocalViewDevice(Tpetra::Access::ReadWrite); + + // for each field, do a parallel for loop + for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) { + functor.offsets = scratch_offsets_[fieldIndex]; + functor.field = scatterFields_[fieldIndex]; + if (checkApplyBC_) functor.applyBC = applyBC_[fieldIndex]; + functor.checkApplyBC = checkApplyBC_; + functor.basisIds = scratch_basisIds_[fieldIndex]; + + Kokkos::parallel_for(workset.num_cells,functor); + } + } - // caculate the local IDs for this element - LIDs.resize(GIDs.size()); - for(std::size_t i=0;igetMap()->getLocalElement(GIDs[i]); - - if (!scatterIC_) { - // this call "should" get the right 
ordering according to the Intrepid2 basis - const std::pair,std::vector > & indicePair - = globalIndexer_->getGIDFieldOffsets_closure(blockId,fieldNum, side_subcell_dim_, local_side_id_); - const std::vector & elmtOffset = indicePair.first; - const std::vector & basisIdMap = indicePair.second; - - // loop over basis functions - for(std::size_t basis=0;basis & elmtOffset = globalIndexer_->getGIDFieldOffsets(blockId,fieldNum); - - // loop over basis functions - for(std::size_t basis=0;basis & indexe if (!scatterIC_) { side_subcell_dim_ = p.get("Side Subcell Dimension"); local_side_id_ = p.get("Local Side ID"); + scratch_basisIds_.resize(names.size()); } // build the vector of fields that this is dependent on scatterFields_.resize(names.size()); + scratch_offsets_.resize(names.size()); for (std::size_t eq = 0; eq < names.size(); ++eq) { scatterFields_[eq] = PHX::MDField(names[eq],dl); @@ -285,20 +349,41 @@ ScatterDirichletResidual_Tpetra(const Teuchos::RCP & indexe // ********************************************************************** template void panzer::ScatterDirichletResidual_Tpetra:: -postRegistrationSetup(typename TRAITS::SetupData /* d */, +postRegistrationSetup(typename TRAITS::SetupData d, PHX::FieldManager& /* fm */) { fieldIds_.resize(scatterFields_.size()); + const Workset & workset_0 = (*d.worksets_)[0]; + std::string blockId = this->wda(workset_0).block_id; // load required field numbers for fast use for(std::size_t fd=0;fdfind(scatterFields_[fd].fieldTag().name())->second; fieldIds_[fd] = globalIndexer_->getFieldNum(fieldName); - } - // get the number of nodes (Should be renamed basis) - num_nodes = scatterFields_[0].extent(1); + if (!scatterIC_) { + const std::pair,std::vector > & indicePair + = globalIndexer_->getGIDFieldOffsets_closure(blockId,fieldIds_[fd], side_subcell_dim_, local_side_id_); + const std::vector & offsets = indicePair.first; + const std::vector & basisIdMap = indicePair.second; + + scratch_offsets_[fd] = 
PHX::View("offsets",offsets.size()); + Kokkos::deep_copy(scratch_offsets_[fd], Kokkos::View(offsets.data(), offsets.size())); + + scratch_basisIds_[fd] = PHX::View("basisIds",basisIdMap.size()); + Kokkos::deep_copy(scratch_basisIds_[fd], Kokkos::View(basisIdMap.data(), basisIdMap.size())); + + } else { + const std::vector & offsets = globalIndexer_->getGIDFieldOffsets(blockId,fieldIds_[fd]); + scratch_offsets_[fd] = PHX::View("offsets",offsets.size()); + Kokkos::deep_copy(scratch_offsets_[fd], Kokkos::View(offsets.data(), offsets.size())); + } + } + + scratch_lids_ = PHX::View("lids",scatterFields_[0].extent(0), + globalIndexer_->getElementBlockGIDCount(blockId)); + } // ********************************************************************** @@ -332,15 +417,111 @@ preEvaluate(typename TRAITS::PreEvalData d) std::vector activeParameters = rcp_dynamic_cast(d.gedc->getDataObject("PARAMETER_NAMES"))->getActiveParameters(); - // ETP 02/03/16: This code needs to be updated to properly handle scatterIC_ - TEUCHOS_ASSERT(!scatterIC_); - dfdp_vectors_.clear(); + dfdpFieldsVoV_.initialize("ScatterResidual_Tpetra::dfdpFieldsVoV_",activeParameters.size()); + for(std::size_t i=0;i vec = rcp_dynamic_cast(d.gedc->getDataObject(activeParameters[i]),true)->get_f(); - Teuchos::ArrayRCP vec_array = vec->get1dViewNonConst(); - dfdp_vectors_.push_back(vec_array); + auto dfdp_view = vec->getLocalViewDevice(Tpetra::Access::ReadWrite); + + dfdpFieldsVoV_.addView(dfdp_view,i); + } + + dfdpFieldsVoV_.syncHostToDevice(); + +} + +// ********************************************************************** +namespace panzer { +namespace { + +template +class ScatterDirichletResidual_Tangent_Functor { +public: + typedef typename PHX::Device execution_space; + typedef PHX::MDField ScalarFieldType; + typedef PHX::MDField BoolFieldType; + + Kokkos::View r_data; + Kokkos::View dirichlet_counter; + + Kokkos::View*> dfdp_fields; // tangent fields + double num_params; + + PHX::View lids; // local indices 
for unknowns + PHX::View offsets; // how to get a particular field + PHX::View basisIds; + ScalarFieldType field; + BoolFieldType applyBC; + + bool checkApplyBC; + + KOKKOS_INLINE_FUNCTION + void operator()(const unsigned int cell) const + { + + // loop over the basis functions (currently they are nodes) + for(std::size_t basis=0; basis < offsets.extent(0); basis++) { + int offset = offsets(basis); + LO lid = lids(cell,offset); + if (lid<0) continue; // not on this processor + + int basisId = basisIds(basis); + if (checkApplyBC) + if(!applyBC(cell,basisId)) continue; + + r_data(lid,0) = field(cell,basisId).val(); + + // loop over the tangents + for(int i_param=0; i_param +class ScatterDirichletResidualIC_Tangent_Functor { +public: + typedef typename PHX::Device execution_space; + typedef PHX::MDField FieldType; + + Kokkos::View r_data; + Kokkos::View dirichlet_counter; + + Kokkos::View*> dfdp_fields; // tangent fields + double num_params; + + PHX::View lids; // local indices for unknowns + PHX::View offsets; // how to get a particular field + FieldType field; + + KOKKOS_INLINE_FUNCTION + void operator()(const unsigned int cell) const + { + + // loop over the basis functions (currently they are nodes) + for(std::size_t basis=0; basis < offsets.extent(0); basis++) { + int offset = offsets(basis); + LO lid = lids(cell,offset); + if (lid<0) continue; // not on this processor + + r_data(lid,0) = field(cell,basis).val(); + + // loop over the tangents + for(int i_param=0; i_paramwda(workset).block_id; - const std::vector & localCellIds = this->wda(workset).cell_local_ids; + + globalIndexer_->getElementLIDs(this->wda(workset).cell_local_ids_k,scratch_lids_); Teuchos::RCP r = (!scatterIC_) ? 
tpetraContainer_->get_f() : tpetraContainer_->get_x(); - Teuchos::ArrayRCP r_array = r->get1dViewNonConst(); - Teuchos::ArrayRCP dc_array = dirichletCounter_->get1dViewNonConst(); + if (scatterIC_) { + ScatterDirichletResidualIC_Tangent_Functor functor; + functor.r_data = r->getLocalViewDevice(Tpetra::Access::ReadWrite); + functor.lids = scratch_lids_; + functor.dirichlet_counter = dirichletCounter_->getLocalViewDevice(Tpetra::Access::ReadWrite); + functor.dfdp_fields = dfdpFieldsVoV_.getViewDevice(); - // NOTE: A reordering of these loops will likely improve performance - // The "getGIDFieldOffsets may be expensive. However the - // "getElementGIDs" can be cheaper. However the lookup for LIDs - // may be more expensive! + // for each field, do a parallel for loop + for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) { + functor.offsets = scratch_offsets_[fieldIndex]; + functor.field = scatterFields_[fieldIndex]; + functor.num_params = Kokkos::dimension_scalar(scatterFields_[fieldIndex].get_view())-1; + Kokkos::parallel_for(workset.num_cells,functor); + } + } else { + ScatterDirichletResidual_Tangent_Functor functor; + functor.r_data = r->getLocalViewDevice(Tpetra::Access::ReadWrite); + functor.lids = scratch_lids_; + functor.dirichlet_counter = dirichletCounter_->getLocalViewDevice(Tpetra::Access::ReadWrite); + functor.dfdp_fields = dfdpFieldsVoV_.getViewDevice(); + + // for each field, do a parallel for loop + for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) { + functor.offsets = scratch_offsets_[fieldIndex]; + functor.field = scatterFields_[fieldIndex]; + if (checkApplyBC_) functor.applyBC = applyBC_[fieldIndex]; + functor.checkApplyBC = checkApplyBC_; + functor.basisIds = scratch_basisIds_[fieldIndex]; + functor.num_params = Kokkos::dimension_scalar(scatterFields_[fieldIndex].get_view())-1; + + Kokkos::parallel_for(workset.num_cells,functor); + } + } - // scatter operation for each cell in workset - 
for(std::size_t worksetCellIndex=0;worksetCellIndexgetElementGIDs(cellLocalId,GIDs); - - // caculate the local IDs for this element - LIDs.resize(GIDs.size()); - for(std::size_t i=0;igetMap()->getLocalElement(GIDs[i]); - - // loop over each field to be scattered - for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) { - int fieldNum = fieldIds_[fieldIndex]; - - if (!scatterIC_) { - // this call "should" get the right ordering according to the Intrepid2 basis - const std::pair,std::vector > & indicePair - = globalIndexer_->getGIDFieldOffsets_closure(blockId,fieldNum, side_subcell_dim_, local_side_id_); - const std::vector & elmtOffset = indicePair.first; - const std::vector & basisIdMap = indicePair.second; - - // loop over basis functions - for(std::size_t basis=0;basis & elmtOffset = globalIndexer_->getGIDFieldOffsets(blockId,fieldNum); - - // loop over basis functions - for(std::size_t basis=0;basis { // const ordinal_type team_size_factor[2] = { 16, 16 }, vector_size_factor[2] = { 32, 32}; const ordinal_type team_size_factor[2] = {64, 64}, vector_size_factor[2] = {8, 4}; const ordinal_type team_size_update[2] = {16, 8}, vector_size_update[2] = {32, 32}; + // returned value from team Chol + colind_view d_rval("rval",1); + auto h_rval = Kokkos::create_mirror_view(host_memory_space(), d_rval); { typedef TeamFunctor_FactorizeChol functor_type; + functor_type functor(_info, _factorize_mode, _level_sids, _buf, d_rval.data()); + #if defined(TACHO_TEST_LEVELSET_TOOLS_KERNEL_OVERHEAD) typedef Kokkos::TeamPolicy, exec_space, typename functor_type::DummyTag> team_policy_factorize; @@ -2204,11 +2209,8 @@ class NumericToolsLevelSet : public NumericToolsBase { typedef Kokkos::TeamPolicy, exec_space, typename functor_type::UpdateTag> team_policy_update; #endif - - int rval = 0; team_policy_factor policy_factor(1, 1, 1); team_policy_update policy_update(1, 1, 1); - functor_type functor(_info, _factorize_mode, _level_sids, _buf, &rval); // get max 
vector size const ordinal_type vmax = policy_factor.vector_length_max(); @@ -2259,10 +2261,14 @@ class NumericToolsLevelSet : public NumericToolsBase { Kokkos::fence(); time_device += tick.seconds(); tick.reset(); } - Kokkos::fence(); + Kokkos::deep_copy(h_rval, d_rval); + int rval = h_rval(0); if (rval != 0) { TACHO_TEST_FOR_EXCEPTION(rval, std::runtime_error, "POTRF (team) returns non-zero error code."); } + //if (_status != 0) { + // TACHO_TEST_FOR_EXCEPTION(rval, std::runtime_error, "POTRF (device) returns non-zero error code."); + //} Kokkos::parallel_for("update factor", policy_update, functor); if (verbose) { @@ -3926,8 +3932,13 @@ class NumericToolsLevelSet : public NumericToolsBase { const ordinal_type team_size_factor[2] = {64, 64}, vector_size_factor[2] = {8, 4}; #endif const ordinal_type team_size_update[2] = {16, 8}, vector_size_update[2] = {32, 32}; + // returned value from team LDL + colind_view d_rval("rval",1); + auto h_rval = Kokkos::create_mirror_view(host_memory_space(), d_rval); { typedef TeamFunctor_FactorizeLDL functor_type; + functor_type functor(_info, _factorize_mode, _level_sids, _piv, _diag, _buf, d_rval.data()); + #if defined(TACHO_TEST_LEVELSET_TOOLS_KERNEL_OVERHEAD) typedef Kokkos::TeamPolicy, exec_space, typename functor_type::DummyTag> team_policy_factorize; @@ -3940,12 +3951,10 @@ class NumericToolsLevelSet : public NumericToolsBase { typedef Kokkos::TeamPolicy, exec_space, typename functor_type::UpdateTag> team_policy_update; #endif - int rval = 0; - team_policy_factor policy_factor(1, 1, 1); - team_policy_update policy_update(1, 1, 1); - functor_type functor(_info, _factorize_mode, _level_sids, _piv, _diag, _buf, &rval); // get max vector length + team_policy_factor policy_factor(1, 1, 1); + team_policy_update policy_update(1, 1, 1); const ordinal_type vmax = policy_factor.vector_length_max(); { for (ordinal_type lvl = (_team_serial_level_cut - 1); lvl >= 0; --lvl) { @@ -3994,10 +4003,14 @@ class NumericToolsLevelSet : public 
NumericToolsBase { Kokkos::fence(); time_device += tick.seconds(); tick.reset(); } - Kokkos::fence(); + Kokkos::deep_copy(h_rval, d_rval); + int rval = h_rval(0); if (rval != 0) { TACHO_TEST_FOR_EXCEPTION(rval, std::runtime_error, "SYTRF (team) returns non-zero error code."); } + //if (_status != 0) { + // TACHO_TEST_FOR_EXCEPTION(rval, std::runtime_error, "SYTRF (device) returns non-zero error code."); + //} Kokkos::parallel_for("update factor", policy_update, functor); if (verbose) { @@ -4271,6 +4284,10 @@ class NumericToolsLevelSet : public NumericToolsBase { const ordinal_type team_size_factor[2] = {64, 64}, vector_size_factor[2] = {8, 4}; #endif const ordinal_type team_size_update[2] = {16, 8}, vector_size_update[2] = {32, 32}; + + // returned value from team LU + colind_view d_rval("rval",1); + auto h_rval = Kokkos::create_mirror_view(host_memory_space(), d_rval); { typedef TeamFunctor_FactorizeLU functor_type; #if defined(TACHO_TEST_LEVELSET_TOOLS_KERNEL_OVERHEAD) @@ -4285,10 +4302,9 @@ class NumericToolsLevelSet : public NumericToolsBase { typedef Kokkos::TeamPolicy, exec_space, typename functor_type::UpdateTag> team_policy_update; #endif - int rval = 0; team_policy_factor policy_factor(1, 1, 1); team_policy_update policy_update(1, 1, 1); - functor_type functor(_info, _factorize_mode, _level_sids, _piv, _buf, &rval); + functor_type functor(_info, _factorize_mode, _level_sids, _piv, _buf, d_rval.data()); if (pivot_tol > 0.0) { functor.setDiagPertubationTol(pivot_tol); } @@ -4342,13 +4358,15 @@ class NumericToolsLevelSet : public NumericToolsBase { Kokkos::fence(); time_device += tick.seconds(); tick.reset(); } - Kokkos::fence(); + Kokkos::deep_copy(h_rval, d_rval); + int rval = h_rval(0); if (rval != 0) { TACHO_TEST_FOR_EXCEPTION(rval, std::runtime_error, "GETRF (team) returns non-zero error code."); } - if (_status != 0) { - TACHO_TEST_FOR_EXCEPTION(rval, std::runtime_error, "GETRF (device) returns non-zero error code."); - } + //if (_status != 0) { + // 
TACHO_TEST_FOR_EXCEPTION(rval, std::runtime_error, "GETRF (device) returns non-zero error code."); + //} + Kokkos::parallel_for("update factor", policy_update, functor); if (verbose) { Kokkos::fence(); time_update += tick.seconds(); diff --git a/packages/teko/CMakeLists.txt b/packages/teko/CMakeLists.txt index 245c3484a32f..c8c219d6962a 100644 --- a/packages/teko/CMakeLists.txt +++ b/packages/teko/CMakeLists.txt @@ -70,9 +70,7 @@ ENDIF() ADD_SUBDIRECTORY(src) TRIBITS_ADD_TEST_DIRECTORIES(tests) -IF(TEKO_HAVE_EPETRA) - TRIBITS_ADD_EXAMPLE_DIRECTORIES(examples) -ENDIF() +TRIBITS_ADD_EXAMPLE_DIRECTORIES(examples) TRIBITS_EXCLUDE_FILES( tests/data/nsjac.mm diff --git a/packages/teko/examples/BuildPreconditioner/step1/CMakeLists.txt b/packages/teko/examples/BuildPreconditioner/step1/CMakeLists.txt index 8b4c1d802fbe..50943256f7bf 100644 --- a/packages/teko/examples/BuildPreconditioner/step1/CMakeLists.txt +++ b/packages/teko/examples/BuildPreconditioner/step1/CMakeLists.txt @@ -1,9 +1,11 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +IF (TEKO_ENABLE_EPETRA) -TRIBITS_ADD_EXECUTABLE( - BuilderPreconditioner_Step1_Example - SOURCES - example-test.cpp - COMM mpi - ) + TRIBITS_ADD_EXECUTABLE( + BuilderPreconditioner_Step1_Example + SOURCES + example-test.cpp + COMM mpi + ) +ENDIF() diff --git a/packages/teko/examples/CMakeLists.txt b/packages/teko/examples/CMakeLists.txt index 360f396e2853..18406fd59b4a 100644 --- a/packages/teko/examples/CMakeLists.txt +++ b/packages/teko/examples/CMakeLists.txt @@ -1,25 +1,28 @@ -ADD_SUBDIRECTORY(StridedSolve) -ADD_SUBDIRECTORY(AddMultiplyPrecs) ADD_SUBDIRECTORY(BuildPreconditioner) -ADD_SUBDIRECTORY(BasicExamples) +IF (TEKO_ENABLE_EPETRA) + ADD_SUBDIRECTORY(StridedSolve) + ADD_SUBDIRECTORY(AddMultiplyPrecs) + ADD_SUBDIRECTORY(BasicExamples) -IF (ML_ENABLE_TekoSmoothers) - ADD_SUBDIRECTORY(ML-Teko-Coupling) -ENDIF() - -TRIBITS_COPY_FILES_TO_BINARY_DIR(example_copyfiles - SOURCE_FILES nslhs_test.mm - nsrhs_test.mm - SOURCE_DIR 
"${CMAKE_CURRENT_SOURCE_DIR}/data" - DEST_DIR "${CMAKE_CURRENT_BINARY_DIR}/data" - EXEDEPS StridedSolve_Example - ) - -IF(${PROJECT_NAME}_ENABLE_DEVELOPMENT_MODE) - TRIBITS_COPY_FILES_TO_BINARY_DIR(example_copyfiles_from_tests - SOURCE_FILES nsjac.mm - SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../tests/data" + TRIBITS_COPY_FILES_TO_BINARY_DIR(example_copyfiles + SOURCE_FILES nslhs_test.mm + nsrhs_test.mm + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/data" DEST_DIR "${CMAKE_CURRENT_BINARY_DIR}/data" EXEDEPS StridedSolve_Example + ) + + IF(${PROJECT_NAME}_ENABLE_DEVELOPMENT_MODE) + TRIBITS_COPY_FILES_TO_BINARY_DIR(example_copyfiles_from_tests + SOURCE_FILES nsjac.mm + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../tests/data" + DEST_DIR "${CMAKE_CURRENT_BINARY_DIR}/data" + EXEDEPS StridedSolve_Example ) + ENDIF() + +ENDIF() + +IF (ML_ENABLE_TekoSmoothers) + ADD_SUBDIRECTORY(ML-Teko-Coupling) ENDIF() diff --git a/packages/tpetra/CMakeLists.txt b/packages/tpetra/CMakeLists.txt index 4c470608119f..f2f928cfca35 100644 --- a/packages/tpetra/CMakeLists.txt +++ b/packages/tpetra/CMakeLists.txt @@ -24,7 +24,7 @@ TRIBITS_ADD_OPTION_AND_DEFINE( # Supported Kokkos version in Trilinos # NOTE: When we snapshot Kokkos into Trilinos, we have to update these numbers to maintain # compatibility with external Kokkos -SET(Tpetra_SUPPORTED_KOKKOS_VERSION "4.5.0") +SET(Tpetra_SUPPORTED_KOKKOS_VERSION "4.5.1") # Option to allow developers to ignore incompatible Kokkos versions